]> Cypherpunks repositories - gostls13.git/commitdiff
[dev.simd] cmd/compile: alphabetize SIMD intrinsics
authorDavid Chase <drchase@google.com>
Tue, 17 Jun 2025 14:43:59 +0000 (10:43 -0400)
committerDavid Chase <drchase@google.com>
Tue, 17 Jun 2025 16:05:31 +0000 (09:05 -0700)
This is the output of CL 682036

Change-Id: I432c6e059dff7019a6bba6b777ea7fe48990278f
Reviewed-on: https://go-review.googlesource.com/c/go/+/682295
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
src/cmd/compile/internal/ssagen/simdintrinsics.go

index b86c81516601bc5f3540d788c47505a9c9ddc2b6..4b1f8a212ae2f8ad612a1b361560b17fb772ca99 100644 (file)
@@ -11,1437 +11,1413 @@ import (
 const simdPackage = "simd"
 
 func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies ...sys.ArchFamily)) {
-       addF(simdPackage, "Float32x16.ApproximateReciprocal", opLen1(ssa.OpApproximateReciprocalFloat32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x16.ApproximateReciprocalOfSqrt", opLen1(ssa.OpApproximateReciprocalOfSqrtFloat32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x16.Sqrt", opLen1(ssa.OpSqrtFloat32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x4.ApproximateReciprocal", opLen1(ssa.OpApproximateReciprocalFloat32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x4.ApproximateReciprocalOfSqrt", opLen1(ssa.OpApproximateReciprocalOfSqrtFloat32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x4.Ceil", opLen1(ssa.OpCeilFloat32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x4.Floor", opLen1(ssa.OpFloorFloat32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x4.Round", opLen1(ssa.OpRoundFloat32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x4.Sqrt", opLen1(ssa.OpSqrtFloat32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x4.Trunc", opLen1(ssa.OpTruncFloat32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x8.ApproximateReciprocal", opLen1(ssa.OpApproximateReciprocalFloat32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x8.ApproximateReciprocalOfSqrt", opLen1(ssa.OpApproximateReciprocalOfSqrtFloat32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x8.Ceil", opLen1(ssa.OpCeilFloat32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x8.Floor", opLen1(ssa.OpFloorFloat32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x8.Round", opLen1(ssa.OpRoundFloat32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x8.Sqrt", opLen1(ssa.OpSqrtFloat32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x8.Trunc", opLen1(ssa.OpTruncFloat32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x2.ApproximateReciprocal", opLen1(ssa.OpApproximateReciprocalFloat64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x2.ApproximateReciprocalOfSqrt", opLen1(ssa.OpApproximateReciprocalOfSqrtFloat64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x2.Ceil", opLen1(ssa.OpCeilFloat64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x2.Floor", opLen1(ssa.OpFloorFloat64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x2.Round", opLen1(ssa.OpRoundFloat64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x2.Sqrt", opLen1(ssa.OpSqrtFloat64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x2.Trunc", opLen1(ssa.OpTruncFloat64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x4.ApproximateReciprocal", opLen1(ssa.OpApproximateReciprocalFloat64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x4.ApproximateReciprocalOfSqrt", opLen1(ssa.OpApproximateReciprocalOfSqrtFloat64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x4.Ceil", opLen1(ssa.OpCeilFloat64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x4.Floor", opLen1(ssa.OpFloorFloat64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x4.Round", opLen1(ssa.OpRoundFloat64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x4.Sqrt", opLen1(ssa.OpSqrtFloat64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x4.Trunc", opLen1(ssa.OpTruncFloat64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x8.ApproximateReciprocal", opLen1(ssa.OpApproximateReciprocalFloat64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float64x8.ApproximateReciprocalOfSqrt", opLen1(ssa.OpApproximateReciprocalOfSqrtFloat64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float64x8.Sqrt", opLen1(ssa.OpSqrtFloat64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int8x16.Absolute", opLen1(ssa.OpAbsoluteInt8x16, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int8x32.Absolute", opLen1(ssa.OpAbsoluteInt8x32, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int8x64.Absolute", opLen1(ssa.OpAbsoluteInt8x64, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int16x8.Absolute", opLen1(ssa.OpAbsoluteInt16x8, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Int16x16.Absolute", opLen1(ssa.OpAbsoluteInt16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int16x16.PopCount", opLen1(ssa.OpPopCountInt16x16, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Int16x32.Absolute", opLen1(ssa.OpAbsoluteInt16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int16x32.PopCount", opLen1(ssa.OpPopCountInt16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int16x8.Absolute", opLen1(ssa.OpAbsoluteInt16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int16x8.PopCount", opLen1(ssa.OpPopCountInt16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int32x16.Absolute", opLen1(ssa.OpAbsoluteInt32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int32x16.PopCount", opLen1(ssa.OpPopCountInt32x16, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Int32x4.Absolute", opLen1(ssa.OpAbsoluteInt32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int32x4.PopCount", opLen1(ssa.OpPopCountInt32x4, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Int32x8.Absolute", opLen1(ssa.OpAbsoluteInt32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int32x8.PopCount", opLen1(ssa.OpPopCountInt32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int32x16.Absolute", opLen1(ssa.OpAbsoluteInt32x16, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Int64x2.Absolute", opLen1(ssa.OpAbsoluteInt64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int64x2.PopCount", opLen1(ssa.OpPopCountInt64x2, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Int64x4.Absolute", opLen1(ssa.OpAbsoluteInt64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int64x4.PopCount", opLen1(ssa.OpPopCountInt64x4, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Int64x8.Absolute", opLen1(ssa.OpAbsoluteInt64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int64x8.PopCount", opLen1(ssa.OpPopCountInt64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int8x16.Absolute", opLen1(ssa.OpAbsoluteInt8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int8x16.PopCount", opLen1(ssa.OpPopCountInt8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int8x32.Absolute", opLen1(ssa.OpAbsoluteInt8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int8x32.PopCount", opLen1(ssa.OpPopCountInt8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int8x64.Absolute", opLen1(ssa.OpAbsoluteInt8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int8x64.PopCount", opLen1(ssa.OpPopCountInt8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint16x16.PopCount", opLen1(ssa.OpPopCountUint16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint16x32.PopCount", opLen1(ssa.OpPopCountUint16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint16x8.PopCount", opLen1(ssa.OpPopCountUint16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint32x16.PopCount", opLen1(ssa.OpPopCountUint32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint32x4.PopCount", opLen1(ssa.OpPopCountUint32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint32x8.PopCount", opLen1(ssa.OpPopCountUint32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint64x2.PopCount", opLen1(ssa.OpPopCountUint64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint64x4.PopCount", opLen1(ssa.OpPopCountUint64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint64x8.PopCount", opLen1(ssa.OpPopCountUint64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint8x16.PopCount", opLen1(ssa.OpPopCountUint8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint8x32.PopCount", opLen1(ssa.OpPopCountUint8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint8x64.PopCount", opLen1(ssa.OpPopCountUint8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x16.Add", opLen2(ssa.OpAddFloat32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x16.And", opLen2(ssa.OpAndFloat32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x16.AndNot", opLen2(ssa.OpAndNotFloat32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x16.Div", opLen2(ssa.OpDivFloat32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x16.Equal", opLen2(ssa.OpEqualFloat32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x16.Greater", opLen2(ssa.OpGreaterFloat32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x16.GreaterEqual", opLen2(ssa.OpGreaterEqualFloat32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x16.IsNan", opLen2(ssa.OpIsNanFloat32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x16.Less", opLen2(ssa.OpLessFloat32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x16.LessEqual", opLen2(ssa.OpLessEqualFloat32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x16.MaskedApproximateReciprocal", opLen2(ssa.OpMaskedApproximateReciprocalFloat32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x16.MaskedApproximateReciprocalOfSqrt", opLen2(ssa.OpMaskedApproximateReciprocalOfSqrtFloat32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x16.MaskedSqrt", opLen2(ssa.OpMaskedSqrtFloat32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x16.Max", opLen2(ssa.OpMaxFloat32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x16.Min", opLen2(ssa.OpMinFloat32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x16.Mul", opLen2(ssa.OpMulFloat32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x16.MulByPowOf2", opLen2(ssa.OpMulByPowOf2Float32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x16.NotEqual", opLen2(ssa.OpNotEqualFloat32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x16.Or", opLen2(ssa.OpOrFloat32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x16.Sub", opLen2(ssa.OpSubFloat32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x16.Xor", opLen2(ssa.OpXorFloat32x16, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Float32x4.Add", opLen2(ssa.OpAddFloat32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x4.AddSub", opLen2(ssa.OpAddSubFloat32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x4.And", opLen2(ssa.OpAndFloat32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x4.AndNot", opLen2(ssa.OpAndNotFloat32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x4.Div", opLen2(ssa.OpDivFloat32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x4.Equal", opLen2(ssa.OpEqualFloat32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x4.Greater", opLen2(ssa.OpGreaterFloat32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x4.GreaterEqual", opLen2(ssa.OpGreaterEqualFloat32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x4.IsNan", opLen2(ssa.OpIsNanFloat32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x4.Less", opLen2(ssa.OpLessFloat32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x4.LessEqual", opLen2(ssa.OpLessEqualFloat32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x4.MaskedApproximateReciprocal", opLen2(ssa.OpMaskedApproximateReciprocalFloat32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x4.MaskedApproximateReciprocalOfSqrt", opLen2(ssa.OpMaskedApproximateReciprocalOfSqrtFloat32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x4.MaskedSqrt", opLen2(ssa.OpMaskedSqrtFloat32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x4.Max", opLen2(ssa.OpMaxFloat32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x4.Min", opLen2(ssa.OpMinFloat32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x4.Mul", opLen2(ssa.OpMulFloat32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x4.MulByPowOf2", opLen2(ssa.OpMulByPowOf2Float32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x4.NotEqual", opLen2(ssa.OpNotEqualFloat32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x4.Or", opLen2(ssa.OpOrFloat32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x4.PairwiseAdd", opLen2(ssa.OpPairwiseAddFloat32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x4.PairwiseSub", opLen2(ssa.OpPairwiseSubFloat32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x4.Sub", opLen2(ssa.OpSubFloat32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x4.Xor", opLen2(ssa.OpXorFloat32x4, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Float32x8.Add", opLen2(ssa.OpAddFloat32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x8.AddSub", opLen2(ssa.OpAddSubFloat32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x8.And", opLen2(ssa.OpAndFloat32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x8.AndNot", opLen2(ssa.OpAndNotFloat32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x8.Div", opLen2(ssa.OpDivFloat32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x8.Equal", opLen2(ssa.OpEqualFloat32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x8.Greater", opLen2(ssa.OpGreaterFloat32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x8.GreaterEqual", opLen2(ssa.OpGreaterEqualFloat32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x8.IsNan", opLen2(ssa.OpIsNanFloat32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x8.Less", opLen2(ssa.OpLessFloat32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x8.LessEqual", opLen2(ssa.OpLessEqualFloat32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x8.MaskedApproximateReciprocal", opLen2(ssa.OpMaskedApproximateReciprocalFloat32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x8.MaskedApproximateReciprocalOfSqrt", opLen2(ssa.OpMaskedApproximateReciprocalOfSqrtFloat32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x8.MaskedSqrt", opLen2(ssa.OpMaskedSqrtFloat32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x8.Max", opLen2(ssa.OpMaxFloat32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x8.Min", opLen2(ssa.OpMinFloat32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x8.Mul", opLen2(ssa.OpMulFloat32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x8.MulByPowOf2", opLen2(ssa.OpMulByPowOf2Float32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x8.NotEqual", opLen2(ssa.OpNotEqualFloat32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x8.Or", opLen2(ssa.OpOrFloat32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x8.PairwiseAdd", opLen2(ssa.OpPairwiseAddFloat32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x8.PairwiseSub", opLen2(ssa.OpPairwiseSubFloat32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x8.Sub", opLen2(ssa.OpSubFloat32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x8.Xor", opLen2(ssa.OpXorFloat32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float32x16.Add", opLen2(ssa.OpAddFloat32x16, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Float64x2.Add", opLen2(ssa.OpAddFloat64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x2.AddSub", opLen2(ssa.OpAddSubFloat64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x2.And", opLen2(ssa.OpAndFloat64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x2.AndNot", opLen2(ssa.OpAndNotFloat64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x2.Div", opLen2(ssa.OpDivFloat64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x2.DotProdBroadcast", opLen2(ssa.OpDotProdBroadcastFloat64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x2.Equal", opLen2(ssa.OpEqualFloat64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x2.Greater", opLen2(ssa.OpGreaterFloat64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x2.GreaterEqual", opLen2(ssa.OpGreaterEqualFloat64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x2.IsNan", opLen2(ssa.OpIsNanFloat64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x2.Less", opLen2(ssa.OpLessFloat64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x2.LessEqual", opLen2(ssa.OpLessEqualFloat64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x2.MaskedApproximateReciprocal", opLen2(ssa.OpMaskedApproximateReciprocalFloat64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x2.MaskedApproximateReciprocalOfSqrt", opLen2(ssa.OpMaskedApproximateReciprocalOfSqrtFloat64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x2.MaskedSqrt", opLen2(ssa.OpMaskedSqrtFloat64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x2.Max", opLen2(ssa.OpMaxFloat64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x2.Min", opLen2(ssa.OpMinFloat64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x2.Mul", opLen2(ssa.OpMulFloat64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x2.MulByPowOf2", opLen2(ssa.OpMulByPowOf2Float64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x2.NotEqual", opLen2(ssa.OpNotEqualFloat64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x2.Or", opLen2(ssa.OpOrFloat64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x2.PairwiseAdd", opLen2(ssa.OpPairwiseAddFloat64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x2.PairwiseSub", opLen2(ssa.OpPairwiseSubFloat64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x2.Sub", opLen2(ssa.OpSubFloat64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x2.Xor", opLen2(ssa.OpXorFloat64x2, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Float64x4.Add", opLen2(ssa.OpAddFloat64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float64x8.Add", opLen2(ssa.OpAddFloat64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int8x16.Add", opLen2(ssa.OpAddInt8x16, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int8x32.Add", opLen2(ssa.OpAddInt8x32, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int8x64.Add", opLen2(ssa.OpAddInt8x64, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int16x8.Add", opLen2(ssa.OpAddInt16x8, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int16x16.Add", opLen2(ssa.OpAddInt16x16, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int16x32.Add", opLen2(ssa.OpAddInt16x32, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int32x4.Add", opLen2(ssa.OpAddInt32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int32x8.Add", opLen2(ssa.OpAddInt32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int32x16.Add", opLen2(ssa.OpAddInt32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int64x2.Add", opLen2(ssa.OpAddInt64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int64x4.Add", opLen2(ssa.OpAddInt64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int64x8.Add", opLen2(ssa.OpAddInt64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint8x16.Add", opLen2(ssa.OpAddUint8x16, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint8x32.Add", opLen2(ssa.OpAddUint8x32, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint8x64.Add", opLen2(ssa.OpAddUint8x64, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint16x8.Add", opLen2(ssa.OpAddUint16x8, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint16x16.Add", opLen2(ssa.OpAddUint16x16, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint16x32.Add", opLen2(ssa.OpAddUint16x32, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint32x4.Add", opLen2(ssa.OpAddUint32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint32x8.Add", opLen2(ssa.OpAddUint32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint32x16.Add", opLen2(ssa.OpAddUint32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint64x2.Add", opLen2(ssa.OpAddUint64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint64x4.Add", opLen2(ssa.OpAddUint64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint64x8.Add", opLen2(ssa.OpAddUint64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float32x4.AddSub", opLen2(ssa.OpAddSubFloat32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float32x8.AddSub", opLen2(ssa.OpAddSubFloat32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float64x2.AddSub", opLen2(ssa.OpAddSubFloat64x2, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Float64x4.AddSub", opLen2(ssa.OpAddSubFloat64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float32x4.And", opLen2(ssa.OpAndFloat32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float32x8.And", opLen2(ssa.OpAndFloat32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float32x16.And", opLen2(ssa.OpAndFloat32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float64x2.And", opLen2(ssa.OpAndFloat64x2, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Float64x4.And", opLen2(ssa.OpAndFloat64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x4.AndNot", opLen2(ssa.OpAndNotFloat64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x4.Div", opLen2(ssa.OpDivFloat64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x4.Equal", opLen2(ssa.OpEqualFloat64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x4.Greater", opLen2(ssa.OpGreaterFloat64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x4.GreaterEqual", opLen2(ssa.OpGreaterEqualFloat64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x4.IsNan", opLen2(ssa.OpIsNanFloat64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x4.Less", opLen2(ssa.OpLessFloat64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x4.LessEqual", opLen2(ssa.OpLessEqualFloat64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x4.MaskedApproximateReciprocal", opLen2(ssa.OpMaskedApproximateReciprocalFloat64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x4.MaskedApproximateReciprocalOfSqrt", opLen2(ssa.OpMaskedApproximateReciprocalOfSqrtFloat64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x4.MaskedSqrt", opLen2(ssa.OpMaskedSqrtFloat64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x4.Max", opLen2(ssa.OpMaxFloat64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x4.Min", opLen2(ssa.OpMinFloat64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x4.Mul", opLen2(ssa.OpMulFloat64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x4.MulByPowOf2", opLen2(ssa.OpMulByPowOf2Float64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x4.NotEqual", opLen2(ssa.OpNotEqualFloat64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x4.Or", opLen2(ssa.OpOrFloat64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x4.PairwiseAdd", opLen2(ssa.OpPairwiseAddFloat64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x4.PairwiseSub", opLen2(ssa.OpPairwiseSubFloat64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x4.Sub", opLen2(ssa.OpSubFloat64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x4.Xor", opLen2(ssa.OpXorFloat64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x8.Add", opLen2(ssa.OpAddFloat64x8, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Float64x8.And", opLen2(ssa.OpAndFloat64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float64x8.AndNot", opLen2(ssa.OpAndNotFloat64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float64x8.Div", opLen2(ssa.OpDivFloat64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float64x8.Equal", opLen2(ssa.OpEqualFloat64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float64x8.Greater", opLen2(ssa.OpGreaterFloat64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float64x8.GreaterEqual", opLen2(ssa.OpGreaterEqualFloat64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float64x8.IsNan", opLen2(ssa.OpIsNanFloat64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float64x8.Less", opLen2(ssa.OpLessFloat64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float64x8.LessEqual", opLen2(ssa.OpLessEqualFloat64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float64x8.MaskedApproximateReciprocal", opLen2(ssa.OpMaskedApproximateReciprocalFloat64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float64x8.MaskedApproximateReciprocalOfSqrt", opLen2(ssa.OpMaskedApproximateReciprocalOfSqrtFloat64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float64x8.MaskedSqrt", opLen2(ssa.OpMaskedSqrtFloat64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float64x8.Max", opLen2(ssa.OpMaxFloat64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float64x8.Min", opLen2(ssa.OpMinFloat64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float64x8.Mul", opLen2(ssa.OpMulFloat64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float64x8.MulByPowOf2", opLen2(ssa.OpMulByPowOf2Float64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float64x8.NotEqual", opLen2(ssa.OpNotEqualFloat64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float64x8.Or", opLen2(ssa.OpOrFloat64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float64x8.Sub", opLen2(ssa.OpSubFloat64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float64x8.Xor", opLen2(ssa.OpXorFloat64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int16x16.Add", opLen2(ssa.OpAddInt16x16, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int8x16.And", opLen2(ssa.OpAndInt8x16, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int8x32.And", opLen2(ssa.OpAndInt8x32, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int16x8.And", opLen2(ssa.OpAndInt16x8, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Int16x16.And", opLen2(ssa.OpAndInt16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int16x16.AndNot", opLen2(ssa.OpAndNotInt16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int16x16.Equal", opLen2(ssa.OpEqualInt16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int16x16.Greater", opLen2(ssa.OpGreaterInt16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int16x16.GreaterEqual", opLen2(ssa.OpGreaterEqualInt16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int16x16.Less", opLen2(ssa.OpLessInt16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int16x16.LessEqual", opLen2(ssa.OpLessEqualInt16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int16x16.MaskedAbsolute", opLen2(ssa.OpMaskedAbsoluteInt16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int16x16.MaskedPopCount", opLen2(ssa.OpMaskedPopCountInt16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int16x16.Max", opLen2(ssa.OpMaxInt16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int16x16.Min", opLen2(ssa.OpMinInt16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int16x16.MulHigh", opLen2(ssa.OpMulHighInt16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int16x16.MulLow", opLen2(ssa.OpMulLowInt16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int16x16.NotEqual", opLen2(ssa.OpNotEqualInt16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int16x16.Or", opLen2(ssa.OpOrInt16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int16x16.PairDotProd", opLen2(ssa.OpPairDotProdInt16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int16x16.PairwiseAdd", opLen2(ssa.OpPairwiseAddInt16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int16x16.PairwiseSub", opLen2(ssa.OpPairwiseSubInt16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int16x16.SaturatedAdd", opLen2(ssa.OpSaturatedAddInt16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int16x16.SaturatedPairwiseAdd", opLen2(ssa.OpSaturatedPairwiseAddInt16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int16x16.SaturatedPairwiseSub", opLen2(ssa.OpSaturatedPairwiseSubInt16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int16x16.SaturatedSub", opLen2(ssa.OpSaturatedSubInt16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int16x16.Sign", opLen2(ssa.OpSignInt16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int16x16.Sub", opLen2(ssa.OpSubInt16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int16x16.Xor", opLen2(ssa.OpXorInt16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int16x32.Add", opLen2(ssa.OpAddInt16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int16x32.Equal", opLen2(ssa.OpEqualInt16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int16x32.Greater", opLen2(ssa.OpGreaterInt16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int16x32.GreaterEqual", opLen2(ssa.OpGreaterEqualInt16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int16x32.Less", opLen2(ssa.OpLessInt16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int16x32.LessEqual", opLen2(ssa.OpLessEqualInt16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int16x32.MaskedAbsolute", opLen2(ssa.OpMaskedAbsoluteInt16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int16x32.MaskedPopCount", opLen2(ssa.OpMaskedPopCountInt16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int16x32.Max", opLen2(ssa.OpMaxInt16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int16x32.Min", opLen2(ssa.OpMinInt16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int16x32.MulHigh", opLen2(ssa.OpMulHighInt16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int16x32.MulLow", opLen2(ssa.OpMulLowInt16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int16x32.NotEqual", opLen2(ssa.OpNotEqualInt16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int16x32.PairDotProd", opLen2(ssa.OpPairDotProdInt16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int16x32.SaturatedAdd", opLen2(ssa.OpSaturatedAddInt16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int16x32.SaturatedSub", opLen2(ssa.OpSaturatedSubInt16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int16x32.Sub", opLen2(ssa.OpSubInt16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int16x8.Add", opLen2(ssa.OpAddInt16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int16x8.And", opLen2(ssa.OpAndInt16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int16x8.AndNot", opLen2(ssa.OpAndNotInt16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int16x8.Equal", opLen2(ssa.OpEqualInt16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int16x8.Greater", opLen2(ssa.OpGreaterInt16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int16x8.GreaterEqual", opLen2(ssa.OpGreaterEqualInt16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int16x8.Less", opLen2(ssa.OpLessInt16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int16x8.LessEqual", opLen2(ssa.OpLessEqualInt16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int16x8.MaskedAbsolute", opLen2(ssa.OpMaskedAbsoluteInt16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int16x8.MaskedPopCount", opLen2(ssa.OpMaskedPopCountInt16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int16x8.Max", opLen2(ssa.OpMaxInt16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int16x8.Min", opLen2(ssa.OpMinInt16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int16x8.MulHigh", opLen2(ssa.OpMulHighInt16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int16x8.MulLow", opLen2(ssa.OpMulLowInt16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int16x8.NotEqual", opLen2(ssa.OpNotEqualInt16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int16x8.Or", opLen2(ssa.OpOrInt16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int16x8.PairDotProd", opLen2(ssa.OpPairDotProdInt16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int16x8.PairwiseAdd", opLen2(ssa.OpPairwiseAddInt16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int16x8.PairwiseSub", opLen2(ssa.OpPairwiseSubInt16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int16x8.SaturatedAdd", opLen2(ssa.OpSaturatedAddInt16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int16x8.SaturatedPairwiseAdd", opLen2(ssa.OpSaturatedPairwiseAddInt16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int16x8.SaturatedPairwiseSub", opLen2(ssa.OpSaturatedPairwiseSubInt16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int16x8.SaturatedSub", opLen2(ssa.OpSaturatedSubInt16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int16x8.Sign", opLen2(ssa.OpSignInt16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int16x8.Sub", opLen2(ssa.OpSubInt16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int16x8.Xor", opLen2(ssa.OpXorInt16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int32x16.Add", opLen2(ssa.OpAddInt32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int32x16.And", opLen2(ssa.OpAndInt32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int32x16.AndNot", opLen2(ssa.OpAndNotInt32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int32x16.Equal", opLen2(ssa.OpEqualInt32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int32x16.Greater", opLen2(ssa.OpGreaterInt32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int32x16.GreaterEqual", opLen2(ssa.OpGreaterEqualInt32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int32x16.Less", opLen2(ssa.OpLessInt32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int32x16.LessEqual", opLen2(ssa.OpLessEqualInt32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int32x16.MaskedAbsolute", opLen2(ssa.OpMaskedAbsoluteInt32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int32x16.MaskedPopCount", opLen2(ssa.OpMaskedPopCountInt32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int32x16.Max", opLen2(ssa.OpMaxInt32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int32x16.Min", opLen2(ssa.OpMinInt32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int32x16.MulLow", opLen2(ssa.OpMulLowInt32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int32x16.NotEqual", opLen2(ssa.OpNotEqualInt32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int32x16.Or", opLen2(ssa.OpOrInt32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int32x16.Sub", opLen2(ssa.OpSubInt32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int32x16.Xor", opLen2(ssa.OpXorInt32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int32x4.Add", opLen2(ssa.OpAddInt32x4, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Int32x4.And", opLen2(ssa.OpAndInt32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int32x4.AndNot", opLen2(ssa.OpAndNotInt32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int32x4.Equal", opLen2(ssa.OpEqualInt32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int32x4.Greater", opLen2(ssa.OpGreaterInt32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int32x4.GreaterEqual", opLen2(ssa.OpGreaterEqualInt32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int32x4.Less", opLen2(ssa.OpLessInt32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int32x4.LessEqual", opLen2(ssa.OpLessEqualInt32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int32x4.MaskedAbsolute", opLen2(ssa.OpMaskedAbsoluteInt32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int32x4.MaskedPopCount", opLen2(ssa.OpMaskedPopCountInt32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int32x4.Max", opLen2(ssa.OpMaxInt32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int32x4.Min", opLen2(ssa.OpMinInt32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int32x4.MulEvenWiden", opLen2(ssa.OpMulEvenWidenInt32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int32x4.MulLow", opLen2(ssa.OpMulLowInt32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int32x4.NotEqual", opLen2(ssa.OpNotEqualInt32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int32x4.Or", opLen2(ssa.OpOrInt32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int32x4.PairwiseAdd", opLen2(ssa.OpPairwiseAddInt32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int32x4.PairwiseSub", opLen2(ssa.OpPairwiseSubInt32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int32x4.Sign", opLen2(ssa.OpSignInt32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int32x4.Sub", opLen2(ssa.OpSubInt32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int32x4.Xor", opLen2(ssa.OpXorInt32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int32x8.Add", opLen2(ssa.OpAddInt32x8, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Int32x8.And", opLen2(ssa.OpAndInt32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int32x8.AndNot", opLen2(ssa.OpAndNotInt32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int32x8.Equal", opLen2(ssa.OpEqualInt32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int32x8.Greater", opLen2(ssa.OpGreaterInt32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int32x8.GreaterEqual", opLen2(ssa.OpGreaterEqualInt32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int32x8.Less", opLen2(ssa.OpLessInt32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int32x8.LessEqual", opLen2(ssa.OpLessEqualInt32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int32x8.MaskedAbsolute", opLen2(ssa.OpMaskedAbsoluteInt32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int32x8.MaskedPopCount", opLen2(ssa.OpMaskedPopCountInt32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int32x8.Max", opLen2(ssa.OpMaxInt32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int32x8.Min", opLen2(ssa.OpMinInt32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int32x8.MulEvenWiden", opLen2(ssa.OpMulEvenWidenInt32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int32x8.MulLow", opLen2(ssa.OpMulLowInt32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int32x8.NotEqual", opLen2(ssa.OpNotEqualInt32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int32x8.Or", opLen2(ssa.OpOrInt32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int32x8.PairwiseAdd", opLen2(ssa.OpPairwiseAddInt32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int32x8.PairwiseSub", opLen2(ssa.OpPairwiseSubInt32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int32x8.Sign", opLen2(ssa.OpSignInt32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int32x8.Sub", opLen2(ssa.OpSubInt32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int32x8.Xor", opLen2(ssa.OpXorInt32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int64x2.Add", opLen2(ssa.OpAddInt64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int32x16.And", opLen2(ssa.OpAndInt32x16, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Int64x2.And", opLen2(ssa.OpAndInt64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int64x2.AndNot", opLen2(ssa.OpAndNotInt64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int64x2.Equal", opLen2(ssa.OpEqualInt64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int64x2.Greater", opLen2(ssa.OpGreaterInt64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int64x2.GreaterEqual", opLen2(ssa.OpGreaterEqualInt64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int64x2.Less", opLen2(ssa.OpLessInt64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int64x2.LessEqual", opLen2(ssa.OpLessEqualInt64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int64x2.MaskedAbsolute", opLen2(ssa.OpMaskedAbsoluteInt64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int64x2.MaskedPopCount", opLen2(ssa.OpMaskedPopCountInt64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int64x2.Max", opLen2(ssa.OpMaxInt64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int64x2.Min", opLen2(ssa.OpMinInt64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int64x2.MulEvenWiden", opLen2(ssa.OpMulEvenWidenInt64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int64x2.MulLow", opLen2(ssa.OpMulLowInt64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int64x2.NotEqual", opLen2(ssa.OpNotEqualInt64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int64x2.Or", opLen2(ssa.OpOrInt64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int64x2.Sub", opLen2(ssa.OpSubInt64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int64x2.Xor", opLen2(ssa.OpXorInt64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int64x4.Add", opLen2(ssa.OpAddInt64x4, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Int64x4.And", opLen2(ssa.OpAndInt64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int64x4.AndNot", opLen2(ssa.OpAndNotInt64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int64x4.Equal", opLen2(ssa.OpEqualInt64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int64x4.Greater", opLen2(ssa.OpGreaterInt64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int64x4.GreaterEqual", opLen2(ssa.OpGreaterEqualInt64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int64x4.Less", opLen2(ssa.OpLessInt64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int64x4.LessEqual", opLen2(ssa.OpLessEqualInt64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int64x4.MaskedAbsolute", opLen2(ssa.OpMaskedAbsoluteInt64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int64x4.MaskedPopCount", opLen2(ssa.OpMaskedPopCountInt64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int64x4.Max", opLen2(ssa.OpMaxInt64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int64x4.Min", opLen2(ssa.OpMinInt64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int64x4.MulEvenWiden", opLen2(ssa.OpMulEvenWidenInt64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int64x4.MulLow", opLen2(ssa.OpMulLowInt64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int64x4.NotEqual", opLen2(ssa.OpNotEqualInt64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int64x4.Or", opLen2(ssa.OpOrInt64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int64x4.Sub", opLen2(ssa.OpSubInt64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int64x4.Xor", opLen2(ssa.OpXorInt64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int64x8.Add", opLen2(ssa.OpAddInt64x8, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Int64x8.And", opLen2(ssa.OpAndInt64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int64x8.AndNot", opLen2(ssa.OpAndNotInt64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int64x8.Equal", opLen2(ssa.OpEqualInt64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int64x8.Greater", opLen2(ssa.OpGreaterInt64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int64x8.GreaterEqual", opLen2(ssa.OpGreaterEqualInt64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int64x8.Less", opLen2(ssa.OpLessInt64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int64x8.LessEqual", opLen2(ssa.OpLessEqualInt64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int64x8.MaskedAbsolute", opLen2(ssa.OpMaskedAbsoluteInt64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int64x8.MaskedPopCount", opLen2(ssa.OpMaskedPopCountInt64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int64x8.Max", opLen2(ssa.OpMaxInt64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int64x8.Min", opLen2(ssa.OpMinInt64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int64x8.MulEvenWiden", opLen2(ssa.OpMulEvenWidenInt64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int64x8.MulLow", opLen2(ssa.OpMulLowInt64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int64x8.NotEqual", opLen2(ssa.OpNotEqualInt64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int64x8.Or", opLen2(ssa.OpOrInt64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int64x8.Sub", opLen2(ssa.OpSubInt64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int64x8.Xor", opLen2(ssa.OpXorInt64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int8x16.Add", opLen2(ssa.OpAddInt8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int8x16.And", opLen2(ssa.OpAndInt8x16, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint8x16.And", opLen2(ssa.OpAndUint8x16, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint8x32.And", opLen2(ssa.OpAndUint8x32, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint16x8.And", opLen2(ssa.OpAndUint16x8, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint16x16.And", opLen2(ssa.OpAndUint16x16, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint32x4.And", opLen2(ssa.OpAndUint32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint32x8.And", opLen2(ssa.OpAndUint32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint32x16.And", opLen2(ssa.OpAndUint32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint64x2.And", opLen2(ssa.OpAndUint64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint64x4.And", opLen2(ssa.OpAndUint64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint64x8.And", opLen2(ssa.OpAndUint64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float32x4.AndNot", opLen2(ssa.OpAndNotFloat32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float32x8.AndNot", opLen2(ssa.OpAndNotFloat32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float32x16.AndNot", opLen2(ssa.OpAndNotFloat32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float64x2.AndNot", opLen2(ssa.OpAndNotFloat64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float64x4.AndNot", opLen2(ssa.OpAndNotFloat64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float64x8.AndNot", opLen2(ssa.OpAndNotFloat64x8, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Int8x16.AndNot", opLen2(ssa.OpAndNotInt8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int8x16.Equal", opLen2(ssa.OpEqualInt8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int8x16.Greater", opLen2(ssa.OpGreaterInt8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int8x16.GreaterEqual", opLen2(ssa.OpGreaterEqualInt8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int8x16.Less", opLen2(ssa.OpLessInt8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int8x16.LessEqual", opLen2(ssa.OpLessEqualInt8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int8x16.MaskedAbsolute", opLen2(ssa.OpMaskedAbsoluteInt8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int8x16.MaskedPopCount", opLen2(ssa.OpMaskedPopCountInt8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int8x16.Max", opLen2(ssa.OpMaxInt8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int8x16.Min", opLen2(ssa.OpMinInt8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int8x16.NotEqual", opLen2(ssa.OpNotEqualInt8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int8x16.Or", opLen2(ssa.OpOrInt8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int8x16.SaturatedAdd", opLen2(ssa.OpSaturatedAddInt8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int8x16.SaturatedSub", opLen2(ssa.OpSaturatedSubInt8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int8x16.Sign", opLen2(ssa.OpSignInt8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int8x16.Sub", opLen2(ssa.OpSubInt8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int8x16.Xor", opLen2(ssa.OpXorInt8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int8x32.Add", opLen2(ssa.OpAddInt8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int8x32.And", opLen2(ssa.OpAndInt8x32, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Int8x32.AndNot", opLen2(ssa.OpAndNotInt8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int8x32.Equal", opLen2(ssa.OpEqualInt8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int8x32.Greater", opLen2(ssa.OpGreaterInt8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int8x32.GreaterEqual", opLen2(ssa.OpGreaterEqualInt8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int8x32.Less", opLen2(ssa.OpLessInt8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int8x32.LessEqual", opLen2(ssa.OpLessEqualInt8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int8x32.MaskedAbsolute", opLen2(ssa.OpMaskedAbsoluteInt8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int8x32.MaskedPopCount", opLen2(ssa.OpMaskedPopCountInt8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int8x32.Max", opLen2(ssa.OpMaxInt8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int8x32.Min", opLen2(ssa.OpMinInt8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int8x32.NotEqual", opLen2(ssa.OpNotEqualInt8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int8x32.Or", opLen2(ssa.OpOrInt8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int8x32.SaturatedAdd", opLen2(ssa.OpSaturatedAddInt8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int8x32.SaturatedSub", opLen2(ssa.OpSaturatedSubInt8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int8x32.Sign", opLen2(ssa.OpSignInt8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int8x32.Sub", opLen2(ssa.OpSubInt8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int8x32.Xor", opLen2(ssa.OpXorInt8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int8x64.Add", opLen2(ssa.OpAddInt8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int8x64.Equal", opLen2(ssa.OpEqualInt8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int8x64.Greater", opLen2(ssa.OpGreaterInt8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int8x64.GreaterEqual", opLen2(ssa.OpGreaterEqualInt8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int8x64.Less", opLen2(ssa.OpLessInt8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int8x64.LessEqual", opLen2(ssa.OpLessEqualInt8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int8x64.MaskedAbsolute", opLen2(ssa.OpMaskedAbsoluteInt8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int8x64.MaskedPopCount", opLen2(ssa.OpMaskedPopCountInt8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int8x64.Max", opLen2(ssa.OpMaxInt8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int8x64.Min", opLen2(ssa.OpMinInt8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int8x64.NotEqual", opLen2(ssa.OpNotEqualInt8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int8x64.SaturatedAdd", opLen2(ssa.OpSaturatedAddInt8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int8x64.SaturatedSub", opLen2(ssa.OpSaturatedSubInt8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int8x64.Sub", opLen2(ssa.OpSubInt8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint16x16.Add", opLen2(ssa.OpAddUint16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint16x16.And", opLen2(ssa.OpAndUint16x16, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int16x8.AndNot", opLen2(ssa.OpAndNotInt16x8, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int16x16.AndNot", opLen2(ssa.OpAndNotInt16x16, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int32x4.AndNot", opLen2(ssa.OpAndNotInt32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int32x8.AndNot", opLen2(ssa.OpAndNotInt32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int32x16.AndNot", opLen2(ssa.OpAndNotInt32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int64x2.AndNot", opLen2(ssa.OpAndNotInt64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int64x4.AndNot", opLen2(ssa.OpAndNotInt64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int64x8.AndNot", opLen2(ssa.OpAndNotInt64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint8x16.AndNot", opLen2(ssa.OpAndNotUint8x16, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint8x32.AndNot", opLen2(ssa.OpAndNotUint8x32, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint16x8.AndNot", opLen2(ssa.OpAndNotUint16x8, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Uint16x16.AndNot", opLen2(ssa.OpAndNotUint16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint16x16.Average", opLen2(ssa.OpAverageUint16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint16x16.Equal", opLen2(ssa.OpEqualUint16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint16x16.Greater", opLen2(ssa.OpGreaterUint16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint16x16.GreaterEqual", opLen2(ssa.OpGreaterEqualUint16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint16x16.Less", opLen2(ssa.OpLessUint16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint16x16.LessEqual", opLen2(ssa.OpLessEqualUint16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint16x16.MaskedPopCount", opLen2(ssa.OpMaskedPopCountUint16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint16x16.Max", opLen2(ssa.OpMaxUint16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint16x16.Min", opLen2(ssa.OpMinUint16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint16x16.MulHigh", opLen2(ssa.OpMulHighUint16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint16x16.NotEqual", opLen2(ssa.OpNotEqualUint16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint16x16.Or", opLen2(ssa.OpOrUint16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint16x16.PairwiseAdd", opLen2(ssa.OpPairwiseAddUint16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint16x16.PairwiseSub", opLen2(ssa.OpPairwiseSubUint16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint16x16.SaturatedAdd", opLen2(ssa.OpSaturatedAddUint16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint16x16.SaturatedSub", opLen2(ssa.OpSaturatedSubUint16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint16x16.SaturatedUnsignedSignedPairDotProd", opLen2(ssa.OpSaturatedUnsignedSignedPairDotProdUint16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint16x16.Sub", opLen2(ssa.OpSubUint16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint16x16.Xor", opLen2(ssa.OpXorUint16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint16x32.Add", opLen2(ssa.OpAddUint16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint16x32.Average", opLen2(ssa.OpAverageUint16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint16x32.Equal", opLen2(ssa.OpEqualUint16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint16x32.Greater", opLen2(ssa.OpGreaterUint16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint16x32.GreaterEqual", opLen2(ssa.OpGreaterEqualUint16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint16x32.Less", opLen2(ssa.OpLessUint16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint16x32.LessEqual", opLen2(ssa.OpLessEqualUint16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint16x32.MaskedPopCount", opLen2(ssa.OpMaskedPopCountUint16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint16x32.Max", opLen2(ssa.OpMaxUint16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint16x32.Min", opLen2(ssa.OpMinUint16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint16x32.MulHigh", opLen2(ssa.OpMulHighUint16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint16x32.NotEqual", opLen2(ssa.OpNotEqualUint16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint16x32.SaturatedAdd", opLen2(ssa.OpSaturatedAddUint16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint16x32.SaturatedSub", opLen2(ssa.OpSaturatedSubUint16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint16x32.SaturatedUnsignedSignedPairDotProd", opLen2(ssa.OpSaturatedUnsignedSignedPairDotProdUint16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint16x32.Sub", opLen2(ssa.OpSubUint16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint16x8.Add", opLen2(ssa.OpAddUint16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint16x8.And", opLen2(ssa.OpAndUint16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint16x8.AndNot", opLen2(ssa.OpAndNotUint16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint16x8.Average", opLen2(ssa.OpAverageUint16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint16x8.Equal", opLen2(ssa.OpEqualUint16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint16x8.Greater", opLen2(ssa.OpGreaterUint16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint16x8.GreaterEqual", opLen2(ssa.OpGreaterEqualUint16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint16x8.Less", opLen2(ssa.OpLessUint16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint16x8.LessEqual", opLen2(ssa.OpLessEqualUint16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint16x8.MaskedPopCount", opLen2(ssa.OpMaskedPopCountUint16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint16x8.Max", opLen2(ssa.OpMaxUint16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint16x8.Min", opLen2(ssa.OpMinUint16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint16x8.MulHigh", opLen2(ssa.OpMulHighUint16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint16x8.NotEqual", opLen2(ssa.OpNotEqualUint16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint16x8.Or", opLen2(ssa.OpOrUint16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint16x8.PairwiseAdd", opLen2(ssa.OpPairwiseAddUint16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint16x8.PairwiseSub", opLen2(ssa.OpPairwiseSubUint16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint16x8.SaturatedAdd", opLen2(ssa.OpSaturatedAddUint16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint16x8.SaturatedSub", opLen2(ssa.OpSaturatedSubUint16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint16x8.SaturatedUnsignedSignedPairDotProd", opLen2(ssa.OpSaturatedUnsignedSignedPairDotProdUint16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint16x8.Sub", opLen2(ssa.OpSubUint16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint16x8.Xor", opLen2(ssa.OpXorUint16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint32x16.Add", opLen2(ssa.OpAddUint32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint32x16.And", opLen2(ssa.OpAndUint32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint32x16.AndNot", opLen2(ssa.OpAndNotUint32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint32x16.Equal", opLen2(ssa.OpEqualUint32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint32x16.Greater", opLen2(ssa.OpGreaterUint32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint32x16.GreaterEqual", opLen2(ssa.OpGreaterEqualUint32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint32x16.Less", opLen2(ssa.OpLessUint32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint32x16.LessEqual", opLen2(ssa.OpLessEqualUint32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint32x16.MaskedPopCount", opLen2(ssa.OpMaskedPopCountUint32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint32x16.Max", opLen2(ssa.OpMaxUint32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint32x16.Min", opLen2(ssa.OpMinUint32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint32x16.NotEqual", opLen2(ssa.OpNotEqualUint32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint32x16.Or", opLen2(ssa.OpOrUint32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint32x16.Sub", opLen2(ssa.OpSubUint32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint32x16.Xor", opLen2(ssa.OpXorUint32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint32x4.Add", opLen2(ssa.OpAddUint32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint32x4.And", opLen2(ssa.OpAndUint32x4, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Uint32x4.AndNot", opLen2(ssa.OpAndNotUint32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint32x4.Equal", opLen2(ssa.OpEqualUint32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint32x4.Greater", opLen2(ssa.OpGreaterUint32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint32x4.GreaterEqual", opLen2(ssa.OpGreaterEqualUint32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint32x4.Less", opLen2(ssa.OpLessUint32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint32x4.LessEqual", opLen2(ssa.OpLessEqualUint32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint32x4.MaskedPopCount", opLen2(ssa.OpMaskedPopCountUint32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint32x4.Max", opLen2(ssa.OpMaxUint32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint32x4.Min", opLen2(ssa.OpMinUint32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint32x4.MulEvenWiden", opLen2(ssa.OpMulEvenWidenUint32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint32x4.NotEqual", opLen2(ssa.OpNotEqualUint32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint32x4.Or", opLen2(ssa.OpOrUint32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint32x4.PairwiseAdd", opLen2(ssa.OpPairwiseAddUint32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint32x4.PairwiseSub", opLen2(ssa.OpPairwiseSubUint32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint32x4.Sub", opLen2(ssa.OpSubUint32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint32x4.Xor", opLen2(ssa.OpXorUint32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint32x8.Add", opLen2(ssa.OpAddUint32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint32x8.And", opLen2(ssa.OpAndUint32x8, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Uint32x8.AndNot", opLen2(ssa.OpAndNotUint32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint32x8.Equal", opLen2(ssa.OpEqualUint32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint32x8.Greater", opLen2(ssa.OpGreaterUint32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint32x8.GreaterEqual", opLen2(ssa.OpGreaterEqualUint32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint32x8.Less", opLen2(ssa.OpLessUint32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint32x8.LessEqual", opLen2(ssa.OpLessEqualUint32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint32x8.MaskedPopCount", opLen2(ssa.OpMaskedPopCountUint32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint32x8.Max", opLen2(ssa.OpMaxUint32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint32x8.Min", opLen2(ssa.OpMinUint32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint32x8.MulEvenWiden", opLen2(ssa.OpMulEvenWidenUint32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint32x8.NotEqual", opLen2(ssa.OpNotEqualUint32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint32x8.Or", opLen2(ssa.OpOrUint32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint32x8.PairwiseAdd", opLen2(ssa.OpPairwiseAddUint32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint32x8.PairwiseSub", opLen2(ssa.OpPairwiseSubUint32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint32x8.Sub", opLen2(ssa.OpSubUint32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint32x8.Xor", opLen2(ssa.OpXorUint32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint64x2.Add", opLen2(ssa.OpAddUint64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint64x2.And", opLen2(ssa.OpAndUint64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint32x16.AndNot", opLen2(ssa.OpAndNotUint32x16, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Uint64x2.AndNot", opLen2(ssa.OpAndNotUint64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint64x2.Equal", opLen2(ssa.OpEqualUint64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint64x2.Greater", opLen2(ssa.OpGreaterUint64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint64x2.GreaterEqual", opLen2(ssa.OpGreaterEqualUint64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint64x2.Less", opLen2(ssa.OpLessUint64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint64x2.LessEqual", opLen2(ssa.OpLessEqualUint64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint64x2.MaskedPopCount", opLen2(ssa.OpMaskedPopCountUint64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint64x2.Max", opLen2(ssa.OpMaxUint64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint64x2.Min", opLen2(ssa.OpMinUint64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint64x2.MulEvenWiden", opLen2(ssa.OpMulEvenWidenUint64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint64x2.NotEqual", opLen2(ssa.OpNotEqualUint64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint64x2.Or", opLen2(ssa.OpOrUint64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint64x2.Sub", opLen2(ssa.OpSubUint64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint64x2.Xor", opLen2(ssa.OpXorUint64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint64x4.Add", opLen2(ssa.OpAddUint64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint64x4.And", opLen2(ssa.OpAndUint64x4, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Uint64x4.AndNot", opLen2(ssa.OpAndNotUint64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint64x4.Equal", opLen2(ssa.OpEqualUint64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint64x4.Greater", opLen2(ssa.OpGreaterUint64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint64x4.GreaterEqual", opLen2(ssa.OpGreaterEqualUint64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint64x4.Less", opLen2(ssa.OpLessUint64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint64x4.LessEqual", opLen2(ssa.OpLessEqualUint64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint64x4.MaskedPopCount", opLen2(ssa.OpMaskedPopCountUint64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint64x4.Max", opLen2(ssa.OpMaxUint64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint64x4.Min", opLen2(ssa.OpMinUint64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint64x4.MulEvenWiden", opLen2(ssa.OpMulEvenWidenUint64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint64x4.NotEqual", opLen2(ssa.OpNotEqualUint64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint64x4.Or", opLen2(ssa.OpOrUint64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint64x4.Sub", opLen2(ssa.OpSubUint64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint64x4.Xor", opLen2(ssa.OpXorUint64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint64x8.Add", opLen2(ssa.OpAddUint64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint64x8.And", opLen2(ssa.OpAndUint64x8, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Uint64x8.AndNot", opLen2(ssa.OpAndNotUint64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint64x8.Equal", opLen2(ssa.OpEqualUint64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint64x8.Greater", opLen2(ssa.OpGreaterUint64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint64x8.GreaterEqual", opLen2(ssa.OpGreaterEqualUint64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint64x8.Less", opLen2(ssa.OpLessUint64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint64x8.LessEqual", opLen2(ssa.OpLessEqualUint64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint64x8.MaskedPopCount", opLen2(ssa.OpMaskedPopCountUint64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint64x8.Max", opLen2(ssa.OpMaxUint64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint64x8.Min", opLen2(ssa.OpMinUint64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint64x8.MulEvenWiden", opLen2(ssa.OpMulEvenWidenUint64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint64x8.NotEqual", opLen2(ssa.OpNotEqualUint64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint64x8.Or", opLen2(ssa.OpOrUint64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint64x8.Sub", opLen2(ssa.OpSubUint64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint64x8.Xor", opLen2(ssa.OpXorUint64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint8x16.Add", opLen2(ssa.OpAddUint8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint8x16.And", opLen2(ssa.OpAndUint8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint8x16.AndNot", opLen2(ssa.OpAndNotUint8x16, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float32x4.ApproximateReciprocal", opLen1(ssa.OpApproximateReciprocalFloat32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float32x8.ApproximateReciprocal", opLen1(ssa.OpApproximateReciprocalFloat32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float32x16.ApproximateReciprocal", opLen1(ssa.OpApproximateReciprocalFloat32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float64x2.ApproximateReciprocal", opLen1(ssa.OpApproximateReciprocalFloat64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float64x4.ApproximateReciprocal", opLen1(ssa.OpApproximateReciprocalFloat64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float64x8.ApproximateReciprocal", opLen1(ssa.OpApproximateReciprocalFloat64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float32x4.ApproximateReciprocalOfSqrt", opLen1(ssa.OpApproximateReciprocalOfSqrtFloat32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float32x8.ApproximateReciprocalOfSqrt", opLen1(ssa.OpApproximateReciprocalOfSqrtFloat32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float32x16.ApproximateReciprocalOfSqrt", opLen1(ssa.OpApproximateReciprocalOfSqrtFloat32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float64x2.ApproximateReciprocalOfSqrt", opLen1(ssa.OpApproximateReciprocalOfSqrtFloat64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float64x4.ApproximateReciprocalOfSqrt", opLen1(ssa.OpApproximateReciprocalOfSqrtFloat64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float64x8.ApproximateReciprocalOfSqrt", opLen1(ssa.OpApproximateReciprocalOfSqrtFloat64x8, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Uint8x16.Average", opLen2(ssa.OpAverageUint8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint8x16.Equal", opLen2(ssa.OpEqualUint8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint8x16.Greater", opLen2(ssa.OpGreaterUint8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint8x16.GreaterEqual", opLen2(ssa.OpGreaterEqualUint8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint8x16.Less", opLen2(ssa.OpLessUint8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint8x16.LessEqual", opLen2(ssa.OpLessEqualUint8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint8x16.MaskedPopCount", opLen2(ssa.OpMaskedPopCountUint8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint8x16.Max", opLen2(ssa.OpMaxUint8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint8x16.Min", opLen2(ssa.OpMinUint8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint8x16.NotEqual", opLen2(ssa.OpNotEqualUint8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint8x16.Or", opLen2(ssa.OpOrUint8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint8x16.SaturatedAdd", opLen2(ssa.OpSaturatedAddUint8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint8x16.SaturatedSub", opLen2(ssa.OpSaturatedSubUint8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint8x16.SaturatedUnsignedSignedPairDotProd", opLen2(ssa.OpSaturatedUnsignedSignedPairDotProdUint8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint8x16.Sub", opLen2(ssa.OpSubUint8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint8x16.Xor", opLen2(ssa.OpXorUint8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint8x32.Add", opLen2(ssa.OpAddUint8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint8x32.And", opLen2(ssa.OpAndUint8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint8x32.AndNot", opLen2(ssa.OpAndNotUint8x32, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Uint8x32.Average", opLen2(ssa.OpAverageUint8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint8x32.Equal", opLen2(ssa.OpEqualUint8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint8x32.Greater", opLen2(ssa.OpGreaterUint8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint8x32.GreaterEqual", opLen2(ssa.OpGreaterEqualUint8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint8x32.Less", opLen2(ssa.OpLessUint8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint8x32.LessEqual", opLen2(ssa.OpLessEqualUint8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint8x32.MaskedPopCount", opLen2(ssa.OpMaskedPopCountUint8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint8x32.Max", opLen2(ssa.OpMaxUint8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint8x32.Min", opLen2(ssa.OpMinUint8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint8x32.NotEqual", opLen2(ssa.OpNotEqualUint8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint8x32.Or", opLen2(ssa.OpOrUint8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint8x32.SaturatedAdd", opLen2(ssa.OpSaturatedAddUint8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint8x32.SaturatedSub", opLen2(ssa.OpSaturatedSubUint8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint8x32.SaturatedUnsignedSignedPairDotProd", opLen2(ssa.OpSaturatedUnsignedSignedPairDotProdUint8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint8x32.Sub", opLen2(ssa.OpSubUint8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint8x32.Xor", opLen2(ssa.OpXorUint8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint8x64.Add", opLen2(ssa.OpAddUint8x64, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Uint8x64.Average", opLen2(ssa.OpAverageUint8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint8x64.Equal", opLen2(ssa.OpEqualUint8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint8x64.Greater", opLen2(ssa.OpGreaterUint8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint8x64.GreaterEqual", opLen2(ssa.OpGreaterEqualUint8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint8x64.Less", opLen2(ssa.OpLessUint8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint8x64.LessEqual", opLen2(ssa.OpLessEqualUint8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint8x64.MaskedPopCount", opLen2(ssa.OpMaskedPopCountUint8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint8x64.Max", opLen2(ssa.OpMaxUint8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint8x64.Min", opLen2(ssa.OpMinUint8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint8x64.NotEqual", opLen2(ssa.OpNotEqualUint8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint8x64.SaturatedAdd", opLen2(ssa.OpSaturatedAddUint8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint8x64.SaturatedSub", opLen2(ssa.OpSaturatedSubUint8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint8x64.Sub", opLen2(ssa.OpSubUint8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x16.FusedMultiplyAdd132", opLen3(ssa.OpFusedMultiplyAdd132Float32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x16.FusedMultiplyAdd213", opLen3(ssa.OpFusedMultiplyAdd213Float32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x16.FusedMultiplyAdd231", opLen3(ssa.OpFusedMultiplyAdd231Float32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x16.FusedMultiplyAddSub132", opLen3(ssa.OpFusedMultiplyAddSub132Float32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x16.FusedMultiplyAddSub213", opLen3(ssa.OpFusedMultiplyAddSub213Float32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x16.FusedMultiplyAddSub231", opLen3(ssa.OpFusedMultiplyAddSub231Float32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x16.FusedMultiplySub132", opLen3(ssa.OpFusedMultiplySub132Float32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x16.FusedMultiplySub213", opLen3(ssa.OpFusedMultiplySub213Float32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x16.FusedMultiplySub231", opLen3(ssa.OpFusedMultiplySub231Float32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x16.FusedMultiplySubAdd132", opLen3(ssa.OpFusedMultiplySubAdd132Float32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x16.FusedMultiplySubAdd213", opLen3(ssa.OpFusedMultiplySubAdd213Float32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x16.FusedMultiplySubAdd231", opLen3(ssa.OpFusedMultiplySubAdd231Float32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x16.FusedNegativeMultiplyAdd132", opLen3(ssa.OpFusedNegativeMultiplyAdd132Float32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x16.FusedNegativeMultiplyAdd213", opLen3(ssa.OpFusedNegativeMultiplyAdd213Float32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x16.FusedNegativeMultiplyAdd231", opLen3(ssa.OpFusedNegativeMultiplyAdd231Float32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x16.FusedNegativeMultiplySub132", opLen3(ssa.OpFusedNegativeMultiplySub132Float32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x16.FusedNegativeMultiplySub213", opLen3(ssa.OpFusedNegativeMultiplySub213Float32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x16.FusedNegativeMultiplySub231", opLen3(ssa.OpFusedNegativeMultiplySub231Float32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x16.MaskedAdd", opLen3(ssa.OpMaskedAddFloat32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x16.MaskedAnd", opLen3(ssa.OpMaskedAndFloat32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x16.MaskedAndNot", opLen3(ssa.OpMaskedAndNotFloat32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x16.MaskedDiv", opLen3(ssa.OpMaskedDivFloat32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x16.MaskedEqual", opLen3(ssa.OpMaskedEqualFloat32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x16.MaskedGreater", opLen3(ssa.OpMaskedGreaterFloat32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x16.MaskedGreaterEqual", opLen3(ssa.OpMaskedGreaterEqualFloat32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x16.MaskedIsNan", opLen3(ssa.OpMaskedIsNanFloat32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x16.MaskedLess", opLen3(ssa.OpMaskedLessFloat32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x16.MaskedLessEqual", opLen3(ssa.OpMaskedLessEqualFloat32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x16.MaskedMax", opLen3(ssa.OpMaskedMaxFloat32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x16.MaskedMin", opLen3(ssa.OpMaskedMinFloat32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x16.MaskedMul", opLen3(ssa.OpMaskedMulFloat32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x16.MaskedMulByPowOf2", opLen3(ssa.OpMaskedMulByPowOf2Float32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x16.MaskedNotEqual", opLen3(ssa.OpMaskedNotEqualFloat32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x16.MaskedOr", opLen3(ssa.OpMaskedOrFloat32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x16.MaskedSub", opLen3(ssa.OpMaskedSubFloat32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x16.MaskedXor", opLen3(ssa.OpMaskedXorFloat32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x4.FusedMultiplyAdd132", opLen3(ssa.OpFusedMultiplyAdd132Float32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x4.FusedMultiplyAdd213", opLen3(ssa.OpFusedMultiplyAdd213Float32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x4.FusedMultiplyAdd231", opLen3(ssa.OpFusedMultiplyAdd231Float32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x4.FusedMultiplyAddSub132", opLen3(ssa.OpFusedMultiplyAddSub132Float32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x4.FusedMultiplyAddSub213", opLen3(ssa.OpFusedMultiplyAddSub213Float32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x4.FusedMultiplyAddSub231", opLen3(ssa.OpFusedMultiplyAddSub231Float32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x4.FusedMultiplySub132", opLen3(ssa.OpFusedMultiplySub132Float32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x4.FusedMultiplySub213", opLen3(ssa.OpFusedMultiplySub213Float32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x4.FusedMultiplySub231", opLen3(ssa.OpFusedMultiplySub231Float32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x4.FusedMultiplySubAdd132", opLen3(ssa.OpFusedMultiplySubAdd132Float32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x4.FusedMultiplySubAdd213", opLen3(ssa.OpFusedMultiplySubAdd213Float32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x4.FusedMultiplySubAdd231", opLen3(ssa.OpFusedMultiplySubAdd231Float32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x4.FusedNegativeMultiplyAdd132", opLen3(ssa.OpFusedNegativeMultiplyAdd132Float32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x4.FusedNegativeMultiplyAdd213", opLen3(ssa.OpFusedNegativeMultiplyAdd213Float32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x4.FusedNegativeMultiplyAdd231", opLen3(ssa.OpFusedNegativeMultiplyAdd231Float32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x4.FusedNegativeMultiplySub132", opLen3(ssa.OpFusedNegativeMultiplySub132Float32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x4.FusedNegativeMultiplySub213", opLen3(ssa.OpFusedNegativeMultiplySub213Float32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x4.FusedNegativeMultiplySub231", opLen3(ssa.OpFusedNegativeMultiplySub231Float32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x4.MaskedAdd", opLen3(ssa.OpMaskedAddFloat32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x4.MaskedAnd", opLen3(ssa.OpMaskedAndFloat32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x4.MaskedAndNot", opLen3(ssa.OpMaskedAndNotFloat32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x4.MaskedDiv", opLen3(ssa.OpMaskedDivFloat32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x4.MaskedEqual", opLen3(ssa.OpMaskedEqualFloat32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x4.MaskedGreater", opLen3(ssa.OpMaskedGreaterFloat32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x4.MaskedGreaterEqual", opLen3(ssa.OpMaskedGreaterEqualFloat32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x4.MaskedIsNan", opLen3(ssa.OpMaskedIsNanFloat32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x4.MaskedLess", opLen3(ssa.OpMaskedLessFloat32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x4.MaskedLessEqual", opLen3(ssa.OpMaskedLessEqualFloat32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x4.MaskedMax", opLen3(ssa.OpMaskedMaxFloat32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x4.MaskedMin", opLen3(ssa.OpMaskedMinFloat32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x4.MaskedMul", opLen3(ssa.OpMaskedMulFloat32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x4.MaskedMulByPowOf2", opLen3(ssa.OpMaskedMulByPowOf2Float32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x4.MaskedNotEqual", opLen3(ssa.OpMaskedNotEqualFloat32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x4.MaskedOr", opLen3(ssa.OpMaskedOrFloat32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x4.MaskedSub", opLen3(ssa.OpMaskedSubFloat32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x4.MaskedXor", opLen3(ssa.OpMaskedXorFloat32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint16x8.Average", opLen2(ssa.OpAverageUint16x8, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint16x16.Average", opLen2(ssa.OpAverageUint16x16, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint16x32.Average", opLen2(ssa.OpAverageUint16x32, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float32x4.Ceil", opLen1(ssa.OpCeilFloat32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float32x8.Ceil", opLen1(ssa.OpCeilFloat32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float64x2.Ceil", opLen1(ssa.OpCeilFloat64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float64x4.Ceil", opLen1(ssa.OpCeilFloat64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float32x4.CeilSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpCeilSuppressExceptionWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float32x8.CeilSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpCeilSuppressExceptionWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float32x16.CeilSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpCeilSuppressExceptionWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float64x2.CeilSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpCeilSuppressExceptionWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float64x4.CeilSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpCeilSuppressExceptionWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float64x8.CeilSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpCeilSuppressExceptionWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float32x4.CeilWithPrecision", opLen1Imm8(ssa.OpCeilWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float32x8.CeilWithPrecision", opLen1Imm8(ssa.OpCeilWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float32x16.CeilWithPrecision", opLen1Imm8(ssa.OpCeilWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float64x2.CeilWithPrecision", opLen1Imm8(ssa.OpCeilWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float64x4.CeilWithPrecision", opLen1Imm8(ssa.OpCeilWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float64x8.CeilWithPrecision", opLen1Imm8(ssa.OpCeilWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float32x4.DiffWithCeilSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithCeilSuppressExceptionWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float32x8.DiffWithCeilSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithCeilSuppressExceptionWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float32x16.DiffWithCeilSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithCeilSuppressExceptionWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float64x2.DiffWithCeilSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithCeilSuppressExceptionWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float64x4.DiffWithCeilSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithCeilSuppressExceptionWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float64x8.DiffWithCeilSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithCeilSuppressExceptionWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float32x4.DiffWithCeilWithPrecision", opLen1Imm8(ssa.OpDiffWithCeilWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float32x8.DiffWithCeilWithPrecision", opLen1Imm8(ssa.OpDiffWithCeilWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float32x16.DiffWithCeilWithPrecision", opLen1Imm8(ssa.OpDiffWithCeilWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float64x2.DiffWithCeilWithPrecision", opLen1Imm8(ssa.OpDiffWithCeilWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float64x4.DiffWithCeilWithPrecision", opLen1Imm8(ssa.OpDiffWithCeilWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float64x8.DiffWithCeilWithPrecision", opLen1Imm8(ssa.OpDiffWithCeilWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float32x4.DiffWithFloorSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithFloorSuppressExceptionWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float32x8.DiffWithFloorSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithFloorSuppressExceptionWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float32x16.DiffWithFloorSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithFloorSuppressExceptionWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float64x2.DiffWithFloorSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithFloorSuppressExceptionWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float64x4.DiffWithFloorSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithFloorSuppressExceptionWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float64x8.DiffWithFloorSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithFloorSuppressExceptionWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float32x4.DiffWithFloorWithPrecision", opLen1Imm8(ssa.OpDiffWithFloorWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float32x8.DiffWithFloorWithPrecision", opLen1Imm8(ssa.OpDiffWithFloorWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float32x16.DiffWithFloorWithPrecision", opLen1Imm8(ssa.OpDiffWithFloorWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float64x2.DiffWithFloorWithPrecision", opLen1Imm8(ssa.OpDiffWithFloorWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float64x4.DiffWithFloorWithPrecision", opLen1Imm8(ssa.OpDiffWithFloorWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float64x8.DiffWithFloorWithPrecision", opLen1Imm8(ssa.OpDiffWithFloorWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float32x4.DiffWithRoundSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithRoundSuppressExceptionWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float32x8.DiffWithRoundSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithRoundSuppressExceptionWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float32x16.DiffWithRoundSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithRoundSuppressExceptionWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float64x2.DiffWithRoundSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithRoundSuppressExceptionWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float64x4.DiffWithRoundSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithRoundSuppressExceptionWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float64x8.DiffWithRoundSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithRoundSuppressExceptionWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float32x4.DiffWithRoundWithPrecision", opLen1Imm8(ssa.OpDiffWithRoundWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float32x8.DiffWithRoundWithPrecision", opLen1Imm8(ssa.OpDiffWithRoundWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float32x16.DiffWithRoundWithPrecision", opLen1Imm8(ssa.OpDiffWithRoundWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float64x2.DiffWithRoundWithPrecision", opLen1Imm8(ssa.OpDiffWithRoundWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float64x4.DiffWithRoundWithPrecision", opLen1Imm8(ssa.OpDiffWithRoundWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float64x8.DiffWithRoundWithPrecision", opLen1Imm8(ssa.OpDiffWithRoundWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float32x4.DiffWithTruncSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithTruncSuppressExceptionWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float32x8.DiffWithTruncSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithTruncSuppressExceptionWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float32x16.DiffWithTruncSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithTruncSuppressExceptionWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float64x2.DiffWithTruncSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithTruncSuppressExceptionWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float64x4.DiffWithTruncSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithTruncSuppressExceptionWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float64x8.DiffWithTruncSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithTruncSuppressExceptionWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float32x4.DiffWithTruncWithPrecision", opLen1Imm8(ssa.OpDiffWithTruncWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float32x8.DiffWithTruncWithPrecision", opLen1Imm8(ssa.OpDiffWithTruncWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float32x16.DiffWithTruncWithPrecision", opLen1Imm8(ssa.OpDiffWithTruncWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float64x2.DiffWithTruncWithPrecision", opLen1Imm8(ssa.OpDiffWithTruncWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float64x4.DiffWithTruncWithPrecision", opLen1Imm8(ssa.OpDiffWithTruncWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float64x8.DiffWithTruncWithPrecision", opLen1Imm8(ssa.OpDiffWithTruncWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float32x4.Div", opLen2(ssa.OpDivFloat32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float32x8.Div", opLen2(ssa.OpDivFloat32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float32x16.Div", opLen2(ssa.OpDivFloat32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float64x2.Div", opLen2(ssa.OpDivFloat64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float64x4.Div", opLen2(ssa.OpDivFloat64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float64x8.Div", opLen2(ssa.OpDivFloat64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float64x2.DotProdBroadcast", opLen2(ssa.OpDotProdBroadcastFloat64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int8x16.Equal", opLen2(ssa.OpEqualInt8x16, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int8x32.Equal", opLen2(ssa.OpEqualInt8x32, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int16x8.Equal", opLen2(ssa.OpEqualInt16x8, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int16x16.Equal", opLen2(ssa.OpEqualInt16x16, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int32x4.Equal", opLen2(ssa.OpEqualInt32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int32x8.Equal", opLen2(ssa.OpEqualInt32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int64x2.Equal", opLen2(ssa.OpEqualInt64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int64x4.Equal", opLen2(ssa.OpEqualInt64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float32x4.Equal", opLen2(ssa.OpEqualFloat32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float32x8.Equal", opLen2(ssa.OpEqualFloat32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float32x16.Equal", opLen2(ssa.OpEqualFloat32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float64x2.Equal", opLen2(ssa.OpEqualFloat64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float64x4.Equal", opLen2(ssa.OpEqualFloat64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float64x8.Equal", opLen2(ssa.OpEqualFloat64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int8x64.Equal", opLen2(ssa.OpEqualInt8x64, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int16x32.Equal", opLen2(ssa.OpEqualInt16x32, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int32x16.Equal", opLen2(ssa.OpEqualInt32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int64x8.Equal", opLen2(ssa.OpEqualInt64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint8x16.Equal", opLen2(ssa.OpEqualUint8x16, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint8x32.Equal", opLen2(ssa.OpEqualUint8x32, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint8x64.Equal", opLen2(ssa.OpEqualUint8x64, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint16x8.Equal", opLen2(ssa.OpEqualUint16x8, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint16x16.Equal", opLen2(ssa.OpEqualUint16x16, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint16x32.Equal", opLen2(ssa.OpEqualUint16x32, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint32x4.Equal", opLen2(ssa.OpEqualUint32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint32x8.Equal", opLen2(ssa.OpEqualUint32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint32x16.Equal", opLen2(ssa.OpEqualUint32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint64x2.Equal", opLen2(ssa.OpEqualUint64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint64x4.Equal", opLen2(ssa.OpEqualUint64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint64x8.Equal", opLen2(ssa.OpEqualUint64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float32x4.Floor", opLen1(ssa.OpFloorFloat32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float32x8.Floor", opLen1(ssa.OpFloorFloat32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float64x2.Floor", opLen1(ssa.OpFloorFloat64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float64x4.Floor", opLen1(ssa.OpFloorFloat64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float32x4.FloorSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpFloorSuppressExceptionWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float32x8.FloorSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpFloorSuppressExceptionWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float32x16.FloorSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpFloorSuppressExceptionWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float64x2.FloorSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpFloorSuppressExceptionWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float64x4.FloorSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpFloorSuppressExceptionWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float64x8.FloorSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpFloorSuppressExceptionWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float32x4.FloorWithPrecision", opLen1Imm8(ssa.OpFloorWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float32x8.FloorWithPrecision", opLen1Imm8(ssa.OpFloorWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float32x16.FloorWithPrecision", opLen1Imm8(ssa.OpFloorWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float64x2.FloorWithPrecision", opLen1Imm8(ssa.OpFloorWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float64x4.FloorWithPrecision", opLen1Imm8(ssa.OpFloorWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float64x8.FloorWithPrecision", opLen1Imm8(ssa.OpFloorWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float32x4.FusedMultiplyAdd132", opLen3(ssa.OpFusedMultiplyAdd132Float32x4, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Float32x8.FusedMultiplyAdd132", opLen3(ssa.OpFusedMultiplyAdd132Float32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x8.FusedMultiplyAdd213", opLen3(ssa.OpFusedMultiplyAdd213Float32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x8.FusedMultiplyAdd231", opLen3(ssa.OpFusedMultiplyAdd231Float32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x8.FusedMultiplyAddSub132", opLen3(ssa.OpFusedMultiplyAddSub132Float32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x8.FusedMultiplyAddSub213", opLen3(ssa.OpFusedMultiplyAddSub213Float32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x8.FusedMultiplyAddSub231", opLen3(ssa.OpFusedMultiplyAddSub231Float32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x8.FusedMultiplySub132", opLen3(ssa.OpFusedMultiplySub132Float32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x8.FusedMultiplySub213", opLen3(ssa.OpFusedMultiplySub213Float32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x8.FusedMultiplySub231", opLen3(ssa.OpFusedMultiplySub231Float32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x8.FusedMultiplySubAdd132", opLen3(ssa.OpFusedMultiplySubAdd132Float32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x8.FusedMultiplySubAdd213", opLen3(ssa.OpFusedMultiplySubAdd213Float32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x8.FusedMultiplySubAdd231", opLen3(ssa.OpFusedMultiplySubAdd231Float32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x8.FusedNegativeMultiplyAdd132", opLen3(ssa.OpFusedNegativeMultiplyAdd132Float32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x8.FusedNegativeMultiplyAdd213", opLen3(ssa.OpFusedNegativeMultiplyAdd213Float32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x8.FusedNegativeMultiplyAdd231", opLen3(ssa.OpFusedNegativeMultiplyAdd231Float32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x8.FusedNegativeMultiplySub132", opLen3(ssa.OpFusedNegativeMultiplySub132Float32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x8.FusedNegativeMultiplySub213", opLen3(ssa.OpFusedNegativeMultiplySub213Float32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x8.FusedNegativeMultiplySub231", opLen3(ssa.OpFusedNegativeMultiplySub231Float32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x8.MaskedAdd", opLen3(ssa.OpMaskedAddFloat32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x8.MaskedAnd", opLen3(ssa.OpMaskedAndFloat32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x8.MaskedAndNot", opLen3(ssa.OpMaskedAndNotFloat32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x8.MaskedDiv", opLen3(ssa.OpMaskedDivFloat32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x8.MaskedEqual", opLen3(ssa.OpMaskedEqualFloat32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x8.MaskedGreater", opLen3(ssa.OpMaskedGreaterFloat32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x8.MaskedGreaterEqual", opLen3(ssa.OpMaskedGreaterEqualFloat32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x8.MaskedIsNan", opLen3(ssa.OpMaskedIsNanFloat32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x8.MaskedLess", opLen3(ssa.OpMaskedLessFloat32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x8.MaskedLessEqual", opLen3(ssa.OpMaskedLessEqualFloat32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x8.MaskedMax", opLen3(ssa.OpMaskedMaxFloat32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x8.MaskedMin", opLen3(ssa.OpMaskedMinFloat32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x8.MaskedMul", opLen3(ssa.OpMaskedMulFloat32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x8.MaskedMulByPowOf2", opLen3(ssa.OpMaskedMulByPowOf2Float32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x8.MaskedNotEqual", opLen3(ssa.OpMaskedNotEqualFloat32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x8.MaskedOr", opLen3(ssa.OpMaskedOrFloat32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x8.MaskedSub", opLen3(ssa.OpMaskedSubFloat32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x8.MaskedXor", opLen3(ssa.OpMaskedXorFloat32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float32x16.FusedMultiplyAdd132", opLen3(ssa.OpFusedMultiplyAdd132Float32x16, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Float64x2.FusedMultiplyAdd132", opLen3(ssa.OpFusedMultiplyAdd132Float64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float64x4.FusedMultiplyAdd132", opLen3(ssa.OpFusedMultiplyAdd132Float64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float64x8.FusedMultiplyAdd132", opLen3(ssa.OpFusedMultiplyAdd132Float64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float32x4.FusedMultiplyAdd213", opLen3(ssa.OpFusedMultiplyAdd213Float32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float32x8.FusedMultiplyAdd213", opLen3(ssa.OpFusedMultiplyAdd213Float32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float32x16.FusedMultiplyAdd213", opLen3(ssa.OpFusedMultiplyAdd213Float32x16, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Float64x2.FusedMultiplyAdd213", opLen3(ssa.OpFusedMultiplyAdd213Float64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float64x4.FusedMultiplyAdd213", opLen3(ssa.OpFusedMultiplyAdd213Float64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float64x8.FusedMultiplyAdd213", opLen3(ssa.OpFusedMultiplyAdd213Float64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float32x4.FusedMultiplyAdd231", opLen3(ssa.OpFusedMultiplyAdd231Float32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float32x8.FusedMultiplyAdd231", opLen3(ssa.OpFusedMultiplyAdd231Float32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float32x16.FusedMultiplyAdd231", opLen3(ssa.OpFusedMultiplyAdd231Float32x16, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Float64x2.FusedMultiplyAdd231", opLen3(ssa.OpFusedMultiplyAdd231Float64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float64x4.FusedMultiplyAdd231", opLen3(ssa.OpFusedMultiplyAdd231Float64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float64x8.FusedMultiplyAdd231", opLen3(ssa.OpFusedMultiplyAdd231Float64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float32x4.FusedMultiplyAddSub132", opLen3(ssa.OpFusedMultiplyAddSub132Float32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float32x8.FusedMultiplyAddSub132", opLen3(ssa.OpFusedMultiplyAddSub132Float32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float32x16.FusedMultiplyAddSub132", opLen3(ssa.OpFusedMultiplyAddSub132Float32x16, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Float64x2.FusedMultiplyAddSub132", opLen3(ssa.OpFusedMultiplyAddSub132Float64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float64x4.FusedMultiplyAddSub132", opLen3(ssa.OpFusedMultiplyAddSub132Float64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float64x8.FusedMultiplyAddSub132", opLen3(ssa.OpFusedMultiplyAddSub132Float64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float32x4.FusedMultiplyAddSub213", opLen3(ssa.OpFusedMultiplyAddSub213Float32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float32x8.FusedMultiplyAddSub213", opLen3(ssa.OpFusedMultiplyAddSub213Float32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float32x16.FusedMultiplyAddSub213", opLen3(ssa.OpFusedMultiplyAddSub213Float32x16, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Float64x2.FusedMultiplyAddSub213", opLen3(ssa.OpFusedMultiplyAddSub213Float64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float64x4.FusedMultiplyAddSub213", opLen3(ssa.OpFusedMultiplyAddSub213Float64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float64x8.FusedMultiplyAddSub213", opLen3(ssa.OpFusedMultiplyAddSub213Float64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float32x4.FusedMultiplyAddSub231", opLen3(ssa.OpFusedMultiplyAddSub231Float32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float32x8.FusedMultiplyAddSub231", opLen3(ssa.OpFusedMultiplyAddSub231Float32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float32x16.FusedMultiplyAddSub231", opLen3(ssa.OpFusedMultiplyAddSub231Float32x16, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Float64x2.FusedMultiplyAddSub231", opLen3(ssa.OpFusedMultiplyAddSub231Float64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float64x4.FusedMultiplyAddSub231", opLen3(ssa.OpFusedMultiplyAddSub231Float64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float64x8.FusedMultiplyAddSub231", opLen3(ssa.OpFusedMultiplyAddSub231Float64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float32x4.FusedMultiplySub132", opLen3(ssa.OpFusedMultiplySub132Float32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float32x8.FusedMultiplySub132", opLen3(ssa.OpFusedMultiplySub132Float32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float32x16.FusedMultiplySub132", opLen3(ssa.OpFusedMultiplySub132Float32x16, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Float64x2.FusedMultiplySub132", opLen3(ssa.OpFusedMultiplySub132Float64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float64x4.FusedMultiplySub132", opLen3(ssa.OpFusedMultiplySub132Float64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float64x8.FusedMultiplySub132", opLen3(ssa.OpFusedMultiplySub132Float64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float32x4.FusedMultiplySub213", opLen3(ssa.OpFusedMultiplySub213Float32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float32x8.FusedMultiplySub213", opLen3(ssa.OpFusedMultiplySub213Float32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float32x16.FusedMultiplySub213", opLen3(ssa.OpFusedMultiplySub213Float32x16, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Float64x2.FusedMultiplySub213", opLen3(ssa.OpFusedMultiplySub213Float64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float64x4.FusedMultiplySub213", opLen3(ssa.OpFusedMultiplySub213Float64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float64x8.FusedMultiplySub213", opLen3(ssa.OpFusedMultiplySub213Float64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float32x4.FusedMultiplySub231", opLen3(ssa.OpFusedMultiplySub231Float32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float32x8.FusedMultiplySub231", opLen3(ssa.OpFusedMultiplySub231Float32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float32x16.FusedMultiplySub231", opLen3(ssa.OpFusedMultiplySub231Float32x16, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Float64x2.FusedMultiplySub231", opLen3(ssa.OpFusedMultiplySub231Float64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float64x4.FusedMultiplySub231", opLen3(ssa.OpFusedMultiplySub231Float64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float64x8.FusedMultiplySub231", opLen3(ssa.OpFusedMultiplySub231Float64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float32x4.FusedMultiplySubAdd132", opLen3(ssa.OpFusedMultiplySubAdd132Float32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float32x8.FusedMultiplySubAdd132", opLen3(ssa.OpFusedMultiplySubAdd132Float32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float32x16.FusedMultiplySubAdd132", opLen3(ssa.OpFusedMultiplySubAdd132Float32x16, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Float64x2.FusedMultiplySubAdd132", opLen3(ssa.OpFusedMultiplySubAdd132Float64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float64x4.FusedMultiplySubAdd132", opLen3(ssa.OpFusedMultiplySubAdd132Float64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float64x8.FusedMultiplySubAdd132", opLen3(ssa.OpFusedMultiplySubAdd132Float64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float32x4.FusedMultiplySubAdd213", opLen3(ssa.OpFusedMultiplySubAdd213Float32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float32x8.FusedMultiplySubAdd213", opLen3(ssa.OpFusedMultiplySubAdd213Float32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float32x16.FusedMultiplySubAdd213", opLen3(ssa.OpFusedMultiplySubAdd213Float32x16, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Float64x2.FusedMultiplySubAdd213", opLen3(ssa.OpFusedMultiplySubAdd213Float64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float64x4.FusedMultiplySubAdd213", opLen3(ssa.OpFusedMultiplySubAdd213Float64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float64x8.FusedMultiplySubAdd213", opLen3(ssa.OpFusedMultiplySubAdd213Float64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float32x4.FusedMultiplySubAdd231", opLen3(ssa.OpFusedMultiplySubAdd231Float32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float32x8.FusedMultiplySubAdd231", opLen3(ssa.OpFusedMultiplySubAdd231Float32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float32x16.FusedMultiplySubAdd231", opLen3(ssa.OpFusedMultiplySubAdd231Float32x16, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Float64x2.FusedMultiplySubAdd231", opLen3(ssa.OpFusedMultiplySubAdd231Float64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float64x4.FusedMultiplySubAdd231", opLen3(ssa.OpFusedMultiplySubAdd231Float64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float64x8.FusedMultiplySubAdd231", opLen3(ssa.OpFusedMultiplySubAdd231Float64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float32x4.FusedNegativeMultiplyAdd132", opLen3(ssa.OpFusedNegativeMultiplyAdd132Float32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float32x8.FusedNegativeMultiplyAdd132", opLen3(ssa.OpFusedNegativeMultiplyAdd132Float32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float32x16.FusedNegativeMultiplyAdd132", opLen3(ssa.OpFusedNegativeMultiplyAdd132Float32x16, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Float64x2.FusedNegativeMultiplyAdd132", opLen3(ssa.OpFusedNegativeMultiplyAdd132Float64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float64x4.FusedNegativeMultiplyAdd132", opLen3(ssa.OpFusedNegativeMultiplyAdd132Float64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float64x8.FusedNegativeMultiplyAdd132", opLen3(ssa.OpFusedNegativeMultiplyAdd132Float64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float32x4.FusedNegativeMultiplyAdd213", opLen3(ssa.OpFusedNegativeMultiplyAdd213Float32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float32x8.FusedNegativeMultiplyAdd213", opLen3(ssa.OpFusedNegativeMultiplyAdd213Float32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float32x16.FusedNegativeMultiplyAdd213", opLen3(ssa.OpFusedNegativeMultiplyAdd213Float32x16, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Float64x2.FusedNegativeMultiplyAdd213", opLen3(ssa.OpFusedNegativeMultiplyAdd213Float64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float64x4.FusedNegativeMultiplyAdd213", opLen3(ssa.OpFusedNegativeMultiplyAdd213Float64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float64x8.FusedNegativeMultiplyAdd213", opLen3(ssa.OpFusedNegativeMultiplyAdd213Float64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float32x4.FusedNegativeMultiplyAdd231", opLen3(ssa.OpFusedNegativeMultiplyAdd231Float32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float32x8.FusedNegativeMultiplyAdd231", opLen3(ssa.OpFusedNegativeMultiplyAdd231Float32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float32x16.FusedNegativeMultiplyAdd231", opLen3(ssa.OpFusedNegativeMultiplyAdd231Float32x16, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Float64x2.FusedNegativeMultiplyAdd231", opLen3(ssa.OpFusedNegativeMultiplyAdd231Float64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float64x4.FusedNegativeMultiplyAdd231", opLen3(ssa.OpFusedNegativeMultiplyAdd231Float64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float64x8.FusedNegativeMultiplyAdd231", opLen3(ssa.OpFusedNegativeMultiplyAdd231Float64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float32x4.FusedNegativeMultiplySub132", opLen3(ssa.OpFusedNegativeMultiplySub132Float32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float32x8.FusedNegativeMultiplySub132", opLen3(ssa.OpFusedNegativeMultiplySub132Float32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float32x16.FusedNegativeMultiplySub132", opLen3(ssa.OpFusedNegativeMultiplySub132Float32x16, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Float64x2.FusedNegativeMultiplySub132", opLen3(ssa.OpFusedNegativeMultiplySub132Float64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float64x4.FusedNegativeMultiplySub132", opLen3(ssa.OpFusedNegativeMultiplySub132Float64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float64x8.FusedNegativeMultiplySub132", opLen3(ssa.OpFusedNegativeMultiplySub132Float64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float32x4.FusedNegativeMultiplySub213", opLen3(ssa.OpFusedNegativeMultiplySub213Float32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float32x8.FusedNegativeMultiplySub213", opLen3(ssa.OpFusedNegativeMultiplySub213Float32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float32x16.FusedNegativeMultiplySub213", opLen3(ssa.OpFusedNegativeMultiplySub213Float32x16, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Float64x2.FusedNegativeMultiplySub213", opLen3(ssa.OpFusedNegativeMultiplySub213Float64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float64x4.FusedNegativeMultiplySub213", opLen3(ssa.OpFusedNegativeMultiplySub213Float64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float64x8.FusedNegativeMultiplySub213", opLen3(ssa.OpFusedNegativeMultiplySub213Float64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float32x4.FusedNegativeMultiplySub231", opLen3(ssa.OpFusedNegativeMultiplySub231Float32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float32x8.FusedNegativeMultiplySub231", opLen3(ssa.OpFusedNegativeMultiplySub231Float32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float32x16.FusedNegativeMultiplySub231", opLen3(ssa.OpFusedNegativeMultiplySub231Float32x16, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Float64x2.FusedNegativeMultiplySub231", opLen3(ssa.OpFusedNegativeMultiplySub231Float64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x2.MaskedAdd", opLen3(ssa.OpMaskedAddFloat64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x2.MaskedAnd", opLen3(ssa.OpMaskedAndFloat64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x2.MaskedAndNot", opLen3(ssa.OpMaskedAndNotFloat64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x2.MaskedDiv", opLen3(ssa.OpMaskedDivFloat64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x2.MaskedEqual", opLen3(ssa.OpMaskedEqualFloat64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x2.MaskedGreater", opLen3(ssa.OpMaskedGreaterFloat64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x2.MaskedGreaterEqual", opLen3(ssa.OpMaskedGreaterEqualFloat64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x2.MaskedIsNan", opLen3(ssa.OpMaskedIsNanFloat64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x2.MaskedLess", opLen3(ssa.OpMaskedLessFloat64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x2.MaskedLessEqual", opLen3(ssa.OpMaskedLessEqualFloat64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x2.MaskedMax", opLen3(ssa.OpMaskedMaxFloat64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x2.MaskedMin", opLen3(ssa.OpMaskedMinFloat64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x2.MaskedMul", opLen3(ssa.OpMaskedMulFloat64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x2.MaskedMulByPowOf2", opLen3(ssa.OpMaskedMulByPowOf2Float64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x2.MaskedNotEqual", opLen3(ssa.OpMaskedNotEqualFloat64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x2.MaskedOr", opLen3(ssa.OpMaskedOrFloat64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x2.MaskedSub", opLen3(ssa.OpMaskedSubFloat64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x2.MaskedXor", opLen3(ssa.OpMaskedXorFloat64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x4.FusedMultiplyAdd132", opLen3(ssa.OpFusedMultiplyAdd132Float64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x4.FusedMultiplyAdd213", opLen3(ssa.OpFusedMultiplyAdd213Float64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x4.FusedMultiplyAdd231", opLen3(ssa.OpFusedMultiplyAdd231Float64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x4.FusedMultiplyAddSub132", opLen3(ssa.OpFusedMultiplyAddSub132Float64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x4.FusedMultiplyAddSub213", opLen3(ssa.OpFusedMultiplyAddSub213Float64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x4.FusedMultiplyAddSub231", opLen3(ssa.OpFusedMultiplyAddSub231Float64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x4.FusedMultiplySub132", opLen3(ssa.OpFusedMultiplySub132Float64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x4.FusedMultiplySub213", opLen3(ssa.OpFusedMultiplySub213Float64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x4.FusedMultiplySub231", opLen3(ssa.OpFusedMultiplySub231Float64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x4.FusedMultiplySubAdd132", opLen3(ssa.OpFusedMultiplySubAdd132Float64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x4.FusedMultiplySubAdd213", opLen3(ssa.OpFusedMultiplySubAdd213Float64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x4.FusedMultiplySubAdd231", opLen3(ssa.OpFusedMultiplySubAdd231Float64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x4.FusedNegativeMultiplyAdd132", opLen3(ssa.OpFusedNegativeMultiplyAdd132Float64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x4.FusedNegativeMultiplyAdd213", opLen3(ssa.OpFusedNegativeMultiplyAdd213Float64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x4.FusedNegativeMultiplyAdd231", opLen3(ssa.OpFusedNegativeMultiplyAdd231Float64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x4.FusedNegativeMultiplySub132", opLen3(ssa.OpFusedNegativeMultiplySub132Float64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x4.FusedNegativeMultiplySub213", opLen3(ssa.OpFusedNegativeMultiplySub213Float64x4, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float64x4.FusedNegativeMultiplySub231", opLen3(ssa.OpFusedNegativeMultiplySub231Float64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x4.MaskedAdd", opLen3(ssa.OpMaskedAddFloat64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x4.MaskedAnd", opLen3(ssa.OpMaskedAndFloat64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x4.MaskedAndNot", opLen3(ssa.OpMaskedAndNotFloat64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x4.MaskedDiv", opLen3(ssa.OpMaskedDivFloat64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x4.MaskedEqual", opLen3(ssa.OpMaskedEqualFloat64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x4.MaskedGreater", opLen3(ssa.OpMaskedGreaterFloat64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x4.MaskedGreaterEqual", opLen3(ssa.OpMaskedGreaterEqualFloat64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x4.MaskedIsNan", opLen3(ssa.OpMaskedIsNanFloat64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x4.MaskedLess", opLen3(ssa.OpMaskedLessFloat64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x4.MaskedLessEqual", opLen3(ssa.OpMaskedLessEqualFloat64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x4.MaskedMax", opLen3(ssa.OpMaskedMaxFloat64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x4.MaskedMin", opLen3(ssa.OpMaskedMinFloat64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x4.MaskedMul", opLen3(ssa.OpMaskedMulFloat64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x4.MaskedMulByPowOf2", opLen3(ssa.OpMaskedMulByPowOf2Float64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x4.MaskedNotEqual", opLen3(ssa.OpMaskedNotEqualFloat64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x4.MaskedOr", opLen3(ssa.OpMaskedOrFloat64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x4.MaskedSub", opLen3(ssa.OpMaskedSubFloat64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x4.MaskedXor", opLen3(ssa.OpMaskedXorFloat64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x8.FusedMultiplyAdd132", opLen3(ssa.OpFusedMultiplyAdd132Float64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float64x8.FusedMultiplyAdd213", opLen3(ssa.OpFusedMultiplyAdd213Float64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float64x8.FusedMultiplyAdd231", opLen3(ssa.OpFusedMultiplyAdd231Float64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float64x8.FusedMultiplyAddSub132", opLen3(ssa.OpFusedMultiplyAddSub132Float64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float64x8.FusedMultiplyAddSub213", opLen3(ssa.OpFusedMultiplyAddSub213Float64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float64x8.FusedMultiplyAddSub231", opLen3(ssa.OpFusedMultiplyAddSub231Float64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float64x8.FusedMultiplySub132", opLen3(ssa.OpFusedMultiplySub132Float64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float64x8.FusedMultiplySub213", opLen3(ssa.OpFusedMultiplySub213Float64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float64x8.FusedMultiplySub231", opLen3(ssa.OpFusedMultiplySub231Float64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float64x8.FusedMultiplySubAdd132", opLen3(ssa.OpFusedMultiplySubAdd132Float64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float64x8.FusedMultiplySubAdd213", opLen3(ssa.OpFusedMultiplySubAdd213Float64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float64x8.FusedMultiplySubAdd231", opLen3(ssa.OpFusedMultiplySubAdd231Float64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float64x8.FusedNegativeMultiplyAdd132", opLen3(ssa.OpFusedNegativeMultiplyAdd132Float64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float64x8.FusedNegativeMultiplyAdd213", opLen3(ssa.OpFusedNegativeMultiplyAdd213Float64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float64x8.FusedNegativeMultiplyAdd231", opLen3(ssa.OpFusedNegativeMultiplyAdd231Float64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float64x8.FusedNegativeMultiplySub132", opLen3(ssa.OpFusedNegativeMultiplySub132Float64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float64x8.FusedNegativeMultiplySub213", opLen3(ssa.OpFusedNegativeMultiplySub213Float64x8, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Float64x8.FusedNegativeMultiplySub231", opLen3(ssa.OpFusedNegativeMultiplySub231Float64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float64x8.MaskedAdd", opLen3(ssa.OpMaskedAddFloat64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float64x8.MaskedAnd", opLen3(ssa.OpMaskedAndFloat64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float64x8.MaskedAndNot", opLen3(ssa.OpMaskedAndNotFloat64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float64x8.MaskedDiv", opLen3(ssa.OpMaskedDivFloat64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float64x8.MaskedEqual", opLen3(ssa.OpMaskedEqualFloat64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float64x8.MaskedGreater", opLen3(ssa.OpMaskedGreaterFloat64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float64x8.MaskedGreaterEqual", opLen3(ssa.OpMaskedGreaterEqualFloat64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float64x8.MaskedIsNan", opLen3(ssa.OpMaskedIsNanFloat64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float64x8.MaskedLess", opLen3(ssa.OpMaskedLessFloat64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float64x8.MaskedLessEqual", opLen3(ssa.OpMaskedLessEqualFloat64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float64x8.MaskedMax", opLen3(ssa.OpMaskedMaxFloat64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float64x8.MaskedMin", opLen3(ssa.OpMaskedMinFloat64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float64x8.MaskedMul", opLen3(ssa.OpMaskedMulFloat64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float64x8.MaskedMulByPowOf2", opLen3(ssa.OpMaskedMulByPowOf2Float64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float64x8.MaskedNotEqual", opLen3(ssa.OpMaskedNotEqualFloat64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float64x8.MaskedOr", opLen3(ssa.OpMaskedOrFloat64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float64x8.MaskedSub", opLen3(ssa.OpMaskedSubFloat64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float64x8.MaskedXor", opLen3(ssa.OpMaskedXorFloat64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int16x16.MaskedAdd", opLen3(ssa.OpMaskedAddInt16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int16x16.MaskedEqual", opLen3(ssa.OpMaskedEqualInt16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int16x16.MaskedGreater", opLen3(ssa.OpMaskedGreaterInt16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int16x16.MaskedGreaterEqual", opLen3(ssa.OpMaskedGreaterEqualInt16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int16x16.MaskedLess", opLen3(ssa.OpMaskedLessInt16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int16x16.MaskedLessEqual", opLen3(ssa.OpMaskedLessEqualInt16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int16x16.MaskedMax", opLen3(ssa.OpMaskedMaxInt16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int16x16.MaskedMin", opLen3(ssa.OpMaskedMinInt16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int16x16.MaskedMulHigh", opLen3(ssa.OpMaskedMulHighInt16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int16x16.MaskedMulLow", opLen3(ssa.OpMaskedMulLowInt16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int16x16.MaskedNotEqual", opLen3(ssa.OpMaskedNotEqualInt16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int16x16.MaskedPairDotProd", opLen3(ssa.OpMaskedPairDotProdInt16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int16x16.MaskedSaturatedAdd", opLen3(ssa.OpMaskedSaturatedAddInt16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int16x16.MaskedSaturatedSub", opLen3(ssa.OpMaskedSaturatedSubInt16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int16x16.MaskedSub", opLen3(ssa.OpMaskedSubInt16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int16x32.MaskedAdd", opLen3(ssa.OpMaskedAddInt16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int16x32.MaskedEqual", opLen3(ssa.OpMaskedEqualInt16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int16x32.MaskedGreater", opLen3(ssa.OpMaskedGreaterInt16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int16x32.MaskedGreaterEqual", opLen3(ssa.OpMaskedGreaterEqualInt16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int16x32.MaskedLess", opLen3(ssa.OpMaskedLessInt16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int16x32.MaskedLessEqual", opLen3(ssa.OpMaskedLessEqualInt16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int16x32.MaskedMax", opLen3(ssa.OpMaskedMaxInt16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int16x32.MaskedMin", opLen3(ssa.OpMaskedMinInt16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int16x32.MaskedMulHigh", opLen3(ssa.OpMaskedMulHighInt16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int16x32.MaskedMulLow", opLen3(ssa.OpMaskedMulLowInt16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int16x32.MaskedNotEqual", opLen3(ssa.OpMaskedNotEqualInt16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int16x32.MaskedPairDotProd", opLen3(ssa.OpMaskedPairDotProdInt16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int16x32.MaskedSaturatedAdd", opLen3(ssa.OpMaskedSaturatedAddInt16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int16x32.MaskedSaturatedSub", opLen3(ssa.OpMaskedSaturatedSubInt16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int16x32.MaskedSub", opLen3(ssa.OpMaskedSubInt16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int16x8.MaskedAdd", opLen3(ssa.OpMaskedAddInt16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int16x8.MaskedEqual", opLen3(ssa.OpMaskedEqualInt16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int16x8.MaskedGreater", opLen3(ssa.OpMaskedGreaterInt16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int16x8.MaskedGreaterEqual", opLen3(ssa.OpMaskedGreaterEqualInt16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int16x8.MaskedLess", opLen3(ssa.OpMaskedLessInt16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int16x8.MaskedLessEqual", opLen3(ssa.OpMaskedLessEqualInt16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int16x8.MaskedMax", opLen3(ssa.OpMaskedMaxInt16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int16x8.MaskedMin", opLen3(ssa.OpMaskedMinInt16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int16x8.MaskedMulHigh", opLen3(ssa.OpMaskedMulHighInt16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int16x8.MaskedMulLow", opLen3(ssa.OpMaskedMulLowInt16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int16x8.MaskedNotEqual", opLen3(ssa.OpMaskedNotEqualInt16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int16x8.MaskedPairDotProd", opLen3(ssa.OpMaskedPairDotProdInt16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int16x8.MaskedSaturatedAdd", opLen3(ssa.OpMaskedSaturatedAddInt16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int16x8.MaskedSaturatedSub", opLen3(ssa.OpMaskedSaturatedSubInt16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int16x8.MaskedSub", opLen3(ssa.OpMaskedSubInt16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int32x16.MaskedAdd", opLen3(ssa.OpMaskedAddInt32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int32x16.MaskedAnd", opLen3(ssa.OpMaskedAndInt32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int32x16.MaskedAndNot", opLen3(ssa.OpMaskedAndNotInt32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int32x16.MaskedEqual", opLen3(ssa.OpMaskedEqualInt32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int32x16.MaskedGreater", opLen3(ssa.OpMaskedGreaterInt32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int32x16.MaskedGreaterEqual", opLen3(ssa.OpMaskedGreaterEqualInt32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int32x16.MaskedLess", opLen3(ssa.OpMaskedLessInt32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int32x16.MaskedLessEqual", opLen3(ssa.OpMaskedLessEqualInt32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int32x16.MaskedMax", opLen3(ssa.OpMaskedMaxInt32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int32x16.MaskedMin", opLen3(ssa.OpMaskedMinInt32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int32x16.MaskedMulLow", opLen3(ssa.OpMaskedMulLowInt32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int32x16.MaskedNotEqual", opLen3(ssa.OpMaskedNotEqualInt32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int32x16.MaskedOr", opLen3(ssa.OpMaskedOrInt32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int32x16.MaskedSub", opLen3(ssa.OpMaskedSubInt32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int32x16.MaskedXor", opLen3(ssa.OpMaskedXorInt32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int32x16.PairDotProdAccumulate", opLen3(ssa.OpPairDotProdAccumulateInt32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int32x16.SaturatedPairDotProdAccumulate", opLen3(ssa.OpSaturatedPairDotProdAccumulateInt32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int32x16.SaturatedUnsignedSignedQuadDotProdAccumulate", opLen3(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateInt32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int32x16.UnsignedSignedQuadDotProdAccumulate", opLen3(ssa.OpUnsignedSignedQuadDotProdAccumulateInt32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int32x4.MaskedAdd", opLen3(ssa.OpMaskedAddInt32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int32x4.MaskedAnd", opLen3(ssa.OpMaskedAndInt32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int32x4.MaskedAndNot", opLen3(ssa.OpMaskedAndNotInt32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int32x4.MaskedEqual", opLen3(ssa.OpMaskedEqualInt32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int32x4.MaskedGreater", opLen3(ssa.OpMaskedGreaterInt32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int32x4.MaskedGreaterEqual", opLen3(ssa.OpMaskedGreaterEqualInt32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int32x4.MaskedLess", opLen3(ssa.OpMaskedLessInt32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int32x4.MaskedLessEqual", opLen3(ssa.OpMaskedLessEqualInt32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int32x4.MaskedMax", opLen3(ssa.OpMaskedMaxInt32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int32x4.MaskedMin", opLen3(ssa.OpMaskedMinInt32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int32x4.MaskedMulLow", opLen3(ssa.OpMaskedMulLowInt32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int32x4.MaskedNotEqual", opLen3(ssa.OpMaskedNotEqualInt32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int32x4.MaskedOr", opLen3(ssa.OpMaskedOrInt32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int32x4.MaskedSub", opLen3(ssa.OpMaskedSubInt32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int32x4.MaskedXor", opLen3(ssa.OpMaskedXorInt32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int32x4.PairDotProdAccumulate", opLen3(ssa.OpPairDotProdAccumulateInt32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int32x4.SaturatedPairDotProdAccumulate", opLen3(ssa.OpSaturatedPairDotProdAccumulateInt32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int32x4.SaturatedUnsignedSignedQuadDotProdAccumulate", opLen3(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateInt32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int32x4.UnsignedSignedQuadDotProdAccumulate", opLen3(ssa.OpUnsignedSignedQuadDotProdAccumulateInt32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int32x8.MaskedAdd", opLen3(ssa.OpMaskedAddInt32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int32x8.MaskedAnd", opLen3(ssa.OpMaskedAndInt32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int32x8.MaskedAndNot", opLen3(ssa.OpMaskedAndNotInt32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int32x8.MaskedEqual", opLen3(ssa.OpMaskedEqualInt32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int32x8.MaskedGreater", opLen3(ssa.OpMaskedGreaterInt32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int32x8.MaskedGreaterEqual", opLen3(ssa.OpMaskedGreaterEqualInt32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int32x8.MaskedLess", opLen3(ssa.OpMaskedLessInt32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int32x8.MaskedLessEqual", opLen3(ssa.OpMaskedLessEqualInt32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int32x8.MaskedMax", opLen3(ssa.OpMaskedMaxInt32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int32x8.MaskedMin", opLen3(ssa.OpMaskedMinInt32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int32x8.MaskedMulLow", opLen3(ssa.OpMaskedMulLowInt32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int32x8.MaskedNotEqual", opLen3(ssa.OpMaskedNotEqualInt32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int32x8.MaskedOr", opLen3(ssa.OpMaskedOrInt32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int32x8.MaskedSub", opLen3(ssa.OpMaskedSubInt32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int32x8.MaskedXor", opLen3(ssa.OpMaskedXorInt32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int32x8.PairDotProdAccumulate", opLen3(ssa.OpPairDotProdAccumulateInt32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int32x8.SaturatedPairDotProdAccumulate", opLen3(ssa.OpSaturatedPairDotProdAccumulateInt32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int32x8.SaturatedUnsignedSignedQuadDotProdAccumulate", opLen3(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateInt32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int32x8.UnsignedSignedQuadDotProdAccumulate", opLen3(ssa.OpUnsignedSignedQuadDotProdAccumulateInt32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int64x2.MaskedAdd", opLen3(ssa.OpMaskedAddInt64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int64x2.MaskedAnd", opLen3(ssa.OpMaskedAndInt64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int64x2.MaskedAndNot", opLen3(ssa.OpMaskedAndNotInt64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int64x2.MaskedEqual", opLen3(ssa.OpMaskedEqualInt64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int64x2.MaskedGreater", opLen3(ssa.OpMaskedGreaterInt64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int64x2.MaskedGreaterEqual", opLen3(ssa.OpMaskedGreaterEqualInt64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int64x2.MaskedLess", opLen3(ssa.OpMaskedLessInt64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int64x2.MaskedLessEqual", opLen3(ssa.OpMaskedLessEqualInt64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int64x2.MaskedMax", opLen3(ssa.OpMaskedMaxInt64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int64x2.MaskedMin", opLen3(ssa.OpMaskedMinInt64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int64x2.MaskedMulEvenWiden", opLen3(ssa.OpMaskedMulEvenWidenInt64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int64x2.MaskedMulLow", opLen3(ssa.OpMaskedMulLowInt64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int64x2.MaskedNotEqual", opLen3(ssa.OpMaskedNotEqualInt64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int64x2.MaskedOr", opLen3(ssa.OpMaskedOrInt64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int64x2.MaskedSub", opLen3(ssa.OpMaskedSubInt64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int64x2.MaskedXor", opLen3(ssa.OpMaskedXorInt64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int64x4.MaskedAdd", opLen3(ssa.OpMaskedAddInt64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int64x4.MaskedAnd", opLen3(ssa.OpMaskedAndInt64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int64x4.MaskedAndNot", opLen3(ssa.OpMaskedAndNotInt64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int64x4.MaskedEqual", opLen3(ssa.OpMaskedEqualInt64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int64x4.MaskedGreater", opLen3(ssa.OpMaskedGreaterInt64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int64x4.MaskedGreaterEqual", opLen3(ssa.OpMaskedGreaterEqualInt64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int64x4.MaskedLess", opLen3(ssa.OpMaskedLessInt64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int64x4.MaskedLessEqual", opLen3(ssa.OpMaskedLessEqualInt64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int64x4.MaskedMax", opLen3(ssa.OpMaskedMaxInt64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int64x4.MaskedMin", opLen3(ssa.OpMaskedMinInt64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int64x4.MaskedMulEvenWiden", opLen3(ssa.OpMaskedMulEvenWidenInt64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int64x4.MaskedMulLow", opLen3(ssa.OpMaskedMulLowInt64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int64x4.MaskedNotEqual", opLen3(ssa.OpMaskedNotEqualInt64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int64x4.MaskedOr", opLen3(ssa.OpMaskedOrInt64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int64x4.MaskedSub", opLen3(ssa.OpMaskedSubInt64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int64x4.MaskedXor", opLen3(ssa.OpMaskedXorInt64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int64x8.MaskedAdd", opLen3(ssa.OpMaskedAddInt64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int64x8.MaskedAnd", opLen3(ssa.OpMaskedAndInt64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int64x8.MaskedAndNot", opLen3(ssa.OpMaskedAndNotInt64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int64x8.MaskedEqual", opLen3(ssa.OpMaskedEqualInt64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int64x8.MaskedGreater", opLen3(ssa.OpMaskedGreaterInt64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int64x8.MaskedGreaterEqual", opLen3(ssa.OpMaskedGreaterEqualInt64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int64x8.MaskedLess", opLen3(ssa.OpMaskedLessInt64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int64x8.MaskedLessEqual", opLen3(ssa.OpMaskedLessEqualInt64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int64x8.MaskedMax", opLen3(ssa.OpMaskedMaxInt64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int64x8.MaskedMin", opLen3(ssa.OpMaskedMinInt64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int64x8.MaskedMulEvenWiden", opLen3(ssa.OpMaskedMulEvenWidenInt64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int64x8.MaskedMulLow", opLen3(ssa.OpMaskedMulLowInt64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int64x8.MaskedNotEqual", opLen3(ssa.OpMaskedNotEqualInt64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int64x8.MaskedOr", opLen3(ssa.OpMaskedOrInt64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int64x8.MaskedSub", opLen3(ssa.OpMaskedSubInt64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int64x8.MaskedXor", opLen3(ssa.OpMaskedXorInt64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int8x16.Greater", opLen2(ssa.OpGreaterInt8x16, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int8x32.Greater", opLen2(ssa.OpGreaterInt8x32, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int16x8.Greater", opLen2(ssa.OpGreaterInt16x8, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int16x16.Greater", opLen2(ssa.OpGreaterInt16x16, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int32x4.Greater", opLen2(ssa.OpGreaterInt32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int32x8.Greater", opLen2(ssa.OpGreaterInt32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int64x4.Greater", opLen2(ssa.OpGreaterInt64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float32x4.Greater", opLen2(ssa.OpGreaterFloat32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float32x8.Greater", opLen2(ssa.OpGreaterFloat32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float32x16.Greater", opLen2(ssa.OpGreaterFloat32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float64x2.Greater", opLen2(ssa.OpGreaterFloat64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float64x4.Greater", opLen2(ssa.OpGreaterFloat64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float64x8.Greater", opLen2(ssa.OpGreaterFloat64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int8x64.Greater", opLen2(ssa.OpGreaterInt8x64, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int16x32.Greater", opLen2(ssa.OpGreaterInt16x32, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int32x16.Greater", opLen2(ssa.OpGreaterInt32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int64x2.Greater", opLen2(ssa.OpGreaterInt64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int64x8.Greater", opLen2(ssa.OpGreaterInt64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint8x16.Greater", opLen2(ssa.OpGreaterUint8x16, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint8x32.Greater", opLen2(ssa.OpGreaterUint8x32, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint8x64.Greater", opLen2(ssa.OpGreaterUint8x64, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint16x8.Greater", opLen2(ssa.OpGreaterUint16x8, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint16x16.Greater", opLen2(ssa.OpGreaterUint16x16, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint16x32.Greater", opLen2(ssa.OpGreaterUint16x32, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint32x4.Greater", opLen2(ssa.OpGreaterUint32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint32x8.Greater", opLen2(ssa.OpGreaterUint32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint32x16.Greater", opLen2(ssa.OpGreaterUint32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint64x2.Greater", opLen2(ssa.OpGreaterUint64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint64x4.Greater", opLen2(ssa.OpGreaterUint64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint64x8.Greater", opLen2(ssa.OpGreaterUint64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float32x4.GreaterEqual", opLen2(ssa.OpGreaterEqualFloat32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float32x8.GreaterEqual", opLen2(ssa.OpGreaterEqualFloat32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float32x16.GreaterEqual", opLen2(ssa.OpGreaterEqualFloat32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float64x2.GreaterEqual", opLen2(ssa.OpGreaterEqualFloat64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float64x4.GreaterEqual", opLen2(ssa.OpGreaterEqualFloat64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float64x8.GreaterEqual", opLen2(ssa.OpGreaterEqualFloat64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int8x16.GreaterEqual", opLen2(ssa.OpGreaterEqualInt8x16, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int8x32.GreaterEqual", opLen2(ssa.OpGreaterEqualInt8x32, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int8x64.GreaterEqual", opLen2(ssa.OpGreaterEqualInt8x64, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int16x8.GreaterEqual", opLen2(ssa.OpGreaterEqualInt16x8, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int16x16.GreaterEqual", opLen2(ssa.OpGreaterEqualInt16x16, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int16x32.GreaterEqual", opLen2(ssa.OpGreaterEqualInt16x32, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int32x4.GreaterEqual", opLen2(ssa.OpGreaterEqualInt32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int32x8.GreaterEqual", opLen2(ssa.OpGreaterEqualInt32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int32x16.GreaterEqual", opLen2(ssa.OpGreaterEqualInt32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int64x2.GreaterEqual", opLen2(ssa.OpGreaterEqualInt64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int64x4.GreaterEqual", opLen2(ssa.OpGreaterEqualInt64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int64x8.GreaterEqual", opLen2(ssa.OpGreaterEqualInt64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint8x16.GreaterEqual", opLen2(ssa.OpGreaterEqualUint8x16, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint8x32.GreaterEqual", opLen2(ssa.OpGreaterEqualUint8x32, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint8x64.GreaterEqual", opLen2(ssa.OpGreaterEqualUint8x64, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint16x8.GreaterEqual", opLen2(ssa.OpGreaterEqualUint16x8, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint16x16.GreaterEqual", opLen2(ssa.OpGreaterEqualUint16x16, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint16x32.GreaterEqual", opLen2(ssa.OpGreaterEqualUint16x32, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint32x4.GreaterEqual", opLen2(ssa.OpGreaterEqualUint32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint32x8.GreaterEqual", opLen2(ssa.OpGreaterEqualUint32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint32x16.GreaterEqual", opLen2(ssa.OpGreaterEqualUint32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint64x2.GreaterEqual", opLen2(ssa.OpGreaterEqualUint64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint64x4.GreaterEqual", opLen2(ssa.OpGreaterEqualUint64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint64x8.GreaterEqual", opLen2(ssa.OpGreaterEqualUint64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float32x4.IsNan", opLen2(ssa.OpIsNanFloat32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float32x8.IsNan", opLen2(ssa.OpIsNanFloat32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float32x16.IsNan", opLen2(ssa.OpIsNanFloat32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float64x2.IsNan", opLen2(ssa.OpIsNanFloat64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float64x4.IsNan", opLen2(ssa.OpIsNanFloat64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float64x8.IsNan", opLen2(ssa.OpIsNanFloat64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float32x4.Less", opLen2(ssa.OpLessFloat32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float32x8.Less", opLen2(ssa.OpLessFloat32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float32x16.Less", opLen2(ssa.OpLessFloat32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float64x2.Less", opLen2(ssa.OpLessFloat64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float64x4.Less", opLen2(ssa.OpLessFloat64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float64x8.Less", opLen2(ssa.OpLessFloat64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int8x16.Less", opLen2(ssa.OpLessInt8x16, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int8x32.Less", opLen2(ssa.OpLessInt8x32, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int8x64.Less", opLen2(ssa.OpLessInt8x64, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int16x8.Less", opLen2(ssa.OpLessInt16x8, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int16x16.Less", opLen2(ssa.OpLessInt16x16, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int16x32.Less", opLen2(ssa.OpLessInt16x32, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int32x4.Less", opLen2(ssa.OpLessInt32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int32x8.Less", opLen2(ssa.OpLessInt32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int32x16.Less", opLen2(ssa.OpLessInt32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int64x2.Less", opLen2(ssa.OpLessInt64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int64x4.Less", opLen2(ssa.OpLessInt64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int64x8.Less", opLen2(ssa.OpLessInt64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint8x16.Less", opLen2(ssa.OpLessUint8x16, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint8x32.Less", opLen2(ssa.OpLessUint8x32, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint8x64.Less", opLen2(ssa.OpLessUint8x64, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint16x8.Less", opLen2(ssa.OpLessUint16x8, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint16x16.Less", opLen2(ssa.OpLessUint16x16, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint16x32.Less", opLen2(ssa.OpLessUint16x32, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint32x4.Less", opLen2(ssa.OpLessUint32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint32x8.Less", opLen2(ssa.OpLessUint32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint32x16.Less", opLen2(ssa.OpLessUint32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint64x2.Less", opLen2(ssa.OpLessUint64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint64x4.Less", opLen2(ssa.OpLessUint64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint64x8.Less", opLen2(ssa.OpLessUint64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float32x4.LessEqual", opLen2(ssa.OpLessEqualFloat32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float32x8.LessEqual", opLen2(ssa.OpLessEqualFloat32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float32x16.LessEqual", opLen2(ssa.OpLessEqualFloat32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float64x2.LessEqual", opLen2(ssa.OpLessEqualFloat64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float64x4.LessEqual", opLen2(ssa.OpLessEqualFloat64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float64x8.LessEqual", opLen2(ssa.OpLessEqualFloat64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int8x16.LessEqual", opLen2(ssa.OpLessEqualInt8x16, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int8x32.LessEqual", opLen2(ssa.OpLessEqualInt8x32, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int8x64.LessEqual", opLen2(ssa.OpLessEqualInt8x64, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int16x8.LessEqual", opLen2(ssa.OpLessEqualInt16x8, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int16x16.LessEqual", opLen2(ssa.OpLessEqualInt16x16, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int16x32.LessEqual", opLen2(ssa.OpLessEqualInt16x32, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int32x4.LessEqual", opLen2(ssa.OpLessEqualInt32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int32x8.LessEqual", opLen2(ssa.OpLessEqualInt32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int32x16.LessEqual", opLen2(ssa.OpLessEqualInt32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int64x2.LessEqual", opLen2(ssa.OpLessEqualInt64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int64x4.LessEqual", opLen2(ssa.OpLessEqualInt64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int64x8.LessEqual", opLen2(ssa.OpLessEqualInt64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint8x16.LessEqual", opLen2(ssa.OpLessEqualUint8x16, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint8x32.LessEqual", opLen2(ssa.OpLessEqualUint8x32, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint8x64.LessEqual", opLen2(ssa.OpLessEqualUint8x64, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint16x8.LessEqual", opLen2(ssa.OpLessEqualUint16x8, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint16x16.LessEqual", opLen2(ssa.OpLessEqualUint16x16, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint16x32.LessEqual", opLen2(ssa.OpLessEqualUint16x32, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint32x4.LessEqual", opLen2(ssa.OpLessEqualUint32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint32x8.LessEqual", opLen2(ssa.OpLessEqualUint32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint32x16.LessEqual", opLen2(ssa.OpLessEqualUint32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint64x2.LessEqual", opLen2(ssa.OpLessEqualUint64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint64x4.LessEqual", opLen2(ssa.OpLessEqualUint64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint64x8.LessEqual", opLen2(ssa.OpLessEqualUint64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int8x16.MaskedAbsolute", opLen2(ssa.OpMaskedAbsoluteInt8x16, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int8x32.MaskedAbsolute", opLen2(ssa.OpMaskedAbsoluteInt8x32, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int8x64.MaskedAbsolute", opLen2(ssa.OpMaskedAbsoluteInt8x64, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int16x8.MaskedAbsolute", opLen2(ssa.OpMaskedAbsoluteInt16x8, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int16x16.MaskedAbsolute", opLen2(ssa.OpMaskedAbsoluteInt16x16, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int16x32.MaskedAbsolute", opLen2(ssa.OpMaskedAbsoluteInt16x32, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int32x4.MaskedAbsolute", opLen2(ssa.OpMaskedAbsoluteInt32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int32x8.MaskedAbsolute", opLen2(ssa.OpMaskedAbsoluteInt32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int32x16.MaskedAbsolute", opLen2(ssa.OpMaskedAbsoluteInt32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int64x2.MaskedAbsolute", opLen2(ssa.OpMaskedAbsoluteInt64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int64x4.MaskedAbsolute", opLen2(ssa.OpMaskedAbsoluteInt64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int64x8.MaskedAbsolute", opLen2(ssa.OpMaskedAbsoluteInt64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float32x4.MaskedAdd", opLen3(ssa.OpMaskedAddFloat32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float32x8.MaskedAdd", opLen3(ssa.OpMaskedAddFloat32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float32x16.MaskedAdd", opLen3(ssa.OpMaskedAddFloat32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float64x2.MaskedAdd", opLen3(ssa.OpMaskedAddFloat64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float64x4.MaskedAdd", opLen3(ssa.OpMaskedAddFloat64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float64x8.MaskedAdd", opLen3(ssa.OpMaskedAddFloat64x8, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Int8x16.MaskedAdd", opLen3(ssa.OpMaskedAddInt8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int8x16.MaskedEqual", opLen3(ssa.OpMaskedEqualInt8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int8x16.MaskedGreater", opLen3(ssa.OpMaskedGreaterInt8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int8x16.MaskedGreaterEqual", opLen3(ssa.OpMaskedGreaterEqualInt8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int8x16.MaskedLess", opLen3(ssa.OpMaskedLessInt8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int8x16.MaskedLessEqual", opLen3(ssa.OpMaskedLessEqualInt8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int8x16.MaskedMax", opLen3(ssa.OpMaskedMaxInt8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int8x16.MaskedMin", opLen3(ssa.OpMaskedMinInt8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int8x16.MaskedNotEqual", opLen3(ssa.OpMaskedNotEqualInt8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int8x16.MaskedSaturatedAdd", opLen3(ssa.OpMaskedSaturatedAddInt8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int8x16.MaskedSaturatedSub", opLen3(ssa.OpMaskedSaturatedSubInt8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int8x16.MaskedSub", opLen3(ssa.OpMaskedSubInt8x16, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Int8x32.MaskedAdd", opLen3(ssa.OpMaskedAddInt8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int8x32.MaskedEqual", opLen3(ssa.OpMaskedEqualInt8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int8x32.MaskedGreater", opLen3(ssa.OpMaskedGreaterInt8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int8x32.MaskedGreaterEqual", opLen3(ssa.OpMaskedGreaterEqualInt8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int8x32.MaskedLess", opLen3(ssa.OpMaskedLessInt8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int8x32.MaskedLessEqual", opLen3(ssa.OpMaskedLessEqualInt8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int8x32.MaskedMax", opLen3(ssa.OpMaskedMaxInt8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int8x32.MaskedMin", opLen3(ssa.OpMaskedMinInt8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int8x32.MaskedNotEqual", opLen3(ssa.OpMaskedNotEqualInt8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int8x32.MaskedSaturatedAdd", opLen3(ssa.OpMaskedSaturatedAddInt8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int8x32.MaskedSaturatedSub", opLen3(ssa.OpMaskedSaturatedSubInt8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int8x32.MaskedSub", opLen3(ssa.OpMaskedSubInt8x32, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Int8x64.MaskedAdd", opLen3(ssa.OpMaskedAddInt8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int8x64.MaskedEqual", opLen3(ssa.OpMaskedEqualInt8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int8x64.MaskedGreater", opLen3(ssa.OpMaskedGreaterInt8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int8x64.MaskedGreaterEqual", opLen3(ssa.OpMaskedGreaterEqualInt8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int8x64.MaskedLess", opLen3(ssa.OpMaskedLessInt8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int8x64.MaskedLessEqual", opLen3(ssa.OpMaskedLessEqualInt8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int8x64.MaskedMax", opLen3(ssa.OpMaskedMaxInt8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int8x64.MaskedMin", opLen3(ssa.OpMaskedMinInt8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int8x64.MaskedNotEqual", opLen3(ssa.OpMaskedNotEqualInt8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int8x64.MaskedSaturatedAdd", opLen3(ssa.OpMaskedSaturatedAddInt8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int8x64.MaskedSaturatedSub", opLen3(ssa.OpMaskedSaturatedSubInt8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int8x64.MaskedSub", opLen3(ssa.OpMaskedSubInt8x64, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int16x8.MaskedAdd", opLen3(ssa.OpMaskedAddInt16x8, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int16x16.MaskedAdd", opLen3(ssa.OpMaskedAddInt16x16, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int16x32.MaskedAdd", opLen3(ssa.OpMaskedAddInt16x32, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int32x4.MaskedAdd", opLen3(ssa.OpMaskedAddInt32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int32x8.MaskedAdd", opLen3(ssa.OpMaskedAddInt32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int32x16.MaskedAdd", opLen3(ssa.OpMaskedAddInt32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int64x2.MaskedAdd", opLen3(ssa.OpMaskedAddInt64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int64x4.MaskedAdd", opLen3(ssa.OpMaskedAddInt64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int64x8.MaskedAdd", opLen3(ssa.OpMaskedAddInt64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint8x16.MaskedAdd", opLen3(ssa.OpMaskedAddUint8x16, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint8x32.MaskedAdd", opLen3(ssa.OpMaskedAddUint8x32, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint8x64.MaskedAdd", opLen3(ssa.OpMaskedAddUint8x64, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint16x8.MaskedAdd", opLen3(ssa.OpMaskedAddUint16x8, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Uint16x16.MaskedAdd", opLen3(ssa.OpMaskedAddUint16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint16x16.MaskedAverage", opLen3(ssa.OpMaskedAverageUint16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint16x16.MaskedEqual", opLen3(ssa.OpMaskedEqualUint16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint16x16.MaskedGreater", opLen3(ssa.OpMaskedGreaterUint16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint16x16.MaskedGreaterEqual", opLen3(ssa.OpMaskedGreaterEqualUint16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint16x16.MaskedLess", opLen3(ssa.OpMaskedLessUint16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint16x16.MaskedLessEqual", opLen3(ssa.OpMaskedLessEqualUint16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint16x16.MaskedMax", opLen3(ssa.OpMaskedMaxUint16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint16x16.MaskedMin", opLen3(ssa.OpMaskedMinUint16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint16x16.MaskedMulHigh", opLen3(ssa.OpMaskedMulHighUint16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint16x16.MaskedNotEqual", opLen3(ssa.OpMaskedNotEqualUint16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint16x16.MaskedSaturatedAdd", opLen3(ssa.OpMaskedSaturatedAddUint16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint16x16.MaskedSaturatedSub", opLen3(ssa.OpMaskedSaturatedSubUint16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint16x16.MaskedSaturatedUnsignedSignedPairDotProd", opLen3(ssa.OpMaskedSaturatedUnsignedSignedPairDotProdUint16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint16x16.MaskedSub", opLen3(ssa.OpMaskedSubUint16x16, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Uint16x32.MaskedAdd", opLen3(ssa.OpMaskedAddUint16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint16x32.MaskedAverage", opLen3(ssa.OpMaskedAverageUint16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint16x32.MaskedEqual", opLen3(ssa.OpMaskedEqualUint16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint16x32.MaskedGreater", opLen3(ssa.OpMaskedGreaterUint16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint16x32.MaskedGreaterEqual", opLen3(ssa.OpMaskedGreaterEqualUint16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint16x32.MaskedLess", opLen3(ssa.OpMaskedLessUint16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint16x32.MaskedLessEqual", opLen3(ssa.OpMaskedLessEqualUint16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint16x32.MaskedMax", opLen3(ssa.OpMaskedMaxUint16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint16x32.MaskedMin", opLen3(ssa.OpMaskedMinUint16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint16x32.MaskedMulHigh", opLen3(ssa.OpMaskedMulHighUint16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint16x32.MaskedNotEqual", opLen3(ssa.OpMaskedNotEqualUint16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint16x32.MaskedSaturatedAdd", opLen3(ssa.OpMaskedSaturatedAddUint16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint16x32.MaskedSaturatedSub", opLen3(ssa.OpMaskedSaturatedSubUint16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint16x32.MaskedSaturatedUnsignedSignedPairDotProd", opLen3(ssa.OpMaskedSaturatedUnsignedSignedPairDotProdUint16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint16x32.MaskedSub", opLen3(ssa.OpMaskedSubUint16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint16x8.MaskedAdd", opLen3(ssa.OpMaskedAddUint16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint16x8.MaskedAverage", opLen3(ssa.OpMaskedAverageUint16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint16x8.MaskedEqual", opLen3(ssa.OpMaskedEqualUint16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint16x8.MaskedGreater", opLen3(ssa.OpMaskedGreaterUint16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint16x8.MaskedGreaterEqual", opLen3(ssa.OpMaskedGreaterEqualUint16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint16x8.MaskedLess", opLen3(ssa.OpMaskedLessUint16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint16x8.MaskedLessEqual", opLen3(ssa.OpMaskedLessEqualUint16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint16x8.MaskedMax", opLen3(ssa.OpMaskedMaxUint16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint16x8.MaskedMin", opLen3(ssa.OpMaskedMinUint16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint16x8.MaskedMulHigh", opLen3(ssa.OpMaskedMulHighUint16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint16x8.MaskedNotEqual", opLen3(ssa.OpMaskedNotEqualUint16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint16x8.MaskedSaturatedAdd", opLen3(ssa.OpMaskedSaturatedAddUint16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint16x8.MaskedSaturatedSub", opLen3(ssa.OpMaskedSaturatedSubUint16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint16x8.MaskedSaturatedUnsignedSignedPairDotProd", opLen3(ssa.OpMaskedSaturatedUnsignedSignedPairDotProdUint16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint16x8.MaskedSub", opLen3(ssa.OpMaskedSubUint16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint32x16.MaskedAdd", opLen3(ssa.OpMaskedAddUint32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint32x16.MaskedAnd", opLen3(ssa.OpMaskedAndUint32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint32x16.MaskedAndNot", opLen3(ssa.OpMaskedAndNotUint32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint32x16.MaskedEqual", opLen3(ssa.OpMaskedEqualUint32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint32x16.MaskedGreater", opLen3(ssa.OpMaskedGreaterUint32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint32x16.MaskedGreaterEqual", opLen3(ssa.OpMaskedGreaterEqualUint32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint32x16.MaskedLess", opLen3(ssa.OpMaskedLessUint32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint32x16.MaskedLessEqual", opLen3(ssa.OpMaskedLessEqualUint32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint32x16.MaskedMax", opLen3(ssa.OpMaskedMaxUint32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint32x16.MaskedMin", opLen3(ssa.OpMaskedMinUint32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint32x16.MaskedNotEqual", opLen3(ssa.OpMaskedNotEqualUint32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint32x16.MaskedOr", opLen3(ssa.OpMaskedOrUint32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint32x16.MaskedSub", opLen3(ssa.OpMaskedSubUint32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint32x16.MaskedXor", opLen3(ssa.OpMaskedXorUint32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint32x16.SaturatedUnsignedSignedQuadDotProdAccumulate", opLen3(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateUint32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint32x16.UnsignedSignedQuadDotProdAccumulate", opLen3(ssa.OpUnsignedSignedQuadDotProdAccumulateUint32x16, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Uint32x4.MaskedAdd", opLen3(ssa.OpMaskedAddUint32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint32x8.MaskedAdd", opLen3(ssa.OpMaskedAddUint32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint32x16.MaskedAdd", opLen3(ssa.OpMaskedAddUint32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint64x2.MaskedAdd", opLen3(ssa.OpMaskedAddUint64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint64x4.MaskedAdd", opLen3(ssa.OpMaskedAddUint64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint64x8.MaskedAdd", opLen3(ssa.OpMaskedAddUint64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float32x4.MaskedAnd", opLen3(ssa.OpMaskedAndFloat32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float32x8.MaskedAnd", opLen3(ssa.OpMaskedAndFloat32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float32x16.MaskedAnd", opLen3(ssa.OpMaskedAndFloat32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float64x2.MaskedAnd", opLen3(ssa.OpMaskedAndFloat64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float64x4.MaskedAnd", opLen3(ssa.OpMaskedAndFloat64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float64x8.MaskedAnd", opLen3(ssa.OpMaskedAndFloat64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int32x4.MaskedAnd", opLen3(ssa.OpMaskedAndInt32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int32x8.MaskedAnd", opLen3(ssa.OpMaskedAndInt32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int32x16.MaskedAnd", opLen3(ssa.OpMaskedAndInt32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int64x2.MaskedAnd", opLen3(ssa.OpMaskedAndInt64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int64x4.MaskedAnd", opLen3(ssa.OpMaskedAndInt64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int64x8.MaskedAnd", opLen3(ssa.OpMaskedAndInt64x8, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Uint32x4.MaskedAnd", opLen3(ssa.OpMaskedAndUint32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint32x8.MaskedAnd", opLen3(ssa.OpMaskedAndUint32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint32x16.MaskedAnd", opLen3(ssa.OpMaskedAndUint32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint64x2.MaskedAnd", opLen3(ssa.OpMaskedAndUint64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint64x4.MaskedAnd", opLen3(ssa.OpMaskedAndUint64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint64x8.MaskedAnd", opLen3(ssa.OpMaskedAndUint64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float32x4.MaskedAndNot", opLen3(ssa.OpMaskedAndNotFloat32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float32x8.MaskedAndNot", opLen3(ssa.OpMaskedAndNotFloat32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float32x16.MaskedAndNot", opLen3(ssa.OpMaskedAndNotFloat32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float64x2.MaskedAndNot", opLen3(ssa.OpMaskedAndNotFloat64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float64x4.MaskedAndNot", opLen3(ssa.OpMaskedAndNotFloat64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float64x8.MaskedAndNot", opLen3(ssa.OpMaskedAndNotFloat64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int32x4.MaskedAndNot", opLen3(ssa.OpMaskedAndNotInt32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int32x8.MaskedAndNot", opLen3(ssa.OpMaskedAndNotInt32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int32x16.MaskedAndNot", opLen3(ssa.OpMaskedAndNotInt32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int64x2.MaskedAndNot", opLen3(ssa.OpMaskedAndNotInt64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int64x4.MaskedAndNot", opLen3(ssa.OpMaskedAndNotInt64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int64x8.MaskedAndNot", opLen3(ssa.OpMaskedAndNotInt64x8, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Uint32x4.MaskedAndNot", opLen3(ssa.OpMaskedAndNotUint32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint32x8.MaskedAndNot", opLen3(ssa.OpMaskedAndNotUint32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint32x16.MaskedAndNot", opLen3(ssa.OpMaskedAndNotUint32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint64x2.MaskedAndNot", opLen3(ssa.OpMaskedAndNotUint64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint64x4.MaskedAndNot", opLen3(ssa.OpMaskedAndNotUint64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint64x8.MaskedAndNot", opLen3(ssa.OpMaskedAndNotUint64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float32x4.MaskedApproximateReciprocal", opLen2(ssa.OpMaskedApproximateReciprocalFloat32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float32x8.MaskedApproximateReciprocal", opLen2(ssa.OpMaskedApproximateReciprocalFloat32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float32x16.MaskedApproximateReciprocal", opLen2(ssa.OpMaskedApproximateReciprocalFloat32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float64x2.MaskedApproximateReciprocal", opLen2(ssa.OpMaskedApproximateReciprocalFloat64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float64x4.MaskedApproximateReciprocal", opLen2(ssa.OpMaskedApproximateReciprocalFloat64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float64x8.MaskedApproximateReciprocal", opLen2(ssa.OpMaskedApproximateReciprocalFloat64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float32x4.MaskedApproximateReciprocalOfSqrt", opLen2(ssa.OpMaskedApproximateReciprocalOfSqrtFloat32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float32x8.MaskedApproximateReciprocalOfSqrt", opLen2(ssa.OpMaskedApproximateReciprocalOfSqrtFloat32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float32x16.MaskedApproximateReciprocalOfSqrt", opLen2(ssa.OpMaskedApproximateReciprocalOfSqrtFloat32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float64x2.MaskedApproximateReciprocalOfSqrt", opLen2(ssa.OpMaskedApproximateReciprocalOfSqrtFloat64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float64x4.MaskedApproximateReciprocalOfSqrt", opLen2(ssa.OpMaskedApproximateReciprocalOfSqrtFloat64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float64x8.MaskedApproximateReciprocalOfSqrt", opLen2(ssa.OpMaskedApproximateReciprocalOfSqrtFloat64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint8x16.MaskedAverage", opLen3(ssa.OpMaskedAverageUint8x16, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint8x32.MaskedAverage", opLen3(ssa.OpMaskedAverageUint8x32, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint8x64.MaskedAverage", opLen3(ssa.OpMaskedAverageUint8x64, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint16x8.MaskedAverage", opLen3(ssa.OpMaskedAverageUint16x8, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint16x16.MaskedAverage", opLen3(ssa.OpMaskedAverageUint16x16, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint16x32.MaskedAverage", opLen3(ssa.OpMaskedAverageUint16x32, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float32x4.MaskedCeilSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedCeilSuppressExceptionWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float32x8.MaskedCeilSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedCeilSuppressExceptionWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float32x16.MaskedCeilSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedCeilSuppressExceptionWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float64x2.MaskedCeilSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedCeilSuppressExceptionWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float64x4.MaskedCeilSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedCeilSuppressExceptionWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float64x8.MaskedCeilSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedCeilSuppressExceptionWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float32x4.MaskedCeilWithPrecision", opLen2Imm8(ssa.OpMaskedCeilWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float32x8.MaskedCeilWithPrecision", opLen2Imm8(ssa.OpMaskedCeilWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float32x16.MaskedCeilWithPrecision", opLen2Imm8(ssa.OpMaskedCeilWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float64x2.MaskedCeilWithPrecision", opLen2Imm8(ssa.OpMaskedCeilWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float64x4.MaskedCeilWithPrecision", opLen2Imm8(ssa.OpMaskedCeilWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float64x8.MaskedCeilWithPrecision", opLen2Imm8(ssa.OpMaskedCeilWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float32x4.MaskedDiffWithCeilSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithCeilSuppressExceptionWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float32x8.MaskedDiffWithCeilSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithCeilSuppressExceptionWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float32x16.MaskedDiffWithCeilSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithCeilSuppressExceptionWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float64x2.MaskedDiffWithCeilSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithCeilSuppressExceptionWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float64x4.MaskedDiffWithCeilSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithCeilSuppressExceptionWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float64x8.MaskedDiffWithCeilSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithCeilSuppressExceptionWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float32x4.MaskedDiffWithCeilWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithCeilWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float32x8.MaskedDiffWithCeilWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithCeilWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float32x16.MaskedDiffWithCeilWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithCeilWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float64x2.MaskedDiffWithCeilWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithCeilWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float64x4.MaskedDiffWithCeilWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithCeilWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float64x8.MaskedDiffWithCeilWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithCeilWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float32x4.MaskedDiffWithFloorSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithFloorSuppressExceptionWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float32x8.MaskedDiffWithFloorSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithFloorSuppressExceptionWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float32x16.MaskedDiffWithFloorSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithFloorSuppressExceptionWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float64x2.MaskedDiffWithFloorSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithFloorSuppressExceptionWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float64x4.MaskedDiffWithFloorSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithFloorSuppressExceptionWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float64x8.MaskedDiffWithFloorSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithFloorSuppressExceptionWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float32x4.MaskedDiffWithFloorWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithFloorWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float32x8.MaskedDiffWithFloorWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithFloorWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float32x16.MaskedDiffWithFloorWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithFloorWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float64x2.MaskedDiffWithFloorWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithFloorWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float64x4.MaskedDiffWithFloorWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithFloorWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float64x8.MaskedDiffWithFloorWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithFloorWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float32x4.MaskedDiffWithRoundSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithRoundSuppressExceptionWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float32x8.MaskedDiffWithRoundSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithRoundSuppressExceptionWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float32x16.MaskedDiffWithRoundSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithRoundSuppressExceptionWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float64x2.MaskedDiffWithRoundSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithRoundSuppressExceptionWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float64x4.MaskedDiffWithRoundSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithRoundSuppressExceptionWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float64x8.MaskedDiffWithRoundSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithRoundSuppressExceptionWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float32x4.MaskedDiffWithRoundWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithRoundWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float32x8.MaskedDiffWithRoundWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithRoundWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float32x16.MaskedDiffWithRoundWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithRoundWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float64x2.MaskedDiffWithRoundWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithRoundWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float64x4.MaskedDiffWithRoundWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithRoundWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float64x8.MaskedDiffWithRoundWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithRoundWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float32x4.MaskedDiffWithTruncSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithTruncSuppressExceptionWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float32x8.MaskedDiffWithTruncSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithTruncSuppressExceptionWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float32x16.MaskedDiffWithTruncSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithTruncSuppressExceptionWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float64x2.MaskedDiffWithTruncSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithTruncSuppressExceptionWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float64x4.MaskedDiffWithTruncSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithTruncSuppressExceptionWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float64x8.MaskedDiffWithTruncSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithTruncSuppressExceptionWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float32x4.MaskedDiffWithTruncWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithTruncWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float32x8.MaskedDiffWithTruncWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithTruncWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float32x16.MaskedDiffWithTruncWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithTruncWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float64x2.MaskedDiffWithTruncWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithTruncWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float64x4.MaskedDiffWithTruncWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithTruncWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float64x8.MaskedDiffWithTruncWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithTruncWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float32x4.MaskedDiv", opLen3(ssa.OpMaskedDivFloat32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float32x8.MaskedDiv", opLen3(ssa.OpMaskedDivFloat32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float32x16.MaskedDiv", opLen3(ssa.OpMaskedDivFloat32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float64x2.MaskedDiv", opLen3(ssa.OpMaskedDivFloat64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float64x4.MaskedDiv", opLen3(ssa.OpMaskedDivFloat64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float64x8.MaskedDiv", opLen3(ssa.OpMaskedDivFloat64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float32x4.MaskedEqual", opLen3(ssa.OpMaskedEqualFloat32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float32x8.MaskedEqual", opLen3(ssa.OpMaskedEqualFloat32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float32x16.MaskedEqual", opLen3(ssa.OpMaskedEqualFloat32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float64x2.MaskedEqual", opLen3(ssa.OpMaskedEqualFloat64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float64x4.MaskedEqual", opLen3(ssa.OpMaskedEqualFloat64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float64x8.MaskedEqual", opLen3(ssa.OpMaskedEqualFloat64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int8x16.MaskedEqual", opLen3(ssa.OpMaskedEqualInt8x16, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int8x32.MaskedEqual", opLen3(ssa.OpMaskedEqualInt8x32, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int8x64.MaskedEqual", opLen3(ssa.OpMaskedEqualInt8x64, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int16x8.MaskedEqual", opLen3(ssa.OpMaskedEqualInt16x8, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int16x16.MaskedEqual", opLen3(ssa.OpMaskedEqualInt16x16, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int16x32.MaskedEqual", opLen3(ssa.OpMaskedEqualInt16x32, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int32x4.MaskedEqual", opLen3(ssa.OpMaskedEqualInt32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int32x8.MaskedEqual", opLen3(ssa.OpMaskedEqualInt32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int32x16.MaskedEqual", opLen3(ssa.OpMaskedEqualInt32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int64x2.MaskedEqual", opLen3(ssa.OpMaskedEqualInt64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int64x4.MaskedEqual", opLen3(ssa.OpMaskedEqualInt64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int64x8.MaskedEqual", opLen3(ssa.OpMaskedEqualInt64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint8x16.MaskedEqual", opLen3(ssa.OpMaskedEqualUint8x16, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint8x32.MaskedEqual", opLen3(ssa.OpMaskedEqualUint8x32, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint8x64.MaskedEqual", opLen3(ssa.OpMaskedEqualUint8x64, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint16x8.MaskedEqual", opLen3(ssa.OpMaskedEqualUint16x8, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint16x16.MaskedEqual", opLen3(ssa.OpMaskedEqualUint16x16, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint16x32.MaskedEqual", opLen3(ssa.OpMaskedEqualUint16x32, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Uint32x4.MaskedEqual", opLen3(ssa.OpMaskedEqualUint32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint32x4.MaskedGreater", opLen3(ssa.OpMaskedGreaterUint32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint32x4.MaskedGreaterEqual", opLen3(ssa.OpMaskedGreaterEqualUint32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint32x4.MaskedLess", opLen3(ssa.OpMaskedLessUint32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint32x4.MaskedLessEqual", opLen3(ssa.OpMaskedLessEqualUint32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint32x4.MaskedMax", opLen3(ssa.OpMaskedMaxUint32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint32x4.MaskedMin", opLen3(ssa.OpMaskedMinUint32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint32x4.MaskedNotEqual", opLen3(ssa.OpMaskedNotEqualUint32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint32x4.MaskedOr", opLen3(ssa.OpMaskedOrUint32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint32x4.MaskedSub", opLen3(ssa.OpMaskedSubUint32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint32x4.MaskedXor", opLen3(ssa.OpMaskedXorUint32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint32x4.SaturatedUnsignedSignedQuadDotProdAccumulate", opLen3(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateUint32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint32x4.UnsignedSignedQuadDotProdAccumulate", opLen3(ssa.OpUnsignedSignedQuadDotProdAccumulateUint32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint32x8.MaskedAdd", opLen3(ssa.OpMaskedAddUint32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint32x8.MaskedAnd", opLen3(ssa.OpMaskedAndUint32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint32x8.MaskedAndNot", opLen3(ssa.OpMaskedAndNotUint32x8, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Uint32x8.MaskedEqual", opLen3(ssa.OpMaskedEqualUint32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint32x8.MaskedGreater", opLen3(ssa.OpMaskedGreaterUint32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint32x8.MaskedGreaterEqual", opLen3(ssa.OpMaskedGreaterEqualUint32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint32x8.MaskedLess", opLen3(ssa.OpMaskedLessUint32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint32x8.MaskedLessEqual", opLen3(ssa.OpMaskedLessEqualUint32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint32x8.MaskedMax", opLen3(ssa.OpMaskedMaxUint32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint32x8.MaskedMin", opLen3(ssa.OpMaskedMinUint32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint32x8.MaskedNotEqual", opLen3(ssa.OpMaskedNotEqualUint32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint32x8.MaskedOr", opLen3(ssa.OpMaskedOrUint32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint32x8.MaskedSub", opLen3(ssa.OpMaskedSubUint32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint32x8.MaskedXor", opLen3(ssa.OpMaskedXorUint32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint32x8.SaturatedUnsignedSignedQuadDotProdAccumulate", opLen3(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateUint32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint32x8.UnsignedSignedQuadDotProdAccumulate", opLen3(ssa.OpUnsignedSignedQuadDotProdAccumulateUint32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint64x2.MaskedAdd", opLen3(ssa.OpMaskedAddUint64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint64x2.MaskedAnd", opLen3(ssa.OpMaskedAndUint64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint64x2.MaskedAndNot", opLen3(ssa.OpMaskedAndNotUint64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint32x16.MaskedEqual", opLen3(ssa.OpMaskedEqualUint32x16, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Uint64x2.MaskedEqual", opLen3(ssa.OpMaskedEqualUint64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint64x2.MaskedGreater", opLen3(ssa.OpMaskedGreaterUint64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint64x2.MaskedGreaterEqual", opLen3(ssa.OpMaskedGreaterEqualUint64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint64x2.MaskedLess", opLen3(ssa.OpMaskedLessUint64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint64x2.MaskedLessEqual", opLen3(ssa.OpMaskedLessEqualUint64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint64x2.MaskedMax", opLen3(ssa.OpMaskedMaxUint64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint64x2.MaskedMin", opLen3(ssa.OpMaskedMinUint64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint64x2.MaskedMulEvenWiden", opLen3(ssa.OpMaskedMulEvenWidenUint64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint64x2.MaskedNotEqual", opLen3(ssa.OpMaskedNotEqualUint64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint64x2.MaskedOr", opLen3(ssa.OpMaskedOrUint64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint64x2.MaskedSub", opLen3(ssa.OpMaskedSubUint64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint64x2.MaskedXor", opLen3(ssa.OpMaskedXorUint64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint64x4.MaskedAdd", opLen3(ssa.OpMaskedAddUint64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint64x4.MaskedAnd", opLen3(ssa.OpMaskedAndUint64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint64x4.MaskedAndNot", opLen3(ssa.OpMaskedAndNotUint64x4, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Uint64x4.MaskedEqual", opLen3(ssa.OpMaskedEqualUint64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint64x4.MaskedGreater", opLen3(ssa.OpMaskedGreaterUint64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint64x4.MaskedGreaterEqual", opLen3(ssa.OpMaskedGreaterEqualUint64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint64x4.MaskedLess", opLen3(ssa.OpMaskedLessUint64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint64x4.MaskedLessEqual", opLen3(ssa.OpMaskedLessEqualUint64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint64x4.MaskedMax", opLen3(ssa.OpMaskedMaxUint64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint64x4.MaskedMin", opLen3(ssa.OpMaskedMinUint64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint64x4.MaskedMulEvenWiden", opLen3(ssa.OpMaskedMulEvenWidenUint64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint64x4.MaskedNotEqual", opLen3(ssa.OpMaskedNotEqualUint64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint64x4.MaskedOr", opLen3(ssa.OpMaskedOrUint64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint64x4.MaskedSub", opLen3(ssa.OpMaskedSubUint64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint64x4.MaskedXor", opLen3(ssa.OpMaskedXorUint64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint64x8.MaskedAdd", opLen3(ssa.OpMaskedAddUint64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint64x8.MaskedAnd", opLen3(ssa.OpMaskedAndUint64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint64x8.MaskedAndNot", opLen3(ssa.OpMaskedAndNotUint64x8, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Uint64x8.MaskedEqual", opLen3(ssa.OpMaskedEqualUint64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint64x8.MaskedGreater", opLen3(ssa.OpMaskedGreaterUint64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint64x8.MaskedGreaterEqual", opLen3(ssa.OpMaskedGreaterEqualUint64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint64x8.MaskedLess", opLen3(ssa.OpMaskedLessUint64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint64x8.MaskedLessEqual", opLen3(ssa.OpMaskedLessEqualUint64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint64x8.MaskedMax", opLen3(ssa.OpMaskedMaxUint64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint64x8.MaskedMin", opLen3(ssa.OpMaskedMinUint64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint64x8.MaskedMulEvenWiden", opLen3(ssa.OpMaskedMulEvenWidenUint64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint64x8.MaskedNotEqual", opLen3(ssa.OpMaskedNotEqualUint64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint64x8.MaskedOr", opLen3(ssa.OpMaskedOrUint64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint64x8.MaskedSub", opLen3(ssa.OpMaskedSubUint64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint64x8.MaskedXor", opLen3(ssa.OpMaskedXorUint64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint8x16.MaskedAdd", opLen3(ssa.OpMaskedAddUint8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint8x16.MaskedAverage", opLen3(ssa.OpMaskedAverageUint8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint8x16.MaskedEqual", opLen3(ssa.OpMaskedEqualUint8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint8x16.MaskedGreater", opLen3(ssa.OpMaskedGreaterUint8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint8x16.MaskedGreaterEqual", opLen3(ssa.OpMaskedGreaterEqualUint8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint8x16.MaskedLess", opLen3(ssa.OpMaskedLessUint8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint8x16.MaskedLessEqual", opLen3(ssa.OpMaskedLessEqualUint8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint8x16.MaskedMax", opLen3(ssa.OpMaskedMaxUint8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint8x16.MaskedMin", opLen3(ssa.OpMaskedMinUint8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint8x16.MaskedNotEqual", opLen3(ssa.OpMaskedNotEqualUint8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint8x16.MaskedSaturatedAdd", opLen3(ssa.OpMaskedSaturatedAddUint8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint8x16.MaskedSaturatedSub", opLen3(ssa.OpMaskedSaturatedSubUint8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint8x16.MaskedSub", opLen3(ssa.OpMaskedSubUint8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint8x32.MaskedAdd", opLen3(ssa.OpMaskedAddUint8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint8x32.MaskedAverage", opLen3(ssa.OpMaskedAverageUint8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint8x32.MaskedEqual", opLen3(ssa.OpMaskedEqualUint8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint8x32.MaskedGreater", opLen3(ssa.OpMaskedGreaterUint8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint8x32.MaskedGreaterEqual", opLen3(ssa.OpMaskedGreaterEqualUint8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint8x32.MaskedLess", opLen3(ssa.OpMaskedLessUint8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint8x32.MaskedLessEqual", opLen3(ssa.OpMaskedLessEqualUint8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint8x32.MaskedMax", opLen3(ssa.OpMaskedMaxUint8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint8x32.MaskedMin", opLen3(ssa.OpMaskedMinUint8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint8x32.MaskedNotEqual", opLen3(ssa.OpMaskedNotEqualUint8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint8x32.MaskedSaturatedAdd", opLen3(ssa.OpMaskedSaturatedAddUint8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint8x32.MaskedSaturatedSub", opLen3(ssa.OpMaskedSaturatedSubUint8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint8x32.MaskedSub", opLen3(ssa.OpMaskedSubUint8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint8x64.MaskedAdd", opLen3(ssa.OpMaskedAddUint8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint8x64.MaskedAverage", opLen3(ssa.OpMaskedAverageUint8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint8x64.MaskedEqual", opLen3(ssa.OpMaskedEqualUint8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint8x64.MaskedGreater", opLen3(ssa.OpMaskedGreaterUint8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint8x64.MaskedGreaterEqual", opLen3(ssa.OpMaskedGreaterEqualUint8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint8x64.MaskedLess", opLen3(ssa.OpMaskedLessUint8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint8x64.MaskedLessEqual", opLen3(ssa.OpMaskedLessEqualUint8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint8x64.MaskedMax", opLen3(ssa.OpMaskedMaxUint8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint8x64.MaskedMin", opLen3(ssa.OpMaskedMinUint8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint8x64.MaskedNotEqual", opLen3(ssa.OpMaskedNotEqualUint8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint8x64.MaskedSaturatedAdd", opLen3(ssa.OpMaskedSaturatedAddUint8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint8x64.MaskedSaturatedSub", opLen3(ssa.OpMaskedSaturatedSubUint8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint8x64.MaskedSub", opLen3(ssa.OpMaskedSubUint8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x16.MaskedFusedMultiplyAdd132", opLen4(ssa.OpMaskedFusedMultiplyAdd132Float32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x16.MaskedFusedMultiplyAdd213", opLen4(ssa.OpMaskedFusedMultiplyAdd213Float32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x16.MaskedFusedMultiplyAdd231", opLen4(ssa.OpMaskedFusedMultiplyAdd231Float32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x16.MaskedFusedMultiplyAddSub132", opLen4(ssa.OpMaskedFusedMultiplyAddSub132Float32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x16.MaskedFusedMultiplyAddSub213", opLen4(ssa.OpMaskedFusedMultiplyAddSub213Float32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x16.MaskedFusedMultiplyAddSub231", opLen4(ssa.OpMaskedFusedMultiplyAddSub231Float32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x16.MaskedFusedMultiplySub132", opLen4(ssa.OpMaskedFusedMultiplySub132Float32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x16.MaskedFusedMultiplySub213", opLen4(ssa.OpMaskedFusedMultiplySub213Float32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x16.MaskedFusedMultiplySub231", opLen4(ssa.OpMaskedFusedMultiplySub231Float32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x16.MaskedFusedMultiplySubAdd132", opLen4(ssa.OpMaskedFusedMultiplySubAdd132Float32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x16.MaskedFusedMultiplySubAdd213", opLen4(ssa.OpMaskedFusedMultiplySubAdd213Float32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x16.MaskedFusedMultiplySubAdd231", opLen4(ssa.OpMaskedFusedMultiplySubAdd231Float32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x16.MaskedFusedNegativeMultiplyAdd132", opLen4(ssa.OpMaskedFusedNegativeMultiplyAdd132Float32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x16.MaskedFusedNegativeMultiplyAdd213", opLen4(ssa.OpMaskedFusedNegativeMultiplyAdd213Float32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x16.MaskedFusedNegativeMultiplyAdd231", opLen4(ssa.OpMaskedFusedNegativeMultiplyAdd231Float32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x16.MaskedFusedNegativeMultiplySub132", opLen4(ssa.OpMaskedFusedNegativeMultiplySub132Float32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x16.MaskedFusedNegativeMultiplySub213", opLen4(ssa.OpMaskedFusedNegativeMultiplySub213Float32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x16.MaskedFusedNegativeMultiplySub231", opLen4(ssa.OpMaskedFusedNegativeMultiplySub231Float32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float32x4.MaskedFloorSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedFloorSuppressExceptionWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float32x8.MaskedFloorSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedFloorSuppressExceptionWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float32x16.MaskedFloorSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedFloorSuppressExceptionWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float64x2.MaskedFloorSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedFloorSuppressExceptionWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float64x4.MaskedFloorSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedFloorSuppressExceptionWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float64x8.MaskedFloorSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedFloorSuppressExceptionWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float32x4.MaskedFloorWithPrecision", opLen2Imm8(ssa.OpMaskedFloorWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float32x8.MaskedFloorWithPrecision", opLen2Imm8(ssa.OpMaskedFloorWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float32x16.MaskedFloorWithPrecision", opLen2Imm8(ssa.OpMaskedFloorWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float64x2.MaskedFloorWithPrecision", opLen2Imm8(ssa.OpMaskedFloorWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float64x4.MaskedFloorWithPrecision", opLen2Imm8(ssa.OpMaskedFloorWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float64x8.MaskedFloorWithPrecision", opLen2Imm8(ssa.OpMaskedFloorWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
        addF(simdPackage, "Float32x4.MaskedFusedMultiplyAdd132", opLen4(ssa.OpMaskedFusedMultiplyAdd132Float32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x4.MaskedFusedMultiplyAdd213", opLen4(ssa.OpMaskedFusedMultiplyAdd213Float32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x4.MaskedFusedMultiplyAdd231", opLen4(ssa.OpMaskedFusedMultiplyAdd231Float32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x4.MaskedFusedMultiplyAddSub132", opLen4(ssa.OpMaskedFusedMultiplyAddSub132Float32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x4.MaskedFusedMultiplyAddSub213", opLen4(ssa.OpMaskedFusedMultiplyAddSub213Float32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x4.MaskedFusedMultiplyAddSub231", opLen4(ssa.OpMaskedFusedMultiplyAddSub231Float32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x4.MaskedFusedMultiplySub132", opLen4(ssa.OpMaskedFusedMultiplySub132Float32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x4.MaskedFusedMultiplySub213", opLen4(ssa.OpMaskedFusedMultiplySub213Float32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x4.MaskedFusedMultiplySub231", opLen4(ssa.OpMaskedFusedMultiplySub231Float32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x4.MaskedFusedMultiplySubAdd132", opLen4(ssa.OpMaskedFusedMultiplySubAdd132Float32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x4.MaskedFusedMultiplySubAdd213", opLen4(ssa.OpMaskedFusedMultiplySubAdd213Float32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x4.MaskedFusedMultiplySubAdd231", opLen4(ssa.OpMaskedFusedMultiplySubAdd231Float32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x4.MaskedFusedNegativeMultiplyAdd132", opLen4(ssa.OpMaskedFusedNegativeMultiplyAdd132Float32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x4.MaskedFusedNegativeMultiplyAdd213", opLen4(ssa.OpMaskedFusedNegativeMultiplyAdd213Float32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x4.MaskedFusedNegativeMultiplyAdd231", opLen4(ssa.OpMaskedFusedNegativeMultiplyAdd231Float32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x4.MaskedFusedNegativeMultiplySub132", opLen4(ssa.OpMaskedFusedNegativeMultiplySub132Float32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x4.MaskedFusedNegativeMultiplySub213", opLen4(ssa.OpMaskedFusedNegativeMultiplySub213Float32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float32x4.MaskedFusedNegativeMultiplySub231", opLen4(ssa.OpMaskedFusedNegativeMultiplySub231Float32x4, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Float32x8.MaskedFusedMultiplyAdd132", opLen4(ssa.OpMaskedFusedMultiplyAdd132Float32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x8.MaskedFusedMultiplyAdd213", opLen4(ssa.OpMaskedFusedMultiplyAdd213Float32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x8.MaskedFusedMultiplyAdd231", opLen4(ssa.OpMaskedFusedMultiplyAdd231Float32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x8.MaskedFusedMultiplyAddSub132", opLen4(ssa.OpMaskedFusedMultiplyAddSub132Float32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x8.MaskedFusedMultiplyAddSub213", opLen4(ssa.OpMaskedFusedMultiplyAddSub213Float32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x8.MaskedFusedMultiplyAddSub231", opLen4(ssa.OpMaskedFusedMultiplyAddSub231Float32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x8.MaskedFusedMultiplySub132", opLen4(ssa.OpMaskedFusedMultiplySub132Float32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x8.MaskedFusedMultiplySub213", opLen4(ssa.OpMaskedFusedMultiplySub213Float32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x8.MaskedFusedMultiplySub231", opLen4(ssa.OpMaskedFusedMultiplySub231Float32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x8.MaskedFusedMultiplySubAdd132", opLen4(ssa.OpMaskedFusedMultiplySubAdd132Float32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x8.MaskedFusedMultiplySubAdd213", opLen4(ssa.OpMaskedFusedMultiplySubAdd213Float32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x8.MaskedFusedMultiplySubAdd231", opLen4(ssa.OpMaskedFusedMultiplySubAdd231Float32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x8.MaskedFusedNegativeMultiplyAdd132", opLen4(ssa.OpMaskedFusedNegativeMultiplyAdd132Float32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x8.MaskedFusedNegativeMultiplyAdd213", opLen4(ssa.OpMaskedFusedNegativeMultiplyAdd213Float32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x8.MaskedFusedNegativeMultiplyAdd231", opLen4(ssa.OpMaskedFusedNegativeMultiplyAdd231Float32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x8.MaskedFusedNegativeMultiplySub132", opLen4(ssa.OpMaskedFusedNegativeMultiplySub132Float32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x8.MaskedFusedNegativeMultiplySub213", opLen4(ssa.OpMaskedFusedNegativeMultiplySub213Float32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x8.MaskedFusedNegativeMultiplySub231", opLen4(ssa.OpMaskedFusedNegativeMultiplySub231Float32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float32x16.MaskedFusedMultiplyAdd132", opLen4(ssa.OpMaskedFusedMultiplyAdd132Float32x16, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Float64x2.MaskedFusedMultiplyAdd132", opLen4(ssa.OpMaskedFusedMultiplyAdd132Float64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x2.MaskedFusedMultiplyAdd213", opLen4(ssa.OpMaskedFusedMultiplyAdd213Float64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x2.MaskedFusedMultiplyAdd231", opLen4(ssa.OpMaskedFusedMultiplyAdd231Float64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x2.MaskedFusedMultiplyAddSub132", opLen4(ssa.OpMaskedFusedMultiplyAddSub132Float64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x2.MaskedFusedMultiplyAddSub213", opLen4(ssa.OpMaskedFusedMultiplyAddSub213Float64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x2.MaskedFusedMultiplyAddSub231", opLen4(ssa.OpMaskedFusedMultiplyAddSub231Float64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x2.MaskedFusedMultiplySub132", opLen4(ssa.OpMaskedFusedMultiplySub132Float64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x2.MaskedFusedMultiplySub213", opLen4(ssa.OpMaskedFusedMultiplySub213Float64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x2.MaskedFusedMultiplySub231", opLen4(ssa.OpMaskedFusedMultiplySub231Float64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x2.MaskedFusedMultiplySubAdd132", opLen4(ssa.OpMaskedFusedMultiplySubAdd132Float64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x2.MaskedFusedMultiplySubAdd213", opLen4(ssa.OpMaskedFusedMultiplySubAdd213Float64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x2.MaskedFusedMultiplySubAdd231", opLen4(ssa.OpMaskedFusedMultiplySubAdd231Float64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x2.MaskedFusedNegativeMultiplyAdd132", opLen4(ssa.OpMaskedFusedNegativeMultiplyAdd132Float64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x2.MaskedFusedNegativeMultiplyAdd213", opLen4(ssa.OpMaskedFusedNegativeMultiplyAdd213Float64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x2.MaskedFusedNegativeMultiplyAdd231", opLen4(ssa.OpMaskedFusedNegativeMultiplyAdd231Float64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x2.MaskedFusedNegativeMultiplySub132", opLen4(ssa.OpMaskedFusedNegativeMultiplySub132Float64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x2.MaskedFusedNegativeMultiplySub213", opLen4(ssa.OpMaskedFusedNegativeMultiplySub213Float64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Float64x2.MaskedFusedNegativeMultiplySub231", opLen4(ssa.OpMaskedFusedNegativeMultiplySub231Float64x2, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Float64x4.MaskedFusedMultiplyAdd132", opLen4(ssa.OpMaskedFusedMultiplyAdd132Float64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x4.MaskedFusedMultiplyAdd213", opLen4(ssa.OpMaskedFusedMultiplyAdd213Float64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x4.MaskedFusedMultiplyAdd231", opLen4(ssa.OpMaskedFusedMultiplyAdd231Float64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x4.MaskedFusedMultiplyAddSub132", opLen4(ssa.OpMaskedFusedMultiplyAddSub132Float64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x4.MaskedFusedMultiplyAddSub213", opLen4(ssa.OpMaskedFusedMultiplyAddSub213Float64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x4.MaskedFusedMultiplyAddSub231", opLen4(ssa.OpMaskedFusedMultiplyAddSub231Float64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x4.MaskedFusedMultiplySub132", opLen4(ssa.OpMaskedFusedMultiplySub132Float64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x4.MaskedFusedMultiplySub213", opLen4(ssa.OpMaskedFusedMultiplySub213Float64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x4.MaskedFusedMultiplySub231", opLen4(ssa.OpMaskedFusedMultiplySub231Float64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x4.MaskedFusedMultiplySubAdd132", opLen4(ssa.OpMaskedFusedMultiplySubAdd132Float64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x4.MaskedFusedMultiplySubAdd213", opLen4(ssa.OpMaskedFusedMultiplySubAdd213Float64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x4.MaskedFusedMultiplySubAdd231", opLen4(ssa.OpMaskedFusedMultiplySubAdd231Float64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x4.MaskedFusedNegativeMultiplyAdd132", opLen4(ssa.OpMaskedFusedNegativeMultiplyAdd132Float64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x4.MaskedFusedNegativeMultiplyAdd213", opLen4(ssa.OpMaskedFusedNegativeMultiplyAdd213Float64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x4.MaskedFusedNegativeMultiplyAdd231", opLen4(ssa.OpMaskedFusedNegativeMultiplyAdd231Float64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x4.MaskedFusedNegativeMultiplySub132", opLen4(ssa.OpMaskedFusedNegativeMultiplySub132Float64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x4.MaskedFusedNegativeMultiplySub213", opLen4(ssa.OpMaskedFusedNegativeMultiplySub213Float64x4, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float64x4.MaskedFusedNegativeMultiplySub231", opLen4(ssa.OpMaskedFusedNegativeMultiplySub231Float64x4, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float64x8.MaskedFusedMultiplyAdd132", opLen4(ssa.OpMaskedFusedMultiplyAdd132Float64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float32x4.MaskedFusedMultiplyAdd213", opLen4(ssa.OpMaskedFusedMultiplyAdd213Float32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float32x8.MaskedFusedMultiplyAdd213", opLen4(ssa.OpMaskedFusedMultiplyAdd213Float32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float32x16.MaskedFusedMultiplyAdd213", opLen4(ssa.OpMaskedFusedMultiplyAdd213Float32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float64x2.MaskedFusedMultiplyAdd213", opLen4(ssa.OpMaskedFusedMultiplyAdd213Float64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float64x4.MaskedFusedMultiplyAdd213", opLen4(ssa.OpMaskedFusedMultiplyAdd213Float64x4, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float64x8.MaskedFusedMultiplyAdd213", opLen4(ssa.OpMaskedFusedMultiplyAdd213Float64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float32x4.MaskedFusedMultiplyAdd231", opLen4(ssa.OpMaskedFusedMultiplyAdd231Float32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float32x8.MaskedFusedMultiplyAdd231", opLen4(ssa.OpMaskedFusedMultiplyAdd231Float32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float32x16.MaskedFusedMultiplyAdd231", opLen4(ssa.OpMaskedFusedMultiplyAdd231Float32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float64x2.MaskedFusedMultiplyAdd231", opLen4(ssa.OpMaskedFusedMultiplyAdd231Float64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float64x4.MaskedFusedMultiplyAdd231", opLen4(ssa.OpMaskedFusedMultiplyAdd231Float64x4, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float64x8.MaskedFusedMultiplyAdd231", opLen4(ssa.OpMaskedFusedMultiplyAdd231Float64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float32x4.MaskedFusedMultiplyAddSub132", opLen4(ssa.OpMaskedFusedMultiplyAddSub132Float32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float32x8.MaskedFusedMultiplyAddSub132", opLen4(ssa.OpMaskedFusedMultiplyAddSub132Float32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float32x16.MaskedFusedMultiplyAddSub132", opLen4(ssa.OpMaskedFusedMultiplyAddSub132Float32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float64x2.MaskedFusedMultiplyAddSub132", opLen4(ssa.OpMaskedFusedMultiplyAddSub132Float64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float64x4.MaskedFusedMultiplyAddSub132", opLen4(ssa.OpMaskedFusedMultiplyAddSub132Float64x4, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float64x8.MaskedFusedMultiplyAddSub132", opLen4(ssa.OpMaskedFusedMultiplyAddSub132Float64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float32x4.MaskedFusedMultiplyAddSub213", opLen4(ssa.OpMaskedFusedMultiplyAddSub213Float32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float32x8.MaskedFusedMultiplyAddSub213", opLen4(ssa.OpMaskedFusedMultiplyAddSub213Float32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float32x16.MaskedFusedMultiplyAddSub213", opLen4(ssa.OpMaskedFusedMultiplyAddSub213Float32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float64x2.MaskedFusedMultiplyAddSub213", opLen4(ssa.OpMaskedFusedMultiplyAddSub213Float64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float64x4.MaskedFusedMultiplyAddSub213", opLen4(ssa.OpMaskedFusedMultiplyAddSub213Float64x4, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float64x8.MaskedFusedMultiplyAddSub213", opLen4(ssa.OpMaskedFusedMultiplyAddSub213Float64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float32x4.MaskedFusedMultiplyAddSub231", opLen4(ssa.OpMaskedFusedMultiplyAddSub231Float32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float32x8.MaskedFusedMultiplyAddSub231", opLen4(ssa.OpMaskedFusedMultiplyAddSub231Float32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float32x16.MaskedFusedMultiplyAddSub231", opLen4(ssa.OpMaskedFusedMultiplyAddSub231Float32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float64x2.MaskedFusedMultiplyAddSub231", opLen4(ssa.OpMaskedFusedMultiplyAddSub231Float64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float64x4.MaskedFusedMultiplyAddSub231", opLen4(ssa.OpMaskedFusedMultiplyAddSub231Float64x4, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float64x8.MaskedFusedMultiplyAddSub231", opLen4(ssa.OpMaskedFusedMultiplyAddSub231Float64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float32x4.MaskedFusedMultiplySub132", opLen4(ssa.OpMaskedFusedMultiplySub132Float32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float32x8.MaskedFusedMultiplySub132", opLen4(ssa.OpMaskedFusedMultiplySub132Float32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float32x16.MaskedFusedMultiplySub132", opLen4(ssa.OpMaskedFusedMultiplySub132Float32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float64x2.MaskedFusedMultiplySub132", opLen4(ssa.OpMaskedFusedMultiplySub132Float64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float64x4.MaskedFusedMultiplySub132", opLen4(ssa.OpMaskedFusedMultiplySub132Float64x4, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float64x8.MaskedFusedMultiplySub132", opLen4(ssa.OpMaskedFusedMultiplySub132Float64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float32x4.MaskedFusedMultiplySub213", opLen4(ssa.OpMaskedFusedMultiplySub213Float32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float32x8.MaskedFusedMultiplySub213", opLen4(ssa.OpMaskedFusedMultiplySub213Float32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float32x16.MaskedFusedMultiplySub213", opLen4(ssa.OpMaskedFusedMultiplySub213Float32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float64x2.MaskedFusedMultiplySub213", opLen4(ssa.OpMaskedFusedMultiplySub213Float64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float64x4.MaskedFusedMultiplySub213", opLen4(ssa.OpMaskedFusedMultiplySub213Float64x4, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float64x8.MaskedFusedMultiplySub213", opLen4(ssa.OpMaskedFusedMultiplySub213Float64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float32x4.MaskedFusedMultiplySub231", opLen4(ssa.OpMaskedFusedMultiplySub231Float32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float32x8.MaskedFusedMultiplySub231", opLen4(ssa.OpMaskedFusedMultiplySub231Float32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float32x16.MaskedFusedMultiplySub231", opLen4(ssa.OpMaskedFusedMultiplySub231Float32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float64x2.MaskedFusedMultiplySub231", opLen4(ssa.OpMaskedFusedMultiplySub231Float64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float64x4.MaskedFusedMultiplySub231", opLen4(ssa.OpMaskedFusedMultiplySub231Float64x4, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float64x8.MaskedFusedMultiplySub231", opLen4(ssa.OpMaskedFusedMultiplySub231Float64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float32x4.MaskedFusedMultiplySubAdd132", opLen4(ssa.OpMaskedFusedMultiplySubAdd132Float32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float32x8.MaskedFusedMultiplySubAdd132", opLen4(ssa.OpMaskedFusedMultiplySubAdd132Float32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float32x16.MaskedFusedMultiplySubAdd132", opLen4(ssa.OpMaskedFusedMultiplySubAdd132Float32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float64x2.MaskedFusedMultiplySubAdd132", opLen4(ssa.OpMaskedFusedMultiplySubAdd132Float64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float64x4.MaskedFusedMultiplySubAdd132", opLen4(ssa.OpMaskedFusedMultiplySubAdd132Float64x4, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float64x8.MaskedFusedMultiplySubAdd132", opLen4(ssa.OpMaskedFusedMultiplySubAdd132Float64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float32x4.MaskedFusedMultiplySubAdd213", opLen4(ssa.OpMaskedFusedMultiplySubAdd213Float32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float32x8.MaskedFusedMultiplySubAdd213", opLen4(ssa.OpMaskedFusedMultiplySubAdd213Float32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float32x16.MaskedFusedMultiplySubAdd213", opLen4(ssa.OpMaskedFusedMultiplySubAdd213Float32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float64x2.MaskedFusedMultiplySubAdd213", opLen4(ssa.OpMaskedFusedMultiplySubAdd213Float64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float64x4.MaskedFusedMultiplySubAdd213", opLen4(ssa.OpMaskedFusedMultiplySubAdd213Float64x4, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float64x8.MaskedFusedMultiplySubAdd213", opLen4(ssa.OpMaskedFusedMultiplySubAdd213Float64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float32x4.MaskedFusedMultiplySubAdd231", opLen4(ssa.OpMaskedFusedMultiplySubAdd231Float32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float32x8.MaskedFusedMultiplySubAdd231", opLen4(ssa.OpMaskedFusedMultiplySubAdd231Float32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float32x16.MaskedFusedMultiplySubAdd231", opLen4(ssa.OpMaskedFusedMultiplySubAdd231Float32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float64x2.MaskedFusedMultiplySubAdd231", opLen4(ssa.OpMaskedFusedMultiplySubAdd231Float64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float64x4.MaskedFusedMultiplySubAdd231", opLen4(ssa.OpMaskedFusedMultiplySubAdd231Float64x4, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float64x8.MaskedFusedMultiplySubAdd231", opLen4(ssa.OpMaskedFusedMultiplySubAdd231Float64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float32x4.MaskedFusedNegativeMultiplyAdd132", opLen4(ssa.OpMaskedFusedNegativeMultiplyAdd132Float32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float32x8.MaskedFusedNegativeMultiplyAdd132", opLen4(ssa.OpMaskedFusedNegativeMultiplyAdd132Float32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float32x16.MaskedFusedNegativeMultiplyAdd132", opLen4(ssa.OpMaskedFusedNegativeMultiplyAdd132Float32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float64x2.MaskedFusedNegativeMultiplyAdd132", opLen4(ssa.OpMaskedFusedNegativeMultiplyAdd132Float64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float64x4.MaskedFusedNegativeMultiplyAdd132", opLen4(ssa.OpMaskedFusedNegativeMultiplyAdd132Float64x4, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float64x8.MaskedFusedNegativeMultiplyAdd132", opLen4(ssa.OpMaskedFusedNegativeMultiplyAdd132Float64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float32x4.MaskedFusedNegativeMultiplyAdd213", opLen4(ssa.OpMaskedFusedNegativeMultiplyAdd213Float32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float32x8.MaskedFusedNegativeMultiplyAdd213", opLen4(ssa.OpMaskedFusedNegativeMultiplyAdd213Float32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float32x16.MaskedFusedNegativeMultiplyAdd213", opLen4(ssa.OpMaskedFusedNegativeMultiplyAdd213Float32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float64x2.MaskedFusedNegativeMultiplyAdd213", opLen4(ssa.OpMaskedFusedNegativeMultiplyAdd213Float64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float64x4.MaskedFusedNegativeMultiplyAdd213", opLen4(ssa.OpMaskedFusedNegativeMultiplyAdd213Float64x4, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float64x8.MaskedFusedNegativeMultiplyAdd213", opLen4(ssa.OpMaskedFusedNegativeMultiplyAdd213Float64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float32x4.MaskedFusedNegativeMultiplyAdd231", opLen4(ssa.OpMaskedFusedNegativeMultiplyAdd231Float32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float32x8.MaskedFusedNegativeMultiplyAdd231", opLen4(ssa.OpMaskedFusedNegativeMultiplyAdd231Float32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float32x16.MaskedFusedNegativeMultiplyAdd231", opLen4(ssa.OpMaskedFusedNegativeMultiplyAdd231Float32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float64x2.MaskedFusedNegativeMultiplyAdd231", opLen4(ssa.OpMaskedFusedNegativeMultiplyAdd231Float64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float64x4.MaskedFusedNegativeMultiplyAdd231", opLen4(ssa.OpMaskedFusedNegativeMultiplyAdd231Float64x4, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float64x8.MaskedFusedNegativeMultiplyAdd231", opLen4(ssa.OpMaskedFusedNegativeMultiplyAdd231Float64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float32x4.MaskedFusedNegativeMultiplySub132", opLen4(ssa.OpMaskedFusedNegativeMultiplySub132Float32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float32x8.MaskedFusedNegativeMultiplySub132", opLen4(ssa.OpMaskedFusedNegativeMultiplySub132Float32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float32x16.MaskedFusedNegativeMultiplySub132", opLen4(ssa.OpMaskedFusedNegativeMultiplySub132Float32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float64x2.MaskedFusedNegativeMultiplySub132", opLen4(ssa.OpMaskedFusedNegativeMultiplySub132Float64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float64x4.MaskedFusedNegativeMultiplySub132", opLen4(ssa.OpMaskedFusedNegativeMultiplySub132Float64x4, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float64x8.MaskedFusedNegativeMultiplySub132", opLen4(ssa.OpMaskedFusedNegativeMultiplySub132Float64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float32x4.MaskedFusedNegativeMultiplySub213", opLen4(ssa.OpMaskedFusedNegativeMultiplySub213Float32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float32x8.MaskedFusedNegativeMultiplySub213", opLen4(ssa.OpMaskedFusedNegativeMultiplySub213Float32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float32x16.MaskedFusedNegativeMultiplySub213", opLen4(ssa.OpMaskedFusedNegativeMultiplySub213Float32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float64x2.MaskedFusedNegativeMultiplySub213", opLen4(ssa.OpMaskedFusedNegativeMultiplySub213Float64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float64x4.MaskedFusedNegativeMultiplySub213", opLen4(ssa.OpMaskedFusedNegativeMultiplySub213Float64x4, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float64x8.MaskedFusedNegativeMultiplySub213", opLen4(ssa.OpMaskedFusedNegativeMultiplySub213Float64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float32x4.MaskedFusedNegativeMultiplySub231", opLen4(ssa.OpMaskedFusedNegativeMultiplySub231Float32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float32x8.MaskedFusedNegativeMultiplySub231", opLen4(ssa.OpMaskedFusedNegativeMultiplySub231Float32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float32x16.MaskedFusedNegativeMultiplySub231", opLen4(ssa.OpMaskedFusedNegativeMultiplySub231Float32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float64x2.MaskedFusedNegativeMultiplySub231", opLen4(ssa.OpMaskedFusedNegativeMultiplySub231Float64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float64x4.MaskedFusedNegativeMultiplySub231", opLen4(ssa.OpMaskedFusedNegativeMultiplySub231Float64x4, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float64x8.MaskedFusedNegativeMultiplySub231", opLen4(ssa.OpMaskedFusedNegativeMultiplySub231Float64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int32x16.MaskedPairDotProdAccumulate", opLen4(ssa.OpMaskedPairDotProdAccumulateInt32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int32x16.MaskedSaturatedPairDotProdAccumulate", opLen4(ssa.OpMaskedSaturatedPairDotProdAccumulateInt32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int32x16.MaskedSaturatedUnsignedSignedQuadDotProdAccumulate", opLen4(ssa.OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int32x16.MaskedUnsignedSignedQuadDotProdAccumulate", opLen4(ssa.OpMaskedUnsignedSignedQuadDotProdAccumulateInt32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float32x4.MaskedGreater", opLen3(ssa.OpMaskedGreaterFloat32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float32x8.MaskedGreater", opLen3(ssa.OpMaskedGreaterFloat32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float32x16.MaskedGreater", opLen3(ssa.OpMaskedGreaterFloat32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float64x2.MaskedGreater", opLen3(ssa.OpMaskedGreaterFloat64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float64x4.MaskedGreater", opLen3(ssa.OpMaskedGreaterFloat64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float64x8.MaskedGreater", opLen3(ssa.OpMaskedGreaterFloat64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int8x16.MaskedGreater", opLen3(ssa.OpMaskedGreaterInt8x16, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int8x32.MaskedGreater", opLen3(ssa.OpMaskedGreaterInt8x32, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int8x64.MaskedGreater", opLen3(ssa.OpMaskedGreaterInt8x64, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int16x8.MaskedGreater", opLen3(ssa.OpMaskedGreaterInt16x8, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int16x16.MaskedGreater", opLen3(ssa.OpMaskedGreaterInt16x16, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int16x32.MaskedGreater", opLen3(ssa.OpMaskedGreaterInt16x32, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int32x4.MaskedGreater", opLen3(ssa.OpMaskedGreaterInt32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int32x8.MaskedGreater", opLen3(ssa.OpMaskedGreaterInt32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int32x16.MaskedGreater", opLen3(ssa.OpMaskedGreaterInt32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int64x2.MaskedGreater", opLen3(ssa.OpMaskedGreaterInt64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int64x4.MaskedGreater", opLen3(ssa.OpMaskedGreaterInt64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int64x8.MaskedGreater", opLen3(ssa.OpMaskedGreaterInt64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint8x16.MaskedGreater", opLen3(ssa.OpMaskedGreaterUint8x16, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint8x32.MaskedGreater", opLen3(ssa.OpMaskedGreaterUint8x32, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint8x64.MaskedGreater", opLen3(ssa.OpMaskedGreaterUint8x64, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint16x8.MaskedGreater", opLen3(ssa.OpMaskedGreaterUint16x8, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint16x16.MaskedGreater", opLen3(ssa.OpMaskedGreaterUint16x16, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint16x32.MaskedGreater", opLen3(ssa.OpMaskedGreaterUint16x32, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint32x4.MaskedGreater", opLen3(ssa.OpMaskedGreaterUint32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint32x8.MaskedGreater", opLen3(ssa.OpMaskedGreaterUint32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint32x16.MaskedGreater", opLen3(ssa.OpMaskedGreaterUint32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint64x2.MaskedGreater", opLen3(ssa.OpMaskedGreaterUint64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint64x4.MaskedGreater", opLen3(ssa.OpMaskedGreaterUint64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint64x8.MaskedGreater", opLen3(ssa.OpMaskedGreaterUint64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float32x4.MaskedGreaterEqual", opLen3(ssa.OpMaskedGreaterEqualFloat32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float32x8.MaskedGreaterEqual", opLen3(ssa.OpMaskedGreaterEqualFloat32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float32x16.MaskedGreaterEqual", opLen3(ssa.OpMaskedGreaterEqualFloat32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float64x2.MaskedGreaterEqual", opLen3(ssa.OpMaskedGreaterEqualFloat64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float64x4.MaskedGreaterEqual", opLen3(ssa.OpMaskedGreaterEqualFloat64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float64x8.MaskedGreaterEqual", opLen3(ssa.OpMaskedGreaterEqualFloat64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int8x16.MaskedGreaterEqual", opLen3(ssa.OpMaskedGreaterEqualInt8x16, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int8x32.MaskedGreaterEqual", opLen3(ssa.OpMaskedGreaterEqualInt8x32, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int8x64.MaskedGreaterEqual", opLen3(ssa.OpMaskedGreaterEqualInt8x64, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int16x8.MaskedGreaterEqual", opLen3(ssa.OpMaskedGreaterEqualInt16x8, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int16x16.MaskedGreaterEqual", opLen3(ssa.OpMaskedGreaterEqualInt16x16, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int16x32.MaskedGreaterEqual", opLen3(ssa.OpMaskedGreaterEqualInt16x32, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int32x4.MaskedGreaterEqual", opLen3(ssa.OpMaskedGreaterEqualInt32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int32x8.MaskedGreaterEqual", opLen3(ssa.OpMaskedGreaterEqualInt32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int32x16.MaskedGreaterEqual", opLen3(ssa.OpMaskedGreaterEqualInt32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int64x2.MaskedGreaterEqual", opLen3(ssa.OpMaskedGreaterEqualInt64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int64x4.MaskedGreaterEqual", opLen3(ssa.OpMaskedGreaterEqualInt64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int64x8.MaskedGreaterEqual", opLen3(ssa.OpMaskedGreaterEqualInt64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint8x16.MaskedGreaterEqual", opLen3(ssa.OpMaskedGreaterEqualUint8x16, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint8x32.MaskedGreaterEqual", opLen3(ssa.OpMaskedGreaterEqualUint8x32, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint8x64.MaskedGreaterEqual", opLen3(ssa.OpMaskedGreaterEqualUint8x64, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint16x8.MaskedGreaterEqual", opLen3(ssa.OpMaskedGreaterEqualUint16x8, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint16x16.MaskedGreaterEqual", opLen3(ssa.OpMaskedGreaterEqualUint16x16, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint16x32.MaskedGreaterEqual", opLen3(ssa.OpMaskedGreaterEqualUint16x32, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint32x4.MaskedGreaterEqual", opLen3(ssa.OpMaskedGreaterEqualUint32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint32x8.MaskedGreaterEqual", opLen3(ssa.OpMaskedGreaterEqualUint32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint32x16.MaskedGreaterEqual", opLen3(ssa.OpMaskedGreaterEqualUint32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint64x2.MaskedGreaterEqual", opLen3(ssa.OpMaskedGreaterEqualUint64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint64x4.MaskedGreaterEqual", opLen3(ssa.OpMaskedGreaterEqualUint64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint64x8.MaskedGreaterEqual", opLen3(ssa.OpMaskedGreaterEqualUint64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float32x4.MaskedIsNan", opLen3(ssa.OpMaskedIsNanFloat32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float32x8.MaskedIsNan", opLen3(ssa.OpMaskedIsNanFloat32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float32x16.MaskedIsNan", opLen3(ssa.OpMaskedIsNanFloat32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float64x2.MaskedIsNan", opLen3(ssa.OpMaskedIsNanFloat64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float64x4.MaskedIsNan", opLen3(ssa.OpMaskedIsNanFloat64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float64x8.MaskedIsNan", opLen3(ssa.OpMaskedIsNanFloat64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float32x4.MaskedLess", opLen3(ssa.OpMaskedLessFloat32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float32x8.MaskedLess", opLen3(ssa.OpMaskedLessFloat32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float32x16.MaskedLess", opLen3(ssa.OpMaskedLessFloat32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float64x2.MaskedLess", opLen3(ssa.OpMaskedLessFloat64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float64x4.MaskedLess", opLen3(ssa.OpMaskedLessFloat64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float64x8.MaskedLess", opLen3(ssa.OpMaskedLessFloat64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int8x16.MaskedLess", opLen3(ssa.OpMaskedLessInt8x16, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int8x32.MaskedLess", opLen3(ssa.OpMaskedLessInt8x32, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int8x64.MaskedLess", opLen3(ssa.OpMaskedLessInt8x64, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int16x8.MaskedLess", opLen3(ssa.OpMaskedLessInt16x8, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int16x16.MaskedLess", opLen3(ssa.OpMaskedLessInt16x16, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int16x32.MaskedLess", opLen3(ssa.OpMaskedLessInt16x32, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int32x4.MaskedLess", opLen3(ssa.OpMaskedLessInt32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int32x8.MaskedLess", opLen3(ssa.OpMaskedLessInt32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int32x16.MaskedLess", opLen3(ssa.OpMaskedLessInt32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int64x2.MaskedLess", opLen3(ssa.OpMaskedLessInt64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int64x4.MaskedLess", opLen3(ssa.OpMaskedLessInt64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int64x8.MaskedLess", opLen3(ssa.OpMaskedLessInt64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint8x16.MaskedLess", opLen3(ssa.OpMaskedLessUint8x16, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint8x32.MaskedLess", opLen3(ssa.OpMaskedLessUint8x32, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint8x64.MaskedLess", opLen3(ssa.OpMaskedLessUint8x64, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint16x8.MaskedLess", opLen3(ssa.OpMaskedLessUint16x8, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint16x16.MaskedLess", opLen3(ssa.OpMaskedLessUint16x16, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint16x32.MaskedLess", opLen3(ssa.OpMaskedLessUint16x32, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint32x4.MaskedLess", opLen3(ssa.OpMaskedLessUint32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint32x8.MaskedLess", opLen3(ssa.OpMaskedLessUint32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint32x16.MaskedLess", opLen3(ssa.OpMaskedLessUint32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint64x2.MaskedLess", opLen3(ssa.OpMaskedLessUint64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint64x4.MaskedLess", opLen3(ssa.OpMaskedLessUint64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint64x8.MaskedLess", opLen3(ssa.OpMaskedLessUint64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float32x4.MaskedLessEqual", opLen3(ssa.OpMaskedLessEqualFloat32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float32x8.MaskedLessEqual", opLen3(ssa.OpMaskedLessEqualFloat32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float32x16.MaskedLessEqual", opLen3(ssa.OpMaskedLessEqualFloat32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float64x2.MaskedLessEqual", opLen3(ssa.OpMaskedLessEqualFloat64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float64x4.MaskedLessEqual", opLen3(ssa.OpMaskedLessEqualFloat64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float64x8.MaskedLessEqual", opLen3(ssa.OpMaskedLessEqualFloat64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int8x16.MaskedLessEqual", opLen3(ssa.OpMaskedLessEqualInt8x16, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int8x32.MaskedLessEqual", opLen3(ssa.OpMaskedLessEqualInt8x32, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int8x64.MaskedLessEqual", opLen3(ssa.OpMaskedLessEqualInt8x64, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int16x8.MaskedLessEqual", opLen3(ssa.OpMaskedLessEqualInt16x8, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int16x16.MaskedLessEqual", opLen3(ssa.OpMaskedLessEqualInt16x16, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int16x32.MaskedLessEqual", opLen3(ssa.OpMaskedLessEqualInt16x32, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int32x4.MaskedLessEqual", opLen3(ssa.OpMaskedLessEqualInt32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int32x8.MaskedLessEqual", opLen3(ssa.OpMaskedLessEqualInt32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int32x16.MaskedLessEqual", opLen3(ssa.OpMaskedLessEqualInt32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int64x2.MaskedLessEqual", opLen3(ssa.OpMaskedLessEqualInt64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int64x4.MaskedLessEqual", opLen3(ssa.OpMaskedLessEqualInt64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int64x8.MaskedLessEqual", opLen3(ssa.OpMaskedLessEqualInt64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint8x16.MaskedLessEqual", opLen3(ssa.OpMaskedLessEqualUint8x16, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint8x32.MaskedLessEqual", opLen3(ssa.OpMaskedLessEqualUint8x32, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint8x64.MaskedLessEqual", opLen3(ssa.OpMaskedLessEqualUint8x64, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint16x8.MaskedLessEqual", opLen3(ssa.OpMaskedLessEqualUint16x8, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint16x16.MaskedLessEqual", opLen3(ssa.OpMaskedLessEqualUint16x16, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint16x32.MaskedLessEqual", opLen3(ssa.OpMaskedLessEqualUint16x32, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint32x4.MaskedLessEqual", opLen3(ssa.OpMaskedLessEqualUint32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint32x8.MaskedLessEqual", opLen3(ssa.OpMaskedLessEqualUint32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint32x16.MaskedLessEqual", opLen3(ssa.OpMaskedLessEqualUint32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint64x2.MaskedLessEqual", opLen3(ssa.OpMaskedLessEqualUint64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint64x4.MaskedLessEqual", opLen3(ssa.OpMaskedLessEqualUint64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint64x8.MaskedLessEqual", opLen3(ssa.OpMaskedLessEqualUint64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float32x4.MaskedMax", opLen3(ssa.OpMaskedMaxFloat32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float32x8.MaskedMax", opLen3(ssa.OpMaskedMaxFloat32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float32x16.MaskedMax", opLen3(ssa.OpMaskedMaxFloat32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float64x2.MaskedMax", opLen3(ssa.OpMaskedMaxFloat64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float64x4.MaskedMax", opLen3(ssa.OpMaskedMaxFloat64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float64x8.MaskedMax", opLen3(ssa.OpMaskedMaxFloat64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int8x16.MaskedMax", opLen3(ssa.OpMaskedMaxInt8x16, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int8x32.MaskedMax", opLen3(ssa.OpMaskedMaxInt8x32, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int8x64.MaskedMax", opLen3(ssa.OpMaskedMaxInt8x64, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int16x8.MaskedMax", opLen3(ssa.OpMaskedMaxInt16x8, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int16x16.MaskedMax", opLen3(ssa.OpMaskedMaxInt16x16, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int16x32.MaskedMax", opLen3(ssa.OpMaskedMaxInt16x32, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int32x4.MaskedMax", opLen3(ssa.OpMaskedMaxInt32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int32x8.MaskedMax", opLen3(ssa.OpMaskedMaxInt32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int32x16.MaskedMax", opLen3(ssa.OpMaskedMaxInt32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int64x2.MaskedMax", opLen3(ssa.OpMaskedMaxInt64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int64x4.MaskedMax", opLen3(ssa.OpMaskedMaxInt64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int64x8.MaskedMax", opLen3(ssa.OpMaskedMaxInt64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint8x16.MaskedMax", opLen3(ssa.OpMaskedMaxUint8x16, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint8x32.MaskedMax", opLen3(ssa.OpMaskedMaxUint8x32, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint8x64.MaskedMax", opLen3(ssa.OpMaskedMaxUint8x64, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint16x8.MaskedMax", opLen3(ssa.OpMaskedMaxUint16x8, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint16x16.MaskedMax", opLen3(ssa.OpMaskedMaxUint16x16, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint16x32.MaskedMax", opLen3(ssa.OpMaskedMaxUint16x32, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint32x4.MaskedMax", opLen3(ssa.OpMaskedMaxUint32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint32x8.MaskedMax", opLen3(ssa.OpMaskedMaxUint32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint32x16.MaskedMax", opLen3(ssa.OpMaskedMaxUint32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint64x2.MaskedMax", opLen3(ssa.OpMaskedMaxUint64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint64x4.MaskedMax", opLen3(ssa.OpMaskedMaxUint64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint64x8.MaskedMax", opLen3(ssa.OpMaskedMaxUint64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float32x4.MaskedMin", opLen3(ssa.OpMaskedMinFloat32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float32x8.MaskedMin", opLen3(ssa.OpMaskedMinFloat32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float32x16.MaskedMin", opLen3(ssa.OpMaskedMinFloat32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float64x2.MaskedMin", opLen3(ssa.OpMaskedMinFloat64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float64x4.MaskedMin", opLen3(ssa.OpMaskedMinFloat64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float64x8.MaskedMin", opLen3(ssa.OpMaskedMinFloat64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int8x16.MaskedMin", opLen3(ssa.OpMaskedMinInt8x16, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int8x32.MaskedMin", opLen3(ssa.OpMaskedMinInt8x32, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int8x64.MaskedMin", opLen3(ssa.OpMaskedMinInt8x64, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int16x8.MaskedMin", opLen3(ssa.OpMaskedMinInt16x8, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int16x16.MaskedMin", opLen3(ssa.OpMaskedMinInt16x16, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int16x32.MaskedMin", opLen3(ssa.OpMaskedMinInt16x32, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int32x4.MaskedMin", opLen3(ssa.OpMaskedMinInt32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int32x8.MaskedMin", opLen3(ssa.OpMaskedMinInt32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int32x16.MaskedMin", opLen3(ssa.OpMaskedMinInt32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int64x2.MaskedMin", opLen3(ssa.OpMaskedMinInt64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int64x4.MaskedMin", opLen3(ssa.OpMaskedMinInt64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int64x8.MaskedMin", opLen3(ssa.OpMaskedMinInt64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint8x16.MaskedMin", opLen3(ssa.OpMaskedMinUint8x16, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint8x32.MaskedMin", opLen3(ssa.OpMaskedMinUint8x32, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint8x64.MaskedMin", opLen3(ssa.OpMaskedMinUint8x64, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint16x8.MaskedMin", opLen3(ssa.OpMaskedMinUint16x8, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint16x16.MaskedMin", opLen3(ssa.OpMaskedMinUint16x16, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint16x32.MaskedMin", opLen3(ssa.OpMaskedMinUint16x32, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint32x4.MaskedMin", opLen3(ssa.OpMaskedMinUint32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint32x8.MaskedMin", opLen3(ssa.OpMaskedMinUint32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint32x16.MaskedMin", opLen3(ssa.OpMaskedMinUint32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint64x2.MaskedMin", opLen3(ssa.OpMaskedMinUint64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint64x4.MaskedMin", opLen3(ssa.OpMaskedMinUint64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint64x8.MaskedMin", opLen3(ssa.OpMaskedMinUint64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float32x4.MaskedMul", opLen3(ssa.OpMaskedMulFloat32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float32x8.MaskedMul", opLen3(ssa.OpMaskedMulFloat32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float32x16.MaskedMul", opLen3(ssa.OpMaskedMulFloat32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float64x2.MaskedMul", opLen3(ssa.OpMaskedMulFloat64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float64x4.MaskedMul", opLen3(ssa.OpMaskedMulFloat64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float64x8.MaskedMul", opLen3(ssa.OpMaskedMulFloat64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float32x4.MaskedMulByPowOf2", opLen3(ssa.OpMaskedMulByPowOf2Float32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float32x8.MaskedMulByPowOf2", opLen3(ssa.OpMaskedMulByPowOf2Float32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float32x16.MaskedMulByPowOf2", opLen3(ssa.OpMaskedMulByPowOf2Float32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float64x2.MaskedMulByPowOf2", opLen3(ssa.OpMaskedMulByPowOf2Float64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float64x4.MaskedMulByPowOf2", opLen3(ssa.OpMaskedMulByPowOf2Float64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float64x8.MaskedMulByPowOf2", opLen3(ssa.OpMaskedMulByPowOf2Float64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int64x2.MaskedMulEvenWiden", opLen3(ssa.OpMaskedMulEvenWidenInt64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int64x4.MaskedMulEvenWiden", opLen3(ssa.OpMaskedMulEvenWidenInt64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int64x8.MaskedMulEvenWiden", opLen3(ssa.OpMaskedMulEvenWidenInt64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint64x2.MaskedMulEvenWiden", opLen3(ssa.OpMaskedMulEvenWidenUint64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint64x4.MaskedMulEvenWiden", opLen3(ssa.OpMaskedMulEvenWidenUint64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint64x8.MaskedMulEvenWiden", opLen3(ssa.OpMaskedMulEvenWidenUint64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int16x8.MaskedMulHigh", opLen3(ssa.OpMaskedMulHighInt16x8, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int16x16.MaskedMulHigh", opLen3(ssa.OpMaskedMulHighInt16x16, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int16x32.MaskedMulHigh", opLen3(ssa.OpMaskedMulHighInt16x32, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint16x8.MaskedMulHigh", opLen3(ssa.OpMaskedMulHighUint16x8, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint16x16.MaskedMulHigh", opLen3(ssa.OpMaskedMulHighUint16x16, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint16x32.MaskedMulHigh", opLen3(ssa.OpMaskedMulHighUint16x32, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int16x8.MaskedMulLow", opLen3(ssa.OpMaskedMulLowInt16x8, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int16x16.MaskedMulLow", opLen3(ssa.OpMaskedMulLowInt16x16, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int16x32.MaskedMulLow", opLen3(ssa.OpMaskedMulLowInt16x32, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int32x4.MaskedMulLow", opLen3(ssa.OpMaskedMulLowInt32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int32x8.MaskedMulLow", opLen3(ssa.OpMaskedMulLowInt32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int32x16.MaskedMulLow", opLen3(ssa.OpMaskedMulLowInt32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int64x2.MaskedMulLow", opLen3(ssa.OpMaskedMulLowInt64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int64x4.MaskedMulLow", opLen3(ssa.OpMaskedMulLowInt64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int64x8.MaskedMulLow", opLen3(ssa.OpMaskedMulLowInt64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float32x4.MaskedNotEqual", opLen3(ssa.OpMaskedNotEqualFloat32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float32x8.MaskedNotEqual", opLen3(ssa.OpMaskedNotEqualFloat32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float32x16.MaskedNotEqual", opLen3(ssa.OpMaskedNotEqualFloat32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float64x2.MaskedNotEqual", opLen3(ssa.OpMaskedNotEqualFloat64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float64x4.MaskedNotEqual", opLen3(ssa.OpMaskedNotEqualFloat64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float64x8.MaskedNotEqual", opLen3(ssa.OpMaskedNotEqualFloat64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int8x16.MaskedNotEqual", opLen3(ssa.OpMaskedNotEqualInt8x16, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int8x32.MaskedNotEqual", opLen3(ssa.OpMaskedNotEqualInt8x32, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int8x64.MaskedNotEqual", opLen3(ssa.OpMaskedNotEqualInt8x64, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int16x8.MaskedNotEqual", opLen3(ssa.OpMaskedNotEqualInt16x8, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int16x16.MaskedNotEqual", opLen3(ssa.OpMaskedNotEqualInt16x16, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int16x32.MaskedNotEqual", opLen3(ssa.OpMaskedNotEqualInt16x32, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int32x4.MaskedNotEqual", opLen3(ssa.OpMaskedNotEqualInt32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int32x8.MaskedNotEqual", opLen3(ssa.OpMaskedNotEqualInt32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int32x16.MaskedNotEqual", opLen3(ssa.OpMaskedNotEqualInt32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int64x2.MaskedNotEqual", opLen3(ssa.OpMaskedNotEqualInt64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int64x4.MaskedNotEqual", opLen3(ssa.OpMaskedNotEqualInt64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int64x8.MaskedNotEqual", opLen3(ssa.OpMaskedNotEqualInt64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint8x16.MaskedNotEqual", opLen3(ssa.OpMaskedNotEqualUint8x16, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint8x32.MaskedNotEqual", opLen3(ssa.OpMaskedNotEqualUint8x32, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint8x64.MaskedNotEqual", opLen3(ssa.OpMaskedNotEqualUint8x64, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint16x8.MaskedNotEqual", opLen3(ssa.OpMaskedNotEqualUint16x8, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint16x16.MaskedNotEqual", opLen3(ssa.OpMaskedNotEqualUint16x16, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint16x32.MaskedNotEqual", opLen3(ssa.OpMaskedNotEqualUint16x32, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint32x4.MaskedNotEqual", opLen3(ssa.OpMaskedNotEqualUint32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint32x8.MaskedNotEqual", opLen3(ssa.OpMaskedNotEqualUint32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint32x16.MaskedNotEqual", opLen3(ssa.OpMaskedNotEqualUint32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint64x2.MaskedNotEqual", opLen3(ssa.OpMaskedNotEqualUint64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint64x4.MaskedNotEqual", opLen3(ssa.OpMaskedNotEqualUint64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint64x8.MaskedNotEqual", opLen3(ssa.OpMaskedNotEqualUint64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float32x4.MaskedOr", opLen3(ssa.OpMaskedOrFloat32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float32x8.MaskedOr", opLen3(ssa.OpMaskedOrFloat32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float32x16.MaskedOr", opLen3(ssa.OpMaskedOrFloat32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float64x2.MaskedOr", opLen3(ssa.OpMaskedOrFloat64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float64x4.MaskedOr", opLen3(ssa.OpMaskedOrFloat64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float64x8.MaskedOr", opLen3(ssa.OpMaskedOrFloat64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int32x4.MaskedOr", opLen3(ssa.OpMaskedOrInt32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int32x8.MaskedOr", opLen3(ssa.OpMaskedOrInt32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int32x16.MaskedOr", opLen3(ssa.OpMaskedOrInt32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int64x2.MaskedOr", opLen3(ssa.OpMaskedOrInt64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int64x4.MaskedOr", opLen3(ssa.OpMaskedOrInt64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int64x8.MaskedOr", opLen3(ssa.OpMaskedOrInt64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint32x4.MaskedOr", opLen3(ssa.OpMaskedOrUint32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint32x8.MaskedOr", opLen3(ssa.OpMaskedOrUint32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint32x16.MaskedOr", opLen3(ssa.OpMaskedOrUint32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint64x2.MaskedOr", opLen3(ssa.OpMaskedOrUint64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint64x4.MaskedOr", opLen3(ssa.OpMaskedOrUint64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint64x8.MaskedOr", opLen3(ssa.OpMaskedOrUint64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int16x8.MaskedPairDotProd", opLen3(ssa.OpMaskedPairDotProdInt16x8, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int16x16.MaskedPairDotProd", opLen3(ssa.OpMaskedPairDotProdInt16x16, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int16x32.MaskedPairDotProd", opLen3(ssa.OpMaskedPairDotProdInt16x32, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Int32x4.MaskedPairDotProdAccumulate", opLen4(ssa.OpMaskedPairDotProdAccumulateInt32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int32x4.MaskedSaturatedPairDotProdAccumulate", opLen4(ssa.OpMaskedSaturatedPairDotProdAccumulateInt32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int32x4.MaskedSaturatedUnsignedSignedQuadDotProdAccumulate", opLen4(ssa.OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int32x4.MaskedUnsignedSignedQuadDotProdAccumulate", opLen4(ssa.OpMaskedUnsignedSignedQuadDotProdAccumulateInt32x4, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Int32x8.MaskedPairDotProdAccumulate", opLen4(ssa.OpMaskedPairDotProdAccumulateInt32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int32x16.MaskedPairDotProdAccumulate", opLen4(ssa.OpMaskedPairDotProdAccumulateInt32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int8x16.MaskedPopCount", opLen2(ssa.OpMaskedPopCountInt8x16, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int8x32.MaskedPopCount", opLen2(ssa.OpMaskedPopCountInt8x32, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int8x64.MaskedPopCount", opLen2(ssa.OpMaskedPopCountInt8x64, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int16x8.MaskedPopCount", opLen2(ssa.OpMaskedPopCountInt16x8, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int16x16.MaskedPopCount", opLen2(ssa.OpMaskedPopCountInt16x16, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int16x32.MaskedPopCount", opLen2(ssa.OpMaskedPopCountInt16x32, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int32x4.MaskedPopCount", opLen2(ssa.OpMaskedPopCountInt32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int32x8.MaskedPopCount", opLen2(ssa.OpMaskedPopCountInt32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int32x16.MaskedPopCount", opLen2(ssa.OpMaskedPopCountInt32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int64x2.MaskedPopCount", opLen2(ssa.OpMaskedPopCountInt64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int64x4.MaskedPopCount", opLen2(ssa.OpMaskedPopCountInt64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int64x8.MaskedPopCount", opLen2(ssa.OpMaskedPopCountInt64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint8x16.MaskedPopCount", opLen2(ssa.OpMaskedPopCountUint8x16, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint8x32.MaskedPopCount", opLen2(ssa.OpMaskedPopCountUint8x32, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint8x64.MaskedPopCount", opLen2(ssa.OpMaskedPopCountUint8x64, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint16x8.MaskedPopCount", opLen2(ssa.OpMaskedPopCountUint16x8, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint16x16.MaskedPopCount", opLen2(ssa.OpMaskedPopCountUint16x16, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint16x32.MaskedPopCount", opLen2(ssa.OpMaskedPopCountUint16x32, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint32x4.MaskedPopCount", opLen2(ssa.OpMaskedPopCountUint32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint32x8.MaskedPopCount", opLen2(ssa.OpMaskedPopCountUint32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint32x16.MaskedPopCount", opLen2(ssa.OpMaskedPopCountUint32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint64x2.MaskedPopCount", opLen2(ssa.OpMaskedPopCountUint64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint64x4.MaskedPopCount", opLen2(ssa.OpMaskedPopCountUint64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint64x8.MaskedPopCount", opLen2(ssa.OpMaskedPopCountUint64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float32x4.MaskedRoundSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedRoundSuppressExceptionWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float32x8.MaskedRoundSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedRoundSuppressExceptionWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float32x16.MaskedRoundSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedRoundSuppressExceptionWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float64x2.MaskedRoundSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedRoundSuppressExceptionWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float64x4.MaskedRoundSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedRoundSuppressExceptionWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float64x8.MaskedRoundSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedRoundSuppressExceptionWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float32x4.MaskedRoundWithPrecision", opLen2Imm8(ssa.OpMaskedRoundWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float32x8.MaskedRoundWithPrecision", opLen2Imm8(ssa.OpMaskedRoundWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float32x16.MaskedRoundWithPrecision", opLen2Imm8(ssa.OpMaskedRoundWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float64x2.MaskedRoundWithPrecision", opLen2Imm8(ssa.OpMaskedRoundWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float64x4.MaskedRoundWithPrecision", opLen2Imm8(ssa.OpMaskedRoundWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float64x8.MaskedRoundWithPrecision", opLen2Imm8(ssa.OpMaskedRoundWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Int8x16.MaskedSaturatedAdd", opLen3(ssa.OpMaskedSaturatedAddInt8x16, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int8x32.MaskedSaturatedAdd", opLen3(ssa.OpMaskedSaturatedAddInt8x32, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int8x64.MaskedSaturatedAdd", opLen3(ssa.OpMaskedSaturatedAddInt8x64, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int16x8.MaskedSaturatedAdd", opLen3(ssa.OpMaskedSaturatedAddInt16x8, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int16x16.MaskedSaturatedAdd", opLen3(ssa.OpMaskedSaturatedAddInt16x16, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int16x32.MaskedSaturatedAdd", opLen3(ssa.OpMaskedSaturatedAddInt16x32, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint8x16.MaskedSaturatedAdd", opLen3(ssa.OpMaskedSaturatedAddUint8x16, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint8x32.MaskedSaturatedAdd", opLen3(ssa.OpMaskedSaturatedAddUint8x32, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint8x64.MaskedSaturatedAdd", opLen3(ssa.OpMaskedSaturatedAddUint8x64, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint16x8.MaskedSaturatedAdd", opLen3(ssa.OpMaskedSaturatedAddUint16x8, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint16x16.MaskedSaturatedAdd", opLen3(ssa.OpMaskedSaturatedAddUint16x16, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint16x32.MaskedSaturatedAdd", opLen3(ssa.OpMaskedSaturatedAddUint16x32, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int32x4.MaskedSaturatedPairDotProdAccumulate", opLen4(ssa.OpMaskedSaturatedPairDotProdAccumulateInt32x4, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Int32x8.MaskedSaturatedPairDotProdAccumulate", opLen4(ssa.OpMaskedSaturatedPairDotProdAccumulateInt32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int32x16.MaskedSaturatedPairDotProdAccumulate", opLen4(ssa.OpMaskedSaturatedPairDotProdAccumulateInt32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int8x16.MaskedSaturatedSub", opLen3(ssa.OpMaskedSaturatedSubInt8x16, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int8x32.MaskedSaturatedSub", opLen3(ssa.OpMaskedSaturatedSubInt8x32, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int8x64.MaskedSaturatedSub", opLen3(ssa.OpMaskedSaturatedSubInt8x64, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int16x8.MaskedSaturatedSub", opLen3(ssa.OpMaskedSaturatedSubInt16x8, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int16x16.MaskedSaturatedSub", opLen3(ssa.OpMaskedSaturatedSubInt16x16, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int16x32.MaskedSaturatedSub", opLen3(ssa.OpMaskedSaturatedSubInt16x32, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint8x16.MaskedSaturatedSub", opLen3(ssa.OpMaskedSaturatedSubUint8x16, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint8x32.MaskedSaturatedSub", opLen3(ssa.OpMaskedSaturatedSubUint8x32, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint8x64.MaskedSaturatedSub", opLen3(ssa.OpMaskedSaturatedSubUint8x64, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint16x8.MaskedSaturatedSub", opLen3(ssa.OpMaskedSaturatedSubUint16x8, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint16x16.MaskedSaturatedSub", opLen3(ssa.OpMaskedSaturatedSubUint16x16, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint16x32.MaskedSaturatedSub", opLen3(ssa.OpMaskedSaturatedSubUint16x32, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint16x8.MaskedSaturatedUnsignedSignedPairDotProd", opLen3(ssa.OpMaskedSaturatedUnsignedSignedPairDotProdUint16x8, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint16x16.MaskedSaturatedUnsignedSignedPairDotProd", opLen3(ssa.OpMaskedSaturatedUnsignedSignedPairDotProdUint16x16, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint16x32.MaskedSaturatedUnsignedSignedPairDotProd", opLen3(ssa.OpMaskedSaturatedUnsignedSignedPairDotProdUint16x32, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int32x4.MaskedSaturatedUnsignedSignedQuadDotProdAccumulate", opLen4(ssa.OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x4, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Int32x8.MaskedSaturatedUnsignedSignedQuadDotProdAccumulate", opLen4(ssa.OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int32x8.MaskedUnsignedSignedQuadDotProdAccumulate", opLen4(ssa.OpMaskedUnsignedSignedQuadDotProdAccumulateInt32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint32x16.MaskedSaturatedUnsignedSignedQuadDotProdAccumulate", opLen4(ssa.OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint32x16.MaskedUnsignedSignedQuadDotProdAccumulate", opLen4(ssa.OpMaskedUnsignedSignedQuadDotProdAccumulateUint32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int32x16.MaskedSaturatedUnsignedSignedQuadDotProdAccumulate", opLen4(ssa.OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateInt32x16, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Uint32x4.MaskedSaturatedUnsignedSignedQuadDotProdAccumulate", opLen4(ssa.OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint32x4.MaskedUnsignedSignedQuadDotProdAccumulate", opLen4(ssa.OpMaskedUnsignedSignedQuadDotProdAccumulateUint32x4, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Uint32x8.MaskedSaturatedUnsignedSignedQuadDotProdAccumulate", opLen4(ssa.OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint32x8.MaskedUnsignedSignedQuadDotProdAccumulate", opLen4(ssa.OpMaskedUnsignedSignedQuadDotProdAccumulateUint32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Float32x4.CeilSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpCeilSuppressExceptionWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
-       addF(simdPackage, "Float32x8.CeilSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpCeilSuppressExceptionWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
-       addF(simdPackage, "Float32x16.CeilSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpCeilSuppressExceptionWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
-       addF(simdPackage, "Float64x2.CeilSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpCeilSuppressExceptionWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
-       addF(simdPackage, "Float64x4.CeilSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpCeilSuppressExceptionWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
-       addF(simdPackage, "Float64x8.CeilSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpCeilSuppressExceptionWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
-       addF(simdPackage, "Float32x4.CeilWithPrecision", opLen1Imm8(ssa.OpCeilWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
-       addF(simdPackage, "Float32x8.CeilWithPrecision", opLen1Imm8(ssa.OpCeilWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
-       addF(simdPackage, "Float32x16.CeilWithPrecision", opLen1Imm8(ssa.OpCeilWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
-       addF(simdPackage, "Float64x2.CeilWithPrecision", opLen1Imm8(ssa.OpCeilWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
-       addF(simdPackage, "Float64x4.CeilWithPrecision", opLen1Imm8(ssa.OpCeilWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
-       addF(simdPackage, "Float64x8.CeilWithPrecision", opLen1Imm8(ssa.OpCeilWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
-       addF(simdPackage, "Float32x4.DiffWithCeilSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithCeilSuppressExceptionWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
-       addF(simdPackage, "Float32x8.DiffWithCeilSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithCeilSuppressExceptionWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
-       addF(simdPackage, "Float32x16.DiffWithCeilSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithCeilSuppressExceptionWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
-       addF(simdPackage, "Float64x2.DiffWithCeilSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithCeilSuppressExceptionWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
-       addF(simdPackage, "Float64x4.DiffWithCeilSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithCeilSuppressExceptionWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
-       addF(simdPackage, "Float64x8.DiffWithCeilSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithCeilSuppressExceptionWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
-       addF(simdPackage, "Float32x4.DiffWithCeilWithPrecision", opLen1Imm8(ssa.OpDiffWithCeilWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
-       addF(simdPackage, "Float32x8.DiffWithCeilWithPrecision", opLen1Imm8(ssa.OpDiffWithCeilWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
-       addF(simdPackage, "Float32x16.DiffWithCeilWithPrecision", opLen1Imm8(ssa.OpDiffWithCeilWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
-       addF(simdPackage, "Float64x2.DiffWithCeilWithPrecision", opLen1Imm8(ssa.OpDiffWithCeilWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
-       addF(simdPackage, "Float64x4.DiffWithCeilWithPrecision", opLen1Imm8(ssa.OpDiffWithCeilWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
-       addF(simdPackage, "Float64x8.DiffWithCeilWithPrecision", opLen1Imm8(ssa.OpDiffWithCeilWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
-       addF(simdPackage, "Float32x4.DiffWithFloorSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithFloorSuppressExceptionWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
-       addF(simdPackage, "Float32x8.DiffWithFloorSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithFloorSuppressExceptionWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
-       addF(simdPackage, "Float32x16.DiffWithFloorSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithFloorSuppressExceptionWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
-       addF(simdPackage, "Float64x2.DiffWithFloorSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithFloorSuppressExceptionWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
-       addF(simdPackage, "Float64x4.DiffWithFloorSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithFloorSuppressExceptionWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
-       addF(simdPackage, "Float64x8.DiffWithFloorSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithFloorSuppressExceptionWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
-       addF(simdPackage, "Float32x4.DiffWithFloorWithPrecision", opLen1Imm8(ssa.OpDiffWithFloorWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
-       addF(simdPackage, "Float32x8.DiffWithFloorWithPrecision", opLen1Imm8(ssa.OpDiffWithFloorWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
-       addF(simdPackage, "Float32x16.DiffWithFloorWithPrecision", opLen1Imm8(ssa.OpDiffWithFloorWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
-       addF(simdPackage, "Float64x2.DiffWithFloorWithPrecision", opLen1Imm8(ssa.OpDiffWithFloorWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
-       addF(simdPackage, "Float64x4.DiffWithFloorWithPrecision", opLen1Imm8(ssa.OpDiffWithFloorWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
-       addF(simdPackage, "Float64x8.DiffWithFloorWithPrecision", opLen1Imm8(ssa.OpDiffWithFloorWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
-       addF(simdPackage, "Float32x4.DiffWithRoundSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithRoundSuppressExceptionWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
-       addF(simdPackage, "Float32x8.DiffWithRoundSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithRoundSuppressExceptionWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
-       addF(simdPackage, "Float32x16.DiffWithRoundSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithRoundSuppressExceptionWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
-       addF(simdPackage, "Float64x2.DiffWithRoundSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithRoundSuppressExceptionWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
-       addF(simdPackage, "Float64x4.DiffWithRoundSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithRoundSuppressExceptionWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
-       addF(simdPackage, "Float64x8.DiffWithRoundSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithRoundSuppressExceptionWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
-       addF(simdPackage, "Float32x4.DiffWithRoundWithPrecision", opLen1Imm8(ssa.OpDiffWithRoundWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
-       addF(simdPackage, "Float32x8.DiffWithRoundWithPrecision", opLen1Imm8(ssa.OpDiffWithRoundWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
-       addF(simdPackage, "Float32x16.DiffWithRoundWithPrecision", opLen1Imm8(ssa.OpDiffWithRoundWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
-       addF(simdPackage, "Float64x2.DiffWithRoundWithPrecision", opLen1Imm8(ssa.OpDiffWithRoundWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
-       addF(simdPackage, "Float64x4.DiffWithRoundWithPrecision", opLen1Imm8(ssa.OpDiffWithRoundWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
-       addF(simdPackage, "Float64x8.DiffWithRoundWithPrecision", opLen1Imm8(ssa.OpDiffWithRoundWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
-       addF(simdPackage, "Float32x4.DiffWithTruncSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithTruncSuppressExceptionWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
-       addF(simdPackage, "Float32x8.DiffWithTruncSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithTruncSuppressExceptionWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
-       addF(simdPackage, "Float32x16.DiffWithTruncSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithTruncSuppressExceptionWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
-       addF(simdPackage, "Float64x2.DiffWithTruncSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithTruncSuppressExceptionWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
-       addF(simdPackage, "Float64x4.DiffWithTruncSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithTruncSuppressExceptionWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
-       addF(simdPackage, "Float64x8.DiffWithTruncSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpDiffWithTruncSuppressExceptionWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
-       addF(simdPackage, "Float32x4.DiffWithTruncWithPrecision", opLen1Imm8(ssa.OpDiffWithTruncWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
-       addF(simdPackage, "Float32x8.DiffWithTruncWithPrecision", opLen1Imm8(ssa.OpDiffWithTruncWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
-       addF(simdPackage, "Float32x16.DiffWithTruncWithPrecision", opLen1Imm8(ssa.OpDiffWithTruncWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
-       addF(simdPackage, "Float64x2.DiffWithTruncWithPrecision", opLen1Imm8(ssa.OpDiffWithTruncWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
-       addF(simdPackage, "Float64x4.DiffWithTruncWithPrecision", opLen1Imm8(ssa.OpDiffWithTruncWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
-       addF(simdPackage, "Float64x8.DiffWithTruncWithPrecision", opLen1Imm8(ssa.OpDiffWithTruncWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
-       addF(simdPackage, "Float32x4.FloorSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpFloorSuppressExceptionWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
-       addF(simdPackage, "Float32x8.FloorSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpFloorSuppressExceptionWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
-       addF(simdPackage, "Float32x16.FloorSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpFloorSuppressExceptionWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
-       addF(simdPackage, "Float64x2.FloorSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpFloorSuppressExceptionWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
-       addF(simdPackage, "Float64x4.FloorSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpFloorSuppressExceptionWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
-       addF(simdPackage, "Float64x8.FloorSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpFloorSuppressExceptionWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
-       addF(simdPackage, "Float32x4.FloorWithPrecision", opLen1Imm8(ssa.OpFloorWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
-       addF(simdPackage, "Float32x8.FloorWithPrecision", opLen1Imm8(ssa.OpFloorWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
-       addF(simdPackage, "Float32x16.FloorWithPrecision", opLen1Imm8(ssa.OpFloorWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
-       addF(simdPackage, "Float64x2.FloorWithPrecision", opLen1Imm8(ssa.OpFloorWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
-       addF(simdPackage, "Float64x4.FloorWithPrecision", opLen1Imm8(ssa.OpFloorWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
-       addF(simdPackage, "Float64x8.FloorWithPrecision", opLen1Imm8(ssa.OpFloorWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Uint32x16.MaskedSaturatedUnsignedSignedQuadDotProdAccumulate", opLen4(ssa.OpMaskedSaturatedUnsignedSignedQuadDotProdAccumulateUint32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float32x4.MaskedSqrt", opLen2(ssa.OpMaskedSqrtFloat32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float32x8.MaskedSqrt", opLen2(ssa.OpMaskedSqrtFloat32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float32x16.MaskedSqrt", opLen2(ssa.OpMaskedSqrtFloat32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float64x2.MaskedSqrt", opLen2(ssa.OpMaskedSqrtFloat64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float64x4.MaskedSqrt", opLen2(ssa.OpMaskedSqrtFloat64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float64x8.MaskedSqrt", opLen2(ssa.OpMaskedSqrtFloat64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float32x4.MaskedSub", opLen3(ssa.OpMaskedSubFloat32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float32x8.MaskedSub", opLen3(ssa.OpMaskedSubFloat32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float32x16.MaskedSub", opLen3(ssa.OpMaskedSubFloat32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float64x2.MaskedSub", opLen3(ssa.OpMaskedSubFloat64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float64x4.MaskedSub", opLen3(ssa.OpMaskedSubFloat64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float64x8.MaskedSub", opLen3(ssa.OpMaskedSubFloat64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int8x16.MaskedSub", opLen3(ssa.OpMaskedSubInt8x16, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int8x32.MaskedSub", opLen3(ssa.OpMaskedSubInt8x32, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int8x64.MaskedSub", opLen3(ssa.OpMaskedSubInt8x64, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int16x8.MaskedSub", opLen3(ssa.OpMaskedSubInt16x8, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int16x16.MaskedSub", opLen3(ssa.OpMaskedSubInt16x16, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int16x32.MaskedSub", opLen3(ssa.OpMaskedSubInt16x32, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int32x4.MaskedSub", opLen3(ssa.OpMaskedSubInt32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int32x8.MaskedSub", opLen3(ssa.OpMaskedSubInt32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int32x16.MaskedSub", opLen3(ssa.OpMaskedSubInt32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int64x2.MaskedSub", opLen3(ssa.OpMaskedSubInt64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int64x4.MaskedSub", opLen3(ssa.OpMaskedSubInt64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int64x8.MaskedSub", opLen3(ssa.OpMaskedSubInt64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint8x16.MaskedSub", opLen3(ssa.OpMaskedSubUint8x16, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint8x32.MaskedSub", opLen3(ssa.OpMaskedSubUint8x32, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint8x64.MaskedSub", opLen3(ssa.OpMaskedSubUint8x64, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint16x8.MaskedSub", opLen3(ssa.OpMaskedSubUint16x8, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint16x16.MaskedSub", opLen3(ssa.OpMaskedSubUint16x16, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint16x32.MaskedSub", opLen3(ssa.OpMaskedSubUint16x32, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint32x4.MaskedSub", opLen3(ssa.OpMaskedSubUint32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint32x8.MaskedSub", opLen3(ssa.OpMaskedSubUint32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint32x16.MaskedSub", opLen3(ssa.OpMaskedSubUint32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint64x2.MaskedSub", opLen3(ssa.OpMaskedSubUint64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint64x4.MaskedSub", opLen3(ssa.OpMaskedSubUint64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint64x8.MaskedSub", opLen3(ssa.OpMaskedSubUint64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float32x4.MaskedTruncSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedTruncSuppressExceptionWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float32x8.MaskedTruncSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedTruncSuppressExceptionWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float32x16.MaskedTruncSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedTruncSuppressExceptionWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float64x2.MaskedTruncSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedTruncSuppressExceptionWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float64x4.MaskedTruncSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedTruncSuppressExceptionWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float64x8.MaskedTruncSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedTruncSuppressExceptionWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float32x4.MaskedTruncWithPrecision", opLen2Imm8(ssa.OpMaskedTruncWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float32x8.MaskedTruncWithPrecision", opLen2Imm8(ssa.OpMaskedTruncWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float32x16.MaskedTruncWithPrecision", opLen2Imm8(ssa.OpMaskedTruncWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Float64x2.MaskedTruncWithPrecision", opLen2Imm8(ssa.OpMaskedTruncWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
+       addF(simdPackage, "Float64x4.MaskedTruncWithPrecision", opLen2Imm8(ssa.OpMaskedTruncWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
+       addF(simdPackage, "Float64x8.MaskedTruncWithPrecision", opLen2Imm8(ssa.OpMaskedTruncWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Int32x4.MaskedUnsignedSignedQuadDotProdAccumulate", opLen4(ssa.OpMaskedUnsignedSignedQuadDotProdAccumulateInt32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int32x8.MaskedUnsignedSignedQuadDotProdAccumulate", opLen4(ssa.OpMaskedUnsignedSignedQuadDotProdAccumulateInt32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int32x16.MaskedUnsignedSignedQuadDotProdAccumulate", opLen4(ssa.OpMaskedUnsignedSignedQuadDotProdAccumulateInt32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint32x4.MaskedUnsignedSignedQuadDotProdAccumulate", opLen4(ssa.OpMaskedUnsignedSignedQuadDotProdAccumulateUint32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint32x8.MaskedUnsignedSignedQuadDotProdAccumulate", opLen4(ssa.OpMaskedUnsignedSignedQuadDotProdAccumulateUint32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint32x16.MaskedUnsignedSignedQuadDotProdAccumulate", opLen4(ssa.OpMaskedUnsignedSignedQuadDotProdAccumulateUint32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float32x4.MaskedXor", opLen3(ssa.OpMaskedXorFloat32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float32x8.MaskedXor", opLen3(ssa.OpMaskedXorFloat32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float32x16.MaskedXor", opLen3(ssa.OpMaskedXorFloat32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float64x2.MaskedXor", opLen3(ssa.OpMaskedXorFloat64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float64x4.MaskedXor", opLen3(ssa.OpMaskedXorFloat64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float64x8.MaskedXor", opLen3(ssa.OpMaskedXorFloat64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int32x4.MaskedXor", opLen3(ssa.OpMaskedXorInt32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int32x8.MaskedXor", opLen3(ssa.OpMaskedXorInt32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int32x16.MaskedXor", opLen3(ssa.OpMaskedXorInt32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int64x2.MaskedXor", opLen3(ssa.OpMaskedXorInt64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int64x4.MaskedXor", opLen3(ssa.OpMaskedXorInt64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int64x8.MaskedXor", opLen3(ssa.OpMaskedXorInt64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint32x4.MaskedXor", opLen3(ssa.OpMaskedXorUint32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint32x8.MaskedXor", opLen3(ssa.OpMaskedXorUint32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint32x16.MaskedXor", opLen3(ssa.OpMaskedXorUint32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint64x2.MaskedXor", opLen3(ssa.OpMaskedXorUint64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint64x4.MaskedXor", opLen3(ssa.OpMaskedXorUint64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint64x8.MaskedXor", opLen3(ssa.OpMaskedXorUint64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float32x4.Max", opLen2(ssa.OpMaxFloat32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float32x8.Max", opLen2(ssa.OpMaxFloat32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float32x16.Max", opLen2(ssa.OpMaxFloat32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float64x2.Max", opLen2(ssa.OpMaxFloat64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float64x4.Max", opLen2(ssa.OpMaxFloat64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float64x8.Max", opLen2(ssa.OpMaxFloat64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int8x16.Max", opLen2(ssa.OpMaxInt8x16, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int8x32.Max", opLen2(ssa.OpMaxInt8x32, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int8x64.Max", opLen2(ssa.OpMaxInt8x64, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int16x8.Max", opLen2(ssa.OpMaxInt16x8, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int16x16.Max", opLen2(ssa.OpMaxInt16x16, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int16x32.Max", opLen2(ssa.OpMaxInt16x32, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int32x4.Max", opLen2(ssa.OpMaxInt32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int32x8.Max", opLen2(ssa.OpMaxInt32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int32x16.Max", opLen2(ssa.OpMaxInt32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int64x2.Max", opLen2(ssa.OpMaxInt64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int64x4.Max", opLen2(ssa.OpMaxInt64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int64x8.Max", opLen2(ssa.OpMaxInt64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint8x16.Max", opLen2(ssa.OpMaxUint8x16, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint8x32.Max", opLen2(ssa.OpMaxUint8x32, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint8x64.Max", opLen2(ssa.OpMaxUint8x64, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint16x8.Max", opLen2(ssa.OpMaxUint16x8, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint16x16.Max", opLen2(ssa.OpMaxUint16x16, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint16x32.Max", opLen2(ssa.OpMaxUint16x32, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint32x4.Max", opLen2(ssa.OpMaxUint32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint32x8.Max", opLen2(ssa.OpMaxUint32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint32x16.Max", opLen2(ssa.OpMaxUint32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint64x2.Max", opLen2(ssa.OpMaxUint64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint64x4.Max", opLen2(ssa.OpMaxUint64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint64x8.Max", opLen2(ssa.OpMaxUint64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float32x4.Min", opLen2(ssa.OpMinFloat32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float32x8.Min", opLen2(ssa.OpMinFloat32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float32x16.Min", opLen2(ssa.OpMinFloat32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float64x2.Min", opLen2(ssa.OpMinFloat64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float64x4.Min", opLen2(ssa.OpMinFloat64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float64x8.Min", opLen2(ssa.OpMinFloat64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int8x16.Min", opLen2(ssa.OpMinInt8x16, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int8x32.Min", opLen2(ssa.OpMinInt8x32, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int8x64.Min", opLen2(ssa.OpMinInt8x64, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int16x8.Min", opLen2(ssa.OpMinInt16x8, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int16x16.Min", opLen2(ssa.OpMinInt16x16, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int16x32.Min", opLen2(ssa.OpMinInt16x32, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int32x4.Min", opLen2(ssa.OpMinInt32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int32x8.Min", opLen2(ssa.OpMinInt32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int32x16.Min", opLen2(ssa.OpMinInt32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int64x2.Min", opLen2(ssa.OpMinInt64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int64x4.Min", opLen2(ssa.OpMinInt64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int64x8.Min", opLen2(ssa.OpMinInt64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint8x16.Min", opLen2(ssa.OpMinUint8x16, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint8x32.Min", opLen2(ssa.OpMinUint8x32, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint8x64.Min", opLen2(ssa.OpMinUint8x64, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint16x8.Min", opLen2(ssa.OpMinUint16x8, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint16x16.Min", opLen2(ssa.OpMinUint16x16, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint16x32.Min", opLen2(ssa.OpMinUint16x32, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint32x4.Min", opLen2(ssa.OpMinUint32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint32x8.Min", opLen2(ssa.OpMinUint32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint32x16.Min", opLen2(ssa.OpMinUint32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint64x2.Min", opLen2(ssa.OpMinUint64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint64x4.Min", opLen2(ssa.OpMinUint64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint64x8.Min", opLen2(ssa.OpMinUint64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float32x4.Mul", opLen2(ssa.OpMulFloat32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float32x8.Mul", opLen2(ssa.OpMulFloat32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float32x16.Mul", opLen2(ssa.OpMulFloat32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float64x2.Mul", opLen2(ssa.OpMulFloat64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float64x4.Mul", opLen2(ssa.OpMulFloat64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float64x8.Mul", opLen2(ssa.OpMulFloat64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float32x4.MulByPowOf2", opLen2(ssa.OpMulByPowOf2Float32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float32x8.MulByPowOf2", opLen2(ssa.OpMulByPowOf2Float32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float32x16.MulByPowOf2", opLen2(ssa.OpMulByPowOf2Float32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float64x2.MulByPowOf2", opLen2(ssa.OpMulByPowOf2Float64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float64x4.MulByPowOf2", opLen2(ssa.OpMulByPowOf2Float64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float64x8.MulByPowOf2", opLen2(ssa.OpMulByPowOf2Float64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int32x4.MulEvenWiden", opLen2(ssa.OpMulEvenWidenInt32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int32x8.MulEvenWiden", opLen2(ssa.OpMulEvenWidenInt32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int64x2.MulEvenWiden", opLen2(ssa.OpMulEvenWidenInt64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int64x4.MulEvenWiden", opLen2(ssa.OpMulEvenWidenInt64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int64x8.MulEvenWiden", opLen2(ssa.OpMulEvenWidenInt64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint32x4.MulEvenWiden", opLen2(ssa.OpMulEvenWidenUint32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint32x8.MulEvenWiden", opLen2(ssa.OpMulEvenWidenUint32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint64x2.MulEvenWiden", opLen2(ssa.OpMulEvenWidenUint64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint64x4.MulEvenWiden", opLen2(ssa.OpMulEvenWidenUint64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint64x8.MulEvenWiden", opLen2(ssa.OpMulEvenWidenUint64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int16x8.MulHigh", opLen2(ssa.OpMulHighInt16x8, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int16x16.MulHigh", opLen2(ssa.OpMulHighInt16x16, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int16x32.MulHigh", opLen2(ssa.OpMulHighInt16x32, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint16x8.MulHigh", opLen2(ssa.OpMulHighUint16x8, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint16x16.MulHigh", opLen2(ssa.OpMulHighUint16x16, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint16x32.MulHigh", opLen2(ssa.OpMulHighUint16x32, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int16x8.MulLow", opLen2(ssa.OpMulLowInt16x8, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int16x16.MulLow", opLen2(ssa.OpMulLowInt16x16, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int16x32.MulLow", opLen2(ssa.OpMulLowInt16x32, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int32x4.MulLow", opLen2(ssa.OpMulLowInt32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int32x8.MulLow", opLen2(ssa.OpMulLowInt32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int32x16.MulLow", opLen2(ssa.OpMulLowInt32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int64x2.MulLow", opLen2(ssa.OpMulLowInt64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int64x4.MulLow", opLen2(ssa.OpMulLowInt64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int64x8.MulLow", opLen2(ssa.OpMulLowInt64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float32x4.NotEqual", opLen2(ssa.OpNotEqualFloat32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float32x8.NotEqual", opLen2(ssa.OpNotEqualFloat32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float32x16.NotEqual", opLen2(ssa.OpNotEqualFloat32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float64x2.NotEqual", opLen2(ssa.OpNotEqualFloat64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float64x4.NotEqual", opLen2(ssa.OpNotEqualFloat64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float64x8.NotEqual", opLen2(ssa.OpNotEqualFloat64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int8x16.NotEqual", opLen2(ssa.OpNotEqualInt8x16, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int8x32.NotEqual", opLen2(ssa.OpNotEqualInt8x32, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int8x64.NotEqual", opLen2(ssa.OpNotEqualInt8x64, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int16x8.NotEqual", opLen2(ssa.OpNotEqualInt16x8, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int16x16.NotEqual", opLen2(ssa.OpNotEqualInt16x16, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int16x32.NotEqual", opLen2(ssa.OpNotEqualInt16x32, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int32x4.NotEqual", opLen2(ssa.OpNotEqualInt32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int32x8.NotEqual", opLen2(ssa.OpNotEqualInt32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int32x16.NotEqual", opLen2(ssa.OpNotEqualInt32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int64x2.NotEqual", opLen2(ssa.OpNotEqualInt64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int64x4.NotEqual", opLen2(ssa.OpNotEqualInt64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int64x8.NotEqual", opLen2(ssa.OpNotEqualInt64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint8x16.NotEqual", opLen2(ssa.OpNotEqualUint8x16, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint8x32.NotEqual", opLen2(ssa.OpNotEqualUint8x32, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint8x64.NotEqual", opLen2(ssa.OpNotEqualUint8x64, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint16x8.NotEqual", opLen2(ssa.OpNotEqualUint16x8, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint16x16.NotEqual", opLen2(ssa.OpNotEqualUint16x16, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint16x32.NotEqual", opLen2(ssa.OpNotEqualUint16x32, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint32x4.NotEqual", opLen2(ssa.OpNotEqualUint32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint32x8.NotEqual", opLen2(ssa.OpNotEqualUint32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint32x16.NotEqual", opLen2(ssa.OpNotEqualUint32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint64x2.NotEqual", opLen2(ssa.OpNotEqualUint64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint64x4.NotEqual", opLen2(ssa.OpNotEqualUint64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint64x8.NotEqual", opLen2(ssa.OpNotEqualUint64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float32x4.Or", opLen2(ssa.OpOrFloat32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float32x8.Or", opLen2(ssa.OpOrFloat32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float32x16.Or", opLen2(ssa.OpOrFloat32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float64x2.Or", opLen2(ssa.OpOrFloat64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float64x4.Or", opLen2(ssa.OpOrFloat64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float64x8.Or", opLen2(ssa.OpOrFloat64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int8x16.Or", opLen2(ssa.OpOrInt8x16, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int8x32.Or", opLen2(ssa.OpOrInt8x32, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int16x8.Or", opLen2(ssa.OpOrInt16x8, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int16x16.Or", opLen2(ssa.OpOrInt16x16, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int32x4.Or", opLen2(ssa.OpOrInt32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int32x8.Or", opLen2(ssa.OpOrInt32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int32x16.Or", opLen2(ssa.OpOrInt32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int64x2.Or", opLen2(ssa.OpOrInt64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int64x4.Or", opLen2(ssa.OpOrInt64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int64x8.Or", opLen2(ssa.OpOrInt64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint8x16.Or", opLen2(ssa.OpOrUint8x16, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint8x32.Or", opLen2(ssa.OpOrUint8x32, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint16x8.Or", opLen2(ssa.OpOrUint16x8, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint16x16.Or", opLen2(ssa.OpOrUint16x16, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint32x4.Or", opLen2(ssa.OpOrUint32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint32x8.Or", opLen2(ssa.OpOrUint32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint32x16.Or", opLen2(ssa.OpOrUint32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint64x2.Or", opLen2(ssa.OpOrUint64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint64x4.Or", opLen2(ssa.OpOrUint64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint64x8.Or", opLen2(ssa.OpOrUint64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int16x8.PairDotProd", opLen2(ssa.OpPairDotProdInt16x8, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int16x16.PairDotProd", opLen2(ssa.OpPairDotProdInt16x16, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int16x32.PairDotProd", opLen2(ssa.OpPairDotProdInt16x32, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int32x4.PairDotProdAccumulate", opLen3(ssa.OpPairDotProdAccumulateInt32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int32x8.PairDotProdAccumulate", opLen3(ssa.OpPairDotProdAccumulateInt32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int32x16.PairDotProdAccumulate", opLen3(ssa.OpPairDotProdAccumulateInt32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float32x4.PairwiseAdd", opLen2(ssa.OpPairwiseAddFloat32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float32x8.PairwiseAdd", opLen2(ssa.OpPairwiseAddFloat32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float64x2.PairwiseAdd", opLen2(ssa.OpPairwiseAddFloat64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float64x4.PairwiseAdd", opLen2(ssa.OpPairwiseAddFloat64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int16x8.PairwiseAdd", opLen2(ssa.OpPairwiseAddInt16x8, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int16x16.PairwiseAdd", opLen2(ssa.OpPairwiseAddInt16x16, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int32x4.PairwiseAdd", opLen2(ssa.OpPairwiseAddInt32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int32x8.PairwiseAdd", opLen2(ssa.OpPairwiseAddInt32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint16x8.PairwiseAdd", opLen2(ssa.OpPairwiseAddUint16x8, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint16x16.PairwiseAdd", opLen2(ssa.OpPairwiseAddUint16x16, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint32x4.PairwiseAdd", opLen2(ssa.OpPairwiseAddUint32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint32x8.PairwiseAdd", opLen2(ssa.OpPairwiseAddUint32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float32x4.PairwiseSub", opLen2(ssa.OpPairwiseSubFloat32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float32x8.PairwiseSub", opLen2(ssa.OpPairwiseSubFloat32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float64x2.PairwiseSub", opLen2(ssa.OpPairwiseSubFloat64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float64x4.PairwiseSub", opLen2(ssa.OpPairwiseSubFloat64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int16x8.PairwiseSub", opLen2(ssa.OpPairwiseSubInt16x8, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int16x16.PairwiseSub", opLen2(ssa.OpPairwiseSubInt16x16, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int32x4.PairwiseSub", opLen2(ssa.OpPairwiseSubInt32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int32x8.PairwiseSub", opLen2(ssa.OpPairwiseSubInt32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint16x8.PairwiseSub", opLen2(ssa.OpPairwiseSubUint16x8, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint16x16.PairwiseSub", opLen2(ssa.OpPairwiseSubUint16x16, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint32x4.PairwiseSub", opLen2(ssa.OpPairwiseSubUint32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint32x8.PairwiseSub", opLen2(ssa.OpPairwiseSubUint32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int8x16.PopCount", opLen1(ssa.OpPopCountInt8x16, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int8x32.PopCount", opLen1(ssa.OpPopCountInt8x32, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int8x64.PopCount", opLen1(ssa.OpPopCountInt8x64, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int16x8.PopCount", opLen1(ssa.OpPopCountInt16x8, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int16x16.PopCount", opLen1(ssa.OpPopCountInt16x16, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int16x32.PopCount", opLen1(ssa.OpPopCountInt16x32, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int32x4.PopCount", opLen1(ssa.OpPopCountInt32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int32x8.PopCount", opLen1(ssa.OpPopCountInt32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int32x16.PopCount", opLen1(ssa.OpPopCountInt32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int64x2.PopCount", opLen1(ssa.OpPopCountInt64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int64x4.PopCount", opLen1(ssa.OpPopCountInt64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int64x8.PopCount", opLen1(ssa.OpPopCountInt64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint8x16.PopCount", opLen1(ssa.OpPopCountUint8x16, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint8x32.PopCount", opLen1(ssa.OpPopCountUint8x32, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint8x64.PopCount", opLen1(ssa.OpPopCountUint8x64, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint16x8.PopCount", opLen1(ssa.OpPopCountUint16x8, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint16x16.PopCount", opLen1(ssa.OpPopCountUint16x16, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint16x32.PopCount", opLen1(ssa.OpPopCountUint16x32, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint32x4.PopCount", opLen1(ssa.OpPopCountUint32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint32x8.PopCount", opLen1(ssa.OpPopCountUint32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint32x16.PopCount", opLen1(ssa.OpPopCountUint32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint64x2.PopCount", opLen1(ssa.OpPopCountUint64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint64x4.PopCount", opLen1(ssa.OpPopCountUint64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint64x8.PopCount", opLen1(ssa.OpPopCountUint64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float32x4.Round", opLen1(ssa.OpRoundFloat32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float32x8.Round", opLen1(ssa.OpRoundFloat32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float64x2.Round", opLen1(ssa.OpRoundFloat64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float64x4.Round", opLen1(ssa.OpRoundFloat64x4, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float32x4.RoundSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpRoundSuppressExceptionWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
        addF(simdPackage, "Float32x8.RoundSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpRoundSuppressExceptionWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
        addF(simdPackage, "Float32x16.RoundSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpRoundSuppressExceptionWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
@@ -1454,6 +1430,94 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
        addF(simdPackage, "Float64x2.RoundWithPrecision", opLen1Imm8(ssa.OpRoundWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
        addF(simdPackage, "Float64x4.RoundWithPrecision", opLen1Imm8(ssa.OpRoundWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
        addF(simdPackage, "Float64x8.RoundWithPrecision", opLen1Imm8(ssa.OpRoundWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Int8x16.SaturatedAdd", opLen2(ssa.OpSaturatedAddInt8x16, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int8x32.SaturatedAdd", opLen2(ssa.OpSaturatedAddInt8x32, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int8x64.SaturatedAdd", opLen2(ssa.OpSaturatedAddInt8x64, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int16x8.SaturatedAdd", opLen2(ssa.OpSaturatedAddInt16x8, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int16x16.SaturatedAdd", opLen2(ssa.OpSaturatedAddInt16x16, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int16x32.SaturatedAdd", opLen2(ssa.OpSaturatedAddInt16x32, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint8x16.SaturatedAdd", opLen2(ssa.OpSaturatedAddUint8x16, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint8x32.SaturatedAdd", opLen2(ssa.OpSaturatedAddUint8x32, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint8x64.SaturatedAdd", opLen2(ssa.OpSaturatedAddUint8x64, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint16x8.SaturatedAdd", opLen2(ssa.OpSaturatedAddUint16x8, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint16x16.SaturatedAdd", opLen2(ssa.OpSaturatedAddUint16x16, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint16x32.SaturatedAdd", opLen2(ssa.OpSaturatedAddUint16x32, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int32x4.SaturatedPairDotProdAccumulate", opLen3(ssa.OpSaturatedPairDotProdAccumulateInt32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int32x8.SaturatedPairDotProdAccumulate", opLen3(ssa.OpSaturatedPairDotProdAccumulateInt32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int32x16.SaturatedPairDotProdAccumulate", opLen3(ssa.OpSaturatedPairDotProdAccumulateInt32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int16x8.SaturatedPairwiseAdd", opLen2(ssa.OpSaturatedPairwiseAddInt16x8, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int16x16.SaturatedPairwiseAdd", opLen2(ssa.OpSaturatedPairwiseAddInt16x16, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int16x8.SaturatedPairwiseSub", opLen2(ssa.OpSaturatedPairwiseSubInt16x8, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int16x16.SaturatedPairwiseSub", opLen2(ssa.OpSaturatedPairwiseSubInt16x16, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int8x16.SaturatedSub", opLen2(ssa.OpSaturatedSubInt8x16, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int8x32.SaturatedSub", opLen2(ssa.OpSaturatedSubInt8x32, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int8x64.SaturatedSub", opLen2(ssa.OpSaturatedSubInt8x64, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int16x8.SaturatedSub", opLen2(ssa.OpSaturatedSubInt16x8, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int16x16.SaturatedSub", opLen2(ssa.OpSaturatedSubInt16x16, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int16x32.SaturatedSub", opLen2(ssa.OpSaturatedSubInt16x32, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint8x16.SaturatedSub", opLen2(ssa.OpSaturatedSubUint8x16, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint8x32.SaturatedSub", opLen2(ssa.OpSaturatedSubUint8x32, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint8x64.SaturatedSub", opLen2(ssa.OpSaturatedSubUint8x64, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint16x8.SaturatedSub", opLen2(ssa.OpSaturatedSubUint16x8, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint16x16.SaturatedSub", opLen2(ssa.OpSaturatedSubUint16x16, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint16x32.SaturatedSub", opLen2(ssa.OpSaturatedSubUint16x32, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint8x16.SaturatedUnsignedSignedPairDotProd", opLen2(ssa.OpSaturatedUnsignedSignedPairDotProdUint8x16, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint8x32.SaturatedUnsignedSignedPairDotProd", opLen2(ssa.OpSaturatedUnsignedSignedPairDotProdUint8x32, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint16x8.SaturatedUnsignedSignedPairDotProd", opLen2(ssa.OpSaturatedUnsignedSignedPairDotProdUint16x8, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint16x16.SaturatedUnsignedSignedPairDotProd", opLen2(ssa.OpSaturatedUnsignedSignedPairDotProdUint16x16, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint16x32.SaturatedUnsignedSignedPairDotProd", opLen2(ssa.OpSaturatedUnsignedSignedPairDotProdUint16x32, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int32x4.SaturatedUnsignedSignedQuadDotProdAccumulate", opLen3(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateInt32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int32x8.SaturatedUnsignedSignedQuadDotProdAccumulate", opLen3(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateInt32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int32x16.SaturatedUnsignedSignedQuadDotProdAccumulate", opLen3(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateInt32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint32x4.SaturatedUnsignedSignedQuadDotProdAccumulate", opLen3(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateUint32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint32x8.SaturatedUnsignedSignedQuadDotProdAccumulate", opLen3(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateUint32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint32x16.SaturatedUnsignedSignedQuadDotProdAccumulate", opLen3(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateUint32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int8x16.Sign", opLen2(ssa.OpSignInt8x16, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int8x32.Sign", opLen2(ssa.OpSignInt8x32, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int16x8.Sign", opLen2(ssa.OpSignInt16x8, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int16x16.Sign", opLen2(ssa.OpSignInt16x16, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int32x4.Sign", opLen2(ssa.OpSignInt32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int32x8.Sign", opLen2(ssa.OpSignInt32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float32x4.Sqrt", opLen1(ssa.OpSqrtFloat32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float32x8.Sqrt", opLen1(ssa.OpSqrtFloat32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float32x16.Sqrt", opLen1(ssa.OpSqrtFloat32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float64x2.Sqrt", opLen1(ssa.OpSqrtFloat64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float64x4.Sqrt", opLen1(ssa.OpSqrtFloat64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float64x8.Sqrt", opLen1(ssa.OpSqrtFloat64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float32x4.Sub", opLen2(ssa.OpSubFloat32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float32x8.Sub", opLen2(ssa.OpSubFloat32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float32x16.Sub", opLen2(ssa.OpSubFloat32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float64x2.Sub", opLen2(ssa.OpSubFloat64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float64x4.Sub", opLen2(ssa.OpSubFloat64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float64x8.Sub", opLen2(ssa.OpSubFloat64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int8x16.Sub", opLen2(ssa.OpSubInt8x16, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int8x32.Sub", opLen2(ssa.OpSubInt8x32, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int8x64.Sub", opLen2(ssa.OpSubInt8x64, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int16x8.Sub", opLen2(ssa.OpSubInt16x8, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int16x16.Sub", opLen2(ssa.OpSubInt16x16, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int16x32.Sub", opLen2(ssa.OpSubInt16x32, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int32x4.Sub", opLen2(ssa.OpSubInt32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int32x8.Sub", opLen2(ssa.OpSubInt32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int32x16.Sub", opLen2(ssa.OpSubInt32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int64x2.Sub", opLen2(ssa.OpSubInt64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int64x4.Sub", opLen2(ssa.OpSubInt64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int64x8.Sub", opLen2(ssa.OpSubInt64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint8x16.Sub", opLen2(ssa.OpSubUint8x16, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint8x32.Sub", opLen2(ssa.OpSubUint8x32, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint8x64.Sub", opLen2(ssa.OpSubUint8x64, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint16x8.Sub", opLen2(ssa.OpSubUint16x8, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint16x16.Sub", opLen2(ssa.OpSubUint16x16, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint16x32.Sub", opLen2(ssa.OpSubUint16x32, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint32x4.Sub", opLen2(ssa.OpSubUint32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint32x8.Sub", opLen2(ssa.OpSubUint32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint32x16.Sub", opLen2(ssa.OpSubUint32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint64x2.Sub", opLen2(ssa.OpSubUint64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint64x4.Sub", opLen2(ssa.OpSubUint64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint64x8.Sub", opLen2(ssa.OpSubUint64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float32x4.Trunc", opLen1(ssa.OpTruncFloat32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float32x8.Trunc", opLen1(ssa.OpTruncFloat32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float64x2.Trunc", opLen1(ssa.OpTruncFloat64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float64x4.Trunc", opLen1(ssa.OpTruncFloat64x4, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float32x4.TruncSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpTruncSuppressExceptionWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
        addF(simdPackage, "Float32x8.TruncSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpTruncSuppressExceptionWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
        addF(simdPackage, "Float32x16.TruncSuppressExceptionWithPrecision", opLen1Imm8(ssa.OpTruncSuppressExceptionWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
@@ -1466,102 +1530,38 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
        addF(simdPackage, "Float64x2.TruncWithPrecision", opLen1Imm8(ssa.OpTruncWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
        addF(simdPackage, "Float64x4.TruncWithPrecision", opLen1Imm8(ssa.OpTruncWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
        addF(simdPackage, "Float64x8.TruncWithPrecision", opLen1Imm8(ssa.OpTruncWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
-       addF(simdPackage, "Float32x4.MaskedCeilSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedCeilSuppressExceptionWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
-       addF(simdPackage, "Float32x8.MaskedCeilSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedCeilSuppressExceptionWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
-       addF(simdPackage, "Float32x16.MaskedCeilSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedCeilSuppressExceptionWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
-       addF(simdPackage, "Float64x2.MaskedCeilSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedCeilSuppressExceptionWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
-       addF(simdPackage, "Float64x4.MaskedCeilSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedCeilSuppressExceptionWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
-       addF(simdPackage, "Float64x8.MaskedCeilSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedCeilSuppressExceptionWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
-       addF(simdPackage, "Float32x4.MaskedCeilWithPrecision", opLen2Imm8(ssa.OpMaskedCeilWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
-       addF(simdPackage, "Float32x8.MaskedCeilWithPrecision", opLen2Imm8(ssa.OpMaskedCeilWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
-       addF(simdPackage, "Float32x16.MaskedCeilWithPrecision", opLen2Imm8(ssa.OpMaskedCeilWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
-       addF(simdPackage, "Float64x2.MaskedCeilWithPrecision", opLen2Imm8(ssa.OpMaskedCeilWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
-       addF(simdPackage, "Float64x4.MaskedCeilWithPrecision", opLen2Imm8(ssa.OpMaskedCeilWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
-       addF(simdPackage, "Float64x8.MaskedCeilWithPrecision", opLen2Imm8(ssa.OpMaskedCeilWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
-       addF(simdPackage, "Float32x4.MaskedDiffWithCeilSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithCeilSuppressExceptionWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
-       addF(simdPackage, "Float32x8.MaskedDiffWithCeilSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithCeilSuppressExceptionWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
-       addF(simdPackage, "Float32x16.MaskedDiffWithCeilSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithCeilSuppressExceptionWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
-       addF(simdPackage, "Float64x2.MaskedDiffWithCeilSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithCeilSuppressExceptionWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
-       addF(simdPackage, "Float64x4.MaskedDiffWithCeilSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithCeilSuppressExceptionWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
-       addF(simdPackage, "Float64x8.MaskedDiffWithCeilSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithCeilSuppressExceptionWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
-       addF(simdPackage, "Float32x4.MaskedDiffWithCeilWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithCeilWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
-       addF(simdPackage, "Float32x8.MaskedDiffWithCeilWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithCeilWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
-       addF(simdPackage, "Float32x16.MaskedDiffWithCeilWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithCeilWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
-       addF(simdPackage, "Float64x2.MaskedDiffWithCeilWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithCeilWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
-       addF(simdPackage, "Float64x4.MaskedDiffWithCeilWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithCeilWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
-       addF(simdPackage, "Float64x8.MaskedDiffWithCeilWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithCeilWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
-       addF(simdPackage, "Float32x4.MaskedDiffWithFloorSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithFloorSuppressExceptionWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
-       addF(simdPackage, "Float32x8.MaskedDiffWithFloorSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithFloorSuppressExceptionWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
-       addF(simdPackage, "Float32x16.MaskedDiffWithFloorSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithFloorSuppressExceptionWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
-       addF(simdPackage, "Float64x2.MaskedDiffWithFloorSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithFloorSuppressExceptionWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
-       addF(simdPackage, "Float64x4.MaskedDiffWithFloorSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithFloorSuppressExceptionWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
-       addF(simdPackage, "Float64x8.MaskedDiffWithFloorSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithFloorSuppressExceptionWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
-       addF(simdPackage, "Float32x4.MaskedDiffWithFloorWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithFloorWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
-       addF(simdPackage, "Float32x8.MaskedDiffWithFloorWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithFloorWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
-       addF(simdPackage, "Float32x16.MaskedDiffWithFloorWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithFloorWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
-       addF(simdPackage, "Float64x2.MaskedDiffWithFloorWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithFloorWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
-       addF(simdPackage, "Float64x4.MaskedDiffWithFloorWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithFloorWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
-       addF(simdPackage, "Float64x8.MaskedDiffWithFloorWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithFloorWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
-       addF(simdPackage, "Float32x4.MaskedDiffWithRoundSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithRoundSuppressExceptionWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
-       addF(simdPackage, "Float32x8.MaskedDiffWithRoundSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithRoundSuppressExceptionWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
-       addF(simdPackage, "Float32x16.MaskedDiffWithRoundSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithRoundSuppressExceptionWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
-       addF(simdPackage, "Float64x2.MaskedDiffWithRoundSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithRoundSuppressExceptionWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
-       addF(simdPackage, "Float64x4.MaskedDiffWithRoundSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithRoundSuppressExceptionWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
-       addF(simdPackage, "Float64x8.MaskedDiffWithRoundSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithRoundSuppressExceptionWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
-       addF(simdPackage, "Float32x4.MaskedDiffWithRoundWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithRoundWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
-       addF(simdPackage, "Float32x8.MaskedDiffWithRoundWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithRoundWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
-       addF(simdPackage, "Float32x16.MaskedDiffWithRoundWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithRoundWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
-       addF(simdPackage, "Float64x2.MaskedDiffWithRoundWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithRoundWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
-       addF(simdPackage, "Float64x4.MaskedDiffWithRoundWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithRoundWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
-       addF(simdPackage, "Float64x8.MaskedDiffWithRoundWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithRoundWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
-       addF(simdPackage, "Float32x4.MaskedDiffWithTruncSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithTruncSuppressExceptionWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
-       addF(simdPackage, "Float32x8.MaskedDiffWithTruncSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithTruncSuppressExceptionWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
-       addF(simdPackage, "Float32x16.MaskedDiffWithTruncSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithTruncSuppressExceptionWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
-       addF(simdPackage, "Float64x2.MaskedDiffWithTruncSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithTruncSuppressExceptionWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
-       addF(simdPackage, "Float64x4.MaskedDiffWithTruncSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithTruncSuppressExceptionWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
-       addF(simdPackage, "Float64x8.MaskedDiffWithTruncSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithTruncSuppressExceptionWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
-       addF(simdPackage, "Float32x4.MaskedDiffWithTruncWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithTruncWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
-       addF(simdPackage, "Float32x8.MaskedDiffWithTruncWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithTruncWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
-       addF(simdPackage, "Float32x16.MaskedDiffWithTruncWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithTruncWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
-       addF(simdPackage, "Float64x2.MaskedDiffWithTruncWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithTruncWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
-       addF(simdPackage, "Float64x4.MaskedDiffWithTruncWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithTruncWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
-       addF(simdPackage, "Float64x8.MaskedDiffWithTruncWithPrecision", opLen2Imm8(ssa.OpMaskedDiffWithTruncWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
-       addF(simdPackage, "Float32x4.MaskedFloorSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedFloorSuppressExceptionWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
-       addF(simdPackage, "Float32x8.MaskedFloorSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedFloorSuppressExceptionWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
-       addF(simdPackage, "Float32x16.MaskedFloorSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedFloorSuppressExceptionWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
-       addF(simdPackage, "Float64x2.MaskedFloorSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedFloorSuppressExceptionWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
-       addF(simdPackage, "Float64x4.MaskedFloorSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedFloorSuppressExceptionWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
-       addF(simdPackage, "Float64x8.MaskedFloorSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedFloorSuppressExceptionWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
-       addF(simdPackage, "Float32x4.MaskedFloorWithPrecision", opLen2Imm8(ssa.OpMaskedFloorWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
-       addF(simdPackage, "Float32x8.MaskedFloorWithPrecision", opLen2Imm8(ssa.OpMaskedFloorWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
-       addF(simdPackage, "Float32x16.MaskedFloorWithPrecision", opLen2Imm8(ssa.OpMaskedFloorWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
-       addF(simdPackage, "Float64x2.MaskedFloorWithPrecision", opLen2Imm8(ssa.OpMaskedFloorWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
-       addF(simdPackage, "Float64x4.MaskedFloorWithPrecision", opLen2Imm8(ssa.OpMaskedFloorWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
-       addF(simdPackage, "Float64x8.MaskedFloorWithPrecision", opLen2Imm8(ssa.OpMaskedFloorWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
-       addF(simdPackage, "Float32x4.MaskedRoundSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedRoundSuppressExceptionWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
-       addF(simdPackage, "Float32x8.MaskedRoundSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedRoundSuppressExceptionWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
-       addF(simdPackage, "Float32x16.MaskedRoundSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedRoundSuppressExceptionWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
-       addF(simdPackage, "Float64x2.MaskedRoundSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedRoundSuppressExceptionWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
-       addF(simdPackage, "Float64x4.MaskedRoundSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedRoundSuppressExceptionWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
-       addF(simdPackage, "Float64x8.MaskedRoundSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedRoundSuppressExceptionWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
-       addF(simdPackage, "Float32x4.MaskedRoundWithPrecision", opLen2Imm8(ssa.OpMaskedRoundWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
-       addF(simdPackage, "Float32x8.MaskedRoundWithPrecision", opLen2Imm8(ssa.OpMaskedRoundWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
-       addF(simdPackage, "Float32x16.MaskedRoundWithPrecision", opLen2Imm8(ssa.OpMaskedRoundWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
-       addF(simdPackage, "Float64x2.MaskedRoundWithPrecision", opLen2Imm8(ssa.OpMaskedRoundWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
-       addF(simdPackage, "Float64x4.MaskedRoundWithPrecision", opLen2Imm8(ssa.OpMaskedRoundWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
-       addF(simdPackage, "Float64x8.MaskedRoundWithPrecision", opLen2Imm8(ssa.OpMaskedRoundWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
-       addF(simdPackage, "Float32x4.MaskedTruncSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedTruncSuppressExceptionWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
-       addF(simdPackage, "Float32x8.MaskedTruncSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedTruncSuppressExceptionWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
-       addF(simdPackage, "Float32x16.MaskedTruncSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedTruncSuppressExceptionWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
-       addF(simdPackage, "Float64x2.MaskedTruncSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedTruncSuppressExceptionWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
-       addF(simdPackage, "Float64x4.MaskedTruncSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedTruncSuppressExceptionWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
-       addF(simdPackage, "Float64x8.MaskedTruncSuppressExceptionWithPrecision", opLen2Imm8(ssa.OpMaskedTruncSuppressExceptionWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
-       addF(simdPackage, "Float32x4.MaskedTruncWithPrecision", opLen2Imm8(ssa.OpMaskedTruncWithPrecisionFloat32x4, types.TypeVec128, 4), sys.AMD64)
-       addF(simdPackage, "Float32x8.MaskedTruncWithPrecision", opLen2Imm8(ssa.OpMaskedTruncWithPrecisionFloat32x8, types.TypeVec256, 4), sys.AMD64)
-       addF(simdPackage, "Float32x16.MaskedTruncWithPrecision", opLen2Imm8(ssa.OpMaskedTruncWithPrecisionFloat32x16, types.TypeVec512, 4), sys.AMD64)
-       addF(simdPackage, "Float64x2.MaskedTruncWithPrecision", opLen2Imm8(ssa.OpMaskedTruncWithPrecisionFloat64x2, types.TypeVec128, 4), sys.AMD64)
-       addF(simdPackage, "Float64x4.MaskedTruncWithPrecision", opLen2Imm8(ssa.OpMaskedTruncWithPrecisionFloat64x4, types.TypeVec256, 4), sys.AMD64)
-       addF(simdPackage, "Float64x8.MaskedTruncWithPrecision", opLen2Imm8(ssa.OpMaskedTruncWithPrecisionFloat64x8, types.TypeVec512, 4), sys.AMD64)
+       addF(simdPackage, "Int32x4.UnsignedSignedQuadDotProdAccumulate", opLen3(ssa.OpUnsignedSignedQuadDotProdAccumulateInt32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int32x8.UnsignedSignedQuadDotProdAccumulate", opLen3(ssa.OpUnsignedSignedQuadDotProdAccumulateInt32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int32x16.UnsignedSignedQuadDotProdAccumulate", opLen3(ssa.OpUnsignedSignedQuadDotProdAccumulateInt32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint32x4.UnsignedSignedQuadDotProdAccumulate", opLen3(ssa.OpUnsignedSignedQuadDotProdAccumulateUint32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint32x8.UnsignedSignedQuadDotProdAccumulate", opLen3(ssa.OpUnsignedSignedQuadDotProdAccumulateUint32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint32x16.UnsignedSignedQuadDotProdAccumulate", opLen3(ssa.OpUnsignedSignedQuadDotProdAccumulateUint32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float32x4.Xor", opLen2(ssa.OpXorFloat32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float32x8.Xor", opLen2(ssa.OpXorFloat32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float32x16.Xor", opLen2(ssa.OpXorFloat32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float64x2.Xor", opLen2(ssa.OpXorFloat64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float64x4.Xor", opLen2(ssa.OpXorFloat64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float64x8.Xor", opLen2(ssa.OpXorFloat64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int8x16.Xor", opLen2(ssa.OpXorInt8x16, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int8x32.Xor", opLen2(ssa.OpXorInt8x32, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int16x8.Xor", opLen2(ssa.OpXorInt16x8, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int16x16.Xor", opLen2(ssa.OpXorInt16x16, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int32x4.Xor", opLen2(ssa.OpXorInt32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int32x8.Xor", opLen2(ssa.OpXorInt32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int32x16.Xor", opLen2(ssa.OpXorInt32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int64x2.Xor", opLen2(ssa.OpXorInt64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int64x4.Xor", opLen2(ssa.OpXorInt64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int64x8.Xor", opLen2(ssa.OpXorInt64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint8x16.Xor", opLen2(ssa.OpXorUint8x16, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint8x32.Xor", opLen2(ssa.OpXorUint8x32, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint16x8.Xor", opLen2(ssa.OpXorUint16x8, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint16x16.Xor", opLen2(ssa.OpXorUint16x16, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint32x4.Xor", opLen2(ssa.OpXorUint32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint32x8.Xor", opLen2(ssa.OpXorUint32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint32x16.Xor", opLen2(ssa.OpXorUint32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint64x2.Xor", opLen2(ssa.OpXorUint64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint64x4.Xor", opLen2(ssa.OpXorUint64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint64x8.Xor", opLen2(ssa.OpXorUint64x8, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Float32x16.AsFloat64x8", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
        addF(simdPackage, "Float32x16.AsInt16x32", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
        addF(simdPackage, "Float32x16.AsInt32x16", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
@@ -1832,6 +1832,34 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
        addF(simdPackage, "Uint8x64.AsUint16x32", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
        addF(simdPackage, "Uint8x64.AsUint32x16", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
        addF(simdPackage, "Uint8x64.AsUint64x8", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
+       addF(simdPackage, "LoadInt8x64", simdLoad(), sys.AMD64)
+       addF(simdPackage, "Int8x64.Store", simdStore(), sys.AMD64)
+       addF(simdPackage, "LoadMask8x64", simdLoad(), sys.AMD64)
+       addF(simdPackage, "Mask8x64.Store", simdStore(), sys.AMD64)
+       addF(simdPackage, "LoadInt16x32", simdLoad(), sys.AMD64)
+       addF(simdPackage, "Int16x32.Store", simdStore(), sys.AMD64)
+       addF(simdPackage, "LoadMask16x32", simdLoad(), sys.AMD64)
+       addF(simdPackage, "Mask16x32.Store", simdStore(), sys.AMD64)
+       addF(simdPackage, "LoadInt32x16", simdLoad(), sys.AMD64)
+       addF(simdPackage, "Int32x16.Store", simdStore(), sys.AMD64)
+       addF(simdPackage, "LoadMask32x16", simdLoad(), sys.AMD64)
+       addF(simdPackage, "Mask32x16.Store", simdStore(), sys.AMD64)
+       addF(simdPackage, "LoadInt64x8", simdLoad(), sys.AMD64)
+       addF(simdPackage, "Int64x8.Store", simdStore(), sys.AMD64)
+       addF(simdPackage, "LoadMask64x8", simdLoad(), sys.AMD64)
+       addF(simdPackage, "Mask64x8.Store", simdStore(), sys.AMD64)
+       addF(simdPackage, "LoadFloat32x16", simdLoad(), sys.AMD64)
+       addF(simdPackage, "Float32x16.Store", simdStore(), sys.AMD64)
+       addF(simdPackage, "LoadFloat64x8", simdLoad(), sys.AMD64)
+       addF(simdPackage, "Float64x8.Store", simdStore(), sys.AMD64)
+       addF(simdPackage, "LoadUint8x64", simdLoad(), sys.AMD64)
+       addF(simdPackage, "Uint8x64.Store", simdStore(), sys.AMD64)
+       addF(simdPackage, "LoadUint16x32", simdLoad(), sys.AMD64)
+       addF(simdPackage, "Uint16x32.Store", simdStore(), sys.AMD64)
+       addF(simdPackage, "LoadUint32x16", simdLoad(), sys.AMD64)
+       addF(simdPackage, "Uint32x16.Store", simdStore(), sys.AMD64)
+       addF(simdPackage, "LoadUint64x8", simdLoad(), sys.AMD64)
+       addF(simdPackage, "Uint64x8.Store", simdStore(), sys.AMD64)
        addF(simdPackage, "LoadInt8x16", simdLoad(), sys.AMD64)
        addF(simdPackage, "Int8x16.Store", simdStore(), sys.AMD64)
        addF(simdPackage, "LoadInt16x8", simdLoad(), sys.AMD64)
@@ -1888,34 +1916,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
        addF(simdPackage, "Mask8x32.Store", simdStore(), sys.AMD64)
        addF(simdPackage, "LoadMask16x16", simdLoad(), sys.AMD64)
        addF(simdPackage, "Mask16x16.Store", simdStore(), sys.AMD64)
-       addF(simdPackage, "LoadInt8x64", simdLoad(), sys.AMD64)
-       addF(simdPackage, "Int8x64.Store", simdStore(), sys.AMD64)
-       addF(simdPackage, "LoadMask8x64", simdLoad(), sys.AMD64)
-       addF(simdPackage, "Mask8x64.Store", simdStore(), sys.AMD64)
-       addF(simdPackage, "LoadInt16x32", simdLoad(), sys.AMD64)
-       addF(simdPackage, "Int16x32.Store", simdStore(), sys.AMD64)
-       addF(simdPackage, "LoadMask16x32", simdLoad(), sys.AMD64)
-       addF(simdPackage, "Mask16x32.Store", simdStore(), sys.AMD64)
-       addF(simdPackage, "LoadInt32x16", simdLoad(), sys.AMD64)
-       addF(simdPackage, "Int32x16.Store", simdStore(), sys.AMD64)
-       addF(simdPackage, "LoadMask32x16", simdLoad(), sys.AMD64)
-       addF(simdPackage, "Mask32x16.Store", simdStore(), sys.AMD64)
-       addF(simdPackage, "LoadInt64x8", simdLoad(), sys.AMD64)
-       addF(simdPackage, "Int64x8.Store", simdStore(), sys.AMD64)
-       addF(simdPackage, "LoadMask64x8", simdLoad(), sys.AMD64)
-       addF(simdPackage, "Mask64x8.Store", simdStore(), sys.AMD64)
-       addF(simdPackage, "LoadFloat32x16", simdLoad(), sys.AMD64)
-       addF(simdPackage, "Float32x16.Store", simdStore(), sys.AMD64)
-       addF(simdPackage, "LoadFloat64x8", simdLoad(), sys.AMD64)
-       addF(simdPackage, "Float64x8.Store", simdStore(), sys.AMD64)
-       addF(simdPackage, "LoadUint8x64", simdLoad(), sys.AMD64)
-       addF(simdPackage, "Uint8x64.Store", simdStore(), sys.AMD64)
-       addF(simdPackage, "LoadUint16x32", simdLoad(), sys.AMD64)
-       addF(simdPackage, "Uint16x32.Store", simdStore(), sys.AMD64)
-       addF(simdPackage, "LoadUint32x16", simdLoad(), sys.AMD64)
-       addF(simdPackage, "Uint32x16.Store", simdStore(), sys.AMD64)
-       addF(simdPackage, "LoadUint64x8", simdLoad(), sys.AMD64)
-       addF(simdPackage, "Uint64x8.Store", simdStore(), sys.AMD64)
        addF(simdPackage, "Mask16x16.AsInt16x16", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
        addF(simdPackage, "Int16x16.AsMask16x16", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
        addF(simdPackage, "Mask16x16.And", opLen2(ssa.OpAndInt32x8, types.TypeVec256), sys.AMD64)