From 2080415aa2e65dc174d2f8f8876cc16aa9c2b7c3 Mon Sep 17 00:00:00 2001
From: David Chase
Date: Fri, 1 Aug 2025 09:23:45 -0400
Subject: [PATCH] [dev.simd] simd: add emulations for missing AVX2 comparisons

This also removes the AVX512 versions of the operations that would
use the same names but would not run on AVX2-only hardware.

Includes files generated by simdgen CL 692355

Change-Id: Iff29042245b7688133fed49a03e681e85235b8a8
Reviewed-on: https://go-review.googlesource.com/c/go/+/692335
LUCI-TryBot-Result: Go LUCI
Reviewed-by: Junyang Shao
---
 src/cmd/compile/internal/amd64/simdssa.go     |   16 -
 .../compile/internal/ssa/_gen/simdAMD64.rules |   72 -
 .../compile/internal/ssa/_gen/simdAMD64ops.go |   16 -
 .../internal/ssa/_gen/simdgenericOps.go       |   72 -
 src/cmd/compile/internal/ssa/opGen.go         |  704 --------
 src/cmd/compile/internal/ssa/rewriteAMD64.go  | 1440 -----------------
 .../compile/internal/ssagen/simdintrinsics.go |   72 -
 src/simd/compare_test.go                      |  166 +-
 src/simd/genfiles.go                          |  136 ++
 src/simd/ops_amd64.go                         |  360 -----
 src/simd/slice_amd64.go                       |  636 ++++++++
 11 files changed, 859 insertions(+), 2831 deletions(-)

diff --git a/src/cmd/compile/internal/amd64/simdssa.go b/src/cmd/compile/internal/amd64/simdssa.go
index 73a947a88a..3ec8b484fb 100644
--- a/src/cmd/compile/internal/amd64/simdssa.go
+++ b/src/cmd/compile/internal/amd64/simdssa.go
@@ -886,29 +886,13 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
 	case ssa.OpAMD64VCMPPS512,
 		ssa.OpAMD64VCMPPD512,
-		ssa.OpAMD64VPCMPUB128,
-		ssa.OpAMD64VPCMPUB256,
 		ssa.OpAMD64VPCMPUB512,
-		ssa.OpAMD64VPCMPUW128,
-		ssa.OpAMD64VPCMPUW256,
 		ssa.OpAMD64VPCMPUW512,
-		ssa.OpAMD64VPCMPUD128,
-		ssa.OpAMD64VPCMPUD256,
 		ssa.OpAMD64VPCMPUD512,
-		ssa.OpAMD64VPCMPUQ128,
-		ssa.OpAMD64VPCMPUQ256,
 		ssa.OpAMD64VPCMPUQ512,
-		ssa.OpAMD64VPCMPB128,
-		ssa.OpAMD64VPCMPB256,
 		ssa.OpAMD64VPCMPB512,
-		ssa.OpAMD64VPCMPW128,
-		ssa.OpAMD64VPCMPW256,
 		ssa.OpAMD64VPCMPW512,
-		ssa.OpAMD64VPCMPD128,
-		ssa.OpAMD64VPCMPD256,
 		ssa.OpAMD64VPCMPD512,
-		ssa.OpAMD64VPCMPQ128,
-		ssa.OpAMD64VPCMPQ256,
 		ssa.OpAMD64VPCMPQ512:
 		p = simdV2kImm8(s, v)
diff --git a/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules b/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules
index e7c5a1a97d..9670f035ba 100644
--- a/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules
+++ b/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules
@@ -590,17 +590,9 @@
 (GreaterInt64x2 ...) => (VPCMPGTQ128 ...)
 (GreaterInt64x4 ...) => (VPCMPGTQ256 ...)
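The emulations themselves land in the generated src/simd files (genfiles.go and slice_amd64.go in the diffstat above); the compiler hunks in this CL only delete the 128- and 256-bit AVX512 lowerings that those emulations replace. For integers, AVX2 provides only signed greater-than (VPCMPGT*) and equality (VPCMPEQ*), so every other comparison must be composed from those two. The following is a minimal per-lane sketch in plain Go of the standard identities (sign-bit flip for unsigned compares, operand swap for Less, mask complement for the or-equal and not-equal forms); it illustrates the identities only and is not the generated code:

package main

import "fmt"

// greaterUint8 emulates unsigned x > y using only a signed compare,
// by flipping the sign bit of both operands first. On AVX2 this is
// one VPXOR with 0x80 in every byte, then VPCMPGTB.
func greaterUint8(x, y uint8) bool {
	const signBit = 0x80
	return int8(x^signBit) > int8(y^signBit)
}

// The remaining forms are each one cheap step away:
//   x <  y  ==   y > x      (swap operands)
//   x >= y  ==  !(y > x)    (swap, then complement the mask: VPXOR all-ones)
//   x <= y  ==  !(x > y)    (complement the mask)
//   x != y  ==  !(x == y)   (VPCMPEQ*, then complement the mask)
func lessUint8(x, y uint8) bool         { return greaterUint8(y, x) }
func greaterEqualUint8(x, y uint8) bool { return !greaterUint8(y, x) }

func main() {
	a, b := uint8(0xFF), uint8(0x01)
	fmt.Println(greaterUint8(a, b))      // true: 255 > 1 unsigned
	fmt.Println(int8(a) > int8(b))       // false: reinterpreted signed, -1 > 1
	fmt.Println(greaterEqualUint8(b, b)) // true
}

The same swap-and-complement identities cover the signed GreaterEqual, Less, LessEqual, and NotEqual forms, which is why only the 512-bit AVX512 variants of these ops need to remain in the rules and op tables below.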
(GreaterInt64x8 x y) => (VPMOVMToVec64x8 (VPCMPGTQ512 x y)) -(GreaterUint8x16 x y) => (VPMOVMToVec8x16 (VPCMPUB128 [14] x y)) -(GreaterUint8x32 x y) => (VPMOVMToVec8x32 (VPCMPUB256 [14] x y)) (GreaterUint8x64 x y) => (VPMOVMToVec8x64 (VPCMPUB512 [14] x y)) -(GreaterUint16x8 x y) => (VPMOVMToVec16x8 (VPCMPUW128 [14] x y)) -(GreaterUint16x16 x y) => (VPMOVMToVec16x16 (VPCMPUW256 [14] x y)) (GreaterUint16x32 x y) => (VPMOVMToVec16x32 (VPCMPUW512 [14] x y)) -(GreaterUint32x4 x y) => (VPMOVMToVec32x4 (VPCMPUD128 [14] x y)) -(GreaterUint32x8 x y) => (VPMOVMToVec32x8 (VPCMPUD256 [14] x y)) (GreaterUint32x16 x y) => (VPMOVMToVec32x16 (VPCMPUD512 [14] x y)) -(GreaterUint64x2 x y) => (VPMOVMToVec64x2 (VPCMPUQ128 [14] x y)) -(GreaterUint64x4 x y) => (VPMOVMToVec64x4 (VPCMPUQ256 [14] x y)) (GreaterUint64x8 x y) => (VPMOVMToVec64x8 (VPCMPUQ512 [14] x y)) (GreaterEqualFloat32x4 x y) => (VCMPPS128 [13] x y) (GreaterEqualFloat32x8 x y) => (VCMPPS256 [13] x y) @@ -608,29 +600,13 @@ (GreaterEqualFloat64x2 x y) => (VCMPPD128 [13] x y) (GreaterEqualFloat64x4 x y) => (VCMPPD256 [13] x y) (GreaterEqualFloat64x8 x y) => (VPMOVMToVec64x8 (VCMPPD512 [13] x y)) -(GreaterEqualInt8x16 x y) => (VPMOVMToVec8x16 (VPCMPB128 [13] x y)) -(GreaterEqualInt8x32 x y) => (VPMOVMToVec8x32 (VPCMPB256 [13] x y)) (GreaterEqualInt8x64 x y) => (VPMOVMToVec8x64 (VPCMPB512 [13] x y)) -(GreaterEqualInt16x8 x y) => (VPMOVMToVec16x8 (VPCMPW128 [13] x y)) -(GreaterEqualInt16x16 x y) => (VPMOVMToVec16x16 (VPCMPW256 [13] x y)) (GreaterEqualInt16x32 x y) => (VPMOVMToVec16x32 (VPCMPW512 [13] x y)) -(GreaterEqualInt32x4 x y) => (VPMOVMToVec32x4 (VPCMPD128 [13] x y)) -(GreaterEqualInt32x8 x y) => (VPMOVMToVec32x8 (VPCMPD256 [13] x y)) (GreaterEqualInt32x16 x y) => (VPMOVMToVec32x16 (VPCMPD512 [13] x y)) -(GreaterEqualInt64x2 x y) => (VPMOVMToVec64x2 (VPCMPQ128 [13] x y)) -(GreaterEqualInt64x4 x y) => (VPMOVMToVec64x4 (VPCMPQ256 [13] x y)) (GreaterEqualInt64x8 x y) => (VPMOVMToVec64x8 (VPCMPQ512 [13] x y)) -(GreaterEqualUint8x16 x y) => (VPMOVMToVec8x16 (VPCMPUB128 [13] x y)) -(GreaterEqualUint8x32 x y) => (VPMOVMToVec8x32 (VPCMPUB256 [13] x y)) (GreaterEqualUint8x64 x y) => (VPMOVMToVec8x64 (VPCMPUB512 [13] x y)) -(GreaterEqualUint16x8 x y) => (VPMOVMToVec16x8 (VPCMPUW128 [13] x y)) -(GreaterEqualUint16x16 x y) => (VPMOVMToVec16x16 (VPCMPUW256 [13] x y)) (GreaterEqualUint16x32 x y) => (VPMOVMToVec16x32 (VPCMPUW512 [13] x y)) -(GreaterEqualUint32x4 x y) => (VPMOVMToVec32x4 (VPCMPUD128 [13] x y)) -(GreaterEqualUint32x8 x y) => (VPMOVMToVec32x8 (VPCMPUD256 [13] x y)) (GreaterEqualUint32x16 x y) => (VPMOVMToVec32x16 (VPCMPUD512 [13] x y)) -(GreaterEqualUint64x2 x y) => (VPMOVMToVec64x2 (VPCMPUQ128 [13] x y)) -(GreaterEqualUint64x4 x y) => (VPMOVMToVec64x4 (VPCMPUQ256 [13] x y)) (GreaterEqualUint64x8 x y) => (VPMOVMToVec64x8 (VPCMPUQ512 [13] x y)) (GreaterEqualMaskedFloat32x4 x y mask) => (VPMOVMToVec32x4 (VCMPPSMasked128 [13] x y (VPMOVVec32x4ToM mask))) (GreaterEqualMaskedFloat32x8 x y mask) => (VPMOVMToVec32x8 (VCMPPSMasked256 [13] x y (VPMOVVec32x8ToM mask))) @@ -710,29 +686,13 @@ (LessFloat64x2 x y) => (VCMPPD128 [1] x y) (LessFloat64x4 x y) => (VCMPPD256 [1] x y) (LessFloat64x8 x y) => (VPMOVMToVec64x8 (VCMPPD512 [1] x y)) -(LessInt8x16 x y) => (VPMOVMToVec8x16 (VPCMPB128 [1] x y)) -(LessInt8x32 x y) => (VPMOVMToVec8x32 (VPCMPB256 [1] x y)) (LessInt8x64 x y) => (VPMOVMToVec8x64 (VPCMPB512 [1] x y)) -(LessInt16x8 x y) => (VPMOVMToVec16x8 (VPCMPW128 [1] x y)) -(LessInt16x16 x y) => (VPMOVMToVec16x16 (VPCMPW256 [1] x y)) (LessInt16x32 x y) => 
(VPMOVMToVec16x32 (VPCMPW512 [1] x y)) -(LessInt32x4 x y) => (VPMOVMToVec32x4 (VPCMPD128 [1] x y)) -(LessInt32x8 x y) => (VPMOVMToVec32x8 (VPCMPD256 [1] x y)) (LessInt32x16 x y) => (VPMOVMToVec32x16 (VPCMPD512 [1] x y)) -(LessInt64x2 x y) => (VPMOVMToVec64x2 (VPCMPQ128 [1] x y)) -(LessInt64x4 x y) => (VPMOVMToVec64x4 (VPCMPQ256 [1] x y)) (LessInt64x8 x y) => (VPMOVMToVec64x8 (VPCMPQ512 [1] x y)) -(LessUint8x16 x y) => (VPMOVMToVec8x16 (VPCMPUB128 [1] x y)) -(LessUint8x32 x y) => (VPMOVMToVec8x32 (VPCMPUB256 [1] x y)) (LessUint8x64 x y) => (VPMOVMToVec8x64 (VPCMPUB512 [1] x y)) -(LessUint16x8 x y) => (VPMOVMToVec16x8 (VPCMPUW128 [1] x y)) -(LessUint16x16 x y) => (VPMOVMToVec16x16 (VPCMPUW256 [1] x y)) (LessUint16x32 x y) => (VPMOVMToVec16x32 (VPCMPUW512 [1] x y)) -(LessUint32x4 x y) => (VPMOVMToVec32x4 (VPCMPUD128 [1] x y)) -(LessUint32x8 x y) => (VPMOVMToVec32x8 (VPCMPUD256 [1] x y)) (LessUint32x16 x y) => (VPMOVMToVec32x16 (VPCMPUD512 [1] x y)) -(LessUint64x2 x y) => (VPMOVMToVec64x2 (VPCMPUQ128 [1] x y)) -(LessUint64x4 x y) => (VPMOVMToVec64x4 (VPCMPUQ256 [1] x y)) (LessUint64x8 x y) => (VPMOVMToVec64x8 (VPCMPUQ512 [1] x y)) (LessEqualFloat32x4 x y) => (VCMPPS128 [2] x y) (LessEqualFloat32x8 x y) => (VCMPPS256 [2] x y) @@ -740,29 +700,13 @@ (LessEqualFloat64x2 x y) => (VCMPPD128 [2] x y) (LessEqualFloat64x4 x y) => (VCMPPD256 [2] x y) (LessEqualFloat64x8 x y) => (VPMOVMToVec64x8 (VCMPPD512 [2] x y)) -(LessEqualInt8x16 x y) => (VPMOVMToVec8x16 (VPCMPB128 [2] x y)) -(LessEqualInt8x32 x y) => (VPMOVMToVec8x32 (VPCMPB256 [2] x y)) (LessEqualInt8x64 x y) => (VPMOVMToVec8x64 (VPCMPB512 [2] x y)) -(LessEqualInt16x8 x y) => (VPMOVMToVec16x8 (VPCMPW128 [2] x y)) -(LessEqualInt16x16 x y) => (VPMOVMToVec16x16 (VPCMPW256 [2] x y)) (LessEqualInt16x32 x y) => (VPMOVMToVec16x32 (VPCMPW512 [2] x y)) -(LessEqualInt32x4 x y) => (VPMOVMToVec32x4 (VPCMPD128 [2] x y)) -(LessEqualInt32x8 x y) => (VPMOVMToVec32x8 (VPCMPD256 [2] x y)) (LessEqualInt32x16 x y) => (VPMOVMToVec32x16 (VPCMPD512 [2] x y)) -(LessEqualInt64x2 x y) => (VPMOVMToVec64x2 (VPCMPQ128 [2] x y)) -(LessEqualInt64x4 x y) => (VPMOVMToVec64x4 (VPCMPQ256 [2] x y)) (LessEqualInt64x8 x y) => (VPMOVMToVec64x8 (VPCMPQ512 [2] x y)) -(LessEqualUint8x16 x y) => (VPMOVMToVec8x16 (VPCMPUB128 [2] x y)) -(LessEqualUint8x32 x y) => (VPMOVMToVec8x32 (VPCMPUB256 [2] x y)) (LessEqualUint8x64 x y) => (VPMOVMToVec8x64 (VPCMPUB512 [2] x y)) -(LessEqualUint16x8 x y) => (VPMOVMToVec16x8 (VPCMPUW128 [2] x y)) -(LessEqualUint16x16 x y) => (VPMOVMToVec16x16 (VPCMPUW256 [2] x y)) (LessEqualUint16x32 x y) => (VPMOVMToVec16x32 (VPCMPUW512 [2] x y)) -(LessEqualUint32x4 x y) => (VPMOVMToVec32x4 (VPCMPUD128 [2] x y)) -(LessEqualUint32x8 x y) => (VPMOVMToVec32x8 (VPCMPUD256 [2] x y)) (LessEqualUint32x16 x y) => (VPMOVMToVec32x16 (VPCMPUD512 [2] x y)) -(LessEqualUint64x2 x y) => (VPMOVMToVec64x2 (VPCMPUQ128 [2] x y)) -(LessEqualUint64x4 x y) => (VPMOVMToVec64x4 (VPCMPUQ256 [2] x y)) (LessEqualUint64x8 x y) => (VPMOVMToVec64x8 (VPCMPUQ512 [2] x y)) (LessEqualMaskedFloat32x4 x y mask) => (VPMOVMToVec32x4 (VCMPPSMasked128 [2] x y (VPMOVVec32x4ToM mask))) (LessEqualMaskedFloat32x8 x y mask) => (VPMOVMToVec32x8 (VCMPPSMasked256 [2] x y (VPMOVVec32x8ToM mask))) @@ -1050,29 +994,13 @@ (NotEqualFloat64x2 x y) => (VCMPPD128 [4] x y) (NotEqualFloat64x4 x y) => (VCMPPD256 [4] x y) (NotEqualFloat64x8 x y) => (VPMOVMToVec64x8 (VCMPPD512 [4] x y)) -(NotEqualInt8x16 x y) => (VPMOVMToVec8x16 (VPCMPB128 [4] x y)) -(NotEqualInt8x32 x y) => (VPMOVMToVec8x32 (VPCMPB256 [4] x y)) (NotEqualInt8x64 x 
y) => (VPMOVMToVec8x64 (VPCMPB512 [4] x y)) -(NotEqualInt16x8 x y) => (VPMOVMToVec16x8 (VPCMPW128 [4] x y)) -(NotEqualInt16x16 x y) => (VPMOVMToVec16x16 (VPCMPW256 [4] x y)) (NotEqualInt16x32 x y) => (VPMOVMToVec16x32 (VPCMPW512 [4] x y)) -(NotEqualInt32x4 x y) => (VPMOVMToVec32x4 (VPCMPD128 [4] x y)) -(NotEqualInt32x8 x y) => (VPMOVMToVec32x8 (VPCMPD256 [4] x y)) (NotEqualInt32x16 x y) => (VPMOVMToVec32x16 (VPCMPD512 [4] x y)) -(NotEqualInt64x2 x y) => (VPMOVMToVec64x2 (VPCMPQ128 [4] x y)) -(NotEqualInt64x4 x y) => (VPMOVMToVec64x4 (VPCMPQ256 [4] x y)) (NotEqualInt64x8 x y) => (VPMOVMToVec64x8 (VPCMPQ512 [4] x y)) -(NotEqualUint8x16 x y) => (VPMOVMToVec8x16 (VPCMPUB128 [4] x y)) -(NotEqualUint8x32 x y) => (VPMOVMToVec8x32 (VPCMPUB256 [4] x y)) (NotEqualUint8x64 x y) => (VPMOVMToVec8x64 (VPCMPUB512 [4] x y)) -(NotEqualUint16x8 x y) => (VPMOVMToVec16x8 (VPCMPUW128 [4] x y)) -(NotEqualUint16x16 x y) => (VPMOVMToVec16x16 (VPCMPUW256 [4] x y)) (NotEqualUint16x32 x y) => (VPMOVMToVec16x32 (VPCMPUW512 [4] x y)) -(NotEqualUint32x4 x y) => (VPMOVMToVec32x4 (VPCMPUD128 [4] x y)) -(NotEqualUint32x8 x y) => (VPMOVMToVec32x8 (VPCMPUD256 [4] x y)) (NotEqualUint32x16 x y) => (VPMOVMToVec32x16 (VPCMPUD512 [4] x y)) -(NotEqualUint64x2 x y) => (VPMOVMToVec64x2 (VPCMPUQ128 [4] x y)) -(NotEqualUint64x4 x y) => (VPMOVMToVec64x4 (VPCMPUQ256 [4] x y)) (NotEqualUint64x8 x y) => (VPMOVMToVec64x8 (VPCMPUQ512 [4] x y)) (NotEqualMaskedFloat32x4 x y mask) => (VPMOVMToVec32x4 (VCMPPSMasked128 [4] x y (VPMOVVec32x4ToM mask))) (NotEqualMaskedFloat32x8 x y mask) => (VPMOVMToVec32x8 (VCMPPSMasked256 [4] x y (VPMOVVec32x8ToM mask))) diff --git a/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go b/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go index 5d388a4531..61abaa5e97 100644 --- a/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go +++ b/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go @@ -986,29 +986,13 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf {name: "VEXTRACTF64X4256", argLength: 1, reg: w11, asm: "VEXTRACTF64X4", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VEXTRACTI128128", argLength: 1, reg: v11, asm: "VEXTRACTI128", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VEXTRACTI64X4256", argLength: 1, reg: w11, asm: "VEXTRACTI64X4", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: false}, - {name: "VPCMPUB128", argLength: 2, reg: w2k, asm: "VPCMPUB", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false}, - {name: "VPCMPUB256", argLength: 2, reg: w2k, asm: "VPCMPUB", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false}, {name: "VPCMPUB512", argLength: 2, reg: w2k, asm: "VPCMPUB", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false}, - {name: "VPCMPUW128", argLength: 2, reg: w2k, asm: "VPCMPUW", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false}, - {name: "VPCMPUW256", argLength: 2, reg: w2k, asm: "VPCMPUW", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false}, {name: "VPCMPUW512", argLength: 2, reg: w2k, asm: "VPCMPUW", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false}, - {name: "VPCMPUD128", argLength: 2, reg: w2k, asm: "VPCMPUD", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false}, - {name: "VPCMPUD256", argLength: 2, reg: w2k, asm: "VPCMPUD", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false}, {name: "VPCMPUD512", argLength: 2, reg: w2k, asm: "VPCMPUD", aux: "UInt8", 
commutative: false, typ: "Mask", resultInArg0: false}, - {name: "VPCMPUQ128", argLength: 2, reg: w2k, asm: "VPCMPUQ", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false}, - {name: "VPCMPUQ256", argLength: 2, reg: w2k, asm: "VPCMPUQ", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false}, {name: "VPCMPUQ512", argLength: 2, reg: w2k, asm: "VPCMPUQ", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false}, - {name: "VPCMPB128", argLength: 2, reg: w2k, asm: "VPCMPB", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false}, - {name: "VPCMPB256", argLength: 2, reg: w2k, asm: "VPCMPB", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false}, {name: "VPCMPB512", argLength: 2, reg: w2k, asm: "VPCMPB", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false}, - {name: "VPCMPW128", argLength: 2, reg: w2k, asm: "VPCMPW", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false}, - {name: "VPCMPW256", argLength: 2, reg: w2k, asm: "VPCMPW", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false}, {name: "VPCMPW512", argLength: 2, reg: w2k, asm: "VPCMPW", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false}, - {name: "VPCMPD128", argLength: 2, reg: w2k, asm: "VPCMPD", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false}, - {name: "VPCMPD256", argLength: 2, reg: w2k, asm: "VPCMPD", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false}, {name: "VPCMPD512", argLength: 2, reg: w2k, asm: "VPCMPD", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false}, - {name: "VPCMPQ128", argLength: 2, reg: w2k, asm: "VPCMPQ", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false}, - {name: "VPCMPQ256", argLength: 2, reg: w2k, asm: "VPCMPQ", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false}, {name: "VPCMPQ512", argLength: 2, reg: w2k, asm: "VPCMPQ", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false}, {name: "VPROLD128", argLength: 1, reg: w11, asm: "VPROLD", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPROLD256", argLength: 1, reg: w11, asm: "VPROLD", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: false}, diff --git a/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go b/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go index f120dcddd0..4f2b1a9121 100644 --- a/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go +++ b/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go @@ -514,17 +514,9 @@ func simdGenericOps() []opData { {name: "GreaterEqualFloat64x2", argLength: 2, commutative: false}, {name: "GreaterEqualFloat64x4", argLength: 2, commutative: false}, {name: "GreaterEqualFloat64x8", argLength: 2, commutative: false}, - {name: "GreaterEqualInt8x16", argLength: 2, commutative: false}, - {name: "GreaterEqualInt8x32", argLength: 2, commutative: false}, {name: "GreaterEqualInt8x64", argLength: 2, commutative: false}, - {name: "GreaterEqualInt16x8", argLength: 2, commutative: false}, - {name: "GreaterEqualInt16x16", argLength: 2, commutative: false}, {name: "GreaterEqualInt16x32", argLength: 2, commutative: false}, - {name: "GreaterEqualInt32x4", argLength: 2, commutative: false}, - {name: "GreaterEqualInt32x8", argLength: 2, commutative: false}, {name: "GreaterEqualInt32x16", argLength: 2, commutative: false}, - {name: "GreaterEqualInt64x2", argLength: 2, commutative: false}, - {name: "GreaterEqualInt64x4", argLength: 2, commutative: false}, {name: "GreaterEqualInt64x8", 
argLength: 2, commutative: false}, {name: "GreaterEqualMaskedFloat32x4", argLength: 3, commutative: false}, {name: "GreaterEqualMaskedFloat32x8", argLength: 3, commutative: false}, @@ -556,17 +548,9 @@ func simdGenericOps() []opData { {name: "GreaterEqualMaskedUint64x2", argLength: 3, commutative: false}, {name: "GreaterEqualMaskedUint64x4", argLength: 3, commutative: false}, {name: "GreaterEqualMaskedUint64x8", argLength: 3, commutative: false}, - {name: "GreaterEqualUint8x16", argLength: 2, commutative: false}, - {name: "GreaterEqualUint8x32", argLength: 2, commutative: false}, {name: "GreaterEqualUint8x64", argLength: 2, commutative: false}, - {name: "GreaterEqualUint16x8", argLength: 2, commutative: false}, - {name: "GreaterEqualUint16x16", argLength: 2, commutative: false}, {name: "GreaterEqualUint16x32", argLength: 2, commutative: false}, - {name: "GreaterEqualUint32x4", argLength: 2, commutative: false}, - {name: "GreaterEqualUint32x8", argLength: 2, commutative: false}, {name: "GreaterEqualUint32x16", argLength: 2, commutative: false}, - {name: "GreaterEqualUint64x2", argLength: 2, commutative: false}, - {name: "GreaterEqualUint64x4", argLength: 2, commutative: false}, {name: "GreaterEqualUint64x8", argLength: 2, commutative: false}, {name: "GreaterFloat32x4", argLength: 2, commutative: false}, {name: "GreaterFloat32x8", argLength: 2, commutative: false}, @@ -616,17 +600,9 @@ func simdGenericOps() []opData { {name: "GreaterMaskedUint64x2", argLength: 3, commutative: false}, {name: "GreaterMaskedUint64x4", argLength: 3, commutative: false}, {name: "GreaterMaskedUint64x8", argLength: 3, commutative: false}, - {name: "GreaterUint8x16", argLength: 2, commutative: false}, - {name: "GreaterUint8x32", argLength: 2, commutative: false}, {name: "GreaterUint8x64", argLength: 2, commutative: false}, - {name: "GreaterUint16x8", argLength: 2, commutative: false}, - {name: "GreaterUint16x16", argLength: 2, commutative: false}, {name: "GreaterUint16x32", argLength: 2, commutative: false}, - {name: "GreaterUint32x4", argLength: 2, commutative: false}, - {name: "GreaterUint32x8", argLength: 2, commutative: false}, {name: "GreaterUint32x16", argLength: 2, commutative: false}, - {name: "GreaterUint64x2", argLength: 2, commutative: false}, - {name: "GreaterUint64x4", argLength: 2, commutative: false}, {name: "GreaterUint64x8", argLength: 2, commutative: false}, {name: "IsNanFloat32x4", argLength: 2, commutative: true}, {name: "IsNanFloat32x8", argLength: 2, commutative: true}, @@ -646,17 +622,9 @@ func simdGenericOps() []opData { {name: "LessEqualFloat64x2", argLength: 2, commutative: false}, {name: "LessEqualFloat64x4", argLength: 2, commutative: false}, {name: "LessEqualFloat64x8", argLength: 2, commutative: false}, - {name: "LessEqualInt8x16", argLength: 2, commutative: false}, - {name: "LessEqualInt8x32", argLength: 2, commutative: false}, {name: "LessEqualInt8x64", argLength: 2, commutative: false}, - {name: "LessEqualInt16x8", argLength: 2, commutative: false}, - {name: "LessEqualInt16x16", argLength: 2, commutative: false}, {name: "LessEqualInt16x32", argLength: 2, commutative: false}, - {name: "LessEqualInt32x4", argLength: 2, commutative: false}, - {name: "LessEqualInt32x8", argLength: 2, commutative: false}, {name: "LessEqualInt32x16", argLength: 2, commutative: false}, - {name: "LessEqualInt64x2", argLength: 2, commutative: false}, - {name: "LessEqualInt64x4", argLength: 2, commutative: false}, {name: "LessEqualInt64x8", argLength: 2, commutative: false}, {name: "LessEqualMaskedFloat32x4", 
argLength: 3, commutative: false}, {name: "LessEqualMaskedFloat32x8", argLength: 3, commutative: false}, @@ -688,17 +656,9 @@ func simdGenericOps() []opData { {name: "LessEqualMaskedUint64x2", argLength: 3, commutative: false}, {name: "LessEqualMaskedUint64x4", argLength: 3, commutative: false}, {name: "LessEqualMaskedUint64x8", argLength: 3, commutative: false}, - {name: "LessEqualUint8x16", argLength: 2, commutative: false}, - {name: "LessEqualUint8x32", argLength: 2, commutative: false}, {name: "LessEqualUint8x64", argLength: 2, commutative: false}, - {name: "LessEqualUint16x8", argLength: 2, commutative: false}, - {name: "LessEqualUint16x16", argLength: 2, commutative: false}, {name: "LessEqualUint16x32", argLength: 2, commutative: false}, - {name: "LessEqualUint32x4", argLength: 2, commutative: false}, - {name: "LessEqualUint32x8", argLength: 2, commutative: false}, {name: "LessEqualUint32x16", argLength: 2, commutative: false}, - {name: "LessEqualUint64x2", argLength: 2, commutative: false}, - {name: "LessEqualUint64x4", argLength: 2, commutative: false}, {name: "LessEqualUint64x8", argLength: 2, commutative: false}, {name: "LessFloat32x4", argLength: 2, commutative: false}, {name: "LessFloat32x8", argLength: 2, commutative: false}, @@ -706,17 +666,9 @@ func simdGenericOps() []opData { {name: "LessFloat64x2", argLength: 2, commutative: false}, {name: "LessFloat64x4", argLength: 2, commutative: false}, {name: "LessFloat64x8", argLength: 2, commutative: false}, - {name: "LessInt8x16", argLength: 2, commutative: false}, - {name: "LessInt8x32", argLength: 2, commutative: false}, {name: "LessInt8x64", argLength: 2, commutative: false}, - {name: "LessInt16x8", argLength: 2, commutative: false}, - {name: "LessInt16x16", argLength: 2, commutative: false}, {name: "LessInt16x32", argLength: 2, commutative: false}, - {name: "LessInt32x4", argLength: 2, commutative: false}, - {name: "LessInt32x8", argLength: 2, commutative: false}, {name: "LessInt32x16", argLength: 2, commutative: false}, - {name: "LessInt64x2", argLength: 2, commutative: false}, - {name: "LessInt64x4", argLength: 2, commutative: false}, {name: "LessInt64x8", argLength: 2, commutative: false}, {name: "LessMaskedFloat32x4", argLength: 3, commutative: false}, {name: "LessMaskedFloat32x8", argLength: 3, commutative: false}, @@ -748,17 +700,9 @@ func simdGenericOps() []opData { {name: "LessMaskedUint64x2", argLength: 3, commutative: false}, {name: "LessMaskedUint64x4", argLength: 3, commutative: false}, {name: "LessMaskedUint64x8", argLength: 3, commutative: false}, - {name: "LessUint8x16", argLength: 2, commutative: false}, - {name: "LessUint8x32", argLength: 2, commutative: false}, {name: "LessUint8x64", argLength: 2, commutative: false}, - {name: "LessUint16x8", argLength: 2, commutative: false}, - {name: "LessUint16x16", argLength: 2, commutative: false}, {name: "LessUint16x32", argLength: 2, commutative: false}, - {name: "LessUint32x4", argLength: 2, commutative: false}, - {name: "LessUint32x8", argLength: 2, commutative: false}, {name: "LessUint32x16", argLength: 2, commutative: false}, - {name: "LessUint64x2", argLength: 2, commutative: false}, - {name: "LessUint64x4", argLength: 2, commutative: false}, {name: "LessUint64x8", argLength: 2, commutative: false}, {name: "MaxFloat32x4", argLength: 2, commutative: true}, {name: "MaxFloat32x8", argLength: 2, commutative: true}, @@ -986,17 +930,9 @@ func simdGenericOps() []opData { {name: "NotEqualFloat64x2", argLength: 2, commutative: true}, {name: "NotEqualFloat64x4", argLength: 
2, commutative: true}, {name: "NotEqualFloat64x8", argLength: 2, commutative: true}, - {name: "NotEqualInt8x16", argLength: 2, commutative: true}, - {name: "NotEqualInt8x32", argLength: 2, commutative: true}, {name: "NotEqualInt8x64", argLength: 2, commutative: true}, - {name: "NotEqualInt16x8", argLength: 2, commutative: true}, - {name: "NotEqualInt16x16", argLength: 2, commutative: true}, {name: "NotEqualInt16x32", argLength: 2, commutative: true}, - {name: "NotEqualInt32x4", argLength: 2, commutative: true}, - {name: "NotEqualInt32x8", argLength: 2, commutative: true}, {name: "NotEqualInt32x16", argLength: 2, commutative: true}, - {name: "NotEqualInt64x2", argLength: 2, commutative: true}, - {name: "NotEqualInt64x4", argLength: 2, commutative: true}, {name: "NotEqualInt64x8", argLength: 2, commutative: true}, {name: "NotEqualMaskedFloat32x4", argLength: 3, commutative: true}, {name: "NotEqualMaskedFloat32x8", argLength: 3, commutative: true}, @@ -1028,17 +964,9 @@ func simdGenericOps() []opData { {name: "NotEqualMaskedUint64x2", argLength: 3, commutative: true}, {name: "NotEqualMaskedUint64x4", argLength: 3, commutative: true}, {name: "NotEqualMaskedUint64x8", argLength: 3, commutative: true}, - {name: "NotEqualUint8x16", argLength: 2, commutative: true}, - {name: "NotEqualUint8x32", argLength: 2, commutative: true}, {name: "NotEqualUint8x64", argLength: 2, commutative: true}, - {name: "NotEqualUint16x8", argLength: 2, commutative: true}, - {name: "NotEqualUint16x16", argLength: 2, commutative: true}, {name: "NotEqualUint16x32", argLength: 2, commutative: true}, - {name: "NotEqualUint32x4", argLength: 2, commutative: true}, - {name: "NotEqualUint32x8", argLength: 2, commutative: true}, {name: "NotEqualUint32x16", argLength: 2, commutative: true}, - {name: "NotEqualUint64x2", argLength: 2, commutative: true}, - {name: "NotEqualUint64x4", argLength: 2, commutative: true}, {name: "NotEqualUint64x8", argLength: 2, commutative: true}, {name: "OnesCountInt8x16", argLength: 1, commutative: false}, {name: "OnesCountInt8x32", argLength: 1, commutative: false}, diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go index 6e0ffd1540..7bcbf1b615 100644 --- a/src/cmd/compile/internal/ssa/opGen.go +++ b/src/cmd/compile/internal/ssa/opGen.go @@ -2208,29 +2208,13 @@ const ( OpAMD64VEXTRACTF64X4256 OpAMD64VEXTRACTI128128 OpAMD64VEXTRACTI64X4256 - OpAMD64VPCMPUB128 - OpAMD64VPCMPUB256 OpAMD64VPCMPUB512 - OpAMD64VPCMPUW128 - OpAMD64VPCMPUW256 OpAMD64VPCMPUW512 - OpAMD64VPCMPUD128 - OpAMD64VPCMPUD256 OpAMD64VPCMPUD512 - OpAMD64VPCMPUQ128 - OpAMD64VPCMPUQ256 OpAMD64VPCMPUQ512 - OpAMD64VPCMPB128 - OpAMD64VPCMPB256 OpAMD64VPCMPB512 - OpAMD64VPCMPW128 - OpAMD64VPCMPW256 OpAMD64VPCMPW512 - OpAMD64VPCMPD128 - OpAMD64VPCMPD256 OpAMD64VPCMPD512 - OpAMD64VPCMPQ128 - OpAMD64VPCMPQ256 OpAMD64VPCMPQ512 OpAMD64VPROLD128 OpAMD64VPROLD256 @@ -5155,17 +5139,9 @@ const ( OpGreaterEqualFloat64x2 OpGreaterEqualFloat64x4 OpGreaterEqualFloat64x8 - OpGreaterEqualInt8x16 - OpGreaterEqualInt8x32 OpGreaterEqualInt8x64 - OpGreaterEqualInt16x8 - OpGreaterEqualInt16x16 OpGreaterEqualInt16x32 - OpGreaterEqualInt32x4 - OpGreaterEqualInt32x8 OpGreaterEqualInt32x16 - OpGreaterEqualInt64x2 - OpGreaterEqualInt64x4 OpGreaterEqualInt64x8 OpGreaterEqualMaskedFloat32x4 OpGreaterEqualMaskedFloat32x8 @@ -5197,17 +5173,9 @@ const ( OpGreaterEqualMaskedUint64x2 OpGreaterEqualMaskedUint64x4 OpGreaterEqualMaskedUint64x8 - OpGreaterEqualUint8x16 - OpGreaterEqualUint8x32 OpGreaterEqualUint8x64 - 
OpGreaterEqualUint16x8 - OpGreaterEqualUint16x16 OpGreaterEqualUint16x32 - OpGreaterEqualUint32x4 - OpGreaterEqualUint32x8 OpGreaterEqualUint32x16 - OpGreaterEqualUint64x2 - OpGreaterEqualUint64x4 OpGreaterEqualUint64x8 OpGreaterFloat32x4 OpGreaterFloat32x8 @@ -5257,17 +5225,9 @@ const ( OpGreaterMaskedUint64x2 OpGreaterMaskedUint64x4 OpGreaterMaskedUint64x8 - OpGreaterUint8x16 - OpGreaterUint8x32 OpGreaterUint8x64 - OpGreaterUint16x8 - OpGreaterUint16x16 OpGreaterUint16x32 - OpGreaterUint32x4 - OpGreaterUint32x8 OpGreaterUint32x16 - OpGreaterUint64x2 - OpGreaterUint64x4 OpGreaterUint64x8 OpIsNanFloat32x4 OpIsNanFloat32x8 @@ -5287,17 +5247,9 @@ const ( OpLessEqualFloat64x2 OpLessEqualFloat64x4 OpLessEqualFloat64x8 - OpLessEqualInt8x16 - OpLessEqualInt8x32 OpLessEqualInt8x64 - OpLessEqualInt16x8 - OpLessEqualInt16x16 OpLessEqualInt16x32 - OpLessEqualInt32x4 - OpLessEqualInt32x8 OpLessEqualInt32x16 - OpLessEqualInt64x2 - OpLessEqualInt64x4 OpLessEqualInt64x8 OpLessEqualMaskedFloat32x4 OpLessEqualMaskedFloat32x8 @@ -5329,17 +5281,9 @@ const ( OpLessEqualMaskedUint64x2 OpLessEqualMaskedUint64x4 OpLessEqualMaskedUint64x8 - OpLessEqualUint8x16 - OpLessEqualUint8x32 OpLessEqualUint8x64 - OpLessEqualUint16x8 - OpLessEqualUint16x16 OpLessEqualUint16x32 - OpLessEqualUint32x4 - OpLessEqualUint32x8 OpLessEqualUint32x16 - OpLessEqualUint64x2 - OpLessEqualUint64x4 OpLessEqualUint64x8 OpLessFloat32x4 OpLessFloat32x8 @@ -5347,17 +5291,9 @@ const ( OpLessFloat64x2 OpLessFloat64x4 OpLessFloat64x8 - OpLessInt8x16 - OpLessInt8x32 OpLessInt8x64 - OpLessInt16x8 - OpLessInt16x16 OpLessInt16x32 - OpLessInt32x4 - OpLessInt32x8 OpLessInt32x16 - OpLessInt64x2 - OpLessInt64x4 OpLessInt64x8 OpLessMaskedFloat32x4 OpLessMaskedFloat32x8 @@ -5389,17 +5325,9 @@ const ( OpLessMaskedUint64x2 OpLessMaskedUint64x4 OpLessMaskedUint64x8 - OpLessUint8x16 - OpLessUint8x32 OpLessUint8x64 - OpLessUint16x8 - OpLessUint16x16 OpLessUint16x32 - OpLessUint32x4 - OpLessUint32x8 OpLessUint32x16 - OpLessUint64x2 - OpLessUint64x4 OpLessUint64x8 OpMaxFloat32x4 OpMaxFloat32x8 @@ -5627,17 +5555,9 @@ const ( OpNotEqualFloat64x2 OpNotEqualFloat64x4 OpNotEqualFloat64x8 - OpNotEqualInt8x16 - OpNotEqualInt8x32 OpNotEqualInt8x64 - OpNotEqualInt16x8 - OpNotEqualInt16x16 OpNotEqualInt16x32 - OpNotEqualInt32x4 - OpNotEqualInt32x8 OpNotEqualInt32x16 - OpNotEqualInt64x2 - OpNotEqualInt64x4 OpNotEqualInt64x8 OpNotEqualMaskedFloat32x4 OpNotEqualMaskedFloat32x8 @@ -5669,17 +5589,9 @@ const ( OpNotEqualMaskedUint64x2 OpNotEqualMaskedUint64x4 OpNotEqualMaskedUint64x8 - OpNotEqualUint8x16 - OpNotEqualUint8x32 OpNotEqualUint8x64 - OpNotEqualUint16x8 - OpNotEqualUint16x16 OpNotEqualUint16x32 - OpNotEqualUint32x4 - OpNotEqualUint32x8 OpNotEqualUint32x16 - OpNotEqualUint64x2 - OpNotEqualUint64x4 OpNotEqualUint64x8 OpOnesCountInt8x16 OpOnesCountInt8x32 @@ -34328,36 +34240,6 @@ var opcodeTable = [...]opInfo{ }, }, }, - { - name: "VPCMPUB128", - auxType: auxUInt8, - argLen: 2, - asm: x86.AVPCMPUB, - reg: regInfo{ - inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 - }, - }, - }, - { - name: "VPCMPUB256", - auxType: auxUInt8, - argLen: 2, - asm: x86.AVPCMPUB, - reg: regInfo{ - inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 
71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 - }, - }, - }, { name: "VPCMPUB512", auxType: auxUInt8, @@ -34373,36 +34255,6 @@ var opcodeTable = [...]opInfo{ }, }, }, - { - name: "VPCMPUW128", - auxType: auxUInt8, - argLen: 2, - asm: x86.AVPCMPUW, - reg: regInfo{ - inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 - }, - }, - }, - { - name: "VPCMPUW256", - auxType: auxUInt8, - argLen: 2, - asm: x86.AVPCMPUW, - reg: regInfo{ - inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 - }, - }, - }, { name: "VPCMPUW512", auxType: auxUInt8, @@ -34418,36 +34270,6 @@ var opcodeTable = [...]opInfo{ }, }, }, - { - name: "VPCMPUD128", - auxType: auxUInt8, - argLen: 2, - asm: x86.AVPCMPUD, - reg: regInfo{ - inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 - }, - }, - }, - { - name: "VPCMPUD256", - auxType: auxUInt8, - argLen: 2, - asm: x86.AVPCMPUD, - reg: regInfo{ - inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 - }, - }, - }, { name: "VPCMPUD512", auxType: auxUInt8, @@ -34463,36 +34285,6 @@ var opcodeTable = [...]opInfo{ }, }, }, - { - name: "VPCMPUQ128", - auxType: auxUInt8, - argLen: 2, - asm: x86.AVPCMPUQ, - reg: regInfo{ - inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 - }, - }, - }, - { - name: "VPCMPUQ256", - auxType: auxUInt8, - argLen: 2, - asm: x86.AVPCMPUQ, - reg: regInfo{ - inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 - }, - }, - }, { name: "VPCMPUQ512", auxType: auxUInt8, @@ -34508,36 +34300,6 @@ var opcodeTable = [...]opInfo{ }, }, }, - { - name: "VPCMPB128", - auxType: auxUInt8, - argLen: 2, - asm: x86.AVPCMPB, - reg: regInfo{ - inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 - }, - }, - }, - { - name: "VPCMPB256", - auxType: auxUInt8, - argLen: 2, - asm: x86.AVPCMPB, - reg: regInfo{ - inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 - }, - }, - }, { name: "VPCMPB512", auxType: auxUInt8, @@ -34553,36 +34315,6 @@ var opcodeTable = [...]opInfo{ }, }, }, - { - name: "VPCMPW128", - auxType: auxUInt8, - argLen: 2, - asm: x86.AVPCMPW, - reg: regInfo{ - 
inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 - }, - }, - }, - { - name: "VPCMPW256", - auxType: auxUInt8, - argLen: 2, - asm: x86.AVPCMPW, - reg: regInfo{ - inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 - }, - }, - }, { name: "VPCMPW512", auxType: auxUInt8, @@ -34598,36 +34330,6 @@ var opcodeTable = [...]opInfo{ }, }, }, - { - name: "VPCMPD128", - auxType: auxUInt8, - argLen: 2, - asm: x86.AVPCMPD, - reg: regInfo{ - inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 - }, - }, - }, - { - name: "VPCMPD256", - auxType: auxUInt8, - argLen: 2, - asm: x86.AVPCMPD, - reg: regInfo{ - inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 - }, - }, - }, { name: "VPCMPD512", auxType: auxUInt8, @@ -34643,36 +34345,6 @@ var opcodeTable = [...]opInfo{ }, }, }, - { - name: "VPCMPQ128", - auxType: auxUInt8, - argLen: 2, - asm: x86.AVPCMPQ, - reg: regInfo{ - inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 - }, - }, - }, - { - name: "VPCMPQ256", - auxType: auxUInt8, - argLen: 2, - asm: x86.AVPCMPQ, - reg: regInfo{ - inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 - }, - }, - }, { name: "VPCMPQ512", auxType: auxUInt8, @@ -66750,61 +66422,21 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, - { - name: "GreaterEqualInt8x16", - argLen: 2, - generic: true, - }, - { - name: "GreaterEqualInt8x32", - argLen: 2, - generic: true, - }, { name: "GreaterEqualInt8x64", argLen: 2, generic: true, }, - { - name: "GreaterEqualInt16x8", - argLen: 2, - generic: true, - }, - { - name: "GreaterEqualInt16x16", - argLen: 2, - generic: true, - }, { name: "GreaterEqualInt16x32", argLen: 2, generic: true, }, - { - name: "GreaterEqualInt32x4", - argLen: 2, - generic: true, - }, - { - name: "GreaterEqualInt32x8", - argLen: 2, - generic: true, - }, { name: "GreaterEqualInt32x16", argLen: 2, generic: true, }, - { - name: "GreaterEqualInt64x2", - argLen: 2, - generic: true, - }, - { - name: "GreaterEqualInt64x4", - argLen: 2, - generic: true, - }, { name: "GreaterEqualInt64x8", argLen: 2, @@ -66960,61 +66592,21 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, - { - name: "GreaterEqualUint8x16", - argLen: 2, - generic: true, - }, - { - name: "GreaterEqualUint8x32", - argLen: 2, - generic: true, - }, { name: "GreaterEqualUint8x64", argLen: 2, generic: true, }, - { - name: "GreaterEqualUint16x8", - argLen: 2, - generic: true, - }, - { - name: 
"GreaterEqualUint16x16", - argLen: 2, - generic: true, - }, { name: "GreaterEqualUint16x32", argLen: 2, generic: true, }, - { - name: "GreaterEqualUint32x4", - argLen: 2, - generic: true, - }, - { - name: "GreaterEqualUint32x8", - argLen: 2, - generic: true, - }, { name: "GreaterEqualUint32x16", argLen: 2, generic: true, }, - { - name: "GreaterEqualUint64x2", - argLen: 2, - generic: true, - }, - { - name: "GreaterEqualUint64x4", - argLen: 2, - generic: true, - }, { name: "GreaterEqualUint64x8", argLen: 2, @@ -67260,61 +66852,21 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, - { - name: "GreaterUint8x16", - argLen: 2, - generic: true, - }, - { - name: "GreaterUint8x32", - argLen: 2, - generic: true, - }, { name: "GreaterUint8x64", argLen: 2, generic: true, }, - { - name: "GreaterUint16x8", - argLen: 2, - generic: true, - }, - { - name: "GreaterUint16x16", - argLen: 2, - generic: true, - }, { name: "GreaterUint16x32", argLen: 2, generic: true, }, - { - name: "GreaterUint32x4", - argLen: 2, - generic: true, - }, - { - name: "GreaterUint32x8", - argLen: 2, - generic: true, - }, { name: "GreaterUint32x16", argLen: 2, generic: true, }, - { - name: "GreaterUint64x2", - argLen: 2, - generic: true, - }, - { - name: "GreaterUint64x4", - argLen: 2, - generic: true, - }, { name: "GreaterUint64x8", argLen: 2, @@ -67422,61 +66974,21 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, - { - name: "LessEqualInt8x16", - argLen: 2, - generic: true, - }, - { - name: "LessEqualInt8x32", - argLen: 2, - generic: true, - }, { name: "LessEqualInt8x64", argLen: 2, generic: true, }, - { - name: "LessEqualInt16x8", - argLen: 2, - generic: true, - }, - { - name: "LessEqualInt16x16", - argLen: 2, - generic: true, - }, { name: "LessEqualInt16x32", argLen: 2, generic: true, }, - { - name: "LessEqualInt32x4", - argLen: 2, - generic: true, - }, - { - name: "LessEqualInt32x8", - argLen: 2, - generic: true, - }, { name: "LessEqualInt32x16", argLen: 2, generic: true, }, - { - name: "LessEqualInt64x2", - argLen: 2, - generic: true, - }, - { - name: "LessEqualInt64x4", - argLen: 2, - generic: true, - }, { name: "LessEqualInt64x8", argLen: 2, @@ -67632,61 +67144,21 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, - { - name: "LessEqualUint8x16", - argLen: 2, - generic: true, - }, - { - name: "LessEqualUint8x32", - argLen: 2, - generic: true, - }, { name: "LessEqualUint8x64", argLen: 2, generic: true, }, - { - name: "LessEqualUint16x8", - argLen: 2, - generic: true, - }, - { - name: "LessEqualUint16x16", - argLen: 2, - generic: true, - }, { name: "LessEqualUint16x32", argLen: 2, generic: true, }, - { - name: "LessEqualUint32x4", - argLen: 2, - generic: true, - }, - { - name: "LessEqualUint32x8", - argLen: 2, - generic: true, - }, { name: "LessEqualUint32x16", argLen: 2, generic: true, }, - { - name: "LessEqualUint64x2", - argLen: 2, - generic: true, - }, - { - name: "LessEqualUint64x4", - argLen: 2, - generic: true, - }, { name: "LessEqualUint64x8", argLen: 2, @@ -67722,61 +67194,21 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, - { - name: "LessInt8x16", - argLen: 2, - generic: true, - }, - { - name: "LessInt8x32", - argLen: 2, - generic: true, - }, { name: "LessInt8x64", argLen: 2, generic: true, }, - { - name: "LessInt16x8", - argLen: 2, - generic: true, - }, - { - name: "LessInt16x16", - argLen: 2, - generic: true, - }, { name: "LessInt16x32", argLen: 2, generic: true, }, - { - name: "LessInt32x4", - argLen: 2, - generic: true, - }, - { - name: "LessInt32x8", - argLen: 
2, - generic: true, - }, { name: "LessInt32x16", argLen: 2, generic: true, }, - { - name: "LessInt64x2", - argLen: 2, - generic: true, - }, - { - name: "LessInt64x4", - argLen: 2, - generic: true, - }, { name: "LessInt64x8", argLen: 2, @@ -67932,61 +67364,21 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, - { - name: "LessUint8x16", - argLen: 2, - generic: true, - }, - { - name: "LessUint8x32", - argLen: 2, - generic: true, - }, { name: "LessUint8x64", argLen: 2, generic: true, }, - { - name: "LessUint16x8", - argLen: 2, - generic: true, - }, - { - name: "LessUint16x16", - argLen: 2, - generic: true, - }, { name: "LessUint16x32", argLen: 2, generic: true, }, - { - name: "LessUint32x4", - argLen: 2, - generic: true, - }, - { - name: "LessUint32x8", - argLen: 2, - generic: true, - }, { name: "LessUint32x16", argLen: 2, generic: true, }, - { - name: "LessUint64x2", - argLen: 2, - generic: true, - }, - { - name: "LessUint64x4", - argLen: 2, - generic: true, - }, { name: "LessUint64x8", argLen: 2, @@ -69312,72 +68704,24 @@ var opcodeTable = [...]opInfo{ commutative: true, generic: true, }, - { - name: "NotEqualInt8x16", - argLen: 2, - commutative: true, - generic: true, - }, - { - name: "NotEqualInt8x32", - argLen: 2, - commutative: true, - generic: true, - }, { name: "NotEqualInt8x64", argLen: 2, commutative: true, generic: true, }, - { - name: "NotEqualInt16x8", - argLen: 2, - commutative: true, - generic: true, - }, - { - name: "NotEqualInt16x16", - argLen: 2, - commutative: true, - generic: true, - }, { name: "NotEqualInt16x32", argLen: 2, commutative: true, generic: true, }, - { - name: "NotEqualInt32x4", - argLen: 2, - commutative: true, - generic: true, - }, - { - name: "NotEqualInt32x8", - argLen: 2, - commutative: true, - generic: true, - }, { name: "NotEqualInt32x16", argLen: 2, commutative: true, generic: true, }, - { - name: "NotEqualInt64x2", - argLen: 2, - commutative: true, - generic: true, - }, - { - name: "NotEqualInt64x4", - argLen: 2, - commutative: true, - generic: true, - }, { name: "NotEqualInt64x8", argLen: 2, @@ -69564,72 +68908,24 @@ var opcodeTable = [...]opInfo{ commutative: true, generic: true, }, - { - name: "NotEqualUint8x16", - argLen: 2, - commutative: true, - generic: true, - }, - { - name: "NotEqualUint8x32", - argLen: 2, - commutative: true, - generic: true, - }, { name: "NotEqualUint8x64", argLen: 2, commutative: true, generic: true, }, - { - name: "NotEqualUint16x8", - argLen: 2, - commutative: true, - generic: true, - }, - { - name: "NotEqualUint16x16", - argLen: 2, - commutative: true, - generic: true, - }, { name: "NotEqualUint16x32", argLen: 2, commutative: true, generic: true, }, - { - name: "NotEqualUint32x4", - argLen: 2, - commutative: true, - generic: true, - }, - { - name: "NotEqualUint32x8", - argLen: 2, - commutative: true, - generic: true, - }, { name: "NotEqualUint32x16", argLen: 2, commutative: true, generic: true, }, - { - name: "NotEqualUint64x2", - argLen: 2, - commutative: true, - generic: true, - }, - { - name: "NotEqualUint64x4", - argLen: 2, - commutative: true, - generic: true, - }, { name: "NotEqualUint64x8", argLen: 2, diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go index 0bdc0e63b7..0e2e2311f0 100644 --- a/src/cmd/compile/internal/ssa/rewriteAMD64.go +++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go @@ -2304,28 +2304,12 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpGreaterEqualFloat64x4(v) case OpGreaterEqualFloat64x8: return 
rewriteValueAMD64_OpGreaterEqualFloat64x8(v) - case OpGreaterEqualInt16x16: - return rewriteValueAMD64_OpGreaterEqualInt16x16(v) case OpGreaterEqualInt16x32: return rewriteValueAMD64_OpGreaterEqualInt16x32(v) - case OpGreaterEqualInt16x8: - return rewriteValueAMD64_OpGreaterEqualInt16x8(v) case OpGreaterEqualInt32x16: return rewriteValueAMD64_OpGreaterEqualInt32x16(v) - case OpGreaterEqualInt32x4: - return rewriteValueAMD64_OpGreaterEqualInt32x4(v) - case OpGreaterEqualInt32x8: - return rewriteValueAMD64_OpGreaterEqualInt32x8(v) - case OpGreaterEqualInt64x2: - return rewriteValueAMD64_OpGreaterEqualInt64x2(v) - case OpGreaterEqualInt64x4: - return rewriteValueAMD64_OpGreaterEqualInt64x4(v) case OpGreaterEqualInt64x8: return rewriteValueAMD64_OpGreaterEqualInt64x8(v) - case OpGreaterEqualInt8x16: - return rewriteValueAMD64_OpGreaterEqualInt8x16(v) - case OpGreaterEqualInt8x32: - return rewriteValueAMD64_OpGreaterEqualInt8x32(v) case OpGreaterEqualInt8x64: return rewriteValueAMD64_OpGreaterEqualInt8x64(v) case OpGreaterEqualMaskedFloat32x16: @@ -2388,28 +2372,12 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpGreaterEqualMaskedUint8x32(v) case OpGreaterEqualMaskedUint8x64: return rewriteValueAMD64_OpGreaterEqualMaskedUint8x64(v) - case OpGreaterEqualUint16x16: - return rewriteValueAMD64_OpGreaterEqualUint16x16(v) case OpGreaterEqualUint16x32: return rewriteValueAMD64_OpGreaterEqualUint16x32(v) - case OpGreaterEqualUint16x8: - return rewriteValueAMD64_OpGreaterEqualUint16x8(v) case OpGreaterEqualUint32x16: return rewriteValueAMD64_OpGreaterEqualUint32x16(v) - case OpGreaterEqualUint32x4: - return rewriteValueAMD64_OpGreaterEqualUint32x4(v) - case OpGreaterEqualUint32x8: - return rewriteValueAMD64_OpGreaterEqualUint32x8(v) - case OpGreaterEqualUint64x2: - return rewriteValueAMD64_OpGreaterEqualUint64x2(v) - case OpGreaterEqualUint64x4: - return rewriteValueAMD64_OpGreaterEqualUint64x4(v) case OpGreaterEqualUint64x8: return rewriteValueAMD64_OpGreaterEqualUint64x8(v) - case OpGreaterEqualUint8x16: - return rewriteValueAMD64_OpGreaterEqualUint8x16(v) - case OpGreaterEqualUint8x32: - return rewriteValueAMD64_OpGreaterEqualUint8x32(v) case OpGreaterEqualUint8x64: return rewriteValueAMD64_OpGreaterEqualUint8x64(v) case OpGreaterFloat32x16: @@ -2516,28 +2484,12 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpGreaterMaskedUint8x32(v) case OpGreaterMaskedUint8x64: return rewriteValueAMD64_OpGreaterMaskedUint8x64(v) - case OpGreaterUint16x16: - return rewriteValueAMD64_OpGreaterUint16x16(v) case OpGreaterUint16x32: return rewriteValueAMD64_OpGreaterUint16x32(v) - case OpGreaterUint16x8: - return rewriteValueAMD64_OpGreaterUint16x8(v) case OpGreaterUint32x16: return rewriteValueAMD64_OpGreaterUint32x16(v) - case OpGreaterUint32x4: - return rewriteValueAMD64_OpGreaterUint32x4(v) - case OpGreaterUint32x8: - return rewriteValueAMD64_OpGreaterUint32x8(v) - case OpGreaterUint64x2: - return rewriteValueAMD64_OpGreaterUint64x2(v) - case OpGreaterUint64x4: - return rewriteValueAMD64_OpGreaterUint64x4(v) case OpGreaterUint64x8: return rewriteValueAMD64_OpGreaterUint64x8(v) - case OpGreaterUint8x16: - return rewriteValueAMD64_OpGreaterUint8x16(v) - case OpGreaterUint8x32: - return rewriteValueAMD64_OpGreaterUint8x32(v) case OpGreaterUint8x64: return rewriteValueAMD64_OpGreaterUint8x64(v) case OpHasCPUFeature: @@ -2639,28 +2591,12 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpLessEqualFloat64x4(v) case OpLessEqualFloat64x8: return 
rewriteValueAMD64_OpLessEqualFloat64x8(v) - case OpLessEqualInt16x16: - return rewriteValueAMD64_OpLessEqualInt16x16(v) case OpLessEqualInt16x32: return rewriteValueAMD64_OpLessEqualInt16x32(v) - case OpLessEqualInt16x8: - return rewriteValueAMD64_OpLessEqualInt16x8(v) case OpLessEqualInt32x16: return rewriteValueAMD64_OpLessEqualInt32x16(v) - case OpLessEqualInt32x4: - return rewriteValueAMD64_OpLessEqualInt32x4(v) - case OpLessEqualInt32x8: - return rewriteValueAMD64_OpLessEqualInt32x8(v) - case OpLessEqualInt64x2: - return rewriteValueAMD64_OpLessEqualInt64x2(v) - case OpLessEqualInt64x4: - return rewriteValueAMD64_OpLessEqualInt64x4(v) case OpLessEqualInt64x8: return rewriteValueAMD64_OpLessEqualInt64x8(v) - case OpLessEqualInt8x16: - return rewriteValueAMD64_OpLessEqualInt8x16(v) - case OpLessEqualInt8x32: - return rewriteValueAMD64_OpLessEqualInt8x32(v) case OpLessEqualInt8x64: return rewriteValueAMD64_OpLessEqualInt8x64(v) case OpLessEqualMaskedFloat32x16: @@ -2723,28 +2659,12 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpLessEqualMaskedUint8x32(v) case OpLessEqualMaskedUint8x64: return rewriteValueAMD64_OpLessEqualMaskedUint8x64(v) - case OpLessEqualUint16x16: - return rewriteValueAMD64_OpLessEqualUint16x16(v) case OpLessEqualUint16x32: return rewriteValueAMD64_OpLessEqualUint16x32(v) - case OpLessEqualUint16x8: - return rewriteValueAMD64_OpLessEqualUint16x8(v) case OpLessEqualUint32x16: return rewriteValueAMD64_OpLessEqualUint32x16(v) - case OpLessEqualUint32x4: - return rewriteValueAMD64_OpLessEqualUint32x4(v) - case OpLessEqualUint32x8: - return rewriteValueAMD64_OpLessEqualUint32x8(v) - case OpLessEqualUint64x2: - return rewriteValueAMD64_OpLessEqualUint64x2(v) - case OpLessEqualUint64x4: - return rewriteValueAMD64_OpLessEqualUint64x4(v) case OpLessEqualUint64x8: return rewriteValueAMD64_OpLessEqualUint64x8(v) - case OpLessEqualUint8x16: - return rewriteValueAMD64_OpLessEqualUint8x16(v) - case OpLessEqualUint8x32: - return rewriteValueAMD64_OpLessEqualUint8x32(v) case OpLessEqualUint8x64: return rewriteValueAMD64_OpLessEqualUint8x64(v) case OpLessFloat32x16: @@ -2759,28 +2679,12 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpLessFloat64x4(v) case OpLessFloat64x8: return rewriteValueAMD64_OpLessFloat64x8(v) - case OpLessInt16x16: - return rewriteValueAMD64_OpLessInt16x16(v) case OpLessInt16x32: return rewriteValueAMD64_OpLessInt16x32(v) - case OpLessInt16x8: - return rewriteValueAMD64_OpLessInt16x8(v) case OpLessInt32x16: return rewriteValueAMD64_OpLessInt32x16(v) - case OpLessInt32x4: - return rewriteValueAMD64_OpLessInt32x4(v) - case OpLessInt32x8: - return rewriteValueAMD64_OpLessInt32x8(v) - case OpLessInt64x2: - return rewriteValueAMD64_OpLessInt64x2(v) - case OpLessInt64x4: - return rewriteValueAMD64_OpLessInt64x4(v) case OpLessInt64x8: return rewriteValueAMD64_OpLessInt64x8(v) - case OpLessInt8x16: - return rewriteValueAMD64_OpLessInt8x16(v) - case OpLessInt8x32: - return rewriteValueAMD64_OpLessInt8x32(v) case OpLessInt8x64: return rewriteValueAMD64_OpLessInt8x64(v) case OpLessMaskedFloat32x16: @@ -2843,28 +2747,12 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpLessMaskedUint8x32(v) case OpLessMaskedUint8x64: return rewriteValueAMD64_OpLessMaskedUint8x64(v) - case OpLessUint16x16: - return rewriteValueAMD64_OpLessUint16x16(v) case OpLessUint16x32: return rewriteValueAMD64_OpLessUint16x32(v) - case OpLessUint16x8: - return rewriteValueAMD64_OpLessUint16x8(v) case OpLessUint32x16: return 
rewriteValueAMD64_OpLessUint32x16(v) - case OpLessUint32x4: - return rewriteValueAMD64_OpLessUint32x4(v) - case OpLessUint32x8: - return rewriteValueAMD64_OpLessUint32x8(v) - case OpLessUint64x2: - return rewriteValueAMD64_OpLessUint64x2(v) - case OpLessUint64x4: - return rewriteValueAMD64_OpLessUint64x4(v) case OpLessUint64x8: return rewriteValueAMD64_OpLessUint64x8(v) - case OpLessUint8x16: - return rewriteValueAMD64_OpLessUint8x16(v) - case OpLessUint8x32: - return rewriteValueAMD64_OpLessUint8x32(v) case OpLessUint8x64: return rewriteValueAMD64_OpLessUint8x64(v) case OpLoad: @@ -3583,28 +3471,12 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpNotEqualFloat64x4(v) case OpNotEqualFloat64x8: return rewriteValueAMD64_OpNotEqualFloat64x8(v) - case OpNotEqualInt16x16: - return rewriteValueAMD64_OpNotEqualInt16x16(v) case OpNotEqualInt16x32: return rewriteValueAMD64_OpNotEqualInt16x32(v) - case OpNotEqualInt16x8: - return rewriteValueAMD64_OpNotEqualInt16x8(v) case OpNotEqualInt32x16: return rewriteValueAMD64_OpNotEqualInt32x16(v) - case OpNotEqualInt32x4: - return rewriteValueAMD64_OpNotEqualInt32x4(v) - case OpNotEqualInt32x8: - return rewriteValueAMD64_OpNotEqualInt32x8(v) - case OpNotEqualInt64x2: - return rewriteValueAMD64_OpNotEqualInt64x2(v) - case OpNotEqualInt64x4: - return rewriteValueAMD64_OpNotEqualInt64x4(v) case OpNotEqualInt64x8: return rewriteValueAMD64_OpNotEqualInt64x8(v) - case OpNotEqualInt8x16: - return rewriteValueAMD64_OpNotEqualInt8x16(v) - case OpNotEqualInt8x32: - return rewriteValueAMD64_OpNotEqualInt8x32(v) case OpNotEqualInt8x64: return rewriteValueAMD64_OpNotEqualInt8x64(v) case OpNotEqualMaskedFloat32x16: @@ -3667,28 +3539,12 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpNotEqualMaskedUint8x32(v) case OpNotEqualMaskedUint8x64: return rewriteValueAMD64_OpNotEqualMaskedUint8x64(v) - case OpNotEqualUint16x16: - return rewriteValueAMD64_OpNotEqualUint16x16(v) case OpNotEqualUint16x32: return rewriteValueAMD64_OpNotEqualUint16x32(v) - case OpNotEqualUint16x8: - return rewriteValueAMD64_OpNotEqualUint16x8(v) case OpNotEqualUint32x16: return rewriteValueAMD64_OpNotEqualUint32x16(v) - case OpNotEqualUint32x4: - return rewriteValueAMD64_OpNotEqualUint32x4(v) - case OpNotEqualUint32x8: - return rewriteValueAMD64_OpNotEqualUint32x8(v) - case OpNotEqualUint64x2: - return rewriteValueAMD64_OpNotEqualUint64x2(v) - case OpNotEqualUint64x4: - return rewriteValueAMD64_OpNotEqualUint64x4(v) case OpNotEqualUint64x8: return rewriteValueAMD64_OpNotEqualUint64x8(v) - case OpNotEqualUint8x16: - return rewriteValueAMD64_OpNotEqualUint8x16(v) - case OpNotEqualUint8x32: - return rewriteValueAMD64_OpNotEqualUint8x32(v) case OpNotEqualUint8x64: return rewriteValueAMD64_OpNotEqualUint8x64(v) case OpOffPtr: @@ -37872,24 +37728,6 @@ func rewriteValueAMD64_OpGreaterEqualFloat64x8(v *Value) bool { return true } } -func rewriteValueAMD64_OpGreaterEqualInt16x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualInt16x16 x y) - // result: (VPMOVMToVec16x16 (VPCMPW256 [13] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec16x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPW256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(13) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} func rewriteValueAMD64_OpGreaterEqualInt16x32(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -37908,24 +37746,6 @@ func rewriteValueAMD64_OpGreaterEqualInt16x32(v *Value) bool { return true } } 
-func rewriteValueAMD64_OpGreaterEqualInt16x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualInt16x8 x y) - // result: (VPMOVMToVec16x8 (VPCMPW128 [13] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec16x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPW128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(13) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} func rewriteValueAMD64_OpGreaterEqualInt32x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -37944,78 +37764,6 @@ func rewriteValueAMD64_OpGreaterEqualInt32x16(v *Value) bool { return true } } -func rewriteValueAMD64_OpGreaterEqualInt32x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualInt32x4 x y) - // result: (VPMOVMToVec32x4 (VPCMPD128 [13] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec32x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPD128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(13) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterEqualInt32x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualInt32x8 x y) - // result: (VPMOVMToVec32x8 (VPCMPD256 [13] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec32x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPD256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(13) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterEqualInt64x2(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualInt64x2 x y) - // result: (VPMOVMToVec64x2 (VPCMPQ128 [13] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec64x2) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQ128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(13) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterEqualInt64x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualInt64x4 x y) - // result: (VPMOVMToVec64x4 (VPCMPQ256 [13] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec64x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQ256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(13) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} func rewriteValueAMD64_OpGreaterEqualInt64x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -38034,42 +37782,6 @@ func rewriteValueAMD64_OpGreaterEqualInt64x8(v *Value) bool { return true } } -func rewriteValueAMD64_OpGreaterEqualInt8x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualInt8x16 x y) - // result: (VPMOVMToVec8x16 (VPCMPB128 [13] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec8x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPB128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(13) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterEqualInt8x32(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualInt8x32 x y) - // result: (VPMOVMToVec8x32 (VPCMPB256 [13] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec8x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPB256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(13) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} func rewriteValueAMD64_OpGreaterEqualInt8x64(v *Value) bool 
{ v_1 := v.Args[1] v_0 := v.Args[0] @@ -38748,24 +38460,6 @@ func rewriteValueAMD64_OpGreaterEqualMaskedUint8x64(v *Value) bool { return true } } -func rewriteValueAMD64_OpGreaterEqualUint16x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualUint16x16 x y) - // result: (VPMOVMToVec16x16 (VPCMPUW256 [13] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec16x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUW256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(13) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} func rewriteValueAMD64_OpGreaterEqualUint16x32(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -38784,24 +38478,6 @@ func rewriteValueAMD64_OpGreaterEqualUint16x32(v *Value) bool { return true } } -func rewriteValueAMD64_OpGreaterEqualUint16x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualUint16x8 x y) - // result: (VPMOVMToVec16x8 (VPCMPUW128 [13] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec16x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUW128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(13) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} func rewriteValueAMD64_OpGreaterEqualUint32x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -38820,78 +38496,6 @@ func rewriteValueAMD64_OpGreaterEqualUint32x16(v *Value) bool { return true } } -func rewriteValueAMD64_OpGreaterEqualUint32x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualUint32x4 x y) - // result: (VPMOVMToVec32x4 (VPCMPUD128 [13] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec32x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUD128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(13) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterEqualUint32x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualUint32x8 x y) - // result: (VPMOVMToVec32x8 (VPCMPUD256 [13] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec32x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUD256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(13) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterEqualUint64x2(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualUint64x2 x y) - // result: (VPMOVMToVec64x2 (VPCMPUQ128 [13] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec64x2) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQ128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(13) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterEqualUint64x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualUint64x4 x y) - // result: (VPMOVMToVec64x4 (VPCMPUQ256 [13] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec64x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQ256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(13) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} func rewriteValueAMD64_OpGreaterEqualUint64x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -38910,42 +38514,6 @@ func rewriteValueAMD64_OpGreaterEqualUint64x8(v *Value) bool { return true } } -func rewriteValueAMD64_OpGreaterEqualUint8x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - 
typ := &b.Func.Config.Types - // match: (GreaterEqualUint8x16 x y) - // result: (VPMOVMToVec8x16 (VPCMPUB128 [13] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec8x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUB128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(13) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterEqualUint8x32(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualUint8x32 x y) - // result: (VPMOVMToVec8x32 (VPCMPUB256 [13] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec8x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUB256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(13) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} func rewriteValueAMD64_OpGreaterEqualUint8x64(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -39784,24 +39352,6 @@ func rewriteValueAMD64_OpGreaterMaskedUint8x64(v *Value) bool { return true } } -func rewriteValueAMD64_OpGreaterUint16x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterUint16x16 x y) - // result: (VPMOVMToVec16x16 (VPCMPUW256 [14] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec16x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUW256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(14) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} func rewriteValueAMD64_OpGreaterUint16x32(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -39820,24 +39370,6 @@ func rewriteValueAMD64_OpGreaterUint16x32(v *Value) bool { return true } } -func rewriteValueAMD64_OpGreaterUint16x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterUint16x8 x y) - // result: (VPMOVMToVec16x8 (VPCMPUW128 [14] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec16x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUW128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(14) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} func rewriteValueAMD64_OpGreaterUint32x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -39856,78 +39388,6 @@ func rewriteValueAMD64_OpGreaterUint32x16(v *Value) bool { return true } } -func rewriteValueAMD64_OpGreaterUint32x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterUint32x4 x y) - // result: (VPMOVMToVec32x4 (VPCMPUD128 [14] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec32x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUD128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(14) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterUint32x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterUint32x8 x y) - // result: (VPMOVMToVec32x8 (VPCMPUD256 [14] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec32x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUD256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(14) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterUint64x2(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterUint64x2 x y) - // result: (VPMOVMToVec64x2 (VPCMPUQ128 [14] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec64x2) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQ128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(14) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} -func 
rewriteValueAMD64_OpGreaterUint64x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterUint64x4 x y) - // result: (VPMOVMToVec64x4 (VPCMPUQ256 [14] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec64x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQ256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(14) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} func rewriteValueAMD64_OpGreaterUint64x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -39946,42 +39406,6 @@ func rewriteValueAMD64_OpGreaterUint64x8(v *Value) bool { return true } } -func rewriteValueAMD64_OpGreaterUint8x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterUint8x16 x y) - // result: (VPMOVMToVec8x16 (VPCMPUB128 [14] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec8x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUB128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(14) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterUint8x32(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterUint8x32 x y) - // result: (VPMOVMToVec8x32 (VPCMPUB256 [14] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec8x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUB256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(14) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} func rewriteValueAMD64_OpGreaterUint8x64(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -40699,24 +40123,6 @@ func rewriteValueAMD64_OpLessEqualFloat64x8(v *Value) bool { return true } } -func rewriteValueAMD64_OpLessEqualInt16x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessEqualInt16x16 x y) - // result: (VPMOVMToVec16x16 (VPCMPW256 [2] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec16x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPW256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(2) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} func rewriteValueAMD64_OpLessEqualInt16x32(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -40735,24 +40141,6 @@ func rewriteValueAMD64_OpLessEqualInt16x32(v *Value) bool { return true } } -func rewriteValueAMD64_OpLessEqualInt16x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessEqualInt16x8 x y) - // result: (VPMOVMToVec16x8 (VPCMPW128 [2] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec16x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPW128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(2) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} func rewriteValueAMD64_OpLessEqualInt32x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -40771,78 +40159,6 @@ func rewriteValueAMD64_OpLessEqualInt32x16(v *Value) bool { return true } } -func rewriteValueAMD64_OpLessEqualInt32x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessEqualInt32x4 x y) - // result: (VPMOVMToVec32x4 (VPCMPD128 [2] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec32x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPD128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(2) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessEqualInt32x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: 
(LessEqualInt32x8 x y) - // result: (VPMOVMToVec32x8 (VPCMPD256 [2] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec32x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPD256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(2) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessEqualInt64x2(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessEqualInt64x2 x y) - // result: (VPMOVMToVec64x2 (VPCMPQ128 [2] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec64x2) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQ128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(2) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessEqualInt64x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessEqualInt64x4 x y) - // result: (VPMOVMToVec64x4 (VPCMPQ256 [2] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec64x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQ256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(2) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} func rewriteValueAMD64_OpLessEqualInt64x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -40861,42 +40177,6 @@ func rewriteValueAMD64_OpLessEqualInt64x8(v *Value) bool { return true } } -func rewriteValueAMD64_OpLessEqualInt8x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessEqualInt8x16 x y) - // result: (VPMOVMToVec8x16 (VPCMPB128 [2] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec8x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPB128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(2) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessEqualInt8x32(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessEqualInt8x32 x y) - // result: (VPMOVMToVec8x32 (VPCMPB256 [2] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec8x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPB256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(2) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} func rewriteValueAMD64_OpLessEqualInt8x64(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -41575,24 +40855,6 @@ func rewriteValueAMD64_OpLessEqualMaskedUint8x64(v *Value) bool { return true } } -func rewriteValueAMD64_OpLessEqualUint16x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessEqualUint16x16 x y) - // result: (VPMOVMToVec16x16 (VPCMPUW256 [2] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec16x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUW256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(2) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} func rewriteValueAMD64_OpLessEqualUint16x32(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -41611,24 +40873,6 @@ func rewriteValueAMD64_OpLessEqualUint16x32(v *Value) bool { return true } } -func rewriteValueAMD64_OpLessEqualUint16x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessEqualUint16x8 x y) - // result: (VPMOVMToVec16x8 (VPCMPUW128 [2] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec16x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUW128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(2) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} func rewriteValueAMD64_OpLessEqualUint32x16(v *Value) bool { v_1 := 
v.Args[1] v_0 := v.Args[0] @@ -41647,78 +40891,6 @@ func rewriteValueAMD64_OpLessEqualUint32x16(v *Value) bool { return true } } -func rewriteValueAMD64_OpLessEqualUint32x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessEqualUint32x4 x y) - // result: (VPMOVMToVec32x4 (VPCMPUD128 [2] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec32x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUD128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(2) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessEqualUint32x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessEqualUint32x8 x y) - // result: (VPMOVMToVec32x8 (VPCMPUD256 [2] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec32x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUD256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(2) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessEqualUint64x2(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessEqualUint64x2 x y) - // result: (VPMOVMToVec64x2 (VPCMPUQ128 [2] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec64x2) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQ128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(2) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessEqualUint64x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessEqualUint64x4 x y) - // result: (VPMOVMToVec64x4 (VPCMPUQ256 [2] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec64x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQ256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(2) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} func rewriteValueAMD64_OpLessEqualUint64x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -41737,42 +40909,6 @@ func rewriteValueAMD64_OpLessEqualUint64x8(v *Value) bool { return true } } -func rewriteValueAMD64_OpLessEqualUint8x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessEqualUint8x16 x y) - // result: (VPMOVMToVec8x16 (VPCMPUB128 [2] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec8x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUB128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(2) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessEqualUint8x32(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessEqualUint8x32 x y) - // result: (VPMOVMToVec8x32 (VPCMPUB256 [2] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec8x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUB256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(2) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} func rewriteValueAMD64_OpLessEqualUint8x64(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -41883,24 +41019,6 @@ func rewriteValueAMD64_OpLessFloat64x8(v *Value) bool { return true } } -func rewriteValueAMD64_OpLessInt16x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessInt16x16 x y) - // result: (VPMOVMToVec16x16 (VPCMPW256 [1] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec16x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPW256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(1) - v0.AddArg2(x, y) - v.AddArg(v0) - 
return true - } -} func rewriteValueAMD64_OpLessInt16x32(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -41919,24 +41037,6 @@ func rewriteValueAMD64_OpLessInt16x32(v *Value) bool { return true } } -func rewriteValueAMD64_OpLessInt16x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessInt16x8 x y) - // result: (VPMOVMToVec16x8 (VPCMPW128 [1] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec16x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPW128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(1) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} func rewriteValueAMD64_OpLessInt32x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -41955,78 +41055,6 @@ func rewriteValueAMD64_OpLessInt32x16(v *Value) bool { return true } } -func rewriteValueAMD64_OpLessInt32x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessInt32x4 x y) - // result: (VPMOVMToVec32x4 (VPCMPD128 [1] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec32x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPD128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(1) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessInt32x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessInt32x8 x y) - // result: (VPMOVMToVec32x8 (VPCMPD256 [1] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec32x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPD256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(1) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessInt64x2(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessInt64x2 x y) - // result: (VPMOVMToVec64x2 (VPCMPQ128 [1] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec64x2) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQ128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(1) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessInt64x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessInt64x4 x y) - // result: (VPMOVMToVec64x4 (VPCMPQ256 [1] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec64x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQ256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(1) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} func rewriteValueAMD64_OpLessInt64x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -42045,42 +41073,6 @@ func rewriteValueAMD64_OpLessInt64x8(v *Value) bool { return true } } -func rewriteValueAMD64_OpLessInt8x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessInt8x16 x y) - // result: (VPMOVMToVec8x16 (VPCMPB128 [1] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec8x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPB128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(1) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessInt8x32(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessInt8x32 x y) - // result: (VPMOVMToVec8x32 (VPCMPB256 [1] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec8x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPB256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(1) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} func 
rewriteValueAMD64_OpLessInt8x64(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -42759,24 +41751,6 @@ func rewriteValueAMD64_OpLessMaskedUint8x64(v *Value) bool { return true } } -func rewriteValueAMD64_OpLessUint16x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessUint16x16 x y) - // result: (VPMOVMToVec16x16 (VPCMPUW256 [1] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec16x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUW256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(1) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} func rewriteValueAMD64_OpLessUint16x32(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -42795,24 +41769,6 @@ func rewriteValueAMD64_OpLessUint16x32(v *Value) bool { return true } } -func rewriteValueAMD64_OpLessUint16x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessUint16x8 x y) - // result: (VPMOVMToVec16x8 (VPCMPUW128 [1] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec16x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUW128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(1) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} func rewriteValueAMD64_OpLessUint32x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -42831,78 +41787,6 @@ func rewriteValueAMD64_OpLessUint32x16(v *Value) bool { return true } } -func rewriteValueAMD64_OpLessUint32x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessUint32x4 x y) - // result: (VPMOVMToVec32x4 (VPCMPUD128 [1] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec32x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUD128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(1) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessUint32x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessUint32x8 x y) - // result: (VPMOVMToVec32x8 (VPCMPUD256 [1] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec32x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUD256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(1) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessUint64x2(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessUint64x2 x y) - // result: (VPMOVMToVec64x2 (VPCMPUQ128 [1] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec64x2) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQ128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(1) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessUint64x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessUint64x4 x y) - // result: (VPMOVMToVec64x4 (VPCMPUQ256 [1] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec64x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQ256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(1) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} func rewriteValueAMD64_OpLessUint64x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -42921,42 +41805,6 @@ func rewriteValueAMD64_OpLessUint64x8(v *Value) bool { return true } } -func rewriteValueAMD64_OpLessUint8x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessUint8x16 x y) - // result: (VPMOVMToVec8x16 (VPCMPUB128 [1] x y)) - for { - x := 
v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec8x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUB128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(1) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessUint8x32(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessUint8x32 x y) - // result: (VPMOVMToVec8x32 (VPCMPUB256 [1] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec8x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUB256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(1) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} func rewriteValueAMD64_OpLessUint8x64(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -47070,24 +45918,6 @@ func rewriteValueAMD64_OpNotEqualFloat64x8(v *Value) bool { return true } } -func rewriteValueAMD64_OpNotEqualInt16x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualInt16x16 x y) - // result: (VPMOVMToVec16x16 (VPCMPW256 [4] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec16x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPW256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(4) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} func rewriteValueAMD64_OpNotEqualInt16x32(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -47106,24 +45936,6 @@ func rewriteValueAMD64_OpNotEqualInt16x32(v *Value) bool { return true } } -func rewriteValueAMD64_OpNotEqualInt16x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualInt16x8 x y) - // result: (VPMOVMToVec16x8 (VPCMPW128 [4] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec16x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPW128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(4) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} func rewriteValueAMD64_OpNotEqualInt32x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -47142,78 +45954,6 @@ func rewriteValueAMD64_OpNotEqualInt32x16(v *Value) bool { return true } } -func rewriteValueAMD64_OpNotEqualInt32x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualInt32x4 x y) - // result: (VPMOVMToVec32x4 (VPCMPD128 [4] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec32x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPD128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(4) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpNotEqualInt32x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualInt32x8 x y) - // result: (VPMOVMToVec32x8 (VPCMPD256 [4] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec32x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPD256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(4) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpNotEqualInt64x2(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualInt64x2 x y) - // result: (VPMOVMToVec64x2 (VPCMPQ128 [4] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec64x2) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQ128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(4) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpNotEqualInt64x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualInt64x4 x y) - // result: 
(VPMOVMToVec64x4 (VPCMPQ256 [4] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec64x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQ256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(4) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} func rewriteValueAMD64_OpNotEqualInt64x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -47232,42 +45972,6 @@ func rewriteValueAMD64_OpNotEqualInt64x8(v *Value) bool { return true } } -func rewriteValueAMD64_OpNotEqualInt8x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualInt8x16 x y) - // result: (VPMOVMToVec8x16 (VPCMPB128 [4] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec8x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPB128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(4) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpNotEqualInt8x32(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualInt8x32 x y) - // result: (VPMOVMToVec8x32 (VPCMPB256 [4] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec8x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPB256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(4) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} func rewriteValueAMD64_OpNotEqualInt8x64(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -47946,24 +46650,6 @@ func rewriteValueAMD64_OpNotEqualMaskedUint8x64(v *Value) bool { return true } } -func rewriteValueAMD64_OpNotEqualUint16x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualUint16x16 x y) - // result: (VPMOVMToVec16x16 (VPCMPUW256 [4] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec16x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUW256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(4) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} func rewriteValueAMD64_OpNotEqualUint16x32(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -47982,24 +46668,6 @@ func rewriteValueAMD64_OpNotEqualUint16x32(v *Value) bool { return true } } -func rewriteValueAMD64_OpNotEqualUint16x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualUint16x8 x y) - // result: (VPMOVMToVec16x8 (VPCMPUW128 [4] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec16x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUW128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(4) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} func rewriteValueAMD64_OpNotEqualUint32x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -48018,78 +46686,6 @@ func rewriteValueAMD64_OpNotEqualUint32x16(v *Value) bool { return true } } -func rewriteValueAMD64_OpNotEqualUint32x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualUint32x4 x y) - // result: (VPMOVMToVec32x4 (VPCMPUD128 [4] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec32x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUD128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(4) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpNotEqualUint32x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualUint32x8 x y) - // result: (VPMOVMToVec32x8 (VPCMPUD256 [4] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec32x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUD256, typ.Mask) - 
v0.AuxInt = uint8ToAuxInt(4) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpNotEqualUint64x2(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualUint64x2 x y) - // result: (VPMOVMToVec64x2 (VPCMPUQ128 [4] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec64x2) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQ128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(4) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpNotEqualUint64x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualUint64x4 x y) - // result: (VPMOVMToVec64x4 (VPCMPUQ256 [4] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec64x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQ256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(4) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} func rewriteValueAMD64_OpNotEqualUint64x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -48108,42 +46704,6 @@ func rewriteValueAMD64_OpNotEqualUint64x8(v *Value) bool { return true } } -func rewriteValueAMD64_OpNotEqualUint8x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualUint8x16 x y) - // result: (VPMOVMToVec8x16 (VPCMPUB128 [4] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec8x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUB128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(4) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpNotEqualUint8x32(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualUint8x32 x y) - // result: (VPMOVMToVec8x32 (VPCMPUB256 [4] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec8x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUB256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(4) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} func rewriteValueAMD64_OpNotEqualUint8x64(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] diff --git a/src/cmd/compile/internal/ssagen/simdintrinsics.go b/src/cmd/compile/internal/ssagen/simdintrinsics.go index 7a95a4450d..682a37e91b 100644 --- a/src/cmd/compile/internal/ssagen/simdintrinsics.go +++ b/src/cmd/compile/internal/ssagen/simdintrinsics.go @@ -602,17 +602,9 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Float64x2.Greater", opLen2(ssa.OpGreaterFloat64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float64x4.Greater", opLen2(ssa.OpGreaterFloat64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float64x8.Greater", opLen2(ssa.OpGreaterFloat64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint8x16.Greater", opLen2(ssa.OpGreaterUint8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint8x32.Greater", opLen2(ssa.OpGreaterUint8x32, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint8x64.Greater", opLen2(ssa.OpGreaterUint8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint16x8.Greater", opLen2(ssa.OpGreaterUint16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint16x16.Greater", opLen2(ssa.OpGreaterUint16x16, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint16x32.Greater", opLen2(ssa.OpGreaterUint16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint32x4.Greater", opLen2(ssa.OpGreaterUint32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x8.Greater", opLen2(ssa.OpGreaterUint32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint32x16.Greater", opLen2(ssa.OpGreaterUint32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint64x2.Greater", opLen2(ssa.OpGreaterUint64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint64x4.Greater", opLen2(ssa.OpGreaterUint64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint64x8.Greater", opLen2(ssa.OpGreaterUint64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float32x4.GreaterEqual", opLen2(ssa.OpGreaterEqualFloat32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x8.GreaterEqual", opLen2(ssa.OpGreaterEqualFloat32x8, types.TypeVec256), sys.AMD64) @@ -620,29 +612,13 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Float64x2.GreaterEqual", opLen2(ssa.OpGreaterEqualFloat64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float64x4.GreaterEqual", opLen2(ssa.OpGreaterEqualFloat64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float64x8.GreaterEqual", opLen2(ssa.OpGreaterEqualFloat64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int8x16.GreaterEqual", opLen2(ssa.OpGreaterEqualInt8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int8x32.GreaterEqual", opLen2(ssa.OpGreaterEqualInt8x32, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int8x64.GreaterEqual", opLen2(ssa.OpGreaterEqualInt8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int16x8.GreaterEqual", opLen2(ssa.OpGreaterEqualInt16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int16x16.GreaterEqual", opLen2(ssa.OpGreaterEqualInt16x16, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int16x32.GreaterEqual", opLen2(ssa.OpGreaterEqualInt16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int32x4.GreaterEqual", opLen2(ssa.OpGreaterEqualInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x8.GreaterEqual", opLen2(ssa.OpGreaterEqualInt32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int32x16.GreaterEqual", opLen2(ssa.OpGreaterEqualInt32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int64x2.GreaterEqual", opLen2(ssa.OpGreaterEqualInt64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x4.GreaterEqual", opLen2(ssa.OpGreaterEqualInt64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int64x8.GreaterEqual", opLen2(ssa.OpGreaterEqualInt64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint8x16.GreaterEqual", opLen2(ssa.OpGreaterEqualUint8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint8x32.GreaterEqual", opLen2(ssa.OpGreaterEqualUint8x32, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint8x64.GreaterEqual", opLen2(ssa.OpGreaterEqualUint8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint16x8.GreaterEqual", opLen2(ssa.OpGreaterEqualUint16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint16x16.GreaterEqual", opLen2(ssa.OpGreaterEqualUint16x16, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint16x32.GreaterEqual", opLen2(ssa.OpGreaterEqualUint16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint32x4.GreaterEqual", opLen2(ssa.OpGreaterEqualUint32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x8.GreaterEqual", opLen2(ssa.OpGreaterEqualUint32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint32x16.GreaterEqual", opLen2(ssa.OpGreaterEqualUint32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint64x2.GreaterEqual", opLen2(ssa.OpGreaterEqualUint64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint64x4.GreaterEqual", opLen2(ssa.OpGreaterEqualUint64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint64x8.GreaterEqual", opLen2(ssa.OpGreaterEqualUint64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float32x4.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedFloat32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x8.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedFloat32x8, types.TypeVec256), sys.AMD64) @@ -722,29 +698,13 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Float64x2.Less", opLen2(ssa.OpLessFloat64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float64x4.Less", opLen2(ssa.OpLessFloat64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float64x8.Less", opLen2(ssa.OpLessFloat64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int8x16.Less", opLen2(ssa.OpLessInt8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int8x32.Less", opLen2(ssa.OpLessInt8x32, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int8x64.Less", opLen2(ssa.OpLessInt8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int16x8.Less", opLen2(ssa.OpLessInt16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int16x16.Less", opLen2(ssa.OpLessInt16x16, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int16x32.Less", opLen2(ssa.OpLessInt16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int32x4.Less", opLen2(ssa.OpLessInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x8.Less", opLen2(ssa.OpLessInt32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int32x16.Less", opLen2(ssa.OpLessInt32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int64x2.Less", opLen2(ssa.OpLessInt64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x4.Less", opLen2(ssa.OpLessInt64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int64x8.Less", opLen2(ssa.OpLessInt64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint8x16.Less", opLen2(ssa.OpLessUint8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint8x32.Less", opLen2(ssa.OpLessUint8x32, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint8x64.Less", opLen2(ssa.OpLessUint8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint16x8.Less", opLen2(ssa.OpLessUint16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint16x16.Less", opLen2(ssa.OpLessUint16x16, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint16x32.Less", opLen2(ssa.OpLessUint16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint32x4.Less", opLen2(ssa.OpLessUint32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x8.Less", opLen2(ssa.OpLessUint32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint32x16.Less", opLen2(ssa.OpLessUint32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint64x2.Less", opLen2(ssa.OpLessUint64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint64x4.Less", opLen2(ssa.OpLessUint64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint64x8.Less", opLen2(ssa.OpLessUint64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float32x4.LessEqual", opLen2(ssa.OpLessEqualFloat32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x8.LessEqual", opLen2(ssa.OpLessEqualFloat32x8, types.TypeVec256), sys.AMD64) @@ -752,29 +712,13 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Float64x2.LessEqual", opLen2(ssa.OpLessEqualFloat64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float64x4.LessEqual", opLen2(ssa.OpLessEqualFloat64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float64x8.LessEqual", opLen2(ssa.OpLessEqualFloat64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int8x16.LessEqual", opLen2(ssa.OpLessEqualInt8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int8x32.LessEqual", opLen2(ssa.OpLessEqualInt8x32, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int8x64.LessEqual", opLen2(ssa.OpLessEqualInt8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int16x8.LessEqual", opLen2(ssa.OpLessEqualInt16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int16x16.LessEqual", opLen2(ssa.OpLessEqualInt16x16, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int16x32.LessEqual", opLen2(ssa.OpLessEqualInt16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int32x4.LessEqual", opLen2(ssa.OpLessEqualInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x8.LessEqual", opLen2(ssa.OpLessEqualInt32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int32x16.LessEqual", opLen2(ssa.OpLessEqualInt32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int64x2.LessEqual", opLen2(ssa.OpLessEqualInt64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x4.LessEqual", opLen2(ssa.OpLessEqualInt64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int64x8.LessEqual", opLen2(ssa.OpLessEqualInt64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint8x16.LessEqual", opLen2(ssa.OpLessEqualUint8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint8x32.LessEqual", opLen2(ssa.OpLessEqualUint8x32, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint8x64.LessEqual", opLen2(ssa.OpLessEqualUint8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint16x8.LessEqual", opLen2(ssa.OpLessEqualUint16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint16x16.LessEqual", opLen2(ssa.OpLessEqualUint16x16, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint16x32.LessEqual", opLen2(ssa.OpLessEqualUint16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint32x4.LessEqual", opLen2(ssa.OpLessEqualUint32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x8.LessEqual", opLen2(ssa.OpLessEqualUint32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint32x16.LessEqual", opLen2(ssa.OpLessEqualUint32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint64x2.LessEqual", opLen2(ssa.OpLessEqualUint64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint64x4.LessEqual", opLen2(ssa.OpLessEqualUint64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint64x8.LessEqual", opLen2(ssa.OpLessEqualUint64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float32x4.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedFloat32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x8.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedFloat32x8, types.TypeVec256), sys.AMD64) @@ -1062,29 +1006,13 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Float64x2.NotEqual", opLen2(ssa.OpNotEqualFloat64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float64x4.NotEqual", opLen2(ssa.OpNotEqualFloat64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float64x8.NotEqual", opLen2(ssa.OpNotEqualFloat64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int8x16.NotEqual", opLen2(ssa.OpNotEqualInt8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int8x32.NotEqual", opLen2(ssa.OpNotEqualInt8x32, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int8x64.NotEqual", opLen2(ssa.OpNotEqualInt8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int16x8.NotEqual", opLen2(ssa.OpNotEqualInt16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int16x16.NotEqual", opLen2(ssa.OpNotEqualInt16x16, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int16x32.NotEqual", opLen2(ssa.OpNotEqualInt16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int32x4.NotEqual", opLen2(ssa.OpNotEqualInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x8.NotEqual", opLen2(ssa.OpNotEqualInt32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int32x16.NotEqual", opLen2(ssa.OpNotEqualInt32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int64x2.NotEqual", opLen2(ssa.OpNotEqualInt64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x4.NotEqual", opLen2(ssa.OpNotEqualInt64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int64x8.NotEqual", opLen2(ssa.OpNotEqualInt64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint8x16.NotEqual", opLen2(ssa.OpNotEqualUint8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint8x32.NotEqual", opLen2(ssa.OpNotEqualUint8x32, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint8x64.NotEqual", opLen2(ssa.OpNotEqualUint8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint16x8.NotEqual", opLen2(ssa.OpNotEqualUint16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint16x16.NotEqual", opLen2(ssa.OpNotEqualUint16x16, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint16x32.NotEqual", opLen2(ssa.OpNotEqualUint16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint32x4.NotEqual", opLen2(ssa.OpNotEqualUint32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x8.NotEqual", opLen2(ssa.OpNotEqualUint32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint32x16.NotEqual", opLen2(ssa.OpNotEqualUint32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint64x2.NotEqual", opLen2(ssa.OpNotEqualUint64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint64x4.NotEqual", opLen2(ssa.OpNotEqualUint64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint64x8.NotEqual", opLen2(ssa.OpNotEqualUint64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float32x4.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedFloat32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x8.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedFloat32x8, types.TypeVec256), sys.AMD64) diff --git a/src/simd/compare_test.go b/src/simd/compare_test.go index 19b1f3886d..7fd20cf5d7 100644 --- a/src/simd/compare_test.go +++ b/src/simd/compare_test.go @@ -59,17 +59,32 @@ func TestLess(t *testing.T) { testFloat64x2Compare(t, simd.Float64x2.Less, lessSlice[float64]) testFloat64x4Compare(t, simd.Float64x4.Less, lessSlice[float64]) - if comparisonFixed { - testInt16x16Compare(t, simd.Int16x16.Less, lessSlice[int16]) - testInt16x8Compare(t, simd.Int16x8.Less, lessSlice[int16]) - testInt32x4Compare(t, simd.Int32x4.Less, lessSlice[int32]) - testInt32x8Compare(t, simd.Int32x8.Less, 
lessSlice[int32]) - testInt64x2Compare(t, simd.Int64x2.Less, lessSlice[int64]) - testInt64x4Compare(t, simd.Int64x4.Less, lessSlice[int64]) - testInt8x16Compare(t, simd.Int8x16.Less, lessSlice[int8]) - testInt8x32Compare(t, simd.Int8x32.Less, lessSlice[int8]) - - } + testInt16x16Compare(t, simd.Int16x16.Less, lessSlice[int16]) + testInt16x8Compare(t, simd.Int16x8.Less, lessSlice[int16]) + testInt32x4Compare(t, simd.Int32x4.Less, lessSlice[int32]) + testInt32x8Compare(t, simd.Int32x8.Less, lessSlice[int32]) + testInt64x2Compare(t, simd.Int64x2.Less, lessSlice[int64]) + testInt64x4Compare(t, simd.Int64x4.Less, lessSlice[int64]) + testInt8x16Compare(t, simd.Int8x16.Less, lessSlice[int8]) + testInt8x32Compare(t, simd.Int8x32.Less, lessSlice[int8]) + + testInt16x16Compare(t, simd.Int16x16.Less, lessSlice[int16]) + testInt16x8Compare(t, simd.Int16x8.Less, lessSlice[int16]) + testInt32x4Compare(t, simd.Int32x4.Less, lessSlice[int32]) + testInt32x8Compare(t, simd.Int32x8.Less, lessSlice[int32]) + testInt64x2Compare(t, simd.Int64x2.Less, lessSlice[int64]) + testInt64x4Compare(t, simd.Int64x4.Less, lessSlice[int64]) + testInt8x16Compare(t, simd.Int8x16.Less, lessSlice[int8]) + testInt8x32Compare(t, simd.Int8x32.Less, lessSlice[int8]) + + testUint16x16Compare(t, simd.Uint16x16.Less, lessSlice[uint16]) + testUint16x8Compare(t, simd.Uint16x8.Less, lessSlice[uint16]) + testUint32x4Compare(t, simd.Uint32x4.Less, lessSlice[uint32]) + testUint32x8Compare(t, simd.Uint32x8.Less, lessSlice[uint32]) + testUint64x2Compare(t, simd.Uint64x2.Less, lessSlice[uint64]) + testUint64x4Compare(t, simd.Uint64x4.Less, lessSlice[uint64]) + testUint8x16Compare(t, simd.Uint8x16.Less, lessSlice[uint8]) + testUint8x32Compare(t, simd.Uint8x32.Less, lessSlice[uint8]) if simd.HasAVX512() { testUint16x16Compare(t, simd.Uint16x16.Less, lessSlice[uint16]) @@ -100,28 +115,25 @@ func TestLessEqual(t *testing.T) { testFloat64x2Compare(t, simd.Float64x2.LessEqual, lessEqualSlice[float64]) testFloat64x4Compare(t, simd.Float64x4.LessEqual, lessEqualSlice[float64]) - if comparisonFixed { - testInt16x16Compare(t, simd.Int16x16.LessEqual, lessEqualSlice[int16]) - testInt16x8Compare(t, simd.Int16x8.LessEqual, lessEqualSlice[int16]) - testInt32x4Compare(t, simd.Int32x4.LessEqual, lessEqualSlice[int32]) - testInt32x8Compare(t, simd.Int32x8.LessEqual, lessEqualSlice[int32]) - testInt64x2Compare(t, simd.Int64x2.LessEqual, lessEqualSlice[int64]) - testInt64x4Compare(t, simd.Int64x4.LessEqual, lessEqualSlice[int64]) - testInt8x16Compare(t, simd.Int8x16.LessEqual, lessEqualSlice[int8]) - testInt8x32Compare(t, simd.Int8x32.LessEqual, lessEqualSlice[int8]) - - } + testInt16x16Compare(t, simd.Int16x16.LessEqual, lessEqualSlice[int16]) + testInt16x8Compare(t, simd.Int16x8.LessEqual, lessEqualSlice[int16]) + testInt32x4Compare(t, simd.Int32x4.LessEqual, lessEqualSlice[int32]) + testInt32x8Compare(t, simd.Int32x8.LessEqual, lessEqualSlice[int32]) + testInt64x2Compare(t, simd.Int64x2.LessEqual, lessEqualSlice[int64]) + testInt64x4Compare(t, simd.Int64x4.LessEqual, lessEqualSlice[int64]) + testInt8x16Compare(t, simd.Int8x16.LessEqual, lessEqualSlice[int8]) + testInt8x32Compare(t, simd.Int8x32.LessEqual, lessEqualSlice[int8]) + + testUint16x16Compare(t, simd.Uint16x16.LessEqual, lessEqualSlice[uint16]) + testUint16x8Compare(t, simd.Uint16x8.LessEqual, lessEqualSlice[uint16]) + testUint32x4Compare(t, simd.Uint32x4.LessEqual, lessEqualSlice[uint32]) + testUint32x8Compare(t, simd.Uint32x8.LessEqual, lessEqualSlice[uint32]) + testUint64x2Compare(t, 
+	testUint64x4Compare(t, simd.Uint64x4.LessEqual, lessEqualSlice[uint64])
+	testUint8x16Compare(t, simd.Uint8x16.LessEqual, lessEqualSlice[uint8])
+	testUint8x32Compare(t, simd.Uint8x32.LessEqual, lessEqualSlice[uint8])
 
 	if simd.HasAVX512() {
-		testUint16x16Compare(t, simd.Uint16x16.LessEqual, lessEqualSlice[uint16])
-		testUint16x8Compare(t, simd.Uint16x8.LessEqual, lessEqualSlice[uint16])
-		testUint32x4Compare(t, simd.Uint32x4.LessEqual, lessEqualSlice[uint32])
-		testUint32x8Compare(t, simd.Uint32x8.LessEqual, lessEqualSlice[uint32])
-		testUint64x2Compare(t, simd.Uint64x2.LessEqual, lessEqualSlice[uint64])
-		testUint64x4Compare(t, simd.Uint64x4.LessEqual, lessEqualSlice[uint64])
-		testUint8x16Compare(t, simd.Uint8x16.LessEqual, lessEqualSlice[uint8])
-		testUint8x32Compare(t, simd.Uint8x32.LessEqual, lessEqualSlice[uint8])
-
 		testFloat32x16Compare(t, simd.Float32x16.LessEqual, lessEqualSlice[float32])
 		testFloat64x8Compare(t, simd.Float64x8.LessEqual, lessEqualSlice[float64])
 		testInt8x64Compare(t, simd.Int8x64.LessEqual, lessEqualSlice[int8])
@@ -151,16 +154,17 @@ func TestGreater(t *testing.T) {
 	testInt8x16Compare(t, simd.Int8x16.Greater, greaterSlice[int8])
 	testInt8x32Compare(t, simd.Int8x32.Greater, greaterSlice[int8])
 
-	if simd.HasAVX512() {
-		testUint16x16Compare(t, simd.Uint16x16.Greater, greaterSlice[uint16])
-		testUint16x8Compare(t, simd.Uint16x8.Greater, greaterSlice[uint16])
-		testUint32x4Compare(t, simd.Uint32x4.Greater, greaterSlice[uint32])
-		testUint32x8Compare(t, simd.Uint32x8.Greater, greaterSlice[uint32])
+	testUint16x16Compare(t, simd.Uint16x16.Greater, greaterSlice[uint16])
+	testUint16x8Compare(t, simd.Uint16x8.Greater, greaterSlice[uint16])
+	testUint32x4Compare(t, simd.Uint32x4.Greater, greaterSlice[uint32])
+	testUint32x8Compare(t, simd.Uint32x8.Greater, greaterSlice[uint32])
 
-		testUint64x2Compare(t, simd.Uint64x2.Greater, greaterSlice[uint64])
-		testUint64x4Compare(t, simd.Uint64x4.Greater, greaterSlice[uint64])
-		testUint8x16Compare(t, simd.Uint8x16.Greater, greaterSlice[uint8])
-		testUint8x32Compare(t, simd.Uint8x32.Greater, greaterSlice[uint8])
+	testUint64x2Compare(t, simd.Uint64x2.Greater, greaterSlice[uint64])
+	testUint64x4Compare(t, simd.Uint64x4.Greater, greaterSlice[uint64])
+	testUint8x16Compare(t, simd.Uint8x16.Greater, greaterSlice[uint8])
+	testUint8x32Compare(t, simd.Uint8x32.Greater, greaterSlice[uint8])
+
+	if simd.HasAVX512() {
 
 		testFloat32x16Compare(t, simd.Float32x16.Greater, greaterSlice[float32])
 		testFloat64x8Compare(t, simd.Float64x8.Greater, greaterSlice[float64])
@@ -181,28 +185,25 @@ func TestGreaterEqual(t *testing.T) {
 	testFloat64x2Compare(t, simd.Float64x2.GreaterEqual, greaterEqualSlice[float64])
 	testFloat64x4Compare(t, simd.Float64x4.GreaterEqual, greaterEqualSlice[float64])
 
-	if comparisonFixed {
-		testInt16x16Compare(t, simd.Int16x16.GreaterEqual, greaterEqualSlice[int16])
-		testInt16x8Compare(t, simd.Int16x8.GreaterEqual, greaterEqualSlice[int16])
-		testInt32x4Compare(t, simd.Int32x4.GreaterEqual, greaterEqualSlice[int32])
-		testInt32x8Compare(t, simd.Int32x8.GreaterEqual, greaterEqualSlice[int32])
-		testInt64x2Compare(t, simd.Int64x2.GreaterEqual, greaterEqualSlice[int64])
-		testInt64x4Compare(t, simd.Int64x4.GreaterEqual, greaterEqualSlice[int64])
-		testInt8x16Compare(t, simd.Int8x16.GreaterEqual, greaterEqualSlice[int8])
-		testInt8x32Compare(t, simd.Int8x32.GreaterEqual, greaterEqualSlice[int8])
-
-	}
+	testInt16x16Compare(t, simd.Int16x16.GreaterEqual, greaterEqualSlice[int16])
+	testInt16x8Compare(t, simd.Int16x8.GreaterEqual, greaterEqualSlice[int16])
+	testInt32x4Compare(t, simd.Int32x4.GreaterEqual, greaterEqualSlice[int32])
+	testInt32x8Compare(t, simd.Int32x8.GreaterEqual, greaterEqualSlice[int32])
+	testInt64x2Compare(t, simd.Int64x2.GreaterEqual, greaterEqualSlice[int64])
+	testInt64x4Compare(t, simd.Int64x4.GreaterEqual, greaterEqualSlice[int64])
+	testInt8x16Compare(t, simd.Int8x16.GreaterEqual, greaterEqualSlice[int8])
+	testInt8x32Compare(t, simd.Int8x32.GreaterEqual, greaterEqualSlice[int8])
+
+	testUint16x16Compare(t, simd.Uint16x16.GreaterEqual, greaterEqualSlice[uint16])
+	testUint16x8Compare(t, simd.Uint16x8.GreaterEqual, greaterEqualSlice[uint16])
+	testUint32x4Compare(t, simd.Uint32x4.GreaterEqual, greaterEqualSlice[uint32])
+	testUint32x8Compare(t, simd.Uint32x8.GreaterEqual, greaterEqualSlice[uint32])
+	testUint64x2Compare(t, simd.Uint64x2.GreaterEqual, greaterEqualSlice[uint64])
+	testUint64x4Compare(t, simd.Uint64x4.GreaterEqual, greaterEqualSlice[uint64])
+	testUint8x16Compare(t, simd.Uint8x16.GreaterEqual, greaterEqualSlice[uint8])
+	testUint8x32Compare(t, simd.Uint8x32.GreaterEqual, greaterEqualSlice[uint8])
 
 	if simd.HasAVX512() {
-		testUint16x16Compare(t, simd.Uint16x16.GreaterEqual, greaterEqualSlice[uint16])
-		testUint16x8Compare(t, simd.Uint16x8.GreaterEqual, greaterEqualSlice[uint16])
-		testUint32x4Compare(t, simd.Uint32x4.GreaterEqual, greaterEqualSlice[uint32])
-		testUint32x8Compare(t, simd.Uint32x8.GreaterEqual, greaterEqualSlice[uint32])
-		testUint64x2Compare(t, simd.Uint64x2.GreaterEqual, greaterEqualSlice[uint64])
-		testUint64x4Compare(t, simd.Uint64x4.GreaterEqual, greaterEqualSlice[uint64])
-		testUint8x16Compare(t, simd.Uint8x16.GreaterEqual, greaterEqualSlice[uint8])
-		testUint8x32Compare(t, simd.Uint8x32.GreaterEqual, greaterEqualSlice[uint8])
-
 		testFloat32x16Compare(t, simd.Float32x16.GreaterEqual, greaterEqualSlice[float32])
 		testFloat64x8Compare(t, simd.Float64x8.GreaterEqual, greaterEqualSlice[float64])
 		testInt8x64Compare(t, simd.Int8x64.GreaterEqual, greaterEqualSlice[int8])
@@ -260,25 +261,23 @@ func TestNotEqual(t *testing.T) {
 	testFloat64x2Compare(t, simd.Float64x2.NotEqual, notEqualSlice[float64])
 	testFloat64x4Compare(t, simd.Float64x4.NotEqual, notEqualSlice[float64])
 
-	if comparisonFixed {
-		testInt16x16Compare(t, simd.Int16x16.NotEqual, notEqualSlice[int16])
-		testInt16x8Compare(t, simd.Int16x8.NotEqual, notEqualSlice[int16])
-		testInt32x4Compare(t, simd.Int32x4.NotEqual, notEqualSlice[int32])
-		testInt32x8Compare(t, simd.Int32x8.NotEqual, notEqualSlice[int32])
-		testInt64x2Compare(t, simd.Int64x2.NotEqual, notEqualSlice[int64])
-		testInt64x4Compare(t, simd.Int64x4.NotEqual, notEqualSlice[int64])
-		testInt8x16Compare(t, simd.Int8x16.NotEqual, notEqualSlice[int8])
-		testInt8x32Compare(t, simd.Int8x32.NotEqual, notEqualSlice[int8])
-
-		testUint16x16Compare(t, simd.Uint16x16.NotEqual, notEqualSlice[uint16])
-		testUint16x8Compare(t, simd.Uint16x8.NotEqual, notEqualSlice[uint16])
-		testUint32x4Compare(t, simd.Uint32x4.NotEqual, notEqualSlice[uint32])
-		testUint32x8Compare(t, simd.Uint32x8.NotEqual, notEqualSlice[uint32])
-		testUint64x2Compare(t, simd.Uint64x2.NotEqual, notEqualSlice[uint64])
-		testUint64x4Compare(t, simd.Uint64x4.NotEqual, notEqualSlice[uint64])
-		testUint8x16Compare(t, simd.Uint8x16.NotEqual, notEqualSlice[uint8])
-		testUint8x32Compare(t, simd.Uint8x32.NotEqual, notEqualSlice[uint8])
-	}
+	testInt16x16Compare(t, simd.Int16x16.NotEqual, notEqualSlice[int16])
+	testInt16x8Compare(t, simd.Int16x8.NotEqual, notEqualSlice[int16])
+	testInt32x4Compare(t, simd.Int32x4.NotEqual, notEqualSlice[int32])
+	testInt32x8Compare(t, simd.Int32x8.NotEqual, notEqualSlice[int32])
+	testInt64x2Compare(t, simd.Int64x2.NotEqual, notEqualSlice[int64])
+	testInt64x4Compare(t, simd.Int64x4.NotEqual, notEqualSlice[int64])
+	testInt8x16Compare(t, simd.Int8x16.NotEqual, notEqualSlice[int8])
+	testInt8x32Compare(t, simd.Int8x32.NotEqual, notEqualSlice[int8])
+
+	testUint16x16Compare(t, simd.Uint16x16.NotEqual, notEqualSlice[uint16])
+	testUint16x8Compare(t, simd.Uint16x8.NotEqual, notEqualSlice[uint16])
+	testUint32x4Compare(t, simd.Uint32x4.NotEqual, notEqualSlice[uint32])
+	testUint32x8Compare(t, simd.Uint32x8.NotEqual, notEqualSlice[uint32])
+	testUint64x2Compare(t, simd.Uint64x2.NotEqual, notEqualSlice[uint64])
+	testUint64x4Compare(t, simd.Uint64x4.NotEqual, notEqualSlice[uint64])
+	testUint8x16Compare(t, simd.Uint8x16.NotEqual, notEqualSlice[uint8])
+	testUint8x32Compare(t, simd.Uint8x32.NotEqual, notEqualSlice[uint8])
 
 	if simd.HasAVX512() {
 		testFloat32x16Compare(t, simd.Float32x16.NotEqual, notEqualSlice[float32])
diff --git a/src/simd/genfiles.go b/src/simd/genfiles.go
index 8b36da71ab..022ddd1681 100644
--- a/src/simd/genfiles.go
+++ b/src/simd/genfiles.go
@@ -87,6 +87,16 @@ var ternaryFlaky = &shapes{ // for tests that support flaky equality
 	floats: []int{32},
 }
 
+var avx2SignedComparisons = &shapes{
+	vecs: []int{128, 256},
+	ints: []int{8, 16, 32, 64},
+}
+
+var avx2UnsignedComparisons = &shapes{
+	vecs: []int{128, 256},
+	uints: []int{8, 16, 32, 64},
+}
+
 type templateData struct {
 	Vec   string // the type of the vector, e.g. Float32x4
 	AOrAn string // for documentation, the article "a" or "an"
@@ -486,6 +496,130 @@ func (x {{.Vec}}) StoreSlicePart(s []{{.Type}}) {
 }
 `)
 
+func (t templateData) CPUfeature() string {
+	switch t.Vwidth {
+	case 128:
+		return "AVX"
+	case 256:
+		return "AVX2"
+	case 512:
+		return "AVX512"
+	}
+	panic(fmt.Errorf("unexpected vector width %d", t.Vwidth))
+}
+
+var avx2SignedComparisonsTemplate = shapedTemplateOf(avx2SignedComparisons, "avx2 signed comparisons", `
+// Less returns a mask whose elements indicate whether x < y
+//
+// Emulated, CPU Feature {{.CPUfeature}}
+func (x {{.Vec}}) Less(y {{.Vec}}) Mask{{.WxC}} {
+	return y.Greater(x)
+}
+
+// GreaterEqual returns a mask whose elements indicate whether x >= y
+//
+// Emulated, CPU Feature {{.CPUfeature}}
+func (x {{.Vec}}) GreaterEqual(y {{.Vec}}) Mask{{.WxC}} {
+	ones := x.Equal(x).AsInt{{.WxC}}()
+	return y.Greater(x).AsInt{{.WxC}}().Xor(ones).AsMask{{.WxC}}()
+}
+
+// LessEqual returns a mask whose elements indicate whether x <= y
+//
+// Emulated, CPU Feature {{.CPUfeature}}
+func (x {{.Vec}}) LessEqual(y {{.Vec}}) Mask{{.WxC}} {
+	ones := x.Equal(x).AsInt{{.WxC}}()
+	return x.Greater(y).AsInt{{.WxC}}().Xor(ones).AsMask{{.WxC}}()
+}
+
+// NotEqual returns a mask whose elements indicate whether x != y
+//
+// Emulated, CPU Feature {{.CPUfeature}}
+func (x {{.Vec}}) NotEqual(y {{.Vec}}) Mask{{.WxC}} {
+	ones := x.Equal(x).AsInt{{.WxC}}()
+	return x.Equal(y).AsInt{{.WxC}}().Xor(ones).AsMask{{.WxC}}()
+}
+`)
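
The signed template above leans on one identity: a comparison mask lane is either all-ones or all-zeros, x.Equal(x) manufactures an all-ones vector, and XOR with all-ones is a lane-wise NOT, so GreaterEqual is NOT(y > x), LessEqual is NOT(x > y), and NotEqual is NOT(x == y). A minimal scalar sketch of that logic follows (standalone Go written for this review, not part of the patch; greater is an illustrative stand-in for what VPCMPGT produces per lane):

package main

import "fmt"

// greater models one lane of VPCMPGT: all-ones (-1) if x > y, else 0.
func greater(x, y int16) int16 {
	if x > y {
		return -1
	}
	return 0
}

func main() {
	x, y := int16(3), int16(7)
	ones := int16(-1)          // x.Equal(x) is always true, so it yields all-ones
	less := greater(y, x)      // Less is Greater with the operands swapped
	ge := greater(y, x) ^ ones // GreaterEqual = NOT(y > x)
	le := greater(x, y) ^ ones // LessEqual = NOT(x > y)
	fmt.Println(less, ge, le)  // -1 0 -1: 3 < 7 holds, 3 >= 7 does not, 3 <= 7 holds
}
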
simd.Int16x8.NotEqual, notEqualSlice[int16]) + testInt32x4Compare(t, simd.Int32x4.NotEqual, notEqualSlice[int32]) + testInt32x8Compare(t, simd.Int32x8.NotEqual, notEqualSlice[int32]) + testInt64x2Compare(t, simd.Int64x2.NotEqual, notEqualSlice[int64]) + testInt64x4Compare(t, simd.Int64x4.NotEqual, notEqualSlice[int64]) + testInt8x16Compare(t, simd.Int8x16.NotEqual, notEqualSlice[int8]) + testInt8x32Compare(t, simd.Int8x32.NotEqual, notEqualSlice[int8]) + + testUint16x16Compare(t, simd.Uint16x16.NotEqual, notEqualSlice[uint16]) + testUint16x8Compare(t, simd.Uint16x8.NotEqual, notEqualSlice[uint16]) + testUint32x4Compare(t, simd.Uint32x4.NotEqual, notEqualSlice[uint32]) + testUint32x8Compare(t, simd.Uint32x8.NotEqual, notEqualSlice[uint32]) + testUint64x2Compare(t, simd.Uint64x2.NotEqual, notEqualSlice[uint64]) + testUint64x4Compare(t, simd.Uint64x4.NotEqual, notEqualSlice[uint64]) + testUint8x16Compare(t, simd.Uint8x16.NotEqual, notEqualSlice[uint8]) + testUint8x32Compare(t, simd.Uint8x32.NotEqual, notEqualSlice[uint8]) if simd.HasAVX512() { testFloat32x16Compare(t, simd.Float32x16.NotEqual, notEqualSlice[float32]) diff --git a/src/simd/genfiles.go b/src/simd/genfiles.go index 8b36da71ab..022ddd1681 100644 --- a/src/simd/genfiles.go +++ b/src/simd/genfiles.go @@ -87,6 +87,16 @@ var ternaryFlaky = &shapes{ // for tests that support flaky equality floats: []int{32}, } +var avx2SignedComparisons = &shapes{ + vecs: []int{128, 256}, + ints: []int{8, 16, 32, 64}, +} + +var avx2UnsignedComparisons = &shapes{ + vecs: []int{128, 256}, + uints: []int{8, 16, 32, 64}, +} + type templateData struct { Vec string // the type of the vector, e.g. Float32x4 AOrAn string // for documentation, the article "a" or "an" @@ -486,6 +496,130 @@ func (x {{.Vec}}) StoreSlicePart(s []{{.Type}}) { } `) +func (t templateData) CPUfeature() string { + switch t.Vwidth { + case 128: + return "AVX" + case 256: + return "AVX2" + case 512: + return "AVX512" + } + panic(fmt.Errorf("unexpected vector width %d", t.Vwidth)) +} + +var avx2SignedComparisonsTemplate = shapedTemplateOf(avx2SignedComparisons, "avx2 signed comparisons", ` +// Less returns a mask whose elements indicate whether x < y +// +// Emulated, CPU Feature {{.CPUfeature}} +func (x {{.Vec}}) Less(y {{.Vec}}) Mask{{.WxC}} { + return y.Greater(x) +} + +// GreaterEqual returns a mask whose elements indicate whether x >= y +// +// Emulated, CPU Feature {{.CPUfeature}} +func (x {{.Vec}}) GreaterEqual(y {{.Vec}}) Mask{{.WxC}} { + ones := x.Equal(x).AsInt{{.WxC}}() + return y.Greater(x).AsInt{{.WxC}}().Xor(ones).AsMask{{.WxC}}() +} + +// LessEqual returns a mask whose elements indicate whether x <= y +// +// Emulated, CPU Feature {{.CPUfeature}} +func (x {{.Vec}}) LessEqual(y {{.Vec}}) Mask{{.WxC}} { + ones := x.Equal(x).AsInt{{.WxC}}() + return x.Greater(y).AsInt{{.WxC}}().Xor(ones).AsMask{{.WxC}}() +} + +// NotEqual returns a mask whose elements indicate whether x != y +// +// Emulated, CPU Feature {{.CPUfeature}} +func (x {{.Vec}}) NotEqual(y {{.Vec}}) Mask{{.WxC}} { + ones := x.Equal(x).AsInt{{.WxC}}() + return x.Equal(y).AsInt{{.WxC}}().Xor(ones).AsMask{{.WxC}}() +} +`) + +// CPUfeatureAVX2if8 return AVX2 if the element width is 8, +// otherwise, it returns CPUfeature. This is for the cpufeature +// of unsigned comparison emulation, which uses shifts for all +// the sizes > 8 (shifts are AVX) but must use broadcast (AVX2) +// for bytes. 
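+// (There is no 8-bit vector shift on x86, so the byte-sized
+// sign-bit constant cannot be formed by shifting an all-ones
+// vector and is broadcast instead.)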
+func (t templateData) CPUfeatureAVX2if8() string {
+	if t.Width == 8 {
+		return "AVX2"
+	}
+	return t.CPUfeature()
+}
+
+var avx2UnsignedComparisonsTemplate = shapedTemplateOf(avx2UnsignedComparisons, "avx2 unsigned comparisons", `
+// Greater returns a mask whose elements indicate whether x > y
+//
+// Emulated, CPU Feature {{.CPUfeatureAVX2if8}}
+func (x {{.Vec}}) Greater(y {{.Vec}}) Mask{{.WxC}} {
+	a, b := x.AsInt{{.WxC}}(), y.AsInt{{.WxC}}()
+{{- if eq .Width 8}}
+	signs := BroadcastInt{{.WxC}}(-1 << ({{.Width}}-1))
+{{- else}}
+	ones := x.Equal(x).AsInt{{.WxC}}()
+	signs := ones.ShiftAllLeft({{.Width}}-1)
+{{- end }}
+	return a.Xor(signs).Greater(b.Xor(signs))
+}
+
+// Less returns a mask whose elements indicate whether x < y
+//
+// Emulated, CPU Feature {{.CPUfeatureAVX2if8}}
+func (x {{.Vec}}) Less(y {{.Vec}}) Mask{{.WxC}} {
+	a, b := x.AsInt{{.WxC}}(), y.AsInt{{.WxC}}()
+{{- if eq .Width 8}}
+	signs := BroadcastInt{{.WxC}}(-1 << ({{.Width}}-1))
+{{- else}}
+	ones := x.Equal(x).AsInt{{.WxC}}()
+	signs := ones.ShiftAllLeft({{.Width}}-1)
+{{- end }}
+	return b.Xor(signs).Greater(a.Xor(signs))
+}
+
+// GreaterEqual returns a mask whose elements indicate whether x >= y
+//
+// Emulated, CPU Feature {{.CPUfeatureAVX2if8}}
+func (x {{.Vec}}) GreaterEqual(y {{.Vec}}) Mask{{.WxC}} {
+	a, b := x.AsInt{{.WxC}}(), y.AsInt{{.WxC}}()
+	ones := x.Equal(x).AsInt{{.WxC}}()
+{{- if eq .Width 8}}
+	signs := BroadcastInt{{.WxC}}(-1 << ({{.Width}}-1))
+{{- else}}
+	signs := ones.ShiftAllLeft({{.Width}}-1)
+{{- end }}
+	return b.Xor(signs).Greater(a.Xor(signs)).AsInt{{.WxC}}().Xor(ones).AsMask{{.WxC}}()
+}
+
+// LessEqual returns a mask whose elements indicate whether x <= y
+//
+// Emulated, CPU Feature {{.CPUfeatureAVX2if8}}
+func (x {{.Vec}}) LessEqual(y {{.Vec}}) Mask{{.WxC}} {
+	a, b := x.AsInt{{.WxC}}(), y.AsInt{{.WxC}}()
+	ones := x.Equal(x).AsInt{{.WxC}}()
+{{- if eq .Width 8}}
+	signs := BroadcastInt{{.WxC}}(-1 << ({{.Width}}-1))
+{{- else}}
+	signs := ones.ShiftAllLeft({{.Width}}-1)
+{{- end }}
+	return a.Xor(signs).Greater(b.Xor(signs)).AsInt{{.WxC}}().Xor(ones).AsMask{{.WxC}}()
+}
+
+// NotEqual returns a mask whose elements indicate whether x != y
+//
+// Emulated, CPU Feature {{.CPUfeature}}
+func (x {{.Vec}}) NotEqual(y {{.Vec}}) Mask{{.WxC}} {
+	a, b := x.AsInt{{.WxC}}(), y.AsInt{{.WxC}}()
+	ones := x.Equal(x).AsInt{{.WxC}}()
+	return a.Equal(b).AsInt{{.WxC}}().Xor(ones).AsMask{{.WxC}}()
+}
+`)
+
 var unsafePATemplate = templateOf("unsafe PA helper", `
 // pa{{.Vec}} returns a type-unsafe pointer to array that can
 // only be used with partial load/store operations that only
@@ -591,6 +725,8 @@ func main() {
 		avx2SmallLoadSlicePartTemplate,
 		avx2MaskedTemplate,
 		avx512MaskedTemplate,
+		avx2SignedComparisonsTemplate,
+		avx2UnsignedComparisonsTemplate,
 		broadcastTemplate,
 	)
 }
diff --git a/src/simd/ops_amd64.go b/src/simd/ops_amd64.go
index 5b7754a961..d78bb699ea 100644
--- a/src/simd/ops_amd64.go
+++ b/src/simd/ops_amd64.go
@@ -3822,61 +3822,21 @@ func (x Float64x4) Greater(y Float64x4) Mask64x4
 // Asm: VCMPPD, CPU Feature: AVX512
 func (x Float64x8) Greater(y Float64x8) Mask64x8

-// Greater compares for greater than.
-//
-// Asm: VPCMPUB, CPU Feature: AVX512
-func (x Uint8x16) Greater(y Uint8x16) Mask8x16
-
-// Greater compares for greater than.
-//
-// Asm: VPCMPUB, CPU Feature: AVX512
-func (x Uint8x32) Greater(y Uint8x32) Mask8x32
-
 // Greater compares for greater than.
 //
 // Asm: VPCMPUB, CPU Feature: AVX512
 func (x Uint8x64) Greater(y Uint8x64) Mask8x64

-// Greater compares for greater than.
-//
-// Asm: VPCMPUW, CPU Feature: AVX512
-func (x Uint16x8) Greater(y Uint16x8) Mask16x8
-
-// Greater compares for greater than.
-//
-// Asm: VPCMPUW, CPU Feature: AVX512
-func (x Uint16x16) Greater(y Uint16x16) Mask16x16
-
 // Greater compares for greater than.
 //
 // Asm: VPCMPUW, CPU Feature: AVX512
 func (x Uint16x32) Greater(y Uint16x32) Mask16x32

-// Greater compares for greater than.
-//
-// Asm: VPCMPUD, CPU Feature: AVX512
-func (x Uint32x4) Greater(y Uint32x4) Mask32x4
-
-// Greater compares for greater than.
-//
-// Asm: VPCMPUD, CPU Feature: AVX512
-func (x Uint32x8) Greater(y Uint32x8) Mask32x8
-
 // Greater compares for greater than.
 //
 // Asm: VPCMPUD, CPU Feature: AVX512
 func (x Uint32x16) Greater(y Uint32x16) Mask32x16

-// Greater compares for greater than.
-//
-// Asm: VPCMPUQ, CPU Feature: AVX512
-func (x Uint64x2) Greater(y Uint64x2) Mask64x2
-
-// Greater compares for greater than.
-//
-// Asm: VPCMPUQ, CPU Feature: AVX512
-func (x Uint64x4) Greater(y Uint64x4) Mask64x4
-
 // Greater compares for greater than.
 //
 // Asm: VPCMPUQ, CPU Feature: AVX512
@@ -3914,121 +3874,41 @@ func (x Float64x4) GreaterEqual(y Float64x4) Mask64x4
 // Asm: VCMPPD, CPU Feature: AVX512
 func (x Float64x8) GreaterEqual(y Float64x8) Mask64x8

-// GreaterEqual compares for greater than or equal.
-//
-// Asm: VPCMPB, CPU Feature: AVX512
-func (x Int8x16) GreaterEqual(y Int8x16) Mask8x16
-
-// GreaterEqual compares for greater than or equal.
-//
-// Asm: VPCMPB, CPU Feature: AVX512
-func (x Int8x32) GreaterEqual(y Int8x32) Mask8x32
-
 // GreaterEqual compares for greater than or equal.
 //
 // Asm: VPCMPB, CPU Feature: AVX512
 func (x Int8x64) GreaterEqual(y Int8x64) Mask8x64

-// GreaterEqual compares for greater than or equal.
-//
-// Asm: VPCMPW, CPU Feature: AVX512
-func (x Int16x8) GreaterEqual(y Int16x8) Mask16x8
-
-// GreaterEqual compares for greater than or equal.
-//
-// Asm: VPCMPW, CPU Feature: AVX512
-func (x Int16x16) GreaterEqual(y Int16x16) Mask16x16
-
 // GreaterEqual compares for greater than or equal.
 //
 // Asm: VPCMPW, CPU Feature: AVX512
 func (x Int16x32) GreaterEqual(y Int16x32) Mask16x32

-// GreaterEqual compares for greater than or equal.
-//
-// Asm: VPCMPD, CPU Feature: AVX512
-func (x Int32x4) GreaterEqual(y Int32x4) Mask32x4
-
-// GreaterEqual compares for greater than or equal.
-//
-// Asm: VPCMPD, CPU Feature: AVX512
-func (x Int32x8) GreaterEqual(y Int32x8) Mask32x8
-
 // GreaterEqual compares for greater than or equal.
 //
 // Asm: VPCMPD, CPU Feature: AVX512
 func (x Int32x16) GreaterEqual(y Int32x16) Mask32x16

-// GreaterEqual compares for greater than or equal.
-//
-// Asm: VPCMPQ, CPU Feature: AVX512
-func (x Int64x2) GreaterEqual(y Int64x2) Mask64x2
-
-// GreaterEqual compares for greater than or equal.
-//
-// Asm: VPCMPQ, CPU Feature: AVX512
-func (x Int64x4) GreaterEqual(y Int64x4) Mask64x4
-
 // GreaterEqual compares for greater than or equal.
 //
 // Asm: VPCMPQ, CPU Feature: AVX512
 func (x Int64x8) GreaterEqual(y Int64x8) Mask64x8

-// GreaterEqual compares for greater than or equal.
-//
-// Asm: VPCMPUB, CPU Feature: AVX512
-func (x Uint8x16) GreaterEqual(y Uint8x16) Mask8x16
-
-// GreaterEqual compares for greater than or equal.
-//
-// Asm: VPCMPUB, CPU Feature: AVX512
-func (x Uint8x32) GreaterEqual(y Uint8x32) Mask8x32
-
 // GreaterEqual compares for greater than or equal.
 //
 // Asm: VPCMPUB, CPU Feature: AVX512
 func (x Uint8x64) GreaterEqual(y Uint8x64) Mask8x64

-// GreaterEqual compares for greater than or equal.
-//
-// Asm: VPCMPUW, CPU Feature: AVX512
-func (x Uint16x8) GreaterEqual(y Uint16x8) Mask16x8
-
-// GreaterEqual compares for greater than or equal.
-//
-// Asm: VPCMPUW, CPU Feature: AVX512
-func (x Uint16x16) GreaterEqual(y Uint16x16) Mask16x16
-
 // GreaterEqual compares for greater than or equal.
 //
 // Asm: VPCMPUW, CPU Feature: AVX512
 func (x Uint16x32) GreaterEqual(y Uint16x32) Mask16x32

-// GreaterEqual compares for greater than or equal.
-//
-// Asm: VPCMPUD, CPU Feature: AVX512
-func (x Uint32x4) GreaterEqual(y Uint32x4) Mask32x4
-
-// GreaterEqual compares for greater than or equal.
-//
-// Asm: VPCMPUD, CPU Feature: AVX512
-func (x Uint32x8) GreaterEqual(y Uint32x8) Mask32x8
-
 // GreaterEqual compares for greater than or equal.
 //
 // Asm: VPCMPUD, CPU Feature: AVX512
 func (x Uint32x16) GreaterEqual(y Uint32x16) Mask32x16

-// GreaterEqual compares for greater than or equal.
-//
-// Asm: VPCMPUQ, CPU Feature: AVX512
-func (x Uint64x2) GreaterEqual(y Uint64x2) Mask64x2
-
-// GreaterEqual compares for greater than or equal.
-//
-// Asm: VPCMPUQ, CPU Feature: AVX512
-func (x Uint64x4) GreaterEqual(y Uint64x4) Mask64x4
-
 // GreaterEqual compares for greater than or equal.
 //
 // Asm: VPCMPUQ, CPU Feature: AVX512
@@ -4566,121 +4446,41 @@ func (x Float64x4) Less(y Float64x4) Mask64x4
 // Asm: VCMPPD, CPU Feature: AVX512
 func (x Float64x8) Less(y Float64x8) Mask64x8

-// Less compares for less than.
-//
-// Asm: VPCMPB, CPU Feature: AVX512
-func (x Int8x16) Less(y Int8x16) Mask8x16
-
-// Less compares for less than.
-//
-// Asm: VPCMPB, CPU Feature: AVX512
-func (x Int8x32) Less(y Int8x32) Mask8x32
-
 // Less compares for less than.
 //
 // Asm: VPCMPB, CPU Feature: AVX512
 func (x Int8x64) Less(y Int8x64) Mask8x64

-// Less compares for less than.
-//
-// Asm: VPCMPW, CPU Feature: AVX512
-func (x Int16x8) Less(y Int16x8) Mask16x8
-
-// Less compares for less than.
-//
-// Asm: VPCMPW, CPU Feature: AVX512
-func (x Int16x16) Less(y Int16x16) Mask16x16
-
 // Less compares for less than.
 //
 // Asm: VPCMPW, CPU Feature: AVX512
 func (x Int16x32) Less(y Int16x32) Mask16x32

-// Less compares for less than.
-//
-// Asm: VPCMPD, CPU Feature: AVX512
-func (x Int32x4) Less(y Int32x4) Mask32x4
-
-// Less compares for less than.
-//
-// Asm: VPCMPD, CPU Feature: AVX512
-func (x Int32x8) Less(y Int32x8) Mask32x8
-
 // Less compares for less than.
 //
 // Asm: VPCMPD, CPU Feature: AVX512
 func (x Int32x16) Less(y Int32x16) Mask32x16

-// Less compares for less than.
-//
-// Asm: VPCMPQ, CPU Feature: AVX512
-func (x Int64x2) Less(y Int64x2) Mask64x2
-
-// Less compares for less than.
-//
-// Asm: VPCMPQ, CPU Feature: AVX512
-func (x Int64x4) Less(y Int64x4) Mask64x4
-
 // Less compares for less than.
 //
 // Asm: VPCMPQ, CPU Feature: AVX512
 func (x Int64x8) Less(y Int64x8) Mask64x8

-// Less compares for less than.
-//
-// Asm: VPCMPUB, CPU Feature: AVX512
-func (x Uint8x16) Less(y Uint8x16) Mask8x16
-
-// Less compares for less than.
-//
-// Asm: VPCMPUB, CPU Feature: AVX512
-func (x Uint8x32) Less(y Uint8x32) Mask8x32
-
 // Less compares for less than.
 //
 // Asm: VPCMPUB, CPU Feature: AVX512
 func (x Uint8x64) Less(y Uint8x64) Mask8x64

-// Less compares for less than.
-//
-// Asm: VPCMPUW, CPU Feature: AVX512
-func (x Uint16x8) Less(y Uint16x8) Mask16x8
-
-// Less compares for less than.
-//
-// Asm: VPCMPUW, CPU Feature: AVX512
-func (x Uint16x16) Less(y Uint16x16) Mask16x16
-
 // Less compares for less than.
 //
 // Asm: VPCMPUW, CPU Feature: AVX512
 func (x Uint16x32) Less(y Uint16x32) Mask16x32

-// Less compares for less than.
-//
-// Asm: VPCMPUD, CPU Feature: AVX512
-func (x Uint32x4) Less(y Uint32x4) Mask32x4
-
-// Less compares for less than.
-//
-// Asm: VPCMPUD, CPU Feature: AVX512
-func (x Uint32x8) Less(y Uint32x8) Mask32x8
-
 // Less compares for less than.
 //
 // Asm: VPCMPUD, CPU Feature: AVX512
 func (x Uint32x16) Less(y Uint32x16) Mask32x16

-// Less compares for less than.
-//
-// Asm: VPCMPUQ, CPU Feature: AVX512
-func (x Uint64x2) Less(y Uint64x2) Mask64x2
-
-// Less compares for less than.
-//
-// Asm: VPCMPUQ, CPU Feature: AVX512
-func (x Uint64x4) Less(y Uint64x4) Mask64x4
-
 // Less compares for less than.
 //
 // Asm: VPCMPUQ, CPU Feature: AVX512
@@ -4718,121 +4518,41 @@ func (x Float64x4) LessEqual(y Float64x4) Mask64x4
 // Asm: VCMPPD, CPU Feature: AVX512
 func (x Float64x8) LessEqual(y Float64x8) Mask64x8

-// LessEqual compares for less than or equal.
-//
-// Asm: VPCMPB, CPU Feature: AVX512
-func (x Int8x16) LessEqual(y Int8x16) Mask8x16
-
-// LessEqual compares for less than or equal.
-//
-// Asm: VPCMPB, CPU Feature: AVX512
-func (x Int8x32) LessEqual(y Int8x32) Mask8x32
-
 // LessEqual compares for less than or equal.
 //
 // Asm: VPCMPB, CPU Feature: AVX512
 func (x Int8x64) LessEqual(y Int8x64) Mask8x64

-// LessEqual compares for less than or equal.
-//
-// Asm: VPCMPW, CPU Feature: AVX512
-func (x Int16x8) LessEqual(y Int16x8) Mask16x8
-
-// LessEqual compares for less than or equal.
-//
-// Asm: VPCMPW, CPU Feature: AVX512
-func (x Int16x16) LessEqual(y Int16x16) Mask16x16
-
 // LessEqual compares for less than or equal.
 //
 // Asm: VPCMPW, CPU Feature: AVX512
 func (x Int16x32) LessEqual(y Int16x32) Mask16x32

-// LessEqual compares for less than or equal.
-//
-// Asm: VPCMPD, CPU Feature: AVX512
-func (x Int32x4) LessEqual(y Int32x4) Mask32x4
-
-// LessEqual compares for less than or equal.
-//
-// Asm: VPCMPD, CPU Feature: AVX512
-func (x Int32x8) LessEqual(y Int32x8) Mask32x8
-
 // LessEqual compares for less than or equal.
 //
 // Asm: VPCMPD, CPU Feature: AVX512
 func (x Int32x16) LessEqual(y Int32x16) Mask32x16

-// LessEqual compares for less than or equal.
-//
-// Asm: VPCMPQ, CPU Feature: AVX512
-func (x Int64x2) LessEqual(y Int64x2) Mask64x2
-
-// LessEqual compares for less than or equal.
-//
-// Asm: VPCMPQ, CPU Feature: AVX512
-func (x Int64x4) LessEqual(y Int64x4) Mask64x4
-
 // LessEqual compares for less than or equal.
 //
 // Asm: VPCMPQ, CPU Feature: AVX512
 func (x Int64x8) LessEqual(y Int64x8) Mask64x8

-// LessEqual compares for less than or equal.
-//
-// Asm: VPCMPUB, CPU Feature: AVX512
-func (x Uint8x16) LessEqual(y Uint8x16) Mask8x16
-
-// LessEqual compares for less than or equal.
-//
-// Asm: VPCMPUB, CPU Feature: AVX512
-func (x Uint8x32) LessEqual(y Uint8x32) Mask8x32
-
 // LessEqual compares for less than or equal.
 //
 // Asm: VPCMPUB, CPU Feature: AVX512
 func (x Uint8x64) LessEqual(y Uint8x64) Mask8x64

-// LessEqual compares for less than or equal.
-//
-// Asm: VPCMPUW, CPU Feature: AVX512
-func (x Uint16x8) LessEqual(y Uint16x8) Mask16x8
-
-// LessEqual compares for less than or equal.
-//
-// Asm: VPCMPUW, CPU Feature: AVX512
-func (x Uint16x16) LessEqual(y Uint16x16) Mask16x16
-
 // LessEqual compares for less than or equal.
 //
 // Asm: VPCMPUW, CPU Feature: AVX512
 func (x Uint16x32) LessEqual(y Uint16x32) Mask16x32

-// LessEqual compares for less than or equal.
-//
-// Asm: VPCMPUD, CPU Feature: AVX512
-func (x Uint32x4) LessEqual(y Uint32x4) Mask32x4
-
-// LessEqual compares for less than or equal.
-//
-// Asm: VPCMPUD, CPU Feature: AVX512
-func (x Uint32x8) LessEqual(y Uint32x8) Mask32x8
-
 // LessEqual compares for less than or equal.
 //
 // Asm: VPCMPUD, CPU Feature: AVX512
 func (x Uint32x16) LessEqual(y Uint32x16) Mask32x16

-// LessEqual compares for less than or equal.
-//
-// Asm: VPCMPUQ, CPU Feature: AVX512
-func (x Uint64x2) LessEqual(y Uint64x2) Mask64x2
-
-// LessEqual compares for less than or equal.
-//
-// Asm: VPCMPUQ, CPU Feature: AVX512
-func (x Uint64x4) LessEqual(y Uint64x4) Mask64x4
-
 // LessEqual compares for less than or equal.
 //
 // Asm: VPCMPUQ, CPU Feature: AVX512
@@ -6644,121 +6364,41 @@ func (x Float64x4) NotEqual(y Float64x4) Mask64x4
 // Asm: VCMPPD, CPU Feature: AVX512
 func (x Float64x8) NotEqual(y Float64x8) Mask64x8

-// NotEqual compares for inequality.
-//
-// Asm: VPCMPB, CPU Feature: AVX512
-func (x Int8x16) NotEqual(y Int8x16) Mask8x16
-
-// NotEqual compares for inequality.
-//
-// Asm: VPCMPB, CPU Feature: AVX512
-func (x Int8x32) NotEqual(y Int8x32) Mask8x32
-
 // NotEqual compares for inequality.
 //
 // Asm: VPCMPB, CPU Feature: AVX512
 func (x Int8x64) NotEqual(y Int8x64) Mask8x64

-// NotEqual compares for inequality.
-//
-// Asm: VPCMPW, CPU Feature: AVX512
-func (x Int16x8) NotEqual(y Int16x8) Mask16x8
-
-// NotEqual compares for inequality.
-//
-// Asm: VPCMPW, CPU Feature: AVX512
-func (x Int16x16) NotEqual(y Int16x16) Mask16x16
-
 // NotEqual compares for inequality.
 //
 // Asm: VPCMPW, CPU Feature: AVX512
 func (x Int16x32) NotEqual(y Int16x32) Mask16x32

-// NotEqual compares for inequality.
-//
-// Asm: VPCMPD, CPU Feature: AVX512
-func (x Int32x4) NotEqual(y Int32x4) Mask32x4
-
-// NotEqual compares for inequality.
-//
-// Asm: VPCMPD, CPU Feature: AVX512
-func (x Int32x8) NotEqual(y Int32x8) Mask32x8
-
 // NotEqual compares for inequality.
 //
 // Asm: VPCMPD, CPU Feature: AVX512
 func (x Int32x16) NotEqual(y Int32x16) Mask32x16

-// NotEqual compares for inequality.
-//
-// Asm: VPCMPQ, CPU Feature: AVX512
-func (x Int64x2) NotEqual(y Int64x2) Mask64x2
-
-// NotEqual compares for inequality.
-//
-// Asm: VPCMPQ, CPU Feature: AVX512
-func (x Int64x4) NotEqual(y Int64x4) Mask64x4
-
 // NotEqual compares for inequality.
 //
 // Asm: VPCMPQ, CPU Feature: AVX512
 func (x Int64x8) NotEqual(y Int64x8) Mask64x8

-// NotEqual compares for inequality.
-//
-// Asm: VPCMPUB, CPU Feature: AVX512
-func (x Uint8x16) NotEqual(y Uint8x16) Mask8x16
-
-// NotEqual compares for inequality.
-//
-// Asm: VPCMPUB, CPU Feature: AVX512
-func (x Uint8x32) NotEqual(y Uint8x32) Mask8x32
-
 // NotEqual compares for inequality.
 //
 // Asm: VPCMPUB, CPU Feature: AVX512
 func (x Uint8x64) NotEqual(y Uint8x64) Mask8x64

-// NotEqual compares for inequality.
-//
-// Asm: VPCMPUW, CPU Feature: AVX512
-func (x Uint16x8) NotEqual(y Uint16x8) Mask16x8
-
-// NotEqual compares for inequality.
-//
-// Asm: VPCMPUW, CPU Feature: AVX512
-func (x Uint16x16) NotEqual(y Uint16x16) Mask16x16
-
 // NotEqual compares for inequality.
 //
 // Asm: VPCMPUW, CPU Feature: AVX512
 func (x Uint16x32) NotEqual(y Uint16x32) Mask16x32

-// NotEqual compares for inequality.
-//
-// Asm: VPCMPUD, CPU Feature: AVX512
-func (x Uint32x4) NotEqual(y Uint32x4) Mask32x4
-
-// NotEqual compares for inequality.
-//
-// Asm: VPCMPUD, CPU Feature: AVX512
-func (x Uint32x8) NotEqual(y Uint32x8) Mask32x8
-
 // NotEqual compares for inequality.
 //
 // Asm: VPCMPUD, CPU Feature: AVX512
 func (x Uint32x16) NotEqual(y Uint32x16) Mask32x16

-// NotEqual compares for inequality.
-//
-// Asm: VPCMPUQ, CPU Feature: AVX512
-func (x Uint64x2) NotEqual(y Uint64x2) Mask64x2
-
-// NotEqual compares for inequality.
-//
-// Asm: VPCMPUQ, CPU Feature: AVX512
-func (x Uint64x4) NotEqual(y Uint64x4) Mask64x4
-
 // NotEqual compares for inequality.
 //
 // Asm: VPCMPUQ, CPU Feature: AVX512
diff --git a/src/simd/slice_amd64.go b/src/simd/slice_amd64.go
index 8e721d9027..3ad2672a05 100644
--- a/src/simd/slice_amd64.go
+++ b/src/simd/slice_amd64.go
@@ -1500,6 +1500,642 @@ func (x Float64x8) Merge(y Float64x8, mask Mask64x8) Float64x8 {
 	return iy.blendMasked(ix, mask).AsFloat64x8()
 }

+// Less returns a mask whose elements indicate whether x < y
+//
+// Emulated, CPU Feature AVX
+func (x Int8x16) Less(y Int8x16) Mask8x16 {
+	return y.Greater(x)
+}
+
+// GreaterEqual returns a mask whose elements indicate whether x >= y
+//
+// Emulated, CPU Feature AVX
+func (x Int8x16) GreaterEqual(y Int8x16) Mask8x16 {
+	ones := x.Equal(x).AsInt8x16()
+	return y.Greater(x).AsInt8x16().Xor(ones).AsMask8x16()
+}
+
+// LessEqual returns a mask whose elements indicate whether x <= y
+//
+// Emulated, CPU Feature AVX
+func (x Int8x16) LessEqual(y Int8x16) Mask8x16 {
+	ones := x.Equal(x).AsInt8x16()
+	return x.Greater(y).AsInt8x16().Xor(ones).AsMask8x16()
+}
+
+// NotEqual returns a mask whose elements indicate whether x != y
+//
+// Emulated, CPU Feature AVX
+func (x Int8x16) NotEqual(y Int8x16) Mask8x16 {
+	ones := x.Equal(x).AsInt8x16()
+	return x.Equal(y).AsInt8x16().Xor(ones).AsMask8x16()
+}
+
+// Less returns a mask whose elements indicate whether x < y
+//
+// Emulated, CPU Feature AVX
+func (x Int16x8) Less(y Int16x8) Mask16x8 {
+	return y.Greater(x)
+}
+
+// GreaterEqual returns a mask whose elements indicate whether x >= y
+//
+// Emulated, CPU Feature AVX
+func (x Int16x8) GreaterEqual(y Int16x8) Mask16x8 {
+	ones := x.Equal(x).AsInt16x8()
+	return y.Greater(x).AsInt16x8().Xor(ones).AsMask16x8()
+}
+
+// LessEqual returns a mask whose elements indicate whether x <= y
+//
+// Emulated, CPU Feature AVX
+func (x Int16x8) LessEqual(y Int16x8) Mask16x8 {
+	ones := x.Equal(x).AsInt16x8()
+	return x.Greater(y).AsInt16x8().Xor(ones).AsMask16x8()
+}
+
+// NotEqual returns a mask whose elements indicate whether x != y
+//
+// Emulated, CPU Feature AVX
+func (x Int16x8) NotEqual(y Int16x8) Mask16x8 {
+	ones := x.Equal(x).AsInt16x8()
+	return x.Equal(y).AsInt16x8().Xor(ones).AsMask16x8()
+}
+
+// Less returns a mask whose elements indicate whether x < y
+//
+// Emulated, CPU Feature AVX
+func (x Int32x4) Less(y Int32x4) Mask32x4 {
+	return y.Greater(x)
+}
+
+// GreaterEqual returns a mask whose elements indicate whether x >= y
+//
+// Emulated, CPU Feature AVX
+func (x Int32x4) GreaterEqual(y Int32x4) Mask32x4 {
+	ones := x.Equal(x).AsInt32x4()
+	return y.Greater(x).AsInt32x4().Xor(ones).AsMask32x4()
+}
+
+// LessEqual returns a mask whose elements indicate whether x <= y
+//
+// Emulated, CPU Feature AVX
+func (x Int32x4) LessEqual(y Int32x4) Mask32x4 {
+	ones := x.Equal(x).AsInt32x4()
+	return x.Greater(y).AsInt32x4().Xor(ones).AsMask32x4()
+}
+
+// NotEqual returns a mask whose elements indicate whether x != y
+//
+// Emulated, CPU Feature AVX
+func (x Int32x4) NotEqual(y Int32x4) Mask32x4 {
+	ones := x.Equal(x).AsInt32x4()
+	return x.Equal(y).AsInt32x4().Xor(ones).AsMask32x4()
+}
+
+// Less returns a mask whose elements indicate whether x < y
+//
+// Emulated, CPU Feature AVX
+func (x Int64x2) Less(y Int64x2) Mask64x2 {
+	return y.Greater(x)
+}
+
+// GreaterEqual returns a mask whose elements indicate whether x >= y
+//
+// Emulated, CPU Feature AVX
+func (x Int64x2) GreaterEqual(y Int64x2) Mask64x2 {
+	ones := x.Equal(x).AsInt64x2()
+	return y.Greater(x).AsInt64x2().Xor(ones).AsMask64x2()
+}
+
+// LessEqual returns a mask whose elements indicate whether x <= y
+//
+// Emulated, CPU Feature AVX
+func (x Int64x2) LessEqual(y Int64x2) Mask64x2 {
+	ones := x.Equal(x).AsInt64x2()
+	return x.Greater(y).AsInt64x2().Xor(ones).AsMask64x2()
+}
+
+// NotEqual returns a mask whose elements indicate whether x != y
+//
+// Emulated, CPU Feature AVX
+func (x Int64x2) NotEqual(y Int64x2) Mask64x2 {
+	ones := x.Equal(x).AsInt64x2()
+	return x.Equal(y).AsInt64x2().Xor(ones).AsMask64x2()
+}
+
+// Less returns a mask whose elements indicate whether x < y
+//
+// Emulated, CPU Feature AVX2
+func (x Int8x32) Less(y Int8x32) Mask8x32 {
+	return y.Greater(x)
+}
+
+// GreaterEqual returns a mask whose elements indicate whether x >= y
+//
+// Emulated, CPU Feature AVX2
+func (x Int8x32) GreaterEqual(y Int8x32) Mask8x32 {
+	ones := x.Equal(x).AsInt8x32()
+	return y.Greater(x).AsInt8x32().Xor(ones).AsMask8x32()
+}
+
+// LessEqual returns a mask whose elements indicate whether x <= y
+//
+// Emulated, CPU Feature AVX2
+func (x Int8x32) LessEqual(y Int8x32) Mask8x32 {
+	ones := x.Equal(x).AsInt8x32()
+	return x.Greater(y).AsInt8x32().Xor(ones).AsMask8x32()
+}
+
+// NotEqual returns a mask whose elements indicate whether x != y
+//
+// Emulated, CPU Feature AVX2
+func (x Int8x32) NotEqual(y Int8x32) Mask8x32 {
+	ones := x.Equal(x).AsInt8x32()
+	return x.Equal(y).AsInt8x32().Xor(ones).AsMask8x32()
+}
+
+// Less returns a mask whose elements indicate whether x < y
+//
+// Emulated, CPU Feature AVX2
+func (x Int16x16) Less(y Int16x16) Mask16x16 {
+	return y.Greater(x)
+}
+
+// GreaterEqual returns a mask whose elements indicate whether x >= y
+//
+// Emulated, CPU Feature AVX2
+func (x Int16x16) GreaterEqual(y Int16x16) Mask16x16 {
+	ones := x.Equal(x).AsInt16x16()
+	return y.Greater(x).AsInt16x16().Xor(ones).AsMask16x16()
+}
+
+// LessEqual returns a mask whose elements indicate whether x <= y
+//
+// Emulated, CPU Feature AVX2
+func (x Int16x16) LessEqual(y Int16x16) Mask16x16 {
+	ones := x.Equal(x).AsInt16x16()
+	return x.Greater(y).AsInt16x16().Xor(ones).AsMask16x16()
+}
+
+// NotEqual returns a mask whose elements indicate whether x != y
+//
+// Emulated, CPU Feature AVX2
+func (x Int16x16) NotEqual(y Int16x16) Mask16x16 {
+	ones := x.Equal(x).AsInt16x16()
+	return x.Equal(y).AsInt16x16().Xor(ones).AsMask16x16()
+}
+
+// Less returns a mask whose elements indicate whether x < y
+//
+// Emulated, CPU Feature AVX2
+func (x Int32x8) Less(y Int32x8) Mask32x8 {
+	return y.Greater(x)
+}
+
+// GreaterEqual returns a mask whose elements indicate whether x >= y
+//
+// Emulated, CPU Feature AVX2
+func (x Int32x8) GreaterEqual(y Int32x8) Mask32x8 {
+	ones := x.Equal(x).AsInt32x8()
+	return y.Greater(x).AsInt32x8().Xor(ones).AsMask32x8()
+}
+
+// LessEqual returns a mask whose elements indicate whether x <= y
+//
+// Emulated, CPU Feature AVX2
+func (x Int32x8) LessEqual(y Int32x8) Mask32x8 {
+	ones := x.Equal(x).AsInt32x8()
+	return x.Greater(y).AsInt32x8().Xor(ones).AsMask32x8()
+}
+
+// NotEqual returns a mask whose elements indicate whether x != y
+//
+// Emulated, CPU Feature AVX2
+func (x Int32x8) NotEqual(y Int32x8) Mask32x8 {
+	ones := x.Equal(x).AsInt32x8()
+	return x.Equal(y).AsInt32x8().Xor(ones).AsMask32x8()
+}
+
+// Less returns a mask whose elements indicate whether x < y
+//
+// Emulated, CPU Feature AVX2
+func (x Int64x4) Less(y Int64x4) Mask64x4 {
+	return y.Greater(x)
+}
+
+// GreaterEqual returns a mask whose elements indicate whether x >= y
+//
+// Emulated, CPU Feature AVX2
+func (x Int64x4) GreaterEqual(y Int64x4) Mask64x4 {
+	ones := x.Equal(x).AsInt64x4()
+	return y.Greater(x).AsInt64x4().Xor(ones).AsMask64x4()
+}
+
+// LessEqual returns a mask whose elements indicate whether x <= y
+//
+// Emulated, CPU Feature AVX2
+func (x Int64x4) LessEqual(y Int64x4) Mask64x4 {
+	ones := x.Equal(x).AsInt64x4()
+	return x.Greater(y).AsInt64x4().Xor(ones).AsMask64x4()
+}
+
+// NotEqual returns a mask whose elements indicate whether x != y
+//
+// Emulated, CPU Feature AVX2
+func (x Int64x4) NotEqual(y Int64x4) Mask64x4 {
+	ones := x.Equal(x).AsInt64x4()
+	return x.Equal(y).AsInt64x4().Xor(ones).AsMask64x4()
+}
+
+// Greater returns a mask whose elements indicate whether x > y
+//
+// Emulated, CPU Feature AVX2
+func (x Uint8x16) Greater(y Uint8x16) Mask8x16 {
+	a, b := x.AsInt8x16(), y.AsInt8x16()
+	signs := BroadcastInt8x16(-1 << (8 - 1))
+	return a.Xor(signs).Greater(b.Xor(signs))
+}
+
+// Less returns a mask whose elements indicate whether x < y
+//
+// Emulated, CPU Feature AVX2
+func (x Uint8x16) Less(y Uint8x16) Mask8x16 {
+	a, b := x.AsInt8x16(), y.AsInt8x16()
+	signs := BroadcastInt8x16(-1 << (8 - 1))
+	return b.Xor(signs).Greater(a.Xor(signs))
+}
+
+// GreaterEqual returns a mask whose elements indicate whether x >= y
+//
+// Emulated, CPU Feature AVX2
+func (x Uint8x16) GreaterEqual(y Uint8x16) Mask8x16 {
+	a, b := x.AsInt8x16(), y.AsInt8x16()
+	ones := x.Equal(x).AsInt8x16()
+	signs := BroadcastInt8x16(-1 << (8 - 1))
+	return b.Xor(signs).Greater(a.Xor(signs)).AsInt8x16().Xor(ones).AsMask8x16()
+}
+
+// LessEqual returns a mask whose elements indicate whether x <= y
+//
+// Emulated, CPU Feature AVX2
+func (x Uint8x16) LessEqual(y Uint8x16) Mask8x16 {
+	a, b := x.AsInt8x16(), y.AsInt8x16()
+	ones := x.Equal(x).AsInt8x16()
+	signs := BroadcastInt8x16(-1 << (8 - 1))
+	return a.Xor(signs).Greater(b.Xor(signs)).AsInt8x16().Xor(ones).AsMask8x16()
+}
+
+// NotEqual returns a mask whose elements indicate whether x != y
+//
+// Emulated, CPU Feature AVX
+func (x Uint8x16) NotEqual(y Uint8x16) Mask8x16 {
+	a, b := x.AsInt8x16(), y.AsInt8x16()
+	ones := x.Equal(x).AsInt8x16()
+	return a.Equal(b).AsInt8x16().Xor(ones).AsMask8x16()
+}
+
+// Greater returns a mask whose elements indicate whether x > y
+//
+// Emulated, CPU Feature AVX
+func (x Uint16x8) Greater(y Uint16x8) Mask16x8 {
+	a, b := x.AsInt16x8(), y.AsInt16x8()
+	ones := x.Equal(x).AsInt16x8()
+	signs := ones.ShiftAllLeft(16 - 1)
+	return a.Xor(signs).Greater(b.Xor(signs))
+}
+
+// Less returns a mask whose elements indicate whether x < y
+//
+// Emulated, CPU Feature AVX
+func (x Uint16x8) Less(y Uint16x8) Mask16x8 {
+	a, b := x.AsInt16x8(), y.AsInt16x8()
+	ones := x.Equal(x).AsInt16x8()
+	signs := ones.ShiftAllLeft(16 - 1)
+	return b.Xor(signs).Greater(a.Xor(signs))
+}
+
+// GreaterEqual returns a mask whose elements indicate whether x >= y
+//
+// Emulated, CPU Feature AVX
+func (x Uint16x8) GreaterEqual(y Uint16x8) Mask16x8 {
+	a, b := x.AsInt16x8(), y.AsInt16x8()
+	ones := x.Equal(x).AsInt16x8()
+	signs := ones.ShiftAllLeft(16 - 1)
+	return b.Xor(signs).Greater(a.Xor(signs)).AsInt16x8().Xor(ones).AsMask16x8()
+}
+
+// LessEqual returns a mask whose elements indicate whether x <= y
+//
+// Emulated, CPU Feature AVX
+func (x Uint16x8) LessEqual(y Uint16x8) Mask16x8 {
+	a, b := x.AsInt16x8(), y.AsInt16x8()
+	ones := x.Equal(x).AsInt16x8()
+	signs := ones.ShiftAllLeft(16 - 1)
+	return a.Xor(signs).Greater(b.Xor(signs)).AsInt16x8().Xor(ones).AsMask16x8()
+}
+
+// NotEqual returns a mask whose elements indicate whether x != y
+//
+// Emulated, CPU Feature AVX
+func (x Uint16x8) NotEqual(y Uint16x8) Mask16x8 {
+	a, b := x.AsInt16x8(), y.AsInt16x8()
+	ones := x.Equal(x).AsInt16x8()
+	return a.Equal(b).AsInt16x8().Xor(ones).AsMask16x8()
+}
+
+// Greater returns a mask whose elements indicate whether x > y
+//
+// Emulated, CPU Feature AVX
+func (x Uint32x4) Greater(y Uint32x4) Mask32x4 {
+	a, b := x.AsInt32x4(), y.AsInt32x4()
+	ones := x.Equal(x).AsInt32x4()
+	signs := ones.ShiftAllLeft(32 - 1)
+	return a.Xor(signs).Greater(b.Xor(signs))
+}
+
+// Less returns a mask whose elements indicate whether x < y
+//
+// Emulated, CPU Feature AVX
+func (x Uint32x4) Less(y Uint32x4) Mask32x4 {
+	a, b := x.AsInt32x4(), y.AsInt32x4()
+	ones := x.Equal(x).AsInt32x4()
+	signs := ones.ShiftAllLeft(32 - 1)
+	return b.Xor(signs).Greater(a.Xor(signs))
+}
+
+// GreaterEqual returns a mask whose elements indicate whether x >= y
+//
+// Emulated, CPU Feature AVX
+func (x Uint32x4) GreaterEqual(y Uint32x4) Mask32x4 {
+	a, b := x.AsInt32x4(), y.AsInt32x4()
+	ones := x.Equal(x).AsInt32x4()
+	signs := ones.ShiftAllLeft(32 - 1)
+	return b.Xor(signs).Greater(a.Xor(signs)).AsInt32x4().Xor(ones).AsMask32x4()
+}
+
+// LessEqual returns a mask whose elements indicate whether x <= y
+//
+// Emulated, CPU Feature AVX
+func (x Uint32x4) LessEqual(y Uint32x4) Mask32x4 {
+	a, b := x.AsInt32x4(), y.AsInt32x4()
+	ones := x.Equal(x).AsInt32x4()
+	signs := ones.ShiftAllLeft(32 - 1)
+	return a.Xor(signs).Greater(b.Xor(signs)).AsInt32x4().Xor(ones).AsMask32x4()
+}
+
+// NotEqual returns a mask whose elements indicate whether x != y
+//
+// Emulated, CPU Feature AVX
+func (x Uint32x4) NotEqual(y Uint32x4) Mask32x4 {
+	a, b := x.AsInt32x4(), y.AsInt32x4()
+	ones := x.Equal(x).AsInt32x4()
+	return a.Equal(b).AsInt32x4().Xor(ones).AsMask32x4()
+}
+
+// Greater returns a mask whose elements indicate whether x > y
+//
+// Emulated, CPU Feature AVX
+func (x Uint64x2) Greater(y Uint64x2) Mask64x2 {
+	a, b := x.AsInt64x2(), y.AsInt64x2()
+	ones := x.Equal(x).AsInt64x2()
+	signs := ones.ShiftAllLeft(64 - 1)
+	return a.Xor(signs).Greater(b.Xor(signs))
+}
+
+// Less returns a mask whose elements indicate whether x < y
+//
+// Emulated, CPU Feature AVX
+func (x Uint64x2) Less(y Uint64x2) Mask64x2 {
+	a, b := x.AsInt64x2(), y.AsInt64x2()
+	ones := x.Equal(x).AsInt64x2()
+	signs := ones.ShiftAllLeft(64 - 1)
+	return b.Xor(signs).Greater(a.Xor(signs))
+}
+
+// GreaterEqual returns a mask whose elements indicate whether x >= y
+//
+// Emulated, CPU Feature AVX
+func (x Uint64x2) GreaterEqual(y Uint64x2) Mask64x2 {
+	a, b := x.AsInt64x2(), y.AsInt64x2()
+	ones := x.Equal(x).AsInt64x2()
+	signs := ones.ShiftAllLeft(64 - 1)
+	return b.Xor(signs).Greater(a.Xor(signs)).AsInt64x2().Xor(ones).AsMask64x2()
+}
+
+// LessEqual returns a mask whose elements indicate whether x <= y
+//
+// Emulated, CPU Feature AVX
+func (x Uint64x2) LessEqual(y Uint64x2) Mask64x2 {
+	a, b := x.AsInt64x2(), y.AsInt64x2()
+	ones := x.Equal(x).AsInt64x2()
+	signs := ones.ShiftAllLeft(64 - 1)
+	return a.Xor(signs).Greater(b.Xor(signs)).AsInt64x2().Xor(ones).AsMask64x2()
+}
+
+// NotEqual returns a mask whose elements indicate whether x != y
+//
+// Emulated, CPU Feature AVX
+func (x Uint64x2) NotEqual(y Uint64x2) Mask64x2 {
+	a, b := x.AsInt64x2(), y.AsInt64x2()
+	ones := x.Equal(x).AsInt64x2()
+	return a.Equal(b).AsInt64x2().Xor(ones).AsMask64x2()
+}
+
+// Greater returns a mask whose elements indicate whether x > y
+//
+// Emulated, CPU Feature AVX2
+func (x Uint8x32) Greater(y Uint8x32) Mask8x32 {
+	a, b := x.AsInt8x32(), y.AsInt8x32()
+	signs := BroadcastInt8x32(-1 << (8 - 1))
+	return a.Xor(signs).Greater(b.Xor(signs))
+}
+
+// Less returns a mask whose elements indicate whether x < y
+//
+// Emulated, CPU Feature AVX2
+func (x Uint8x32) Less(y Uint8x32) Mask8x32 {
+	a, b := x.AsInt8x32(), y.AsInt8x32()
+	signs := BroadcastInt8x32(-1 << (8 - 1))
+	return b.Xor(signs).Greater(a.Xor(signs))
+}
+
+// GreaterEqual returns a mask whose elements indicate whether x >= y
+//
+// Emulated, CPU Feature AVX2
+func (x Uint8x32) GreaterEqual(y Uint8x32) Mask8x32 {
+	a, b := x.AsInt8x32(), y.AsInt8x32()
+	ones := x.Equal(x).AsInt8x32()
+	signs := BroadcastInt8x32(-1 << (8 - 1))
+	return b.Xor(signs).Greater(a.Xor(signs)).AsInt8x32().Xor(ones).AsMask8x32()
+}
+
+// LessEqual returns a mask whose elements indicate whether x <= y
+//
+// Emulated, CPU Feature AVX2
+func (x Uint8x32) LessEqual(y Uint8x32) Mask8x32 {
+	a, b := x.AsInt8x32(), y.AsInt8x32()
+	ones := x.Equal(x).AsInt8x32()
+	signs := BroadcastInt8x32(-1 << (8 - 1))
+	return a.Xor(signs).Greater(b.Xor(signs)).AsInt8x32().Xor(ones).AsMask8x32()
+}
+
+// NotEqual returns a mask whose elements indicate whether x != y
+//
+// Emulated, CPU Feature AVX2
+func (x Uint8x32) NotEqual(y Uint8x32) Mask8x32 {
+	a, b := x.AsInt8x32(), y.AsInt8x32()
+	ones := x.Equal(x).AsInt8x32()
+	return a.Equal(b).AsInt8x32().Xor(ones).AsMask8x32()
+}
+
+// Greater returns a mask whose elements indicate whether x > y
+//
+// Emulated, CPU Feature AVX2
+func (x Uint16x16) Greater(y Uint16x16) Mask16x16 {
+	a, b := x.AsInt16x16(), y.AsInt16x16()
+	ones := x.Equal(x).AsInt16x16()
+	signs := ones.ShiftAllLeft(16 - 1)
+	return a.Xor(signs).Greater(b.Xor(signs))
+}
+
+// Less returns a mask whose elements indicate whether x < y
+//
+// Emulated, CPU Feature AVX2
+func (x Uint16x16) Less(y Uint16x16) Mask16x16 {
+	a, b := x.AsInt16x16(), y.AsInt16x16()
+	ones := x.Equal(x).AsInt16x16()
+	signs := ones.ShiftAllLeft(16 - 1)
+	return b.Xor(signs).Greater(a.Xor(signs))
+}
+
+// GreaterEqual returns a mask whose elements indicate whether x >= y
+//
+// Emulated, CPU Feature AVX2
+func (x Uint16x16) GreaterEqual(y Uint16x16) Mask16x16 {
+	a, b := x.AsInt16x16(), y.AsInt16x16()
+	ones := x.Equal(x).AsInt16x16()
+	signs := ones.ShiftAllLeft(16 - 1)
+	return b.Xor(signs).Greater(a.Xor(signs)).AsInt16x16().Xor(ones).AsMask16x16()
+}
+
+// LessEqual returns a mask whose elements indicate whether x <= y
+//
+// Emulated, CPU Feature AVX2
+func (x Uint16x16) LessEqual(y Uint16x16) Mask16x16 {
+	a, b := x.AsInt16x16(), y.AsInt16x16()
+	ones := x.Equal(x).AsInt16x16()
+	signs := ones.ShiftAllLeft(16 - 1)
+	return a.Xor(signs).Greater(b.Xor(signs)).AsInt16x16().Xor(ones).AsMask16x16()
+}
+
+// NotEqual returns a mask whose elements indicate whether x != y
+//
+// Emulated, CPU Feature AVX2
+func (x Uint16x16) NotEqual(y Uint16x16) Mask16x16 {
+	a, b := x.AsInt16x16(), y.AsInt16x16()
+	ones := x.Equal(x).AsInt16x16()
+	return a.Equal(b).AsInt16x16().Xor(ones).AsMask16x16()
+}
+
+// Greater returns a mask whose elements indicate whether x > y
+//
+// Emulated, CPU Feature AVX2
+func (x Uint32x8) Greater(y Uint32x8) Mask32x8 {
+	a, b := x.AsInt32x8(), y.AsInt32x8()
+	ones := x.Equal(x).AsInt32x8()
+	signs := ones.ShiftAllLeft(32 - 1)
+	return a.Xor(signs).Greater(b.Xor(signs))
+}
+
+// Less returns a mask whose elements indicate whether x < y
+//
+// Emulated, CPU Feature AVX2
+func (x Uint32x8) Less(y Uint32x8) Mask32x8 {
+	a, b := x.AsInt32x8(), y.AsInt32x8()
+	ones := x.Equal(x).AsInt32x8()
+	signs := ones.ShiftAllLeft(32 - 1)
+	return b.Xor(signs).Greater(a.Xor(signs))
+}
+
+// GreaterEqual returns a mask whose elements indicate whether x >= y
+//
+// Emulated, CPU Feature AVX2
+func (x Uint32x8) GreaterEqual(y Uint32x8) Mask32x8 {
+	a, b := x.AsInt32x8(), y.AsInt32x8()
+	ones := x.Equal(x).AsInt32x8()
+	signs := ones.ShiftAllLeft(32 - 1)
+	return b.Xor(signs).Greater(a.Xor(signs)).AsInt32x8().Xor(ones).AsMask32x8()
+}
+
+// LessEqual returns a mask whose elements indicate whether x <= y
+//
+// Emulated, CPU Feature AVX2
+func (x Uint32x8) LessEqual(y Uint32x8) Mask32x8 {
+	a, b := x.AsInt32x8(), y.AsInt32x8()
+	ones := x.Equal(x).AsInt32x8()
+	signs := ones.ShiftAllLeft(32 - 1)
+	return a.Xor(signs).Greater(b.Xor(signs)).AsInt32x8().Xor(ones).AsMask32x8()
+}
+
+// NotEqual returns a mask whose elements indicate whether x != y
+//
+// Emulated, CPU Feature AVX2
+func (x Uint32x8) NotEqual(y Uint32x8) Mask32x8 {
+	a, b := x.AsInt32x8(), y.AsInt32x8()
+	ones := x.Equal(x).AsInt32x8()
+	return a.Equal(b).AsInt32x8().Xor(ones).AsMask32x8()
+}
+
+// Greater returns a mask whose elements indicate whether x > y
+//
+// Emulated, CPU Feature AVX2
+func (x Uint64x4) Greater(y Uint64x4) Mask64x4 {
+	a, b := x.AsInt64x4(), y.AsInt64x4()
+	ones := x.Equal(x).AsInt64x4()
+	signs := ones.ShiftAllLeft(64 - 1)
+	return a.Xor(signs).Greater(b.Xor(signs))
+}
+
+// Less returns a mask whose elements indicate whether x < y
+//
+// Emulated, CPU Feature AVX2
+func (x Uint64x4) Less(y Uint64x4) Mask64x4 {
+	a, b := x.AsInt64x4(), y.AsInt64x4()
+	ones := x.Equal(x).AsInt64x4()
+	signs := ones.ShiftAllLeft(64 - 1)
+	return b.Xor(signs).Greater(a.Xor(signs))
+}
+
+// GreaterEqual returns a mask whose elements indicate whether x >= y
+//
+// Emulated, CPU Feature AVX2
+func (x Uint64x4) GreaterEqual(y Uint64x4) Mask64x4 {
+	a, b := x.AsInt64x4(), y.AsInt64x4()
+	ones := x.Equal(x).AsInt64x4()
+	signs := ones.ShiftAllLeft(64 - 1)
+	return b.Xor(signs).Greater(a.Xor(signs)).AsInt64x4().Xor(ones).AsMask64x4()
+}
+
+// LessEqual returns a mask whose elements indicate whether x <= y
+//
+// Emulated, CPU Feature AVX2
+func (x Uint64x4) LessEqual(y Uint64x4) Mask64x4 {
+	a, b := x.AsInt64x4(), y.AsInt64x4()
+	ones := x.Equal(x).AsInt64x4()
+	signs := ones.ShiftAllLeft(64 - 1)
+	return a.Xor(signs).Greater(b.Xor(signs)).AsInt64x4().Xor(ones).AsMask64x4()
+}
+
+// NotEqual returns a mask whose elements indicate whether x != y
+//
+// Emulated, CPU Feature AVX2
+func (x Uint64x4) NotEqual(y Uint64x4) Mask64x4 {
+	a, b := x.AsInt64x4(), y.AsInt64x4()
+	ones := x.Equal(x).AsInt64x4()
+	return a.Equal(b).AsInt64x4().Xor(ones).AsMask64x4()
+}
+
 // BroadcastInt8x16 returns a vector with the input
 // x assigned to all elements of the output.
 //
-- 
2.52.0
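
For readers of the generated code: every unsigned comparison above reduces
to one identity, namely that flipping the sign bit of both operands turns
an unsigned compare into a signed one, and the derived predicates (>=, <=,
!=) are lane-wise complements of (>, >, ==), taken by Xor with the
all-ones vector that x.Equal(x) produces. Below is a minimal scalar sketch
of both tricks in plain Go; the helper names are illustrative and are not
part of package simd.

	package main

	import "fmt"

	const sign16 = 1 << 15 // per-lane sign bit for 16-bit elements

	// greaterUnsigned mirrors one lane of the emulated Uint16xN.Greater:
	// bias both operands by the sign bit, then compare as signed.
	func greaterUnsigned(x, y uint16) bool {
		return int16(x^sign16) > int16(y^sign16)
	}

	// greaterEqualUnsigned mirrors the emulated GreaterEqual: x >= y is
	// the complement of y > x; the vector code takes that complement by
	// Xor-ing the comparison mask with all ones.
	func greaterEqualUnsigned(x, y uint16) bool {
		return !greaterUnsigned(y, x)
	}

	func main() {
		fmt.Println(greaterUnsigned(0xFFFF, 1)) // true: 65535 > 1 unsigned
		fmt.Println(int16(-1) > int16(1))       // false: a raw signed compare gets these lanes wrong
		fmt.Println(greaterEqualUnsigned(7, 7)) // true
	}

In the vector versions, signs plays the role of sign16 and the trailing
.Xor(ones) is the scalar negation; for 8-bit lanes signs is built with a
broadcast rather than a shift, which is what pulls those methods up to AVX2.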