From 2080415aa2e65dc174d2f8f8876cc16aa9c2b7c3 Mon Sep 17 00:00:00 2001
From: David Chase
Date: Fri, 1 Aug 2025 09:23:45 -0400
Subject: [PATCH] [dev.simd] simd: add emulations for missing AVX2 comparisons

This also removes the AVX512 versions of the operations that would
use the same names but would not run on AVX2-only hardware.

Includes files generated by simdgen CL 692355

Change-Id: Iff29042245b7688133fed49a03e681e85235b8a8
Reviewed-on: https://go-review.googlesource.com/c/go/+/692335
LUCI-TryBot-Result: Go LUCI
Reviewed-by: Junyang Shao
---
 src/cmd/compile/internal/amd64/simdssa.go     |   16 -
 .../compile/internal/ssa/_gen/simdAMD64.rules |   72 -
 .../compile/internal/ssa/_gen/simdAMD64ops.go |   16 -
 .../internal/ssa/_gen/simdgenericOps.go       |   72 -
 src/cmd/compile/internal/ssa/opGen.go         |  704 --------
 src/cmd/compile/internal/ssa/rewriteAMD64.go  | 1440 -----------------
 .../compile/internal/ssagen/simdintrinsics.go |   72 -
 src/simd/compare_test.go                      |  166 +-
 src/simd/genfiles.go                          |  136 ++
 src/simd/ops_amd64.go                         |  360 -----
 src/simd/slice_amd64.go                       |  636 ++++++++
 11 files changed, 859 insertions(+), 2831 deletions(-)

diff --git a/src/cmd/compile/internal/amd64/simdssa.go b/src/cmd/compile/internal/amd64/simdssa.go
index 73a947a88a..3ec8b484fb 100644
--- a/src/cmd/compile/internal/amd64/simdssa.go
+++ b/src/cmd/compile/internal/amd64/simdssa.go
@@ -886,29 +886,13 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
 	case ssa.OpAMD64VCMPPS512,
 		ssa.OpAMD64VCMPPD512,
-		ssa.OpAMD64VPCMPUB128,
-		ssa.OpAMD64VPCMPUB256,
 		ssa.OpAMD64VPCMPUB512,
-		ssa.OpAMD64VPCMPUW128,
-		ssa.OpAMD64VPCMPUW256,
 		ssa.OpAMD64VPCMPUW512,
-		ssa.OpAMD64VPCMPUD128,
-		ssa.OpAMD64VPCMPUD256,
 		ssa.OpAMD64VPCMPUD512,
-		ssa.OpAMD64VPCMPUQ128,
-		ssa.OpAMD64VPCMPUQ256,
 		ssa.OpAMD64VPCMPUQ512,
-		ssa.OpAMD64VPCMPB128,
-		ssa.OpAMD64VPCMPB256,
 		ssa.OpAMD64VPCMPB512,
-		ssa.OpAMD64VPCMPW128,
-		ssa.OpAMD64VPCMPW256,
 		ssa.OpAMD64VPCMPW512,
-		ssa.OpAMD64VPCMPD128,
-		ssa.OpAMD64VPCMPD256,
 		ssa.OpAMD64VPCMPD512,
-		ssa.OpAMD64VPCMPQ128,
-		ssa.OpAMD64VPCMPQ256,
 		ssa.OpAMD64VPCMPQ512:
 		p = simdV2kImm8(s, v)
diff --git a/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules b/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules
index e7c5a1a97d..9670f035ba 100644
--- a/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules
+++ b/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules
@@ -590,17 +590,9 @@
 (GreaterInt64x2 ...) => (VPCMPGTQ128 ...)
 (GreaterInt64x4 ...) => (VPCMPGTQ256 ...)
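The emulations themselves land in the generated src/simd files (genfiles.go and slice_amd64.go in the diffstat above); the compiler hunks in this CL only delete the 128- and 256-bit AVX512 lowerings that those emulations replace. For integers, AVX2 provides only signed greater-than (VPCMPGT*) and equality (VPCMPEQ*), so every other comparison must be composed from those two. The following is a minimal per-lane sketch in plain Go of the standard identities (sign-bit flip for unsigned compares, operand swap for Less, mask complement for the or-equal and not-equal forms); it illustrates the identities only and is not the generated code:

package main

import "fmt"

// greaterUint8 emulates unsigned x > y using only a signed compare,
// by flipping the sign bit of both operands first. On AVX2 this is
// one VPXOR with 0x80 in every byte, then VPCMPGTB.
func greaterUint8(x, y uint8) bool {
	const signBit = 0x80
	return int8(x^signBit) > int8(y^signBit)
}

// The remaining forms are each one cheap step away:
//   x <  y  ==   y > x      (swap operands)
//   x >= y  ==  !(y > x)    (swap, then complement the mask: VPXOR all-ones)
//   x <= y  ==  !(x > y)    (complement the mask)
//   x != y  ==  !(x == y)   (VPCMPEQ*, then complement the mask)
func lessUint8(x, y uint8) bool         { return greaterUint8(y, x) }
func greaterEqualUint8(x, y uint8) bool { return !greaterUint8(y, x) }

func main() {
	a, b := uint8(0xFF), uint8(0x01)
	fmt.Println(greaterUint8(a, b))      // true: 255 > 1 unsigned
	fmt.Println(int8(a) > int8(b))       // false: reinterpreted signed, -1 > 1
	fmt.Println(greaterEqualUint8(b, b)) // true
}

The same swap-and-complement identities cover the signed GreaterEqual, Less, LessEqual, and NotEqual forms, which is why only the 512-bit AVX512 variants of these ops need to remain in the rules and op tables below.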
(GreaterInt64x8 x y) => (VPMOVMToVec64x8 (VPCMPGTQ512 x y)) -(GreaterUint8x16 x y) => (VPMOVMToVec8x16 (VPCMPUB128 [14] x y)) -(GreaterUint8x32 x y) => (VPMOVMToVec8x32 (VPCMPUB256 [14] x y)) (GreaterUint8x64 x y) => (VPMOVMToVec8x64 (VPCMPUB512 [14] x y)) -(GreaterUint16x8 x y) => (VPMOVMToVec16x8 (VPCMPUW128 [14] x y)) -(GreaterUint16x16 x y) => (VPMOVMToVec16x16 (VPCMPUW256 [14] x y)) (GreaterUint16x32 x y) => (VPMOVMToVec16x32 (VPCMPUW512 [14] x y)) -(GreaterUint32x4 x y) => (VPMOVMToVec32x4 (VPCMPUD128 [14] x y)) -(GreaterUint32x8 x y) => (VPMOVMToVec32x8 (VPCMPUD256 [14] x y)) (GreaterUint32x16 x y) => (VPMOVMToVec32x16 (VPCMPUD512 [14] x y)) -(GreaterUint64x2 x y) => (VPMOVMToVec64x2 (VPCMPUQ128 [14] x y)) -(GreaterUint64x4 x y) => (VPMOVMToVec64x4 (VPCMPUQ256 [14] x y)) (GreaterUint64x8 x y) => (VPMOVMToVec64x8 (VPCMPUQ512 [14] x y)) (GreaterEqualFloat32x4 x y) => (VCMPPS128 [13] x y) (GreaterEqualFloat32x8 x y) => (VCMPPS256 [13] x y) @@ -608,29 +600,13 @@ (GreaterEqualFloat64x2 x y) => (VCMPPD128 [13] x y) (GreaterEqualFloat64x4 x y) => (VCMPPD256 [13] x y) (GreaterEqualFloat64x8 x y) => (VPMOVMToVec64x8 (VCMPPD512 [13] x y)) -(GreaterEqualInt8x16 x y) => (VPMOVMToVec8x16 (VPCMPB128 [13] x y)) -(GreaterEqualInt8x32 x y) => (VPMOVMToVec8x32 (VPCMPB256 [13] x y)) (GreaterEqualInt8x64 x y) => (VPMOVMToVec8x64 (VPCMPB512 [13] x y)) -(GreaterEqualInt16x8 x y) => (VPMOVMToVec16x8 (VPCMPW128 [13] x y)) -(GreaterEqualInt16x16 x y) => (VPMOVMToVec16x16 (VPCMPW256 [13] x y)) (GreaterEqualInt16x32 x y) => (VPMOVMToVec16x32 (VPCMPW512 [13] x y)) -(GreaterEqualInt32x4 x y) => (VPMOVMToVec32x4 (VPCMPD128 [13] x y)) -(GreaterEqualInt32x8 x y) => (VPMOVMToVec32x8 (VPCMPD256 [13] x y)) (GreaterEqualInt32x16 x y) => (VPMOVMToVec32x16 (VPCMPD512 [13] x y)) -(GreaterEqualInt64x2 x y) => (VPMOVMToVec64x2 (VPCMPQ128 [13] x y)) -(GreaterEqualInt64x4 x y) => (VPMOVMToVec64x4 (VPCMPQ256 [13] x y)) (GreaterEqualInt64x8 x y) => (VPMOVMToVec64x8 (VPCMPQ512 [13] x y)) -(GreaterEqualUint8x16 x y) => (VPMOVMToVec8x16 (VPCMPUB128 [13] x y)) -(GreaterEqualUint8x32 x y) => (VPMOVMToVec8x32 (VPCMPUB256 [13] x y)) (GreaterEqualUint8x64 x y) => (VPMOVMToVec8x64 (VPCMPUB512 [13] x y)) -(GreaterEqualUint16x8 x y) => (VPMOVMToVec16x8 (VPCMPUW128 [13] x y)) -(GreaterEqualUint16x16 x y) => (VPMOVMToVec16x16 (VPCMPUW256 [13] x y)) (GreaterEqualUint16x32 x y) => (VPMOVMToVec16x32 (VPCMPUW512 [13] x y)) -(GreaterEqualUint32x4 x y) => (VPMOVMToVec32x4 (VPCMPUD128 [13] x y)) -(GreaterEqualUint32x8 x y) => (VPMOVMToVec32x8 (VPCMPUD256 [13] x y)) (GreaterEqualUint32x16 x y) => (VPMOVMToVec32x16 (VPCMPUD512 [13] x y)) -(GreaterEqualUint64x2 x y) => (VPMOVMToVec64x2 (VPCMPUQ128 [13] x y)) -(GreaterEqualUint64x4 x y) => (VPMOVMToVec64x4 (VPCMPUQ256 [13] x y)) (GreaterEqualUint64x8 x y) => (VPMOVMToVec64x8 (VPCMPUQ512 [13] x y)) (GreaterEqualMaskedFloat32x4 x y mask) => (VPMOVMToVec32x4 (VCMPPSMasked128 [13] x y (VPMOVVec32x4ToM mask))) (GreaterEqualMaskedFloat32x8 x y mask) => (VPMOVMToVec32x8 (VCMPPSMasked256 [13] x y (VPMOVVec32x8ToM mask))) @@ -710,29 +686,13 @@ (LessFloat64x2 x y) => (VCMPPD128 [1] x y) (LessFloat64x4 x y) => (VCMPPD256 [1] x y) (LessFloat64x8 x y) => (VPMOVMToVec64x8 (VCMPPD512 [1] x y)) -(LessInt8x16 x y) => (VPMOVMToVec8x16 (VPCMPB128 [1] x y)) -(LessInt8x32 x y) => (VPMOVMToVec8x32 (VPCMPB256 [1] x y)) (LessInt8x64 x y) => (VPMOVMToVec8x64 (VPCMPB512 [1] x y)) -(LessInt16x8 x y) => (VPMOVMToVec16x8 (VPCMPW128 [1] x y)) -(LessInt16x16 x y) => (VPMOVMToVec16x16 (VPCMPW256 [1] x y)) (LessInt16x32 x y) => 
(VPMOVMToVec16x32 (VPCMPW512 [1] x y)) -(LessInt32x4 x y) => (VPMOVMToVec32x4 (VPCMPD128 [1] x y)) -(LessInt32x8 x y) => (VPMOVMToVec32x8 (VPCMPD256 [1] x y)) (LessInt32x16 x y) => (VPMOVMToVec32x16 (VPCMPD512 [1] x y)) -(LessInt64x2 x y) => (VPMOVMToVec64x2 (VPCMPQ128 [1] x y)) -(LessInt64x4 x y) => (VPMOVMToVec64x4 (VPCMPQ256 [1] x y)) (LessInt64x8 x y) => (VPMOVMToVec64x8 (VPCMPQ512 [1] x y)) -(LessUint8x16 x y) => (VPMOVMToVec8x16 (VPCMPUB128 [1] x y)) -(LessUint8x32 x y) => (VPMOVMToVec8x32 (VPCMPUB256 [1] x y)) (LessUint8x64 x y) => (VPMOVMToVec8x64 (VPCMPUB512 [1] x y)) -(LessUint16x8 x y) => (VPMOVMToVec16x8 (VPCMPUW128 [1] x y)) -(LessUint16x16 x y) => (VPMOVMToVec16x16 (VPCMPUW256 [1] x y)) (LessUint16x32 x y) => (VPMOVMToVec16x32 (VPCMPUW512 [1] x y)) -(LessUint32x4 x y) => (VPMOVMToVec32x4 (VPCMPUD128 [1] x y)) -(LessUint32x8 x y) => (VPMOVMToVec32x8 (VPCMPUD256 [1] x y)) (LessUint32x16 x y) => (VPMOVMToVec32x16 (VPCMPUD512 [1] x y)) -(LessUint64x2 x y) => (VPMOVMToVec64x2 (VPCMPUQ128 [1] x y)) -(LessUint64x4 x y) => (VPMOVMToVec64x4 (VPCMPUQ256 [1] x y)) (LessUint64x8 x y) => (VPMOVMToVec64x8 (VPCMPUQ512 [1] x y)) (LessEqualFloat32x4 x y) => (VCMPPS128 [2] x y) (LessEqualFloat32x8 x y) => (VCMPPS256 [2] x y) @@ -740,29 +700,13 @@ (LessEqualFloat64x2 x y) => (VCMPPD128 [2] x y) (LessEqualFloat64x4 x y) => (VCMPPD256 [2] x y) (LessEqualFloat64x8 x y) => (VPMOVMToVec64x8 (VCMPPD512 [2] x y)) -(LessEqualInt8x16 x y) => (VPMOVMToVec8x16 (VPCMPB128 [2] x y)) -(LessEqualInt8x32 x y) => (VPMOVMToVec8x32 (VPCMPB256 [2] x y)) (LessEqualInt8x64 x y) => (VPMOVMToVec8x64 (VPCMPB512 [2] x y)) -(LessEqualInt16x8 x y) => (VPMOVMToVec16x8 (VPCMPW128 [2] x y)) -(LessEqualInt16x16 x y) => (VPMOVMToVec16x16 (VPCMPW256 [2] x y)) (LessEqualInt16x32 x y) => (VPMOVMToVec16x32 (VPCMPW512 [2] x y)) -(LessEqualInt32x4 x y) => (VPMOVMToVec32x4 (VPCMPD128 [2] x y)) -(LessEqualInt32x8 x y) => (VPMOVMToVec32x8 (VPCMPD256 [2] x y)) (LessEqualInt32x16 x y) => (VPMOVMToVec32x16 (VPCMPD512 [2] x y)) -(LessEqualInt64x2 x y) => (VPMOVMToVec64x2 (VPCMPQ128 [2] x y)) -(LessEqualInt64x4 x y) => (VPMOVMToVec64x4 (VPCMPQ256 [2] x y)) (LessEqualInt64x8 x y) => (VPMOVMToVec64x8 (VPCMPQ512 [2] x y)) -(LessEqualUint8x16 x y) => (VPMOVMToVec8x16 (VPCMPUB128 [2] x y)) -(LessEqualUint8x32 x y) => (VPMOVMToVec8x32 (VPCMPUB256 [2] x y)) (LessEqualUint8x64 x y) => (VPMOVMToVec8x64 (VPCMPUB512 [2] x y)) -(LessEqualUint16x8 x y) => (VPMOVMToVec16x8 (VPCMPUW128 [2] x y)) -(LessEqualUint16x16 x y) => (VPMOVMToVec16x16 (VPCMPUW256 [2] x y)) (LessEqualUint16x32 x y) => (VPMOVMToVec16x32 (VPCMPUW512 [2] x y)) -(LessEqualUint32x4 x y) => (VPMOVMToVec32x4 (VPCMPUD128 [2] x y)) -(LessEqualUint32x8 x y) => (VPMOVMToVec32x8 (VPCMPUD256 [2] x y)) (LessEqualUint32x16 x y) => (VPMOVMToVec32x16 (VPCMPUD512 [2] x y)) -(LessEqualUint64x2 x y) => (VPMOVMToVec64x2 (VPCMPUQ128 [2] x y)) -(LessEqualUint64x4 x y) => (VPMOVMToVec64x4 (VPCMPUQ256 [2] x y)) (LessEqualUint64x8 x y) => (VPMOVMToVec64x8 (VPCMPUQ512 [2] x y)) (LessEqualMaskedFloat32x4 x y mask) => (VPMOVMToVec32x4 (VCMPPSMasked128 [2] x y (VPMOVVec32x4ToM mask))) (LessEqualMaskedFloat32x8 x y mask) => (VPMOVMToVec32x8 (VCMPPSMasked256 [2] x y (VPMOVVec32x8ToM mask))) @@ -1050,29 +994,13 @@ (NotEqualFloat64x2 x y) => (VCMPPD128 [4] x y) (NotEqualFloat64x4 x y) => (VCMPPD256 [4] x y) (NotEqualFloat64x8 x y) => (VPMOVMToVec64x8 (VCMPPD512 [4] x y)) -(NotEqualInt8x16 x y) => (VPMOVMToVec8x16 (VPCMPB128 [4] x y)) -(NotEqualInt8x32 x y) => (VPMOVMToVec8x32 (VPCMPB256 [4] x y)) (NotEqualInt8x64 x 
y) => (VPMOVMToVec8x64 (VPCMPB512 [4] x y)) -(NotEqualInt16x8 x y) => (VPMOVMToVec16x8 (VPCMPW128 [4] x y)) -(NotEqualInt16x16 x y) => (VPMOVMToVec16x16 (VPCMPW256 [4] x y)) (NotEqualInt16x32 x y) => (VPMOVMToVec16x32 (VPCMPW512 [4] x y)) -(NotEqualInt32x4 x y) => (VPMOVMToVec32x4 (VPCMPD128 [4] x y)) -(NotEqualInt32x8 x y) => (VPMOVMToVec32x8 (VPCMPD256 [4] x y)) (NotEqualInt32x16 x y) => (VPMOVMToVec32x16 (VPCMPD512 [4] x y)) -(NotEqualInt64x2 x y) => (VPMOVMToVec64x2 (VPCMPQ128 [4] x y)) -(NotEqualInt64x4 x y) => (VPMOVMToVec64x4 (VPCMPQ256 [4] x y)) (NotEqualInt64x8 x y) => (VPMOVMToVec64x8 (VPCMPQ512 [4] x y)) -(NotEqualUint8x16 x y) => (VPMOVMToVec8x16 (VPCMPUB128 [4] x y)) -(NotEqualUint8x32 x y) => (VPMOVMToVec8x32 (VPCMPUB256 [4] x y)) (NotEqualUint8x64 x y) => (VPMOVMToVec8x64 (VPCMPUB512 [4] x y)) -(NotEqualUint16x8 x y) => (VPMOVMToVec16x8 (VPCMPUW128 [4] x y)) -(NotEqualUint16x16 x y) => (VPMOVMToVec16x16 (VPCMPUW256 [4] x y)) (NotEqualUint16x32 x y) => (VPMOVMToVec16x32 (VPCMPUW512 [4] x y)) -(NotEqualUint32x4 x y) => (VPMOVMToVec32x4 (VPCMPUD128 [4] x y)) -(NotEqualUint32x8 x y) => (VPMOVMToVec32x8 (VPCMPUD256 [4] x y)) (NotEqualUint32x16 x y) => (VPMOVMToVec32x16 (VPCMPUD512 [4] x y)) -(NotEqualUint64x2 x y) => (VPMOVMToVec64x2 (VPCMPUQ128 [4] x y)) -(NotEqualUint64x4 x y) => (VPMOVMToVec64x4 (VPCMPUQ256 [4] x y)) (NotEqualUint64x8 x y) => (VPMOVMToVec64x8 (VPCMPUQ512 [4] x y)) (NotEqualMaskedFloat32x4 x y mask) => (VPMOVMToVec32x4 (VCMPPSMasked128 [4] x y (VPMOVVec32x4ToM mask))) (NotEqualMaskedFloat32x8 x y mask) => (VPMOVMToVec32x8 (VCMPPSMasked256 [4] x y (VPMOVVec32x8ToM mask))) diff --git a/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go b/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go index 5d388a4531..61abaa5e97 100644 --- a/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go +++ b/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go @@ -986,29 +986,13 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf {name: "VEXTRACTF64X4256", argLength: 1, reg: w11, asm: "VEXTRACTF64X4", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VEXTRACTI128128", argLength: 1, reg: v11, asm: "VEXTRACTI128", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VEXTRACTI64X4256", argLength: 1, reg: w11, asm: "VEXTRACTI64X4", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: false}, - {name: "VPCMPUB128", argLength: 2, reg: w2k, asm: "VPCMPUB", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false}, - {name: "VPCMPUB256", argLength: 2, reg: w2k, asm: "VPCMPUB", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false}, {name: "VPCMPUB512", argLength: 2, reg: w2k, asm: "VPCMPUB", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false}, - {name: "VPCMPUW128", argLength: 2, reg: w2k, asm: "VPCMPUW", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false}, - {name: "VPCMPUW256", argLength: 2, reg: w2k, asm: "VPCMPUW", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false}, {name: "VPCMPUW512", argLength: 2, reg: w2k, asm: "VPCMPUW", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false}, - {name: "VPCMPUD128", argLength: 2, reg: w2k, asm: "VPCMPUD", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false}, - {name: "VPCMPUD256", argLength: 2, reg: w2k, asm: "VPCMPUD", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false}, {name: "VPCMPUD512", argLength: 2, reg: w2k, asm: "VPCMPUD", aux: "UInt8", 
commutative: false, typ: "Mask", resultInArg0: false}, - {name: "VPCMPUQ128", argLength: 2, reg: w2k, asm: "VPCMPUQ", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false}, - {name: "VPCMPUQ256", argLength: 2, reg: w2k, asm: "VPCMPUQ", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false}, {name: "VPCMPUQ512", argLength: 2, reg: w2k, asm: "VPCMPUQ", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false}, - {name: "VPCMPB128", argLength: 2, reg: w2k, asm: "VPCMPB", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false}, - {name: "VPCMPB256", argLength: 2, reg: w2k, asm: "VPCMPB", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false}, {name: "VPCMPB512", argLength: 2, reg: w2k, asm: "VPCMPB", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false}, - {name: "VPCMPW128", argLength: 2, reg: w2k, asm: "VPCMPW", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false}, - {name: "VPCMPW256", argLength: 2, reg: w2k, asm: "VPCMPW", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false}, {name: "VPCMPW512", argLength: 2, reg: w2k, asm: "VPCMPW", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false}, - {name: "VPCMPD128", argLength: 2, reg: w2k, asm: "VPCMPD", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false}, - {name: "VPCMPD256", argLength: 2, reg: w2k, asm: "VPCMPD", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false}, {name: "VPCMPD512", argLength: 2, reg: w2k, asm: "VPCMPD", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false}, - {name: "VPCMPQ128", argLength: 2, reg: w2k, asm: "VPCMPQ", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false}, - {name: "VPCMPQ256", argLength: 2, reg: w2k, asm: "VPCMPQ", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false}, {name: "VPCMPQ512", argLength: 2, reg: w2k, asm: "VPCMPQ", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false}, {name: "VPROLD128", argLength: 1, reg: w11, asm: "VPROLD", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPROLD256", argLength: 1, reg: w11, asm: "VPROLD", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: false}, diff --git a/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go b/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go index f120dcddd0..4f2b1a9121 100644 --- a/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go +++ b/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go @@ -514,17 +514,9 @@ func simdGenericOps() []opData { {name: "GreaterEqualFloat64x2", argLength: 2, commutative: false}, {name: "GreaterEqualFloat64x4", argLength: 2, commutative: false}, {name: "GreaterEqualFloat64x8", argLength: 2, commutative: false}, - {name: "GreaterEqualInt8x16", argLength: 2, commutative: false}, - {name: "GreaterEqualInt8x32", argLength: 2, commutative: false}, {name: "GreaterEqualInt8x64", argLength: 2, commutative: false}, - {name: "GreaterEqualInt16x8", argLength: 2, commutative: false}, - {name: "GreaterEqualInt16x16", argLength: 2, commutative: false}, {name: "GreaterEqualInt16x32", argLength: 2, commutative: false}, - {name: "GreaterEqualInt32x4", argLength: 2, commutative: false}, - {name: "GreaterEqualInt32x8", argLength: 2, commutative: false}, {name: "GreaterEqualInt32x16", argLength: 2, commutative: false}, - {name: "GreaterEqualInt64x2", argLength: 2, commutative: false}, - {name: "GreaterEqualInt64x4", argLength: 2, commutative: false}, {name: "GreaterEqualInt64x8", 
argLength: 2, commutative: false}, {name: "GreaterEqualMaskedFloat32x4", argLength: 3, commutative: false}, {name: "GreaterEqualMaskedFloat32x8", argLength: 3, commutative: false}, @@ -556,17 +548,9 @@ func simdGenericOps() []opData { {name: "GreaterEqualMaskedUint64x2", argLength: 3, commutative: false}, {name: "GreaterEqualMaskedUint64x4", argLength: 3, commutative: false}, {name: "GreaterEqualMaskedUint64x8", argLength: 3, commutative: false}, - {name: "GreaterEqualUint8x16", argLength: 2, commutative: false}, - {name: "GreaterEqualUint8x32", argLength: 2, commutative: false}, {name: "GreaterEqualUint8x64", argLength: 2, commutative: false}, - {name: "GreaterEqualUint16x8", argLength: 2, commutative: false}, - {name: "GreaterEqualUint16x16", argLength: 2, commutative: false}, {name: "GreaterEqualUint16x32", argLength: 2, commutative: false}, - {name: "GreaterEqualUint32x4", argLength: 2, commutative: false}, - {name: "GreaterEqualUint32x8", argLength: 2, commutative: false}, {name: "GreaterEqualUint32x16", argLength: 2, commutative: false}, - {name: "GreaterEqualUint64x2", argLength: 2, commutative: false}, - {name: "GreaterEqualUint64x4", argLength: 2, commutative: false}, {name: "GreaterEqualUint64x8", argLength: 2, commutative: false}, {name: "GreaterFloat32x4", argLength: 2, commutative: false}, {name: "GreaterFloat32x8", argLength: 2, commutative: false}, @@ -616,17 +600,9 @@ func simdGenericOps() []opData { {name: "GreaterMaskedUint64x2", argLength: 3, commutative: false}, {name: "GreaterMaskedUint64x4", argLength: 3, commutative: false}, {name: "GreaterMaskedUint64x8", argLength: 3, commutative: false}, - {name: "GreaterUint8x16", argLength: 2, commutative: false}, - {name: "GreaterUint8x32", argLength: 2, commutative: false}, {name: "GreaterUint8x64", argLength: 2, commutative: false}, - {name: "GreaterUint16x8", argLength: 2, commutative: false}, - {name: "GreaterUint16x16", argLength: 2, commutative: false}, {name: "GreaterUint16x32", argLength: 2, commutative: false}, - {name: "GreaterUint32x4", argLength: 2, commutative: false}, - {name: "GreaterUint32x8", argLength: 2, commutative: false}, {name: "GreaterUint32x16", argLength: 2, commutative: false}, - {name: "GreaterUint64x2", argLength: 2, commutative: false}, - {name: "GreaterUint64x4", argLength: 2, commutative: false}, {name: "GreaterUint64x8", argLength: 2, commutative: false}, {name: "IsNanFloat32x4", argLength: 2, commutative: true}, {name: "IsNanFloat32x8", argLength: 2, commutative: true}, @@ -646,17 +622,9 @@ func simdGenericOps() []opData { {name: "LessEqualFloat64x2", argLength: 2, commutative: false}, {name: "LessEqualFloat64x4", argLength: 2, commutative: false}, {name: "LessEqualFloat64x8", argLength: 2, commutative: false}, - {name: "LessEqualInt8x16", argLength: 2, commutative: false}, - {name: "LessEqualInt8x32", argLength: 2, commutative: false}, {name: "LessEqualInt8x64", argLength: 2, commutative: false}, - {name: "LessEqualInt16x8", argLength: 2, commutative: false}, - {name: "LessEqualInt16x16", argLength: 2, commutative: false}, {name: "LessEqualInt16x32", argLength: 2, commutative: false}, - {name: "LessEqualInt32x4", argLength: 2, commutative: false}, - {name: "LessEqualInt32x8", argLength: 2, commutative: false}, {name: "LessEqualInt32x16", argLength: 2, commutative: false}, - {name: "LessEqualInt64x2", argLength: 2, commutative: false}, - {name: "LessEqualInt64x4", argLength: 2, commutative: false}, {name: "LessEqualInt64x8", argLength: 2, commutative: false}, {name: "LessEqualMaskedFloat32x4", 
argLength: 3, commutative: false}, {name: "LessEqualMaskedFloat32x8", argLength: 3, commutative: false}, @@ -688,17 +656,9 @@ func simdGenericOps() []opData { {name: "LessEqualMaskedUint64x2", argLength: 3, commutative: false}, {name: "LessEqualMaskedUint64x4", argLength: 3, commutative: false}, {name: "LessEqualMaskedUint64x8", argLength: 3, commutative: false}, - {name: "LessEqualUint8x16", argLength: 2, commutative: false}, - {name: "LessEqualUint8x32", argLength: 2, commutative: false}, {name: "LessEqualUint8x64", argLength: 2, commutative: false}, - {name: "LessEqualUint16x8", argLength: 2, commutative: false}, - {name: "LessEqualUint16x16", argLength: 2, commutative: false}, {name: "LessEqualUint16x32", argLength: 2, commutative: false}, - {name: "LessEqualUint32x4", argLength: 2, commutative: false}, - {name: "LessEqualUint32x8", argLength: 2, commutative: false}, {name: "LessEqualUint32x16", argLength: 2, commutative: false}, - {name: "LessEqualUint64x2", argLength: 2, commutative: false}, - {name: "LessEqualUint64x4", argLength: 2, commutative: false}, {name: "LessEqualUint64x8", argLength: 2, commutative: false}, {name: "LessFloat32x4", argLength: 2, commutative: false}, {name: "LessFloat32x8", argLength: 2, commutative: false}, @@ -706,17 +666,9 @@ func simdGenericOps() []opData { {name: "LessFloat64x2", argLength: 2, commutative: false}, {name: "LessFloat64x4", argLength: 2, commutative: false}, {name: "LessFloat64x8", argLength: 2, commutative: false}, - {name: "LessInt8x16", argLength: 2, commutative: false}, - {name: "LessInt8x32", argLength: 2, commutative: false}, {name: "LessInt8x64", argLength: 2, commutative: false}, - {name: "LessInt16x8", argLength: 2, commutative: false}, - {name: "LessInt16x16", argLength: 2, commutative: false}, {name: "LessInt16x32", argLength: 2, commutative: false}, - {name: "LessInt32x4", argLength: 2, commutative: false}, - {name: "LessInt32x8", argLength: 2, commutative: false}, {name: "LessInt32x16", argLength: 2, commutative: false}, - {name: "LessInt64x2", argLength: 2, commutative: false}, - {name: "LessInt64x4", argLength: 2, commutative: false}, {name: "LessInt64x8", argLength: 2, commutative: false}, {name: "LessMaskedFloat32x4", argLength: 3, commutative: false}, {name: "LessMaskedFloat32x8", argLength: 3, commutative: false}, @@ -748,17 +700,9 @@ func simdGenericOps() []opData { {name: "LessMaskedUint64x2", argLength: 3, commutative: false}, {name: "LessMaskedUint64x4", argLength: 3, commutative: false}, {name: "LessMaskedUint64x8", argLength: 3, commutative: false}, - {name: "LessUint8x16", argLength: 2, commutative: false}, - {name: "LessUint8x32", argLength: 2, commutative: false}, {name: "LessUint8x64", argLength: 2, commutative: false}, - {name: "LessUint16x8", argLength: 2, commutative: false}, - {name: "LessUint16x16", argLength: 2, commutative: false}, {name: "LessUint16x32", argLength: 2, commutative: false}, - {name: "LessUint32x4", argLength: 2, commutative: false}, - {name: "LessUint32x8", argLength: 2, commutative: false}, {name: "LessUint32x16", argLength: 2, commutative: false}, - {name: "LessUint64x2", argLength: 2, commutative: false}, - {name: "LessUint64x4", argLength: 2, commutative: false}, {name: "LessUint64x8", argLength: 2, commutative: false}, {name: "MaxFloat32x4", argLength: 2, commutative: true}, {name: "MaxFloat32x8", argLength: 2, commutative: true}, @@ -986,17 +930,9 @@ func simdGenericOps() []opData { {name: "NotEqualFloat64x2", argLength: 2, commutative: true}, {name: "NotEqualFloat64x4", argLength: 
2, commutative: true}, {name: "NotEqualFloat64x8", argLength: 2, commutative: true}, - {name: "NotEqualInt8x16", argLength: 2, commutative: true}, - {name: "NotEqualInt8x32", argLength: 2, commutative: true}, {name: "NotEqualInt8x64", argLength: 2, commutative: true}, - {name: "NotEqualInt16x8", argLength: 2, commutative: true}, - {name: "NotEqualInt16x16", argLength: 2, commutative: true}, {name: "NotEqualInt16x32", argLength: 2, commutative: true}, - {name: "NotEqualInt32x4", argLength: 2, commutative: true}, - {name: "NotEqualInt32x8", argLength: 2, commutative: true}, {name: "NotEqualInt32x16", argLength: 2, commutative: true}, - {name: "NotEqualInt64x2", argLength: 2, commutative: true}, - {name: "NotEqualInt64x4", argLength: 2, commutative: true}, {name: "NotEqualInt64x8", argLength: 2, commutative: true}, {name: "NotEqualMaskedFloat32x4", argLength: 3, commutative: true}, {name: "NotEqualMaskedFloat32x8", argLength: 3, commutative: true}, @@ -1028,17 +964,9 @@ func simdGenericOps() []opData { {name: "NotEqualMaskedUint64x2", argLength: 3, commutative: true}, {name: "NotEqualMaskedUint64x4", argLength: 3, commutative: true}, {name: "NotEqualMaskedUint64x8", argLength: 3, commutative: true}, - {name: "NotEqualUint8x16", argLength: 2, commutative: true}, - {name: "NotEqualUint8x32", argLength: 2, commutative: true}, {name: "NotEqualUint8x64", argLength: 2, commutative: true}, - {name: "NotEqualUint16x8", argLength: 2, commutative: true}, - {name: "NotEqualUint16x16", argLength: 2, commutative: true}, {name: "NotEqualUint16x32", argLength: 2, commutative: true}, - {name: "NotEqualUint32x4", argLength: 2, commutative: true}, - {name: "NotEqualUint32x8", argLength: 2, commutative: true}, {name: "NotEqualUint32x16", argLength: 2, commutative: true}, - {name: "NotEqualUint64x2", argLength: 2, commutative: true}, - {name: "NotEqualUint64x4", argLength: 2, commutative: true}, {name: "NotEqualUint64x8", argLength: 2, commutative: true}, {name: "OnesCountInt8x16", argLength: 1, commutative: false}, {name: "OnesCountInt8x32", argLength: 1, commutative: false}, diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go index 6e0ffd1540..7bcbf1b615 100644 --- a/src/cmd/compile/internal/ssa/opGen.go +++ b/src/cmd/compile/internal/ssa/opGen.go @@ -2208,29 +2208,13 @@ const ( OpAMD64VEXTRACTF64X4256 OpAMD64VEXTRACTI128128 OpAMD64VEXTRACTI64X4256 - OpAMD64VPCMPUB128 - OpAMD64VPCMPUB256 OpAMD64VPCMPUB512 - OpAMD64VPCMPUW128 - OpAMD64VPCMPUW256 OpAMD64VPCMPUW512 - OpAMD64VPCMPUD128 - OpAMD64VPCMPUD256 OpAMD64VPCMPUD512 - OpAMD64VPCMPUQ128 - OpAMD64VPCMPUQ256 OpAMD64VPCMPUQ512 - OpAMD64VPCMPB128 - OpAMD64VPCMPB256 OpAMD64VPCMPB512 - OpAMD64VPCMPW128 - OpAMD64VPCMPW256 OpAMD64VPCMPW512 - OpAMD64VPCMPD128 - OpAMD64VPCMPD256 OpAMD64VPCMPD512 - OpAMD64VPCMPQ128 - OpAMD64VPCMPQ256 OpAMD64VPCMPQ512 OpAMD64VPROLD128 OpAMD64VPROLD256 @@ -5155,17 +5139,9 @@ const ( OpGreaterEqualFloat64x2 OpGreaterEqualFloat64x4 OpGreaterEqualFloat64x8 - OpGreaterEqualInt8x16 - OpGreaterEqualInt8x32 OpGreaterEqualInt8x64 - OpGreaterEqualInt16x8 - OpGreaterEqualInt16x16 OpGreaterEqualInt16x32 - OpGreaterEqualInt32x4 - OpGreaterEqualInt32x8 OpGreaterEqualInt32x16 - OpGreaterEqualInt64x2 - OpGreaterEqualInt64x4 OpGreaterEqualInt64x8 OpGreaterEqualMaskedFloat32x4 OpGreaterEqualMaskedFloat32x8 @@ -5197,17 +5173,9 @@ const ( OpGreaterEqualMaskedUint64x2 OpGreaterEqualMaskedUint64x4 OpGreaterEqualMaskedUint64x8 - OpGreaterEqualUint8x16 - OpGreaterEqualUint8x32 OpGreaterEqualUint8x64 - 
OpGreaterEqualUint16x8 - OpGreaterEqualUint16x16 OpGreaterEqualUint16x32 - OpGreaterEqualUint32x4 - OpGreaterEqualUint32x8 OpGreaterEqualUint32x16 - OpGreaterEqualUint64x2 - OpGreaterEqualUint64x4 OpGreaterEqualUint64x8 OpGreaterFloat32x4 OpGreaterFloat32x8 @@ -5257,17 +5225,9 @@ const ( OpGreaterMaskedUint64x2 OpGreaterMaskedUint64x4 OpGreaterMaskedUint64x8 - OpGreaterUint8x16 - OpGreaterUint8x32 OpGreaterUint8x64 - OpGreaterUint16x8 - OpGreaterUint16x16 OpGreaterUint16x32 - OpGreaterUint32x4 - OpGreaterUint32x8 OpGreaterUint32x16 - OpGreaterUint64x2 - OpGreaterUint64x4 OpGreaterUint64x8 OpIsNanFloat32x4 OpIsNanFloat32x8 @@ -5287,17 +5247,9 @@ const ( OpLessEqualFloat64x2 OpLessEqualFloat64x4 OpLessEqualFloat64x8 - OpLessEqualInt8x16 - OpLessEqualInt8x32 OpLessEqualInt8x64 - OpLessEqualInt16x8 - OpLessEqualInt16x16 OpLessEqualInt16x32 - OpLessEqualInt32x4 - OpLessEqualInt32x8 OpLessEqualInt32x16 - OpLessEqualInt64x2 - OpLessEqualInt64x4 OpLessEqualInt64x8 OpLessEqualMaskedFloat32x4 OpLessEqualMaskedFloat32x8 @@ -5329,17 +5281,9 @@ const ( OpLessEqualMaskedUint64x2 OpLessEqualMaskedUint64x4 OpLessEqualMaskedUint64x8 - OpLessEqualUint8x16 - OpLessEqualUint8x32 OpLessEqualUint8x64 - OpLessEqualUint16x8 - OpLessEqualUint16x16 OpLessEqualUint16x32 - OpLessEqualUint32x4 - OpLessEqualUint32x8 OpLessEqualUint32x16 - OpLessEqualUint64x2 - OpLessEqualUint64x4 OpLessEqualUint64x8 OpLessFloat32x4 OpLessFloat32x8 @@ -5347,17 +5291,9 @@ const ( OpLessFloat64x2 OpLessFloat64x4 OpLessFloat64x8 - OpLessInt8x16 - OpLessInt8x32 OpLessInt8x64 - OpLessInt16x8 - OpLessInt16x16 OpLessInt16x32 - OpLessInt32x4 - OpLessInt32x8 OpLessInt32x16 - OpLessInt64x2 - OpLessInt64x4 OpLessInt64x8 OpLessMaskedFloat32x4 OpLessMaskedFloat32x8 @@ -5389,17 +5325,9 @@ const ( OpLessMaskedUint64x2 OpLessMaskedUint64x4 OpLessMaskedUint64x8 - OpLessUint8x16 - OpLessUint8x32 OpLessUint8x64 - OpLessUint16x8 - OpLessUint16x16 OpLessUint16x32 - OpLessUint32x4 - OpLessUint32x8 OpLessUint32x16 - OpLessUint64x2 - OpLessUint64x4 OpLessUint64x8 OpMaxFloat32x4 OpMaxFloat32x8 @@ -5627,17 +5555,9 @@ const ( OpNotEqualFloat64x2 OpNotEqualFloat64x4 OpNotEqualFloat64x8 - OpNotEqualInt8x16 - OpNotEqualInt8x32 OpNotEqualInt8x64 - OpNotEqualInt16x8 - OpNotEqualInt16x16 OpNotEqualInt16x32 - OpNotEqualInt32x4 - OpNotEqualInt32x8 OpNotEqualInt32x16 - OpNotEqualInt64x2 - OpNotEqualInt64x4 OpNotEqualInt64x8 OpNotEqualMaskedFloat32x4 OpNotEqualMaskedFloat32x8 @@ -5669,17 +5589,9 @@ const ( OpNotEqualMaskedUint64x2 OpNotEqualMaskedUint64x4 OpNotEqualMaskedUint64x8 - OpNotEqualUint8x16 - OpNotEqualUint8x32 OpNotEqualUint8x64 - OpNotEqualUint16x8 - OpNotEqualUint16x16 OpNotEqualUint16x32 - OpNotEqualUint32x4 - OpNotEqualUint32x8 OpNotEqualUint32x16 - OpNotEqualUint64x2 - OpNotEqualUint64x4 OpNotEqualUint64x8 OpOnesCountInt8x16 OpOnesCountInt8x32 @@ -34328,36 +34240,6 @@ var opcodeTable = [...]opInfo{ }, }, }, - { - name: "VPCMPUB128", - auxType: auxUInt8, - argLen: 2, - asm: x86.AVPCMPUB, - reg: regInfo{ - inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 - }, - }, - }, - { - name: "VPCMPUB256", - auxType: auxUInt8, - argLen: 2, - asm: x86.AVPCMPUB, - reg: regInfo{ - inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 
71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 - }, - }, - }, { name: "VPCMPUB512", auxType: auxUInt8, @@ -34373,36 +34255,6 @@ var opcodeTable = [...]opInfo{ }, }, }, - { - name: "VPCMPUW128", - auxType: auxUInt8, - argLen: 2, - asm: x86.AVPCMPUW, - reg: regInfo{ - inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 - }, - }, - }, - { - name: "VPCMPUW256", - auxType: auxUInt8, - argLen: 2, - asm: x86.AVPCMPUW, - reg: regInfo{ - inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 - }, - }, - }, { name: "VPCMPUW512", auxType: auxUInt8, @@ -34418,36 +34270,6 @@ var opcodeTable = [...]opInfo{ }, }, }, - { - name: "VPCMPUD128", - auxType: auxUInt8, - argLen: 2, - asm: x86.AVPCMPUD, - reg: regInfo{ - inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 - }, - }, - }, - { - name: "VPCMPUD256", - auxType: auxUInt8, - argLen: 2, - asm: x86.AVPCMPUD, - reg: regInfo{ - inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 - }, - }, - }, { name: "VPCMPUD512", auxType: auxUInt8, @@ -34463,36 +34285,6 @@ var opcodeTable = [...]opInfo{ }, }, }, - { - name: "VPCMPUQ128", - auxType: auxUInt8, - argLen: 2, - asm: x86.AVPCMPUQ, - reg: regInfo{ - inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 - }, - }, - }, - { - name: "VPCMPUQ256", - auxType: auxUInt8, - argLen: 2, - asm: x86.AVPCMPUQ, - reg: regInfo{ - inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 - }, - }, - }, { name: "VPCMPUQ512", auxType: auxUInt8, @@ -34508,36 +34300,6 @@ var opcodeTable = [...]opInfo{ }, }, }, - { - name: "VPCMPB128", - auxType: auxUInt8, - argLen: 2, - asm: x86.AVPCMPB, - reg: regInfo{ - inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 - }, - }, - }, - { - name: "VPCMPB256", - auxType: auxUInt8, - argLen: 2, - asm: x86.AVPCMPB, - reg: regInfo{ - inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 - }, - }, - }, { name: "VPCMPB512", auxType: auxUInt8, @@ -34553,36 +34315,6 @@ var opcodeTable = [...]opInfo{ }, }, }, - { - name: "VPCMPW128", - auxType: auxUInt8, - argLen: 2, - asm: x86.AVPCMPW, - reg: regInfo{ - 
inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 - }, - }, - }, - { - name: "VPCMPW256", - auxType: auxUInt8, - argLen: 2, - asm: x86.AVPCMPW, - reg: regInfo{ - inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 - }, - }, - }, { name: "VPCMPW512", auxType: auxUInt8, @@ -34598,36 +34330,6 @@ var opcodeTable = [...]opInfo{ }, }, }, - { - name: "VPCMPD128", - auxType: auxUInt8, - argLen: 2, - asm: x86.AVPCMPD, - reg: regInfo{ - inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 - }, - }, - }, - { - name: "VPCMPD256", - auxType: auxUInt8, - argLen: 2, - asm: x86.AVPCMPD, - reg: regInfo{ - inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 - }, - }, - }, { name: "VPCMPD512", auxType: auxUInt8, @@ -34643,36 +34345,6 @@ var opcodeTable = [...]opInfo{ }, }, }, - { - name: "VPCMPQ128", - auxType: auxUInt8, - argLen: 2, - asm: x86.AVPCMPQ, - reg: regInfo{ - inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 - }, - }, - }, - { - name: "VPCMPQ256", - auxType: auxUInt8, - argLen: 2, - asm: x86.AVPCMPQ, - reg: regInfo{ - inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 - }, - }, - }, { name: "VPCMPQ512", auxType: auxUInt8, @@ -66750,61 +66422,21 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, - { - name: "GreaterEqualInt8x16", - argLen: 2, - generic: true, - }, - { - name: "GreaterEqualInt8x32", - argLen: 2, - generic: true, - }, { name: "GreaterEqualInt8x64", argLen: 2, generic: true, }, - { - name: "GreaterEqualInt16x8", - argLen: 2, - generic: true, - }, - { - name: "GreaterEqualInt16x16", - argLen: 2, - generic: true, - }, { name: "GreaterEqualInt16x32", argLen: 2, generic: true, }, - { - name: "GreaterEqualInt32x4", - argLen: 2, - generic: true, - }, - { - name: "GreaterEqualInt32x8", - argLen: 2, - generic: true, - }, { name: "GreaterEqualInt32x16", argLen: 2, generic: true, }, - { - name: "GreaterEqualInt64x2", - argLen: 2, - generic: true, - }, - { - name: "GreaterEqualInt64x4", - argLen: 2, - generic: true, - }, { name: "GreaterEqualInt64x8", argLen: 2, @@ -66960,61 +66592,21 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, - { - name: "GreaterEqualUint8x16", - argLen: 2, - generic: true, - }, - { - name: "GreaterEqualUint8x32", - argLen: 2, - generic: true, - }, { name: "GreaterEqualUint8x64", argLen: 2, generic: true, }, - { - name: "GreaterEqualUint16x8", - argLen: 2, - generic: true, - }, - { - name: 
"GreaterEqualUint16x16", - argLen: 2, - generic: true, - }, { name: "GreaterEqualUint16x32", argLen: 2, generic: true, }, - { - name: "GreaterEqualUint32x4", - argLen: 2, - generic: true, - }, - { - name: "GreaterEqualUint32x8", - argLen: 2, - generic: true, - }, { name: "GreaterEqualUint32x16", argLen: 2, generic: true, }, - { - name: "GreaterEqualUint64x2", - argLen: 2, - generic: true, - }, - { - name: "GreaterEqualUint64x4", - argLen: 2, - generic: true, - }, { name: "GreaterEqualUint64x8", argLen: 2, @@ -67260,61 +66852,21 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, - { - name: "GreaterUint8x16", - argLen: 2, - generic: true, - }, - { - name: "GreaterUint8x32", - argLen: 2, - generic: true, - }, { name: "GreaterUint8x64", argLen: 2, generic: true, }, - { - name: "GreaterUint16x8", - argLen: 2, - generic: true, - }, - { - name: "GreaterUint16x16", - argLen: 2, - generic: true, - }, { name: "GreaterUint16x32", argLen: 2, generic: true, }, - { - name: "GreaterUint32x4", - argLen: 2, - generic: true, - }, - { - name: "GreaterUint32x8", - argLen: 2, - generic: true, - }, { name: "GreaterUint32x16", argLen: 2, generic: true, }, - { - name: "GreaterUint64x2", - argLen: 2, - generic: true, - }, - { - name: "GreaterUint64x4", - argLen: 2, - generic: true, - }, { name: "GreaterUint64x8", argLen: 2, @@ -67422,61 +66974,21 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, - { - name: "LessEqualInt8x16", - argLen: 2, - generic: true, - }, - { - name: "LessEqualInt8x32", - argLen: 2, - generic: true, - }, { name: "LessEqualInt8x64", argLen: 2, generic: true, }, - { - name: "LessEqualInt16x8", - argLen: 2, - generic: true, - }, - { - name: "LessEqualInt16x16", - argLen: 2, - generic: true, - }, { name: "LessEqualInt16x32", argLen: 2, generic: true, }, - { - name: "LessEqualInt32x4", - argLen: 2, - generic: true, - }, - { - name: "LessEqualInt32x8", - argLen: 2, - generic: true, - }, { name: "LessEqualInt32x16", argLen: 2, generic: true, }, - { - name: "LessEqualInt64x2", - argLen: 2, - generic: true, - }, - { - name: "LessEqualInt64x4", - argLen: 2, - generic: true, - }, { name: "LessEqualInt64x8", argLen: 2, @@ -67632,61 +67144,21 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, - { - name: "LessEqualUint8x16", - argLen: 2, - generic: true, - }, - { - name: "LessEqualUint8x32", - argLen: 2, - generic: true, - }, { name: "LessEqualUint8x64", argLen: 2, generic: true, }, - { - name: "LessEqualUint16x8", - argLen: 2, - generic: true, - }, - { - name: "LessEqualUint16x16", - argLen: 2, - generic: true, - }, { name: "LessEqualUint16x32", argLen: 2, generic: true, }, - { - name: "LessEqualUint32x4", - argLen: 2, - generic: true, - }, - { - name: "LessEqualUint32x8", - argLen: 2, - generic: true, - }, { name: "LessEqualUint32x16", argLen: 2, generic: true, }, - { - name: "LessEqualUint64x2", - argLen: 2, - generic: true, - }, - { - name: "LessEqualUint64x4", - argLen: 2, - generic: true, - }, { name: "LessEqualUint64x8", argLen: 2, @@ -67722,61 +67194,21 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, - { - name: "LessInt8x16", - argLen: 2, - generic: true, - }, - { - name: "LessInt8x32", - argLen: 2, - generic: true, - }, { name: "LessInt8x64", argLen: 2, generic: true, }, - { - name: "LessInt16x8", - argLen: 2, - generic: true, - }, - { - name: "LessInt16x16", - argLen: 2, - generic: true, - }, { name: "LessInt16x32", argLen: 2, generic: true, }, - { - name: "LessInt32x4", - argLen: 2, - generic: true, - }, - { - name: "LessInt32x8", - argLen: 
2, - generic: true, - }, { name: "LessInt32x16", argLen: 2, generic: true, }, - { - name: "LessInt64x2", - argLen: 2, - generic: true, - }, - { - name: "LessInt64x4", - argLen: 2, - generic: true, - }, { name: "LessInt64x8", argLen: 2, @@ -67932,61 +67364,21 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, - { - name: "LessUint8x16", - argLen: 2, - generic: true, - }, - { - name: "LessUint8x32", - argLen: 2, - generic: true, - }, { name: "LessUint8x64", argLen: 2, generic: true, }, - { - name: "LessUint16x8", - argLen: 2, - generic: true, - }, - { - name: "LessUint16x16", - argLen: 2, - generic: true, - }, { name: "LessUint16x32", argLen: 2, generic: true, }, - { - name: "LessUint32x4", - argLen: 2, - generic: true, - }, - { - name: "LessUint32x8", - argLen: 2, - generic: true, - }, { name: "LessUint32x16", argLen: 2, generic: true, }, - { - name: "LessUint64x2", - argLen: 2, - generic: true, - }, - { - name: "LessUint64x4", - argLen: 2, - generic: true, - }, { name: "LessUint64x8", argLen: 2, @@ -69312,72 +68704,24 @@ var opcodeTable = [...]opInfo{ commutative: true, generic: true, }, - { - name: "NotEqualInt8x16", - argLen: 2, - commutative: true, - generic: true, - }, - { - name: "NotEqualInt8x32", - argLen: 2, - commutative: true, - generic: true, - }, { name: "NotEqualInt8x64", argLen: 2, commutative: true, generic: true, }, - { - name: "NotEqualInt16x8", - argLen: 2, - commutative: true, - generic: true, - }, - { - name: "NotEqualInt16x16", - argLen: 2, - commutative: true, - generic: true, - }, { name: "NotEqualInt16x32", argLen: 2, commutative: true, generic: true, }, - { - name: "NotEqualInt32x4", - argLen: 2, - commutative: true, - generic: true, - }, - { - name: "NotEqualInt32x8", - argLen: 2, - commutative: true, - generic: true, - }, { name: "NotEqualInt32x16", argLen: 2, commutative: true, generic: true, }, - { - name: "NotEqualInt64x2", - argLen: 2, - commutative: true, - generic: true, - }, - { - name: "NotEqualInt64x4", - argLen: 2, - commutative: true, - generic: true, - }, { name: "NotEqualInt64x8", argLen: 2, @@ -69564,72 +68908,24 @@ var opcodeTable = [...]opInfo{ commutative: true, generic: true, }, - { - name: "NotEqualUint8x16", - argLen: 2, - commutative: true, - generic: true, - }, - { - name: "NotEqualUint8x32", - argLen: 2, - commutative: true, - generic: true, - }, { name: "NotEqualUint8x64", argLen: 2, commutative: true, generic: true, }, - { - name: "NotEqualUint16x8", - argLen: 2, - commutative: true, - generic: true, - }, - { - name: "NotEqualUint16x16", - argLen: 2, - commutative: true, - generic: true, - }, { name: "NotEqualUint16x32", argLen: 2, commutative: true, generic: true, }, - { - name: "NotEqualUint32x4", - argLen: 2, - commutative: true, - generic: true, - }, - { - name: "NotEqualUint32x8", - argLen: 2, - commutative: true, - generic: true, - }, { name: "NotEqualUint32x16", argLen: 2, commutative: true, generic: true, }, - { - name: "NotEqualUint64x2", - argLen: 2, - commutative: true, - generic: true, - }, - { - name: "NotEqualUint64x4", - argLen: 2, - commutative: true, - generic: true, - }, { name: "NotEqualUint64x8", argLen: 2, diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go index 0bdc0e63b7..0e2e2311f0 100644 --- a/src/cmd/compile/internal/ssa/rewriteAMD64.go +++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go @@ -2304,28 +2304,12 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpGreaterEqualFloat64x4(v) case OpGreaterEqualFloat64x8: return 
rewriteValueAMD64_OpGreaterEqualFloat64x8(v) - case OpGreaterEqualInt16x16: - return rewriteValueAMD64_OpGreaterEqualInt16x16(v) case OpGreaterEqualInt16x32: return rewriteValueAMD64_OpGreaterEqualInt16x32(v) - case OpGreaterEqualInt16x8: - return rewriteValueAMD64_OpGreaterEqualInt16x8(v) case OpGreaterEqualInt32x16: return rewriteValueAMD64_OpGreaterEqualInt32x16(v) - case OpGreaterEqualInt32x4: - return rewriteValueAMD64_OpGreaterEqualInt32x4(v) - case OpGreaterEqualInt32x8: - return rewriteValueAMD64_OpGreaterEqualInt32x8(v) - case OpGreaterEqualInt64x2: - return rewriteValueAMD64_OpGreaterEqualInt64x2(v) - case OpGreaterEqualInt64x4: - return rewriteValueAMD64_OpGreaterEqualInt64x4(v) case OpGreaterEqualInt64x8: return rewriteValueAMD64_OpGreaterEqualInt64x8(v) - case OpGreaterEqualInt8x16: - return rewriteValueAMD64_OpGreaterEqualInt8x16(v) - case OpGreaterEqualInt8x32: - return rewriteValueAMD64_OpGreaterEqualInt8x32(v) case OpGreaterEqualInt8x64: return rewriteValueAMD64_OpGreaterEqualInt8x64(v) case OpGreaterEqualMaskedFloat32x16: @@ -2388,28 +2372,12 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpGreaterEqualMaskedUint8x32(v) case OpGreaterEqualMaskedUint8x64: return rewriteValueAMD64_OpGreaterEqualMaskedUint8x64(v) - case OpGreaterEqualUint16x16: - return rewriteValueAMD64_OpGreaterEqualUint16x16(v) case OpGreaterEqualUint16x32: return rewriteValueAMD64_OpGreaterEqualUint16x32(v) - case OpGreaterEqualUint16x8: - return rewriteValueAMD64_OpGreaterEqualUint16x8(v) case OpGreaterEqualUint32x16: return rewriteValueAMD64_OpGreaterEqualUint32x16(v) - case OpGreaterEqualUint32x4: - return rewriteValueAMD64_OpGreaterEqualUint32x4(v) - case OpGreaterEqualUint32x8: - return rewriteValueAMD64_OpGreaterEqualUint32x8(v) - case OpGreaterEqualUint64x2: - return rewriteValueAMD64_OpGreaterEqualUint64x2(v) - case OpGreaterEqualUint64x4: - return rewriteValueAMD64_OpGreaterEqualUint64x4(v) case OpGreaterEqualUint64x8: return rewriteValueAMD64_OpGreaterEqualUint64x8(v) - case OpGreaterEqualUint8x16: - return rewriteValueAMD64_OpGreaterEqualUint8x16(v) - case OpGreaterEqualUint8x32: - return rewriteValueAMD64_OpGreaterEqualUint8x32(v) case OpGreaterEqualUint8x64: return rewriteValueAMD64_OpGreaterEqualUint8x64(v) case OpGreaterFloat32x16: @@ -2516,28 +2484,12 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpGreaterMaskedUint8x32(v) case OpGreaterMaskedUint8x64: return rewriteValueAMD64_OpGreaterMaskedUint8x64(v) - case OpGreaterUint16x16: - return rewriteValueAMD64_OpGreaterUint16x16(v) case OpGreaterUint16x32: return rewriteValueAMD64_OpGreaterUint16x32(v) - case OpGreaterUint16x8: - return rewriteValueAMD64_OpGreaterUint16x8(v) case OpGreaterUint32x16: return rewriteValueAMD64_OpGreaterUint32x16(v) - case OpGreaterUint32x4: - return rewriteValueAMD64_OpGreaterUint32x4(v) - case OpGreaterUint32x8: - return rewriteValueAMD64_OpGreaterUint32x8(v) - case OpGreaterUint64x2: - return rewriteValueAMD64_OpGreaterUint64x2(v) - case OpGreaterUint64x4: - return rewriteValueAMD64_OpGreaterUint64x4(v) case OpGreaterUint64x8: return rewriteValueAMD64_OpGreaterUint64x8(v) - case OpGreaterUint8x16: - return rewriteValueAMD64_OpGreaterUint8x16(v) - case OpGreaterUint8x32: - return rewriteValueAMD64_OpGreaterUint8x32(v) case OpGreaterUint8x64: return rewriteValueAMD64_OpGreaterUint8x64(v) case OpHasCPUFeature: @@ -2639,28 +2591,12 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpLessEqualFloat64x4(v) case OpLessEqualFloat64x8: return 
rewriteValueAMD64_OpLessEqualFloat64x8(v) - case OpLessEqualInt16x16: - return rewriteValueAMD64_OpLessEqualInt16x16(v) case OpLessEqualInt16x32: return rewriteValueAMD64_OpLessEqualInt16x32(v) - case OpLessEqualInt16x8: - return rewriteValueAMD64_OpLessEqualInt16x8(v) case OpLessEqualInt32x16: return rewriteValueAMD64_OpLessEqualInt32x16(v) - case OpLessEqualInt32x4: - return rewriteValueAMD64_OpLessEqualInt32x4(v) - case OpLessEqualInt32x8: - return rewriteValueAMD64_OpLessEqualInt32x8(v) - case OpLessEqualInt64x2: - return rewriteValueAMD64_OpLessEqualInt64x2(v) - case OpLessEqualInt64x4: - return rewriteValueAMD64_OpLessEqualInt64x4(v) case OpLessEqualInt64x8: return rewriteValueAMD64_OpLessEqualInt64x8(v) - case OpLessEqualInt8x16: - return rewriteValueAMD64_OpLessEqualInt8x16(v) - case OpLessEqualInt8x32: - return rewriteValueAMD64_OpLessEqualInt8x32(v) case OpLessEqualInt8x64: return rewriteValueAMD64_OpLessEqualInt8x64(v) case OpLessEqualMaskedFloat32x16: @@ -2723,28 +2659,12 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpLessEqualMaskedUint8x32(v) case OpLessEqualMaskedUint8x64: return rewriteValueAMD64_OpLessEqualMaskedUint8x64(v) - case OpLessEqualUint16x16: - return rewriteValueAMD64_OpLessEqualUint16x16(v) case OpLessEqualUint16x32: return rewriteValueAMD64_OpLessEqualUint16x32(v) - case OpLessEqualUint16x8: - return rewriteValueAMD64_OpLessEqualUint16x8(v) case OpLessEqualUint32x16: return rewriteValueAMD64_OpLessEqualUint32x16(v) - case OpLessEqualUint32x4: - return rewriteValueAMD64_OpLessEqualUint32x4(v) - case OpLessEqualUint32x8: - return rewriteValueAMD64_OpLessEqualUint32x8(v) - case OpLessEqualUint64x2: - return rewriteValueAMD64_OpLessEqualUint64x2(v) - case OpLessEqualUint64x4: - return rewriteValueAMD64_OpLessEqualUint64x4(v) case OpLessEqualUint64x8: return rewriteValueAMD64_OpLessEqualUint64x8(v) - case OpLessEqualUint8x16: - return rewriteValueAMD64_OpLessEqualUint8x16(v) - case OpLessEqualUint8x32: - return rewriteValueAMD64_OpLessEqualUint8x32(v) case OpLessEqualUint8x64: return rewriteValueAMD64_OpLessEqualUint8x64(v) case OpLessFloat32x16: @@ -2759,28 +2679,12 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpLessFloat64x4(v) case OpLessFloat64x8: return rewriteValueAMD64_OpLessFloat64x8(v) - case OpLessInt16x16: - return rewriteValueAMD64_OpLessInt16x16(v) case OpLessInt16x32: return rewriteValueAMD64_OpLessInt16x32(v) - case OpLessInt16x8: - return rewriteValueAMD64_OpLessInt16x8(v) case OpLessInt32x16: return rewriteValueAMD64_OpLessInt32x16(v) - case OpLessInt32x4: - return rewriteValueAMD64_OpLessInt32x4(v) - case OpLessInt32x8: - return rewriteValueAMD64_OpLessInt32x8(v) - case OpLessInt64x2: - return rewriteValueAMD64_OpLessInt64x2(v) - case OpLessInt64x4: - return rewriteValueAMD64_OpLessInt64x4(v) case OpLessInt64x8: return rewriteValueAMD64_OpLessInt64x8(v) - case OpLessInt8x16: - return rewriteValueAMD64_OpLessInt8x16(v) - case OpLessInt8x32: - return rewriteValueAMD64_OpLessInt8x32(v) case OpLessInt8x64: return rewriteValueAMD64_OpLessInt8x64(v) case OpLessMaskedFloat32x16: @@ -2843,28 +2747,12 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpLessMaskedUint8x32(v) case OpLessMaskedUint8x64: return rewriteValueAMD64_OpLessMaskedUint8x64(v) - case OpLessUint16x16: - return rewriteValueAMD64_OpLessUint16x16(v) case OpLessUint16x32: return rewriteValueAMD64_OpLessUint16x32(v) - case OpLessUint16x8: - return rewriteValueAMD64_OpLessUint16x8(v) case OpLessUint32x16: return 
rewriteValueAMD64_OpLessUint32x16(v) - case OpLessUint32x4: - return rewriteValueAMD64_OpLessUint32x4(v) - case OpLessUint32x8: - return rewriteValueAMD64_OpLessUint32x8(v) - case OpLessUint64x2: - return rewriteValueAMD64_OpLessUint64x2(v) - case OpLessUint64x4: - return rewriteValueAMD64_OpLessUint64x4(v) case OpLessUint64x8: return rewriteValueAMD64_OpLessUint64x8(v) - case OpLessUint8x16: - return rewriteValueAMD64_OpLessUint8x16(v) - case OpLessUint8x32: - return rewriteValueAMD64_OpLessUint8x32(v) case OpLessUint8x64: return rewriteValueAMD64_OpLessUint8x64(v) case OpLoad: @@ -3583,28 +3471,12 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpNotEqualFloat64x4(v) case OpNotEqualFloat64x8: return rewriteValueAMD64_OpNotEqualFloat64x8(v) - case OpNotEqualInt16x16: - return rewriteValueAMD64_OpNotEqualInt16x16(v) case OpNotEqualInt16x32: return rewriteValueAMD64_OpNotEqualInt16x32(v) - case OpNotEqualInt16x8: - return rewriteValueAMD64_OpNotEqualInt16x8(v) case OpNotEqualInt32x16: return rewriteValueAMD64_OpNotEqualInt32x16(v) - case OpNotEqualInt32x4: - return rewriteValueAMD64_OpNotEqualInt32x4(v) - case OpNotEqualInt32x8: - return rewriteValueAMD64_OpNotEqualInt32x8(v) - case OpNotEqualInt64x2: - return rewriteValueAMD64_OpNotEqualInt64x2(v) - case OpNotEqualInt64x4: - return rewriteValueAMD64_OpNotEqualInt64x4(v) case OpNotEqualInt64x8: return rewriteValueAMD64_OpNotEqualInt64x8(v) - case OpNotEqualInt8x16: - return rewriteValueAMD64_OpNotEqualInt8x16(v) - case OpNotEqualInt8x32: - return rewriteValueAMD64_OpNotEqualInt8x32(v) case OpNotEqualInt8x64: return rewriteValueAMD64_OpNotEqualInt8x64(v) case OpNotEqualMaskedFloat32x16: @@ -3667,28 +3539,12 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpNotEqualMaskedUint8x32(v) case OpNotEqualMaskedUint8x64: return rewriteValueAMD64_OpNotEqualMaskedUint8x64(v) - case OpNotEqualUint16x16: - return rewriteValueAMD64_OpNotEqualUint16x16(v) case OpNotEqualUint16x32: return rewriteValueAMD64_OpNotEqualUint16x32(v) - case OpNotEqualUint16x8: - return rewriteValueAMD64_OpNotEqualUint16x8(v) case OpNotEqualUint32x16: return rewriteValueAMD64_OpNotEqualUint32x16(v) - case OpNotEqualUint32x4: - return rewriteValueAMD64_OpNotEqualUint32x4(v) - case OpNotEqualUint32x8: - return rewriteValueAMD64_OpNotEqualUint32x8(v) - case OpNotEqualUint64x2: - return rewriteValueAMD64_OpNotEqualUint64x2(v) - case OpNotEqualUint64x4: - return rewriteValueAMD64_OpNotEqualUint64x4(v) case OpNotEqualUint64x8: return rewriteValueAMD64_OpNotEqualUint64x8(v) - case OpNotEqualUint8x16: - return rewriteValueAMD64_OpNotEqualUint8x16(v) - case OpNotEqualUint8x32: - return rewriteValueAMD64_OpNotEqualUint8x32(v) case OpNotEqualUint8x64: return rewriteValueAMD64_OpNotEqualUint8x64(v) case OpOffPtr: @@ -37872,24 +37728,6 @@ func rewriteValueAMD64_OpGreaterEqualFloat64x8(v *Value) bool { return true } } -func rewriteValueAMD64_OpGreaterEqualInt16x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualInt16x16 x y) - // result: (VPMOVMToVec16x16 (VPCMPW256 [13] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec16x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPW256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(13) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} func rewriteValueAMD64_OpGreaterEqualInt16x32(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -37908,24 +37746,6 @@ func rewriteValueAMD64_OpGreaterEqualInt16x32(v *Value) bool { return true } } 
-func rewriteValueAMD64_OpGreaterEqualInt16x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualInt16x8 x y) - // result: (VPMOVMToVec16x8 (VPCMPW128 [13] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec16x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPW128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(13) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} func rewriteValueAMD64_OpGreaterEqualInt32x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -37944,78 +37764,6 @@ func rewriteValueAMD64_OpGreaterEqualInt32x16(v *Value) bool { return true } } -func rewriteValueAMD64_OpGreaterEqualInt32x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualInt32x4 x y) - // result: (VPMOVMToVec32x4 (VPCMPD128 [13] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec32x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPD128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(13) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterEqualInt32x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualInt32x8 x y) - // result: (VPMOVMToVec32x8 (VPCMPD256 [13] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec32x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPD256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(13) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterEqualInt64x2(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualInt64x2 x y) - // result: (VPMOVMToVec64x2 (VPCMPQ128 [13] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec64x2) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQ128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(13) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterEqualInt64x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualInt64x4 x y) - // result: (VPMOVMToVec64x4 (VPCMPQ256 [13] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec64x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQ256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(13) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} func rewriteValueAMD64_OpGreaterEqualInt64x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -38034,42 +37782,6 @@ func rewriteValueAMD64_OpGreaterEqualInt64x8(v *Value) bool { return true } } -func rewriteValueAMD64_OpGreaterEqualInt8x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualInt8x16 x y) - // result: (VPMOVMToVec8x16 (VPCMPB128 [13] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec8x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPB128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(13) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterEqualInt8x32(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualInt8x32 x y) - // result: (VPMOVMToVec8x32 (VPCMPB256 [13] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec8x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPB256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(13) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} func rewriteValueAMD64_OpGreaterEqualInt8x64(v *Value) bool 
{ v_1 := v.Args[1] v_0 := v.Args[0] @@ -38748,24 +38460,6 @@ func rewriteValueAMD64_OpGreaterEqualMaskedUint8x64(v *Value) bool { return true } } -func rewriteValueAMD64_OpGreaterEqualUint16x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualUint16x16 x y) - // result: (VPMOVMToVec16x16 (VPCMPUW256 [13] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec16x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUW256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(13) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} func rewriteValueAMD64_OpGreaterEqualUint16x32(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -38784,24 +38478,6 @@ func rewriteValueAMD64_OpGreaterEqualUint16x32(v *Value) bool { return true } } -func rewriteValueAMD64_OpGreaterEqualUint16x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualUint16x8 x y) - // result: (VPMOVMToVec16x8 (VPCMPUW128 [13] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec16x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUW128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(13) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} func rewriteValueAMD64_OpGreaterEqualUint32x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -38820,78 +38496,6 @@ func rewriteValueAMD64_OpGreaterEqualUint32x16(v *Value) bool { return true } } -func rewriteValueAMD64_OpGreaterEqualUint32x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualUint32x4 x y) - // result: (VPMOVMToVec32x4 (VPCMPUD128 [13] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec32x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUD128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(13) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterEqualUint32x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualUint32x8 x y) - // result: (VPMOVMToVec32x8 (VPCMPUD256 [13] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec32x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUD256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(13) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterEqualUint64x2(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualUint64x2 x y) - // result: (VPMOVMToVec64x2 (VPCMPUQ128 [13] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec64x2) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQ128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(13) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterEqualUint64x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualUint64x4 x y) - // result: (VPMOVMToVec64x4 (VPCMPUQ256 [13] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec64x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQ256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(13) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} func rewriteValueAMD64_OpGreaterEqualUint64x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -38910,42 +38514,6 @@ func rewriteValueAMD64_OpGreaterEqualUint64x8(v *Value) bool { return true } } -func rewriteValueAMD64_OpGreaterEqualUint8x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - 
typ := &b.Func.Config.Types - // match: (GreaterEqualUint8x16 x y) - // result: (VPMOVMToVec8x16 (VPCMPUB128 [13] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec8x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUB128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(13) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterEqualUint8x32(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterEqualUint8x32 x y) - // result: (VPMOVMToVec8x32 (VPCMPUB256 [13] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec8x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUB256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(13) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} func rewriteValueAMD64_OpGreaterEqualUint8x64(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -39784,24 +39352,6 @@ func rewriteValueAMD64_OpGreaterMaskedUint8x64(v *Value) bool { return true } } -func rewriteValueAMD64_OpGreaterUint16x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterUint16x16 x y) - // result: (VPMOVMToVec16x16 (VPCMPUW256 [14] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec16x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUW256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(14) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} func rewriteValueAMD64_OpGreaterUint16x32(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -39820,24 +39370,6 @@ func rewriteValueAMD64_OpGreaterUint16x32(v *Value) bool { return true } } -func rewriteValueAMD64_OpGreaterUint16x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterUint16x8 x y) - // result: (VPMOVMToVec16x8 (VPCMPUW128 [14] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec16x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUW128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(14) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} func rewriteValueAMD64_OpGreaterUint32x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -39856,78 +39388,6 @@ func rewriteValueAMD64_OpGreaterUint32x16(v *Value) bool { return true } } -func rewriteValueAMD64_OpGreaterUint32x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterUint32x4 x y) - // result: (VPMOVMToVec32x4 (VPCMPUD128 [14] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec32x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUD128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(14) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterUint32x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterUint32x8 x y) - // result: (VPMOVMToVec32x8 (VPCMPUD256 [14] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec32x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUD256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(14) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterUint64x2(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterUint64x2 x y) - // result: (VPMOVMToVec64x2 (VPCMPUQ128 [14] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec64x2) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQ128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(14) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} -func 
rewriteValueAMD64_OpGreaterUint64x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterUint64x4 x y) - // result: (VPMOVMToVec64x4 (VPCMPUQ256 [14] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec64x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQ256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(14) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} func rewriteValueAMD64_OpGreaterUint64x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -39946,42 +39406,6 @@ func rewriteValueAMD64_OpGreaterUint64x8(v *Value) bool { return true } } -func rewriteValueAMD64_OpGreaterUint8x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterUint8x16 x y) - // result: (VPMOVMToVec8x16 (VPCMPUB128 [14] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec8x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUB128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(14) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpGreaterUint8x32(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterUint8x32 x y) - // result: (VPMOVMToVec8x32 (VPCMPUB256 [14] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec8x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUB256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(14) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} func rewriteValueAMD64_OpGreaterUint8x64(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -40699,24 +40123,6 @@ func rewriteValueAMD64_OpLessEqualFloat64x8(v *Value) bool { return true } } -func rewriteValueAMD64_OpLessEqualInt16x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessEqualInt16x16 x y) - // result: (VPMOVMToVec16x16 (VPCMPW256 [2] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec16x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPW256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(2) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} func rewriteValueAMD64_OpLessEqualInt16x32(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -40735,24 +40141,6 @@ func rewriteValueAMD64_OpLessEqualInt16x32(v *Value) bool { return true } } -func rewriteValueAMD64_OpLessEqualInt16x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessEqualInt16x8 x y) - // result: (VPMOVMToVec16x8 (VPCMPW128 [2] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec16x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPW128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(2) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} func rewriteValueAMD64_OpLessEqualInt32x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -40771,78 +40159,6 @@ func rewriteValueAMD64_OpLessEqualInt32x16(v *Value) bool { return true } } -func rewriteValueAMD64_OpLessEqualInt32x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessEqualInt32x4 x y) - // result: (VPMOVMToVec32x4 (VPCMPD128 [2] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec32x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPD128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(2) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessEqualInt32x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: 
(LessEqualInt32x8 x y) - // result: (VPMOVMToVec32x8 (VPCMPD256 [2] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec32x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPD256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(2) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessEqualInt64x2(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessEqualInt64x2 x y) - // result: (VPMOVMToVec64x2 (VPCMPQ128 [2] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec64x2) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQ128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(2) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessEqualInt64x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessEqualInt64x4 x y) - // result: (VPMOVMToVec64x4 (VPCMPQ256 [2] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec64x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQ256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(2) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} func rewriteValueAMD64_OpLessEqualInt64x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -40861,42 +40177,6 @@ func rewriteValueAMD64_OpLessEqualInt64x8(v *Value) bool { return true } } -func rewriteValueAMD64_OpLessEqualInt8x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessEqualInt8x16 x y) - // result: (VPMOVMToVec8x16 (VPCMPB128 [2] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec8x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPB128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(2) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessEqualInt8x32(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessEqualInt8x32 x y) - // result: (VPMOVMToVec8x32 (VPCMPB256 [2] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec8x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPB256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(2) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} func rewriteValueAMD64_OpLessEqualInt8x64(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -41575,24 +40855,6 @@ func rewriteValueAMD64_OpLessEqualMaskedUint8x64(v *Value) bool { return true } } -func rewriteValueAMD64_OpLessEqualUint16x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessEqualUint16x16 x y) - // result: (VPMOVMToVec16x16 (VPCMPUW256 [2] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec16x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUW256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(2) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} func rewriteValueAMD64_OpLessEqualUint16x32(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -41611,24 +40873,6 @@ func rewriteValueAMD64_OpLessEqualUint16x32(v *Value) bool { return true } } -func rewriteValueAMD64_OpLessEqualUint16x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessEqualUint16x8 x y) - // result: (VPMOVMToVec16x8 (VPCMPUW128 [2] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec16x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUW128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(2) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} func rewriteValueAMD64_OpLessEqualUint32x16(v *Value) bool { v_1 := 
v.Args[1] v_0 := v.Args[0] @@ -41647,78 +40891,6 @@ func rewriteValueAMD64_OpLessEqualUint32x16(v *Value) bool { return true } } -func rewriteValueAMD64_OpLessEqualUint32x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessEqualUint32x4 x y) - // result: (VPMOVMToVec32x4 (VPCMPUD128 [2] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec32x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUD128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(2) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessEqualUint32x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessEqualUint32x8 x y) - // result: (VPMOVMToVec32x8 (VPCMPUD256 [2] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec32x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUD256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(2) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessEqualUint64x2(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessEqualUint64x2 x y) - // result: (VPMOVMToVec64x2 (VPCMPUQ128 [2] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec64x2) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQ128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(2) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessEqualUint64x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessEqualUint64x4 x y) - // result: (VPMOVMToVec64x4 (VPCMPUQ256 [2] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec64x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQ256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(2) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} func rewriteValueAMD64_OpLessEqualUint64x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -41737,42 +40909,6 @@ func rewriteValueAMD64_OpLessEqualUint64x8(v *Value) bool { return true } } -func rewriteValueAMD64_OpLessEqualUint8x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessEqualUint8x16 x y) - // result: (VPMOVMToVec8x16 (VPCMPUB128 [2] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec8x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUB128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(2) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessEqualUint8x32(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessEqualUint8x32 x y) - // result: (VPMOVMToVec8x32 (VPCMPUB256 [2] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec8x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUB256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(2) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} func rewriteValueAMD64_OpLessEqualUint8x64(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -41883,24 +41019,6 @@ func rewriteValueAMD64_OpLessFloat64x8(v *Value) bool { return true } } -func rewriteValueAMD64_OpLessInt16x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessInt16x16 x y) - // result: (VPMOVMToVec16x16 (VPCMPW256 [1] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec16x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPW256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(1) - v0.AddArg2(x, y) - v.AddArg(v0) - 
return true - } -} func rewriteValueAMD64_OpLessInt16x32(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -41919,24 +41037,6 @@ func rewriteValueAMD64_OpLessInt16x32(v *Value) bool { return true } } -func rewriteValueAMD64_OpLessInt16x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessInt16x8 x y) - // result: (VPMOVMToVec16x8 (VPCMPW128 [1] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec16x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPW128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(1) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} func rewriteValueAMD64_OpLessInt32x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -41955,78 +41055,6 @@ func rewriteValueAMD64_OpLessInt32x16(v *Value) bool { return true } } -func rewriteValueAMD64_OpLessInt32x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessInt32x4 x y) - // result: (VPMOVMToVec32x4 (VPCMPD128 [1] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec32x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPD128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(1) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessInt32x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessInt32x8 x y) - // result: (VPMOVMToVec32x8 (VPCMPD256 [1] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec32x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPD256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(1) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessInt64x2(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessInt64x2 x y) - // result: (VPMOVMToVec64x2 (VPCMPQ128 [1] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec64x2) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQ128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(1) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessInt64x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessInt64x4 x y) - // result: (VPMOVMToVec64x4 (VPCMPQ256 [1] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec64x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQ256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(1) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} func rewriteValueAMD64_OpLessInt64x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -42045,42 +41073,6 @@ func rewriteValueAMD64_OpLessInt64x8(v *Value) bool { return true } } -func rewriteValueAMD64_OpLessInt8x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessInt8x16 x y) - // result: (VPMOVMToVec8x16 (VPCMPB128 [1] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec8x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPB128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(1) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessInt8x32(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessInt8x32 x y) - // result: (VPMOVMToVec8x32 (VPCMPB256 [1] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec8x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPB256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(1) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} func 
rewriteValueAMD64_OpLessInt8x64(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -42759,24 +41751,6 @@ func rewriteValueAMD64_OpLessMaskedUint8x64(v *Value) bool { return true } } -func rewriteValueAMD64_OpLessUint16x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessUint16x16 x y) - // result: (VPMOVMToVec16x16 (VPCMPUW256 [1] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec16x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUW256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(1) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} func rewriteValueAMD64_OpLessUint16x32(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -42795,24 +41769,6 @@ func rewriteValueAMD64_OpLessUint16x32(v *Value) bool { return true } } -func rewriteValueAMD64_OpLessUint16x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessUint16x8 x y) - // result: (VPMOVMToVec16x8 (VPCMPUW128 [1] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec16x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUW128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(1) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} func rewriteValueAMD64_OpLessUint32x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -42831,78 +41787,6 @@ func rewriteValueAMD64_OpLessUint32x16(v *Value) bool { return true } } -func rewriteValueAMD64_OpLessUint32x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessUint32x4 x y) - // result: (VPMOVMToVec32x4 (VPCMPUD128 [1] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec32x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUD128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(1) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessUint32x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessUint32x8 x y) - // result: (VPMOVMToVec32x8 (VPCMPUD256 [1] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec32x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUD256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(1) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessUint64x2(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessUint64x2 x y) - // result: (VPMOVMToVec64x2 (VPCMPUQ128 [1] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec64x2) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQ128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(1) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessUint64x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessUint64x4 x y) - // result: (VPMOVMToVec64x4 (VPCMPUQ256 [1] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec64x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQ256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(1) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} func rewriteValueAMD64_OpLessUint64x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -42921,42 +41805,6 @@ func rewriteValueAMD64_OpLessUint64x8(v *Value) bool { return true } } -func rewriteValueAMD64_OpLessUint8x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessUint8x16 x y) - // result: (VPMOVMToVec8x16 (VPCMPUB128 [1] x y)) - for { - x := 
v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec8x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUB128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(1) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpLessUint8x32(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (LessUint8x32 x y) - // result: (VPMOVMToVec8x32 (VPCMPUB256 [1] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec8x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUB256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(1) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} func rewriteValueAMD64_OpLessUint8x64(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -47070,24 +45918,6 @@ func rewriteValueAMD64_OpNotEqualFloat64x8(v *Value) bool { return true } } -func rewriteValueAMD64_OpNotEqualInt16x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualInt16x16 x y) - // result: (VPMOVMToVec16x16 (VPCMPW256 [4] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec16x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPW256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(4) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} func rewriteValueAMD64_OpNotEqualInt16x32(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -47106,24 +45936,6 @@ func rewriteValueAMD64_OpNotEqualInt16x32(v *Value) bool { return true } } -func rewriteValueAMD64_OpNotEqualInt16x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualInt16x8 x y) - // result: (VPMOVMToVec16x8 (VPCMPW128 [4] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec16x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPW128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(4) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} func rewriteValueAMD64_OpNotEqualInt32x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -47142,78 +45954,6 @@ func rewriteValueAMD64_OpNotEqualInt32x16(v *Value) bool { return true } } -func rewriteValueAMD64_OpNotEqualInt32x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualInt32x4 x y) - // result: (VPMOVMToVec32x4 (VPCMPD128 [4] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec32x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPD128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(4) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpNotEqualInt32x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualInt32x8 x y) - // result: (VPMOVMToVec32x8 (VPCMPD256 [4] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec32x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPD256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(4) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpNotEqualInt64x2(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualInt64x2 x y) - // result: (VPMOVMToVec64x2 (VPCMPQ128 [4] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec64x2) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQ128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(4) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpNotEqualInt64x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualInt64x4 x y) - // result: 
(VPMOVMToVec64x4 (VPCMPQ256 [4] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec64x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQ256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(4) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} func rewriteValueAMD64_OpNotEqualInt64x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -47232,42 +45972,6 @@ func rewriteValueAMD64_OpNotEqualInt64x8(v *Value) bool { return true } } -func rewriteValueAMD64_OpNotEqualInt8x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualInt8x16 x y) - // result: (VPMOVMToVec8x16 (VPCMPB128 [4] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec8x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPB128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(4) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpNotEqualInt8x32(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualInt8x32 x y) - // result: (VPMOVMToVec8x32 (VPCMPB256 [4] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec8x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPB256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(4) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} func rewriteValueAMD64_OpNotEqualInt8x64(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -47946,24 +46650,6 @@ func rewriteValueAMD64_OpNotEqualMaskedUint8x64(v *Value) bool { return true } } -func rewriteValueAMD64_OpNotEqualUint16x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualUint16x16 x y) - // result: (VPMOVMToVec16x16 (VPCMPUW256 [4] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec16x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUW256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(4) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} func rewriteValueAMD64_OpNotEqualUint16x32(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -47982,24 +46668,6 @@ func rewriteValueAMD64_OpNotEqualUint16x32(v *Value) bool { return true } } -func rewriteValueAMD64_OpNotEqualUint16x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualUint16x8 x y) - // result: (VPMOVMToVec16x8 (VPCMPUW128 [4] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec16x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUW128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(4) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} func rewriteValueAMD64_OpNotEqualUint32x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -48018,78 +46686,6 @@ func rewriteValueAMD64_OpNotEqualUint32x16(v *Value) bool { return true } } -func rewriteValueAMD64_OpNotEqualUint32x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualUint32x4 x y) - // result: (VPMOVMToVec32x4 (VPCMPUD128 [4] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec32x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUD128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(4) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpNotEqualUint32x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualUint32x8 x y) - // result: (VPMOVMToVec32x8 (VPCMPUD256 [4] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec32x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUD256, typ.Mask) - 
v0.AuxInt = uint8ToAuxInt(4) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpNotEqualUint64x2(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualUint64x2 x y) - // result: (VPMOVMToVec64x2 (VPCMPUQ128 [4] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec64x2) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQ128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(4) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpNotEqualUint64x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualUint64x4 x y) - // result: (VPMOVMToVec64x4 (VPCMPUQ256 [4] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec64x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQ256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(4) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} func rewriteValueAMD64_OpNotEqualUint64x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -48108,42 +46704,6 @@ func rewriteValueAMD64_OpNotEqualUint64x8(v *Value) bool { return true } } -func rewriteValueAMD64_OpNotEqualUint8x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualUint8x16 x y) - // result: (VPMOVMToVec8x16 (VPCMPUB128 [4] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec8x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUB128, typ.Mask) - v0.AuxInt = uint8ToAuxInt(4) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpNotEqualUint8x32(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (NotEqualUint8x32 x y) - // result: (VPMOVMToVec8x32 (VPCMPUB256 [4] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec8x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUB256, typ.Mask) - v0.AuxInt = uint8ToAuxInt(4) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} func rewriteValueAMD64_OpNotEqualUint8x64(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] diff --git a/src/cmd/compile/internal/ssagen/simdintrinsics.go b/src/cmd/compile/internal/ssagen/simdintrinsics.go index 7a95a4450d..682a37e91b 100644 --- a/src/cmd/compile/internal/ssagen/simdintrinsics.go +++ b/src/cmd/compile/internal/ssagen/simdintrinsics.go @@ -602,17 +602,9 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Float64x2.Greater", opLen2(ssa.OpGreaterFloat64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float64x4.Greater", opLen2(ssa.OpGreaterFloat64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float64x8.Greater", opLen2(ssa.OpGreaterFloat64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint8x16.Greater", opLen2(ssa.OpGreaterUint8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint8x32.Greater", opLen2(ssa.OpGreaterUint8x32, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint8x64.Greater", opLen2(ssa.OpGreaterUint8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint16x8.Greater", opLen2(ssa.OpGreaterUint16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint16x16.Greater", opLen2(ssa.OpGreaterUint16x16, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint16x32.Greater", opLen2(ssa.OpGreaterUint16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint32x4.Greater", opLen2(ssa.OpGreaterUint32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x8.Greater", opLen2(ssa.OpGreaterUint32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint32x16.Greater", opLen2(ssa.OpGreaterUint32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint64x2.Greater", opLen2(ssa.OpGreaterUint64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint64x4.Greater", opLen2(ssa.OpGreaterUint64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint64x8.Greater", opLen2(ssa.OpGreaterUint64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float32x4.GreaterEqual", opLen2(ssa.OpGreaterEqualFloat32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x8.GreaterEqual", opLen2(ssa.OpGreaterEqualFloat32x8, types.TypeVec256), sys.AMD64) @@ -620,29 +612,13 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Float64x2.GreaterEqual", opLen2(ssa.OpGreaterEqualFloat64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float64x4.GreaterEqual", opLen2(ssa.OpGreaterEqualFloat64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float64x8.GreaterEqual", opLen2(ssa.OpGreaterEqualFloat64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int8x16.GreaterEqual", opLen2(ssa.OpGreaterEqualInt8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int8x32.GreaterEqual", opLen2(ssa.OpGreaterEqualInt8x32, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int8x64.GreaterEqual", opLen2(ssa.OpGreaterEqualInt8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int16x8.GreaterEqual", opLen2(ssa.OpGreaterEqualInt16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int16x16.GreaterEqual", opLen2(ssa.OpGreaterEqualInt16x16, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int16x32.GreaterEqual", opLen2(ssa.OpGreaterEqualInt16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int32x4.GreaterEqual", opLen2(ssa.OpGreaterEqualInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x8.GreaterEqual", opLen2(ssa.OpGreaterEqualInt32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int32x16.GreaterEqual", opLen2(ssa.OpGreaterEqualInt32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int64x2.GreaterEqual", opLen2(ssa.OpGreaterEqualInt64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x4.GreaterEqual", opLen2(ssa.OpGreaterEqualInt64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int64x8.GreaterEqual", opLen2(ssa.OpGreaterEqualInt64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint8x16.GreaterEqual", opLen2(ssa.OpGreaterEqualUint8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint8x32.GreaterEqual", opLen2(ssa.OpGreaterEqualUint8x32, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint8x64.GreaterEqual", opLen2(ssa.OpGreaterEqualUint8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint16x8.GreaterEqual", opLen2(ssa.OpGreaterEqualUint16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint16x16.GreaterEqual", opLen2(ssa.OpGreaterEqualUint16x16, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint16x32.GreaterEqual", opLen2(ssa.OpGreaterEqualUint16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint32x4.GreaterEqual", opLen2(ssa.OpGreaterEqualUint32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x8.GreaterEqual", opLen2(ssa.OpGreaterEqualUint32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint32x16.GreaterEqual", opLen2(ssa.OpGreaterEqualUint32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint64x2.GreaterEqual", opLen2(ssa.OpGreaterEqualUint64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint64x4.GreaterEqual", opLen2(ssa.OpGreaterEqualUint64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint64x8.GreaterEqual", opLen2(ssa.OpGreaterEqualUint64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float32x4.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedFloat32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x8.GreaterEqualMasked", opLen3(ssa.OpGreaterEqualMaskedFloat32x8, types.TypeVec256), sys.AMD64) @@ -722,29 +698,13 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Float64x2.Less", opLen2(ssa.OpLessFloat64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float64x4.Less", opLen2(ssa.OpLessFloat64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float64x8.Less", opLen2(ssa.OpLessFloat64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int8x16.Less", opLen2(ssa.OpLessInt8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int8x32.Less", opLen2(ssa.OpLessInt8x32, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int8x64.Less", opLen2(ssa.OpLessInt8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int16x8.Less", opLen2(ssa.OpLessInt16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int16x16.Less", opLen2(ssa.OpLessInt16x16, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int16x32.Less", opLen2(ssa.OpLessInt16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int32x4.Less", opLen2(ssa.OpLessInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x8.Less", opLen2(ssa.OpLessInt32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int32x16.Less", opLen2(ssa.OpLessInt32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int64x2.Less", opLen2(ssa.OpLessInt64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x4.Less", opLen2(ssa.OpLessInt64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int64x8.Less", opLen2(ssa.OpLessInt64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint8x16.Less", opLen2(ssa.OpLessUint8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint8x32.Less", opLen2(ssa.OpLessUint8x32, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint8x64.Less", opLen2(ssa.OpLessUint8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint16x8.Less", opLen2(ssa.OpLessUint16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint16x16.Less", opLen2(ssa.OpLessUint16x16, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint16x32.Less", opLen2(ssa.OpLessUint16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint32x4.Less", opLen2(ssa.OpLessUint32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x8.Less", opLen2(ssa.OpLessUint32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint32x16.Less", opLen2(ssa.OpLessUint32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint64x2.Less", opLen2(ssa.OpLessUint64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint64x4.Less", opLen2(ssa.OpLessUint64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint64x8.Less", opLen2(ssa.OpLessUint64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float32x4.LessEqual", opLen2(ssa.OpLessEqualFloat32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x8.LessEqual", opLen2(ssa.OpLessEqualFloat32x8, types.TypeVec256), sys.AMD64) @@ -752,29 +712,13 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Float64x2.LessEqual", opLen2(ssa.OpLessEqualFloat64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float64x4.LessEqual", opLen2(ssa.OpLessEqualFloat64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float64x8.LessEqual", opLen2(ssa.OpLessEqualFloat64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int8x16.LessEqual", opLen2(ssa.OpLessEqualInt8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int8x32.LessEqual", opLen2(ssa.OpLessEqualInt8x32, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int8x64.LessEqual", opLen2(ssa.OpLessEqualInt8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int16x8.LessEqual", opLen2(ssa.OpLessEqualInt16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int16x16.LessEqual", opLen2(ssa.OpLessEqualInt16x16, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int16x32.LessEqual", opLen2(ssa.OpLessEqualInt16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int32x4.LessEqual", opLen2(ssa.OpLessEqualInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x8.LessEqual", opLen2(ssa.OpLessEqualInt32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int32x16.LessEqual", opLen2(ssa.OpLessEqualInt32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int64x2.LessEqual", opLen2(ssa.OpLessEqualInt64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x4.LessEqual", opLen2(ssa.OpLessEqualInt64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int64x8.LessEqual", opLen2(ssa.OpLessEqualInt64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint8x16.LessEqual", opLen2(ssa.OpLessEqualUint8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint8x32.LessEqual", opLen2(ssa.OpLessEqualUint8x32, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint8x64.LessEqual", opLen2(ssa.OpLessEqualUint8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint16x8.LessEqual", opLen2(ssa.OpLessEqualUint16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint16x16.LessEqual", opLen2(ssa.OpLessEqualUint16x16, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint16x32.LessEqual", opLen2(ssa.OpLessEqualUint16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint32x4.LessEqual", opLen2(ssa.OpLessEqualUint32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x8.LessEqual", opLen2(ssa.OpLessEqualUint32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint32x16.LessEqual", opLen2(ssa.OpLessEqualUint32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint64x2.LessEqual", opLen2(ssa.OpLessEqualUint64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint64x4.LessEqual", opLen2(ssa.OpLessEqualUint64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint64x8.LessEqual", opLen2(ssa.OpLessEqualUint64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float32x4.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedFloat32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x8.LessEqualMasked", opLen3(ssa.OpLessEqualMaskedFloat32x8, types.TypeVec256), sys.AMD64) @@ -1062,29 +1006,13 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Float64x2.NotEqual", opLen2(ssa.OpNotEqualFloat64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float64x4.NotEqual", opLen2(ssa.OpNotEqualFloat64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float64x8.NotEqual", opLen2(ssa.OpNotEqualFloat64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int8x16.NotEqual", opLen2(ssa.OpNotEqualInt8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int8x32.NotEqual", opLen2(ssa.OpNotEqualInt8x32, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int8x64.NotEqual", opLen2(ssa.OpNotEqualInt8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int16x8.NotEqual", opLen2(ssa.OpNotEqualInt16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int16x16.NotEqual", opLen2(ssa.OpNotEqualInt16x16, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int16x32.NotEqual", opLen2(ssa.OpNotEqualInt16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int32x4.NotEqual", opLen2(ssa.OpNotEqualInt32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int32x8.NotEqual", opLen2(ssa.OpNotEqualInt32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int32x16.NotEqual", opLen2(ssa.OpNotEqualInt32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int64x2.NotEqual", opLen2(ssa.OpNotEqualInt64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Int64x4.NotEqual", opLen2(ssa.OpNotEqualInt64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int64x8.NotEqual", opLen2(ssa.OpNotEqualInt64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint8x16.NotEqual", opLen2(ssa.OpNotEqualUint8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint8x32.NotEqual", opLen2(ssa.OpNotEqualUint8x32, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint8x64.NotEqual", opLen2(ssa.OpNotEqualUint8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint16x8.NotEqual", opLen2(ssa.OpNotEqualUint16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint16x16.NotEqual", opLen2(ssa.OpNotEqualUint16x16, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint16x32.NotEqual", opLen2(ssa.OpNotEqualUint16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint32x4.NotEqual", opLen2(ssa.OpNotEqualUint32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x8.NotEqual", opLen2(ssa.OpNotEqualUint32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint32x16.NotEqual", opLen2(ssa.OpNotEqualUint32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint64x2.NotEqual", opLen2(ssa.OpNotEqualUint64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint64x4.NotEqual", opLen2(ssa.OpNotEqualUint64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint64x8.NotEqual", opLen2(ssa.OpNotEqualUint64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float32x4.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedFloat32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x8.NotEqualMasked", opLen3(ssa.OpNotEqualMaskedFloat32x8, types.TypeVec256), sys.AMD64) diff --git a/src/simd/compare_test.go b/src/simd/compare_test.go index 19b1f3886d..7fd20cf5d7 100644 --- a/src/simd/compare_test.go +++ b/src/simd/compare_test.go @@ -59,17 +59,32 @@ func TestLess(t *testing.T) { testFloat64x2Compare(t, simd.Float64x2.Less, lessSlice[float64]) testFloat64x4Compare(t, simd.Float64x4.Less, lessSlice[float64]) - if comparisonFixed { - testInt16x16Compare(t, simd.Int16x16.Less, lessSlice[int16]) - testInt16x8Compare(t, simd.Int16x8.Less, lessSlice[int16]) - testInt32x4Compare(t, simd.Int32x4.Less, lessSlice[int32]) - testInt32x8Compare(t, simd.Int32x8.Less, 
lessSlice[int32]) - testInt64x2Compare(t, simd.Int64x2.Less, lessSlice[int64]) - testInt64x4Compare(t, simd.Int64x4.Less, lessSlice[int64]) - testInt8x16Compare(t, simd.Int8x16.Less, lessSlice[int8]) - testInt8x32Compare(t, simd.Int8x32.Less, lessSlice[int8]) - - } + testInt16x16Compare(t, simd.Int16x16.Less, lessSlice[int16]) + testInt16x8Compare(t, simd.Int16x8.Less, lessSlice[int16]) + testInt32x4Compare(t, simd.Int32x4.Less, lessSlice[int32]) + testInt32x8Compare(t, simd.Int32x8.Less, lessSlice[int32]) + testInt64x2Compare(t, simd.Int64x2.Less, lessSlice[int64]) + testInt64x4Compare(t, simd.Int64x4.Less, lessSlice[int64]) + testInt8x16Compare(t, simd.Int8x16.Less, lessSlice[int8]) + testInt8x32Compare(t, simd.Int8x32.Less, lessSlice[int8]) + + testInt16x16Compare(t, simd.Int16x16.Less, lessSlice[int16]) + testInt16x8Compare(t, simd.Int16x8.Less, lessSlice[int16]) + testInt32x4Compare(t, simd.Int32x4.Less, lessSlice[int32]) + testInt32x8Compare(t, simd.Int32x8.Less, lessSlice[int32]) + testInt64x2Compare(t, simd.Int64x2.Less, lessSlice[int64]) + testInt64x4Compare(t, simd.Int64x4.Less, lessSlice[int64]) + testInt8x16Compare(t, simd.Int8x16.Less, lessSlice[int8]) + testInt8x32Compare(t, simd.Int8x32.Less, lessSlice[int8]) + + testUint16x16Compare(t, simd.Uint16x16.Less, lessSlice[uint16]) + testUint16x8Compare(t, simd.Uint16x8.Less, lessSlice[uint16]) + testUint32x4Compare(t, simd.Uint32x4.Less, lessSlice[uint32]) + testUint32x8Compare(t, simd.Uint32x8.Less, lessSlice[uint32]) + testUint64x2Compare(t, simd.Uint64x2.Less, lessSlice[uint64]) + testUint64x4Compare(t, simd.Uint64x4.Less, lessSlice[uint64]) + testUint8x16Compare(t, simd.Uint8x16.Less, lessSlice[uint8]) + testUint8x32Compare(t, simd.Uint8x32.Less, lessSlice[uint8]) if simd.HasAVX512() { testUint16x16Compare(t, simd.Uint16x16.Less, lessSlice[uint16]) @@ -100,28 +115,25 @@ func TestLessEqual(t *testing.T) { testFloat64x2Compare(t, simd.Float64x2.LessEqual, lessEqualSlice[float64]) testFloat64x4Compare(t, simd.Float64x4.LessEqual, lessEqualSlice[float64]) - if comparisonFixed { - testInt16x16Compare(t, simd.Int16x16.LessEqual, lessEqualSlice[int16]) - testInt16x8Compare(t, simd.Int16x8.LessEqual, lessEqualSlice[int16]) - testInt32x4Compare(t, simd.Int32x4.LessEqual, lessEqualSlice[int32]) - testInt32x8Compare(t, simd.Int32x8.LessEqual, lessEqualSlice[int32]) - testInt64x2Compare(t, simd.Int64x2.LessEqual, lessEqualSlice[int64]) - testInt64x4Compare(t, simd.Int64x4.LessEqual, lessEqualSlice[int64]) - testInt8x16Compare(t, simd.Int8x16.LessEqual, lessEqualSlice[int8]) - testInt8x32Compare(t, simd.Int8x32.LessEqual, lessEqualSlice[int8]) - - } + testInt16x16Compare(t, simd.Int16x16.LessEqual, lessEqualSlice[int16]) + testInt16x8Compare(t, simd.Int16x8.LessEqual, lessEqualSlice[int16]) + testInt32x4Compare(t, simd.Int32x4.LessEqual, lessEqualSlice[int32]) + testInt32x8Compare(t, simd.Int32x8.LessEqual, lessEqualSlice[int32]) + testInt64x2Compare(t, simd.Int64x2.LessEqual, lessEqualSlice[int64]) + testInt64x4Compare(t, simd.Int64x4.LessEqual, lessEqualSlice[int64]) + testInt8x16Compare(t, simd.Int8x16.LessEqual, lessEqualSlice[int8]) + testInt8x32Compare(t, simd.Int8x32.LessEqual, lessEqualSlice[int8]) + + testUint16x16Compare(t, simd.Uint16x16.LessEqual, lessEqualSlice[uint16]) + testUint16x8Compare(t, simd.Uint16x8.LessEqual, lessEqualSlice[uint16]) + testUint32x4Compare(t, simd.Uint32x4.LessEqual, lessEqualSlice[uint32]) + testUint32x8Compare(t, simd.Uint32x8.LessEqual, lessEqualSlice[uint32]) + testUint64x2Compare(t, 
+	testUint64x4Compare(t, simd.Uint64x4.LessEqual, lessEqualSlice[uint64])
+	testUint8x16Compare(t, simd.Uint8x16.LessEqual, lessEqualSlice[uint8])
+	testUint8x32Compare(t, simd.Uint8x32.LessEqual, lessEqualSlice[uint8])
 
 	if simd.HasAVX512() {
-		testUint16x16Compare(t, simd.Uint16x16.LessEqual, lessEqualSlice[uint16])
-		testUint16x8Compare(t, simd.Uint16x8.LessEqual, lessEqualSlice[uint16])
-		testUint32x4Compare(t, simd.Uint32x4.LessEqual, lessEqualSlice[uint32])
-		testUint32x8Compare(t, simd.Uint32x8.LessEqual, lessEqualSlice[uint32])
-		testUint64x2Compare(t, simd.Uint64x2.LessEqual, lessEqualSlice[uint64])
-		testUint64x4Compare(t, simd.Uint64x4.LessEqual, lessEqualSlice[uint64])
-		testUint8x16Compare(t, simd.Uint8x16.LessEqual, lessEqualSlice[uint8])
-		testUint8x32Compare(t, simd.Uint8x32.LessEqual, lessEqualSlice[uint8])
-
 		testFloat32x16Compare(t, simd.Float32x16.LessEqual, lessEqualSlice[float32])
 		testFloat64x8Compare(t, simd.Float64x8.LessEqual, lessEqualSlice[float64])
 		testInt8x64Compare(t, simd.Int8x64.LessEqual, lessEqualSlice[int8])
@@ -151,16 +154,17 @@ func TestGreater(t *testing.T) {
 	testInt8x16Compare(t, simd.Int8x16.Greater, greaterSlice[int8])
 	testInt8x32Compare(t, simd.Int8x32.Greater, greaterSlice[int8])
 
-	if simd.HasAVX512() {
-		testUint16x16Compare(t, simd.Uint16x16.Greater, greaterSlice[uint16])
-		testUint16x8Compare(t, simd.Uint16x8.Greater, greaterSlice[uint16])
-		testUint32x4Compare(t, simd.Uint32x4.Greater, greaterSlice[uint32])
-		testUint32x8Compare(t, simd.Uint32x8.Greater, greaterSlice[uint32])
+	testUint16x16Compare(t, simd.Uint16x16.Greater, greaterSlice[uint16])
+	testUint16x8Compare(t, simd.Uint16x8.Greater, greaterSlice[uint16])
+	testUint32x4Compare(t, simd.Uint32x4.Greater, greaterSlice[uint32])
+	testUint32x8Compare(t, simd.Uint32x8.Greater, greaterSlice[uint32])
 
-		testUint64x2Compare(t, simd.Uint64x2.Greater, greaterSlice[uint64])
-		testUint64x4Compare(t, simd.Uint64x4.Greater, greaterSlice[uint64])
-		testUint8x16Compare(t, simd.Uint8x16.Greater, greaterSlice[uint8])
-		testUint8x32Compare(t, simd.Uint8x32.Greater, greaterSlice[uint8])
+	testUint64x2Compare(t, simd.Uint64x2.Greater, greaterSlice[uint64])
+	testUint64x4Compare(t, simd.Uint64x4.Greater, greaterSlice[uint64])
+	testUint8x16Compare(t, simd.Uint8x16.Greater, greaterSlice[uint8])
+	testUint8x32Compare(t, simd.Uint8x32.Greater, greaterSlice[uint8])
+
+	if simd.HasAVX512() {
 
 		testFloat32x16Compare(t, simd.Float32x16.Greater, greaterSlice[float32])
 		testFloat64x8Compare(t, simd.Float64x8.Greater, greaterSlice[float64])
@@ -181,28 +185,25 @@ func TestGreaterEqual(t *testing.T) {
 	testFloat64x2Compare(t, simd.Float64x2.GreaterEqual, greaterEqualSlice[float64])
 	testFloat64x4Compare(t, simd.Float64x4.GreaterEqual, greaterEqualSlice[float64])
 
-	if comparisonFixed {
-		testInt16x16Compare(t, simd.Int16x16.GreaterEqual, greaterEqualSlice[int16])
-		testInt16x8Compare(t, simd.Int16x8.GreaterEqual, greaterEqualSlice[int16])
-		testInt32x4Compare(t, simd.Int32x4.GreaterEqual, greaterEqualSlice[int32])
-		testInt32x8Compare(t, simd.Int32x8.GreaterEqual, greaterEqualSlice[int32])
-		testInt64x2Compare(t, simd.Int64x2.GreaterEqual, greaterEqualSlice[int64])
-		testInt64x4Compare(t, simd.Int64x4.GreaterEqual, greaterEqualSlice[int64])
-		testInt8x16Compare(t, simd.Int8x16.GreaterEqual, greaterEqualSlice[int8])
-		testInt8x32Compare(t, simd.Int8x32.GreaterEqual, greaterEqualSlice[int8])
-
-	}
+	testInt16x16Compare(t, simd.Int16x16.GreaterEqual, greaterEqualSlice[int16])
+	testInt16x8Compare(t, simd.Int16x8.GreaterEqual, greaterEqualSlice[int16])
+	testInt32x4Compare(t, simd.Int32x4.GreaterEqual, greaterEqualSlice[int32])
+	testInt32x8Compare(t, simd.Int32x8.GreaterEqual, greaterEqualSlice[int32])
+	testInt64x2Compare(t, simd.Int64x2.GreaterEqual, greaterEqualSlice[int64])
+	testInt64x4Compare(t, simd.Int64x4.GreaterEqual, greaterEqualSlice[int64])
+	testInt8x16Compare(t, simd.Int8x16.GreaterEqual, greaterEqualSlice[int8])
+	testInt8x32Compare(t, simd.Int8x32.GreaterEqual, greaterEqualSlice[int8])
+
+	testUint16x16Compare(t, simd.Uint16x16.GreaterEqual, greaterEqualSlice[uint16])
+	testUint16x8Compare(t, simd.Uint16x8.GreaterEqual, greaterEqualSlice[uint16])
+	testUint32x4Compare(t, simd.Uint32x4.GreaterEqual, greaterEqualSlice[uint32])
+	testUint32x8Compare(t, simd.Uint32x8.GreaterEqual, greaterEqualSlice[uint32])
+	testUint64x2Compare(t, simd.Uint64x2.GreaterEqual, greaterEqualSlice[uint64])
+	testUint64x4Compare(t, simd.Uint64x4.GreaterEqual, greaterEqualSlice[uint64])
+	testUint8x16Compare(t, simd.Uint8x16.GreaterEqual, greaterEqualSlice[uint8])
+	testUint8x32Compare(t, simd.Uint8x32.GreaterEqual, greaterEqualSlice[uint8])
 
 	if simd.HasAVX512() {
-		testUint16x16Compare(t, simd.Uint16x16.GreaterEqual, greaterEqualSlice[uint16])
-		testUint16x8Compare(t, simd.Uint16x8.GreaterEqual, greaterEqualSlice[uint16])
-		testUint32x4Compare(t, simd.Uint32x4.GreaterEqual, greaterEqualSlice[uint32])
-		testUint32x8Compare(t, simd.Uint32x8.GreaterEqual, greaterEqualSlice[uint32])
-		testUint64x2Compare(t, simd.Uint64x2.GreaterEqual, greaterEqualSlice[uint64])
-		testUint64x4Compare(t, simd.Uint64x4.GreaterEqual, greaterEqualSlice[uint64])
-		testUint8x16Compare(t, simd.Uint8x16.GreaterEqual, greaterEqualSlice[uint8])
-		testUint8x32Compare(t, simd.Uint8x32.GreaterEqual, greaterEqualSlice[uint8])
-
 		testFloat32x16Compare(t, simd.Float32x16.GreaterEqual, greaterEqualSlice[float32])
 		testFloat64x8Compare(t, simd.Float64x8.GreaterEqual, greaterEqualSlice[float64])
 		testInt8x64Compare(t, simd.Int8x64.GreaterEqual, greaterEqualSlice[int8])
@@ -260,25 +261,23 @@ func TestNotEqual(t *testing.T) {
 	testFloat64x2Compare(t, simd.Float64x2.NotEqual, notEqualSlice[float64])
 	testFloat64x4Compare(t, simd.Float64x4.NotEqual, notEqualSlice[float64])
 
-	if comparisonFixed {
-		testInt16x16Compare(t, simd.Int16x16.NotEqual, notEqualSlice[int16])
-		testInt16x8Compare(t, simd.Int16x8.NotEqual, notEqualSlice[int16])
-		testInt32x4Compare(t, simd.Int32x4.NotEqual, notEqualSlice[int32])
-		testInt32x8Compare(t, simd.Int32x8.NotEqual, notEqualSlice[int32])
-		testInt64x2Compare(t, simd.Int64x2.NotEqual, notEqualSlice[int64])
-		testInt64x4Compare(t, simd.Int64x4.NotEqual, notEqualSlice[int64])
-		testInt8x16Compare(t, simd.Int8x16.NotEqual, notEqualSlice[int8])
-		testInt8x32Compare(t, simd.Int8x32.NotEqual, notEqualSlice[int8])
-
-		testUint16x16Compare(t, simd.Uint16x16.NotEqual, notEqualSlice[uint16])
-		testUint16x8Compare(t, simd.Uint16x8.NotEqual, notEqualSlice[uint16])
-		testUint32x4Compare(t, simd.Uint32x4.NotEqual, notEqualSlice[uint32])
-		testUint32x8Compare(t, simd.Uint32x8.NotEqual, notEqualSlice[uint32])
-		testUint64x2Compare(t, simd.Uint64x2.NotEqual, notEqualSlice[uint64])
-		testUint64x4Compare(t, simd.Uint64x4.NotEqual, notEqualSlice[uint64])
-		testUint8x16Compare(t, simd.Uint8x16.NotEqual, notEqualSlice[uint8])
-		testUint8x32Compare(t, simd.Uint8x32.NotEqual, notEqualSlice[uint8])
-	}
+	testInt16x16Compare(t, simd.Int16x16.NotEqual, notEqualSlice[int16])
+	testInt16x8Compare(t, simd.Int16x8.NotEqual, notEqualSlice[int16])
+	testInt32x4Compare(t, simd.Int32x4.NotEqual, notEqualSlice[int32])
+	testInt32x8Compare(t, simd.Int32x8.NotEqual, notEqualSlice[int32])
+	testInt64x2Compare(t, simd.Int64x2.NotEqual, notEqualSlice[int64])
+	testInt64x4Compare(t, simd.Int64x4.NotEqual, notEqualSlice[int64])
+	testInt8x16Compare(t, simd.Int8x16.NotEqual, notEqualSlice[int8])
+	testInt8x32Compare(t, simd.Int8x32.NotEqual, notEqualSlice[int8])
+
+	testUint16x16Compare(t, simd.Uint16x16.NotEqual, notEqualSlice[uint16])
+	testUint16x8Compare(t, simd.Uint16x8.NotEqual, notEqualSlice[uint16])
+	testUint32x4Compare(t, simd.Uint32x4.NotEqual, notEqualSlice[uint32])
+	testUint32x8Compare(t, simd.Uint32x8.NotEqual, notEqualSlice[uint32])
+	testUint64x2Compare(t, simd.Uint64x2.NotEqual, notEqualSlice[uint64])
+	testUint64x4Compare(t, simd.Uint64x4.NotEqual, notEqualSlice[uint64])
+	testUint8x16Compare(t, simd.Uint8x16.NotEqual, notEqualSlice[uint8])
+	testUint8x32Compare(t, simd.Uint8x32.NotEqual, notEqualSlice[uint8])
 
 	if simd.HasAVX512() {
 		testFloat32x16Compare(t, simd.Float32x16.NotEqual, notEqualSlice[float32])
diff --git a/src/simd/genfiles.go b/src/simd/genfiles.go
index 8b36da71ab..022ddd1681 100644
--- a/src/simd/genfiles.go
+++ b/src/simd/genfiles.go
@@ -87,6 +87,16 @@ var ternaryFlaky = &shapes{ // for tests that support flaky equality
 	floats: []int{32},
 }
 
+var avx2SignedComparisons = &shapes{
+	vecs: []int{128, 256},
+	ints: []int{8, 16, 32, 64},
+}
+
+var avx2UnsignedComparisons = &shapes{
+	vecs: []int{128, 256},
+	uints: []int{8, 16, 32, 64},
+}
+
 type templateData struct {
 	Vec   string // the type of the vector, e.g. Float32x4
 	AOrAn string // for documentation, the article "a" or "an"
@@ -486,6 +496,130 @@ func (x {{.Vec}}) StoreSlicePart(s []{{.Type}}) {
 }
 `)
 
+func (t templateData) CPUfeature() string {
+	switch t.Vwidth {
+	case 128:
+		return "AVX"
+	case 256:
+		return "AVX2"
+	case 512:
+		return "AVX512"
+	}
+	panic(fmt.Errorf("unexpected vector width %d", t.Vwidth))
+}
+
+var avx2SignedComparisonsTemplate = shapedTemplateOf(avx2SignedComparisons, "avx2 signed comparisons", `
+// Less returns a mask whose elements indicate whether x < y
+//
+// Emulated, CPU Feature {{.CPUfeature}}
+func (x {{.Vec}}) Less(y {{.Vec}}) Mask{{.WxC}} {
+	return y.Greater(x)
+}
+
+// GreaterEqual returns a mask whose elements indicate whether x >= y
+//
+// Emulated, CPU Feature {{.CPUfeature}}
+func (x {{.Vec}}) GreaterEqual(y {{.Vec}}) Mask{{.WxC}} {
+	ones := x.Equal(x).AsInt{{.WxC}}()
+	return y.Greater(x).AsInt{{.WxC}}().Xor(ones).AsMask{{.WxC}}()
+}
+
+// LessEqual returns a mask whose elements indicate whether x <= y
+//
+// Emulated, CPU Feature {{.CPUfeature}}
+func (x {{.Vec}}) LessEqual(y {{.Vec}}) Mask{{.WxC}} {
+	ones := x.Equal(x).AsInt{{.WxC}}()
+	return x.Greater(y).AsInt{{.WxC}}().Xor(ones).AsMask{{.WxC}}()
+}
+
+// NotEqual returns a mask whose elements indicate whether x != y
+//
+// Emulated, CPU Feature {{.CPUfeature}}
+func (x {{.Vec}}) NotEqual(y {{.Vec}}) Mask{{.WxC}} {
+	ones := x.Equal(x).AsInt{{.WxC}}()
+	return x.Equal(y).AsInt{{.WxC}}().Xor(ones).AsMask{{.WxC}}()
+}
+`)
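
The signed template above leans on one identity: a comparison mask lane is either all-ones or all-zeros, x.Equal(x) manufactures an all-ones vector, and XOR with all-ones is a lane-wise NOT, so GreaterEqual is NOT(y > x), LessEqual is NOT(x > y), and NotEqual is NOT(x == y). A minimal scalar sketch of that logic follows (standalone Go written for this review, not part of the patch; greater is an illustrative stand-in for what VPCMPGT produces per lane):

package main

import "fmt"

// greater models one lane of VPCMPGT: all-ones (-1) if x > y, else 0.
func greater(x, y int16) int16 {
	if x > y {
		return -1
	}
	return 0
}

func main() {
	x, y := int16(3), int16(7)
	ones := int16(-1)          // x.Equal(x) is always true, so it yields all-ones
	less := greater(y, x)      // Less is Greater with the operands swapped
	ge := greater(y, x) ^ ones // GreaterEqual = NOT(y > x)
	le := greater(x, y) ^ ones // LessEqual = NOT(x > y)
	fmt.Println(less, ge, le)  // -1 0 -1: 3 < 7 holds, 3 >= 7 does not, 3 <= 7 holds
}
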
simd.Int16x8.NotEqual, notEqualSlice[int16]) + testInt32x4Compare(t, simd.Int32x4.NotEqual, notEqualSlice[int32]) + testInt32x8Compare(t, simd.Int32x8.NotEqual, notEqualSlice[int32]) + testInt64x2Compare(t, simd.Int64x2.NotEqual, notEqualSlice[int64]) + testInt64x4Compare(t, simd.Int64x4.NotEqual, notEqualSlice[int64]) + testInt8x16Compare(t, simd.Int8x16.NotEqual, notEqualSlice[int8]) + testInt8x32Compare(t, simd.Int8x32.NotEqual, notEqualSlice[int8]) + + testUint16x16Compare(t, simd.Uint16x16.NotEqual, notEqualSlice[uint16]) + testUint16x8Compare(t, simd.Uint16x8.NotEqual, notEqualSlice[uint16]) + testUint32x4Compare(t, simd.Uint32x4.NotEqual, notEqualSlice[uint32]) + testUint32x8Compare(t, simd.Uint32x8.NotEqual, notEqualSlice[uint32]) + testUint64x2Compare(t, simd.Uint64x2.NotEqual, notEqualSlice[uint64]) + testUint64x4Compare(t, simd.Uint64x4.NotEqual, notEqualSlice[uint64]) + testUint8x16Compare(t, simd.Uint8x16.NotEqual, notEqualSlice[uint8]) + testUint8x32Compare(t, simd.Uint8x32.NotEqual, notEqualSlice[uint8]) if simd.HasAVX512() { testFloat32x16Compare(t, simd.Float32x16.NotEqual, notEqualSlice[float32]) diff --git a/src/simd/genfiles.go b/src/simd/genfiles.go index 8b36da71ab..022ddd1681 100644 --- a/src/simd/genfiles.go +++ b/src/simd/genfiles.go @@ -87,6 +87,16 @@ var ternaryFlaky = &shapes{ // for tests that support flaky equality floats: []int{32}, } +var avx2SignedComparisons = &shapes{ + vecs: []int{128, 256}, + ints: []int{8, 16, 32, 64}, +} + +var avx2UnsignedComparisons = &shapes{ + vecs: []int{128, 256}, + uints: []int{8, 16, 32, 64}, +} + type templateData struct { Vec string // the type of the vector, e.g. Float32x4 AOrAn string // for documentation, the article "a" or "an" @@ -486,6 +496,130 @@ func (x {{.Vec}}) StoreSlicePart(s []{{.Type}}) { } `) +func (t templateData) CPUfeature() string { + switch t.Vwidth { + case 128: + return "AVX" + case 256: + return "AVX2" + case 512: + return "AVX512" + } + panic(fmt.Errorf("unexpected vector width %d", t.Vwidth)) +} + +var avx2SignedComparisonsTemplate = shapedTemplateOf(avx2SignedComparisons, "avx2 signed comparisons", ` +// Less returns a mask whose elements indicate whether x < y +// +// Emulated, CPU Feature {{.CPUfeature}} +func (x {{.Vec}}) Less(y {{.Vec}}) Mask{{.WxC}} { + return y.Greater(x) +} + +// GreaterEqual returns a mask whose elements indicate whether x >= y +// +// Emulated, CPU Feature {{.CPUfeature}} +func (x {{.Vec}}) GreaterEqual(y {{.Vec}}) Mask{{.WxC}} { + ones := x.Equal(x).AsInt{{.WxC}}() + return y.Greater(x).AsInt{{.WxC}}().Xor(ones).AsMask{{.WxC}}() +} + +// LessEqual returns a mask whose elements indicate whether x <= y +// +// Emulated, CPU Feature {{.CPUfeature}} +func (x {{.Vec}}) LessEqual(y {{.Vec}}) Mask{{.WxC}} { + ones := x.Equal(x).AsInt{{.WxC}}() + return x.Greater(y).AsInt{{.WxC}}().Xor(ones).AsMask{{.WxC}}() +} + +// NotEqual returns a mask whose elements indicate whether x != y +// +// Emulated, CPU Feature {{.CPUfeature}} +func (x {{.Vec}}) NotEqual(y {{.Vec}}) Mask{{.WxC}} { + ones := x.Equal(x).AsInt{{.WxC}}() + return x.Equal(y).AsInt{{.WxC}}().Xor(ones).AsMask{{.WxC}}() +} +`) + +// CPUfeatureAVX2if8 return AVX2 if the element width is 8, +// otherwise, it returns CPUfeature. This is for the cpufeature +// of unsigned comparison emulation, which uses shifts for all +// the sizes > 8 (shifts are AVX) but must use broadcast (AVX2) +// for bytes. 
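+// (There is no 8-bit vector shift on x86, so the byte-sized
+// sign-bit constant cannot be formed by shifting an all-ones
+// vector and is broadcast instead.)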
+func (t templateData) CPUfeatureAVX2if8() string {
+	if t.Width == 8 {
+		return "AVX2"
+	}
+	return t.CPUfeature()
+}
+
+var avx2UnsignedComparisonsTemplate = shapedTemplateOf(avx2UnsignedComparisons, "avx2 unsigned comparisons", `
+// Greater returns a mask whose elements indicate whether x > y
+//
+// Emulated, CPU Feature {{.CPUfeatureAVX2if8}}
+func (x {{.Vec}}) Greater(y {{.Vec}}) Mask{{.WxC}} {
+	a, b := x.AsInt{{.WxC}}(), y.AsInt{{.WxC}}()
+{{- if eq .Width 8}}
+	signs := BroadcastInt{{.WxC}}(-1 << ({{.Width}}-1))
+{{- else}}
+	ones := x.Equal(x).AsInt{{.WxC}}()
+	signs := ones.ShiftAllLeft({{.Width}}-1)
+{{- end }}
+	return a.Xor(signs).Greater(b.Xor(signs))
+}
+
+// Less returns a mask whose elements indicate whether x < y
+//
+// Emulated, CPU Feature {{.CPUfeatureAVX2if8}}
+func (x {{.Vec}}) Less(y {{.Vec}}) Mask{{.WxC}} {
+	a, b := x.AsInt{{.WxC}}(), y.AsInt{{.WxC}}()
+{{- if eq .Width 8}}
+	signs := BroadcastInt{{.WxC}}(-1 << ({{.Width}}-1))
+{{- else}}
+	ones := x.Equal(x).AsInt{{.WxC}}()
+	signs := ones.ShiftAllLeft({{.Width}}-1)
+{{- end }}
+	return b.Xor(signs).Greater(a.Xor(signs))
+}
+
+// GreaterEqual returns a mask whose elements indicate whether x >= y
+//
+// Emulated, CPU Feature {{.CPUfeatureAVX2if8}}
+func (x {{.Vec}}) GreaterEqual(y {{.Vec}}) Mask{{.WxC}} {
+	a, b := x.AsInt{{.WxC}}(), y.AsInt{{.WxC}}()
+	ones := x.Equal(x).AsInt{{.WxC}}()
+{{- if eq .Width 8}}
+	signs := BroadcastInt{{.WxC}}(-1 << ({{.Width}}-1))
+{{- else}}
+	signs := ones.ShiftAllLeft({{.Width}}-1)
+{{- end }}
+	return b.Xor(signs).Greater(a.Xor(signs)).AsInt{{.WxC}}().Xor(ones).AsMask{{.WxC}}()
+}
+
+// LessEqual returns a mask whose elements indicate whether x <= y
+//
+// Emulated, CPU Feature {{.CPUfeatureAVX2if8}}
+func (x {{.Vec}}) LessEqual(y {{.Vec}}) Mask{{.WxC}} {
+	a, b := x.AsInt{{.WxC}}(), y.AsInt{{.WxC}}()
+	ones := x.Equal(x).AsInt{{.WxC}}()
+{{- if eq .Width 8}}
+	signs := BroadcastInt{{.WxC}}(-1 << ({{.Width}}-1))
+{{- else}}
+	signs := ones.ShiftAllLeft({{.Width}}-1)
+{{- end }}
+	return a.Xor(signs).Greater(b.Xor(signs)).AsInt{{.WxC}}().Xor(ones).AsMask{{.WxC}}()
+}
+
+// NotEqual returns a mask whose elements indicate whether x != y
+//
+// Emulated, CPU Feature {{.CPUfeature}}
+func (x {{.Vec}}) NotEqual(y {{.Vec}}) Mask{{.WxC}} {
+	a, b := x.AsInt{{.WxC}}(), y.AsInt{{.WxC}}()
+	ones := x.Equal(x).AsInt{{.WxC}}()
+	return a.Equal(b).AsInt{{.WxC}}().Xor(ones).AsMask{{.WxC}}()
+}
+`)
+
 var unsafePATemplate = templateOf("unsafe PA helper", `
 // pa{{.Vec}} returns a type-unsafe pointer to array that can
 // only be used with partial load/store operations that only
@@ -591,6 +725,8 @@ func main() {
 		avx2SmallLoadSlicePartTemplate,
 		avx2MaskedTemplate,
 		avx512MaskedTemplate,
+		avx2SignedComparisonsTemplate,
+		avx2UnsignedComparisonsTemplate,
 		broadcastTemplate,
 	)
 }
diff --git a/src/simd/ops_amd64.go b/src/simd/ops_amd64.go
index 5b7754a961..d78bb699ea 100644
--- a/src/simd/ops_amd64.go
+++ b/src/simd/ops_amd64.go
@@ -3822,61 +3822,21 @@ func (x Float64x4) Greater(y Float64x4) Mask64x4
 // Asm: VCMPPD, CPU Feature: AVX512
 func (x Float64x8) Greater(y Float64x8) Mask64x8

-// Greater compares for greater than.
-//
-// Asm: VPCMPUB, CPU Feature: AVX512
-func (x Uint8x16) Greater(y Uint8x16) Mask8x16
-
-// Greater compares for greater than.
-//
-// Asm: VPCMPUB, CPU Feature: AVX512
-func (x Uint8x32) Greater(y Uint8x32) Mask8x32
-
 // Greater compares for greater than.
 //
 // Asm: VPCMPUB, CPU Feature: AVX512
 func (x Uint8x64) Greater(y Uint8x64) Mask8x64

-// Greater compares for greater than.
-//
-// Asm: VPCMPUW, CPU Feature: AVX512
-func (x Uint16x8) Greater(y Uint16x8) Mask16x8
-
-// Greater compares for greater than.
-//
-// Asm: VPCMPUW, CPU Feature: AVX512
-func (x Uint16x16) Greater(y Uint16x16) Mask16x16
-
 // Greater compares for greater than.
 //
 // Asm: VPCMPUW, CPU Feature: AVX512
 func (x Uint16x32) Greater(y Uint16x32) Mask16x32

-// Greater compares for greater than.
-//
-// Asm: VPCMPUD, CPU Feature: AVX512
-func (x Uint32x4) Greater(y Uint32x4) Mask32x4
-
-// Greater compares for greater than.
-//
-// Asm: VPCMPUD, CPU Feature: AVX512
-func (x Uint32x8) Greater(y Uint32x8) Mask32x8
-
 // Greater compares for greater than.
 //
 // Asm: VPCMPUD, CPU Feature: AVX512
 func (x Uint32x16) Greater(y Uint32x16) Mask32x16

-// Greater compares for greater than.
-//
-// Asm: VPCMPUQ, CPU Feature: AVX512
-func (x Uint64x2) Greater(y Uint64x2) Mask64x2
-
-// Greater compares for greater than.
-//
-// Asm: VPCMPUQ, CPU Feature: AVX512
-func (x Uint64x4) Greater(y Uint64x4) Mask64x4
-
 // Greater compares for greater than.
 //
 // Asm: VPCMPUQ, CPU Feature: AVX512
@@ -3914,121 +3874,41 @@ func (x Float64x4) GreaterEqual(y Float64x4) Mask64x4
 // Asm: VCMPPD, CPU Feature: AVX512
 func (x Float64x8) GreaterEqual(y Float64x8) Mask64x8

-// GreaterEqual compares for greater than or equal.
-//
-// Asm: VPCMPB, CPU Feature: AVX512
-func (x Int8x16) GreaterEqual(y Int8x16) Mask8x16
-
-// GreaterEqual compares for greater than or equal.
-//
-// Asm: VPCMPB, CPU Feature: AVX512
-func (x Int8x32) GreaterEqual(y Int8x32) Mask8x32
-
 // GreaterEqual compares for greater than or equal.
 //
 // Asm: VPCMPB, CPU Feature: AVX512
 func (x Int8x64) GreaterEqual(y Int8x64) Mask8x64

-// GreaterEqual compares for greater than or equal.
-//
-// Asm: VPCMPW, CPU Feature: AVX512
-func (x Int16x8) GreaterEqual(y Int16x8) Mask16x8
-
-// GreaterEqual compares for greater than or equal.
-//
-// Asm: VPCMPW, CPU Feature: AVX512
-func (x Int16x16) GreaterEqual(y Int16x16) Mask16x16
-
 // GreaterEqual compares for greater than or equal.
 //
 // Asm: VPCMPW, CPU Feature: AVX512
 func (x Int16x32) GreaterEqual(y Int16x32) Mask16x32

-// GreaterEqual compares for greater than or equal.
-//
-// Asm: VPCMPD, CPU Feature: AVX512
-func (x Int32x4) GreaterEqual(y Int32x4) Mask32x4
-
-// GreaterEqual compares for greater than or equal.
-//
-// Asm: VPCMPD, CPU Feature: AVX512
-func (x Int32x8) GreaterEqual(y Int32x8) Mask32x8
-
 // GreaterEqual compares for greater than or equal.
 //
 // Asm: VPCMPD, CPU Feature: AVX512
 func (x Int32x16) GreaterEqual(y Int32x16) Mask32x16

-// GreaterEqual compares for greater than or equal.
-//
-// Asm: VPCMPQ, CPU Feature: AVX512
-func (x Int64x2) GreaterEqual(y Int64x2) Mask64x2
-
-// GreaterEqual compares for greater than or equal.
-//
-// Asm: VPCMPQ, CPU Feature: AVX512
-func (x Int64x4) GreaterEqual(y Int64x4) Mask64x4
-
 // GreaterEqual compares for greater than or equal.
 //
 // Asm: VPCMPQ, CPU Feature: AVX512
 func (x Int64x8) GreaterEqual(y Int64x8) Mask64x8

-// GreaterEqual compares for greater than or equal.
-//
-// Asm: VPCMPUB, CPU Feature: AVX512
-func (x Uint8x16) GreaterEqual(y Uint8x16) Mask8x16
-
-// GreaterEqual compares for greater than or equal.
-//
-// Asm: VPCMPUB, CPU Feature: AVX512
-func (x Uint8x32) GreaterEqual(y Uint8x32) Mask8x32
-
 // GreaterEqual compares for greater than or equal.
 //
 // Asm: VPCMPUB, CPU Feature: AVX512
 func (x Uint8x64) GreaterEqual(y Uint8x64) Mask8x64

-// GreaterEqual compares for greater than or equal.
-//
-// Asm: VPCMPUW, CPU Feature: AVX512
-func (x Uint16x8) GreaterEqual(y Uint16x8) Mask16x8
-
-// GreaterEqual compares for greater than or equal.
-//
-// Asm: VPCMPUW, CPU Feature: AVX512
-func (x Uint16x16) GreaterEqual(y Uint16x16) Mask16x16
-
 // GreaterEqual compares for greater than or equal.
 //
 // Asm: VPCMPUW, CPU Feature: AVX512
 func (x Uint16x32) GreaterEqual(y Uint16x32) Mask16x32

-// GreaterEqual compares for greater than or equal.
-//
-// Asm: VPCMPUD, CPU Feature: AVX512
-func (x Uint32x4) GreaterEqual(y Uint32x4) Mask32x4
-
-// GreaterEqual compares for greater than or equal.
-//
-// Asm: VPCMPUD, CPU Feature: AVX512
-func (x Uint32x8) GreaterEqual(y Uint32x8) Mask32x8
-
 // GreaterEqual compares for greater than or equal.
 //
 // Asm: VPCMPUD, CPU Feature: AVX512
 func (x Uint32x16) GreaterEqual(y Uint32x16) Mask32x16

-// GreaterEqual compares for greater than or equal.
-//
-// Asm: VPCMPUQ, CPU Feature: AVX512
-func (x Uint64x2) GreaterEqual(y Uint64x2) Mask64x2
-
-// GreaterEqual compares for greater than or equal.
-//
-// Asm: VPCMPUQ, CPU Feature: AVX512
-func (x Uint64x4) GreaterEqual(y Uint64x4) Mask64x4
-
 // GreaterEqual compares for greater than or equal.
 //
 // Asm: VPCMPUQ, CPU Feature: AVX512
@@ -4566,121 +4446,41 @@ func (x Float64x4) Less(y Float64x4) Mask64x4
 // Asm: VCMPPD, CPU Feature: AVX512
 func (x Float64x8) Less(y Float64x8) Mask64x8

-// Less compares for less than.
-//
-// Asm: VPCMPB, CPU Feature: AVX512
-func (x Int8x16) Less(y Int8x16) Mask8x16
-
-// Less compares for less than.
-//
-// Asm: VPCMPB, CPU Feature: AVX512
-func (x Int8x32) Less(y Int8x32) Mask8x32
-
 // Less compares for less than.
 //
 // Asm: VPCMPB, CPU Feature: AVX512
 func (x Int8x64) Less(y Int8x64) Mask8x64

-// Less compares for less than.
-//
-// Asm: VPCMPW, CPU Feature: AVX512
-func (x Int16x8) Less(y Int16x8) Mask16x8
-
-// Less compares for less than.
-//
-// Asm: VPCMPW, CPU Feature: AVX512
-func (x Int16x16) Less(y Int16x16) Mask16x16
-
 // Less compares for less than.
 //
 // Asm: VPCMPW, CPU Feature: AVX512
 func (x Int16x32) Less(y Int16x32) Mask16x32

-// Less compares for less than.
-//
-// Asm: VPCMPD, CPU Feature: AVX512
-func (x Int32x4) Less(y Int32x4) Mask32x4
-
-// Less compares for less than.
-//
-// Asm: VPCMPD, CPU Feature: AVX512
-func (x Int32x8) Less(y Int32x8) Mask32x8
-
 // Less compares for less than.
 //
 // Asm: VPCMPD, CPU Feature: AVX512
 func (x Int32x16) Less(y Int32x16) Mask32x16

-// Less compares for less than.
-//
-// Asm: VPCMPQ, CPU Feature: AVX512
-func (x Int64x2) Less(y Int64x2) Mask64x2
-
-// Less compares for less than.
-//
-// Asm: VPCMPQ, CPU Feature: AVX512
-func (x Int64x4) Less(y Int64x4) Mask64x4
-
 // Less compares for less than.
 //
 // Asm: VPCMPQ, CPU Feature: AVX512
 func (x Int64x8) Less(y Int64x8) Mask64x8

-// Less compares for less than.
-//
-// Asm: VPCMPUB, CPU Feature: AVX512
-func (x Uint8x16) Less(y Uint8x16) Mask8x16
-
-// Less compares for less than.
-//
-// Asm: VPCMPUB, CPU Feature: AVX512
-func (x Uint8x32) Less(y Uint8x32) Mask8x32
-
 // Less compares for less than.
 //
 // Asm: VPCMPUB, CPU Feature: AVX512
 func (x Uint8x64) Less(y Uint8x64) Mask8x64

-// Less compares for less than.
-//
-// Asm: VPCMPUW, CPU Feature: AVX512
-func (x Uint16x8) Less(y Uint16x8) Mask16x8
-
-// Less compares for less than.
-//
-// Asm: VPCMPUW, CPU Feature: AVX512
-func (x Uint16x16) Less(y Uint16x16) Mask16x16
-
 // Less compares for less than.
 //
 // Asm: VPCMPUW, CPU Feature: AVX512
 func (x Uint16x32) Less(y Uint16x32) Mask16x32

-// Less compares for less than.
-//
-// Asm: VPCMPUD, CPU Feature: AVX512
-func (x Uint32x4) Less(y Uint32x4) Mask32x4
-
-// Less compares for less than.
-//
-// Asm: VPCMPUD, CPU Feature: AVX512
-func (x Uint32x8) Less(y Uint32x8) Mask32x8
-
 // Less compares for less than.
 //
 // Asm: VPCMPUD, CPU Feature: AVX512
 func (x Uint32x16) Less(y Uint32x16) Mask32x16

-// Less compares for less than.
-//
-// Asm: VPCMPUQ, CPU Feature: AVX512
-func (x Uint64x2) Less(y Uint64x2) Mask64x2
-
-// Less compares for less than.
-//
-// Asm: VPCMPUQ, CPU Feature: AVX512
-func (x Uint64x4) Less(y Uint64x4) Mask64x4
-
 // Less compares for less than.
 //
 // Asm: VPCMPUQ, CPU Feature: AVX512
@@ -4718,121 +4518,41 @@ func (x Float64x4) LessEqual(y Float64x4) Mask64x4
 // Asm: VCMPPD, CPU Feature: AVX512
 func (x Float64x8) LessEqual(y Float64x8) Mask64x8

-// LessEqual compares for less than or equal.
-//
-// Asm: VPCMPB, CPU Feature: AVX512
-func (x Int8x16) LessEqual(y Int8x16) Mask8x16
-
-// LessEqual compares for less than or equal.
-//
-// Asm: VPCMPB, CPU Feature: AVX512
-func (x Int8x32) LessEqual(y Int8x32) Mask8x32
-
 // LessEqual compares for less than or equal.
 //
 // Asm: VPCMPB, CPU Feature: AVX512
 func (x Int8x64) LessEqual(y Int8x64) Mask8x64

-// LessEqual compares for less than or equal.
-//
-// Asm: VPCMPW, CPU Feature: AVX512
-func (x Int16x8) LessEqual(y Int16x8) Mask16x8
-
-// LessEqual compares for less than or equal.
-//
-// Asm: VPCMPW, CPU Feature: AVX512
-func (x Int16x16) LessEqual(y Int16x16) Mask16x16
-
 // LessEqual compares for less than or equal.
 //
 // Asm: VPCMPW, CPU Feature: AVX512
 func (x Int16x32) LessEqual(y Int16x32) Mask16x32

-// LessEqual compares for less than or equal.
-//
-// Asm: VPCMPD, CPU Feature: AVX512
-func (x Int32x4) LessEqual(y Int32x4) Mask32x4
-
-// LessEqual compares for less than or equal.
-//
-// Asm: VPCMPD, CPU Feature: AVX512
-func (x Int32x8) LessEqual(y Int32x8) Mask32x8
-
 // LessEqual compares for less than or equal.
 //
 // Asm: VPCMPD, CPU Feature: AVX512
 func (x Int32x16) LessEqual(y Int32x16) Mask32x16

-// LessEqual compares for less than or equal.
-//
-// Asm: VPCMPQ, CPU Feature: AVX512
-func (x Int64x2) LessEqual(y Int64x2) Mask64x2
-
-// LessEqual compares for less than or equal.
-//
-// Asm: VPCMPQ, CPU Feature: AVX512
-func (x Int64x4) LessEqual(y Int64x4) Mask64x4
-
 // LessEqual compares for less than or equal.
 //
 // Asm: VPCMPQ, CPU Feature: AVX512
 func (x Int64x8) LessEqual(y Int64x8) Mask64x8

-// LessEqual compares for less than or equal.
-//
-// Asm: VPCMPUB, CPU Feature: AVX512
-func (x Uint8x16) LessEqual(y Uint8x16) Mask8x16
-
-// LessEqual compares for less than or equal.
-//
-// Asm: VPCMPUB, CPU Feature: AVX512
-func (x Uint8x32) LessEqual(y Uint8x32) Mask8x32
-
 // LessEqual compares for less than or equal.
 //
 // Asm: VPCMPUB, CPU Feature: AVX512
 func (x Uint8x64) LessEqual(y Uint8x64) Mask8x64

-// LessEqual compares for less than or equal.
-//
-// Asm: VPCMPUW, CPU Feature: AVX512
-func (x Uint16x8) LessEqual(y Uint16x8) Mask16x8
-
-// LessEqual compares for less than or equal.
-//
-// Asm: VPCMPUW, CPU Feature: AVX512
-func (x Uint16x16) LessEqual(y Uint16x16) Mask16x16
-
 // LessEqual compares for less than or equal.
 //
 // Asm: VPCMPUW, CPU Feature: AVX512
 func (x Uint16x32) LessEqual(y Uint16x32) Mask16x32

-// LessEqual compares for less than or equal.
-//
-// Asm: VPCMPUD, CPU Feature: AVX512
-func (x Uint32x4) LessEqual(y Uint32x4) Mask32x4
-
-// LessEqual compares for less than or equal.
-//
-// Asm: VPCMPUD, CPU Feature: AVX512
-func (x Uint32x8) LessEqual(y Uint32x8) Mask32x8
-
 // LessEqual compares for less than or equal.
 //
 // Asm: VPCMPUD, CPU Feature: AVX512
 func (x Uint32x16) LessEqual(y Uint32x16) Mask32x16

-// LessEqual compares for less than or equal.
-//
-// Asm: VPCMPUQ, CPU Feature: AVX512
-func (x Uint64x2) LessEqual(y Uint64x2) Mask64x2
-
-// LessEqual compares for less than or equal.
-//
-// Asm: VPCMPUQ, CPU Feature: AVX512
-func (x Uint64x4) LessEqual(y Uint64x4) Mask64x4
-
 // LessEqual compares for less than or equal.
 //
 // Asm: VPCMPUQ, CPU Feature: AVX512
@@ -6644,121 +6364,41 @@ func (x Float64x4) NotEqual(y Float64x4) Mask64x4
 // Asm: VCMPPD, CPU Feature: AVX512
 func (x Float64x8) NotEqual(y Float64x8) Mask64x8

-// NotEqual compares for inequality.
-//
-// Asm: VPCMPB, CPU Feature: AVX512
-func (x Int8x16) NotEqual(y Int8x16) Mask8x16
-
-// NotEqual compares for inequality.
-//
-// Asm: VPCMPB, CPU Feature: AVX512
-func (x Int8x32) NotEqual(y Int8x32) Mask8x32
-
 // NotEqual compares for inequality.
 //
 // Asm: VPCMPB, CPU Feature: AVX512
 func (x Int8x64) NotEqual(y Int8x64) Mask8x64

-// NotEqual compares for inequality.
-//
-// Asm: VPCMPW, CPU Feature: AVX512
-func (x Int16x8) NotEqual(y Int16x8) Mask16x8
-
-// NotEqual compares for inequality.
-//
-// Asm: VPCMPW, CPU Feature: AVX512
-func (x Int16x16) NotEqual(y Int16x16) Mask16x16
-
 // NotEqual compares for inequality.
 //
 // Asm: VPCMPW, CPU Feature: AVX512
 func (x Int16x32) NotEqual(y Int16x32) Mask16x32

-// NotEqual compares for inequality.
-//
-// Asm: VPCMPD, CPU Feature: AVX512
-func (x Int32x4) NotEqual(y Int32x4) Mask32x4
-
-// NotEqual compares for inequality.
-//
-// Asm: VPCMPD, CPU Feature: AVX512
-func (x Int32x8) NotEqual(y Int32x8) Mask32x8
-
 // NotEqual compares for inequality.
 //
 // Asm: VPCMPD, CPU Feature: AVX512
 func (x Int32x16) NotEqual(y Int32x16) Mask32x16

-// NotEqual compares for inequality.
-//
-// Asm: VPCMPQ, CPU Feature: AVX512
-func (x Int64x2) NotEqual(y Int64x2) Mask64x2
-
-// NotEqual compares for inequality.
-//
-// Asm: VPCMPQ, CPU Feature: AVX512
-func (x Int64x4) NotEqual(y Int64x4) Mask64x4
-
 // NotEqual compares for inequality.
 //
 // Asm: VPCMPQ, CPU Feature: AVX512
 func (x Int64x8) NotEqual(y Int64x8) Mask64x8

-// NotEqual compares for inequality.
-//
-// Asm: VPCMPUB, CPU Feature: AVX512
-func (x Uint8x16) NotEqual(y Uint8x16) Mask8x16
-
-// NotEqual compares for inequality.
-//
-// Asm: VPCMPUB, CPU Feature: AVX512
-func (x Uint8x32) NotEqual(y Uint8x32) Mask8x32
-
 // NotEqual compares for inequality.
 //
 // Asm: VPCMPUB, CPU Feature: AVX512
 func (x Uint8x64) NotEqual(y Uint8x64) Mask8x64

-// NotEqual compares for inequality.
-//
-// Asm: VPCMPUW, CPU Feature: AVX512
-func (x Uint16x8) NotEqual(y Uint16x8) Mask16x8
-
-// NotEqual compares for inequality.
-//
-// Asm: VPCMPUW, CPU Feature: AVX512
-func (x Uint16x16) NotEqual(y Uint16x16) Mask16x16
-
 // NotEqual compares for inequality.
 //
 // Asm: VPCMPUW, CPU Feature: AVX512
 func (x Uint16x32) NotEqual(y Uint16x32) Mask16x32

-// NotEqual compares for inequality.
-//
-// Asm: VPCMPUD, CPU Feature: AVX512
-func (x Uint32x4) NotEqual(y Uint32x4) Mask32x4
-
-// NotEqual compares for inequality.
-//
-// Asm: VPCMPUD, CPU Feature: AVX512
-func (x Uint32x8) NotEqual(y Uint32x8) Mask32x8
-
 // NotEqual compares for inequality.
 //
 // Asm: VPCMPUD, CPU Feature: AVX512
 func (x Uint32x16) NotEqual(y Uint32x16) Mask32x16

-// NotEqual compares for inequality.
-//
-// Asm: VPCMPUQ, CPU Feature: AVX512
-func (x Uint64x2) NotEqual(y Uint64x2) Mask64x2
-
-// NotEqual compares for inequality.
-//
-// Asm: VPCMPUQ, CPU Feature: AVX512
-func (x Uint64x4) NotEqual(y Uint64x4) Mask64x4
-
 // NotEqual compares for inequality.
 //
 // Asm: VPCMPUQ, CPU Feature: AVX512
diff --git a/src/simd/slice_amd64.go b/src/simd/slice_amd64.go
index 8e721d9027..3ad2672a05 100644
--- a/src/simd/slice_amd64.go
+++ b/src/simd/slice_amd64.go
@@ -1500,6 +1500,642 @@ func (x Float64x8) Merge(y Float64x8, mask Mask64x8) Float64x8 {
 	return iy.blendMasked(ix, mask).AsFloat64x8()
 }

+// Less returns a mask whose elements indicate whether x < y
+//
+// Emulated, CPU Feature AVX
+func (x Int8x16) Less(y Int8x16) Mask8x16 {
+	return y.Greater(x)
+}
+
+// GreaterEqual returns a mask whose elements indicate whether x >= y
+//
+// Emulated, CPU Feature AVX
+func (x Int8x16) GreaterEqual(y Int8x16) Mask8x16 {
+	ones := x.Equal(x).AsInt8x16()
+	return y.Greater(x).AsInt8x16().Xor(ones).AsMask8x16()
+}
+
+// LessEqual returns a mask whose elements indicate whether x <= y
+//
+// Emulated, CPU Feature AVX
+func (x Int8x16) LessEqual(y Int8x16) Mask8x16 {
+	ones := x.Equal(x).AsInt8x16()
+	return x.Greater(y).AsInt8x16().Xor(ones).AsMask8x16()
+}
+
+// NotEqual returns a mask whose elements indicate whether x != y
+//
+// Emulated, CPU Feature AVX
+func (x Int8x16) NotEqual(y Int8x16) Mask8x16 {
+	ones := x.Equal(x).AsInt8x16()
+	return x.Equal(y).AsInt8x16().Xor(ones).AsMask8x16()
+}
+
+// Less returns a mask whose elements indicate whether x < y
+//
+// Emulated, CPU Feature AVX
+func (x Int16x8) Less(y Int16x8) Mask16x8 {
+	return y.Greater(x)
+}
+
+// GreaterEqual returns a mask whose elements indicate whether x >= y
+//
+// Emulated, CPU Feature AVX
+func (x Int16x8) GreaterEqual(y Int16x8) Mask16x8 {
+	ones := x.Equal(x).AsInt16x8()
+	return y.Greater(x).AsInt16x8().Xor(ones).AsMask16x8()
+}
+
+// LessEqual returns a mask whose elements indicate whether x <= y
+//
+// Emulated, CPU Feature AVX
+func (x Int16x8) LessEqual(y Int16x8) Mask16x8 {
+	ones := x.Equal(x).AsInt16x8()
+	return x.Greater(y).AsInt16x8().Xor(ones).AsMask16x8()
+}
+
+// NotEqual returns a mask whose elements indicate whether x != y
+//
+// Emulated, CPU Feature AVX
+func (x Int16x8) NotEqual(y Int16x8) Mask16x8 {
+	ones := x.Equal(x).AsInt16x8()
+	return x.Equal(y).AsInt16x8().Xor(ones).AsMask16x8()
+}
+
+// Less returns a mask whose elements indicate whether x < y
+//
+// Emulated, CPU Feature AVX
+func (x Int32x4) Less(y Int32x4) Mask32x4 {
+	return y.Greater(x)
+}
+
+// GreaterEqual returns a mask whose elements indicate whether x >= y
+//
+// Emulated, CPU Feature AVX
+func (x Int32x4) GreaterEqual(y Int32x4) Mask32x4 {
+	ones := x.Equal(x).AsInt32x4()
+	return y.Greater(x).AsInt32x4().Xor(ones).AsMask32x4()
+}
+
+// LessEqual returns a mask whose elements indicate whether x <= y
+//
+// Emulated, CPU Feature AVX
+func (x Int32x4) LessEqual(y Int32x4) Mask32x4 {
+	ones := x.Equal(x).AsInt32x4()
+	return x.Greater(y).AsInt32x4().Xor(ones).AsMask32x4()
+}
+
+// NotEqual returns a mask whose elements indicate whether x != y
+//
+// Emulated, CPU Feature AVX
+func (x Int32x4) NotEqual(y Int32x4) Mask32x4 {
+	ones := x.Equal(x).AsInt32x4()
+	return x.Equal(y).AsInt32x4().Xor(ones).AsMask32x4()
+}
+
+// Less returns a mask whose elements indicate whether x < y
+//
+// Emulated, CPU Feature AVX
+func (x Int64x2) Less(y Int64x2) Mask64x2 {
+	return y.Greater(x)
+}
+
+// GreaterEqual returns a mask whose elements indicate whether x >= y
+//
+// Emulated, CPU Feature AVX
+func (x Int64x2) GreaterEqual(y Int64x2) Mask64x2 {
+	ones := x.Equal(x).AsInt64x2()
+	return y.Greater(x).AsInt64x2().Xor(ones).AsMask64x2()
+}
+
+// LessEqual returns a mask whose elements indicate whether x <= y
+//
+// Emulated, CPU Feature AVX
+func (x Int64x2) LessEqual(y Int64x2) Mask64x2 {
+	ones := x.Equal(x).AsInt64x2()
+	return x.Greater(y).AsInt64x2().Xor(ones).AsMask64x2()
+}
+
+// NotEqual returns a mask whose elements indicate whether x != y
+//
+// Emulated, CPU Feature AVX
+func (x Int64x2) NotEqual(y Int64x2) Mask64x2 {
+	ones := x.Equal(x).AsInt64x2()
+	return x.Equal(y).AsInt64x2().Xor(ones).AsMask64x2()
+}
+
+// Less returns a mask whose elements indicate whether x < y
+//
+// Emulated, CPU Feature AVX2
+func (x Int8x32) Less(y Int8x32) Mask8x32 {
+	return y.Greater(x)
+}
+
+// GreaterEqual returns a mask whose elements indicate whether x >= y
+//
+// Emulated, CPU Feature AVX2
+func (x Int8x32) GreaterEqual(y Int8x32) Mask8x32 {
+	ones := x.Equal(x).AsInt8x32()
+	return y.Greater(x).AsInt8x32().Xor(ones).AsMask8x32()
+}
+
+// LessEqual returns a mask whose elements indicate whether x <= y
+//
+// Emulated, CPU Feature AVX2
+func (x Int8x32) LessEqual(y Int8x32) Mask8x32 {
+	ones := x.Equal(x).AsInt8x32()
+	return x.Greater(y).AsInt8x32().Xor(ones).AsMask8x32()
+}
+
+// NotEqual returns a mask whose elements indicate whether x != y
+//
+// Emulated, CPU Feature AVX2
+func (x Int8x32) NotEqual(y Int8x32) Mask8x32 {
+	ones := x.Equal(x).AsInt8x32()
+	return x.Equal(y).AsInt8x32().Xor(ones).AsMask8x32()
+}
+
+// Less returns a mask whose elements indicate whether x < y
+//
+// Emulated, CPU Feature AVX2
+func (x Int16x16) Less(y Int16x16) Mask16x16 {
+	return y.Greater(x)
+}
+
+// GreaterEqual returns a mask whose elements indicate whether x >= y
+//
+// Emulated, CPU Feature AVX2
+func (x Int16x16) GreaterEqual(y Int16x16) Mask16x16 {
+	ones := x.Equal(x).AsInt16x16()
+	return y.Greater(x).AsInt16x16().Xor(ones).AsMask16x16()
+}
+
+// LessEqual returns a mask whose elements indicate whether x <= y
+//
+// Emulated, CPU Feature AVX2
+func (x Int16x16) LessEqual(y Int16x16) Mask16x16 {
+	ones := x.Equal(x).AsInt16x16()
+	return x.Greater(y).AsInt16x16().Xor(ones).AsMask16x16()
+}
+
+// NotEqual returns a mask whose elements indicate whether x != y
+//
+// Emulated, CPU Feature AVX2
+func (x Int16x16) NotEqual(y Int16x16) Mask16x16 {
+	ones := x.Equal(x).AsInt16x16()
+	return x.Equal(y).AsInt16x16().Xor(ones).AsMask16x16()
+}
+
+// Less returns a mask whose elements indicate whether x < y
+//
+// Emulated, CPU Feature AVX2
+func (x Int32x8) Less(y Int32x8) Mask32x8 {
+	return y.Greater(x)
+}
+
+// GreaterEqual returns a mask whose elements indicate whether x >= y
+//
+// Emulated, CPU Feature AVX2
+func (x Int32x8) GreaterEqual(y Int32x8) Mask32x8 {
+	ones := x.Equal(x).AsInt32x8()
+	return y.Greater(x).AsInt32x8().Xor(ones).AsMask32x8()
+}
+
+// LessEqual returns a mask whose elements indicate whether x <= y
+//
+// Emulated, CPU Feature AVX2
+func (x Int32x8) LessEqual(y Int32x8) Mask32x8 {
+	ones := x.Equal(x).AsInt32x8()
+	return x.Greater(y).AsInt32x8().Xor(ones).AsMask32x8()
+}
+
+// NotEqual returns a mask whose elements indicate whether x != y
+//
+// Emulated, CPU Feature AVX2
+func (x Int32x8) NotEqual(y Int32x8) Mask32x8 {
+	ones := x.Equal(x).AsInt32x8()
+	return x.Equal(y).AsInt32x8().Xor(ones).AsMask32x8()
+}
+
+// Less returns a mask whose elements indicate whether x < y
+//
+// Emulated, CPU Feature AVX2
+func (x Int64x4) Less(y Int64x4) Mask64x4 {
+	return y.Greater(x)
+}
+
+// GreaterEqual returns a mask whose elements indicate whether x >= y
+//
+// Emulated, CPU Feature AVX2
+func (x Int64x4) GreaterEqual(y Int64x4) Mask64x4 {
+	ones := x.Equal(x).AsInt64x4()
+	return y.Greater(x).AsInt64x4().Xor(ones).AsMask64x4()
+}
+
+// LessEqual returns a mask whose elements indicate whether x <= y
+//
+// Emulated, CPU Feature AVX2
+func (x Int64x4) LessEqual(y Int64x4) Mask64x4 {
+	ones := x.Equal(x).AsInt64x4()
+	return x.Greater(y).AsInt64x4().Xor(ones).AsMask64x4()
+}
+
+// NotEqual returns a mask whose elements indicate whether x != y
+//
+// Emulated, CPU Feature AVX2
+func (x Int64x4) NotEqual(y Int64x4) Mask64x4 {
+	ones := x.Equal(x).AsInt64x4()
+	return x.Equal(y).AsInt64x4().Xor(ones).AsMask64x4()
+}
+
+// Greater returns a mask whose elements indicate whether x > y
+//
+// Emulated, CPU Feature AVX2
+func (x Uint8x16) Greater(y Uint8x16) Mask8x16 {
+	a, b := x.AsInt8x16(), y.AsInt8x16()
+	signs := BroadcastInt8x16(-1 << (8 - 1))
+	return a.Xor(signs).Greater(b.Xor(signs))
+}
+
+// Less returns a mask whose elements indicate whether x < y
+//
+// Emulated, CPU Feature AVX2
+func (x Uint8x16) Less(y Uint8x16) Mask8x16 {
+	a, b := x.AsInt8x16(), y.AsInt8x16()
+	signs := BroadcastInt8x16(-1 << (8 - 1))
+	return b.Xor(signs).Greater(a.Xor(signs))
+}
+
+// GreaterEqual returns a mask whose elements indicate whether x >= y
+//
+// Emulated, CPU Feature AVX2
+func (x Uint8x16) GreaterEqual(y Uint8x16) Mask8x16 {
+	a, b := x.AsInt8x16(), y.AsInt8x16()
+	ones := x.Equal(x).AsInt8x16()
+	signs := BroadcastInt8x16(-1 << (8 - 1))
+	return b.Xor(signs).Greater(a.Xor(signs)).AsInt8x16().Xor(ones).AsMask8x16()
+}
+
+// LessEqual returns a mask whose elements indicate whether x <= y
+//
+// Emulated, CPU Feature AVX2
+func (x Uint8x16) LessEqual(y Uint8x16) Mask8x16 {
+	a, b := x.AsInt8x16(), y.AsInt8x16()
+	ones := x.Equal(x).AsInt8x16()
+	signs := BroadcastInt8x16(-1 << (8 - 1))
+	return a.Xor(signs).Greater(b.Xor(signs)).AsInt8x16().Xor(ones).AsMask8x16()
+}
+
+// NotEqual returns a mask whose elements indicate whether x != y
+//
+// Emulated, CPU Feature AVX
+func (x Uint8x16) NotEqual(y Uint8x16) Mask8x16 {
+	a, b := x.AsInt8x16(), y.AsInt8x16()
+	ones := x.Equal(x).AsInt8x16()
+	return a.Equal(b).AsInt8x16().Xor(ones).AsMask8x16()
+}
+
+// Greater returns a mask whose elements indicate whether x > y
+//
+// Emulated, CPU Feature AVX
+func (x Uint16x8) Greater(y Uint16x8) Mask16x8 {
+	a, b := x.AsInt16x8(), y.AsInt16x8()
+	ones := x.Equal(x).AsInt16x8()
+	signs := ones.ShiftAllLeft(16 - 1)
+	return a.Xor(signs).Greater(b.Xor(signs))
+}
+
+// Less returns a mask whose elements indicate whether x < y
+//
+// Emulated, CPU Feature AVX
+func (x Uint16x8) Less(y Uint16x8) Mask16x8 {
+	a, b := x.AsInt16x8(), y.AsInt16x8()
+	ones := x.Equal(x).AsInt16x8()
+	signs := ones.ShiftAllLeft(16 - 1)
+	return b.Xor(signs).Greater(a.Xor(signs))
+}
+
+// GreaterEqual returns a mask whose elements indicate whether x >= y
+//
+// Emulated, CPU Feature AVX
+func (x Uint16x8) GreaterEqual(y Uint16x8) Mask16x8 {
+	a, b := x.AsInt16x8(), y.AsInt16x8()
+	ones := x.Equal(x).AsInt16x8()
+	signs := ones.ShiftAllLeft(16 - 1)
+	return b.Xor(signs).Greater(a.Xor(signs)).AsInt16x8().Xor(ones).AsMask16x8()
+}
+
+// LessEqual returns a mask whose elements indicate whether x <= y
+//
+// Emulated, CPU Feature AVX
+func (x Uint16x8) LessEqual(y Uint16x8) Mask16x8 {
+	a, b := x.AsInt16x8(), y.AsInt16x8()
+	ones := x.Equal(x).AsInt16x8()
+	signs := ones.ShiftAllLeft(16 - 1)
+	return a.Xor(signs).Greater(b.Xor(signs)).AsInt16x8().Xor(ones).AsMask16x8()
+}
+
+// NotEqual returns a mask whose elements indicate whether x != y
+//
+// Emulated, CPU Feature AVX
+func (x Uint16x8) NotEqual(y Uint16x8) Mask16x8 {
+	a, b := x.AsInt16x8(), y.AsInt16x8()
+	ones := x.Equal(x).AsInt16x8()
+	return a.Equal(b).AsInt16x8().Xor(ones).AsMask16x8()
+}
+
+// Greater returns a mask whose elements indicate whether x > y
+//
+// Emulated, CPU Feature AVX
+func (x Uint32x4) Greater(y Uint32x4) Mask32x4 {
+	a, b := x.AsInt32x4(), y.AsInt32x4()
+	ones := x.Equal(x).AsInt32x4()
+	signs := ones.ShiftAllLeft(32 - 1)
+	return a.Xor(signs).Greater(b.Xor(signs))
+}
+
+// Less returns a mask whose elements indicate whether x < y
+//
+// Emulated, CPU Feature AVX
+func (x Uint32x4) Less(y Uint32x4) Mask32x4 {
+	a, b := x.AsInt32x4(), y.AsInt32x4()
+	ones := x.Equal(x).AsInt32x4()
+	signs := ones.ShiftAllLeft(32 - 1)
+	return b.Xor(signs).Greater(a.Xor(signs))
+}
+
+// GreaterEqual returns a mask whose elements indicate whether x >= y
+//
+// Emulated, CPU Feature AVX
+func (x Uint32x4) GreaterEqual(y Uint32x4) Mask32x4 {
+	a, b := x.AsInt32x4(), y.AsInt32x4()
+	ones := x.Equal(x).AsInt32x4()
+	signs := ones.ShiftAllLeft(32 - 1)
+	return b.Xor(signs).Greater(a.Xor(signs)).AsInt32x4().Xor(ones).AsMask32x4()
+}
+
+// LessEqual returns a mask whose elements indicate whether x <= y
+//
+// Emulated, CPU Feature AVX
+func (x Uint32x4) LessEqual(y Uint32x4) Mask32x4 {
+	a, b := x.AsInt32x4(), y.AsInt32x4()
+	ones := x.Equal(x).AsInt32x4()
+	signs := ones.ShiftAllLeft(32 - 1)
+	return a.Xor(signs).Greater(b.Xor(signs)).AsInt32x4().Xor(ones).AsMask32x4()
+}
+
+// NotEqual returns a mask whose elements indicate whether x != y
+//
+// Emulated, CPU Feature AVX
+func (x Uint32x4) NotEqual(y Uint32x4) Mask32x4 {
+	a, b := x.AsInt32x4(), y.AsInt32x4()
+	ones := x.Equal(x).AsInt32x4()
+	return a.Equal(b).AsInt32x4().Xor(ones).AsMask32x4()
+}
+
+// Greater returns a mask whose elements indicate whether x > y
+//
+// Emulated, CPU Feature AVX
+func (x Uint64x2) Greater(y Uint64x2) Mask64x2 {
+	a, b := x.AsInt64x2(), y.AsInt64x2()
+	ones := x.Equal(x).AsInt64x2()
+	signs := ones.ShiftAllLeft(64 - 1)
+	return a.Xor(signs).Greater(b.Xor(signs))
+}
+
+// Less returns a mask whose elements indicate whether x < y
+//
+// Emulated, CPU Feature AVX
+func (x Uint64x2) Less(y Uint64x2) Mask64x2 {
+	a, b := x.AsInt64x2(), y.AsInt64x2()
+	ones := x.Equal(x).AsInt64x2()
+	signs := ones.ShiftAllLeft(64 - 1)
+	return b.Xor(signs).Greater(a.Xor(signs))
+}
+
+// GreaterEqual returns a mask whose elements indicate whether x >= y
+//
+// Emulated, CPU Feature AVX
+func (x Uint64x2) GreaterEqual(y Uint64x2) Mask64x2 {
+	a, b := x.AsInt64x2(), y.AsInt64x2()
+	ones := x.Equal(x).AsInt64x2()
+	signs := ones.ShiftAllLeft(64 - 1)
+	return b.Xor(signs).Greater(a.Xor(signs)).AsInt64x2().Xor(ones).AsMask64x2()
+}
+
+// LessEqual returns a mask whose elements indicate whether x <= y
+//
+// Emulated, CPU Feature AVX
+func (x Uint64x2) LessEqual(y Uint64x2) Mask64x2 {
+	a, b := x.AsInt64x2(), y.AsInt64x2()
+	ones := x.Equal(x).AsInt64x2()
+	signs := ones.ShiftAllLeft(64 - 1)
+	return a.Xor(signs).Greater(b.Xor(signs)).AsInt64x2().Xor(ones).AsMask64x2()
+}
+
+// NotEqual returns a mask whose elements indicate whether x != y
+//
+// Emulated, CPU Feature AVX
+func (x Uint64x2) NotEqual(y Uint64x2) Mask64x2 {
+	a, b := x.AsInt64x2(), y.AsInt64x2()
+	ones := x.Equal(x).AsInt64x2()
+	return a.Equal(b).AsInt64x2().Xor(ones).AsMask64x2()
+}
+
+// Greater returns a mask whose elements indicate whether x > y
+//
+// Emulated, CPU Feature AVX2
+func (x Uint8x32) Greater(y Uint8x32) Mask8x32 {
+	a, b := x.AsInt8x32(), y.AsInt8x32()
+	signs := BroadcastInt8x32(-1 << (8 - 1))
+	return a.Xor(signs).Greater(b.Xor(signs))
+}
+
+// Less returns a mask whose elements indicate whether x < y
+//
+// Emulated, CPU Feature AVX2
+func (x Uint8x32) Less(y Uint8x32) Mask8x32 {
+	a, b := x.AsInt8x32(), y.AsInt8x32()
+	signs := BroadcastInt8x32(-1 << (8 - 1))
+	return b.Xor(signs).Greater(a.Xor(signs))
+}
+
+// GreaterEqual returns a mask whose elements indicate whether x >= y
+//
+// Emulated, CPU Feature AVX2
+func (x Uint8x32) GreaterEqual(y Uint8x32) Mask8x32 {
+	a, b := x.AsInt8x32(), y.AsInt8x32()
+	ones := x.Equal(x).AsInt8x32()
+	signs := BroadcastInt8x32(-1 << (8 - 1))
+	return b.Xor(signs).Greater(a.Xor(signs)).AsInt8x32().Xor(ones).AsMask8x32()
+}
+
+// LessEqual returns a mask whose elements indicate whether x <= y
+//
+// Emulated, CPU Feature AVX2
+func (x Uint8x32) LessEqual(y Uint8x32) Mask8x32 {
+	a, b := x.AsInt8x32(), y.AsInt8x32()
+	ones := x.Equal(x).AsInt8x32()
+	signs := BroadcastInt8x32(-1 << (8 - 1))
+	return a.Xor(signs).Greater(b.Xor(signs)).AsInt8x32().Xor(ones).AsMask8x32()
+}
+
+// NotEqual returns a mask whose elements indicate whether x != y
+//
+// Emulated, CPU Feature AVX2
+func (x Uint8x32) NotEqual(y Uint8x32) Mask8x32 {
+	a, b := x.AsInt8x32(), y.AsInt8x32()
+	ones := x.Equal(x).AsInt8x32()
+	return a.Equal(b).AsInt8x32().Xor(ones).AsMask8x32()
+}
+
+// Greater returns a mask whose elements indicate whether x > y
+//
+// Emulated, CPU Feature AVX2
+func (x Uint16x16) Greater(y Uint16x16) Mask16x16 {
+	a, b := x.AsInt16x16(), y.AsInt16x16()
+	ones := x.Equal(x).AsInt16x16()
+	signs := ones.ShiftAllLeft(16 - 1)
+	return a.Xor(signs).Greater(b.Xor(signs))
+}
+
+// Less returns a mask whose elements indicate whether x < y
+//
+// Emulated, CPU Feature AVX2
+func (x Uint16x16) Less(y Uint16x16) Mask16x16 {
+	a, b := x.AsInt16x16(), y.AsInt16x16()
+	ones := x.Equal(x).AsInt16x16()
+	signs := ones.ShiftAllLeft(16 - 1)
+	return b.Xor(signs).Greater(a.Xor(signs))
+}
+
+// GreaterEqual returns a mask whose elements indicate whether x >= y
+//
+// Emulated, CPU Feature AVX2
+func (x Uint16x16) GreaterEqual(y Uint16x16) Mask16x16 {
+	a, b := x.AsInt16x16(), y.AsInt16x16()
+	ones := x.Equal(x).AsInt16x16()
+	signs := ones.ShiftAllLeft(16 - 1)
+	return b.Xor(signs).Greater(a.Xor(signs)).AsInt16x16().Xor(ones).AsMask16x16()
+}
+
+// LessEqual returns a mask whose elements indicate whether x <= y
+//
+// Emulated, CPU Feature AVX2
+func (x Uint16x16) LessEqual(y Uint16x16) Mask16x16 {
+	a, b := x.AsInt16x16(), y.AsInt16x16()
+	ones := x.Equal(x).AsInt16x16()
+	signs := ones.ShiftAllLeft(16 - 1)
+	return a.Xor(signs).Greater(b.Xor(signs)).AsInt16x16().Xor(ones).AsMask16x16()
+}
+
+// NotEqual returns a mask whose elements indicate whether x != y
+//
+// Emulated, CPU Feature AVX2
+func (x Uint16x16) NotEqual(y Uint16x16) Mask16x16 {
+	a, b := x.AsInt16x16(), y.AsInt16x16()
+	ones := x.Equal(x).AsInt16x16()
+	return a.Equal(b).AsInt16x16().Xor(ones).AsMask16x16()
+}
+
+// Greater returns a mask whose elements indicate whether x > y
+//
+// Emulated, CPU Feature AVX2
+func (x Uint32x8) Greater(y Uint32x8) Mask32x8 {
+	a, b := x.AsInt32x8(), y.AsInt32x8()
+	ones := x.Equal(x).AsInt32x8()
+	signs := ones.ShiftAllLeft(32 - 1)
+	return a.Xor(signs).Greater(b.Xor(signs))
+}
+
+// Less returns a mask whose elements indicate whether x < y
+//
+// Emulated, CPU Feature AVX2
+func (x Uint32x8) Less(y Uint32x8) Mask32x8 {
+	a, b := x.AsInt32x8(), y.AsInt32x8()
+	ones := x.Equal(x).AsInt32x8()
+	signs := ones.ShiftAllLeft(32 - 1)
+	return b.Xor(signs).Greater(a.Xor(signs))
+}
+
+// GreaterEqual returns a mask whose elements indicate whether x >= y
+//
+// Emulated, CPU Feature AVX2
+func (x Uint32x8) GreaterEqual(y Uint32x8) Mask32x8 {
+	a, b := x.AsInt32x8(), y.AsInt32x8()
+	ones := x.Equal(x).AsInt32x8()
+	signs := ones.ShiftAllLeft(32 - 1)
+	return b.Xor(signs).Greater(a.Xor(signs)).AsInt32x8().Xor(ones).AsMask32x8()
+}
+
+// LessEqual returns a mask whose elements indicate whether x <= y
+//
+// Emulated, CPU Feature AVX2
+func (x Uint32x8) LessEqual(y Uint32x8) Mask32x8 {
+	a, b := x.AsInt32x8(), y.AsInt32x8()
+	ones := x.Equal(x).AsInt32x8()
+	signs := ones.ShiftAllLeft(32 - 1)
+	return a.Xor(signs).Greater(b.Xor(signs)).AsInt32x8().Xor(ones).AsMask32x8()
+}
+
+// NotEqual returns a mask whose elements indicate whether x != y
+//
+// Emulated, CPU Feature AVX2
+func (x Uint32x8) NotEqual(y Uint32x8) Mask32x8 {
+	a, b := x.AsInt32x8(), y.AsInt32x8()
+	ones := x.Equal(x).AsInt32x8()
+	return a.Equal(b).AsInt32x8().Xor(ones).AsMask32x8()
+}
+
+// Greater returns a mask whose elements indicate whether x > y
+//
+// Emulated, CPU Feature AVX2
+func (x Uint64x4) Greater(y Uint64x4) Mask64x4 {
+	a, b := x.AsInt64x4(), y.AsInt64x4()
+	ones := x.Equal(x).AsInt64x4()
+	signs := ones.ShiftAllLeft(64 - 1)
+	return a.Xor(signs).Greater(b.Xor(signs))
+}
+
+// Less returns a mask whose elements indicate whether x < y
+//
+// Emulated, CPU Feature AVX2
+func (x Uint64x4) Less(y Uint64x4) Mask64x4 {
+	a, b := x.AsInt64x4(), y.AsInt64x4()
+	ones := x.Equal(x).AsInt64x4()
+	signs := ones.ShiftAllLeft(64 - 1)
+	return b.Xor(signs).Greater(a.Xor(signs))
+}
+
+// GreaterEqual returns a mask whose elements indicate whether x >= y
+//
+// Emulated, CPU Feature AVX2
+func (x Uint64x4) GreaterEqual(y Uint64x4) Mask64x4 {
+	a, b := x.AsInt64x4(), y.AsInt64x4()
+	ones := x.Equal(x).AsInt64x4()
+	signs := ones.ShiftAllLeft(64 - 1)
+	return b.Xor(signs).Greater(a.Xor(signs)).AsInt64x4().Xor(ones).AsMask64x4()
+}
+
+// LessEqual returns a mask whose elements indicate whether x <= y
+//
+// Emulated, CPU Feature AVX2
+func (x Uint64x4) LessEqual(y Uint64x4) Mask64x4 {
+	a, b := x.AsInt64x4(), y.AsInt64x4()
+	ones := x.Equal(x).AsInt64x4()
+	signs := ones.ShiftAllLeft(64 - 1)
+	return a.Xor(signs).Greater(b.Xor(signs)).AsInt64x4().Xor(ones).AsMask64x4()
+}
+
+// NotEqual returns a mask whose elements indicate whether x != y
+//
+// Emulated, CPU Feature AVX2
+func (x Uint64x4) NotEqual(y Uint64x4) Mask64x4 {
+	a, b := x.AsInt64x4(), y.AsInt64x4()
+	ones := x.Equal(x).AsInt64x4()
+	return a.Equal(b).AsInt64x4().Xor(ones).AsMask64x4()
+}
+
 // BroadcastInt8x16 returns a vector with the input
 // x assigned to all elements of the output.
 //
-- 
2.52.0
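
For readers of the generated code: every unsigned comparison above reduces
to one identity, namely that flipping the sign bit of both operands turns
an unsigned compare into a signed one, and the derived predicates (>=, <=,
!=) are lane-wise complements of (>, >, ==), taken by Xor with the
all-ones vector that x.Equal(x) produces. Below is a minimal scalar sketch
of both tricks in plain Go; the helper names are illustrative and are not
part of package simd.

	package main

	import "fmt"

	const sign16 = 1 << 15 // per-lane sign bit for 16-bit elements

	// greaterUnsigned mirrors one lane of the emulated Uint16xN.Greater:
	// bias both operands by the sign bit, then compare as signed.
	func greaterUnsigned(x, y uint16) bool {
		return int16(x^sign16) > int16(y^sign16)
	}

	// greaterEqualUnsigned mirrors the emulated GreaterEqual: x >= y is
	// the complement of y > x; the vector code takes that complement by
	// Xor-ing the comparison mask with all ones.
	func greaterEqualUnsigned(x, y uint16) bool {
		return !greaterUnsigned(y, x)
	}

	func main() {
		fmt.Println(greaterUnsigned(0xFFFF, 1)) // true: 65535 > 1 unsigned
		fmt.Println(int16(-1) > int16(1))       // false: a raw signed compare gets these lanes wrong
		fmt.Println(greaterEqualUnsigned(7, 7)) // true
	}

In the vector versions, signs plays the role of sign16 and the trailing
.Xor(ones) is the scalar negation; for 8-bit lanes signs is built with a
broadcast rather than a shift, which is what pulls those methods up to AVX2.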