From aab8b173a96449110319455e8015fc140e43766e Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Wed, 9 Jul 2025 16:24:34 +0000 Subject: [PATCH] [dev.simd] cmd/compile, simd: Int64x2 Greater and Uint* Equal This CL is generated by CL 686817. Change-Id: I19b8e468594514b2b1c99f8ad766f78b5e194c80 Reviewed-on: https://go-review.googlesource.com/c/go/+/686876 TryBot-Bypass: David Chase Reviewed-by: David Chase --- src/cmd/compile/internal/amd64/simdssa.go | 11 +- .../compile/internal/ssa/_gen/simdAMD64.rules | 18 +- .../compile/internal/ssa/_gen/simdAMD64ops.go | 17 +- src/cmd/compile/internal/ssa/opGen.go | 173 ++++++++-------- src/cmd/compile/internal/ssa/rewriteAMD64.go | 189 ++---------------- .../compile/internal/ssagen/simdintrinsics.go | 18 +- src/simd/ops_amd64.go | 90 ++++----- src/simd/simd_wrapped_test.go | 4 +- 8 files changed, 188 insertions(+), 332 deletions(-) diff --git a/src/cmd/compile/internal/amd64/simdssa.go b/src/cmd/compile/internal/amd64/simdssa.go index d87548c27f..12a8c857bd 100644 --- a/src/cmd/compile/internal/amd64/simdssa.go +++ b/src/cmd/compile/internal/amd64/simdssa.go @@ -115,6 +115,7 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPCMPGTW256, ssa.OpAMD64VPCMPGTD128, ssa.OpAMD64VPCMPGTD256, + ssa.OpAMD64VPCMPGTQ128, ssa.OpAMD64VPCMPGTQ256, ssa.OpAMD64VMAXPS128, ssa.OpAMD64VMAXPS256, @@ -688,25 +689,25 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPCMPW512, ssa.OpAMD64VPCMPD512, ssa.OpAMD64VPCMPQ512, + ssa.OpAMD64VPCMPUB512, + ssa.OpAMD64VPCMPUW512, + ssa.OpAMD64VPCMPUD512, + ssa.OpAMD64VPCMPUQ512, ssa.OpAMD64VPCMPUB128, ssa.OpAMD64VPCMPUB256, - ssa.OpAMD64VPCMPUB512, ssa.OpAMD64VPCMPUW128, ssa.OpAMD64VPCMPUW256, - ssa.OpAMD64VPCMPUW512, ssa.OpAMD64VPCMPUD128, ssa.OpAMD64VPCMPUD256, - ssa.OpAMD64VPCMPUD512, ssa.OpAMD64VPCMPUQ128, ssa.OpAMD64VPCMPUQ256, - ssa.OpAMD64VPCMPUQ512, - ssa.OpAMD64VPCMPQ128, ssa.OpAMD64VPCMPB128, ssa.OpAMD64VPCMPB256, ssa.OpAMD64VPCMPW128, ssa.OpAMD64VPCMPW256, ssa.OpAMD64VPCMPD128, ssa.OpAMD64VPCMPD256, + ssa.OpAMD64VPCMPQ128, ssa.OpAMD64VPCMPQ256: p = simdV2kImm8(s, v) diff --git a/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules b/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules index 7ea24fe95c..09ab9b840a 100644 --- a/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules +++ b/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules @@ -283,17 +283,17 @@ (EqualInt64x2 ...) => (VPCMPEQQ128 ...) (EqualInt64x4 ...) => (VPCMPEQQ256 ...) (EqualInt64x8 x y) => (VPMOVMToVec64x8 (VPCMPQ512 [0] x y)) -(EqualUint8x16 x y) => (VPMOVMToVec8x16 (VPCMPUB128 [0] x y)) -(EqualUint8x32 x y) => (VPMOVMToVec8x32 (VPCMPUB256 [0] x y)) +(EqualUint8x16 ...) => (VPCMPEQB128 ...) +(EqualUint8x32 ...) => (VPCMPEQB256 ...) (EqualUint8x64 x y) => (VPMOVMToVec8x64 (VPCMPUB512 [0] x y)) -(EqualUint16x8 x y) => (VPMOVMToVec16x8 (VPCMPUW128 [0] x y)) -(EqualUint16x16 x y) => (VPMOVMToVec16x16 (VPCMPUW256 [0] x y)) +(EqualUint16x8 ...) => (VPCMPEQW128 ...) +(EqualUint16x16 ...) => (VPCMPEQW256 ...) (EqualUint16x32 x y) => (VPMOVMToVec16x32 (VPCMPUW512 [0] x y)) -(EqualUint32x4 x y) => (VPMOVMToVec32x4 (VPCMPUD128 [0] x y)) -(EqualUint32x8 x y) => (VPMOVMToVec32x8 (VPCMPUD256 [0] x y)) +(EqualUint32x4 ...) => (VPCMPEQD128 ...) +(EqualUint32x8 ...) => (VPCMPEQD256 ...) (EqualUint32x16 x y) => (VPMOVMToVec32x16 (VPCMPUD512 [0] x y)) -(EqualUint64x2 x y) => (VPMOVMToVec64x2 (VPCMPUQ128 [0] x y)) -(EqualUint64x4 x y) => (VPMOVMToVec64x4 (VPCMPUQ256 [0] x y)) +(EqualUint64x2 ...) => (VPCMPEQQ128 ...) +(EqualUint64x4 ...) => (VPCMPEQQ256 ...) (EqualUint64x8 x y) => (VPMOVMToVec64x8 (VPCMPUQ512 [0] x y)) (EqualMaskedFloat32x4 x y mask) => (VPMOVMToVec32x4 (VCMPPSMasked128 [0] x y (VPMOVVec32x4ToM mask))) (EqualMaskedFloat32x8 x y mask) => (VPMOVMToVec32x8 (VCMPPSMasked256 [0] x y (VPMOVVec32x8ToM mask))) @@ -428,7 +428,7 @@ (GreaterInt32x4 ...) => (VPCMPGTD128 ...) (GreaterInt32x8 ...) => (VPCMPGTD256 ...) (GreaterInt32x16 x y) => (VPMOVMToVec32x16 (VPCMPD512 [14] x y)) -(GreaterInt64x2 x y) => (VPMOVMToVec64x2 (VPCMPQ128 [14] x y)) +(GreaterInt64x2 ...) => (VPCMPGTQ128 ...) (GreaterInt64x4 ...) => (VPCMPGTQ256 ...) (GreaterInt64x8 x y) => (VPMOVMToVec64x8 (VPCMPQ512 [14] x y)) (GreaterUint8x16 x y) => (VPMOVMToVec8x16 (VPCMPUB128 [14] x y)) diff --git a/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go b/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go index 09cfcfb4d9..f0a149f7d8 100644 --- a/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go +++ b/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go @@ -436,6 +436,7 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf {name: "VPANDQMasked128", argLength: 3, reg: w2kw, asm: "VPANDQ", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPANDNQMasked128", argLength: 3, reg: w2kw, asm: "VPANDNQ", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPCMPEQQ128", argLength: 2, reg: v21, asm: "VPCMPEQQ", commutative: true, typ: "Vec128", resultInArg0: false}, + {name: "VPCMPGTQ128", argLength: 2, reg: v21, asm: "VPCMPGTQ", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPMAXSQ128", argLength: 2, reg: w21, asm: "VPMAXSQ", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPMAXSQMasked128", argLength: 3, reg: w2kw, asm: "VPMAXSQ", commutative: true, typ: "Vec128", resultInArg0: false}, {name: "VPMINSQ128", argLength: 2, reg: w21, asm: "VPMINSQ", commutative: true, typ: "Vec128", resultInArg0: false}, @@ -837,36 +838,36 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf {name: "VINSERTI128256", argLength: 2, reg: v21, asm: "VINSERTI128", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPCMPB512", argLength: 2, reg: w2k, asm: "VPCMPB", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, {name: "VPCMPBMasked512", argLength: 3, reg: w2kk, asm: "VPCMPB", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, - {name: "VPCMPUW256", argLength: 2, reg: w2k, asm: "VPCMPUW", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, {name: "VPCMPUWMasked256", argLength: 3, reg: w2kk, asm: "VPCMPUW", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, + {name: "VPCMPUW256", argLength: 2, reg: w2k, asm: "VPCMPUW", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false}, {name: "VPCMPUW512", argLength: 2, reg: w2k, asm: "VPCMPUW", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, {name: "VPCMPUWMasked512", argLength: 3, reg: w2kk, asm: "VPCMPUW", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, - {name: "VPCMPUW128", argLength: 2, reg: w2k, asm: "VPCMPUW", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, {name: "VPCMPUWMasked128", argLength: 3, reg: w2kk, asm: "VPCMPUW", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, + {name: "VPCMPUW128", argLength: 2, reg: w2k, asm: "VPCMPUW", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false}, {name: "VPCMPUD512", argLength: 2, reg: w2k, asm: "VPCMPUD", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, {name: "VPCMPUDMasked512", argLength: 3, reg: w2kk, asm: "VPCMPUD", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, - {name: "VPCMPUD128", argLength: 2, reg: w2k, asm: "VPCMPUD", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, {name: "VPCMPUDMasked128", argLength: 3, reg: w2kk, asm: "VPCMPUD", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, - {name: "VPCMPUD256", argLength: 2, reg: w2k, asm: "VPCMPUD", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, + {name: "VPCMPUD128", argLength: 2, reg: w2k, asm: "VPCMPUD", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false}, {name: "VPCMPUDMasked256", argLength: 3, reg: w2kk, asm: "VPCMPUD", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, - {name: "VPCMPUQ128", argLength: 2, reg: w2k, asm: "VPCMPUQ", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, + {name: "VPCMPUD256", argLength: 2, reg: w2k, asm: "VPCMPUD", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false}, {name: "VPCMPUQMasked128", argLength: 3, reg: w2kk, asm: "VPCMPUQ", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, - {name: "VPCMPUQ256", argLength: 2, reg: w2k, asm: "VPCMPUQ", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, + {name: "VPCMPUQ128", argLength: 2, reg: w2k, asm: "VPCMPUQ", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false}, {name: "VPCMPUQMasked256", argLength: 3, reg: w2kk, asm: "VPCMPUQ", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, + {name: "VPCMPUQ256", argLength: 2, reg: w2k, asm: "VPCMPUQ", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false}, {name: "VPCMPUQ512", argLength: 2, reg: w2k, asm: "VPCMPUQ", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, {name: "VPCMPUQMasked512", argLength: 3, reg: w2kk, asm: "VPCMPUQ", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, - {name: "VPCMPUB128", argLength: 2, reg: w2k, asm: "VPCMPUB", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, {name: "VPCMPUBMasked128", argLength: 3, reg: w2kk, asm: "VPCMPUB", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, {name: "VGF2P8AFFINEQB128", argLength: 2, reg: w21, asm: "VGF2P8AFFINEQB", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VGF2P8AFFINEINVQB128", argLength: 2, reg: w21, asm: "VGF2P8AFFINEINVQB", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VGF2P8AFFINEINVQBMasked128", argLength: 3, reg: w2kw, asm: "VGF2P8AFFINEINVQB", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VGF2P8AFFINEQBMasked128", argLength: 3, reg: w2kw, asm: "VGF2P8AFFINEQB", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VPCMPUB256", argLength: 2, reg: w2k, asm: "VPCMPUB", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, + {name: "VPCMPUB128", argLength: 2, reg: w2k, asm: "VPCMPUB", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false}, {name: "VPCMPUBMasked256", argLength: 3, reg: w2kk, asm: "VPCMPUB", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, {name: "VGF2P8AFFINEQB256", argLength: 2, reg: w21, asm: "VGF2P8AFFINEQB", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VGF2P8AFFINEINVQB256", argLength: 2, reg: w21, asm: "VGF2P8AFFINEINVQB", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VGF2P8AFFINEINVQBMasked256", argLength: 3, reg: w2kw, asm: "VGF2P8AFFINEINVQB", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VGF2P8AFFINEQBMasked256", argLength: 3, reg: w2kw, asm: "VGF2P8AFFINEQB", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPCMPUB256", argLength: 2, reg: w2k, asm: "VPCMPUB", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false}, {name: "VPCMPUB512", argLength: 2, reg: w2k, asm: "VPCMPUB", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, {name: "VPCMPUBMasked512", argLength: 3, reg: w2kk, asm: "VPCMPUB", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, {name: "VGF2P8AFFINEQB512", argLength: 2, reg: w21, asm: "VGF2P8AFFINEQB", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false}, diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go index edc88dfbc6..d9fea94fc3 100644 --- a/src/cmd/compile/internal/ssa/opGen.go +++ b/src/cmd/compile/internal/ssa/opGen.go @@ -1629,6 +1629,7 @@ const ( OpAMD64VPANDQMasked128 OpAMD64VPANDNQMasked128 OpAMD64VPCMPEQQ128 + OpAMD64VPCMPGTQ128 OpAMD64VPMAXSQ128 OpAMD64VPMAXSQMasked128 OpAMD64VPMINSQ128 @@ -2030,36 +2031,36 @@ const ( OpAMD64VINSERTI128256 OpAMD64VPCMPB512 OpAMD64VPCMPBMasked512 - OpAMD64VPCMPUW256 OpAMD64VPCMPUWMasked256 + OpAMD64VPCMPUW256 OpAMD64VPCMPUW512 OpAMD64VPCMPUWMasked512 - OpAMD64VPCMPUW128 OpAMD64VPCMPUWMasked128 + OpAMD64VPCMPUW128 OpAMD64VPCMPUD512 OpAMD64VPCMPUDMasked512 - OpAMD64VPCMPUD128 OpAMD64VPCMPUDMasked128 - OpAMD64VPCMPUD256 + OpAMD64VPCMPUD128 OpAMD64VPCMPUDMasked256 - OpAMD64VPCMPUQ128 + OpAMD64VPCMPUD256 OpAMD64VPCMPUQMasked128 - OpAMD64VPCMPUQ256 + OpAMD64VPCMPUQ128 OpAMD64VPCMPUQMasked256 + OpAMD64VPCMPUQ256 OpAMD64VPCMPUQ512 OpAMD64VPCMPUQMasked512 - OpAMD64VPCMPUB128 OpAMD64VPCMPUBMasked128 OpAMD64VGF2P8AFFINEQB128 OpAMD64VGF2P8AFFINEINVQB128 OpAMD64VGF2P8AFFINEINVQBMasked128 OpAMD64VGF2P8AFFINEQBMasked128 - OpAMD64VPCMPUB256 + OpAMD64VPCMPUB128 OpAMD64VPCMPUBMasked256 OpAMD64VGF2P8AFFINEQB256 OpAMD64VGF2P8AFFINEINVQB256 OpAMD64VGF2P8AFFINEINVQBMasked256 OpAMD64VGF2P8AFFINEQBMasked256 + OpAMD64VPCMPUB256 OpAMD64VPCMPUB512 OpAMD64VPCMPUBMasked512 OpAMD64VGF2P8AFFINEQB512 @@ -25058,6 +25059,20 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPCMPGTQ128", + argLen: 2, + asm: x86.AVPCMPGTQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "VPMAXSQ128", argLen: 2, @@ -31113,15 +31128,16 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPCMPUW256", + name: "VPCMPUWMasked256", auxType: auxInt8, - argLen: 2, + argLen: 3, commutative: true, asm: x86.AVPCMPUW, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 @@ -31129,16 +31145,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPCMPUWMasked256", - auxType: auxInt8, - argLen: 3, - commutative: true, - asm: x86.AVPCMPUW, + name: "VPCMPUW256", + auxType: auxInt8, + argLen: 2, + asm: x86.AVPCMPUW, reg: regInfo{ inputs: []inputInfo{ - {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 @@ -31179,15 +31193,16 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPCMPUW128", + name: "VPCMPUWMasked128", auxType: auxInt8, - argLen: 2, + argLen: 3, commutative: true, asm: x86.AVPCMPUW, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 @@ -31195,16 +31210,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPCMPUWMasked128", - auxType: auxInt8, - argLen: 3, - commutative: true, - asm: x86.AVPCMPUW, + name: "VPCMPUW128", + auxType: auxInt8, + argLen: 2, + asm: x86.AVPCMPUW, reg: regInfo{ inputs: []inputInfo{ - {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 @@ -31244,22 +31257,6 @@ var opcodeTable = [...]opInfo{ }, }, }, - { - name: "VPCMPUD128", - auxType: auxInt8, - argLen: 2, - commutative: true, - asm: x86.AVPCMPUD, - reg: regInfo{ - inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - }, - outputs: []outputInfo{ - {0, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 - }, - }, - }, { name: "VPCMPUDMasked128", auxType: auxInt8, @@ -31278,11 +31275,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPCMPUD256", - auxType: auxInt8, - argLen: 2, - commutative: true, - asm: x86.AVPCMPUD, + name: "VPCMPUD128", + auxType: auxInt8, + argLen: 2, + asm: x86.AVPCMPUD, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -31311,11 +31307,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPCMPUQ128", - auxType: auxInt8, - argLen: 2, - commutative: true, - asm: x86.AVPCMPUQ, + name: "VPCMPUD256", + auxType: auxInt8, + argLen: 2, + asm: x86.AVPCMPUD, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -31344,11 +31339,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPCMPUQ256", - auxType: auxInt8, - argLen: 2, - commutative: true, - asm: x86.AVPCMPUQ, + name: "VPCMPUQ128", + auxType: auxInt8, + argLen: 2, + asm: x86.AVPCMPUQ, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -31377,11 +31371,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPCMPUQ512", - auxType: auxInt8, - argLen: 2, - commutative: true, - asm: x86.AVPCMPUQ, + name: "VPCMPUQ256", + auxType: auxInt8, + argLen: 2, + asm: x86.AVPCMPUQ, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -31393,16 +31386,15 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPCMPUQMasked512", + name: "VPCMPUQ512", auxType: auxInt8, - argLen: 3, + argLen: 2, commutative: true, asm: x86.AVPCMPUQ, reg: regInfo{ inputs: []inputInfo{ - {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 @@ -31410,15 +31402,16 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPCMPUB128", + name: "VPCMPUQMasked512", auxType: auxInt8, - argLen: 2, + argLen: 3, commutative: true, - asm: x86.AVPCMPUB, + asm: x86.AVPCMPUQ, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 @@ -31505,11 +31498,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPCMPUB256", - auxType: auxInt8, - argLen: 2, - commutative: true, - asm: x86.AVPCMPUB, + name: "VPCMPUB128", + auxType: auxInt8, + argLen: 2, + asm: x86.AVPCMPUB, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -31599,6 +31591,21 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPCMPUB256", + auxType: auxInt8, + argLen: 2, + asm: x86.AVPCMPUB, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + }, + }, + }, { name: "VPCMPUB512", auxType: auxInt8, diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go index 2e27077e81..4dd1fcbcb7 100644 --- a/src/cmd/compile/internal/ssa/rewriteAMD64.go +++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go @@ -1530,27 +1530,35 @@ func rewriteValueAMD64(v *Value) bool { case OpEqualMaskedUint8x64: return rewriteValueAMD64_OpEqualMaskedUint8x64(v) case OpEqualUint16x16: - return rewriteValueAMD64_OpEqualUint16x16(v) + v.Op = OpAMD64VPCMPEQW256 + return true case OpEqualUint16x32: return rewriteValueAMD64_OpEqualUint16x32(v) case OpEqualUint16x8: - return rewriteValueAMD64_OpEqualUint16x8(v) + v.Op = OpAMD64VPCMPEQW128 + return true case OpEqualUint32x16: return rewriteValueAMD64_OpEqualUint32x16(v) case OpEqualUint32x4: - return rewriteValueAMD64_OpEqualUint32x4(v) + v.Op = OpAMD64VPCMPEQD128 + return true case OpEqualUint32x8: - return rewriteValueAMD64_OpEqualUint32x8(v) + v.Op = OpAMD64VPCMPEQD256 + return true case OpEqualUint64x2: - return rewriteValueAMD64_OpEqualUint64x2(v) + v.Op = OpAMD64VPCMPEQQ128 + return true case OpEqualUint64x4: - return rewriteValueAMD64_OpEqualUint64x4(v) + v.Op = OpAMD64VPCMPEQQ256 + return true case OpEqualUint64x8: return rewriteValueAMD64_OpEqualUint64x8(v) case OpEqualUint8x16: - return rewriteValueAMD64_OpEqualUint8x16(v) + v.Op = OpAMD64VPCMPEQB128 + return true case OpEqualUint8x32: - return rewriteValueAMD64_OpEqualUint8x32(v) + v.Op = OpAMD64VPCMPEQB256 + return true case OpEqualUint8x64: return rewriteValueAMD64_OpEqualUint8x64(v) case OpFMA: @@ -1914,7 +1922,8 @@ func rewriteValueAMD64(v *Value) bool { v.Op = OpAMD64VPCMPGTD256 return true case OpGreaterInt64x2: - return rewriteValueAMD64_OpGreaterInt64x2(v) + v.Op = OpAMD64VPCMPGTQ128 + return true case OpGreaterInt64x4: v.Op = OpAMD64VPCMPGTQ256 return true @@ -33212,24 +33221,6 @@ func rewriteValueAMD64_OpEqualMaskedUint8x64(v *Value) bool { return true } } -func rewriteValueAMD64_OpEqualUint16x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (EqualUint16x16 x y) - // result: (VPMOVMToVec16x16 (VPCMPUW256 [0] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec16x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUW256, typ.Mask) - v0.AuxInt = int8ToAuxInt(0) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} func rewriteValueAMD64_OpEqualUint16x32(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -33248,24 +33239,6 @@ func rewriteValueAMD64_OpEqualUint16x32(v *Value) bool { return true } } -func rewriteValueAMD64_OpEqualUint16x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (EqualUint16x8 x y) - // result: (VPMOVMToVec16x8 (VPCMPUW128 [0] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec16x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUW128, typ.Mask) - v0.AuxInt = int8ToAuxInt(0) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} func rewriteValueAMD64_OpEqualUint32x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -33284,78 +33257,6 @@ func rewriteValueAMD64_OpEqualUint32x16(v *Value) bool { return true } } -func rewriteValueAMD64_OpEqualUint32x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (EqualUint32x4 x y) - // result: (VPMOVMToVec32x4 (VPCMPUD128 [0] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec32x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUD128, typ.Mask) - v0.AuxInt = int8ToAuxInt(0) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpEqualUint32x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (EqualUint32x8 x y) - // result: (VPMOVMToVec32x8 (VPCMPUD256 [0] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec32x8) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUD256, typ.Mask) - v0.AuxInt = int8ToAuxInt(0) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpEqualUint64x2(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (EqualUint64x2 x y) - // result: (VPMOVMToVec64x2 (VPCMPUQ128 [0] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec64x2) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQ128, typ.Mask) - v0.AuxInt = int8ToAuxInt(0) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpEqualUint64x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (EqualUint64x4 x y) - // result: (VPMOVMToVec64x4 (VPCMPUQ256 [0] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec64x4) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQ256, typ.Mask) - v0.AuxInt = int8ToAuxInt(0) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} func rewriteValueAMD64_OpEqualUint64x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -33374,42 +33275,6 @@ func rewriteValueAMD64_OpEqualUint64x8(v *Value) bool { return true } } -func rewriteValueAMD64_OpEqualUint8x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (EqualUint8x16 x y) - // result: (VPMOVMToVec8x16 (VPCMPUB128 [0] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec8x16) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUB128, typ.Mask) - v0.AuxInt = int8ToAuxInt(0) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} -func rewriteValueAMD64_OpEqualUint8x32(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (EqualUint8x32 x y) - // result: (VPMOVMToVec8x32 (VPCMPUB256 [0] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec8x32) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUB256, typ.Mask) - v0.AuxInt = int8ToAuxInt(0) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} func rewriteValueAMD64_OpEqualUint8x64(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -35875,24 +35740,6 @@ func rewriteValueAMD64_OpGreaterInt32x16(v *Value) bool { return true } } -func rewriteValueAMD64_OpGreaterInt64x2(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (GreaterInt64x2 x y) - // result: (VPMOVMToVec64x2 (VPCMPQ128 [14] x y)) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VPMOVMToVec64x2) - v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQ128, typ.Mask) - v0.AuxInt = int8ToAuxInt(14) - v0.AddArg2(x, y) - v.AddArg(v0) - return true - } -} func rewriteValueAMD64_OpGreaterInt64x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] diff --git a/src/cmd/compile/internal/ssagen/simdintrinsics.go b/src/cmd/compile/internal/ssagen/simdintrinsics.go index c6e8961738..15351b678b 100644 --- a/src/cmd/compile/internal/ssagen/simdintrinsics.go +++ b/src/cmd/compile/internal/ssagen/simdintrinsics.go @@ -284,6 +284,14 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . addF(simdPackage, "Int32x8.Equal", opLen2(ssa.OpEqualInt32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int64x2.Equal", opLen2(ssa.OpEqualInt64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int64x4.Equal", opLen2(ssa.OpEqualInt64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint8x16.Equal", opLen2(ssa.OpEqualUint8x16, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint8x32.Equal", opLen2(ssa.OpEqualUint8x32, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint16x8.Equal", opLen2(ssa.OpEqualUint16x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint16x16.Equal", opLen2(ssa.OpEqualUint16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint32x4.Equal", opLen2(ssa.OpEqualUint32x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint32x8.Equal", opLen2(ssa.OpEqualUint32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint64x2.Equal", opLen2(ssa.OpEqualUint64x2, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint64x4.Equal", opLen2(ssa.OpEqualUint64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float32x4.Equal", opLen2(ssa.OpEqualFloat32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x8.Equal", opLen2(ssa.OpEqualFloat32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float32x16.Equal", opLen2(ssa.OpEqualFloat32x16, types.TypeVec512), sys.AMD64) @@ -294,17 +302,9 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . addF(simdPackage, "Int16x32.Equal", opLen2(ssa.OpEqualInt16x32, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int32x16.Equal", opLen2(ssa.OpEqualInt32x16, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int64x8.Equal", opLen2(ssa.OpEqualInt64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint8x16.Equal", opLen2(ssa.OpEqualUint8x16, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint8x32.Equal", opLen2(ssa.OpEqualUint8x32, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint8x64.Equal", opLen2(ssa.OpEqualUint8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint16x8.Equal", opLen2(ssa.OpEqualUint16x8, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint16x16.Equal", opLen2(ssa.OpEqualUint16x16, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint16x32.Equal", opLen2(ssa.OpEqualUint16x32, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint32x4.Equal", opLen2(ssa.OpEqualUint32x4, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint32x8.Equal", opLen2(ssa.OpEqualUint32x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint32x16.Equal", opLen2(ssa.OpEqualUint32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Uint64x2.Equal", opLen2(ssa.OpEqualUint64x2, types.TypeVec128), sys.AMD64) - addF(simdPackage, "Uint64x4.Equal", opLen2(ssa.OpEqualUint64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint64x8.Equal", opLen2(ssa.OpEqualUint64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Float32x4.EqualMasked", opLen3(ssa.OpEqualMaskedFloat32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x8.EqualMasked", opLen3(ssa.OpEqualMaskedFloat32x8, types.TypeVec256), sys.AMD64) @@ -430,6 +430,7 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . addF(simdPackage, "Int16x16.Greater", opLen2(ssa.OpGreaterInt16x16, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int32x4.Greater", opLen2(ssa.OpGreaterInt32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int32x8.Greater", opLen2(ssa.OpGreaterInt32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int64x2.Greater", opLen2(ssa.OpGreaterInt64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int64x4.Greater", opLen2(ssa.OpGreaterInt64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float32x4.Greater", opLen2(ssa.OpGreaterFloat32x4, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float32x8.Greater", opLen2(ssa.OpGreaterFloat32x8, types.TypeVec256), sys.AMD64) @@ -440,7 +441,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . addF(simdPackage, "Int8x64.Greater", opLen2(ssa.OpGreaterInt8x64, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int16x32.Greater", opLen2(ssa.OpGreaterInt16x32, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int32x16.Greater", opLen2(ssa.OpGreaterInt32x16, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Int64x2.Greater", opLen2(ssa.OpGreaterInt64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int64x8.Greater", opLen2(ssa.OpGreaterInt64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Uint8x16.Greater", opLen2(ssa.OpGreaterUint8x16, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint8x32.Greater", opLen2(ssa.OpGreaterUint8x32, types.TypeVec256), sys.AMD64) diff --git a/src/simd/ops_amd64.go b/src/simd/ops_amd64.go index 26a0d3e9ad..55c4b32db0 100644 --- a/src/simd/ops_amd64.go +++ b/src/simd/ops_amd64.go @@ -1429,6 +1429,46 @@ func (x Int64x2) Equal(y Int64x2) Mask64x2 // Asm: VPCMPEQQ, CPU Feature: AVX2 func (x Int64x4) Equal(y Int64x4) Mask64x4 +// Equal compares for equality. +// +// Asm: VPCMPEQB, CPU Feature: AVX +func (x Uint8x16) Equal(y Uint8x16) Mask8x16 + +// Equal compares for equality. +// +// Asm: VPCMPEQB, CPU Feature: AVX2 +func (x Uint8x32) Equal(y Uint8x32) Mask8x32 + +// Equal compares for equality. +// +// Asm: VPCMPEQW, CPU Feature: AVX +func (x Uint16x8) Equal(y Uint16x8) Mask16x8 + +// Equal compares for equality. +// +// Asm: VPCMPEQW, CPU Feature: AVX2 +func (x Uint16x16) Equal(y Uint16x16) Mask16x16 + +// Equal compares for equality. +// +// Asm: VPCMPEQD, CPU Feature: AVX +func (x Uint32x4) Equal(y Uint32x4) Mask32x4 + +// Equal compares for equality. +// +// Asm: VPCMPEQD, CPU Feature: AVX2 +func (x Uint32x8) Equal(y Uint32x8) Mask32x8 + +// Equal compares for equality. +// +// Asm: VPCMPEQQ, CPU Feature: AVX +func (x Uint64x2) Equal(y Uint64x2) Mask64x2 + +// Equal compares for equality. +// +// Asm: VPCMPEQQ, CPU Feature: AVX2 +func (x Uint64x4) Equal(y Uint64x4) Mask64x4 + // Equal compares for equality. // // Asm: VCMPPS, CPU Feature: AVX @@ -1479,61 +1519,21 @@ func (x Int32x16) Equal(y Int32x16) Mask32x16 // Asm: VPCMPQ, CPU Feature: AVX512EVEX func (x Int64x8) Equal(y Int64x8) Mask64x8 -// Equal compares for equality, masked. -// -// Asm: VPCMPUB, CPU Feature: AVX512EVEX -func (x Uint8x16) Equal(y Uint8x16) Mask8x16 - -// Equal compares for equality, masked. -// -// Asm: VPCMPUB, CPU Feature: AVX512EVEX -func (x Uint8x32) Equal(y Uint8x32) Mask8x32 - // Equal compares for equality, masked. // // Asm: VPCMPUB, CPU Feature: AVX512EVEX func (x Uint8x64) Equal(y Uint8x64) Mask8x64 -// Equal compares for equality, masked. -// -// Asm: VPCMPUW, CPU Feature: AVX512EVEX -func (x Uint16x8) Equal(y Uint16x8) Mask16x8 - -// Equal compares for equality, masked. -// -// Asm: VPCMPUW, CPU Feature: AVX512EVEX -func (x Uint16x16) Equal(y Uint16x16) Mask16x16 - // Equal compares for equality, masked. // // Asm: VPCMPUW, CPU Feature: AVX512EVEX func (x Uint16x32) Equal(y Uint16x32) Mask16x32 -// Equal compares for equality, masked. -// -// Asm: VPCMPUD, CPU Feature: AVX512EVEX -func (x Uint32x4) Equal(y Uint32x4) Mask32x4 - -// Equal compares for equality, masked. -// -// Asm: VPCMPUD, CPU Feature: AVX512EVEX -func (x Uint32x8) Equal(y Uint32x8) Mask32x8 - // Equal compares for equality, masked. // // Asm: VPCMPUD, CPU Feature: AVX512EVEX func (x Uint32x16) Equal(y Uint32x16) Mask32x16 -// Equal compares for equality, masked. -// -// Asm: VPCMPUQ, CPU Feature: AVX512EVEX -func (x Uint64x2) Equal(y Uint64x2) Mask64x2 - -// Equal compares for equality, masked. -// -// Asm: VPCMPUQ, CPU Feature: AVX512EVEX -func (x Uint64x4) Equal(y Uint64x4) Mask64x4 - // Equal compares for equality, masked. // // Asm: VPCMPUQ, CPU Feature: AVX512EVEX @@ -2245,6 +2245,11 @@ func (x Int32x4) Greater(y Int32x4) Mask32x4 // Asm: VPCMPGTD, CPU Feature: AVX2 func (x Int32x8) Greater(y Int32x8) Mask32x8 +// Greater compares for greater than. +// +// Asm: VPCMPGTQ, CPU Feature: AVX +func (x Int64x2) Greater(y Int64x2) Int64x2 + // Greater compares for greater than. // // Asm: VPCMPGTQ, CPU Feature: AVX2 @@ -2295,11 +2300,6 @@ func (x Int16x32) Greater(y Int16x32) Mask16x32 // Asm: VPCMPD, CPU Feature: AVX512EVEX func (x Int32x16) Greater(y Int32x16) Mask32x16 -// Greater compares for greater than. -// -// Asm: VPCMPQ, CPU Feature: AVX512EVEX -func (x Int64x2) Greater(y Int64x2) Mask64x2 - // Greater compares for greater than. // // Asm: VPCMPQ, CPU Feature: AVX512EVEX diff --git a/src/simd/simd_wrapped_test.go b/src/simd/simd_wrapped_test.go index bdbb25bfce..181a937d7e 100644 --- a/src/simd/simd_wrapped_test.go +++ b/src/simd/simd_wrapped_test.go @@ -4018,6 +4018,8 @@ func testInt64x2Binary(t *testing.T, v0 []int64, v1 []int64, want []int64, which gotv = vec0.And(vec1) case "AndNot": gotv = vec0.AndNot(vec1) + case "Greater": + gotv = vec0.Greater(vec1) case "Max": gotv = vec0.Max(vec1) case "Min": @@ -4113,8 +4115,6 @@ func testInt64x2Compare(t *testing.T, v0 []int64, v1 []int64, want []int64, whic switch which { case "Equal": gotv = vec0.Equal(vec1).AsInt64x2() - case "Greater": - gotv = vec0.Greater(vec1).AsInt64x2() case "GreaterEqual": gotv = vec0.GreaterEqual(vec1).AsInt64x2() case "Less": -- 2.52.0