Cypherpunks repositories - gostls13.git/commitdiff
[dev.simd] cmd/compile, simd: Int64x2 Greater and Uint* Equal
author Junyang Shao <shaojunyang@google.com>
Wed, 9 Jul 2025 16:24:34 +0000 (16:24 +0000)
committer Junyang Shao <shaojunyang@google.com>
Wed, 9 Jul 2025 18:06:40 +0000 (11:06 -0700)
This CL is generated by CL 686817.

Change-Id: I19b8e468594514b2b1c99f8ad766f78b5e194c80
Reviewed-on: https://go-review.googlesource.com/c/go/+/686876
TryBot-Bypass: David Chase <drchase@google.com>
Reviewed-by: David Chase <drchase@google.com>
src/cmd/compile/internal/amd64/simdssa.go
src/cmd/compile/internal/ssa/_gen/simdAMD64.rules
src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go
src/cmd/compile/internal/ssa/opGen.go
src/cmd/compile/internal/ssa/rewriteAMD64.go
src/cmd/compile/internal/ssagen/simdintrinsics.go
src/simd/ops_amd64.go
src/simd/simd_wrapped_test.go

index d87548c27faa5be908d1f67593a976c44cab7b4b..12a8c857bd4feeed6f3bda3ed6b4e6c767128d87 100644 (file)
@@ -115,6 +115,7 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
                ssa.OpAMD64VPCMPGTW256,
                ssa.OpAMD64VPCMPGTD128,
                ssa.OpAMD64VPCMPGTD256,
+               ssa.OpAMD64VPCMPGTQ128,
                ssa.OpAMD64VPCMPGTQ256,
                ssa.OpAMD64VMAXPS128,
                ssa.OpAMD64VMAXPS256,
@@ -688,25 +689,25 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
                ssa.OpAMD64VPCMPW512,
                ssa.OpAMD64VPCMPD512,
                ssa.OpAMD64VPCMPQ512,
+               ssa.OpAMD64VPCMPUB512,
+               ssa.OpAMD64VPCMPUW512,
+               ssa.OpAMD64VPCMPUD512,
+               ssa.OpAMD64VPCMPUQ512,
                ssa.OpAMD64VPCMPUB128,
                ssa.OpAMD64VPCMPUB256,
-               ssa.OpAMD64VPCMPUB512,
                ssa.OpAMD64VPCMPUW128,
                ssa.OpAMD64VPCMPUW256,
-               ssa.OpAMD64VPCMPUW512,
                ssa.OpAMD64VPCMPUD128,
                ssa.OpAMD64VPCMPUD256,
-               ssa.OpAMD64VPCMPUD512,
                ssa.OpAMD64VPCMPUQ128,
                ssa.OpAMD64VPCMPUQ256,
-               ssa.OpAMD64VPCMPUQ512,
-               ssa.OpAMD64VPCMPQ128,
                ssa.OpAMD64VPCMPB128,
                ssa.OpAMD64VPCMPB256,
                ssa.OpAMD64VPCMPW128,
                ssa.OpAMD64VPCMPW256,
                ssa.OpAMD64VPCMPD128,
                ssa.OpAMD64VPCMPD256,
+               ssa.OpAMD64VPCMPQ128,
                ssa.OpAMD64VPCMPQ256:
                p = simdV2kImm8(s, v)
 
index 7ea24fe95cea229533099bc81443499b1e206102..09ab9b840aeff9ed5b3322b760db1474c4f17c18 100644 (file)
 (EqualInt64x2 ...) => (VPCMPEQQ128 ...)
 (EqualInt64x4 ...) => (VPCMPEQQ256 ...)
 (EqualInt64x8 x y) => (VPMOVMToVec64x8 (VPCMPQ512 [0] x y))
-(EqualUint8x16 x y) => (VPMOVMToVec8x16 (VPCMPUB128 [0] x y))
-(EqualUint8x32 x y) => (VPMOVMToVec8x32 (VPCMPUB256 [0] x y))
+(EqualUint8x16 ...) => (VPCMPEQB128 ...)
+(EqualUint8x32 ...) => (VPCMPEQB256 ...)
 (EqualUint8x64 x y) => (VPMOVMToVec8x64 (VPCMPUB512 [0] x y))
-(EqualUint16x8 x y) => (VPMOVMToVec16x8 (VPCMPUW128 [0] x y))
-(EqualUint16x16 x y) => (VPMOVMToVec16x16 (VPCMPUW256 [0] x y))
+(EqualUint16x8 ...) => (VPCMPEQW128 ...)
+(EqualUint16x16 ...) => (VPCMPEQW256 ...)
 (EqualUint16x32 x y) => (VPMOVMToVec16x32 (VPCMPUW512 [0] x y))
-(EqualUint32x4 x y) => (VPMOVMToVec32x4 (VPCMPUD128 [0] x y))
-(EqualUint32x8 x y) => (VPMOVMToVec32x8 (VPCMPUD256 [0] x y))
+(EqualUint32x4 ...) => (VPCMPEQD128 ...)
+(EqualUint32x8 ...) => (VPCMPEQD256 ...)
 (EqualUint32x16 x y) => (VPMOVMToVec32x16 (VPCMPUD512 [0] x y))
-(EqualUint64x2 x y) => (VPMOVMToVec64x2 (VPCMPUQ128 [0] x y))
-(EqualUint64x4 x y) => (VPMOVMToVec64x4 (VPCMPUQ256 [0] x y))
+(EqualUint64x2 ...) => (VPCMPEQQ128 ...)
+(EqualUint64x4 ...) => (VPCMPEQQ256 ...)
 (EqualUint64x8 x y) => (VPMOVMToVec64x8 (VPCMPUQ512 [0] x y))
 (EqualMaskedFloat32x4 x y mask) => (VPMOVMToVec32x4 (VCMPPSMasked128 [0] x y (VPMOVVec32x4ToM <types.TypeMask> mask)))
 (EqualMaskedFloat32x8 x y mask) => (VPMOVMToVec32x8 (VCMPPSMasked256 [0] x y (VPMOVVec32x8ToM <types.TypeMask> mask)))
 (GreaterInt32x4 ...) => (VPCMPGTD128 ...)
 (GreaterInt32x8 ...) => (VPCMPGTD256 ...)
 (GreaterInt32x16 x y) => (VPMOVMToVec32x16 (VPCMPD512 [14] x y))
-(GreaterInt64x2 x y) => (VPMOVMToVec64x2 (VPCMPQ128 [14] x y))
+(GreaterInt64x2 ...) => (VPCMPGTQ128 ...)
 (GreaterInt64x4 ...) => (VPCMPGTQ256 ...)
 (GreaterInt64x8 x y) => (VPMOVMToVec64x8 (VPCMPQ512 [14] x y))
 (GreaterUint8x16 x y) => (VPMOVMToVec8x16 (VPCMPUB128 [14] x y))
index 09cfcfb4d9a41b1662a4b82574b7e010a6c8ddc8..f0a149f7d8a88b520980ead680e41ea9236a5047 100644 (file)
@@ -436,6 +436,7 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
                {name: "VPANDQMasked128", argLength: 3, reg: w2kw, asm: "VPANDQ", commutative: true, typ: "Vec128", resultInArg0: false},
                {name: "VPANDNQMasked128", argLength: 3, reg: w2kw, asm: "VPANDNQ", commutative: false, typ: "Vec128", resultInArg0: false},
                {name: "VPCMPEQQ128", argLength: 2, reg: v21, asm: "VPCMPEQQ", commutative: true, typ: "Vec128", resultInArg0: false},
+               {name: "VPCMPGTQ128", argLength: 2, reg: v21, asm: "VPCMPGTQ", commutative: false, typ: "Vec128", resultInArg0: false},
                {name: "VPMAXSQ128", argLength: 2, reg: w21, asm: "VPMAXSQ", commutative: true, typ: "Vec128", resultInArg0: false},
                {name: "VPMAXSQMasked128", argLength: 3, reg: w2kw, asm: "VPMAXSQ", commutative: true, typ: "Vec128", resultInArg0: false},
                {name: "VPMINSQ128", argLength: 2, reg: w21, asm: "VPMINSQ", commutative: true, typ: "Vec128", resultInArg0: false},
@@ -837,36 +838,36 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
                {name: "VINSERTI128256", argLength: 2, reg: v21, asm: "VINSERTI128", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false},
                {name: "VPCMPB512", argLength: 2, reg: w2k, asm: "VPCMPB", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
                {name: "VPCMPBMasked512", argLength: 3, reg: w2kk, asm: "VPCMPB", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
-               {name: "VPCMPUW256", argLength: 2, reg: w2k, asm: "VPCMPUW", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
                {name: "VPCMPUWMasked256", argLength: 3, reg: w2kk, asm: "VPCMPUW", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
+               {name: "VPCMPUW256", argLength: 2, reg: w2k, asm: "VPCMPUW", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false},
                {name: "VPCMPUW512", argLength: 2, reg: w2k, asm: "VPCMPUW", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
                {name: "VPCMPUWMasked512", argLength: 3, reg: w2kk, asm: "VPCMPUW", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
-               {name: "VPCMPUW128", argLength: 2, reg: w2k, asm: "VPCMPUW", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
                {name: "VPCMPUWMasked128", argLength: 3, reg: w2kk, asm: "VPCMPUW", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
+               {name: "VPCMPUW128", argLength: 2, reg: w2k, asm: "VPCMPUW", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false},
                {name: "VPCMPUD512", argLength: 2, reg: w2k, asm: "VPCMPUD", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
                {name: "VPCMPUDMasked512", argLength: 3, reg: w2kk, asm: "VPCMPUD", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
-               {name: "VPCMPUD128", argLength: 2, reg: w2k, asm: "VPCMPUD", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
                {name: "VPCMPUDMasked128", argLength: 3, reg: w2kk, asm: "VPCMPUD", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
-               {name: "VPCMPUD256", argLength: 2, reg: w2k, asm: "VPCMPUD", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
+               {name: "VPCMPUD128", argLength: 2, reg: w2k, asm: "VPCMPUD", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false},
                {name: "VPCMPUDMasked256", argLength: 3, reg: w2kk, asm: "VPCMPUD", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
-               {name: "VPCMPUQ128", argLength: 2, reg: w2k, asm: "VPCMPUQ", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
+               {name: "VPCMPUD256", argLength: 2, reg: w2k, asm: "VPCMPUD", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false},
                {name: "VPCMPUQMasked128", argLength: 3, reg: w2kk, asm: "VPCMPUQ", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
-               {name: "VPCMPUQ256", argLength: 2, reg: w2k, asm: "VPCMPUQ", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
+               {name: "VPCMPUQ128", argLength: 2, reg: w2k, asm: "VPCMPUQ", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false},
                {name: "VPCMPUQMasked256", argLength: 3, reg: w2kk, asm: "VPCMPUQ", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
+               {name: "VPCMPUQ256", argLength: 2, reg: w2k, asm: "VPCMPUQ", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false},
                {name: "VPCMPUQ512", argLength: 2, reg: w2k, asm: "VPCMPUQ", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
                {name: "VPCMPUQMasked512", argLength: 3, reg: w2kk, asm: "VPCMPUQ", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
-               {name: "VPCMPUB128", argLength: 2, reg: w2k, asm: "VPCMPUB", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
                {name: "VPCMPUBMasked128", argLength: 3, reg: w2kk, asm: "VPCMPUB", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
                {name: "VGF2P8AFFINEQB128", argLength: 2, reg: w21, asm: "VGF2P8AFFINEQB", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false},
                {name: "VGF2P8AFFINEINVQB128", argLength: 2, reg: w21, asm: "VGF2P8AFFINEINVQB", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false},
                {name: "VGF2P8AFFINEINVQBMasked128", argLength: 3, reg: w2kw, asm: "VGF2P8AFFINEINVQB", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false},
                {name: "VGF2P8AFFINEQBMasked128", argLength: 3, reg: w2kw, asm: "VGF2P8AFFINEQB", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false},
-               {name: "VPCMPUB256", argLength: 2, reg: w2k, asm: "VPCMPUB", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
+               {name: "VPCMPUB128", argLength: 2, reg: w2k, asm: "VPCMPUB", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false},
                {name: "VPCMPUBMasked256", argLength: 3, reg: w2kk, asm: "VPCMPUB", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
                {name: "VGF2P8AFFINEQB256", argLength: 2, reg: w21, asm: "VGF2P8AFFINEQB", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false},
                {name: "VGF2P8AFFINEINVQB256", argLength: 2, reg: w21, asm: "VGF2P8AFFINEINVQB", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false},
                {name: "VGF2P8AFFINEINVQBMasked256", argLength: 3, reg: w2kw, asm: "VGF2P8AFFINEINVQB", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false},
                {name: "VGF2P8AFFINEQBMasked256", argLength: 3, reg: w2kw, asm: "VGF2P8AFFINEQB", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false},
+               {name: "VPCMPUB256", argLength: 2, reg: w2k, asm: "VPCMPUB", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false},
                {name: "VPCMPUB512", argLength: 2, reg: w2k, asm: "VPCMPUB", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
                {name: "VPCMPUBMasked512", argLength: 3, reg: w2kk, asm: "VPCMPUB", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
                {name: "VGF2P8AFFINEQB512", argLength: 2, reg: w21, asm: "VGF2P8AFFINEQB", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false},
index edc88dfbc6d473a6dc080468f911c2e38dfdbd26..d9fea94fc3f65ed54d0ff22784a121c770006309 100644 (file)
@@ -1629,6 +1629,7 @@ const (
        OpAMD64VPANDQMasked128
        OpAMD64VPANDNQMasked128
        OpAMD64VPCMPEQQ128
+       OpAMD64VPCMPGTQ128
        OpAMD64VPMAXSQ128
        OpAMD64VPMAXSQMasked128
        OpAMD64VPMINSQ128
@@ -2030,36 +2031,36 @@ const (
        OpAMD64VINSERTI128256
        OpAMD64VPCMPB512
        OpAMD64VPCMPBMasked512
-       OpAMD64VPCMPUW256
        OpAMD64VPCMPUWMasked256
+       OpAMD64VPCMPUW256
        OpAMD64VPCMPUW512
        OpAMD64VPCMPUWMasked512
-       OpAMD64VPCMPUW128
        OpAMD64VPCMPUWMasked128
+       OpAMD64VPCMPUW128
        OpAMD64VPCMPUD512
        OpAMD64VPCMPUDMasked512
-       OpAMD64VPCMPUD128
        OpAMD64VPCMPUDMasked128
-       OpAMD64VPCMPUD256
+       OpAMD64VPCMPUD128
        OpAMD64VPCMPUDMasked256
-       OpAMD64VPCMPUQ128
+       OpAMD64VPCMPUD256
        OpAMD64VPCMPUQMasked128
-       OpAMD64VPCMPUQ256
+       OpAMD64VPCMPUQ128
        OpAMD64VPCMPUQMasked256
+       OpAMD64VPCMPUQ256
        OpAMD64VPCMPUQ512
        OpAMD64VPCMPUQMasked512
-       OpAMD64VPCMPUB128
        OpAMD64VPCMPUBMasked128
        OpAMD64VGF2P8AFFINEQB128
        OpAMD64VGF2P8AFFINEINVQB128
        OpAMD64VGF2P8AFFINEINVQBMasked128
        OpAMD64VGF2P8AFFINEQBMasked128
-       OpAMD64VPCMPUB256
+       OpAMD64VPCMPUB128
        OpAMD64VPCMPUBMasked256
        OpAMD64VGF2P8AFFINEQB256
        OpAMD64VGF2P8AFFINEINVQB256
        OpAMD64VGF2P8AFFINEINVQBMasked256
        OpAMD64VGF2P8AFFINEQBMasked256
+       OpAMD64VPCMPUB256
        OpAMD64VPCMPUB512
        OpAMD64VPCMPUBMasked512
        OpAMD64VGF2P8AFFINEQB512
@@ -25058,6 +25059,20 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:   "VPCMPGTQ128",
+               argLen: 2,
+               asm:    x86.AVPCMPGTQ,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                               {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+                       outputs: []outputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+               },
+       },
        {
                name:        "VPMAXSQ128",
                argLen:      2,
@@ -31113,15 +31128,16 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:        "VPCMPUW256",
+               name:        "VPCMPUWMasked256",
                auxType:     auxInt8,
-               argLen:      2,
+               argLen:      3,
                commutative: true,
                asm:         x86.AVPCMPUW,
                reg: regInfo{
                        inputs: []inputInfo{
-                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
-                               {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                               {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 2147418112},        // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                               {1, 2147418112},        // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
                        },
                        outputs: []outputInfo{
                                {0, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
@@ -31129,16 +31145,14 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:        "VPCMPUWMasked256",
-               auxType:     auxInt8,
-               argLen:      3,
-               commutative: true,
-               asm:         x86.AVPCMPUW,
+               name:    "VPCMPUW256",
+               auxType: auxInt8,
+               argLen:  2,
+               asm:     x86.AVPCMPUW,
                reg: regInfo{
                        inputs: []inputInfo{
-                               {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
-                               {0, 2147418112},        // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
-                               {1, 2147418112},        // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                               {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
                        },
                        outputs: []outputInfo{
                                {0, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
@@ -31179,15 +31193,16 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:        "VPCMPUW128",
+               name:        "VPCMPUWMasked128",
                auxType:     auxInt8,
-               argLen:      2,
+               argLen:      3,
                commutative: true,
                asm:         x86.AVPCMPUW,
                reg: regInfo{
                        inputs: []inputInfo{
-                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
-                               {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                               {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 2147418112},        // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                               {1, 2147418112},        // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
                        },
                        outputs: []outputInfo{
                                {0, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
@@ -31195,16 +31210,14 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:        "VPCMPUWMasked128",
-               auxType:     auxInt8,
-               argLen:      3,
-               commutative: true,
-               asm:         x86.AVPCMPUW,
+               name:    "VPCMPUW128",
+               auxType: auxInt8,
+               argLen:  2,
+               asm:     x86.AVPCMPUW,
                reg: regInfo{
                        inputs: []inputInfo{
-                               {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
-                               {0, 2147418112},        // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
-                               {1, 2147418112},        // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                               {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
                        },
                        outputs: []outputInfo{
                                {0, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
@@ -31244,22 +31257,6 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
-       {
-               name:        "VPCMPUD128",
-               auxType:     auxInt8,
-               argLen:      2,
-               commutative: true,
-               asm:         x86.AVPCMPUD,
-               reg: regInfo{
-                       inputs: []inputInfo{
-                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
-                               {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
-                       },
-                       outputs: []outputInfo{
-                               {0, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
-                       },
-               },
-       },
        {
                name:        "VPCMPUDMasked128",
                auxType:     auxInt8,
@@ -31278,11 +31275,10 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:        "VPCMPUD256",
-               auxType:     auxInt8,
-               argLen:      2,
-               commutative: true,
-               asm:         x86.AVPCMPUD,
+               name:    "VPCMPUD128",
+               auxType: auxInt8,
+               argLen:  2,
+               asm:     x86.AVPCMPUD,
                reg: regInfo{
                        inputs: []inputInfo{
                                {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
@@ -31311,11 +31307,10 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:        "VPCMPUQ128",
-               auxType:     auxInt8,
-               argLen:      2,
-               commutative: true,
-               asm:         x86.AVPCMPUQ,
+               name:    "VPCMPUD256",
+               auxType: auxInt8,
+               argLen:  2,
+               asm:     x86.AVPCMPUD,
                reg: regInfo{
                        inputs: []inputInfo{
                                {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
@@ -31344,11 +31339,10 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:        "VPCMPUQ256",
-               auxType:     auxInt8,
-               argLen:      2,
-               commutative: true,
-               asm:         x86.AVPCMPUQ,
+               name:    "VPCMPUQ128",
+               auxType: auxInt8,
+               argLen:  2,
+               asm:     x86.AVPCMPUQ,
                reg: regInfo{
                        inputs: []inputInfo{
                                {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
@@ -31377,11 +31371,10 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:        "VPCMPUQ512",
-               auxType:     auxInt8,
-               argLen:      2,
-               commutative: true,
-               asm:         x86.AVPCMPUQ,
+               name:    "VPCMPUQ256",
+               auxType: auxInt8,
+               argLen:  2,
+               asm:     x86.AVPCMPUQ,
                reg: regInfo{
                        inputs: []inputInfo{
                                {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
@@ -31393,16 +31386,15 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:        "VPCMPUQMasked512",
+               name:        "VPCMPUQ512",
                auxType:     auxInt8,
-               argLen:      3,
+               argLen:      2,
                commutative: true,
                asm:         x86.AVPCMPUQ,
                reg: regInfo{
                        inputs: []inputInfo{
-                               {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
-                               {0, 2147418112},        // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
-                               {1, 2147418112},        // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                               {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
                        },
                        outputs: []outputInfo{
                                {0, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
@@ -31410,15 +31402,16 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:        "VPCMPUB128",
+               name:        "VPCMPUQMasked512",
                auxType:     auxInt8,
-               argLen:      2,
+               argLen:      3,
                commutative: true,
-               asm:         x86.AVPCMPUB,
+               asm:         x86.AVPCMPUQ,
                reg: regInfo{
                        inputs: []inputInfo{
-                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
-                               {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                               {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+                               {0, 2147418112},        // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                               {1, 2147418112},        // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
                        },
                        outputs: []outputInfo{
                                {0, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
@@ -31505,11 +31498,10 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:        "VPCMPUB256",
-               auxType:     auxInt8,
-               argLen:      2,
-               commutative: true,
-               asm:         x86.AVPCMPUB,
+               name:    "VPCMPUB128",
+               auxType: auxInt8,
+               argLen:  2,
+               asm:     x86.AVPCMPUB,
                reg: regInfo{
                        inputs: []inputInfo{
                                {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
@@ -31599,6 +31591,21 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:    "VPCMPUB256",
+               auxType: auxInt8,
+               argLen:  2,
+               asm:     x86.AVPCMPUB,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                               {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+                       outputs: []outputInfo{
+                               {0, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+                       },
+               },
+       },
        {
                name:        "VPCMPUB512",
                auxType:     auxInt8,
index 2e27077e81926b70c680b8034d2b1e5c92bb796e..4dd1fcbcb75815c9014e398d1882aae6ec2a4920 100644 (file)
@@ -1530,27 +1530,35 @@ func rewriteValueAMD64(v *Value) bool {
        case OpEqualMaskedUint8x64:
                return rewriteValueAMD64_OpEqualMaskedUint8x64(v)
        case OpEqualUint16x16:
-               return rewriteValueAMD64_OpEqualUint16x16(v)
+               v.Op = OpAMD64VPCMPEQW256
+               return true
        case OpEqualUint16x32:
                return rewriteValueAMD64_OpEqualUint16x32(v)
        case OpEqualUint16x8:
-               return rewriteValueAMD64_OpEqualUint16x8(v)
+               v.Op = OpAMD64VPCMPEQW128
+               return true
        case OpEqualUint32x16:
                return rewriteValueAMD64_OpEqualUint32x16(v)
        case OpEqualUint32x4:
-               return rewriteValueAMD64_OpEqualUint32x4(v)
+               v.Op = OpAMD64VPCMPEQD128
+               return true
        case OpEqualUint32x8:
-               return rewriteValueAMD64_OpEqualUint32x8(v)
+               v.Op = OpAMD64VPCMPEQD256
+               return true
        case OpEqualUint64x2:
-               return rewriteValueAMD64_OpEqualUint64x2(v)
+               v.Op = OpAMD64VPCMPEQQ128
+               return true
        case OpEqualUint64x4:
-               return rewriteValueAMD64_OpEqualUint64x4(v)
+               v.Op = OpAMD64VPCMPEQQ256
+               return true
        case OpEqualUint64x8:
                return rewriteValueAMD64_OpEqualUint64x8(v)
        case OpEqualUint8x16:
-               return rewriteValueAMD64_OpEqualUint8x16(v)
+               v.Op = OpAMD64VPCMPEQB128
+               return true
        case OpEqualUint8x32:
-               return rewriteValueAMD64_OpEqualUint8x32(v)
+               v.Op = OpAMD64VPCMPEQB256
+               return true
        case OpEqualUint8x64:
                return rewriteValueAMD64_OpEqualUint8x64(v)
        case OpFMA:
@@ -1914,7 +1922,8 @@ func rewriteValueAMD64(v *Value) bool {
                v.Op = OpAMD64VPCMPGTD256
                return true
        case OpGreaterInt64x2:
-               return rewriteValueAMD64_OpGreaterInt64x2(v)
+               v.Op = OpAMD64VPCMPGTQ128
+               return true
        case OpGreaterInt64x4:
                v.Op = OpAMD64VPCMPGTQ256
                return true
@@ -33212,24 +33221,6 @@ func rewriteValueAMD64_OpEqualMaskedUint8x64(v *Value) bool {
                return true
        }
 }
-func rewriteValueAMD64_OpEqualUint16x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (EqualUint16x16 x y)
-       // result: (VPMOVMToVec16x16 (VPCMPUW256 [0] x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec16x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUW256, typ.Mask)
-               v0.AuxInt = int8ToAuxInt(0)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
 func rewriteValueAMD64_OpEqualUint16x32(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
@@ -33248,24 +33239,6 @@ func rewriteValueAMD64_OpEqualUint16x32(v *Value) bool {
                return true
        }
 }
-func rewriteValueAMD64_OpEqualUint16x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (EqualUint16x8 x y)
-       // result: (VPMOVMToVec16x8 (VPCMPUW128 [0] x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec16x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUW128, typ.Mask)
-               v0.AuxInt = int8ToAuxInt(0)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
 func rewriteValueAMD64_OpEqualUint32x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
@@ -33284,78 +33257,6 @@ func rewriteValueAMD64_OpEqualUint32x16(v *Value) bool {
                return true
        }
 }
-func rewriteValueAMD64_OpEqualUint32x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (EqualUint32x4 x y)
-       // result: (VPMOVMToVec32x4 (VPCMPUD128 [0] x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec32x4)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUD128, typ.Mask)
-               v0.AuxInt = int8ToAuxInt(0)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpEqualUint32x8(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (EqualUint32x8 x y)
-       // result: (VPMOVMToVec32x8 (VPCMPUD256 [0] x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec32x8)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUD256, typ.Mask)
-               v0.AuxInt = int8ToAuxInt(0)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpEqualUint64x2(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (EqualUint64x2 x y)
-       // result: (VPMOVMToVec64x2 (VPCMPUQ128 [0] x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec64x2)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQ128, typ.Mask)
-               v0.AuxInt = int8ToAuxInt(0)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpEqualUint64x4(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (EqualUint64x4 x y)
-       // result: (VPMOVMToVec64x4 (VPCMPUQ256 [0] x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec64x4)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQ256, typ.Mask)
-               v0.AuxInt = int8ToAuxInt(0)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
 func rewriteValueAMD64_OpEqualUint64x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
@@ -33374,42 +33275,6 @@ func rewriteValueAMD64_OpEqualUint64x8(v *Value) bool {
                return true
        }
 }
-func rewriteValueAMD64_OpEqualUint8x16(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (EqualUint8x16 x y)
-       // result: (VPMOVMToVec8x16 (VPCMPUB128 [0] x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec8x16)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUB128, typ.Mask)
-               v0.AuxInt = int8ToAuxInt(0)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
-func rewriteValueAMD64_OpEqualUint8x32(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (EqualUint8x32 x y)
-       // result: (VPMOVMToVec8x32 (VPCMPUB256 [0] x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec8x32)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUB256, typ.Mask)
-               v0.AuxInt = int8ToAuxInt(0)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
 func rewriteValueAMD64_OpEqualUint8x64(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
@@ -35875,24 +35740,6 @@ func rewriteValueAMD64_OpGreaterInt32x16(v *Value) bool {
                return true
        }
 }
-func rewriteValueAMD64_OpGreaterInt64x2(v *Value) bool {
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       b := v.Block
-       typ := &b.Func.Config.Types
-       // match: (GreaterInt64x2 x y)
-       // result: (VPMOVMToVec64x2 (VPCMPQ128 [14] x y))
-       for {
-               x := v_0
-               y := v_1
-               v.reset(OpAMD64VPMOVMToVec64x2)
-               v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQ128, typ.Mask)
-               v0.AuxInt = int8ToAuxInt(14)
-               v0.AddArg2(x, y)
-               v.AddArg(v0)
-               return true
-       }
-}
 func rewriteValueAMD64_OpGreaterInt64x8(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
index c6e8961738741bc3e0beb8c651eb071cde1873a1..15351b678b49e833127ad357d8798452046f04b9 100644 (file)
@@ -284,6 +284,14 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
        addF(simdPackage, "Int32x8.Equal", opLen2(ssa.OpEqualInt32x8, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Int64x2.Equal", opLen2(ssa.OpEqualInt64x2, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Int64x4.Equal", opLen2(ssa.OpEqualInt64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint8x16.Equal", opLen2(ssa.OpEqualUint8x16, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint8x32.Equal", opLen2(ssa.OpEqualUint8x32, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint16x8.Equal", opLen2(ssa.OpEqualUint16x8, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint16x16.Equal", opLen2(ssa.OpEqualUint16x16, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint32x4.Equal", opLen2(ssa.OpEqualUint32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint32x8.Equal", opLen2(ssa.OpEqualUint32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint64x2.Equal", opLen2(ssa.OpEqualUint64x2, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint64x4.Equal", opLen2(ssa.OpEqualUint64x4, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float32x4.Equal", opLen2(ssa.OpEqualFloat32x4, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Float32x8.Equal", opLen2(ssa.OpEqualFloat32x8, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float32x16.Equal", opLen2(ssa.OpEqualFloat32x16, types.TypeVec512), sys.AMD64)
@@ -294,17 +302,9 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
        addF(simdPackage, "Int16x32.Equal", opLen2(ssa.OpEqualInt16x32, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Int32x16.Equal", opLen2(ssa.OpEqualInt32x16, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Int64x8.Equal", opLen2(ssa.OpEqualInt64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint8x16.Equal", opLen2(ssa.OpEqualUint8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint8x32.Equal", opLen2(ssa.OpEqualUint8x32, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Uint8x64.Equal", opLen2(ssa.OpEqualUint8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint16x8.Equal", opLen2(ssa.OpEqualUint16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint16x16.Equal", opLen2(ssa.OpEqualUint16x16, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Uint16x32.Equal", opLen2(ssa.OpEqualUint16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint32x4.Equal", opLen2(ssa.OpEqualUint32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint32x8.Equal", opLen2(ssa.OpEqualUint32x8, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Uint32x16.Equal", opLen2(ssa.OpEqualUint32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint64x2.Equal", opLen2(ssa.OpEqualUint64x2, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint64x4.Equal", opLen2(ssa.OpEqualUint64x4, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Uint64x8.Equal", opLen2(ssa.OpEqualUint64x8, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Float32x4.EqualMasked", opLen3(ssa.OpEqualMaskedFloat32x4, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Float32x8.EqualMasked", opLen3(ssa.OpEqualMaskedFloat32x8, types.TypeVec256), sys.AMD64)
@@ -430,6 +430,7 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
        addF(simdPackage, "Int16x16.Greater", opLen2(ssa.OpGreaterInt16x16, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Int32x4.Greater", opLen2(ssa.OpGreaterInt32x4, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Int32x8.Greater", opLen2(ssa.OpGreaterInt32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int64x2.Greater", opLen2(ssa.OpGreaterInt64x2, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Int64x4.Greater", opLen2(ssa.OpGreaterInt64x4, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float32x4.Greater", opLen2(ssa.OpGreaterFloat32x4, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Float32x8.Greater", opLen2(ssa.OpGreaterFloat32x8, types.TypeVec256), sys.AMD64)
@@ -440,7 +441,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
        addF(simdPackage, "Int8x64.Greater", opLen2(ssa.OpGreaterInt8x64, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Int16x32.Greater", opLen2(ssa.OpGreaterInt16x32, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Int32x16.Greater", opLen2(ssa.OpGreaterInt32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int64x2.Greater", opLen2(ssa.OpGreaterInt64x2, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Int64x8.Greater", opLen2(ssa.OpGreaterInt64x8, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Uint8x16.Greater", opLen2(ssa.OpGreaterUint8x16, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Uint8x32.Greater", opLen2(ssa.OpGreaterUint8x32, types.TypeVec256), sys.AMD64)
index 26a0d3e9ad4e7476db0b561f63743d3789c403d1..55c4b32db00bf5b58b406a9da98552df6a09d752 100644 (file)
@@ -1429,6 +1429,46 @@ func (x Int64x2) Equal(y Int64x2) Mask64x2
 // Asm: VPCMPEQQ, CPU Feature: AVX2
 func (x Int64x4) Equal(y Int64x4) Mask64x4
 
+// Equal compares for equality.
+//
+// Asm: VPCMPEQB, CPU Feature: AVX
+func (x Uint8x16) Equal(y Uint8x16) Mask8x16
+
+// Equal compares for equality.
+//
+// Asm: VPCMPEQB, CPU Feature: AVX2
+func (x Uint8x32) Equal(y Uint8x32) Mask8x32
+
+// Equal compares for equality.
+//
+// Asm: VPCMPEQW, CPU Feature: AVX
+func (x Uint16x8) Equal(y Uint16x8) Mask16x8
+
+// Equal compares for equality.
+//
+// Asm: VPCMPEQW, CPU Feature: AVX2
+func (x Uint16x16) Equal(y Uint16x16) Mask16x16
+
+// Equal compares for equality.
+//
+// Asm: VPCMPEQD, CPU Feature: AVX
+func (x Uint32x4) Equal(y Uint32x4) Mask32x4
+
+// Equal compares for equality.
+//
+// Asm: VPCMPEQD, CPU Feature: AVX2
+func (x Uint32x8) Equal(y Uint32x8) Mask32x8
+
+// Equal compares for equality.
+//
+// Asm: VPCMPEQQ, CPU Feature: AVX
+func (x Uint64x2) Equal(y Uint64x2) Mask64x2
+
+// Equal compares for equality.
+//
+// Asm: VPCMPEQQ, CPU Feature: AVX2
+func (x Uint64x4) Equal(y Uint64x4) Mask64x4
+
 // Equal compares for equality.
 //
 // Asm: VCMPPS, CPU Feature: AVX
@@ -1479,61 +1519,21 @@ func (x Int32x16) Equal(y Int32x16) Mask32x16
 // Asm: VPCMPQ, CPU Feature: AVX512EVEX
 func (x Int64x8) Equal(y Int64x8) Mask64x8
 
-// Equal compares for equality, masked.
-//
-// Asm: VPCMPUB, CPU Feature: AVX512EVEX
-func (x Uint8x16) Equal(y Uint8x16) Mask8x16
-
-// Equal compares for equality, masked.
-//
-// Asm: VPCMPUB, CPU Feature: AVX512EVEX
-func (x Uint8x32) Equal(y Uint8x32) Mask8x32
-
 // Equal compares for equality, masked.
 //
 // Asm: VPCMPUB, CPU Feature: AVX512EVEX
 func (x Uint8x64) Equal(y Uint8x64) Mask8x64
 
-// Equal compares for equality, masked.
-//
-// Asm: VPCMPUW, CPU Feature: AVX512EVEX
-func (x Uint16x8) Equal(y Uint16x8) Mask16x8
-
-// Equal compares for equality, masked.
-//
-// Asm: VPCMPUW, CPU Feature: AVX512EVEX
-func (x Uint16x16) Equal(y Uint16x16) Mask16x16
-
 // Equal compares for equality, masked.
 //
 // Asm: VPCMPUW, CPU Feature: AVX512EVEX
 func (x Uint16x32) Equal(y Uint16x32) Mask16x32
 
-// Equal compares for equality, masked.
-//
-// Asm: VPCMPUD, CPU Feature: AVX512EVEX
-func (x Uint32x4) Equal(y Uint32x4) Mask32x4
-
-// Equal compares for equality, masked.
-//
-// Asm: VPCMPUD, CPU Feature: AVX512EVEX
-func (x Uint32x8) Equal(y Uint32x8) Mask32x8
-
 // Equal compares for equality, masked.
 //
 // Asm: VPCMPUD, CPU Feature: AVX512EVEX
 func (x Uint32x16) Equal(y Uint32x16) Mask32x16
 
-// Equal compares for equality, masked.
-//
-// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
-func (x Uint64x2) Equal(y Uint64x2) Mask64x2
-
-// Equal compares for equality, masked.
-//
-// Asm: VPCMPUQ, CPU Feature: AVX512EVEX
-func (x Uint64x4) Equal(y Uint64x4) Mask64x4
-
 // Equal compares for equality, masked.
 //
 // Asm: VPCMPUQ, CPU Feature: AVX512EVEX
@@ -2245,6 +2245,11 @@ func (x Int32x4) Greater(y Int32x4) Mask32x4
 // Asm: VPCMPGTD, CPU Feature: AVX2
 func (x Int32x8) Greater(y Int32x8) Mask32x8
 
+// Greater compares for greater than.
+//
+// Asm: VPCMPGTQ, CPU Feature: AVX
+func (x Int64x2) Greater(y Int64x2) Int64x2
+
 // Greater compares for greater than.
 //
 // Asm: VPCMPGTQ, CPU Feature: AVX2
@@ -2295,11 +2300,6 @@ func (x Int16x32) Greater(y Int16x32) Mask16x32
 // Asm: VPCMPD, CPU Feature: AVX512EVEX
 func (x Int32x16) Greater(y Int32x16) Mask32x16
 
-// Greater compares for greater than.
-//
-// Asm: VPCMPQ, CPU Feature: AVX512EVEX
-func (x Int64x2) Greater(y Int64x2) Mask64x2
-
 // Greater compares for greater than.
 //
 // Asm: VPCMPQ, CPU Feature: AVX512EVEX
index bdbb25bfce4e6a01a6c5b68a271ef6ed266d0656..181a937d7ebc244ba997ada71b279d8d7768285b 100644 (file)
@@ -4018,6 +4018,8 @@ func testInt64x2Binary(t *testing.T, v0 []int64, v1 []int64, want []int64, which
                gotv = vec0.And(vec1)
        case "AndNot":
                gotv = vec0.AndNot(vec1)
+       case "Greater":
+               gotv = vec0.Greater(vec1)
        case "Max":
                gotv = vec0.Max(vec1)
        case "Min":
@@ -4113,8 +4115,6 @@ func testInt64x2Compare(t *testing.T, v0 []int64, v1 []int64, want []int64, whic
        switch which {
        case "Equal":
                gotv = vec0.Equal(vec1).AsInt64x2()
-       case "Greater":
-               gotv = vec0.Greater(vec1).AsInt64x2()
        case "GreaterEqual":
                gotv = vec0.GreaterEqual(vec1).AsInt64x2()
        case "Less":