Cypherpunks repositories - gostls13.git/commitdiff
[dev.simd] simd: make "best" instruction choice also depend on commutativity
author David Chase <drchase@google.com>
Wed, 3 Dec 2025 17:49:30 +0000 (12:49 -0500)
committer David Chase <drchase@google.com>
Wed, 3 Dec 2025 19:12:10 +0000 (11:12 -0800)
The compare-based-on-immediate instructions are sometimes commutative,
sometimes not, depending on the immediate. In this case, that means the
instruction cannot be marked commutative.

Also improve the comments for comparisons.

Change-Id: I83a55fa5ffbd6cbbaf5cb23b3e8a68a5da8aae2f
Reviewed-on: https://go-review.googlesource.com/c/go/+/726440
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Junyang Shao <shaojunyang@google.com>
Reviewed-by: Austin Clements <austin@google.com>
src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go
src/cmd/compile/internal/ssa/opGen.go
src/cmd/compile/internal/ssa/rewriteAMD64.go
src/simd/_gen/simdgen/gen_simdMachineOps.go
src/simd/_gen/simdgen/gen_utility.go
src/simd/_gen/simdgen/ops/Compares/categories.yaml
src/simd/ops_amd64.go

index 82f5bfdb3f1e0cc15d698a5509047c0b983ad0e7..0727f626fb12182d16070c8a6a8061e0ca9081b8 100644 (file)
@@ -1231,18 +1231,18 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
                {name: "VSUBPSMasked512", argLength: 3, reg: w2kw, asm: "VSUBPS", commutative: false, typ: "Vec512", resultInArg0: false},
                {name: "SHA1RNDS4128", argLength: 2, reg: v21, asm: "SHA1RNDS4", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: true},
                {name: "VAESKEYGENASSIST128", argLength: 1, reg: v11, asm: "VAESKEYGENASSIST", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: false},
-               {name: "VCMPPD128", argLength: 2, reg: v21, asm: "VCMPPD", aux: "UInt8", commutative: true, typ: "Vec128", resultInArg0: false},
-               {name: "VCMPPD256", argLength: 2, reg: v21, asm: "VCMPPD", aux: "UInt8", commutative: true, typ: "Vec256", resultInArg0: false},
-               {name: "VCMPPD512", argLength: 2, reg: w2k, asm: "VCMPPD", aux: "UInt8", commutative: true, typ: "Mask", resultInArg0: false},
-               {name: "VCMPPDMasked128", argLength: 3, reg: w2kk, asm: "VCMPPD", aux: "UInt8", commutative: true, typ: "Mask", resultInArg0: false},
-               {name: "VCMPPDMasked256", argLength: 3, reg: w2kk, asm: "VCMPPD", aux: "UInt8", commutative: true, typ: "Mask", resultInArg0: false},
-               {name: "VCMPPDMasked512", argLength: 3, reg: w2kk, asm: "VCMPPD", aux: "UInt8", commutative: true, typ: "Mask", resultInArg0: false},
-               {name: "VCMPPS128", argLength: 2, reg: v21, asm: "VCMPPS", aux: "UInt8", commutative: true, typ: "Vec128", resultInArg0: false},
-               {name: "VCMPPS256", argLength: 2, reg: v21, asm: "VCMPPS", aux: "UInt8", commutative: true, typ: "Vec256", resultInArg0: false},
-               {name: "VCMPPS512", argLength: 2, reg: w2k, asm: "VCMPPS", aux: "UInt8", commutative: true, typ: "Mask", resultInArg0: false},
-               {name: "VCMPPSMasked128", argLength: 3, reg: w2kk, asm: "VCMPPS", aux: "UInt8", commutative: true, typ: "Mask", resultInArg0: false},
-               {name: "VCMPPSMasked256", argLength: 3, reg: w2kk, asm: "VCMPPS", aux: "UInt8", commutative: true, typ: "Mask", resultInArg0: false},
-               {name: "VCMPPSMasked512", argLength: 3, reg: w2kk, asm: "VCMPPS", aux: "UInt8", commutative: true, typ: "Mask", resultInArg0: false},
+               {name: "VCMPPD128", argLength: 2, reg: v21, asm: "VCMPPD", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VCMPPD256", argLength: 2, reg: v21, asm: "VCMPPD", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: false},
+               {name: "VCMPPD512", argLength: 2, reg: w2k, asm: "VCMPPD", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
+               {name: "VCMPPDMasked128", argLength: 3, reg: w2kk, asm: "VCMPPD", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
+               {name: "VCMPPDMasked256", argLength: 3, reg: w2kk, asm: "VCMPPD", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
+               {name: "VCMPPDMasked512", argLength: 3, reg: w2kk, asm: "VCMPPD", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
+               {name: "VCMPPS128", argLength: 2, reg: v21, asm: "VCMPPS", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VCMPPS256", argLength: 2, reg: v21, asm: "VCMPPS", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: false},
+               {name: "VCMPPS512", argLength: 2, reg: w2k, asm: "VCMPPS", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
+               {name: "VCMPPSMasked128", argLength: 3, reg: w2kk, asm: "VCMPPS", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
+               {name: "VCMPPSMasked256", argLength: 3, reg: w2kk, asm: "VCMPPS", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
+               {name: "VCMPPSMasked512", argLength: 3, reg: w2kk, asm: "VCMPPS", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
                {name: "VEXTRACTF64X4256", argLength: 1, reg: w11, asm: "VEXTRACTF64X4", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: false},
                {name: "VEXTRACTF128128", argLength: 1, reg: v11, asm: "VEXTRACTF128", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: false},
                {name: "VEXTRACTI64X4256", argLength: 1, reg: w11, asm: "VEXTRACTI64X4", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: false},
@@ -1270,37 +1270,37 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
                {name: "VPALIGNRMasked256", argLength: 3, reg: w2kw, asm: "VPALIGNR", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: false},
                {name: "VPALIGNRMasked512", argLength: 3, reg: w2kw, asm: "VPALIGNR", aux: "UInt8", commutative: false, typ: "Vec512", resultInArg0: false},
                {name: "VPCMPB512", argLength: 2, reg: w2k, asm: "VPCMPB", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
-               {name: "VPCMPBMasked128", argLength: 3, reg: w2kk, asm: "VPCMPB", aux: "UInt8", commutative: true, typ: "Mask", resultInArg0: false},
-               {name: "VPCMPBMasked256", argLength: 3, reg: w2kk, asm: "VPCMPB", aux: "UInt8", commutative: true, typ: "Mask", resultInArg0: false},
-               {name: "VPCMPBMasked512", argLength: 3, reg: w2kk, asm: "VPCMPB", aux: "UInt8", commutative: true, typ: "Mask", resultInArg0: false},
+               {name: "VPCMPBMasked128", argLength: 3, reg: w2kk, asm: "VPCMPB", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
+               {name: "VPCMPBMasked256", argLength: 3, reg: w2kk, asm: "VPCMPB", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
+               {name: "VPCMPBMasked512", argLength: 3, reg: w2kk, asm: "VPCMPB", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
                {name: "VPCMPD512", argLength: 2, reg: w2k, asm: "VPCMPD", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
-               {name: "VPCMPDMasked128", argLength: 3, reg: w2kk, asm: "VPCMPD", aux: "UInt8", commutative: true, typ: "Mask", resultInArg0: false},
-               {name: "VPCMPDMasked256", argLength: 3, reg: w2kk, asm: "VPCMPD", aux: "UInt8", commutative: true, typ: "Mask", resultInArg0: false},
-               {name: "VPCMPDMasked512", argLength: 3, reg: w2kk, asm: "VPCMPD", aux: "UInt8", commutative: true, typ: "Mask", resultInArg0: false},
+               {name: "VPCMPDMasked128", argLength: 3, reg: w2kk, asm: "VPCMPD", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
+               {name: "VPCMPDMasked256", argLength: 3, reg: w2kk, asm: "VPCMPD", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
+               {name: "VPCMPDMasked512", argLength: 3, reg: w2kk, asm: "VPCMPD", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
                {name: "VPCMPQ512", argLength: 2, reg: w2k, asm: "VPCMPQ", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
-               {name: "VPCMPQMasked128", argLength: 3, reg: w2kk, asm: "VPCMPQ", aux: "UInt8", commutative: true, typ: "Mask", resultInArg0: false},
-               {name: "VPCMPQMasked256", argLength: 3, reg: w2kk, asm: "VPCMPQ", aux: "UInt8", commutative: true, typ: "Mask", resultInArg0: false},
-               {name: "VPCMPQMasked512", argLength: 3, reg: w2kk, asm: "VPCMPQ", aux: "UInt8", commutative: true, typ: "Mask", resultInArg0: false},
+               {name: "VPCMPQMasked128", argLength: 3, reg: w2kk, asm: "VPCMPQ", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
+               {name: "VPCMPQMasked256", argLength: 3, reg: w2kk, asm: "VPCMPQ", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
+               {name: "VPCMPQMasked512", argLength: 3, reg: w2kk, asm: "VPCMPQ", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
                {name: "VPCMPUB512", argLength: 2, reg: w2k, asm: "VPCMPUB", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
-               {name: "VPCMPUBMasked128", argLength: 3, reg: w2kk, asm: "VPCMPUB", aux: "UInt8", commutative: true, typ: "Mask", resultInArg0: false},
-               {name: "VPCMPUBMasked256", argLength: 3, reg: w2kk, asm: "VPCMPUB", aux: "UInt8", commutative: true, typ: "Mask", resultInArg0: false},
-               {name: "VPCMPUBMasked512", argLength: 3, reg: w2kk, asm: "VPCMPUB", aux: "UInt8", commutative: true, typ: "Mask", resultInArg0: false},
+               {name: "VPCMPUBMasked128", argLength: 3, reg: w2kk, asm: "VPCMPUB", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
+               {name: "VPCMPUBMasked256", argLength: 3, reg: w2kk, asm: "VPCMPUB", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
+               {name: "VPCMPUBMasked512", argLength: 3, reg: w2kk, asm: "VPCMPUB", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
                {name: "VPCMPUD512", argLength: 2, reg: w2k, asm: "VPCMPUD", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
-               {name: "VPCMPUDMasked128", argLength: 3, reg: w2kk, asm: "VPCMPUD", aux: "UInt8", commutative: true, typ: "Mask", resultInArg0: false},
-               {name: "VPCMPUDMasked256", argLength: 3, reg: w2kk, asm: "VPCMPUD", aux: "UInt8", commutative: true, typ: "Mask", resultInArg0: false},
-               {name: "VPCMPUDMasked512", argLength: 3, reg: w2kk, asm: "VPCMPUD", aux: "UInt8", commutative: true, typ: "Mask", resultInArg0: false},
+               {name: "VPCMPUDMasked128", argLength: 3, reg: w2kk, asm: "VPCMPUD", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
+               {name: "VPCMPUDMasked256", argLength: 3, reg: w2kk, asm: "VPCMPUD", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
+               {name: "VPCMPUDMasked512", argLength: 3, reg: w2kk, asm: "VPCMPUD", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
                {name: "VPCMPUQ512", argLength: 2, reg: w2k, asm: "VPCMPUQ", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
-               {name: "VPCMPUQMasked128", argLength: 3, reg: w2kk, asm: "VPCMPUQ", aux: "UInt8", commutative: true, typ: "Mask", resultInArg0: false},
-               {name: "VPCMPUQMasked256", argLength: 3, reg: w2kk, asm: "VPCMPUQ", aux: "UInt8", commutative: true, typ: "Mask", resultInArg0: false},
-               {name: "VPCMPUQMasked512", argLength: 3, reg: w2kk, asm: "VPCMPUQ", aux: "UInt8", commutative: true, typ: "Mask", resultInArg0: false},
+               {name: "VPCMPUQMasked128", argLength: 3, reg: w2kk, asm: "VPCMPUQ", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
+               {name: "VPCMPUQMasked256", argLength: 3, reg: w2kk, asm: "VPCMPUQ", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
+               {name: "VPCMPUQMasked512", argLength: 3, reg: w2kk, asm: "VPCMPUQ", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
                {name: "VPCMPUW512", argLength: 2, reg: w2k, asm: "VPCMPUW", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
-               {name: "VPCMPUWMasked128", argLength: 3, reg: w2kk, asm: "VPCMPUW", aux: "UInt8", commutative: true, typ: "Mask", resultInArg0: false},
-               {name: "VPCMPUWMasked256", argLength: 3, reg: w2kk, asm: "VPCMPUW", aux: "UInt8", commutative: true, typ: "Mask", resultInArg0: false},
-               {name: "VPCMPUWMasked512", argLength: 3, reg: w2kk, asm: "VPCMPUW", aux: "UInt8", commutative: true, typ: "Mask", resultInArg0: false},
+               {name: "VPCMPUWMasked128", argLength: 3, reg: w2kk, asm: "VPCMPUW", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
+               {name: "VPCMPUWMasked256", argLength: 3, reg: w2kk, asm: "VPCMPUW", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
+               {name: "VPCMPUWMasked512", argLength: 3, reg: w2kk, asm: "VPCMPUW", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
                {name: "VPCMPW512", argLength: 2, reg: w2k, asm: "VPCMPW", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
-               {name: "VPCMPWMasked128", argLength: 3, reg: w2kk, asm: "VPCMPW", aux: "UInt8", commutative: true, typ: "Mask", resultInArg0: false},
-               {name: "VPCMPWMasked256", argLength: 3, reg: w2kk, asm: "VPCMPW", aux: "UInt8", commutative: true, typ: "Mask", resultInArg0: false},
-               {name: "VPCMPWMasked512", argLength: 3, reg: w2kk, asm: "VPCMPW", aux: "UInt8", commutative: true, typ: "Mask", resultInArg0: false},
+               {name: "VPCMPWMasked128", argLength: 3, reg: w2kk, asm: "VPCMPW", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
+               {name: "VPCMPWMasked256", argLength: 3, reg: w2kk, asm: "VPCMPW", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
+               {name: "VPCMPWMasked512", argLength: 3, reg: w2kk, asm: "VPCMPW", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
                {name: "VPERM2F128256", argLength: 2, reg: v21, asm: "VPERM2F128", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: false},
                {name: "VPERM2I128256", argLength: 2, reg: v21, asm: "VPERM2I128", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: false},
                {name: "VPEXTRB128", argLength: 1, reg: wgp, asm: "VPEXTRB", aux: "UInt8", commutative: false, typ: "int8", resultInArg0: false},
index 3e589f87385b435478500e14a94729dbb83394a6..a875ac69f10d8493fb0a3927bacde606b8ca3240 100644 (file)
@@ -38629,11 +38629,10 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:        "VCMPPD128",
-               auxType:     auxUInt8,
-               argLen:      2,
-               commutative: true,
-               asm:         x86.AVCMPPD,
+               name:    "VCMPPD128",
+               auxType: auxUInt8,
+               argLen:  2,
+               asm:     x86.AVCMPPD,
                reg: regInfo{
                        inputs: []inputInfo{
                                {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
@@ -38645,11 +38644,10 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:        "VCMPPD256",
-               auxType:     auxUInt8,
-               argLen:      2,
-               commutative: true,
-               asm:         x86.AVCMPPD,
+               name:    "VCMPPD256",
+               auxType: auxUInt8,
+               argLen:  2,
+               asm:     x86.AVCMPPD,
                reg: regInfo{
                        inputs: []inputInfo{
                                {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
@@ -38661,11 +38659,10 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:        "VCMPPD512",
-               auxType:     auxUInt8,
-               argLen:      2,
-               commutative: true,
-               asm:         x86.AVCMPPD,
+               name:    "VCMPPD512",
+               auxType: auxUInt8,
+               argLen:  2,
+               asm:     x86.AVCMPPD,
                reg: regInfo{
                        inputs: []inputInfo{
                                {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
@@ -38677,11 +38674,10 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:        "VCMPPDMasked128",
-               auxType:     auxUInt8,
-               argLen:      3,
-               commutative: true,
-               asm:         x86.AVCMPPD,
+               name:    "VCMPPDMasked128",
+               auxType: auxUInt8,
+               argLen:  3,
+               asm:     x86.AVCMPPD,
                reg: regInfo{
                        inputs: []inputInfo{
                                {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
@@ -38694,11 +38690,10 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:        "VCMPPDMasked256",
-               auxType:     auxUInt8,
-               argLen:      3,
-               commutative: true,
-               asm:         x86.AVCMPPD,
+               name:    "VCMPPDMasked256",
+               auxType: auxUInt8,
+               argLen:  3,
+               asm:     x86.AVCMPPD,
                reg: regInfo{
                        inputs: []inputInfo{
                                {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
@@ -38711,11 +38706,10 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:        "VCMPPDMasked512",
-               auxType:     auxUInt8,
-               argLen:      3,
-               commutative: true,
-               asm:         x86.AVCMPPD,
+               name:    "VCMPPDMasked512",
+               auxType: auxUInt8,
+               argLen:  3,
+               asm:     x86.AVCMPPD,
                reg: regInfo{
                        inputs: []inputInfo{
                                {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
@@ -38728,11 +38722,10 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:        "VCMPPS128",
-               auxType:     auxUInt8,
-               argLen:      2,
-               commutative: true,
-               asm:         x86.AVCMPPS,
+               name:    "VCMPPS128",
+               auxType: auxUInt8,
+               argLen:  2,
+               asm:     x86.AVCMPPS,
                reg: regInfo{
                        inputs: []inputInfo{
                                {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
@@ -38744,11 +38737,10 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:        "VCMPPS256",
-               auxType:     auxUInt8,
-               argLen:      2,
-               commutative: true,
-               asm:         x86.AVCMPPS,
+               name:    "VCMPPS256",
+               auxType: auxUInt8,
+               argLen:  2,
+               asm:     x86.AVCMPPS,
                reg: regInfo{
                        inputs: []inputInfo{
                                {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
@@ -38760,11 +38752,10 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:        "VCMPPS512",
-               auxType:     auxUInt8,
-               argLen:      2,
-               commutative: true,
-               asm:         x86.AVCMPPS,
+               name:    "VCMPPS512",
+               auxType: auxUInt8,
+               argLen:  2,
+               asm:     x86.AVCMPPS,
                reg: regInfo{
                        inputs: []inputInfo{
                                {0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
@@ -38776,11 +38767,10 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:        "VCMPPSMasked128",
-               auxType:     auxUInt8,
-               argLen:      3,
-               commutative: true,
-               asm:         x86.AVCMPPS,
+               name:    "VCMPPSMasked128",
+               auxType: auxUInt8,
+               argLen:  3,
+               asm:     x86.AVCMPPS,
                reg: regInfo{
                        inputs: []inputInfo{
                                {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
@@ -38793,11 +38783,10 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:        "VCMPPSMasked256",
-               auxType:     auxUInt8,
-               argLen:      3,
-               commutative: true,
-               asm:         x86.AVCMPPS,
+               name:    "VCMPPSMasked256",
+               auxType: auxUInt8,
+               argLen:  3,
+               asm:     x86.AVCMPPS,
                reg: regInfo{
                        inputs: []inputInfo{
                                {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
@@ -38810,11 +38799,10 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:        "VCMPPSMasked512",
-               auxType:     auxUInt8,
-               argLen:      3,
-               commutative: true,
-               asm:         x86.AVCMPPS,
+               name:    "VCMPPSMasked512",
+               auxType: auxUInt8,
+               argLen:  3,
+               asm:     x86.AVCMPPS,
                reg: regInfo{
                        inputs: []inputInfo{
                                {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
@@ -39237,11 +39225,10 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:        "VPCMPBMasked128",
-               auxType:     auxUInt8,
-               argLen:      3,
-               commutative: true,
-               asm:         x86.AVPCMPB,
+               name:    "VPCMPBMasked128",
+               auxType: auxUInt8,
+               argLen:  3,
+               asm:     x86.AVPCMPB,
                reg: regInfo{
                        inputs: []inputInfo{
                                {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
@@ -39254,11 +39241,10 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:        "VPCMPBMasked256",
-               auxType:     auxUInt8,
-               argLen:      3,
-               commutative: true,
-               asm:         x86.AVPCMPB,
+               name:    "VPCMPBMasked256",
+               auxType: auxUInt8,
+               argLen:  3,
+               asm:     x86.AVPCMPB,
                reg: regInfo{
                        inputs: []inputInfo{
                                {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
@@ -39271,11 +39257,10 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:        "VPCMPBMasked512",
-               auxType:     auxUInt8,
-               argLen:      3,
-               commutative: true,
-               asm:         x86.AVPCMPB,
+               name:    "VPCMPBMasked512",
+               auxType: auxUInt8,
+               argLen:  3,
+               asm:     x86.AVPCMPB,
                reg: regInfo{
                        inputs: []inputInfo{
                                {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
@@ -39303,11 +39288,10 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:        "VPCMPDMasked128",
-               auxType:     auxUInt8,
-               argLen:      3,
-               commutative: true,
-               asm:         x86.AVPCMPD,
+               name:    "VPCMPDMasked128",
+               auxType: auxUInt8,
+               argLen:  3,
+               asm:     x86.AVPCMPD,
                reg: regInfo{
                        inputs: []inputInfo{
                                {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
@@ -39320,11 +39304,10 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:        "VPCMPDMasked256",
-               auxType:     auxUInt8,
-               argLen:      3,
-               commutative: true,
-               asm:         x86.AVPCMPD,
+               name:    "VPCMPDMasked256",
+               auxType: auxUInt8,
+               argLen:  3,
+               asm:     x86.AVPCMPD,
                reg: regInfo{
                        inputs: []inputInfo{
                                {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
@@ -39337,11 +39320,10 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:        "VPCMPDMasked512",
-               auxType:     auxUInt8,
-               argLen:      3,
-               commutative: true,
-               asm:         x86.AVPCMPD,
+               name:    "VPCMPDMasked512",
+               auxType: auxUInt8,
+               argLen:  3,
+               asm:     x86.AVPCMPD,
                reg: regInfo{
                        inputs: []inputInfo{
                                {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
@@ -39369,11 +39351,10 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:        "VPCMPQMasked128",
-               auxType:     auxUInt8,
-               argLen:      3,
-               commutative: true,
-               asm:         x86.AVPCMPQ,
+               name:    "VPCMPQMasked128",
+               auxType: auxUInt8,
+               argLen:  3,
+               asm:     x86.AVPCMPQ,
                reg: regInfo{
                        inputs: []inputInfo{
                                {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
@@ -39386,11 +39367,10 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:        "VPCMPQMasked256",
-               auxType:     auxUInt8,
-               argLen:      3,
-               commutative: true,
-               asm:         x86.AVPCMPQ,
+               name:    "VPCMPQMasked256",
+               auxType: auxUInt8,
+               argLen:  3,
+               asm:     x86.AVPCMPQ,
                reg: regInfo{
                        inputs: []inputInfo{
                                {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
@@ -39403,11 +39383,10 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:        "VPCMPQMasked512",
-               auxType:     auxUInt8,
-               argLen:      3,
-               commutative: true,
-               asm:         x86.AVPCMPQ,
+               name:    "VPCMPQMasked512",
+               auxType: auxUInt8,
+               argLen:  3,
+               asm:     x86.AVPCMPQ,
                reg: regInfo{
                        inputs: []inputInfo{
                                {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
@@ -39435,11 +39414,10 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:        "VPCMPUBMasked128",
-               auxType:     auxUInt8,
-               argLen:      3,
-               commutative: true,
-               asm:         x86.AVPCMPUB,
+               name:    "VPCMPUBMasked128",
+               auxType: auxUInt8,
+               argLen:  3,
+               asm:     x86.AVPCMPUB,
                reg: regInfo{
                        inputs: []inputInfo{
                                {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
@@ -39452,11 +39430,10 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:        "VPCMPUBMasked256",
-               auxType:     auxUInt8,
-               argLen:      3,
-               commutative: true,
-               asm:         x86.AVPCMPUB,
+               name:    "VPCMPUBMasked256",
+               auxType: auxUInt8,
+               argLen:  3,
+               asm:     x86.AVPCMPUB,
                reg: regInfo{
                        inputs: []inputInfo{
                                {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
@@ -39469,11 +39446,10 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:        "VPCMPUBMasked512",
-               auxType:     auxUInt8,
-               argLen:      3,
-               commutative: true,
-               asm:         x86.AVPCMPUB,
+               name:    "VPCMPUBMasked512",
+               auxType: auxUInt8,
+               argLen:  3,
+               asm:     x86.AVPCMPUB,
                reg: regInfo{
                        inputs: []inputInfo{
                                {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
@@ -39501,11 +39477,10 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:        "VPCMPUDMasked128",
-               auxType:     auxUInt8,
-               argLen:      3,
-               commutative: true,
-               asm:         x86.AVPCMPUD,
+               name:    "VPCMPUDMasked128",
+               auxType: auxUInt8,
+               argLen:  3,
+               asm:     x86.AVPCMPUD,
                reg: regInfo{
                        inputs: []inputInfo{
                                {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
@@ -39518,11 +39493,10 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:        "VPCMPUDMasked256",
-               auxType:     auxUInt8,
-               argLen:      3,
-               commutative: true,
-               asm:         x86.AVPCMPUD,
+               name:    "VPCMPUDMasked256",
+               auxType: auxUInt8,
+               argLen:  3,
+               asm:     x86.AVPCMPUD,
                reg: regInfo{
                        inputs: []inputInfo{
                                {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
@@ -39535,11 +39509,10 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:        "VPCMPUDMasked512",
-               auxType:     auxUInt8,
-               argLen:      3,
-               commutative: true,
-               asm:         x86.AVPCMPUD,
+               name:    "VPCMPUDMasked512",
+               auxType: auxUInt8,
+               argLen:  3,
+               asm:     x86.AVPCMPUD,
                reg: regInfo{
                        inputs: []inputInfo{
                                {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
@@ -39567,11 +39540,10 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:        "VPCMPUQMasked128",
-               auxType:     auxUInt8,
-               argLen:      3,
-               commutative: true,
-               asm:         x86.AVPCMPUQ,
+               name:    "VPCMPUQMasked128",
+               auxType: auxUInt8,
+               argLen:  3,
+               asm:     x86.AVPCMPUQ,
                reg: regInfo{
                        inputs: []inputInfo{
                                {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
@@ -39584,11 +39556,10 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:        "VPCMPUQMasked256",
-               auxType:     auxUInt8,
-               argLen:      3,
-               commutative: true,
-               asm:         x86.AVPCMPUQ,
+               name:    "VPCMPUQMasked256",
+               auxType: auxUInt8,
+               argLen:  3,
+               asm:     x86.AVPCMPUQ,
                reg: regInfo{
                        inputs: []inputInfo{
                                {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
@@ -39601,11 +39572,10 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:        "VPCMPUQMasked512",
-               auxType:     auxUInt8,
-               argLen:      3,
-               commutative: true,
-               asm:         x86.AVPCMPUQ,
+               name:    "VPCMPUQMasked512",
+               auxType: auxUInt8,
+               argLen:  3,
+               asm:     x86.AVPCMPUQ,
                reg: regInfo{
                        inputs: []inputInfo{
                                {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
@@ -39633,11 +39603,10 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:        "VPCMPUWMasked128",
-               auxType:     auxUInt8,
-               argLen:      3,
-               commutative: true,
-               asm:         x86.AVPCMPUW,
+               name:    "VPCMPUWMasked128",
+               auxType: auxUInt8,
+               argLen:  3,
+               asm:     x86.AVPCMPUW,
                reg: regInfo{
                        inputs: []inputInfo{
                                {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
@@ -39650,11 +39619,10 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:        "VPCMPUWMasked256",
-               auxType:     auxUInt8,
-               argLen:      3,
-               commutative: true,
-               asm:         x86.AVPCMPUW,
+               name:    "VPCMPUWMasked256",
+               auxType: auxUInt8,
+               argLen:  3,
+               asm:     x86.AVPCMPUW,
                reg: regInfo{
                        inputs: []inputInfo{
                                {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
@@ -39667,11 +39635,10 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:        "VPCMPUWMasked512",
-               auxType:     auxUInt8,
-               argLen:      3,
-               commutative: true,
-               asm:         x86.AVPCMPUW,
+               name:    "VPCMPUWMasked512",
+               auxType: auxUInt8,
+               argLen:  3,
+               asm:     x86.AVPCMPUW,
                reg: regInfo{
                        inputs: []inputInfo{
                                {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
@@ -39699,11 +39666,10 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:        "VPCMPWMasked128",
-               auxType:     auxUInt8,
-               argLen:      3,
-               commutative: true,
-               asm:         x86.AVPCMPW,
+               name:    "VPCMPWMasked128",
+               auxType: auxUInt8,
+               argLen:  3,
+               asm:     x86.AVPCMPW,
                reg: regInfo{
                        inputs: []inputInfo{
                                {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
@@ -39716,11 +39682,10 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:        "VPCMPWMasked256",
-               auxType:     auxUInt8,
-               argLen:      3,
-               commutative: true,
-               asm:         x86.AVPCMPW,
+               name:    "VPCMPWMasked256",
+               auxType: auxUInt8,
+               argLen:  3,
+               asm:     x86.AVPCMPW,
                reg: regInfo{
                        inputs: []inputInfo{
                                {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
@@ -39733,11 +39698,10 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:        "VPCMPWMasked512",
-               auxType:     auxUInt8,
-               argLen:      3,
-               commutative: true,
-               asm:         x86.AVPCMPW,
+               name:    "VPCMPWMasked512",
+               auxType: auxUInt8,
+               argLen:  3,
+               asm:     x86.AVPCMPW,
                reg: regInfo{
                        inputs: []inputInfo{
                                {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
index a6fc4bfbaf0da38f7c6bb7b8d2a06fdcd6dacd7f..99956c56a06a548a9ae77fb701571ad436a0a35a 100644 (file)
@@ -28757,26 +28757,23 @@ func rewriteValueAMD64_OpAMD64VCMPPD512(v *Value) bool {
        // result: (VCMPPD512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem)
        for {
                c := auxIntToUint8(v.AuxInt)
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       x := v_0
-                       l := v_1
-                       if l.Op != OpAMD64VMOVDQUload512 {
-                               continue
-                       }
-                       off := auxIntToInt32(l.AuxInt)
-                       sym := auxToSym(l.Aux)
-                       mem := l.Args[1]
-                       ptr := l.Args[0]
-                       if !(canMergeLoad(v, l) && clobber(l)) {
-                               continue
-                       }
-                       v.reset(OpAMD64VCMPPD512load)
-                       v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
-                       v.Aux = symToAux(sym)
-                       v.AddArg3(x, ptr, mem)
-                       return true
+               x := v_0
+               l := v_1
+               if l.Op != OpAMD64VMOVDQUload512 {
+                       break
                }
-               break
+               off := auxIntToInt32(l.AuxInt)
+               sym := auxToSym(l.Aux)
+               mem := l.Args[1]
+               ptr := l.Args[0]
+               if !(canMergeLoad(v, l) && clobber(l)) {
+                       break
+               }
+               v.reset(OpAMD64VCMPPD512load)
+               v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+               v.Aux = symToAux(sym)
+               v.AddArg3(x, ptr, mem)
+               return true
        }
        return false
 }
@@ -28789,27 +28786,24 @@ func rewriteValueAMD64_OpAMD64VCMPPDMasked128(v *Value) bool {
        // result: (VCMPPDMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem)
        for {
                c := auxIntToUint8(v.AuxInt)
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       x := v_0
-                       l := v_1
-                       if l.Op != OpAMD64VMOVDQUload128 {
-                               continue
-                       }
-                       off := auxIntToInt32(l.AuxInt)
-                       sym := auxToSym(l.Aux)
-                       mem := l.Args[1]
-                       ptr := l.Args[0]
-                       mask := v_2
-                       if !(canMergeLoad(v, l) && clobber(l)) {
-                               continue
-                       }
-                       v.reset(OpAMD64VCMPPDMasked128load)
-                       v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
-                       v.Aux = symToAux(sym)
-                       v.AddArg4(x, ptr, mask, mem)
-                       return true
+               x := v_0
+               l := v_1
+               if l.Op != OpAMD64VMOVDQUload128 {
+                       break
                }
-               break
+               off := auxIntToInt32(l.AuxInt)
+               sym := auxToSym(l.Aux)
+               mem := l.Args[1]
+               ptr := l.Args[0]
+               mask := v_2
+               if !(canMergeLoad(v, l) && clobber(l)) {
+                       break
+               }
+               v.reset(OpAMD64VCMPPDMasked128load)
+               v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+               v.Aux = symToAux(sym)
+               v.AddArg4(x, ptr, mask, mem)
+               return true
        }
        return false
 }
@@ -28822,27 +28816,24 @@ func rewriteValueAMD64_OpAMD64VCMPPDMasked256(v *Value) bool {
        // result: (VCMPPDMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem)
        for {
                c := auxIntToUint8(v.AuxInt)
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       x := v_0
-                       l := v_1
-                       if l.Op != OpAMD64VMOVDQUload256 {
-                               continue
-                       }
-                       off := auxIntToInt32(l.AuxInt)
-                       sym := auxToSym(l.Aux)
-                       mem := l.Args[1]
-                       ptr := l.Args[0]
-                       mask := v_2
-                       if !(canMergeLoad(v, l) && clobber(l)) {
-                               continue
-                       }
-                       v.reset(OpAMD64VCMPPDMasked256load)
-                       v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
-                       v.Aux = symToAux(sym)
-                       v.AddArg4(x, ptr, mask, mem)
-                       return true
+               x := v_0
+               l := v_1
+               if l.Op != OpAMD64VMOVDQUload256 {
+                       break
                }
-               break
+               off := auxIntToInt32(l.AuxInt)
+               sym := auxToSym(l.Aux)
+               mem := l.Args[1]
+               ptr := l.Args[0]
+               mask := v_2
+               if !(canMergeLoad(v, l) && clobber(l)) {
+                       break
+               }
+               v.reset(OpAMD64VCMPPDMasked256load)
+               v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+               v.Aux = symToAux(sym)
+               v.AddArg4(x, ptr, mask, mem)
+               return true
        }
        return false
 }
@@ -28855,27 +28846,24 @@ func rewriteValueAMD64_OpAMD64VCMPPDMasked512(v *Value) bool {
        // result: (VCMPPDMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem)
        for {
                c := auxIntToUint8(v.AuxInt)
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       x := v_0
-                       l := v_1
-                       if l.Op != OpAMD64VMOVDQUload512 {
-                               continue
-                       }
-                       off := auxIntToInt32(l.AuxInt)
-                       sym := auxToSym(l.Aux)
-                       mem := l.Args[1]
-                       ptr := l.Args[0]
-                       mask := v_2
-                       if !(canMergeLoad(v, l) && clobber(l)) {
-                               continue
-                       }
-                       v.reset(OpAMD64VCMPPDMasked512load)
-                       v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
-                       v.Aux = symToAux(sym)
-                       v.AddArg4(x, ptr, mask, mem)
-                       return true
+               x := v_0
+               l := v_1
+               if l.Op != OpAMD64VMOVDQUload512 {
+                       break
                }
-               break
+               off := auxIntToInt32(l.AuxInt)
+               sym := auxToSym(l.Aux)
+               mem := l.Args[1]
+               ptr := l.Args[0]
+               mask := v_2
+               if !(canMergeLoad(v, l) && clobber(l)) {
+                       break
+               }
+               v.reset(OpAMD64VCMPPDMasked512load)
+               v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+               v.Aux = symToAux(sym)
+               v.AddArg4(x, ptr, mask, mem)
+               return true
        }
        return false
 }
@@ -28887,26 +28875,23 @@ func rewriteValueAMD64_OpAMD64VCMPPS512(v *Value) bool {
        // result: (VCMPPS512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem)
        for {
                c := auxIntToUint8(v.AuxInt)
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       x := v_0
-                       l := v_1
-                       if l.Op != OpAMD64VMOVDQUload512 {
-                               continue
-                       }
-                       off := auxIntToInt32(l.AuxInt)
-                       sym := auxToSym(l.Aux)
-                       mem := l.Args[1]
-                       ptr := l.Args[0]
-                       if !(canMergeLoad(v, l) && clobber(l)) {
-                               continue
-                       }
-                       v.reset(OpAMD64VCMPPS512load)
-                       v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
-                       v.Aux = symToAux(sym)
-                       v.AddArg3(x, ptr, mem)
-                       return true
+               x := v_0
+               l := v_1
+               if l.Op != OpAMD64VMOVDQUload512 {
+                       break
                }
-               break
+               off := auxIntToInt32(l.AuxInt)
+               sym := auxToSym(l.Aux)
+               mem := l.Args[1]
+               ptr := l.Args[0]
+               if !(canMergeLoad(v, l) && clobber(l)) {
+                       break
+               }
+               v.reset(OpAMD64VCMPPS512load)
+               v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+               v.Aux = symToAux(sym)
+               v.AddArg3(x, ptr, mem)
+               return true
        }
        return false
 }
@@ -28919,27 +28904,24 @@ func rewriteValueAMD64_OpAMD64VCMPPSMasked128(v *Value) bool {
        // result: (VCMPPSMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem)
        for {
                c := auxIntToUint8(v.AuxInt)
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       x := v_0
-                       l := v_1
-                       if l.Op != OpAMD64VMOVDQUload128 {
-                               continue
-                       }
-                       off := auxIntToInt32(l.AuxInt)
-                       sym := auxToSym(l.Aux)
-                       mem := l.Args[1]
-                       ptr := l.Args[0]
-                       mask := v_2
-                       if !(canMergeLoad(v, l) && clobber(l)) {
-                               continue
-                       }
-                       v.reset(OpAMD64VCMPPSMasked128load)
-                       v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
-                       v.Aux = symToAux(sym)
-                       v.AddArg4(x, ptr, mask, mem)
-                       return true
+               x := v_0
+               l := v_1
+               if l.Op != OpAMD64VMOVDQUload128 {
+                       break
                }
-               break
+               off := auxIntToInt32(l.AuxInt)
+               sym := auxToSym(l.Aux)
+               mem := l.Args[1]
+               ptr := l.Args[0]
+               mask := v_2
+               if !(canMergeLoad(v, l) && clobber(l)) {
+                       break
+               }
+               v.reset(OpAMD64VCMPPSMasked128load)
+               v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+               v.Aux = symToAux(sym)
+               v.AddArg4(x, ptr, mask, mem)
+               return true
        }
        return false
 }
@@ -28952,27 +28934,24 @@ func rewriteValueAMD64_OpAMD64VCMPPSMasked256(v *Value) bool {
        // result: (VCMPPSMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem)
        for {
                c := auxIntToUint8(v.AuxInt)
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       x := v_0
-                       l := v_1
-                       if l.Op != OpAMD64VMOVDQUload256 {
-                               continue
-                       }
-                       off := auxIntToInt32(l.AuxInt)
-                       sym := auxToSym(l.Aux)
-                       mem := l.Args[1]
-                       ptr := l.Args[0]
-                       mask := v_2
-                       if !(canMergeLoad(v, l) && clobber(l)) {
-                               continue
-                       }
-                       v.reset(OpAMD64VCMPPSMasked256load)
-                       v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
-                       v.Aux = symToAux(sym)
-                       v.AddArg4(x, ptr, mask, mem)
-                       return true
+               x := v_0
+               l := v_1
+               if l.Op != OpAMD64VMOVDQUload256 {
+                       break
                }
-               break
+               off := auxIntToInt32(l.AuxInt)
+               sym := auxToSym(l.Aux)
+               mem := l.Args[1]
+               ptr := l.Args[0]
+               mask := v_2
+               if !(canMergeLoad(v, l) && clobber(l)) {
+                       break
+               }
+               v.reset(OpAMD64VCMPPSMasked256load)
+               v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+               v.Aux = symToAux(sym)
+               v.AddArg4(x, ptr, mask, mem)
+               return true
        }
        return false
 }
@@ -28985,27 +28964,24 @@ func rewriteValueAMD64_OpAMD64VCMPPSMasked512(v *Value) bool {
        // result: (VCMPPSMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem)
        for {
                c := auxIntToUint8(v.AuxInt)
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       x := v_0
-                       l := v_1
-                       if l.Op != OpAMD64VMOVDQUload512 {
-                               continue
-                       }
-                       off := auxIntToInt32(l.AuxInt)
-                       sym := auxToSym(l.Aux)
-                       mem := l.Args[1]
-                       ptr := l.Args[0]
-                       mask := v_2
-                       if !(canMergeLoad(v, l) && clobber(l)) {
-                               continue
-                       }
-                       v.reset(OpAMD64VCMPPSMasked512load)
-                       v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
-                       v.Aux = symToAux(sym)
-                       v.AddArg4(x, ptr, mask, mem)
-                       return true
+               x := v_0
+               l := v_1
+               if l.Op != OpAMD64VMOVDQUload512 {
+                       break
                }
-               break
+               off := auxIntToInt32(l.AuxInt)
+               sym := auxToSym(l.Aux)
+               mem := l.Args[1]
+               ptr := l.Args[0]
+               mask := v_2
+               if !(canMergeLoad(v, l) && clobber(l)) {
+                       break
+               }
+               v.reset(OpAMD64VCMPPSMasked512load)
+               v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+               v.Aux = symToAux(sym)
+               v.AddArg4(x, ptr, mask, mem)
+               return true
        }
        return false
 }
@@ -52600,27 +52576,24 @@ func rewriteValueAMD64_OpAMD64VPCMPDMasked128(v *Value) bool {
        // result: (VPCMPDMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem)
        for {
                c := auxIntToUint8(v.AuxInt)
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       x := v_0
-                       l := v_1
-                       if l.Op != OpAMD64VMOVDQUload128 {
-                               continue
-                       }
-                       off := auxIntToInt32(l.AuxInt)
-                       sym := auxToSym(l.Aux)
-                       mem := l.Args[1]
-                       ptr := l.Args[0]
-                       mask := v_2
-                       if !(canMergeLoad(v, l) && clobber(l)) {
-                               continue
-                       }
-                       v.reset(OpAMD64VPCMPDMasked128load)
-                       v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
-                       v.Aux = symToAux(sym)
-                       v.AddArg4(x, ptr, mask, mem)
-                       return true
+               x := v_0
+               l := v_1
+               if l.Op != OpAMD64VMOVDQUload128 {
+                       break
                }
-               break
+               off := auxIntToInt32(l.AuxInt)
+               sym := auxToSym(l.Aux)
+               mem := l.Args[1]
+               ptr := l.Args[0]
+               mask := v_2
+               if !(canMergeLoad(v, l) && clobber(l)) {
+                       break
+               }
+               v.reset(OpAMD64VPCMPDMasked128load)
+               v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+               v.Aux = symToAux(sym)
+               v.AddArg4(x, ptr, mask, mem)
+               return true
        }
        return false
 }
@@ -52633,27 +52606,24 @@ func rewriteValueAMD64_OpAMD64VPCMPDMasked256(v *Value) bool {
        // result: (VPCMPDMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem)
        for {
                c := auxIntToUint8(v.AuxInt)
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       x := v_0
-                       l := v_1
-                       if l.Op != OpAMD64VMOVDQUload256 {
-                               continue
-                       }
-                       off := auxIntToInt32(l.AuxInt)
-                       sym := auxToSym(l.Aux)
-                       mem := l.Args[1]
-                       ptr := l.Args[0]
-                       mask := v_2
-                       if !(canMergeLoad(v, l) && clobber(l)) {
-                               continue
-                       }
-                       v.reset(OpAMD64VPCMPDMasked256load)
-                       v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
-                       v.Aux = symToAux(sym)
-                       v.AddArg4(x, ptr, mask, mem)
-                       return true
+               x := v_0
+               l := v_1
+               if l.Op != OpAMD64VMOVDQUload256 {
+                       break
                }
-               break
+               off := auxIntToInt32(l.AuxInt)
+               sym := auxToSym(l.Aux)
+               mem := l.Args[1]
+               ptr := l.Args[0]
+               mask := v_2
+               if !(canMergeLoad(v, l) && clobber(l)) {
+                       break
+               }
+               v.reset(OpAMD64VPCMPDMasked256load)
+               v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+               v.Aux = symToAux(sym)
+               v.AddArg4(x, ptr, mask, mem)
+               return true
        }
        return false
 }
@@ -52666,27 +52636,24 @@ func rewriteValueAMD64_OpAMD64VPCMPDMasked512(v *Value) bool {
        // result: (VPCMPDMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem)
        for {
                c := auxIntToUint8(v.AuxInt)
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       x := v_0
-                       l := v_1
-                       if l.Op != OpAMD64VMOVDQUload512 {
-                               continue
-                       }
-                       off := auxIntToInt32(l.AuxInt)
-                       sym := auxToSym(l.Aux)
-                       mem := l.Args[1]
-                       ptr := l.Args[0]
-                       mask := v_2
-                       if !(canMergeLoad(v, l) && clobber(l)) {
-                               continue
-                       }
-                       v.reset(OpAMD64VPCMPDMasked512load)
-                       v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
-                       v.Aux = symToAux(sym)
-                       v.AddArg4(x, ptr, mask, mem)
-                       return true
+               x := v_0
+               l := v_1
+               if l.Op != OpAMD64VMOVDQUload512 {
+                       break
                }
-               break
+               off := auxIntToInt32(l.AuxInt)
+               sym := auxToSym(l.Aux)
+               mem := l.Args[1]
+               ptr := l.Args[0]
+               mask := v_2
+               if !(canMergeLoad(v, l) && clobber(l)) {
+                       break
+               }
+               v.reset(OpAMD64VPCMPDMasked512load)
+               v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+               v.Aux = symToAux(sym)
+               v.AddArg4(x, ptr, mask, mem)
+               return true
        }
        return false
 }
@@ -52841,27 +52808,24 @@ func rewriteValueAMD64_OpAMD64VPCMPQMasked128(v *Value) bool {
        // result: (VPCMPQMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem)
        for {
                c := auxIntToUint8(v.AuxInt)
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       x := v_0
-                       l := v_1
-                       if l.Op != OpAMD64VMOVDQUload128 {
-                               continue
-                       }
-                       off := auxIntToInt32(l.AuxInt)
-                       sym := auxToSym(l.Aux)
-                       mem := l.Args[1]
-                       ptr := l.Args[0]
-                       mask := v_2
-                       if !(canMergeLoad(v, l) && clobber(l)) {
-                               continue
-                       }
-                       v.reset(OpAMD64VPCMPQMasked128load)
-                       v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
-                       v.Aux = symToAux(sym)
-                       v.AddArg4(x, ptr, mask, mem)
-                       return true
+               x := v_0
+               l := v_1
+               if l.Op != OpAMD64VMOVDQUload128 {
+                       break
                }
-               break
+               off := auxIntToInt32(l.AuxInt)
+               sym := auxToSym(l.Aux)
+               mem := l.Args[1]
+               ptr := l.Args[0]
+               mask := v_2
+               if !(canMergeLoad(v, l) && clobber(l)) {
+                       break
+               }
+               v.reset(OpAMD64VPCMPQMasked128load)
+               v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+               v.Aux = symToAux(sym)
+               v.AddArg4(x, ptr, mask, mem)
+               return true
        }
        return false
 }
@@ -52873,28 +52837,25 @@ func rewriteValueAMD64_OpAMD64VPCMPQMasked256(v *Value) bool {
        // cond: canMergeLoad(v, l) && clobber(l)
        // result: (VPCMPQMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem)
        for {
-               c := auxIntToUint8(v.AuxInt)
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       x := v_0
-                       l := v_1
-                       if l.Op != OpAMD64VMOVDQUload256 {
-                               continue
-                       }
-                       off := auxIntToInt32(l.AuxInt)
-                       sym := auxToSym(l.Aux)
-                       mem := l.Args[1]
-                       ptr := l.Args[0]
-                       mask := v_2
-                       if !(canMergeLoad(v, l) && clobber(l)) {
-                               continue
-                       }
-                       v.reset(OpAMD64VPCMPQMasked256load)
-                       v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
-                       v.Aux = symToAux(sym)
-                       v.AddArg4(x, ptr, mask, mem)
-                       return true
+               c := auxIntToUint8(v.AuxInt)
+               x := v_0
+               l := v_1
+               if l.Op != OpAMD64VMOVDQUload256 {
+                       break
                }
-               break
+               off := auxIntToInt32(l.AuxInt)
+               sym := auxToSym(l.Aux)
+               mem := l.Args[1]
+               ptr := l.Args[0]
+               mask := v_2
+               if !(canMergeLoad(v, l) && clobber(l)) {
+                       break
+               }
+               v.reset(OpAMD64VPCMPQMasked256load)
+               v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+               v.Aux = symToAux(sym)
+               v.AddArg4(x, ptr, mask, mem)
+               return true
        }
        return false
 }
@@ -52907,27 +52868,24 @@ func rewriteValueAMD64_OpAMD64VPCMPQMasked512(v *Value) bool {
        // result: (VPCMPQMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem)
        for {
                c := auxIntToUint8(v.AuxInt)
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       x := v_0
-                       l := v_1
-                       if l.Op != OpAMD64VMOVDQUload512 {
-                               continue
-                       }
-                       off := auxIntToInt32(l.AuxInt)
-                       sym := auxToSym(l.Aux)
-                       mem := l.Args[1]
-                       ptr := l.Args[0]
-                       mask := v_2
-                       if !(canMergeLoad(v, l) && clobber(l)) {
-                               continue
-                       }
-                       v.reset(OpAMD64VPCMPQMasked512load)
-                       v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
-                       v.Aux = symToAux(sym)
-                       v.AddArg4(x, ptr, mask, mem)
-                       return true
+               x := v_0
+               l := v_1
+               if l.Op != OpAMD64VMOVDQUload512 {
+                       break
                }
-               break
+               off := auxIntToInt32(l.AuxInt)
+               sym := auxToSym(l.Aux)
+               mem := l.Args[1]
+               ptr := l.Args[0]
+               mask := v_2
+               if !(canMergeLoad(v, l) && clobber(l)) {
+                       break
+               }
+               v.reset(OpAMD64VPCMPQMasked512load)
+               v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+               v.Aux = symToAux(sym)
+               v.AddArg4(x, ptr, mask, mem)
+               return true
        }
        return false
 }
@@ -52968,27 +52926,24 @@ func rewriteValueAMD64_OpAMD64VPCMPUDMasked128(v *Value) bool {
        // result: (VPCMPUDMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem)
        for {
                c := auxIntToUint8(v.AuxInt)
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       x := v_0
-                       l := v_1
-                       if l.Op != OpAMD64VMOVDQUload128 {
-                               continue
-                       }
-                       off := auxIntToInt32(l.AuxInt)
-                       sym := auxToSym(l.Aux)
-                       mem := l.Args[1]
-                       ptr := l.Args[0]
-                       mask := v_2
-                       if !(canMergeLoad(v, l) && clobber(l)) {
-                               continue
-                       }
-                       v.reset(OpAMD64VPCMPUDMasked128load)
-                       v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
-                       v.Aux = symToAux(sym)
-                       v.AddArg4(x, ptr, mask, mem)
-                       return true
+               x := v_0
+               l := v_1
+               if l.Op != OpAMD64VMOVDQUload128 {
+                       break
                }
-               break
+               off := auxIntToInt32(l.AuxInt)
+               sym := auxToSym(l.Aux)
+               mem := l.Args[1]
+               ptr := l.Args[0]
+               mask := v_2
+               if !(canMergeLoad(v, l) && clobber(l)) {
+                       break
+               }
+               v.reset(OpAMD64VPCMPUDMasked128load)
+               v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+               v.Aux = symToAux(sym)
+               v.AddArg4(x, ptr, mask, mem)
+               return true
        }
        return false
 }
@@ -53001,27 +52956,24 @@ func rewriteValueAMD64_OpAMD64VPCMPUDMasked256(v *Value) bool {
        // result: (VPCMPUDMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem)
        for {
                c := auxIntToUint8(v.AuxInt)
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       x := v_0
-                       l := v_1
-                       if l.Op != OpAMD64VMOVDQUload256 {
-                               continue
-                       }
-                       off := auxIntToInt32(l.AuxInt)
-                       sym := auxToSym(l.Aux)
-                       mem := l.Args[1]
-                       ptr := l.Args[0]
-                       mask := v_2
-                       if !(canMergeLoad(v, l) && clobber(l)) {
-                               continue
-                       }
-                       v.reset(OpAMD64VPCMPUDMasked256load)
-                       v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
-                       v.Aux = symToAux(sym)
-                       v.AddArg4(x, ptr, mask, mem)
-                       return true
+               x := v_0
+               l := v_1
+               if l.Op != OpAMD64VMOVDQUload256 {
+                       break
                }
-               break
+               off := auxIntToInt32(l.AuxInt)
+               sym := auxToSym(l.Aux)
+               mem := l.Args[1]
+               ptr := l.Args[0]
+               mask := v_2
+               if !(canMergeLoad(v, l) && clobber(l)) {
+                       break
+               }
+               v.reset(OpAMD64VPCMPUDMasked256load)
+               v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+               v.Aux = symToAux(sym)
+               v.AddArg4(x, ptr, mask, mem)
+               return true
        }
        return false
 }
@@ -53034,27 +52986,24 @@ func rewriteValueAMD64_OpAMD64VPCMPUDMasked512(v *Value) bool {
        // result: (VPCMPUDMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem)
        for {
                c := auxIntToUint8(v.AuxInt)
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       x := v_0
-                       l := v_1
-                       if l.Op != OpAMD64VMOVDQUload512 {
-                               continue
-                       }
-                       off := auxIntToInt32(l.AuxInt)
-                       sym := auxToSym(l.Aux)
-                       mem := l.Args[1]
-                       ptr := l.Args[0]
-                       mask := v_2
-                       if !(canMergeLoad(v, l) && clobber(l)) {
-                               continue
-                       }
-                       v.reset(OpAMD64VPCMPUDMasked512load)
-                       v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
-                       v.Aux = symToAux(sym)
-                       v.AddArg4(x, ptr, mask, mem)
-                       return true
+               x := v_0
+               l := v_1
+               if l.Op != OpAMD64VMOVDQUload512 {
+                       break
                }
-               break
+               off := auxIntToInt32(l.AuxInt)
+               sym := auxToSym(l.Aux)
+               mem := l.Args[1]
+               ptr := l.Args[0]
+               mask := v_2
+               if !(canMergeLoad(v, l) && clobber(l)) {
+                       break
+               }
+               v.reset(OpAMD64VPCMPUDMasked512load)
+               v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+               v.Aux = symToAux(sym)
+               v.AddArg4(x, ptr, mask, mem)
+               return true
        }
        return false
 }
@@ -53095,27 +53044,24 @@ func rewriteValueAMD64_OpAMD64VPCMPUQMasked128(v *Value) bool {
        // result: (VPCMPUQMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem)
        for {
                c := auxIntToUint8(v.AuxInt)
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       x := v_0
-                       l := v_1
-                       if l.Op != OpAMD64VMOVDQUload128 {
-                               continue
-                       }
-                       off := auxIntToInt32(l.AuxInt)
-                       sym := auxToSym(l.Aux)
-                       mem := l.Args[1]
-                       ptr := l.Args[0]
-                       mask := v_2
-                       if !(canMergeLoad(v, l) && clobber(l)) {
-                               continue
-                       }
-                       v.reset(OpAMD64VPCMPUQMasked128load)
-                       v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
-                       v.Aux = symToAux(sym)
-                       v.AddArg4(x, ptr, mask, mem)
-                       return true
+               x := v_0
+               l := v_1
+               if l.Op != OpAMD64VMOVDQUload128 {
+                       break
                }
-               break
+               off := auxIntToInt32(l.AuxInt)
+               sym := auxToSym(l.Aux)
+               mem := l.Args[1]
+               ptr := l.Args[0]
+               mask := v_2
+               if !(canMergeLoad(v, l) && clobber(l)) {
+                       break
+               }
+               v.reset(OpAMD64VPCMPUQMasked128load)
+               v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+               v.Aux = symToAux(sym)
+               v.AddArg4(x, ptr, mask, mem)
+               return true
        }
        return false
 }
@@ -53128,27 +53074,24 @@ func rewriteValueAMD64_OpAMD64VPCMPUQMasked256(v *Value) bool {
        // result: (VPCMPUQMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem)
        for {
                c := auxIntToUint8(v.AuxInt)
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       x := v_0
-                       l := v_1
-                       if l.Op != OpAMD64VMOVDQUload256 {
-                               continue
-                       }
-                       off := auxIntToInt32(l.AuxInt)
-                       sym := auxToSym(l.Aux)
-                       mem := l.Args[1]
-                       ptr := l.Args[0]
-                       mask := v_2
-                       if !(canMergeLoad(v, l) && clobber(l)) {
-                               continue
-                       }
-                       v.reset(OpAMD64VPCMPUQMasked256load)
-                       v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
-                       v.Aux = symToAux(sym)
-                       v.AddArg4(x, ptr, mask, mem)
-                       return true
+               x := v_0
+               l := v_1
+               if l.Op != OpAMD64VMOVDQUload256 {
+                       break
                }
-               break
+               off := auxIntToInt32(l.AuxInt)
+               sym := auxToSym(l.Aux)
+               mem := l.Args[1]
+               ptr := l.Args[0]
+               mask := v_2
+               if !(canMergeLoad(v, l) && clobber(l)) {
+                       break
+               }
+               v.reset(OpAMD64VPCMPUQMasked256load)
+               v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+               v.Aux = symToAux(sym)
+               v.AddArg4(x, ptr, mask, mem)
+               return true
        }
        return false
 }
@@ -53161,27 +53104,24 @@ func rewriteValueAMD64_OpAMD64VPCMPUQMasked512(v *Value) bool {
        // result: (VPCMPUQMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem)
        for {
                c := auxIntToUint8(v.AuxInt)
-               for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
-                       x := v_0
-                       l := v_1
-                       if l.Op != OpAMD64VMOVDQUload512 {
-                               continue
-                       }
-                       off := auxIntToInt32(l.AuxInt)
-                       sym := auxToSym(l.Aux)
-                       mem := l.Args[1]
-                       ptr := l.Args[0]
-                       mask := v_2
-                       if !(canMergeLoad(v, l) && clobber(l)) {
-                               continue
-                       }
-                       v.reset(OpAMD64VPCMPUQMasked512load)
-                       v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
-                       v.Aux = symToAux(sym)
-                       v.AddArg4(x, ptr, mask, mem)
-                       return true
+               x := v_0
+               l := v_1
+               if l.Op != OpAMD64VMOVDQUload512 {
+                       break
                }
-               break
+               off := auxIntToInt32(l.AuxInt)
+               sym := auxToSym(l.Aux)
+               mem := l.Args[1]
+               ptr := l.Args[0]
+               mask := v_2
+               if !(canMergeLoad(v, l) && clobber(l)) {
+                       break
+               }
+               v.reset(OpAMD64VPCMPUQMasked512load)
+               v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+               v.Aux = symToAux(sym)
+               v.AddArg4(x, ptr, mask, mem)
+               return true
        }
        return false
 }
index e8cf792d425471fe31a3e68e0afdae66be047038..3d99dd2a81a1a4344c46061af82e45532edbbb11 100644 (file)
@@ -98,6 +98,10 @@ func writeSIMDMachineOps(ops []Operation) *bytes.Buffer {
                        mOpOrder = append(mOpOrder, asm)
                        continue
                }
+               if !op.Commutative && other.Commutative { // A non-commutative version of the op always wins: if any variant sharing this asm is not commutative, the machine op cannot be commutative.
+                       best[asm] = op
+                       continue
+               }
                // see if "op" is better than "other"
                if countOverrides(op.In)+countOverrides(op.Out) < countOverrides(other.In)+countOverrides(other.Out) {
                        best[asm] = op
index 424ba41466321a23b3a5d83add67caed5e70f9ba..50dcdd26184fb73248d8622e44e2225fbcf080f2 100644 (file)
@@ -666,6 +666,12 @@ func dedupGodef(ops []Operation) ([]Operation, error) {
                                if i.MemFeatures == nil && j.MemFeatures != nil {
                                        return 1
                                }
+                               if i.Commutative != j.Commutative {
+                                       if j.Commutative {
+                                               return -1
+                                       }
+                                       return 1
+                               }
                                // Their order does not matter anymore, at least for now.
                                return 0
                        })
index aa07ade27e693de66ba48b675cfb69b8f5459114..4b639d7a34724989a92722869d4d2f041a270833 100644 (file)
   constImm: 0
   commutative: true
   documentation: !string |-
-    // NAME compares for equality.
+    // NAME returns x equals y, elementwise.
 - go: Less
   constImm: 1
   commutative: false
   documentation: !string |-
-    // NAME compares for less than.
+    // NAME returns x less-than y, elementwise.
 - go: LessEqual
   constImm: 2
   commutative: false
   documentation: !string |-
-    // NAME compares for less than or equal.
+    // NAME returns x less-than-or-equals y, elementwise.
 - go: IsNan # For float only.
   constImm: 3
   commutative: true
   constImm: 4
   commutative: true
   documentation: !string |-
-    // NAME compares for inequality.
+    // NAME returns x not-equals y, elementwise.
 - go: GreaterEqual
   constImm: 13
   commutative: false
   documentation: !string |-
-    // NAME compares for greater than or equal.
+    // NAME returns x greater-than-or-equals y, elementwise.
 - go: Greater
   constImm: 14
   commutative: false
   documentation: !string |-
-    // NAME compares for greater than.
+    // NAME returns x greater-than y, elementwise.
index 7280e873a0ae2f5a3e4866744d1c804620f604a9..82774e05adfdf6cc1df791267322a859f2eedb7e 100644 (file)
@@ -2022,152 +2022,152 @@ func (x Int8x64) DotProductQuadrupleSaturated(y Uint8x64) Int32x16
 
 /* Equal */
 
-// Equal compares for equality.
+// Equal returns x equals y, elementwise.
 //
 // Asm: VPCMPEQB, CPU Feature: AVX
 func (x Int8x16) Equal(y Int8x16) Mask8x16
 
-// Equal compares for equality.
+// Equal returns x equals y, elementwise.
 //
 // Asm: VPCMPEQB, CPU Feature: AVX2
 func (x Int8x32) Equal(y Int8x32) Mask8x32
 
-// Equal compares for equality.
+// Equal returns x equals y, elementwise.
 //
 // Asm: VPCMPEQB, CPU Feature: AVX512
 func (x Int8x64) Equal(y Int8x64) Mask8x64
 
-// Equal compares for equality.
+// Equal returns x equals y, elementwise.
 //
 // Asm: VPCMPEQW, CPU Feature: AVX
 func (x Int16x8) Equal(y Int16x8) Mask16x8
 
-// Equal compares for equality.
+// Equal returns x equals y, elementwise.
 //
 // Asm: VPCMPEQW, CPU Feature: AVX2
 func (x Int16x16) Equal(y Int16x16) Mask16x16
 
-// Equal compares for equality.
+// Equal returns x equals y, elementwise.
 //
 // Asm: VPCMPEQW, CPU Feature: AVX512
 func (x Int16x32) Equal(y Int16x32) Mask16x32
 
-// Equal compares for equality.
+// Equal returns x equals y, elementwise.
 //
 // Asm: VPCMPEQD, CPU Feature: AVX
 func (x Int32x4) Equal(y Int32x4) Mask32x4
 
-// Equal compares for equality.
+// Equal returns x equals y, elementwise.
 //
 // Asm: VPCMPEQD, CPU Feature: AVX2
 func (x Int32x8) Equal(y Int32x8) Mask32x8
 
-// Equal compares for equality.
+// Equal returns x equals y, elementwise.
 //
 // Asm: VPCMPEQD, CPU Feature: AVX512
 func (x Int32x16) Equal(y Int32x16) Mask32x16
 
-// Equal compares for equality.
+// Equal returns x equals y, elementwise.
 //
 // Asm: VPCMPEQQ, CPU Feature: AVX
 func (x Int64x2) Equal(y Int64x2) Mask64x2
 
-// Equal compares for equality.
+// Equal returns x equals y, elementwise.
 //
 // Asm: VPCMPEQQ, CPU Feature: AVX2
 func (x Int64x4) Equal(y Int64x4) Mask64x4
 
-// Equal compares for equality.
+// Equal returns x equals y, elementwise.
 //
 // Asm: VPCMPEQQ, CPU Feature: AVX512
 func (x Int64x8) Equal(y Int64x8) Mask64x8
 
-// Equal compares for equality.
+// Equal returns x equals y, elementwise.
 //
 // Asm: VPCMPEQB, CPU Feature: AVX
 func (x Uint8x16) Equal(y Uint8x16) Mask8x16
 
-// Equal compares for equality.
+// Equal returns x equals y, elementwise.
 //
 // Asm: VPCMPEQB, CPU Feature: AVX2
 func (x Uint8x32) Equal(y Uint8x32) Mask8x32
 
-// Equal compares for equality.
+// Equal returns x equals y, elementwise.
 //
 // Asm: VPCMPEQB, CPU Feature: AVX512
 func (x Uint8x64) Equal(y Uint8x64) Mask8x64
 
-// Equal compares for equality.
+// Equal returns x equals y, elementwise.
 //
 // Asm: VPCMPEQW, CPU Feature: AVX
 func (x Uint16x8) Equal(y Uint16x8) Mask16x8
 
-// Equal compares for equality.
+// Equal returns x equals y, elementwise.
 //
 // Asm: VPCMPEQW, CPU Feature: AVX2
 func (x Uint16x16) Equal(y Uint16x16) Mask16x16
 
-// Equal compares for equality.
+// Equal returns x equals y, elementwise.
 //
 // Asm: VPCMPEQW, CPU Feature: AVX512
 func (x Uint16x32) Equal(y Uint16x32) Mask16x32
 
-// Equal compares for equality.
+// Equal returns x equals y, elementwise.
 //
 // Asm: VPCMPEQD, CPU Feature: AVX
 func (x Uint32x4) Equal(y Uint32x4) Mask32x4
 
-// Equal compares for equality.
+// Equal returns x equals y, elementwise.
 //
 // Asm: VPCMPEQD, CPU Feature: AVX2
 func (x Uint32x8) Equal(y Uint32x8) Mask32x8
 
-// Equal compares for equality.
+// Equal returns x equals y, elementwise.
 //
 // Asm: VPCMPEQD, CPU Feature: AVX512
 func (x Uint32x16) Equal(y Uint32x16) Mask32x16
 
-// Equal compares for equality.
+// Equal returns x equals y, elementwise.
 //
 // Asm: VPCMPEQQ, CPU Feature: AVX
 func (x Uint64x2) Equal(y Uint64x2) Mask64x2
 
-// Equal compares for equality.
+// Equal returns x equals y, elementwise.
 //
 // Asm: VPCMPEQQ, CPU Feature: AVX2
 func (x Uint64x4) Equal(y Uint64x4) Mask64x4
 
-// Equal compares for equality.
+// Equal returns x equals y, elementwise.
 //
 // Asm: VPCMPEQQ, CPU Feature: AVX512
 func (x Uint64x8) Equal(y Uint64x8) Mask64x8
 
-// Equal compares for equality.
+// Equal returns x equals y, elementwise.
 //
 // Asm: VCMPPS, CPU Feature: AVX
 func (x Float32x4) Equal(y Float32x4) Mask32x4
 
-// Equal compares for equality.
+// Equal returns x equals y, elementwise.
 //
 // Asm: VCMPPS, CPU Feature: AVX
 func (x Float32x8) Equal(y Float32x8) Mask32x8
 
-// Equal compares for equality.
+// Equal returns x equals y, elementwise.
 //
 // Asm: VCMPPS, CPU Feature: AVX512
 func (x Float32x16) Equal(y Float32x16) Mask32x16
 
-// Equal compares for equality.
+// Equal returns x equals y, elementwise.
 //
 // Asm: VCMPPD, CPU Feature: AVX
 func (x Float64x2) Equal(y Float64x2) Mask64x2
 
-// Equal compares for equality.
+// Equal returns x equals y, elementwise.
 //
 // Asm: VCMPPD, CPU Feature: AVX
 func (x Float64x4) Equal(y Float64x4) Mask64x4
 
-// Equal compares for equality.
+// Equal returns x equals y, elementwise.
 //
 // Asm: VCMPPD, CPU Feature: AVX512
 func (x Float64x8) Equal(y Float64x8) Mask64x8
@@ -3081,184 +3081,184 @@ func (x Uint64x8) GetLo() Uint64x4
 
 /* Greater */
 
-// Greater compares for greater than.
+// Greater returns x greater-than y, elementwise.
 //
 // Asm: VPCMPGTB, CPU Feature: AVX
 func (x Int8x16) Greater(y Int8x16) Mask8x16
 
-// Greater compares for greater than.
+// Greater returns x greater-than y, elementwise.
 //
 // Asm: VPCMPGTB, CPU Feature: AVX2
 func (x Int8x32) Greater(y Int8x32) Mask8x32
 
-// Greater compares for greater than.
+// Greater returns x greater-than y, elementwise.
 //
 // Asm: VPCMPGTB, CPU Feature: AVX512
 func (x Int8x64) Greater(y Int8x64) Mask8x64
 
-// Greater compares for greater than.
+// Greater returns x greater-than y, elementwise.
 //
 // Asm: VPCMPGTW, CPU Feature: AVX
 func (x Int16x8) Greater(y Int16x8) Mask16x8
 
-// Greater compares for greater than.
+// Greater returns x greater-than y, elementwise.
 //
 // Asm: VPCMPGTW, CPU Feature: AVX2
 func (x Int16x16) Greater(y Int16x16) Mask16x16
 
-// Greater compares for greater than.
+// Greater returns x greater-than y, elementwise.
 //
 // Asm: VPCMPGTW, CPU Feature: AVX512
 func (x Int16x32) Greater(y Int16x32) Mask16x32
 
-// Greater compares for greater than.
+// Greater returns x greater-than y, elementwise.
 //
 // Asm: VPCMPGTD, CPU Feature: AVX
 func (x Int32x4) Greater(y Int32x4) Mask32x4
 
-// Greater compares for greater than.
+// Greater returns x greater-than y, elementwise.
 //
 // Asm: VPCMPGTD, CPU Feature: AVX2
 func (x Int32x8) Greater(y Int32x8) Mask32x8
 
-// Greater compares for greater than.
+// Greater returns x greater-than y, elementwise.
 //
 // Asm: VPCMPGTD, CPU Feature: AVX512
 func (x Int32x16) Greater(y Int32x16) Mask32x16
 
-// Greater compares for greater than.
+// Greater returns x greater-than y, elementwise.
 //
 // Asm: VPCMPGTQ, CPU Feature: AVX
 func (x Int64x2) Greater(y Int64x2) Mask64x2
 
-// Greater compares for greater than.
+// Greater returns x greater-than y, elementwise.
 //
 // Asm: VPCMPGTQ, CPU Feature: AVX2
 func (x Int64x4) Greater(y Int64x4) Mask64x4
 
-// Greater compares for greater than.
+// Greater returns x greater-than y, elementwise.
 //
 // Asm: VPCMPGTQ, CPU Feature: AVX512
 func (x Int64x8) Greater(y Int64x8) Mask64x8
 
-// Greater compares for greater than.
+// Greater returns x greater-than y, elementwise.
 //
 // Asm: VCMPPS, CPU Feature: AVX
 func (x Float32x4) Greater(y Float32x4) Mask32x4
 
-// Greater compares for greater than.
+// Greater returns x greater-than y, elementwise.
 //
 // Asm: VCMPPS, CPU Feature: AVX
 func (x Float32x8) Greater(y Float32x8) Mask32x8
 
-// Greater compares for greater than.
+// Greater returns x greater-than y, elementwise.
 //
 // Asm: VCMPPS, CPU Feature: AVX512
 func (x Float32x16) Greater(y Float32x16) Mask32x16
 
-// Greater compares for greater than.
+// Greater returns x greater-than y, elementwise.
 //
 // Asm: VCMPPD, CPU Feature: AVX
 func (x Float64x2) Greater(y Float64x2) Mask64x2
 
-// Greater compares for greater than.
+// Greater returns x greater-than y, elementwise.
 //
 // Asm: VCMPPD, CPU Feature: AVX
 func (x Float64x4) Greater(y Float64x4) Mask64x4
 
-// Greater compares for greater than.
+// Greater returns x greater-than y, elementwise.
 //
 // Asm: VCMPPD, CPU Feature: AVX512
 func (x Float64x8) Greater(y Float64x8) Mask64x8
 
-// Greater compares for greater than.
+// Greater returns x greater-than y, elementwise.
 //
 // Asm: VPCMPUB, CPU Feature: AVX512
 func (x Uint8x64) Greater(y Uint8x64) Mask8x64
 
-// Greater compares for greater than.
+// Greater returns x greater-than y, elementwise.
 //
 // Asm: VPCMPUW, CPU Feature: AVX512
 func (x Uint16x32) Greater(y Uint16x32) Mask16x32
 
-// Greater compares for greater than.
+// Greater returns x greater-than y, elementwise.
 //
 // Asm: VPCMPUD, CPU Feature: AVX512
 func (x Uint32x16) Greater(y Uint32x16) Mask32x16
 
-// Greater compares for greater than.
+// Greater returns x greater-than y, elementwise.
 //
 // Asm: VPCMPUQ, CPU Feature: AVX512
 func (x Uint64x8) Greater(y Uint64x8) Mask64x8
 
 /* GreaterEqual */
 
-// GreaterEqual compares for greater than or equal.
+// GreaterEqual returns x greater-than-or-equals y, elementwise.
 //
 // Asm: VCMPPS, CPU Feature: AVX
 func (x Float32x4) GreaterEqual(y Float32x4) Mask32x4
 
-// GreaterEqual compares for greater than or equal.
+// GreaterEqual returns x greater-than-or-equals y, elementwise.
 //
 // Asm: VCMPPS, CPU Feature: AVX
 func (x Float32x8) GreaterEqual(y Float32x8) Mask32x8
 
-// GreaterEqual compares for greater than or equal.
+// GreaterEqual returns x greater-than-or-equals y, elementwise.
 //
 // Asm: VCMPPS, CPU Feature: AVX512
 func (x Float32x16) GreaterEqual(y Float32x16) Mask32x16
 
-// GreaterEqual compares for greater than or equal.
+// GreaterEqual returns x greater-than-or-equals y, elementwise.
 //
 // Asm: VCMPPD, CPU Feature: AVX
 func (x Float64x2) GreaterEqual(y Float64x2) Mask64x2
 
-// GreaterEqual compares for greater than or equal.
+// GreaterEqual returns x greater-than-or-equals y, elementwise.
 //
 // Asm: VCMPPD, CPU Feature: AVX
 func (x Float64x4) GreaterEqual(y Float64x4) Mask64x4
 
-// GreaterEqual compares for greater than or equal.
+// GreaterEqual returns x greater-than-or-equals y, elementwise.
 //
 // Asm: VCMPPD, CPU Feature: AVX512
 func (x Float64x8) GreaterEqual(y Float64x8) Mask64x8
 
-// GreaterEqual compares for greater than or equal.
+// GreaterEqual returns x greater-than-or-equals y, elementwise.
 //
 // Asm: VPCMPB, CPU Feature: AVX512
 func (x Int8x64) GreaterEqual(y Int8x64) Mask8x64
 
-// GreaterEqual compares for greater than or equal.
+// GreaterEqual returns x greater-than-or-equals y, elementwise.
 //
 // Asm: VPCMPW, CPU Feature: AVX512
 func (x Int16x32) GreaterEqual(y Int16x32) Mask16x32
 
-// GreaterEqual compares for greater than or equal.
+// GreaterEqual returns x greater-than-or-equals y, elementwise.
 //
 // Asm: VPCMPD, CPU Feature: AVX512
 func (x Int32x16) GreaterEqual(y Int32x16) Mask32x16
 
-// GreaterEqual compares for greater than or equal.
+// GreaterEqual returns x greater-than-or-equals y, elementwise.
 //
 // Asm: VPCMPQ, CPU Feature: AVX512
 func (x Int64x8) GreaterEqual(y Int64x8) Mask64x8
 
-// GreaterEqual compares for greater than or equal.
+// GreaterEqual returns x greater-than-or-equals y, elementwise.
 //
 // Asm: VPCMPUB, CPU Feature: AVX512
 func (x Uint8x64) GreaterEqual(y Uint8x64) Mask8x64
 
-// GreaterEqual compares for greater than or equal.
+// GreaterEqual returns x greater-than-or-equals y, elementwise.
 //
 // Asm: VPCMPUW, CPU Feature: AVX512
 func (x Uint16x32) GreaterEqual(y Uint16x32) Mask16x32
 
-// GreaterEqual compares for greater than or equal.
+// GreaterEqual returns x greater-than-or-equals y, elementwise.
 //
 // Asm: VPCMPUD, CPU Feature: AVX512
 func (x Uint32x16) GreaterEqual(y Uint32x16) Mask32x16
 
-// GreaterEqual compares for greater than or equal.
+// GreaterEqual returns x greater-than-or-equals y, elementwise.
 //
 // Asm: VPCMPUQ, CPU Feature: AVX512
 func (x Uint64x8) GreaterEqual(y Uint64x8) Mask64x8
@@ -3547,144 +3547,144 @@ func (x Uint64x8) LeadingZeros() Uint64x8
 
 /* Less */
 
-// Less compares for less than.
+// Less returns x less-than y, elementwise.
 //
 // Asm: VCMPPS, CPU Feature: AVX
 func (x Float32x4) Less(y Float32x4) Mask32x4
 
-// Less compares for less than.
+// Less returns x less-than y, elementwise.
 //
 // Asm: VCMPPS, CPU Feature: AVX
 func (x Float32x8) Less(y Float32x8) Mask32x8
 
-// Less compares for less than.
+// Less returns x less-than y, elementwise.
 //
 // Asm: VCMPPS, CPU Feature: AVX512
 func (x Float32x16) Less(y Float32x16) Mask32x16
 
-// Less compares for less than.
+// Less returns x less-than y, elementwise.
 //
 // Asm: VCMPPD, CPU Feature: AVX
 func (x Float64x2) Less(y Float64x2) Mask64x2
 
-// Less compares for less than.
+// Less returns x less-than y, elementwise.
 //
 // Asm: VCMPPD, CPU Feature: AVX
 func (x Float64x4) Less(y Float64x4) Mask64x4
 
-// Less compares for less than.
+// Less returns x less-than y, elementwise.
 //
 // Asm: VCMPPD, CPU Feature: AVX512
 func (x Float64x8) Less(y Float64x8) Mask64x8
 
-// Less compares for less than.
+// Less returns x less-than y, elementwise.
 //
 // Asm: VPCMPB, CPU Feature: AVX512
 func (x Int8x64) Less(y Int8x64) Mask8x64
 
-// Less compares for less than.
+// Less returns x less-than y, elementwise.
 //
 // Asm: VPCMPW, CPU Feature: AVX512
 func (x Int16x32) Less(y Int16x32) Mask16x32
 
-// Less compares for less than.
+// Less returns x less-than y, elementwise.
 //
 // Asm: VPCMPD, CPU Feature: AVX512
 func (x Int32x16) Less(y Int32x16) Mask32x16
 
-// Less compares for less than.
+// Less returns x less-than y, elementwise.
 //
 // Asm: VPCMPQ, CPU Feature: AVX512
 func (x Int64x8) Less(y Int64x8) Mask64x8
 
-// Less compares for less than.
+// Less returns x less-than y, elementwise.
 //
 // Asm: VPCMPUB, CPU Feature: AVX512
 func (x Uint8x64) Less(y Uint8x64) Mask8x64
 
-// Less compares for less than.
+// Less returns x less-than y, elementwise.
 //
 // Asm: VPCMPUW, CPU Feature: AVX512
 func (x Uint16x32) Less(y Uint16x32) Mask16x32
 
-// Less compares for less than.
+// Less returns x less-than y, elementwise.
 //
 // Asm: VPCMPUD, CPU Feature: AVX512
 func (x Uint32x16) Less(y Uint32x16) Mask32x16
 
-// Less compares for less than.
+// Less returns x less-than y, elementwise.
 //
 // Asm: VPCMPUQ, CPU Feature: AVX512
 func (x Uint64x8) Less(y Uint64x8) Mask64x8
 
 /* LessEqual */
 
-// LessEqual compares for less than or equal.
+// LessEqual returns x less-than-or-equals y, elementwise.
 //
 // Asm: VCMPPS, CPU Feature: AVX
 func (x Float32x4) LessEqual(y Float32x4) Mask32x4
 
-// LessEqual compares for less than or equal.
+// LessEqual returns x less-than-or-equals y, elementwise.
 //
 // Asm: VCMPPS, CPU Feature: AVX
 func (x Float32x8) LessEqual(y Float32x8) Mask32x8
 
-// LessEqual compares for less than or equal.
+// LessEqual returns x less-than-or-equals y, elementwise.
 //
 // Asm: VCMPPS, CPU Feature: AVX512
 func (x Float32x16) LessEqual(y Float32x16) Mask32x16
 
-// LessEqual compares for less than or equal.
+// LessEqual returns x less-than-or-equals y, elementwise.
 //
 // Asm: VCMPPD, CPU Feature: AVX
 func (x Float64x2) LessEqual(y Float64x2) Mask64x2
 
-// LessEqual compares for less than or equal.
+// LessEqual returns x less-than-or-equals y, elementwise.
 //
 // Asm: VCMPPD, CPU Feature: AVX
 func (x Float64x4) LessEqual(y Float64x4) Mask64x4
 
-// LessEqual compares for less than or equal.
+// LessEqual returns x less-than-or-equals y, elementwise.
 //
 // Asm: VCMPPD, CPU Feature: AVX512
 func (x Float64x8) LessEqual(y Float64x8) Mask64x8
 
-// LessEqual compares for less than or equal.
+// LessEqual returns x less-than-or-equals y, elementwise.
 //
 // Asm: VPCMPB, CPU Feature: AVX512
 func (x Int8x64) LessEqual(y Int8x64) Mask8x64
 
-// LessEqual compares for less than or equal.
+// LessEqual returns x less-than-or-equals y, elementwise.
 //
 // Asm: VPCMPW, CPU Feature: AVX512
 func (x Int16x32) LessEqual(y Int16x32) Mask16x32
 
-// LessEqual compares for less than or equal.
+// LessEqual returns x less-than-or-equals y, elementwise.
 //
 // Asm: VPCMPD, CPU Feature: AVX512
 func (x Int32x16) LessEqual(y Int32x16) Mask32x16
 
-// LessEqual compares for less than or equal.
+// LessEqual returns x less-than-or-equals y, elementwise.
 //
 // Asm: VPCMPQ, CPU Feature: AVX512
 func (x Int64x8) LessEqual(y Int64x8) Mask64x8
 
-// LessEqual compares for less than or equal.
+// LessEqual returns x less-than-or-equals y, elementwise.
 //
 // Asm: VPCMPUB, CPU Feature: AVX512
 func (x Uint8x64) LessEqual(y Uint8x64) Mask8x64
 
-// LessEqual compares for less than or equal.
+// LessEqual returns x less-than-or-equals y, elementwise.
 //
 // Asm: VPCMPUW, CPU Feature: AVX512
 func (x Uint16x32) LessEqual(y Uint16x32) Mask16x32
 
-// LessEqual compares for less than or equal.
+// LessEqual returns x less-than-or-equals y, elementwise.
 //
 // Asm: VPCMPUD, CPU Feature: AVX512
 func (x Uint32x16) LessEqual(y Uint32x16) Mask32x16
 
-// LessEqual compares for less than or equal.
+// LessEqual returns x less-than-or-equals y, elementwise.
 //
 // Asm: VPCMPUQ, CPU Feature: AVX512
 func (x Uint64x8) LessEqual(y Uint64x8) Mask64x8
@@ -4271,72 +4271,72 @@ func (x Float64x8) MulSubAdd(y Float64x8, z Float64x8) Float64x8
 
 /* NotEqual */
 
-// NotEqual compares for inequality.
+// NotEqual returns the elementwise comparison x != y as a Mask32x4.
 //
 // Asm: VCMPPS, CPU Feature: AVX
 func (x Float32x4) NotEqual(y Float32x4) Mask32x4
 
-// NotEqual compares for inequality.
+// NotEqual returns the elementwise comparison x != y as a Mask32x8.
 //
 // Asm: VCMPPS, CPU Feature: AVX
 func (x Float32x8) NotEqual(y Float32x8) Mask32x8
 
-// NotEqual compares for inequality.
+// NotEqual returns the elementwise comparison x != y as a Mask32x16.
 //
 // Asm: VCMPPS, CPU Feature: AVX512
 func (x Float32x16) NotEqual(y Float32x16) Mask32x16
 
-// NotEqual compares for inequality.
+// NotEqual returns the elementwise comparison x != y as a Mask64x2.
 //
 // Asm: VCMPPD, CPU Feature: AVX
 func (x Float64x2) NotEqual(y Float64x2) Mask64x2
 
-// NotEqual compares for inequality.
+// NotEqual returns the elementwise comparison x != y as a Mask64x4.
 //
 // Asm: VCMPPD, CPU Feature: AVX
 func (x Float64x4) NotEqual(y Float64x4) Mask64x4
 
-// NotEqual compares for inequality.
+// NotEqual returns the elementwise comparison x != y as a Mask64x8.
 //
 // Asm: VCMPPD, CPU Feature: AVX512
 func (x Float64x8) NotEqual(y Float64x8) Mask64x8
 
-// NotEqual compares for inequality.
+// NotEqual returns the elementwise comparison x != y as a Mask8x64.
 //
 // Asm: VPCMPB, CPU Feature: AVX512
 func (x Int8x64) NotEqual(y Int8x64) Mask8x64
 
-// NotEqual compares for inequality.
+// NotEqual returns the elementwise comparison x != y as a Mask16x32.
 //
 // Asm: VPCMPW, CPU Feature: AVX512
 func (x Int16x32) NotEqual(y Int16x32) Mask16x32
 
-// NotEqual compares for inequality.
+// NotEqual returns the elementwise comparison x != y as a Mask32x16.
 //
 // Asm: VPCMPD, CPU Feature: AVX512
 func (x Int32x16) NotEqual(y Int32x16) Mask32x16
 
-// NotEqual compares for inequality.
+// NotEqual returns the elementwise comparison x != y as a Mask64x8.
 //
 // Asm: VPCMPQ, CPU Feature: AVX512
 func (x Int64x8) NotEqual(y Int64x8) Mask64x8
 
-// NotEqual compares for inequality.
+// NotEqual returns the elementwise comparison x != y as a Mask8x64.
 //
 // Asm: VPCMPUB, CPU Feature: AVX512
 func (x Uint8x64) NotEqual(y Uint8x64) Mask8x64
 
-// NotEqual compares for inequality.
+// NotEqual returns the elementwise comparison x != y as a Mask16x32.
 //
 // Asm: VPCMPUW, CPU Feature: AVX512
 func (x Uint16x32) NotEqual(y Uint16x32) Mask16x32
 
-// NotEqual compares for inequality.
+// NotEqual returns the elementwise comparison x != y as a Mask32x16.
 //
 // Asm: VPCMPUD, CPU Feature: AVX512
 func (x Uint32x16) NotEqual(y Uint32x16) Mask32x16
 
-// NotEqual compares for inequality.
+// NotEqual returns the elementwise comparison x != y as a Mask64x8.
 //
 // Asm: VPCMPUQ, CPU Feature: AVX512
 func (x Uint64x8) NotEqual(y Uint64x8) Mask64x8