From 4eb5c6e07b56b75033d98941c8fadd3304ee4965 Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Wed, 17 Sep 2025 14:44:49 +0000 Subject: [PATCH] [dev.simd] cmd/compile, simd/_gen: add rewrite for const load ops This CL adds rewrite rules that take ops with a const imm8 and a load operand to their memory form. Change-Id: I74d0df48715ab48b88b04c8e1bfb3c6b8e528aeb Reviewed-on: https://go-review.googlesource.com/c/go/+/704635 TryBot-Bypass: Junyang Shao Reviewed-by: David Chase --- .../compile/internal/ssa/_gen/simdAMD64.rules | 138 + src/cmd/compile/internal/ssa/rewriteAMD64.go | 6742 +++++++++++++---- src/simd/_gen/simdgen/gen_simdrules.go | 18 +- 3 files changed, 5627 insertions(+), 1271 deletions(-) diff --git a/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules b/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules index 65f47eb369..b6a7394a73 100644 --- a/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules +++ b/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules @@ -1527,6 +1527,30 @@ (VPANDNQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPANDNQMasked128load {sym} [off] x ptr mask mem) (VPANDNQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPANDNQMasked256load {sym} [off] x ptr mask mem) (VPANDNQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPANDNQMasked512load {sym} [off] x ptr mask mem) +(VRNDSCALEPS128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VRNDSCALEPS128load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) +(VRNDSCALEPS256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VRNDSCALEPS256load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) +(VRNDSCALEPS512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VRNDSCALEPS512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) +(VRNDSCALEPD128 [c] l:(VMOVDQUload128 
{sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VRNDSCALEPD128load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) +(VRNDSCALEPD256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VRNDSCALEPD256load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) +(VRNDSCALEPD512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VRNDSCALEPD512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) +(VRNDSCALEPSMasked128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VRNDSCALEPSMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) +(VRNDSCALEPSMasked256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VRNDSCALEPSMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) +(VRNDSCALEPSMasked512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VRNDSCALEPSMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) +(VRNDSCALEPDMasked128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VRNDSCALEPDMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) +(VRNDSCALEPDMasked256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VRNDSCALEPDMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) +(VRNDSCALEPDMasked512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VRNDSCALEPDMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) +(VREDUCEPS128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VREDUCEPS128load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) +(VREDUCEPS256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VREDUCEPS256load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) +(VREDUCEPS512 [c] 
l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VREDUCEPS512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) +(VREDUCEPD128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VREDUCEPD128load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) +(VREDUCEPD256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VREDUCEPD256load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) +(VREDUCEPD512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VREDUCEPD512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) +(VREDUCEPSMasked128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VREDUCEPSMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) +(VREDUCEPSMasked256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VREDUCEPSMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) +(VREDUCEPSMasked512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VREDUCEPSMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) +(VREDUCEPDMasked128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VREDUCEPDMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) +(VREDUCEPDMasked256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VREDUCEPDMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) +(VREDUCEPDMasked512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VREDUCEPDMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) (VPACKSSDW512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPACKSSDW512load {sym} [off] x ptr mem) (VPACKSSDWMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && 
canMergeLoad(v, l) && clobber(l) => (VPACKSSDWMasked128load {sym} [off] x ptr mask mem) (VPACKSSDWMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPACKSSDWMasked256load {sym} [off] x ptr mask mem) @@ -1555,8 +1579,44 @@ (VDIVPDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VDIVPDMasked512load {sym} [off] x ptr mask mem) (VPCMPEQD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPCMPEQD512load {sym} [off] x ptr mem) (VPCMPEQQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPCMPEQQ512load {sym} [off] x ptr mem) +(VCMPPS512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VCMPPS512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem) +(VCMPPD512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VCMPPD512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem) +(VCMPPSMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VCMPPSMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) +(VCMPPSMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VCMPPSMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) +(VCMPPSMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VCMPPSMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) +(VCMPPDMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VCMPPDMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) +(VCMPPDMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VCMPPDMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) +(VCMPPDMasked512 [c] x l:(VMOVDQUload512 
{sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VCMPPDMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) +(VPCMPDMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPCMPDMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) +(VPCMPDMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPCMPDMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) +(VPCMPDMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPCMPDMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) +(VPCMPQMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPCMPQMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) +(VPCMPQMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPCMPQMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) +(VPCMPQMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPCMPQMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) +(VPCMPUDMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPCMPUDMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) +(VPCMPUDMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPCMPUDMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) +(VPCMPUDMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPCMPUDMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) +(VPCMPUQMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPCMPUQMasked128load {sym} 
[makeValAndOff(int32(int8(c)),off)] x ptr mask mem) +(VPCMPUQMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPCMPUQMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) +(VPCMPUQMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPCMPUQMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) +(VGF2P8AFFINEQB128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VGF2P8AFFINEQB128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem) +(VGF2P8AFFINEQB256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VGF2P8AFFINEQB256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem) +(VGF2P8AFFINEQB512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VGF2P8AFFINEQB512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem) +(VGF2P8AFFINEINVQB128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VGF2P8AFFINEINVQB128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem) +(VGF2P8AFFINEINVQB256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VGF2P8AFFINEINVQB256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem) +(VGF2P8AFFINEINVQB512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VGF2P8AFFINEINVQB512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem) +(VGF2P8AFFINEINVQBMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VGF2P8AFFINEINVQBMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) +(VGF2P8AFFINEINVQBMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VGF2P8AFFINEINVQBMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) +(VGF2P8AFFINEINVQBMasked512 [c] x 
l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VGF2P8AFFINEINVQBMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) +(VGF2P8AFFINEQBMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VGF2P8AFFINEQBMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) +(VGF2P8AFFINEQBMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VGF2P8AFFINEQBMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) +(VGF2P8AFFINEQBMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VGF2P8AFFINEQBMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) (VPCMPGTD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPCMPGTD512load {sym} [off] x ptr mem) (VPCMPGTQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPCMPGTQ512load {sym} [off] x ptr mem) +(VPCMPUD512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPCMPUD512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem) +(VPCMPUQ512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPCMPUQ512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem) +(VPCMPD512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPCMPD512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem) +(VPCMPQ512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPCMPQ512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem) (VPUNPCKHDQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPUNPCKHDQ512load {sym} [off] x ptr mem) (VPUNPCKHQDQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPUNPCKHQDQ512load {sym} [off] x ptr mem) 
(VPUNPCKLDQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPUNPCKLDQ512load {sym} [off] x ptr mem) @@ -1733,6 +1793,10 @@ (VPERMI2QMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPERMI2QMasked256load {sym} [off] x y ptr mask mem) (VPERMI2PDMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPERMI2PDMasked512load {sym} [off] x y ptr mask mem) (VPERMI2QMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPERMI2QMasked512load {sym} [off] x y ptr mask mem) +(VPSHUFD512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHUFD512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) +(VPSHUFDMasked256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHUFDMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) +(VPSHUFDMasked512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHUFDMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) +(VPSHUFDMasked128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHUFDMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) (VPERMPSMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPERMPSMasked256load {sym} [off] x ptr mask mem) (VPERMDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPERMDMasked256load {sym} [off] x ptr mask mem) (VPERMPSMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPERMPSMasked512load {sym} [off] x ptr mask mem) @@ -1761,6 +1825,30 @@ (VRSQRT14PDMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VRSQRT14PDMasked128load {sym} [off] ptr mask mem) 
(VRSQRT14PDMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VRSQRT14PDMasked256load {sym} [off] ptr mask mem) (VRSQRT14PDMasked512 l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VRSQRT14PDMasked512load {sym} [off] ptr mask mem) +(VPROLD128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPROLD128load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) +(VPROLD256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPROLD256load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) +(VPROLD512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPROLD512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) +(VPROLQ128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPROLQ128load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) +(VPROLQ256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPROLQ256load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) +(VPROLQ512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPROLQ512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) +(VPROLDMasked128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPROLDMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) +(VPROLDMasked256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPROLDMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) +(VPROLDMasked512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPROLDMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) +(VPROLQMasked128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPROLQMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] ptr 
mask mem) +(VPROLQMasked256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPROLQMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) +(VPROLQMasked512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPROLQMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) +(VPRORD128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPRORD128load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) +(VPRORD256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPRORD256load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) +(VPRORD512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPRORD512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) +(VPRORQ128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPRORQ128load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) +(VPRORQ256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPRORQ256load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) +(VPRORQ512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPRORQ512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) +(VPRORDMasked128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPRORDMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) +(VPRORDMasked256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPRORDMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) +(VPRORDMasked512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPRORDMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) +(VPRORQMasked128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => 
(VPRORQMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) +(VPRORQMasked256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPRORQMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) +(VPRORQMasked512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPRORQMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) (VPROLVD128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPROLVD128load {sym} [off] x ptr mem) (VPROLVD256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPROLVD256load {sym} [off] x ptr mem) (VPROLVD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPROLVD512load {sym} [off] x ptr mem) @@ -1797,6 +1885,30 @@ (VSCALEFPDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VSCALEFPDMasked128load {sym} [off] x ptr mask mem) (VSCALEFPDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VSCALEFPDMasked256load {sym} [off] x ptr mask mem) (VSCALEFPDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VSCALEFPDMasked512load {sym} [off] x ptr mask mem) +(VPSHLDD128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHLDD128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem) +(VPSHLDD256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHLDD256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem) +(VPSHLDD512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHLDD512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem) +(VPSHLDQ128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHLDQ128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr 
mem) +(VPSHLDQ256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHLDQ256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem) +(VPSHLDQ512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHLDQ512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem) +(VPSHLDDMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHLDDMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) +(VPSHLDDMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHLDDMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) +(VPSHLDDMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHLDDMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) +(VPSHLDQMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHLDQMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) +(VPSHLDQMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHLDQMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) +(VPSHLDQMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHLDQMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) +(VPSHRDD128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHRDD128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem) +(VPSHRDD256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHRDD256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem) +(VPSHRDD512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHRDD512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem) +(VPSHRDQ128 [c] x 
l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHRDQ128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem) +(VPSHRDQ256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHRDQ256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem) +(VPSHRDQ512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHRDQ512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem) +(VPSHRDDMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHRDDMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) +(VPSHRDDMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHRDDMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) +(VPSHRDDMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHRDDMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) +(VPSHRDQMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHRDQMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) +(VPSHRDQMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHRDQMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) +(VPSHRDQMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHRDQMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) (VPSLLVD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSLLVD512load {sym} [off] x ptr mem) (VPSLLVQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSLLVQ512load {sym} [off] x ptr mem) (VPSHLDVD128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHLDVD128load {sym} 
[off] x y ptr mem) @@ -1881,3 +1993,29 @@ (VPXORQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPXORQMasked512load {sym} [off] x ptr mask mem) (VPBLENDMDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPBLENDMDMasked512load {sym} [off] x ptr mask mem) (VPBLENDMQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPBLENDMQMasked512load {sym} [off] x ptr mask mem) +(VPSLLD512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSLLD512constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) +(VPSLLQ512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSLLQ512constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) +(VPSLLDMasked128const [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSLLDMasked128constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) +(VPSLLDMasked256const [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSLLDMasked256constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) +(VPSLLDMasked512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSLLDMasked512constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) +(VPSLLQMasked128const [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSLLQMasked128constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) +(VPSLLQMasked256const [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSLLQMasked256constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) +(VPSLLQMasked512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSLLQMasked512constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr 
mask mem) +(VPSRLD512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSRLD512constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) +(VPSRLQ512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSRLQ512constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) +(VPSRAD512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSRAD512constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) +(VPSRAQ128const [c] l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSRAQ128constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) +(VPSRAQ256const [c] l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSRAQ256constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) +(VPSRAQ512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSRAQ512constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) +(VPSRLDMasked128const [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRLDMasked128constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) +(VPSRLDMasked256const [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRLDMasked256constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) +(VPSRLDMasked512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRLDMasked512constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) +(VPSRLQMasked128const [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRLQMasked128constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) +(VPSRLQMasked256const [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRLQMasked256constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask 
mem) +(VPSRLQMasked512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRLQMasked512constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) +(VPSRADMasked128const [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRADMasked128constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) +(VPSRADMasked256const [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRADMasked256constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) +(VPSRADMasked512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRADMasked512constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) +(VPSRAQMasked128const [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRAQMasked128constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) +(VPSRAQMasked256const [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRAQMasked256constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) +(VPSRAQMasked512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRAQMasked512constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go index 471fa0c201..c0f5b4086a 100644 --- a/src/cmd/compile/internal/ssa/rewriteAMD64.go +++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go @@ -523,6 +523,22 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpAMD64VADDPSMasked256(v) case OpAMD64VADDPSMasked512: return rewriteValueAMD64_OpAMD64VADDPSMasked512(v) + case OpAMD64VCMPPD512: + return rewriteValueAMD64_OpAMD64VCMPPD512(v) + case OpAMD64VCMPPDMasked128: + return rewriteValueAMD64_OpAMD64VCMPPDMasked128(v) + case OpAMD64VCMPPDMasked256: + return 
rewriteValueAMD64_OpAMD64VCMPPDMasked256(v) + case OpAMD64VCMPPDMasked512: + return rewriteValueAMD64_OpAMD64VCMPPDMasked512(v) + case OpAMD64VCMPPS512: + return rewriteValueAMD64_OpAMD64VCMPPS512(v) + case OpAMD64VCMPPSMasked128: + return rewriteValueAMD64_OpAMD64VCMPPSMasked128(v) + case OpAMD64VCMPPSMasked256: + return rewriteValueAMD64_OpAMD64VCMPPSMasked256(v) + case OpAMD64VCMPPSMasked512: + return rewriteValueAMD64_OpAMD64VCMPPSMasked512(v) case OpAMD64VCVTPS2UDQ128: return rewriteValueAMD64_OpAMD64VCVTPS2UDQ128(v) case OpAMD64VCVTPS2UDQ256: @@ -631,6 +647,30 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpAMD64VFMSUBADD213PSMasked256(v) case OpAMD64VFMSUBADD213PSMasked512: return rewriteValueAMD64_OpAMD64VFMSUBADD213PSMasked512(v) + case OpAMD64VGF2P8AFFINEINVQB128: + return rewriteValueAMD64_OpAMD64VGF2P8AFFINEINVQB128(v) + case OpAMD64VGF2P8AFFINEINVQB256: + return rewriteValueAMD64_OpAMD64VGF2P8AFFINEINVQB256(v) + case OpAMD64VGF2P8AFFINEINVQB512: + return rewriteValueAMD64_OpAMD64VGF2P8AFFINEINVQB512(v) + case OpAMD64VGF2P8AFFINEINVQBMasked128: + return rewriteValueAMD64_OpAMD64VGF2P8AFFINEINVQBMasked128(v) + case OpAMD64VGF2P8AFFINEINVQBMasked256: + return rewriteValueAMD64_OpAMD64VGF2P8AFFINEINVQBMasked256(v) + case OpAMD64VGF2P8AFFINEINVQBMasked512: + return rewriteValueAMD64_OpAMD64VGF2P8AFFINEINVQBMasked512(v) + case OpAMD64VGF2P8AFFINEQB128: + return rewriteValueAMD64_OpAMD64VGF2P8AFFINEQB128(v) + case OpAMD64VGF2P8AFFINEQB256: + return rewriteValueAMD64_OpAMD64VGF2P8AFFINEQB256(v) + case OpAMD64VGF2P8AFFINEQB512: + return rewriteValueAMD64_OpAMD64VGF2P8AFFINEQB512(v) + case OpAMD64VGF2P8AFFINEQBMasked128: + return rewriteValueAMD64_OpAMD64VGF2P8AFFINEQBMasked128(v) + case OpAMD64VGF2P8AFFINEQBMasked256: + return rewriteValueAMD64_OpAMD64VGF2P8AFFINEQBMasked256(v) + case OpAMD64VGF2P8AFFINEQBMasked512: + return rewriteValueAMD64_OpAMD64VGF2P8AFFINEQBMasked512(v) case OpAMD64VMAXPD512: return 
rewriteValueAMD64_OpAMD64VMAXPD512(v) case OpAMD64VMAXPDMasked128: @@ -807,6 +847,14 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpAMD64VPBROADCASTW256(v) case OpAMD64VPBROADCASTW512: return rewriteValueAMD64_OpAMD64VPBROADCASTW512(v) + case OpAMD64VPCMPD512: + return rewriteValueAMD64_OpAMD64VPCMPD512(v) + case OpAMD64VPCMPDMasked128: + return rewriteValueAMD64_OpAMD64VPCMPDMasked128(v) + case OpAMD64VPCMPDMasked256: + return rewriteValueAMD64_OpAMD64VPCMPDMasked256(v) + case OpAMD64VPCMPDMasked512: + return rewriteValueAMD64_OpAMD64VPCMPDMasked512(v) case OpAMD64VPCMPEQD512: return rewriteValueAMD64_OpAMD64VPCMPEQD512(v) case OpAMD64VPCMPEQQ512: @@ -815,6 +863,30 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpAMD64VPCMPGTD512(v) case OpAMD64VPCMPGTQ512: return rewriteValueAMD64_OpAMD64VPCMPGTQ512(v) + case OpAMD64VPCMPQ512: + return rewriteValueAMD64_OpAMD64VPCMPQ512(v) + case OpAMD64VPCMPQMasked128: + return rewriteValueAMD64_OpAMD64VPCMPQMasked128(v) + case OpAMD64VPCMPQMasked256: + return rewriteValueAMD64_OpAMD64VPCMPQMasked256(v) + case OpAMD64VPCMPQMasked512: + return rewriteValueAMD64_OpAMD64VPCMPQMasked512(v) + case OpAMD64VPCMPUD512: + return rewriteValueAMD64_OpAMD64VPCMPUD512(v) + case OpAMD64VPCMPUDMasked128: + return rewriteValueAMD64_OpAMD64VPCMPUDMasked128(v) + case OpAMD64VPCMPUDMasked256: + return rewriteValueAMD64_OpAMD64VPCMPUDMasked256(v) + case OpAMD64VPCMPUDMasked512: + return rewriteValueAMD64_OpAMD64VPCMPUDMasked512(v) + case OpAMD64VPCMPUQ512: + return rewriteValueAMD64_OpAMD64VPCMPUQ512(v) + case OpAMD64VPCMPUQMasked128: + return rewriteValueAMD64_OpAMD64VPCMPUQMasked128(v) + case OpAMD64VPCMPUQMasked256: + return rewriteValueAMD64_OpAMD64VPCMPUQMasked256(v) + case OpAMD64VPCMPUQMasked512: + return rewriteValueAMD64_OpAMD64VPCMPUQMasked512(v) case OpAMD64VPDPBUSD512: return rewriteValueAMD64_OpAMD64VPDPBUSD512(v) case OpAMD64VPDPBUSDMasked128: @@ -1115,6 +1187,30 @@ func rewriteValueAMD64(v 
*Value) bool { return rewriteValueAMD64_OpAMD64VPORQMasked256(v) case OpAMD64VPORQMasked512: return rewriteValueAMD64_OpAMD64VPORQMasked512(v) + case OpAMD64VPROLD128: + return rewriteValueAMD64_OpAMD64VPROLD128(v) + case OpAMD64VPROLD256: + return rewriteValueAMD64_OpAMD64VPROLD256(v) + case OpAMD64VPROLD512: + return rewriteValueAMD64_OpAMD64VPROLD512(v) + case OpAMD64VPROLDMasked128: + return rewriteValueAMD64_OpAMD64VPROLDMasked128(v) + case OpAMD64VPROLDMasked256: + return rewriteValueAMD64_OpAMD64VPROLDMasked256(v) + case OpAMD64VPROLDMasked512: + return rewriteValueAMD64_OpAMD64VPROLDMasked512(v) + case OpAMD64VPROLQ128: + return rewriteValueAMD64_OpAMD64VPROLQ128(v) + case OpAMD64VPROLQ256: + return rewriteValueAMD64_OpAMD64VPROLQ256(v) + case OpAMD64VPROLQ512: + return rewriteValueAMD64_OpAMD64VPROLQ512(v) + case OpAMD64VPROLQMasked128: + return rewriteValueAMD64_OpAMD64VPROLQMasked128(v) + case OpAMD64VPROLQMasked256: + return rewriteValueAMD64_OpAMD64VPROLQMasked256(v) + case OpAMD64VPROLQMasked512: + return rewriteValueAMD64_OpAMD64VPROLQMasked512(v) case OpAMD64VPROLVD128: return rewriteValueAMD64_OpAMD64VPROLVD128(v) case OpAMD64VPROLVD256: @@ -1139,6 +1235,30 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpAMD64VPROLVQMasked256(v) case OpAMD64VPROLVQMasked512: return rewriteValueAMD64_OpAMD64VPROLVQMasked512(v) + case OpAMD64VPRORD128: + return rewriteValueAMD64_OpAMD64VPRORD128(v) + case OpAMD64VPRORD256: + return rewriteValueAMD64_OpAMD64VPRORD256(v) + case OpAMD64VPRORD512: + return rewriteValueAMD64_OpAMD64VPRORD512(v) + case OpAMD64VPRORDMasked128: + return rewriteValueAMD64_OpAMD64VPRORDMasked128(v) + case OpAMD64VPRORDMasked256: + return rewriteValueAMD64_OpAMD64VPRORDMasked256(v) + case OpAMD64VPRORDMasked512: + return rewriteValueAMD64_OpAMD64VPRORDMasked512(v) + case OpAMD64VPRORQ128: + return rewriteValueAMD64_OpAMD64VPRORQ128(v) + case OpAMD64VPRORQ256: + return rewriteValueAMD64_OpAMD64VPRORQ256(v) + case 
OpAMD64VPRORQ512: + return rewriteValueAMD64_OpAMD64VPRORQ512(v) + case OpAMD64VPRORQMasked128: + return rewriteValueAMD64_OpAMD64VPRORQMasked128(v) + case OpAMD64VPRORQMasked256: + return rewriteValueAMD64_OpAMD64VPRORQMasked256(v) + case OpAMD64VPRORQMasked512: + return rewriteValueAMD64_OpAMD64VPRORQMasked512(v) case OpAMD64VPRORVD128: return rewriteValueAMD64_OpAMD64VPRORVD128(v) case OpAMD64VPRORVD256: @@ -1163,6 +1283,30 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpAMD64VPRORVQMasked256(v) case OpAMD64VPRORVQMasked512: return rewriteValueAMD64_OpAMD64VPRORVQMasked512(v) + case OpAMD64VPSHLDD128: + return rewriteValueAMD64_OpAMD64VPSHLDD128(v) + case OpAMD64VPSHLDD256: + return rewriteValueAMD64_OpAMD64VPSHLDD256(v) + case OpAMD64VPSHLDD512: + return rewriteValueAMD64_OpAMD64VPSHLDD512(v) + case OpAMD64VPSHLDDMasked128: + return rewriteValueAMD64_OpAMD64VPSHLDDMasked128(v) + case OpAMD64VPSHLDDMasked256: + return rewriteValueAMD64_OpAMD64VPSHLDDMasked256(v) + case OpAMD64VPSHLDDMasked512: + return rewriteValueAMD64_OpAMD64VPSHLDDMasked512(v) + case OpAMD64VPSHLDQ128: + return rewriteValueAMD64_OpAMD64VPSHLDQ128(v) + case OpAMD64VPSHLDQ256: + return rewriteValueAMD64_OpAMD64VPSHLDQ256(v) + case OpAMD64VPSHLDQ512: + return rewriteValueAMD64_OpAMD64VPSHLDQ512(v) + case OpAMD64VPSHLDQMasked128: + return rewriteValueAMD64_OpAMD64VPSHLDQMasked128(v) + case OpAMD64VPSHLDQMasked256: + return rewriteValueAMD64_OpAMD64VPSHLDQMasked256(v) + case OpAMD64VPSHLDQMasked512: + return rewriteValueAMD64_OpAMD64VPSHLDQMasked512(v) case OpAMD64VPSHLDVD128: return rewriteValueAMD64_OpAMD64VPSHLDVD128(v) case OpAMD64VPSHLDVD256: @@ -1187,6 +1331,30 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpAMD64VPSHLDVQMasked256(v) case OpAMD64VPSHLDVQMasked512: return rewriteValueAMD64_OpAMD64VPSHLDVQMasked512(v) + case OpAMD64VPSHRDD128: + return rewriteValueAMD64_OpAMD64VPSHRDD128(v) + case OpAMD64VPSHRDD256: + return 
rewriteValueAMD64_OpAMD64VPSHRDD256(v) + case OpAMD64VPSHRDD512: + return rewriteValueAMD64_OpAMD64VPSHRDD512(v) + case OpAMD64VPSHRDDMasked128: + return rewriteValueAMD64_OpAMD64VPSHRDDMasked128(v) + case OpAMD64VPSHRDDMasked256: + return rewriteValueAMD64_OpAMD64VPSHRDDMasked256(v) + case OpAMD64VPSHRDDMasked512: + return rewriteValueAMD64_OpAMD64VPSHRDDMasked512(v) + case OpAMD64VPSHRDQ128: + return rewriteValueAMD64_OpAMD64VPSHRDQ128(v) + case OpAMD64VPSHRDQ256: + return rewriteValueAMD64_OpAMD64VPSHRDQ256(v) + case OpAMD64VPSHRDQ512: + return rewriteValueAMD64_OpAMD64VPSHRDQ512(v) + case OpAMD64VPSHRDQMasked128: + return rewriteValueAMD64_OpAMD64VPSHRDQMasked128(v) + case OpAMD64VPSHRDQMasked256: + return rewriteValueAMD64_OpAMD64VPSHRDQMasked256(v) + case OpAMD64VPSHRDQMasked512: + return rewriteValueAMD64_OpAMD64VPSHRDQMasked512(v) case OpAMD64VPSHRDVD128: return rewriteValueAMD64_OpAMD64VPSHRDVD128(v) case OpAMD64VPSHRDVD256: @@ -1211,30 +1379,54 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpAMD64VPSHRDVQMasked256(v) case OpAMD64VPSHRDVQMasked512: return rewriteValueAMD64_OpAMD64VPSHRDVQMasked512(v) + case OpAMD64VPSHUFD512: + return rewriteValueAMD64_OpAMD64VPSHUFD512(v) + case OpAMD64VPSHUFDMasked128: + return rewriteValueAMD64_OpAMD64VPSHUFDMasked128(v) + case OpAMD64VPSHUFDMasked256: + return rewriteValueAMD64_OpAMD64VPSHUFDMasked256(v) + case OpAMD64VPSHUFDMasked512: + return rewriteValueAMD64_OpAMD64VPSHUFDMasked512(v) case OpAMD64VPSLLD128: return rewriteValueAMD64_OpAMD64VPSLLD128(v) case OpAMD64VPSLLD256: return rewriteValueAMD64_OpAMD64VPSLLD256(v) case OpAMD64VPSLLD512: return rewriteValueAMD64_OpAMD64VPSLLD512(v) + case OpAMD64VPSLLD512const: + return rewriteValueAMD64_OpAMD64VPSLLD512const(v) case OpAMD64VPSLLDMasked128: return rewriteValueAMD64_OpAMD64VPSLLDMasked128(v) + case OpAMD64VPSLLDMasked128const: + return rewriteValueAMD64_OpAMD64VPSLLDMasked128const(v) case OpAMD64VPSLLDMasked256: return 
rewriteValueAMD64_OpAMD64VPSLLDMasked256(v) + case OpAMD64VPSLLDMasked256const: + return rewriteValueAMD64_OpAMD64VPSLLDMasked256const(v) case OpAMD64VPSLLDMasked512: return rewriteValueAMD64_OpAMD64VPSLLDMasked512(v) + case OpAMD64VPSLLDMasked512const: + return rewriteValueAMD64_OpAMD64VPSLLDMasked512const(v) case OpAMD64VPSLLQ128: return rewriteValueAMD64_OpAMD64VPSLLQ128(v) case OpAMD64VPSLLQ256: return rewriteValueAMD64_OpAMD64VPSLLQ256(v) case OpAMD64VPSLLQ512: return rewriteValueAMD64_OpAMD64VPSLLQ512(v) + case OpAMD64VPSLLQ512const: + return rewriteValueAMD64_OpAMD64VPSLLQ512const(v) case OpAMD64VPSLLQMasked128: return rewriteValueAMD64_OpAMD64VPSLLQMasked128(v) + case OpAMD64VPSLLQMasked128const: + return rewriteValueAMD64_OpAMD64VPSLLQMasked128const(v) case OpAMD64VPSLLQMasked256: return rewriteValueAMD64_OpAMD64VPSLLQMasked256(v) + case OpAMD64VPSLLQMasked256const: + return rewriteValueAMD64_OpAMD64VPSLLQMasked256const(v) case OpAMD64VPSLLQMasked512: return rewriteValueAMD64_OpAMD64VPSLLQMasked512(v) + case OpAMD64VPSLLQMasked512const: + return rewriteValueAMD64_OpAMD64VPSLLQMasked512const(v) case OpAMD64VPSLLVD512: return rewriteValueAMD64_OpAMD64VPSLLVD512(v) case OpAMD64VPSLLVDMasked128: @@ -1269,24 +1461,44 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpAMD64VPSRAD256(v) case OpAMD64VPSRAD512: return rewriteValueAMD64_OpAMD64VPSRAD512(v) + case OpAMD64VPSRAD512const: + return rewriteValueAMD64_OpAMD64VPSRAD512const(v) case OpAMD64VPSRADMasked128: return rewriteValueAMD64_OpAMD64VPSRADMasked128(v) + case OpAMD64VPSRADMasked128const: + return rewriteValueAMD64_OpAMD64VPSRADMasked128const(v) case OpAMD64VPSRADMasked256: return rewriteValueAMD64_OpAMD64VPSRADMasked256(v) + case OpAMD64VPSRADMasked256const: + return rewriteValueAMD64_OpAMD64VPSRADMasked256const(v) case OpAMD64VPSRADMasked512: return rewriteValueAMD64_OpAMD64VPSRADMasked512(v) + case OpAMD64VPSRADMasked512const: + return 
rewriteValueAMD64_OpAMD64VPSRADMasked512const(v) case OpAMD64VPSRAQ128: return rewriteValueAMD64_OpAMD64VPSRAQ128(v) + case OpAMD64VPSRAQ128const: + return rewriteValueAMD64_OpAMD64VPSRAQ128const(v) case OpAMD64VPSRAQ256: return rewriteValueAMD64_OpAMD64VPSRAQ256(v) + case OpAMD64VPSRAQ256const: + return rewriteValueAMD64_OpAMD64VPSRAQ256const(v) case OpAMD64VPSRAQ512: return rewriteValueAMD64_OpAMD64VPSRAQ512(v) + case OpAMD64VPSRAQ512const: + return rewriteValueAMD64_OpAMD64VPSRAQ512const(v) case OpAMD64VPSRAQMasked128: return rewriteValueAMD64_OpAMD64VPSRAQMasked128(v) + case OpAMD64VPSRAQMasked128const: + return rewriteValueAMD64_OpAMD64VPSRAQMasked128const(v) case OpAMD64VPSRAQMasked256: return rewriteValueAMD64_OpAMD64VPSRAQMasked256(v) + case OpAMD64VPSRAQMasked256const: + return rewriteValueAMD64_OpAMD64VPSRAQMasked256const(v) case OpAMD64VPSRAQMasked512: return rewriteValueAMD64_OpAMD64VPSRAQMasked512(v) + case OpAMD64VPSRAQMasked512const: + return rewriteValueAMD64_OpAMD64VPSRAQMasked512const(v) case OpAMD64VPSRAVD512: return rewriteValueAMD64_OpAMD64VPSRAVD512(v) case OpAMD64VPSRAVDMasked128: @@ -1319,6 +1531,22 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpAMD64VPSRAWMasked256(v) case OpAMD64VPSRAWMasked512: return rewriteValueAMD64_OpAMD64VPSRAWMasked512(v) + case OpAMD64VPSRLD512const: + return rewriteValueAMD64_OpAMD64VPSRLD512const(v) + case OpAMD64VPSRLDMasked128const: + return rewriteValueAMD64_OpAMD64VPSRLDMasked128const(v) + case OpAMD64VPSRLDMasked256const: + return rewriteValueAMD64_OpAMD64VPSRLDMasked256const(v) + case OpAMD64VPSRLDMasked512const: + return rewriteValueAMD64_OpAMD64VPSRLDMasked512const(v) + case OpAMD64VPSRLQ512const: + return rewriteValueAMD64_OpAMD64VPSRLQ512const(v) + case OpAMD64VPSRLQMasked128const: + return rewriteValueAMD64_OpAMD64VPSRLQMasked128const(v) + case OpAMD64VPSRLQMasked256const: + return rewriteValueAMD64_OpAMD64VPSRLQMasked256const(v) + case OpAMD64VPSRLQMasked512const: + return 
rewriteValueAMD64_OpAMD64VPSRLQMasked512const(v) case OpAMD64VPSRLVD512: return rewriteValueAMD64_OpAMD64VPSRLVD512(v) case OpAMD64VPSRLVDMasked128: @@ -1395,6 +1623,54 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpAMD64VRCP14PSMasked256(v) case OpAMD64VRCP14PSMasked512: return rewriteValueAMD64_OpAMD64VRCP14PSMasked512(v) + case OpAMD64VREDUCEPD128: + return rewriteValueAMD64_OpAMD64VREDUCEPD128(v) + case OpAMD64VREDUCEPD256: + return rewriteValueAMD64_OpAMD64VREDUCEPD256(v) + case OpAMD64VREDUCEPD512: + return rewriteValueAMD64_OpAMD64VREDUCEPD512(v) + case OpAMD64VREDUCEPDMasked128: + return rewriteValueAMD64_OpAMD64VREDUCEPDMasked128(v) + case OpAMD64VREDUCEPDMasked256: + return rewriteValueAMD64_OpAMD64VREDUCEPDMasked256(v) + case OpAMD64VREDUCEPDMasked512: + return rewriteValueAMD64_OpAMD64VREDUCEPDMasked512(v) + case OpAMD64VREDUCEPS128: + return rewriteValueAMD64_OpAMD64VREDUCEPS128(v) + case OpAMD64VREDUCEPS256: + return rewriteValueAMD64_OpAMD64VREDUCEPS256(v) + case OpAMD64VREDUCEPS512: + return rewriteValueAMD64_OpAMD64VREDUCEPS512(v) + case OpAMD64VREDUCEPSMasked128: + return rewriteValueAMD64_OpAMD64VREDUCEPSMasked128(v) + case OpAMD64VREDUCEPSMasked256: + return rewriteValueAMD64_OpAMD64VREDUCEPSMasked256(v) + case OpAMD64VREDUCEPSMasked512: + return rewriteValueAMD64_OpAMD64VREDUCEPSMasked512(v) + case OpAMD64VRNDSCALEPD128: + return rewriteValueAMD64_OpAMD64VRNDSCALEPD128(v) + case OpAMD64VRNDSCALEPD256: + return rewriteValueAMD64_OpAMD64VRNDSCALEPD256(v) + case OpAMD64VRNDSCALEPD512: + return rewriteValueAMD64_OpAMD64VRNDSCALEPD512(v) + case OpAMD64VRNDSCALEPDMasked128: + return rewriteValueAMD64_OpAMD64VRNDSCALEPDMasked128(v) + case OpAMD64VRNDSCALEPDMasked256: + return rewriteValueAMD64_OpAMD64VRNDSCALEPDMasked256(v) + case OpAMD64VRNDSCALEPDMasked512: + return rewriteValueAMD64_OpAMD64VRNDSCALEPDMasked512(v) + case OpAMD64VRNDSCALEPS128: + return rewriteValueAMD64_OpAMD64VRNDSCALEPS128(v) + case OpAMD64VRNDSCALEPS256: + 
return rewriteValueAMD64_OpAMD64VRNDSCALEPS256(v) + case OpAMD64VRNDSCALEPS512: + return rewriteValueAMD64_OpAMD64VRNDSCALEPS512(v) + case OpAMD64VRNDSCALEPSMasked128: + return rewriteValueAMD64_OpAMD64VRNDSCALEPSMasked128(v) + case OpAMD64VRNDSCALEPSMasked256: + return rewriteValueAMD64_OpAMD64VRNDSCALEPSMasked256(v) + case OpAMD64VRNDSCALEPSMasked512: + return rewriteValueAMD64_OpAMD64VRNDSCALEPSMasked512(v) case OpAMD64VRSQRT14PD128: return rewriteValueAMD64_OpAMD64VRSQRT14PD128(v) case OpAMD64VRSQRT14PD256: @@ -27680,6 +27956,266 @@ func rewriteValueAMD64_OpAMD64VADDPSMasked512(v *Value) bool { } return false } +func rewriteValueAMD64_OpAMD64VCMPPD512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VCMPPD512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCMPPD512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem) + for { + c := auxIntToUint8(v.AuxInt) + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VCMPPD512load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VCMPPDMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VCMPPDMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCMPPDMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) + for { + c := auxIntToUint8(v.AuxInt) + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := 
auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VCMPPDMasked128load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VCMPPDMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VCMPPDMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCMPPDMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) + for { + c := auxIntToUint8(v.AuxInt) + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VCMPPDMasked256load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VCMPPDMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VCMPPDMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCMPPDMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) + for { + c := auxIntToUint8(v.AuxInt) + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VCMPPDMasked512load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), 
off)) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VCMPPS512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VCMPPS512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCMPPS512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem) + for { + c := auxIntToUint8(v.AuxInt) + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VCMPPS512load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VCMPPSMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VCMPPSMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCMPPSMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) + for { + c := auxIntToUint8(v.AuxInt) + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VCMPPSMasked128load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VCMPPSMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VCMPPSMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr 
mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCMPPSMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) + for { + c := auxIntToUint8(v.AuxInt) + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VCMPPSMasked256load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VCMPPSMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VCMPPSMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCMPPSMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) + for { + c := auxIntToUint8(v.AuxInt) + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VCMPPSMasked512load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break + } + return false +} func rewriteValueAMD64_OpAMD64VCVTPS2UDQ128(v *Value) bool { v_0 := v.Args[0] // match: (VCVTPS2UDQ128 l:(VMOVDQUload128 {sym} [off] ptr mem)) @@ -29250,6 +29786,354 @@ func rewriteValueAMD64_OpAMD64VFMSUBADD213PSMasked512(v *Value) bool { } return false } +func rewriteValueAMD64_OpAMD64VGF2P8AFFINEINVQB128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VGF2P8AFFINEINVQB128 [c] x 
l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VGF2P8AFFINEINVQB128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem) + for { + c := auxIntToUint8(v.AuxInt) + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VGF2P8AFFINEINVQB128load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VGF2P8AFFINEINVQB256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VGF2P8AFFINEINVQB256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VGF2P8AFFINEINVQB256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem) + for { + c := auxIntToUint8(v.AuxInt) + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VGF2P8AFFINEINVQB256load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VGF2P8AFFINEINVQB512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VGF2P8AFFINEINVQB512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VGF2P8AFFINEINVQB512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem) + for { + c := auxIntToUint8(v.AuxInt) + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + 
v.reset(OpAMD64VGF2P8AFFINEINVQB512load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VGF2P8AFFINEINVQBMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VGF2P8AFFINEINVQBMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VGF2P8AFFINEINVQBMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) + for { + c := auxIntToUint8(v.AuxInt) + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VGF2P8AFFINEINVQBMasked128load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VGF2P8AFFINEINVQBMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VGF2P8AFFINEINVQBMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VGF2P8AFFINEINVQBMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) + for { + c := auxIntToUint8(v.AuxInt) + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VGF2P8AFFINEINVQBMasked256load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VGF2P8AFFINEINVQBMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := 
v.Args[1] + v_0 := v.Args[0] + // match: (VGF2P8AFFINEINVQBMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VGF2P8AFFINEINVQBMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) + for { + c := auxIntToUint8(v.AuxInt) + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VGF2P8AFFINEINVQBMasked512load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VGF2P8AFFINEQB128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VGF2P8AFFINEQB128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VGF2P8AFFINEQB128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem) + for { + c := auxIntToUint8(v.AuxInt) + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VGF2P8AFFINEQB128load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VGF2P8AFFINEQB256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VGF2P8AFFINEQB256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VGF2P8AFFINEQB256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem) + for { + c := auxIntToUint8(v.AuxInt) + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + 
mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VGF2P8AFFINEQB256load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VGF2P8AFFINEQB512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VGF2P8AFFINEQB512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VGF2P8AFFINEQB512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem) + for { + c := auxIntToUint8(v.AuxInt) + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VGF2P8AFFINEQB512load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VGF2P8AFFINEQBMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VGF2P8AFFINEQBMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VGF2P8AFFINEQBMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) + for { + c := auxIntToUint8(v.AuxInt) + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VGF2P8AFFINEQBMasked128load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VGF2P8AFFINEQBMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] 
+ v_0 := v.Args[0] + // match: (VGF2P8AFFINEQBMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VGF2P8AFFINEQBMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) + for { + c := auxIntToUint8(v.AuxInt) + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VGF2P8AFFINEQBMasked256load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VGF2P8AFFINEQBMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VGF2P8AFFINEQBMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VGF2P8AFFINEQBMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) + for { + c := auxIntToUint8(v.AuxInt) + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VGF2P8AFFINEQBMasked512load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + return false +} func rewriteValueAMD64_OpAMD64VMAXPD512(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -34394,6 +35278,133 @@ func rewriteValueAMD64_OpAMD64VPBROADCASTW512(v *Value) bool { } return false } +func rewriteValueAMD64_OpAMD64VPCMPD512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPCMPD512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: 
(VPCMPD512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem) + for { + c := auxIntToUint8(v.AuxInt) + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPCMPD512load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPCMPDMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPCMPDMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPCMPDMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) + for { + c := auxIntToUint8(v.AuxInt) + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPCMPDMasked128load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPCMPDMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPCMPDMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPCMPDMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) + for { + c := auxIntToUint8(v.AuxInt) + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := 
l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPCMPDMasked256load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPCMPDMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPCMPDMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPCMPDMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) + for { + c := auxIntToUint8(v.AuxInt) + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPCMPDMasked512load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break + } + return false +} func rewriteValueAMD64_OpAMD64VPCMPEQD512(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -34508,6 +35519,387 @@ func rewriteValueAMD64_OpAMD64VPCMPGTQ512(v *Value) bool { } return false } +func rewriteValueAMD64_OpAMD64VPCMPQ512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPCMPQ512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPCMPQ512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem) + for { + c := auxIntToUint8(v.AuxInt) + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPCMPQ512load) + v.AuxInt = 
valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPCMPQMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPCMPQMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPCMPQMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) + for { + c := auxIntToUint8(v.AuxInt) + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPCMPQMasked128load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPCMPQMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPCMPQMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPCMPQMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) + for { + c := auxIntToUint8(v.AuxInt) + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPCMPQMasked256load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPCMPQMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 
:= v.Args[1] + v_0 := v.Args[0] + // match: (VPCMPQMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPCMPQMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) + for { + c := auxIntToUint8(v.AuxInt) + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPCMPQMasked512load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPCMPUD512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPCMPUD512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPCMPUD512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem) + for { + c := auxIntToUint8(v.AuxInt) + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPCMPUD512load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPCMPUDMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPCMPUDMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPCMPUDMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) + for { + c := auxIntToUint8(v.AuxInt) + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + 
if l.Op != OpAMD64VMOVDQUload128 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPCMPUDMasked128load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPCMPUDMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPCMPUDMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPCMPUDMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) + for { + c := auxIntToUint8(v.AuxInt) + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPCMPUDMasked256load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPCMPUDMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPCMPUDMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPCMPUDMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) + for { + c := auxIntToUint8(v.AuxInt) + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + 
v.reset(OpAMD64VPCMPUDMasked512load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPCMPUQ512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPCMPUQ512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPCMPUQ512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem) + for { + c := auxIntToUint8(v.AuxInt) + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPCMPUQ512load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPCMPUQMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPCMPUQMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPCMPUQMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) + for { + c := auxIntToUint8(v.AuxInt) + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPCMPUQMasked128load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPCMPUQMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPCMPUQMasked256 [c] x 
l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPCMPUQMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) + for { + c := auxIntToUint8(v.AuxInt) + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPCMPUQMasked256load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPCMPUQMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPCMPUQMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPCMPUQMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) + for { + c := auxIntToUint8(v.AuxInt) + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPCMPUQMasked512load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break + } + return false +} func rewriteValueAMD64_OpAMD64VPDPBUSD512(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] @@ -38788,15 +40180,14 @@ func rewriteValueAMD64_OpAMD64VPORQMasked512(v *Value) bool { } return false } -func rewriteValueAMD64_OpAMD64VPROLVD128(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpAMD64VPROLD128(v *Value) bool { v_0 := v.Args[0] - // match: (VPROLVD128 x 
l:(VMOVDQUload128 {sym} [off] ptr mem)) + // match: (VPROLD128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPROLVD128load {sym} [off] x ptr mem) + // result: (VPROLD128load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) for { - x := v_0 - l := v_1 + c := auxIntToUint8(v.AuxInt) + l := v_0 if l.Op != OpAMD64VMOVDQUload128 { break } @@ -38807,23 +40198,22 @@ func rewriteValueAMD64_OpAMD64VPROLVD128(v *Value) bool { if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPROLVD128load) - v.AuxInt = int32ToAuxInt(off) + v.reset(OpAMD64VPROLD128load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) + v.AddArg2(ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPROLVD256(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpAMD64VPROLD256(v *Value) bool { v_0 := v.Args[0] - // match: (VPROLVD256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) + // match: (VPROLD256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPROLVD256load {sym} [off] x ptr mem) + // result: (VPROLD256load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) for { - x := v_0 - l := v_1 + c := auxIntToUint8(v.AuxInt) + l := v_0 if l.Op != OpAMD64VMOVDQUload256 { break } @@ -38834,23 +40224,22 @@ func rewriteValueAMD64_OpAMD64VPROLVD256(v *Value) bool { if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPROLVD256load) - v.AuxInt = int32ToAuxInt(off) + v.reset(OpAMD64VPROLD256load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) + v.AddArg2(ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPROLVD512(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpAMD64VPROLD512(v *Value) bool { v_0 := v.Args[0] - // match: (VPROLVD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // match: (VPROLD512 
[c] l:(VMOVDQUload512 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPROLVD512load {sym} [off] x ptr mem) + // result: (VPROLD512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) for { - x := v_0 - l := v_1 + c := auxIntToUint8(v.AuxInt) + l := v_0 if l.Op != OpAMD64VMOVDQUload512 { break } @@ -38861,24 +40250,23 @@ func rewriteValueAMD64_OpAMD64VPROLVD512(v *Value) bool { if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPROLVD512load) - v.AuxInt = int32ToAuxInt(off) + v.reset(OpAMD64VPROLD512load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) + v.AddArg2(ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPROLVDMasked128(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpAMD64VPROLDMasked128(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPROLVDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // match: (VPROLDMasked128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPROLVDMasked128load {sym} [off] x ptr mask mem) + // result: (VPROLDMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) for { - x := v_0 - l := v_1 + c := auxIntToUint8(v.AuxInt) + l := v_0 if l.Op != OpAMD64VMOVDQUload128 { break } @@ -38886,28 +40274,27 @@ func rewriteValueAMD64_OpAMD64VPROLVDMasked128(v *Value) bool { sym := auxToSym(l.Aux) mem := l.Args[1] ptr := l.Args[0] - mask := v_2 + mask := v_1 if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPROLVDMasked128load) - v.AuxInt = int32ToAuxInt(off) + v.reset(OpAMD64VPROLDMasked128load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + v.AddArg3(ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPROLVDMasked256(v *Value) bool { - v_2 := v.Args[2] +func 
rewriteValueAMD64_OpAMD64VPROLDMasked256(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPROLVDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // match: (VPROLDMasked256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPROLVDMasked256load {sym} [off] x ptr mask mem) + // result: (VPROLDMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) for { - x := v_0 - l := v_1 + c := auxIntToUint8(v.AuxInt) + l := v_0 if l.Op != OpAMD64VMOVDQUload256 { break } @@ -38915,28 +40302,27 @@ func rewriteValueAMD64_OpAMD64VPROLVDMasked256(v *Value) bool { sym := auxToSym(l.Aux) mem := l.Args[1] ptr := l.Args[0] - mask := v_2 + mask := v_1 if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPROLVDMasked256load) - v.AuxInt = int32ToAuxInt(off) + v.reset(OpAMD64VPROLDMasked256load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + v.AddArg3(ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPROLVDMasked512(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpAMD64VPROLDMasked512(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPROLVDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // match: (VPROLDMasked512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPROLVDMasked512load {sym} [off] x ptr mask mem) + // result: (VPROLDMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) for { - x := v_0 - l := v_1 + c := auxIntToUint8(v.AuxInt) + l := v_0 if l.Op != OpAMD64VMOVDQUload512 { break } @@ -38944,27 +40330,26 @@ func rewriteValueAMD64_OpAMD64VPROLVDMasked512(v *Value) bool { sym := auxToSym(l.Aux) mem := l.Args[1] ptr := l.Args[0] - mask := v_2 + mask := v_1 if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPROLVDMasked512load) - v.AuxInt = 
int32ToAuxInt(off) + v.reset(OpAMD64VPROLDMasked512load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + v.AddArg3(ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPROLVQ128(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpAMD64VPROLQ128(v *Value) bool { v_0 := v.Args[0] - // match: (VPROLVQ128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) + // match: (VPROLQ128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPROLVQ128load {sym} [off] x ptr mem) + // result: (VPROLQ128load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) for { - x := v_0 - l := v_1 + c := auxIntToUint8(v.AuxInt) + l := v_0 if l.Op != OpAMD64VMOVDQUload128 { break } @@ -38975,23 +40360,22 @@ func rewriteValueAMD64_OpAMD64VPROLVQ128(v *Value) bool { if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPROLVQ128load) - v.AuxInt = int32ToAuxInt(off) + v.reset(OpAMD64VPROLQ128load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) + v.AddArg2(ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPROLVQ256(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpAMD64VPROLQ256(v *Value) bool { v_0 := v.Args[0] - // match: (VPROLVQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) + // match: (VPROLQ256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPROLVQ256load {sym} [off] x ptr mem) + // result: (VPROLQ256load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) for { - x := v_0 - l := v_1 + c := auxIntToUint8(v.AuxInt) + l := v_0 if l.Op != OpAMD64VMOVDQUload256 { break } @@ -39002,23 +40386,22 @@ func rewriteValueAMD64_OpAMD64VPROLVQ256(v *Value) bool { if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPROLVQ256load) - v.AuxInt = int32ToAuxInt(off) + 
v.reset(OpAMD64VPROLQ256load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) + v.AddArg2(ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPROLVQ512(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpAMD64VPROLQ512(v *Value) bool { v_0 := v.Args[0] - // match: (VPROLVQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // match: (VPROLQ512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPROLVQ512load {sym} [off] x ptr mem) + // result: (VPROLQ512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) for { - x := v_0 - l := v_1 + c := auxIntToUint8(v.AuxInt) + l := v_0 if l.Op != OpAMD64VMOVDQUload512 { break } @@ -39029,24 +40412,23 @@ func rewriteValueAMD64_OpAMD64VPROLVQ512(v *Value) bool { if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPROLVQ512load) - v.AuxInt = int32ToAuxInt(off) + v.reset(OpAMD64VPROLQ512load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) + v.AddArg2(ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPROLVQMasked128(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpAMD64VPROLQMasked128(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPROLVQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // match: (VPROLQMasked128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPROLVQMasked128load {sym} [off] x ptr mask mem) + // result: (VPROLQMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) for { - x := v_0 - l := v_1 + c := auxIntToUint8(v.AuxInt) + l := v_0 if l.Op != OpAMD64VMOVDQUload128 { break } @@ -39054,28 +40436,27 @@ func rewriteValueAMD64_OpAMD64VPROLVQMasked128(v *Value) bool { sym := auxToSym(l.Aux) mem := l.Args[1] ptr := l.Args[0] - mask := v_2 + mask := v_1 if 
!(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPROLVQMasked128load) - v.AuxInt = int32ToAuxInt(off) + v.reset(OpAMD64VPROLQMasked128load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + v.AddArg3(ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPROLVQMasked256(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpAMD64VPROLQMasked256(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPROLVQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // match: (VPROLQMasked256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPROLVQMasked256load {sym} [off] x ptr mask mem) + // result: (VPROLQMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) for { - x := v_0 - l := v_1 + c := auxIntToUint8(v.AuxInt) + l := v_0 if l.Op != OpAMD64VMOVDQUload256 { break } @@ -39083,28 +40464,27 @@ func rewriteValueAMD64_OpAMD64VPROLVQMasked256(v *Value) bool { sym := auxToSym(l.Aux) mem := l.Args[1] ptr := l.Args[0] - mask := v_2 + mask := v_1 if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPROLVQMasked256load) - v.AuxInt = int32ToAuxInt(off) + v.reset(OpAMD64VPROLQMasked256load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + v.AddArg3(ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPROLVQMasked512(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpAMD64VPROLQMasked512(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPROLVQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // match: (VPROLQMasked512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPROLVQMasked512load {sym} [off] x ptr mask mem) + // result: (VPROLQMasked512load {sym} 
[makeValAndOff(int32(int8(c)),off)] ptr mask mem) for { - x := v_0 - l := v_1 + c := auxIntToUint8(v.AuxInt) + l := v_0 if l.Op != OpAMD64VMOVDQUload512 { break } @@ -39112,24 +40492,24 @@ func rewriteValueAMD64_OpAMD64VPROLVQMasked512(v *Value) bool { sym := auxToSym(l.Aux) mem := l.Args[1] ptr := l.Args[0] - mask := v_2 + mask := v_1 if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPROLVQMasked512load) - v.AuxInt = int32ToAuxInt(off) + v.reset(OpAMD64VPROLQMasked512load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + v.AddArg3(ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPRORVD128(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPROLVD128(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPRORVD128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) + // match: (VPROLVD128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPRORVD128load {sym} [off] x ptr mem) + // result: (VPROLVD128load {sym} [off] x ptr mem) for { x := v_0 l := v_1 @@ -39143,7 +40523,7 @@ func rewriteValueAMD64_OpAMD64VPRORVD128(v *Value) bool { if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPRORVD128load) + v.reset(OpAMD64VPROLVD128load) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) v.AddArg3(x, ptr, mem) @@ -39151,12 +40531,12 @@ func rewriteValueAMD64_OpAMD64VPRORVD128(v *Value) bool { } return false } -func rewriteValueAMD64_OpAMD64VPRORVD256(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPROLVD256(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPRORVD256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) + // match: (VPROLVD256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPRORVD256load {sym} [off] x ptr mem) + // result: (VPROLVD256load {sym} [off] x ptr mem) for { x := v_0 l := v_1 @@ -39170,7 +40550,7 @@ func 
rewriteValueAMD64_OpAMD64VPRORVD256(v *Value) bool { if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPRORVD256load) + v.reset(OpAMD64VPROLVD256load) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) v.AddArg3(x, ptr, mem) @@ -39178,12 +40558,12 @@ func rewriteValueAMD64_OpAMD64VPRORVD256(v *Value) bool { } return false } -func rewriteValueAMD64_OpAMD64VPRORVD512(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPROLVD512(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPRORVD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // match: (VPROLVD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPRORVD512load {sym} [off] x ptr mem) + // result: (VPROLVD512load {sym} [off] x ptr mem) for { x := v_0 l := v_1 @@ -39197,7 +40577,7 @@ func rewriteValueAMD64_OpAMD64VPRORVD512(v *Value) bool { if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPRORVD512load) + v.reset(OpAMD64VPROLVD512load) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) v.AddArg3(x, ptr, mem) @@ -39205,13 +40585,13 @@ func rewriteValueAMD64_OpAMD64VPRORVD512(v *Value) bool { } return false } -func rewriteValueAMD64_OpAMD64VPRORVDMasked128(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPROLVDMasked128(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPRORVDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // match: (VPROLVDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPRORVDMasked128load {sym} [off] x ptr mask mem) + // result: (VPROLVDMasked128load {sym} [off] x ptr mask mem) for { x := v_0 l := v_1 @@ -39226,7 +40606,7 @@ func rewriteValueAMD64_OpAMD64VPRORVDMasked128(v *Value) bool { if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPRORVDMasked128load) + v.reset(OpAMD64VPROLVDMasked128load) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) v.AddArg4(x, ptr, mask, mem) @@ 
-39234,13 +40614,13 @@ func rewriteValueAMD64_OpAMD64VPRORVDMasked128(v *Value) bool { } return false } -func rewriteValueAMD64_OpAMD64VPRORVDMasked256(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPROLVDMasked256(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPRORVDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // match: (VPROLVDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPRORVDMasked256load {sym} [off] x ptr mask mem) + // result: (VPROLVDMasked256load {sym} [off] x ptr mask mem) for { x := v_0 l := v_1 @@ -39255,7 +40635,7 @@ func rewriteValueAMD64_OpAMD64VPRORVDMasked256(v *Value) bool { if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPRORVDMasked256load) + v.reset(OpAMD64VPROLVDMasked256load) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) v.AddArg4(x, ptr, mask, mem) @@ -39263,13 +40643,13 @@ func rewriteValueAMD64_OpAMD64VPRORVDMasked256(v *Value) bool { } return false } -func rewriteValueAMD64_OpAMD64VPRORVDMasked512(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPROLVDMasked512(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPRORVDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // match: (VPROLVDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPRORVDMasked512load {sym} [off] x ptr mask mem) + // result: (VPROLVDMasked512load {sym} [off] x ptr mask mem) for { x := v_0 l := v_1 @@ -39284,7 +40664,7 @@ func rewriteValueAMD64_OpAMD64VPRORVDMasked512(v *Value) bool { if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPRORVDMasked512load) + v.reset(OpAMD64VPROLVDMasked512load) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) v.AddArg4(x, ptr, mask, mem) @@ -39292,12 +40672,12 @@ func rewriteValueAMD64_OpAMD64VPRORVDMasked512(v *Value) bool { } return false } -func 
rewriteValueAMD64_OpAMD64VPRORVQ128(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPROLVQ128(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPRORVQ128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) + // match: (VPROLVQ128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPRORVQ128load {sym} [off] x ptr mem) + // result: (VPROLVQ128load {sym} [off] x ptr mem) for { x := v_0 l := v_1 @@ -39311,7 +40691,7 @@ func rewriteValueAMD64_OpAMD64VPRORVQ128(v *Value) bool { if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPRORVQ128load) + v.reset(OpAMD64VPROLVQ128load) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) v.AddArg3(x, ptr, mem) @@ -39319,12 +40699,12 @@ func rewriteValueAMD64_OpAMD64VPRORVQ128(v *Value) bool { } return false } -func rewriteValueAMD64_OpAMD64VPRORVQ256(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPROLVQ256(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPRORVQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) + // match: (VPROLVQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPRORVQ256load {sym} [off] x ptr mem) + // result: (VPROLVQ256load {sym} [off] x ptr mem) for { x := v_0 l := v_1 @@ -39338,7 +40718,7 @@ func rewriteValueAMD64_OpAMD64VPRORVQ256(v *Value) bool { if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPRORVQ256load) + v.reset(OpAMD64VPROLVQ256load) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) v.AddArg3(x, ptr, mem) @@ -39346,12 +40726,12 @@ func rewriteValueAMD64_OpAMD64VPRORVQ256(v *Value) bool { } return false } -func rewriteValueAMD64_OpAMD64VPRORVQ512(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPROLVQ512(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPRORVQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // match: (VPROLVQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: 
(VPRORVQ512load {sym} [off] x ptr mem) + // result: (VPROLVQ512load {sym} [off] x ptr mem) for { x := v_0 l := v_1 @@ -39365,7 +40745,7 @@ func rewriteValueAMD64_OpAMD64VPRORVQ512(v *Value) bool { if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPRORVQ512load) + v.reset(OpAMD64VPROLVQ512load) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) v.AddArg3(x, ptr, mem) @@ -39373,13 +40753,13 @@ func rewriteValueAMD64_OpAMD64VPRORVQ512(v *Value) bool { } return false } -func rewriteValueAMD64_OpAMD64VPRORVQMasked128(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPROLVQMasked128(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPRORVQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // match: (VPROLVQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPRORVQMasked128load {sym} [off] x ptr mask mem) + // result: (VPROLVQMasked128load {sym} [off] x ptr mask mem) for { x := v_0 l := v_1 @@ -39394,7 +40774,7 @@ func rewriteValueAMD64_OpAMD64VPRORVQMasked128(v *Value) bool { if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPRORVQMasked128load) + v.reset(OpAMD64VPROLVQMasked128load) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) v.AddArg4(x, ptr, mask, mem) @@ -39402,13 +40782,13 @@ func rewriteValueAMD64_OpAMD64VPRORVQMasked128(v *Value) bool { } return false } -func rewriteValueAMD64_OpAMD64VPRORVQMasked256(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPROLVQMasked256(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPRORVQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // match: (VPROLVQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPRORVQMasked256load {sym} [off] x ptr mask mem) + // result: (VPROLVQMasked256load {sym} [off] x ptr mask mem) for { x := v_0 l := v_1 @@ -39423,7 +40803,7 @@ func 
rewriteValueAMD64_OpAMD64VPRORVQMasked256(v *Value) bool { if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPRORVQMasked256load) + v.reset(OpAMD64VPROLVQMasked256load) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) v.AddArg4(x, ptr, mask, mem) @@ -39431,13 +40811,13 @@ func rewriteValueAMD64_OpAMD64VPRORVQMasked256(v *Value) bool { } return false } -func rewriteValueAMD64_OpAMD64VPRORVQMasked512(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPROLVQMasked512(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPRORVQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // match: (VPROLVQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPRORVQMasked512load {sym} [off] x ptr mask mem) + // result: (VPROLVQMasked512load {sym} [off] x ptr mask mem) for { x := v_0 l := v_1 @@ -39452,7 +40832,7 @@ func rewriteValueAMD64_OpAMD64VPRORVQMasked512(v *Value) bool { if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPRORVQMasked512load) + v.reset(OpAMD64VPROLVQMasked512load) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) v.AddArg4(x, ptr, mask, mem) @@ -39460,17 +40840,14 @@ func rewriteValueAMD64_OpAMD64VPRORVQMasked512(v *Value) bool { } return false } -func rewriteValueAMD64_OpAMD64VPSHLDVD128(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] +func rewriteValueAMD64_OpAMD64VPRORD128(v *Value) bool { v_0 := v.Args[0] - // match: (VPSHLDVD128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) + // match: (VPRORD128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSHLDVD128load {sym} [off] x y ptr mem) + // result: (VPRORD128load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) for { - x := v_0 - y := v_1 - l := v_2 + c := auxIntToUint8(v.AuxInt) + l := v_0 if l.Op != OpAMD64VMOVDQUload128 { break } @@ -39481,25 +40858,22 @@ func rewriteValueAMD64_OpAMD64VPSHLDVD128(v *Value) bool { 
if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPSHLDVD128load) - v.AuxInt = int32ToAuxInt(off) + v.reset(OpAMD64VPRORD128load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) v.Aux = symToAux(sym) - v.AddArg4(x, y, ptr, mem) + v.AddArg2(ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPSHLDVD256(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] +func rewriteValueAMD64_OpAMD64VPRORD256(v *Value) bool { v_0 := v.Args[0] - // match: (VPSHLDVD256 x y l:(VMOVDQUload256 {sym} [off] ptr mem)) + // match: (VPRORD256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSHLDVD256load {sym} [off] x y ptr mem) + // result: (VPRORD256load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) for { - x := v_0 - y := v_1 - l := v_2 + c := auxIntToUint8(v.AuxInt) + l := v_0 if l.Op != OpAMD64VMOVDQUload256 { break } @@ -39510,25 +40884,22 @@ func rewriteValueAMD64_OpAMD64VPSHLDVD256(v *Value) bool { if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPSHLDVD256load) - v.AuxInt = int32ToAuxInt(off) + v.reset(OpAMD64VPRORD256load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) v.Aux = symToAux(sym) - v.AddArg4(x, y, ptr, mem) + v.AddArg2(ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPSHLDVD512(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] +func rewriteValueAMD64_OpAMD64VPRORD512(v *Value) bool { v_0 := v.Args[0] - // match: (VPSHLDVD512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) + // match: (VPRORD512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSHLDVD512load {sym} [off] x y ptr mem) + // result: (VPRORD512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) for { - x := v_0 - y := v_1 - l := v_2 + c := auxIntToUint8(v.AuxInt) + l := v_0 if l.Op != OpAMD64VMOVDQUload512 { break } @@ -39539,26 +40910,23 @@ func 
rewriteValueAMD64_OpAMD64VPSHLDVD512(v *Value) bool { if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPSHLDVD512load) - v.AuxInt = int32ToAuxInt(off) + v.reset(OpAMD64VPRORD512load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) v.Aux = symToAux(sym) - v.AddArg4(x, y, ptr, mem) + v.AddArg2(ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPSHLDVDMasked128(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] +func rewriteValueAMD64_OpAMD64VPRORDMasked128(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPSHLDVDMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // match: (VPRORDMasked128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSHLDVDMasked128load {sym} [off] x y ptr mask mem) + // result: (VPRORDMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) for { - x := v_0 - y := v_1 - l := v_2 + c := auxIntToUint8(v.AuxInt) + l := v_0 if l.Op != OpAMD64VMOVDQUload128 { break } @@ -39566,30 +40934,27 @@ func rewriteValueAMD64_OpAMD64VPSHLDVDMasked128(v *Value) bool { sym := auxToSym(l.Aux) mem := l.Args[1] ptr := l.Args[0] - mask := v_3 + mask := v_1 if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPSHLDVDMasked128load) - v.AuxInt = int32ToAuxInt(off) + v.reset(OpAMD64VPRORDMasked128load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) v.Aux = symToAux(sym) - v.AddArg5(x, y, ptr, mask, mem) + v.AddArg3(ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPSHLDVDMasked256(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] +func rewriteValueAMD64_OpAMD64VPRORDMasked256(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPSHLDVDMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // match: (VPRORDMasked256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: 
(VPSHLDVDMasked256load {sym} [off] x y ptr mask mem) + // result: (VPRORDMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) for { - x := v_0 - y := v_1 - l := v_2 + c := auxIntToUint8(v.AuxInt) + l := v_0 if l.Op != OpAMD64VMOVDQUload256 { break } @@ -39597,30 +40962,27 @@ func rewriteValueAMD64_OpAMD64VPSHLDVDMasked256(v *Value) bool { sym := auxToSym(l.Aux) mem := l.Args[1] ptr := l.Args[0] - mask := v_3 + mask := v_1 if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPSHLDVDMasked256load) - v.AuxInt = int32ToAuxInt(off) + v.reset(OpAMD64VPRORDMasked256load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) v.Aux = symToAux(sym) - v.AddArg5(x, y, ptr, mask, mem) + v.AddArg3(ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPSHLDVDMasked512(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] +func rewriteValueAMD64_OpAMD64VPRORDMasked512(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPSHLDVDMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // match: (VPRORDMasked512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSHLDVDMasked512load {sym} [off] x y ptr mask mem) + // result: (VPRORDMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) for { - x := v_0 - y := v_1 - l := v_2 + c := auxIntToUint8(v.AuxInt) + l := v_0 if l.Op != OpAMD64VMOVDQUload512 { break } @@ -39628,29 +40990,26 @@ func rewriteValueAMD64_OpAMD64VPSHLDVDMasked512(v *Value) bool { sym := auxToSym(l.Aux) mem := l.Args[1] ptr := l.Args[0] - mask := v_3 + mask := v_1 if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPSHLDVDMasked512load) - v.AuxInt = int32ToAuxInt(off) + v.reset(OpAMD64VPRORDMasked512load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) v.Aux = symToAux(sym) - v.AddArg5(x, y, ptr, mask, mem) + v.AddArg3(ptr, mask, mem) return true } return false } -func 
rewriteValueAMD64_OpAMD64VPSHLDVQ128(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] +func rewriteValueAMD64_OpAMD64VPRORQ128(v *Value) bool { v_0 := v.Args[0] - // match: (VPSHLDVQ128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) + // match: (VPRORQ128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSHLDVQ128load {sym} [off] x y ptr mem) + // result: (VPRORQ128load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) for { - x := v_0 - y := v_1 - l := v_2 + c := auxIntToUint8(v.AuxInt) + l := v_0 if l.Op != OpAMD64VMOVDQUload128 { break } @@ -39661,25 +41020,22 @@ func rewriteValueAMD64_OpAMD64VPSHLDVQ128(v *Value) bool { if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPSHLDVQ128load) - v.AuxInt = int32ToAuxInt(off) + v.reset(OpAMD64VPRORQ128load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) v.Aux = symToAux(sym) - v.AddArg4(x, y, ptr, mem) + v.AddArg2(ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPSHLDVQ256(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] +func rewriteValueAMD64_OpAMD64VPRORQ256(v *Value) bool { v_0 := v.Args[0] - // match: (VPSHLDVQ256 x y l:(VMOVDQUload256 {sym} [off] ptr mem)) + // match: (VPRORQ256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSHLDVQ256load {sym} [off] x y ptr mem) + // result: (VPRORQ256load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) for { - x := v_0 - y := v_1 - l := v_2 + c := auxIntToUint8(v.AuxInt) + l := v_0 if l.Op != OpAMD64VMOVDQUload256 { break } @@ -39690,25 +41046,22 @@ func rewriteValueAMD64_OpAMD64VPSHLDVQ256(v *Value) bool { if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPSHLDVQ256load) - v.AuxInt = int32ToAuxInt(off) + v.reset(OpAMD64VPRORQ256load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) v.Aux = symToAux(sym) - v.AddArg4(x, y, ptr, mem) + v.AddArg2(ptr, mem) return true } 
return false } -func rewriteValueAMD64_OpAMD64VPSHLDVQ512(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] +func rewriteValueAMD64_OpAMD64VPRORQ512(v *Value) bool { v_0 := v.Args[0] - // match: (VPSHLDVQ512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) + // match: (VPRORQ512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSHLDVQ512load {sym} [off] x y ptr mem) + // result: (VPRORQ512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) for { - x := v_0 - y := v_1 - l := v_2 + c := auxIntToUint8(v.AuxInt) + l := v_0 if l.Op != OpAMD64VMOVDQUload512 { break } @@ -39719,26 +41072,23 @@ func rewriteValueAMD64_OpAMD64VPSHLDVQ512(v *Value) bool { if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPSHLDVQ512load) - v.AuxInt = int32ToAuxInt(off) + v.reset(OpAMD64VPRORQ512load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) v.Aux = symToAux(sym) - v.AddArg4(x, y, ptr, mem) + v.AddArg2(ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPSHLDVQMasked128(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] +func rewriteValueAMD64_OpAMD64VPRORQMasked128(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPSHLDVQMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // match: (VPRORQMasked128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSHLDVQMasked128load {sym} [off] x y ptr mask mem) + // result: (VPRORQMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) for { - x := v_0 - y := v_1 - l := v_2 + c := auxIntToUint8(v.AuxInt) + l := v_0 if l.Op != OpAMD64VMOVDQUload128 { break } @@ -39746,30 +41096,27 @@ func rewriteValueAMD64_OpAMD64VPSHLDVQMasked128(v *Value) bool { sym := auxToSym(l.Aux) mem := l.Args[1] ptr := l.Args[0] - mask := v_3 + mask := v_1 if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPSHLDVQMasked128load) - v.AuxInt = 
int32ToAuxInt(off) + v.reset(OpAMD64VPRORQMasked128load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) v.Aux = symToAux(sym) - v.AddArg5(x, y, ptr, mask, mem) + v.AddArg3(ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPSHLDVQMasked256(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] +func rewriteValueAMD64_OpAMD64VPRORQMasked256(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPSHLDVQMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // match: (VPRORQMasked256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSHLDVQMasked256load {sym} [off] x y ptr mask mem) + // result: (VPRORQMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) for { - x := v_0 - y := v_1 - l := v_2 + c := auxIntToUint8(v.AuxInt) + l := v_0 if l.Op != OpAMD64VMOVDQUload256 { break } @@ -39777,30 +41124,27 @@ func rewriteValueAMD64_OpAMD64VPSHLDVQMasked256(v *Value) bool { sym := auxToSym(l.Aux) mem := l.Args[1] ptr := l.Args[0] - mask := v_3 + mask := v_1 if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPSHLDVQMasked256load) - v.AuxInt = int32ToAuxInt(off) + v.reset(OpAMD64VPRORQMasked256load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) v.Aux = symToAux(sym) - v.AddArg5(x, y, ptr, mask, mem) + v.AddArg3(ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPSHLDVQMasked512(v *Value) bool { - v_3 := v.Args[3] - v_2 := v.Args[2] +func rewriteValueAMD64_OpAMD64VPRORQMasked512(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPSHLDVQMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // match: (VPRORQMasked512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSHLDVQMasked512load {sym} [off] x y ptr mask mem) + // result: (VPRORQMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) for { - 
x := v_0 - y := v_1 - l := v_2 + c := auxIntToUint8(v.AuxInt) + l := v_0 if l.Op != OpAMD64VMOVDQUload512 { break } @@ -39808,29 +41152,27 @@ func rewriteValueAMD64_OpAMD64VPSHLDVQMasked512(v *Value) bool { sym := auxToSym(l.Aux) mem := l.Args[1] ptr := l.Args[0] - mask := v_3 + mask := v_1 if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPSHLDVQMasked512load) - v.AuxInt = int32ToAuxInt(off) + v.reset(OpAMD64VPRORQMasked512load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) v.Aux = symToAux(sym) - v.AddArg5(x, y, ptr, mask, mem) + v.AddArg3(ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPSHRDVD128(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpAMD64VPRORVD128(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPSHRDVD128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) + // match: (VPRORVD128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSHRDVD128load {sym} [off] x y ptr mem) + // result: (VPRORVD128load {sym} [off] x ptr mem) for { x := v_0 - y := v_1 - l := v_2 + l := v_1 if l.Op != OpAMD64VMOVDQUload128 { break } @@ -39841,25 +41183,23 @@ func rewriteValueAMD64_OpAMD64VPSHRDVD128(v *Value) bool { if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPSHRDVD128load) + v.reset(OpAMD64VPRORVD128load) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg4(x, y, ptr, mem) + v.AddArg3(x, ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPSHRDVD256(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpAMD64VPRORVD256(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPSHRDVD256 x y l:(VMOVDQUload256 {sym} [off] ptr mem)) + // match: (VPRORVD256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSHRDVD256load {sym} [off] x y ptr mem) + // result: (VPRORVD256load {sym} [off] x ptr mem) for { x := v_0 - y := v_1 - l 
:= v_2 + l := v_1 if l.Op != OpAMD64VMOVDQUload256 { break } @@ -39870,25 +41210,23 @@ func rewriteValueAMD64_OpAMD64VPSHRDVD256(v *Value) bool { if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPSHRDVD256load) + v.reset(OpAMD64VPRORVD256load) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg4(x, y, ptr, mem) + v.AddArg3(x, ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPSHRDVD512(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpAMD64VPRORVD512(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPSHRDVD512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) + // match: (VPRORVD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSHRDVD512load {sym} [off] x y ptr mem) + // result: (VPRORVD512load {sym} [off] x ptr mem) for { x := v_0 - y := v_1 - l := v_2 + l := v_1 if l.Op != OpAMD64VMOVDQUload512 { break } @@ -39899,26 +41237,24 @@ func rewriteValueAMD64_OpAMD64VPSHRDVD512(v *Value) bool { if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPSHRDVD512load) + v.reset(OpAMD64VPRORVD512load) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg4(x, y, ptr, mem) + v.AddArg3(x, ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPSHRDVDMasked128(v *Value) bool { - v_3 := v.Args[3] +func rewriteValueAMD64_OpAMD64VPRORVDMasked128(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPSHRDVDMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // match: (VPRORVDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSHRDVDMasked128load {sym} [off] x y ptr mask mem) + // result: (VPRORVDMasked128load {sym} [off] x ptr mask mem) for { x := v_0 - y := v_1 - l := v_2 + l := v_1 if l.Op != OpAMD64VMOVDQUload128 { break } @@ -39926,30 +41262,28 @@ func rewriteValueAMD64_OpAMD64VPSHRDVDMasked128(v *Value) bool { sym := 
auxToSym(l.Aux) mem := l.Args[1] ptr := l.Args[0] - mask := v_3 + mask := v_2 if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPSHRDVDMasked128load) + v.reset(OpAMD64VPRORVDMasked128load) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg5(x, y, ptr, mask, mem) + v.AddArg4(x, ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPSHRDVDMasked256(v *Value) bool { - v_3 := v.Args[3] +func rewriteValueAMD64_OpAMD64VPRORVDMasked256(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPSHRDVDMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // match: (VPRORVDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSHRDVDMasked256load {sym} [off] x y ptr mask mem) + // result: (VPRORVDMasked256load {sym} [off] x ptr mask mem) for { x := v_0 - y := v_1 - l := v_2 + l := v_1 if l.Op != OpAMD64VMOVDQUload256 { break } @@ -39957,30 +41291,28 @@ func rewriteValueAMD64_OpAMD64VPSHRDVDMasked256(v *Value) bool { sym := auxToSym(l.Aux) mem := l.Args[1] ptr := l.Args[0] - mask := v_3 + mask := v_2 if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPSHRDVDMasked256load) + v.reset(OpAMD64VPRORVDMasked256load) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg5(x, y, ptr, mask, mem) + v.AddArg4(x, ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPSHRDVDMasked512(v *Value) bool { - v_3 := v.Args[3] +func rewriteValueAMD64_OpAMD64VPRORVDMasked512(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPSHRDVDMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // match: (VPRORVDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSHRDVDMasked512load {sym} [off] x y ptr mask mem) + // result: (VPRORVDMasked512load {sym} [off] x ptr mask mem) for { x := v_0 - y := v_1 - l := v_2 + l := v_1 
if l.Op != OpAMD64VMOVDQUload512 { break } @@ -39988,29 +41320,27 @@ func rewriteValueAMD64_OpAMD64VPSHRDVDMasked512(v *Value) bool { sym := auxToSym(l.Aux) mem := l.Args[1] ptr := l.Args[0] - mask := v_3 + mask := v_2 if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPSHRDVDMasked512load) + v.reset(OpAMD64VPRORVDMasked512load) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg5(x, y, ptr, mask, mem) + v.AddArg4(x, ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPSHRDVQ128(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpAMD64VPRORVQ128(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPSHRDVQ128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) + // match: (VPRORVQ128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSHRDVQ128load {sym} [off] x y ptr mem) + // result: (VPRORVQ128load {sym} [off] x ptr mem) for { x := v_0 - y := v_1 - l := v_2 + l := v_1 if l.Op != OpAMD64VMOVDQUload128 { break } @@ -40021,25 +41351,23 @@ func rewriteValueAMD64_OpAMD64VPSHRDVQ128(v *Value) bool { if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPSHRDVQ128load) + v.reset(OpAMD64VPRORVQ128load) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg4(x, y, ptr, mem) + v.AddArg3(x, ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPSHRDVQ256(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpAMD64VPRORVQ256(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPSHRDVQ256 x y l:(VMOVDQUload256 {sym} [off] ptr mem)) + // match: (VPRORVQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSHRDVQ256load {sym} [off] x y ptr mem) + // result: (VPRORVQ256load {sym} [off] x ptr mem) for { x := v_0 - y := v_1 - l := v_2 + l := v_1 if l.Op != OpAMD64VMOVDQUload256 { break } @@ -40050,25 +41378,23 @@ func rewriteValueAMD64_OpAMD64VPSHRDVQ256(v *Value) 
bool { if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPSHRDVQ256load) + v.reset(OpAMD64VPRORVQ256load) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg4(x, y, ptr, mem) + v.AddArg3(x, ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPSHRDVQ512(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpAMD64VPRORVQ512(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPSHRDVQ512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) + // match: (VPRORVQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSHRDVQ512load {sym} [off] x y ptr mem) + // result: (VPRORVQ512load {sym} [off] x ptr mem) for { x := v_0 - y := v_1 - l := v_2 + l := v_1 if l.Op != OpAMD64VMOVDQUload512 { break } @@ -40079,26 +41405,24 @@ func rewriteValueAMD64_OpAMD64VPSHRDVQ512(v *Value) bool { if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPSHRDVQ512load) + v.reset(OpAMD64VPRORVQ512load) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg4(x, y, ptr, mem) + v.AddArg3(x, ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPSHRDVQMasked128(v *Value) bool { - v_3 := v.Args[3] +func rewriteValueAMD64_OpAMD64VPRORVQMasked128(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPSHRDVQMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // match: (VPRORVQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSHRDVQMasked128load {sym} [off] x y ptr mask mem) + // result: (VPRORVQMasked128load {sym} [off] x ptr mask mem) for { x := v_0 - y := v_1 - l := v_2 + l := v_1 if l.Op != OpAMD64VMOVDQUload128 { break } @@ -40106,30 +41430,28 @@ func rewriteValueAMD64_OpAMD64VPSHRDVQMasked128(v *Value) bool { sym := auxToSym(l.Aux) mem := l.Args[1] ptr := l.Args[0] - mask := v_3 + mask := v_2 if !(canMergeLoad(v, l) && clobber(l)) { break } - 
v.reset(OpAMD64VPSHRDVQMasked128load) + v.reset(OpAMD64VPRORVQMasked128load) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg5(x, y, ptr, mask, mem) + v.AddArg4(x, ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPSHRDVQMasked256(v *Value) bool { - v_3 := v.Args[3] +func rewriteValueAMD64_OpAMD64VPRORVQMasked256(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPSHRDVQMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // match: (VPRORVQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSHRDVQMasked256load {sym} [off] x y ptr mask mem) + // result: (VPRORVQMasked256load {sym} [off] x ptr mask mem) for { x := v_0 - y := v_1 - l := v_2 + l := v_1 if l.Op != OpAMD64VMOVDQUload256 { break } @@ -40137,30 +41459,28 @@ func rewriteValueAMD64_OpAMD64VPSHRDVQMasked256(v *Value) bool { sym := auxToSym(l.Aux) mem := l.Args[1] ptr := l.Args[0] - mask := v_3 + mask := v_2 if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPSHRDVQMasked256load) + v.reset(OpAMD64VPRORVQMasked256load) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg5(x, y, ptr, mask, mem) + v.AddArg4(x, ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPSHRDVQMasked512(v *Value) bool { - v_3 := v.Args[3] +func rewriteValueAMD64_OpAMD64VPRORVQMasked512(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPSHRDVQMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // match: (VPRORVQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSHRDVQMasked512load {sym} [off] x y ptr mask mem) + // result: (VPRORVQMasked512load {sym} [off] x ptr mask mem) for { x := v_0 - y := v_1 - l := v_2 + l := v_1 if l.Op != OpAMD64VMOVDQUload512 { break } @@ -40168,253 +41488,345 @@ func rewriteValueAMD64_OpAMD64VPSHRDVQMasked512(v *Value) 
bool { sym := auxToSym(l.Aux) mem := l.Args[1] ptr := l.Args[0] - mask := v_3 + mask := v_2 if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPSHRDVQMasked512load) + v.reset(OpAMD64VPRORVQMasked512load) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg5(x, y, ptr, mask, mem) + v.AddArg4(x, ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPSLLD128(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSHLDD128(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPSLLD128 x (MOVQconst [c])) - // result: (VPSLLD128const [uint8(c)] x) + // match: (VPSHLDD128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSHLDD128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem) for { + c := auxIntToUint8(v.AuxInt) x := v_0 - if v_1.Op != OpAMD64MOVQconst { + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { break } - c := auxIntToInt64(v_1.AuxInt) - v.reset(OpAMD64VPSLLD128const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg(x) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSHLDD128load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPSLLD256(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSHLDD256(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPSLLD256 x (MOVQconst [c])) - // result: (VPSLLD256const [uint8(c)] x) + // match: (VPSHLDD256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSHLDD256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem) for { + c := auxIntToUint8(v.AuxInt) x := v_0 - if v_1.Op != OpAMD64MOVQconst { + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { break } - c := auxIntToInt64(v_1.AuxInt) - 
v.reset(OpAMD64VPSLLD256const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg(x) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSHLDD256load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPSLLD512(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSHLDD512(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPSLLD512 x (MOVQconst [c])) - // result: (VPSLLD512const [uint8(c)] x) + // match: (VPSHLDD512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSHLDD512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem) for { + c := auxIntToUint8(v.AuxInt) x := v_0 - if v_1.Op != OpAMD64MOVQconst { + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { break } - c := auxIntToInt64(v_1.AuxInt) - v.reset(OpAMD64VPSLLD512const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg(x) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSHLDD512load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPSLLDMasked128(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSHLDDMasked128(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPSLLDMasked128 x (MOVQconst [c]) mask) - // result: (VPSLLDMasked128const [uint8(c)] x mask) + // match: (VPSHLDDMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSHLDDMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) for { + c := auxIntToUint8(v.AuxInt) x := v_0 - if v_1.Op != 
OpAMD64MOVQconst { + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { break } - c := auxIntToInt64(v_1.AuxInt) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] mask := v_2 - v.reset(OpAMD64VPSLLDMasked128const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg2(x, mask) + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSHLDDMasked128load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPSLLDMasked256(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSHLDDMasked256(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPSLLDMasked256 x (MOVQconst [c]) mask) - // result: (VPSLLDMasked256const [uint8(c)] x mask) + // match: (VPSHLDDMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSHLDDMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) for { + c := auxIntToUint8(v.AuxInt) x := v_0 - if v_1.Op != OpAMD64MOVQconst { + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { break } - c := auxIntToInt64(v_1.AuxInt) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] mask := v_2 - v.reset(OpAMD64VPSLLDMasked256const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg2(x, mask) + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSHLDDMasked256load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPSLLDMasked512(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSHLDDMasked512(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPSLLDMasked512 x (MOVQconst [c]) mask) - // result: (VPSLLDMasked512const [uint8(c)] x mask) + // match: (VPSHLDDMasked512 
[c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSHLDDMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) for { + c := auxIntToUint8(v.AuxInt) x := v_0 - if v_1.Op != OpAMD64MOVQconst { + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { break } - c := auxIntToInt64(v_1.AuxInt) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] mask := v_2 - v.reset(OpAMD64VPSLLDMasked512const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg2(x, mask) - return true - } - return false -} -func rewriteValueAMD64_OpAMD64VPSLLQ128(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSLLQ128 x (MOVQconst [c])) - // result: (VPSLLQ128const [uint8(c)] x) - for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { + if !(canMergeLoad(v, l) && clobber(l)) { break } - c := auxIntToInt64(v_1.AuxInt) - v.reset(OpAMD64VPSLLQ128const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg(x) + v.reset(OpAMD64VPSHLDDMasked512load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPSLLQ256(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSHLDQ128(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPSLLQ256 x (MOVQconst [c])) - // result: (VPSLLQ256const [uint8(c)] x) + // match: (VPSHLDQ128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSHLDQ128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem) for { + c := auxIntToUint8(v.AuxInt) x := v_0 - if v_1.Op != OpAMD64MOVQconst { + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { break } - c := auxIntToInt64(v_1.AuxInt) - v.reset(OpAMD64VPSLLQ256const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg(x) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && 
clobber(l)) { + break + } + v.reset(OpAMD64VPSHLDQ128load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPSLLQ512(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSHLDQ256(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPSLLQ512 x (MOVQconst [c])) - // result: (VPSLLQ512const [uint8(c)] x) + // match: (VPSHLDQ256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSHLDQ256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem) for { + c := auxIntToUint8(v.AuxInt) x := v_0 - if v_1.Op != OpAMD64MOVQconst { + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { break } - c := auxIntToInt64(v_1.AuxInt) - v.reset(OpAMD64VPSLLQ512const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg(x) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSHLDQ256load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPSLLQMasked128(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpAMD64VPSHLDQ512(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPSLLQMasked128 x (MOVQconst [c]) mask) - // result: (VPSLLQMasked128const [uint8(c)] x mask) + // match: (VPSHLDQ512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSHLDQ512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem) for { + c := auxIntToUint8(v.AuxInt) x := v_0 - if v_1.Op != OpAMD64MOVQconst { + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { break } - c := auxIntToInt64(v_1.AuxInt) - mask := v_2 - v.reset(OpAMD64VPSLLQMasked128const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg2(x, mask) + off := 
auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSHLDQ512load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPSLLQMasked256(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSHLDQMasked128(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPSLLQMasked256 x (MOVQconst [c]) mask) - // result: (VPSLLQMasked256const [uint8(c)] x mask) + // match: (VPSHLDQMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSHLDQMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) for { + c := auxIntToUint8(v.AuxInt) x := v_0 - if v_1.Op != OpAMD64MOVQconst { + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { break } - c := auxIntToInt64(v_1.AuxInt) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] mask := v_2 - v.reset(OpAMD64VPSLLQMasked256const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg2(x, mask) + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSHLDQMasked128load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPSLLQMasked512(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSHLDQMasked256(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPSLLQMasked512 x (MOVQconst [c]) mask) - // result: (VPSLLQMasked512const [uint8(c)] x mask) + // match: (VPSHLDQMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSHLDQMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) for { + c := auxIntToUint8(v.AuxInt) x := v_0 - if 
v_1.Op != OpAMD64MOVQconst { + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { break } - c := auxIntToInt64(v_1.AuxInt) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] mask := v_2 - v.reset(OpAMD64VPSLLQMasked512const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg2(x, mask) + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSHLDQMasked256load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPSLLVD512(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSHLDQMasked512(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPSLLVD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // match: (VPSHLDQMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSLLVD512load {sym} [off] x ptr mem) + // result: (VPSHLDQMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) for { + c := auxIntToUint8(v.AuxInt) x := v_0 l := v_1 if l.Op != OpAMD64VMOVDQUload512 { @@ -40424,27 +41836,29 @@ func rewriteValueAMD64_OpAMD64VPSLLVD512(v *Value) bool { sym := auxToSym(l.Aux) mem := l.Args[1] ptr := l.Args[0] + mask := v_2 if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPSLLVD512load) - v.AuxInt = int32ToAuxInt(off) + v.reset(OpAMD64VPSHLDQMasked512load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) + v.AddArg4(x, ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPSLLVDMasked128(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSHLDVD128(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPSLLVDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // match: (VPSHLDVD128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) // cond: canMergeLoad(v, 
l) && clobber(l) - // result: (VPSLLVDMasked128load {sym} [off] x ptr mask mem) + // result: (VPSHLDVD128load {sym} [off] x y ptr mem) for { x := v_0 - l := v_1 + y := v_1 + l := v_2 if l.Op != OpAMD64VMOVDQUload128 { break } @@ -40452,28 +41866,28 @@ func rewriteValueAMD64_OpAMD64VPSLLVDMasked128(v *Value) bool { sym := auxToSym(l.Aux) mem := l.Args[1] ptr := l.Args[0] - mask := v_2 if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPSLLVDMasked128load) + v.reset(OpAMD64VPSHLDVD128load) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + v.AddArg4(x, y, ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPSLLVDMasked256(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSHLDVD256(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPSLLVDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // match: (VPSHLDVD256 x y l:(VMOVDQUload256 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSLLVDMasked256load {sym} [off] x ptr mask mem) + // result: (VPSHLDVD256load {sym} [off] x y ptr mem) for { x := v_0 - l := v_1 + y := v_1 + l := v_2 if l.Op != OpAMD64VMOVDQUload256 { break } @@ -40481,28 +41895,28 @@ func rewriteValueAMD64_OpAMD64VPSLLVDMasked256(v *Value) bool { sym := auxToSym(l.Aux) mem := l.Args[1] ptr := l.Args[0] - mask := v_2 if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPSLLVDMasked256load) + v.reset(OpAMD64VPSHLDVD256load) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + v.AddArg4(x, y, ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPSLLVDMasked512(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSHLDVD512(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPSLLVDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // match: (VPSHLDVD512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && 
clobber(l) - // result: (VPSLLVDMasked512load {sym} [off] x ptr mask mem) + // result: (VPSHLDVD512load {sym} [off] x y ptr mem) for { x := v_0 - l := v_1 + y := v_1 + l := v_2 if l.Op != OpAMD64VMOVDQUload512 { break } @@ -40510,481 +41924,617 @@ func rewriteValueAMD64_OpAMD64VPSLLVDMasked512(v *Value) bool { sym := auxToSym(l.Aux) mem := l.Args[1] ptr := l.Args[0] - mask := v_2 if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPSLLVDMasked512load) + v.reset(OpAMD64VPSHLDVD512load) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + v.AddArg4(x, y, ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPSLLVQ512(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSHLDVDMasked128(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPSLLVQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // match: (VPSHLDVDMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSLLVQ512load {sym} [off] x ptr mem) + // result: (VPSHLDVDMasked128load {sym} [off] x y ptr mask mem) for { x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload128 { break } off := auxIntToInt32(l.AuxInt) sym := auxToSym(l.Aux) mem := l.Args[1] ptr := l.Args[0] + mask := v_3 if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPSLLVQ512load) + v.reset(OpAMD64VPSHLDVDMasked128load) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) + v.AddArg5(x, y, ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPSLLVQMasked128(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSHLDVDMasked256(v *Value) bool { + v_3 := v.Args[3] v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPSLLVQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // match: (VPSHLDVDMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) 
// cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSLLVQMasked128load {sym} [off] x ptr mask mem) + // result: (VPSHLDVDMasked256load {sym} [off] x y ptr mask mem) for { x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload128 { + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload256 { break } off := auxIntToInt32(l.AuxInt) sym := auxToSym(l.Aux) mem := l.Args[1] ptr := l.Args[0] - mask := v_2 + mask := v_3 if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPSLLVQMasked128load) + v.reset(OpAMD64VPSHLDVDMasked256load) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + v.AddArg5(x, y, ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPSLLVQMasked256(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSHLDVDMasked512(v *Value) bool { + v_3 := v.Args[3] v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPSLLVQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // match: (VPSHLDVDMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSLLVQMasked256load {sym} [off] x ptr mask mem) + // result: (VPSHLDVDMasked512load {sym} [off] x y ptr mask mem) for { x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload256 { + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload512 { break } off := auxIntToInt32(l.AuxInt) sym := auxToSym(l.Aux) mem := l.Args[1] ptr := l.Args[0] - mask := v_2 + mask := v_3 if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPSLLVQMasked256load) + v.reset(OpAMD64VPSHLDVDMasked512load) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + v.AddArg5(x, y, ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPSLLVQMasked512(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSHLDVQ128(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPSLLVQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // 
match: (VPSHLDVQ128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSLLVQMasked512load {sym} [off] x ptr mask mem) + // result: (VPSHLDVQ128load {sym} [off] x y ptr mem) for { x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload128 { break } off := auxIntToInt32(l.AuxInt) sym := auxToSym(l.Aux) mem := l.Args[1] ptr := l.Args[0] - mask := v_2 if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPSLLVQMasked512load) + v.reset(OpAMD64VPSHLDVQ128load) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + v.AddArg4(x, y, ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPSLLW128(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSHLDVQ256(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPSLLW128 x (MOVQconst [c])) - // result: (VPSLLW128const [uint8(c)] x) + // match: (VPSHLDVQ256 x y l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSHLDVQ256load {sym} [off] x y ptr mem) for { x := v_0 - if v_1.Op != OpAMD64MOVQconst { + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload256 { break } - c := auxIntToInt64(v_1.AuxInt) - v.reset(OpAMD64VPSLLW128const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg(x) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSHLDVQ256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, y, ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPSLLW256(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSHLDVQ512(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPSLLW256 x (MOVQconst [c])) - // result: (VPSLLW256const [uint8(c)] x) + // match: (VPSHLDVQ512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) + 
// cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSHLDVQ512load {sym} [off] x y ptr mem) for { x := v_0 - if v_1.Op != OpAMD64MOVQconst { + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload512 { break } - c := auxIntToInt64(v_1.AuxInt) - v.reset(OpAMD64VPSLLW256const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg(x) - return true - } - return false -} -func rewriteValueAMD64_OpAMD64VPSLLW512(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSLLW512 x (MOVQconst [c])) - // result: (VPSLLW512const [uint8(c)] x) - for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - c := auxIntToInt64(v_1.AuxInt) - v.reset(OpAMD64VPSLLW512const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg(x) + v.reset(OpAMD64VPSHLDVQ512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, y, ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPSLLWMasked128(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSHLDVQMasked128(v *Value) bool { + v_3 := v.Args[3] v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPSLLWMasked128 x (MOVQconst [c]) mask) - // result: (VPSLLWMasked128const [uint8(c)] x mask) + // match: (VPSHLDVQMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSHLDVQMasked128load {sym} [off] x y ptr mask mem) for { x := v_0 - if v_1.Op != OpAMD64MOVQconst { + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload128 { break } - c := auxIntToInt64(v_1.AuxInt) - mask := v_2 - v.reset(OpAMD64VPSLLWMasked128const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg2(x, mask) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_3 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSHLDVQMasked128load) + v.AuxInt = 
int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg5(x, y, ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPSLLWMasked256(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSHLDVQMasked256(v *Value) bool { + v_3 := v.Args[3] v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPSLLWMasked256 x (MOVQconst [c]) mask) - // result: (VPSLLWMasked256const [uint8(c)] x mask) + // match: (VPSHLDVQMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSHLDVQMasked256load {sym} [off] x y ptr mask mem) for { x := v_0 - if v_1.Op != OpAMD64MOVQconst { + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload256 { break } - c := auxIntToInt64(v_1.AuxInt) - mask := v_2 - v.reset(OpAMD64VPSLLWMasked256const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg2(x, mask) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_3 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSHLDVQMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg5(x, y, ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPSLLWMasked512(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSHLDVQMasked512(v *Value) bool { + v_3 := v.Args[3] v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPSLLWMasked512 x (MOVQconst [c]) mask) - // result: (VPSLLWMasked512const [uint8(c)] x mask) + // match: (VPSHLDVQMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSHLDVQMasked512load {sym} [off] x y ptr mask mem) for { x := v_0 - if v_1.Op != OpAMD64MOVQconst { + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload512 { break } - c := auxIntToInt64(v_1.AuxInt) - mask := v_2 - v.reset(OpAMD64VPSLLWMasked512const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg2(x, mask) + off := auxIntToInt32(l.AuxInt) + sym := 
auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_3 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSHLDVQMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg5(x, y, ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPSRAD128(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSHRDD128(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPSRAD128 x (MOVQconst [c])) - // result: (VPSRAD128const [uint8(c)] x) + // match: (VPSHRDD128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSHRDD128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem) for { + c := auxIntToUint8(v.AuxInt) x := v_0 - if v_1.Op != OpAMD64MOVQconst { + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { break } - c := auxIntToInt64(v_1.AuxInt) - v.reset(OpAMD64VPSRAD128const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg(x) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSHRDD128load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPSRAD256(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSHRDD256(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPSRAD256 x (MOVQconst [c])) - // result: (VPSRAD256const [uint8(c)] x) + // match: (VPSHRDD256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSHRDD256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem) for { + c := auxIntToUint8(v.AuxInt) x := v_0 - if v_1.Op != OpAMD64MOVQconst { + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { break } - c := auxIntToInt64(v_1.AuxInt) - v.reset(OpAMD64VPSRAD256const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg(x) + off := 
auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSHRDD256load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPSRAD512(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSHRDD512(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPSRAD512 x (MOVQconst [c])) - // result: (VPSRAD512const [uint8(c)] x) + // match: (VPSHRDD512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSHRDD512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem) for { + c := auxIntToUint8(v.AuxInt) x := v_0 - if v_1.Op != OpAMD64MOVQconst { + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { break } - c := auxIntToInt64(v_1.AuxInt) - v.reset(OpAMD64VPSRAD512const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg(x) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSHRDD512load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPSRADMasked128(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSHRDDMasked128(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPSRADMasked128 x (MOVQconst [c]) mask) - // result: (VPSRADMasked128const [uint8(c)] x mask) + // match: (VPSHRDDMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSHRDDMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) for { + c := auxIntToUint8(v.AuxInt) x := v_0 - if v_1.Op != OpAMD64MOVQconst { + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { break } - c := 
auxIntToInt64(v_1.AuxInt) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] mask := v_2 - v.reset(OpAMD64VPSRADMasked128const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg2(x, mask) + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSHRDDMasked128load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPSRADMasked256(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSHRDDMasked256(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPSRADMasked256 x (MOVQconst [c]) mask) - // result: (VPSRADMasked256const [uint8(c)] x mask) + // match: (VPSHRDDMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSHRDDMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) for { + c := auxIntToUint8(v.AuxInt) x := v_0 - if v_1.Op != OpAMD64MOVQconst { + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { break } - c := auxIntToInt64(v_1.AuxInt) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] mask := v_2 - v.reset(OpAMD64VPSRADMasked256const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg2(x, mask) + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSHRDDMasked256load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPSRADMasked512(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSHRDDMasked512(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPSRADMasked512 x (MOVQconst [c]) mask) - // result: (VPSRADMasked512const [uint8(c)] x mask) + // match: (VPSHRDDMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) 
&& clobber(l) + // result: (VPSHRDDMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) for { + c := auxIntToUint8(v.AuxInt) x := v_0 - if v_1.Op != OpAMD64MOVQconst { + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { break } - c := auxIntToInt64(v_1.AuxInt) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] mask := v_2 - v.reset(OpAMD64VPSRADMasked512const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg2(x, mask) - return true - } - return false -} -func rewriteValueAMD64_OpAMD64VPSRAQ128(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPSRAQ128 x (MOVQconst [c])) - // result: (VPSRAQ128const [uint8(c)] x) - for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { + if !(canMergeLoad(v, l) && clobber(l)) { break } - c := auxIntToInt64(v_1.AuxInt) - v.reset(OpAMD64VPSRAQ128const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg(x) + v.reset(OpAMD64VPSHRDDMasked512load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPSRAQ256(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSHRDQ128(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPSRAQ256 x (MOVQconst [c])) - // result: (VPSRAQ256const [uint8(c)] x) + // match: (VPSHRDQ128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSHRDQ128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem) for { + c := auxIntToUint8(v.AuxInt) x := v_0 - if v_1.Op != OpAMD64MOVQconst { + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { break } - c := auxIntToInt64(v_1.AuxInt) - v.reset(OpAMD64VPSRAQ256const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg(x) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSHRDQ128load) + v.AuxInt = 
valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPSRAQ512(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSHRDQ256(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPSRAQ512 x (MOVQconst [c])) - // result: (VPSRAQ512const [uint8(c)] x) + // match: (VPSHRDQ256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSHRDQ256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem) for { + c := auxIntToUint8(v.AuxInt) x := v_0 - if v_1.Op != OpAMD64MOVQconst { + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { break } - c := auxIntToInt64(v_1.AuxInt) - v.reset(OpAMD64VPSRAQ512const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg(x) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSHRDQ256load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPSRAQMasked128(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpAMD64VPSHRDQ512(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPSRAQMasked128 x (MOVQconst [c]) mask) - // result: (VPSRAQMasked128const [uint8(c)] x mask) + // match: (VPSHRDQ512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSHRDQ512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem) for { + c := auxIntToUint8(v.AuxInt) x := v_0 - if v_1.Op != OpAMD64MOVQconst { + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { break } - c := auxIntToInt64(v_1.AuxInt) - mask := v_2 - v.reset(OpAMD64VPSRAQMasked128const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg2(x, mask) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := 
l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSHRDQ512load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPSRAQMasked256(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSHRDQMasked128(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPSRAQMasked256 x (MOVQconst [c]) mask) - // result: (VPSRAQMasked256const [uint8(c)] x mask) + // match: (VPSHRDQMasked128 [c] x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSHRDQMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) for { + c := auxIntToUint8(v.AuxInt) x := v_0 - if v_1.Op != OpAMD64MOVQconst { + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { break } - c := auxIntToInt64(v_1.AuxInt) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] mask := v_2 - v.reset(OpAMD64VPSRAQMasked256const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg2(x, mask) + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSHRDQMasked128load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPSRAQMasked512(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSHRDQMasked256(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPSRAQMasked512 x (MOVQconst [c]) mask) - // result: (VPSRAQMasked512const [uint8(c)] x mask) + // match: (VPSHRDQMasked256 [c] x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSHRDQMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) for { + c := auxIntToUint8(v.AuxInt) x := v_0 - if v_1.Op != OpAMD64MOVQconst { + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { 
break } - c := auxIntToInt64(v_1.AuxInt) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] mask := v_2 - v.reset(OpAMD64VPSRAQMasked512const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg2(x, mask) + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSHRDQMasked256load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPSRAVD512(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSHRDQMasked512(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPSRAVD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // match: (VPSHRDQMasked512 [c] x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSRAVD512load {sym} [off] x ptr mem) + // result: (VPSHRDQMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem) for { + c := auxIntToUint8(v.AuxInt) x := v_0 l := v_1 if l.Op != OpAMD64VMOVDQUload512 { @@ -40994,27 +42544,29 @@ func rewriteValueAMD64_OpAMD64VPSRAVD512(v *Value) bool { sym := auxToSym(l.Aux) mem := l.Args[1] ptr := l.Args[0] + mask := v_2 if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPSRAVD512load) - v.AuxInt = int32ToAuxInt(off) + v.reset(OpAMD64VPSHRDQMasked512load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) + v.AddArg4(x, ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPSRAVDMasked128(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSHRDVD128(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPSRAVDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // match: (VPSHRDVD128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSRAVDMasked128load {sym} [off] x ptr mask 
mem) + // result: (VPSHRDVD128load {sym} [off] x y ptr mem) for { x := v_0 - l := v_1 + y := v_1 + l := v_2 if l.Op != OpAMD64VMOVDQUload128 { break } @@ -41022,28 +42574,28 @@ func rewriteValueAMD64_OpAMD64VPSRAVDMasked128(v *Value) bool { sym := auxToSym(l.Aux) mem := l.Args[1] ptr := l.Args[0] - mask := v_2 if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPSRAVDMasked128load) + v.reset(OpAMD64VPSHRDVD128load) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + v.AddArg4(x, y, ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPSRAVDMasked256(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSHRDVD256(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPSRAVDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // match: (VPSHRDVD256 x y l:(VMOVDQUload256 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSRAVDMasked256load {sym} [off] x ptr mask mem) + // result: (VPSHRDVD256load {sym} [off] x y ptr mem) for { x := v_0 - l := v_1 + y := v_1 + l := v_2 if l.Op != OpAMD64VMOVDQUload256 { break } @@ -41051,28 +42603,28 @@ func rewriteValueAMD64_OpAMD64VPSRAVDMasked256(v *Value) bool { sym := auxToSym(l.Aux) mem := l.Args[1] ptr := l.Args[0] - mask := v_2 if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPSRAVDMasked256load) + v.reset(OpAMD64VPSHRDVD256load) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + v.AddArg4(x, y, ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPSRAVDMasked512(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSHRDVD512(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPSRAVDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // match: (VPSHRDVD512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSRAVDMasked512load {sym} [off] x ptr mask mem) + 
// result: (VPSHRDVD512load {sym} [off] x y ptr mem) for { x := v_0 - l := v_1 + y := v_1 + l := v_2 if l.Op != OpAMD64VMOVDQUload512 { break } @@ -41080,27 +42632,29 @@ func rewriteValueAMD64_OpAMD64VPSRAVDMasked512(v *Value) bool { sym := auxToSym(l.Aux) mem := l.Args[1] ptr := l.Args[0] - mask := v_2 if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPSRAVDMasked512load) + v.reset(OpAMD64VPSHRDVD512load) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + v.AddArg4(x, y, ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPSRAVQ128(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSHRDVDMasked128(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPSRAVQ128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) + // match: (VPSHRDVDMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSRAVQ128load {sym} [off] x ptr mem) + // result: (VPSHRDVDMasked128load {sym} [off] x y ptr mask mem) for { x := v_0 - l := v_1 + y := v_1 + l := v_2 if l.Op != OpAMD64VMOVDQUload128 { break } @@ -41108,26 +42662,30 @@ func rewriteValueAMD64_OpAMD64VPSRAVQ128(v *Value) bool { sym := auxToSym(l.Aux) mem := l.Args[1] ptr := l.Args[0] + mask := v_3 if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPSRAVQ128load) + v.reset(OpAMD64VPSHRDVDMasked128load) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) + v.AddArg5(x, y, ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPSRAVQ256(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSHRDVDMasked256(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPSRAVQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) + // match: (VPSHRDVDMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSRAVQ256load {sym} 
[off] x ptr mem) + // result: (VPSHRDVDMasked256load {sym} [off] x y ptr mask mem) for { x := v_0 - l := v_1 + y := v_1 + l := v_2 if l.Op != OpAMD64VMOVDQUload256 { break } @@ -41135,26 +42693,30 @@ func rewriteValueAMD64_OpAMD64VPSRAVQ256(v *Value) bool { sym := auxToSym(l.Aux) mem := l.Args[1] ptr := l.Args[0] + mask := v_3 if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPSRAVQ256load) + v.reset(OpAMD64VPSHRDVDMasked256load) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) + v.AddArg5(x, y, ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPSRAVQ512(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSHRDVDMasked512(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPSRAVQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // match: (VPSHRDVDMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSRAVQ512load {sym} [off] x ptr mem) + // result: (VPSHRDVDMasked512load {sym} [off] x y ptr mask mem) for { x := v_0 - l := v_1 + y := v_1 + l := v_2 if l.Op != OpAMD64VMOVDQUload512 { break } @@ -41162,27 +42724,29 @@ func rewriteValueAMD64_OpAMD64VPSRAVQ512(v *Value) bool { sym := auxToSym(l.Aux) mem := l.Args[1] ptr := l.Args[0] + mask := v_3 if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPSRAVQ512load) + v.reset(OpAMD64VPSHRDVDMasked512load) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) + v.AddArg5(x, y, ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPSRAVQMasked128(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSHRDVQ128(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPSRAVQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // match: (VPSHRDVQ128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: 
(VPSRAVQMasked128load {sym} [off] x ptr mask mem) + // result: (VPSHRDVQ128load {sym} [off] x y ptr mem) for { x := v_0 - l := v_1 + y := v_1 + l := v_2 if l.Op != OpAMD64VMOVDQUload128 { break } @@ -41190,28 +42754,28 @@ func rewriteValueAMD64_OpAMD64VPSRAVQMasked128(v *Value) bool { sym := auxToSym(l.Aux) mem := l.Args[1] ptr := l.Args[0] - mask := v_2 if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPSRAVQMasked128load) + v.reset(OpAMD64VPSHRDVQ128load) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + v.AddArg4(x, y, ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPSRAVQMasked256(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSHRDVQ256(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPSRAVQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // match: (VPSHRDVQ256 x y l:(VMOVDQUload256 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSRAVQMasked256load {sym} [off] x ptr mask mem) + // result: (VPSHRDVQ256load {sym} [off] x y ptr mem) for { x := v_0 - l := v_1 + y := v_1 + l := v_2 if l.Op != OpAMD64VMOVDQUload256 { break } @@ -41219,28 +42783,28 @@ func rewriteValueAMD64_OpAMD64VPSRAVQMasked256(v *Value) bool { sym := auxToSym(l.Aux) mem := l.Args[1] ptr := l.Args[0] - mask := v_2 if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPSRAVQMasked256load) + v.reset(OpAMD64VPSHRDVQ256load) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + v.AddArg4(x, y, ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPSRAVQMasked512(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSHRDVQ512(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPSRAVQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // match: (VPSHRDVQ512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: 
(VPSRAVQMasked512load {sym} [off] x ptr mask mem) + // result: (VPSHRDVQ512load {sym} [off] x y ptr mem) for { x := v_0 - l := v_1 + y := v_1 + l := v_2 if l.Op != OpAMD64VMOVDQUload512 { break } @@ -41248,78 +42812,306 @@ func rewriteValueAMD64_OpAMD64VPSRAVQMasked512(v *Value) bool { sym := auxToSym(l.Aux) mem := l.Args[1] ptr := l.Args[0] - mask := v_2 if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPSRAVQMasked512load) + v.reset(OpAMD64VPSHRDVQ512load) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + v.AddArg4(x, y, ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPSRAW128(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSHRDVQMasked128(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPSRAW128 x (MOVQconst [c])) - // result: (VPSRAW128const [uint8(c)] x) + // match: (VPSHRDVQMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSHRDVQMasked128load {sym} [off] x y ptr mask mem) for { x := v_0 - if v_1.Op != OpAMD64MOVQconst { - break + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_3 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSHRDVQMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg5(x, y, ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSHRDVQMasked256(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSHRDVQMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSHRDVQMasked256load {sym} [off] x y ptr mask mem) + for { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := 
auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_3 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSHRDVQMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg5(x, y, ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSHRDVQMasked512(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSHRDVQMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSHRDVQMasked512load {sym} [off] x y ptr mask mem) + for { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_3 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSHRDVQMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg5(x, y, ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSHUFD512(v *Value) bool { + v_0 := v.Args[0] + // match: (VPSHUFD512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSHUFD512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) + for { + c := auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSHUFD512load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSHUFDMasked128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSHUFDMasked128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && 
clobber(l) + // result: (VPSHUFDMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) + for { + c := auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSHUFDMasked128load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSHUFDMasked256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSHUFDMasked256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSHUFDMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) + for { + c := auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSHUFDMasked256load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSHUFDMasked512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSHUFDMasked512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSHUFDMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) + for { + c := auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSHUFDMasked512load) + v.AuxInt = 
valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSLLD128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSLLD128 x (MOVQconst [c])) + // result: (VPSLLD128const [uint8(c)] x) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break } c := auxIntToInt64(v_1.AuxInt) - v.reset(OpAMD64VPSRAW128const) + v.reset(OpAMD64VPSLLD128const) v.AuxInt = uint8ToAuxInt(uint8(c)) v.AddArg(x) return true } return false } -func rewriteValueAMD64_OpAMD64VPSRAW256(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSLLD256(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPSRAW256 x (MOVQconst [c])) - // result: (VPSRAW256const [uint8(c)] x) + // match: (VPSLLD256 x (MOVQconst [c])) + // result: (VPSLLD256const [uint8(c)] x) for { x := v_0 if v_1.Op != OpAMD64MOVQconst { break } c := auxIntToInt64(v_1.AuxInt) - v.reset(OpAMD64VPSRAW256const) + v.reset(OpAMD64VPSLLD256const) v.AuxInt = uint8ToAuxInt(uint8(c)) v.AddArg(x) return true } return false } -func rewriteValueAMD64_OpAMD64VPSRAW512(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSLLD512(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPSRAW512 x (MOVQconst [c])) - // result: (VPSRAW512const [uint8(c)] x) + // match: (VPSLLD512 x (MOVQconst [c])) + // result: (VPSLLD512const [uint8(c)] x) for { x := v_0 if v_1.Op != OpAMD64MOVQconst { break } c := auxIntToInt64(v_1.AuxInt) - v.reset(OpAMD64VPSRAW512const) + v.reset(OpAMD64VPSLLD512const) v.AuxInt = uint8ToAuxInt(uint8(c)) v.AddArg(x) return true } return false } -func rewriteValueAMD64_OpAMD64VPSRAWMasked128(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSLLD512const(v *Value) bool { + v_0 := v.Args[0] + // match: (VPSLLD512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSLLD512constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr 
mem) + for { + c := auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSLLD512constload) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSLLDMasked128(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPSRAWMasked128 x (MOVQconst [c]) mask) - // result: (VPSRAWMasked128const [uint8(c)] x mask) + // match: (VPSLLDMasked128 x (MOVQconst [c]) mask) + // result: (VPSLLDMasked128const [uint8(c)] x mask) for { x := v_0 if v_1.Op != OpAMD64MOVQconst { @@ -41327,19 +43119,47 @@ func rewriteValueAMD64_OpAMD64VPSRAWMasked128(v *Value) bool { } c := auxIntToInt64(v_1.AuxInt) mask := v_2 - v.reset(OpAMD64VPSRAWMasked128const) + v.reset(OpAMD64VPSLLDMasked128const) v.AuxInt = uint8ToAuxInt(uint8(c)) v.AddArg2(x, mask) return true } return false } -func rewriteValueAMD64_OpAMD64VPSRAWMasked256(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSLLDMasked128const(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSLLDMasked128const [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSLLDMasked128constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) + for { + c := auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSLLDMasked128constload) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) + return true + } + return false +} +func 
rewriteValueAMD64_OpAMD64VPSLLDMasked256(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPSRAWMasked256 x (MOVQconst [c]) mask) - // result: (VPSRAWMasked256const [uint8(c)] x mask) + // match: (VPSLLDMasked256 x (MOVQconst [c]) mask) + // result: (VPSLLDMasked256const [uint8(c)] x mask) for { x := v_0 if v_1.Op != OpAMD64MOVQconst { @@ -41347,19 +43167,47 @@ func rewriteValueAMD64_OpAMD64VPSRAWMasked256(v *Value) bool { } c := auxIntToInt64(v_1.AuxInt) mask := v_2 - v.reset(OpAMD64VPSRAWMasked256const) + v.reset(OpAMD64VPSLLDMasked256const) v.AuxInt = uint8ToAuxInt(uint8(c)) v.AddArg2(x, mask) return true } return false } -func rewriteValueAMD64_OpAMD64VPSRAWMasked512(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSLLDMasked256const(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSLLDMasked256const [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSLLDMasked256constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) + for { + c := auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSLLDMasked256constload) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSLLDMasked512(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPSRAWMasked512 x (MOVQconst [c]) mask) - // result: (VPSRAWMasked512const [uint8(c)] x mask) + // match: (VPSLLDMasked512 x (MOVQconst [c]) mask) + // result: (VPSLLDMasked512const [uint8(c)] x mask) for { x := v_0 if v_1.Op != OpAMD64MOVQconst { @@ -41367,22 +43215,103 @@ func rewriteValueAMD64_OpAMD64VPSRAWMasked512(v *Value) bool { } c := 
auxIntToInt64(v_1.AuxInt) mask := v_2 - v.reset(OpAMD64VPSRAWMasked512const) + v.reset(OpAMD64VPSLLDMasked512const) v.AuxInt = uint8ToAuxInt(uint8(c)) v.AddArg2(x, mask) return true } return false } -func rewriteValueAMD64_OpAMD64VPSRLVD512(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSLLDMasked512const(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPSRLVD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // match: (VPSLLDMasked512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSRLVD512load {sym} [off] x ptr mem) + // result: (VPSLLDMasked512constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) + for { + c := auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSLLDMasked512constload) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSLLQ128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSLLQ128 x (MOVQconst [c])) + // result: (VPSLLQ128const [uint8(c)] x) for { x := v_0 - l := v_1 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSLLQ128const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg(x) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSLLQ256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSLLQ256 x (MOVQconst [c])) + // result: (VPSLLQ256const [uint8(c)] x) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSLLQ256const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg(x) + return true + } + return false +} +func 
rewriteValueAMD64_OpAMD64VPSLLQ512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSLLQ512 x (MOVQconst [c])) + // result: (VPSLLQ512const [uint8(c)] x) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSLLQ512const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg(x) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSLLQ512const(v *Value) bool { + v_0 := v.Args[0] + // match: (VPSLLQ512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSLLQ512constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) + for { + c := auxIntToUint8(v.AuxInt) + l := v_0 if l.Op != OpAMD64VMOVDQUload512 { break } @@ -41393,24 +43322,43 @@ func rewriteValueAMD64_OpAMD64VPSRLVD512(v *Value) bool { if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPSRLVD512load) - v.AuxInt = int32ToAuxInt(off) + v.reset(OpAMD64VPSLLQ512constload) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) + v.AddArg2(ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPSRLVDMasked128(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSLLQMasked128(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPSRLVDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSRLVDMasked128load {sym} [off] x ptr mask mem) + // match: (VPSLLQMasked128 x (MOVQconst [c]) mask) + // result: (VPSLLQMasked128const [uint8(c)] x mask) for { x := v_0 - l := v_1 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSLLQMasked128const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg2(x, mask) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSLLQMasked128const(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + 
// match: (VPSLLQMasked128const [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSLLQMasked128constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) + for { + c := auxIntToUint8(v.AuxInt) + l := v_0 if l.Op != OpAMD64VMOVDQUload128 { break } @@ -41418,28 +43366,47 @@ func rewriteValueAMD64_OpAMD64VPSRLVDMasked128(v *Value) bool { sym := auxToSym(l.Aux) mem := l.Args[1] ptr := l.Args[0] - mask := v_2 + mask := v_1 if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPSRLVDMasked128load) - v.AuxInt = int32ToAuxInt(off) + v.reset(OpAMD64VPSLLQMasked128constload) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + v.AddArg3(ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPSRLVDMasked256(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSLLQMasked256(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPSRLVDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSRLVDMasked256load {sym} [off] x ptr mask mem) + // match: (VPSLLQMasked256 x (MOVQconst [c]) mask) + // result: (VPSLLQMasked256const [uint8(c)] x mask) for { x := v_0 - l := v_1 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSLLQMasked256const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg2(x, mask) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSLLQMasked256const(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSLLQMasked256const [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSLLQMasked256constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) + for { + c := auxIntToUint8(v.AuxInt) + l := v_0 if l.Op != OpAMD64VMOVDQUload256 { break } @@ -41447,28 +43414,47 @@ 
func rewriteValueAMD64_OpAMD64VPSRLVDMasked256(v *Value) bool { sym := auxToSym(l.Aux) mem := l.Args[1] ptr := l.Args[0] - mask := v_2 + mask := v_1 if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPSRLVDMasked256load) - v.AuxInt = int32ToAuxInt(off) + v.reset(OpAMD64VPSLLQMasked256constload) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + v.AddArg3(ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPSRLVDMasked512(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSLLQMasked512(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPSRLVDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSRLVDMasked512load {sym} [off] x ptr mask mem) + // match: (VPSLLQMasked512 x (MOVQconst [c]) mask) + // result: (VPSLLQMasked512const [uint8(c)] x mask) for { x := v_0 - l := v_1 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSLLQMasked512const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg2(x, mask) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSLLQMasked512const(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSLLQMasked512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSLLQMasked512constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) + for { + c := auxIntToUint8(v.AuxInt) + l := v_0 if l.Op != OpAMD64VMOVDQUload512 { break } @@ -41476,24 +43462,24 @@ func rewriteValueAMD64_OpAMD64VPSRLVDMasked512(v *Value) bool { sym := auxToSym(l.Aux) mem := l.Args[1] ptr := l.Args[0] - mask := v_2 + mask := v_1 if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPSRLVDMasked512load) - v.AuxInt = int32ToAuxInt(off) + v.reset(OpAMD64VPSLLQMasked512constload) + v.AuxInt = 
valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + v.AddArg3(ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPSRLVQ512(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSLLVD512(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPSRLVQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // match: (VPSLLVD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSRLVQ512load {sym} [off] x ptr mem) + // result: (VPSLLVD512load {sym} [off] x ptr mem) for { x := v_0 l := v_1 @@ -41507,7 +43493,7 @@ func rewriteValueAMD64_OpAMD64VPSRLVQ512(v *Value) bool { if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPSRLVQ512load) + v.reset(OpAMD64VPSLLVD512load) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) v.AddArg3(x, ptr, mem) @@ -41515,13 +43501,13 @@ func rewriteValueAMD64_OpAMD64VPSRLVQ512(v *Value) bool { } return false } -func rewriteValueAMD64_OpAMD64VPSRLVQMasked128(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSLLVDMasked128(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPSRLVQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // match: (VPSLLVDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSRLVQMasked128load {sym} [off] x ptr mask mem) + // result: (VPSLLVDMasked128load {sym} [off] x ptr mask mem) for { x := v_0 l := v_1 @@ -41536,7 +43522,7 @@ func rewriteValueAMD64_OpAMD64VPSRLVQMasked128(v *Value) bool { if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPSRLVQMasked128load) + v.reset(OpAMD64VPSLLVDMasked128load) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) v.AddArg4(x, ptr, mask, mem) @@ -41544,13 +43530,13 @@ func rewriteValueAMD64_OpAMD64VPSRLVQMasked128(v *Value) bool { } return false } -func rewriteValueAMD64_OpAMD64VPSRLVQMasked256(v *Value) bool { +func 
rewriteValueAMD64_OpAMD64VPSLLVDMasked256(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPSRLVQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // match: (VPSLLVDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSRLVQMasked256load {sym} [off] x ptr mask mem) + // result: (VPSLLVDMasked256load {sym} [off] x ptr mask mem) for { x := v_0 l := v_1 @@ -41565,7 +43551,7 @@ func rewriteValueAMD64_OpAMD64VPSRLVQMasked256(v *Value) bool { if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPSRLVQMasked256load) + v.reset(OpAMD64VPSLLVDMasked256load) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) v.AddArg4(x, ptr, mask, mem) @@ -41573,13 +43559,13 @@ func rewriteValueAMD64_OpAMD64VPSRLVQMasked256(v *Value) bool { } return false } -func rewriteValueAMD64_OpAMD64VPSRLVQMasked512(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSLLVDMasked512(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPSRLVQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // match: (VPSLLVDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSRLVQMasked512load {sym} [off] x ptr mask mem) + // result: (VPSLLVDMasked512load {sym} [off] x ptr mask mem) for { x := v_0 l := v_1 @@ -41594,7 +43580,7 @@ func rewriteValueAMD64_OpAMD64VPSRLVQMasked512(v *Value) bool { if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPSRLVQMasked512load) + v.reset(OpAMD64VPSLLVDMasked512load) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) v.AddArg4(x, ptr, mask, mem) @@ -41602,12 +43588,12 @@ func rewriteValueAMD64_OpAMD64VPSRLVQMasked512(v *Value) bool { } return false } -func rewriteValueAMD64_OpAMD64VPSUBD512(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSLLVQ512(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPSUBD512 x l:(VMOVDQUload512 {sym} [off] ptr 
mem)) + // match: (VPSLLVQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSUBD512load {sym} [off] x ptr mem) + // result: (VPSLLVQ512load {sym} [off] x ptr mem) for { x := v_0 l := v_1 @@ -41621,7 +43607,1579 @@ func rewriteValueAMD64_OpAMD64VPSUBD512(v *Value) bool { if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPSUBD512load) + v.reset(OpAMD64VPSLLVQ512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSLLVQMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSLLVQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSLLVQMasked128load {sym} [off] x ptr mask mem) + for { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSLLVQMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSLLVQMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSLLVQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSLLVQMasked256load {sym} [off] x ptr mask mem) + for { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSLLVQMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + return false +} +func 
rewriteValueAMD64_OpAMD64VPSLLVQMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSLLVQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSLLVQMasked512load {sym} [off] x ptr mask mem) + for { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSLLVQMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSLLW128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSLLW128 x (MOVQconst [c])) + // result: (VPSLLW128const [uint8(c)] x) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSLLW128const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg(x) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSLLW256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSLLW256 x (MOVQconst [c])) + // result: (VPSLLW256const [uint8(c)] x) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSLLW256const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg(x) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSLLW512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSLLW512 x (MOVQconst [c])) + // result: (VPSLLW512const [uint8(c)] x) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSLLW512const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg(x) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSLLWMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := 
v.Args[1] + v_0 := v.Args[0] + // match: (VPSLLWMasked128 x (MOVQconst [c]) mask) + // result: (VPSLLWMasked128const [uint8(c)] x mask) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSLLWMasked128const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg2(x, mask) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSLLWMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSLLWMasked256 x (MOVQconst [c]) mask) + // result: (VPSLLWMasked256const [uint8(c)] x mask) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSLLWMasked256const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg2(x, mask) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSLLWMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSLLWMasked512 x (MOVQconst [c]) mask) + // result: (VPSLLWMasked512const [uint8(c)] x mask) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSLLWMasked512const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg2(x, mask) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSRAD128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRAD128 x (MOVQconst [c])) + // result: (VPSRAD128const [uint8(c)] x) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSRAD128const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg(x) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSRAD256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRAD256 x (MOVQconst [c])) + // result: (VPSRAD256const [uint8(c)] x) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := 
auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSRAD256const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg(x) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSRAD512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRAD512 x (MOVQconst [c])) + // result: (VPSRAD512const [uint8(c)] x) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSRAD512const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg(x) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSRAD512const(v *Value) bool { + v_0 := v.Args[0] + // match: (VPSRAD512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSRAD512constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) + for { + c := auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSRAD512constload) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSRADMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRADMasked128 x (MOVQconst [c]) mask) + // result: (VPSRADMasked128const [uint8(c)] x mask) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSRADMasked128const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg2(x, mask) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSRADMasked128const(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRADMasked128const [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: 
(VPSRADMasked128constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) + for { + c := auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSRADMasked128constload) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSRADMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRADMasked256 x (MOVQconst [c]) mask) + // result: (VPSRADMasked256const [uint8(c)] x mask) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSRADMasked256const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg2(x, mask) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSRADMasked256const(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRADMasked256const [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSRADMasked256constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) + for { + c := auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSRADMasked256constload) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSRADMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRADMasked512 x (MOVQconst [c]) mask) + // result: 
(VPSRADMasked512const [uint8(c)] x mask) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSRADMasked512const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg2(x, mask) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSRADMasked512const(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRADMasked512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSRADMasked512constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) + for { + c := auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSRADMasked512constload) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSRAQ128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRAQ128 x (MOVQconst [c])) + // result: (VPSRAQ128const [uint8(c)] x) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSRAQ128const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg(x) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSRAQ128const(v *Value) bool { + v_0 := v.Args[0] + // match: (VPSRAQ128const [c] l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSRAQ128constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) + for { + c := auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + 
break + } + v.reset(OpAMD64VPSRAQ128constload) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSRAQ256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRAQ256 x (MOVQconst [c])) + // result: (VPSRAQ256const [uint8(c)] x) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSRAQ256const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg(x) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSRAQ256const(v *Value) bool { + v_0 := v.Args[0] + // match: (VPSRAQ256const [c] l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSRAQ256constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) + for { + c := auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSRAQ256constload) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSRAQ512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRAQ512 x (MOVQconst [c])) + // result: (VPSRAQ512const [uint8(c)] x) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSRAQ512const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg(x) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSRAQ512const(v *Value) bool { + v_0 := v.Args[0] + // match: (VPSRAQ512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSRAQ512constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) + for 
{ + c := auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSRAQ512constload) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSRAQMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRAQMasked128 x (MOVQconst [c]) mask) + // result: (VPSRAQMasked128const [uint8(c)] x mask) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSRAQMasked128const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg2(x, mask) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSRAQMasked128const(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRAQMasked128const [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSRAQMasked128constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) + for { + c := auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSRAQMasked128constload) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSRAQMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRAQMasked256 x (MOVQconst [c]) mask) + // result: (VPSRAQMasked256const [uint8(c)] x mask) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := 
auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSRAQMasked256const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg2(x, mask) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSRAQMasked256const(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRAQMasked256const [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSRAQMasked256constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) + for { + c := auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSRAQMasked256constload) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSRAQMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRAQMasked512 x (MOVQconst [c]) mask) + // result: (VPSRAQMasked512const [uint8(c)] x mask) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSRAQMasked512const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg2(x, mask) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSRAQMasked512const(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRAQMasked512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSRAQMasked512constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) + for { + c := auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, 
l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSRAQMasked512constload) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSRAVD512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRAVD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSRAVD512load {sym} [off] x ptr mem) + for { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSRAVD512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSRAVDMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRAVDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSRAVDMasked128load {sym} [off] x ptr mask mem) + for { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSRAVDMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSRAVDMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRAVDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSRAVDMasked256load {sym} [off] x ptr mask mem) + for { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) 
+ sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSRAVDMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSRAVDMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRAVDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSRAVDMasked512load {sym} [off] x ptr mask mem) + for { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSRAVDMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSRAVQ128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRAVQ128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSRAVQ128load {sym} [off] x ptr mem) + for { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSRAVQ128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSRAVQ256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRAVQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSRAVQ256load {sym} [off] x ptr mem) + for { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := 
auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSRAVQ256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSRAVQ512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRAVQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSRAVQ512load {sym} [off] x ptr mem) + for { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSRAVQ512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSRAVQMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRAVQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSRAVQMasked128load {sym} [off] x ptr mask mem) + for { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSRAVQMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSRAVQMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRAVQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSRAVQMasked256load {sym} [off] x ptr mask mem) + for { + x := v_0 + l := v_1 + if l.Op != 
OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSRAVQMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSRAVQMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRAVQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSRAVQMasked512load {sym} [off] x ptr mask mem) + for { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSRAVQMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSRAW128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRAW128 x (MOVQconst [c])) + // result: (VPSRAW128const [uint8(c)] x) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSRAW128const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg(x) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSRAW256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRAW256 x (MOVQconst [c])) + // result: (VPSRAW256const [uint8(c)] x) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSRAW256const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg(x) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSRAW512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRAW512 x 
(MOVQconst [c])) + // result: (VPSRAW512const [uint8(c)] x) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSRAW512const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg(x) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSRAWMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRAWMasked128 x (MOVQconst [c]) mask) + // result: (VPSRAWMasked128const [uint8(c)] x mask) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSRAWMasked128const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg2(x, mask) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSRAWMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRAWMasked256 x (MOVQconst [c]) mask) + // result: (VPSRAWMasked256const [uint8(c)] x mask) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSRAWMasked256const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg2(x, mask) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSRAWMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRAWMasked512 x (MOVQconst [c]) mask) + // result: (VPSRAWMasked512const [uint8(c)] x mask) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSRAWMasked512const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg2(x, mask) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSRLD512const(v *Value) bool { + v_0 := v.Args[0] + // match: (VPSRLD512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSRLD512constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) + for { + c := 
auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSRLD512constload) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSRLDMasked128const(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRLDMasked128const [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSRLDMasked128constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) + for { + c := auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSRLDMasked128constload) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSRLDMasked256const(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRLDMasked256const [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSRLDMasked256constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) + for { + c := auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSRLDMasked256constload) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) + return true + } + return false 
+} +func rewriteValueAMD64_OpAMD64VPSRLDMasked512const(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRLDMasked512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSRLDMasked512constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) + for { + c := auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSRLDMasked512constload) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSRLQ512const(v *Value) bool { + v_0 := v.Args[0] + // match: (VPSRLQ512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSRLQ512constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) + for { + c := auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSRLQ512constload) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSRLQMasked128const(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRLQMasked128const [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSRLQMasked128constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) + for { + c := auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + 
mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSRLQMasked128constload) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSRLQMasked256const(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRLQMasked256const [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSRLQMasked256constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) + for { + c := auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSRLQMasked256constload) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSRLQMasked512const(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRLQMasked512const [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSRLQMasked512constload {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) + for { + c := auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSRLQMasked512constload) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSRLVD512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: 
(VPSRLVD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSRLVD512load {sym} [off] x ptr mem) + for { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSRLVD512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSRLVDMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRLVDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSRLVDMasked128load {sym} [off] x ptr mask mem) + for { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSRLVDMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSRLVDMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRLVDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSRLVDMasked256load {sym} [off] x ptr mask mem) + for { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSRLVDMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + return false +} +func 
rewriteValueAMD64_OpAMD64VPSRLVDMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRLVDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSRLVDMasked512load {sym} [off] x ptr mask mem) + for { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSRLVDMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSRLVQ512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRLVQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSRLVQ512load {sym} [off] x ptr mem) + for { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSRLVQ512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSRLVQMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRLVQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSRLVQMasked128load {sym} [off] x ptr mask mem) + for { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSRLVQMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + 
v.AddArg4(x, ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSRLVQMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRLVQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSRLVQMasked256load {sym} [off] x ptr mask mem) + for { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSRLVQMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSRLVQMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRLVQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSRLVQMasked512load {sym} [off] x ptr mask mem) + for { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSRLVQMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSUBD512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSUBD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSUBD512load {sym} [off] x ptr mem) + for { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + 
v.reset(OpAMD64VPSUBD512load) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) v.AddArg3(x, ptr, mem) @@ -41633,98 +45191,885 @@ func rewriteValueAMD64_OpAMD64VPSUBDMasked128(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPSUBDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // match: (VPSUBDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSUBDMasked128load {sym} [off] x ptr mask mem) + for { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSUBDMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSUBDMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSUBDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSUBDMasked256load {sym} [off] x ptr mask mem) + for { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSUBDMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSUBDMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSUBDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSUBDMasked512load {sym} [off] x ptr mask mem) + for { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + break + } 
+ off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSUBDMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSUBQ512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSUBQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSUBQ512load {sym} [off] x ptr mem) + for { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSUBQ512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSUBQMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSUBQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSUBQMasked128load {sym} [off] x ptr mask mem) + for { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSUBQMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSUBQMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSUBQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSUBQMasked256load {sym} [off] x ptr mask mem) + for { + x := v_0 + l := 
v_1 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSUBQMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSUBQMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSUBQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSUBQMasked512load {sym} [off] x ptr mask mem) + for { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSUBQMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPUNPCKHDQ512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPUNPCKHDQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPUNPCKHDQ512load {sym} [off] x ptr mem) + for { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPUNPCKHDQ512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPUNPCKHQDQ512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPUNPCKHQDQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPUNPCKHQDQ512load {sym} [off] x 
ptr mem) + for { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPUNPCKHQDQ512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPUNPCKLDQ512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPUNPCKLDQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPUNPCKLDQ512load {sym} [off] x ptr mem) + for { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPUNPCKLDQ512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPUNPCKLQDQ512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPUNPCKLQDQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPUNPCKLQDQ512load {sym} [off] x ptr mem) + for { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPUNPCKLQDQ512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPXORD512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPXORD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPXORD512load {sym} [off] x ptr mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, 
v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPXORD512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPXORDMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPXORDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSUBDMasked128load {sym} [off] x ptr mask mem) + // result: (VPXORDMasked128load {sym} [off] x ptr mask mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPXORDMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPXORDMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPXORDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPXORDMasked256load {sym} [off] x ptr mask mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPXORDMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + 
return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPXORDMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPXORDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPXORDMasked512load {sym} [off] x ptr mask mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPXORDMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPXORQ512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPXORQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPXORQ512load {sym} [off] x ptr mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPXORQ512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPXORQMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPXORQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPXORQMasked128load {sym} [off] x ptr mask mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + continue + } + off := 
auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPXORQMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPXORQMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPXORQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPXORQMasked256load {sym} [off] x ptr mask mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPXORQMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPXORQMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPXORQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPXORQMasked512load {sym} [off] x ptr mask mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPXORQMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VRCP14PD128(v *Value) bool { + v_0 := v.Args[0] + // match: 
(VRCP14PD128 l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VRCP14PD128load {sym} [off] ptr mem) + for { + l := v_0 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VRCP14PD128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VRCP14PD256(v *Value) bool { + v_0 := v.Args[0] + // match: (VRCP14PD256 l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VRCP14PD256load {sym} [off] ptr mem) + for { + l := v_0 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VRCP14PD256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VRCP14PD512(v *Value) bool { + v_0 := v.Args[0] + // match: (VRCP14PD512 l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VRCP14PD512load {sym} [off] ptr mem) + for { + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VRCP14PD512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VRCP14PDMasked128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VRCP14PDMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VRCP14PDMasked128load {sym} [off] 
ptr mask mem) + for { + l := v_0 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VRCP14PDMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VRCP14PDMasked256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VRCP14PDMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VRCP14PDMasked256load {sym} [off] ptr mask mem) + for { + l := v_0 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VRCP14PDMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VRCP14PDMasked512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VRCP14PDMasked512 l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VRCP14PDMasked512load {sym} [off] ptr mask mem) + for { + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VRCP14PDMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VRCP14PS512(v *Value) bool { + v_0 := v.Args[0] + // match: (VRCP14PS512 l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VRCP14PS512load {sym} [off] ptr 
mem) + for { + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VRCP14PS512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VRCP14PSMasked128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VRCP14PSMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VRCP14PSMasked128load {sym} [off] ptr mask mem) + for { + l := v_0 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VRCP14PSMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VRCP14PSMasked256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VRCP14PSMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VRCP14PSMasked256load {sym} [off] ptr mask mem) + for { + l := v_0 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VRCP14PSMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VRCP14PSMasked512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VRCP14PSMasked512 l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VRCP14PSMasked512load {sym} 
[off] ptr mask mem) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload128 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { break } off := auxIntToInt32(l.AuxInt) sym := auxToSym(l.Aux) mem := l.Args[1] ptr := l.Args[0] - mask := v_2 + mask := v_1 if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPSUBDMasked128load) + v.reset(OpAMD64VRCP14PSMasked512load) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + v.AddArg3(ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPSUBDMasked256(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] +func rewriteValueAMD64_OpAMD64VREDUCEPD128(v *Value) bool { v_0 := v.Args[0] - // match: (VPSUBDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // match: (VREDUCEPD128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSUBDMasked256load {sym} [off] x ptr mask mem) + // result: (VREDUCEPD128load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload256 { + c := auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64VMOVDQUload128 { break } off := auxIntToInt32(l.AuxInt) sym := auxToSym(l.Aux) mem := l.Args[1] ptr := l.Args[0] - mask := v_2 if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPSUBDMasked256load) - v.AuxInt = int32ToAuxInt(off) + v.reset(OpAMD64VREDUCEPD128load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + v.AddArg2(ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPSUBDMasked512(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] +func rewriteValueAMD64_OpAMD64VREDUCEPD256(v *Value) bool { v_0 := v.Args[0] - // match: (VPSUBDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // match: (VREDUCEPD256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: 
(VPSUBDMasked512load {sym} [off] x ptr mask mem) + // result: (VREDUCEPD256load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { + c := auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64VMOVDQUload256 { break } off := auxIntToInt32(l.AuxInt) sym := auxToSym(l.Aux) mem := l.Args[1] ptr := l.Args[0] - mask := v_2 if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPSUBDMasked512load) - v.AuxInt = int32ToAuxInt(off) + v.reset(OpAMD64VREDUCEPD256load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + v.AddArg2(ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPSUBQ512(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpAMD64VREDUCEPD512(v *Value) bool { v_0 := v.Args[0] - // match: (VPSUBQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // match: (VREDUCEPD512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSUBQ512load {sym} [off] x ptr mem) + // result: (VREDUCEPD512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) for { - x := v_0 - l := v_1 + c := auxIntToUint8(v.AuxInt) + l := v_0 if l.Op != OpAMD64VMOVDQUload512 { break } @@ -41735,24 +46080,23 @@ func rewriteValueAMD64_OpAMD64VPSUBQ512(v *Value) bool { if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPSUBQ512load) - v.AuxInt = int32ToAuxInt(off) + v.reset(OpAMD64VREDUCEPD512load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) + v.AddArg2(ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPSUBQMasked128(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpAMD64VREDUCEPDMasked128(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPSUBQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // match: (VREDUCEPDMasked128 [c] l:(VMOVDQUload128 
{sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSUBQMasked128load {sym} [off] x ptr mask mem) + // result: (VREDUCEPDMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) for { - x := v_0 - l := v_1 + c := auxIntToUint8(v.AuxInt) + l := v_0 if l.Op != OpAMD64VMOVDQUload128 { break } @@ -41760,28 +46104,27 @@ func rewriteValueAMD64_OpAMD64VPSUBQMasked128(v *Value) bool { sym := auxToSym(l.Aux) mem := l.Args[1] ptr := l.Args[0] - mask := v_2 + mask := v_1 if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPSUBQMasked128load) - v.AuxInt = int32ToAuxInt(off) + v.reset(OpAMD64VREDUCEPDMasked128load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + v.AddArg3(ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPSUBQMasked256(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpAMD64VREDUCEPDMasked256(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPSUBQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // match: (VREDUCEPDMasked256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSUBQMasked256load {sym} [off] x ptr mask mem) + // result: (VREDUCEPDMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) for { - x := v_0 - l := v_1 + c := auxIntToUint8(v.AuxInt) + l := v_0 if l.Op != OpAMD64VMOVDQUload256 { break } @@ -41789,28 +46132,27 @@ func rewriteValueAMD64_OpAMD64VPSUBQMasked256(v *Value) bool { sym := auxToSym(l.Aux) mem := l.Args[1] ptr := l.Args[0] - mask := v_2 + mask := v_1 if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPSUBQMasked256load) - v.AuxInt = int32ToAuxInt(off) + v.reset(OpAMD64VREDUCEPDMasked256load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + v.AddArg3(ptr, mask, mem) return true } 
return false } -func rewriteValueAMD64_OpAMD64VPSUBQMasked512(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpAMD64VREDUCEPDMasked512(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPSUBQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // match: (VREDUCEPDMasked512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPSUBQMasked512load {sym} [off] x ptr mask mem) + // result: (VREDUCEPDMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) for { - x := v_0 - l := v_1 + c := auxIntToUint8(v.AuxInt) + l := v_0 if l.Op != OpAMD64VMOVDQUload512 { break } @@ -41818,28 +46160,27 @@ func rewriteValueAMD64_OpAMD64VPSUBQMasked512(v *Value) bool { sym := auxToSym(l.Aux) mem := l.Args[1] ptr := l.Args[0] - mask := v_2 + mask := v_1 if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPSUBQMasked512load) - v.AuxInt = int32ToAuxInt(off) + v.reset(OpAMD64VREDUCEPDMasked512load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) + v.AddArg3(ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPUNPCKHDQ512(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpAMD64VREDUCEPS128(v *Value) bool { v_0 := v.Args[0] - // match: (VPUNPCKHDQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // match: (VREDUCEPS128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPUNPCKHDQ512load {sym} [off] x ptr mem) + // result: (VREDUCEPS128load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { + c := auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64VMOVDQUload128 { break } off := auxIntToInt32(l.AuxInt) @@ -41849,24 +46190,23 @@ func rewriteValueAMD64_OpAMD64VPUNPCKHDQ512(v *Value) bool { if !(canMergeLoad(v, l) && clobber(l)) { break } - 
v.reset(OpAMD64VPUNPCKHDQ512load) - v.AuxInt = int32ToAuxInt(off) + v.reset(OpAMD64VREDUCEPS128load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) + v.AddArg2(ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPUNPCKHQDQ512(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpAMD64VREDUCEPS256(v *Value) bool { v_0 := v.Args[0] - // match: (VPUNPCKHQDQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // match: (VREDUCEPS256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPUNPCKHQDQ512load {sym} [off] x ptr mem) + // result: (VREDUCEPS256load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { + c := auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64VMOVDQUload256 { break } off := auxIntToInt32(l.AuxInt) @@ -41876,23 +46216,22 @@ func rewriteValueAMD64_OpAMD64VPUNPCKHQDQ512(v *Value) bool { if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPUNPCKHQDQ512load) - v.AuxInt = int32ToAuxInt(off) + v.reset(OpAMD64VREDUCEPS256load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) + v.AddArg2(ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPUNPCKLDQ512(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpAMD64VREDUCEPS512(v *Value) bool { v_0 := v.Args[0] - // match: (VPUNPCKLDQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // match: (VREDUCEPS512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPUNPCKLDQ512load {sym} [off] x ptr mem) + // result: (VREDUCEPS512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) for { - x := v_0 - l := v_1 + c := auxIntToUint8(v.AuxInt) + l := v_0 if l.Op != OpAMD64VMOVDQUload512 { break } @@ -41903,301 +46242,159 @@ func rewriteValueAMD64_OpAMD64VPUNPCKLDQ512(v 
*Value) bool { if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPUNPCKLDQ512load) - v.AuxInt = int32ToAuxInt(off) + v.reset(OpAMD64VREDUCEPS512load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) + v.AddArg2(ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPUNPCKLQDQ512(v *Value) bool { +func rewriteValueAMD64_OpAMD64VREDUCEPSMasked128(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPUNPCKLQDQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // match: (VREDUCEPSMasked128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPUNPCKLQDQ512load {sym} [off] x ptr mem) + // result: (VREDUCEPSMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) for { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { + c := auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64VMOVDQUload128 { break } off := auxIntToInt32(l.AuxInt) sym := auxToSym(l.Aux) mem := l.Args[1] ptr := l.Args[0] + mask := v_1 if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPUNPCKLQDQ512load) - v.AuxInt = int32ToAuxInt(off) + v.reset(OpAMD64VREDUCEPSMasked128load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) + v.AddArg3(ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPXORD512(v *Value) bool { +func rewriteValueAMD64_OpAMD64VREDUCEPSMasked256(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPXORD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // match: (VREDUCEPSMasked256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPXORD512load {sym} [off] x ptr mem) + // result: (VREDUCEPSMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := 
v_1 - if l.Op != OpAMD64VMOVDQUload512 { - continue - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { - continue - } - v.reset(OpAMD64VPXORD512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) - return true + c := auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64VMOVDQUload256 { + break } - break - } - return false -} -func rewriteValueAMD64_OpAMD64VPXORDMasked128(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPXORDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPXORDMasked128load {sym} [off] x ptr mask mem) - for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload128 { - continue - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { - continue - } - v.reset(OpAMD64VPXORDMasked128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) - return true + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { + break } - break + v.reset(OpAMD64VREDUCEPSMasked256load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) + return true } return false } -func rewriteValueAMD64_OpAMD64VPXORDMasked256(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpAMD64VREDUCEPSMasked512(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPXORDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // match: (VREDUCEPSMasked512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPXORDMasked256load {sym} [off] 
x ptr mask mem) + // result: (VREDUCEPSMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload256 { - continue - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { - continue - } - v.reset(OpAMD64VPXORDMasked256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) - return true + c := auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { + break } - break - } - return false -} -func rewriteValueAMD64_OpAMD64VPXORDMasked512(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPXORDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPXORDMasked512load {sym} [off] x ptr mask mem) - for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { - continue - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { - continue - } - v.reset(OpAMD64VPXORDMasked512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) - return true + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { + break } - break + v.reset(OpAMD64VREDUCEPSMasked512load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) + return true } return false } -func rewriteValueAMD64_OpAMD64VPXORQ512(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpAMD64VRNDSCALEPD128(v *Value) bool { v_0 := v.Args[0] - // match: (VPXORQ512 x l:(VMOVDQUload512 {sym} 
[off] ptr mem)) + // match: (VRNDSCALEPD128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPXORQ512load {sym} [off] x ptr mem) + // result: (VRNDSCALEPD128load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { - continue - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoad(v, l) && clobber(l)) { - continue - } - v.reset(OpAMD64VPXORQ512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) - return true + c := auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64VMOVDQUload128 { + break } - break - } - return false -} -func rewriteValueAMD64_OpAMD64VPXORQMasked128(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPXORQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPXORQMasked128load {sym} [off] x ptr mask mem) - for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload128 { - continue - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { - continue - } - v.reset(OpAMD64VPXORQMasked128load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) - return true + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break } - break + v.reset(OpAMD64VRNDSCALEPD128load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) + return true } return false } -func rewriteValueAMD64_OpAMD64VPXORQMasked256(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] 
+func rewriteValueAMD64_OpAMD64VRNDSCALEPD256(v *Value) bool { v_0 := v.Args[0] - // match: (VPXORQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // match: (VRNDSCALEPD256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPXORQMasked256load {sym} [off] x ptr mask mem) + // result: (VRNDSCALEPD256load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload256 { - continue - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { - continue - } - v.reset(OpAMD64VPXORQMasked256load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) - return true + c := auxIntToUint8(v.AuxInt) + l := v_0 + if l.Op != OpAMD64VMOVDQUload256 { + break } - break - } - return false -} -func rewriteValueAMD64_OpAMD64VPXORQMasked512(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (VPXORQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) - // cond: canMergeLoad(v, l) && clobber(l) - // result: (VPXORQMasked512load {sym} [off] x ptr mask mem) - for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64VMOVDQUload512 { - continue - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - mask := v_2 - if !(canMergeLoad(v, l) && clobber(l)) { - continue - } - v.reset(OpAMD64VPXORQMasked512load) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg4(x, ptr, mask, mem) - return true + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break } - break + v.reset(OpAMD64VRNDSCALEPD256load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) + 
v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) + return true } return false } -func rewriteValueAMD64_OpAMD64VRCP14PD128(v *Value) bool { +func rewriteValueAMD64_OpAMD64VRNDSCALEPD512(v *Value) bool { v_0 := v.Args[0] - // match: (VRCP14PD128 l:(VMOVDQUload128 {sym} [off] ptr mem)) + // match: (VRNDSCALEPD512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VRCP14PD128load {sym} [off] ptr mem) + // result: (VRNDSCALEPD512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) for { + c := auxIntToUint8(v.AuxInt) l := v_0 - if l.Op != OpAMD64VMOVDQUload128 { + if l.Op != OpAMD64VMOVDQUload512 { break } off := auxIntToInt32(l.AuxInt) @@ -42207,73 +46404,80 @@ func rewriteValueAMD64_OpAMD64VRCP14PD128(v *Value) bool { if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VRCP14PD128load) - v.AuxInt = int32ToAuxInt(off) + v.reset(OpAMD64VRNDSCALEPD512load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) v.Aux = symToAux(sym) v.AddArg2(ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VRCP14PD256(v *Value) bool { +func rewriteValueAMD64_OpAMD64VRNDSCALEPDMasked128(v *Value) bool { + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VRCP14PD256 l:(VMOVDQUload256 {sym} [off] ptr mem)) + // match: (VRNDSCALEPDMasked128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VRCP14PD256load {sym} [off] ptr mem) + // result: (VRNDSCALEPDMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) for { + c := auxIntToUint8(v.AuxInt) l := v_0 - if l.Op != OpAMD64VMOVDQUload256 { + if l.Op != OpAMD64VMOVDQUload128 { break } off := auxIntToInt32(l.AuxInt) sym := auxToSym(l.Aux) mem := l.Args[1] ptr := l.Args[0] + mask := v_1 if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VRCP14PD256load) - v.AuxInt = int32ToAuxInt(off) + v.reset(OpAMD64VRNDSCALEPDMasked128load) + v.AuxInt = 
valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) + v.AddArg3(ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VRCP14PD512(v *Value) bool { +func rewriteValueAMD64_OpAMD64VRNDSCALEPDMasked256(v *Value) bool { + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VRCP14PD512 l:(VMOVDQUload512 {sym} [off] ptr mem)) + // match: (VRNDSCALEPDMasked256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VRCP14PD512load {sym} [off] ptr mem) + // result: (VRNDSCALEPDMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) for { + c := auxIntToUint8(v.AuxInt) l := v_0 - if l.Op != OpAMD64VMOVDQUload512 { + if l.Op != OpAMD64VMOVDQUload256 { break } off := auxIntToInt32(l.AuxInt) sym := auxToSym(l.Aux) mem := l.Args[1] ptr := l.Args[0] + mask := v_1 if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VRCP14PD512load) - v.AuxInt = int32ToAuxInt(off) + v.reset(OpAMD64VRNDSCALEPDMasked256load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) v.Aux = symToAux(sym) - v.AddArg2(ptr, mem) + v.AddArg3(ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VRCP14PDMasked128(v *Value) bool { +func rewriteValueAMD64_OpAMD64VRNDSCALEPDMasked512(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VRCP14PDMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // match: (VRNDSCALEPDMasked512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VRCP14PDMasked128load {sym} [off] ptr mask mem) + // result: (VRNDSCALEPDMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) for { + c := auxIntToUint8(v.AuxInt) l := v_0 - if l.Op != OpAMD64VMOVDQUload128 { + if l.Op != OpAMD64VMOVDQUload512 { break } off := auxIntToInt32(l.AuxInt) @@ -42284,74 +46488,73 @@ func rewriteValueAMD64_OpAMD64VRCP14PDMasked128(v *Value) bool { if 
!(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VRCP14PDMasked128load) - v.AuxInt = int32ToAuxInt(off) + v.reset(OpAMD64VRNDSCALEPDMasked512load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) v.Aux = symToAux(sym) v.AddArg3(ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VRCP14PDMasked256(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpAMD64VRNDSCALEPS128(v *Value) bool { v_0 := v.Args[0] - // match: (VRCP14PDMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // match: (VRNDSCALEPS128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VRCP14PDMasked256load {sym} [off] ptr mask mem) + // result: (VRNDSCALEPS128load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) for { + c := auxIntToUint8(v.AuxInt) l := v_0 - if l.Op != OpAMD64VMOVDQUload256 { + if l.Op != OpAMD64VMOVDQUload128 { break } off := auxIntToInt32(l.AuxInt) sym := auxToSym(l.Aux) mem := l.Args[1] ptr := l.Args[0] - mask := v_1 if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VRCP14PDMasked256load) - v.AuxInt = int32ToAuxInt(off) + v.reset(OpAMD64VRNDSCALEPS128load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) + v.AddArg2(ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VRCP14PDMasked512(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpAMD64VRNDSCALEPS256(v *Value) bool { v_0 := v.Args[0] - // match: (VRCP14PDMasked512 l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // match: (VRNDSCALEPS256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VRCP14PDMasked512load {sym} [off] ptr mask mem) + // result: (VRNDSCALEPS256load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) for { + c := auxIntToUint8(v.AuxInt) l := v_0 - if l.Op != OpAMD64VMOVDQUload512 { + if l.Op != OpAMD64VMOVDQUload256 { break } off := 
auxIntToInt32(l.AuxInt) sym := auxToSym(l.Aux) mem := l.Args[1] ptr := l.Args[0] - mask := v_1 if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VRCP14PDMasked512load) - v.AuxInt = int32ToAuxInt(off) + v.reset(OpAMD64VRNDSCALEPS256load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) v.Aux = symToAux(sym) - v.AddArg3(ptr, mask, mem) + v.AddArg2(ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VRCP14PS512(v *Value) bool { +func rewriteValueAMD64_OpAMD64VRNDSCALEPS512(v *Value) bool { v_0 := v.Args[0] - // match: (VRCP14PS512 l:(VMOVDQUload512 {sym} [off] ptr mem)) + // match: (VRNDSCALEPS512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem)) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VRCP14PS512load {sym} [off] ptr mem) + // result: (VRNDSCALEPS512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mem) for { + c := auxIntToUint8(v.AuxInt) l := v_0 if l.Op != OpAMD64VMOVDQUload512 { break @@ -42363,21 +46566,22 @@ func rewriteValueAMD64_OpAMD64VRCP14PS512(v *Value) bool { if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VRCP14PS512load) - v.AuxInt = int32ToAuxInt(off) + v.reset(OpAMD64VRNDSCALEPS512load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) v.Aux = symToAux(sym) v.AddArg2(ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VRCP14PSMasked128(v *Value) bool { +func rewriteValueAMD64_OpAMD64VRNDSCALEPSMasked128(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VRCP14PSMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // match: (VRNDSCALEPSMasked128 [c] l:(VMOVDQUload128 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VRCP14PSMasked128load {sym} [off] ptr mask mem) + // result: (VRNDSCALEPSMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) for { + c := auxIntToUint8(v.AuxInt) l := v_0 if l.Op != OpAMD64VMOVDQUload128 { break @@ -42390,21 +46594,22 @@ func 
rewriteValueAMD64_OpAMD64VRCP14PSMasked128(v *Value) bool { if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VRCP14PSMasked128load) - v.AuxInt = int32ToAuxInt(off) + v.reset(OpAMD64VRNDSCALEPSMasked128load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) v.Aux = symToAux(sym) v.AddArg3(ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VRCP14PSMasked256(v *Value) bool { +func rewriteValueAMD64_OpAMD64VRNDSCALEPSMasked256(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VRCP14PSMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // match: (VRNDSCALEPSMasked256 [c] l:(VMOVDQUload256 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VRCP14PSMasked256load {sym} [off] ptr mask mem) + // result: (VRNDSCALEPSMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) for { + c := auxIntToUint8(v.AuxInt) l := v_0 if l.Op != OpAMD64VMOVDQUload256 { break @@ -42417,21 +46622,22 @@ func rewriteValueAMD64_OpAMD64VRCP14PSMasked256(v *Value) bool { if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VRCP14PSMasked256load) - v.AuxInt = int32ToAuxInt(off) + v.reset(OpAMD64VRNDSCALEPSMasked256load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) v.Aux = symToAux(sym) v.AddArg3(ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VRCP14PSMasked512(v *Value) bool { +func rewriteValueAMD64_OpAMD64VRNDSCALEPSMasked512(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VRCP14PSMasked512 l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // match: (VRNDSCALEPSMasked512 [c] l:(VMOVDQUload512 {sym} [off] ptr mem) mask) // cond: canMergeLoad(v, l) && clobber(l) - // result: (VRCP14PSMasked512load {sym} [off] ptr mask mem) + // result: (VRNDSCALEPSMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] ptr mask mem) for { + c := auxIntToUint8(v.AuxInt) l := v_0 if l.Op != OpAMD64VMOVDQUload512 { break @@ 
-42444,8 +46650,8 @@ func rewriteValueAMD64_OpAMD64VRCP14PSMasked512(v *Value) bool { if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VRCP14PSMasked512load) - v.AuxInt = int32ToAuxInt(off) + v.reset(OpAMD64VRNDSCALEPSMasked512load) + v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off)) v.Aux = symToAux(sym) v.AddArg3(ptr, mask, mem) return true diff --git a/src/simd/_gen/simdgen/gen_simdrules.go b/src/simd/_gen/simdgen/gen_simdrules.go index c9fae4eed7..2339a1910d 100644 --- a/src/simd/_gen/simdgen/gen_simdrules.go +++ b/src/simd/_gen/simdgen/gen_simdrules.go @@ -236,7 +236,7 @@ func writeSIMDRules(ops []Operation) *bytes.Buffer { panic("simdgen sees unknwon special lower " + *gOp.SpecialLower + ", maybe implement it?") } } - if gOp.MemFeatures != nil && *gOp.MemFeatures == "vbcst" && immType == NoImm { + if gOp.MemFeatures != nil && *gOp.MemFeatures == "vbcst" { // sanity check selected := true for _, a := range gOp.In { @@ -257,9 +257,21 @@ func writeSIMDRules(ops []Operation) *bytes.Buffer { } memOpData := data // Remove the last vreg from the arg and change it to a load. - memOpData.ArgsLoadAddr = data.Args[:len(data.Args)-1] + fmt.Sprintf("l:(VMOVDQUload%d {sym} [off] ptr mem)", *lastVreg.Bits) + origArgs := data.Args[:len(data.Args)-1] + // Prepare imm args. + immArg := "" + immArgCombineOff := " [off] " + if immType != NoImm && immType != InvalidImm { + _, after, found := strings.Cut(origArgs, "]") + if found { + origArgs = after + } + immArg = "[c] " + immArgCombineOff = " [makeValAndOff(int32(int8(c)),off)] " + } + memOpData.ArgsLoadAddr = immArg + origArgs + fmt.Sprintf("l:(VMOVDQUload%d {sym} [off] ptr mem)", *lastVreg.Bits) // Remove the last vreg from the arg and change it to "ptr". - memOpData.ArgsAddr = "{sym} [off] " + data.Args[:len(data.Args)-1] + "ptr" + memOpData.ArgsAddr = "{sym}" + immArgCombineOff + origArgs + "ptr" if maskType == OneMask { memOpData.ArgsAddr += " mask" memOpData.ArgsLoadAddr += " mask" -- 2.52.0