From 48f366d82666951f23a4de5535e8f7cbdf43c6a8 Mon Sep 17 00:00:00 2001
From: Junyang Shao
Date: Mon, 8 Sep 2025 14:29:35 +0000
Subject: [PATCH] [dev.simd] cmd/compile: add memop peephole rules

Change-Id: I442da7964ca8b4b9012ed206ccb92f5e68b0d42b
Reviewed-on: https://go-review.googlesource.com/c/go/+/701695
Reviewed-by: Cherry Mui
LUCI-TryBot-Result: Go LUCI
---
 .../compile/internal/ssa/_gen/simdAMD64.rules |   494 +
 src/cmd/compile/internal/ssa/rewriteAMD64.go  | 19853 ++++++++++++++--
 src/simd/_gen/simdgen/gen_simdrules.go        |    49 +-
 3 files changed, 18143 insertions(+), 2253 deletions(-)

diff --git a/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules b/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules
index bfedad1e9b..82a53a7c4f 100644
--- a/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules
+++ b/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules
@@ -1469,3 +1469,497 @@
 (VMOVDQU16Masked512 (VPSRAW512const [a] x) mask) => (VPSRAWMasked512const [a] x mask)
 (VMOVDQU32Masked512 (VPSRAD512const [a] x) mask) => (VPSRADMasked512const [a] x mask)
 (VMOVDQU64Masked512 (VPSRAQ512const [a] x) mask) => (VPSRAQMasked512const [a] x mask)
+(VPABSD128 l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPABSD128load {sym} [off] ptr mem)
+(VPABSD256 l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPABSD256load {sym} [off] ptr mem)
+(VPABSD512 l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPABSD512load {sym} [off] ptr mem)
+(VPABSQ128 l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPABSQ128load {sym} [off] ptr mem)
+(VPABSQ256 l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPABSQ256load {sym} [off] ptr mem)
+(VPABSQ512 l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPABSQ512load {sym} [off] ptr mem)
+(VPABSDMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPABSDMasked128load {sym} [off] ptr mask mem)
+(VPABSDMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPABSDMasked256load {sym} [off] ptr mask mem)
+(VPABSDMasked512 l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPABSDMasked512load {sym} [off] ptr mask mem)
+(VPABSQMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPABSQMasked128load {sym} [off] ptr mask mem)
+(VPABSQMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPABSQMasked256load {sym} [off] ptr mask mem)
+(VPABSQMasked512 l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPABSQMasked512load {sym} [off] ptr mask mem)
+(VADDPS128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VADDPS128load {sym} [off] x ptr mem)
+(VADDPS256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VADDPS256load {sym} [off] x ptr mem)
+(VADDPS512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VADDPS512load {sym} [off] x ptr mem)
+(VADDPD128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VADDPD128load {sym} [off] x ptr mem)
+(VADDPD256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VADDPD256load {sym} [off] x ptr mem)
+(VADDPD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VADDPD512load {sym} [off] x ptr mem)
+(VPADDD128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPADDD128load {sym} [off] x ptr mem)
+(VPADDD256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPADDD256load {sym} [off] x ptr mem)
+(VPADDD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPADDD512load {sym} [off] x ptr mem)
+(VPADDQ128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPADDQ128load {sym} [off] x ptr mem)
+(VPADDQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPADDQ256load {sym} [off] x ptr mem)
+(VPADDQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPADDQ512load {sym} [off] x ptr mem)
+(VPDPWSSD128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPDPWSSD128load {sym} [off] x y ptr mem)
+(VPDPWSSD256 x y l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPDPWSSD256load {sym} [off] x y ptr mem)
+(VPDPWSSD512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPDPWSSD512load {sym} [off] x y ptr mem)
+(VPDPWSSDMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPDPWSSDMasked128load {sym} [off] x y ptr mask mem)
+(VPDPWSSDMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPDPWSSDMasked256load {sym} [off] x y ptr mask mem)
+(VPDPWSSDMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPDPWSSDMasked512load {sym} [off] x y ptr mask mem)
+(VPDPWSSDS128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPDPWSSDS128load {sym} [off] x y ptr mem)
+(VPDPWSSDS256 x y l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPDPWSSDS256load {sym} [off] x y ptr mem)
+(VPDPWSSDS512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPDPWSSDS512load {sym} [off] x y ptr mem)
+(VPDPWSSDSMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPDPWSSDSMasked128load {sym} [off] x y ptr mask mem)
+(VPDPWSSDSMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPDPWSSDSMasked256load {sym} [off] x y ptr mask mem)
+(VPDPWSSDSMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPDPWSSDSMasked512load {sym} [off] x y ptr mask mem)
+(VPDPBUSD128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPDPBUSD128load {sym} [off] x y ptr mem)
+(VPDPBUSD256 x y l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPDPBUSD256load {sym} [off] x y ptr mem)
+(VPDPBUSD512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPDPBUSD512load {sym} [off] x y ptr mem)
+(VPDPBUSDMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPDPBUSDMasked128load {sym} [off] x y ptr mask mem)
+(VPDPBUSDMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPDPBUSDMasked256load {sym} [off] x y ptr mask mem)
+(VPDPBUSDMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPDPBUSDMasked512load {sym} [off] x y ptr mask mem)
+(VPDPBUSDS128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPDPBUSDS128load {sym} [off] x y ptr mem)
+(VPDPBUSDS256 x y l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPDPBUSDS256load {sym} [off] x y ptr mem)
+(VPDPBUSDS512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPDPBUSDS512load {sym} [off] x y ptr mem)
+(VPDPBUSDSMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPDPBUSDSMasked128load {sym} [off] x y ptr mask mem)
+(VPDPBUSDSMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPDPBUSDSMasked256load {sym} [off] x y ptr mask mem)
+(VPDPBUSDSMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPDPBUSDSMasked512load {sym} [off] x y ptr mask mem)
+(VADDPSMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VADDPSMasked128load {sym} [off] x ptr mask mem)
+(VADDPSMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VADDPSMasked256load {sym} [off] x ptr mask mem)
+(VADDPSMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VADDPSMasked512load {sym} [off] x ptr mask mem)
+(VADDPDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VADDPDMasked128load {sym} [off] x ptr mask mem)
+(VADDPDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VADDPDMasked256load {sym} [off] x ptr mask mem)
+(VADDPDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VADDPDMasked512load {sym} [off] x ptr mask mem)
+(VPADDDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPADDDMasked128load {sym} [off] x ptr mask mem)
+(VPADDDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPADDDMasked256load {sym} [off] x ptr mask mem)
+(VPADDDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPADDDMasked512load {sym} [off] x ptr mask mem)
+(VPADDQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPADDQMasked128load {sym} [off] x ptr mask mem)
+(VPADDQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPADDQMasked256load {sym} [off] x ptr mask mem)
+(VPADDQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPADDQMasked512load {sym} [off] x ptr mask mem)
+(VPANDD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPANDD512load {sym} [off] x ptr mem)
+(VPANDQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPANDQ512load {sym} [off] x ptr mem)
+(VPANDDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPANDDMasked128load {sym} [off] x ptr mask mem)
+(VPANDDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPANDDMasked256load {sym} [off] x ptr mask mem)
+(VPANDDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPANDDMasked512load {sym} [off] x ptr mask mem)
+(VPANDQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPANDQMasked128load {sym} [off] x ptr mask mem)
+(VPANDQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPANDQMasked256load {sym} [off] x ptr mask mem)
+(VPANDQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPANDQMasked512load {sym} [off] x ptr mask mem)
+(VPANDND512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPANDND512load {sym} [off] x ptr mem)
+(VPANDNQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPANDNQ512load {sym} [off] x ptr mem)
+(VPANDNDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPANDNDMasked128load {sym} [off] x ptr mask mem)
+(VPANDNDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPANDNDMasked256load {sym} [off] x ptr mask mem)
+(VPANDNDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPANDNDMasked512load {sym} [off] x ptr mask mem)
+(VPANDNQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPANDNQMasked128load {sym} [off] x ptr mask mem)
+(VPANDNQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPANDNQMasked256load {sym} [off] x ptr mask mem)
+(VPANDNQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPANDNQMasked512load {sym} [off] x ptr mask mem)
+(VPACKSSDW128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPACKSSDW128load {sym} [off] x ptr mem)
+(VPACKSSDW256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPACKSSDW256load {sym} [off] x ptr mem)
+(VPACKSSDW512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPACKSSDW512load {sym} [off] x ptr mem)
+(VPACKSSDWMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPACKSSDWMasked128load {sym} [off] x ptr mask mem)
+(VPACKSSDWMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPACKSSDWMasked256load {sym} [off] x ptr mask mem)
+(VPACKSSDWMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPACKSSDWMasked512load {sym} [off] x ptr mask mem)
+(VCVTTPS2DQ128 l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VCVTTPS2DQ128load {sym} [off] ptr mem)
+(VCVTTPS2DQ256 l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VCVTTPS2DQ256load {sym} [off] ptr mem)
+(VCVTTPS2DQ512 l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VCVTTPS2DQ512load {sym} [off] ptr mem)
+(VCVTTPS2DQMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VCVTTPS2DQMasked128load {sym} [off] ptr mask mem)
+(VCVTTPS2DQMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VCVTTPS2DQMasked256load {sym} [off] ptr mask mem)
+(VCVTTPS2DQMasked512 l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VCVTTPS2DQMasked512load {sym} [off] ptr mask mem)
+(VPACKUSDW128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPACKUSDW128load {sym} [off] x ptr mem)
+(VPACKUSDW256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPACKUSDW256load {sym} [off] x ptr mem)
+(VPACKUSDW512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPACKUSDW512load {sym} [off] x ptr mem)
+(VPACKUSDWMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPACKUSDWMasked128load {sym} [off] x ptr mask mem)
+(VPACKUSDWMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPACKUSDWMasked256load {sym} [off] x ptr mask mem)
+(VPACKUSDWMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPACKUSDWMasked512load {sym} [off] x ptr mask mem)
+(VCVTPS2UDQ128 l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VCVTPS2UDQ128load {sym} [off] ptr mem)
+(VCVTPS2UDQ256 l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VCVTPS2UDQ256load {sym} [off] ptr mem)
+(VCVTPS2UDQ512 l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VCVTPS2UDQ512load {sym} [off] ptr mem)
+(VCVTPS2UDQMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VCVTPS2UDQMasked128load {sym} [off] ptr mask mem)
+(VCVTPS2UDQMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VCVTPS2UDQMasked256load {sym} [off] ptr mask mem)
+(VCVTPS2UDQMasked512 l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VCVTPS2UDQMasked512load {sym} [off] ptr mask mem)
+(VDIVPS128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VDIVPS128load {sym} [off] x ptr mem)
+(VDIVPS256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VDIVPS256load {sym} [off] x ptr mem)
+(VDIVPS512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VDIVPS512load {sym} [off] x ptr mem)
+(VDIVPD128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VDIVPD128load {sym} [off] x ptr mem)
+(VDIVPD256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VDIVPD256load {sym} [off] x ptr mem)
+(VDIVPD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VDIVPD512load {sym} [off] x ptr mem)
+(VDIVPSMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VDIVPSMasked128load {sym} [off] x ptr mask mem)
+(VDIVPSMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VDIVPSMasked256load {sym} [off] x ptr mask mem)
+(VDIVPSMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VDIVPSMasked512load {sym} [off] x ptr mask mem)
+(VDIVPDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VDIVPDMasked128load {sym} [off] x ptr mask mem)
+(VDIVPDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VDIVPDMasked256load {sym} [off] x ptr mask mem)
+(VDIVPDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VDIVPDMasked512load {sym} [off] x ptr mask mem)
+(VPCMPEQD128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPCMPEQD128load {sym} [off] x ptr mem)
+(VPCMPEQD256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPCMPEQD256load {sym} [off] x ptr mem)
+(VPCMPEQD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPCMPEQD512load {sym} [off] x ptr mem)
+(VPCMPEQQ128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPCMPEQQ128load {sym} [off] x ptr mem)
+(VPCMPEQQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPCMPEQQ256load {sym} [off] x ptr mem)
+(VPCMPEQQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPCMPEQQ512load {sym} [off] x ptr mem)
+(VPCMPGTD128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPCMPGTD128load {sym} [off] x ptr mem)
+(VPCMPGTD256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPCMPGTD256load {sym} [off] x ptr mem)
+(VPCMPGTD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPCMPGTD512load {sym} [off] x ptr mem)
+(VPCMPGTQ128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPCMPGTQ128load {sym} [off] x ptr mem)
+(VPCMPGTQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPCMPGTQ256load {sym} [off] x ptr mem)
+(VPCMPGTQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPCMPGTQ512load {sym} [off] x ptr mem)
+(VPUNPCKHDQ128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPUNPCKHDQ128load {sym} [off] x ptr mem)
+(VPUNPCKHQDQ128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPUNPCKHQDQ128load {sym} [off] x ptr mem)
+(VPUNPCKHDQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPUNPCKHDQ256load {sym} [off] x ptr mem)
+(VPUNPCKHDQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPUNPCKHDQ512load {sym} [off] x ptr mem)
+(VPUNPCKHQDQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPUNPCKHQDQ256load {sym} [off] x ptr mem)
+(VPUNPCKHQDQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPUNPCKHQDQ512load {sym} [off] x ptr mem)
+(VPUNPCKLDQ128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPUNPCKLDQ128load {sym} [off] x ptr mem)
+(VPUNPCKLQDQ128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPUNPCKLQDQ128load {sym} [off] x ptr mem)
+(VPUNPCKLDQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPUNPCKLDQ256load {sym} [off] x ptr mem)
+(VPUNPCKLDQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPUNPCKLDQ512load {sym} [off] x ptr mem)
+(VPUNPCKLQDQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPUNPCKLQDQ256load {sym} [off] x ptr mem)
+(VPUNPCKLQDQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPUNPCKLQDQ512load {sym} [off] x ptr mem)
+(VMAXPS128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VMAXPS128load {sym} [off] x ptr mem)
+(VMAXPS256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VMAXPS256load {sym} [off] x ptr mem)
+(VMAXPS512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VMAXPS512load {sym} [off] x ptr mem)
+(VMAXPD128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VMAXPD128load {sym} [off] x ptr mem)
+(VMAXPD256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VMAXPD256load {sym} [off] x ptr mem)
+(VMAXPD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VMAXPD512load {sym} [off] x ptr mem)
+(VPMAXSD128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMAXSD128load {sym} [off] x ptr mem)
+(VPMAXSD256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMAXSD256load {sym} [off] x ptr mem)
+(VPMAXSD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMAXSD512load {sym} [off] x ptr mem)
+(VPMAXSQ128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMAXSQ128load {sym} [off] x ptr mem)
+(VPMAXSQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMAXSQ256load {sym} [off] x ptr mem)
+(VPMAXSQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMAXSQ512load {sym} [off] x ptr mem)
+(VPMAXUD128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMAXUD128load {sym} [off] x ptr mem)
+(VPMAXUD256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMAXUD256load {sym} [off] x ptr mem)
+(VPMAXUD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMAXUD512load {sym} [off] x ptr mem)
+(VPMAXUQ128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMAXUQ128load {sym} [off] x ptr mem)
+(VPMAXUQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMAXUQ256load {sym} [off] x ptr mem)
+(VPMAXUQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMAXUQ512load {sym} [off] x ptr mem)
+(VMAXPSMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VMAXPSMasked128load {sym} [off] x ptr mask mem)
+(VMAXPSMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VMAXPSMasked256load {sym} [off] x ptr mask mem)
+(VMAXPSMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VMAXPSMasked512load {sym} [off] x ptr mask mem)
+(VMAXPDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VMAXPDMasked128load {sym} [off] x ptr mask mem)
+(VMAXPDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VMAXPDMasked256load {sym} [off] x ptr mask mem)
+(VMAXPDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VMAXPDMasked512load {sym} [off] x ptr mask mem)
+(VPMAXSDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMAXSDMasked128load {sym} [off] x ptr mask mem)
+(VPMAXSDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMAXSDMasked256load {sym} [off] x ptr mask mem)
+(VPMAXSDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMAXSDMasked512load {sym} [off] x ptr mask mem)
+(VPMAXSQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMAXSQMasked128load {sym} [off] x ptr mask mem)
+(VPMAXSQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMAXSQMasked256load {sym} [off] x ptr mask mem)
+(VPMAXSQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMAXSQMasked512load {sym} [off] x ptr mask mem)
+(VPMAXUDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMAXUDMasked128load {sym} [off] x ptr mask mem)
+(VPMAXUDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMAXUDMasked256load {sym} [off] x ptr mask mem)
+(VPMAXUDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMAXUDMasked512load {sym} [off] x ptr mask mem)
+(VPMAXUQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMAXUQMasked128load {sym} [off] x ptr mask mem)
+(VPMAXUQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMAXUQMasked256load {sym} [off] x ptr mask mem)
+(VPMAXUQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMAXUQMasked512load {sym} [off] x ptr mask mem)
+(VMINPS128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VMINPS128load {sym} [off] x ptr mem)
+(VMINPS256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VMINPS256load {sym} [off] x ptr mem)
+(VMINPS512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VMINPS512load {sym} [off] x ptr mem)
+(VMINPD128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VMINPD128load {sym} [off] x ptr mem)
+(VMINPD256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VMINPD256load {sym} [off] x ptr mem)
+(VMINPD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VMINPD512load {sym} [off] x ptr mem)
+(VPMINSD128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMINSD128load {sym} [off] x ptr mem)
+(VPMINSD256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMINSD256load {sym} [off] x ptr mem)
+(VPMINSD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMINSD512load {sym} [off] x ptr mem)
+(VPMINSQ128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMINSQ128load {sym} [off] x ptr mem)
+(VPMINSQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMINSQ256load {sym} [off] x ptr mem)
+(VPMINSQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMINSQ512load {sym} [off] x ptr mem)
+(VPMINUD128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMINUD128load {sym} [off] x ptr mem)
+(VPMINUD256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMINUD256load {sym} [off] x ptr mem)
+(VPMINUD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMINUD512load {sym} [off] x ptr mem)
+(VPMINUQ128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMINUQ128load {sym} [off] x ptr mem)
+(VPMINUQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMINUQ256load {sym} [off] x ptr mem)
+(VPMINUQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMINUQ512load {sym} [off] x ptr mem)
+(VMINPSMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VMINPSMasked128load {sym} [off] x ptr mask mem)
+(VMINPSMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VMINPSMasked256load {sym} [off] x ptr mask mem)
+(VMINPSMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VMINPSMasked512load {sym} [off] x ptr mask mem)
+(VMINPDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VMINPDMasked128load {sym} [off] x ptr mask mem)
+(VMINPDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VMINPDMasked256load {sym} [off] x ptr mask mem)
+(VMINPDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VMINPDMasked512load {sym} [off] x ptr mask mem)
+(VPMINSDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMINSDMasked128load {sym} [off] x ptr mask mem)
+(VPMINSDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMINSDMasked256load {sym} [off] x ptr mask mem)
+(VPMINSDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMINSDMasked512load {sym} [off] x ptr mask mem)
+(VPMINSQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMINSQMasked128load {sym} [off] x ptr mask mem)
+(VPMINSQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMINSQMasked256load {sym} [off] x ptr mask mem)
+(VPMINSQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMINSQMasked512load {sym} [off] x ptr mask mem)
+(VPMINUDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMINUDMasked128load {sym} [off] x ptr mask mem)
+(VPMINUDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMINUDMasked256load {sym} [off] x ptr mask mem)
+(VPMINUDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMINUDMasked512load {sym} [off] x ptr mask mem)
+(VPMINUQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMINUQMasked128load {sym} [off] x ptr mask mem)
+(VPMINUQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMINUQMasked256load {sym} [off] x ptr mask mem)
+(VPMINUQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMINUQMasked512load {sym} [off] x ptr mask mem)
+(VMULPS128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VMULPS128load {sym} [off] x ptr mem)
+(VMULPS256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VMULPS256load {sym} [off] x ptr mem)
+(VMULPS512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VMULPS512load {sym} [off] x ptr mem)
+(VMULPD128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VMULPD128load {sym} [off] x ptr mem)
+(VMULPD256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VMULPD256load {sym} [off] x ptr mem)
+(VMULPD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VMULPD512load {sym} [off] x ptr mem)
+(VPMULLD128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMULLD128load {sym} [off] x ptr mem)
+(VPMULLD256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMULLD256load {sym} [off] x ptr mem)
+(VPMULLD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMULLD512load {sym} [off] x ptr mem)
+(VPMULLQ128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMULLQ128load {sym} [off] x ptr mem)
+(VPMULLQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMULLQ256load {sym} [off] x ptr mem)
+(VPMULLQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMULLQ512load {sym} [off] x ptr mem)
+(VFMADD213PS128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VFMADD213PS128load {sym} [off] x y ptr mem)
+(VFMADD213PS256 x y l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VFMADD213PS256load {sym} [off] x y ptr mem)
+(VFMADD213PS512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VFMADD213PS512load {sym} [off] x y ptr mem)
+(VFMADD213PD128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VFMADD213PD128load {sym} [off] x y ptr mem)
+(VFMADD213PD256 x y l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VFMADD213PD256load {sym} [off] x y ptr mem)
+(VFMADD213PD512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VFMADD213PD512load {sym} [off] x y ptr mem)
+(VFMADD213PSMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VFMADD213PSMasked128load {sym} [off] x y ptr mask mem)
+(VFMADD213PSMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VFMADD213PSMasked256load {sym} [off] x y ptr mask mem)
+(VFMADD213PSMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VFMADD213PSMasked512load {sym} [off] x y ptr mask mem)
+(VFMADD213PDMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VFMADD213PDMasked128load {sym} [off] x y ptr mask mem)
+(VFMADD213PDMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VFMADD213PDMasked256load {sym} [off] x y ptr mask mem)
+(VFMADD213PDMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VFMADD213PDMasked512load {sym} [off] x y ptr mask mem)
+(VFMADDSUB213PS128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VFMADDSUB213PS128load {sym} [off] x y ptr mem)
+(VFMADDSUB213PS256 x y l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VFMADDSUB213PS256load {sym} [off] x y ptr mem)
+(VFMADDSUB213PS512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VFMADDSUB213PS512load {sym} [off] x y ptr mem)
+(VFMADDSUB213PD128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VFMADDSUB213PD128load {sym} [off] x y ptr mem)
+(VFMADDSUB213PD256 x y l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VFMADDSUB213PD256load {sym} [off] x y ptr mem)
+(VFMADDSUB213PD512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VFMADDSUB213PD512load {sym} [off] x y ptr mem)
+(VFMADDSUB213PSMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VFMADDSUB213PSMasked128load {sym} [off] x y ptr mask mem)
+(VFMADDSUB213PSMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VFMADDSUB213PSMasked256load {sym} [off] x y ptr mask mem)
+(VFMADDSUB213PSMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VFMADDSUB213PSMasked512load {sym} [off] x y ptr mask mem)
+(VFMADDSUB213PDMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VFMADDSUB213PDMasked128load {sym} [off] x y ptr mask mem)
+(VFMADDSUB213PDMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VFMADDSUB213PDMasked256load {sym} [off] x y ptr mask mem)
+(VFMADDSUB213PDMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VFMADDSUB213PDMasked512load {sym} [off] x y ptr mask mem)
+(VPMULDQ128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMULDQ128load {sym} [off] x ptr mem)
+(VPMULDQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMULDQ256load {sym} [off] x ptr mem)
+(VPMULUDQ128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMULUDQ128load {sym} [off] x ptr mem)
+(VPMULUDQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPMULUDQ256load {sym} [off] x ptr mem)
+(VMULPSMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VMULPSMasked128load {sym} [off] x ptr mask mem)
+(VMULPSMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VMULPSMasked256load {sym} [off] x ptr mask mem)
+(VMULPSMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VMULPSMasked512load {sym} [off] x ptr mask mem)
+(VMULPDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VMULPDMasked128load {sym} [off] x ptr mask mem)
+(VMULPDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VMULPDMasked256load {sym} [off] x ptr mask mem)
+(VMULPDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VMULPDMasked512load {sym} [off] x ptr mask mem)
+(VPMULLDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMULLDMasked128load {sym} [off] x ptr mask mem)
+(VPMULLDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMULLDMasked256load {sym} [off] x ptr mask mem)
+(VPMULLDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMULLDMasked512load {sym} [off] x ptr mask mem)
+(VPMULLQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMULLQMasked128load {sym} [off] x ptr mask mem)
+(VPMULLQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMULLQMasked256load {sym} [off] x ptr mask mem)
+(VPMULLQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPMULLQMasked512load {sym} [off] x ptr mask mem)
+(VFMSUBADD213PS128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VFMSUBADD213PS128load {sym} [off] x y ptr mem)
+(VFMSUBADD213PS256 x y l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VFMSUBADD213PS256load {sym} [off] x y ptr mem)
+(VFMSUBADD213PS512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VFMSUBADD213PS512load {sym} [off] x y ptr mem)
+(VFMSUBADD213PD128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VFMSUBADD213PD128load {sym} [off] x y ptr mem)
+(VFMSUBADD213PD256 x y l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VFMSUBADD213PD256load {sym} [off] x y ptr mem)
+(VFMSUBADD213PD512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VFMSUBADD213PD512load {sym} [off] x y ptr mem)
+(VFMSUBADD213PSMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VFMSUBADD213PSMasked128load {sym} [off] x y ptr mask mem)
+(VFMSUBADD213PSMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VFMSUBADD213PSMasked256load {sym} [off] x y ptr mask mem)
+(VFMSUBADD213PSMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VFMSUBADD213PSMasked512load {sym} [off] x y ptr mask mem)
+(VFMSUBADD213PDMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VFMSUBADD213PDMasked128load {sym} [off] x y ptr mask mem)
+(VFMSUBADD213PDMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VFMSUBADD213PDMasked256load {sym} [off] x y ptr mask mem)
+(VFMSUBADD213PDMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VFMSUBADD213PDMasked512load {sym} [off] x y ptr mask mem)
+(VPOPCNTD128 l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPOPCNTD128load {sym} [off] ptr mem)
+(VPOPCNTD256 l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPOPCNTD256load {sym} [off] ptr mem)
+(VPOPCNTD512 l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPOPCNTD512load {sym} [off] ptr mem)
+(VPOPCNTQ128 l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPOPCNTQ128load {sym} [off] ptr mem)
+(VPOPCNTQ256 l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPOPCNTQ256load {sym} [off] ptr mem)
+(VPOPCNTQ512 l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPOPCNTQ512load {sym} [off] ptr mem)
+(VPOPCNTDMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPOPCNTDMasked128load {sym} [off] ptr mask mem)
+(VPOPCNTDMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPOPCNTDMasked256load {sym} [off] ptr mask mem)
+(VPOPCNTDMasked512 l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPOPCNTDMasked512load {sym} [off] ptr mask mem)
+(VPOPCNTQMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPOPCNTQMasked128load {sym} [off] ptr mask mem)
+(VPOPCNTQMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPOPCNTQMasked256load {sym} [off] ptr mask mem)
+(VPOPCNTQMasked512 l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPOPCNTQMasked512load {sym} [off] ptr mask mem)
+(VPORD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPORD512load {sym} [off] x ptr mem)
+(VPORQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPORQ512load {sym} [off] x ptr mem)
+(VPORDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPORDMasked128load {sym} [off] x ptr mask mem)
+(VPORDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPORDMasked256load {sym} [off] x ptr mask mem)
+(VPORDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPORDMasked512load {sym} [off] x ptr mask mem)
+(VPORQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPORQMasked128load {sym} [off] x ptr mask mem)
+(VPORQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPORQMasked256load {sym} [off] x ptr mask mem)
+(VPORQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPORQMasked512load {sym} [off] x ptr mask mem)
+(VPERMPS256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPERMPS256load {sym} [off] x ptr mem)
+(VPERMD256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPERMD256load {sym} [off] x ptr mem)
+(VPERMPS512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPERMPS512load {sym} [off] x ptr mem)
+(VPERMD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPERMD512load {sym} [off] x ptr mem)
+(VPERMPD256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPERMPD256load {sym} [off] x ptr mem)
+(VPERMQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPERMQ256load {sym} [off] x ptr mem)
+(VPERMPD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPERMPD512load {sym} [off] x ptr mem)
+(VPERMQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPERMQ512load {sym} [off] x ptr mem)
+(VPERMI2PS128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPERMI2PS128load {sym} [off] x y ptr mem)
+(VPERMI2D128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPERMI2D128load {sym} [off] x y ptr mem)
+(VPERMI2PS256 x y l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPERMI2PS256load {sym} [off] x y ptr mem)
+(VPERMI2D256 x y l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPERMI2D256load {sym} [off] x y ptr mem)
+(VPERMI2PS512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPERMI2PS512load {sym} [off] x y ptr mem)
+(VPERMI2D512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPERMI2D512load {sym} [off] x y ptr mem)
+(VPERMI2PD128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPERMI2PD128load {sym} [off] x y ptr mem)
+(VPERMI2Q128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPERMI2Q128load {sym} [off] x y ptr mem)
+(VPERMI2PD256 x y l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPERMI2PD256load {sym} [off] x y ptr mem)
+(VPERMI2Q256 x y l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPERMI2Q256load {sym} [off] x y ptr mem)
+(VPERMI2PD512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPERMI2PD512load {sym} [off] x y ptr mem)
+(VPERMI2Q512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPERMI2Q512load {sym} [off] x y ptr mem)
+(VPERMI2PSMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPERMI2PSMasked128load {sym} [off] x y ptr mask mem)
+(VPERMI2DMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPERMI2DMasked128load {sym} [off] x y ptr mask mem)
+(VPERMI2PSMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPERMI2PSMasked256load {sym} [off] x y ptr mask mem)
+(VPERMI2DMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPERMI2DMasked256load {sym} [off] x y ptr mask mem)
+(VPERMI2PSMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPERMI2PSMasked512load {sym} [off] x y ptr mask mem)
+(VPERMI2DMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPERMI2DMasked512load {sym} [off] x y ptr mask mem)
+(VPERMI2PDMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPERMI2PDMasked128load {sym} [off] x y ptr mask mem)
+(VPERMI2QMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPERMI2QMasked128load {sym} [off] x y ptr mask mem)
+(VPERMI2PDMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPERMI2PDMasked256load {sym} [off] x y ptr mask mem)
+(VPERMI2QMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPERMI2QMasked256load {sym} [off] x y ptr mask mem)
+(VPERMI2PDMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPERMI2PDMasked512load {sym} [off] x y ptr mask mem)
+(VPERMI2QMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPERMI2QMasked512load {sym} [off] x y ptr mask mem)
+(VPERMPSMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPERMPSMasked256load {sym} [off] x ptr mask mem)
+(VPERMDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPERMDMasked256load {sym} [off] x ptr mask mem)
+(VPERMPSMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPERMPSMasked512load {sym} [off] x ptr mask mem)
+(VPERMDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPERMDMasked512load {sym} [off] x ptr mask mem)
+(VPERMPDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPERMPDMasked256load {sym} [off] x ptr mask mem)
+(VPERMQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPERMQMasked256load {sym} [off] x ptr mask mem)
+(VPERMPDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPERMPDMasked512load {sym} [off] x ptr mask mem)
+(VPERMQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPERMQMasked512load {sym} [off] x ptr mask mem)
+(VRCP14PS512 l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VRCP14PS512load {sym} [off] ptr mem)
+(VRCP14PD128 l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VRCP14PD128load {sym} [off] ptr mem)
+(VRCP14PD256 l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VRCP14PD256load {sym} [off] ptr mem)
+(VRCP14PD512 l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VRCP14PD512load {sym} [off] ptr mem)
+(VRCP14PSMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VRCP14PSMasked128load {sym} [off] ptr mask mem)
+(VRCP14PSMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VRCP14PSMasked256load {sym} [off] ptr mask mem)
+(VRCP14PSMasked512 l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VRCP14PSMasked512load {sym} [off] ptr mask mem)
+(VRCP14PDMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VRCP14PDMasked128load {sym} [off] ptr mask mem)
+(VRCP14PDMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VRCP14PDMasked256load {sym} [off] ptr mask mem)
+(VRCP14PDMasked512 l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VRCP14PDMasked512load {sym} [off] ptr mask mem)
+(VRSQRT14PS512 l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VRSQRT14PS512load {sym} [off] ptr mem)
+(VRSQRT14PD128 l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VRSQRT14PD128load {sym} [off] ptr mem)
+(VRSQRT14PD256 l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VRSQRT14PD256load {sym} [off] ptr mem)
+(VRSQRT14PD512 l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VRSQRT14PD512load {sym} [off] ptr mem)
+(VRSQRT14PSMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VRSQRT14PSMasked128load {sym} [off] ptr mask mem)
+(VRSQRT14PSMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VRSQRT14PSMasked256load {sym} [off] ptr mask mem)
+(VRSQRT14PSMasked512 l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VRSQRT14PSMasked512load {sym} [off] ptr mask mem)
+(VRSQRT14PDMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VRSQRT14PDMasked128load {sym} [off] ptr mask mem)
+(VRSQRT14PDMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VRSQRT14PDMasked256load {sym} [off] ptr mask mem)
+(VRSQRT14PDMasked512 l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VRSQRT14PDMasked512load {sym} [off] ptr mask mem)
+(VPROLVD128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPROLVD128load {sym} [off] x ptr mem)
+(VPROLVD256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPROLVD256load {sym} [off] x ptr mem)
+(VPROLVD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPROLVD512load {sym} [off] x ptr mem)
+(VPROLVQ128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPROLVQ128load {sym} [off] x ptr mem)
+(VPROLVQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPROLVQ256load {sym} [off] x ptr mem)
+(VPROLVQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPROLVQ512load {sym} [off] x ptr mem)
+(VPROLVDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPROLVDMasked128load {sym} [off] x ptr mask mem)
+(VPROLVDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPROLVDMasked256load {sym} [off] x ptr mask mem)
+(VPROLVDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPROLVDMasked512load {sym} [off] x ptr mask mem)
+(VPROLVQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPROLVQMasked128load {sym} [off] x ptr mask mem)
+(VPROLVQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPROLVQMasked256load {sym} [off] x ptr mask mem)
+(VPROLVQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPROLVQMasked512load {sym} [off] x ptr mask mem)
+(VPRORVD128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPRORVD128load {sym} [off] x ptr mem)
+(VPRORVD256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPRORVD256load {sym} [off] x ptr mem)
+(VPRORVD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPRORVD512load {sym} [off] x ptr mem)
+(VPRORVQ128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPRORVQ128load {sym} [off] x ptr mem)
+(VPRORVQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPRORVQ256load {sym} [off] x ptr mem)
+(VPRORVQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPRORVQ512load {sym} [off] x ptr mem)
+(VPRORVDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPRORVDMasked128load {sym} [off] x ptr mask mem)
+(VPRORVDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPRORVDMasked256load {sym} [off] x ptr mask mem)
+(VPRORVDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPRORVDMasked512load {sym} [off] x ptr mask mem)
+(VPRORVQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPRORVQMasked128load {sym} [off] x ptr mask mem)
+(VPRORVQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPRORVQMasked256load {sym} [off] x ptr mask mem)
+(VPRORVQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPRORVQMasked512load {sym} [off] x ptr mask mem)
+(VSCALEFPS128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VSCALEFPS128load {sym} [off] x ptr mem)
+(VSCALEFPS256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VSCALEFPS256load {sym} [off] x ptr mem)
+(VSCALEFPS512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VSCALEFPS512load {sym} [off] x ptr mem)
+(VSCALEFPD128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VSCALEFPD128load {sym} [off] x ptr mem)
+(VSCALEFPD256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VSCALEFPD256load {sym} [off] x ptr mem)
+(VSCALEFPD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VSCALEFPD512load {sym} [off] x ptr mem)
+(VSCALEFPSMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VSCALEFPSMasked128load {sym} [off] x ptr mask mem)
+(VSCALEFPSMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VSCALEFPSMasked256load {sym} [off] x ptr mask mem)
+(VSCALEFPSMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VSCALEFPSMasked512load {sym} [off] x ptr mask mem)
+(VSCALEFPDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VSCALEFPDMasked128load {sym} [off] x ptr mask mem)
+(VSCALEFPDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VSCALEFPDMasked256load {sym} [off] x ptr mask mem)
+(VSCALEFPDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VSCALEFPDMasked512load {sym} [off] x ptr mask mem)
+(VPSLLVD128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSLLVD128load {sym} [off] x ptr mem)
+(VPSLLVD256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSLLVD256load {sym} [off] x ptr mem)
+(VPSLLVD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSLLVD512load {sym} [off] x ptr mem)
+(VPSLLVQ128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSLLVQ128load {sym} [off] x ptr mem)
+(VPSLLVQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSLLVQ256load {sym} [off] x ptr mem)
+(VPSLLVQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSLLVQ512load {sym} [off] x ptr mem)
+(VPSHLDVD128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHLDVD128load {sym} [off] x y ptr mem)
+(VPSHLDVD256 x y l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHLDVD256load {sym} [off] x y ptr mem)
+(VPSHLDVD512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHLDVD512load {sym} [off] x y ptr mem)
+(VPSHLDVQ128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHLDVQ128load {sym} [off] x y ptr mem)
+(VPSHLDVQ256 x y l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHLDVQ256load {sym} [off] x y ptr mem)
+(VPSHLDVQ512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHLDVQ512load {sym} [off] x y ptr mem)
+(VPSHLDVDMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHLDVDMasked128load {sym} [off] x y ptr mask mem)
+(VPSHLDVDMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHLDVDMasked256load {sym} [off] x y ptr mask mem)
+(VPSHLDVDMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHLDVDMasked512load {sym} [off] x y ptr mask mem)
+(VPSHLDVQMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHLDVQMasked128load {sym} [off] x y ptr mask mem)
+(VPSHLDVQMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHLDVQMasked256load {sym} [off] x y ptr mask mem)
+(VPSHLDVQMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHLDVQMasked512load {sym} [off] x y ptr mask mem)
+(VPSLLVDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSLLVDMasked128load {sym} [off] x ptr mask mem)
+(VPSLLVDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSLLVDMasked256load {sym} [off] x ptr mask mem)
+(VPSLLVDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSLLVDMasked512load {sym} [off] x ptr mask mem)
+(VPSLLVQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSLLVQMasked128load {sym} [off] x ptr mask mem)
+(VPSLLVQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSLLVQMasked256load {sym} [off] x ptr mask mem)
+(VPSLLVQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSLLVQMasked512load {sym} [off] x ptr mask mem)
+(VPSRAVD128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSRAVD128load {sym} [off] x ptr mem)
+(VPSRAVD256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSRAVD256load {sym} [off] x ptr mem)
+(VPSRAVD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSRAVD512load {sym} [off] x ptr mem)
+(VPSRAVQ128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSRAVQ128load {sym} [off] x ptr mem)
+(VPSRAVQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSRAVQ256load {sym} [off] x ptr mem)
+(VPSRAVQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSRAVQ512load {sym} [off] x ptr mem)
+(VPSRLVD128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSRLVD128load {sym} [off] x ptr mem)
+(VPSRLVD256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSRLVD256load {sym} [off] x ptr mem)
+(VPSRLVD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSRLVD512load {sym} [off] x ptr mem)
+(VPSRLVQ128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSRLVQ128load {sym} [off] x ptr mem)
+(VPSRLVQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSRLVQ256load {sym} [off] x ptr mem)
+(VPSRLVQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSRLVQ512load {sym} [off] x ptr mem)
+(VPSHRDVD128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHRDVD128load {sym} [off] x y ptr mem)
+(VPSHRDVD256 x y l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHRDVD256load {sym} [off] x y ptr mem)
+(VPSHRDVD512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHRDVD512load {sym} [off] x y ptr mem)
+(VPSHRDVQ128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHRDVQ128load {sym} [off] x y ptr mem)
+(VPSHRDVQ256 x y l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHRDVQ256load {sym} [off] x y ptr mem)
+(VPSHRDVQ512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSHRDVQ512load {sym} [off] x y ptr mem)
+(VPSHRDVDMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHRDVDMasked128load {sym} [off] x y ptr mask mem)
+(VPSHRDVDMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHRDVDMasked256load {sym} [off] x y ptr mask mem)
+(VPSHRDVDMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHRDVDMasked512load {sym} [off] x y ptr mask mem)
+(VPSHRDVQMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHRDVQMasked128load {sym} [off] x y ptr mask mem)
+(VPSHRDVQMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHRDVQMasked256load {sym} [off] x y ptr mask mem)
+(VPSHRDVQMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSHRDVQMasked512load {sym} [off] x y ptr mask mem)
+(VPSRAVDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRAVDMasked128load {sym} [off] x ptr mask mem)
+(VPSRAVDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRAVDMasked256load {sym} [off] x ptr mask mem)
+(VPSRAVDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRAVDMasked512load {sym} [off] x ptr mask mem)
+(VPSRAVQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRAVQMasked128load {sym} [off] x ptr mask mem)
+(VPSRAVQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRAVQMasked256load {sym} [off] x ptr mask mem)
+(VPSRAVQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRAVQMasked512load {sym} [off] x ptr mask mem)
+(VPSRLVDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRLVDMasked128load {sym} [off] x ptr mask mem)
+(VPSRLVDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRLVDMasked256load {sym} [off] x ptr mask mem)
+(VPSRLVDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRLVDMasked512load {sym} [off] x ptr mask mem)
+(VPSRLVQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRLVQMasked128load {sym} [off] x ptr mask mem)
+(VPSRLVQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRLVQMasked256load {sym} [off] x ptr mask mem)
+(VPSRLVQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSRLVQMasked512load {sym} [off] x ptr mask mem)
+(VSQRTPS128 l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VSQRTPS128load {sym} [off] ptr mem)
+(VSQRTPS256 l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VSQRTPS256load {sym} [off] ptr mem)
+(VSQRTPS512 l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VSQRTPS512load {sym} [off] ptr mem)
+(VSQRTPD128 l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VSQRTPD128load {sym} [off] ptr mem)
+(VSQRTPD256 l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VSQRTPD256load {sym} [off] ptr mem)
+(VSQRTPD512 l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VSQRTPD512load {sym} [off] ptr mem)
+(VSQRTPSMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VSQRTPSMasked128load {sym} [off] ptr mask mem)
+(VSQRTPSMasked256 l:(VMOVDQUload256
{sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VSQRTPSMasked256load {sym} [off] ptr mask mem) +(VSQRTPSMasked512 l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VSQRTPSMasked512load {sym} [off] ptr mask mem) +(VSQRTPDMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VSQRTPDMasked128load {sym} [off] ptr mask mem) +(VSQRTPDMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VSQRTPDMasked256load {sym} [off] ptr mask mem) +(VSQRTPDMasked512 l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VSQRTPDMasked512load {sym} [off] ptr mask mem) +(VSUBPS128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VSUBPS128load {sym} [off] x ptr mem) +(VSUBPS256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VSUBPS256load {sym} [off] x ptr mem) +(VSUBPS512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VSUBPS512load {sym} [off] x ptr mem) +(VSUBPD128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VSUBPD128load {sym} [off] x ptr mem) +(VSUBPD256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VSUBPD256load {sym} [off] x ptr mem) +(VSUBPD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VSUBPD512load {sym} [off] x ptr mem) +(VPSUBD128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSUBD128load {sym} [off] x ptr mem) +(VPSUBD256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSUBD256load {sym} [off] x ptr mem) +(VPSUBD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSUBD512load {sym} [off] x ptr mem) +(VPSUBQ128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSUBQ128load {sym} [off] x ptr mem) +(VPSUBQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSUBQ256load {sym} [off] x ptr mem) +(VPSUBQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPSUBQ512load {sym} [off] x ptr mem) +(VSUBPSMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VSUBPSMasked128load {sym} [off] x ptr mask mem) +(VSUBPSMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VSUBPSMasked256load {sym} [off] x ptr mask mem) +(VSUBPSMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VSUBPSMasked512load {sym} [off] x ptr mask mem) +(VSUBPDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VSUBPDMasked128load {sym} [off] x ptr mask mem) +(VSUBPDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VSUBPDMasked256load {sym} [off] x ptr mask mem) +(VSUBPDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VSUBPDMasked512load {sym} [off] x ptr mask mem) +(VPSUBDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSUBDMasked128load {sym} [off] x ptr mask mem) +(VPSUBDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSUBDMasked256load {sym} [off] x ptr mask mem) +(VPSUBDMasked512 x l:(VMOVDQUload512 
{sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSUBDMasked512load {sym} [off] x ptr mask mem) +(VPSUBQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSUBQMasked128load {sym} [off] x ptr mask mem) +(VPSUBQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSUBQMasked256load {sym} [off] x ptr mask mem) +(VPSUBQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPSUBQMasked512load {sym} [off] x ptr mask mem) +(VPXORD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPXORD512load {sym} [off] x ptr mem) +(VPXORQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPXORQ512load {sym} [off] x ptr mem) +(VPXORDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPXORDMasked128load {sym} [off] x ptr mask mem) +(VPXORDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPXORDMasked256load {sym} [off] x ptr mask mem) +(VPXORDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPXORDMasked512load {sym} [off] x ptr mask mem) +(VPXORQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPXORQMasked128load {sym} [off] x ptr mask mem) +(VPXORQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPXORQMasked256load {sym} [off] x ptr mask mem) +(VPXORQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPXORQMasked512load {sym} [off] x ptr mask mem) +(VPBLENDMDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPBLENDMDMasked512load {sym} [off] x ptr mask mem) +(VPBLENDMQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPBLENDMQMasked512load {sym} [off] x ptr mask mem) diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go index 06cafc8e6d..737b0c4762 100644 --- a/src/cmd/compile/internal/ssa/rewriteAMD64.go +++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go @@ -507,6 +507,198 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpAMD64TESTW(v) case OpAMD64TESTWconst: return rewriteValueAMD64_OpAMD64TESTWconst(v) + case OpAMD64VADDPD128: + return rewriteValueAMD64_OpAMD64VADDPD128(v) + case OpAMD64VADDPD256: + return rewriteValueAMD64_OpAMD64VADDPD256(v) + case OpAMD64VADDPD512: + return rewriteValueAMD64_OpAMD64VADDPD512(v) + case OpAMD64VADDPDMasked128: + return rewriteValueAMD64_OpAMD64VADDPDMasked128(v) + case OpAMD64VADDPDMasked256: + return rewriteValueAMD64_OpAMD64VADDPDMasked256(v) + case OpAMD64VADDPDMasked512: + return rewriteValueAMD64_OpAMD64VADDPDMasked512(v) + case OpAMD64VADDPS128: + return rewriteValueAMD64_OpAMD64VADDPS128(v) + case OpAMD64VADDPS256: + return rewriteValueAMD64_OpAMD64VADDPS256(v) + case OpAMD64VADDPS512: + return rewriteValueAMD64_OpAMD64VADDPS512(v) + case OpAMD64VADDPSMasked128: + return rewriteValueAMD64_OpAMD64VADDPSMasked128(v) + case OpAMD64VADDPSMasked256: + return rewriteValueAMD64_OpAMD64VADDPSMasked256(v) + case OpAMD64VADDPSMasked512: + return rewriteValueAMD64_OpAMD64VADDPSMasked512(v) + case OpAMD64VCVTPS2UDQ128: + return rewriteValueAMD64_OpAMD64VCVTPS2UDQ128(v) + case OpAMD64VCVTPS2UDQ256: + return 
rewriteValueAMD64_OpAMD64VCVTPS2UDQ256(v) + case OpAMD64VCVTPS2UDQ512: + return rewriteValueAMD64_OpAMD64VCVTPS2UDQ512(v) + case OpAMD64VCVTPS2UDQMasked128: + return rewriteValueAMD64_OpAMD64VCVTPS2UDQMasked128(v) + case OpAMD64VCVTPS2UDQMasked256: + return rewriteValueAMD64_OpAMD64VCVTPS2UDQMasked256(v) + case OpAMD64VCVTPS2UDQMasked512: + return rewriteValueAMD64_OpAMD64VCVTPS2UDQMasked512(v) + case OpAMD64VCVTTPS2DQ128: + return rewriteValueAMD64_OpAMD64VCVTTPS2DQ128(v) + case OpAMD64VCVTTPS2DQ256: + return rewriteValueAMD64_OpAMD64VCVTTPS2DQ256(v) + case OpAMD64VCVTTPS2DQ512: + return rewriteValueAMD64_OpAMD64VCVTTPS2DQ512(v) + case OpAMD64VCVTTPS2DQMasked128: + return rewriteValueAMD64_OpAMD64VCVTTPS2DQMasked128(v) + case OpAMD64VCVTTPS2DQMasked256: + return rewriteValueAMD64_OpAMD64VCVTTPS2DQMasked256(v) + case OpAMD64VCVTTPS2DQMasked512: + return rewriteValueAMD64_OpAMD64VCVTTPS2DQMasked512(v) + case OpAMD64VDIVPD128: + return rewriteValueAMD64_OpAMD64VDIVPD128(v) + case OpAMD64VDIVPD256: + return rewriteValueAMD64_OpAMD64VDIVPD256(v) + case OpAMD64VDIVPD512: + return rewriteValueAMD64_OpAMD64VDIVPD512(v) + case OpAMD64VDIVPDMasked128: + return rewriteValueAMD64_OpAMD64VDIVPDMasked128(v) + case OpAMD64VDIVPDMasked256: + return rewriteValueAMD64_OpAMD64VDIVPDMasked256(v) + case OpAMD64VDIVPDMasked512: + return rewriteValueAMD64_OpAMD64VDIVPDMasked512(v) + case OpAMD64VDIVPS128: + return rewriteValueAMD64_OpAMD64VDIVPS128(v) + case OpAMD64VDIVPS256: + return rewriteValueAMD64_OpAMD64VDIVPS256(v) + case OpAMD64VDIVPS512: + return rewriteValueAMD64_OpAMD64VDIVPS512(v) + case OpAMD64VDIVPSMasked128: + return rewriteValueAMD64_OpAMD64VDIVPSMasked128(v) + case OpAMD64VDIVPSMasked256: + return rewriteValueAMD64_OpAMD64VDIVPSMasked256(v) + case OpAMD64VDIVPSMasked512: + return rewriteValueAMD64_OpAMD64VDIVPSMasked512(v) + case OpAMD64VFMADD213PD128: + return rewriteValueAMD64_OpAMD64VFMADD213PD128(v) + case OpAMD64VFMADD213PD256: + return rewriteValueAMD64_OpAMD64VFMADD213PD256(v) + case OpAMD64VFMADD213PD512: + return rewriteValueAMD64_OpAMD64VFMADD213PD512(v) + case OpAMD64VFMADD213PDMasked128: + return rewriteValueAMD64_OpAMD64VFMADD213PDMasked128(v) + case OpAMD64VFMADD213PDMasked256: + return rewriteValueAMD64_OpAMD64VFMADD213PDMasked256(v) + case OpAMD64VFMADD213PDMasked512: + return rewriteValueAMD64_OpAMD64VFMADD213PDMasked512(v) + case OpAMD64VFMADD213PS128: + return rewriteValueAMD64_OpAMD64VFMADD213PS128(v) + case OpAMD64VFMADD213PS256: + return rewriteValueAMD64_OpAMD64VFMADD213PS256(v) + case OpAMD64VFMADD213PS512: + return rewriteValueAMD64_OpAMD64VFMADD213PS512(v) + case OpAMD64VFMADD213PSMasked128: + return rewriteValueAMD64_OpAMD64VFMADD213PSMasked128(v) + case OpAMD64VFMADD213PSMasked256: + return rewriteValueAMD64_OpAMD64VFMADD213PSMasked256(v) + case OpAMD64VFMADD213PSMasked512: + return rewriteValueAMD64_OpAMD64VFMADD213PSMasked512(v) + case OpAMD64VFMADDSUB213PD128: + return rewriteValueAMD64_OpAMD64VFMADDSUB213PD128(v) + case OpAMD64VFMADDSUB213PD256: + return rewriteValueAMD64_OpAMD64VFMADDSUB213PD256(v) + case OpAMD64VFMADDSUB213PD512: + return rewriteValueAMD64_OpAMD64VFMADDSUB213PD512(v) + case OpAMD64VFMADDSUB213PDMasked128: + return rewriteValueAMD64_OpAMD64VFMADDSUB213PDMasked128(v) + case OpAMD64VFMADDSUB213PDMasked256: + return rewriteValueAMD64_OpAMD64VFMADDSUB213PDMasked256(v) + case OpAMD64VFMADDSUB213PDMasked512: + return rewriteValueAMD64_OpAMD64VFMADDSUB213PDMasked512(v) + case OpAMD64VFMADDSUB213PS128: + return 
rewriteValueAMD64_OpAMD64VFMADDSUB213PS128(v) + case OpAMD64VFMADDSUB213PS256: + return rewriteValueAMD64_OpAMD64VFMADDSUB213PS256(v) + case OpAMD64VFMADDSUB213PS512: + return rewriteValueAMD64_OpAMD64VFMADDSUB213PS512(v) + case OpAMD64VFMADDSUB213PSMasked128: + return rewriteValueAMD64_OpAMD64VFMADDSUB213PSMasked128(v) + case OpAMD64VFMADDSUB213PSMasked256: + return rewriteValueAMD64_OpAMD64VFMADDSUB213PSMasked256(v) + case OpAMD64VFMADDSUB213PSMasked512: + return rewriteValueAMD64_OpAMD64VFMADDSUB213PSMasked512(v) + case OpAMD64VFMSUBADD213PD128: + return rewriteValueAMD64_OpAMD64VFMSUBADD213PD128(v) + case OpAMD64VFMSUBADD213PD256: + return rewriteValueAMD64_OpAMD64VFMSUBADD213PD256(v) + case OpAMD64VFMSUBADD213PD512: + return rewriteValueAMD64_OpAMD64VFMSUBADD213PD512(v) + case OpAMD64VFMSUBADD213PDMasked128: + return rewriteValueAMD64_OpAMD64VFMSUBADD213PDMasked128(v) + case OpAMD64VFMSUBADD213PDMasked256: + return rewriteValueAMD64_OpAMD64VFMSUBADD213PDMasked256(v) + case OpAMD64VFMSUBADD213PDMasked512: + return rewriteValueAMD64_OpAMD64VFMSUBADD213PDMasked512(v) + case OpAMD64VFMSUBADD213PS128: + return rewriteValueAMD64_OpAMD64VFMSUBADD213PS128(v) + case OpAMD64VFMSUBADD213PS256: + return rewriteValueAMD64_OpAMD64VFMSUBADD213PS256(v) + case OpAMD64VFMSUBADD213PS512: + return rewriteValueAMD64_OpAMD64VFMSUBADD213PS512(v) + case OpAMD64VFMSUBADD213PSMasked128: + return rewriteValueAMD64_OpAMD64VFMSUBADD213PSMasked128(v) + case OpAMD64VFMSUBADD213PSMasked256: + return rewriteValueAMD64_OpAMD64VFMSUBADD213PSMasked256(v) + case OpAMD64VFMSUBADD213PSMasked512: + return rewriteValueAMD64_OpAMD64VFMSUBADD213PSMasked512(v) + case OpAMD64VMAXPD128: + return rewriteValueAMD64_OpAMD64VMAXPD128(v) + case OpAMD64VMAXPD256: + return rewriteValueAMD64_OpAMD64VMAXPD256(v) + case OpAMD64VMAXPD512: + return rewriteValueAMD64_OpAMD64VMAXPD512(v) + case OpAMD64VMAXPDMasked128: + return rewriteValueAMD64_OpAMD64VMAXPDMasked128(v) + case OpAMD64VMAXPDMasked256: + return rewriteValueAMD64_OpAMD64VMAXPDMasked256(v) + case OpAMD64VMAXPDMasked512: + return rewriteValueAMD64_OpAMD64VMAXPDMasked512(v) + case OpAMD64VMAXPS128: + return rewriteValueAMD64_OpAMD64VMAXPS128(v) + case OpAMD64VMAXPS256: + return rewriteValueAMD64_OpAMD64VMAXPS256(v) + case OpAMD64VMAXPS512: + return rewriteValueAMD64_OpAMD64VMAXPS512(v) + case OpAMD64VMAXPSMasked128: + return rewriteValueAMD64_OpAMD64VMAXPSMasked128(v) + case OpAMD64VMAXPSMasked256: + return rewriteValueAMD64_OpAMD64VMAXPSMasked256(v) + case OpAMD64VMAXPSMasked512: + return rewriteValueAMD64_OpAMD64VMAXPSMasked512(v) + case OpAMD64VMINPD128: + return rewriteValueAMD64_OpAMD64VMINPD128(v) + case OpAMD64VMINPD256: + return rewriteValueAMD64_OpAMD64VMINPD256(v) + case OpAMD64VMINPD512: + return rewriteValueAMD64_OpAMD64VMINPD512(v) + case OpAMD64VMINPDMasked128: + return rewriteValueAMD64_OpAMD64VMINPDMasked128(v) + case OpAMD64VMINPDMasked256: + return rewriteValueAMD64_OpAMD64VMINPDMasked256(v) + case OpAMD64VMINPDMasked512: + return rewriteValueAMD64_OpAMD64VMINPDMasked512(v) + case OpAMD64VMINPS128: + return rewriteValueAMD64_OpAMD64VMINPS128(v) + case OpAMD64VMINPS256: + return rewriteValueAMD64_OpAMD64VMINPS256(v) + case OpAMD64VMINPS512: + return rewriteValueAMD64_OpAMD64VMINPS512(v) + case OpAMD64VMINPSMasked128: + return rewriteValueAMD64_OpAMD64VMINPSMasked128(v) + case OpAMD64VMINPSMasked256: + return rewriteValueAMD64_OpAMD64VMINPSMasked256(v) + case OpAMD64VMINPSMasked512: + return rewriteValueAMD64_OpAMD64VMINPSMasked512(v) case OpAMD64VMOVD: return 
rewriteValueAMD64_OpAMD64VMOVD(v) case OpAMD64VMOVDQU16Masked512: @@ -523,8 +715,138 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpAMD64VMOVSDf2v(v) case OpAMD64VMOVSSf2v: return rewriteValueAMD64_OpAMD64VMOVSSf2v(v) + case OpAMD64VMULPD128: + return rewriteValueAMD64_OpAMD64VMULPD128(v) + case OpAMD64VMULPD256: + return rewriteValueAMD64_OpAMD64VMULPD256(v) + case OpAMD64VMULPD512: + return rewriteValueAMD64_OpAMD64VMULPD512(v) + case OpAMD64VMULPDMasked128: + return rewriteValueAMD64_OpAMD64VMULPDMasked128(v) + case OpAMD64VMULPDMasked256: + return rewriteValueAMD64_OpAMD64VMULPDMasked256(v) + case OpAMD64VMULPDMasked512: + return rewriteValueAMD64_OpAMD64VMULPDMasked512(v) + case OpAMD64VMULPS128: + return rewriteValueAMD64_OpAMD64VMULPS128(v) + case OpAMD64VMULPS256: + return rewriteValueAMD64_OpAMD64VMULPS256(v) + case OpAMD64VMULPS512: + return rewriteValueAMD64_OpAMD64VMULPS512(v) + case OpAMD64VMULPSMasked128: + return rewriteValueAMD64_OpAMD64VMULPSMasked128(v) + case OpAMD64VMULPSMasked256: + return rewriteValueAMD64_OpAMD64VMULPSMasked256(v) + case OpAMD64VMULPSMasked512: + return rewriteValueAMD64_OpAMD64VMULPSMasked512(v) + case OpAMD64VPABSD128: + return rewriteValueAMD64_OpAMD64VPABSD128(v) + case OpAMD64VPABSD256: + return rewriteValueAMD64_OpAMD64VPABSD256(v) + case OpAMD64VPABSD512: + return rewriteValueAMD64_OpAMD64VPABSD512(v) + case OpAMD64VPABSDMasked128: + return rewriteValueAMD64_OpAMD64VPABSDMasked128(v) + case OpAMD64VPABSDMasked256: + return rewriteValueAMD64_OpAMD64VPABSDMasked256(v) + case OpAMD64VPABSDMasked512: + return rewriteValueAMD64_OpAMD64VPABSDMasked512(v) + case OpAMD64VPABSQ128: + return rewriteValueAMD64_OpAMD64VPABSQ128(v) + case OpAMD64VPABSQ256: + return rewriteValueAMD64_OpAMD64VPABSQ256(v) + case OpAMD64VPABSQ512: + return rewriteValueAMD64_OpAMD64VPABSQ512(v) + case OpAMD64VPABSQMasked128: + return rewriteValueAMD64_OpAMD64VPABSQMasked128(v) + case OpAMD64VPABSQMasked256: + return rewriteValueAMD64_OpAMD64VPABSQMasked256(v) + case OpAMD64VPABSQMasked512: + return rewriteValueAMD64_OpAMD64VPABSQMasked512(v) + case OpAMD64VPACKSSDW128: + return rewriteValueAMD64_OpAMD64VPACKSSDW128(v) + case OpAMD64VPACKSSDW256: + return rewriteValueAMD64_OpAMD64VPACKSSDW256(v) + case OpAMD64VPACKSSDW512: + return rewriteValueAMD64_OpAMD64VPACKSSDW512(v) + case OpAMD64VPACKSSDWMasked128: + return rewriteValueAMD64_OpAMD64VPACKSSDWMasked128(v) + case OpAMD64VPACKSSDWMasked256: + return rewriteValueAMD64_OpAMD64VPACKSSDWMasked256(v) + case OpAMD64VPACKSSDWMasked512: + return rewriteValueAMD64_OpAMD64VPACKSSDWMasked512(v) + case OpAMD64VPACKUSDW128: + return rewriteValueAMD64_OpAMD64VPACKUSDW128(v) + case OpAMD64VPACKUSDW256: + return rewriteValueAMD64_OpAMD64VPACKUSDW256(v) + case OpAMD64VPACKUSDW512: + return rewriteValueAMD64_OpAMD64VPACKUSDW512(v) + case OpAMD64VPACKUSDWMasked128: + return rewriteValueAMD64_OpAMD64VPACKUSDWMasked128(v) + case OpAMD64VPACKUSDWMasked256: + return rewriteValueAMD64_OpAMD64VPACKUSDWMasked256(v) + case OpAMD64VPACKUSDWMasked512: + return rewriteValueAMD64_OpAMD64VPACKUSDWMasked512(v) + case OpAMD64VPADDD128: + return rewriteValueAMD64_OpAMD64VPADDD128(v) + case OpAMD64VPADDD256: + return rewriteValueAMD64_OpAMD64VPADDD256(v) + case OpAMD64VPADDD512: + return rewriteValueAMD64_OpAMD64VPADDD512(v) + case OpAMD64VPADDDMasked128: + return rewriteValueAMD64_OpAMD64VPADDDMasked128(v) + case OpAMD64VPADDDMasked256: + return rewriteValueAMD64_OpAMD64VPADDDMasked256(v) + case OpAMD64VPADDDMasked512: + return 
rewriteValueAMD64_OpAMD64VPADDDMasked512(v) + case OpAMD64VPADDQ128: + return rewriteValueAMD64_OpAMD64VPADDQ128(v) + case OpAMD64VPADDQ256: + return rewriteValueAMD64_OpAMD64VPADDQ256(v) + case OpAMD64VPADDQ512: + return rewriteValueAMD64_OpAMD64VPADDQ512(v) + case OpAMD64VPADDQMasked128: + return rewriteValueAMD64_OpAMD64VPADDQMasked128(v) + case OpAMD64VPADDQMasked256: + return rewriteValueAMD64_OpAMD64VPADDQMasked256(v) + case OpAMD64VPADDQMasked512: + return rewriteValueAMD64_OpAMD64VPADDQMasked512(v) + case OpAMD64VPANDD512: + return rewriteValueAMD64_OpAMD64VPANDD512(v) + case OpAMD64VPANDDMasked128: + return rewriteValueAMD64_OpAMD64VPANDDMasked128(v) + case OpAMD64VPANDDMasked256: + return rewriteValueAMD64_OpAMD64VPANDDMasked256(v) + case OpAMD64VPANDDMasked512: + return rewriteValueAMD64_OpAMD64VPANDDMasked512(v) + case OpAMD64VPANDND512: + return rewriteValueAMD64_OpAMD64VPANDND512(v) + case OpAMD64VPANDNDMasked128: + return rewriteValueAMD64_OpAMD64VPANDNDMasked128(v) + case OpAMD64VPANDNDMasked256: + return rewriteValueAMD64_OpAMD64VPANDNDMasked256(v) + case OpAMD64VPANDNDMasked512: + return rewriteValueAMD64_OpAMD64VPANDNDMasked512(v) + case OpAMD64VPANDNQ512: + return rewriteValueAMD64_OpAMD64VPANDNQ512(v) + case OpAMD64VPANDNQMasked128: + return rewriteValueAMD64_OpAMD64VPANDNQMasked128(v) + case OpAMD64VPANDNQMasked256: + return rewriteValueAMD64_OpAMD64VPANDNQMasked256(v) + case OpAMD64VPANDNQMasked512: + return rewriteValueAMD64_OpAMD64VPANDNQMasked512(v) case OpAMD64VPANDQ512: return rewriteValueAMD64_OpAMD64VPANDQ512(v) + case OpAMD64VPANDQMasked128: + return rewriteValueAMD64_OpAMD64VPANDQMasked128(v) + case OpAMD64VPANDQMasked256: + return rewriteValueAMD64_OpAMD64VPANDQMasked256(v) + case OpAMD64VPANDQMasked512: + return rewriteValueAMD64_OpAMD64VPANDQMasked512(v) + case OpAMD64VPBLENDMDMasked512: + return rewriteValueAMD64_OpAMD64VPBLENDMDMasked512(v) + case OpAMD64VPBLENDMQMasked512: + return rewriteValueAMD64_OpAMD64VPBLENDMQMasked512(v) case OpAMD64VPBROADCASTB128: return rewriteValueAMD64_OpAMD64VPBROADCASTB128(v) case OpAMD64VPBROADCASTB256: @@ -537,10 +859,258 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpAMD64VPBROADCASTW256(v) case OpAMD64VPBROADCASTW512: return rewriteValueAMD64_OpAMD64VPBROADCASTW512(v) + case OpAMD64VPCMPEQD128: + return rewriteValueAMD64_OpAMD64VPCMPEQD128(v) + case OpAMD64VPCMPEQD256: + return rewriteValueAMD64_OpAMD64VPCMPEQD256(v) + case OpAMD64VPCMPEQD512: + return rewriteValueAMD64_OpAMD64VPCMPEQD512(v) + case OpAMD64VPCMPEQQ128: + return rewriteValueAMD64_OpAMD64VPCMPEQQ128(v) + case OpAMD64VPCMPEQQ256: + return rewriteValueAMD64_OpAMD64VPCMPEQQ256(v) + case OpAMD64VPCMPEQQ512: + return rewriteValueAMD64_OpAMD64VPCMPEQQ512(v) + case OpAMD64VPCMPGTD128: + return rewriteValueAMD64_OpAMD64VPCMPGTD128(v) + case OpAMD64VPCMPGTD256: + return rewriteValueAMD64_OpAMD64VPCMPGTD256(v) + case OpAMD64VPCMPGTD512: + return rewriteValueAMD64_OpAMD64VPCMPGTD512(v) + case OpAMD64VPCMPGTQ128: + return rewriteValueAMD64_OpAMD64VPCMPGTQ128(v) + case OpAMD64VPCMPGTQ256: + return rewriteValueAMD64_OpAMD64VPCMPGTQ256(v) + case OpAMD64VPCMPGTQ512: + return rewriteValueAMD64_OpAMD64VPCMPGTQ512(v) + case OpAMD64VPDPBUSD128: + return rewriteValueAMD64_OpAMD64VPDPBUSD128(v) + case OpAMD64VPDPBUSD256: + return rewriteValueAMD64_OpAMD64VPDPBUSD256(v) + case OpAMD64VPDPBUSD512: + return rewriteValueAMD64_OpAMD64VPDPBUSD512(v) + case OpAMD64VPDPBUSDMasked128: + return rewriteValueAMD64_OpAMD64VPDPBUSDMasked128(v) + case 
OpAMD64VPDPBUSDMasked256: + return rewriteValueAMD64_OpAMD64VPDPBUSDMasked256(v) + case OpAMD64VPDPBUSDMasked512: + return rewriteValueAMD64_OpAMD64VPDPBUSDMasked512(v) + case OpAMD64VPDPBUSDS128: + return rewriteValueAMD64_OpAMD64VPDPBUSDS128(v) + case OpAMD64VPDPBUSDS256: + return rewriteValueAMD64_OpAMD64VPDPBUSDS256(v) + case OpAMD64VPDPBUSDS512: + return rewriteValueAMD64_OpAMD64VPDPBUSDS512(v) + case OpAMD64VPDPBUSDSMasked128: + return rewriteValueAMD64_OpAMD64VPDPBUSDSMasked128(v) + case OpAMD64VPDPBUSDSMasked256: + return rewriteValueAMD64_OpAMD64VPDPBUSDSMasked256(v) + case OpAMD64VPDPBUSDSMasked512: + return rewriteValueAMD64_OpAMD64VPDPBUSDSMasked512(v) + case OpAMD64VPDPWSSD128: + return rewriteValueAMD64_OpAMD64VPDPWSSD128(v) + case OpAMD64VPDPWSSD256: + return rewriteValueAMD64_OpAMD64VPDPWSSD256(v) + case OpAMD64VPDPWSSD512: + return rewriteValueAMD64_OpAMD64VPDPWSSD512(v) + case OpAMD64VPDPWSSDMasked128: + return rewriteValueAMD64_OpAMD64VPDPWSSDMasked128(v) + case OpAMD64VPDPWSSDMasked256: + return rewriteValueAMD64_OpAMD64VPDPWSSDMasked256(v) + case OpAMD64VPDPWSSDMasked512: + return rewriteValueAMD64_OpAMD64VPDPWSSDMasked512(v) + case OpAMD64VPDPWSSDS128: + return rewriteValueAMD64_OpAMD64VPDPWSSDS128(v) + case OpAMD64VPDPWSSDS256: + return rewriteValueAMD64_OpAMD64VPDPWSSDS256(v) + case OpAMD64VPDPWSSDS512: + return rewriteValueAMD64_OpAMD64VPDPWSSDS512(v) + case OpAMD64VPDPWSSDSMasked128: + return rewriteValueAMD64_OpAMD64VPDPWSSDSMasked128(v) + case OpAMD64VPDPWSSDSMasked256: + return rewriteValueAMD64_OpAMD64VPDPWSSDSMasked256(v) + case OpAMD64VPDPWSSDSMasked512: + return rewriteValueAMD64_OpAMD64VPDPWSSDSMasked512(v) + case OpAMD64VPERMD256: + return rewriteValueAMD64_OpAMD64VPERMD256(v) + case OpAMD64VPERMD512: + return rewriteValueAMD64_OpAMD64VPERMD512(v) + case OpAMD64VPERMDMasked256: + return rewriteValueAMD64_OpAMD64VPERMDMasked256(v) + case OpAMD64VPERMDMasked512: + return rewriteValueAMD64_OpAMD64VPERMDMasked512(v) + case OpAMD64VPERMI2D128: + return rewriteValueAMD64_OpAMD64VPERMI2D128(v) + case OpAMD64VPERMI2D256: + return rewriteValueAMD64_OpAMD64VPERMI2D256(v) + case OpAMD64VPERMI2D512: + return rewriteValueAMD64_OpAMD64VPERMI2D512(v) + case OpAMD64VPERMI2DMasked128: + return rewriteValueAMD64_OpAMD64VPERMI2DMasked128(v) + case OpAMD64VPERMI2DMasked256: + return rewriteValueAMD64_OpAMD64VPERMI2DMasked256(v) + case OpAMD64VPERMI2DMasked512: + return rewriteValueAMD64_OpAMD64VPERMI2DMasked512(v) + case OpAMD64VPERMI2PD128: + return rewriteValueAMD64_OpAMD64VPERMI2PD128(v) + case OpAMD64VPERMI2PD256: + return rewriteValueAMD64_OpAMD64VPERMI2PD256(v) + case OpAMD64VPERMI2PD512: + return rewriteValueAMD64_OpAMD64VPERMI2PD512(v) + case OpAMD64VPERMI2PDMasked128: + return rewriteValueAMD64_OpAMD64VPERMI2PDMasked128(v) + case OpAMD64VPERMI2PDMasked256: + return rewriteValueAMD64_OpAMD64VPERMI2PDMasked256(v) + case OpAMD64VPERMI2PDMasked512: + return rewriteValueAMD64_OpAMD64VPERMI2PDMasked512(v) + case OpAMD64VPERMI2PS128: + return rewriteValueAMD64_OpAMD64VPERMI2PS128(v) + case OpAMD64VPERMI2PS256: + return rewriteValueAMD64_OpAMD64VPERMI2PS256(v) + case OpAMD64VPERMI2PS512: + return rewriteValueAMD64_OpAMD64VPERMI2PS512(v) + case OpAMD64VPERMI2PSMasked128: + return rewriteValueAMD64_OpAMD64VPERMI2PSMasked128(v) + case OpAMD64VPERMI2PSMasked256: + return rewriteValueAMD64_OpAMD64VPERMI2PSMasked256(v) + case OpAMD64VPERMI2PSMasked512: + return rewriteValueAMD64_OpAMD64VPERMI2PSMasked512(v) + case OpAMD64VPERMI2Q128: + return 
rewriteValueAMD64_OpAMD64VPERMI2Q128(v) + case OpAMD64VPERMI2Q256: + return rewriteValueAMD64_OpAMD64VPERMI2Q256(v) + case OpAMD64VPERMI2Q512: + return rewriteValueAMD64_OpAMD64VPERMI2Q512(v) + case OpAMD64VPERMI2QMasked128: + return rewriteValueAMD64_OpAMD64VPERMI2QMasked128(v) + case OpAMD64VPERMI2QMasked256: + return rewriteValueAMD64_OpAMD64VPERMI2QMasked256(v) + case OpAMD64VPERMI2QMasked512: + return rewriteValueAMD64_OpAMD64VPERMI2QMasked512(v) + case OpAMD64VPERMPD256: + return rewriteValueAMD64_OpAMD64VPERMPD256(v) + case OpAMD64VPERMPD512: + return rewriteValueAMD64_OpAMD64VPERMPD512(v) + case OpAMD64VPERMPDMasked256: + return rewriteValueAMD64_OpAMD64VPERMPDMasked256(v) + case OpAMD64VPERMPDMasked512: + return rewriteValueAMD64_OpAMD64VPERMPDMasked512(v) + case OpAMD64VPERMPS256: + return rewriteValueAMD64_OpAMD64VPERMPS256(v) + case OpAMD64VPERMPS512: + return rewriteValueAMD64_OpAMD64VPERMPS512(v) + case OpAMD64VPERMPSMasked256: + return rewriteValueAMD64_OpAMD64VPERMPSMasked256(v) + case OpAMD64VPERMPSMasked512: + return rewriteValueAMD64_OpAMD64VPERMPSMasked512(v) + case OpAMD64VPERMQ256: + return rewriteValueAMD64_OpAMD64VPERMQ256(v) + case OpAMD64VPERMQ512: + return rewriteValueAMD64_OpAMD64VPERMQ512(v) + case OpAMD64VPERMQMasked256: + return rewriteValueAMD64_OpAMD64VPERMQMasked256(v) + case OpAMD64VPERMQMasked512: + return rewriteValueAMD64_OpAMD64VPERMQMasked512(v) case OpAMD64VPINSRD128: return rewriteValueAMD64_OpAMD64VPINSRD128(v) case OpAMD64VPINSRQ128: return rewriteValueAMD64_OpAMD64VPINSRQ128(v) + case OpAMD64VPMAXSD128: + return rewriteValueAMD64_OpAMD64VPMAXSD128(v) + case OpAMD64VPMAXSD256: + return rewriteValueAMD64_OpAMD64VPMAXSD256(v) + case OpAMD64VPMAXSD512: + return rewriteValueAMD64_OpAMD64VPMAXSD512(v) + case OpAMD64VPMAXSDMasked128: + return rewriteValueAMD64_OpAMD64VPMAXSDMasked128(v) + case OpAMD64VPMAXSDMasked256: + return rewriteValueAMD64_OpAMD64VPMAXSDMasked256(v) + case OpAMD64VPMAXSDMasked512: + return rewriteValueAMD64_OpAMD64VPMAXSDMasked512(v) + case OpAMD64VPMAXSQ128: + return rewriteValueAMD64_OpAMD64VPMAXSQ128(v) + case OpAMD64VPMAXSQ256: + return rewriteValueAMD64_OpAMD64VPMAXSQ256(v) + case OpAMD64VPMAXSQ512: + return rewriteValueAMD64_OpAMD64VPMAXSQ512(v) + case OpAMD64VPMAXSQMasked128: + return rewriteValueAMD64_OpAMD64VPMAXSQMasked128(v) + case OpAMD64VPMAXSQMasked256: + return rewriteValueAMD64_OpAMD64VPMAXSQMasked256(v) + case OpAMD64VPMAXSQMasked512: + return rewriteValueAMD64_OpAMD64VPMAXSQMasked512(v) + case OpAMD64VPMAXUD128: + return rewriteValueAMD64_OpAMD64VPMAXUD128(v) + case OpAMD64VPMAXUD256: + return rewriteValueAMD64_OpAMD64VPMAXUD256(v) + case OpAMD64VPMAXUD512: + return rewriteValueAMD64_OpAMD64VPMAXUD512(v) + case OpAMD64VPMAXUDMasked128: + return rewriteValueAMD64_OpAMD64VPMAXUDMasked128(v) + case OpAMD64VPMAXUDMasked256: + return rewriteValueAMD64_OpAMD64VPMAXUDMasked256(v) + case OpAMD64VPMAXUDMasked512: + return rewriteValueAMD64_OpAMD64VPMAXUDMasked512(v) + case OpAMD64VPMAXUQ128: + return rewriteValueAMD64_OpAMD64VPMAXUQ128(v) + case OpAMD64VPMAXUQ256: + return rewriteValueAMD64_OpAMD64VPMAXUQ256(v) + case OpAMD64VPMAXUQ512: + return rewriteValueAMD64_OpAMD64VPMAXUQ512(v) + case OpAMD64VPMAXUQMasked128: + return rewriteValueAMD64_OpAMD64VPMAXUQMasked128(v) + case OpAMD64VPMAXUQMasked256: + return rewriteValueAMD64_OpAMD64VPMAXUQMasked256(v) + case OpAMD64VPMAXUQMasked512: + return rewriteValueAMD64_OpAMD64VPMAXUQMasked512(v) + case OpAMD64VPMINSD128: + return rewriteValueAMD64_OpAMD64VPMINSD128(v) + case 
OpAMD64VPMINSD256: + return rewriteValueAMD64_OpAMD64VPMINSD256(v) + case OpAMD64VPMINSD512: + return rewriteValueAMD64_OpAMD64VPMINSD512(v) + case OpAMD64VPMINSDMasked128: + return rewriteValueAMD64_OpAMD64VPMINSDMasked128(v) + case OpAMD64VPMINSDMasked256: + return rewriteValueAMD64_OpAMD64VPMINSDMasked256(v) + case OpAMD64VPMINSDMasked512: + return rewriteValueAMD64_OpAMD64VPMINSDMasked512(v) + case OpAMD64VPMINSQ128: + return rewriteValueAMD64_OpAMD64VPMINSQ128(v) + case OpAMD64VPMINSQ256: + return rewriteValueAMD64_OpAMD64VPMINSQ256(v) + case OpAMD64VPMINSQ512: + return rewriteValueAMD64_OpAMD64VPMINSQ512(v) + case OpAMD64VPMINSQMasked128: + return rewriteValueAMD64_OpAMD64VPMINSQMasked128(v) + case OpAMD64VPMINSQMasked256: + return rewriteValueAMD64_OpAMD64VPMINSQMasked256(v) + case OpAMD64VPMINSQMasked512: + return rewriteValueAMD64_OpAMD64VPMINSQMasked512(v) + case OpAMD64VPMINUD128: + return rewriteValueAMD64_OpAMD64VPMINUD128(v) + case OpAMD64VPMINUD256: + return rewriteValueAMD64_OpAMD64VPMINUD256(v) + case OpAMD64VPMINUD512: + return rewriteValueAMD64_OpAMD64VPMINUD512(v) + case OpAMD64VPMINUDMasked128: + return rewriteValueAMD64_OpAMD64VPMINUDMasked128(v) + case OpAMD64VPMINUDMasked256: + return rewriteValueAMD64_OpAMD64VPMINUDMasked256(v) + case OpAMD64VPMINUDMasked512: + return rewriteValueAMD64_OpAMD64VPMINUDMasked512(v) + case OpAMD64VPMINUQ128: + return rewriteValueAMD64_OpAMD64VPMINUQ128(v) + case OpAMD64VPMINUQ256: + return rewriteValueAMD64_OpAMD64VPMINUQ256(v) + case OpAMD64VPMINUQ512: + return rewriteValueAMD64_OpAMD64VPMINUQ512(v) + case OpAMD64VPMINUQMasked128: + return rewriteValueAMD64_OpAMD64VPMINUQMasked128(v) + case OpAMD64VPMINUQMasked256: + return rewriteValueAMD64_OpAMD64VPMINUQMasked256(v) + case OpAMD64VPMINUQMasked512: + return rewriteValueAMD64_OpAMD64VPMINUQMasked512(v) case OpAMD64VPMOVVec16x16ToM: return rewriteValueAMD64_OpAMD64VPMOVVec16x16ToM(v) case OpAMD64VPMOVVec16x32ToM: @@ -565,6 +1135,174 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpAMD64VPMOVVec8x32ToM(v) case OpAMD64VPMOVVec8x64ToM: return rewriteValueAMD64_OpAMD64VPMOVVec8x64ToM(v) + case OpAMD64VPMULDQ128: + return rewriteValueAMD64_OpAMD64VPMULDQ128(v) + case OpAMD64VPMULDQ256: + return rewriteValueAMD64_OpAMD64VPMULDQ256(v) + case OpAMD64VPMULLD128: + return rewriteValueAMD64_OpAMD64VPMULLD128(v) + case OpAMD64VPMULLD256: + return rewriteValueAMD64_OpAMD64VPMULLD256(v) + case OpAMD64VPMULLD512: + return rewriteValueAMD64_OpAMD64VPMULLD512(v) + case OpAMD64VPMULLDMasked128: + return rewriteValueAMD64_OpAMD64VPMULLDMasked128(v) + case OpAMD64VPMULLDMasked256: + return rewriteValueAMD64_OpAMD64VPMULLDMasked256(v) + case OpAMD64VPMULLDMasked512: + return rewriteValueAMD64_OpAMD64VPMULLDMasked512(v) + case OpAMD64VPMULLQ128: + return rewriteValueAMD64_OpAMD64VPMULLQ128(v) + case OpAMD64VPMULLQ256: + return rewriteValueAMD64_OpAMD64VPMULLQ256(v) + case OpAMD64VPMULLQ512: + return rewriteValueAMD64_OpAMD64VPMULLQ512(v) + case OpAMD64VPMULLQMasked128: + return rewriteValueAMD64_OpAMD64VPMULLQMasked128(v) + case OpAMD64VPMULLQMasked256: + return rewriteValueAMD64_OpAMD64VPMULLQMasked256(v) + case OpAMD64VPMULLQMasked512: + return rewriteValueAMD64_OpAMD64VPMULLQMasked512(v) + case OpAMD64VPMULUDQ128: + return rewriteValueAMD64_OpAMD64VPMULUDQ128(v) + case OpAMD64VPMULUDQ256: + return rewriteValueAMD64_OpAMD64VPMULUDQ256(v) + case OpAMD64VPOPCNTD128: + return rewriteValueAMD64_OpAMD64VPOPCNTD128(v) + case OpAMD64VPOPCNTD256: + return 
rewriteValueAMD64_OpAMD64VPOPCNTD256(v) + case OpAMD64VPOPCNTD512: + return rewriteValueAMD64_OpAMD64VPOPCNTD512(v) + case OpAMD64VPOPCNTDMasked128: + return rewriteValueAMD64_OpAMD64VPOPCNTDMasked128(v) + case OpAMD64VPOPCNTDMasked256: + return rewriteValueAMD64_OpAMD64VPOPCNTDMasked256(v) + case OpAMD64VPOPCNTDMasked512: + return rewriteValueAMD64_OpAMD64VPOPCNTDMasked512(v) + case OpAMD64VPOPCNTQ128: + return rewriteValueAMD64_OpAMD64VPOPCNTQ128(v) + case OpAMD64VPOPCNTQ256: + return rewriteValueAMD64_OpAMD64VPOPCNTQ256(v) + case OpAMD64VPOPCNTQ512: + return rewriteValueAMD64_OpAMD64VPOPCNTQ512(v) + case OpAMD64VPOPCNTQMasked128: + return rewriteValueAMD64_OpAMD64VPOPCNTQMasked128(v) + case OpAMD64VPOPCNTQMasked256: + return rewriteValueAMD64_OpAMD64VPOPCNTQMasked256(v) + case OpAMD64VPOPCNTQMasked512: + return rewriteValueAMD64_OpAMD64VPOPCNTQMasked512(v) + case OpAMD64VPORD512: + return rewriteValueAMD64_OpAMD64VPORD512(v) + case OpAMD64VPORDMasked128: + return rewriteValueAMD64_OpAMD64VPORDMasked128(v) + case OpAMD64VPORDMasked256: + return rewriteValueAMD64_OpAMD64VPORDMasked256(v) + case OpAMD64VPORDMasked512: + return rewriteValueAMD64_OpAMD64VPORDMasked512(v) + case OpAMD64VPORQ512: + return rewriteValueAMD64_OpAMD64VPORQ512(v) + case OpAMD64VPORQMasked128: + return rewriteValueAMD64_OpAMD64VPORQMasked128(v) + case OpAMD64VPORQMasked256: + return rewriteValueAMD64_OpAMD64VPORQMasked256(v) + case OpAMD64VPORQMasked512: + return rewriteValueAMD64_OpAMD64VPORQMasked512(v) + case OpAMD64VPROLVD128: + return rewriteValueAMD64_OpAMD64VPROLVD128(v) + case OpAMD64VPROLVD256: + return rewriteValueAMD64_OpAMD64VPROLVD256(v) + case OpAMD64VPROLVD512: + return rewriteValueAMD64_OpAMD64VPROLVD512(v) + case OpAMD64VPROLVDMasked128: + return rewriteValueAMD64_OpAMD64VPROLVDMasked128(v) + case OpAMD64VPROLVDMasked256: + return rewriteValueAMD64_OpAMD64VPROLVDMasked256(v) + case OpAMD64VPROLVDMasked512: + return rewriteValueAMD64_OpAMD64VPROLVDMasked512(v) + case OpAMD64VPROLVQ128: + return rewriteValueAMD64_OpAMD64VPROLVQ128(v) + case OpAMD64VPROLVQ256: + return rewriteValueAMD64_OpAMD64VPROLVQ256(v) + case OpAMD64VPROLVQ512: + return rewriteValueAMD64_OpAMD64VPROLVQ512(v) + case OpAMD64VPROLVQMasked128: + return rewriteValueAMD64_OpAMD64VPROLVQMasked128(v) + case OpAMD64VPROLVQMasked256: + return rewriteValueAMD64_OpAMD64VPROLVQMasked256(v) + case OpAMD64VPROLVQMasked512: + return rewriteValueAMD64_OpAMD64VPROLVQMasked512(v) + case OpAMD64VPRORVD128: + return rewriteValueAMD64_OpAMD64VPRORVD128(v) + case OpAMD64VPRORVD256: + return rewriteValueAMD64_OpAMD64VPRORVD256(v) + case OpAMD64VPRORVD512: + return rewriteValueAMD64_OpAMD64VPRORVD512(v) + case OpAMD64VPRORVDMasked128: + return rewriteValueAMD64_OpAMD64VPRORVDMasked128(v) + case OpAMD64VPRORVDMasked256: + return rewriteValueAMD64_OpAMD64VPRORVDMasked256(v) + case OpAMD64VPRORVDMasked512: + return rewriteValueAMD64_OpAMD64VPRORVDMasked512(v) + case OpAMD64VPRORVQ128: + return rewriteValueAMD64_OpAMD64VPRORVQ128(v) + case OpAMD64VPRORVQ256: + return rewriteValueAMD64_OpAMD64VPRORVQ256(v) + case OpAMD64VPRORVQ512: + return rewriteValueAMD64_OpAMD64VPRORVQ512(v) + case OpAMD64VPRORVQMasked128: + return rewriteValueAMD64_OpAMD64VPRORVQMasked128(v) + case OpAMD64VPRORVQMasked256: + return rewriteValueAMD64_OpAMD64VPRORVQMasked256(v) + case OpAMD64VPRORVQMasked512: + return rewriteValueAMD64_OpAMD64VPRORVQMasked512(v) + case OpAMD64VPSHLDVD128: + return rewriteValueAMD64_OpAMD64VPSHLDVD128(v) + case OpAMD64VPSHLDVD256: + return 
rewriteValueAMD64_OpAMD64VPSHLDVD256(v) + case OpAMD64VPSHLDVD512: + return rewriteValueAMD64_OpAMD64VPSHLDVD512(v) + case OpAMD64VPSHLDVDMasked128: + return rewriteValueAMD64_OpAMD64VPSHLDVDMasked128(v) + case OpAMD64VPSHLDVDMasked256: + return rewriteValueAMD64_OpAMD64VPSHLDVDMasked256(v) + case OpAMD64VPSHLDVDMasked512: + return rewriteValueAMD64_OpAMD64VPSHLDVDMasked512(v) + case OpAMD64VPSHLDVQ128: + return rewriteValueAMD64_OpAMD64VPSHLDVQ128(v) + case OpAMD64VPSHLDVQ256: + return rewriteValueAMD64_OpAMD64VPSHLDVQ256(v) + case OpAMD64VPSHLDVQ512: + return rewriteValueAMD64_OpAMD64VPSHLDVQ512(v) + case OpAMD64VPSHLDVQMasked128: + return rewriteValueAMD64_OpAMD64VPSHLDVQMasked128(v) + case OpAMD64VPSHLDVQMasked256: + return rewriteValueAMD64_OpAMD64VPSHLDVQMasked256(v) + case OpAMD64VPSHLDVQMasked512: + return rewriteValueAMD64_OpAMD64VPSHLDVQMasked512(v) + case OpAMD64VPSHRDVD128: + return rewriteValueAMD64_OpAMD64VPSHRDVD128(v) + case OpAMD64VPSHRDVD256: + return rewriteValueAMD64_OpAMD64VPSHRDVD256(v) + case OpAMD64VPSHRDVD512: + return rewriteValueAMD64_OpAMD64VPSHRDVD512(v) + case OpAMD64VPSHRDVDMasked128: + return rewriteValueAMD64_OpAMD64VPSHRDVDMasked128(v) + case OpAMD64VPSHRDVDMasked256: + return rewriteValueAMD64_OpAMD64VPSHRDVDMasked256(v) + case OpAMD64VPSHRDVDMasked512: + return rewriteValueAMD64_OpAMD64VPSHRDVDMasked512(v) + case OpAMD64VPSHRDVQ128: + return rewriteValueAMD64_OpAMD64VPSHRDVQ128(v) + case OpAMD64VPSHRDVQ256: + return rewriteValueAMD64_OpAMD64VPSHRDVQ256(v) + case OpAMD64VPSHRDVQ512: + return rewriteValueAMD64_OpAMD64VPSHRDVQ512(v) + case OpAMD64VPSHRDVQMasked128: + return rewriteValueAMD64_OpAMD64VPSHRDVQMasked128(v) + case OpAMD64VPSHRDVQMasked256: + return rewriteValueAMD64_OpAMD64VPSHRDVQMasked256(v) + case OpAMD64VPSHRDVQMasked512: + return rewriteValueAMD64_OpAMD64VPSHRDVQMasked512(v) case OpAMD64VPSLLD128: return rewriteValueAMD64_OpAMD64VPSLLD128(v) case OpAMD64VPSLLD256: @@ -589,6 +1327,30 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpAMD64VPSLLQMasked256(v) case OpAMD64VPSLLQMasked512: return rewriteValueAMD64_OpAMD64VPSLLQMasked512(v) + case OpAMD64VPSLLVD128: + return rewriteValueAMD64_OpAMD64VPSLLVD128(v) + case OpAMD64VPSLLVD256: + return rewriteValueAMD64_OpAMD64VPSLLVD256(v) + case OpAMD64VPSLLVD512: + return rewriteValueAMD64_OpAMD64VPSLLVD512(v) + case OpAMD64VPSLLVDMasked128: + return rewriteValueAMD64_OpAMD64VPSLLVDMasked128(v) + case OpAMD64VPSLLVDMasked256: + return rewriteValueAMD64_OpAMD64VPSLLVDMasked256(v) + case OpAMD64VPSLLVDMasked512: + return rewriteValueAMD64_OpAMD64VPSLLVDMasked512(v) + case OpAMD64VPSLLVQ128: + return rewriteValueAMD64_OpAMD64VPSLLVQ128(v) + case OpAMD64VPSLLVQ256: + return rewriteValueAMD64_OpAMD64VPSLLVQ256(v) + case OpAMD64VPSLLVQ512: + return rewriteValueAMD64_OpAMD64VPSLLVQ512(v) + case OpAMD64VPSLLVQMasked128: + return rewriteValueAMD64_OpAMD64VPSLLVQMasked128(v) + case OpAMD64VPSLLVQMasked256: + return rewriteValueAMD64_OpAMD64VPSLLVQMasked256(v) + case OpAMD64VPSLLVQMasked512: + return rewriteValueAMD64_OpAMD64VPSLLVQMasked512(v) case OpAMD64VPSLLW128: return rewriteValueAMD64_OpAMD64VPSLLW128(v) case OpAMD64VPSLLW256: @@ -625,6 +1387,30 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpAMD64VPSRAQMasked256(v) case OpAMD64VPSRAQMasked512: return rewriteValueAMD64_OpAMD64VPSRAQMasked512(v) + case OpAMD64VPSRAVD128: + return rewriteValueAMD64_OpAMD64VPSRAVD128(v) + case OpAMD64VPSRAVD256: + return rewriteValueAMD64_OpAMD64VPSRAVD256(v) + case 
OpAMD64VPSRAVD512: + return rewriteValueAMD64_OpAMD64VPSRAVD512(v) + case OpAMD64VPSRAVDMasked128: + return rewriteValueAMD64_OpAMD64VPSRAVDMasked128(v) + case OpAMD64VPSRAVDMasked256: + return rewriteValueAMD64_OpAMD64VPSRAVDMasked256(v) + case OpAMD64VPSRAVDMasked512: + return rewriteValueAMD64_OpAMD64VPSRAVDMasked512(v) + case OpAMD64VPSRAVQ128: + return rewriteValueAMD64_OpAMD64VPSRAVQ128(v) + case OpAMD64VPSRAVQ256: + return rewriteValueAMD64_OpAMD64VPSRAVQ256(v) + case OpAMD64VPSRAVQ512: + return rewriteValueAMD64_OpAMD64VPSRAVQ512(v) + case OpAMD64VPSRAVQMasked128: + return rewriteValueAMD64_OpAMD64VPSRAVQMasked128(v) + case OpAMD64VPSRAVQMasked256: + return rewriteValueAMD64_OpAMD64VPSRAVQMasked256(v) + case OpAMD64VPSRAVQMasked512: + return rewriteValueAMD64_OpAMD64VPSRAVQMasked512(v) case OpAMD64VPSRAW128: return rewriteValueAMD64_OpAMD64VPSRAW128(v) case OpAMD64VPSRAW256: @@ -637,6 +1423,206 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpAMD64VPSRAWMasked256(v) case OpAMD64VPSRAWMasked512: return rewriteValueAMD64_OpAMD64VPSRAWMasked512(v) + case OpAMD64VPSRLVD128: + return rewriteValueAMD64_OpAMD64VPSRLVD128(v) + case OpAMD64VPSRLVD256: + return rewriteValueAMD64_OpAMD64VPSRLVD256(v) + case OpAMD64VPSRLVD512: + return rewriteValueAMD64_OpAMD64VPSRLVD512(v) + case OpAMD64VPSRLVDMasked128: + return rewriteValueAMD64_OpAMD64VPSRLVDMasked128(v) + case OpAMD64VPSRLVDMasked256: + return rewriteValueAMD64_OpAMD64VPSRLVDMasked256(v) + case OpAMD64VPSRLVDMasked512: + return rewriteValueAMD64_OpAMD64VPSRLVDMasked512(v) + case OpAMD64VPSRLVQ128: + return rewriteValueAMD64_OpAMD64VPSRLVQ128(v) + case OpAMD64VPSRLVQ256: + return rewriteValueAMD64_OpAMD64VPSRLVQ256(v) + case OpAMD64VPSRLVQ512: + return rewriteValueAMD64_OpAMD64VPSRLVQ512(v) + case OpAMD64VPSRLVQMasked128: + return rewriteValueAMD64_OpAMD64VPSRLVQMasked128(v) + case OpAMD64VPSRLVQMasked256: + return rewriteValueAMD64_OpAMD64VPSRLVQMasked256(v) + case OpAMD64VPSRLVQMasked512: + return rewriteValueAMD64_OpAMD64VPSRLVQMasked512(v) + case OpAMD64VPSUBD128: + return rewriteValueAMD64_OpAMD64VPSUBD128(v) + case OpAMD64VPSUBD256: + return rewriteValueAMD64_OpAMD64VPSUBD256(v) + case OpAMD64VPSUBD512: + return rewriteValueAMD64_OpAMD64VPSUBD512(v) + case OpAMD64VPSUBDMasked128: + return rewriteValueAMD64_OpAMD64VPSUBDMasked128(v) + case OpAMD64VPSUBDMasked256: + return rewriteValueAMD64_OpAMD64VPSUBDMasked256(v) + case OpAMD64VPSUBDMasked512: + return rewriteValueAMD64_OpAMD64VPSUBDMasked512(v) + case OpAMD64VPSUBQ128: + return rewriteValueAMD64_OpAMD64VPSUBQ128(v) + case OpAMD64VPSUBQ256: + return rewriteValueAMD64_OpAMD64VPSUBQ256(v) + case OpAMD64VPSUBQ512: + return rewriteValueAMD64_OpAMD64VPSUBQ512(v) + case OpAMD64VPSUBQMasked128: + return rewriteValueAMD64_OpAMD64VPSUBQMasked128(v) + case OpAMD64VPSUBQMasked256: + return rewriteValueAMD64_OpAMD64VPSUBQMasked256(v) + case OpAMD64VPSUBQMasked512: + return rewriteValueAMD64_OpAMD64VPSUBQMasked512(v) + case OpAMD64VPUNPCKHDQ128: + return rewriteValueAMD64_OpAMD64VPUNPCKHDQ128(v) + case OpAMD64VPUNPCKHDQ256: + return rewriteValueAMD64_OpAMD64VPUNPCKHDQ256(v) + case OpAMD64VPUNPCKHDQ512: + return rewriteValueAMD64_OpAMD64VPUNPCKHDQ512(v) + case OpAMD64VPUNPCKHQDQ128: + return rewriteValueAMD64_OpAMD64VPUNPCKHQDQ128(v) + case OpAMD64VPUNPCKHQDQ256: + return rewriteValueAMD64_OpAMD64VPUNPCKHQDQ256(v) + case OpAMD64VPUNPCKHQDQ512: + return rewriteValueAMD64_OpAMD64VPUNPCKHQDQ512(v) + case OpAMD64VPUNPCKLDQ128: + return rewriteValueAMD64_OpAMD64VPUNPCKLDQ128(v) + 
case OpAMD64VPUNPCKLDQ256: + return rewriteValueAMD64_OpAMD64VPUNPCKLDQ256(v) + case OpAMD64VPUNPCKLDQ512: + return rewriteValueAMD64_OpAMD64VPUNPCKLDQ512(v) + case OpAMD64VPUNPCKLQDQ128: + return rewriteValueAMD64_OpAMD64VPUNPCKLQDQ128(v) + case OpAMD64VPUNPCKLQDQ256: + return rewriteValueAMD64_OpAMD64VPUNPCKLQDQ256(v) + case OpAMD64VPUNPCKLQDQ512: + return rewriteValueAMD64_OpAMD64VPUNPCKLQDQ512(v) + case OpAMD64VPXORD512: + return rewriteValueAMD64_OpAMD64VPXORD512(v) + case OpAMD64VPXORDMasked128: + return rewriteValueAMD64_OpAMD64VPXORDMasked128(v) + case OpAMD64VPXORDMasked256: + return rewriteValueAMD64_OpAMD64VPXORDMasked256(v) + case OpAMD64VPXORDMasked512: + return rewriteValueAMD64_OpAMD64VPXORDMasked512(v) + case OpAMD64VPXORQ512: + return rewriteValueAMD64_OpAMD64VPXORQ512(v) + case OpAMD64VPXORQMasked128: + return rewriteValueAMD64_OpAMD64VPXORQMasked128(v) + case OpAMD64VPXORQMasked256: + return rewriteValueAMD64_OpAMD64VPXORQMasked256(v) + case OpAMD64VPXORQMasked512: + return rewriteValueAMD64_OpAMD64VPXORQMasked512(v) + case OpAMD64VRCP14PD128: + return rewriteValueAMD64_OpAMD64VRCP14PD128(v) + case OpAMD64VRCP14PD256: + return rewriteValueAMD64_OpAMD64VRCP14PD256(v) + case OpAMD64VRCP14PD512: + return rewriteValueAMD64_OpAMD64VRCP14PD512(v) + case OpAMD64VRCP14PDMasked128: + return rewriteValueAMD64_OpAMD64VRCP14PDMasked128(v) + case OpAMD64VRCP14PDMasked256: + return rewriteValueAMD64_OpAMD64VRCP14PDMasked256(v) + case OpAMD64VRCP14PDMasked512: + return rewriteValueAMD64_OpAMD64VRCP14PDMasked512(v) + case OpAMD64VRCP14PS512: + return rewriteValueAMD64_OpAMD64VRCP14PS512(v) + case OpAMD64VRCP14PSMasked128: + return rewriteValueAMD64_OpAMD64VRCP14PSMasked128(v) + case OpAMD64VRCP14PSMasked256: + return rewriteValueAMD64_OpAMD64VRCP14PSMasked256(v) + case OpAMD64VRCP14PSMasked512: + return rewriteValueAMD64_OpAMD64VRCP14PSMasked512(v) + case OpAMD64VRSQRT14PD128: + return rewriteValueAMD64_OpAMD64VRSQRT14PD128(v) + case OpAMD64VRSQRT14PD256: + return rewriteValueAMD64_OpAMD64VRSQRT14PD256(v) + case OpAMD64VRSQRT14PD512: + return rewriteValueAMD64_OpAMD64VRSQRT14PD512(v) + case OpAMD64VRSQRT14PDMasked128: + return rewriteValueAMD64_OpAMD64VRSQRT14PDMasked128(v) + case OpAMD64VRSQRT14PDMasked256: + return rewriteValueAMD64_OpAMD64VRSQRT14PDMasked256(v) + case OpAMD64VRSQRT14PDMasked512: + return rewriteValueAMD64_OpAMD64VRSQRT14PDMasked512(v) + case OpAMD64VRSQRT14PS512: + return rewriteValueAMD64_OpAMD64VRSQRT14PS512(v) + case OpAMD64VRSQRT14PSMasked128: + return rewriteValueAMD64_OpAMD64VRSQRT14PSMasked128(v) + case OpAMD64VRSQRT14PSMasked256: + return rewriteValueAMD64_OpAMD64VRSQRT14PSMasked256(v) + case OpAMD64VRSQRT14PSMasked512: + return rewriteValueAMD64_OpAMD64VRSQRT14PSMasked512(v) + case OpAMD64VSCALEFPD128: + return rewriteValueAMD64_OpAMD64VSCALEFPD128(v) + case OpAMD64VSCALEFPD256: + return rewriteValueAMD64_OpAMD64VSCALEFPD256(v) + case OpAMD64VSCALEFPD512: + return rewriteValueAMD64_OpAMD64VSCALEFPD512(v) + case OpAMD64VSCALEFPDMasked128: + return rewriteValueAMD64_OpAMD64VSCALEFPDMasked128(v) + case OpAMD64VSCALEFPDMasked256: + return rewriteValueAMD64_OpAMD64VSCALEFPDMasked256(v) + case OpAMD64VSCALEFPDMasked512: + return rewriteValueAMD64_OpAMD64VSCALEFPDMasked512(v) + case OpAMD64VSCALEFPS128: + return rewriteValueAMD64_OpAMD64VSCALEFPS128(v) + case OpAMD64VSCALEFPS256: + return rewriteValueAMD64_OpAMD64VSCALEFPS256(v) + case OpAMD64VSCALEFPS512: + return rewriteValueAMD64_OpAMD64VSCALEFPS512(v) + case OpAMD64VSCALEFPSMasked128: + return 
rewriteValueAMD64_OpAMD64VSCALEFPSMasked128(v) + case OpAMD64VSCALEFPSMasked256: + return rewriteValueAMD64_OpAMD64VSCALEFPSMasked256(v) + case OpAMD64VSCALEFPSMasked512: + return rewriteValueAMD64_OpAMD64VSCALEFPSMasked512(v) + case OpAMD64VSQRTPD128: + return rewriteValueAMD64_OpAMD64VSQRTPD128(v) + case OpAMD64VSQRTPD256: + return rewriteValueAMD64_OpAMD64VSQRTPD256(v) + case OpAMD64VSQRTPD512: + return rewriteValueAMD64_OpAMD64VSQRTPD512(v) + case OpAMD64VSQRTPDMasked128: + return rewriteValueAMD64_OpAMD64VSQRTPDMasked128(v) + case OpAMD64VSQRTPDMasked256: + return rewriteValueAMD64_OpAMD64VSQRTPDMasked256(v) + case OpAMD64VSQRTPDMasked512: + return rewriteValueAMD64_OpAMD64VSQRTPDMasked512(v) + case OpAMD64VSQRTPS128: + return rewriteValueAMD64_OpAMD64VSQRTPS128(v) + case OpAMD64VSQRTPS256: + return rewriteValueAMD64_OpAMD64VSQRTPS256(v) + case OpAMD64VSQRTPS512: + return rewriteValueAMD64_OpAMD64VSQRTPS512(v) + case OpAMD64VSQRTPSMasked128: + return rewriteValueAMD64_OpAMD64VSQRTPSMasked128(v) + case OpAMD64VSQRTPSMasked256: + return rewriteValueAMD64_OpAMD64VSQRTPSMasked256(v) + case OpAMD64VSQRTPSMasked512: + return rewriteValueAMD64_OpAMD64VSQRTPSMasked512(v) + case OpAMD64VSUBPD128: + return rewriteValueAMD64_OpAMD64VSUBPD128(v) + case OpAMD64VSUBPD256: + return rewriteValueAMD64_OpAMD64VSUBPD256(v) + case OpAMD64VSUBPD512: + return rewriteValueAMD64_OpAMD64VSUBPD512(v) + case OpAMD64VSUBPDMasked128: + return rewriteValueAMD64_OpAMD64VSUBPDMasked128(v) + case OpAMD64VSUBPDMasked256: + return rewriteValueAMD64_OpAMD64VSUBPDMasked256(v) + case OpAMD64VSUBPDMasked512: + return rewriteValueAMD64_OpAMD64VSUBPDMasked512(v) + case OpAMD64VSUBPS128: + return rewriteValueAMD64_OpAMD64VSUBPS128(v) + case OpAMD64VSUBPS256: + return rewriteValueAMD64_OpAMD64VSUBPS256(v) + case OpAMD64VSUBPS512: + return rewriteValueAMD64_OpAMD64VSUBPS512(v) + case OpAMD64VSUBPSMasked128: + return rewriteValueAMD64_OpAMD64VSUBPSMasked128(v) + case OpAMD64VSUBPSMasked256: + return rewriteValueAMD64_OpAMD64VSUBPSMasked256(v) + case OpAMD64VSUBPSMasked512: + return rewriteValueAMD64_OpAMD64VSUBPSMasked512(v) case OpAMD64XADDLlock: return rewriteValueAMD64_OpAMD64XADDLlock(v) case OpAMD64XADDQlock: @@ -26594,3672 +27580,18035 @@ func rewriteValueAMD64_OpAMD64TESTWconst(v *Value) bool { } return false } +func rewriteValueAMD64_OpAMD64VADDPD128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VADDPD128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VADDPD128load {sym} [off] x ptr mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VADDPD128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VADDPD256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VADDPD256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VADDPD256load {sym} [off] x ptr mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) 
&& clobber(l)) { + continue + } + v.reset(OpAMD64VADDPD256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VADDPD512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VADDPD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VADDPD512load {sym} [off] x ptr mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VADDPD512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VADDPDMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VADDPDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VADDPDMasked128load {sym} [off] x ptr mask mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VADDPDMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VADDPDMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VADDPDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VADDPDMasked256load {sym} [off] x ptr mask mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VADDPDMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VADDPDMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VADDPDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VADDPDMasked512load {sym} [off] x ptr mask mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VADDPDMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VADDPS128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VADDPS128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VADDPS128load {sym} [off] x ptr mem) + for { + for _i0 := 0; _i0 <= 1; 
_i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VADDPS128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VADDPS256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VADDPS256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VADDPS256load {sym} [off] x ptr mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VADDPS256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VADDPS512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VADDPS512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VADDPS512load {sym} [off] x ptr mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VADDPS512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VADDPSMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VADDPSMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VADDPSMasked128load {sym} [off] x ptr mask mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VADDPSMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VADDPSMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VADDPSMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VADDPSMasked256load {sym} [off] x ptr mask mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VADDPSMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VADDPSMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: 
(VADDPSMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VADDPSMasked512load {sym} [off] x ptr mask mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VADDPSMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VCVTPS2UDQ128(v *Value) bool { + v_0 := v.Args[0] + // match: (VCVTPS2UDQ128 l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCVTPS2UDQ128load {sym} [off] ptr mem) + for { + l := v_0 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VCVTPS2UDQ128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VCVTPS2UDQ256(v *Value) bool { + v_0 := v.Args[0] + // match: (VCVTPS2UDQ256 l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCVTPS2UDQ256load {sym} [off] ptr mem) + for { + l := v_0 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VCVTPS2UDQ256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VCVTPS2UDQ512(v *Value) bool { + v_0 := v.Args[0] + // match: (VCVTPS2UDQ512 l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCVTPS2UDQ512load {sym} [off] ptr mem) + for { + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VCVTPS2UDQ512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VCVTPS2UDQMasked128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VCVTPS2UDQMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCVTPS2UDQMasked128load {sym} [off] ptr mask mem) + for { + l := v_0 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VCVTPS2UDQMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VCVTPS2UDQMasked256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VCVTPS2UDQMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCVTPS2UDQMasked256load {sym} [off] ptr mask mem) + for { + l := v_0 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := 
auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VCVTPS2UDQMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VCVTPS2UDQMasked512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VCVTPS2UDQMasked512 l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCVTPS2UDQMasked512load {sym} [off] ptr mask mem) + for { + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VCVTPS2UDQMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VCVTTPS2DQ128(v *Value) bool { + v_0 := v.Args[0] + // match: (VCVTTPS2DQ128 l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCVTTPS2DQ128load {sym} [off] ptr mem) + for { + l := v_0 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VCVTTPS2DQ128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VCVTTPS2DQ256(v *Value) bool { + v_0 := v.Args[0] + // match: (VCVTTPS2DQ256 l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCVTTPS2DQ256load {sym} [off] ptr mem) + for { + l := v_0 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VCVTTPS2DQ256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VCVTTPS2DQ512(v *Value) bool { + v_0 := v.Args[0] + // match: (VCVTTPS2DQ512 l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCVTTPS2DQ512load {sym} [off] ptr mem) + for { + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VCVTTPS2DQ512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VCVTTPS2DQMasked128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VCVTTPS2DQMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCVTTPS2DQMasked128load {sym} [off] ptr mask mem) + for { + l := v_0 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VCVTTPS2DQMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) + return true + } + return false +} +func 
rewriteValueAMD64_OpAMD64VCVTTPS2DQMasked256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VCVTTPS2DQMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCVTTPS2DQMasked256load {sym} [off] ptr mask mem) + for { + l := v_0 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VCVTTPS2DQMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VCVTTPS2DQMasked512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VCVTTPS2DQMasked512 l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VCVTTPS2DQMasked512load {sym} [off] ptr mask mem) + for { + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VCVTTPS2DQMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VDIVPD128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VDIVPD128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VDIVPD128load {sym} [off] x ptr mem) + for { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VDIVPD128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VDIVPD256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VDIVPD256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VDIVPD256load {sym} [off] x ptr mem) + for { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VDIVPD256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VDIVPD512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VDIVPD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VDIVPD512load {sym} [off] x ptr mem) + for { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VDIVPD512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VDIVPDMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VDIVPDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VDIVPDMasked128load {sym} 
[off] x ptr mask mem) + for { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VDIVPDMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VDIVPDMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VDIVPDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VDIVPDMasked256load {sym} [off] x ptr mask mem) + for { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VDIVPDMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VDIVPDMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VDIVPDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VDIVPDMasked512load {sym} [off] x ptr mask mem) + for { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VDIVPDMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VDIVPS128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VDIVPS128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VDIVPS128load {sym} [off] x ptr mem) + for { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VDIVPS128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VDIVPS256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VDIVPS256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VDIVPS256load {sym} [off] x ptr mem) + for { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VDIVPS256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VDIVPS512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VDIVPS512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VDIVPS512load {sym} [off] x ptr mem) + for { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if 
!(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VDIVPS512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VDIVPSMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VDIVPSMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VDIVPSMasked128load {sym} [off] x ptr mask mem) + for { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VDIVPSMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VDIVPSMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VDIVPSMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VDIVPSMasked256load {sym} [off] x ptr mask mem) + for { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VDIVPSMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VDIVPSMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VDIVPSMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VDIVPSMasked512load {sym} [off] x ptr mask mem) + for { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VDIVPSMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VFMADD213PD128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VFMADD213PD128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VFMADD213PD128load {sym} [off] x y ptr mem) + for { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VFMADD213PD128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, y, ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VFMADD213PD256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VFMADD213PD256 x y l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VFMADD213PD256load {sym} [off] x y ptr mem) + for { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + 
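+ // Once the guard passes, the fold is committed: v.reset rewrites v in
+ // place into the corresponding "*load" opcode, the load's [off] and {sym}
+ // aux data are copied onto v, and the operands are rewired so that ptr and
+ // mem replace the folded load as the trailing arguments.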
v.reset(OpAMD64VFMADD213PD256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, y, ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VFMADD213PD512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VFMADD213PD512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VFMADD213PD512load {sym} [off] x y ptr mem) + for { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VFMADD213PD512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, y, ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VFMADD213PDMasked128(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VFMADD213PDMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VFMADD213PDMasked128load {sym} [off] x y ptr mask mem) + for { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_3 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VFMADD213PDMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg5(x, y, ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VFMADD213PDMasked256(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VFMADD213PDMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VFMADD213PDMasked256load {sym} [off] x y ptr mask mem) + for { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_3 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VFMADD213PDMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg5(x, y, ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VFMADD213PDMasked512(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VFMADD213PDMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VFMADD213PDMasked512load {sym} [off] x y ptr mask mem) + for { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_3 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VFMADD213PDMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg5(x, y, ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VFMADD213PS128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VFMADD213PS128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VFMADD213PS128load {sym} [off] x y ptr mem) + for { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := 
auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VFMADD213PS128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, y, ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VFMADD213PS256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VFMADD213PS256 x y l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VFMADD213PS256load {sym} [off] x y ptr mem) + for { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VFMADD213PS256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, y, ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VFMADD213PS512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VFMADD213PS512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VFMADD213PS512load {sym} [off] x y ptr mem) + for { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VFMADD213PS512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, y, ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VFMADD213PSMasked128(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VFMADD213PSMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VFMADD213PSMasked128load {sym} [off] x y ptr mask mem) + for { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_3 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VFMADD213PSMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg5(x, y, ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VFMADD213PSMasked256(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VFMADD213PSMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VFMADD213PSMasked256load {sym} [off] x y ptr mask mem) + for { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_3 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VFMADD213PSMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg5(x, y, ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VFMADD213PSMasked512(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VFMADD213PSMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VFMADD213PSMasked512load {sym} [off] x y ptr mask mem) 
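+ // All matchers in this family share one shape: a one-use vector load
+ // feeding the instruction is folded into a memory operand. Conceptually,
+ //   l = VMOVDQUload512 {sym} [off] ptr mem
+ //   v = VFMADD213PSMasked512 x y l mask
+ // becomes
+ //   v = VFMADD213PSMasked512load {sym} [off] x y ptr mask mem
+ // canMergeLoad reports whether the fold is safe (broadly: l has a single
+ // use, sits in the same block as v, and folding cannot reorder it against
+ // another memory operation); clobber marks the dead load for removal.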
+ for { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_3 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VFMADD213PSMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg5(x, y, ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VFMADDSUB213PD128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VFMADDSUB213PD128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VFMADDSUB213PD128load {sym} [off] x y ptr mem) + for { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VFMADDSUB213PD128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, y, ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VFMADDSUB213PD256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VFMADDSUB213PD256 x y l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VFMADDSUB213PD256load {sym} [off] x y ptr mem) + for { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VFMADDSUB213PD256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, y, ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VFMADDSUB213PD512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VFMADDSUB213PD512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VFMADDSUB213PD512load {sym} [off] x y ptr mem) + for { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VFMADDSUB213PD512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, y, ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VFMADDSUB213PDMasked128(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VFMADDSUB213PDMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VFMADDSUB213PDMasked128load {sym} [off] x y ptr mask mem) + for { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_3 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VFMADDSUB213PDMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg5(x, y, ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VFMADDSUB213PDMasked256(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VFMADDSUB213PDMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // 
cond: canMergeLoad(v, l) && clobber(l) + // result: (VFMADDSUB213PDMasked256load {sym} [off] x y ptr mask mem) + for { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_3 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VFMADDSUB213PDMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg5(x, y, ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VFMADDSUB213PDMasked512(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VFMADDSUB213PDMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VFMADDSUB213PDMasked512load {sym} [off] x y ptr mask mem) + for { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_3 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VFMADDSUB213PDMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg5(x, y, ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VFMADDSUB213PS128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VFMADDSUB213PS128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VFMADDSUB213PS128load {sym} [off] x y ptr mem) + for { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VFMADDSUB213PS128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, y, ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VFMADDSUB213PS256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VFMADDSUB213PS256 x y l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VFMADDSUB213PS256load {sym} [off] x y ptr mem) + for { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VFMADDSUB213PS256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, y, ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VFMADDSUB213PS512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VFMADDSUB213PS512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VFMADDSUB213PS512load {sym} [off] x y ptr mem) + for { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VFMADDSUB213PS512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, y, ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VFMADDSUB213PSMasked128(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := 
v.Args[1] + v_0 := v.Args[0] + // match: (VFMADDSUB213PSMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VFMADDSUB213PSMasked128load {sym} [off] x y ptr mask mem) + for { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_3 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VFMADDSUB213PSMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg5(x, y, ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VFMADDSUB213PSMasked256(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VFMADDSUB213PSMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VFMADDSUB213PSMasked256load {sym} [off] x y ptr mask mem) + for { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_3 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VFMADDSUB213PSMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg5(x, y, ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VFMADDSUB213PSMasked512(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VFMADDSUB213PSMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VFMADDSUB213PSMasked512load {sym} [off] x y ptr mask mem) + for { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_3 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VFMADDSUB213PSMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg5(x, y, ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VFMSUBADD213PD128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VFMSUBADD213PD128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VFMSUBADD213PD128load {sym} [off] x y ptr mem) + for { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VFMSUBADD213PD128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, y, ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VFMSUBADD213PD256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VFMSUBADD213PD256 x y l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VFMSUBADD213PD256load {sym} [off] x y ptr mem) + for { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VFMSUBADD213PD256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = 
symToAux(sym) + v.AddArg4(x, y, ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VFMSUBADD213PD512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VFMSUBADD213PD512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VFMSUBADD213PD512load {sym} [off] x y ptr mem) + for { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VFMSUBADD213PD512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, y, ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VFMSUBADD213PDMasked128(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VFMSUBADD213PDMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VFMSUBADD213PDMasked128load {sym} [off] x y ptr mask mem) + for { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_3 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VFMSUBADD213PDMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg5(x, y, ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VFMSUBADD213PDMasked256(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VFMSUBADD213PDMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VFMSUBADD213PDMasked256load {sym} [off] x y ptr mask mem) + for { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_3 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VFMSUBADD213PDMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg5(x, y, ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VFMSUBADD213PDMasked512(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VFMSUBADD213PDMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VFMSUBADD213PDMasked512load {sym} [off] x y ptr mask mem) + for { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_3 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VFMSUBADD213PDMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg5(x, y, ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VFMSUBADD213PS128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VFMSUBADD213PS128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VFMSUBADD213PS128load {sym} [off] x y ptr mem) + for { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := 
auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VFMSUBADD213PS128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, y, ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VFMSUBADD213PS256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VFMSUBADD213PS256 x y l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VFMSUBADD213PS256load {sym} [off] x y ptr mem) + for { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VFMSUBADD213PS256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, y, ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VFMSUBADD213PS512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VFMSUBADD213PS512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VFMSUBADD213PS512load {sym} [off] x y ptr mem) + for { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VFMSUBADD213PS512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, y, ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VFMSUBADD213PSMasked128(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VFMSUBADD213PSMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VFMSUBADD213PSMasked128load {sym} [off] x y ptr mask mem) + for { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_3 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VFMSUBADD213PSMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg5(x, y, ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VFMSUBADD213PSMasked256(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VFMSUBADD213PSMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VFMSUBADD213PSMasked256load {sym} [off] x y ptr mask mem) + for { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_3 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VFMSUBADD213PSMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg5(x, y, ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VFMSUBADD213PSMasked512(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VFMSUBADD213PSMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VFMSUBADD213PSMasked512load 
{sym} [off] x y ptr mask mem) + for { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_3 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VFMSUBADD213PSMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg5(x, y, ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VMAXPD128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VMAXPD128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VMAXPD128load {sym} [off] x ptr mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VMAXPD128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VMAXPD256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VMAXPD256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VMAXPD256load {sym} [off] x ptr mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VMAXPD256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VMAXPD512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VMAXPD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VMAXPD512load {sym} [off] x ptr mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VMAXPD512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VMAXPDMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VMAXPDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VMAXPDMasked128load {sym} [off] x ptr mask mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VMAXPDMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VMAXPDMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VMAXPDMasked256 x l:(VMOVDQUload256 
{sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VMAXPDMasked256load {sym} [off] x ptr mask mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VMAXPDMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VMAXPDMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VMAXPDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VMAXPDMasked512load {sym} [off] x ptr mask mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VMAXPDMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VMAXPS128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VMAXPS128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VMAXPS128load {sym} [off] x ptr mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VMAXPS128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VMAXPS256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VMAXPS256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VMAXPS256load {sym} [off] x ptr mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VMAXPS256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VMAXPS512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VMAXPS512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VMAXPS512load {sym} [off] x ptr mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VMAXPS512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + break + } + return false +} +func 
rewriteValueAMD64_OpAMD64VMAXPSMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VMAXPSMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VMAXPSMasked128load {sym} [off] x ptr mask mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VMAXPSMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VMAXPSMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VMAXPSMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VMAXPSMasked256load {sym} [off] x ptr mask mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VMAXPSMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VMAXPSMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VMAXPSMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VMAXPSMasked512load {sym} [off] x ptr mask mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VMAXPSMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VMINPD128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VMINPD128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VMINPD128load {sym} [off] x ptr mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VMINPD128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VMINPD256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VMINPD256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VMINPD256load {sym} [off] x ptr mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := 
l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VMINPD256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VMINPD512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VMINPD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VMINPD512load {sym} [off] x ptr mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VMINPD512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VMINPDMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VMINPDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VMINPDMasked128load {sym} [off] x ptr mask mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VMINPDMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VMINPDMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VMINPDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VMINPDMasked256load {sym} [off] x ptr mask mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VMINPDMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VMINPDMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VMINPDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VMINPDMasked512load {sym} [off] x ptr mask mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VMINPDMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VMINPS128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VMINPS128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VMINPS128load {sym} [off] x ptr 
mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VMINPS128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VMINPS256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VMINPS256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VMINPS256load {sym} [off] x ptr mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VMINPS256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VMINPS512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VMINPS512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VMINPS512load {sym} [off] x ptr mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VMINPS512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VMINPSMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VMINPSMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VMINPSMasked128load {sym} [off] x ptr mask mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VMINPSMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VMINPSMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VMINPSMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VMINPSMasked256load {sym} [off] x ptr mask mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VMINPSMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VMINPSMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := 
v.Args[1]
+	v_0 := v.Args[0]
+	// match: (VMINPSMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask)
+	// cond: canMergeLoad(v, l) && clobber(l)
+	// result: (VMINPSMasked512load {sym} [off] x ptr mask mem)
+	for {
+		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+			x := v_0
+			l := v_1
+			if l.Op != OpAMD64VMOVDQUload512 {
+				continue
+			}
+			off := auxIntToInt32(l.AuxInt)
+			sym := auxToSym(l.Aux)
+			mem := l.Args[1]
+			ptr := l.Args[0]
+			mask := v_2
+			if !(canMergeLoad(v, l) && clobber(l)) {
+				continue
+			}
+			v.reset(OpAMD64VMINPSMasked512load)
+			v.AuxInt = int32ToAuxInt(off)
+			v.Aux = symToAux(sym)
+			v.AddArg4(x, ptr, mask, mem)
+			return true
+		}
+		break
+	}
+	return false
+}
 func rewriteValueAMD64_OpAMD64VMOVD(v *Value) bool {
 	v_0 := v.Args[0]
-	b := v.Block
-	// match: (VMOVD x:(MOVLload [off] {sym} ptr mem))
-	// cond: x.Uses == 1 && clobber(x)
-	// result: @x.Block (VMOVDload [off] {sym} ptr mem)
+	b := v.Block
+	// match: (VMOVD x:(MOVLload [off] {sym} ptr mem))
+	// cond: x.Uses == 1 && clobber(x)
+	// result: @x.Block (VMOVDload [off] {sym} ptr mem)
+	for {
+		x := v_0
+		if x.Op != OpAMD64MOVLload {
+			break
+		}
+		off := auxIntToInt32(x.AuxInt)
+		sym := auxToSym(x.Aux)
+		mem := x.Args[1]
+		ptr := x.Args[0]
+		if !(x.Uses == 1 && clobber(x)) {
+			break
+		}
+		b = x.Block
+		v0 := b.NewValue0(x.Pos, OpAMD64VMOVDload, v.Type)
+		v.copyOf(v0)
+		v0.AuxInt = int32ToAuxInt(off)
+		v0.Aux = symToAux(sym)
+		v0.AddArg2(ptr, mem)
+		return true
+	}
+	return false
+}
+func rewriteValueAMD64_OpAMD64VMOVDQU16Masked512(v *Value) bool {
+	v_1 := v.Args[1]
+	v_0 := v.Args[0]
+	// match: (VMOVDQU16Masked512 (VPABSW512 x) mask)
+	// result: (VPABSWMasked512 x mask)
+	for {
+		if v_0.Op != OpAMD64VPABSW512 {
+			break
+		}
+		x := v_0.Args[0]
+		mask := v_1
+		v.reset(OpAMD64VPABSWMasked512)
+		v.AddArg2(x, mask)
+		return true
+	}
+	// match: (VMOVDQU16Masked512 (VPADDW512 x y) mask)
+	// result: (VPADDWMasked512 x y mask)
+	for {
+		if v_0.Op != OpAMD64VPADDW512 {
+			break
+		}
+		y := v_0.Args[1]
+		x := v_0.Args[0]
+		mask := v_1
+		v.reset(OpAMD64VPADDWMasked512)
+		v.AddArg3(x, y, mask)
+		return true
+	}
+	// match: (VMOVDQU16Masked512 (VPADDSW512 x y) mask)
+	// result: (VPADDSWMasked512 x y mask)
+	for {
+		if v_0.Op != OpAMD64VPADDSW512 {
+			break
+		}
+		y := v_0.Args[1]
+		x := v_0.Args[0]
+		mask := v_1
+		v.reset(OpAMD64VPADDSWMasked512)
+		v.AddArg3(x, y, mask)
+		return true
+	}
+	// match: (VMOVDQU16Masked512 (VPADDUSW512 x y) mask)
+	// result: (VPADDUSWMasked512 x y mask)
+	for {
+		if v_0.Op != OpAMD64VPADDUSW512 {
+			break
+		}
+		y := v_0.Args[1]
+		x := v_0.Args[0]
+		mask := v_1
+		v.reset(OpAMD64VPADDUSWMasked512)
+		v.AddArg3(x, y, mask)
+		return true
+	}
+	// match: (VMOVDQU16Masked512 (VPAVGW512 x y) mask)
+	// result: (VPAVGWMasked512 x y mask)
+	for {
+		if v_0.Op != OpAMD64VPAVGW512 {
+			break
+		}
+		y := v_0.Args[1]
+		x := v_0.Args[0]
+		mask := v_1
+		v.reset(OpAMD64VPAVGWMasked512)
+		v.AddArg3(x, y, mask)
+		return true
+	}
+	// match: (VMOVDQU16Masked512 (VPBROADCASTW512 x) mask)
+	// result: (VPBROADCASTWMasked512 x mask)
+	for {
+		if v_0.Op != OpAMD64VPBROADCASTW512 {
+			break
+		}
+		x := v_0.Args[0]
+		mask := v_1
+		v.reset(OpAMD64VPBROADCASTWMasked512)
+		v.AddArg2(x, mask)
+		return true
+	}
+	// match: (VMOVDQU16Masked512 (VPMOVSXWD512 x) mask)
+	// result: (VPMOVSXWDMasked512 x mask)
+	for {
+		if v_0.Op != OpAMD64VPMOVSXWD512 {
+			break
+		}
+		x := v_0.Args[0]
+		mask := v_1
+		v.reset(OpAMD64VPMOVSXWDMasked512)
+		v.AddArg2(x, mask)
+		return true
+	}
+	// match: (VMOVDQU16Masked512 (VPMOVSXWQ512 x) mask)
+	//
result: (VPMOVSXWQMasked512 x mask) + for { + if v_0.Op != OpAMD64VPMOVSXWQ512 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVSXWQMasked512) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU16Masked512 (VPMOVZXWD512 x) mask) + // result: (VPMOVZXWDMasked512 x mask) + for { + if v_0.Op != OpAMD64VPMOVZXWD512 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVZXWDMasked512) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU16Masked512 (VPMOVZXWQ512 x) mask) + // result: (VPMOVZXWQMasked512 x mask) + for { + if v_0.Op != OpAMD64VPMOVZXWQ512 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVZXWQMasked512) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU16Masked512 (VPMADDWD512 x y) mask) + // result: (VPMADDWDMasked512 x y mask) + for { + if v_0.Op != OpAMD64VPMADDWD512 { + break + } + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMADDWDMasked512) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU16Masked512 (VPMADDUBSW512 x y) mask) + // result: (VPMADDUBSWMasked512 x y mask) + for { + if v_0.Op != OpAMD64VPMADDUBSW512 { + break + } + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMADDUBSWMasked512) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU16Masked512 (VPMAXSW512 x y) mask) + // result: (VPMAXSWMasked512 x y mask) + for { + if v_0.Op != OpAMD64VPMAXSW512 { + break + } + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMAXSWMasked512) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU16Masked512 (VPMAXUW512 x y) mask) + // result: (VPMAXUWMasked512 x y mask) + for { + if v_0.Op != OpAMD64VPMAXUW512 { + break + } + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMAXUWMasked512) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU16Masked512 (VPMINSW512 x y) mask) + // result: (VPMINSWMasked512 x y mask) + for { + if v_0.Op != OpAMD64VPMINSW512 { + break + } + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMINSWMasked512) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU16Masked512 (VPMINUW512 x y) mask) + // result: (VPMINUWMasked512 x y mask) + for { + if v_0.Op != OpAMD64VPMINUW512 { + break + } + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMINUWMasked512) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU16Masked512 (VPMULHW512 x y) mask) + // result: (VPMULHWMasked512 x y mask) + for { + if v_0.Op != OpAMD64VPMULHW512 { + break + } + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMULHWMasked512) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU16Masked512 (VPMULHUW512 x y) mask) + // result: (VPMULHUWMasked512 x y mask) + for { + if v_0.Op != OpAMD64VPMULHUW512 { + break + } + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMULHUWMasked512) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU16Masked512 (VPMULLW512 x y) mask) + // result: (VPMULLWMasked512 x y mask) + for { + if v_0.Op != OpAMD64VPMULLW512 { + break + } + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMULLWMasked512) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU16Masked512 (VPOPCNTW512 x) mask) + // result: (VPOPCNTWMasked512 x mask) + for { + if v_0.Op != OpAMD64VPOPCNTW512 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPOPCNTWMasked512) + v.AddArg2(x, mask) + return true + } + // match: 
(VMOVDQU16Masked512 (VPERMI2W512 x y z) mask) + // result: (VPERMI2WMasked512 x y z mask) + for { + if v_0.Op != OpAMD64VPERMI2W512 { + break + } + z := v_0.Args[2] + x := v_0.Args[0] + y := v_0.Args[1] + mask := v_1 + v.reset(OpAMD64VPERMI2WMasked512) + v.AddArg4(x, y, z, mask) + return true + } + // match: (VMOVDQU16Masked512 (VPSHUFHW512 [a] x) mask) + // result: (VPSHUFHWMasked512 [a] x mask) + for { + if v_0.Op != OpAMD64VPSHUFHW512 { + break + } + a := auxIntToUint8(v_0.AuxInt) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSHUFHWMasked512) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU16Masked512 (VPERMW512 x y) mask) + // result: (VPERMWMasked512 x y mask) + for { + if v_0.Op != OpAMD64VPERMW512 { + break + } + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPERMWMasked512) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU16Masked512 (VPSHLDW512 [a] x y) mask) + // result: (VPSHLDWMasked512 [a] x y mask) + for { + if v_0.Op != OpAMD64VPSHLDW512 { + break + } + a := auxIntToUint8(v_0.AuxInt) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSHLDWMasked512) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU16Masked512 (VPSLLW512 x y) mask) + // result: (VPSLLWMasked512 x y mask) + for { + if v_0.Op != OpAMD64VPSLLW512 { + break + } + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSLLWMasked512) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU16Masked512 (VPSHRDW512 [a] x y) mask) + // result: (VPSHRDWMasked512 [a] x y mask) + for { + if v_0.Op != OpAMD64VPSHRDW512 { + break + } + a := auxIntToUint8(v_0.AuxInt) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSHRDWMasked512) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU16Masked512 (VPSRAW512 x y) mask) + // result: (VPSRAWMasked512 x y mask) + for { + if v_0.Op != OpAMD64VPSRAW512 { + break + } + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSRAWMasked512) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU16Masked512 (VPSRLW512 x y) mask) + // result: (VPSRLWMasked512 x y mask) + for { + if v_0.Op != OpAMD64VPSRLW512 { + break + } + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSRLWMasked512) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU16Masked512 (VPSHLDVW512 x y z) mask) + // result: (VPSHLDVWMasked512 x y z mask) + for { + if v_0.Op != OpAMD64VPSHLDVW512 { + break + } + z := v_0.Args[2] + x := v_0.Args[0] + y := v_0.Args[1] + mask := v_1 + v.reset(OpAMD64VPSHLDVWMasked512) + v.AddArg4(x, y, z, mask) + return true + } + // match: (VMOVDQU16Masked512 (VPSLLVW512 x y) mask) + // result: (VPSLLVWMasked512 x y mask) + for { + if v_0.Op != OpAMD64VPSLLVW512 { + break + } + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSLLVWMasked512) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU16Masked512 (VPSHRDVW512 x y z) mask) + // result: (VPSHRDVWMasked512 x y z mask) + for { + if v_0.Op != OpAMD64VPSHRDVW512 { + break + } + z := v_0.Args[2] + x := v_0.Args[0] + y := v_0.Args[1] + mask := v_1 + v.reset(OpAMD64VPSHRDVWMasked512) + v.AddArg4(x, y, z, mask) + return true + } + // match: (VMOVDQU16Masked512 (VPSRAVW512 x y) mask) + // result: (VPSRAVWMasked512 x y mask) + for { + if v_0.Op != OpAMD64VPSRAVW512 { + break + } + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + 
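// Every fold in this function has the same shape, mirroring one rules
// line per op: (VMOVDQU16Masked512 (op512 ...) mask) => (opMasked512 ... mask).
// A k-masked move whose source is the corresponding unmasked op is
// replaced by the op's masked form, deleting the move outright. This is
// sound on the assumption (upheld by the rule generator, not re-checked
// here) that the move and the masked op apply the same zeroing
// semantics per 16-bit lane, i.e. for each lane i:
//	mask[i] ? op(x, y)[i] : 0  ==  opMasked(x, y, mask)[i]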
v.reset(OpAMD64VPSRAVWMasked512) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU16Masked512 (VPSRLVW512 x y) mask) + // result: (VPSRLVWMasked512 x y mask) + for { + if v_0.Op != OpAMD64VPSRLVW512 { + break + } + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSRLVWMasked512) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU16Masked512 (VPSUBW512 x y) mask) + // result: (VPSUBWMasked512 x y mask) + for { + if v_0.Op != OpAMD64VPSUBW512 { + break + } + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSUBWMasked512) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU16Masked512 (VPSUBSW512 x y) mask) + // result: (VPSUBSWMasked512 x y mask) + for { + if v_0.Op != OpAMD64VPSUBSW512 { + break + } + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSUBSWMasked512) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU16Masked512 (VPSUBUSW512 x y) mask) + // result: (VPSUBUSWMasked512 x y mask) + for { + if v_0.Op != OpAMD64VPSUBUSW512 { + break + } + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSUBUSWMasked512) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU16Masked512 (VPSLLW512const [a] x) mask) + // result: (VPSLLWMasked512const [a] x mask) + for { + if v_0.Op != OpAMD64VPSLLW512const { + break + } + a := auxIntToUint8(v_0.AuxInt) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSLLWMasked512const) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU16Masked512 (VPSRLW512const [a] x) mask) + // result: (VPSRLWMasked512const [a] x mask) + for { + if v_0.Op != OpAMD64VPSRLW512const { + break + } + a := auxIntToUint8(v_0.AuxInt) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSRLWMasked512const) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU16Masked512 (VPSRAW512const [a] x) mask) + // result: (VPSRAWMasked512const [a] x mask) + for { + if v_0.Op != OpAMD64VPSRAW512const { + break + } + a := auxIntToUint8(v_0.AuxInt) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSRAWMasked512const) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg2(x, mask) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VMOVDQU32Masked512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VMOVDQU32Masked512 (VPABSD512 x) mask) + // result: (VPABSDMasked512 x mask) + for { + if v_0.Op != OpAMD64VPABSD512 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPABSDMasked512) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU32Masked512 (VPDPWSSD512 x y z) mask) + // result: (VPDPWSSDMasked512 x y z mask) + for { + if v_0.Op != OpAMD64VPDPWSSD512 { + break + } + z := v_0.Args[2] + x := v_0.Args[0] + y := v_0.Args[1] + mask := v_1 + v.reset(OpAMD64VPDPWSSDMasked512) + v.AddArg4(x, y, z, mask) + return true + } + // match: (VMOVDQU32Masked512 (VPDPWSSDS512 x y z) mask) + // result: (VPDPWSSDSMasked512 x y z mask) + for { + if v_0.Op != OpAMD64VPDPWSSDS512 { + break + } + z := v_0.Args[2] + x := v_0.Args[0] + y := v_0.Args[1] + mask := v_1 + v.reset(OpAMD64VPDPWSSDSMasked512) + v.AddArg4(x, y, z, mask) + return true + } + // match: (VMOVDQU32Masked512 (VPDPBUSD512 x y z) mask) + // result: (VPDPBUSDMasked512 x y z mask) + for { + if v_0.Op != OpAMD64VPDPBUSD512 { + break + } + z := v_0.Args[2] + x := v_0.Args[0] + y := v_0.Args[1] + mask := v_1 + v.reset(OpAMD64VPDPBUSDMasked512) + v.AddArg4(x, y, z, mask) + return true + } + // match: 
(VMOVDQU32Masked512 (VPDPBUSDS512 x y z) mask) + // result: (VPDPBUSDSMasked512 x y z mask) + for { + if v_0.Op != OpAMD64VPDPBUSDS512 { + break + } + z := v_0.Args[2] + x := v_0.Args[0] + y := v_0.Args[1] + mask := v_1 + v.reset(OpAMD64VPDPBUSDSMasked512) + v.AddArg4(x, y, z, mask) + return true + } + // match: (VMOVDQU32Masked512 (VADDPS512 x y) mask) + // result: (VADDPSMasked512 x y mask) + for { + if v_0.Op != OpAMD64VADDPS512 { + break + } + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VADDPSMasked512) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU32Masked512 (VPADDD512 x y) mask) + // result: (VPADDDMasked512 x y mask) + for { + if v_0.Op != OpAMD64VPADDD512 { + break + } + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPADDDMasked512) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU32Masked512 (VPANDD512 x y) mask) + // result: (VPANDDMasked512 x y mask) + for { + if v_0.Op != OpAMD64VPANDD512 { + break + } + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPANDDMasked512) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU32Masked512 (VPANDND512 x y) mask) + // result: (VPANDNDMasked512 x y mask) + for { + if v_0.Op != OpAMD64VPANDND512 { + break + } + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPANDNDMasked512) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU32Masked512 (VBROADCASTSS512 x) mask) + // result: (VBROADCASTSSMasked512 x mask) + for { + if v_0.Op != OpAMD64VBROADCASTSS512 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VBROADCASTSSMasked512) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU32Masked512 (VPBROADCASTD512 x) mask) + // result: (VPBROADCASTDMasked512 x mask) + for { + if v_0.Op != OpAMD64VPBROADCASTD512 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPBROADCASTDMasked512) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU32Masked512 (VRNDSCALEPS512 [a] x) mask) + // result: (VRNDSCALEPSMasked512 [a] x mask) + for { + if v_0.Op != OpAMD64VRNDSCALEPS512 { + break + } + a := auxIntToUint8(v_0.AuxInt) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VRNDSCALEPSMasked512) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU32Masked512 (VREDUCEPS512 [a] x) mask) + // result: (VREDUCEPSMasked512 [a] x mask) + for { + if v_0.Op != OpAMD64VREDUCEPS512 { + break + } + a := auxIntToUint8(v_0.AuxInt) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VREDUCEPSMasked512) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU32Masked512 (VPACKSSDW512 x y) mask) + // result: (VPACKSSDWMasked512 x y mask) + for { + if v_0.Op != OpAMD64VPACKSSDW512 { + break + } + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPACKSSDWMasked512) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU32Masked512 (VCVTTPS2DQ512 x) mask) + // result: (VCVTTPS2DQMasked512 x mask) + for { + if v_0.Op != OpAMD64VCVTTPS2DQ512 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VCVTTPS2DQMasked512) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU32Masked512 (VPMOVSXDQ512 x) mask) + // result: (VPMOVSXDQMasked512 x mask) + for { + if v_0.Op != OpAMD64VPMOVSXDQ512 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVSXDQMasked512) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU32Masked512 (VPACKUSDW512 x y) mask) + // result: (VPACKUSDWMasked512 x y mask) + for 
{ + if v_0.Op != OpAMD64VPACKUSDW512 { + break + } + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPACKUSDWMasked512) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU32Masked512 (VCVTPS2UDQ512 x) mask) + // result: (VCVTPS2UDQMasked512 x mask) + for { + if v_0.Op != OpAMD64VCVTPS2UDQ512 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VCVTPS2UDQMasked512) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU32Masked512 (VPMOVZXDQ512 x) mask) + // result: (VPMOVZXDQMasked512 x mask) + for { + if v_0.Op != OpAMD64VPMOVZXDQ512 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVZXDQMasked512) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU32Masked512 (VDIVPS512 x y) mask) + // result: (VDIVPSMasked512 x y mask) + for { + if v_0.Op != OpAMD64VDIVPS512 { + break + } + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VDIVPSMasked512) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU32Masked512 (VPLZCNTD512 x) mask) + // result: (VPLZCNTDMasked512 x mask) + for { + if v_0.Op != OpAMD64VPLZCNTD512 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPLZCNTDMasked512) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU32Masked512 (VMAXPS512 x y) mask) + // result: (VMAXPSMasked512 x y mask) + for { + if v_0.Op != OpAMD64VMAXPS512 { + break + } + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VMAXPSMasked512) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU32Masked512 (VPMAXSD512 x y) mask) + // result: (VPMAXSDMasked512 x y mask) + for { + if v_0.Op != OpAMD64VPMAXSD512 { + break + } + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMAXSDMasked512) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU32Masked512 (VPMAXUD512 x y) mask) + // result: (VPMAXUDMasked512 x y mask) + for { + if v_0.Op != OpAMD64VPMAXUD512 { + break + } + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMAXUDMasked512) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU32Masked512 (VMINPS512 x y) mask) + // result: (VMINPSMasked512 x y mask) + for { + if v_0.Op != OpAMD64VMINPS512 { + break + } + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VMINPSMasked512) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU32Masked512 (VPMINSD512 x y) mask) + // result: (VPMINSDMasked512 x y mask) + for { + if v_0.Op != OpAMD64VPMINSD512 { + break + } + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMINSDMasked512) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU32Masked512 (VPMINUD512 x y) mask) + // result: (VPMINUDMasked512 x y mask) + for { + if v_0.Op != OpAMD64VPMINUD512 { + break + } + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMINUDMasked512) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU32Masked512 (VFMADD213PS512 x y z) mask) + // result: (VFMADD213PSMasked512 x y z mask) + for { + if v_0.Op != OpAMD64VFMADD213PS512 { + break + } + z := v_0.Args[2] + x := v_0.Args[0] + y := v_0.Args[1] + mask := v_1 + v.reset(OpAMD64VFMADD213PSMasked512) + v.AddArg4(x, y, z, mask) + return true + } + // match: (VMOVDQU32Masked512 (VFMADDSUB213PS512 x y z) mask) + // result: (VFMADDSUB213PSMasked512 x y z mask) + for { + if v_0.Op != OpAMD64VFMADDSUB213PS512 { + break + } + z := v_0.Args[2] + x := v_0.Args[0] + y := v_0.Args[1] + mask := v_1 + v.reset(OpAMD64VFMADDSUB213PSMasked512) + v.AddArg4(x, y, z, mask) 
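// Note the operand convention these rewrites preserve: the mask is
// always appended after the op's own operands. Ternary FMA-family ops
// therefore become four-operand masked values, as in
// (VMOVDQU32Masked512 (VFMADD213PS512 x y z) mask)
// => (VFMADD213PSMasked512 x y z mask), built with AddArg4 here.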
+ return true + } + // match: (VMOVDQU32Masked512 (VMULPS512 x y) mask) + // result: (VMULPSMasked512 x y mask) + for { + if v_0.Op != OpAMD64VMULPS512 { + break + } + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VMULPSMasked512) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU32Masked512 (VPMULLD512 x y) mask) + // result: (VPMULLDMasked512 x y mask) + for { + if v_0.Op != OpAMD64VPMULLD512 { + break + } + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMULLDMasked512) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU32Masked512 (VFMSUBADD213PS512 x y z) mask) + // result: (VFMSUBADD213PSMasked512 x y z mask) + for { + if v_0.Op != OpAMD64VFMSUBADD213PS512 { + break + } + z := v_0.Args[2] + x := v_0.Args[0] + y := v_0.Args[1] + mask := v_1 + v.reset(OpAMD64VFMSUBADD213PSMasked512) + v.AddArg4(x, y, z, mask) + return true + } + // match: (VMOVDQU32Masked512 (VPOPCNTD512 x) mask) + // result: (VPOPCNTDMasked512 x mask) + for { + if v_0.Op != OpAMD64VPOPCNTD512 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPOPCNTDMasked512) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU32Masked512 (VPORD512 x y) mask) + // result: (VPORDMasked512 x y mask) + for { + if v_0.Op != OpAMD64VPORD512 { + break + } + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPORDMasked512) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU32Masked512 (VPERMI2PS512 x y z) mask) + // result: (VPERMI2PSMasked512 x y z mask) + for { + if v_0.Op != OpAMD64VPERMI2PS512 { + break + } + z := v_0.Args[2] + x := v_0.Args[0] + y := v_0.Args[1] + mask := v_1 + v.reset(OpAMD64VPERMI2PSMasked512) + v.AddArg4(x, y, z, mask) + return true + } + // match: (VMOVDQU32Masked512 (VPERMI2D512 x y z) mask) + // result: (VPERMI2DMasked512 x y z mask) + for { + if v_0.Op != OpAMD64VPERMI2D512 { + break + } + z := v_0.Args[2] + x := v_0.Args[0] + y := v_0.Args[1] + mask := v_1 + v.reset(OpAMD64VPERMI2DMasked512) + v.AddArg4(x, y, z, mask) + return true + } + // match: (VMOVDQU32Masked512 (VPSHUFD512 [a] x) mask) + // result: (VPSHUFDMasked512 [a] x mask) + for { + if v_0.Op != OpAMD64VPSHUFD512 { + break + } + a := auxIntToUint8(v_0.AuxInt) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSHUFDMasked512) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU32Masked512 (VPERMPS512 x y) mask) + // result: (VPERMPSMasked512 x y mask) + for { + if v_0.Op != OpAMD64VPERMPS512 { + break + } + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPERMPSMasked512) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU32Masked512 (VPERMD512 x y) mask) + // result: (VPERMDMasked512 x y mask) + for { + if v_0.Op != OpAMD64VPERMD512 { + break + } + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPERMDMasked512) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU32Masked512 (VRCP14PS512 x) mask) + // result: (VRCP14PSMasked512 x mask) + for { + if v_0.Op != OpAMD64VRCP14PS512 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VRCP14PSMasked512) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU32Masked512 (VRSQRT14PS512 x) mask) + // result: (VRSQRT14PSMasked512 x mask) + for { + if v_0.Op != OpAMD64VRSQRT14PS512 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VRSQRT14PSMasked512) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU32Masked512 (VPROLD512 [a] x) mask) + // result: (VPROLDMasked512 
[a] x mask) + for { + if v_0.Op != OpAMD64VPROLD512 { + break + } + a := auxIntToUint8(v_0.AuxInt) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPROLDMasked512) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU32Masked512 (VPRORD512 [a] x) mask) + // result: (VPRORDMasked512 [a] x mask) + for { + if v_0.Op != OpAMD64VPRORD512 { + break + } + a := auxIntToUint8(v_0.AuxInt) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPRORDMasked512) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU32Masked512 (VPROLVD512 x y) mask) + // result: (VPROLVDMasked512 x y mask) + for { + if v_0.Op != OpAMD64VPROLVD512 { + break + } + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPROLVDMasked512) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU32Masked512 (VPRORVD512 x y) mask) + // result: (VPRORVDMasked512 x y mask) + for { + if v_0.Op != OpAMD64VPRORVD512 { + break + } + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPRORVDMasked512) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU32Masked512 (VSCALEFPS512 x y) mask) + // result: (VSCALEFPSMasked512 x y mask) + for { + if v_0.Op != OpAMD64VSCALEFPS512 { + break + } + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VSCALEFPSMasked512) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU32Masked512 (VPSHLDD512 [a] x y) mask) + // result: (VPSHLDDMasked512 [a] x y mask) + for { + if v_0.Op != OpAMD64VPSHLDD512 { + break + } + a := auxIntToUint8(v_0.AuxInt) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSHLDDMasked512) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU32Masked512 (VPSLLD512 x y) mask) + // result: (VPSLLDMasked512 x y mask) + for { + if v_0.Op != OpAMD64VPSLLD512 { + break + } + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSLLDMasked512) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU32Masked512 (VPSHRDD512 [a] x y) mask) + // result: (VPSHRDDMasked512 [a] x y mask) + for { + if v_0.Op != OpAMD64VPSHRDD512 { + break + } + a := auxIntToUint8(v_0.AuxInt) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSHRDDMasked512) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU32Masked512 (VPSRAD512 x y) mask) + // result: (VPSRADMasked512 x y mask) + for { + if v_0.Op != OpAMD64VPSRAD512 { + break + } + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSRADMasked512) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU32Masked512 (VPSRLD512 x y) mask) + // result: (VPSRLDMasked512 x y mask) + for { + if v_0.Op != OpAMD64VPSRLD512 { + break + } + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSRLDMasked512) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU32Masked512 (VPSHLDVD512 x y z) mask) + // result: (VPSHLDVDMasked512 x y z mask) + for { + if v_0.Op != OpAMD64VPSHLDVD512 { + break + } + z := v_0.Args[2] + x := v_0.Args[0] + y := v_0.Args[1] + mask := v_1 + v.reset(OpAMD64VPSHLDVDMasked512) + v.AddArg4(x, y, z, mask) + return true + } + // match: (VMOVDQU32Masked512 (VPSLLVD512 x y) mask) + // result: (VPSLLVDMasked512 x y mask) + for { + if v_0.Op != OpAMD64VPSLLVD512 { + break + } + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSLLVDMasked512) + v.AddArg3(x, y, mask) + return true + } + // match: 
(VMOVDQU32Masked512 (VPSHRDVD512 x y z) mask) + // result: (VPSHRDVDMasked512 x y z mask) + for { + if v_0.Op != OpAMD64VPSHRDVD512 { + break + } + z := v_0.Args[2] + x := v_0.Args[0] + y := v_0.Args[1] + mask := v_1 + v.reset(OpAMD64VPSHRDVDMasked512) + v.AddArg4(x, y, z, mask) + return true + } + // match: (VMOVDQU32Masked512 (VPSRAVD512 x y) mask) + // result: (VPSRAVDMasked512 x y mask) + for { + if v_0.Op != OpAMD64VPSRAVD512 { + break + } + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSRAVDMasked512) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU32Masked512 (VPSRLVD512 x y) mask) + // result: (VPSRLVDMasked512 x y mask) + for { + if v_0.Op != OpAMD64VPSRLVD512 { + break + } + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSRLVDMasked512) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU32Masked512 (VSQRTPS512 x) mask) + // result: (VSQRTPSMasked512 x mask) + for { + if v_0.Op != OpAMD64VSQRTPS512 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VSQRTPSMasked512) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU32Masked512 (VSUBPS512 x y) mask) + // result: (VSUBPSMasked512 x y mask) + for { + if v_0.Op != OpAMD64VSUBPS512 { + break + } + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VSUBPSMasked512) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU32Masked512 (VPSUBD512 x y) mask) + // result: (VPSUBDMasked512 x y mask) + for { + if v_0.Op != OpAMD64VPSUBD512 { + break + } + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSUBDMasked512) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU32Masked512 (VPXORD512 x y) mask) + // result: (VPXORDMasked512 x y mask) + for { + if v_0.Op != OpAMD64VPXORD512 { + break + } + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPXORDMasked512) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU32Masked512 (VPSLLD512const [a] x) mask) + // result: (VPSLLDMasked512const [a] x mask) + for { + if v_0.Op != OpAMD64VPSLLD512const { + break + } + a := auxIntToUint8(v_0.AuxInt) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSLLDMasked512const) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU32Masked512 (VPSRLD512const [a] x) mask) + // result: (VPSRLDMasked512const [a] x mask) + for { + if v_0.Op != OpAMD64VPSRLD512const { + break + } + a := auxIntToUint8(v_0.AuxInt) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSRLDMasked512const) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU32Masked512 (VPSRAD512const [a] x) mask) + // result: (VPSRADMasked512const [a] x mask) + for { + if v_0.Op != OpAMD64VPSRAD512const { + break + } + a := auxIntToUint8(v_0.AuxInt) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSRADMasked512const) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg2(x, mask) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VMOVDQU64Masked512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VMOVDQU64Masked512 (VPABSQ512 x) mask) + // result: (VPABSQMasked512 x mask) + for { + if v_0.Op != OpAMD64VPABSQ512 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPABSQMasked512) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU64Masked512 (VADDPD512 x y) mask) + // result: (VADDPDMasked512 x y mask) + for { + if v_0.Op != OpAMD64VADDPD512 { + break + } + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + 
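// Each masked-move width gets its own rewrite function: the
// VMOVDQU8/16/32/64Masked512 matchers fold only ops of the matching
// element width, since one k-mask bit governs one lane and the lane
// size must agree between the move and the folded op. That is why the
// 64-bit-lane ops VADDPD512 and VPADDQ512 appear here rather than
// under VMOVDQU32Masked512.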
v.reset(OpAMD64VADDPDMasked512) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU64Masked512 (VPADDQ512 x y) mask) + // result: (VPADDQMasked512 x y mask) + for { + if v_0.Op != OpAMD64VPADDQ512 { + break + } + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPADDQMasked512) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU64Masked512 (VPANDQ512 x y) mask) + // result: (VPANDQMasked512 x y mask) + for { + if v_0.Op != OpAMD64VPANDQ512 { + break + } + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPANDQMasked512) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU64Masked512 (VPANDNQ512 x y) mask) + // result: (VPANDNQMasked512 x y mask) + for { + if v_0.Op != OpAMD64VPANDNQ512 { + break + } + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPANDNQMasked512) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU64Masked512 (VBROADCASTSD512 x) mask) + // result: (VBROADCASTSDMasked512 x mask) + for { + if v_0.Op != OpAMD64VBROADCASTSD512 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VBROADCASTSDMasked512) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU64Masked512 (VPBROADCASTQ512 x) mask) + // result: (VPBROADCASTQMasked512 x mask) + for { + if v_0.Op != OpAMD64VPBROADCASTQ512 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPBROADCASTQMasked512) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU64Masked512 (VRNDSCALEPD512 [a] x) mask) + // result: (VRNDSCALEPDMasked512 [a] x mask) + for { + if v_0.Op != OpAMD64VRNDSCALEPD512 { + break + } + a := auxIntToUint8(v_0.AuxInt) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VRNDSCALEPDMasked512) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU64Masked512 (VREDUCEPD512 [a] x) mask) + // result: (VREDUCEPDMasked512 [a] x mask) + for { + if v_0.Op != OpAMD64VREDUCEPD512 { + break + } + a := auxIntToUint8(v_0.AuxInt) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VREDUCEPDMasked512) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU64Masked512 (VDIVPD512 x y) mask) + // result: (VDIVPDMasked512 x y mask) + for { + if v_0.Op != OpAMD64VDIVPD512 { + break + } + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VDIVPDMasked512) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU64Masked512 (VPLZCNTQ512 x) mask) + // result: (VPLZCNTQMasked512 x mask) + for { + if v_0.Op != OpAMD64VPLZCNTQ512 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPLZCNTQMasked512) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU64Masked512 (VMAXPD512 x y) mask) + // result: (VMAXPDMasked512 x y mask) + for { + if v_0.Op != OpAMD64VMAXPD512 { + break + } + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VMAXPDMasked512) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU64Masked512 (VPMAXSQ512 x y) mask) + // result: (VPMAXSQMasked512 x y mask) + for { + if v_0.Op != OpAMD64VPMAXSQ512 { + break + } + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMAXSQMasked512) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU64Masked512 (VPMAXUQ512 x y) mask) + // result: (VPMAXUQMasked512 x y mask) + for { + if v_0.Op != OpAMD64VPMAXUQ512 { + break + } + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMAXUQMasked512) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU64Masked512 (VMINPD512 x y) mask) + 
// result: (VMINPDMasked512 x y mask) + for { + if v_0.Op != OpAMD64VMINPD512 { + break + } + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VMINPDMasked512) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU64Masked512 (VPMINSQ512 x y) mask) + // result: (VPMINSQMasked512 x y mask) + for { + if v_0.Op != OpAMD64VPMINSQ512 { + break + } + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMINSQMasked512) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU64Masked512 (VPMINUQ512 x y) mask) + // result: (VPMINUQMasked512 x y mask) + for { + if v_0.Op != OpAMD64VPMINUQ512 { + break + } + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMINUQMasked512) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU64Masked512 (VFMADD213PD512 x y z) mask) + // result: (VFMADD213PDMasked512 x y z mask) + for { + if v_0.Op != OpAMD64VFMADD213PD512 { + break + } + z := v_0.Args[2] + x := v_0.Args[0] + y := v_0.Args[1] + mask := v_1 + v.reset(OpAMD64VFMADD213PDMasked512) + v.AddArg4(x, y, z, mask) + return true + } + // match: (VMOVDQU64Masked512 (VFMADDSUB213PD512 x y z) mask) + // result: (VFMADDSUB213PDMasked512 x y z mask) + for { + if v_0.Op != OpAMD64VFMADDSUB213PD512 { + break + } + z := v_0.Args[2] + x := v_0.Args[0] + y := v_0.Args[1] + mask := v_1 + v.reset(OpAMD64VFMADDSUB213PDMasked512) + v.AddArg4(x, y, z, mask) + return true + } + // match: (VMOVDQU64Masked512 (VMULPD512 x y) mask) + // result: (VMULPDMasked512 x y mask) + for { + if v_0.Op != OpAMD64VMULPD512 { + break + } + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VMULPDMasked512) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU64Masked512 (VPMULLQ512 x y) mask) + // result: (VPMULLQMasked512 x y mask) + for { + if v_0.Op != OpAMD64VPMULLQ512 { + break + } + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMULLQMasked512) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU64Masked512 (VFMSUBADD213PD512 x y z) mask) + // result: (VFMSUBADD213PDMasked512 x y z mask) + for { + if v_0.Op != OpAMD64VFMSUBADD213PD512 { + break + } + z := v_0.Args[2] + x := v_0.Args[0] + y := v_0.Args[1] + mask := v_1 + v.reset(OpAMD64VFMSUBADD213PDMasked512) + v.AddArg4(x, y, z, mask) + return true + } + // match: (VMOVDQU64Masked512 (VPOPCNTQ512 x) mask) + // result: (VPOPCNTQMasked512 x mask) + for { + if v_0.Op != OpAMD64VPOPCNTQ512 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPOPCNTQMasked512) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU64Masked512 (VPORQ512 x y) mask) + // result: (VPORQMasked512 x y mask) + for { + if v_0.Op != OpAMD64VPORQ512 { + break + } + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPORQMasked512) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU64Masked512 (VPERMI2PD512 x y z) mask) + // result: (VPERMI2PDMasked512 x y z mask) + for { + if v_0.Op != OpAMD64VPERMI2PD512 { + break + } + z := v_0.Args[2] + x := v_0.Args[0] + y := v_0.Args[1] + mask := v_1 + v.reset(OpAMD64VPERMI2PDMasked512) + v.AddArg4(x, y, z, mask) + return true + } + // match: (VMOVDQU64Masked512 (VPERMI2Q512 x y z) mask) + // result: (VPERMI2QMasked512 x y z mask) + for { + if v_0.Op != OpAMD64VPERMI2Q512 { + break + } + z := v_0.Args[2] + x := v_0.Args[0] + y := v_0.Args[1] + mask := v_1 + v.reset(OpAMD64VPERMI2QMasked512) + v.AddArg4(x, y, z, mask) + return true + } + // match: (VMOVDQU64Masked512 (VPERMPD512 x y) mask) + // result: 
(VPERMPDMasked512 x y mask) + for { + if v_0.Op != OpAMD64VPERMPD512 { + break + } + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPERMPDMasked512) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU64Masked512 (VPERMQ512 x y) mask) + // result: (VPERMQMasked512 x y mask) + for { + if v_0.Op != OpAMD64VPERMQ512 { + break + } + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPERMQMasked512) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU64Masked512 (VRCP14PD512 x) mask) + // result: (VRCP14PDMasked512 x mask) + for { + if v_0.Op != OpAMD64VRCP14PD512 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VRCP14PDMasked512) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU64Masked512 (VRSQRT14PD512 x) mask) + // result: (VRSQRT14PDMasked512 x mask) + for { + if v_0.Op != OpAMD64VRSQRT14PD512 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VRSQRT14PDMasked512) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU64Masked512 (VPROLQ512 [a] x) mask) + // result: (VPROLQMasked512 [a] x mask) + for { + if v_0.Op != OpAMD64VPROLQ512 { + break + } + a := auxIntToUint8(v_0.AuxInt) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPROLQMasked512) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU64Masked512 (VPRORQ512 [a] x) mask) + // result: (VPRORQMasked512 [a] x mask) + for { + if v_0.Op != OpAMD64VPRORQ512 { + break + } + a := auxIntToUint8(v_0.AuxInt) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPRORQMasked512) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU64Masked512 (VPROLVQ512 x y) mask) + // result: (VPROLVQMasked512 x y mask) + for { + if v_0.Op != OpAMD64VPROLVQ512 { + break + } + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPROLVQMasked512) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU64Masked512 (VPRORVQ512 x y) mask) + // result: (VPRORVQMasked512 x y mask) + for { + if v_0.Op != OpAMD64VPRORVQ512 { + break + } + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPRORVQMasked512) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU64Masked512 (VSCALEFPD512 x y) mask) + // result: (VSCALEFPDMasked512 x y mask) + for { + if v_0.Op != OpAMD64VSCALEFPD512 { + break + } + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VSCALEFPDMasked512) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU64Masked512 (VPSHLDQ512 [a] x y) mask) + // result: (VPSHLDQMasked512 [a] x y mask) + for { + if v_0.Op != OpAMD64VPSHLDQ512 { + break + } + a := auxIntToUint8(v_0.AuxInt) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSHLDQMasked512) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU64Masked512 (VPSLLQ512 x y) mask) + // result: (VPSLLQMasked512 x y mask) + for { + if v_0.Op != OpAMD64VPSLLQ512 { + break + } + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSLLQMasked512) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU64Masked512 (VPSHRDQ512 [a] x y) mask) + // result: (VPSHRDQMasked512 [a] x y mask) + for { + if v_0.Op != OpAMD64VPSHRDQ512 { + break + } + a := auxIntToUint8(v_0.AuxInt) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSHRDQMasked512) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU64Masked512 (VPSRAQ512 x y) mask) + // result: 
(VPSRAQMasked512 x y mask) + for { + if v_0.Op != OpAMD64VPSRAQ512 { + break + } + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSRAQMasked512) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU64Masked512 (VPSRLQ512 x y) mask) + // result: (VPSRLQMasked512 x y mask) + for { + if v_0.Op != OpAMD64VPSRLQ512 { + break + } + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSRLQMasked512) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU64Masked512 (VPSHLDVQ512 x y z) mask) + // result: (VPSHLDVQMasked512 x y z mask) + for { + if v_0.Op != OpAMD64VPSHLDVQ512 { + break + } + z := v_0.Args[2] + x := v_0.Args[0] + y := v_0.Args[1] + mask := v_1 + v.reset(OpAMD64VPSHLDVQMasked512) + v.AddArg4(x, y, z, mask) + return true + } + // match: (VMOVDQU64Masked512 (VPSLLVQ512 x y) mask) + // result: (VPSLLVQMasked512 x y mask) + for { + if v_0.Op != OpAMD64VPSLLVQ512 { + break + } + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSLLVQMasked512) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU64Masked512 (VPSHRDVQ512 x y z) mask) + // result: (VPSHRDVQMasked512 x y z mask) + for { + if v_0.Op != OpAMD64VPSHRDVQ512 { + break + } + z := v_0.Args[2] + x := v_0.Args[0] + y := v_0.Args[1] + mask := v_1 + v.reset(OpAMD64VPSHRDVQMasked512) + v.AddArg4(x, y, z, mask) + return true + } + // match: (VMOVDQU64Masked512 (VPSRAVQ512 x y) mask) + // result: (VPSRAVQMasked512 x y mask) + for { + if v_0.Op != OpAMD64VPSRAVQ512 { + break + } + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSRAVQMasked512) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU64Masked512 (VPSRLVQ512 x y) mask) + // result: (VPSRLVQMasked512 x y mask) + for { + if v_0.Op != OpAMD64VPSRLVQ512 { + break + } + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSRLVQMasked512) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU64Masked512 (VSQRTPD512 x) mask) + // result: (VSQRTPDMasked512 x mask) + for { + if v_0.Op != OpAMD64VSQRTPD512 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VSQRTPDMasked512) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU64Masked512 (VSUBPD512 x y) mask) + // result: (VSUBPDMasked512 x y mask) + for { + if v_0.Op != OpAMD64VSUBPD512 { + break + } + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VSUBPDMasked512) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU64Masked512 (VPSUBQ512 x y) mask) + // result: (VPSUBQMasked512 x y mask) + for { + if v_0.Op != OpAMD64VPSUBQ512 { + break + } + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSUBQMasked512) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU64Masked512 (VPXORQ512 x y) mask) + // result: (VPXORQMasked512 x y mask) + for { + if v_0.Op != OpAMD64VPXORQ512 { + break + } + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPXORQMasked512) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU64Masked512 (VPSLLQ512const [a] x) mask) + // result: (VPSLLQMasked512const [a] x mask) + for { + if v_0.Op != OpAMD64VPSLLQ512const { + break + } + a := auxIntToUint8(v_0.AuxInt) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSLLQMasked512const) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU64Masked512 (VPSRLQ512const [a] x) mask) + // result: (VPSRLQMasked512const [a] x mask) + for { + if v_0.Op != OpAMD64VPSRLQ512const { + break + } + a := 
auxIntToUint8(v_0.AuxInt) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSRLQMasked512const) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU64Masked512 (VPSRAQ512const [a] x) mask) + // result: (VPSRAQMasked512const [a] x mask) + for { + if v_0.Op != OpAMD64VPSRAQ512const { + break + } + a := auxIntToUint8(v_0.AuxInt) + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSRAQMasked512const) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg2(x, mask) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VMOVDQU8Masked512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VMOVDQU8Masked512 (VPABSB512 x) mask) + // result: (VPABSBMasked512 x mask) + for { + if v_0.Op != OpAMD64VPABSB512 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPABSBMasked512) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU8Masked512 (VPADDB512 x y) mask) + // result: (VPADDBMasked512 x y mask) + for { + if v_0.Op != OpAMD64VPADDB512 { + break + } + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPADDBMasked512) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU8Masked512 (VPADDSB512 x y) mask) + // result: (VPADDSBMasked512 x y mask) + for { + if v_0.Op != OpAMD64VPADDSB512 { + break + } + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPADDSBMasked512) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU8Masked512 (VPADDUSB512 x y) mask) + // result: (VPADDUSBMasked512 x y mask) + for { + if v_0.Op != OpAMD64VPADDUSB512 { + break + } + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPADDUSBMasked512) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU8Masked512 (VPAVGB512 x y) mask) + // result: (VPAVGBMasked512 x y mask) + for { + if v_0.Op != OpAMD64VPAVGB512 { + break + } + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPAVGBMasked512) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU8Masked512 (VPBROADCASTB512 x) mask) + // result: (VPBROADCASTBMasked512 x mask) + for { + if v_0.Op != OpAMD64VPBROADCASTB512 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPBROADCASTBMasked512) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU8Masked512 (VPMOVSXBW512 x) mask) + // result: (VPMOVSXBWMasked512 x mask) + for { + if v_0.Op != OpAMD64VPMOVSXBW512 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVSXBWMasked512) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU8Masked512 (VPMOVSXBD512 x) mask) + // result: (VPMOVSXBDMasked512 x mask) + for { + if v_0.Op != OpAMD64VPMOVSXBD512 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVSXBDMasked512) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU8Masked512 (VPMOVSXBQ512 x) mask) + // result: (VPMOVSXBQMasked512 x mask) + for { + if v_0.Op != OpAMD64VPMOVSXBQ512 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVSXBQMasked512) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU8Masked512 (VPMOVZXBW512 x) mask) + // result: (VPMOVZXBWMasked512 x mask) + for { + if v_0.Op != OpAMD64VPMOVZXBW512 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVZXBWMasked512) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU8Masked512 (VPMOVZXBD512 x) mask) + // result: (VPMOVZXBDMasked512 x mask) + for { + if v_0.Op != OpAMD64VPMOVZXBD512 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVZXBDMasked512) + v.AddArg2(x, mask) + 
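// Ops carrying an immediate, such as the VGF2P8AFFINEQB512 affine
// transforms below, keep it in AuxInt across the rewrite: the matcher
// reads it with auxIntToUint8(v_0.AuxInt) and the replacement writes
// it back with uint8ToAuxInt(a), so the [a] byte survives the fold
// unchanged.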
return true + } + // match: (VMOVDQU8Masked512 (VPMOVZXBQ512 x) mask) + // result: (VPMOVZXBQMasked512 x mask) + for { + if v_0.Op != OpAMD64VPMOVZXBQ512 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMOVZXBQMasked512) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU8Masked512 (VGF2P8AFFINEINVQB512 [a] x y) mask) + // result: (VGF2P8AFFINEINVQBMasked512 [a] x y mask) + for { + if v_0.Op != OpAMD64VGF2P8AFFINEINVQB512 { + break + } + a := auxIntToUint8(v_0.AuxInt) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VGF2P8AFFINEINVQBMasked512) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU8Masked512 (VGF2P8AFFINEQB512 [a] x y) mask) + // result: (VGF2P8AFFINEQBMasked512 [a] x y mask) + for { + if v_0.Op != OpAMD64VGF2P8AFFINEQB512 { + break + } + a := auxIntToUint8(v_0.AuxInt) + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VGF2P8AFFINEQBMasked512) + v.AuxInt = uint8ToAuxInt(a) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU8Masked512 (VGF2P8MULB512 x y) mask) + // result: (VGF2P8MULBMasked512 x y mask) + for { + if v_0.Op != OpAMD64VGF2P8MULB512 { + break + } + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VGF2P8MULBMasked512) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU8Masked512 (VPMAXSB512 x y) mask) + // result: (VPMAXSBMasked512 x y mask) + for { + if v_0.Op != OpAMD64VPMAXSB512 { + break + } + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMAXSBMasked512) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU8Masked512 (VPMAXUB512 x y) mask) + // result: (VPMAXUBMasked512 x y mask) + for { + if v_0.Op != OpAMD64VPMAXUB512 { + break + } + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMAXUBMasked512) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU8Masked512 (VPMINSB512 x y) mask) + // result: (VPMINSBMasked512 x y mask) + for { + if v_0.Op != OpAMD64VPMINSB512 { + break + } + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMINSBMasked512) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU8Masked512 (VPMINUB512 x y) mask) + // result: (VPMINUBMasked512 x y mask) + for { + if v_0.Op != OpAMD64VPMINUB512 { + break + } + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPMINUBMasked512) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU8Masked512 (VPOPCNTB512 x) mask) + // result: (VPOPCNTBMasked512 x mask) + for { + if v_0.Op != OpAMD64VPOPCNTB512 { + break + } + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPOPCNTBMasked512) + v.AddArg2(x, mask) + return true + } + // match: (VMOVDQU8Masked512 (VPERMI2B512 x y z) mask) + // result: (VPERMI2BMasked512 x y z mask) + for { + if v_0.Op != OpAMD64VPERMI2B512 { + break + } + z := v_0.Args[2] + x := v_0.Args[0] + y := v_0.Args[1] + mask := v_1 + v.reset(OpAMD64VPERMI2BMasked512) + v.AddArg4(x, y, z, mask) + return true + } + // match: (VMOVDQU8Masked512 (VPSHUFB512 x y) mask) + // result: (VPSHUFBMasked512 x y mask) + for { + if v_0.Op != OpAMD64VPSHUFB512 { + break + } + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPSHUFBMasked512) + v.AddArg3(x, y, mask) + return true + } + // match: (VMOVDQU8Masked512 (VPERMB512 x y) mask) + // result: (VPERMBMasked512 x y mask) + for { + if v_0.Op != OpAMD64VPERMB512 { + break + } + y := v_0.Args[1] + x := v_0.Args[0] + mask := v_1 + v.reset(OpAMD64VPERMBMasked512) + v.AddArg3(x, y, mask) 
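// Besides the vector folds, the VMOVQ, VMOVSDf2v, and VMOVSSf2v
// helpers just below bridge scalars into vector registers: a scalar
// load feeding such a move is re-emitted as the vector load itself
// (the @x.Block results, materialized with b.NewValue0 at the load's
// position and installed via v.copyOf), and MOVSDconst/MOVSSconst
// become VMOVSDconst/VMOVSSconst directly. The two-operand load folds
// that resume with VMULPD128 use the _i0 swap loop to try both operand
// orders, so a load on either side of the commutative multiply is
// merged.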
+		return true
+	}
+	// match: (VMOVDQU8Masked512 (VPSUBB512 x y) mask)
+	// result: (VPSUBBMasked512 x y mask)
+	for {
+		if v_0.Op != OpAMD64VPSUBB512 {
+			break
+		}
+		y := v_0.Args[1]
+		x := v_0.Args[0]
+		mask := v_1
+		v.reset(OpAMD64VPSUBBMasked512)
+		v.AddArg3(x, y, mask)
+		return true
+	}
+	// match: (VMOVDQU8Masked512 (VPSUBSB512 x y) mask)
+	// result: (VPSUBSBMasked512 x y mask)
+	for {
+		if v_0.Op != OpAMD64VPSUBSB512 {
+			break
+		}
+		y := v_0.Args[1]
+		x := v_0.Args[0]
+		mask := v_1
+		v.reset(OpAMD64VPSUBSBMasked512)
+		v.AddArg3(x, y, mask)
+		return true
+	}
+	// match: (VMOVDQU8Masked512 (VPSUBUSB512 x y) mask)
+	// result: (VPSUBUSBMasked512 x y mask)
+	for {
+		if v_0.Op != OpAMD64VPSUBUSB512 {
+			break
+		}
+		y := v_0.Args[1]
+		x := v_0.Args[0]
+		mask := v_1
+		v.reset(OpAMD64VPSUBUSBMasked512)
+		v.AddArg3(x, y, mask)
+		return true
+	}
+	return false
+}
+func rewriteValueAMD64_OpAMD64VMOVQ(v *Value) bool {
+	v_0 := v.Args[0]
+	b := v.Block
+	// match: (VMOVQ x:(MOVQload [off] {sym} ptr mem))
+	// cond: x.Uses == 1 && clobber(x)
+	// result: @x.Block (VMOVQload [off] {sym} ptr mem)
+	for {
+		x := v_0
+		if x.Op != OpAMD64MOVQload {
+			break
+		}
+		off := auxIntToInt32(x.AuxInt)
+		sym := auxToSym(x.Aux)
+		mem := x.Args[1]
+		ptr := x.Args[0]
+		if !(x.Uses == 1 && clobber(x)) {
+			break
+		}
+		b = x.Block
+		v0 := b.NewValue0(x.Pos, OpAMD64VMOVQload, v.Type)
+		v.copyOf(v0)
+		v0.AuxInt = int32ToAuxInt(off)
+		v0.Aux = symToAux(sym)
+		v0.AddArg2(ptr, mem)
+		return true
+	}
+	return false
+}
+func rewriteValueAMD64_OpAMD64VMOVSDf2v(v *Value) bool {
+	v_0 := v.Args[0]
+	b := v.Block
+	// match: (VMOVSDf2v x:(MOVSDload [off] {sym} ptr mem))
+	// cond: x.Uses == 1 && clobber(x)
+	// result: @x.Block (VMOVSDload [off] {sym} ptr mem)
+	for {
+		x := v_0
+		if x.Op != OpAMD64MOVSDload {
+			break
+		}
+		off := auxIntToInt32(x.AuxInt)
+		sym := auxToSym(x.Aux)
+		mem := x.Args[1]
+		ptr := x.Args[0]
+		if !(x.Uses == 1 && clobber(x)) {
+			break
+		}
+		b = x.Block
+		v0 := b.NewValue0(x.Pos, OpAMD64VMOVSDload, v.Type)
+		v.copyOf(v0)
+		v0.AuxInt = int32ToAuxInt(off)
+		v0.Aux = symToAux(sym)
+		v0.AddArg2(ptr, mem)
+		return true
+	}
+	// match: (VMOVSDf2v x:(MOVSDconst [c] ))
+	// result: (VMOVSDconst [c] )
+	for {
+		x := v_0
+		if x.Op != OpAMD64MOVSDconst {
+			break
+		}
+		c := auxIntToFloat64(x.AuxInt)
+		v.reset(OpAMD64VMOVSDconst)
+		v.AuxInt = float64ToAuxInt(c)
+		return true
+	}
+	return false
+}
+func rewriteValueAMD64_OpAMD64VMOVSSf2v(v *Value) bool {
+	v_0 := v.Args[0]
+	b := v.Block
+	// match: (VMOVSSf2v x:(MOVSSload [off] {sym} ptr mem))
+	// cond: x.Uses == 1 && clobber(x)
+	// result: @x.Block (VMOVSSload [off] {sym} ptr mem)
+	for {
+		x := v_0
+		if x.Op != OpAMD64MOVSSload {
+			break
+		}
+		off := auxIntToInt32(x.AuxInt)
+		sym := auxToSym(x.Aux)
+		mem := x.Args[1]
+		ptr := x.Args[0]
+		if !(x.Uses == 1 && clobber(x)) {
+			break
+		}
+		b = x.Block
+		v0 := b.NewValue0(x.Pos, OpAMD64VMOVSSload, v.Type)
+		v.copyOf(v0)
+		v0.AuxInt = int32ToAuxInt(off)
+		v0.Aux = symToAux(sym)
+		v0.AddArg2(ptr, mem)
+		return true
+	}
+	// match: (VMOVSSf2v x:(MOVSSconst [c] ))
+	// result: (VMOVSSconst [c] )
+	for {
+		x := v_0
+		if x.Op != OpAMD64MOVSSconst {
+			break
+		}
+		c := auxIntToFloat32(x.AuxInt)
+		v.reset(OpAMD64VMOVSSconst)
+		v.AuxInt = float32ToAuxInt(c)
+		return true
+	}
+	return false
+}
+func rewriteValueAMD64_OpAMD64VMULPD128(v *Value) bool {
+	v_1 := v.Args[1]
+	v_0 := v.Args[0]
+	// match: (VMULPD128 x l:(VMOVDQUload128 {sym} [off] ptr mem))
+	// cond: canMergeLoad(v, l) && clobber(l)
+	// result: (VMULPD128load {sym}
[off] x ptr mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VMULPD128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VMULPD256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VMULPD256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VMULPD256load {sym} [off] x ptr mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VMULPD256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VMULPD512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VMULPD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VMULPD512load {sym} [off] x ptr mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VMULPD512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VMULPDMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VMULPDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VMULPDMasked128load {sym} [off] x ptr mask mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VMULPDMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VMULPDMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VMULPDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VMULPDMasked256load {sym} [off] x ptr mask mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VMULPDMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VMULPDMasked512(v *Value) bool { + v_2 := v.Args[2] 
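+ // Multiply is commutative, so the match loop below tries the load in either operand position by swapping v_0 and v_1.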
+ v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VMULPDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VMULPDMasked512load {sym} [off] x ptr mask mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VMULPDMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VMULPS128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VMULPS128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VMULPS128load {sym} [off] x ptr mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VMULPS128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VMULPS256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VMULPS256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VMULPS256load {sym} [off] x ptr mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VMULPS256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VMULPS512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VMULPS512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VMULPS512load {sym} [off] x ptr mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VMULPS512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VMULPSMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VMULPSMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VMULPSMasked128load {sym} [off] x ptr mask mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VMULPSMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + 
v.AddArg4(x, ptr, mask, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VMULPSMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VMULPSMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VMULPSMasked256load {sym} [off] x ptr mask mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VMULPSMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VMULPSMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VMULPSMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VMULPSMasked512load {sym} [off] x ptr mask mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VMULPSMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPABSD128(v *Value) bool { + v_0 := v.Args[0] + // match: (VPABSD128 l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPABSD128load {sym} [off] ptr mem) + for { + l := v_0 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPABSD128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPABSD256(v *Value) bool { + v_0 := v.Args[0] + // match: (VPABSD256 l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPABSD256load {sym} [off] ptr mem) + for { + l := v_0 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPABSD256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPABSD512(v *Value) bool { + v_0 := v.Args[0] + // match: (VPABSD512 l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPABSD512load {sym} [off] ptr mem) + for { + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPABSD512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPABSDMasked128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // 
match: (VPABSDMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPABSDMasked128load {sym} [off] ptr mask mem) + for { + l := v_0 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPABSDMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPABSDMasked256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPABSDMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPABSDMasked256load {sym} [off] ptr mask mem) + for { + l := v_0 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPABSDMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPABSDMasked512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPABSDMasked512 l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPABSDMasked512load {sym} [off] ptr mask mem) + for { + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPABSDMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPABSQ128(v *Value) bool { + v_0 := v.Args[0] + // match: (VPABSQ128 l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPABSQ128load {sym} [off] ptr mem) + for { + l := v_0 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPABSQ128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPABSQ256(v *Value) bool { + v_0 := v.Args[0] + // match: (VPABSQ256 l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPABSQ256load {sym} [off] ptr mem) + for { + l := v_0 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPABSQ256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPABSQ512(v *Value) bool { + v_0 := v.Args[0] + // match: (VPABSQ512 l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPABSQ512load {sym} [off] ptr mem) + for { + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } 
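+ // The load is dead after the merge (canMergeLoad and clobber both passed), so VPABSQ512 is rewritten into its memory-operand form.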
+ v.reset(OpAMD64VPABSQ512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPABSQMasked128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPABSQMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPABSQMasked128load {sym} [off] ptr mask mem) + for { + l := v_0 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPABSQMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPABSQMasked256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPABSQMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPABSQMasked256load {sym} [off] ptr mask mem) + for { + l := v_0 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPABSQMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPABSQMasked512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPABSQMasked512 l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPABSQMasked512load {sym} [off] ptr mask mem) + for { + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPABSQMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPACKSSDW128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPACKSSDW128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPACKSSDW128load {sym} [off] x ptr mem) + for { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPACKSSDW128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPACKSSDW256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPACKSSDW256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPACKSSDW256load {sym} [off] x ptr mem) + for { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPACKSSDW256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPACKSSDW512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + 
// match: (VPACKSSDW512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPACKSSDW512load {sym} [off] x ptr mem) + for { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPACKSSDW512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPACKSSDWMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPACKSSDWMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPACKSSDWMasked128load {sym} [off] x ptr mask mem) + for { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPACKSSDWMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPACKSSDWMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPACKSSDWMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPACKSSDWMasked256load {sym} [off] x ptr mask mem) + for { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPACKSSDWMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPACKSSDWMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPACKSSDWMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPACKSSDWMasked512load {sym} [off] x ptr mask mem) + for { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPACKSSDWMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPACKUSDW128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPACKUSDW128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPACKUSDW128load {sym} [off] x ptr mem) + for { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPACKUSDW128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPACKUSDW256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPACKUSDW256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && 
clobber(l) + // result: (VPACKUSDW256load {sym} [off] x ptr mem) + for { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPACKUSDW256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPACKUSDW512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPACKUSDW512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPACKUSDW512load {sym} [off] x ptr mem) + for { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPACKUSDW512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPACKUSDWMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPACKUSDWMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPACKUSDWMasked128load {sym} [off] x ptr mask mem) + for { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPACKUSDWMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPACKUSDWMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPACKUSDWMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPACKUSDWMasked256load {sym} [off] x ptr mask mem) + for { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPACKUSDWMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPACKUSDWMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPACKUSDWMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPACKUSDWMasked512load {sym} [off] x ptr mask mem) + for { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPACKUSDWMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPADDD128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPADDD128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPADDD128load {sym} [off] x ptr mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, 
v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPADDD128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPADDD256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPADDD256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPADDD256load {sym} [off] x ptr mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPADDD256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPADDD512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPADDD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPADDD512load {sym} [off] x ptr mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPADDD512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPADDDMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPADDDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPADDDMasked128load {sym} [off] x ptr mask mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPADDDMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPADDDMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPADDDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPADDDMasked256load {sym} [off] x ptr mask mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPADDDMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPADDDMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: 
(VPADDDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPADDDMasked512load {sym} [off] x ptr mask mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPADDDMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPADDQ128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPADDQ128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPADDQ128load {sym} [off] x ptr mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPADDQ128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPADDQ256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPADDQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPADDQ256load {sym} [off] x ptr mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPADDQ256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPADDQ512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPADDQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPADDQ512load {sym} [off] x ptr mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPADDQ512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPADDQMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPADDQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPADDQMasked128load {sym} [off] x ptr mask mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPADDQMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + 
break + } + return false +} +func rewriteValueAMD64_OpAMD64VPADDQMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPADDQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPADDQMasked256load {sym} [off] x ptr mask mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPADDQMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPADDQMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPADDQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPADDQMasked512load {sym} [off] x ptr mask mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPADDQMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPANDD512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPANDD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPANDD512load {sym} [off] x ptr mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPANDD512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPANDDMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPANDDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPANDDMasked128load {sym} [off] x ptr mask mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPANDDMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPANDDMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPANDDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPANDDMasked256load {sym} [off] x ptr mask mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + continue + } + off := 
auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPANDDMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPANDDMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPANDDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPANDDMasked512load {sym} [off] x ptr mask mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPANDDMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPANDND512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPANDND512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPANDND512load {sym} [off] x ptr mem) + for { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPANDND512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPANDNDMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPANDNDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPANDNDMasked128load {sym} [off] x ptr mask mem) + for { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPANDNDMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPANDNDMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPANDNDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPANDNDMasked256load {sym} [off] x ptr mask mem) + for { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPANDNDMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPANDNDMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPANDNDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPANDNDMasked512load {sym} [off] x ptr mask mem) + for { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { 
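+ // Only a plain 512-bit unaligned vector load can be folded into the memory operand here.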
+ break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPANDNDMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPANDNQ512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPANDNQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPANDNQ512load {sym} [off] x ptr mem) + for { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPANDNQ512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPANDNQMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPANDNQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPANDNQMasked128load {sym} [off] x ptr mask mem) + for { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPANDNQMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPANDNQMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPANDNQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPANDNQMasked256load {sym} [off] x ptr mask mem) + for { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPANDNQMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPANDNQMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPANDNQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPANDNQMasked512load {sym} [off] x ptr mask mem) + for { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPANDNQMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPANDQ512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPANDQ512 x (VPMOVMToVec64x8 k)) + // result: (VMOVDQU64Masked512 x k) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + if v_1.Op != OpAMD64VPMOVMToVec64x8 { + continue + } + k := v_1.Args[0] + v.reset(OpAMD64VMOVDQU64Masked512) + v.AddArg2(x, k) + return true + } + break + } + // match: (VPANDQ512 
x (VPMOVMToVec32x16 k)) + // result: (VMOVDQU32Masked512 x k) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + if v_1.Op != OpAMD64VPMOVMToVec32x16 { + continue + } + k := v_1.Args[0] + v.reset(OpAMD64VMOVDQU32Masked512) + v.AddArg2(x, k) + return true + } + break + } + // match: (VPANDQ512 x (VPMOVMToVec16x32 k)) + // result: (VMOVDQU16Masked512 x k) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + if v_1.Op != OpAMD64VPMOVMToVec16x32 { + continue + } + k := v_1.Args[0] + v.reset(OpAMD64VMOVDQU16Masked512) + v.AddArg2(x, k) + return true + } + break + } + // match: (VPANDQ512 x (VPMOVMToVec8x64 k)) + // result: (VMOVDQU8Masked512 x k) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + if v_1.Op != OpAMD64VPMOVMToVec8x64 { + continue + } + k := v_1.Args[0] + v.reset(OpAMD64VMOVDQU8Masked512) + v.AddArg2(x, k) + return true + } + break + } + // match: (VPANDQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPANDQ512load {sym} [off] x ptr mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPANDQ512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPANDQMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPANDQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPANDQMasked128load {sym} [off] x ptr mask mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPANDQMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPANDQMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPANDQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPANDQMasked256load {sym} [off] x ptr mask mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPANDQMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPANDQMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPANDQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPANDQMasked512load {sym} [off] x ptr mask mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != 
OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPANDQMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPBLENDMDMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPBLENDMDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPBLENDMDMasked512load {sym} [off] x ptr mask mem) + for { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPBLENDMDMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPBLENDMQMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPBLENDMQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPBLENDMQMasked512load {sym} [off] x ptr mask mem) + for { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPBLENDMQMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPBROADCASTB128(v *Value) bool { + v_0 := v.Args[0] + b := v.Block + // match: (VPBROADCASTB128 x:(VPINSRB128 [0] (Zero128 ) y)) + // cond: x.Uses == 1 + // result: (VPBROADCASTB128 (VMOVQ y)) + for { + x := v_0 + if x.Op != OpAMD64VPINSRB128 || auxIntToUint8(x.AuxInt) != 0 { + break + } + y := x.Args[1] + x_0 := x.Args[0] + if x_0.Op != OpAMD64Zero128 { + break + } + if !(x.Uses == 1) { + break + } + v.reset(OpAMD64VPBROADCASTB128) + v0 := b.NewValue0(v.Pos, OpAMD64VMOVQ, types.TypeVec128) + v0.AddArg(y) + v.AddArg(v0) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPBROADCASTB256(v *Value) bool { + v_0 := v.Args[0] + b := v.Block + // match: (VPBROADCASTB256 x:(VPINSRB128 [0] (Zero128 ) y)) + // cond: x.Uses == 1 + // result: (VPBROADCASTB256 (VMOVQ y)) + for { + x := v_0 + if x.Op != OpAMD64VPINSRB128 || auxIntToUint8(x.AuxInt) != 0 { + break + } + y := x.Args[1] + x_0 := x.Args[0] + if x_0.Op != OpAMD64Zero128 { + break + } + if !(x.Uses == 1) { + break + } + v.reset(OpAMD64VPBROADCASTB256) + v0 := b.NewValue0(v.Pos, OpAMD64VMOVQ, types.TypeVec128) + v0.AddArg(y) + v.AddArg(v0) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPBROADCASTB512(v *Value) bool { + v_0 := v.Args[0] + b := v.Block + // match: (VPBROADCASTB512 x:(VPINSRB128 [0] (Zero128 ) y)) + // cond: x.Uses == 1 + // result: (VPBROADCASTB512 (VMOVQ y)) + for { + x := v_0 + if x.Op != OpAMD64VPINSRB128 || auxIntToUint8(x.AuxInt) != 0 { + break + } + y := x.Args[1] + x_0 := x.Args[0] + if x_0.Op != OpAMD64Zero128 { + break + } + if !(x.Uses == 1) { + break + } + v.reset(OpAMD64VPBROADCASTB512) + v0 := b.NewValue0(v.Pos, OpAMD64VMOVQ, types.TypeVec128) + v0.AddArg(y) + 
v.AddArg(v0) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPBROADCASTW128(v *Value) bool { + v_0 := v.Args[0] + b := v.Block + // match: (VPBROADCASTW128 x:(VPINSRW128 [0] (Zero128 ) y)) + // cond: x.Uses == 1 + // result: (VPBROADCASTW128 (VMOVQ y)) + for { + x := v_0 + if x.Op != OpAMD64VPINSRW128 || auxIntToUint8(x.AuxInt) != 0 { + break + } + y := x.Args[1] + x_0 := x.Args[0] + if x_0.Op != OpAMD64Zero128 { + break + } + if !(x.Uses == 1) { + break + } + v.reset(OpAMD64VPBROADCASTW128) + v0 := b.NewValue0(v.Pos, OpAMD64VMOVQ, types.TypeVec128) + v0.AddArg(y) + v.AddArg(v0) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPBROADCASTW256(v *Value) bool { + v_0 := v.Args[0] + b := v.Block + // match: (VPBROADCASTW256 x:(VPINSRW128 [0] (Zero128 ) y)) + // cond: x.Uses == 1 + // result: (VPBROADCASTW256 (VMOVQ y)) + for { + x := v_0 + if x.Op != OpAMD64VPINSRW128 || auxIntToUint8(x.AuxInt) != 0 { + break + } + y := x.Args[1] + x_0 := x.Args[0] + if x_0.Op != OpAMD64Zero128 { + break + } + if !(x.Uses == 1) { + break + } + v.reset(OpAMD64VPBROADCASTW256) + v0 := b.NewValue0(v.Pos, OpAMD64VMOVQ, types.TypeVec128) + v0.AddArg(y) + v.AddArg(v0) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPBROADCASTW512(v *Value) bool { + v_0 := v.Args[0] + b := v.Block + // match: (VPBROADCASTW512 x:(VPINSRW128 [0] (Zero128 ) y)) + // cond: x.Uses == 1 + // result: (VPBROADCASTW512 (VMOVQ y)) + for { + x := v_0 + if x.Op != OpAMD64VPINSRW128 || auxIntToUint8(x.AuxInt) != 0 { + break + } + y := x.Args[1] + x_0 := x.Args[0] + if x_0.Op != OpAMD64Zero128 { + break + } + if !(x.Uses == 1) { + break + } + v.reset(OpAMD64VPBROADCASTW512) + v0 := b.NewValue0(v.Pos, OpAMD64VMOVQ, types.TypeVec128) + v0.AddArg(y) + v.AddArg(v0) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPCMPEQD128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPCMPEQD128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPCMPEQD128load {sym} [off] x ptr mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPCMPEQD128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPCMPEQD256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPCMPEQD256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPCMPEQD256load {sym} [off] x ptr mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPCMPEQD256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPCMPEQD512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPCMPEQD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPCMPEQD512load {sym} [off] x ptr mem) + for { + for _i0 := 
0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPCMPEQD512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPCMPEQQ128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPCMPEQQ128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPCMPEQQ128load {sym} [off] x ptr mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPCMPEQQ128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPCMPEQQ256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPCMPEQQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPCMPEQQ256load {sym} [off] x ptr mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPCMPEQQ256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPCMPEQQ512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPCMPEQQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPCMPEQQ512load {sym} [off] x ptr mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPCMPEQQ512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPCMPGTD128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPCMPGTD128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPCMPGTD128load {sym} [off] x ptr mem) + for { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPCMPGTD128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPCMPGTD256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPCMPGTD256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPCMPGTD256load {sym} [off] x ptr mem) + for { + x := v_0 + l := v_1 + if l.Op != 
OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPCMPGTD256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPCMPGTD512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPCMPGTD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPCMPGTD512load {sym} [off] x ptr mem) + for { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPCMPGTD512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPCMPGTQ128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPCMPGTQ128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPCMPGTQ128load {sym} [off] x ptr mem) + for { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPCMPGTQ128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPCMPGTQ256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPCMPGTQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPCMPGTQ256load {sym} [off] x ptr mem) + for { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPCMPGTQ256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPCMPGTQ512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPCMPGTQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPCMPGTQ512load {sym} [off] x ptr mem) + for { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPCMPGTQ512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPDPBUSD128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPDPBUSD128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPDPBUSD128load {sym} [off] x y ptr mem) + for { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPDPBUSD128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, y, ptr, mem) 
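+ // The accumulator x and multiplicand y stay in registers; the folded load supplies the third operand from memory.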
+ return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPDPBUSD256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPDPBUSD256 x y l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPDPBUSD256load {sym} [off] x y ptr mem) + for { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPDPBUSD256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, y, ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPDPBUSD512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPDPBUSD512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPDPBUSD512load {sym} [off] x y ptr mem) + for { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPDPBUSD512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, y, ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPDPBUSDMasked128(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPDPBUSDMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPDPBUSDMasked128load {sym} [off] x y ptr mask mem) + for { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_3 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPDPBUSDMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg5(x, y, ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPDPBUSDMasked256(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPDPBUSDMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPDPBUSDMasked256load {sym} [off] x y ptr mask mem) + for { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_3 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPDPBUSDMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg5(x, y, ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPDPBUSDMasked512(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPDPBUSDMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPDPBUSDMasked512load {sym} [off] x y ptr mask mem) + for { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_3 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPDPBUSDMasked512load) + v.AuxInt = 
int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg5(x, y, ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPDPBUSDS128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPDPBUSDS128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPDPBUSDS128load {sym} [off] x y ptr mem) + for { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPDPBUSDS128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, y, ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPDPBUSDS256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPDPBUSDS256 x y l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPDPBUSDS256load {sym} [off] x y ptr mem) + for { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPDPBUSDS256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, y, ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPDPBUSDS512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPDPBUSDS512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPDPBUSDS512load {sym} [off] x y ptr mem) + for { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPDPBUSDS512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, y, ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPDPBUSDSMasked128(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPDPBUSDSMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPDPBUSDSMasked128load {sym} [off] x y ptr mask mem) + for { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_3 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPDPBUSDSMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg5(x, y, ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPDPBUSDSMasked256(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPDPBUSDSMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPDPBUSDSMasked256load {sym} [off] x y ptr mask mem) + for { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_3 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPDPBUSDSMasked256load) + 
v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg5(x, y, ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPDPBUSDSMasked512(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPDPBUSDSMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPDPBUSDSMasked512load {sym} [off] x y ptr mask mem) + for { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_3 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPDPBUSDSMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg5(x, y, ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPDPWSSD128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPDPWSSD128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPDPWSSD128load {sym} [off] x y ptr mem) + for { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPDPWSSD128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, y, ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPDPWSSD256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPDPWSSD256 x y l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPDPWSSD256load {sym} [off] x y ptr mem) + for { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPDPWSSD256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, y, ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPDPWSSD512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPDPWSSD512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPDPWSSD512load {sym} [off] x y ptr mem) + for { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPDPWSSD512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, y, ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPDPWSSDMasked128(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPDPWSSDMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPDPWSSDMasked128load {sym} [off] x y ptr mask mem) + for { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_3 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPDPWSSDMasked128load) + 
v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg5(x, y, ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPDPWSSDMasked256(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPDPWSSDMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPDPWSSDMasked256load {sym} [off] x y ptr mask mem) + for { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_3 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPDPWSSDMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg5(x, y, ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPDPWSSDMasked512(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPDPWSSDMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPDPWSSDMasked512load {sym} [off] x y ptr mask mem) + for { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_3 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPDPWSSDMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg5(x, y, ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPDPWSSDS128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPDPWSSDS128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPDPWSSDS128load {sym} [off] x y ptr mem) + for { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPDPWSSDS128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, y, ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPDPWSSDS256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPDPWSSDS256 x y l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPDPWSSDS256load {sym} [off] x y ptr mem) + for { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPDPWSSDS256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, y, ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPDPWSSDS512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPDPWSSDS512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPDPWSSDS512load {sym} [off] x y ptr mem) + for { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + 
v.reset(OpAMD64VPDPWSSDS512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, y, ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPDPWSSDSMasked128(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPDPWSSDSMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPDPWSSDSMasked128load {sym} [off] x y ptr mask mem) + for { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_3 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPDPWSSDSMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg5(x, y, ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPDPWSSDSMasked256(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPDPWSSDSMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPDPWSSDSMasked256load {sym} [off] x y ptr mask mem) + for { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_3 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPDPWSSDSMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg5(x, y, ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPDPWSSDSMasked512(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPDPWSSDSMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPDPWSSDSMasked512load {sym} [off] x y ptr mask mem) + for { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_3 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPDPWSSDSMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg5(x, y, ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPERMD256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPERMD256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPERMD256load {sym} [off] x ptr mem) + for { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPERMD256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPERMD512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPERMD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPERMD512load {sym} [off] x ptr mem) + for { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + 
v.reset(OpAMD64VPERMD512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPERMDMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPERMDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPERMDMasked256load {sym} [off] x ptr mask mem) + for { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPERMDMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPERMDMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPERMDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPERMDMasked512load {sym} [off] x ptr mask mem) + for { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPERMDMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPERMI2D128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPERMI2D128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPERMI2D128load {sym} [off] x y ptr mem) + for { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPERMI2D128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, y, ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPERMI2D256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPERMI2D256 x y l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPERMI2D256load {sym} [off] x y ptr mem) + for { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPERMI2D256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, y, ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPERMI2D512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPERMI2D512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPERMI2D512load {sym} [off] x y ptr mem) + for { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPERMI2D512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, 
y, ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPERMI2DMasked128(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPERMI2DMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPERMI2DMasked128load {sym} [off] x y ptr mask mem) + for { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_3 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPERMI2DMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg5(x, y, ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPERMI2DMasked256(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPERMI2DMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPERMI2DMasked256load {sym} [off] x y ptr mask mem) + for { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_3 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPERMI2DMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg5(x, y, ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPERMI2DMasked512(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPERMI2DMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPERMI2DMasked512load {sym} [off] x y ptr mask mem) + for { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_3 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPERMI2DMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg5(x, y, ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPERMI2PD128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPERMI2PD128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPERMI2PD128load {sym} [off] x y ptr mem) + for { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPERMI2PD128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, y, ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPERMI2PD256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPERMI2PD256 x y l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPERMI2PD256load {sym} [off] x y ptr mem) + for { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + 
v.reset(OpAMD64VPERMI2PD256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, y, ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPERMI2PD512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPERMI2PD512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPERMI2PD512load {sym} [off] x y ptr mem) + for { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPERMI2PD512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, y, ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPERMI2PDMasked128(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPERMI2PDMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPERMI2PDMasked128load {sym} [off] x y ptr mask mem) + for { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_3 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPERMI2PDMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg5(x, y, ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPERMI2PDMasked256(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPERMI2PDMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPERMI2PDMasked256load {sym} [off] x y ptr mask mem) + for { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_3 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPERMI2PDMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg5(x, y, ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPERMI2PDMasked512(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPERMI2PDMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPERMI2PDMasked512load {sym} [off] x y ptr mask mem) + for { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_3 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPERMI2PDMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg5(x, y, ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPERMI2PS128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPERMI2PS128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPERMI2PS128load {sym} [off] x y ptr mem) + for { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := 
l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPERMI2PS128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, y, ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPERMI2PS256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPERMI2PS256 x y l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPERMI2PS256load {sym} [off] x y ptr mem) + for { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPERMI2PS256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, y, ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPERMI2PS512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPERMI2PS512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPERMI2PS512load {sym} [off] x y ptr mem) + for { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPERMI2PS512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, y, ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPERMI2PSMasked128(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPERMI2PSMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPERMI2PSMasked128load {sym} [off] x y ptr mask mem) + for { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_3 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPERMI2PSMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg5(x, y, ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPERMI2PSMasked256(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPERMI2PSMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPERMI2PSMasked256load {sym} [off] x y ptr mask mem) + for { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_3 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPERMI2PSMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg5(x, y, ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPERMI2PSMasked512(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPERMI2PSMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPERMI2PSMasked512load {sym} [off] x y ptr mask mem) + for { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := 
auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_3 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPERMI2PSMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg5(x, y, ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPERMI2Q128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPERMI2Q128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPERMI2Q128load {sym} [off] x y ptr mem) + for { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPERMI2Q128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, y, ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPERMI2Q256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPERMI2Q256 x y l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPERMI2Q256load {sym} [off] x y ptr mem) + for { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPERMI2Q256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, y, ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPERMI2Q512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPERMI2Q512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPERMI2Q512load {sym} [off] x y ptr mem) + for { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPERMI2Q512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, y, ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPERMI2QMasked128(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPERMI2QMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPERMI2QMasked128load {sym} [off] x y ptr mask mem) + for { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_3 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPERMI2QMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg5(x, y, ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPERMI2QMasked256(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPERMI2QMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPERMI2QMasked256load {sym} [off] x y ptr mask mem) + for { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := 
auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_3 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPERMI2QMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg5(x, y, ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPERMI2QMasked512(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPERMI2QMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPERMI2QMasked512load {sym} [off] x y ptr mask mem) + for { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_3 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPERMI2QMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg5(x, y, ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPERMPD256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPERMPD256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPERMPD256load {sym} [off] x ptr mem) + for { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPERMPD256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPERMPD512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPERMPD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPERMPD512load {sym} [off] x ptr mem) + for { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPERMPD512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPERMPDMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPERMPDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPERMPDMasked256load {sym} [off] x ptr mask mem) + for { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPERMPDMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPERMPDMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPERMPDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPERMPDMasked512load {sym} [off] x ptr mask mem) + for { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask 
:= v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPERMPDMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPERMPS256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPERMPS256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPERMPS256load {sym} [off] x ptr mem) + for { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPERMPS256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPERMPS512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPERMPS512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPERMPS512load {sym} [off] x ptr mem) + for { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPERMPS512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPERMPSMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPERMPSMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPERMPSMasked256load {sym} [off] x ptr mask mem) + for { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPERMPSMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPERMPSMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPERMPSMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPERMPSMasked512load {sym} [off] x ptr mask mem) + for { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPERMPSMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPERMQ256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPERMQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPERMQ256load {sym} [off] x ptr mem) + for { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPERMQ256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + return 
false +} +func rewriteValueAMD64_OpAMD64VPERMQ512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPERMQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPERMQ512load {sym} [off] x ptr mem) + for { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPERMQ512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPERMQMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPERMQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPERMQMasked256load {sym} [off] x ptr mask mem) + for { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPERMQMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPERMQMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPERMQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPERMQMasked512load {sym} [off] x ptr mask mem) + for { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPERMQMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPINSRD128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPINSRD128 [0] (Zero128 ) y) + // cond: y.Type.IsFloat() + // result: (VMOVSSf2v y) + for { + if auxIntToUint8(v.AuxInt) != 0 || v_0.Op != OpAMD64Zero128 { + break + } + y := v_1 + if !(y.Type.IsFloat()) { + break + } + v.reset(OpAMD64VMOVSSf2v) + v.Type = types.TypeVec128 + v.AddArg(y) + return true + } + // match: (VPINSRD128 [0] (Zero128 ) y) + // cond: !y.Type.IsFloat() + // result: (VMOVD y) + for { + if auxIntToUint8(v.AuxInt) != 0 || v_0.Op != OpAMD64Zero128 { + break + } + y := v_1 + if !(!y.Type.IsFloat()) { + break + } + v.reset(OpAMD64VMOVD) + v.Type = types.TypeVec128 + v.AddArg(y) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPINSRQ128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPINSRQ128 [0] (Zero128 ) y) + // cond: y.Type.IsFloat() + // result: (VMOVSDf2v y) + for { + if auxIntToUint8(v.AuxInt) != 0 || v_0.Op != OpAMD64Zero128 { + break + } + y := v_1 + if !(y.Type.IsFloat()) { + break + } + v.reset(OpAMD64VMOVSDf2v) + v.Type = types.TypeVec128 + v.AddArg(y) + return true + } + // match: (VPINSRQ128 [0] (Zero128 ) y) + // cond: !y.Type.IsFloat() + // result: (VMOVQ y) + for { + if auxIntToUint8(v.AuxInt) != 0 || v_0.Op != OpAMD64Zero128 { + break + } + y := v_1 + if !(!y.Type.IsFloat()) { + break + } + v.reset(OpAMD64VMOVQ) + v.Type = types.TypeVec128 + v.AddArg(y) + return true + } + return false +} 
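The VPMAXSD matchers below, and the other min/max matchers that follow them, differ from the one-shot matchers above: VPMAXSD is commutative, so the generated code wraps the match in a two-iteration loop whose post statement swaps v_0 and v_1, letting a load in either operand position be folded into the memory-operand form. Here is a minimal, self-contained sketch of that swap idiom; it is not part of the patch, and the value type and matchCommutativeLoad helper are hypothetical stand-ins for ssa.Value and the generated matcher:

package main

import "fmt"

// value is a toy stand-in for ssa.Value: an opcode plus its arguments.
type value struct {
	op   string
	args []*value
}

// matchCommutativeLoad reports whether v has a load in either operand
// slot, returning the register operand and the load. The loop runs at
// most twice, swapping v0 and v1 in its post statement, exactly like
// the generated `for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0`
// loops in the functions below.
func matchCommutativeLoad(v *value) (x, l *value, ok bool) {
	v0, v1 := v.args[0], v.args[1]
	for i := 0; i <= 1; i, v0, v1 = i+1, v1, v0 {
		if v1.op != "load" {
			continue // no load in the second slot; retry with operands swapped
		}
		return v0, v1, true
	}
	return nil, nil, false
}

func main() {
	load := &value{op: "load"}
	reg := &value{op: "reg"}
	// The load is matched whether it is the first or the second operand.
	for _, v := range []*value{
		{op: "VPMAXSD128", args: []*value{reg, load}},
		{op: "VPMAXSD128", args: []*value{load, reg}},
	} {
		x, l, ok := matchCommutativeLoad(v)
		fmt.Println(v.op, ok, x == reg, l == load)
	}
}

Because the match succeeds for either operand order, each commutative op needs only one rule body here rather than two mirrored rules.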
+func rewriteValueAMD64_OpAMD64VPMAXSD128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPMAXSD128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPMAXSD128load {sym} [off] x ptr mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPMAXSD128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPMAXSD256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPMAXSD256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPMAXSD256load {sym} [off] x ptr mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPMAXSD256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPMAXSD512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPMAXSD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPMAXSD512load {sym} [off] x ptr mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPMAXSD512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPMAXSDMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPMAXSDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPMAXSDMasked128load {sym} [off] x ptr mask mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPMAXSDMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPMAXSDMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPMAXSDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPMAXSDMasked256load {sym} [off] x ptr mask mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + 
} + v.reset(OpAMD64VPMAXSDMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPMAXSDMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPMAXSDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPMAXSDMasked512load {sym} [off] x ptr mask mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPMAXSDMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPMAXSQ128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPMAXSQ128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPMAXSQ128load {sym} [off] x ptr mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPMAXSQ128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPMAXSQ256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPMAXSQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPMAXSQ256load {sym} [off] x ptr mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPMAXSQ256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPMAXSQ512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPMAXSQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPMAXSQ512load {sym} [off] x ptr mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPMAXSQ512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPMAXSQMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPMAXSQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPMAXSQMasked128load {sym} [off] x ptr mask mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + 
continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPMAXSQMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPMAXSQMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPMAXSQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPMAXSQMasked256load {sym} [off] x ptr mask mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPMAXSQMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPMAXSQMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPMAXSQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPMAXSQMasked512load {sym} [off] x ptr mask mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPMAXSQMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPMAXUD128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPMAXUD128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPMAXUD128load {sym} [off] x ptr mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPMAXUD128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPMAXUD256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPMAXUD256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPMAXUD256load {sym} [off] x ptr mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPMAXUD256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPMAXUD512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPMAXUD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: 
canMergeLoad(v, l) && clobber(l) + // result: (VPMAXUD512load {sym} [off] x ptr mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPMAXUD512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPMAXUDMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPMAXUDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPMAXUDMasked128load {sym} [off] x ptr mask mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPMAXUDMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPMAXUDMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPMAXUDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPMAXUDMasked256load {sym} [off] x ptr mask mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPMAXUDMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPMAXUDMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPMAXUDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPMAXUDMasked512load {sym} [off] x ptr mask mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPMAXUDMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPMAXUQ128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPMAXUQ128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPMAXUQ128load {sym} [off] x ptr mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPMAXUQ128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = 
symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPMAXUQ256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPMAXUQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPMAXUQ256load {sym} [off] x ptr mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPMAXUQ256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPMAXUQ512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPMAXUQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPMAXUQ512load {sym} [off] x ptr mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPMAXUQ512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPMAXUQMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPMAXUQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPMAXUQMasked128load {sym} [off] x ptr mask mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPMAXUQMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPMAXUQMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPMAXUQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPMAXUQMasked256load {sym} [off] x ptr mask mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPMAXUQMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPMAXUQMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPMAXUQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPMAXUQMasked512load {sym} [off] x ptr mask mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off 
:= auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPMAXUQMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPMINSD128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPMINSD128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPMINSD128load {sym} [off] x ptr mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPMINSD128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPMINSD256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPMINSD256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPMINSD256load {sym} [off] x ptr mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPMINSD256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPMINSD512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPMINSD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPMINSD512load {sym} [off] x ptr mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPMINSD512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPMINSDMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPMINSDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPMINSDMasked128load {sym} [off] x ptr mask mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPMINSDMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPMINSDMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPMINSDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPMINSDMasked256load 
{sym} [off] x ptr mask mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPMINSDMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPMINSDMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPMINSDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPMINSDMasked512load {sym} [off] x ptr mask mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPMINSDMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPMINSQ128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPMINSQ128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPMINSQ128load {sym} [off] x ptr mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPMINSQ128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPMINSQ256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPMINSQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPMINSQ256load {sym} [off] x ptr mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPMINSQ256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPMINSQ512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPMINSQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPMINSQ512load {sym} [off] x ptr mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPMINSQ512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPMINSQMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := 
v.Args[1] + v_0 := v.Args[0] + // match: (VPMINSQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPMINSQMasked128load {sym} [off] x ptr mask mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPMINSQMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPMINSQMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPMINSQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPMINSQMasked256load {sym} [off] x ptr mask mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPMINSQMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPMINSQMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPMINSQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPMINSQMasked512load {sym} [off] x ptr mask mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPMINSQMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPMINUD128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPMINUD128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPMINUD128load {sym} [off] x ptr mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPMINUD128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPMINUD256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPMINUD256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPMINUD256load {sym} [off] x ptr mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + 
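// The rewrite below retags v in place as the load-folding form: v.reset
+ // clears the op, aux fields, and args, then the load's [off] and {sym}
+ // are copied over so the merged op's memory operand addresses exactly
+ // the bytes l loaded. The shape mirrors the generating rule in
+ // simdAMD64.rules:
+ //   (VPMINUD256 x l:(VMOVDQUload256 {sym} [off] ptr mem))
+ //     && canMergeLoad(v, l) && clobber(l)
+ //     => (VPMINUD256load {sym} [off] x ptr mem)
+ // canMergeLoad reports whether l can be folded into v (l has no other
+ // uses and no conflicting memory operation can come between them);
+ // clobber(l) invalidates the now-dead load and always returns true,
+ // which is why it can appear inside the condition. +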
v.reset(OpAMD64VPMINUD256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPMINUD512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPMINUD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPMINUD512load {sym} [off] x ptr mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPMINUD512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPMINUDMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPMINUDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPMINUDMasked128load {sym} [off] x ptr mask mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPMINUDMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPMINUDMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPMINUDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPMINUDMasked256load {sym} [off] x ptr mask mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPMINUDMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPMINUDMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPMINUDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPMINUDMasked512load {sym} [off] x ptr mask mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPMINUDMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPMINUQ128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPMINUQ128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPMINUQ128load {sym} [off] x ptr mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 
= _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPMINUQ128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPMINUQ256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPMINUQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPMINUQ256load {sym} [off] x ptr mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPMINUQ256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPMINUQ512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPMINUQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPMINUQ512load {sym} [off] x ptr mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPMINUQ512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPMINUQMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPMINUQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPMINUQMasked128load {sym} [off] x ptr mask mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPMINUQMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPMINUQMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPMINUQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPMINUQMasked256load {sym} [off] x ptr mask mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPMINUQMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPMINUQMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // 
match: (VPMINUQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPMINUQMasked512load {sym} [off] x ptr mask mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPMINUQMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPMOVVec16x16ToM(v *Value) bool { + v_0 := v.Args[0] + // match: (VPMOVVec16x16ToM (VPMOVMToVec16x16 x)) + // result: x + for { + if v_0.Op != OpAMD64VPMOVMToVec16x16 { + break + } + x := v_0.Args[0] + v.copyOf(x) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPMOVVec16x32ToM(v *Value) bool { + v_0 := v.Args[0] + // match: (VPMOVVec16x32ToM (VPMOVMToVec16x32 x)) + // result: x + for { + if v_0.Op != OpAMD64VPMOVMToVec16x32 { + break + } + x := v_0.Args[0] + v.copyOf(x) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPMOVVec16x8ToM(v *Value) bool { + v_0 := v.Args[0] + // match: (VPMOVVec16x8ToM (VPMOVMToVec16x8 x)) + // result: x + for { + if v_0.Op != OpAMD64VPMOVMToVec16x8 { + break + } + x := v_0.Args[0] + v.copyOf(x) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPMOVVec32x16ToM(v *Value) bool { + v_0 := v.Args[0] + // match: (VPMOVVec32x16ToM (VPMOVMToVec32x16 x)) + // result: x + for { + if v_0.Op != OpAMD64VPMOVMToVec32x16 { + break + } + x := v_0.Args[0] + v.copyOf(x) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPMOVVec32x4ToM(v *Value) bool { + v_0 := v.Args[0] + // match: (VPMOVVec32x4ToM (VPMOVMToVec32x4 x)) + // result: x + for { + if v_0.Op != OpAMD64VPMOVMToVec32x4 { + break + } + x := v_0.Args[0] + v.copyOf(x) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPMOVVec32x8ToM(v *Value) bool { + v_0 := v.Args[0] + // match: (VPMOVVec32x8ToM (VPMOVMToVec32x8 x)) + // result: x + for { + if v_0.Op != OpAMD64VPMOVMToVec32x8 { + break + } + x := v_0.Args[0] + v.copyOf(x) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPMOVVec64x2ToM(v *Value) bool { + v_0 := v.Args[0] + // match: (VPMOVVec64x2ToM (VPMOVMToVec64x2 x)) + // result: x + for { + if v_0.Op != OpAMD64VPMOVMToVec64x2 { + break + } + x := v_0.Args[0] + v.copyOf(x) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPMOVVec64x4ToM(v *Value) bool { + v_0 := v.Args[0] + // match: (VPMOVVec64x4ToM (VPMOVMToVec64x4 x)) + // result: x + for { + if v_0.Op != OpAMD64VPMOVMToVec64x4 { + break + } + x := v_0.Args[0] + v.copyOf(x) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPMOVVec64x8ToM(v *Value) bool { + v_0 := v.Args[0] + // match: (VPMOVVec64x8ToM (VPMOVMToVec64x8 x)) + // result: x + for { + if v_0.Op != OpAMD64VPMOVMToVec64x8 { + break + } + x := v_0.Args[0] + v.copyOf(x) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPMOVVec8x16ToM(v *Value) bool { + v_0 := v.Args[0] + // match: (VPMOVVec8x16ToM (VPMOVMToVec8x16 x)) + // result: x + for { + if v_0.Op != OpAMD64VPMOVMToVec8x16 { + break + } + x := v_0.Args[0] + v.copyOf(x) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPMOVVec8x32ToM(v *Value) bool { + v_0 := v.Args[0] + // match: 
(VPMOVVec8x32ToM (VPMOVMToVec8x32 x)) + // result: x + for { + if v_0.Op != OpAMD64VPMOVMToVec8x32 { + break + } + x := v_0.Args[0] + v.copyOf(x) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPMOVVec8x64ToM(v *Value) bool { + v_0 := v.Args[0] + // match: (VPMOVVec8x64ToM (VPMOVMToVec8x64 x)) + // result: x + for { + if v_0.Op != OpAMD64VPMOVMToVec8x64 { + break + } + x := v_0.Args[0] + v.copyOf(x) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPMULDQ128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPMULDQ128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPMULDQ128load {sym} [off] x ptr mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPMULDQ128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPMULDQ256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPMULDQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPMULDQ256load {sym} [off] x ptr mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPMULDQ256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPMULLD128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPMULLD128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPMULLD128load {sym} [off] x ptr mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPMULLD128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPMULLD256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPMULLD256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPMULLD256load {sym} [off] x ptr mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPMULLD256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPMULLD512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPMULLD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: 
(VPMULLD512load {sym} [off] x ptr mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPMULLD512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPMULLDMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPMULLDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPMULLDMasked128load {sym} [off] x ptr mask mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPMULLDMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPMULLDMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPMULLDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPMULLDMasked256load {sym} [off] x ptr mask mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPMULLDMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPMULLDMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPMULLDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPMULLDMasked512load {sym} [off] x ptr mask mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPMULLDMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPMULLQ128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPMULLQ128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPMULLQ128load {sym} [off] x ptr mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPMULLQ128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } 
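+ // The break below exits the enclosing one-shot for{} once both operand
+ // orders have been tried: the _i0 loop header swaps v_0 and v_1 on its
+ // second pass, so a load in either argument of a commutative op like
+ // this multiply can be folded. The VPMOVVec*ToM rules a little earlier
+ // need no such loop; they cancel a mask->vector->mask round trip,
+ // (VPMOVVec*ToM (VPMOVMToVec* x)) => x, via v.copyOf.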
+ break + } + return false +} +func rewriteValueAMD64_OpAMD64VPMULLQ256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPMULLQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPMULLQ256load {sym} [off] x ptr mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPMULLQ256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPMULLQ512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPMULLQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPMULLQ512load {sym} [off] x ptr mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPMULLQ512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPMULLQMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPMULLQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPMULLQMasked128load {sym} [off] x ptr mask mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPMULLQMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPMULLQMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPMULLQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPMULLQMasked256load {sym} [off] x ptr mask mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPMULLQMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPMULLQMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPMULLQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPMULLQMasked512load {sym} [off] x ptr mask mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + 
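// AuxInt carries the load's 32-bit address offset and Aux its symbol;
+ // both transfer to the fused op unchanged, preserving the
+ // (sym+off)(ptr) addressing the load used. Masked forms thread the
+ // mask through as an extra register argument, so the result takes four
+ // args, (x, ptr, mask, mem), where the unmasked form takes three,
+ // (x, ptr, mem). +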
mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPMULLQMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPMULUDQ128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPMULUDQ128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPMULUDQ128load {sym} [off] x ptr mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPMULUDQ128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPMULUDQ256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPMULUDQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPMULUDQ256load {sym} [off] x ptr mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPMULUDQ256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPOPCNTD128(v *Value) bool { + v_0 := v.Args[0] + // match: (VPOPCNTD128 l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPOPCNTD128load {sym} [off] ptr mem) + for { + l := v_0 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPOPCNTD128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPOPCNTD256(v *Value) bool { + v_0 := v.Args[0] + // match: (VPOPCNTD256 l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPOPCNTD256load {sym} [off] ptr mem) + for { + l := v_0 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPOPCNTD256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPOPCNTD512(v *Value) bool { + v_0 := v.Args[0] + // match: (VPOPCNTD512 l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPOPCNTD512load {sym} [off] ptr mem) + for { + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPOPCNTD512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) + return true + } + return false +} +func 
rewriteValueAMD64_OpAMD64VPOPCNTDMasked128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPOPCNTDMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPOPCNTDMasked128load {sym} [off] ptr mask mem) + for { + l := v_0 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPOPCNTDMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPOPCNTDMasked256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPOPCNTDMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPOPCNTDMasked256load {sym} [off] ptr mask mem) + for { + l := v_0 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPOPCNTDMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPOPCNTDMasked512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPOPCNTDMasked512 l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPOPCNTDMasked512load {sym} [off] ptr mask mem) + for { + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPOPCNTDMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPOPCNTQ128(v *Value) bool { + v_0 := v.Args[0] + // match: (VPOPCNTQ128 l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPOPCNTQ128load {sym} [off] ptr mem) + for { + l := v_0 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPOPCNTQ128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPOPCNTQ256(v *Value) bool { + v_0 := v.Args[0] + // match: (VPOPCNTQ256 l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPOPCNTQ256load {sym} [off] ptr mem) + for { + l := v_0 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPOPCNTQ256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPOPCNTQ512(v *Value) bool { + v_0 := v.Args[0] + // match: (VPOPCNTQ512 l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPOPCNTQ512load {sym} [off] ptr mem) + for { + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + 
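// Unary sources such as VPOPCNT* and VPABS* have no second register
+ // operand to commute, so the generator emits a straight-line match
+ // that simply breaks on failure instead of the two-pass _i0 loop used
+ // for commutative binary ops; their masked variants fold to a
+ // three-arg (ptr, mask, mem) form. +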
off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPOPCNTQ512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPOPCNTQMasked128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPOPCNTQMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPOPCNTQMasked128load {sym} [off] ptr mask mem) + for { + l := v_0 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPOPCNTQMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPOPCNTQMasked256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPOPCNTQMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPOPCNTQMasked256load {sym} [off] ptr mask mem) + for { + l := v_0 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPOPCNTQMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPOPCNTQMasked512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPOPCNTQMasked512 l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPOPCNTQMasked512load {sym} [off] ptr mask mem) + for { + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPOPCNTQMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPORD512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPORD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPORD512load {sym} [off] x ptr mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPORD512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPORDMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPORDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPORDMasked128load {sym} [off] x ptr mask mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := 
l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPORDMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPORDMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPORDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPORDMasked256load {sym} [off] x ptr mask mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPORDMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPORDMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPORDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPORDMasked512load {sym} [off] x ptr mask mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPORDMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPORQ512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPORQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPORQ512load {sym} [off] x ptr mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPORQ512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPORQMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPORQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPORQMasked128load {sym} [off] x ptr mask mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPORQMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPORQMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPORQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) 
&& clobber(l) + // result: (VPORQMasked256load {sym} [off] x ptr mask mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPORQMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPORQMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPORQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPORQMasked512load {sym} [off] x ptr mask mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPORQMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPROLVD128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPROLVD128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPROLVD128load {sym} [off] x ptr mem) + for { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPROLVD128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPROLVD256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPROLVD256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPROLVD256load {sym} [off] x ptr mem) + for { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPROLVD256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPROLVD512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPROLVD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPROLVD512load {sym} [off] x ptr mem) + for { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPROLVD512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPROLVDMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPROLVDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPROLVDMasked128load {sym} [off] x ptr 
mask mem) + for { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPROLVDMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPROLVDMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPROLVDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPROLVDMasked256load {sym} [off] x ptr mask mem) + for { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPROLVDMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPROLVDMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPROLVDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPROLVDMasked512load {sym} [off] x ptr mask mem) + for { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPROLVDMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPROLVQ128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPROLVQ128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPROLVQ128load {sym} [off] x ptr mem) + for { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPROLVQ128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPROLVQ256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPROLVQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPROLVQ256load {sym} [off] x ptr mem) + for { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPROLVQ256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPROLVQ512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPROLVQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPROLVQ512load {sym} [off] x ptr mem) + for { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + 
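// The variable-rotate ops VPROLV*/VPRORV* are binary but not
+ // commutative, so they too get a straight-line match with no _i0 swap:
+ // the load is folded only from the second SSA argument, the position
+ // where the instruction encoding can take a memory operand. +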
if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPROLVQ512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPROLVQMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPROLVQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPROLVQMasked128load {sym} [off] x ptr mask mem) + for { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPROLVQMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPROLVQMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPROLVQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPROLVQMasked256load {sym} [off] x ptr mask mem) for { x := v_0 - if x.Op != OpAMD64MOVLload { + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { break } - off := auxIntToInt32(x.AuxInt) - sym := auxToSym(x.Aux) - mem := x.Args[1] - ptr := x.Args[0] - if !(x.Uses == 1 && clobber(x)) { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { break } - b = x.Block - v0 := b.NewValue0(x.Pos, OpAMD64VMOVDload, v.Type) - v.copyOf(v0) - v0.AuxInt = int32ToAuxInt(off) - v0.Aux = symToAux(sym) - v0.AddArg2(ptr, mem) + v.reset(OpAMD64VPROLVQMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VMOVDQU16Masked512(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPROLVQMasked512(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VMOVDQU16Masked512 (VPABSW512 x) mask) - // result: (VPABSWMasked512 x mask) + // match: (VPROLVQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPROLVQMasked512load {sym} [off] x ptr mask mem) for { - if v_0.Op != OpAMD64VPABSW512 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPABSWMasked512) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU16Masked512 (VPADDW512 x y) mask) - // result: (VPADDWMasked512 x y mask) - for { - if v_0.Op != OpAMD64VPADDW512 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPADDWMasked512) - v.AddArg3(x, y, mask) + v.reset(OpAMD64VPROLVQMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } - // match: (VMOVDQU16Masked512 (VPADDSW512 x y) mask) - // result: (VPADDSWMasked512 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VPRORVD128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPRORVD128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPRORVD128load {sym} [off] x ptr mem) for { - if v_0.Op != 
OpAMD64VPADDSW512 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPADDSWMasked512) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU16Masked512 (VPADDUSW512 x y) mask) - // result: (VPADDUSWMasked512 x y mask) - for { - if v_0.Op != OpAMD64VPADDUSW512 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPADDUSWMasked512) - v.AddArg3(x, y, mask) + v.reset(OpAMD64VPRORVD128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } - // match: (VMOVDQU16Masked512 (VPAVGW512 x y) mask) - // result: (VPAVGWMasked512 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VPRORVD256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPRORVD256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPRORVD256load {sym} [off] x ptr mem) for { - if v_0.Op != OpAMD64VPAVGW512 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPAVGWMasked512) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU16Masked512 (VPBROADCASTW512 x) mask) - // result: (VPBROADCASTWMasked512 x mask) - for { - if v_0.Op != OpAMD64VPBROADCASTW512 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPBROADCASTWMasked512) - v.AddArg2(x, mask) + v.reset(OpAMD64VPRORVD256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } - // match: (VMOVDQU16Masked512 (VPMOVSXWD512 x) mask) - // result: (VPMOVSXWDMasked512 x mask) + return false +} +func rewriteValueAMD64_OpAMD64VPRORVD512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPRORVD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPRORVD512load {sym} [off] x ptr mem) for { - if v_0.Op != OpAMD64VPMOVSXWD512 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVSXWDMasked512) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU16Masked512 (VPMOVSXWQ512 x) mask) - // result: (VPMOVSXWQMasked512 x mask) - for { - if v_0.Op != OpAMD64VPMOVSXWQ512 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVSXWQMasked512) - v.AddArg2(x, mask) + v.reset(OpAMD64VPRORVD512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } - // match: (VMOVDQU16Masked512 (VPMOVZXWD512 x) mask) - // result: (VPMOVZXWDMasked512 x mask) + return false +} +func rewriteValueAMD64_OpAMD64VPRORVDMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPRORVDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPRORVDMasked128load {sym} [off] x ptr mask mem) for { - if v_0.Op != OpAMD64VPMOVZXWD512 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { break } - x := v_0.Args[0] - mask := v_1 - 
v.reset(OpAMD64VPMOVZXWDMasked512) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU16Masked512 (VPMOVZXWQ512 x) mask) - // result: (VPMOVZXWQMasked512 x mask) - for { - if v_0.Op != OpAMD64VPMOVZXWQ512 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVZXWQMasked512) - v.AddArg2(x, mask) + v.reset(OpAMD64VPRORVDMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } - // match: (VMOVDQU16Masked512 (VPMADDWD512 x y) mask) - // result: (VPMADDWDMasked512 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VPRORVDMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPRORVDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPRORVDMasked256load {sym} [off] x ptr mask mem) for { - if v_0.Op != OpAMD64VPMADDWD512 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMADDWDMasked512) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU16Masked512 (VPMADDUBSW512 x y) mask) - // result: (VPMADDUBSWMasked512 x y mask) - for { - if v_0.Op != OpAMD64VPMADDUBSW512 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMADDUBSWMasked512) - v.AddArg3(x, y, mask) + v.reset(OpAMD64VPRORVDMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } - // match: (VMOVDQU16Masked512 (VPMAXSW512 x y) mask) - // result: (VPMAXSWMasked512 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VPRORVDMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPRORVDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPRORVDMasked512load {sym} [off] x ptr mask mem) for { - if v_0.Op != OpAMD64VPMAXSW512 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMAXSWMasked512) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU16Masked512 (VPMAXUW512 x y) mask) - // result: (VPMAXUWMasked512 x y mask) - for { - if v_0.Op != OpAMD64VPMAXUW512 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMAXUWMasked512) - v.AddArg3(x, y, mask) + v.reset(OpAMD64VPRORVDMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } - // match: (VMOVDQU16Masked512 (VPMINSW512 x y) mask) - // result: (VPMINSWMasked512 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VPRORVQ128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPRORVQ128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPRORVQ128load {sym} [off] x ptr mem) for { - if v_0.Op != OpAMD64VPMINSW512 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { break } - y := v_0.Args[1] - x := 
v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMINSWMasked512) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU16Masked512 (VPMINUW512 x y) mask) - // result: (VPMINUWMasked512 x y mask) - for { - if v_0.Op != OpAMD64VPMINUW512 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMINUWMasked512) - v.AddArg3(x, y, mask) + v.reset(OpAMD64VPRORVQ128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } - // match: (VMOVDQU16Masked512 (VPMULHW512 x y) mask) - // result: (VPMULHWMasked512 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VPRORVQ256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPRORVQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPRORVQ256load {sym} [off] x ptr mem) for { - if v_0.Op != OpAMD64VPMULHW512 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMULHWMasked512) - v.AddArg3(x, y, mask) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPRORVQ256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } - // match: (VMOVDQU16Masked512 (VPMULHUW512 x y) mask) - // result: (VPMULHUWMasked512 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VPRORVQ512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPRORVQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPRORVQ512load {sym} [off] x ptr mem) for { - if v_0.Op != OpAMD64VPMULHUW512 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMULHUWMasked512) - v.AddArg3(x, y, mask) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPRORVQ512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } - // match: (VMOVDQU16Masked512 (VPMULLW512 x y) mask) - // result: (VPMULLWMasked512 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VPRORVQMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPRORVQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPRORVQMasked128load {sym} [off] x ptr mask mem) for { - if v_0.Op != OpAMD64VPMULLW512 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMULLWMasked512) - v.AddArg3(x, y, mask) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPRORVQMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } - // match: (VMOVDQU16Masked512 (VPOPCNTW512 x) mask) - // result: (VPOPCNTWMasked512 x mask) + return false +} +func rewriteValueAMD64_OpAMD64VPRORVQMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + 
// match: (VPRORVQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPRORVQMasked256load {sym} [off] x ptr mask mem) for { - if v_0.Op != OpAMD64VPOPCNTW512 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPOPCNTWMasked512) - v.AddArg2(x, mask) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPRORVQMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } - // match: (VMOVDQU16Masked512 (VPERMI2W512 x y z) mask) - // result: (VPERMI2WMasked512 x y z mask) + return false +} +func rewriteValueAMD64_OpAMD64VPRORVQMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPRORVQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPRORVQMasked512load {sym} [off] x ptr mask mem) for { - if v_0.Op != OpAMD64VPERMI2W512 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { break } - z := v_0.Args[2] - x := v_0.Args[0] - y := v_0.Args[1] - mask := v_1 - v.reset(OpAMD64VPERMI2WMasked512) - v.AddArg4(x, y, z, mask) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPRORVQMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } - // match: (VMOVDQU16Masked512 (VPSHUFHW512 [a] x) mask) - // result: (VPSHUFHWMasked512 [a] x mask) + return false +} +func rewriteValueAMD64_OpAMD64VPSHLDVD128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSHLDVD128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSHLDVD128load {sym} [off] x y ptr mem) for { - if v_0.Op != OpAMD64VPSHUFHW512 { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload128 { break } - a := auxIntToUint8(v_0.AuxInt) - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSHUFHWMasked512) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg2(x, mask) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSHLDVD128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, y, ptr, mem) return true } - // match: (VMOVDQU16Masked512 (VPERMW512 x y) mask) - // result: (VPERMWMasked512 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VPSHLDVD256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSHLDVD256 x y l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSHLDVD256load {sym} [off] x y ptr mem) for { - if v_0.Op != OpAMD64VPERMW512 { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload256 { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPERMWMasked512) - v.AddArg3(x, y, mask) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSHLDVD256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, y, ptr, mem) return true } - // match: 
(VMOVDQU16Masked512 (VPSHLDW512 [a] x y) mask) - // result: (VPSHLDWMasked512 [a] x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VPSHLDVD512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSHLDVD512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSHLDVD512load {sym} [off] x y ptr mem) for { - if v_0.Op != OpAMD64VPSHLDW512 { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload512 { break } - a := auxIntToUint8(v_0.AuxInt) - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSHLDWMasked512) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg3(x, y, mask) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSHLDVD512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, y, ptr, mem) return true } - // match: (VMOVDQU16Masked512 (VPSLLW512 x y) mask) - // result: (VPSLLWMasked512 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VPSHLDVDMasked128(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSHLDVDMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSHLDVDMasked128load {sym} [off] x y ptr mask mem) for { - if v_0.Op != OpAMD64VPSLLW512 { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_3 + if !(canMergeLoad(v, l) && clobber(l)) { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSLLWMasked512) - v.AddArg3(x, y, mask) + v.reset(OpAMD64VPSHLDVDMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg5(x, y, ptr, mask, mem) return true } - // match: (VMOVDQU16Masked512 (VPSHRDW512 [a] x y) mask) - // result: (VPSHRDWMasked512 [a] x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VPSHLDVDMasked256(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSHLDVDMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSHLDVDMasked256load {sym} [off] x y ptr mask mem) for { - if v_0.Op != OpAMD64VPSHRDW512 { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload256 { break } - a := auxIntToUint8(v_0.AuxInt) - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSHRDWMasked512) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU16Masked512 (VPSRAW512 x y) mask) - // result: (VPSRAWMasked512 x y mask) - for { - if v_0.Op != OpAMD64VPSRAW512 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_3 + if !(canMergeLoad(v, l) && clobber(l)) { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSRAWMasked512) - v.AddArg3(x, y, mask) + v.reset(OpAMD64VPSHLDVDMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg5(x, y, ptr, mask, mem) return true } - // match: (VMOVDQU16Masked512 (VPSRLW512 x y) mask) - // result: (VPSRLWMasked512 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VPSHLDVDMasked512(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSHLDVDMasked512 x 
y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSHLDVDMasked512load {sym} [off] x y ptr mask mem) for { - if v_0.Op != OpAMD64VPSRLW512 { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload512 { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSRLWMasked512) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU16Masked512 (VPSHLDVW512 x y z) mask) - // result: (VPSHLDVWMasked512 x y z mask) - for { - if v_0.Op != OpAMD64VPSHLDVW512 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_3 + if !(canMergeLoad(v, l) && clobber(l)) { break } - z := v_0.Args[2] - x := v_0.Args[0] - y := v_0.Args[1] - mask := v_1 - v.reset(OpAMD64VPSHLDVWMasked512) - v.AddArg4(x, y, z, mask) + v.reset(OpAMD64VPSHLDVDMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg5(x, y, ptr, mask, mem) return true } - // match: (VMOVDQU16Masked512 (VPSLLVW512 x y) mask) - // result: (VPSLLVWMasked512 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VPSHLDVQ128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSHLDVQ128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSHLDVQ128load {sym} [off] x y ptr mem) for { - if v_0.Op != OpAMD64VPSLLVW512 { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload128 { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSLLVWMasked512) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU16Masked512 (VPSHRDVW512 x y z) mask) - // result: (VPSHRDVWMasked512 x y z mask) - for { - if v_0.Op != OpAMD64VPSHRDVW512 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - z := v_0.Args[2] - x := v_0.Args[0] - y := v_0.Args[1] - mask := v_1 - v.reset(OpAMD64VPSHRDVWMasked512) - v.AddArg4(x, y, z, mask) + v.reset(OpAMD64VPSHLDVQ128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, y, ptr, mem) return true } - // match: (VMOVDQU16Masked512 (VPSRAVW512 x y) mask) - // result: (VPSRAVWMasked512 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VPSHLDVQ256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSHLDVQ256 x y l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSHLDVQ256load {sym} [off] x y ptr mem) for { - if v_0.Op != OpAMD64VPSRAVW512 { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload256 { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSRAVWMasked512) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU16Masked512 (VPSRLVW512 x y) mask) - // result: (VPSRLVWMasked512 x y mask) - for { - if v_0.Op != OpAMD64VPSRLVW512 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSRLVWMasked512) - v.AddArg3(x, y, mask) + v.reset(OpAMD64VPSHLDVQ256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, y, ptr, mem) return true } - // match: (VMOVDQU16Masked512 (VPSUBW512 x y) mask) - // result: (VPSUBWMasked512 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VPSHLDVQ512(v *Value) bool { + v_2 := 
v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSHLDVQ512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSHLDVQ512load {sym} [off] x y ptr mem) for { - if v_0.Op != OpAMD64VPSUBW512 { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload512 { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSUBWMasked512) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU16Masked512 (VPSUBSW512 x y) mask) - // result: (VPSUBSWMasked512 x y mask) - for { - if v_0.Op != OpAMD64VPSUBSW512 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSUBSWMasked512) - v.AddArg3(x, y, mask) + v.reset(OpAMD64VPSHLDVQ512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, y, ptr, mem) return true } - // match: (VMOVDQU16Masked512 (VPSUBUSW512 x y) mask) - // result: (VPSUBUSWMasked512 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VPSHLDVQMasked128(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSHLDVQMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSHLDVQMasked128load {sym} [off] x y ptr mask mem) for { - if v_0.Op != OpAMD64VPSUBUSW512 { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload128 { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSUBUSWMasked512) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU16Masked512 (VPSLLW512const [a] x) mask) - // result: (VPSLLWMasked512const [a] x mask) - for { - if v_0.Op != OpAMD64VPSLLW512const { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_3 + if !(canMergeLoad(v, l) && clobber(l)) { break } - a := auxIntToUint8(v_0.AuxInt) - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSLLWMasked512const) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg2(x, mask) + v.reset(OpAMD64VPSHLDVQMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg5(x, y, ptr, mask, mem) return true } - // match: (VMOVDQU16Masked512 (VPSRLW512const [a] x) mask) - // result: (VPSRLWMasked512const [a] x mask) + return false +} +func rewriteValueAMD64_OpAMD64VPSHLDVQMasked256(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSHLDVQMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSHLDVQMasked256load {sym} [off] x y ptr mask mem) for { - if v_0.Op != OpAMD64VPSRLW512const { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload256 { break } - a := auxIntToUint8(v_0.AuxInt) - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSRLWMasked512const) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU16Masked512 (VPSRAW512const [a] x) mask) - // result: (VPSRAWMasked512const [a] x mask) - for { - if v_0.Op != OpAMD64VPSRAW512const { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_3 + if !(canMergeLoad(v, l) && clobber(l)) { break } - a := auxIntToUint8(v_0.AuxInt) - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSRAWMasked512const) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg2(x, mask) + 
v.reset(OpAMD64VPSHLDVQMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg5(x, y, ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VMOVDQU32Masked512(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSHLDVQMasked512(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VMOVDQU32Masked512 (VPABSD512 x) mask) - // result: (VPABSDMasked512 x mask) + // match: (VPSHLDVQMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSHLDVQMasked512load {sym} [off] x y ptr mask mem) for { - if v_0.Op != OpAMD64VPABSD512 { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload512 { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPABSDMasked512) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU32Masked512 (VPDPWSSD512 x y z) mask) - // result: (VPDPWSSDMasked512 x y z mask) - for { - if v_0.Op != OpAMD64VPDPWSSD512 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_3 + if !(canMergeLoad(v, l) && clobber(l)) { break } - z := v_0.Args[2] - x := v_0.Args[0] - y := v_0.Args[1] - mask := v_1 - v.reset(OpAMD64VPDPWSSDMasked512) - v.AddArg4(x, y, z, mask) + v.reset(OpAMD64VPSHLDVQMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg5(x, y, ptr, mask, mem) return true } - // match: (VMOVDQU32Masked512 (VPDPWSSDS512 x y z) mask) - // result: (VPDPWSSDSMasked512 x y z mask) + return false +} +func rewriteValueAMD64_OpAMD64VPSHRDVD128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSHRDVD128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSHRDVD128load {sym} [off] x y ptr mem) for { - if v_0.Op != OpAMD64VPDPWSSDS512 { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload128 { break } - z := v_0.Args[2] - x := v_0.Args[0] - y := v_0.Args[1] - mask := v_1 - v.reset(OpAMD64VPDPWSSDSMasked512) - v.AddArg4(x, y, z, mask) - return true - } - // match: (VMOVDQU32Masked512 (VPDPBUSD512 x y z) mask) - // result: (VPDPBUSDMasked512 x y z mask) - for { - if v_0.Op != OpAMD64VPDPBUSD512 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - z := v_0.Args[2] - x := v_0.Args[0] - y := v_0.Args[1] - mask := v_1 - v.reset(OpAMD64VPDPBUSDMasked512) - v.AddArg4(x, y, z, mask) + v.reset(OpAMD64VPSHRDVD128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, y, ptr, mem) return true } - // match: (VMOVDQU32Masked512 (VPDPBUSDS512 x y z) mask) - // result: (VPDPBUSDSMasked512 x y z mask) + return false +} +func rewriteValueAMD64_OpAMD64VPSHRDVD256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSHRDVD256 x y l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSHRDVD256load {sym} [off] x y ptr mem) for { - if v_0.Op != OpAMD64VPDPBUSDS512 { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload256 { break } - z := v_0.Args[2] - x := v_0.Args[0] - y := v_0.Args[1] - mask := v_1 - v.reset(OpAMD64VPDPBUSDSMasked512) - v.AddArg4(x, y, z, mask) - return true - } - // match: (VMOVDQU32Masked512 (VADDPS512 x y) mask) - // result: (VADDPSMasked512 x y mask) - for { - if v_0.Op != OpAMD64VADDPS512 { + off := auxIntToInt32(l.AuxInt) + sym := 
auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VADDPSMasked512) - v.AddArg3(x, y, mask) + v.reset(OpAMD64VPSHRDVD256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, y, ptr, mem) return true } - // match: (VMOVDQU32Masked512 (VPADDD512 x y) mask) - // result: (VPADDDMasked512 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VPSHRDVD512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSHRDVD512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSHRDVD512load {sym} [off] x y ptr mem) for { - if v_0.Op != OpAMD64VPADDD512 { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload512 { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPADDDMasked512) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU32Masked512 (VPANDD512 x y) mask) - // result: (VPANDDMasked512 x y mask) - for { - if v_0.Op != OpAMD64VPANDD512 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPANDDMasked512) - v.AddArg3(x, y, mask) + v.reset(OpAMD64VPSHRDVD512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, y, ptr, mem) return true } - // match: (VMOVDQU32Masked512 (VPANDND512 x y) mask) - // result: (VPANDNDMasked512 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VPSHRDVDMasked128(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSHRDVDMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSHRDVDMasked128load {sym} [off] x y ptr mask mem) for { - if v_0.Op != OpAMD64VPANDND512 { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload128 { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPANDNDMasked512) - v.AddArg3(x, y, mask) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_3 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSHRDVDMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg5(x, y, ptr, mask, mem) return true } - // match: (VMOVDQU32Masked512 (VBROADCASTSS512 x) mask) - // result: (VBROADCASTSSMasked512 x mask) + return false +} +func rewriteValueAMD64_OpAMD64VPSHRDVDMasked256(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSHRDVDMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSHRDVDMasked256load {sym} [off] x y ptr mask mem) for { - if v_0.Op != OpAMD64VBROADCASTSS512 { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload256 { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VBROADCASTSSMasked512) - v.AddArg2(x, mask) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_3 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSHRDVDMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg5(x, y, ptr, mask, mem) return true } - // match: (VMOVDQU32Masked512 (VPBROADCASTD512 x) mask) - // result: 
(VPBROADCASTDMasked512 x mask) + return false +} +func rewriteValueAMD64_OpAMD64VPSHRDVDMasked512(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSHRDVDMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSHRDVDMasked512load {sym} [off] x y ptr mask mem) for { - if v_0.Op != OpAMD64VPBROADCASTD512 { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload512 { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPBROADCASTDMasked512) - v.AddArg2(x, mask) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_3 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSHRDVDMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg5(x, y, ptr, mask, mem) return true } - // match: (VMOVDQU32Masked512 (VRNDSCALEPS512 [a] x) mask) - // result: (VRNDSCALEPSMasked512 [a] x mask) + return false +} +func rewriteValueAMD64_OpAMD64VPSHRDVQ128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSHRDVQ128 x y l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSHRDVQ128load {sym} [off] x y ptr mem) for { - if v_0.Op != OpAMD64VRNDSCALEPS512 { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload128 { break } - a := auxIntToUint8(v_0.AuxInt) - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VRNDSCALEPSMasked512) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg2(x, mask) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSHRDVQ128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, y, ptr, mem) return true } - // match: (VMOVDQU32Masked512 (VREDUCEPS512 [a] x) mask) - // result: (VREDUCEPSMasked512 [a] x mask) + return false +} +func rewriteValueAMD64_OpAMD64VPSHRDVQ256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSHRDVQ256 x y l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSHRDVQ256load {sym} [off] x y ptr mem) for { - if v_0.Op != OpAMD64VREDUCEPS512 { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload256 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - a := auxIntToUint8(v_0.AuxInt) - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VREDUCEPSMasked512) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg2(x, mask) + v.reset(OpAMD64VPSHRDVQ256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, y, ptr, mem) return true } - // match: (VMOVDQU32Masked512 (VPACKSSDW512 x y) mask) - // result: (VPACKSSDWMasked512 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VPSHRDVQ512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSHRDVQ512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSHRDVQ512load {sym} [off] x y ptr mem) for { - if v_0.Op != OpAMD64VPACKSSDW512 { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload512 { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPACKSSDWMasked512) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU32Masked512 (VCVTTPS2DQ512 x) 
mask) - // result: (VCVTTPS2DQMasked512 x mask) - for { - if v_0.Op != OpAMD64VCVTTPS2DQ512 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VCVTTPS2DQMasked512) - v.AddArg2(x, mask) + v.reset(OpAMD64VPSHRDVQ512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, y, ptr, mem) return true } - // match: (VMOVDQU32Masked512 (VPMOVSXDQ512 x) mask) - // result: (VPMOVSXDQMasked512 x mask) + return false +} +func rewriteValueAMD64_OpAMD64VPSHRDVQMasked128(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSHRDVQMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSHRDVQMasked128load {sym} [off] x y ptr mask mem) for { - if v_0.Op != OpAMD64VPMOVSXDQ512 { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload128 { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVSXDQMasked512) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU32Masked512 (VPACKUSDW512 x y) mask) - // result: (VPACKUSDWMasked512 x y mask) - for { - if v_0.Op != OpAMD64VPACKUSDW512 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_3 + if !(canMergeLoad(v, l) && clobber(l)) { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPACKUSDWMasked512) - v.AddArg3(x, y, mask) + v.reset(OpAMD64VPSHRDVQMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg5(x, y, ptr, mask, mem) return true } - // match: (VMOVDQU32Masked512 (VCVTPS2UDQ512 x) mask) - // result: (VCVTPS2UDQMasked512 x mask) + return false +} +func rewriteValueAMD64_OpAMD64VPSHRDVQMasked256(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSHRDVQMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSHRDVQMasked256load {sym} [off] x y ptr mask mem) for { - if v_0.Op != OpAMD64VCVTPS2UDQ512 { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload256 { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VCVTPS2UDQMasked512) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU32Masked512 (VPMOVZXDQ512 x) mask) - // result: (VPMOVZXDQMasked512 x mask) - for { - if v_0.Op != OpAMD64VPMOVZXDQ512 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_3 + if !(canMergeLoad(v, l) && clobber(l)) { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVZXDQMasked512) - v.AddArg2(x, mask) + v.reset(OpAMD64VPSHRDVQMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg5(x, y, ptr, mask, mem) return true } - // match: (VMOVDQU32Masked512 (VDIVPS512 x y) mask) - // result: (VDIVPSMasked512 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VPSHRDVQMasked512(v *Value) bool { + v_3 := v.Args[3] + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSHRDVQMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSHRDVQMasked512load {sym} [off] x y ptr mask mem) for { - if v_0.Op != OpAMD64VDIVPS512 { + x := v_0 + y := v_1 + l := v_2 + if l.Op != OpAMD64VMOVDQUload512 { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - 
v.reset(OpAMD64VDIVPSMasked512) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU32Masked512 (VPLZCNTD512 x) mask) - // result: (VPLZCNTDMasked512 x mask) - for { - if v_0.Op != OpAMD64VPLZCNTD512 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_3 + if !(canMergeLoad(v, l) && clobber(l)) { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPLZCNTDMasked512) - v.AddArg2(x, mask) + v.reset(OpAMD64VPSHRDVQMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg5(x, y, ptr, mask, mem) return true } - // match: (VMOVDQU32Masked512 (VMAXPS512 x y) mask) - // result: (VMAXPSMasked512 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VPSLLD128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSLLD128 x (MOVQconst [c])) + // result: (VPSLLD128const [uint8(c)] x) for { - if v_0.Op != OpAMD64VMAXPS512 { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VMAXPSMasked512) - v.AddArg3(x, y, mask) + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSLLD128const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg(x) return true } - // match: (VMOVDQU32Masked512 (VPMAXSD512 x y) mask) - // result: (VPMAXSDMasked512 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VPSLLD256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSLLD256 x (MOVQconst [c])) + // result: (VPSLLD256const [uint8(c)] x) for { - if v_0.Op != OpAMD64VPMAXSD512 { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMAXSDMasked512) - v.AddArg3(x, y, mask) + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSLLD256const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg(x) return true } - // match: (VMOVDQU32Masked512 (VPMAXUD512 x y) mask) - // result: (VPMAXUDMasked512 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VPSLLD512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSLLD512 x (MOVQconst [c])) + // result: (VPSLLD512const [uint8(c)] x) for { - if v_0.Op != OpAMD64VPMAXUD512 { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMAXUDMasked512) - v.AddArg3(x, y, mask) + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSLLD512const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg(x) return true } - // match: (VMOVDQU32Masked512 (VMINPS512 x y) mask) - // result: (VMINPSMasked512 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VPSLLDMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSLLDMasked128 x (MOVQconst [c]) mask) + // result: (VPSLLDMasked128const [uint8(c)] x mask) for { - if v_0.Op != OpAMD64VMINPS512 { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VMINPSMasked512) - v.AddArg3(x, y, mask) + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSLLDMasked128const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg2(x, mask) return true } - // match: (VMOVDQU32Masked512 (VPMINSD512 x y) mask) - // result: (VPMINSDMasked512 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VPSLLDMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSLLDMasked256 x (MOVQconst [c]) mask) + // result: (VPSLLDMasked256const [uint8(c)] x mask) for { - if 
v_0.Op != OpAMD64VPMINSD512 { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMINSDMasked512) - v.AddArg3(x, y, mask) + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSLLDMasked256const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg2(x, mask) return true } - // match: (VMOVDQU32Masked512 (VPMINUD512 x y) mask) - // result: (VPMINUDMasked512 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VPSLLDMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSLLDMasked512 x (MOVQconst [c]) mask) + // result: (VPSLLDMasked512const [uint8(c)] x mask) for { - if v_0.Op != OpAMD64VPMINUD512 { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMINUDMasked512) - v.AddArg3(x, y, mask) + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSLLDMasked512const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg2(x, mask) return true } - // match: (VMOVDQU32Masked512 (VFMADD213PS512 x y z) mask) - // result: (VFMADD213PSMasked512 x y z mask) + return false +} +func rewriteValueAMD64_OpAMD64VPSLLQ128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSLLQ128 x (MOVQconst [c])) + // result: (VPSLLQ128const [uint8(c)] x) for { - if v_0.Op != OpAMD64VFMADD213PS512 { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { break } - z := v_0.Args[2] - x := v_0.Args[0] - y := v_0.Args[1] - mask := v_1 - v.reset(OpAMD64VFMADD213PSMasked512) - v.AddArg4(x, y, z, mask) + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSLLQ128const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg(x) return true } - // match: (VMOVDQU32Masked512 (VFMADDSUB213PS512 x y z) mask) - // result: (VFMADDSUB213PSMasked512 x y z mask) + return false +} +func rewriteValueAMD64_OpAMD64VPSLLQ256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSLLQ256 x (MOVQconst [c])) + // result: (VPSLLQ256const [uint8(c)] x) for { - if v_0.Op != OpAMD64VFMADDSUB213PS512 { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { break } - z := v_0.Args[2] - x := v_0.Args[0] - y := v_0.Args[1] - mask := v_1 - v.reset(OpAMD64VFMADDSUB213PSMasked512) - v.AddArg4(x, y, z, mask) + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSLLQ256const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg(x) return true } - // match: (VMOVDQU32Masked512 (VMULPS512 x y) mask) - // result: (VMULPSMasked512 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VPSLLQ512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSLLQ512 x (MOVQconst [c])) + // result: (VPSLLQ512const [uint8(c)] x) for { - if v_0.Op != OpAMD64VMULPS512 { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VMULPSMasked512) - v.AddArg3(x, y, mask) + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSLLQ512const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg(x) return true } - // match: (VMOVDQU32Masked512 (VPMULLD512 x y) mask) - // result: (VPMULLDMasked512 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VPSLLQMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSLLQMasked128 x (MOVQconst [c]) mask) + // result: (VPSLLQMasked128const [uint8(c)] x mask) for { - if v_0.Op != OpAMD64VPMULLD512 { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - 
v.reset(OpAMD64VPMULLDMasked512) - v.AddArg3(x, y, mask) + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSLLQMasked128const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg2(x, mask) return true } - // match: (VMOVDQU32Masked512 (VFMSUBADD213PS512 x y z) mask) - // result: (VFMSUBADD213PSMasked512 x y z mask) + return false +} +func rewriteValueAMD64_OpAMD64VPSLLQMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSLLQMasked256 x (MOVQconst [c]) mask) + // result: (VPSLLQMasked256const [uint8(c)] x mask) for { - if v_0.Op != OpAMD64VFMSUBADD213PS512 { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { break } - z := v_0.Args[2] - x := v_0.Args[0] - y := v_0.Args[1] - mask := v_1 - v.reset(OpAMD64VFMSUBADD213PSMasked512) - v.AddArg4(x, y, z, mask) + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSLLQMasked256const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg2(x, mask) return true } - // match: (VMOVDQU32Masked512 (VPOPCNTD512 x) mask) - // result: (VPOPCNTDMasked512 x mask) + return false +} +func rewriteValueAMD64_OpAMD64VPSLLQMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSLLQMasked512 x (MOVQconst [c]) mask) + // result: (VPSLLQMasked512const [uint8(c)] x mask) for { - if v_0.Op != OpAMD64VPOPCNTD512 { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPOPCNTDMasked512) + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSLLQMasked512const) + v.AuxInt = uint8ToAuxInt(uint8(c)) v.AddArg2(x, mask) return true } - // match: (VMOVDQU32Masked512 (VPORD512 x y) mask) - // result: (VPORDMasked512 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VPSLLVD128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSLLVD128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSLLVD128load {sym} [off] x ptr mem) for { - if v_0.Op != OpAMD64VPORD512 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPORDMasked512) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU32Masked512 (VPERMI2PS512 x y z) mask) - // result: (VPERMI2PSMasked512 x y z mask) - for { - if v_0.Op != OpAMD64VPERMI2PS512 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - z := v_0.Args[2] - x := v_0.Args[0] - y := v_0.Args[1] - mask := v_1 - v.reset(OpAMD64VPERMI2PSMasked512) - v.AddArg4(x, y, z, mask) + v.reset(OpAMD64VPSLLVD128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } - // match: (VMOVDQU32Masked512 (VPERMI2D512 x y z) mask) - // result: (VPERMI2DMasked512 x y z mask) + return false +} +func rewriteValueAMD64_OpAMD64VPSLLVD256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSLLVD256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSLLVD256load {sym} [off] x ptr mem) for { - if v_0.Op != OpAMD64VPERMI2D512 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { break } - z := v_0.Args[2] - x := v_0.Args[0] - y := v_0.Args[1] - mask := v_1 - v.reset(OpAMD64VPERMI2DMasked512) - v.AddArg4(x, y, z, mask) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) 
{ + break + } + v.reset(OpAMD64VPSLLVD256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } - // match: (VMOVDQU32Masked512 (VPSHUFD512 [a] x) mask) - // result: (VPSHUFDMasked512 [a] x mask) + return false +} +func rewriteValueAMD64_OpAMD64VPSLLVD512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSLLVD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSLLVD512load {sym} [off] x ptr mem) for { - if v_0.Op != OpAMD64VPSHUFD512 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { break } - a := auxIntToUint8(v_0.AuxInt) - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSHUFDMasked512) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg2(x, mask) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSLLVD512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } - // match: (VMOVDQU32Masked512 (VPERMPS512 x y) mask) - // result: (VPERMPSMasked512 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VPSLLVDMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSLLVDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSLLVDMasked128load {sym} [off] x ptr mask mem) for { - if v_0.Op != OpAMD64VPERMPS512 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPERMPSMasked512) - v.AddArg3(x, y, mask) + v.reset(OpAMD64VPSLLVDMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } - // match: (VMOVDQU32Masked512 (VPERMD512 x y) mask) - // result: (VPERMDMasked512 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VPSLLVDMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSLLVDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSLLVDMasked256load {sym} [off] x ptr mask mem) for { - if v_0.Op != OpAMD64VPERMD512 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPERMDMasked512) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU32Masked512 (VRCP14PS512 x) mask) - // result: (VRCP14PSMasked512 x mask) - for { - if v_0.Op != OpAMD64VRCP14PS512 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VRCP14PSMasked512) - v.AddArg2(x, mask) + v.reset(OpAMD64VPSLLVDMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } - // match: (VMOVDQU32Masked512 (VRSQRT14PS512 x) mask) - // result: (VRSQRT14PSMasked512 x mask) + return false +} +func rewriteValueAMD64_OpAMD64VPSLLVDMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSLLVDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // 
result: (VPSLLVDMasked512load {sym} [off] x ptr mask mem) for { - if v_0.Op != OpAMD64VRSQRT14PS512 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VRSQRT14PSMasked512) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU32Masked512 (VPROLD512 [a] x) mask) - // result: (VPROLDMasked512 [a] x mask) - for { - if v_0.Op != OpAMD64VPROLD512 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { break } - a := auxIntToUint8(v_0.AuxInt) - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPROLDMasked512) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg2(x, mask) + v.reset(OpAMD64VPSLLVDMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } - // match: (VMOVDQU32Masked512 (VPRORD512 [a] x) mask) - // result: (VPRORDMasked512 [a] x mask) + return false +} +func rewriteValueAMD64_OpAMD64VPSLLVQ128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSLLVQ128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSLLVQ128load {sym} [off] x ptr mem) for { - if v_0.Op != OpAMD64VPRORD512 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { break } - a := auxIntToUint8(v_0.AuxInt) - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPRORDMasked512) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU32Masked512 (VPROLVD512 x y) mask) - // result: (VPROLVDMasked512 x y mask) - for { - if v_0.Op != OpAMD64VPROLVD512 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPROLVDMasked512) - v.AddArg3(x, y, mask) + v.reset(OpAMD64VPSLLVQ128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } - // match: (VMOVDQU32Masked512 (VPRORVD512 x y) mask) - // result: (VPRORVDMasked512 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VPSLLVQ256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSLLVQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSLLVQ256load {sym} [off] x ptr mem) for { - if v_0.Op != OpAMD64VPRORVD512 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPRORVDMasked512) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU32Masked512 (VSCALEFPS512 x y) mask) - // result: (VSCALEFPSMasked512 x y mask) - for { - if v_0.Op != OpAMD64VSCALEFPS512 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VSCALEFPSMasked512) - v.AddArg3(x, y, mask) + v.reset(OpAMD64VPSLLVQ256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } - // match: (VMOVDQU32Masked512 (VPSHLDD512 [a] x y) mask) - // result: (VPSHLDDMasked512 [a] x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VPSLLVQ512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSLLVQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSLLVQ512load {sym} 
[off] x ptr mem) for { - if v_0.Op != OpAMD64VPSHLDD512 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { break } - a := auxIntToUint8(v_0.AuxInt) - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSHLDDMasked512) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU32Masked512 (VPSLLD512 x y) mask) - // result: (VPSLLDMasked512 x y mask) - for { - if v_0.Op != OpAMD64VPSLLD512 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSLLDMasked512) - v.AddArg3(x, y, mask) + v.reset(OpAMD64VPSLLVQ512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } - // match: (VMOVDQU32Masked512 (VPSHRDD512 [a] x y) mask) - // result: (VPSHRDDMasked512 [a] x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VPSLLVQMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSLLVQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSLLVQMasked128load {sym} [off] x ptr mask mem) for { - if v_0.Op != OpAMD64VPSHRDD512 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { break } - a := auxIntToUint8(v_0.AuxInt) - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSHRDDMasked512) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg3(x, y, mask) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSLLVQMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } - // match: (VMOVDQU32Masked512 (VPSRAD512 x y) mask) - // result: (VPSRADMasked512 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VPSLLVQMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSLLVQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSLLVQMasked256load {sym} [off] x ptr mask mem) for { - if v_0.Op != OpAMD64VPSRAD512 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSRADMasked512) - v.AddArg3(x, y, mask) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSLLVQMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } - // match: (VMOVDQU32Masked512 (VPSRLD512 x y) mask) - // result: (VPSRLDMasked512 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VPSLLVQMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSLLVQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSLLVQMasked512load {sym} [off] x ptr mask mem) for { - if v_0.Op != OpAMD64VPSRLD512 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSRLDMasked512) - v.AddArg3(x, y, mask) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if 
!(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSLLVQMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } - // match: (VMOVDQU32Masked512 (VPSHLDVD512 x y z) mask) - // result: (VPSHLDVDMasked512 x y z mask) + return false +} +func rewriteValueAMD64_OpAMD64VPSLLW128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSLLW128 x (MOVQconst [c])) + // result: (VPSLLW128const [uint8(c)] x) for { - if v_0.Op != OpAMD64VPSHLDVD512 { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { break } - z := v_0.Args[2] - x := v_0.Args[0] - y := v_0.Args[1] - mask := v_1 - v.reset(OpAMD64VPSHLDVDMasked512) - v.AddArg4(x, y, z, mask) + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSLLW128const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg(x) return true } - // match: (VMOVDQU32Masked512 (VPSLLVD512 x y) mask) - // result: (VPSLLVDMasked512 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VPSLLW256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSLLW256 x (MOVQconst [c])) + // result: (VPSLLW256const [uint8(c)] x) for { - if v_0.Op != OpAMD64VPSLLVD512 { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSLLVDMasked512) - v.AddArg3(x, y, mask) + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSLLW256const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg(x) return true } - // match: (VMOVDQU32Masked512 (VPSHRDVD512 x y z) mask) - // result: (VPSHRDVDMasked512 x y z mask) + return false +} +func rewriteValueAMD64_OpAMD64VPSLLW512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSLLW512 x (MOVQconst [c])) + // result: (VPSLLW512const [uint8(c)] x) for { - if v_0.Op != OpAMD64VPSHRDVD512 { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { break } - z := v_0.Args[2] - x := v_0.Args[0] - y := v_0.Args[1] - mask := v_1 - v.reset(OpAMD64VPSHRDVDMasked512) - v.AddArg4(x, y, z, mask) + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSLLW512const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg(x) return true } - // match: (VMOVDQU32Masked512 (VPSRAVD512 x y) mask) - // result: (VPSRAVDMasked512 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VPSLLWMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSLLWMasked128 x (MOVQconst [c]) mask) + // result: (VPSLLWMasked128const [uint8(c)] x mask) for { - if v_0.Op != OpAMD64VPSRAVD512 { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSRAVDMasked512) - v.AddArg3(x, y, mask) + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSLLWMasked128const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg2(x, mask) return true } - // match: (VMOVDQU32Masked512 (VPSRLVD512 x y) mask) - // result: (VPSRLVDMasked512 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VPSLLWMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSLLWMasked256 x (MOVQconst [c]) mask) + // result: (VPSLLWMasked256const [uint8(c)] x mask) for { - if v_0.Op != OpAMD64VPSRLVD512 { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSRLVDMasked512) - v.AddArg3(x, y, mask) + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSLLWMasked256const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg2(x, mask) return true } - 
// match: (VMOVDQU32Masked512 (VSQRTPS512 x) mask) - // result: (VSQRTPSMasked512 x mask) + return false +} +func rewriteValueAMD64_OpAMD64VPSLLWMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSLLWMasked512 x (MOVQconst [c]) mask) + // result: (VPSLLWMasked512const [uint8(c)] x mask) for { - if v_0.Op != OpAMD64VSQRTPS512 { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VSQRTPSMasked512) + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSLLWMasked512const) + v.AuxInt = uint8ToAuxInt(uint8(c)) v.AddArg2(x, mask) return true } - // match: (VMOVDQU32Masked512 (VSUBPS512 x y) mask) - // result: (VSUBPSMasked512 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VPSRAD128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRAD128 x (MOVQconst [c])) + // result: (VPSRAD128const [uint8(c)] x) for { - if v_0.Op != OpAMD64VSUBPS512 { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VSUBPSMasked512) - v.AddArg3(x, y, mask) + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSRAD128const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg(x) return true } - // match: (VMOVDQU32Masked512 (VPSUBD512 x y) mask) - // result: (VPSUBDMasked512 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VPSRAD256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRAD256 x (MOVQconst [c])) + // result: (VPSRAD256const [uint8(c)] x) for { - if v_0.Op != OpAMD64VPSUBD512 { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSUBDMasked512) - v.AddArg3(x, y, mask) + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSRAD256const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg(x) return true } - // match: (VMOVDQU32Masked512 (VPXORD512 x y) mask) - // result: (VPXORDMasked512 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VPSRAD512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRAD512 x (MOVQconst [c])) + // result: (VPSRAD512const [uint8(c)] x) for { - if v_0.Op != OpAMD64VPXORD512 { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPXORDMasked512) - v.AddArg3(x, y, mask) + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSRAD512const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg(x) return true } - // match: (VMOVDQU32Masked512 (VPSLLD512const [a] x) mask) - // result: (VPSLLDMasked512const [a] x mask) + return false +} +func rewriteValueAMD64_OpAMD64VPSRADMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRADMasked128 x (MOVQconst [c]) mask) + // result: (VPSRADMasked128const [uint8(c)] x mask) for { - if v_0.Op != OpAMD64VPSLLD512const { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { break } - a := auxIntToUint8(v_0.AuxInt) - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSLLDMasked512const) - v.AuxInt = uint8ToAuxInt(a) + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSRADMasked128const) + v.AuxInt = uint8ToAuxInt(uint8(c)) v.AddArg2(x, mask) return true } - // match: (VMOVDQU32Masked512 (VPSRLD512const [a] x) mask) - // result: (VPSRLDMasked512const [a] x mask) + return false +} +func rewriteValueAMD64_OpAMD64VPSRADMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRADMasked256 
x (MOVQconst [c]) mask) + // result: (VPSRADMasked256const [uint8(c)] x mask) for { - if v_0.Op != OpAMD64VPSRLD512const { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { break } - a := auxIntToUint8(v_0.AuxInt) - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSRLDMasked512const) - v.AuxInt = uint8ToAuxInt(a) + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSRADMasked256const) + v.AuxInt = uint8ToAuxInt(uint8(c)) v.AddArg2(x, mask) return true } - // match: (VMOVDQU32Masked512 (VPSRAD512const [a] x) mask) - // result: (VPSRADMasked512const [a] x mask) + return false +} +func rewriteValueAMD64_OpAMD64VPSRADMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRADMasked512 x (MOVQconst [c]) mask) + // result: (VPSRADMasked512const [uint8(c)] x mask) for { - if v_0.Op != OpAMD64VPSRAD512const { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { break } - a := auxIntToUint8(v_0.AuxInt) - x := v_0.Args[0] - mask := v_1 + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 v.reset(OpAMD64VPSRADMasked512const) - v.AuxInt = uint8ToAuxInt(a) + v.AuxInt = uint8ToAuxInt(uint8(c)) v.AddArg2(x, mask) return true } return false } -func rewriteValueAMD64_OpAMD64VMOVDQU64Masked512(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSRAQ128(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VMOVDQU64Masked512 (VPABSQ512 x) mask) - // result: (VPABSQMasked512 x mask) + // match: (VPSRAQ128 x (MOVQconst [c])) + // result: (VPSRAQ128const [uint8(c)] x) for { - if v_0.Op != OpAMD64VPABSQ512 { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPABSQMasked512) - v.AddArg2(x, mask) + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSRAQ128const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg(x) return true } - // match: (VMOVDQU64Masked512 (VADDPD512 x y) mask) - // result: (VADDPDMasked512 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VPSRAQ256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRAQ256 x (MOVQconst [c])) + // result: (VPSRAQ256const [uint8(c)] x) for { - if v_0.Op != OpAMD64VADDPD512 { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VADDPDMasked512) - v.AddArg3(x, y, mask) + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSRAQ256const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg(x) return true } - // match: (VMOVDQU64Masked512 (VPADDQ512 x y) mask) - // result: (VPADDQMasked512 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VPSRAQ512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRAQ512 x (MOVQconst [c])) + // result: (VPSRAQ512const [uint8(c)] x) for { - if v_0.Op != OpAMD64VPADDQ512 { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPADDQMasked512) - v.AddArg3(x, y, mask) + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSRAQ512const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg(x) return true } - // match: (VMOVDQU64Masked512 (VPANDQ512 x y) mask) - // result: (VPANDQMasked512 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VPSRAQMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRAQMasked128 x (MOVQconst [c]) mask) + // result: (VPSRAQMasked128const [uint8(c)] x mask) for { - if v_0.Op != OpAMD64VPANDQ512 { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { break } - y := v_0.Args[1] - x := v_0.Args[0] 
- mask := v_1 - v.reset(OpAMD64VPANDQMasked512) - v.AddArg3(x, y, mask) + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSRAQMasked128const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg2(x, mask) return true } - // match: (VMOVDQU64Masked512 (VPANDNQ512 x y) mask) - // result: (VPANDNQMasked512 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VPSRAQMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRAQMasked256 x (MOVQconst [c]) mask) + // result: (VPSRAQMasked256const [uint8(c)] x mask) for { - if v_0.Op != OpAMD64VPANDNQ512 { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPANDNQMasked512) - v.AddArg3(x, y, mask) + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSRAQMasked256const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg2(x, mask) return true } - // match: (VMOVDQU64Masked512 (VBROADCASTSD512 x) mask) - // result: (VBROADCASTSDMasked512 x mask) + return false +} +func rewriteValueAMD64_OpAMD64VPSRAQMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRAQMasked512 x (MOVQconst [c]) mask) + // result: (VPSRAQMasked512const [uint8(c)] x mask) for { - if v_0.Op != OpAMD64VBROADCASTSD512 { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VBROADCASTSDMasked512) + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSRAQMasked512const) + v.AuxInt = uint8ToAuxInt(uint8(c)) v.AddArg2(x, mask) return true } - // match: (VMOVDQU64Masked512 (VPBROADCASTQ512 x) mask) - // result: (VPBROADCASTQMasked512 x mask) + return false +} +func rewriteValueAMD64_OpAMD64VPSRAVD128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRAVD128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSRAVD128load {sym} [off] x ptr mem) for { - if v_0.Op != OpAMD64VPBROADCASTQ512 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPBROADCASTQMasked512) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU64Masked512 (VRNDSCALEPD512 [a] x) mask) - // result: (VRNDSCALEPDMasked512 [a] x mask) - for { - if v_0.Op != OpAMD64VRNDSCALEPD512 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - a := auxIntToUint8(v_0.AuxInt) - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VRNDSCALEPDMasked512) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg2(x, mask) + v.reset(OpAMD64VPSRAVD128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } - // match: (VMOVDQU64Masked512 (VREDUCEPD512 [a] x) mask) - // result: (VREDUCEPDMasked512 [a] x mask) + return false +} +func rewriteValueAMD64_OpAMD64VPSRAVD256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRAVD256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSRAVD256load {sym} [off] x ptr mem) for { - if v_0.Op != OpAMD64VREDUCEPD512 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { break } - a := auxIntToUint8(v_0.AuxInt) - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VREDUCEPDMasked512) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU64Masked512 (VDIVPD512 x y) mask) - // result: (VDIVPDMasked512 x y 
mask) - for { - if v_0.Op != OpAMD64VDIVPD512 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VDIVPDMasked512) - v.AddArg3(x, y, mask) + v.reset(OpAMD64VPSRAVD256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } - // match: (VMOVDQU64Masked512 (VPLZCNTQ512 x) mask) - // result: (VPLZCNTQMasked512 x mask) + return false +} +func rewriteValueAMD64_OpAMD64VPSRAVD512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRAVD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSRAVD512load {sym} [off] x ptr mem) for { - if v_0.Op != OpAMD64VPLZCNTQ512 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPLZCNTQMasked512) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU64Masked512 (VMAXPD512 x y) mask) - // result: (VMAXPDMasked512 x y mask) - for { - if v_0.Op != OpAMD64VMAXPD512 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VMAXPDMasked512) - v.AddArg3(x, y, mask) + v.reset(OpAMD64VPSRAVD512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } - // match: (VMOVDQU64Masked512 (VPMAXSQ512 x y) mask) - // result: (VPMAXSQMasked512 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VPSRAVDMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRAVDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSRAVDMasked128load {sym} [off] x ptr mask mem) for { - if v_0.Op != OpAMD64VPMAXSQ512 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMAXSQMasked512) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU64Masked512 (VPMAXUQ512 x y) mask) - // result: (VPMAXUQMasked512 x y mask) - for { - if v_0.Op != OpAMD64VPMAXUQ512 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMAXUQMasked512) - v.AddArg3(x, y, mask) + v.reset(OpAMD64VPSRAVDMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } - // match: (VMOVDQU64Masked512 (VMINPD512 x y) mask) - // result: (VMINPDMasked512 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VPSRAVDMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRAVDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSRAVDMasked256load {sym} [off] x ptr mask mem) for { - if v_0.Op != OpAMD64VMINPD512 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VMINPDMasked512) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU64Masked512 (VPMINSQ512 x y) mask) - // result: (VPMINSQMasked512 x y mask) - for { - if v_0.Op != OpAMD64VPMINSQ512 { + off := 
auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMINSQMasked512) - v.AddArg3(x, y, mask) + v.reset(OpAMD64VPSRAVDMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } - // match: (VMOVDQU64Masked512 (VPMINUQ512 x y) mask) - // result: (VPMINUQMasked512 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VPSRAVDMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRAVDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSRAVDMasked512load {sym} [off] x ptr mask mem) for { - if v_0.Op != OpAMD64VPMINUQ512 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMINUQMasked512) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU64Masked512 (VFMADD213PD512 x y z) mask) - // result: (VFMADD213PDMasked512 x y z mask) - for { - if v_0.Op != OpAMD64VFMADD213PD512 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { break } - z := v_0.Args[2] - x := v_0.Args[0] - y := v_0.Args[1] - mask := v_1 - v.reset(OpAMD64VFMADD213PDMasked512) - v.AddArg4(x, y, z, mask) + v.reset(OpAMD64VPSRAVDMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } - // match: (VMOVDQU64Masked512 (VFMADDSUB213PD512 x y z) mask) - // result: (VFMADDSUB213PDMasked512 x y z mask) + return false +} +func rewriteValueAMD64_OpAMD64VPSRAVQ128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRAVQ128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSRAVQ128load {sym} [off] x ptr mem) for { - if v_0.Op != OpAMD64VFMADDSUB213PD512 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { break } - z := v_0.Args[2] - x := v_0.Args[0] - y := v_0.Args[1] - mask := v_1 - v.reset(OpAMD64VFMADDSUB213PDMasked512) - v.AddArg4(x, y, z, mask) - return true - } - // match: (VMOVDQU64Masked512 (VMULPD512 x y) mask) - // result: (VMULPDMasked512 x y mask) - for { - if v_0.Op != OpAMD64VMULPD512 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VMULPDMasked512) - v.AddArg3(x, y, mask) + v.reset(OpAMD64VPSRAVQ128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } - // match: (VMOVDQU64Masked512 (VPMULLQ512 x y) mask) - // result: (VPMULLQMasked512 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VPSRAVQ256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRAVQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSRAVQ256load {sym} [off] x ptr mem) for { - if v_0.Op != OpAMD64VPMULLQ512 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMULLQMasked512) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU64Masked512 (VFMSUBADD213PD512 x y z) mask) - // result: (VFMSUBADD213PDMasked512 x y 
z mask) - for { - if v_0.Op != OpAMD64VFMSUBADD213PD512 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - z := v_0.Args[2] - x := v_0.Args[0] - y := v_0.Args[1] - mask := v_1 - v.reset(OpAMD64VFMSUBADD213PDMasked512) - v.AddArg4(x, y, z, mask) + v.reset(OpAMD64VPSRAVQ256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } - // match: (VMOVDQU64Masked512 (VPOPCNTQ512 x) mask) - // result: (VPOPCNTQMasked512 x mask) + return false +} +func rewriteValueAMD64_OpAMD64VPSRAVQ512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRAVQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSRAVQ512load {sym} [off] x ptr mem) for { - if v_0.Op != OpAMD64VPOPCNTQ512 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPOPCNTQMasked512) - v.AddArg2(x, mask) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSRAVQ512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } - // match: (VMOVDQU64Masked512 (VPORQ512 x y) mask) - // result: (VPORQMasked512 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VPSRAVQMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRAVQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSRAVQMasked128load {sym} [off] x ptr mask mem) for { - if v_0.Op != OpAMD64VPORQ512 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPORQMasked512) - v.AddArg3(x, y, mask) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSRAVQMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } - // match: (VMOVDQU64Masked512 (VPERMI2PD512 x y z) mask) - // result: (VPERMI2PDMasked512 x y z mask) + return false +} +func rewriteValueAMD64_OpAMD64VPSRAVQMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRAVQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSRAVQMasked256load {sym} [off] x ptr mask mem) for { - if v_0.Op != OpAMD64VPERMI2PD512 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { break } - z := v_0.Args[2] - x := v_0.Args[0] - y := v_0.Args[1] - mask := v_1 - v.reset(OpAMD64VPERMI2PDMasked512) - v.AddArg4(x, y, z, mask) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSRAVQMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } - // match: (VMOVDQU64Masked512 (VPERMI2Q512 x y z) mask) - // result: (VPERMI2QMasked512 x y z mask) + return false +} +func rewriteValueAMD64_OpAMD64VPSRAVQMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRAVQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) 
+ // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSRAVQMasked512load {sym} [off] x ptr mask mem) for { - if v_0.Op != OpAMD64VPERMI2Q512 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { break } - z := v_0.Args[2] - x := v_0.Args[0] - y := v_0.Args[1] - mask := v_1 - v.reset(OpAMD64VPERMI2QMasked512) - v.AddArg4(x, y, z, mask) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSRAVQMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } - // match: (VMOVDQU64Masked512 (VPERMPD512 x y) mask) - // result: (VPERMPDMasked512 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VPSRAW128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRAW128 x (MOVQconst [c])) + // result: (VPSRAW128const [uint8(c)] x) for { - if v_0.Op != OpAMD64VPERMPD512 { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPERMPDMasked512) - v.AddArg3(x, y, mask) + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSRAW128const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg(x) return true } - // match: (VMOVDQU64Masked512 (VPERMQ512 x y) mask) - // result: (VPERMQMasked512 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VPSRAW256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRAW256 x (MOVQconst [c])) + // result: (VPSRAW256const [uint8(c)] x) for { - if v_0.Op != OpAMD64VPERMQ512 { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPERMQMasked512) - v.AddArg3(x, y, mask) + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSRAW256const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg(x) return true } - // match: (VMOVDQU64Masked512 (VRCP14PD512 x) mask) - // result: (VRCP14PDMasked512 x mask) + return false +} +func rewriteValueAMD64_OpAMD64VPSRAW512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRAW512 x (MOVQconst [c])) + // result: (VPSRAW512const [uint8(c)] x) for { - if v_0.Op != OpAMD64VRCP14PD512 { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VRCP14PDMasked512) - v.AddArg2(x, mask) + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSRAW512const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg(x) return true } - // match: (VMOVDQU64Masked512 (VRSQRT14PD512 x) mask) - // result: (VRSQRT14PDMasked512 x mask) + return false +} +func rewriteValueAMD64_OpAMD64VPSRAWMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRAWMasked128 x (MOVQconst [c]) mask) + // result: (VPSRAWMasked128const [uint8(c)] x mask) for { - if v_0.Op != OpAMD64VRSQRT14PD512 { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VRSQRT14PDMasked512) + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSRAWMasked128const) + v.AuxInt = uint8ToAuxInt(uint8(c)) v.AddArg2(x, mask) return true } - // match: (VMOVDQU64Masked512 (VPROLQ512 [a] x) mask) - // result: (VPROLQMasked512 [a] x mask) + return false +} +func rewriteValueAMD64_OpAMD64VPSRAWMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRAWMasked256 x (MOVQconst [c]) mask) + // result: (VPSRAWMasked256const [uint8(c)] x mask) for { - if v_0.Op != 
OpAMD64VPROLQ512 { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { break } - a := auxIntToUint8(v_0.AuxInt) - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPROLQMasked512) - v.AuxInt = uint8ToAuxInt(a) + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSRAWMasked256const) + v.AuxInt = uint8ToAuxInt(uint8(c)) v.AddArg2(x, mask) return true } - // match: (VMOVDQU64Masked512 (VPRORQ512 [a] x) mask) - // result: (VPRORQMasked512 [a] x mask) + return false +} +func rewriteValueAMD64_OpAMD64VPSRAWMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRAWMasked512 x (MOVQconst [c]) mask) + // result: (VPSRAWMasked512const [uint8(c)] x mask) for { - if v_0.Op != OpAMD64VPRORQ512 { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { break } - a := auxIntToUint8(v_0.AuxInt) - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPRORQMasked512) - v.AuxInt = uint8ToAuxInt(a) + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSRAWMasked512const) + v.AuxInt = uint8ToAuxInt(uint8(c)) v.AddArg2(x, mask) return true } - // match: (VMOVDQU64Masked512 (VPROLVQ512 x y) mask) - // result: (VPROLVQMasked512 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VPSRLVD128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRLVD128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSRLVD128load {sym} [off] x ptr mem) for { - if v_0.Op != OpAMD64VPROLVQ512 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPROLVQMasked512) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU64Masked512 (VPRORVQ512 x y) mask) - // result: (VPRORVQMasked512 x y mask) - for { - if v_0.Op != OpAMD64VPRORVQ512 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPRORVQMasked512) - v.AddArg3(x, y, mask) + v.reset(OpAMD64VPSRLVD128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } - // match: (VMOVDQU64Masked512 (VSCALEFPD512 x y) mask) - // result: (VSCALEFPDMasked512 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VPSRLVD256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRLVD256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSRLVD256load {sym} [off] x ptr mem) for { - if v_0.Op != OpAMD64VSCALEFPD512 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VSCALEFPDMasked512) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU64Masked512 (VPSHLDQ512 [a] x y) mask) - // result: (VPSHLDQMasked512 [a] x y mask) - for { - if v_0.Op != OpAMD64VPSHLDQ512 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - a := auxIntToUint8(v_0.AuxInt) - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSHLDQMasked512) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg3(x, y, mask) + v.reset(OpAMD64VPSRLVD256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } - // match: (VMOVDQU64Masked512 (VPSLLQ512 x y) mask) - // result: (VPSLLQMasked512 x y mask) + return false +} +func 
rewriteValueAMD64_OpAMD64VPSRLVD512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRLVD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSRLVD512load {sym} [off] x ptr mem) for { - if v_0.Op != OpAMD64VPSLLQ512 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSLLQMasked512) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU64Masked512 (VPSHRDQ512 [a] x y) mask) - // result: (VPSHRDQMasked512 [a] x y mask) - for { - if v_0.Op != OpAMD64VPSHRDQ512 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - a := auxIntToUint8(v_0.AuxInt) - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSHRDQMasked512) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg3(x, y, mask) + v.reset(OpAMD64VPSRLVD512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } - // match: (VMOVDQU64Masked512 (VPSRAQ512 x y) mask) - // result: (VPSRAQMasked512 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VPSRLVDMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRLVDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSRLVDMasked128load {sym} [off] x ptr mask mem) for { - if v_0.Op != OpAMD64VPSRAQ512 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSRAQMasked512) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU64Masked512 (VPSRLQ512 x y) mask) - // result: (VPSRLQMasked512 x y mask) - for { - if v_0.Op != OpAMD64VPSRLQ512 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSRLQMasked512) - v.AddArg3(x, y, mask) + v.reset(OpAMD64VPSRLVDMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } - // match: (VMOVDQU64Masked512 (VPSHLDVQ512 x y z) mask) - // result: (VPSHLDVQMasked512 x y z mask) + return false +} +func rewriteValueAMD64_OpAMD64VPSRLVDMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRLVDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSRLVDMasked256load {sym} [off] x ptr mask mem) for { - if v_0.Op != OpAMD64VPSHLDVQ512 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { break } - z := v_0.Args[2] - x := v_0.Args[0] - y := v_0.Args[1] - mask := v_1 - v.reset(OpAMD64VPSHLDVQMasked512) - v.AddArg4(x, y, z, mask) - return true - } - // match: (VMOVDQU64Masked512 (VPSLLVQ512 x y) mask) - // result: (VPSLLVQMasked512 x y mask) - for { - if v_0.Op != OpAMD64VPSLLVQ512 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSLLVQMasked512) - v.AddArg3(x, y, mask) + v.reset(OpAMD64VPSRLVDMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } - // match: (VMOVDQU64Masked512 
(VPSHRDVQ512 x y z) mask) - // result: (VPSHRDVQMasked512 x y z mask) + return false +} +func rewriteValueAMD64_OpAMD64VPSRLVDMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRLVDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSRLVDMasked512load {sym} [off] x ptr mask mem) for { - if v_0.Op != OpAMD64VPSHRDVQ512 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { break } - z := v_0.Args[2] - x := v_0.Args[0] - y := v_0.Args[1] - mask := v_1 - v.reset(OpAMD64VPSHRDVQMasked512) - v.AddArg4(x, y, z, mask) - return true - } - // match: (VMOVDQU64Masked512 (VPSRAVQ512 x y) mask) - // result: (VPSRAVQMasked512 x y mask) - for { - if v_0.Op != OpAMD64VPSRAVQ512 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSRAVQMasked512) - v.AddArg3(x, y, mask) + v.reset(OpAMD64VPSRLVDMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } - // match: (VMOVDQU64Masked512 (VPSRLVQ512 x y) mask) - // result: (VPSRLVQMasked512 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VPSRLVQ128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRLVQ128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSRLVQ128load {sym} [off] x ptr mem) for { - if v_0.Op != OpAMD64VPSRLVQ512 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSRLVQMasked512) - v.AddArg3(x, y, mask) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSRLVQ128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } - // match: (VMOVDQU64Masked512 (VSQRTPD512 x) mask) - // result: (VSQRTPDMasked512 x mask) + return false +} +func rewriteValueAMD64_OpAMD64VPSRLVQ256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRLVQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSRLVQ256load {sym} [off] x ptr mem) for { - if v_0.Op != OpAMD64VSQRTPD512 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VSQRTPDMasked512) - v.AddArg2(x, mask) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSRLVQ256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } - // match: (VMOVDQU64Masked512 (VSUBPD512 x y) mask) - // result: (VSUBPDMasked512 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VPSRLVQ512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRLVQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSRLVQ512load {sym} [off] x ptr mem) for { - if v_0.Op != OpAMD64VSUBPD512 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VSUBPDMasked512) - v.AddArg3(x, y, mask) + off := auxIntToInt32(l.AuxInt) + sym := 
auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSRLVQ512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } - // match: (VMOVDQU64Masked512 (VPSUBQ512 x y) mask) - // result: (VPSUBQMasked512 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VPSRLVQMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRLVQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSRLVQMasked128load {sym} [off] x ptr mask mem) for { - if v_0.Op != OpAMD64VPSUBQ512 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSUBQMasked512) - v.AddArg3(x, y, mask) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSRLVQMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } - // match: (VMOVDQU64Masked512 (VPXORQ512 x y) mask) - // result: (VPXORQMasked512 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VPSRLVQMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRLVQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSRLVQMasked256load {sym} [off] x ptr mask mem) for { - if v_0.Op != OpAMD64VPXORQ512 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPXORQMasked512) - v.AddArg3(x, y, mask) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSRLVQMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } - // match: (VMOVDQU64Masked512 (VPSLLQ512const [a] x) mask) - // result: (VPSLLQMasked512const [a] x mask) + return false +} +func rewriteValueAMD64_OpAMD64VPSRLVQMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRLVQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSRLVQMasked512load {sym} [off] x ptr mask mem) for { - if v_0.Op != OpAMD64VPSLLQ512const { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { break } - a := auxIntToUint8(v_0.AuxInt) - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSLLQMasked512const) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg2(x, mask) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSRLVQMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } - // match: (VMOVDQU64Masked512 (VPSRLQ512const [a] x) mask) - // result: (VPSRLQMasked512const [a] x mask) + return false +} +func rewriteValueAMD64_OpAMD64VPSUBD128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSUBD128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSUBD128load {sym} [off] x ptr mem) for { - if v_0.Op != OpAMD64VPSRLQ512const { + x 
:= v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + break + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - a := auxIntToUint8(v_0.AuxInt) - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSRLQMasked512const) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg2(x, mask) + v.reset(OpAMD64VPSUBD128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } - // match: (VMOVDQU64Masked512 (VPSRAQ512const [a] x) mask) - // result: (VPSRAQMasked512const [a] x mask) + return false +} +func rewriteValueAMD64_OpAMD64VPSUBD256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSUBD256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSUBD256load {sym} [off] x ptr mem) for { - if v_0.Op != OpAMD64VPSRAQ512const { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { break } - a := auxIntToUint8(v_0.AuxInt) - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSRAQMasked512const) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg2(x, mask) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSUBD256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VMOVDQU8Masked512(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSUBD512(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VMOVDQU8Masked512 (VPABSB512 x) mask) - // result: (VPABSBMasked512 x mask) + // match: (VPSUBD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSUBD512load {sym} [off] x ptr mem) for { - if v_0.Op != OpAMD64VPABSB512 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPABSBMasked512) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU8Masked512 (VPADDB512 x y) mask) - // result: (VPADDBMasked512 x y mask) - for { - if v_0.Op != OpAMD64VPADDB512 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPADDBMasked512) - v.AddArg3(x, y, mask) + v.reset(OpAMD64VPSUBD512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } - // match: (VMOVDQU8Masked512 (VPADDSB512 x y) mask) - // result: (VPADDSBMasked512 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VPSUBDMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSUBDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSUBDMasked128load {sym} [off] x ptr mask mem) for { - if v_0.Op != OpAMD64VPADDSB512 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPADDSBMasked512) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU8Masked512 (VPADDUSB512 x y) mask) - // result: (VPADDUSBMasked512 x y mask) - for { - if v_0.Op != OpAMD64VPADDUSB512 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { break } - y := v_0.Args[1] - 
x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPADDUSBMasked512) - v.AddArg3(x, y, mask) + v.reset(OpAMD64VPSUBDMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } - // match: (VMOVDQU8Masked512 (VPAVGB512 x y) mask) - // result: (VPAVGBMasked512 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VPSUBDMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSUBDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSUBDMasked256load {sym} [off] x ptr mask mem) for { - if v_0.Op != OpAMD64VPAVGB512 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPAVGBMasked512) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU8Masked512 (VPBROADCASTB512 x) mask) - // result: (VPBROADCASTBMasked512 x mask) - for { - if v_0.Op != OpAMD64VPBROADCASTB512 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPBROADCASTBMasked512) - v.AddArg2(x, mask) + v.reset(OpAMD64VPSUBDMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } - // match: (VMOVDQU8Masked512 (VPMOVSXBW512 x) mask) - // result: (VPMOVSXBWMasked512 x mask) + return false +} +func rewriteValueAMD64_OpAMD64VPSUBDMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSUBDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSUBDMasked512load {sym} [off] x ptr mask mem) for { - if v_0.Op != OpAMD64VPMOVSXBW512 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVSXBWMasked512) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU8Masked512 (VPMOVSXBD512 x) mask) - // result: (VPMOVSXBDMasked512 x mask) - for { - if v_0.Op != OpAMD64VPMOVSXBD512 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVSXBDMasked512) - v.AddArg2(x, mask) + v.reset(OpAMD64VPSUBDMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } - // match: (VMOVDQU8Masked512 (VPMOVSXBQ512 x) mask) - // result: (VPMOVSXBQMasked512 x mask) + return false +} +func rewriteValueAMD64_OpAMD64VPSUBQ128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSUBQ128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSUBQ128load {sym} [off] x ptr mem) for { - if v_0.Op != OpAMD64VPMOVSXBQ512 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVSXBQMasked512) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU8Masked512 (VPMOVZXBW512 x) mask) - // result: (VPMOVZXBWMasked512 x mask) - for { - if v_0.Op != OpAMD64VPMOVZXBW512 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVZXBWMasked512) - v.AddArg2(x, mask) 
+ v.reset(OpAMD64VPSUBQ128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } - // match: (VMOVDQU8Masked512 (VPMOVZXBD512 x) mask) - // result: (VPMOVZXBDMasked512 x mask) + return false +} +func rewriteValueAMD64_OpAMD64VPSUBQ256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSUBQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSUBQ256load {sym} [off] x ptr mem) for { - if v_0.Op != OpAMD64VPMOVZXBD512 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVZXBDMasked512) - v.AddArg2(x, mask) - return true - } - // match: (VMOVDQU8Masked512 (VPMOVZXBQ512 x) mask) - // result: (VPMOVZXBQMasked512 x mask) - for { - if v_0.Op != OpAMD64VPMOVZXBQ512 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMOVZXBQMasked512) - v.AddArg2(x, mask) + v.reset(OpAMD64VPSUBQ256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } - // match: (VMOVDQU8Masked512 (VGF2P8AFFINEINVQB512 [a] x y) mask) - // result: (VGF2P8AFFINEINVQBMasked512 [a] x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VPSUBQ512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSUBQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSUBQ512load {sym} [off] x ptr mem) for { - if v_0.Op != OpAMD64VGF2P8AFFINEINVQB512 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { break } - a := auxIntToUint8(v_0.AuxInt) - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VGF2P8AFFINEINVQBMasked512) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU8Masked512 (VGF2P8AFFINEQB512 [a] x y) mask) - // result: (VGF2P8AFFINEQBMasked512 [a] x y mask) - for { - if v_0.Op != OpAMD64VGF2P8AFFINEQB512 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - a := auxIntToUint8(v_0.AuxInt) - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VGF2P8AFFINEQBMasked512) - v.AuxInt = uint8ToAuxInt(a) - v.AddArg3(x, y, mask) + v.reset(OpAMD64VPSUBQ512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } - // match: (VMOVDQU8Masked512 (VGF2P8MULB512 x y) mask) - // result: (VGF2P8MULBMasked512 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VPSUBQMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSUBQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSUBQMasked128load {sym} [off] x ptr mask mem) for { - if v_0.Op != OpAMD64VGF2P8MULB512 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VGF2P8MULBMasked512) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU8Masked512 (VPMAXSB512 x y) mask) - // result: (VPMAXSBMasked512 x y mask) - for { - if v_0.Op != OpAMD64VPMAXSB512 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { break } - y := v_0.Args[1] - x := 
v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMAXSBMasked512) - v.AddArg3(x, y, mask) + v.reset(OpAMD64VPSUBQMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } - // match: (VMOVDQU8Masked512 (VPMAXUB512 x y) mask) - // result: (VPMAXUBMasked512 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VPSUBQMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSUBQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSUBQMasked256load {sym} [off] x ptr mask mem) for { - if v_0.Op != OpAMD64VPMAXUB512 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMAXUBMasked512) - v.AddArg3(x, y, mask) - return true - } - // match: (VMOVDQU8Masked512 (VPMINSB512 x y) mask) - // result: (VPMINSBMasked512 x y mask) - for { - if v_0.Op != OpAMD64VPMINSB512 { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMINSBMasked512) - v.AddArg3(x, y, mask) + v.reset(OpAMD64VPSUBQMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } - // match: (VMOVDQU8Masked512 (VPMINUB512 x y) mask) - // result: (VPMINUBMasked512 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VPSUBQMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSUBQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPSUBQMasked512load {sym} [off] x ptr mask mem) for { - if v_0.Op != OpAMD64VPMINUB512 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPMINUBMasked512) - v.AddArg3(x, y, mask) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPSUBQMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } - // match: (VMOVDQU8Masked512 (VPOPCNTB512 x) mask) - // result: (VPOPCNTBMasked512 x mask) + return false +} +func rewriteValueAMD64_OpAMD64VPUNPCKHDQ128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPUNPCKHDQ128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPUNPCKHDQ128load {sym} [off] x ptr mem) for { - if v_0.Op != OpAMD64VPOPCNTB512 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { break } - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPOPCNTBMasked512) - v.AddArg2(x, mask) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPUNPCKHDQ128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } - // match: (VMOVDQU8Masked512 (VPERMI2B512 x y z) mask) - // result: (VPERMI2BMasked512 x y z mask) + return false +} +func rewriteValueAMD64_OpAMD64VPUNPCKHDQ256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPUNPCKHDQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && 
clobber(l) + // result: (VPUNPCKHDQ256load {sym} [off] x ptr mem) for { - if v_0.Op != OpAMD64VPERMI2B512 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { break } - z := v_0.Args[2] - x := v_0.Args[0] - y := v_0.Args[1] - mask := v_1 - v.reset(OpAMD64VPERMI2BMasked512) - v.AddArg4(x, y, z, mask) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPUNPCKHDQ256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } - // match: (VMOVDQU8Masked512 (VPSHUFB512 x y) mask) - // result: (VPSHUFBMasked512 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VPUNPCKHDQ512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPUNPCKHDQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPUNPCKHDQ512load {sym} [off] x ptr mem) for { - if v_0.Op != OpAMD64VPSHUFB512 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSHUFBMasked512) - v.AddArg3(x, y, mask) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPUNPCKHDQ512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } - // match: (VMOVDQU8Masked512 (VPERMB512 x y) mask) - // result: (VPERMBMasked512 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VPUNPCKHQDQ128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPUNPCKHQDQ128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPUNPCKHQDQ128load {sym} [off] x ptr mem) for { - if v_0.Op != OpAMD64VPERMB512 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPERMBMasked512) - v.AddArg3(x, y, mask) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPUNPCKHQDQ128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } - // match: (VMOVDQU8Masked512 (VPSUBB512 x y) mask) - // result: (VPSUBBMasked512 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VPUNPCKHQDQ256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPUNPCKHQDQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPUNPCKHQDQ256load {sym} [off] x ptr mem) for { - if v_0.Op != OpAMD64VPSUBB512 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSUBBMasked512) - v.AddArg3(x, y, mask) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPUNPCKHQDQ256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } - // match: (VMOVDQU8Masked512 (VPSUBSB512 x y) mask) - // result: (VPSUBSBMasked512 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VPUNPCKHQDQ512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPUNPCKHQDQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && 
clobber(l) + // result: (VPUNPCKHQDQ512load {sym} [off] x ptr mem) for { - if v_0.Op != OpAMD64VPSUBSB512 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSUBSBMasked512) - v.AddArg3(x, y, mask) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPUNPCKHQDQ512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } - // match: (VMOVDQU8Masked512 (VPSUBUSB512 x y) mask) - // result: (VPSUBUSBMasked512 x y mask) + return false +} +func rewriteValueAMD64_OpAMD64VPUNPCKLDQ128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPUNPCKLDQ128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPUNPCKLDQ128load {sym} [off] x ptr mem) for { - if v_0.Op != OpAMD64VPSUBUSB512 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { break } - y := v_0.Args[1] - x := v_0.Args[0] - mask := v_1 - v.reset(OpAMD64VPSUBUSBMasked512) - v.AddArg3(x, y, mask) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPUNPCKLDQ128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VMOVQ(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPUNPCKLDQ256(v *Value) bool { + v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (VMOVQ x:(MOVQload [off] {sym} ptr mem)) - // cond: x.Uses == 1 && clobber(x) - // result: @x.Block (VMOVQload [off] {sym} ptr mem) + // match: (VPUNPCKLDQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPUNPCKLDQ256load {sym} [off] x ptr mem) for { x := v_0 - if x.Op != OpAMD64MOVQload { + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { break } - off := auxIntToInt32(x.AuxInt) - sym := auxToSym(x.Aux) - mem := x.Args[1] - ptr := x.Args[0] - if !(x.Uses == 1 && clobber(x)) { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - b = x.Block - v0 := b.NewValue0(x.Pos, OpAMD64VMOVQload, v.Type) - v.copyOf(v0) - v0.AuxInt = int32ToAuxInt(off) - v0.Aux = symToAux(sym) - v0.AddArg2(ptr, mem) + v.reset(OpAMD64VPUNPCKLDQ256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VMOVSDf2v(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPUNPCKLDQ512(v *Value) bool { + v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (VMOVSDf2v x:(MOVSDload [off] {sym} ptr mem)) - // cond: x.Uses == 1 && clobber(x) - // result: @x.Block (VMOVSDload [off] {sym} ptr mem) + // match: (VPUNPCKLDQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPUNPCKLDQ512load {sym} [off] x ptr mem) for { x := v_0 - if x.Op != OpAMD64MOVSDload { + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { break } - off := auxIntToInt32(x.AuxInt) - sym := auxToSym(x.Aux) - mem := x.Args[1] - ptr := x.Args[0] - if !(x.Uses == 1 && clobber(x)) { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - b = x.Block - v0 := b.NewValue0(x.Pos, 
OpAMD64VMOVSDload, v.Type) - v.copyOf(v0) - v0.AuxInt = int32ToAuxInt(off) - v0.Aux = symToAux(sym) - v0.AddArg2(ptr, mem) + v.reset(OpAMD64VPUNPCKLDQ512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } - // match: (VMOVSDf2v x:(MOVSDconst [c] )) - // result: (VMOVSDconst [c] ) + return false +} +func rewriteValueAMD64_OpAMD64VPUNPCKLQDQ128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPUNPCKLQDQ128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPUNPCKLQDQ128load {sym} [off] x ptr mem) for { x := v_0 - if x.Op != OpAMD64MOVSDconst { + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { break } - c := auxIntToFloat64(x.AuxInt) - v.reset(OpAMD64VMOVSDconst) - v.AuxInt = float64ToAuxInt(c) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPUNPCKLQDQ128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VMOVSSf2v(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPUNPCKLQDQ256(v *Value) bool { + v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (VMOVSSf2v x:(MOVSSload [off] {sym} ptr mem)) - // cond: x.Uses == 1 && clobber(x) - // result: @x.Block (VMOVSSload [off] {sym} ptr mem) + // match: (VPUNPCKLQDQ256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPUNPCKLQDQ256load {sym} [off] x ptr mem) for { x := v_0 - if x.Op != OpAMD64MOVSSload { + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { break } - off := auxIntToInt32(x.AuxInt) - sym := auxToSym(x.Aux) - mem := x.Args[1] - ptr := x.Args[0] - if !(x.Uses == 1 && clobber(x)) { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - b = x.Block - v0 := b.NewValue0(x.Pos, OpAMD64VMOVSSload, v.Type) - v.copyOf(v0) - v0.AuxInt = int32ToAuxInt(off) - v0.Aux = symToAux(sym) - v0.AddArg2(ptr, mem) + v.reset(OpAMD64VPUNPCKLQDQ256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } - // match: (VMOVSSf2v x:(MOVSSconst [c] )) - // result: (VMOVSSconst [c] ) + return false +} +func rewriteValueAMD64_OpAMD64VPUNPCKLQDQ512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPUNPCKLQDQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPUNPCKLQDQ512load {sym} [off] x ptr mem) for { x := v_0 - if x.Op != OpAMD64MOVSSconst { + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { break } - c := auxIntToFloat32(x.AuxInt) - v.reset(OpAMD64VMOVSSconst) - v.AuxInt = float32ToAuxInt(c) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VPUNPCKLQDQ512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPANDQ512(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPXORD512(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPANDQ512 x (VPMOVMToVec64x8 k)) - // result: (VMOVDQU64Masked512 x k) + // match: (VPXORD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPXORD512load {sym} [off] x ptr mem) for { for 
_i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { x := v_0 - if v_1.Op != OpAMD64VPMOVMToVec64x8 { + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { continue } - k := v_1.Args[0] - v.reset(OpAMD64VMOVDQU64Masked512) - v.AddArg2(x, k) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPXORD512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } break } - // match: (VPANDQ512 x (VPMOVMToVec32x16 k)) - // result: (VMOVDQU32Masked512 x k) + return false +} +func rewriteValueAMD64_OpAMD64VPXORDMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPXORDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPXORDMasked128load {sym} [off] x ptr mask mem) for { for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { x := v_0 - if v_1.Op != OpAMD64VPMOVMToVec32x16 { + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { continue } - k := v_1.Args[0] - v.reset(OpAMD64VMOVDQU32Masked512) - v.AddArg2(x, k) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPXORDMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } break } - // match: (VPANDQ512 x (VPMOVMToVec16x32 k)) - // result: (VMOVDQU16Masked512 x k) + return false +} +func rewriteValueAMD64_OpAMD64VPXORDMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPXORDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPXORDMasked256load {sym} [off] x ptr mask mem) for { for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { x := v_0 - if v_1.Op != OpAMD64VPMOVMToVec16x32 { + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { continue } - k := v_1.Args[0] - v.reset(OpAMD64VMOVDQU16Masked512) - v.AddArg2(x, k) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPXORDMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } break } - // match: (VPANDQ512 x (VPMOVMToVec8x64 k)) - // result: (VMOVDQU8Masked512 x k) + return false +} +func rewriteValueAMD64_OpAMD64VPXORDMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPXORDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPXORDMasked512load {sym} [off] x ptr mask mem) for { for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { x := v_0 - if v_1.Op != OpAMD64VPMOVMToVec8x64 { + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { continue } - k := v_1.Args[0] - v.reset(OpAMD64VMOVDQU8Masked512) - v.AddArg2(x, k) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPXORDMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } break } return false } -func rewriteValueAMD64_OpAMD64VPBROADCASTB128(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPXORQ512(v *Value) 
bool { + v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (VPBROADCASTB128 x:(VPINSRB128 [0] (Zero128 ) y)) - // cond: x.Uses == 1 - // result: (VPBROADCASTB128 (VMOVQ y)) + // match: (VPXORQ512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPXORQ512load {sym} [off] x ptr mem) for { - x := v_0 - if x.Op != OpAMD64VPINSRB128 || auxIntToUint8(x.AuxInt) != 0 { - break - } - y := x.Args[1] - x_0 := x.Args[0] - if x_0.Op != OpAMD64Zero128 { - break - } - if !(x.Uses == 1) { - break + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPXORQ512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) + return true } - v.reset(OpAMD64VPBROADCASTB128) - v0 := b.NewValue0(v.Pos, OpAMD64VMOVQ, types.TypeVec128) - v0.AddArg(y) - v.AddArg(v0) - return true + break } return false } -func rewriteValueAMD64_OpAMD64VPBROADCASTB256(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPXORQMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (VPBROADCASTB256 x:(VPINSRB128 [0] (Zero128 ) y)) - // cond: x.Uses == 1 - // result: (VPBROADCASTB256 (VMOVQ y)) + // match: (VPXORQMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPXORQMasked128load {sym} [off] x ptr mask mem) for { - x := v_0 - if x.Op != OpAMD64VPINSRB128 || auxIntToUint8(x.AuxInt) != 0 { - break - } - y := x.Args[1] - x_0 := x.Args[0] - if x_0.Op != OpAMD64Zero128 { - break + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPXORQMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true } - if !(x.Uses == 1) { - break + break + } + return false +} +func rewriteValueAMD64_OpAMD64VPXORQMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPXORQMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPXORQMasked256load {sym} [off] x ptr mask mem) + for { + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPXORQMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true } - v.reset(OpAMD64VPBROADCASTB256) - v0 := b.NewValue0(v.Pos, OpAMD64VMOVQ, types.TypeVec128) - v0.AddArg(y) - v.AddArg(v0) - return true + break } return false } -func rewriteValueAMD64_OpAMD64VPBROADCASTB512(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPXORQMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (VPBROADCASTB512 x:(VPINSRB128 [0] (Zero128 ) y)) - // cond: x.Uses == 1 - // result: (VPBROADCASTB512 (VMOVQ y)) + // 
match: (VPXORQMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VPXORQMasked512load {sym} [off] x ptr mask mem) for { - x := v_0 - if x.Op != OpAMD64VPINSRB128 || auxIntToUint8(x.AuxInt) != 0 { - break + for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { + x := v_0 + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { + continue + } + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_2 + if !(canMergeLoad(v, l) && clobber(l)) { + continue + } + v.reset(OpAMD64VPXORQMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) + return true } - y := x.Args[1] - x_0 := x.Args[0] - if x_0.Op != OpAMD64Zero128 { + break + } + return false +} +func rewriteValueAMD64_OpAMD64VRCP14PD128(v *Value) bool { + v_0 := v.Args[0] + // match: (VRCP14PD128 l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VRCP14PD128load {sym} [off] ptr mem) + for { + l := v_0 + if l.Op != OpAMD64VMOVDQUload128 { break } - if !(x.Uses == 1) { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPBROADCASTB512) - v0 := b.NewValue0(v.Pos, OpAMD64VMOVQ, types.TypeVec128) - v0.AddArg(y) - v.AddArg(v0) + v.reset(OpAMD64VRCP14PD128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPBROADCASTW128(v *Value) bool { +func rewriteValueAMD64_OpAMD64VRCP14PD256(v *Value) bool { v_0 := v.Args[0] - b := v.Block - // match: (VPBROADCASTW128 x:(VPINSRW128 [0] (Zero128 ) y)) - // cond: x.Uses == 1 - // result: (VPBROADCASTW128 (VMOVQ y)) + // match: (VRCP14PD256 l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VRCP14PD256load {sym} [off] ptr mem) for { - x := v_0 - if x.Op != OpAMD64VPINSRW128 || auxIntToUint8(x.AuxInt) != 0 { - break - } - y := x.Args[1] - x_0 := x.Args[0] - if x_0.Op != OpAMD64Zero128 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload256 { break } - if !(x.Uses == 1) { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPBROADCASTW128) - v0 := b.NewValue0(v.Pos, OpAMD64VMOVQ, types.TypeVec128) - v0.AddArg(y) - v.AddArg(v0) + v.reset(OpAMD64VRCP14PD256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPBROADCASTW256(v *Value) bool { +func rewriteValueAMD64_OpAMD64VRCP14PD512(v *Value) bool { v_0 := v.Args[0] - b := v.Block - // match: (VPBROADCASTW256 x:(VPINSRW128 [0] (Zero128 ) y)) - // cond: x.Uses == 1 - // result: (VPBROADCASTW256 (VMOVQ y)) + // match: (VRCP14PD512 l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VRCP14PD512load {sym} [off] ptr mem) for { - x := v_0 - if x.Op != OpAMD64VPINSRW128 || auxIntToUint8(x.AuxInt) != 0 { - break - } - y := x.Args[1] - x_0 := x.Args[0] - if x_0.Op != OpAMD64Zero128 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { break } - if !(x.Uses == 1) { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPBROADCASTW256) - v0 := b.NewValue0(v.Pos, OpAMD64VMOVQ, 
types.TypeVec128) - v0.AddArg(y) - v.AddArg(v0) + v.reset(OpAMD64VRCP14PD512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPBROADCASTW512(v *Value) bool { +func rewriteValueAMD64_OpAMD64VRCP14PDMasked128(v *Value) bool { + v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (VPBROADCASTW512 x:(VPINSRW128 [0] (Zero128 ) y)) - // cond: x.Uses == 1 - // result: (VPBROADCASTW512 (VMOVQ y)) + // match: (VRCP14PDMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VRCP14PDMasked128load {sym} [off] ptr mask mem) for { - x := v_0 - if x.Op != OpAMD64VPINSRW128 || auxIntToUint8(x.AuxInt) != 0 { - break - } - y := x.Args[1] - x_0 := x.Args[0] - if x_0.Op != OpAMD64Zero128 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload128 { break } - if !(x.Uses == 1) { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VPBROADCASTW512) - v0 := b.NewValue0(v.Pos, OpAMD64VMOVQ, types.TypeVec128) - v0.AddArg(y) - v.AddArg(v0) + v.reset(OpAMD64VRCP14PDMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPINSRD128(v *Value) bool { +func rewriteValueAMD64_OpAMD64VRCP14PDMasked256(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPINSRD128 [0] (Zero128 ) y) - // cond: y.Type.IsFloat() - // result: (VMOVSSf2v y) + // match: (VRCP14PDMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VRCP14PDMasked256load {sym} [off] ptr mask mem) for { - if auxIntToUint8(v.AuxInt) != 0 || v_0.Op != OpAMD64Zero128 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload256 { break } - y := v_1 - if !(y.Type.IsFloat()) { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VMOVSSf2v) - v.Type = types.TypeVec128 - v.AddArg(y) + v.reset(OpAMD64VRCP14PDMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } - // match: (VPINSRD128 [0] (Zero128 ) y) - // cond: !y.Type.IsFloat() - // result: (VMOVD y) + return false +} +func rewriteValueAMD64_OpAMD64VRCP14PDMasked512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VRCP14PDMasked512 l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VRCP14PDMasked512load {sym} [off] ptr mask mem) for { - if auxIntToUint8(v.AuxInt) != 0 || v_0.Op != OpAMD64Zero128 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { break } - y := v_1 - if !(!y.Type.IsFloat()) { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VMOVD) - v.Type = types.TypeVec128 - v.AddArg(y) + v.reset(OpAMD64VRCP14PDMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPINSRQ128(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpAMD64VRCP14PS512(v *Value) bool { v_0 := v.Args[0] - // match: (VPINSRQ128 [0] (Zero128 ) y) - // cond: y.Type.IsFloat() - // result: (VMOVSDf2v y) + // match: (VRCP14PS512 
l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VRCP14PS512load {sym} [off] ptr mem) for { - if auxIntToUint8(v.AuxInt) != 0 || v_0.Op != OpAMD64Zero128 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { break } - y := v_1 - if !(y.Type.IsFloat()) { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VMOVSDf2v) - v.Type = types.TypeVec128 - v.AddArg(y) + v.reset(OpAMD64VRCP14PS512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } - // match: (VPINSRQ128 [0] (Zero128 ) y) - // cond: !y.Type.IsFloat() - // result: (VMOVQ y) + return false +} +func rewriteValueAMD64_OpAMD64VRCP14PSMasked128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VRCP14PSMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VRCP14PSMasked128load {sym} [off] ptr mask mem) for { - if auxIntToUint8(v.AuxInt) != 0 || v_0.Op != OpAMD64Zero128 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload128 { break } - y := v_1 - if !(!y.Type.IsFloat()) { + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { break } - v.reset(OpAMD64VMOVQ) - v.Type = types.TypeVec128 - v.AddArg(y) + v.reset(OpAMD64VRCP14PSMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPMOVVec16x16ToM(v *Value) bool { +func rewriteValueAMD64_OpAMD64VRCP14PSMasked256(v *Value) bool { + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPMOVVec16x16ToM (VPMOVMToVec16x16 x)) - // result: x + // match: (VRCP14PSMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VRCP14PSMasked256load {sym} [off] ptr mask mem) for { - if v_0.Op != OpAMD64VPMOVMToVec16x16 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload256 { break } - x := v_0.Args[0] - v.copyOf(x) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VRCP14PSMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPMOVVec16x32ToM(v *Value) bool { +func rewriteValueAMD64_OpAMD64VRCP14PSMasked512(v *Value) bool { + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPMOVVec16x32ToM (VPMOVMToVec16x32 x)) - // result: x + // match: (VRCP14PSMasked512 l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VRCP14PSMasked512load {sym} [off] ptr mask mem) for { - if v_0.Op != OpAMD64VPMOVMToVec16x32 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { break } - x := v_0.Args[0] - v.copyOf(x) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VRCP14PSMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPMOVVec16x8ToM(v *Value) bool { +func rewriteValueAMD64_OpAMD64VRSQRT14PD128(v *Value) bool { v_0 := v.Args[0] - // match: (VPMOVVec16x8ToM (VPMOVMToVec16x8 x)) - // result: x + // match: 
(VRSQRT14PD128 l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VRSQRT14PD128load {sym} [off] ptr mem) for { - if v_0.Op != OpAMD64VPMOVMToVec16x8 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload128 { break } - x := v_0.Args[0] - v.copyOf(x) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VRSQRT14PD128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPMOVVec32x16ToM(v *Value) bool { +func rewriteValueAMD64_OpAMD64VRSQRT14PD256(v *Value) bool { v_0 := v.Args[0] - // match: (VPMOVVec32x16ToM (VPMOVMToVec32x16 x)) - // result: x + // match: (VRSQRT14PD256 l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VRSQRT14PD256load {sym} [off] ptr mem) for { - if v_0.Op != OpAMD64VPMOVMToVec32x16 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload256 { break } - x := v_0.Args[0] - v.copyOf(x) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VRSQRT14PD256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPMOVVec32x4ToM(v *Value) bool { +func rewriteValueAMD64_OpAMD64VRSQRT14PD512(v *Value) bool { v_0 := v.Args[0] - // match: (VPMOVVec32x4ToM (VPMOVMToVec32x4 x)) - // result: x + // match: (VRSQRT14PD512 l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VRSQRT14PD512load {sym} [off] ptr mem) for { - if v_0.Op != OpAMD64VPMOVMToVec32x4 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { break } - x := v_0.Args[0] - v.copyOf(x) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VRSQRT14PD512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPMOVVec32x8ToM(v *Value) bool { +func rewriteValueAMD64_OpAMD64VRSQRT14PDMasked128(v *Value) bool { + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPMOVVec32x8ToM (VPMOVMToVec32x8 x)) - // result: x + // match: (VRSQRT14PDMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VRSQRT14PDMasked128load {sym} [off] ptr mask mem) for { - if v_0.Op != OpAMD64VPMOVMToVec32x8 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload128 { break } - x := v_0.Args[0] - v.copyOf(x) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VRSQRT14PDMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPMOVVec64x2ToM(v *Value) bool { +func rewriteValueAMD64_OpAMD64VRSQRT14PDMasked256(v *Value) bool { + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPMOVVec64x2ToM (VPMOVMToVec64x2 x)) - // result: x + // match: (VRSQRT14PDMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VRSQRT14PDMasked256load {sym} [off] ptr mask mem) for { - if v_0.Op != OpAMD64VPMOVMToVec64x2 { + l := v_0 + if l.Op != 
OpAMD64VMOVDQUload256 { break } - x := v_0.Args[0] - v.copyOf(x) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VRSQRT14PDMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPMOVVec64x4ToM(v *Value) bool { +func rewriteValueAMD64_OpAMD64VRSQRT14PDMasked512(v *Value) bool { + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPMOVVec64x4ToM (VPMOVMToVec64x4 x)) - // result: x + // match: (VRSQRT14PDMasked512 l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VRSQRT14PDMasked512load {sym} [off] ptr mask mem) for { - if v_0.Op != OpAMD64VPMOVMToVec64x4 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { break } - x := v_0.Args[0] - v.copyOf(x) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VRSQRT14PDMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPMOVVec64x8ToM(v *Value) bool { +func rewriteValueAMD64_OpAMD64VRSQRT14PS512(v *Value) bool { v_0 := v.Args[0] - // match: (VPMOVVec64x8ToM (VPMOVMToVec64x8 x)) - // result: x + // match: (VRSQRT14PS512 l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VRSQRT14PS512load {sym} [off] ptr mem) for { - if v_0.Op != OpAMD64VPMOVMToVec64x8 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { break } - x := v_0.Args[0] - v.copyOf(x) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VRSQRT14PS512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPMOVVec8x16ToM(v *Value) bool { +func rewriteValueAMD64_OpAMD64VRSQRT14PSMasked128(v *Value) bool { + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPMOVVec8x16ToM (VPMOVMToVec8x16 x)) - // result: x + // match: (VRSQRT14PSMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VRSQRT14PSMasked128load {sym} [off] ptr mask mem) for { - if v_0.Op != OpAMD64VPMOVMToVec8x16 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload128 { break } - x := v_0.Args[0] - v.copyOf(x) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VRSQRT14PSMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPMOVVec8x32ToM(v *Value) bool { +func rewriteValueAMD64_OpAMD64VRSQRT14PSMasked256(v *Value) bool { + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPMOVVec8x32ToM (VPMOVMToVec8x32 x)) - // result: x + // match: (VRSQRT14PSMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VRSQRT14PSMasked256load {sym} [off] ptr mask mem) for { - if v_0.Op != OpAMD64VPMOVMToVec8x32 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload256 { break } - x := v_0.Args[0] - v.copyOf(x) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := 
l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VRSQRT14PSMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPMOVVec8x64ToM(v *Value) bool { +func rewriteValueAMD64_OpAMD64VRSQRT14PSMasked512(v *Value) bool { + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPMOVVec8x64ToM (VPMOVMToVec8x64 x)) - // result: x + // match: (VRSQRT14PSMasked512 l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VRSQRT14PSMasked512load {sym} [off] ptr mask mem) for { - if v_0.Op != OpAMD64VPMOVMToVec8x64 { + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { break } - x := v_0.Args[0] - v.copyOf(x) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VRSQRT14PSMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPSLLD128(v *Value) bool { +func rewriteValueAMD64_OpAMD64VSCALEFPD128(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPSLLD128 x (MOVQconst [c])) - // result: (VPSLLD128const [uint8(c)] x) + // match: (VSCALEFPD128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VSCALEFPD128load {sym} [off] x ptr mem) for { x := v_0 - if v_1.Op != OpAMD64MOVQconst { + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { break } - c := auxIntToInt64(v_1.AuxInt) - v.reset(OpAMD64VPSLLD128const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg(x) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VSCALEFPD128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPSLLD256(v *Value) bool { +func rewriteValueAMD64_OpAMD64VSCALEFPD256(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPSLLD256 x (MOVQconst [c])) - // result: (VPSLLD256const [uint8(c)] x) + // match: (VSCALEFPD256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VSCALEFPD256load {sym} [off] x ptr mem) for { x := v_0 - if v_1.Op != OpAMD64MOVQconst { + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { break } - c := auxIntToInt64(v_1.AuxInt) - v.reset(OpAMD64VPSLLD256const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg(x) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VSCALEFPD256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPSLLD512(v *Value) bool { +func rewriteValueAMD64_OpAMD64VSCALEFPD512(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPSLLD512 x (MOVQconst [c])) - // result: (VPSLLD512const [uint8(c)] x) + // match: (VSCALEFPD512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VSCALEFPD512load {sym} [off] x ptr mem) for { x := v_0 - if v_1.Op != OpAMD64MOVQconst { + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { break } - c := auxIntToInt64(v_1.AuxInt) - v.reset(OpAMD64VPSLLD512const) - 
v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg(x) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VSCALEFPD512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPSLLDMasked128(v *Value) bool { +func rewriteValueAMD64_OpAMD64VSCALEFPDMasked128(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPSLLDMasked128 x (MOVQconst [c]) mask) - // result: (VPSLLDMasked128const [uint8(c)] x mask) + // match: (VSCALEFPDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VSCALEFPDMasked128load {sym} [off] x ptr mask mem) for { x := v_0 - if v_1.Op != OpAMD64MOVQconst { + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { break } - c := auxIntToInt64(v_1.AuxInt) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] mask := v_2 - v.reset(OpAMD64VPSLLDMasked128const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg2(x, mask) + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VSCALEFPDMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPSLLDMasked256(v *Value) bool { +func rewriteValueAMD64_OpAMD64VSCALEFPDMasked256(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPSLLDMasked256 x (MOVQconst [c]) mask) - // result: (VPSLLDMasked256const [uint8(c)] x mask) + // match: (VSCALEFPDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VSCALEFPDMasked256load {sym} [off] x ptr mask mem) for { x := v_0 - if v_1.Op != OpAMD64MOVQconst { + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { break } - c := auxIntToInt64(v_1.AuxInt) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] mask := v_2 - v.reset(OpAMD64VPSLLDMasked256const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg2(x, mask) + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VSCALEFPDMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPSLLDMasked512(v *Value) bool { +func rewriteValueAMD64_OpAMD64VSCALEFPDMasked512(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPSLLDMasked512 x (MOVQconst [c]) mask) - // result: (VPSLLDMasked512const [uint8(c)] x mask) + // match: (VSCALEFPDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VSCALEFPDMasked512load {sym} [off] x ptr mask mem) for { x := v_0 - if v_1.Op != OpAMD64MOVQconst { + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { break } - c := auxIntToInt64(v_1.AuxInt) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] mask := v_2 - v.reset(OpAMD64VPSLLDMasked512const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg2(x, mask) + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VSCALEFPDMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPSLLQ128(v *Value) bool { +func rewriteValueAMD64_OpAMD64VSCALEFPS128(v *Value) 
bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPSLLQ128 x (MOVQconst [c])) - // result: (VPSLLQ128const [uint8(c)] x) + // match: (VSCALEFPS128 x l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VSCALEFPS128load {sym} [off] x ptr mem) for { x := v_0 - if v_1.Op != OpAMD64MOVQconst { + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { break } - c := auxIntToInt64(v_1.AuxInt) - v.reset(OpAMD64VPSLLQ128const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg(x) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VSCALEFPS128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPSLLQ256(v *Value) bool { +func rewriteValueAMD64_OpAMD64VSCALEFPS256(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPSLLQ256 x (MOVQconst [c])) - // result: (VPSLLQ256const [uint8(c)] x) + // match: (VSCALEFPS256 x l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VSCALEFPS256load {sym} [off] x ptr mem) for { x := v_0 - if v_1.Op != OpAMD64MOVQconst { + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { break } - c := auxIntToInt64(v_1.AuxInt) - v.reset(OpAMD64VPSLLQ256const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg(x) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VSCALEFPS256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPSLLQ512(v *Value) bool { +func rewriteValueAMD64_OpAMD64VSCALEFPS512(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPSLLQ512 x (MOVQconst [c])) - // result: (VPSLLQ512const [uint8(c)] x) + // match: (VSCALEFPS512 x l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VSCALEFPS512load {sym} [off] x ptr mem) for { x := v_0 - if v_1.Op != OpAMD64MOVQconst { + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { break } - c := auxIntToInt64(v_1.AuxInt) - v.reset(OpAMD64VPSLLQ512const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg(x) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VSCALEFPS512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(x, ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPSLLQMasked128(v *Value) bool { +func rewriteValueAMD64_OpAMD64VSCALEFPSMasked128(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPSLLQMasked128 x (MOVQconst [c]) mask) - // result: (VPSLLQMasked128const [uint8(c)] x mask) + // match: (VSCALEFPSMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VSCALEFPSMasked128load {sym} [off] x ptr mask mem) for { x := v_0 - if v_1.Op != OpAMD64MOVQconst { + l := v_1 + if l.Op != OpAMD64VMOVDQUload128 { break } - c := auxIntToInt64(v_1.AuxInt) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] mask := v_2 - v.reset(OpAMD64VPSLLQMasked128const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg2(x, mask) + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + 
v.reset(OpAMD64VSCALEFPSMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPSLLQMasked256(v *Value) bool { +func rewriteValueAMD64_OpAMD64VSCALEFPSMasked256(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPSLLQMasked256 x (MOVQconst [c]) mask) - // result: (VPSLLQMasked256const [uint8(c)] x mask) + // match: (VSCALEFPSMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VSCALEFPSMasked256load {sym} [off] x ptr mask mem) for { x := v_0 - if v_1.Op != OpAMD64MOVQconst { + l := v_1 + if l.Op != OpAMD64VMOVDQUload256 { break } - c := auxIntToInt64(v_1.AuxInt) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] mask := v_2 - v.reset(OpAMD64VPSLLQMasked256const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg2(x, mask) + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VSCALEFPSMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPSLLQMasked512(v *Value) bool { +func rewriteValueAMD64_OpAMD64VSCALEFPSMasked512(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPSLLQMasked512 x (MOVQconst [c]) mask) - // result: (VPSLLQMasked512const [uint8(c)] x mask) + // match: (VSCALEFPSMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VSCALEFPSMasked512load {sym} [off] x ptr mask mem) for { x := v_0 - if v_1.Op != OpAMD64MOVQconst { + l := v_1 + if l.Op != OpAMD64VMOVDQUload512 { break } - c := auxIntToInt64(v_1.AuxInt) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] mask := v_2 - v.reset(OpAMD64VPSLLQMasked512const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg2(x, mask) + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VSCALEFPSMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg4(x, ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPSLLW128(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpAMD64VSQRTPD128(v *Value) bool { v_0 := v.Args[0] - // match: (VPSLLW128 x (MOVQconst [c])) - // result: (VPSLLW128const [uint8(c)] x) + // match: (VSQRTPD128 l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VSQRTPD128load {sym} [off] ptr mem) for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { + l := v_0 + if l.Op != OpAMD64VMOVDQUload128 { break } - c := auxIntToInt64(v_1.AuxInt) - v.reset(OpAMD64VPSLLW128const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg(x) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VSQRTPD128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPSLLW256(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpAMD64VSQRTPD256(v *Value) bool { v_0 := v.Args[0] - // match: (VPSLLW256 x (MOVQconst [c])) - // result: (VPSLLW256const [uint8(c)] x) + // match: (VSQRTPD256 l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VSQRTPD256load {sym} [off] ptr mem) for { - x := v_0 - if v_1.Op 
!= OpAMD64MOVQconst { + l := v_0 + if l.Op != OpAMD64VMOVDQUload256 { break } - c := auxIntToInt64(v_1.AuxInt) - v.reset(OpAMD64VPSLLW256const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg(x) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VSQRTPD256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPSLLW512(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpAMD64VSQRTPD512(v *Value) bool { v_0 := v.Args[0] - // match: (VPSLLW512 x (MOVQconst [c])) - // result: (VPSLLW512const [uint8(c)] x) + // match: (VSQRTPD512 l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VSQRTPD512load {sym} [off] ptr mem) for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { break } - c := auxIntToInt64(v_1.AuxInt) - v.reset(OpAMD64VPSLLW512const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg(x) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VSQRTPD512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPSLLWMasked128(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpAMD64VSQRTPDMasked128(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPSLLWMasked128 x (MOVQconst [c]) mask) - // result: (VPSLLWMasked128const [uint8(c)] x mask) + // match: (VSQRTPDMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VSQRTPDMasked128load {sym} [off] ptr mask mem) for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { + l := v_0 + if l.Op != OpAMD64VMOVDQUload128 { break } - c := auxIntToInt64(v_1.AuxInt) - mask := v_2 - v.reset(OpAMD64VPSLLWMasked128const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg2(x, mask) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VSQRTPDMasked128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPSLLWMasked256(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpAMD64VSQRTPDMasked256(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPSLLWMasked256 x (MOVQconst [c]) mask) - // result: (VPSLLWMasked256const [uint8(c)] x mask) + // match: (VSQRTPDMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VSQRTPDMasked256load {sym} [off] ptr mask mem) for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { + l := v_0 + if l.Op != OpAMD64VMOVDQUload256 { break } - c := auxIntToInt64(v_1.AuxInt) - mask := v_2 - v.reset(OpAMD64VPSLLWMasked256const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg2(x, mask) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VSQRTPDMasked256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPSLLWMasked512(v *Value) bool { - v_2 := v.Args[2] 
+func rewriteValueAMD64_OpAMD64VSQRTPDMasked512(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (VPSLLWMasked512 x (MOVQconst [c]) mask) - // result: (VPSLLWMasked512const [uint8(c)] x mask) + // match: (VSQRTPDMasked512 l:(VMOVDQUload512 {sym} [off] ptr mem) mask) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VSQRTPDMasked512load {sym} [off] ptr mask mem) for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { break } - c := auxIntToInt64(v_1.AuxInt) - mask := v_2 - v.reset(OpAMD64VPSLLWMasked512const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg2(x, mask) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + mask := v_1 + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VSQRTPDMasked512load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg3(ptr, mask, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPSRAD128(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpAMD64VSQRTPS128(v *Value) bool { v_0 := v.Args[0] - // match: (VPSRAD128 x (MOVQconst [c])) - // result: (VPSRAD128const [uint8(c)] x) + // match: (VSQRTPS128 l:(VMOVDQUload128 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VSQRTPS128load {sym} [off] ptr mem) for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { + l := v_0 + if l.Op != OpAMD64VMOVDQUload128 { break } - c := auxIntToInt64(v_1.AuxInt) - v.reset(OpAMD64VPSRAD128const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg(x) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VSQRTPS128load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPSRAD256(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpAMD64VSQRTPS256(v *Value) bool { v_0 := v.Args[0] - // match: (VPSRAD256 x (MOVQconst [c])) - // result: (VPSRAD256const [uint8(c)] x) + // match: (VSQRTPS256 l:(VMOVDQUload256 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VSQRTPS256load {sym} [off] ptr mem) for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { + l := v_0 + if l.Op != OpAMD64VMOVDQUload256 { break } - c := auxIntToInt64(v_1.AuxInt) - v.reset(OpAMD64VPSRAD256const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg(x) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + v.reset(OpAMD64VSQRTPS256load) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v.AddArg2(ptr, mem) return true } return false } -func rewriteValueAMD64_OpAMD64VPSRAD512(v *Value) bool { - v_1 := v.Args[1] +func rewriteValueAMD64_OpAMD64VSQRTPS512(v *Value) bool { v_0 := v.Args[0] - // match: (VPSRAD512 x (MOVQconst [c])) - // result: (VPSRAD512const [uint8(c)] x) + // match: (VSQRTPS512 l:(VMOVDQUload512 {sym} [off] ptr mem)) + // cond: canMergeLoad(v, l) && clobber(l) + // result: (VSQRTPS512load {sym} [off] ptr mem) for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { + l := v_0 + if l.Op != OpAMD64VMOVDQUload512 { break } - c := auxIntToInt64(v_1.AuxInt) - v.reset(OpAMD64VPSRAD512const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg(x) + off := auxIntToInt32(l.AuxInt) + sym := auxToSym(l.Aux) + mem := l.Args[1] + ptr := l.Args[0] + if !(canMergeLoad(v, l) && clobber(l)) { + break + } + 
v.reset(OpAMD64VSQRTPS512load)
+		v.AuxInt = int32ToAuxInt(off)
+		v.Aux = symToAux(sym)
+		v.AddArg2(ptr, mem)
 		return true
 	}
 	return false
 }
-func rewriteValueAMD64_OpAMD64VPSRADMasked128(v *Value) bool {
-	v_2 := v.Args[2]
+func rewriteValueAMD64_OpAMD64VSQRTPSMasked128(v *Value) bool {
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
-	// match: (VPSRADMasked128 x (MOVQconst [c]) mask)
-	// result: (VPSRADMasked128const [uint8(c)] x mask)
+	// match: (VSQRTPSMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask)
+	// cond: canMergeLoad(v, l) && clobber(l)
+	// result: (VSQRTPSMasked128load {sym} [off] ptr mask mem)
 	for {
-		x := v_0
-		if v_1.Op != OpAMD64MOVQconst {
+		l := v_0
+		if l.Op != OpAMD64VMOVDQUload128 {
 			break
 		}
-		c := auxIntToInt64(v_1.AuxInt)
-		mask := v_2
-		v.reset(OpAMD64VPSRADMasked128const)
-		v.AuxInt = uint8ToAuxInt(uint8(c))
-		v.AddArg2(x, mask)
+		off := auxIntToInt32(l.AuxInt)
+		sym := auxToSym(l.Aux)
+		mem := l.Args[1]
+		ptr := l.Args[0]
+		mask := v_1
+		if !(canMergeLoad(v, l) && clobber(l)) {
+			break
+		}
+		v.reset(OpAMD64VSQRTPSMasked128load)
+		v.AuxInt = int32ToAuxInt(off)
+		v.Aux = symToAux(sym)
+		v.AddArg3(ptr, mask, mem)
 		return true
 	}
 	return false
 }
-func rewriteValueAMD64_OpAMD64VPSRADMasked256(v *Value) bool {
-	v_2 := v.Args[2]
+func rewriteValueAMD64_OpAMD64VSQRTPSMasked256(v *Value) bool {
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
-	// match: (VPSRADMasked256 x (MOVQconst [c]) mask)
-	// result: (VPSRADMasked256const [uint8(c)] x mask)
+	// match: (VSQRTPSMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask)
+	// cond: canMergeLoad(v, l) && clobber(l)
+	// result: (VSQRTPSMasked256load {sym} [off] ptr mask mem)
 	for {
-		x := v_0
-		if v_1.Op != OpAMD64MOVQconst {
+		l := v_0
+		if l.Op != OpAMD64VMOVDQUload256 {
 			break
 		}
-		c := auxIntToInt64(v_1.AuxInt)
-		mask := v_2
-		v.reset(OpAMD64VPSRADMasked256const)
-		v.AuxInt = uint8ToAuxInt(uint8(c))
-		v.AddArg2(x, mask)
+		off := auxIntToInt32(l.AuxInt)
+		sym := auxToSym(l.Aux)
+		mem := l.Args[1]
+		ptr := l.Args[0]
+		mask := v_1
+		if !(canMergeLoad(v, l) && clobber(l)) {
+			break
+		}
+		v.reset(OpAMD64VSQRTPSMasked256load)
+		v.AuxInt = int32ToAuxInt(off)
+		v.Aux = symToAux(sym)
+		v.AddArg3(ptr, mask, mem)
 		return true
 	}
 	return false
 }
-func rewriteValueAMD64_OpAMD64VPSRADMasked512(v *Value) bool {
-	v_2 := v.Args[2]
+func rewriteValueAMD64_OpAMD64VSQRTPSMasked512(v *Value) bool {
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
-	// match: (VPSRADMasked512 x (MOVQconst [c]) mask)
-	// result: (VPSRADMasked512const [uint8(c)] x mask)
+	// match: (VSQRTPSMasked512 l:(VMOVDQUload512 {sym} [off] ptr mem) mask)
+	// cond: canMergeLoad(v, l) && clobber(l)
+	// result: (VSQRTPSMasked512load {sym} [off] ptr mask mem)
 	for {
-		x := v_0
-		if v_1.Op != OpAMD64MOVQconst {
+		l := v_0
+		if l.Op != OpAMD64VMOVDQUload512 {
 			break
 		}
-		c := auxIntToInt64(v_1.AuxInt)
-		mask := v_2
-		v.reset(OpAMD64VPSRADMasked512const)
-		v.AuxInt = uint8ToAuxInt(uint8(c))
-		v.AddArg2(x, mask)
+		off := auxIntToInt32(l.AuxInt)
+		sym := auxToSym(l.Aux)
+		mem := l.Args[1]
+		ptr := l.Args[0]
+		mask := v_1
+		if !(canMergeLoad(v, l) && clobber(l)) {
+			break
+		}
+		v.reset(OpAMD64VSQRTPSMasked512load)
+		v.AuxInt = int32ToAuxInt(off)
+		v.Aux = symToAux(sym)
+		v.AddArg3(ptr, mask, mem)
 		return true
 	}
 	return false
 }
-func rewriteValueAMD64_OpAMD64VPSRAQ128(v *Value) bool {
+func rewriteValueAMD64_OpAMD64VSUBPD128(v *Value) bool {
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
-	// match: (VPSRAQ128 x (MOVQconst [c]))
-	// result: (VPSRAQ128const [uint8(c)] x)
+	// match: (VSUBPD128 x l:(VMOVDQUload128 {sym} [off] ptr mem))
+	// cond: canMergeLoad(v, l) && clobber(l)
+	// result: (VSUBPD128load {sym} [off] x ptr mem)
 	for {
 		x := v_0
-		if v_1.Op != OpAMD64MOVQconst {
+		l := v_1
+		if l.Op != OpAMD64VMOVDQUload128 {
 			break
 		}
-		c := auxIntToInt64(v_1.AuxInt)
-		v.reset(OpAMD64VPSRAQ128const)
-		v.AuxInt = uint8ToAuxInt(uint8(c))
-		v.AddArg(x)
+		off := auxIntToInt32(l.AuxInt)
+		sym := auxToSym(l.Aux)
+		mem := l.Args[1]
+		ptr := l.Args[0]
+		if !(canMergeLoad(v, l) && clobber(l)) {
+			break
+		}
+		v.reset(OpAMD64VSUBPD128load)
+		v.AuxInt = int32ToAuxInt(off)
+		v.Aux = symToAux(sym)
+		v.AddArg3(x, ptr, mem)
 		return true
 	}
 	return false
 }
-func rewriteValueAMD64_OpAMD64VPSRAQ256(v *Value) bool {
+func rewriteValueAMD64_OpAMD64VSUBPD256(v *Value) bool {
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
-	// match: (VPSRAQ256 x (MOVQconst [c]))
-	// result: (VPSRAQ256const [uint8(c)] x)
+	// match: (VSUBPD256 x l:(VMOVDQUload256 {sym} [off] ptr mem))
+	// cond: canMergeLoad(v, l) && clobber(l)
+	// result: (VSUBPD256load {sym} [off] x ptr mem)
 	for {
 		x := v_0
-		if v_1.Op != OpAMD64MOVQconst {
+		l := v_1
+		if l.Op != OpAMD64VMOVDQUload256 {
 			break
 		}
-		c := auxIntToInt64(v_1.AuxInt)
-		v.reset(OpAMD64VPSRAQ256const)
-		v.AuxInt = uint8ToAuxInt(uint8(c))
-		v.AddArg(x)
+		off := auxIntToInt32(l.AuxInt)
+		sym := auxToSym(l.Aux)
+		mem := l.Args[1]
+		ptr := l.Args[0]
+		if !(canMergeLoad(v, l) && clobber(l)) {
+			break
+		}
+		v.reset(OpAMD64VSUBPD256load)
+		v.AuxInt = int32ToAuxInt(off)
+		v.Aux = symToAux(sym)
+		v.AddArg3(x, ptr, mem)
 		return true
 	}
 	return false
 }
-func rewriteValueAMD64_OpAMD64VPSRAQ512(v *Value) bool {
+func rewriteValueAMD64_OpAMD64VSUBPD512(v *Value) bool {
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
-	// match: (VPSRAQ512 x (MOVQconst [c]))
-	// result: (VPSRAQ512const [uint8(c)] x)
+	// match: (VSUBPD512 x l:(VMOVDQUload512 {sym} [off] ptr mem))
+	// cond: canMergeLoad(v, l) && clobber(l)
+	// result: (VSUBPD512load {sym} [off] x ptr mem)
 	for {
 		x := v_0
-		if v_1.Op != OpAMD64MOVQconst {
+		l := v_1
+		if l.Op != OpAMD64VMOVDQUload512 {
 			break
 		}
-		c := auxIntToInt64(v_1.AuxInt)
-		v.reset(OpAMD64VPSRAQ512const)
-		v.AuxInt = uint8ToAuxInt(uint8(c))
-		v.AddArg(x)
+		off := auxIntToInt32(l.AuxInt)
+		sym := auxToSym(l.Aux)
+		mem := l.Args[1]
+		ptr := l.Args[0]
+		if !(canMergeLoad(v, l) && clobber(l)) {
+			break
+		}
+		v.reset(OpAMD64VSUBPD512load)
+		v.AuxInt = int32ToAuxInt(off)
+		v.Aux = symToAux(sym)
+		v.AddArg3(x, ptr, mem)
 		return true
 	}
 	return false
 }
-func rewriteValueAMD64_OpAMD64VPSRAQMasked128(v *Value) bool {
+func rewriteValueAMD64_OpAMD64VSUBPDMasked128(v *Value) bool {
 	v_2 := v.Args[2]
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
-	// match: (VPSRAQMasked128 x (MOVQconst [c]) mask)
-	// result: (VPSRAQMasked128const [uint8(c)] x mask)
+	// match: (VSUBPDMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask)
+	// cond: canMergeLoad(v, l) && clobber(l)
+	// result: (VSUBPDMasked128load {sym} [off] x ptr mask mem)
 	for {
 		x := v_0
-		if v_1.Op != OpAMD64MOVQconst {
+		l := v_1
+		if l.Op != OpAMD64VMOVDQUload128 {
 			break
 		}
-		c := auxIntToInt64(v_1.AuxInt)
+		off := auxIntToInt32(l.AuxInt)
+		sym := auxToSym(l.Aux)
+		mem := l.Args[1]
+		ptr := l.Args[0]
 		mask := v_2
-		v.reset(OpAMD64VPSRAQMasked128const)
-		v.AuxInt = uint8ToAuxInt(uint8(c))
-		v.AddArg2(x, mask)
+		if !(canMergeLoad(v, l) && clobber(l)) {
+			break
+		}
+		v.reset(OpAMD64VSUBPDMasked128load)
+		v.AuxInt = int32ToAuxInt(off)
+		v.Aux = symToAux(sym)
+		v.AddArg4(x, ptr, mask, mem)
 		return true
 	}
 	return false
 }
-func rewriteValueAMD64_OpAMD64VPSRAQMasked256(v *Value) bool {
+func rewriteValueAMD64_OpAMD64VSUBPDMasked256(v *Value) bool {
 	v_2 := v.Args[2]
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
-	// match: (VPSRAQMasked256 x (MOVQconst [c]) mask)
-	// result: (VPSRAQMasked256const [uint8(c)] x mask)
+	// match: (VSUBPDMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask)
+	// cond: canMergeLoad(v, l) && clobber(l)
+	// result: (VSUBPDMasked256load {sym} [off] x ptr mask mem)
 	for {
 		x := v_0
-		if v_1.Op != OpAMD64MOVQconst {
+		l := v_1
+		if l.Op != OpAMD64VMOVDQUload256 {
 			break
 		}
-		c := auxIntToInt64(v_1.AuxInt)
+		off := auxIntToInt32(l.AuxInt)
+		sym := auxToSym(l.Aux)
+		mem := l.Args[1]
+		ptr := l.Args[0]
 		mask := v_2
-		v.reset(OpAMD64VPSRAQMasked256const)
-		v.AuxInt = uint8ToAuxInt(uint8(c))
-		v.AddArg2(x, mask)
+		if !(canMergeLoad(v, l) && clobber(l)) {
+			break
+		}
+		v.reset(OpAMD64VSUBPDMasked256load)
+		v.AuxInt = int32ToAuxInt(off)
+		v.Aux = symToAux(sym)
+		v.AddArg4(x, ptr, mask, mem)
 		return true
 	}
 	return false
 }
-func rewriteValueAMD64_OpAMD64VPSRAQMasked512(v *Value) bool {
+func rewriteValueAMD64_OpAMD64VSUBPDMasked512(v *Value) bool {
 	v_2 := v.Args[2]
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
-	// match: (VPSRAQMasked512 x (MOVQconst [c]) mask)
-	// result: (VPSRAQMasked512const [uint8(c)] x mask)
+	// match: (VSUBPDMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask)
+	// cond: canMergeLoad(v, l) && clobber(l)
+	// result: (VSUBPDMasked512load {sym} [off] x ptr mask mem)
 	for {
 		x := v_0
-		if v_1.Op != OpAMD64MOVQconst {
+		l := v_1
+		if l.Op != OpAMD64VMOVDQUload512 {
 			break
 		}
-		c := auxIntToInt64(v_1.AuxInt)
+		off := auxIntToInt32(l.AuxInt)
+		sym := auxToSym(l.Aux)
+		mem := l.Args[1]
+		ptr := l.Args[0]
 		mask := v_2
-		v.reset(OpAMD64VPSRAQMasked512const)
-		v.AuxInt = uint8ToAuxInt(uint8(c))
-		v.AddArg2(x, mask)
+		if !(canMergeLoad(v, l) && clobber(l)) {
+			break
+		}
+		v.reset(OpAMD64VSUBPDMasked512load)
+		v.AuxInt = int32ToAuxInt(off)
+		v.Aux = symToAux(sym)
+		v.AddArg4(x, ptr, mask, mem)
 		return true
 	}
 	return false
 }
-func rewriteValueAMD64_OpAMD64VPSRAW128(v *Value) bool {
+func rewriteValueAMD64_OpAMD64VSUBPS128(v *Value) bool {
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
-	// match: (VPSRAW128 x (MOVQconst [c]))
-	// result: (VPSRAW128const [uint8(c)] x)
+	// match: (VSUBPS128 x l:(VMOVDQUload128 {sym} [off] ptr mem))
+	// cond: canMergeLoad(v, l) && clobber(l)
+	// result: (VSUBPS128load {sym} [off] x ptr mem)
 	for {
 		x := v_0
-		if v_1.Op != OpAMD64MOVQconst {
+		l := v_1
+		if l.Op != OpAMD64VMOVDQUload128 {
 			break
 		}
-		c := auxIntToInt64(v_1.AuxInt)
-		v.reset(OpAMD64VPSRAW128const)
-		v.AuxInt = uint8ToAuxInt(uint8(c))
-		v.AddArg(x)
+		off := auxIntToInt32(l.AuxInt)
+		sym := auxToSym(l.Aux)
+		mem := l.Args[1]
+		ptr := l.Args[0]
+		if !(canMergeLoad(v, l) && clobber(l)) {
+			break
+		}
+		v.reset(OpAMD64VSUBPS128load)
+		v.AuxInt = int32ToAuxInt(off)
+		v.Aux = symToAux(sym)
+		v.AddArg3(x, ptr, mem)
 		return true
 	}
 	return false
 }
-func rewriteValueAMD64_OpAMD64VPSRAW256(v *Value) bool {
+func rewriteValueAMD64_OpAMD64VSUBPS256(v *Value) bool {
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
-	// match: (VPSRAW256 x (MOVQconst [c]))
-	// result: (VPSRAW256const [uint8(c)] x)
+	// match: (VSUBPS256 x l:(VMOVDQUload256 {sym} [off] ptr mem))
+	// cond: canMergeLoad(v, l) && clobber(l)
+	// result: (VSUBPS256load {sym} [off] x ptr mem)
 	for {
 		x := v_0
-		if v_1.Op != OpAMD64MOVQconst {
+		l := v_1
+		if l.Op != OpAMD64VMOVDQUload256 {
 			break
 		}
-		c := auxIntToInt64(v_1.AuxInt)
-		v.reset(OpAMD64VPSRAW256const)
-		v.AuxInt = uint8ToAuxInt(uint8(c))
-		v.AddArg(x)
+		off := auxIntToInt32(l.AuxInt)
+		sym := auxToSym(l.Aux)
+		mem := l.Args[1]
+		ptr := l.Args[0]
+		if !(canMergeLoad(v, l) && clobber(l)) {
+			break
+		}
+		v.reset(OpAMD64VSUBPS256load)
+		v.AuxInt = int32ToAuxInt(off)
+		v.Aux = symToAux(sym)
+		v.AddArg3(x, ptr, mem)
 		return true
 	}
 	return false
 }
-func rewriteValueAMD64_OpAMD64VPSRAW512(v *Value) bool {
+func rewriteValueAMD64_OpAMD64VSUBPS512(v *Value) bool {
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
-	// match: (VPSRAW512 x (MOVQconst [c]))
-	// result: (VPSRAW512const [uint8(c)] x)
+	// match: (VSUBPS512 x l:(VMOVDQUload512 {sym} [off] ptr mem))
+	// cond: canMergeLoad(v, l) && clobber(l)
+	// result: (VSUBPS512load {sym} [off] x ptr mem)
 	for {
 		x := v_0
-		if v_1.Op != OpAMD64MOVQconst {
+		l := v_1
+		if l.Op != OpAMD64VMOVDQUload512 {
 			break
 		}
-		c := auxIntToInt64(v_1.AuxInt)
-		v.reset(OpAMD64VPSRAW512const)
-		v.AuxInt = uint8ToAuxInt(uint8(c))
-		v.AddArg(x)
+		off := auxIntToInt32(l.AuxInt)
+		sym := auxToSym(l.Aux)
+		mem := l.Args[1]
+		ptr := l.Args[0]
+		if !(canMergeLoad(v, l) && clobber(l)) {
+			break
+		}
+		v.reset(OpAMD64VSUBPS512load)
+		v.AuxInt = int32ToAuxInt(off)
+		v.Aux = symToAux(sym)
+		v.AddArg3(x, ptr, mem)
 		return true
 	}
 	return false
 }
-func rewriteValueAMD64_OpAMD64VPSRAWMasked128(v *Value) bool {
+func rewriteValueAMD64_OpAMD64VSUBPSMasked128(v *Value) bool {
 	v_2 := v.Args[2]
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
-	// match: (VPSRAWMasked128 x (MOVQconst [c]) mask)
-	// result: (VPSRAWMasked128const [uint8(c)] x mask)
+	// match: (VSUBPSMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask)
+	// cond: canMergeLoad(v, l) && clobber(l)
+	// result: (VSUBPSMasked128load {sym} [off] x ptr mask mem)
 	for {
 		x := v_0
-		if v_1.Op != OpAMD64MOVQconst {
+		l := v_1
+		if l.Op != OpAMD64VMOVDQUload128 {
 			break
 		}
-		c := auxIntToInt64(v_1.AuxInt)
+		off := auxIntToInt32(l.AuxInt)
+		sym := auxToSym(l.Aux)
+		mem := l.Args[1]
+		ptr := l.Args[0]
 		mask := v_2
-		v.reset(OpAMD64VPSRAWMasked128const)
-		v.AuxInt = uint8ToAuxInt(uint8(c))
-		v.AddArg2(x, mask)
+		if !(canMergeLoad(v, l) && clobber(l)) {
+			break
+		}
+		v.reset(OpAMD64VSUBPSMasked128load)
+		v.AuxInt = int32ToAuxInt(off)
+		v.Aux = symToAux(sym)
+		v.AddArg4(x, ptr, mask, mem)
 		return true
 	}
 	return false
 }
-func rewriteValueAMD64_OpAMD64VPSRAWMasked256(v *Value) bool {
+func rewriteValueAMD64_OpAMD64VSUBPSMasked256(v *Value) bool {
 	v_2 := v.Args[2]
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
-	// match: (VPSRAWMasked256 x (MOVQconst [c]) mask)
-	// result: (VPSRAWMasked256const [uint8(c)] x mask)
+	// match: (VSUBPSMasked256 x l:(VMOVDQUload256 {sym} [off] ptr mem) mask)
+	// cond: canMergeLoad(v, l) && clobber(l)
+	// result: (VSUBPSMasked256load {sym} [off] x ptr mask mem)
 	for {
 		x := v_0
-		if v_1.Op != OpAMD64MOVQconst {
+		l := v_1
+		if l.Op != OpAMD64VMOVDQUload256 {
 			break
 		}
-		c := auxIntToInt64(v_1.AuxInt)
+		off := auxIntToInt32(l.AuxInt)
+		sym := auxToSym(l.Aux)
+		mem := l.Args[1]
+		ptr := l.Args[0]
 		mask := v_2
-		v.reset(OpAMD64VPSRAWMasked256const)
-		v.AuxInt = uint8ToAuxInt(uint8(c))
-		v.AddArg2(x, mask)
+		if !(canMergeLoad(v, l) && clobber(l)) {
+			break
+		}
+		v.reset(OpAMD64VSUBPSMasked256load)
+		v.AuxInt = int32ToAuxInt(off)
+		v.Aux = symToAux(sym)
+		v.AddArg4(x, ptr, mask, mem)
 		return true
 	}
 	return false
 }
-func rewriteValueAMD64_OpAMD64VPSRAWMasked512(v *Value) bool {
+func rewriteValueAMD64_OpAMD64VSUBPSMasked512(v *Value) bool {
 	v_2 := v.Args[2]
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
-	// match: (VPSRAWMasked512 x (MOVQconst [c]) mask)
-	// result: (VPSRAWMasked512const [uint8(c)] x mask)
+	// match: (VSUBPSMasked512 x l:(VMOVDQUload512 {sym} [off] ptr mem) mask)
+	// cond: canMergeLoad(v, l) && clobber(l)
+	// result: (VSUBPSMasked512load {sym} [off] x ptr mask mem)
 	for {
 		x := v_0
-		if v_1.Op != OpAMD64MOVQconst {
+		l := v_1
+		if l.Op != OpAMD64VMOVDQUload512 {
 			break
 		}
-		c := auxIntToInt64(v_1.AuxInt)
+		off := auxIntToInt32(l.AuxInt)
+		sym := auxToSym(l.Aux)
+		mem := l.Args[1]
+		ptr := l.Args[0]
 		mask := v_2
-		v.reset(OpAMD64VPSRAWMasked512const)
-		v.AuxInt = uint8ToAuxInt(uint8(c))
-		v.AddArg2(x, mask)
+		if !(canMergeLoad(v, l) && clobber(l)) {
+			break
+		}
+		v.reset(OpAMD64VSUBPSMasked512load)
+		v.AuxInt = int32ToAuxInt(off)
+		v.Aux = symToAux(sym)
+		v.AddArg4(x, ptr, mask, mem)
 		return true
 	}
 	return false
diff --git a/src/simd/_gen/simdgen/gen_simdrules.go b/src/simd/_gen/simdgen/gen_simdrules.go
index 8c31411113..c9fae4eed7 100644
--- a/src/simd/_gen/simdgen/gen_simdrules.go
+++ b/src/simd/_gen/simdgen/gen_simdrules.go
@@ -22,6 +22,9 @@ type tplRuleData struct {
 	MaskInConvert  string // e.g. "VPMOVVec32x8ToM"
 	MaskOutConvert string // e.g. "VPMOVMToVec32x8"
 	ElementSize    int    // e.g. 32
+	Size           int    // e.g. 128
+	ArgsLoadAddr   string // [Args] with its last vreg arg replaced by a concrete "(VMOVDQUload* ptr mem)" load; may also include a mask.
+	ArgsAddr       string // [Args] with its last vreg arg replaced by "ptr"; may also include a mask, and has "mem" appended at the end.
 }
 
 var (
@@ -38,6 +41,8 @@ var (
 {{end}}
 {{define "masksftimm"}}({{.Asm}} x (MOVQconst [c]) mask) => ({{.Asm}}const [uint8(c)] x mask)
 {{end}}
+{{define "vregMem"}}({{.Asm}} {{.ArgsLoadAddr}}) && canMergeLoad(v, l) && clobber(l) => ({{.Asm}}load {{.ArgsAddr}})
+{{end}}
 `))
 )
 
@@ -85,6 +90,7 @@ var tmplOrder = map[string]int{
 	"maskOut":  3,
 	"maskIn":   4,
 	"pureVreg": 5,
+	"vregMem":  6,
 }
 
 func compareTplRuleData(x, y tplRuleData) int {
@@ -118,7 +124,9 @@ func writeSIMDRules(ops []Operation) *bytes.Buffer {
 	buffer.WriteString(generatedHeader + "\n")
 
 	var allData []tplRuleData
-	var optData []tplRuleData // for peephole optimizations
+	var optData []tplRuleData    // for mask peephole optimizations and other misc rewrites
+	var memOptData []tplRuleData // for memory peephole optimizations
+	memOpSeen := make(map[string]bool)
 
 	for _, opr := range ops {
 		opInShape, opOutShape, maskType, immType, gOp := opr.shape()
@@ -228,6 +236,39 @@ func writeSIMDRules(ops []Operation) *bytes.Buffer {
 				panic("simdgen sees unknwon special lower " + *gOp.SpecialLower + ", maybe implement it?")
 			}
 		}
+		if gOp.MemFeatures != nil && *gOp.MemFeatures == "vbcst" && immType == NoImm {
+			// sanity check: skip ops that have an operand treated like a scalar
+			selected := true
+			for _, a := range gOp.In {
+				if a.TreatLikeAScalarOfSize != nil {
+					selected = false
+					break
+				}
+			}
+			if _, ok := memOpSeen[data.Asm]; ok {
+				selected = false
+			}
+			if selected {
+				memOpSeen[data.Asm] = true
+				lastVreg := gOp.In[vregInCnt-1]
+				// sanity check: the operand being replaced must be a vreg
+				if lastVreg.Class != "vreg" {
+					panic(fmt.Errorf("simdgen expects the vbcst-replaced operand to be a vreg, but got %v", lastVreg))
+				}
+				memOpData := data
+				// Remove the last vreg from the args and replace it with a load.
+				memOpData.ArgsLoadAddr = data.Args[:len(data.Args)-1] + fmt.Sprintf("l:(VMOVDQUload%d {sym} [off] ptr mem)", *lastVreg.Bits)
+				// Remove the last vreg from the args and replace it with "ptr".
+				memOpData.ArgsAddr = "{sym} [off] " + data.Args[:len(data.Args)-1] + "ptr"
+				if maskType == OneMask {
+					memOpData.ArgsAddr += " mask"
+					memOpData.ArgsLoadAddr += " mask"
+				}
+				memOpData.ArgsAddr += " mem"
+				memOpData.tplName = "vregMem"
+				memOptData = append(memOptData, memOpData)
+			}
+		}
 
 		if tplName == "pureVreg" && data.Args == data.ArgsOut {
 			data.Args = "..."
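
A worked example may help here (illustrative only; the concrete values below are assumptions chosen to match the style of the generated rules, not output produced by this CL). Suppose an op reaches the block above with data.Asm = "VSUBPSMasked128", data.Args = "x y", maskType == OneMask, and *lastVreg.Bits == 128; the one-byte slice data.Args[:len(data.Args)-1] works because operand names in [Args] are single letters. The two fields then come out as

	ArgsLoadAddr = "x l:(VMOVDQUload128 {sym} [off] ptr mem) mask"
	ArgsAddr     = "{sym} [off] x ptr mask mem"

and the "vregMem" template expands them into

	(VSUBPSMasked128 x l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VSUBPSMasked128load {sym} [off] x ptr mask mem)

which is exactly the rule shape emitted into simdAMD64.rules and matched by the rewrite functions above.
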
@@ -262,5 +303,11 @@ func writeSIMDRules(ops []Operation) *bytes.Buffer {
 		}
 	}
 
+	for _, data := range memOptData {
+		if err := ruleTemplates.ExecuteTemplate(buffer, data.tplName, data); err != nil {
+			panic(fmt.Errorf("failed to execute template %s for %s: %w", data.tplName, data.Asm, err))
+		}
+	}
+
 	return buffer
 }
-- 
2.52.0
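
Postscript for intuition (a sketch, not part of the CL): the scalar form of this peephole has long existed in the AMD64 rules, where (ADDQ x l:(MOVQload ...)) guarded by canMergeLoad and clobber becomes a single memory-operand ADDQload. canMergeLoad checks, roughly, that folding the load into the using op is safe given its other uses and the surrounding memory operations, and clobber then marks the folded load dead. The Go sketch below shows scalar code that already benefits from that rule; the vregMem rules added here extend the same folding to vectors loaded via VMOVDQUload*. The function and variable names are mine, chosen for the example.

package main

import "fmt"

// addMem is the scalar analogue of the new SIMD peephole: on amd64 the
// compiler folds the load of *p into the add (the ADDQload rule), emitting
// one memory-operand ADDQ instead of a separate MOVQ followed by a
// register-register ADDQ. The vregMem rules do the same for vector ops,
// e.g. (VADDPS128 x l:(VMOVDQUload128 ...)) => (VADDPS128load ... x ptr mem).
func addMem(x int64, p *int64) int64 {
	return x + *p
}

func main() {
	v := int64(2)
	fmt.Println(addMem(40, &v)) // prints 42
}
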