From 08ab8e24a310944768717356e188a14c46c7447b Mon Sep 17 00:00:00 2001
From: David Chase
Date: Tue, 12 Aug 2025 17:01:55 -0400
Subject: [PATCH] [dev.simd] cmd/compile: generated code from 'fix generated rules for shifts'

This code is generated by simdgen CL 695455.

Change-Id: I5afdc209a50b49d68e120130e0578e4666bf8749
Reviewed-on: https://go-review.googlesource.com/c/go/+/695475
LUCI-TryBot-Result: Go LUCI
Reviewed-by: Junyang Shao
---
 .../compile/internal/ssa/_gen/simdAMD64.rules |  180 +-
 src/cmd/compile/internal/ssa/rewriteAMD64.go  | 4130 +++++++----------
 2 files changed, 1777 insertions(+), 2533 deletions(-)

diff --git a/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules b/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules
index abfa10020d..80cddaae79 100644
--- a/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules
+++ b/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules
@@ -1444,42 +1444,33 @@
 (SetLoUint32x16 x y) => (VINSERTI64X4512 [0] x y)
 (SetLoUint64x4 x y) => (VINSERTI128256 [0] x y)
 (SetLoUint64x8 x y) => (VINSERTI64X4512 [0] x y)
-(ShiftAllLeftInt16x8 x (MOVQconst [c])) => (VPSLLW128const [uint8(c)] x)
-(ShiftAllLeftInt16x8 x y) => (VPSLLW128 x y)
-(ShiftAllLeftInt16x16 x (MOVQconst [c])) => (VPSLLW256const [uint8(c)] x)
-(ShiftAllLeftInt16x16 x y) => (VPSLLW256 x y)
-(ShiftAllLeftInt16x32 x (MOVQconst [c])) => (VPSLLW512const [uint8(c)] x)
-(ShiftAllLeftInt16x32 x y) => (VPSLLW512 x y)
-(ShiftAllLeftInt32x4 x (MOVQconst [c])) => (VPSLLD128const [uint8(c)] x)
-(ShiftAllLeftInt32x4 x y) => (VPSLLD128 x y)
-(ShiftAllLeftInt32x8 x (MOVQconst [c])) => (VPSLLD256const [uint8(c)] x)
-(ShiftAllLeftInt32x8 x y) => (VPSLLD256 x y)
-(ShiftAllLeftInt32x16 x (MOVQconst [c])) => (VPSLLD512const [uint8(c)] x)
-(ShiftAllLeftInt32x16 x y) => (VPSLLD512 x y)
-(ShiftAllLeftInt64x2 x (MOVQconst [c])) => (VPSLLQ128const [uint8(c)] x)
-(ShiftAllLeftInt64x2 x y) => (VPSLLQ128 x y)
-(ShiftAllLeftInt64x4 x (MOVQconst [c])) => (VPSLLQ256const [uint8(c)] x)
-(ShiftAllLeftInt64x4 x y) => (VPSLLQ256 x y)
-(ShiftAllLeftInt64x8 x (MOVQconst [c])) => (VPSLLQ512const [uint8(c)] x)
-(ShiftAllLeftInt64x8 x y) => (VPSLLQ512 x y)
-(ShiftAllLeftUint16x8 x (MOVQconst [c])) => (VPSLLW128const [uint8(c)] x)
-(ShiftAllLeftUint16x8 x y) => (VPSLLW128 x y)
-(ShiftAllLeftUint16x16 x (MOVQconst [c])) => (VPSLLW256const [uint8(c)] x)
-(ShiftAllLeftUint16x16 x y) => (VPSLLW256 x y)
-(ShiftAllLeftUint16x32 x (MOVQconst [c])) => (VPSLLW512const [uint8(c)] x)
-(ShiftAllLeftUint16x32 x y) => (VPSLLW512 x y)
-(ShiftAllLeftUint32x4 x (MOVQconst [c])) => (VPSLLD128const [uint8(c)] x)
-(ShiftAllLeftUint32x4 x y) => (VPSLLD128 x y)
-(ShiftAllLeftUint32x8 x (MOVQconst [c])) => (VPSLLD256const [uint8(c)] x)
-(ShiftAllLeftUint32x8 x y) => (VPSLLD256 x y)
-(ShiftAllLeftUint32x16 x (MOVQconst [c])) => (VPSLLD512const [uint8(c)] x)
-(ShiftAllLeftUint32x16 x y) => (VPSLLD512 x y)
-(ShiftAllLeftUint64x2 x (MOVQconst [c])) => (VPSLLQ128const [uint8(c)] x)
-(ShiftAllLeftUint64x2 x y) => (VPSLLQ128 x y)
-(ShiftAllLeftUint64x4 x (MOVQconst [c])) => (VPSLLQ256const [uint8(c)] x)
-(ShiftAllLeftUint64x4 x y) => (VPSLLQ256 x y)
-(ShiftAllLeftUint64x8 x (MOVQconst [c])) => (VPSLLQ512const [uint8(c)] x)
-(ShiftAllLeftUint64x8 x y) => (VPSLLQ512 x y)
+(ShiftAllLeftInt16x8 ...) => (VPSLLW128 ...)
+(VPSLLW128 x (MOVQconst [c])) => (VPSLLW128const [uint8(c)] x)
+(ShiftAllLeftInt16x16 ...) => (VPSLLW256 ...)
+(VPSLLW256 x (MOVQconst [c])) => (VPSLLW256const [uint8(c)] x)
+(ShiftAllLeftInt16x32 ...) => (VPSLLW512 ...)
+(VPSLLW512 x (MOVQconst [c])) => (VPSLLW512const [uint8(c)] x) +(ShiftAllLeftInt32x4 ...) => (VPSLLD128 ...) +(VPSLLD128 x (MOVQconst [c])) => (VPSLLD128const [uint8(c)] x) +(ShiftAllLeftInt32x8 ...) => (VPSLLD256 ...) +(VPSLLD256 x (MOVQconst [c])) => (VPSLLD256const [uint8(c)] x) +(ShiftAllLeftInt32x16 ...) => (VPSLLD512 ...) +(VPSLLD512 x (MOVQconst [c])) => (VPSLLD512const [uint8(c)] x) +(ShiftAllLeftInt64x2 ...) => (VPSLLQ128 ...) +(VPSLLQ128 x (MOVQconst [c])) => (VPSLLQ128const [uint8(c)] x) +(ShiftAllLeftInt64x4 ...) => (VPSLLQ256 ...) +(VPSLLQ256 x (MOVQconst [c])) => (VPSLLQ256const [uint8(c)] x) +(ShiftAllLeftInt64x8 ...) => (VPSLLQ512 ...) +(VPSLLQ512 x (MOVQconst [c])) => (VPSLLQ512const [uint8(c)] x) +(ShiftAllLeftUint16x8 ...) => (VPSLLW128 ...) +(ShiftAllLeftUint16x16 ...) => (VPSLLW256 ...) +(ShiftAllLeftUint16x32 ...) => (VPSLLW512 ...) +(ShiftAllLeftUint32x4 ...) => (VPSLLD128 ...) +(ShiftAllLeftUint32x8 ...) => (VPSLLD256 ...) +(ShiftAllLeftUint32x16 ...) => (VPSLLD512 ...) +(ShiftAllLeftUint64x2 ...) => (VPSLLQ128 ...) +(ShiftAllLeftUint64x4 ...) => (VPSLLQ256 ...) +(ShiftAllLeftUint64x8 ...) => (VPSLLQ512 ...) (ShiftAllLeftConcatInt16x8 ...) => (VPSHLDW128 ...) (ShiftAllLeftConcatInt16x16 ...) => (VPSHLDW256 ...) (ShiftAllLeftConcatInt16x32 ...) => (VPSHLDW512 ...) @@ -1516,78 +1507,60 @@ (ShiftAllLeftConcatMaskedUint64x2 [a] x y mask) => (VPSHLDQMasked128 [a] x y (VPMOVVec64x2ToM mask)) (ShiftAllLeftConcatMaskedUint64x4 [a] x y mask) => (VPSHLDQMasked256 [a] x y (VPMOVVec64x4ToM mask)) (ShiftAllLeftConcatMaskedUint64x8 [a] x y mask) => (VPSHLDQMasked512 [a] x y (VPMOVVec64x8ToM mask)) -(ShiftAllLeftMaskedInt16x8 x (MOVQconst [c]) mask) => (VPSLLWMasked128const [uint8(c)] x (VPMOVVec16x8ToM mask)) +(VPSLLWMasked128 x (MOVQconst [c]) mask) => (VPSLLWMasked128const [uint8(c)] x mask) (ShiftAllLeftMaskedInt16x8 x y mask) => (VPSLLWMasked128 x y (VPMOVVec16x8ToM mask)) -(ShiftAllLeftMaskedInt16x16 x (MOVQconst [c]) mask) => (VPSLLWMasked256const [uint8(c)] x (VPMOVVec16x16ToM mask)) +(VPSLLWMasked256 x (MOVQconst [c]) mask) => (VPSLLWMasked256const [uint8(c)] x mask) (ShiftAllLeftMaskedInt16x16 x y mask) => (VPSLLWMasked256 x y (VPMOVVec16x16ToM mask)) -(ShiftAllLeftMaskedInt16x32 x (MOVQconst [c]) mask) => (VPSLLWMasked512const [uint8(c)] x (VPMOVVec16x32ToM mask)) +(VPSLLWMasked512 x (MOVQconst [c]) mask) => (VPSLLWMasked512const [uint8(c)] x mask) (ShiftAllLeftMaskedInt16x32 x y mask) => (VPSLLWMasked512 x y (VPMOVVec16x32ToM mask)) -(ShiftAllLeftMaskedInt32x4 x (MOVQconst [c]) mask) => (VPSLLDMasked128const [uint8(c)] x (VPMOVVec32x4ToM mask)) +(VPSLLDMasked128 x (MOVQconst [c]) mask) => (VPSLLDMasked128const [uint8(c)] x mask) (ShiftAllLeftMaskedInt32x4 x y mask) => (VPSLLDMasked128 x y (VPMOVVec32x4ToM mask)) -(ShiftAllLeftMaskedInt32x8 x (MOVQconst [c]) mask) => (VPSLLDMasked256const [uint8(c)] x (VPMOVVec32x8ToM mask)) +(VPSLLDMasked256 x (MOVQconst [c]) mask) => (VPSLLDMasked256const [uint8(c)] x mask) (ShiftAllLeftMaskedInt32x8 x y mask) => (VPSLLDMasked256 x y (VPMOVVec32x8ToM mask)) -(ShiftAllLeftMaskedInt32x16 x (MOVQconst [c]) mask) => (VPSLLDMasked512const [uint8(c)] x (VPMOVVec32x16ToM mask)) +(VPSLLDMasked512 x (MOVQconst [c]) mask) => (VPSLLDMasked512const [uint8(c)] x mask) (ShiftAllLeftMaskedInt32x16 x y mask) => (VPSLLDMasked512 x y (VPMOVVec32x16ToM mask)) -(ShiftAllLeftMaskedInt64x2 x (MOVQconst [c]) mask) => (VPSLLQMasked128const [uint8(c)] x (VPMOVVec64x2ToM mask)) +(VPSLLQMasked128 x (MOVQconst [c]) mask) => (VPSLLQMasked128const [uint8(c)] x 
mask) (ShiftAllLeftMaskedInt64x2 x y mask) => (VPSLLQMasked128 x y (VPMOVVec64x2ToM mask)) -(ShiftAllLeftMaskedInt64x4 x (MOVQconst [c]) mask) => (VPSLLQMasked256const [uint8(c)] x (VPMOVVec64x4ToM mask)) +(VPSLLQMasked256 x (MOVQconst [c]) mask) => (VPSLLQMasked256const [uint8(c)] x mask) (ShiftAllLeftMaskedInt64x4 x y mask) => (VPSLLQMasked256 x y (VPMOVVec64x4ToM mask)) -(ShiftAllLeftMaskedInt64x8 x (MOVQconst [c]) mask) => (VPSLLQMasked512const [uint8(c)] x (VPMOVVec64x8ToM mask)) +(VPSLLQMasked512 x (MOVQconst [c]) mask) => (VPSLLQMasked512const [uint8(c)] x mask) (ShiftAllLeftMaskedInt64x8 x y mask) => (VPSLLQMasked512 x y (VPMOVVec64x8ToM mask)) -(ShiftAllLeftMaskedUint16x8 x (MOVQconst [c]) mask) => (VPSLLWMasked128const [uint8(c)] x (VPMOVVec16x8ToM mask)) (ShiftAllLeftMaskedUint16x8 x y mask) => (VPSLLWMasked128 x y (VPMOVVec16x8ToM mask)) -(ShiftAllLeftMaskedUint16x16 x (MOVQconst [c]) mask) => (VPSLLWMasked256const [uint8(c)] x (VPMOVVec16x16ToM mask)) (ShiftAllLeftMaskedUint16x16 x y mask) => (VPSLLWMasked256 x y (VPMOVVec16x16ToM mask)) -(ShiftAllLeftMaskedUint16x32 x (MOVQconst [c]) mask) => (VPSLLWMasked512const [uint8(c)] x (VPMOVVec16x32ToM mask)) (ShiftAllLeftMaskedUint16x32 x y mask) => (VPSLLWMasked512 x y (VPMOVVec16x32ToM mask)) -(ShiftAllLeftMaskedUint32x4 x (MOVQconst [c]) mask) => (VPSLLDMasked128const [uint8(c)] x (VPMOVVec32x4ToM mask)) (ShiftAllLeftMaskedUint32x4 x y mask) => (VPSLLDMasked128 x y (VPMOVVec32x4ToM mask)) -(ShiftAllLeftMaskedUint32x8 x (MOVQconst [c]) mask) => (VPSLLDMasked256const [uint8(c)] x (VPMOVVec32x8ToM mask)) (ShiftAllLeftMaskedUint32x8 x y mask) => (VPSLLDMasked256 x y (VPMOVVec32x8ToM mask)) -(ShiftAllLeftMaskedUint32x16 x (MOVQconst [c]) mask) => (VPSLLDMasked512const [uint8(c)] x (VPMOVVec32x16ToM mask)) (ShiftAllLeftMaskedUint32x16 x y mask) => (VPSLLDMasked512 x y (VPMOVVec32x16ToM mask)) -(ShiftAllLeftMaskedUint64x2 x (MOVQconst [c]) mask) => (VPSLLQMasked128const [uint8(c)] x (VPMOVVec64x2ToM mask)) (ShiftAllLeftMaskedUint64x2 x y mask) => (VPSLLQMasked128 x y (VPMOVVec64x2ToM mask)) -(ShiftAllLeftMaskedUint64x4 x (MOVQconst [c]) mask) => (VPSLLQMasked256const [uint8(c)] x (VPMOVVec64x4ToM mask)) (ShiftAllLeftMaskedUint64x4 x y mask) => (VPSLLQMasked256 x y (VPMOVVec64x4ToM mask)) -(ShiftAllLeftMaskedUint64x8 x (MOVQconst [c]) mask) => (VPSLLQMasked512const [uint8(c)] x (VPMOVVec64x8ToM mask)) (ShiftAllLeftMaskedUint64x8 x y mask) => (VPSLLQMasked512 x y (VPMOVVec64x8ToM mask)) -(ShiftAllRightInt16x8 x (MOVQconst [c])) => (VPSRAW128const [uint8(c)] x) -(ShiftAllRightInt16x8 x y) => (VPSRAW128 x y) -(ShiftAllRightInt16x16 x (MOVQconst [c])) => (VPSRAW256const [uint8(c)] x) -(ShiftAllRightInt16x16 x y) => (VPSRAW256 x y) -(ShiftAllRightInt16x32 x (MOVQconst [c])) => (VPSRAW512const [uint8(c)] x) -(ShiftAllRightInt16x32 x y) => (VPSRAW512 x y) -(ShiftAllRightInt32x4 x (MOVQconst [c])) => (VPSRAD128const [uint8(c)] x) -(ShiftAllRightInt32x4 x y) => (VPSRAD128 x y) -(ShiftAllRightInt32x8 x (MOVQconst [c])) => (VPSRAD256const [uint8(c)] x) -(ShiftAllRightInt32x8 x y) => (VPSRAD256 x y) -(ShiftAllRightInt32x16 x (MOVQconst [c])) => (VPSRAD512const [uint8(c)] x) -(ShiftAllRightInt32x16 x y) => (VPSRAD512 x y) -(ShiftAllRightInt64x2 x (MOVQconst [c])) => (VPSRAQ128const [uint8(c)] x) -(ShiftAllRightInt64x2 x y) => (VPSRAQ128 x y) -(ShiftAllRightInt64x4 x (MOVQconst [c])) => (VPSRAQ256const [uint8(c)] x) -(ShiftAllRightInt64x4 x y) => (VPSRAQ256 x y) -(ShiftAllRightInt64x8 x (MOVQconst [c])) => (VPSRAQ512const [uint8(c)] x) 
-(ShiftAllRightInt64x8 x y) => (VPSRAQ512 x y) -(ShiftAllRightUint16x8 x (MOVQconst [c])) => (VPSRLW128const [uint8(c)] x) -(ShiftAllRightUint16x8 x y) => (VPSRLW128 x y) -(ShiftAllRightUint16x16 x (MOVQconst [c])) => (VPSRLW256const [uint8(c)] x) -(ShiftAllRightUint16x16 x y) => (VPSRLW256 x y) -(ShiftAllRightUint16x32 x (MOVQconst [c])) => (VPSRLW512const [uint8(c)] x) -(ShiftAllRightUint16x32 x y) => (VPSRLW512 x y) -(ShiftAllRightUint32x4 x (MOVQconst [c])) => (VPSRLD128const [uint8(c)] x) -(ShiftAllRightUint32x4 x y) => (VPSRLD128 x y) -(ShiftAllRightUint32x8 x (MOVQconst [c])) => (VPSRLD256const [uint8(c)] x) -(ShiftAllRightUint32x8 x y) => (VPSRLD256 x y) -(ShiftAllRightUint32x16 x (MOVQconst [c])) => (VPSRLD512const [uint8(c)] x) -(ShiftAllRightUint32x16 x y) => (VPSRLD512 x y) -(ShiftAllRightUint64x2 x (MOVQconst [c])) => (VPSRLQ128const [uint8(c)] x) -(ShiftAllRightUint64x2 x y) => (VPSRLQ128 x y) -(ShiftAllRightUint64x4 x (MOVQconst [c])) => (VPSRLQ256const [uint8(c)] x) -(ShiftAllRightUint64x4 x y) => (VPSRLQ256 x y) -(ShiftAllRightUint64x8 x (MOVQconst [c])) => (VPSRLQ512const [uint8(c)] x) -(ShiftAllRightUint64x8 x y) => (VPSRLQ512 x y) +(ShiftAllRightInt16x8 ...) => (VPSRAW128 ...) +(VPSRAW128 x (MOVQconst [c])) => (VPSRAW128const [uint8(c)] x) +(ShiftAllRightInt16x16 ...) => (VPSRAW256 ...) +(VPSRAW256 x (MOVQconst [c])) => (VPSRAW256const [uint8(c)] x) +(ShiftAllRightInt16x32 ...) => (VPSRAW512 ...) +(VPSRAW512 x (MOVQconst [c])) => (VPSRAW512const [uint8(c)] x) +(ShiftAllRightInt32x4 ...) => (VPSRAD128 ...) +(VPSRAD128 x (MOVQconst [c])) => (VPSRAD128const [uint8(c)] x) +(ShiftAllRightInt32x8 ...) => (VPSRAD256 ...) +(VPSRAD256 x (MOVQconst [c])) => (VPSRAD256const [uint8(c)] x) +(ShiftAllRightInt32x16 ...) => (VPSRAD512 ...) +(VPSRAD512 x (MOVQconst [c])) => (VPSRAD512const [uint8(c)] x) +(ShiftAllRightInt64x2 ...) => (VPSRAQ128 ...) +(VPSRAQ128 x (MOVQconst [c])) => (VPSRAQ128const [uint8(c)] x) +(ShiftAllRightInt64x4 ...) => (VPSRAQ256 ...) +(VPSRAQ256 x (MOVQconst [c])) => (VPSRAQ256const [uint8(c)] x) +(ShiftAllRightInt64x8 ...) => (VPSRAQ512 ...) +(VPSRAQ512 x (MOVQconst [c])) => (VPSRAQ512const [uint8(c)] x) +(ShiftAllRightUint16x8 ...) => (VPSRLW128 ...) +(ShiftAllRightUint16x16 ...) => (VPSRLW256 ...) +(ShiftAllRightUint16x32 ...) => (VPSRLW512 ...) +(ShiftAllRightUint32x4 ...) => (VPSRLD128 ...) +(ShiftAllRightUint32x8 ...) => (VPSRLD256 ...) +(ShiftAllRightUint32x16 ...) => (VPSRLD512 ...) +(ShiftAllRightUint64x2 ...) => (VPSRLQ128 ...) +(ShiftAllRightUint64x4 ...) => (VPSRLQ256 ...) +(ShiftAllRightUint64x8 ...) => (VPSRLQ512 ...) (ShiftAllRightConcatInt16x8 ...) => (VPSHRDW128 ...) (ShiftAllRightConcatInt16x16 ...) => (VPSHRDW256 ...) (ShiftAllRightConcatInt16x32 ...) => (VPSHRDW512 ...) 
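The ShiftAllLeft/ShiftAllRight hunks above all apply the same restructuring: instead of matching (MOVQconst [c]) on the generic op, each generic op now lowers unconditionally to its machine op, and a separate rule on the machine op folds a constant shift count into the immediate-operand form. Because the signed and unsigned generic ops lower to the same machine op (both ShiftAllLeftInt16x8 and ShiftAllLeftUint16x8 become VPSLLW128), a single fold rule per machine op replaces two per-type rules, which is why the Uint-specific constant rules could simply be deleted. As a rough sketch of the kind of source these rules target, assuming the experimental simd package on the dev.simd branch (the import path, type, and method names here are assumptions for illustration, not confirmed by this CL):

	// Hypothetical example; assumes simd.Int16x8 exposes a
	// ShiftAllLeft(uint64) method on dev.simd.
	package main

	import "simd" // experimental package; import path assumed

	func shiftConst(x simd.Int16x8) simd.Int16x8 {
		// The count 3 reaches SSA as (MOVQconst [3]); after the generic
		// op lowers to VPSLLW128, the machine-level rule rewrites it to
		// (VPSLLW128const [3] x), the immediate form of VPSLLW.
		return x.ShiftAllLeft(3)
	}

	func shiftVar(x simd.Int16x8, n uint64) simd.Int16x8 {
		// A non-constant count keeps the register form VPSLLW128.
		return x.ShiftAllLeft(n)
	}
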
@@ -1624,41 +1597,32 @@ (ShiftAllRightConcatMaskedUint64x2 [a] x y mask) => (VPSHRDQMasked128 [a] x y (VPMOVVec64x2ToM mask)) (ShiftAllRightConcatMaskedUint64x4 [a] x y mask) => (VPSHRDQMasked256 [a] x y (VPMOVVec64x4ToM mask)) (ShiftAllRightConcatMaskedUint64x8 [a] x y mask) => (VPSHRDQMasked512 [a] x y (VPMOVVec64x8ToM mask)) -(ShiftAllRightMaskedInt16x8 x (MOVQconst [c]) mask) => (VPSRAWMasked128const [uint8(c)] x (VPMOVVec16x8ToM mask)) +(VPSRAWMasked128 x (MOVQconst [c]) mask) => (VPSRAWMasked128const [uint8(c)] x mask) (ShiftAllRightMaskedInt16x8 x y mask) => (VPSRAWMasked128 x y (VPMOVVec16x8ToM mask)) -(ShiftAllRightMaskedInt16x16 x (MOVQconst [c]) mask) => (VPSRAWMasked256const [uint8(c)] x (VPMOVVec16x16ToM mask)) +(VPSRAWMasked256 x (MOVQconst [c]) mask) => (VPSRAWMasked256const [uint8(c)] x mask) (ShiftAllRightMaskedInt16x16 x y mask) => (VPSRAWMasked256 x y (VPMOVVec16x16ToM mask)) -(ShiftAllRightMaskedInt16x32 x (MOVQconst [c]) mask) => (VPSRAWMasked512const [uint8(c)] x (VPMOVVec16x32ToM mask)) +(VPSRAWMasked512 x (MOVQconst [c]) mask) => (VPSRAWMasked512const [uint8(c)] x mask) (ShiftAllRightMaskedInt16x32 x y mask) => (VPSRAWMasked512 x y (VPMOVVec16x32ToM mask)) -(ShiftAllRightMaskedInt32x4 x (MOVQconst [c]) mask) => (VPSRADMasked128const [uint8(c)] x (VPMOVVec32x4ToM mask)) +(VPSRADMasked128 x (MOVQconst [c]) mask) => (VPSRADMasked128const [uint8(c)] x mask) (ShiftAllRightMaskedInt32x4 x y mask) => (VPSRADMasked128 x y (VPMOVVec32x4ToM mask)) -(ShiftAllRightMaskedInt32x8 x (MOVQconst [c]) mask) => (VPSRADMasked256const [uint8(c)] x (VPMOVVec32x8ToM mask)) +(VPSRADMasked256 x (MOVQconst [c]) mask) => (VPSRADMasked256const [uint8(c)] x mask) (ShiftAllRightMaskedInt32x8 x y mask) => (VPSRADMasked256 x y (VPMOVVec32x8ToM mask)) -(ShiftAllRightMaskedInt32x16 x (MOVQconst [c]) mask) => (VPSRADMasked512const [uint8(c)] x (VPMOVVec32x16ToM mask)) +(VPSRADMasked512 x (MOVQconst [c]) mask) => (VPSRADMasked512const [uint8(c)] x mask) (ShiftAllRightMaskedInt32x16 x y mask) => (VPSRADMasked512 x y (VPMOVVec32x16ToM mask)) -(ShiftAllRightMaskedInt64x2 x (MOVQconst [c]) mask) => (VPSRAQMasked128const [uint8(c)] x (VPMOVVec64x2ToM mask)) +(VPSRAQMasked128 x (MOVQconst [c]) mask) => (VPSRAQMasked128const [uint8(c)] x mask) (ShiftAllRightMaskedInt64x2 x y mask) => (VPSRAQMasked128 x y (VPMOVVec64x2ToM mask)) -(ShiftAllRightMaskedInt64x4 x (MOVQconst [c]) mask) => (VPSRAQMasked256const [uint8(c)] x (VPMOVVec64x4ToM mask)) +(VPSRAQMasked256 x (MOVQconst [c]) mask) => (VPSRAQMasked256const [uint8(c)] x mask) (ShiftAllRightMaskedInt64x4 x y mask) => (VPSRAQMasked256 x y (VPMOVVec64x4ToM mask)) -(ShiftAllRightMaskedInt64x8 x (MOVQconst [c]) mask) => (VPSRAQMasked512const [uint8(c)] x (VPMOVVec64x8ToM mask)) +(VPSRAQMasked512 x (MOVQconst [c]) mask) => (VPSRAQMasked512const [uint8(c)] x mask) (ShiftAllRightMaskedInt64x8 x y mask) => (VPSRAQMasked512 x y (VPMOVVec64x8ToM mask)) -(ShiftAllRightMaskedUint16x8 x (MOVQconst [c]) mask) => (VPSRLWMasked128const [uint8(c)] x (VPMOVVec16x8ToM mask)) (ShiftAllRightMaskedUint16x8 x y mask) => (VPSRLWMasked128 x y (VPMOVVec16x8ToM mask)) -(ShiftAllRightMaskedUint16x16 x (MOVQconst [c]) mask) => (VPSRLWMasked256const [uint8(c)] x (VPMOVVec16x16ToM mask)) (ShiftAllRightMaskedUint16x16 x y mask) => (VPSRLWMasked256 x y (VPMOVVec16x16ToM mask)) -(ShiftAllRightMaskedUint16x32 x (MOVQconst [c]) mask) => (VPSRLWMasked512const [uint8(c)] x (VPMOVVec16x32ToM mask)) (ShiftAllRightMaskedUint16x32 x y mask) => (VPSRLWMasked512 x y (VPMOVVec16x32ToM mask)) 
-(ShiftAllRightMaskedUint32x4 x (MOVQconst [c]) mask) => (VPSRLDMasked128const [uint8(c)] x (VPMOVVec32x4ToM mask)) (ShiftAllRightMaskedUint32x4 x y mask) => (VPSRLDMasked128 x y (VPMOVVec32x4ToM mask)) -(ShiftAllRightMaskedUint32x8 x (MOVQconst [c]) mask) => (VPSRLDMasked256const [uint8(c)] x (VPMOVVec32x8ToM mask)) (ShiftAllRightMaskedUint32x8 x y mask) => (VPSRLDMasked256 x y (VPMOVVec32x8ToM mask)) -(ShiftAllRightMaskedUint32x16 x (MOVQconst [c]) mask) => (VPSRLDMasked512const [uint8(c)] x (VPMOVVec32x16ToM mask)) (ShiftAllRightMaskedUint32x16 x y mask) => (VPSRLDMasked512 x y (VPMOVVec32x16ToM mask)) -(ShiftAllRightMaskedUint64x2 x (MOVQconst [c]) mask) => (VPSRLQMasked128const [uint8(c)] x (VPMOVVec64x2ToM mask)) (ShiftAllRightMaskedUint64x2 x y mask) => (VPSRLQMasked128 x y (VPMOVVec64x2ToM mask)) -(ShiftAllRightMaskedUint64x4 x (MOVQconst [c]) mask) => (VPSRLQMasked256const [uint8(c)] x (VPMOVVec64x4ToM mask)) (ShiftAllRightMaskedUint64x4 x y mask) => (VPSRLQMasked256 x y (VPMOVVec64x4ToM mask)) -(ShiftAllRightMaskedUint64x8 x (MOVQconst [c]) mask) => (VPSRLQMasked512const [uint8(c)] x (VPMOVVec64x8ToM mask)) (ShiftAllRightMaskedUint64x8 x y mask) => (VPSRLQMasked512 x y (VPMOVVec64x8ToM mask)) (ShiftLeftInt16x8 ...) => (VPSLLVW128 ...) (ShiftLeftInt16x16 ...) => (VPSLLVW256 ...) diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go index fbe8a448d8..c5367adefe 100644 --- a/src/cmd/compile/internal/ssa/rewriteAMD64.go +++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go @@ -531,6 +531,78 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpAMD64VPMOVVec8x32ToM(v) case OpAMD64VPMOVVec8x64ToM: return rewriteValueAMD64_OpAMD64VPMOVVec8x64ToM(v) + case OpAMD64VPSLLD128: + return rewriteValueAMD64_OpAMD64VPSLLD128(v) + case OpAMD64VPSLLD256: + return rewriteValueAMD64_OpAMD64VPSLLD256(v) + case OpAMD64VPSLLD512: + return rewriteValueAMD64_OpAMD64VPSLLD512(v) + case OpAMD64VPSLLDMasked128: + return rewriteValueAMD64_OpAMD64VPSLLDMasked128(v) + case OpAMD64VPSLLDMasked256: + return rewriteValueAMD64_OpAMD64VPSLLDMasked256(v) + case OpAMD64VPSLLDMasked512: + return rewriteValueAMD64_OpAMD64VPSLLDMasked512(v) + case OpAMD64VPSLLQ128: + return rewriteValueAMD64_OpAMD64VPSLLQ128(v) + case OpAMD64VPSLLQ256: + return rewriteValueAMD64_OpAMD64VPSLLQ256(v) + case OpAMD64VPSLLQ512: + return rewriteValueAMD64_OpAMD64VPSLLQ512(v) + case OpAMD64VPSLLQMasked128: + return rewriteValueAMD64_OpAMD64VPSLLQMasked128(v) + case OpAMD64VPSLLQMasked256: + return rewriteValueAMD64_OpAMD64VPSLLQMasked256(v) + case OpAMD64VPSLLQMasked512: + return rewriteValueAMD64_OpAMD64VPSLLQMasked512(v) + case OpAMD64VPSLLW128: + return rewriteValueAMD64_OpAMD64VPSLLW128(v) + case OpAMD64VPSLLW256: + return rewriteValueAMD64_OpAMD64VPSLLW256(v) + case OpAMD64VPSLLW512: + return rewriteValueAMD64_OpAMD64VPSLLW512(v) + case OpAMD64VPSLLWMasked128: + return rewriteValueAMD64_OpAMD64VPSLLWMasked128(v) + case OpAMD64VPSLLWMasked256: + return rewriteValueAMD64_OpAMD64VPSLLWMasked256(v) + case OpAMD64VPSLLWMasked512: + return rewriteValueAMD64_OpAMD64VPSLLWMasked512(v) + case OpAMD64VPSRAD128: + return rewriteValueAMD64_OpAMD64VPSRAD128(v) + case OpAMD64VPSRAD256: + return rewriteValueAMD64_OpAMD64VPSRAD256(v) + case OpAMD64VPSRAD512: + return rewriteValueAMD64_OpAMD64VPSRAD512(v) + case OpAMD64VPSRADMasked128: + return rewriteValueAMD64_OpAMD64VPSRADMasked128(v) + case OpAMD64VPSRADMasked256: + return rewriteValueAMD64_OpAMD64VPSRADMasked256(v) + case 
OpAMD64VPSRADMasked512: + return rewriteValueAMD64_OpAMD64VPSRADMasked512(v) + case OpAMD64VPSRAQ128: + return rewriteValueAMD64_OpAMD64VPSRAQ128(v) + case OpAMD64VPSRAQ256: + return rewriteValueAMD64_OpAMD64VPSRAQ256(v) + case OpAMD64VPSRAQ512: + return rewriteValueAMD64_OpAMD64VPSRAQ512(v) + case OpAMD64VPSRAQMasked128: + return rewriteValueAMD64_OpAMD64VPSRAQMasked128(v) + case OpAMD64VPSRAQMasked256: + return rewriteValueAMD64_OpAMD64VPSRAQMasked256(v) + case OpAMD64VPSRAQMasked512: + return rewriteValueAMD64_OpAMD64VPSRAQMasked512(v) + case OpAMD64VPSRAW128: + return rewriteValueAMD64_OpAMD64VPSRAW128(v) + case OpAMD64VPSRAW256: + return rewriteValueAMD64_OpAMD64VPSRAW256(v) + case OpAMD64VPSRAW512: + return rewriteValueAMD64_OpAMD64VPSRAW512(v) + case OpAMD64VPSRAWMasked128: + return rewriteValueAMD64_OpAMD64VPSRAWMasked128(v) + case OpAMD64VPSRAWMasked256: + return rewriteValueAMD64_OpAMD64VPSRAWMasked256(v) + case OpAMD64VPSRAWMasked512: + return rewriteValueAMD64_OpAMD64VPSRAWMasked512(v) case OpAMD64XADDLlock: return rewriteValueAMD64_OpAMD64XADDLlock(v) case OpAMD64XADDQlock: @@ -4662,23 +4734,32 @@ func rewriteValueAMD64(v *Value) bool { v.Op = OpAMD64VPSHLDQ512 return true case OpShiftAllLeftInt16x16: - return rewriteValueAMD64_OpShiftAllLeftInt16x16(v) + v.Op = OpAMD64VPSLLW256 + return true case OpShiftAllLeftInt16x32: - return rewriteValueAMD64_OpShiftAllLeftInt16x32(v) + v.Op = OpAMD64VPSLLW512 + return true case OpShiftAllLeftInt16x8: - return rewriteValueAMD64_OpShiftAllLeftInt16x8(v) + v.Op = OpAMD64VPSLLW128 + return true case OpShiftAllLeftInt32x16: - return rewriteValueAMD64_OpShiftAllLeftInt32x16(v) + v.Op = OpAMD64VPSLLD512 + return true case OpShiftAllLeftInt32x4: - return rewriteValueAMD64_OpShiftAllLeftInt32x4(v) + v.Op = OpAMD64VPSLLD128 + return true case OpShiftAllLeftInt32x8: - return rewriteValueAMD64_OpShiftAllLeftInt32x8(v) + v.Op = OpAMD64VPSLLD256 + return true case OpShiftAllLeftInt64x2: - return rewriteValueAMD64_OpShiftAllLeftInt64x2(v) + v.Op = OpAMD64VPSLLQ128 + return true case OpShiftAllLeftInt64x4: - return rewriteValueAMD64_OpShiftAllLeftInt64x4(v) + v.Op = OpAMD64VPSLLQ256 + return true case OpShiftAllLeftInt64x8: - return rewriteValueAMD64_OpShiftAllLeftInt64x8(v) + v.Op = OpAMD64VPSLLQ512 + return true case OpShiftAllLeftMaskedInt16x16: return rewriteValueAMD64_OpShiftAllLeftMaskedInt16x16(v) case OpShiftAllLeftMaskedInt16x32: @@ -4716,23 +4797,32 @@ func rewriteValueAMD64(v *Value) bool { case OpShiftAllLeftMaskedUint64x8: return rewriteValueAMD64_OpShiftAllLeftMaskedUint64x8(v) case OpShiftAllLeftUint16x16: - return rewriteValueAMD64_OpShiftAllLeftUint16x16(v) + v.Op = OpAMD64VPSLLW256 + return true case OpShiftAllLeftUint16x32: - return rewriteValueAMD64_OpShiftAllLeftUint16x32(v) + v.Op = OpAMD64VPSLLW512 + return true case OpShiftAllLeftUint16x8: - return rewriteValueAMD64_OpShiftAllLeftUint16x8(v) + v.Op = OpAMD64VPSLLW128 + return true case OpShiftAllLeftUint32x16: - return rewriteValueAMD64_OpShiftAllLeftUint32x16(v) + v.Op = OpAMD64VPSLLD512 + return true case OpShiftAllLeftUint32x4: - return rewriteValueAMD64_OpShiftAllLeftUint32x4(v) + v.Op = OpAMD64VPSLLD128 + return true case OpShiftAllLeftUint32x8: - return rewriteValueAMD64_OpShiftAllLeftUint32x8(v) + v.Op = OpAMD64VPSLLD256 + return true case OpShiftAllLeftUint64x2: - return rewriteValueAMD64_OpShiftAllLeftUint64x2(v) + v.Op = OpAMD64VPSLLQ128 + return true case OpShiftAllLeftUint64x4: - return rewriteValueAMD64_OpShiftAllLeftUint64x4(v) + v.Op = OpAMD64VPSLLQ256 + return 
true case OpShiftAllLeftUint64x8: - return rewriteValueAMD64_OpShiftAllLeftUint64x8(v) + v.Op = OpAMD64VPSLLQ512 + return true case OpShiftAllRightConcatInt16x16: v.Op = OpAMD64VPSHRDW256 return true @@ -4824,23 +4914,32 @@ func rewriteValueAMD64(v *Value) bool { v.Op = OpAMD64VPSHRDQ512 return true case OpShiftAllRightInt16x16: - return rewriteValueAMD64_OpShiftAllRightInt16x16(v) + v.Op = OpAMD64VPSRAW256 + return true case OpShiftAllRightInt16x32: - return rewriteValueAMD64_OpShiftAllRightInt16x32(v) + v.Op = OpAMD64VPSRAW512 + return true case OpShiftAllRightInt16x8: - return rewriteValueAMD64_OpShiftAllRightInt16x8(v) + v.Op = OpAMD64VPSRAW128 + return true case OpShiftAllRightInt32x16: - return rewriteValueAMD64_OpShiftAllRightInt32x16(v) + v.Op = OpAMD64VPSRAD512 + return true case OpShiftAllRightInt32x4: - return rewriteValueAMD64_OpShiftAllRightInt32x4(v) + v.Op = OpAMD64VPSRAD128 + return true case OpShiftAllRightInt32x8: - return rewriteValueAMD64_OpShiftAllRightInt32x8(v) + v.Op = OpAMD64VPSRAD256 + return true case OpShiftAllRightInt64x2: - return rewriteValueAMD64_OpShiftAllRightInt64x2(v) + v.Op = OpAMD64VPSRAQ128 + return true case OpShiftAllRightInt64x4: - return rewriteValueAMD64_OpShiftAllRightInt64x4(v) + v.Op = OpAMD64VPSRAQ256 + return true case OpShiftAllRightInt64x8: - return rewriteValueAMD64_OpShiftAllRightInt64x8(v) + v.Op = OpAMD64VPSRAQ512 + return true case OpShiftAllRightMaskedInt16x16: return rewriteValueAMD64_OpShiftAllRightMaskedInt16x16(v) case OpShiftAllRightMaskedInt16x32: @@ -4878,23 +4977,32 @@ func rewriteValueAMD64(v *Value) bool { case OpShiftAllRightMaskedUint64x8: return rewriteValueAMD64_OpShiftAllRightMaskedUint64x8(v) case OpShiftAllRightUint16x16: - return rewriteValueAMD64_OpShiftAllRightUint16x16(v) + v.Op = OpAMD64VPSRLW256 + return true case OpShiftAllRightUint16x32: - return rewriteValueAMD64_OpShiftAllRightUint16x32(v) + v.Op = OpAMD64VPSRLW512 + return true case OpShiftAllRightUint16x8: - return rewriteValueAMD64_OpShiftAllRightUint16x8(v) + v.Op = OpAMD64VPSRLW128 + return true case OpShiftAllRightUint32x16: - return rewriteValueAMD64_OpShiftAllRightUint32x16(v) + v.Op = OpAMD64VPSRLD512 + return true case OpShiftAllRightUint32x4: - return rewriteValueAMD64_OpShiftAllRightUint32x4(v) + v.Op = OpAMD64VPSRLD128 + return true case OpShiftAllRightUint32x8: - return rewriteValueAMD64_OpShiftAllRightUint32x8(v) + v.Op = OpAMD64VPSRLD256 + return true case OpShiftAllRightUint64x2: - return rewriteValueAMD64_OpShiftAllRightUint64x2(v) + v.Op = OpAMD64VPSRLQ128 + return true case OpShiftAllRightUint64x4: - return rewriteValueAMD64_OpShiftAllRightUint64x4(v) + v.Op = OpAMD64VPSRLQ256 + return true case OpShiftAllRightUint64x8: - return rewriteValueAMD64_OpShiftAllRightUint64x8(v) + v.Op = OpAMD64VPSRLQ512 + return true case OpShiftLeftConcatInt16x16: v.Op = OpAMD64VPSHLDVW256 return true @@ -27713,416 +27821,1100 @@ func rewriteValueAMD64_OpAMD64VPMOVVec8x64ToM(v *Value) bool { } return false } -func rewriteValueAMD64_OpAMD64XADDLlock(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpAMD64VPSLLD128(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (XADDLlock [off1] {sym} val (ADDQconst [off2] ptr) mem) - // cond: is32Bit(int64(off1)+int64(off2)) - // result: (XADDLlock [off1+off2] {sym} val ptr mem) + // match: (VPSLLD128 x (MOVQconst [c])) + // result: (VPSLLD128const [uint8(c)] x) for { - off1 := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - val := v_0 - if v_1.Op != OpAMD64ADDQconst { - break - } - off2 := 
auxIntToInt32(v_1.AuxInt) - ptr := v_1.Args[0] - mem := v_2 - if !(is32Bit(int64(off1) + int64(off2))) { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { break } - v.reset(OpAMD64XADDLlock) - v.AuxInt = int32ToAuxInt(off1 + off2) - v.Aux = symToAux(sym) - v.AddArg3(val, ptr, mem) + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSLLD128const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg(x) return true } return false } -func rewriteValueAMD64_OpAMD64XADDQlock(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpAMD64VPSLLD256(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (XADDQlock [off1] {sym} val (ADDQconst [off2] ptr) mem) - // cond: is32Bit(int64(off1)+int64(off2)) - // result: (XADDQlock [off1+off2] {sym} val ptr mem) + // match: (VPSLLD256 x (MOVQconst [c])) + // result: (VPSLLD256const [uint8(c)] x) for { - off1 := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - val := v_0 - if v_1.Op != OpAMD64ADDQconst { - break - } - off2 := auxIntToInt32(v_1.AuxInt) - ptr := v_1.Args[0] - mem := v_2 - if !(is32Bit(int64(off1) + int64(off2))) { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { break } - v.reset(OpAMD64XADDQlock) - v.AuxInt = int32ToAuxInt(off1 + off2) - v.Aux = symToAux(sym) - v.AddArg3(val, ptr, mem) + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSLLD256const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg(x) return true } return false } -func rewriteValueAMD64_OpAMD64XCHGL(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpAMD64VPSLLD512(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (XCHGL [off1] {sym} val (ADDQconst [off2] ptr) mem) - // cond: is32Bit(int64(off1)+int64(off2)) - // result: (XCHGL [off1+off2] {sym} val ptr mem) + // match: (VPSLLD512 x (MOVQconst [c])) + // result: (VPSLLD512const [uint8(c)] x) for { - off1 := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - val := v_0 - if v_1.Op != OpAMD64ADDQconst { - break - } - off2 := auxIntToInt32(v_1.AuxInt) - ptr := v_1.Args[0] - mem := v_2 - if !(is32Bit(int64(off1) + int64(off2))) { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { break } - v.reset(OpAMD64XCHGL) - v.AuxInt = int32ToAuxInt(off1 + off2) - v.Aux = symToAux(sym) - v.AddArg3(val, ptr, mem) + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSLLD512const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg(x) return true } - // match: (XCHGL [off1] {sym1} val (LEAQ [off2] {sym2} ptr) mem) - // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && ptr.Op != OpSB - // result: (XCHGL [off1+off2] {mergeSym(sym1,sym2)} val ptr mem) + return false +} +func rewriteValueAMD64_OpAMD64VPSLLDMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSLLDMasked128 x (MOVQconst [c]) mask) + // result: (VPSLLDMasked128const [uint8(c)] x mask) for { - off1 := auxIntToInt32(v.AuxInt) - sym1 := auxToSym(v.Aux) - val := v_0 - if v_1.Op != OpAMD64LEAQ { - break - } - off2 := auxIntToInt32(v_1.AuxInt) - sym2 := auxToSym(v_1.Aux) - ptr := v_1.Args[0] - mem := v_2 - if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && ptr.Op != OpSB) { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { break } - v.reset(OpAMD64XCHGL) - v.AuxInt = int32ToAuxInt(off1 + off2) - v.Aux = symToAux(mergeSym(sym1, sym2)) - v.AddArg3(val, ptr, mem) + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSLLDMasked128const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg2(x, mask) return true } return false } -func rewriteValueAMD64_OpAMD64XCHGQ(v *Value) bool { +func 
rewriteValueAMD64_OpAMD64VPSLLDMasked256(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (XCHGQ [off1] {sym} val (ADDQconst [off2] ptr) mem) - // cond: is32Bit(int64(off1)+int64(off2)) - // result: (XCHGQ [off1+off2] {sym} val ptr mem) + // match: (VPSLLDMasked256 x (MOVQconst [c]) mask) + // result: (VPSLLDMasked256const [uint8(c)] x mask) for { - off1 := auxIntToInt32(v.AuxInt) - sym := auxToSym(v.Aux) - val := v_0 - if v_1.Op != OpAMD64ADDQconst { - break - } - off2 := auxIntToInt32(v_1.AuxInt) - ptr := v_1.Args[0] - mem := v_2 - if !(is32Bit(int64(off1) + int64(off2))) { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { break } - v.reset(OpAMD64XCHGQ) - v.AuxInt = int32ToAuxInt(off1 + off2) - v.Aux = symToAux(sym) - v.AddArg3(val, ptr, mem) + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSLLDMasked256const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg2(x, mask) return true } - // match: (XCHGQ [off1] {sym1} val (LEAQ [off2] {sym2} ptr) mem) - // cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && ptr.Op != OpSB - // result: (XCHGQ [off1+off2] {mergeSym(sym1,sym2)} val ptr mem) + return false +} +func rewriteValueAMD64_OpAMD64VPSLLDMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSLLDMasked512 x (MOVQconst [c]) mask) + // result: (VPSLLDMasked512const [uint8(c)] x mask) for { - off1 := auxIntToInt32(v.AuxInt) - sym1 := auxToSym(v.Aux) - val := v_0 - if v_1.Op != OpAMD64LEAQ { - break - } - off2 := auxIntToInt32(v_1.AuxInt) - sym2 := auxToSym(v_1.Aux) - ptr := v_1.Args[0] - mem := v_2 - if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && ptr.Op != OpSB) { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { break } - v.reset(OpAMD64XCHGQ) - v.AuxInt = int32ToAuxInt(off1 + off2) - v.Aux = symToAux(mergeSym(sym1, sym2)) - v.AddArg3(val, ptr, mem) + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSLLDMasked512const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg2(x, mask) return true } return false } -func rewriteValueAMD64_OpAMD64XORL(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSLLQ128(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (XORL (SHLL (MOVLconst [1]) y) x) - // result: (BTCL x y) - for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - if v_0.Op != OpAMD64SHLL { - continue - } - y := v_0.Args[1] - v_0_0 := v_0.Args[0] - if v_0_0.Op != OpAMD64MOVLconst || auxIntToInt32(v_0_0.AuxInt) != 1 { - continue - } - x := v_1 - v.reset(OpAMD64BTCL) - v.AddArg2(x, y) - return true - } - break - } - // match: (XORL x (MOVLconst [c])) - // result: (XORLconst [c] x) - for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - if v_1.Op != OpAMD64MOVLconst { - continue - } - c := auxIntToInt32(v_1.AuxInt) - v.reset(OpAMD64XORLconst) - v.AuxInt = int32ToAuxInt(c) - v.AddArg(x) - return true - } - break - } - // match: (XORL x x) - // result: (MOVLconst [0]) + // match: (VPSLLQ128 x (MOVQconst [c])) + // result: (VPSLLQ128const [uint8(c)] x) for { x := v_0 - if x != v_1 { + if v_1.Op != OpAMD64MOVQconst { break } - v.reset(OpAMD64MOVLconst) - v.AuxInt = int32ToAuxInt(0) + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSLLQ128const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg(x) return true } - // match: (XORL x l:(MOVLload [off] {sym} ptr mem)) - // cond: canMergeLoadClobber(v, l, x) && clobber(l) - // result: (XORLload x [off] {sym} ptr mem) - for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, 
v_1, v_0 { - x := v_0 - l := v_1 - if l.Op != OpAMD64MOVLload { - continue - } - off := auxIntToInt32(l.AuxInt) - sym := auxToSym(l.Aux) - mem := l.Args[1] - ptr := l.Args[0] - if !(canMergeLoadClobber(v, l, x) && clobber(l)) { - continue - } - v.reset(OpAMD64XORLload) - v.AuxInt = int32ToAuxInt(off) - v.Aux = symToAux(sym) - v.AddArg3(x, ptr, mem) - return true - } - break - } - // match: (XORL x (ADDLconst [-1] x)) - // cond: buildcfg.GOAMD64 >= 3 - // result: (BLSMSKL x) + return false +} +func rewriteValueAMD64_OpAMD64VPSLLQ256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSLLQ256 x (MOVQconst [c])) + // result: (VPSLLQ256const [uint8(c)] x) for { - for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { - x := v_0 - if v_1.Op != OpAMD64ADDLconst || auxIntToInt32(v_1.AuxInt) != -1 || x != v_1.Args[0] || !(buildcfg.GOAMD64 >= 3) { - continue - } - v.reset(OpAMD64BLSMSKL) - v.AddArg(x) - return true + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break } - break + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSLLQ256const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg(x) + return true } return false } -func rewriteValueAMD64_OpAMD64XORLconst(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSLLQ512(v *Value) bool { + v_1 := v.Args[1] v_0 := v.Args[0] - // match: (XORLconst [1] (SETNE x)) - // result: (SETEQ x) + // match: (VPSLLQ512 x (MOVQconst [c])) + // result: (VPSLLQ512const [uint8(c)] x) for { - if auxIntToInt32(v.AuxInt) != 1 || v_0.Op != OpAMD64SETNE { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { break } - x := v_0.Args[0] - v.reset(OpAMD64SETEQ) + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSLLQ512const) + v.AuxInt = uint8ToAuxInt(uint8(c)) v.AddArg(x) return true } - // match: (XORLconst [1] (SETEQ x)) - // result: (SETNE x) + return false +} +func rewriteValueAMD64_OpAMD64VPSLLQMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSLLQMasked128 x (MOVQconst [c]) mask) + // result: (VPSLLQMasked128const [uint8(c)] x mask) for { - if auxIntToInt32(v.AuxInt) != 1 || v_0.Op != OpAMD64SETEQ { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { break } - x := v_0.Args[0] - v.reset(OpAMD64SETNE) - v.AddArg(x) + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSLLQMasked128const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg2(x, mask) return true } - // match: (XORLconst [1] (SETL x)) - // result: (SETGE x) + return false +} +func rewriteValueAMD64_OpAMD64VPSLLQMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSLLQMasked256 x (MOVQconst [c]) mask) + // result: (VPSLLQMasked256const [uint8(c)] x mask) for { - if auxIntToInt32(v.AuxInt) != 1 || v_0.Op != OpAMD64SETL { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { break } - x := v_0.Args[0] - v.reset(OpAMD64SETGE) - v.AddArg(x) + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSLLQMasked256const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg2(x, mask) return true } - // match: (XORLconst [1] (SETGE x)) - // result: (SETL x) + return false +} +func rewriteValueAMD64_OpAMD64VPSLLQMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSLLQMasked512 x (MOVQconst [c]) mask) + // result: (VPSLLQMasked512const [uint8(c)] x mask) for { - if auxIntToInt32(v.AuxInt) != 1 || v_0.Op != OpAMD64SETGE { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { break } - x := v_0.Args[0] - v.reset(OpAMD64SETL) - v.AddArg(x) + c := auxIntToInt64(v_1.AuxInt) + mask := 
v_2 + v.reset(OpAMD64VPSLLQMasked512const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg2(x, mask) return true } - // match: (XORLconst [1] (SETLE x)) - // result: (SETG x) + return false +} +func rewriteValueAMD64_OpAMD64VPSLLW128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSLLW128 x (MOVQconst [c])) + // result: (VPSLLW128const [uint8(c)] x) for { - if auxIntToInt32(v.AuxInt) != 1 || v_0.Op != OpAMD64SETLE { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { break } - x := v_0.Args[0] - v.reset(OpAMD64SETG) + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSLLW128const) + v.AuxInt = uint8ToAuxInt(uint8(c)) v.AddArg(x) return true } - // match: (XORLconst [1] (SETG x)) - // result: (SETLE x) + return false +} +func rewriteValueAMD64_OpAMD64VPSLLW256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSLLW256 x (MOVQconst [c])) + // result: (VPSLLW256const [uint8(c)] x) for { - if auxIntToInt32(v.AuxInt) != 1 || v_0.Op != OpAMD64SETG { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { break } - x := v_0.Args[0] - v.reset(OpAMD64SETLE) + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSLLW256const) + v.AuxInt = uint8ToAuxInt(uint8(c)) v.AddArg(x) return true } - // match: (XORLconst [1] (SETB x)) - // result: (SETAE x) + return false +} +func rewriteValueAMD64_OpAMD64VPSLLW512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSLLW512 x (MOVQconst [c])) + // result: (VPSLLW512const [uint8(c)] x) for { - if auxIntToInt32(v.AuxInt) != 1 || v_0.Op != OpAMD64SETB { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { break } - x := v_0.Args[0] - v.reset(OpAMD64SETAE) + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSLLW512const) + v.AuxInt = uint8ToAuxInt(uint8(c)) v.AddArg(x) return true } - // match: (XORLconst [1] (SETAE x)) - // result: (SETB x) + return false +} +func rewriteValueAMD64_OpAMD64VPSLLWMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSLLWMasked128 x (MOVQconst [c]) mask) + // result: (VPSLLWMasked128const [uint8(c)] x mask) for { - if auxIntToInt32(v.AuxInt) != 1 || v_0.Op != OpAMD64SETAE { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { break } - x := v_0.Args[0] - v.reset(OpAMD64SETB) - v.AddArg(x) + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSLLWMasked128const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg2(x, mask) return true } - // match: (XORLconst [1] (SETBE x)) - // result: (SETA x) + return false +} +func rewriteValueAMD64_OpAMD64VPSLLWMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSLLWMasked256 x (MOVQconst [c]) mask) + // result: (VPSLLWMasked256const [uint8(c)] x mask) for { - if auxIntToInt32(v.AuxInt) != 1 || v_0.Op != OpAMD64SETBE { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { break } - x := v_0.Args[0] - v.reset(OpAMD64SETA) - v.AddArg(x) + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSLLWMasked256const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg2(x, mask) return true } - // match: (XORLconst [1] (SETA x)) - // result: (SETBE x) + return false +} +func rewriteValueAMD64_OpAMD64VPSLLWMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSLLWMasked512 x (MOVQconst [c]) mask) + // result: (VPSLLWMasked512const [uint8(c)] x mask) for { - if auxIntToInt32(v.AuxInt) != 1 || v_0.Op != OpAMD64SETA { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { break } - x := v_0.Args[0] - v.reset(OpAMD64SETBE) - v.AddArg(x) + c := 
auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSLLWMasked512const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg2(x, mask) return true } - // match: (XORLconst [c] (XORLconst [d] x)) - // result: (XORLconst [c ^ d] x) + return false +} +func rewriteValueAMD64_OpAMD64VPSRAD128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRAD128 x (MOVQconst [c])) + // result: (VPSRAD128const [uint8(c)] x) for { - c := auxIntToInt32(v.AuxInt) - if v_0.Op != OpAMD64XORLconst { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { break } - d := auxIntToInt32(v_0.AuxInt) - x := v_0.Args[0] - v.reset(OpAMD64XORLconst) - v.AuxInt = int32ToAuxInt(c ^ d) + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSRAD128const) + v.AuxInt = uint8ToAuxInt(uint8(c)) v.AddArg(x) return true } - // match: (XORLconst [c] x) - // cond: c==0 - // result: x + return false +} +func rewriteValueAMD64_OpAMD64VPSRAD256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRAD256 x (MOVQconst [c])) + // result: (VPSRAD256const [uint8(c)] x) for { - c := auxIntToInt32(v.AuxInt) x := v_0 - if !(c == 0) { + if v_1.Op != OpAMD64MOVQconst { break } - v.copyOf(x) + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSRAD256const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg(x) return true } - // match: (XORLconst [c] (MOVLconst [d])) - // result: (MOVLconst [c^d]) + return false +} +func rewriteValueAMD64_OpAMD64VPSRAD512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRAD512 x (MOVQconst [c])) + // result: (VPSRAD512const [uint8(c)] x) for { - c := auxIntToInt32(v.AuxInt) - if v_0.Op != OpAMD64MOVLconst { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { break } - d := auxIntToInt32(v_0.AuxInt) - v.reset(OpAMD64MOVLconst) - v.AuxInt = int32ToAuxInt(c ^ d) + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSRAD512const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg(x) return true } return false } -func rewriteValueAMD64_OpAMD64XORLconstmodify(v *Value) bool { +func rewriteValueAMD64_OpAMD64VPSRADMasked128(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (XORLconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem) - // cond: ValAndOff(valoff1).canAdd32(off2) - // result: (XORLconstmodify [ValAndOff(valoff1).addOffset32(off2)] {sym} base mem) + // match: (VPSRADMasked128 x (MOVQconst [c]) mask) + // result: (VPSRADMasked128const [uint8(c)] x mask) for { - valoff1 := auxIntToValAndOff(v.AuxInt) - sym := auxToSym(v.Aux) + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSRADMasked128const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg2(x, mask) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSRADMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRADMasked256 x (MOVQconst [c]) mask) + // result: (VPSRADMasked256const [uint8(c)] x mask) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSRADMasked256const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg2(x, mask) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSRADMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRADMasked512 x (MOVQconst [c]) mask) + // result: (VPSRADMasked512const [uint8(c)] x mask) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + mask 
:= v_2 + v.reset(OpAMD64VPSRADMasked512const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg2(x, mask) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSRAQ128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRAQ128 x (MOVQconst [c])) + // result: (VPSRAQ128const [uint8(c)] x) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSRAQ128const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg(x) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSRAQ256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRAQ256 x (MOVQconst [c])) + // result: (VPSRAQ256const [uint8(c)] x) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSRAQ256const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg(x) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSRAQ512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRAQ512 x (MOVQconst [c])) + // result: (VPSRAQ512const [uint8(c)] x) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSRAQ512const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg(x) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSRAQMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRAQMasked128 x (MOVQconst [c]) mask) + // result: (VPSRAQMasked128const [uint8(c)] x mask) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSRAQMasked128const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg2(x, mask) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSRAQMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRAQMasked256 x (MOVQconst [c]) mask) + // result: (VPSRAQMasked256const [uint8(c)] x mask) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSRAQMasked256const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg2(x, mask) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSRAQMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRAQMasked512 x (MOVQconst [c]) mask) + // result: (VPSRAQMasked512const [uint8(c)] x mask) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSRAQMasked512const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg2(x, mask) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSRAW128(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRAW128 x (MOVQconst [c])) + // result: (VPSRAW128const [uint8(c)] x) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSRAW128const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg(x) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSRAW256(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRAW256 x (MOVQconst [c])) + // result: (VPSRAW256const [uint8(c)] x) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSRAW256const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg(x) + return true + } + return false +} 
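Every generated matcher in this file follows the same two-step shape. As a minimal model of why the split matters, here is a self-contained sketch using stand-in types (not the real cmd/compile/internal/ssa.Value, which uses v.reset, uint8ToAuxInt, and friends): lowering only renames the op, so the constant fold is written once against the machine op and can fire in any later rewrite pass, including after other rules expose the constant.

	package main

	import "fmt"

	// Value is a simplified stand-in for an SSA value.
	type Value struct {
		Op     string
		AuxInt int64
		Args   []*Value
	}

	// lower mirrors (ShiftAllLeftInt16x8 ...) => (VPSLLW128 ...):
	// the generic op is renamed in place and its arguments are kept.
	func lower(v *Value) bool {
		if v.Op != "ShiftAllLeftInt16x8" {
			return false
		}
		v.Op = "VPSLLW128"
		return true
	}

	// foldConst mirrors (VPSLLW128 x (MOVQconst [c])) =>
	// (VPSLLW128const [uint8(c)] x): it matches the machine op, so it
	// covers every generic op that lowers to VPSLLW128.
	func foldConst(v *Value) bool {
		if v.Op != "VPSLLW128" || v.Args[1].Op != "MOVQconst" {
			return false
		}
		c := v.Args[1].AuxInt
		v.Op = "VPSLLW128const"
		v.AuxInt = int64(uint8(c))
		v.Args = v.Args[:1] // drop the now-folded count operand
		return true
	}

	func main() {
		x := &Value{Op: "Arg"}
		c := &Value{Op: "MOVQconst", AuxInt: 3}
		v := &Value{Op: "ShiftAllLeftInt16x8", Args: []*Value{x, c}}
		lower(v)
		foldConst(v)
		fmt.Println(v.Op, v.AuxInt) // VPSLLW128const 3
	}
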
+func rewriteValueAMD64_OpAMD64VPSRAW512(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRAW512 x (MOVQconst [c])) + // result: (VPSRAW512const [uint8(c)] x) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSRAW512const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg(x) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSRAWMasked128(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRAWMasked128 x (MOVQconst [c]) mask) + // result: (VPSRAWMasked128const [uint8(c)] x mask) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSRAWMasked128const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg2(x, mask) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSRAWMasked256(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRAWMasked256 x (MOVQconst [c]) mask) + // result: (VPSRAWMasked256const [uint8(c)] x mask) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSRAWMasked256const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg2(x, mask) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64VPSRAWMasked512(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (VPSRAWMasked512 x (MOVQconst [c]) mask) + // result: (VPSRAWMasked512const [uint8(c)] x mask) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSRAWMasked512const) + v.AuxInt = uint8ToAuxInt(uint8(c)) + v.AddArg2(x, mask) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64XADDLlock(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (XADDLlock [off1] {sym} val (ADDQconst [off2] ptr) mem) + // cond: is32Bit(int64(off1)+int64(off2)) + // result: (XADDLlock [off1+off2] {sym} val ptr mem) + for { + off1 := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + val := v_0 + if v_1.Op != OpAMD64ADDQconst { + break + } + off2 := auxIntToInt32(v_1.AuxInt) + ptr := v_1.Args[0] + mem := v_2 + if !(is32Bit(int64(off1) + int64(off2))) { + break + } + v.reset(OpAMD64XADDLlock) + v.AuxInt = int32ToAuxInt(off1 + off2) + v.Aux = symToAux(sym) + v.AddArg3(val, ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64XADDQlock(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (XADDQlock [off1] {sym} val (ADDQconst [off2] ptr) mem) + // cond: is32Bit(int64(off1)+int64(off2)) + // result: (XADDQlock [off1+off2] {sym} val ptr mem) + for { + off1 := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + val := v_0 + if v_1.Op != OpAMD64ADDQconst { + break + } + off2 := auxIntToInt32(v_1.AuxInt) + ptr := v_1.Args[0] + mem := v_2 + if !(is32Bit(int64(off1) + int64(off2))) { + break + } + v.reset(OpAMD64XADDQlock) + v.AuxInt = int32ToAuxInt(off1 + off2) + v.Aux = symToAux(sym) + v.AddArg3(val, ptr, mem) + return true + } + return false +} +func rewriteValueAMD64_OpAMD64XCHGL(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (XCHGL [off1] {sym} val (ADDQconst [off2] ptr) mem) + // cond: is32Bit(int64(off1)+int64(off2)) + // result: (XCHGL [off1+off2] {sym} val ptr mem) + for { + off1 := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + val := v_0 + if 
v_1.Op != OpAMD64ADDQconst {
+			break
+		}
+		off2 := auxIntToInt32(v_1.AuxInt)
+		ptr := v_1.Args[0]
+		mem := v_2
+		if !(is32Bit(int64(off1) + int64(off2))) {
+			break
+		}
+		v.reset(OpAMD64XCHGL)
+		v.AuxInt = int32ToAuxInt(off1 + off2)
+		v.Aux = symToAux(sym)
+		v.AddArg3(val, ptr, mem)
+		return true
+	}
+	// match: (XCHGL [off1] {sym1} val (LEAQ [off2] {sym2} ptr) mem)
+	// cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && ptr.Op != OpSB
+	// result: (XCHGL [off1+off2] {mergeSym(sym1,sym2)} val ptr mem)
+	for {
+		off1 := auxIntToInt32(v.AuxInt)
+		sym1 := auxToSym(v.Aux)
+		val := v_0
+		if v_1.Op != OpAMD64LEAQ {
+			break
+		}
+		off2 := auxIntToInt32(v_1.AuxInt)
+		sym2 := auxToSym(v_1.Aux)
+		ptr := v_1.Args[0]
+		mem := v_2
+		if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && ptr.Op != OpSB) {
+			break
+		}
+		v.reset(OpAMD64XCHGL)
+		v.AuxInt = int32ToAuxInt(off1 + off2)
+		v.Aux = symToAux(mergeSym(sym1, sym2))
+		v.AddArg3(val, ptr, mem)
+		return true
+	}
+	return false
+}
+func rewriteValueAMD64_OpAMD64XCHGQ(v *Value) bool {
+	v_2 := v.Args[2]
+	v_1 := v.Args[1]
+	v_0 := v.Args[0]
+	// match: (XCHGQ [off1] {sym} val (ADDQconst [off2] ptr) mem)
+	// cond: is32Bit(int64(off1)+int64(off2))
+	// result: (XCHGQ [off1+off2] {sym} val ptr mem)
+	for {
+		off1 := auxIntToInt32(v.AuxInt)
+		sym := auxToSym(v.Aux)
+		val := v_0
+		if v_1.Op != OpAMD64ADDQconst {
+			break
+		}
+		off2 := auxIntToInt32(v_1.AuxInt)
+		ptr := v_1.Args[0]
+		mem := v_2
+		if !(is32Bit(int64(off1) + int64(off2))) {
+			break
+		}
+		v.reset(OpAMD64XCHGQ)
+		v.AuxInt = int32ToAuxInt(off1 + off2)
+		v.Aux = symToAux(sym)
+		v.AddArg3(val, ptr, mem)
+		return true
+	}
+	// match: (XCHGQ [off1] {sym1} val (LEAQ [off2] {sym2} ptr) mem)
+	// cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && ptr.Op != OpSB
+	// result: (XCHGQ [off1+off2] {mergeSym(sym1,sym2)} val ptr mem)
+	for {
+		off1 := auxIntToInt32(v.AuxInt)
+		sym1 := auxToSym(v.Aux)
+		val := v_0
+		if v_1.Op != OpAMD64LEAQ {
+			break
+		}
+		off2 := auxIntToInt32(v_1.AuxInt)
+		sym2 := auxToSym(v_1.Aux)
+		ptr := v_1.Args[0]
+		mem := v_2
+		if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && ptr.Op != OpSB) {
+			break
+		}
+		v.reset(OpAMD64XCHGQ)
+		v.AuxInt = int32ToAuxInt(off1 + off2)
+		v.Aux = symToAux(mergeSym(sym1, sym2))
+		v.AddArg3(val, ptr, mem)
+		return true
+	}
+	return false
+}
+func rewriteValueAMD64_OpAMD64XORL(v *Value) bool {
+	v_1 := v.Args[1]
+	v_0 := v.Args[0]
+	// match: (XORL (SHLL (MOVLconst [1]) y) x)
+	// result: (BTCL x y)
+	for {
+		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+			if v_0.Op != OpAMD64SHLL {
+				continue
+			}
+			y := v_0.Args[1]
+			v_0_0 := v_0.Args[0]
+			if v_0_0.Op != OpAMD64MOVLconst || auxIntToInt32(v_0_0.AuxInt) != 1 {
+				continue
+			}
+			x := v_1
+			v.reset(OpAMD64BTCL)
+			v.AddArg2(x, y)
+			return true
+		}
+		break
+	}
+	// match: (XORL x (MOVLconst [c]))
+	// result: (XORLconst [c] x)
+	for {
+		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+			x := v_0
+			if v_1.Op != OpAMD64MOVLconst {
+				continue
+			}
+			c := auxIntToInt32(v_1.AuxInt)
+			v.reset(OpAMD64XORLconst)
+			v.AuxInt = int32ToAuxInt(c)
+			v.AddArg(x)
+			return true
+		}
+		break
+	}
+	// match: (XORL x x)
+	// result: (MOVLconst [0])
+	for {
+		x := v_0
+		if x != v_1 {
+			break
+		}
+		v.reset(OpAMD64MOVLconst)
+		v.AuxInt = int32ToAuxInt(0)
+		return true
+	}
+	// match: (XORL x l:(MOVLload [off] {sym} ptr mem))
+	// cond: canMergeLoadClobber(v, l, x) && clobber(l)
+	// result: (XORLload x [off] {sym} ptr mem)
+	for {
+		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+			x := v_0
+			l := v_1
+			if l.Op != OpAMD64MOVLload {
+				continue
+			}
+			off := auxIntToInt32(l.AuxInt)
+			sym := auxToSym(l.Aux)
+			mem := l.Args[1]
+			ptr := l.Args[0]
+			if !(canMergeLoadClobber(v, l, x) && clobber(l)) {
+				continue
+			}
+			v.reset(OpAMD64XORLload)
+			v.AuxInt = int32ToAuxInt(off)
+			v.Aux = symToAux(sym)
+			v.AddArg3(x, ptr, mem)
+			return true
+		}
+		break
+	}
+	// match: (XORL x (ADDLconst [-1] x))
+	// cond: buildcfg.GOAMD64 >= 3
+	// result: (BLSMSKL x)
+	for {
+		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+			x := v_0
+			if v_1.Op != OpAMD64ADDLconst || auxIntToInt32(v_1.AuxInt) != -1 || x != v_1.Args[0] || !(buildcfg.GOAMD64 >= 3) {
+				continue
+			}
+			v.reset(OpAMD64BLSMSKL)
+			v.AddArg(x)
+			return true
+		}
+		break
+	}
+	return false
+}
+func rewriteValueAMD64_OpAMD64XORLconst(v *Value) bool {
+	v_0 := v.Args[0]
+	// match: (XORLconst [1] (SETNE x))
+	// result: (SETEQ x)
+	for {
+		if auxIntToInt32(v.AuxInt) != 1 || v_0.Op != OpAMD64SETNE {
+			break
+		}
+		x := v_0.Args[0]
+		v.reset(OpAMD64SETEQ)
+		v.AddArg(x)
+		return true
+	}
+	// match: (XORLconst [1] (SETEQ x))
+	// result: (SETNE x)
+	for {
+		if auxIntToInt32(v.AuxInt) != 1 || v_0.Op != OpAMD64SETEQ {
+			break
+		}
+		x := v_0.Args[0]
+		v.reset(OpAMD64SETNE)
+		v.AddArg(x)
+		return true
+	}
+	// match: (XORLconst [1] (SETL x))
+	// result: (SETGE x)
+	for {
+		if auxIntToInt32(v.AuxInt) != 1 || v_0.Op != OpAMD64SETL {
+			break
+		}
+		x := v_0.Args[0]
+		v.reset(OpAMD64SETGE)
+		v.AddArg(x)
+		return true
+	}
+	// match: (XORLconst [1] (SETGE x))
+	// result: (SETL x)
+	for {
+		if auxIntToInt32(v.AuxInt) != 1 || v_0.Op != OpAMD64SETGE {
+			break
+		}
+		x := v_0.Args[0]
+		v.reset(OpAMD64SETL)
+		v.AddArg(x)
+		return true
+	}
+	// match: (XORLconst [1] (SETLE x))
+	// result: (SETG x)
+	for {
+		if auxIntToInt32(v.AuxInt) != 1 || v_0.Op != OpAMD64SETLE {
+			break
+		}
+		x := v_0.Args[0]
+		v.reset(OpAMD64SETG)
+		v.AddArg(x)
+		return true
+	}
+	// match: (XORLconst [1] (SETG x))
+	// result: (SETLE x)
+	for {
+		if auxIntToInt32(v.AuxInt) != 1 || v_0.Op != OpAMD64SETG {
+			break
+		}
+		x := v_0.Args[0]
+		v.reset(OpAMD64SETLE)
+		v.AddArg(x)
+		return true
+	}
+	// match: (XORLconst [1] (SETB x))
+	// result: (SETAE x)
+	for {
+		if auxIntToInt32(v.AuxInt) != 1 || v_0.Op != OpAMD64SETB {
+			break
+		}
+		x := v_0.Args[0]
+		v.reset(OpAMD64SETAE)
+		v.AddArg(x)
+		return true
+	}
+	// match: (XORLconst [1] (SETAE x))
+	// result: (SETB x)
+	for {
+		if auxIntToInt32(v.AuxInt) != 1 || v_0.Op != OpAMD64SETAE {
+			break
+		}
+		x := v_0.Args[0]
+		v.reset(OpAMD64SETB)
+		v.AddArg(x)
+		return true
+	}
+	// match: (XORLconst [1] (SETBE x))
+	// result: (SETA x)
+	for {
+		if auxIntToInt32(v.AuxInt) != 1 || v_0.Op != OpAMD64SETBE {
+			break
+		}
+		x := v_0.Args[0]
+		v.reset(OpAMD64SETA)
+		v.AddArg(x)
+		return true
+	}
+	// match: (XORLconst [1] (SETA x))
+	// result: (SETBE x)
+	for {
+		if auxIntToInt32(v.AuxInt) != 1 || v_0.Op != OpAMD64SETA {
+			break
+		}
+		x := v_0.Args[0]
+		v.reset(OpAMD64SETBE)
+		v.AddArg(x)
+		return true
+	}
+	// match: (XORLconst [c] (XORLconst [d] x))
+	// result: (XORLconst [c ^ d] x)
+	for {
+		c := auxIntToInt32(v.AuxInt)
+		if v_0.Op != OpAMD64XORLconst {
+			break
+		}
+		d := auxIntToInt32(v_0.AuxInt)
+		x := v_0.Args[0]
+		v.reset(OpAMD64XORLconst)
+		v.AuxInt = int32ToAuxInt(c ^ d)
+		v.AddArg(x)
+		return true
+	}
+	// match: (XORLconst [c] x)
+	// cond: c==0
+	// result: x
+	for {
+		c := auxIntToInt32(v.AuxInt)
+		x := v_0
+		if !(c == 0) {
+			break
+		}
+		v.copyOf(x)
+		return true
+	}
+	// match: (XORLconst [c] (MOVLconst [d]))
+	// result: (MOVLconst [c^d])
+	for {
+		c := auxIntToInt32(v.AuxInt)
+		if v_0.Op != OpAMD64MOVLconst {
+			break
+		}
+		d := auxIntToInt32(v_0.AuxInt)
+		v.reset(OpAMD64MOVLconst)
+		v.AuxInt = int32ToAuxInt(c ^ d)
+		return true
+	}
+	return false
+}
+func rewriteValueAMD64_OpAMD64XORLconstmodify(v *Value) bool {
+	v_1 := v.Args[1]
+	v_0 := v.Args[0]
+	// match: (XORLconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem)
+	// cond: ValAndOff(valoff1).canAdd32(off2)
+	// result: (XORLconstmodify [ValAndOff(valoff1).addOffset32(off2)] {sym} base mem)
+	for {
+		valoff1 := auxIntToValAndOff(v.AuxInt)
+		sym := auxToSym(v.Aux)
 		if v_0.Op != OpAMD64ADDQconst {
 			break
 		}
@@ -51779,2188 +52571,1252 @@ func rewriteValueAMD64_OpSetHiFloat32x16(v *Value) bool {
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	// match: (SetHiFloat32x16 x y)
-	// result: (VINSERTF64X4512 [1] x y)
-	for {
-		x := v_0
-		y := v_1
-		v.reset(OpAMD64VINSERTF64X4512)
-		v.AuxInt = uint8ToAuxInt(1)
-		v.AddArg2(x, y)
-		return true
-	}
-}
-func rewriteValueAMD64_OpSetHiFloat32x8(v *Value) bool {
-	v_1 := v.Args[1]
-	v_0 := v.Args[0]
-	// match: (SetHiFloat32x8 x y)
-	// result: (VINSERTF128256 [1] x y)
-	for {
-		x := v_0
-		y := v_1
-		v.reset(OpAMD64VINSERTF128256)
-		v.AuxInt = uint8ToAuxInt(1)
-		v.AddArg2(x, y)
-		return true
-	}
-}
-func rewriteValueAMD64_OpSetHiFloat64x4(v *Value) bool {
-	v_1 := v.Args[1]
-	v_0 := v.Args[0]
-	// match: (SetHiFloat64x4 x y)
-	// result: (VINSERTF128256 [1] x y)
-	for {
-		x := v_0
-		y := v_1
-		v.reset(OpAMD64VINSERTF128256)
-		v.AuxInt = uint8ToAuxInt(1)
-		v.AddArg2(x, y)
-		return true
-	}
-}
-func rewriteValueAMD64_OpSetHiFloat64x8(v *Value) bool {
-	v_1 := v.Args[1]
-	v_0 := v.Args[0]
-	// match: (SetHiFloat64x8 x y)
-	// result: (VINSERTF64X4512 [1] x y)
-	for {
-		x := v_0
-		y := v_1
-		v.reset(OpAMD64VINSERTF64X4512)
-		v.AuxInt = uint8ToAuxInt(1)
-		v.AddArg2(x, y)
-		return true
-	}
-}
-func rewriteValueAMD64_OpSetHiInt16x16(v *Value) bool {
-	v_1 := v.Args[1]
-	v_0 := v.Args[0]
-	// match: (SetHiInt16x16 x y)
-	// result: (VINSERTI128256 [1] x y)
-	for {
-		x := v_0
-		y := v_1
-		v.reset(OpAMD64VINSERTI128256)
-		v.AuxInt = uint8ToAuxInt(1)
-		v.AddArg2(x, y)
-		return true
-	}
-}
-func rewriteValueAMD64_OpSetHiInt16x32(v *Value) bool {
-	v_1 := v.Args[1]
-	v_0 := v.Args[0]
-	// match: (SetHiInt16x32 x y)
-	// result: (VINSERTI64X4512 [1] x y)
-	for {
-		x := v_0
-		y := v_1
-		v.reset(OpAMD64VINSERTI64X4512)
-		v.AuxInt = uint8ToAuxInt(1)
-		v.AddArg2(x, y)
-		return true
-	}
-}
-func rewriteValueAMD64_OpSetHiInt32x16(v *Value) bool {
-	v_1 := v.Args[1]
-	v_0 := v.Args[0]
-	// match: (SetHiInt32x16 x y)
-	// result: (VINSERTI64X4512 [1] x y)
-	for {
-		x := v_0
-		y := v_1
-		v.reset(OpAMD64VINSERTI64X4512)
-		v.AuxInt = uint8ToAuxInt(1)
-		v.AddArg2(x, y)
-		return true
-	}
-}
-func rewriteValueAMD64_OpSetHiInt32x8(v *Value) bool {
-	v_1 := v.Args[1]
-	v_0 := v.Args[0]
-	// match: (SetHiInt32x8 x y)
-	// result: (VINSERTI128256 [1] x y)
-	for {
-		x := v_0
-		y := v_1
-		v.reset(OpAMD64VINSERTI128256)
-		v.AuxInt = uint8ToAuxInt(1)
-		v.AddArg2(x, y)
-		return true
-	}
-}
-func rewriteValueAMD64_OpSetHiInt64x4(v *Value) bool {
-	v_1 := v.Args[1]
-	v_0 := v.Args[0]
-	// match: (SetHiInt64x4 x y)
-	// result: (VINSERTI128256 [1] x y)
-	for {
-		x := v_0
-		y := v_1
-		v.reset(OpAMD64VINSERTI128256)
-		v.AuxInt = uint8ToAuxInt(1)
-		v.AddArg2(x, y)
-		return true
-	}
-}
-func rewriteValueAMD64_OpSetHiInt64x8(v *Value) bool {
-	v_1 := v.Args[1]
-	v_0 := v.Args[0]
-	// match: (SetHiInt64x8 x y)
-	// result: (VINSERTI64X4512
[1] x y) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VINSERTI64X4512) - v.AuxInt = uint8ToAuxInt(1) - v.AddArg2(x, y) - return true - } -} -func rewriteValueAMD64_OpSetHiInt8x32(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (SetHiInt8x32 x y) - // result: (VINSERTI128256 [1] x y) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VINSERTI128256) - v.AuxInt = uint8ToAuxInt(1) - v.AddArg2(x, y) - return true - } -} -func rewriteValueAMD64_OpSetHiInt8x64(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (SetHiInt8x64 x y) - // result: (VINSERTI64X4512 [1] x y) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VINSERTI64X4512) - v.AuxInt = uint8ToAuxInt(1) - v.AddArg2(x, y) - return true - } -} -func rewriteValueAMD64_OpSetHiUint16x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (SetHiUint16x16 x y) - // result: (VINSERTI128256 [1] x y) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VINSERTI128256) - v.AuxInt = uint8ToAuxInt(1) - v.AddArg2(x, y) - return true - } -} -func rewriteValueAMD64_OpSetHiUint16x32(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (SetHiUint16x32 x y) - // result: (VINSERTI64X4512 [1] x y) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VINSERTI64X4512) - v.AuxInt = uint8ToAuxInt(1) - v.AddArg2(x, y) - return true - } -} -func rewriteValueAMD64_OpSetHiUint32x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (SetHiUint32x16 x y) - // result: (VINSERTI64X4512 [1] x y) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VINSERTI64X4512) - v.AuxInt = uint8ToAuxInt(1) - v.AddArg2(x, y) - return true - } -} -func rewriteValueAMD64_OpSetHiUint32x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (SetHiUint32x8 x y) - // result: (VINSERTI128256 [1] x y) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VINSERTI128256) - v.AuxInt = uint8ToAuxInt(1) - v.AddArg2(x, y) - return true - } -} -func rewriteValueAMD64_OpSetHiUint64x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (SetHiUint64x4 x y) - // result: (VINSERTI128256 [1] x y) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VINSERTI128256) - v.AuxInt = uint8ToAuxInt(1) - v.AddArg2(x, y) - return true - } -} -func rewriteValueAMD64_OpSetHiUint64x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (SetHiUint64x8 x y) - // result: (VINSERTI64X4512 [1] x y) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VINSERTI64X4512) - v.AuxInt = uint8ToAuxInt(1) - v.AddArg2(x, y) - return true - } -} -func rewriteValueAMD64_OpSetHiUint8x32(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (SetHiUint8x32 x y) - // result: (VINSERTI128256 [1] x y) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VINSERTI128256) - v.AuxInt = uint8ToAuxInt(1) - v.AddArg2(x, y) - return true - } -} -func rewriteValueAMD64_OpSetHiUint8x64(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (SetHiUint8x64 x y) - // result: (VINSERTI64X4512 [1] x y) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VINSERTI64X4512) - v.AuxInt = uint8ToAuxInt(1) - v.AddArg2(x, y) - return true - } -} -func rewriteValueAMD64_OpSetLoFloat32x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (SetLoFloat32x16 x y) - // result: (VINSERTF64X4512 [0] x y) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VINSERTF64X4512) - v.AuxInt = uint8ToAuxInt(0) - v.AddArg2(x, y) - return true - } -} -func rewriteValueAMD64_OpSetLoFloat32x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (SetLoFloat32x8 x 
y) - // result: (VINSERTF128256 [0] x y) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VINSERTF128256) - v.AuxInt = uint8ToAuxInt(0) - v.AddArg2(x, y) - return true - } -} -func rewriteValueAMD64_OpSetLoFloat64x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (SetLoFloat64x4 x y) - // result: (VINSERTF128256 [0] x y) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VINSERTF128256) - v.AuxInt = uint8ToAuxInt(0) - v.AddArg2(x, y) - return true - } -} -func rewriteValueAMD64_OpSetLoFloat64x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (SetLoFloat64x8 x y) - // result: (VINSERTF64X4512 [0] x y) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VINSERTF64X4512) - v.AuxInt = uint8ToAuxInt(0) - v.AddArg2(x, y) - return true - } -} -func rewriteValueAMD64_OpSetLoInt16x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (SetLoInt16x16 x y) - // result: (VINSERTI128256 [0] x y) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VINSERTI128256) - v.AuxInt = uint8ToAuxInt(0) - v.AddArg2(x, y) - return true - } -} -func rewriteValueAMD64_OpSetLoInt16x32(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (SetLoInt16x32 x y) - // result: (VINSERTI64X4512 [0] x y) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VINSERTI64X4512) - v.AuxInt = uint8ToAuxInt(0) - v.AddArg2(x, y) - return true - } -} -func rewriteValueAMD64_OpSetLoInt32x16(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (SetLoInt32x16 x y) - // result: (VINSERTI64X4512 [0] x y) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VINSERTI64X4512) - v.AuxInt = uint8ToAuxInt(0) - v.AddArg2(x, y) - return true - } -} -func rewriteValueAMD64_OpSetLoInt32x8(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (SetLoInt32x8 x y) - // result: (VINSERTI128256 [0] x y) - for { - x := v_0 - y := v_1 - v.reset(OpAMD64VINSERTI128256) - v.AuxInt = uint8ToAuxInt(0) - v.AddArg2(x, y) - return true - } -} -func rewriteValueAMD64_OpSetLoInt64x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (SetLoInt64x4 x y) - // result: (VINSERTI128256 [0] x y) + // result: (VINSERTF64X4512 [1] x y) for { x := v_0 y := v_1 - v.reset(OpAMD64VINSERTI128256) - v.AuxInt = uint8ToAuxInt(0) + v.reset(OpAMD64VINSERTF64X4512) + v.AuxInt = uint8ToAuxInt(1) v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpSetLoInt64x8(v *Value) bool { +func rewriteValueAMD64_OpSetHiFloat32x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (SetLoInt64x8 x y) - // result: (VINSERTI64X4512 [0] x y) + // match: (SetHiFloat32x8 x y) + // result: (VINSERTF128256 [1] x y) for { x := v_0 y := v_1 - v.reset(OpAMD64VINSERTI64X4512) - v.AuxInt = uint8ToAuxInt(0) + v.reset(OpAMD64VINSERTF128256) + v.AuxInt = uint8ToAuxInt(1) v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpSetLoInt8x32(v *Value) bool { +func rewriteValueAMD64_OpSetHiFloat64x4(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (SetLoInt8x32 x y) - // result: (VINSERTI128256 [0] x y) + // match: (SetHiFloat64x4 x y) + // result: (VINSERTF128256 [1] x y) for { x := v_0 y := v_1 - v.reset(OpAMD64VINSERTI128256) - v.AuxInt = uint8ToAuxInt(0) + v.reset(OpAMD64VINSERTF128256) + v.AuxInt = uint8ToAuxInt(1) v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpSetLoInt8x64(v *Value) bool { +func rewriteValueAMD64_OpSetHiFloat64x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (SetLoInt8x64 x y) - // result: (VINSERTI64X4512 [0] x y) + // match: (SetHiFloat64x8 x y) + // result: 
(VINSERTF64X4512 [1] x y) for { x := v_0 y := v_1 - v.reset(OpAMD64VINSERTI64X4512) - v.AuxInt = uint8ToAuxInt(0) + v.reset(OpAMD64VINSERTF64X4512) + v.AuxInt = uint8ToAuxInt(1) v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpSetLoUint16x16(v *Value) bool { +func rewriteValueAMD64_OpSetHiInt16x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (SetLoUint16x16 x y) - // result: (VINSERTI128256 [0] x y) + // match: (SetHiInt16x16 x y) + // result: (VINSERTI128256 [1] x y) for { x := v_0 y := v_1 v.reset(OpAMD64VINSERTI128256) - v.AuxInt = uint8ToAuxInt(0) + v.AuxInt = uint8ToAuxInt(1) v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpSetLoUint16x32(v *Value) bool { +func rewriteValueAMD64_OpSetHiInt16x32(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (SetLoUint16x32 x y) - // result: (VINSERTI64X4512 [0] x y) + // match: (SetHiInt16x32 x y) + // result: (VINSERTI64X4512 [1] x y) for { x := v_0 y := v_1 v.reset(OpAMD64VINSERTI64X4512) - v.AuxInt = uint8ToAuxInt(0) + v.AuxInt = uint8ToAuxInt(1) v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpSetLoUint32x16(v *Value) bool { +func rewriteValueAMD64_OpSetHiInt32x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (SetLoUint32x16 x y) - // result: (VINSERTI64X4512 [0] x y) + // match: (SetHiInt32x16 x y) + // result: (VINSERTI64X4512 [1] x y) for { x := v_0 y := v_1 v.reset(OpAMD64VINSERTI64X4512) - v.AuxInt = uint8ToAuxInt(0) + v.AuxInt = uint8ToAuxInt(1) v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpSetLoUint32x8(v *Value) bool { +func rewriteValueAMD64_OpSetHiInt32x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (SetLoUint32x8 x y) - // result: (VINSERTI128256 [0] x y) + // match: (SetHiInt32x8 x y) + // result: (VINSERTI128256 [1] x y) for { x := v_0 y := v_1 v.reset(OpAMD64VINSERTI128256) - v.AuxInt = uint8ToAuxInt(0) + v.AuxInt = uint8ToAuxInt(1) v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpSetLoUint64x4(v *Value) bool { +func rewriteValueAMD64_OpSetHiInt64x4(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (SetLoUint64x4 x y) - // result: (VINSERTI128256 [0] x y) + // match: (SetHiInt64x4 x y) + // result: (VINSERTI128256 [1] x y) for { x := v_0 y := v_1 v.reset(OpAMD64VINSERTI128256) - v.AuxInt = uint8ToAuxInt(0) + v.AuxInt = uint8ToAuxInt(1) v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpSetLoUint64x8(v *Value) bool { +func rewriteValueAMD64_OpSetHiInt64x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (SetLoUint64x8 x y) - // result: (VINSERTI64X4512 [0] x y) + // match: (SetHiInt64x8 x y) + // result: (VINSERTI64X4512 [1] x y) for { x := v_0 y := v_1 v.reset(OpAMD64VINSERTI64X4512) - v.AuxInt = uint8ToAuxInt(0) + v.AuxInt = uint8ToAuxInt(1) v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpSetLoUint8x32(v *Value) bool { +func rewriteValueAMD64_OpSetHiInt8x32(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (SetLoUint8x32 x y) - // result: (VINSERTI128256 [0] x y) + // match: (SetHiInt8x32 x y) + // result: (VINSERTI128256 [1] x y) for { x := v_0 y := v_1 v.reset(OpAMD64VINSERTI128256) - v.AuxInt = uint8ToAuxInt(0) + v.AuxInt = uint8ToAuxInt(1) v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpSetLoUint8x64(v *Value) bool { +func rewriteValueAMD64_OpSetHiInt8x64(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (SetLoUint8x64 x y) - // result: (VINSERTI64X4512 [0] x y) + // match: (SetHiInt8x64 x y) + // result: (VINSERTI64X4512 [1] x y) for 
{ x := v_0 y := v_1 v.reset(OpAMD64VINSERTI64X4512) - v.AuxInt = uint8ToAuxInt(0) + v.AuxInt = uint8ToAuxInt(1) v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt16x16(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllLeftConcatMaskedInt16x16 [a] x y mask) - // result: (VPSHLDWMasked256 [a] x y (VPMOVVec16x16ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHLDWMasked256) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt16x32(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllLeftConcatMaskedInt16x32 [a] x y mask) - // result: (VPSHLDWMasked512 [a] x y (VPMOVVec16x32ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHLDWMasked512) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt16x8(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllLeftConcatMaskedInt16x8 [a] x y mask) - // result: (VPSHLDWMasked128 [a] x y (VPMOVVec16x8ToM mask)) - for { - a := auxIntToUint8(v.AuxInt) - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHLDWMasked128) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt32x16(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpSetHiUint16x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllLeftConcatMaskedInt32x16 [a] x y mask) - // result: (VPSHLDDMasked512 [a] x y (VPMOVVec32x16ToM mask)) + // match: (SetHiUint16x16 x y) + // result: (VINSERTI128256 [1] x y) for { - a := auxIntToUint8(v.AuxInt) x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHLDDMasked512) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpAMD64VINSERTI128256) + v.AuxInt = uint8ToAuxInt(1) + v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt32x4(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpSetHiUint16x32(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllLeftConcatMaskedInt32x4 [a] x y mask) - // result: (VPSHLDDMasked128 [a] x y (VPMOVVec32x4ToM mask)) + // match: (SetHiUint16x32 x y) + // result: (VINSERTI64X4512 [1] x y) for { - a := auxIntToUint8(v.AuxInt) x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHLDDMasked128) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpAMD64VINSERTI64X4512) + v.AuxInt = uint8ToAuxInt(1) + v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt32x8(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpSetHiUint32x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllLeftConcatMaskedInt32x8 [a] x y mask) - // result: (VPSHLDDMasked256 [a] x y (VPMOVVec32x8ToM 
mask)) + // match: (SetHiUint32x16 x y) + // result: (VINSERTI64X4512 [1] x y) for { - a := auxIntToUint8(v.AuxInt) x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHLDDMasked256) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpAMD64VINSERTI64X4512) + v.AuxInt = uint8ToAuxInt(1) + v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt64x2(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpSetHiUint32x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllLeftConcatMaskedInt64x2 [a] x y mask) - // result: (VPSHLDQMasked128 [a] x y (VPMOVVec64x2ToM mask)) + // match: (SetHiUint32x8 x y) + // result: (VINSERTI128256 [1] x y) for { - a := auxIntToUint8(v.AuxInt) x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHLDQMasked128) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpAMD64VINSERTI128256) + v.AuxInt = uint8ToAuxInt(1) + v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt64x4(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpSetHiUint64x4(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllLeftConcatMaskedInt64x4 [a] x y mask) - // result: (VPSHLDQMasked256 [a] x y (VPMOVVec64x4ToM mask)) + // match: (SetHiUint64x4 x y) + // result: (VINSERTI128256 [1] x y) for { - a := auxIntToUint8(v.AuxInt) x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHLDQMasked256) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpAMD64VINSERTI128256) + v.AuxInt = uint8ToAuxInt(1) + v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt64x8(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpSetHiUint64x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllLeftConcatMaskedInt64x8 [a] x y mask) - // result: (VPSHLDQMasked512 [a] x y (VPMOVVec64x8ToM mask)) + // match: (SetHiUint64x8 x y) + // result: (VINSERTI64X4512 [1] x y) for { - a := auxIntToUint8(v.AuxInt) x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHLDQMasked512) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpAMD64VINSERTI64X4512) + v.AuxInt = uint8ToAuxInt(1) + v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint16x16(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpSetHiUint8x32(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllLeftConcatMaskedUint16x16 [a] x y mask) - // result: (VPSHLDWMasked256 [a] x y (VPMOVVec16x16ToM mask)) + // match: (SetHiUint8x32 x y) + // result: (VINSERTI128256 [1] x y) for { - a := auxIntToUint8(v.AuxInt) x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHLDWMasked256) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpAMD64VINSERTI128256) + v.AuxInt = uint8ToAuxInt(1) + v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint16x32(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpSetHiUint8x64(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // 
match: (ShiftAllLeftConcatMaskedUint16x32 [a] x y mask) - // result: (VPSHLDWMasked512 [a] x y (VPMOVVec16x32ToM mask)) + // match: (SetHiUint8x64 x y) + // result: (VINSERTI64X4512 [1] x y) for { - a := auxIntToUint8(v.AuxInt) x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHLDWMasked512) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpAMD64VINSERTI64X4512) + v.AuxInt = uint8ToAuxInt(1) + v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint16x8(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpSetLoFloat32x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllLeftConcatMaskedUint16x8 [a] x y mask) - // result: (VPSHLDWMasked128 [a] x y (VPMOVVec16x8ToM mask)) + // match: (SetLoFloat32x16 x y) + // result: (VINSERTF64X4512 [0] x y) for { - a := auxIntToUint8(v.AuxInt) x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHLDWMasked128) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpAMD64VINSERTF64X4512) + v.AuxInt = uint8ToAuxInt(0) + v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint32x16(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpSetLoFloat32x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllLeftConcatMaskedUint32x16 [a] x y mask) - // result: (VPSHLDDMasked512 [a] x y (VPMOVVec32x16ToM mask)) + // match: (SetLoFloat32x8 x y) + // result: (VINSERTF128256 [0] x y) for { - a := auxIntToUint8(v.AuxInt) x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHLDDMasked512) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpAMD64VINSERTF128256) + v.AuxInt = uint8ToAuxInt(0) + v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint32x4(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpSetLoFloat64x4(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllLeftConcatMaskedUint32x4 [a] x y mask) - // result: (VPSHLDDMasked128 [a] x y (VPMOVVec32x4ToM mask)) + // match: (SetLoFloat64x4 x y) + // result: (VINSERTF128256 [0] x y) for { - a := auxIntToUint8(v.AuxInt) x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHLDDMasked128) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpAMD64VINSERTF128256) + v.AuxInt = uint8ToAuxInt(0) + v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint32x8(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpSetLoFloat64x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllLeftConcatMaskedUint32x8 [a] x y mask) - // result: (VPSHLDDMasked256 [a] x y (VPMOVVec32x8ToM mask)) + // match: (SetLoFloat64x8 x y) + // result: (VINSERTF64X4512 [0] x y) for { - a := auxIntToUint8(v.AuxInt) x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHLDDMasked256) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpAMD64VINSERTF64X4512) + v.AuxInt = uint8ToAuxInt(0) + v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint64x2(v *Value) bool { - 
v_2 := v.Args[2] +func rewriteValueAMD64_OpSetLoInt16x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllLeftConcatMaskedUint64x2 [a] x y mask) - // result: (VPSHLDQMasked128 [a] x y (VPMOVVec64x2ToM mask)) + // match: (SetLoInt16x16 x y) + // result: (VINSERTI128256 [0] x y) for { - a := auxIntToUint8(v.AuxInt) x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHLDQMasked128) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpAMD64VINSERTI128256) + v.AuxInt = uint8ToAuxInt(0) + v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint64x4(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpSetLoInt16x32(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllLeftConcatMaskedUint64x4 [a] x y mask) - // result: (VPSHLDQMasked256 [a] x y (VPMOVVec64x4ToM mask)) + // match: (SetLoInt16x32 x y) + // result: (VINSERTI64X4512 [0] x y) for { - a := auxIntToUint8(v.AuxInt) x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHLDQMasked256) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpAMD64VINSERTI64X4512) + v.AuxInt = uint8ToAuxInt(0) + v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint64x8(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpSetLoInt32x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllLeftConcatMaskedUint64x8 [a] x y mask) - // result: (VPSHLDQMasked512 [a] x y (VPMOVVec64x8ToM mask)) + // match: (SetLoInt32x16 x y) + // result: (VINSERTI64X4512 [0] x y) for { - a := auxIntToUint8(v.AuxInt) x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHLDQMasked512) - v.AuxInt = uint8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpAMD64VINSERTI64X4512) + v.AuxInt = uint8ToAuxInt(0) + v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpShiftAllLeftInt16x16(v *Value) bool { +func rewriteValueAMD64_OpSetLoInt32x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (ShiftAllLeftInt16x16 x (MOVQconst [c])) - // result: (VPSLLW256const [uint8(c)] x) - for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { - break - } - c := auxIntToInt64(v_1.AuxInt) - v.reset(OpAMD64VPSLLW256const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg(x) - return true - } - // match: (ShiftAllLeftInt16x16 x y) - // result: (VPSLLW256 x y) + // match: (SetLoInt32x8 x y) + // result: (VINSERTI128256 [0] x y) for { x := v_0 y := v_1 - v.reset(OpAMD64VPSLLW256) + v.reset(OpAMD64VINSERTI128256) + v.AuxInt = uint8ToAuxInt(0) v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpShiftAllLeftInt16x32(v *Value) bool { +func rewriteValueAMD64_OpSetLoInt64x4(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (ShiftAllLeftInt16x32 x (MOVQconst [c])) - // result: (VPSLLW512const [uint8(c)] x) - for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { - break - } - c := auxIntToInt64(v_1.AuxInt) - v.reset(OpAMD64VPSLLW512const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg(x) - return true - } - // match: (ShiftAllLeftInt16x32 x y) - // result: (VPSLLW512 x y) + // match: (SetLoInt64x4 x y) + // result: (VINSERTI128256 [0] x y) for { x := v_0 y := v_1 - v.reset(OpAMD64VPSLLW512) + v.reset(OpAMD64VINSERTI128256) + v.AuxInt = 
uint8ToAuxInt(0) v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpShiftAllLeftInt16x8(v *Value) bool { +func rewriteValueAMD64_OpSetLoInt64x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (ShiftAllLeftInt16x8 x (MOVQconst [c])) - // result: (VPSLLW128const [uint8(c)] x) - for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { - break - } - c := auxIntToInt64(v_1.AuxInt) - v.reset(OpAMD64VPSLLW128const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg(x) - return true - } - // match: (ShiftAllLeftInt16x8 x y) - // result: (VPSLLW128 x y) + // match: (SetLoInt64x8 x y) + // result: (VINSERTI64X4512 [0] x y) for { x := v_0 y := v_1 - v.reset(OpAMD64VPSLLW128) + v.reset(OpAMD64VINSERTI64X4512) + v.AuxInt = uint8ToAuxInt(0) v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpShiftAllLeftInt32x16(v *Value) bool { +func rewriteValueAMD64_OpSetLoInt8x32(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (ShiftAllLeftInt32x16 x (MOVQconst [c])) - // result: (VPSLLD512const [uint8(c)] x) - for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { - break - } - c := auxIntToInt64(v_1.AuxInt) - v.reset(OpAMD64VPSLLD512const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg(x) - return true - } - // match: (ShiftAllLeftInt32x16 x y) - // result: (VPSLLD512 x y) + // match: (SetLoInt8x32 x y) + // result: (VINSERTI128256 [0] x y) for { x := v_0 y := v_1 - v.reset(OpAMD64VPSLLD512) + v.reset(OpAMD64VINSERTI128256) + v.AuxInt = uint8ToAuxInt(0) v.AddArg2(x, y) return true } -} -func rewriteValueAMD64_OpShiftAllLeftInt32x4(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - // match: (ShiftAllLeftInt32x4 x (MOVQconst [c])) - // result: (VPSLLD128const [uint8(c)] x) - for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { - break - } - c := auxIntToInt64(v_1.AuxInt) - v.reset(OpAMD64VPSLLD128const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg(x) - return true - } - // match: (ShiftAllLeftInt32x4 x y) - // result: (VPSLLD128 x y) +} +func rewriteValueAMD64_OpSetLoInt8x64(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (SetLoInt8x64 x y) + // result: (VINSERTI64X4512 [0] x y) for { x := v_0 y := v_1 - v.reset(OpAMD64VPSLLD128) + v.reset(OpAMD64VINSERTI64X4512) + v.AuxInt = uint8ToAuxInt(0) v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpShiftAllLeftInt32x8(v *Value) bool { +func rewriteValueAMD64_OpSetLoUint16x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (ShiftAllLeftInt32x8 x (MOVQconst [c])) - // result: (VPSLLD256const [uint8(c)] x) + // match: (SetLoUint16x16 x y) + // result: (VINSERTI128256 [0] x y) for { x := v_0 - if v_1.Op != OpAMD64MOVQconst { - break - } - c := auxIntToInt64(v_1.AuxInt) - v.reset(OpAMD64VPSLLD256const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg(x) + y := v_1 + v.reset(OpAMD64VINSERTI128256) + v.AuxInt = uint8ToAuxInt(0) + v.AddArg2(x, y) return true } - // match: (ShiftAllLeftInt32x8 x y) - // result: (VPSLLD256 x y) +} +func rewriteValueAMD64_OpSetLoUint16x32(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (SetLoUint16x32 x y) + // result: (VINSERTI64X4512 [0] x y) for { x := v_0 y := v_1 - v.reset(OpAMD64VPSLLD256) + v.reset(OpAMD64VINSERTI64X4512) + v.AuxInt = uint8ToAuxInt(0) v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpShiftAllLeftInt64x2(v *Value) bool { +func rewriteValueAMD64_OpSetLoUint32x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (ShiftAllLeftInt64x2 x (MOVQconst [c])) - // result: (VPSLLQ128const [uint8(c)] x) + // match: 
(SetLoUint32x16 x y) + // result: (VINSERTI64X4512 [0] x y) for { x := v_0 - if v_1.Op != OpAMD64MOVQconst { - break - } - c := auxIntToInt64(v_1.AuxInt) - v.reset(OpAMD64VPSLLQ128const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg(x) + y := v_1 + v.reset(OpAMD64VINSERTI64X4512) + v.AuxInt = uint8ToAuxInt(0) + v.AddArg2(x, y) return true } - // match: (ShiftAllLeftInt64x2 x y) - // result: (VPSLLQ128 x y) +} +func rewriteValueAMD64_OpSetLoUint32x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (SetLoUint32x8 x y) + // result: (VINSERTI128256 [0] x y) for { x := v_0 y := v_1 - v.reset(OpAMD64VPSLLQ128) + v.reset(OpAMD64VINSERTI128256) + v.AuxInt = uint8ToAuxInt(0) v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpShiftAllLeftInt64x4(v *Value) bool { +func rewriteValueAMD64_OpSetLoUint64x4(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (ShiftAllLeftInt64x4 x (MOVQconst [c])) - // result: (VPSLLQ256const [uint8(c)] x) + // match: (SetLoUint64x4 x y) + // result: (VINSERTI128256 [0] x y) for { x := v_0 - if v_1.Op != OpAMD64MOVQconst { - break - } - c := auxIntToInt64(v_1.AuxInt) - v.reset(OpAMD64VPSLLQ256const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg(x) + y := v_1 + v.reset(OpAMD64VINSERTI128256) + v.AuxInt = uint8ToAuxInt(0) + v.AddArg2(x, y) return true } - // match: (ShiftAllLeftInt64x4 x y) - // result: (VPSLLQ256 x y) +} +func rewriteValueAMD64_OpSetLoUint64x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (SetLoUint64x8 x y) + // result: (VINSERTI64X4512 [0] x y) for { x := v_0 y := v_1 - v.reset(OpAMD64VPSLLQ256) + v.reset(OpAMD64VINSERTI64X4512) + v.AuxInt = uint8ToAuxInt(0) v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpShiftAllLeftInt64x8(v *Value) bool { +func rewriteValueAMD64_OpSetLoUint8x32(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - // match: (ShiftAllLeftInt64x8 x (MOVQconst [c])) - // result: (VPSLLQ512const [uint8(c)] x) + // match: (SetLoUint8x32 x y) + // result: (VINSERTI128256 [0] x y) for { x := v_0 - if v_1.Op != OpAMD64MOVQconst { - break - } - c := auxIntToInt64(v_1.AuxInt) - v.reset(OpAMD64VPSLLQ512const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg(x) + y := v_1 + v.reset(OpAMD64VINSERTI128256) + v.AuxInt = uint8ToAuxInt(0) + v.AddArg2(x, y) return true } - // match: (ShiftAllLeftInt64x8 x y) - // result: (VPSLLQ512 x y) +} +func rewriteValueAMD64_OpSetLoUint8x64(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (SetLoUint8x64 x y) + // result: (VINSERTI64X4512 [0] x y) for { x := v_0 y := v_1 - v.reset(OpAMD64VPSLLQ512) + v.reset(OpAMD64VINSERTI64X4512) + v.AuxInt = uint8ToAuxInt(0) v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpShiftAllLeftMaskedInt16x16(v *Value) bool { +func rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt16x16(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (ShiftAllLeftMaskedInt16x16 x (MOVQconst [c]) mask) - // result: (VPSLLWMasked256const [uint8(c)] x (VPMOVVec16x16ToM mask)) - for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { - break - } - c := auxIntToInt64(v_1.AuxInt) - mask := v_2 - v.reset(OpAMD64VPSLLWMasked256const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } - // match: (ShiftAllLeftMaskedInt16x16 x y mask) - // result: (VPSLLWMasked256 x y (VPMOVVec16x16ToM mask)) + // match: (ShiftAllLeftConcatMaskedInt16x16 [a] x y mask) + // result: 
(VPSHLDWMasked256 [a] x y (VPMOVVec16x16ToM mask)) for { + a := auxIntToUint8(v.AuxInt) x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPSLLWMasked256) + v.reset(OpAMD64VPSHLDWMasked256) + v.AuxInt = uint8ToAuxInt(a) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpShiftAllLeftMaskedInt16x32(v *Value) bool { +func rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt16x32(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (ShiftAllLeftMaskedInt16x32 x (MOVQconst [c]) mask) - // result: (VPSLLWMasked512const [uint8(c)] x (VPMOVVec16x32ToM mask)) - for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { - break - } - c := auxIntToInt64(v_1.AuxInt) - mask := v_2 - v.reset(OpAMD64VPSLLWMasked512const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } - // match: (ShiftAllLeftMaskedInt16x32 x y mask) - // result: (VPSLLWMasked512 x y (VPMOVVec16x32ToM mask)) + // match: (ShiftAllLeftConcatMaskedInt16x32 [a] x y mask) + // result: (VPSHLDWMasked512 [a] x y (VPMOVVec16x32ToM mask)) for { + a := auxIntToUint8(v.AuxInt) x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPSLLWMasked512) + v.reset(OpAMD64VPSHLDWMasked512) + v.AuxInt = uint8ToAuxInt(a) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpShiftAllLeftMaskedInt16x8(v *Value) bool { +func rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt16x8(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (ShiftAllLeftMaskedInt16x8 x (MOVQconst [c]) mask) - // result: (VPSLLWMasked128const [uint8(c)] x (VPMOVVec16x8ToM mask)) - for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { - break - } - c := auxIntToInt64(v_1.AuxInt) - mask := v_2 - v.reset(OpAMD64VPSLLWMasked128const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } - // match: (ShiftAllLeftMaskedInt16x8 x y mask) - // result: (VPSLLWMasked128 x y (VPMOVVec16x8ToM mask)) + // match: (ShiftAllLeftConcatMaskedInt16x8 [a] x y mask) + // result: (VPSHLDWMasked128 [a] x y (VPMOVVec16x8ToM mask)) for { + a := auxIntToUint8(v.AuxInt) x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPSLLWMasked128) + v.reset(OpAMD64VPSHLDWMasked128) + v.AuxInt = uint8ToAuxInt(a) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpShiftAllLeftMaskedInt32x16(v *Value) bool { +func rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt32x16(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (ShiftAllLeftMaskedInt32x16 x (MOVQconst [c]) mask) - // result: (VPSLLDMasked512const [uint8(c)] x (VPMOVVec32x16ToM mask)) - for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { - break - } - c := auxIntToInt64(v_1.AuxInt) - mask := v_2 - v.reset(OpAMD64VPSLLDMasked512const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } - // match: (ShiftAllLeftMaskedInt32x16 x y mask) - // result: (VPSLLDMasked512 x y (VPMOVVec32x16ToM mask)) + // match: (ShiftAllLeftConcatMaskedInt32x16 [a] x y mask) + // result: (VPSHLDDMasked512 [a] x y 
(VPMOVVec32x16ToM mask)) for { + a := auxIntToUint8(v.AuxInt) x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPSLLDMasked512) + v.reset(OpAMD64VPSHLDDMasked512) + v.AuxInt = uint8ToAuxInt(a) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpShiftAllLeftMaskedInt32x4(v *Value) bool { +func rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt32x4(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (ShiftAllLeftMaskedInt32x4 x (MOVQconst [c]) mask) - // result: (VPSLLDMasked128const [uint8(c)] x (VPMOVVec32x4ToM mask)) - for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { - break - } - c := auxIntToInt64(v_1.AuxInt) - mask := v_2 - v.reset(OpAMD64VPSLLDMasked128const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } - // match: (ShiftAllLeftMaskedInt32x4 x y mask) - // result: (VPSLLDMasked128 x y (VPMOVVec32x4ToM mask)) + // match: (ShiftAllLeftConcatMaskedInt32x4 [a] x y mask) + // result: (VPSHLDDMasked128 [a] x y (VPMOVVec32x4ToM mask)) for { + a := auxIntToUint8(v.AuxInt) x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPSLLDMasked128) + v.reset(OpAMD64VPSHLDDMasked128) + v.AuxInt = uint8ToAuxInt(a) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpShiftAllLeftMaskedInt32x8(v *Value) bool { +func rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt32x8(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (ShiftAllLeftMaskedInt32x8 x (MOVQconst [c]) mask) - // result: (VPSLLDMasked256const [uint8(c)] x (VPMOVVec32x8ToM mask)) - for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { - break - } - c := auxIntToInt64(v_1.AuxInt) - mask := v_2 - v.reset(OpAMD64VPSLLDMasked256const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } - // match: (ShiftAllLeftMaskedInt32x8 x y mask) - // result: (VPSLLDMasked256 x y (VPMOVVec32x8ToM mask)) + // match: (ShiftAllLeftConcatMaskedInt32x8 [a] x y mask) + // result: (VPSHLDDMasked256 [a] x y (VPMOVVec32x8ToM mask)) for { + a := auxIntToUint8(v.AuxInt) x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPSLLDMasked256) + v.reset(OpAMD64VPSHLDDMasked256) + v.AuxInt = uint8ToAuxInt(a) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpShiftAllLeftMaskedInt64x2(v *Value) bool { +func rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt64x2(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (ShiftAllLeftMaskedInt64x2 x (MOVQconst [c]) mask) - // result: (VPSLLQMasked128const [uint8(c)] x (VPMOVVec64x2ToM mask)) - for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { - break - } - c := auxIntToInt64(v_1.AuxInt) - mask := v_2 - v.reset(OpAMD64VPSLLQMasked128const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } - // match: (ShiftAllLeftMaskedInt64x2 x y mask) - // result: (VPSLLQMasked128 x y (VPMOVVec64x2ToM mask)) + // match: (ShiftAllLeftConcatMaskedInt64x2 [a] x y mask) + // result: (VPSHLDQMasked128 [a] x y (VPMOVVec64x2ToM mask)) for { + a := 
auxIntToUint8(v.AuxInt) x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPSLLQMasked128) + v.reset(OpAMD64VPSHLDQMasked128) + v.AuxInt = uint8ToAuxInt(a) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpShiftAllLeftMaskedInt64x4(v *Value) bool { +func rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt64x4(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (ShiftAllLeftMaskedInt64x4 x (MOVQconst [c]) mask) - // result: (VPSLLQMasked256const [uint8(c)] x (VPMOVVec64x4ToM mask)) - for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { - break - } - c := auxIntToInt64(v_1.AuxInt) - mask := v_2 - v.reset(OpAMD64VPSLLQMasked256const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } - // match: (ShiftAllLeftMaskedInt64x4 x y mask) - // result: (VPSLLQMasked256 x y (VPMOVVec64x4ToM mask)) + // match: (ShiftAllLeftConcatMaskedInt64x4 [a] x y mask) + // result: (VPSHLDQMasked256 [a] x y (VPMOVVec64x4ToM mask)) for { + a := auxIntToUint8(v.AuxInt) x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPSLLQMasked256) + v.reset(OpAMD64VPSHLDQMasked256) + v.AuxInt = uint8ToAuxInt(a) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpShiftAllLeftMaskedInt64x8(v *Value) bool { +func rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt64x8(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (ShiftAllLeftMaskedInt64x8 x (MOVQconst [c]) mask) - // result: (VPSLLQMasked512const [uint8(c)] x (VPMOVVec64x8ToM mask)) - for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { - break - } - c := auxIntToInt64(v_1.AuxInt) - mask := v_2 - v.reset(OpAMD64VPSLLQMasked512const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } - // match: (ShiftAllLeftMaskedInt64x8 x y mask) - // result: (VPSLLQMasked512 x y (VPMOVVec64x8ToM mask)) + // match: (ShiftAllLeftConcatMaskedInt64x8 [a] x y mask) + // result: (VPSHLDQMasked512 [a] x y (VPMOVVec64x8ToM mask)) for { + a := auxIntToUint8(v.AuxInt) x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPSLLQMasked512) + v.reset(OpAMD64VPSHLDQMasked512) + v.AuxInt = uint8ToAuxInt(a) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpShiftAllLeftMaskedUint16x16(v *Value) bool { +func rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint16x16(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (ShiftAllLeftMaskedUint16x16 x (MOVQconst [c]) mask) - // result: (VPSLLWMasked256const [uint8(c)] x (VPMOVVec16x16ToM mask)) - for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { - break - } - c := auxIntToInt64(v_1.AuxInt) - mask := v_2 - v.reset(OpAMD64VPSLLWMasked256const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } - // match: (ShiftAllLeftMaskedUint16x16 x y mask) - // result: (VPSLLWMasked256 x y (VPMOVVec16x16ToM mask)) + // match: (ShiftAllLeftConcatMaskedUint16x16 [a] x y mask) + // result: (VPSHLDWMasked256 [a] x y (VPMOVVec16x16ToM mask)) for { + a := auxIntToUint8(v.AuxInt) x := v_0 y := v_1 
mask := v_2 - v.reset(OpAMD64VPSLLWMasked256) + v.reset(OpAMD64VPSHLDWMasked256) + v.AuxInt = uint8ToAuxInt(a) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpShiftAllLeftMaskedUint16x32(v *Value) bool { +func rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint16x32(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (ShiftAllLeftMaskedUint16x32 x (MOVQconst [c]) mask) - // result: (VPSLLWMasked512const [uint8(c)] x (VPMOVVec16x32ToM mask)) - for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { - break - } - c := auxIntToInt64(v_1.AuxInt) - mask := v_2 - v.reset(OpAMD64VPSLLWMasked512const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } - // match: (ShiftAllLeftMaskedUint16x32 x y mask) - // result: (VPSLLWMasked512 x y (VPMOVVec16x32ToM mask)) + // match: (ShiftAllLeftConcatMaskedUint16x32 [a] x y mask) + // result: (VPSHLDWMasked512 [a] x y (VPMOVVec16x32ToM mask)) for { + a := auxIntToUint8(v.AuxInt) x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPSLLWMasked512) + v.reset(OpAMD64VPSHLDWMasked512) + v.AuxInt = uint8ToAuxInt(a) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpShiftAllLeftMaskedUint16x8(v *Value) bool { +func rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint16x8(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (ShiftAllLeftMaskedUint16x8 x (MOVQconst [c]) mask) - // result: (VPSLLWMasked128const [uint8(c)] x (VPMOVVec16x8ToM mask)) - for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { - break - } - c := auxIntToInt64(v_1.AuxInt) - mask := v_2 - v.reset(OpAMD64VPSLLWMasked128const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } - // match: (ShiftAllLeftMaskedUint16x8 x y mask) - // result: (VPSLLWMasked128 x y (VPMOVVec16x8ToM mask)) + // match: (ShiftAllLeftConcatMaskedUint16x8 [a] x y mask) + // result: (VPSHLDWMasked128 [a] x y (VPMOVVec16x8ToM mask)) for { + a := auxIntToUint8(v.AuxInt) x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPSLLWMasked128) + v.reset(OpAMD64VPSHLDWMasked128) + v.AuxInt = uint8ToAuxInt(a) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpShiftAllLeftMaskedUint32x16(v *Value) bool { +func rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint32x16(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (ShiftAllLeftMaskedUint32x16 x (MOVQconst [c]) mask) - // result: (VPSLLDMasked512const [uint8(c)] x (VPMOVVec32x16ToM mask)) - for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { - break - } - c := auxIntToInt64(v_1.AuxInt) - mask := v_2 - v.reset(OpAMD64VPSLLDMasked512const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } - // match: (ShiftAllLeftMaskedUint32x16 x y mask) - // result: (VPSLLDMasked512 x y (VPMOVVec32x16ToM mask)) + // match: (ShiftAllLeftConcatMaskedUint32x16 [a] x y mask) + // result: (VPSHLDDMasked512 [a] x y (VPMOVVec32x16ToM mask)) for { + a := auxIntToUint8(v.AuxInt) x := v_0 y := v_1 mask := v_2 - 
v.reset(OpAMD64VPSLLDMasked512) + v.reset(OpAMD64VPSHLDDMasked512) + v.AuxInt = uint8ToAuxInt(a) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpShiftAllLeftMaskedUint32x4(v *Value) bool { +func rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint32x4(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (ShiftAllLeftMaskedUint32x4 x (MOVQconst [c]) mask) - // result: (VPSLLDMasked128const [uint8(c)] x (VPMOVVec32x4ToM mask)) - for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { - break - } - c := auxIntToInt64(v_1.AuxInt) - mask := v_2 - v.reset(OpAMD64VPSLLDMasked128const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } - // match: (ShiftAllLeftMaskedUint32x4 x y mask) - // result: (VPSLLDMasked128 x y (VPMOVVec32x4ToM mask)) + // match: (ShiftAllLeftConcatMaskedUint32x4 [a] x y mask) + // result: (VPSHLDDMasked128 [a] x y (VPMOVVec32x4ToM mask)) for { + a := auxIntToUint8(v.AuxInt) x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPSLLDMasked128) + v.reset(OpAMD64VPSHLDDMasked128) + v.AuxInt = uint8ToAuxInt(a) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpShiftAllLeftMaskedUint32x8(v *Value) bool { +func rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint32x8(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (ShiftAllLeftMaskedUint32x8 x (MOVQconst [c]) mask) - // result: (VPSLLDMasked256const [uint8(c)] x (VPMOVVec32x8ToM mask)) - for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { - break - } - c := auxIntToInt64(v_1.AuxInt) - mask := v_2 - v.reset(OpAMD64VPSLLDMasked256const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } - // match: (ShiftAllLeftMaskedUint32x8 x y mask) - // result: (VPSLLDMasked256 x y (VPMOVVec32x8ToM mask)) + // match: (ShiftAllLeftConcatMaskedUint32x8 [a] x y mask) + // result: (VPSHLDDMasked256 [a] x y (VPMOVVec32x8ToM mask)) for { + a := auxIntToUint8(v.AuxInt) x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPSLLDMasked256) + v.reset(OpAMD64VPSHLDDMasked256) + v.AuxInt = uint8ToAuxInt(a) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpShiftAllLeftMaskedUint64x2(v *Value) bool { - v_2 := v.Args[2] - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllLeftMaskedUint64x2 x (MOVQconst [c]) mask) - // result: (VPSLLQMasked128const [uint8(c)] x (VPMOVVec64x2ToM mask)) - for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { - break - } - c := auxIntToInt64(v_1.AuxInt) - mask := v_2 - v.reset(OpAMD64VPSLLQMasked128const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } - // match: (ShiftAllLeftMaskedUint64x2 x y mask) - // result: (VPSLLQMasked128 x y (VPMOVVec64x2ToM mask)) - for { - x := v_0 - y := v_1 - mask := v_2 - v.reset(OpAMD64VPSLLQMasked128) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) - return true - } -} -func rewriteValueAMD64_OpShiftAllLeftMaskedUint64x4(v *Value) bool { +func 
rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint64x2(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (ShiftAllLeftMaskedUint64x4 x (MOVQconst [c]) mask) - // result: (VPSLLQMasked256const [uint8(c)] x (VPMOVVec64x4ToM mask)) - for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { - break - } - c := auxIntToInt64(v_1.AuxInt) - mask := v_2 - v.reset(OpAMD64VPSLLQMasked256const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg2(x, v0) - return true - } - // match: (ShiftAllLeftMaskedUint64x4 x y mask) - // result: (VPSLLQMasked256 x y (VPMOVVec64x4ToM mask)) + // match: (ShiftAllLeftConcatMaskedUint64x2 [a] x y mask) + // result: (VPSHLDQMasked128 [a] x y (VPMOVVec64x2ToM mask)) for { + a := auxIntToUint8(v.AuxInt) x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPSLLQMasked256) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v.reset(OpAMD64VPSHLDQMasked128) + v.AuxInt = uint8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpShiftAllLeftMaskedUint64x8(v *Value) bool { +func rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint64x4(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block - // match: (ShiftAllLeftMaskedUint64x8 x (MOVQconst [c]) mask) - // result: (VPSLLQMasked512const [uint8(c)] x (VPMOVVec64x8ToM mask)) + // match: (ShiftAllLeftConcatMaskedUint64x4 [a] x y mask) + // result: (VPSHLDQMasked256 [a] x y (VPMOVVec64x4ToM mask)) for { + a := auxIntToUint8(v.AuxInt) x := v_0 - if v_1.Op != OpAMD64MOVQconst { - break - } - c := auxIntToInt64(v_1.AuxInt) + y := v_1 mask := v_2 - v.reset(OpAMD64VPSLLQMasked512const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v.reset(OpAMD64VPSHLDQMasked256) + v.AuxInt = uint8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) v0.AddArg(mask) - v.AddArg2(x, v0) + v.AddArg3(x, y, v0) return true } - // match: (ShiftAllLeftMaskedUint64x8 x y mask) - // result: (VPSLLQMasked512 x y (VPMOVVec64x8ToM mask)) +} +func rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint64x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ShiftAllLeftConcatMaskedUint64x8 [a] x y mask) + // result: (VPSHLDQMasked512 [a] x y (VPMOVVec64x8ToM mask)) for { + a := auxIntToUint8(v.AuxInt) x := v_0 y := v_1 mask := v_2 - v.reset(OpAMD64VPSLLQMasked512) + v.reset(OpAMD64VPSHLDQMasked512) + v.AuxInt = uint8ToAuxInt(a) v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) v0.AddArg(mask) v.AddArg3(x, y, v0) return true } } -func rewriteValueAMD64_OpShiftAllLeftUint16x16(v *Value) bool { +func rewriteValueAMD64_OpShiftAllLeftMaskedInt16x16(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] - // match: (ShiftAllLeftUint16x16 x (MOVQconst [c])) - // result: (VPSLLW256const [uint8(c)] x) - for { - x := v_0 - if v_1.Op != OpAMD64MOVQconst { - break - } - c := auxIntToInt64(v_1.AuxInt) - v.reset(OpAMD64VPSLLW256const) - v.AuxInt = uint8ToAuxInt(uint8(c)) - v.AddArg(x) - return true - } - // match: (ShiftAllLeftUint16x16 x y) - // result: (VPSLLW256 x y) + b := v.Block + // match: (ShiftAllLeftMaskedInt16x16 x y mask) + // result: (VPSLLWMasked256 x y (VPMOVVec16x16ToM mask)) for { x := v_0 y := v_1 - v.reset(OpAMD64VPSLLW256) - v.AddArg2(x, y) + mask := v_2 + 
v.reset(OpAMD64VPSLLWMasked256)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
+		v0.AddArg(mask)
+		v.AddArg3(x, y, v0)
 		return true
 	}
 }
-func rewriteValueAMD64_OpShiftAllLeftUint16x32(v *Value) bool {
+func rewriteValueAMD64_OpShiftAllLeftMaskedInt16x32(v *Value) bool {
+	v_2 := v.Args[2]
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
-	// match: (ShiftAllLeftUint16x32 x (MOVQconst [c]))
-	// result: (VPSLLW512const [uint8(c)] x)
-	for {
-		x := v_0
-		if v_1.Op != OpAMD64MOVQconst {
-			break
-		}
-		c := auxIntToInt64(v_1.AuxInt)
-		v.reset(OpAMD64VPSLLW512const)
-		v.AuxInt = uint8ToAuxInt(uint8(c))
-		v.AddArg(x)
-		return true
-	}
-	// match: (ShiftAllLeftUint16x32 x y)
-	// result: (VPSLLW512 x y)
+	b := v.Block
+	// match: (ShiftAllLeftMaskedInt16x32 x y mask)
+	// result: (VPSLLWMasked512 x y (VPMOVVec16x32ToM mask))
 	for {
 		x := v_0
 		y := v_1
-		v.reset(OpAMD64VPSLLW512)
-		v.AddArg2(x, y)
+		mask := v_2
+		v.reset(OpAMD64VPSLLWMasked512)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
+		v0.AddArg(mask)
+		v.AddArg3(x, y, v0)
 		return true
 	}
 }
-func rewriteValueAMD64_OpShiftAllLeftUint16x8(v *Value) bool {
+func rewriteValueAMD64_OpShiftAllLeftMaskedInt16x8(v *Value) bool {
+	v_2 := v.Args[2]
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
-	// match: (ShiftAllLeftUint16x8 x (MOVQconst [c]))
-	// result: (VPSLLW128const [uint8(c)] x)
-	for {
-		x := v_0
-		if v_1.Op != OpAMD64MOVQconst {
-			break
-		}
-		c := auxIntToInt64(v_1.AuxInt)
-		v.reset(OpAMD64VPSLLW128const)
-		v.AuxInt = uint8ToAuxInt(uint8(c))
-		v.AddArg(x)
-		return true
-	}
-	// match: (ShiftAllLeftUint16x8 x y)
-	// result: (VPSLLW128 x y)
+	b := v.Block
+	// match: (ShiftAllLeftMaskedInt16x8 x y mask)
+	// result: (VPSLLWMasked128 x y (VPMOVVec16x8ToM mask))
 	for {
 		x := v_0
 		y := v_1
-		v.reset(OpAMD64VPSLLW128)
-		v.AddArg2(x, y)
+		mask := v_2
+		v.reset(OpAMD64VPSLLWMasked128)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
+		v0.AddArg(mask)
+		v.AddArg3(x, y, v0)
 		return true
 	}
 }
-func rewriteValueAMD64_OpShiftAllLeftUint32x16(v *Value) bool {
+func rewriteValueAMD64_OpShiftAllLeftMaskedInt32x16(v *Value) bool {
+	v_2 := v.Args[2]
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
-	// match: (ShiftAllLeftUint32x16 x (MOVQconst [c]))
-	// result: (VPSLLD512const [uint8(c)] x)
-	for {
-		x := v_0
-		if v_1.Op != OpAMD64MOVQconst {
-			break
-		}
-		c := auxIntToInt64(v_1.AuxInt)
-		v.reset(OpAMD64VPSLLD512const)
-		v.AuxInt = uint8ToAuxInt(uint8(c))
-		v.AddArg(x)
-		return true
-	}
-	// match: (ShiftAllLeftUint32x16 x y)
-	// result: (VPSLLD512 x y)
+	b := v.Block
+	// match: (ShiftAllLeftMaskedInt32x16 x y mask)
+	// result: (VPSLLDMasked512 x y (VPMOVVec32x16ToM mask))
 	for {
 		x := v_0
 		y := v_1
-		v.reset(OpAMD64VPSLLD512)
-		v.AddArg2(x, y)
+		mask := v_2
+		v.reset(OpAMD64VPSLLDMasked512)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
+		v0.AddArg(mask)
+		v.AddArg3(x, y, v0)
 		return true
 	}
 }
-func rewriteValueAMD64_OpShiftAllLeftUint32x4(v *Value) bool {
+func rewriteValueAMD64_OpShiftAllLeftMaskedInt32x4(v *Value) bool {
+	v_2 := v.Args[2]
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
-	// match: (ShiftAllLeftUint32x4 x (MOVQconst [c]))
-	// result: (VPSLLD128const [uint8(c)] x)
-	for {
-		x := v_0
-		if v_1.Op != OpAMD64MOVQconst {
-			break
-		}
-		c := auxIntToInt64(v_1.AuxInt)
-		v.reset(OpAMD64VPSLLD128const)
-		v.AuxInt = uint8ToAuxInt(uint8(c))
-		v.AddArg(x)
-		return true
-	}
-	// match: (ShiftAllLeftUint32x4 x y)
-	// result: (VPSLLD128 x y)
+	b := v.Block
+	// match: (ShiftAllLeftMaskedInt32x4 x y mask)
+	// result: (VPSLLDMasked128 x y (VPMOVVec32x4ToM mask))
 	for {
 		x := v_0
 		y := v_1
-		v.reset(OpAMD64VPSLLD128)
-		v.AddArg2(x, y)
+		mask := v_2
+		v.reset(OpAMD64VPSLLDMasked128)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+		v0.AddArg(mask)
+		v.AddArg3(x, y, v0)
 		return true
 	}
 }
-func rewriteValueAMD64_OpShiftAllLeftUint32x8(v *Value) bool {
+func rewriteValueAMD64_OpShiftAllLeftMaskedInt32x8(v *Value) bool {
+	v_2 := v.Args[2]
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
-	// match: (ShiftAllLeftUint32x8 x (MOVQconst [c]))
-	// result: (VPSLLD256const [uint8(c)] x)
-	for {
-		x := v_0
-		if v_1.Op != OpAMD64MOVQconst {
-			break
-		}
-		c := auxIntToInt64(v_1.AuxInt)
-		v.reset(OpAMD64VPSLLD256const)
-		v.AuxInt = uint8ToAuxInt(uint8(c))
-		v.AddArg(x)
-		return true
-	}
-	// match: (ShiftAllLeftUint32x8 x y)
-	// result: (VPSLLD256 x y)
+	b := v.Block
+	// match: (ShiftAllLeftMaskedInt32x8 x y mask)
+	// result: (VPSLLDMasked256 x y (VPMOVVec32x8ToM mask))
 	for {
 		x := v_0
 		y := v_1
-		v.reset(OpAMD64VPSLLD256)
-		v.AddArg2(x, y)
+		mask := v_2
+		v.reset(OpAMD64VPSLLDMasked256)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+		v0.AddArg(mask)
+		v.AddArg3(x, y, v0)
 		return true
 	}
 }
-func rewriteValueAMD64_OpShiftAllLeftUint64x2(v *Value) bool {
+func rewriteValueAMD64_OpShiftAllLeftMaskedInt64x2(v *Value) bool {
+	v_2 := v.Args[2]
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
-	// match: (ShiftAllLeftUint64x2 x (MOVQconst [c]))
-	// result: (VPSLLQ128const [uint8(c)] x)
-	for {
-		x := v_0
-		if v_1.Op != OpAMD64MOVQconst {
-			break
-		}
-		c := auxIntToInt64(v_1.AuxInt)
-		v.reset(OpAMD64VPSLLQ128const)
-		v.AuxInt = uint8ToAuxInt(uint8(c))
-		v.AddArg(x)
-		return true
-	}
-	// match: (ShiftAllLeftUint64x2 x y)
-	// result: (VPSLLQ128 x y)
+	b := v.Block
+	// match: (ShiftAllLeftMaskedInt64x2 x y mask)
+	// result: (VPSLLQMasked128 x y (VPMOVVec64x2ToM mask))
 	for {
 		x := v_0
 		y := v_1
-		v.reset(OpAMD64VPSLLQ128)
-		v.AddArg2(x, y)
+		mask := v_2
+		v.reset(OpAMD64VPSLLQMasked128)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+		v0.AddArg(mask)
+		v.AddArg3(x, y, v0)
 		return true
 	}
 }
-func rewriteValueAMD64_OpShiftAllLeftUint64x4(v *Value) bool {
+func rewriteValueAMD64_OpShiftAllLeftMaskedInt64x4(v *Value) bool {
+	v_2 := v.Args[2]
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
-	// match: (ShiftAllLeftUint64x4 x (MOVQconst [c]))
-	// result: (VPSLLQ256const [uint8(c)] x)
-	for {
-		x := v_0
-		if v_1.Op != OpAMD64MOVQconst {
-			break
-		}
-		c := auxIntToInt64(v_1.AuxInt)
-		v.reset(OpAMD64VPSLLQ256const)
-		v.AuxInt = uint8ToAuxInt(uint8(c))
-		v.AddArg(x)
-		return true
-	}
-	// match: (ShiftAllLeftUint64x4 x y)
-	// result: (VPSLLQ256 x y)
+	b := v.Block
+	// match: (ShiftAllLeftMaskedInt64x4 x y mask)
+	// result: (VPSLLQMasked256 x y (VPMOVVec64x4ToM mask))
 	for {
 		x := v_0
 		y := v_1
-		v.reset(OpAMD64VPSLLQ256)
-		v.AddArg2(x, y)
+		mask := v_2
+		v.reset(OpAMD64VPSLLQMasked256)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+		v0.AddArg(mask)
+		v.AddArg3(x, y, v0)
 		return true
 	}
 }
-func rewriteValueAMD64_OpShiftAllLeftUint64x8(v *Value) bool {
+func rewriteValueAMD64_OpShiftAllLeftMaskedInt64x8(v *Value) bool {
+	v_2 := v.Args[2]
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
-	// match: (ShiftAllLeftUint64x8 x (MOVQconst [c]))
-	// result: (VPSLLQ512const [uint8(c)] x)
-	for {
-		x := v_0
-		if v_1.Op != OpAMD64MOVQconst {
-			break
-		}
-		c := auxIntToInt64(v_1.AuxInt)
-		v.reset(OpAMD64VPSLLQ512const)
-		v.AuxInt = uint8ToAuxInt(uint8(c))
-		v.AddArg(x)
-		return true
-	}
-	// match: (ShiftAllLeftUint64x8 x y)
-	// result: (VPSLLQ512 x y)
+	b := v.Block
+	// match: (ShiftAllLeftMaskedInt64x8 x y mask)
+	// result: (VPSLLQMasked512 x y (VPMOVVec64x8ToM mask))
 	for {
 		x := v_0
 		y := v_1
-		v.reset(OpAMD64VPSLLQ512)
-		v.AddArg2(x, y)
+		mask := v_2
+		v.reset(OpAMD64VPSLLQMasked512)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
+		v0.AddArg(mask)
+		v.AddArg3(x, y, v0)
 		return true
 	}
 }
-func rewriteValueAMD64_OpShiftAllRightConcatMaskedInt16x16(v *Value) bool {
+func rewriteValueAMD64_OpShiftAllLeftMaskedUint16x16(v *Value) bool {
 	v_2 := v.Args[2]
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	b := v.Block
-	// match: (ShiftAllRightConcatMaskedInt16x16 [a] x y mask)
-	// result: (VPSHRDWMasked256 [a] x y (VPMOVVec16x16ToM mask))
+	// match: (ShiftAllLeftMaskedUint16x16 x y mask)
+	// result: (VPSLLWMasked256 x y (VPMOVVec16x16ToM mask))
 	for {
-		a := auxIntToUint8(v.AuxInt)
 		x := v_0
 		y := v_1
 		mask := v_2
-		v.reset(OpAMD64VPSHRDWMasked256)
-		v.AuxInt = uint8ToAuxInt(a)
+		v.reset(OpAMD64VPSLLWMasked256)
 		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
 		v0.AddArg(mask)
 		v.AddArg3(x, y, v0)
 		return true
 	}
 }
-func rewriteValueAMD64_OpShiftAllRightConcatMaskedInt16x32(v *Value) bool {
+func rewriteValueAMD64_OpShiftAllLeftMaskedUint16x32(v *Value) bool {
 	v_2 := v.Args[2]
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	b := v.Block
-	// match: (ShiftAllRightConcatMaskedInt16x32 [a] x y mask)
-	// result: (VPSHRDWMasked512 [a] x y (VPMOVVec16x32ToM mask))
+	// match: (ShiftAllLeftMaskedUint16x32 x y mask)
+	// result: (VPSLLWMasked512 x y (VPMOVVec16x32ToM mask))
 	for {
-		a := auxIntToUint8(v.AuxInt)
 		x := v_0
 		y := v_1
 		mask := v_2
-		v.reset(OpAMD64VPSHRDWMasked512)
-		v.AuxInt = uint8ToAuxInt(a)
+		v.reset(OpAMD64VPSLLWMasked512)
 		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
 		v0.AddArg(mask)
 		v.AddArg3(x, y, v0)
 		return true
 	}
 }
-func rewriteValueAMD64_OpShiftAllRightConcatMaskedInt16x8(v *Value) bool {
+func rewriteValueAMD64_OpShiftAllLeftMaskedUint16x8(v *Value) bool {
 	v_2 := v.Args[2]
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	b := v.Block
-	// match: (ShiftAllRightConcatMaskedInt16x8 [a] x y mask)
-	// result: (VPSHRDWMasked128 [a] x y (VPMOVVec16x8ToM mask))
+	// match: (ShiftAllLeftMaskedUint16x8 x y mask)
+	// result: (VPSLLWMasked128 x y (VPMOVVec16x8ToM mask))
 	for {
-		a := auxIntToUint8(v.AuxInt)
 		x := v_0
 		y := v_1
 		mask := v_2
-		v.reset(OpAMD64VPSHRDWMasked128)
-		v.AuxInt = uint8ToAuxInt(a)
+		v.reset(OpAMD64VPSLLWMasked128)
 		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
 		v0.AddArg(mask)
 		v.AddArg3(x, y, v0)
 		return true
 	}
 }
-func rewriteValueAMD64_OpShiftAllRightConcatMaskedInt32x16(v *Value) bool {
+func rewriteValueAMD64_OpShiftAllLeftMaskedUint32x16(v *Value) bool {
 	v_2 := v.Args[2]
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	b := v.Block
-	// match: (ShiftAllRightConcatMaskedInt32x16 [a] x y mask)
-	// result: (VPSHRDDMasked512 [a] x y (VPMOVVec32x16ToM mask))
+	// match: (ShiftAllLeftMaskedUint32x16 x y mask)
+	// result: (VPSLLDMasked512 x y (VPMOVVec32x16ToM mask))
 	for {
-		a := auxIntToUint8(v.AuxInt)
 		x := v_0
 		y := v_1
 		mask := v_2
-		v.reset(OpAMD64VPSHRDDMasked512)
-		v.AuxInt = uint8ToAuxInt(a)
+		v.reset(OpAMD64VPSLLDMasked512)
 		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
 		v0.AddArg(mask)
 		v.AddArg3(x, y, v0)
 		return true
 	}
 }
-func rewriteValueAMD64_OpShiftAllRightConcatMaskedInt32x4(v *Value) bool {
+func rewriteValueAMD64_OpShiftAllLeftMaskedUint32x4(v *Value) bool {
 	v_2 := v.Args[2]
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	b := v.Block
-	// match: (ShiftAllRightConcatMaskedInt32x4 [a] x y mask)
-	// result: (VPSHRDDMasked128 [a] x y (VPMOVVec32x4ToM mask))
+	// match: (ShiftAllLeftMaskedUint32x4 x y mask)
+	// result: (VPSLLDMasked128 x y (VPMOVVec32x4ToM mask))
 	for {
-		a := auxIntToUint8(v.AuxInt)
 		x := v_0
 		y := v_1
 		mask := v_2
-		v.reset(OpAMD64VPSHRDDMasked128)
-		v.AuxInt = uint8ToAuxInt(a)
+		v.reset(OpAMD64VPSLLDMasked128)
 		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
 		v0.AddArg(mask)
 		v.AddArg3(x, y, v0)
 		return true
 	}
 }
-func rewriteValueAMD64_OpShiftAllRightConcatMaskedInt32x8(v *Value) bool {
+func rewriteValueAMD64_OpShiftAllLeftMaskedUint32x8(v *Value) bool {
 	v_2 := v.Args[2]
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	b := v.Block
-	// match: (ShiftAllRightConcatMaskedInt32x8 [a] x y mask)
-	// result: (VPSHRDDMasked256 [a] x y (VPMOVVec32x8ToM mask))
+	// match: (ShiftAllLeftMaskedUint32x8 x y mask)
+	// result: (VPSLLDMasked256 x y (VPMOVVec32x8ToM mask))
 	for {
-		a := auxIntToUint8(v.AuxInt)
 		x := v_0
 		y := v_1
 		mask := v_2
-		v.reset(OpAMD64VPSHRDDMasked256)
-		v.AuxInt = uint8ToAuxInt(a)
+		v.reset(OpAMD64VPSLLDMasked256)
 		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
 		v0.AddArg(mask)
 		v.AddArg3(x, y, v0)
 		return true
 	}
 }
-func rewriteValueAMD64_OpShiftAllRightConcatMaskedInt64x2(v *Value) bool {
+func rewriteValueAMD64_OpShiftAllLeftMaskedUint64x2(v *Value) bool {
 	v_2 := v.Args[2]
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	b := v.Block
-	// match: (ShiftAllRightConcatMaskedInt64x2 [a] x y mask)
-	// result: (VPSHRDQMasked128 [a] x y (VPMOVVec64x2ToM mask))
+	// match: (ShiftAllLeftMaskedUint64x2 x y mask)
+	// result: (VPSLLQMasked128 x y (VPMOVVec64x2ToM mask))
 	for {
-		a := auxIntToUint8(v.AuxInt)
 		x := v_0
 		y := v_1
 		mask := v_2
-		v.reset(OpAMD64VPSHRDQMasked128)
-		v.AuxInt = uint8ToAuxInt(a)
		v.reset(OpAMD64VPSLLQMasked128)
 		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
 		v0.AddArg(mask)
 		v.AddArg3(x, y, v0)
 		return true
 	}
 }
-func rewriteValueAMD64_OpShiftAllRightConcatMaskedInt64x4(v *Value) bool {
+func rewriteValueAMD64_OpShiftAllLeftMaskedUint64x4(v *Value) bool {
 	v_2 := v.Args[2]
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	b := v.Block
-	// match: (ShiftAllRightConcatMaskedInt64x4 [a] x y mask)
-	// result: (VPSHRDQMasked256 [a] x y (VPMOVVec64x4ToM mask))
+	// match: (ShiftAllLeftMaskedUint64x4 x y mask)
+	// result: (VPSLLQMasked256 x y (VPMOVVec64x4ToM mask))
 	for {
-		a := auxIntToUint8(v.AuxInt)
 		x := v_0
 		y := v_1
 		mask := v_2
-		v.reset(OpAMD64VPSHRDQMasked256)
-		v.AuxInt = uint8ToAuxInt(a)
+		v.reset(OpAMD64VPSLLQMasked256)
 		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
 		v0.AddArg(mask)
 		v.AddArg3(x, y, v0)
 		return true
 	}
 }
-func rewriteValueAMD64_OpShiftAllRightConcatMaskedInt64x8(v *Value) bool {
+func rewriteValueAMD64_OpShiftAllLeftMaskedUint64x8(v *Value) bool {
 	v_2 := v.Args[2]
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	b := v.Block
-	// match: (ShiftAllRightConcatMaskedInt64x8 [a] x y mask)
-	// result: (VPSHRDQMasked512 [a] x y (VPMOVVec64x8ToM mask))
+	// match: (ShiftAllLeftMaskedUint64x8 x y mask)
+	// result: (VPSLLQMasked512 x y (VPMOVVec64x8ToM mask))
 	for {
-		a := auxIntToUint8(v.AuxInt)
 		x := v_0
 		y := v_1
 		mask := v_2
-		v.reset(OpAMD64VPSHRDQMasked512)
-		v.AuxInt = uint8ToAuxInt(a)
+		v.reset(OpAMD64VPSLLQMasked512)
 		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
 		v0.AddArg(mask)
 		v.AddArg3(x, y, v0)
 		return true
 	}
 }
-func rewriteValueAMD64_OpShiftAllRightConcatMaskedUint16x16(v *Value) bool {
+func rewriteValueAMD64_OpShiftAllRightConcatMaskedInt16x16(v *Value) bool {
 	v_2 := v.Args[2]
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	b := v.Block
-	// match: (ShiftAllRightConcatMaskedUint16x16 [a] x y mask)
+	// match: (ShiftAllRightConcatMaskedInt16x16 [a] x y mask)
 	// result: (VPSHRDWMasked256 [a] x y (VPMOVVec16x16ToM mask))
 	for {
 		a := auxIntToUint8(v.AuxInt)
@@ -53975,12 +53831,12 @@ func rewriteValueAMD64_OpShiftAllRightConcatMaskedUint16x16(v *Value) bool {
 		return true
 	}
 }
-func rewriteValueAMD64_OpShiftAllRightConcatMaskedUint16x32(v *Value) bool {
+func rewriteValueAMD64_OpShiftAllRightConcatMaskedInt16x32(v *Value) bool {
 	v_2 := v.Args[2]
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	b := v.Block
-	// match: (ShiftAllRightConcatMaskedUint16x32 [a] x y mask)
+	// match: (ShiftAllRightConcatMaskedInt16x32 [a] x y mask)
 	// result: (VPSHRDWMasked512 [a] x y (VPMOVVec16x32ToM mask))
 	for {
 		a := auxIntToUint8(v.AuxInt)
@@ -53995,12 +53851,12 @@ func rewriteValueAMD64_OpShiftAllRightConcatMaskedUint16x32(v *Value) bool {
 		return true
 	}
 }
-func rewriteValueAMD64_OpShiftAllRightConcatMaskedUint16x8(v *Value) bool {
+func rewriteValueAMD64_OpShiftAllRightConcatMaskedInt16x8(v *Value) bool {
 	v_2 := v.Args[2]
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	b := v.Block
-	// match: (ShiftAllRightConcatMaskedUint16x8 [a] x y mask)
+	// match: (ShiftAllRightConcatMaskedInt16x8 [a] x y mask)
 	// result: (VPSHRDWMasked128 [a] x y (VPMOVVec16x8ToM mask))
 	for {
 		a := auxIntToUint8(v.AuxInt)
@@ -54015,12 +53871,12 @@ func rewriteValueAMD64_OpShiftAllRightConcatMaskedUint16x8(v *Value) bool {
 		return true
 	}
 }
-func rewriteValueAMD64_OpShiftAllRightConcatMaskedUint32x16(v *Value) bool {
+func rewriteValueAMD64_OpShiftAllRightConcatMaskedInt32x16(v *Value) bool {
 	v_2 := v.Args[2]
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	b := v.Block
-	// match: (ShiftAllRightConcatMaskedUint32x16 [a] x y mask)
+	// match: (ShiftAllRightConcatMaskedInt32x16 [a] x y mask)
 	// result: (VPSHRDDMasked512 [a] x y (VPMOVVec32x16ToM mask))
 	for {
 		a := auxIntToUint8(v.AuxInt)
@@ -54035,12 +53891,12 @@ func rewriteValueAMD64_OpShiftAllRightConcatMaskedUint32x16(v *Value) bool {
 		return true
 	}
 }
-func rewriteValueAMD64_OpShiftAllRightConcatMaskedUint32x4(v *Value) bool {
+func rewriteValueAMD64_OpShiftAllRightConcatMaskedInt32x4(v *Value) bool {
 	v_2 := v.Args[2]
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	b := v.Block
-	// match: (ShiftAllRightConcatMaskedUint32x4 [a] x y mask)
+	// match: (ShiftAllRightConcatMaskedInt32x4 [a] x y mask)
 	// result: (VPSHRDDMasked128 [a] x y (VPMOVVec32x4ToM mask))
 	for {
 		a := auxIntToUint8(v.AuxInt)
@@ -54055,12 +53911,12 @@ func rewriteValueAMD64_OpShiftAllRightConcatMaskedUint32x4(v *Value) bool {
 		return true
 	}
 }
-func rewriteValueAMD64_OpShiftAllRightConcatMaskedUint32x8(v *Value) bool {
+func rewriteValueAMD64_OpShiftAllRightConcatMaskedInt32x8(v *Value) bool {
 	v_2 := v.Args[2]
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	b := v.Block
-	// match: (ShiftAllRightConcatMaskedUint32x8 [a] x y mask)
+	// match: (ShiftAllRightConcatMaskedInt32x8 [a] x y mask)
 	// result: (VPSHRDDMasked256 [a] x y (VPMOVVec32x8ToM mask))
 	for {
 		a := auxIntToUint8(v.AuxInt)
@@ -54075,12 +53931,12 @@ func rewriteValueAMD64_OpShiftAllRightConcatMaskedUint32x8(v *Value) bool {
 		return true
 	}
 }
-func rewriteValueAMD64_OpShiftAllRightConcatMaskedUint64x2(v *Value) bool {
+func rewriteValueAMD64_OpShiftAllRightConcatMaskedInt64x2(v *Value) bool {
 	v_2 := v.Args[2]
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	b := v.Block
-	// match: (ShiftAllRightConcatMaskedUint64x2 [a] x y mask)
+	// match: (ShiftAllRightConcatMaskedInt64x2 [a] x y mask)
 	// result: (VPSHRDQMasked128 [a] x y (VPMOVVec64x2ToM mask))
 	for {
 		a := auxIntToUint8(v.AuxInt)
@@ -54095,12 +53951,12 @@ func rewriteValueAMD64_OpShiftAllRightConcatMaskedUint64x2(v *Value) bool {
 		return true
 	}
 }
-func rewriteValueAMD64_OpShiftAllRightConcatMaskedUint64x4(v *Value) bool {
+func rewriteValueAMD64_OpShiftAllRightConcatMaskedInt64x4(v *Value) bool {
 	v_2 := v.Args[2]
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	b := v.Block
-	// match: (ShiftAllRightConcatMaskedUint64x4 [a] x y mask)
+	// match: (ShiftAllRightConcatMaskedInt64x4 [a] x y mask)
 	// result: (VPSHRDQMasked256 [a] x y (VPMOVVec64x4ToM mask))
 	for {
 		a := auxIntToUint8(v.AuxInt)
@@ -54115,12 +53971,12 @@ func rewriteValueAMD64_OpShiftAllRightConcatMaskedUint64x4(v *Value) bool {
 		return true
 	}
 }
-func rewriteValueAMD64_OpShiftAllRightConcatMaskedUint64x8(v *Value) bool {
+func rewriteValueAMD64_OpShiftAllRightConcatMaskedInt64x8(v *Value) bool {
 	v_2 := v.Args[2]
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	b := v.Block
-	// match: (ShiftAllRightConcatMaskedUint64x8 [a] x y mask)
+	// match: (ShiftAllRightConcatMaskedInt64x8 [a] x y mask)
 	// result: (VPSHRDQMasked512 [a] x y (VPMOVVec64x8ToM mask))
 	for {
 		a := auxIntToUint8(v.AuxInt)
@@ -54135,261 +53991,191 @@ func rewriteValueAMD64_OpShiftAllRightConcatMaskedUint64x8(v *Value) bool {
 		return true
 	}
 }
-func rewriteValueAMD64_OpShiftAllRightInt16x16(v *Value) bool {
-	v_1 := v.Args[1]
-	v_0 := v.Args[0]
-	// match: (ShiftAllRightInt16x16 x (MOVQconst [c]))
-	// result: (VPSRAW256const [uint8(c)] x)
-	for {
-		x := v_0
-		if v_1.Op != OpAMD64MOVQconst {
-			break
-		}
-		c := auxIntToInt64(v_1.AuxInt)
-		v.reset(OpAMD64VPSRAW256const)
-		v.AuxInt = uint8ToAuxInt(uint8(c))
-		v.AddArg(x)
-		return true
-	}
-	// match: (ShiftAllRightInt16x16 x y)
-	// result: (VPSRAW256 x y)
-	for {
-		x := v_0
-		y := v_1
-		v.reset(OpAMD64VPSRAW256)
-		v.AddArg2(x, y)
-		return true
-	}
-}
-func rewriteValueAMD64_OpShiftAllRightInt16x32(v *Value) bool {
-	v_1 := v.Args[1]
-	v_0 := v.Args[0]
-	// match: (ShiftAllRightInt16x32 x (MOVQconst [c]))
-	// result: (VPSRAW512const [uint8(c)] x)
-	for {
-		x := v_0
-		if v_1.Op != OpAMD64MOVQconst {
-			break
-		}
-		c := auxIntToInt64(v_1.AuxInt)
-		v.reset(OpAMD64VPSRAW512const)
-		v.AuxInt = uint8ToAuxInt(uint8(c))
-		v.AddArg(x)
-		return true
-	}
-	// match: (ShiftAllRightInt16x32 x y)
-	// result: (VPSRAW512 x y)
-	for {
-		x := v_0
-		y := v_1
-		v.reset(OpAMD64VPSRAW512)
-		v.AddArg2(x, y)
-		return true
-	}
-}
-func rewriteValueAMD64_OpShiftAllRightInt16x8(v *Value) bool {
+func rewriteValueAMD64_OpShiftAllRightConcatMaskedUint16x16(v *Value) bool {
+	v_2 := v.Args[2]
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
-	// match: (ShiftAllRightInt16x8 x (MOVQconst [c]))
-	// result: (VPSRAW128const [uint8(c)] x)
-	for {
-		x := v_0
-		if v_1.Op != OpAMD64MOVQconst {
-			break
-		}
-		c := auxIntToInt64(v_1.AuxInt)
-		v.reset(OpAMD64VPSRAW128const)
-		v.AuxInt = uint8ToAuxInt(uint8(c))
-		v.AddArg(x)
-		return true
-	}
-	// match: (ShiftAllRightInt16x8 x y)
-	// result: (VPSRAW128 x y)
+	b := v.Block
+	// match: (ShiftAllRightConcatMaskedUint16x16 [a] x y mask)
+	// result: (VPSHRDWMasked256 [a] x y (VPMOVVec16x16ToM mask))
 	for {
+		a := auxIntToUint8(v.AuxInt)
 		x := v_0
 		y := v_1
-		v.reset(OpAMD64VPSRAW128)
-		v.AddArg2(x, y)
+		mask := v_2
+		v.reset(OpAMD64VPSHRDWMasked256)
+		v.AuxInt = uint8ToAuxInt(a)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
+		v0.AddArg(mask)
+		v.AddArg3(x, y, v0)
 		return true
 	}
 }
-func rewriteValueAMD64_OpShiftAllRightInt32x16(v *Value) bool {
+func rewriteValueAMD64_OpShiftAllRightConcatMaskedUint16x32(v *Value) bool {
+	v_2 := v.Args[2]
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
-	// match: (ShiftAllRightInt32x16 x (MOVQconst [c]))
-	// result: (VPSRAD512const [uint8(c)] x)
-	for {
-		x := v_0
-		if v_1.Op != OpAMD64MOVQconst {
-			break
-		}
-		c := auxIntToInt64(v_1.AuxInt)
-		v.reset(OpAMD64VPSRAD512const)
-		v.AuxInt = uint8ToAuxInt(uint8(c))
-		v.AddArg(x)
-		return true
-	}
-	// match: (ShiftAllRightInt32x16 x y)
-	// result: (VPSRAD512 x y)
+	b := v.Block
+	// match: (ShiftAllRightConcatMaskedUint16x32 [a] x y mask)
+	// result: (VPSHRDWMasked512 [a] x y (VPMOVVec16x32ToM mask))
 	for {
+		a := auxIntToUint8(v.AuxInt)
 		x := v_0
 		y := v_1
-		v.reset(OpAMD64VPSRAD512)
-		v.AddArg2(x, y)
+		mask := v_2
+		v.reset(OpAMD64VPSHRDWMasked512)
+		v.AuxInt = uint8ToAuxInt(a)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
+		v0.AddArg(mask)
+		v.AddArg3(x, y, v0)
 		return true
 	}
 }
-func rewriteValueAMD64_OpShiftAllRightInt32x4(v *Value) bool {
+func rewriteValueAMD64_OpShiftAllRightConcatMaskedUint16x8(v *Value) bool {
+	v_2 := v.Args[2]
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
-	// match: (ShiftAllRightInt32x4 x (MOVQconst [c]))
-	// result: (VPSRAD128const [uint8(c)] x)
-	for {
-		x := v_0
-		if v_1.Op != OpAMD64MOVQconst {
-			break
-		}
-		c := auxIntToInt64(v_1.AuxInt)
-		v.reset(OpAMD64VPSRAD128const)
-		v.AuxInt = uint8ToAuxInt(uint8(c))
-		v.AddArg(x)
-		return true
-	}
-	// match: (ShiftAllRightInt32x4 x y)
-	// result: (VPSRAD128 x y)
+	b := v.Block
+	// match: (ShiftAllRightConcatMaskedUint16x8 [a] x y mask)
+	// result: (VPSHRDWMasked128 [a] x y (VPMOVVec16x8ToM mask))
 	for {
+		a := auxIntToUint8(v.AuxInt)
 		x := v_0
 		y := v_1
-		v.reset(OpAMD64VPSRAD128)
-		v.AddArg2(x, y)
+		mask := v_2
+		v.reset(OpAMD64VPSHRDWMasked128)
+		v.AuxInt = uint8ToAuxInt(a)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
+		v0.AddArg(mask)
+		v.AddArg3(x, y, v0)
 		return true
 	}
 }
-func rewriteValueAMD64_OpShiftAllRightInt32x8(v *Value) bool {
+func rewriteValueAMD64_OpShiftAllRightConcatMaskedUint32x16(v *Value) bool {
+	v_2 := v.Args[2]
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
-	// match: (ShiftAllRightInt32x8 x (MOVQconst [c]))
-	// result: (VPSRAD256const [uint8(c)] x)
-	for {
-		x := v_0
-		if v_1.Op != OpAMD64MOVQconst {
-			break
-		}
-		c := auxIntToInt64(v_1.AuxInt)
-		v.reset(OpAMD64VPSRAD256const)
-		v.AuxInt = uint8ToAuxInt(uint8(c))
-		v.AddArg(x)
-		return true
-	}
-	// match: (ShiftAllRightInt32x8 x y)
-	// result: (VPSRAD256 x y)
+	b := v.Block
+	// match: (ShiftAllRightConcatMaskedUint32x16 [a] x y mask)
+	// result: (VPSHRDDMasked512 [a] x y (VPMOVVec32x16ToM mask))
 	for {
+		a := auxIntToUint8(v.AuxInt)
 		x := v_0
 		y := v_1
-		v.reset(OpAMD64VPSRAD256)
-		v.AddArg2(x, y)
+		mask := v_2
+		v.reset(OpAMD64VPSHRDDMasked512)
+		v.AuxInt = uint8ToAuxInt(a)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
+		v0.AddArg(mask)
+		v.AddArg3(x, y, v0)
 		return true
 	}
 }
-func rewriteValueAMD64_OpShiftAllRightInt64x2(v *Value) bool {
+func rewriteValueAMD64_OpShiftAllRightConcatMaskedUint32x4(v *Value) bool {
+	v_2 := v.Args[2]
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
-	// match: (ShiftAllRightInt64x2 x (MOVQconst [c]))
-	// result: (VPSRAQ128const [uint8(c)] x)
-	for {
-		x := v_0
-		if v_1.Op != OpAMD64MOVQconst {
-			break
-		}
-		c := auxIntToInt64(v_1.AuxInt)
-		v.reset(OpAMD64VPSRAQ128const)
-		v.AuxInt = uint8ToAuxInt(uint8(c))
-		v.AddArg(x)
-		return true
-	}
-	// match: (ShiftAllRightInt64x2 x y)
-	// result: (VPSRAQ128 x y)
+	b := v.Block
+	// match: (ShiftAllRightConcatMaskedUint32x4 [a] x y mask)
+	// result: (VPSHRDDMasked128 [a] x y (VPMOVVec32x4ToM mask))
 	for {
+		a := auxIntToUint8(v.AuxInt)
 		x := v_0
 		y := v_1
-		v.reset(OpAMD64VPSRAQ128)
-		v.AddArg2(x, y)
+		mask := v_2
+		v.reset(OpAMD64VPSHRDDMasked128)
+		v.AuxInt = uint8ToAuxInt(a)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
+		v0.AddArg(mask)
+		v.AddArg3(x, y, v0)
 		return true
 	}
 }
-func rewriteValueAMD64_OpShiftAllRightInt64x4(v *Value) bool {
+func rewriteValueAMD64_OpShiftAllRightConcatMaskedUint32x8(v *Value) bool {
+	v_2 := v.Args[2]
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
-	// match: (ShiftAllRightInt64x4 x (MOVQconst [c]))
-	// result: (VPSRAQ256const [uint8(c)] x)
-	for {
-		x := v_0
-		if v_1.Op != OpAMD64MOVQconst {
-			break
-		}
-		c := auxIntToInt64(v_1.AuxInt)
-		v.reset(OpAMD64VPSRAQ256const)
-		v.AuxInt = uint8ToAuxInt(uint8(c))
-		v.AddArg(x)
-		return true
-	}
-	// match: (ShiftAllRightInt64x4 x y)
-	// result: (VPSRAQ256 x y)
+	b := v.Block
+	// match: (ShiftAllRightConcatMaskedUint32x8 [a] x y mask)
+	// result: (VPSHRDDMasked256 [a] x y (VPMOVVec32x8ToM mask))
 	for {
+		a := auxIntToUint8(v.AuxInt)
 		x := v_0
 		y := v_1
-		v.reset(OpAMD64VPSRAQ256)
-		v.AddArg2(x, y)
+		mask := v_2
+		v.reset(OpAMD64VPSHRDDMasked256)
+		v.AuxInt = uint8ToAuxInt(a)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
+		v0.AddArg(mask)
+		v.AddArg3(x, y, v0)
 		return true
 	}
 }
-func rewriteValueAMD64_OpShiftAllRightInt64x8(v *Value) bool {
+func rewriteValueAMD64_OpShiftAllRightConcatMaskedUint64x2(v *Value) bool {
+	v_2 := v.Args[2]
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
-	// match: (ShiftAllRightInt64x8 x (MOVQconst [c]))
-	// result: (VPSRAQ512const [uint8(c)] x)
+	b := v.Block
+	// match: (ShiftAllRightConcatMaskedUint64x2 [a] x y mask)
+	// result: (VPSHRDQMasked128 [a] x y (VPMOVVec64x2ToM mask))
 	for {
+		a := auxIntToUint8(v.AuxInt)
 		x := v_0
-		if v_1.Op != OpAMD64MOVQconst {
-			break
-		}
-		c := auxIntToInt64(v_1.AuxInt)
-		v.reset(OpAMD64VPSRAQ512const)
-		v.AuxInt = uint8ToAuxInt(uint8(c))
-		v.AddArg(x)
+		y := v_1
+		mask := v_2
+		v.reset(OpAMD64VPSHRDQMasked128)
+		v.AuxInt = uint8ToAuxInt(a)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
+		v0.AddArg(mask)
+		v.AddArg3(x, y, v0)
 		return true
 	}
-	// match: (ShiftAllRightInt64x8 x y)
-	// result: (VPSRAQ512 x y)
+}
+func rewriteValueAMD64_OpShiftAllRightConcatMaskedUint64x4(v *Value) bool {
+	v_2 := v.Args[2]
+	v_1 := v.Args[1]
+	v_0 := v.Args[0]
+	b := v.Block
+	// match: (ShiftAllRightConcatMaskedUint64x4 [a] x y mask)
+	// result: (VPSHRDQMasked256 [a] x y (VPMOVVec64x4ToM mask))
 	for {
+		a := auxIntToUint8(v.AuxInt)
 		x := v_0
 		y := v_1
-		v.reset(OpAMD64VPSRAQ512)
-		v.AddArg2(x, y)
+		mask := v_2
+		v.reset(OpAMD64VPSHRDQMasked256)
+		v.AuxInt = uint8ToAuxInt(a)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
+		v0.AddArg(mask)
+		v.AddArg3(x, y, v0)
 		return true
 	}
 }
-func rewriteValueAMD64_OpShiftAllRightMaskedInt16x16(v *Value) bool {
+func rewriteValueAMD64_OpShiftAllRightConcatMaskedUint64x8(v *Value) bool {
 	v_2 := v.Args[2]
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	b := v.Block
-	// match: (ShiftAllRightMaskedInt16x16 x (MOVQconst [c]) mask)
-	// result: (VPSRAWMasked256const [uint8(c)] x (VPMOVVec16x16ToM mask))
+	// match: (ShiftAllRightConcatMaskedUint64x8 [a] x y mask)
+	// result: (VPSHRDQMasked512 [a] x y (VPMOVVec64x8ToM mask))
 	for {
+		a := auxIntToUint8(v.AuxInt)
 		x := v_0
-		if v_1.Op != OpAMD64MOVQconst {
-			break
-		}
-		c := auxIntToInt64(v_1.AuxInt)
+		y := v_1
 		mask := v_2
-		v.reset(OpAMD64VPSRAWMasked256const)
-		v.AuxInt = uint8ToAuxInt(uint8(c))
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
+		v.reset(OpAMD64VPSHRDQMasked512)
+		v.AuxInt = uint8ToAuxInt(a)
+		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
 		v0.AddArg(mask)
-		v.AddArg2(x, v0)
+		v.AddArg3(x, y, v0)
 		return true
 	}
+}
+func rewriteValueAMD64_OpShiftAllRightMaskedInt16x16(v *Value) bool {
+	v_2 := v.Args[2]
+	v_1 := v.Args[1]
+	v_0 := v.Args[0]
+	b := v.Block
 	// match: (ShiftAllRightMaskedInt16x16 x y mask)
 	// result: (VPSRAWMasked256 x y (VPMOVVec16x16ToM mask))
 	for {
@@ -54408,22 +54194,6 @@ func rewriteValueAMD64_OpShiftAllRightMaskedInt16x32(v *Value) bool {
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	b := v.Block
-	// match: (ShiftAllRightMaskedInt16x32 x (MOVQconst [c]) mask)
-	// result: (VPSRAWMasked512const [uint8(c)] x (VPMOVVec16x32ToM mask))
-	for {
-		x := v_0
-		if v_1.Op != OpAMD64MOVQconst {
-			break
-		}
-		c := auxIntToInt64(v_1.AuxInt)
-		mask := v_2
-		v.reset(OpAMD64VPSRAWMasked512const)
-		v.AuxInt = uint8ToAuxInt(uint8(c))
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-		v0.AddArg(mask)
-		v.AddArg2(x, v0)
-		return true
-	}
 	// match: (ShiftAllRightMaskedInt16x32 x y mask)
 	// result: (VPSRAWMasked512 x y (VPMOVVec16x32ToM mask))
 	for {
@@ -54442,22 +54212,6 @@ func rewriteValueAMD64_OpShiftAllRightMaskedInt16x8(v *Value) bool {
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	b := v.Block
-	// match: (ShiftAllRightMaskedInt16x8 x (MOVQconst [c]) mask)
-	// result: (VPSRAWMasked128const [uint8(c)] x (VPMOVVec16x8ToM mask))
-	for {
-		x := v_0
-		if v_1.Op != OpAMD64MOVQconst {
-			break
-		}
-		c := auxIntToInt64(v_1.AuxInt)
-		mask := v_2
-		v.reset(OpAMD64VPSRAWMasked128const)
-		v.AuxInt = uint8ToAuxInt(uint8(c))
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-		v0.AddArg(mask)
-		v.AddArg2(x, v0)
-		return true
-	}
 	// match: (ShiftAllRightMaskedInt16x8 x y mask)
 	// result: (VPSRAWMasked128 x y (VPMOVVec16x8ToM mask))
 	for {
@@ -54476,22 +54230,6 @@ func rewriteValueAMD64_OpShiftAllRightMaskedInt32x16(v *Value) bool {
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	b := v.Block
-	// match: (ShiftAllRightMaskedInt32x16 x (MOVQconst [c]) mask)
-	// result: (VPSRADMasked512const [uint8(c)] x (VPMOVVec32x16ToM mask))
-	for {
-		x := v_0
-		if v_1.Op != OpAMD64MOVQconst {
-			break
-		}
-		c := auxIntToInt64(v_1.AuxInt)
-		mask := v_2
-		v.reset(OpAMD64VPSRADMasked512const)
-		v.AuxInt = uint8ToAuxInt(uint8(c))
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-		v0.AddArg(mask)
-		v.AddArg2(x, v0)
-		return true
-	}
 	// match: (ShiftAllRightMaskedInt32x16 x y mask)
 	// result: (VPSRADMasked512 x y (VPMOVVec32x16ToM mask))
 	for {
@@ -54510,22 +54248,6 @@ func rewriteValueAMD64_OpShiftAllRightMaskedInt32x4(v *Value) bool {
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	b := v.Block
-	// match: (ShiftAllRightMaskedInt32x4 x (MOVQconst [c]) mask)
-	// result: (VPSRADMasked128const [uint8(c)] x (VPMOVVec32x4ToM mask))
-	for {
-		x := v_0
-		if v_1.Op != OpAMD64MOVQconst {
-			break
-		}
-		c := auxIntToInt64(v_1.AuxInt)
-		mask := v_2
-		v.reset(OpAMD64VPSRADMasked128const)
-		v.AuxInt = uint8ToAuxInt(uint8(c))
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-		v0.AddArg(mask)
-		v.AddArg2(x, v0)
-		return true
-	}
 	// match: (ShiftAllRightMaskedInt32x4 x y mask)
 	// result: (VPSRADMasked128 x y (VPMOVVec32x4ToM mask))
 	for {
@@ -54544,22 +54266,6 @@ func rewriteValueAMD64_OpShiftAllRightMaskedInt32x8(v *Value) bool {
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	b := v.Block
-	// match: (ShiftAllRightMaskedInt32x8 x (MOVQconst [c]) mask)
-	// result: (VPSRADMasked256const [uint8(c)] x (VPMOVVec32x8ToM mask))
-	for {
-		x := v_0
-		if v_1.Op != OpAMD64MOVQconst {
-			break
-		}
-		c := auxIntToInt64(v_1.AuxInt)
-		mask := v_2
-		v.reset(OpAMD64VPSRADMasked256const)
-		v.AuxInt = uint8ToAuxInt(uint8(c))
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-		v0.AddArg(mask)
-		v.AddArg2(x, v0)
-		return true
-	}
 	// match: (ShiftAllRightMaskedInt32x8 x y mask)
 	// result: (VPSRADMasked256 x y (VPMOVVec32x8ToM mask))
 	for {
@@ -54578,22 +54284,6 @@ func rewriteValueAMD64_OpShiftAllRightMaskedInt64x2(v *Value) bool {
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	b := v.Block
-	// match: (ShiftAllRightMaskedInt64x2 x (MOVQconst [c]) mask)
-	// result: (VPSRAQMasked128const [uint8(c)] x (VPMOVVec64x2ToM mask))
-	for {
-		x := v_0
-		if v_1.Op != OpAMD64MOVQconst {
-			break
-		}
-		c := auxIntToInt64(v_1.AuxInt)
-		mask := v_2
-		v.reset(OpAMD64VPSRAQMasked128const)
-		v.AuxInt = uint8ToAuxInt(uint8(c))
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-		v0.AddArg(mask)
-		v.AddArg2(x, v0)
-		return true
-	}
 	// match: (ShiftAllRightMaskedInt64x2 x y mask)
 	// result: (VPSRAQMasked128 x y (VPMOVVec64x2ToM mask))
 	for {
@@ -54612,22 +54302,6 @@ func rewriteValueAMD64_OpShiftAllRightMaskedInt64x4(v *Value) bool {
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	b := v.Block
-	// match: (ShiftAllRightMaskedInt64x4 x (MOVQconst [c]) mask)
-	// result: (VPSRAQMasked256const [uint8(c)] x (VPMOVVec64x4ToM mask))
-	for {
-		x := v_0
-		if v_1.Op != OpAMD64MOVQconst {
-			break
-		}
-		c := auxIntToInt64(v_1.AuxInt)
-		mask := v_2
-		v.reset(OpAMD64VPSRAQMasked256const)
-		v.AuxInt = uint8ToAuxInt(uint8(c))
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-		v0.AddArg(mask)
-		v.AddArg2(x, v0)
-		return true
-	}
 	// match: (ShiftAllRightMaskedInt64x4 x y mask)
 	// result: (VPSRAQMasked256 x y (VPMOVVec64x4ToM mask))
 	for {
@@ -54646,22 +54320,6 @@ func rewriteValueAMD64_OpShiftAllRightMaskedInt64x8(v *Value) bool {
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	b := v.Block
-	// match: (ShiftAllRightMaskedInt64x8 x (MOVQconst [c]) mask)
-	// result: (VPSRAQMasked512const [uint8(c)] x (VPMOVVec64x8ToM mask))
-	for {
-		x := v_0
-		if v_1.Op != OpAMD64MOVQconst {
-			break
-		}
-		c := auxIntToInt64(v_1.AuxInt)
-		mask := v_2
-		v.reset(OpAMD64VPSRAQMasked512const)
-		v.AuxInt = uint8ToAuxInt(uint8(c))
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-		v0.AddArg(mask)
-		v.AddArg2(x, v0)
-		return true
-	}
 	// match: (ShiftAllRightMaskedInt64x8 x y mask)
 	// result: (VPSRAQMasked512 x y (VPMOVVec64x8ToM mask))
 	for {
@@ -54680,22 +54338,6 @@ func rewriteValueAMD64_OpShiftAllRightMaskedUint16x16(v *Value) bool {
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	b := v.Block
-	// match: (ShiftAllRightMaskedUint16x16 x (MOVQconst [c]) mask)
-	// result: (VPSRLWMasked256const [uint8(c)] x (VPMOVVec16x16ToM mask))
-	for {
-		x := v_0
-		if v_1.Op != OpAMD64MOVQconst {
-			break
-		}
-		c := auxIntToInt64(v_1.AuxInt)
-		mask := v_2
-		v.reset(OpAMD64VPSRLWMasked256const)
-		v.AuxInt = uint8ToAuxInt(uint8(c))
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask)
-		v0.AddArg(mask)
-		v.AddArg2(x, v0)
-		return true
-	}
 	// match: (ShiftAllRightMaskedUint16x16 x y mask)
 	// result: (VPSRLWMasked256 x y (VPMOVVec16x16ToM mask))
 	for {
@@ -54714,22 +54356,6 @@ func rewriteValueAMD64_OpShiftAllRightMaskedUint16x32(v *Value) bool {
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	b := v.Block
-	// match: (ShiftAllRightMaskedUint16x32 x (MOVQconst [c]) mask)
-	// result: (VPSRLWMasked512const [uint8(c)] x (VPMOVVec16x32ToM mask))
-	for {
-		x := v_0
-		if v_1.Op != OpAMD64MOVQconst {
-			break
-		}
-		c := auxIntToInt64(v_1.AuxInt)
-		mask := v_2
-		v.reset(OpAMD64VPSRLWMasked512const)
-		v.AuxInt = uint8ToAuxInt(uint8(c))
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask)
-		v0.AddArg(mask)
-		v.AddArg2(x, v0)
-		return true
-	}
 	// match: (ShiftAllRightMaskedUint16x32 x y mask)
 	// result: (VPSRLWMasked512 x y (VPMOVVec16x32ToM mask))
 	for {
@@ -54748,22 +54374,6 @@ func rewriteValueAMD64_OpShiftAllRightMaskedUint16x8(v *Value) bool {
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	b := v.Block
-	// match: (ShiftAllRightMaskedUint16x8 x (MOVQconst [c]) mask)
-	// result: (VPSRLWMasked128const [uint8(c)] x (VPMOVVec16x8ToM mask))
-	for {
-		x := v_0
-		if v_1.Op != OpAMD64MOVQconst {
-			break
-		}
-		c := auxIntToInt64(v_1.AuxInt)
-		mask := v_2
-		v.reset(OpAMD64VPSRLWMasked128const)
-		v.AuxInt = uint8ToAuxInt(uint8(c))
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask)
-		v0.AddArg(mask)
-		v.AddArg2(x, v0)
-		return true
-	}
 	// match: (ShiftAllRightMaskedUint16x8 x y mask)
 	// result: (VPSRLWMasked128 x y (VPMOVVec16x8ToM mask))
 	for {
@@ -54782,22 +54392,6 @@ func rewriteValueAMD64_OpShiftAllRightMaskedUint32x16(v *Value) bool {
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	b := v.Block
-	// match: (ShiftAllRightMaskedUint32x16 x (MOVQconst [c]) mask)
-	// result: (VPSRLDMasked512const [uint8(c)] x (VPMOVVec32x16ToM mask))
-	for {
-		x := v_0
-		if v_1.Op != OpAMD64MOVQconst {
-			break
-		}
-		c := auxIntToInt64(v_1.AuxInt)
-		mask := v_2
-		v.reset(OpAMD64VPSRLDMasked512const)
-		v.AuxInt = uint8ToAuxInt(uint8(c))
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask)
-		v0.AddArg(mask)
-		v.AddArg2(x, v0)
-		return true
-	}
 	// match: (ShiftAllRightMaskedUint32x16 x y mask)
 	// result: (VPSRLDMasked512 x y (VPMOVVec32x16ToM mask))
 	for {
@@ -54816,22 +54410,6 @@ func rewriteValueAMD64_OpShiftAllRightMaskedUint32x4(v *Value) bool {
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	b := v.Block
-	// match: (ShiftAllRightMaskedUint32x4 x (MOVQconst [c]) mask)
-	// result: (VPSRLDMasked128const [uint8(c)] x (VPMOVVec32x4ToM mask))
-	for {
-		x := v_0
-		if v_1.Op != OpAMD64MOVQconst {
-			break
-		}
-		c := auxIntToInt64(v_1.AuxInt)
-		mask := v_2
-		v.reset(OpAMD64VPSRLDMasked128const)
-		v.AuxInt = uint8ToAuxInt(uint8(c))
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask)
-		v0.AddArg(mask)
-		v.AddArg2(x, v0)
-		return true
-	}
 	// match: (ShiftAllRightMaskedUint32x4 x y mask)
 	// result: (VPSRLDMasked128 x y (VPMOVVec32x4ToM mask))
 	for {
@@ -54850,22 +54428,6 @@ func rewriteValueAMD64_OpShiftAllRightMaskedUint32x8(v *Value) bool {
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	b := v.Block
-	// match: (ShiftAllRightMaskedUint32x8 x (MOVQconst [c]) mask)
-	// result: (VPSRLDMasked256const [uint8(c)] x (VPMOVVec32x8ToM mask))
-	for {
-		x := v_0
-		if v_1.Op != OpAMD64MOVQconst {
-			break
-		}
-		c := auxIntToInt64(v_1.AuxInt)
-		mask := v_2
-		v.reset(OpAMD64VPSRLDMasked256const)
-		v.AuxInt = uint8ToAuxInt(uint8(c))
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask)
-		v0.AddArg(mask)
-		v.AddArg2(x, v0)
-		return true
-	}
 	// match: (ShiftAllRightMaskedUint32x8 x y mask)
 	// result: (VPSRLDMasked256 x y (VPMOVVec32x8ToM mask))
 	for {
@@ -54884,22 +54446,6 @@ func rewriteValueAMD64_OpShiftAllRightMaskedUint64x2(v *Value) bool {
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	b := v.Block
-	// match: (ShiftAllRightMaskedUint64x2 x (MOVQconst [c]) mask)
-	// result: (VPSRLQMasked128const [uint8(c)] x (VPMOVVec64x2ToM mask))
-	for {
-		x := v_0
-		if v_1.Op != OpAMD64MOVQconst {
-			break
-		}
-		c := auxIntToInt64(v_1.AuxInt)
-		mask := v_2
-		v.reset(OpAMD64VPSRLQMasked128const)
-		v.AuxInt = uint8ToAuxInt(uint8(c))
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask)
-		v0.AddArg(mask)
-		v.AddArg2(x, v0)
-		return true
-	}
 	// match: (ShiftAllRightMaskedUint64x2 x y mask)
 	// result: (VPSRLQMasked128 x y (VPMOVVec64x2ToM mask))
 	for {
@@ -54918,22 +54464,6 @@ func rewriteValueAMD64_OpShiftAllRightMaskedUint64x4(v *Value) bool {
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	b := v.Block
-	// match: (ShiftAllRightMaskedUint64x4 x (MOVQconst [c]) mask)
-	// result: (VPSRLQMasked256const [uint8(c)] x (VPMOVVec64x4ToM mask))
-	for {
-		x := v_0
-		if v_1.Op != OpAMD64MOVQconst {
-			break
-		}
-		c := auxIntToInt64(v_1.AuxInt)
-		mask := v_2
-		v.reset(OpAMD64VPSRLQMasked256const)
-		v.AuxInt = uint8ToAuxInt(uint8(c))
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask)
-		v0.AddArg(mask)
-		v.AddArg2(x, v0)
-		return true
-	}
 	// match: (ShiftAllRightMaskedUint64x4 x y mask)
 	// result: (VPSRLQMasked256 x y (VPMOVVec64x4ToM mask))
 	for {
@@ -54952,22 +54482,6 @@ func rewriteValueAMD64_OpShiftAllRightMaskedUint64x8(v *Value) bool {
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
 	b := v.Block
-	// match: (ShiftAllRightMaskedUint64x8 x (MOVQconst [c]) mask)
-	// result: (VPSRLQMasked512const [uint8(c)] x (VPMOVVec64x8ToM mask))
-	for {
-		x := v_0
-		if v_1.Op != OpAMD64MOVQconst {
-			break
-		}
-		c := auxIntToInt64(v_1.AuxInt)
-		mask := v_2
-		v.reset(OpAMD64VPSRLQMasked512const)
-		v.AuxInt = uint8ToAuxInt(uint8(c))
-		v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask)
-		v0.AddArg(mask)
-		v.AddArg2(x, v0)
-		return true
-	}
 	// match: (ShiftAllRightMaskedUint64x8 x y mask)
 	// result: (VPSRLQMasked512 x y (VPMOVVec64x8ToM mask))
 	for {
@@ -54981,240 +54495,6 @@ func rewriteValueAMD64_OpShiftAllRightMaskedUint64x8(v *Value) bool {
 		return true
 	}
 }
-func rewriteValueAMD64_OpShiftAllRightUint16x16(v *Value) bool {
-	v_1 := v.Args[1]
-	v_0 := v.Args[0]
-	// match: (ShiftAllRightUint16x16 x (MOVQconst [c]))
-	// result: (VPSRLW256const [uint8(c)] x)
-	for {
-		x := v_0
-		if v_1.Op != OpAMD64MOVQconst {
-			break
-		}
-		c := auxIntToInt64(v_1.AuxInt)
-		v.reset(OpAMD64VPSRLW256const)
-		v.AuxInt = uint8ToAuxInt(uint8(c))
-		v.AddArg(x)
-		return true
-	}
-	// match: (ShiftAllRightUint16x16 x y)
-	// result: (VPSRLW256 x y)
-	for {
-		x := v_0
-		y := v_1
-		v.reset(OpAMD64VPSRLW256)
-		v.AddArg2(x, y)
-		return true
-	}
-}
-func rewriteValueAMD64_OpShiftAllRightUint16x32(v *Value) bool {
-	v_1 := v.Args[1]
-	v_0 := v.Args[0]
-	// match: (ShiftAllRightUint16x32 x (MOVQconst [c]))
-	// result: (VPSRLW512const [uint8(c)] x)
-	for {
-		x := v_0
-		if v_1.Op != OpAMD64MOVQconst {
-			break
-		}
-		c := auxIntToInt64(v_1.AuxInt)
-		v.reset(OpAMD64VPSRLW512const)
-		v.AuxInt = uint8ToAuxInt(uint8(c))
-		v.AddArg(x)
-		return true
-	}
-	// match: (ShiftAllRightUint16x32 x y)
-	// result: (VPSRLW512 x y)
-	for {
-		x := v_0
-		y := v_1
-		v.reset(OpAMD64VPSRLW512)
-		v.AddArg2(x, y)
-		return true
-	}
-}
-func rewriteValueAMD64_OpShiftAllRightUint16x8(v *Value) bool {
-	v_1 := v.Args[1]
-	v_0 := v.Args[0]
-	// match: (ShiftAllRightUint16x8 x (MOVQconst [c]))
-	// result: (VPSRLW128const [uint8(c)] x)
-	for {
-		x := v_0
-		if v_1.Op != OpAMD64MOVQconst {
-			break
-		}
-		c := auxIntToInt64(v_1.AuxInt)
-		v.reset(OpAMD64VPSRLW128const)
-		v.AuxInt = uint8ToAuxInt(uint8(c))
-		v.AddArg(x)
-		return true
-	}
-	// match: (ShiftAllRightUint16x8 x y)
-	// result: (VPSRLW128 x y)
-	for {
-		x := v_0
-		y := v_1
-		v.reset(OpAMD64VPSRLW128)
-		v.AddArg2(x, y)
-		return true
-	}
-}
-func rewriteValueAMD64_OpShiftAllRightUint32x16(v *Value) bool {
-	v_1 := v.Args[1]
-	v_0 := v.Args[0]
-	// match: (ShiftAllRightUint32x16 x (MOVQconst [c]))
-	// result: (VPSRLD512const [uint8(c)] x)
-	for {
-		x := v_0
-		if v_1.Op != OpAMD64MOVQconst {
-			break
-		}
-		c := auxIntToInt64(v_1.AuxInt)
-		v.reset(OpAMD64VPSRLD512const)
-		v.AuxInt = uint8ToAuxInt(uint8(c))
-		v.AddArg(x)
-		return true
-	}
-	// match: (ShiftAllRightUint32x16 x y)
-	// result: (VPSRLD512 x y)
-	for {
-		x := v_0
-		y := v_1
-		v.reset(OpAMD64VPSRLD512)
-		v.AddArg2(x, y)
-		return true
-	}
-}
-func rewriteValueAMD64_OpShiftAllRightUint32x4(v *Value) bool {
-	v_1 := v.Args[1]
-	v_0 := v.Args[0]
-	// match: (ShiftAllRightUint32x4 x (MOVQconst [c]))
-	// result: (VPSRLD128const [uint8(c)] x)
-	for {
-		x := v_0
-		if v_1.Op != OpAMD64MOVQconst {
-			break
-		}
-		c := auxIntToInt64(v_1.AuxInt)
-		v.reset(OpAMD64VPSRLD128const)
-		v.AuxInt = uint8ToAuxInt(uint8(c))
-		v.AddArg(x)
-		return true
-	}
-	// match: (ShiftAllRightUint32x4 x y)
-	// result: (VPSRLD128 x y)
-	for {
-		x := v_0
-		y := v_1
-		v.reset(OpAMD64VPSRLD128)
-		v.AddArg2(x, y)
-		return true
-	}
-}
-func rewriteValueAMD64_OpShiftAllRightUint32x8(v *Value) bool {
-	v_1 := v.Args[1]
-	v_0 := v.Args[0]
-	// match: (ShiftAllRightUint32x8 x (MOVQconst [c]))
-	// result: (VPSRLD256const [uint8(c)] x)
-	for {
-		x := v_0
-		if v_1.Op != OpAMD64MOVQconst {
-			break
-		}
-		c := auxIntToInt64(v_1.AuxInt)
-		v.reset(OpAMD64VPSRLD256const)
-		v.AuxInt = uint8ToAuxInt(uint8(c))
-		v.AddArg(x)
-		return true
-	}
-	// match: (ShiftAllRightUint32x8 x y)
-	// result: (VPSRLD256 x y)
-	for {
-		x := v_0
-		y := v_1
-		v.reset(OpAMD64VPSRLD256)
-		v.AddArg2(x, y)
-		return true
-	}
-}
-func rewriteValueAMD64_OpShiftAllRightUint64x2(v *Value) bool {
-	v_1 := v.Args[1]
-	v_0 := v.Args[0]
-	// match: (ShiftAllRightUint64x2 x (MOVQconst [c]))
-	// result: (VPSRLQ128const [uint8(c)] x)
-	for {
-		x := v_0
-		if v_1.Op != OpAMD64MOVQconst {
-			break
-		}
-		c := auxIntToInt64(v_1.AuxInt)
-		v.reset(OpAMD64VPSRLQ128const)
-		v.AuxInt = uint8ToAuxInt(uint8(c))
-		v.AddArg(x)
-		return true
-	}
-	// match: (ShiftAllRightUint64x2 x y)
-	// result: (VPSRLQ128 x y)
-	for {
-		x := v_0
-		y := v_1
-		v.reset(OpAMD64VPSRLQ128)
-		v.AddArg2(x, y)
-		return true
-	}
-}
-func rewriteValueAMD64_OpShiftAllRightUint64x4(v *Value) bool {
-	v_1 := v.Args[1]
-	v_0 := v.Args[0]
-	// match: (ShiftAllRightUint64x4 x (MOVQconst [c]))
-	// result: (VPSRLQ256const [uint8(c)] x)
-	for {
-		x := v_0
-		if v_1.Op != OpAMD64MOVQconst {
-			break
-		}
-		c := auxIntToInt64(v_1.AuxInt)
-		v.reset(OpAMD64VPSRLQ256const)
-		v.AuxInt = uint8ToAuxInt(uint8(c))
-		v.AddArg(x)
-		return true
-	}
-	// match: (ShiftAllRightUint64x4 x y)
-	// result: (VPSRLQ256 x y)
-	for {
-		x := v_0
-		y := v_1
-		v.reset(OpAMD64VPSRLQ256)
-		v.AddArg2(x, y)
-		return true
-	}
-}
-func rewriteValueAMD64_OpShiftAllRightUint64x8(v *Value) bool {
-	v_1 := v.Args[1]
-	v_0 := v.Args[0]
-	// match: (ShiftAllRightUint64x8 x (MOVQconst [c]))
-	// result: (VPSRLQ512const [uint8(c)] x)
-	for {
-		x := v_0
-		if v_1.Op != OpAMD64MOVQconst {
-			break
-		}
-		c := auxIntToInt64(v_1.AuxInt)
-		v.reset(OpAMD64VPSRLQ512const)
-		v.AuxInt = uint8ToAuxInt(uint8(c))
-		v.AddArg(x)
-		return true
-	}
-	// match: (ShiftAllRightUint64x8 x y)
-	// result: (VPSRLQ512 x y)
-	for {
-		x := v_0
-		y := v_1
-		v.reset(OpAMD64VPSRLQ512)
-		v.AddArg2(x, y)
-		return true
-	}
-}
 func rewriteValueAMD64_OpShiftLeftConcatMaskedInt16x16(v *Value) bool {
 	v_3 := v.Args[3]
 	v_2 := v.Args[2]
-- 
2.52.0
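
Editor's note: the hunks above implement the two-layer shape of the regenerated rules. Each generic ShiftAllLeft*/ShiftAllRight* op now lowers 1:1 to its AMD64 machine op, and the fold of a constant shift count into the immediate-form op is expressed once, on the machine op itself; that is why the Uint* rewrite functions above lose their per-op MOVQconst cases. The toy rewriter below is a minimal sketch of that structure under simplified stand-ins (op, value, rewrite) for the compiler's actual ssa.Value machinery; only the op names are taken from the patch.

package main

import "fmt"

// op and value are illustrative stand-ins for the SSA package's
// richer Op and *Value types.
type op string

type value struct {
	op     op
	auxInt int64    // immediate operand, like Value.AuxInt
	args   []*value // operands
}

const (
	opShiftAllLeftInt16x8 op = "ShiftAllLeftInt16x8" // generic op
	opMOVQconst           op = "MOVQconst"           // constant shift count
	opVPSLLW128           op = "VPSLLW128"           // machine op, variable count
	opVPSLLW128const      op = "VPSLLW128const"      // machine op, immediate count
)

// rewrite applies the two rule layers until a fixed point:
//  1. generic -> machine: (ShiftAllLeftInt16x8 ...) => (VPSLLW128 ...)
//  2. machine const-fold: (VPSLLW128 x (MOVQconst [c])) => (VPSLLW128const [uint8(c)] x)
// Because signed and unsigned generic ops lower to the same machine op,
// one const-fold case replaces the per-op matches the patch deletes.
func rewrite(v *value) bool {
	switch v.op {
	case opShiftAllLeftInt16x8:
		v.op = opVPSLLW128 // trivial rename, operands unchanged
		return true
	case opVPSLLW128:
		if c := v.args[1]; c.op == opMOVQconst {
			v.op = opVPSLLW128const
			v.auxInt = int64(uint8(c.auxInt)) // count becomes the immediate
			v.args = v.args[:1]               // drop the count operand
			return true
		}
	}
	return false
}

func main() {
	x := &value{op: "Arg"}
	count := &value{op: opMOVQconst, auxInt: 3}
	v := &value{op: opShiftAllLeftInt16x8, args: []*value{x, count}}
	for rewrite(v) {
	}
	fmt.Println(v.op, v.auxInt) // VPSLLW128const 3
}

The masked variants follow the same split: the generic-to-machine rewrite still inserts the VPMOVVec*ToM conversion, while the constant-count cases deleted from the generic Masked functions above move to the shared machine ops.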