From: Junyang Shao
Date: Tue, 5 Aug 2025 04:28:44 +0000 (+0000)
Subject: [dev.simd] cmd/compile: add ShiftAll immediate variant
X-Git-Tag: go1.26rc1~147^2~147
X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=82d056ddd7;p=gostls13.git

[dev.simd] cmd/compile: add ShiftAll immediate variant

This CL is generated by CL 693136.

Change-Id: Ifd2278d3f927efa008a14cc5e592e7c14b7120ff
Reviewed-on: https://go-review.googlesource.com/c/go/+/693157
LUCI-TryBot-Result: Go LUCI
Reviewed-by: Cherry Mui
Reviewed-by: David Chase
---
diff --git a/src/cmd/compile/internal/amd64/simdssa.go b/src/cmd/compile/internal/amd64/simdssa.go index 76ef42576d..bd6af6221d 100644 --- a/src/cmd/compile/internal/amd64/simdssa.go +++ b/src/cmd/compile/internal/amd64/simdssa.go @@ -689,7 +689,34 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPRORD512, ssa.OpAMD64VPRORQ128, ssa.OpAMD64VPRORQ256, - ssa.OpAMD64VPRORQ512: + ssa.OpAMD64VPRORQ512, + ssa.OpAMD64VPSLLW128const, + ssa.OpAMD64VPSLLW256const, + ssa.OpAMD64VPSLLW512const, + ssa.OpAMD64VPSLLD128const, + ssa.OpAMD64VPSLLD256const, + ssa.OpAMD64VPSLLD512const, + ssa.OpAMD64VPSLLQ128const, + ssa.OpAMD64VPSLLQ256const, + ssa.OpAMD64VPSLLQ512const, + ssa.OpAMD64VPSRLW128const, + ssa.OpAMD64VPSRLW256const, + ssa.OpAMD64VPSRLW512const, + ssa.OpAMD64VPSRLD128const, + ssa.OpAMD64VPSRLD256const, + ssa.OpAMD64VPSRLD512const, + ssa.OpAMD64VPSRLQ128const, + ssa.OpAMD64VPSRLQ256const, + ssa.OpAMD64VPSRLQ512const, + ssa.OpAMD64VPSRAW128const, + ssa.OpAMD64VPSRAW256const, + ssa.OpAMD64VPSRAW512const, + ssa.OpAMD64VPSRAD128const, + ssa.OpAMD64VPSRAD256const, + ssa.OpAMD64VPSRAD512const, + ssa.OpAMD64VPSRAQ128const, + ssa.OpAMD64VPSRAQ256const, + ssa.OpAMD64VPSRAQ512const: p = simdV11Imm8(s, v) case ssa.OpAMD64VRNDSCALEPSMasked128, @@ -715,7 +742,34 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPRORDMasked512, ssa.OpAMD64VPRORQMasked128, ssa.OpAMD64VPRORQMasked256, - ssa.OpAMD64VPRORQMasked512: + ssa.OpAMD64VPRORQMasked512, + ssa.OpAMD64VPSLLWMasked128const, + ssa.OpAMD64VPSLLWMasked256const, + ssa.OpAMD64VPSLLWMasked512const, + ssa.OpAMD64VPSLLDMasked128const, + ssa.OpAMD64VPSLLDMasked256const, + ssa.OpAMD64VPSLLDMasked512const, + ssa.OpAMD64VPSLLQMasked128const, + ssa.OpAMD64VPSLLQMasked256const, + ssa.OpAMD64VPSLLQMasked512const, + ssa.OpAMD64VPSRLWMasked128const, + ssa.OpAMD64VPSRLWMasked256const, + ssa.OpAMD64VPSRLWMasked512const, + ssa.OpAMD64VPSRLDMasked128const, + ssa.OpAMD64VPSRLDMasked256const, + ssa.OpAMD64VPSRLDMasked512const, + ssa.OpAMD64VPSRLQMasked128const, + ssa.OpAMD64VPSRLQMasked256const, + ssa.OpAMD64VPSRLQMasked512const, + ssa.OpAMD64VPSRAWMasked128const, + ssa.OpAMD64VPSRAWMasked256const, + ssa.OpAMD64VPSRAWMasked512const, + ssa.OpAMD64VPSRADMasked128const, + ssa.OpAMD64VPSRADMasked256const, + ssa.OpAMD64VPSRADMasked512const, + ssa.OpAMD64VPSRAQMasked128const, + ssa.OpAMD64VPSRAQMasked256const, + ssa.OpAMD64VPSRAQMasked512const: p = simdVkvImm8(s, v) case ssa.OpAMD64VDPPS128, @@ -1497,7 +1551,34 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPXORDMasked512, ssa.OpAMD64VPXORQMasked128, ssa.OpAMD64VPXORQMasked256, - ssa.OpAMD64VPXORQMasked512: + ssa.OpAMD64VPXORQMasked512, + ssa.OpAMD64VPSLLWMasked128const, + ssa.OpAMD64VPSLLWMasked256const, + ssa.OpAMD64VPSLLWMasked512const, + ssa.OpAMD64VPSLLDMasked128const, + ssa.OpAMD64VPSLLDMasked256const, + ssa.OpAMD64VPSLLDMasked512const, + ssa.OpAMD64VPSLLQMasked128const, + ssa.OpAMD64VPSLLQMasked256const, +
ssa.OpAMD64VPSLLQMasked512const, + ssa.OpAMD64VPSRLWMasked128const, + ssa.OpAMD64VPSRLWMasked256const, + ssa.OpAMD64VPSRLWMasked512const, + ssa.OpAMD64VPSRLDMasked128const, + ssa.OpAMD64VPSRLDMasked256const, + ssa.OpAMD64VPSRLDMasked512const, + ssa.OpAMD64VPSRLQMasked128const, + ssa.OpAMD64VPSRLQMasked256const, + ssa.OpAMD64VPSRLQMasked512const, + ssa.OpAMD64VPSRAWMasked128const, + ssa.OpAMD64VPSRAWMasked256const, + ssa.OpAMD64VPSRAWMasked512const, + ssa.OpAMD64VPSRADMasked128const, + ssa.OpAMD64VPSRADMasked256const, + ssa.OpAMD64VPSRADMasked512const, + ssa.OpAMD64VPSRAQMasked128const, + ssa.OpAMD64VPSRAQMasked256const, + ssa.OpAMD64VPSRAQMasked512const: x86.ParseSuffix(p, "Z") } diff --git a/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules b/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules index 060f220c7d..b8bd0d9b4c 100644 --- a/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules +++ b/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules @@ -1345,24 +1345,42 @@ (SetElemUint16x8 ...) => (VPINSRW128 ...) (SetElemUint32x4 ...) => (VPINSRD128 ...) (SetElemUint64x2 ...) => (VPINSRQ128 ...) -(ShiftAllLeftInt16x8 ...) => (VPSLLW128 ...) -(ShiftAllLeftInt16x16 ...) => (VPSLLW256 ...) -(ShiftAllLeftInt16x32 ...) => (VPSLLW512 ...) -(ShiftAllLeftInt32x4 ...) => (VPSLLD128 ...) -(ShiftAllLeftInt32x8 ...) => (VPSLLD256 ...) -(ShiftAllLeftInt32x16 ...) => (VPSLLD512 ...) -(ShiftAllLeftInt64x2 ...) => (VPSLLQ128 ...) -(ShiftAllLeftInt64x4 ...) => (VPSLLQ256 ...) -(ShiftAllLeftInt64x8 ...) => (VPSLLQ512 ...) -(ShiftAllLeftUint16x8 ...) => (VPSLLW128 ...) -(ShiftAllLeftUint16x16 ...) => (VPSLLW256 ...) -(ShiftAllLeftUint16x32 ...) => (VPSLLW512 ...) -(ShiftAllLeftUint32x4 ...) => (VPSLLD128 ...) -(ShiftAllLeftUint32x8 ...) => (VPSLLD256 ...) -(ShiftAllLeftUint32x16 ...) => (VPSLLD512 ...) -(ShiftAllLeftUint64x2 ...) => (VPSLLQ128 ...) -(ShiftAllLeftUint64x4 ...) => (VPSLLQ256 ...) -(ShiftAllLeftUint64x8 ...) => (VPSLLQ512 ...) 
+(ShiftAllLeftInt16x8 x (MOVQconst [c])) => (VPSLLW128const [int8(c)] x) +(ShiftAllLeftInt16x8 x y) => (VPSLLW128 x y) +(ShiftAllLeftInt16x16 x (MOVQconst [c])) => (VPSLLW256const [int8(c)] x) +(ShiftAllLeftInt16x16 x y) => (VPSLLW256 x y) +(ShiftAllLeftInt16x32 x (MOVQconst [c])) => (VPSLLW512const [int8(c)] x) +(ShiftAllLeftInt16x32 x y) => (VPSLLW512 x y) +(ShiftAllLeftInt32x4 x (MOVQconst [c])) => (VPSLLD128const [int8(c)] x) +(ShiftAllLeftInt32x4 x y) => (VPSLLD128 x y) +(ShiftAllLeftInt32x8 x (MOVQconst [c])) => (VPSLLD256const [int8(c)] x) +(ShiftAllLeftInt32x8 x y) => (VPSLLD256 x y) +(ShiftAllLeftInt32x16 x (MOVQconst [c])) => (VPSLLD512const [int8(c)] x) +(ShiftAllLeftInt32x16 x y) => (VPSLLD512 x y) +(ShiftAllLeftInt64x2 x (MOVQconst [c])) => (VPSLLQ128const [int8(c)] x) +(ShiftAllLeftInt64x2 x y) => (VPSLLQ128 x y) +(ShiftAllLeftInt64x4 x (MOVQconst [c])) => (VPSLLQ256const [int8(c)] x) +(ShiftAllLeftInt64x4 x y) => (VPSLLQ256 x y) +(ShiftAllLeftInt64x8 x (MOVQconst [c])) => (VPSLLQ512const [int8(c)] x) +(ShiftAllLeftInt64x8 x y) => (VPSLLQ512 x y) +(ShiftAllLeftUint16x8 x (MOVQconst [c])) => (VPSLLW128const [int8(c)] x) +(ShiftAllLeftUint16x8 x y) => (VPSLLW128 x y) +(ShiftAllLeftUint16x16 x (MOVQconst [c])) => (VPSLLW256const [int8(c)] x) +(ShiftAllLeftUint16x16 x y) => (VPSLLW256 x y) +(ShiftAllLeftUint16x32 x (MOVQconst [c])) => (VPSLLW512const [int8(c)] x) +(ShiftAllLeftUint16x32 x y) => (VPSLLW512 x y) +(ShiftAllLeftUint32x4 x (MOVQconst [c])) => (VPSLLD128const [int8(c)] x) +(ShiftAllLeftUint32x4 x y) => (VPSLLD128 x y) +(ShiftAllLeftUint32x8 x (MOVQconst [c])) => (VPSLLD256const [int8(c)] x) +(ShiftAllLeftUint32x8 x y) => (VPSLLD256 x y) +(ShiftAllLeftUint32x16 x (MOVQconst [c])) => (VPSLLD512const [int8(c)] x) +(ShiftAllLeftUint32x16 x y) => (VPSLLD512 x y) +(ShiftAllLeftUint64x2 x (MOVQconst [c])) => (VPSLLQ128const [int8(c)] x) +(ShiftAllLeftUint64x2 x y) => (VPSLLQ128 x y) +(ShiftAllLeftUint64x4 x (MOVQconst [c])) => (VPSLLQ256const [int8(c)] x) +(ShiftAllLeftUint64x4 x y) => (VPSLLQ256 x y) +(ShiftAllLeftUint64x8 x (MOVQconst [c])) => (VPSLLQ512const [int8(c)] x) +(ShiftAllLeftUint64x8 x y) => (VPSLLQ512 x y) (ShiftAllLeftConcatInt16x8 ...) => (VPSHLDW128 ...) (ShiftAllLeftConcatInt16x16 ...) => (VPSHLDW256 ...) (ShiftAllLeftConcatInt16x32 ...) => (VPSHLDW512 ...) 
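Each ShiftAllLeft lowering is now a pair of rules: the first fires when the shift count is a MOVQconst and folds the count into the new *const op's 8-bit immediate (the AuxInt, via int8(c)), and the second keeps the existing two-register form for variable counts. As a rough scalar model of what the 16-bit-lane variant computes (not the simd package API; the lane-zeroing for oversized counts follows VPSLLW's documented behavior):

    package main

    import "fmt"

    // shiftAllLeft16 models ShiftAllLeftInt16x8/Uint16x8: every lane is shifted
    // by the same count. With a constant count the new rules emit VPSLLW with an
    // 8-bit immediate instead of materializing the count in a vector register.
    func shiftAllLeft16(v [8]int16, count uint64) [8]int16 {
    	for i := range v {
    		if count > 15 {
    			v[i] = 0 // VPSLLW zeroes lanes when the count exceeds the lane width
    		} else {
    			v[i] <<= count
    		}
    	}
    	return v
    }

    func main() {
    	fmt.Println(shiftAllLeft16([8]int16{1, 2, 3, 4, 5, 6, 7, 8}, 3))
    	// Output: [8 16 24 32 40 48 56 64]
    }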
@@ -1399,42 +1417,78 @@ (ShiftAllLeftConcatMaskedUint64x2 [a] x y mask) => (VPSHLDQMasked128 [a] x y (VPMOVVec64x2ToM mask)) (ShiftAllLeftConcatMaskedUint64x4 [a] x y mask) => (VPSHLDQMasked256 [a] x y (VPMOVVec64x4ToM mask)) (ShiftAllLeftConcatMaskedUint64x8 [a] x y mask) => (VPSHLDQMasked512 [a] x y (VPMOVVec64x8ToM mask)) +(ShiftAllLeftMaskedInt16x8 x (MOVQconst [c]) mask) => (VPSLLWMasked128const [int8(c)] x (VPMOVVec16x8ToM mask)) (ShiftAllLeftMaskedInt16x8 x y mask) => (VPSLLWMasked128 x y (VPMOVVec16x8ToM mask)) +(ShiftAllLeftMaskedInt16x16 x (MOVQconst [c]) mask) => (VPSLLWMasked256const [int8(c)] x (VPMOVVec16x16ToM mask)) (ShiftAllLeftMaskedInt16x16 x y mask) => (VPSLLWMasked256 x y (VPMOVVec16x16ToM mask)) +(ShiftAllLeftMaskedInt16x32 x (MOVQconst [c]) mask) => (VPSLLWMasked512const [int8(c)] x (VPMOVVec16x32ToM mask)) (ShiftAllLeftMaskedInt16x32 x y mask) => (VPSLLWMasked512 x y (VPMOVVec16x32ToM mask)) +(ShiftAllLeftMaskedInt32x4 x (MOVQconst [c]) mask) => (VPSLLDMasked128const [int8(c)] x (VPMOVVec32x4ToM mask)) (ShiftAllLeftMaskedInt32x4 x y mask) => (VPSLLDMasked128 x y (VPMOVVec32x4ToM mask)) +(ShiftAllLeftMaskedInt32x8 x (MOVQconst [c]) mask) => (VPSLLDMasked256const [int8(c)] x (VPMOVVec32x8ToM mask)) (ShiftAllLeftMaskedInt32x8 x y mask) => (VPSLLDMasked256 x y (VPMOVVec32x8ToM mask)) +(ShiftAllLeftMaskedInt32x16 x (MOVQconst [c]) mask) => (VPSLLDMasked512const [int8(c)] x (VPMOVVec32x16ToM mask)) (ShiftAllLeftMaskedInt32x16 x y mask) => (VPSLLDMasked512 x y (VPMOVVec32x16ToM mask)) +(ShiftAllLeftMaskedInt64x2 x (MOVQconst [c]) mask) => (VPSLLQMasked128const [int8(c)] x (VPMOVVec64x2ToM mask)) (ShiftAllLeftMaskedInt64x2 x y mask) => (VPSLLQMasked128 x y (VPMOVVec64x2ToM mask)) +(ShiftAllLeftMaskedInt64x4 x (MOVQconst [c]) mask) => (VPSLLQMasked256const [int8(c)] x (VPMOVVec64x4ToM mask)) (ShiftAllLeftMaskedInt64x4 x y mask) => (VPSLLQMasked256 x y (VPMOVVec64x4ToM mask)) +(ShiftAllLeftMaskedInt64x8 x (MOVQconst [c]) mask) => (VPSLLQMasked512const [int8(c)] x (VPMOVVec64x8ToM mask)) (ShiftAllLeftMaskedInt64x8 x y mask) => (VPSLLQMasked512 x y (VPMOVVec64x8ToM mask)) +(ShiftAllLeftMaskedUint16x8 x (MOVQconst [c]) mask) => (VPSLLWMasked128const [int8(c)] x (VPMOVVec16x8ToM mask)) (ShiftAllLeftMaskedUint16x8 x y mask) => (VPSLLWMasked128 x y (VPMOVVec16x8ToM mask)) +(ShiftAllLeftMaskedUint16x16 x (MOVQconst [c]) mask) => (VPSLLWMasked256const [int8(c)] x (VPMOVVec16x16ToM mask)) (ShiftAllLeftMaskedUint16x16 x y mask) => (VPSLLWMasked256 x y (VPMOVVec16x16ToM mask)) +(ShiftAllLeftMaskedUint16x32 x (MOVQconst [c]) mask) => (VPSLLWMasked512const [int8(c)] x (VPMOVVec16x32ToM mask)) (ShiftAllLeftMaskedUint16x32 x y mask) => (VPSLLWMasked512 x y (VPMOVVec16x32ToM mask)) +(ShiftAllLeftMaskedUint32x4 x (MOVQconst [c]) mask) => (VPSLLDMasked128const [int8(c)] x (VPMOVVec32x4ToM mask)) (ShiftAllLeftMaskedUint32x4 x y mask) => (VPSLLDMasked128 x y (VPMOVVec32x4ToM mask)) +(ShiftAllLeftMaskedUint32x8 x (MOVQconst [c]) mask) => (VPSLLDMasked256const [int8(c)] x (VPMOVVec32x8ToM mask)) (ShiftAllLeftMaskedUint32x8 x y mask) => (VPSLLDMasked256 x y (VPMOVVec32x8ToM mask)) +(ShiftAllLeftMaskedUint32x16 x (MOVQconst [c]) mask) => (VPSLLDMasked512const [int8(c)] x (VPMOVVec32x16ToM mask)) (ShiftAllLeftMaskedUint32x16 x y mask) => (VPSLLDMasked512 x y (VPMOVVec32x16ToM mask)) +(ShiftAllLeftMaskedUint64x2 x (MOVQconst [c]) mask) => (VPSLLQMasked128const [int8(c)] x (VPMOVVec64x2ToM mask)) (ShiftAllLeftMaskedUint64x2 x y mask) => (VPSLLQMasked128 x y (VPMOVVec64x2ToM mask)) 
+(ShiftAllLeftMaskedUint64x4 x (MOVQconst [c]) mask) => (VPSLLQMasked256const [int8(c)] x (VPMOVVec64x4ToM mask)) (ShiftAllLeftMaskedUint64x4 x y mask) => (VPSLLQMasked256 x y (VPMOVVec64x4ToM mask)) +(ShiftAllLeftMaskedUint64x8 x (MOVQconst [c]) mask) => (VPSLLQMasked512const [int8(c)] x (VPMOVVec64x8ToM mask)) (ShiftAllLeftMaskedUint64x8 x y mask) => (VPSLLQMasked512 x y (VPMOVVec64x8ToM mask)) -(ShiftAllRightInt16x8 ...) => (VPSRAW128 ...) -(ShiftAllRightInt16x16 ...) => (VPSRAW256 ...) -(ShiftAllRightInt16x32 ...) => (VPSRAW512 ...) -(ShiftAllRightInt32x4 ...) => (VPSRAD128 ...) -(ShiftAllRightInt32x8 ...) => (VPSRAD256 ...) -(ShiftAllRightInt32x16 ...) => (VPSRAD512 ...) -(ShiftAllRightInt64x2 ...) => (VPSRAQ128 ...) -(ShiftAllRightInt64x4 ...) => (VPSRAQ256 ...) -(ShiftAllRightInt64x8 ...) => (VPSRAQ512 ...) -(ShiftAllRightUint16x8 ...) => (VPSRLW128 ...) -(ShiftAllRightUint16x16 ...) => (VPSRLW256 ...) -(ShiftAllRightUint16x32 ...) => (VPSRLW512 ...) -(ShiftAllRightUint32x4 ...) => (VPSRLD128 ...) -(ShiftAllRightUint32x8 ...) => (VPSRLD256 ...) -(ShiftAllRightUint32x16 ...) => (VPSRLD512 ...) -(ShiftAllRightUint64x2 ...) => (VPSRLQ128 ...) -(ShiftAllRightUint64x4 ...) => (VPSRLQ256 ...) -(ShiftAllRightUint64x8 ...) => (VPSRLQ512 ...) +(ShiftAllRightInt16x8 x (MOVQconst [c])) => (VPSRAW128const [int8(c)] x) +(ShiftAllRightInt16x8 x y) => (VPSRAW128 x y) +(ShiftAllRightInt16x16 x (MOVQconst [c])) => (VPSRAW256const [int8(c)] x) +(ShiftAllRightInt16x16 x y) => (VPSRAW256 x y) +(ShiftAllRightInt16x32 x (MOVQconst [c])) => (VPSRAW512const [int8(c)] x) +(ShiftAllRightInt16x32 x y) => (VPSRAW512 x y) +(ShiftAllRightInt32x4 x (MOVQconst [c])) => (VPSRAD128const [int8(c)] x) +(ShiftAllRightInt32x4 x y) => (VPSRAD128 x y) +(ShiftAllRightInt32x8 x (MOVQconst [c])) => (VPSRAD256const [int8(c)] x) +(ShiftAllRightInt32x8 x y) => (VPSRAD256 x y) +(ShiftAllRightInt32x16 x (MOVQconst [c])) => (VPSRAD512const [int8(c)] x) +(ShiftAllRightInt32x16 x y) => (VPSRAD512 x y) +(ShiftAllRightInt64x2 x (MOVQconst [c])) => (VPSRAQ128const [int8(c)] x) +(ShiftAllRightInt64x2 x y) => (VPSRAQ128 x y) +(ShiftAllRightInt64x4 x (MOVQconst [c])) => (VPSRAQ256const [int8(c)] x) +(ShiftAllRightInt64x4 x y) => (VPSRAQ256 x y) +(ShiftAllRightInt64x8 x (MOVQconst [c])) => (VPSRAQ512const [int8(c)] x) +(ShiftAllRightInt64x8 x y) => (VPSRAQ512 x y) +(ShiftAllRightUint16x8 x (MOVQconst [c])) => (VPSRLW128const [int8(c)] x) +(ShiftAllRightUint16x8 x y) => (VPSRLW128 x y) +(ShiftAllRightUint16x16 x (MOVQconst [c])) => (VPSRLW256const [int8(c)] x) +(ShiftAllRightUint16x16 x y) => (VPSRLW256 x y) +(ShiftAllRightUint16x32 x (MOVQconst [c])) => (VPSRLW512const [int8(c)] x) +(ShiftAllRightUint16x32 x y) => (VPSRLW512 x y) +(ShiftAllRightUint32x4 x (MOVQconst [c])) => (VPSRLD128const [int8(c)] x) +(ShiftAllRightUint32x4 x y) => (VPSRLD128 x y) +(ShiftAllRightUint32x8 x (MOVQconst [c])) => (VPSRLD256const [int8(c)] x) +(ShiftAllRightUint32x8 x y) => (VPSRLD256 x y) +(ShiftAllRightUint32x16 x (MOVQconst [c])) => (VPSRLD512const [int8(c)] x) +(ShiftAllRightUint32x16 x y) => (VPSRLD512 x y) +(ShiftAllRightUint64x2 x (MOVQconst [c])) => (VPSRLQ128const [int8(c)] x) +(ShiftAllRightUint64x2 x y) => (VPSRLQ128 x y) +(ShiftAllRightUint64x4 x (MOVQconst [c])) => (VPSRLQ256const [int8(c)] x) +(ShiftAllRightUint64x4 x y) => (VPSRLQ256 x y) +(ShiftAllRightUint64x8 x (MOVQconst [c])) => (VPSRLQ512const [int8(c)] x) +(ShiftAllRightUint64x8 x y) => (VPSRLQ512 x y) (ShiftAllRightConcatInt16x8 ...) => (VPSHRDW128 ...) 
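The right-shift rules above keep the signed/unsigned split of the old lowerings: Int lanes map to the arithmetic VPSRA* forms (sign bit replicated) and Uint lanes to the logical VPSRL* forms (zero fill), each now with a constant-count immediate variant as well. A small scalar illustration of why the two encodings differ:

    package main

    import "fmt"

    func main() {
    	// Arithmetic shift (VPSRAW/D/Q, used for Int lanes): the sign bit is
    	// replicated, so negative values stay negative.
    	var s int16 = -8
    	fmt.Println(s >> 1) // -4

    	// Logical shift (VPSRLW/D/Q, used for Uint lanes): zeros are shifted in
    	// from the top, so the same bit pattern divides as a large unsigned value.
    	var u uint16 = 0xFFF8 // same bits as int16(-8)
    	fmt.Println(u >> 1)   // 32764 (0x7FFC)
    }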
(ShiftAllRightConcatInt16x16 ...) => (VPSHRDW256 ...) (ShiftAllRightConcatInt16x32 ...) => (VPSHRDW512 ...) @@ -1471,23 +1525,41 @@ (ShiftAllRightConcatMaskedUint64x2 [a] x y mask) => (VPSHRDQMasked128 [a] x y (VPMOVVec64x2ToM mask)) (ShiftAllRightConcatMaskedUint64x4 [a] x y mask) => (VPSHRDQMasked256 [a] x y (VPMOVVec64x4ToM mask)) (ShiftAllRightConcatMaskedUint64x8 [a] x y mask) => (VPSHRDQMasked512 [a] x y (VPMOVVec64x8ToM mask)) +(ShiftAllRightMaskedInt16x8 x (MOVQconst [c]) mask) => (VPSRAWMasked128const [int8(c)] x (VPMOVVec16x8ToM mask)) (ShiftAllRightMaskedInt16x8 x y mask) => (VPSRAWMasked128 x y (VPMOVVec16x8ToM mask)) +(ShiftAllRightMaskedInt16x16 x (MOVQconst [c]) mask) => (VPSRAWMasked256const [int8(c)] x (VPMOVVec16x16ToM mask)) (ShiftAllRightMaskedInt16x16 x y mask) => (VPSRAWMasked256 x y (VPMOVVec16x16ToM mask)) +(ShiftAllRightMaskedInt16x32 x (MOVQconst [c]) mask) => (VPSRAWMasked512const [int8(c)] x (VPMOVVec16x32ToM mask)) (ShiftAllRightMaskedInt16x32 x y mask) => (VPSRAWMasked512 x y (VPMOVVec16x32ToM mask)) +(ShiftAllRightMaskedInt32x4 x (MOVQconst [c]) mask) => (VPSRADMasked128const [int8(c)] x (VPMOVVec32x4ToM mask)) (ShiftAllRightMaskedInt32x4 x y mask) => (VPSRADMasked128 x y (VPMOVVec32x4ToM mask)) +(ShiftAllRightMaskedInt32x8 x (MOVQconst [c]) mask) => (VPSRADMasked256const [int8(c)] x (VPMOVVec32x8ToM mask)) (ShiftAllRightMaskedInt32x8 x y mask) => (VPSRADMasked256 x y (VPMOVVec32x8ToM mask)) +(ShiftAllRightMaskedInt32x16 x (MOVQconst [c]) mask) => (VPSRADMasked512const [int8(c)] x (VPMOVVec32x16ToM mask)) (ShiftAllRightMaskedInt32x16 x y mask) => (VPSRADMasked512 x y (VPMOVVec32x16ToM mask)) +(ShiftAllRightMaskedInt64x2 x (MOVQconst [c]) mask) => (VPSRAQMasked128const [int8(c)] x (VPMOVVec64x2ToM mask)) (ShiftAllRightMaskedInt64x2 x y mask) => (VPSRAQMasked128 x y (VPMOVVec64x2ToM mask)) +(ShiftAllRightMaskedInt64x4 x (MOVQconst [c]) mask) => (VPSRAQMasked256const [int8(c)] x (VPMOVVec64x4ToM mask)) (ShiftAllRightMaskedInt64x4 x y mask) => (VPSRAQMasked256 x y (VPMOVVec64x4ToM mask)) +(ShiftAllRightMaskedInt64x8 x (MOVQconst [c]) mask) => (VPSRAQMasked512const [int8(c)] x (VPMOVVec64x8ToM mask)) (ShiftAllRightMaskedInt64x8 x y mask) => (VPSRAQMasked512 x y (VPMOVVec64x8ToM mask)) +(ShiftAllRightMaskedUint16x8 x (MOVQconst [c]) mask) => (VPSRLWMasked128const [int8(c)] x (VPMOVVec16x8ToM mask)) (ShiftAllRightMaskedUint16x8 x y mask) => (VPSRLWMasked128 x y (VPMOVVec16x8ToM mask)) +(ShiftAllRightMaskedUint16x16 x (MOVQconst [c]) mask) => (VPSRLWMasked256const [int8(c)] x (VPMOVVec16x16ToM mask)) (ShiftAllRightMaskedUint16x16 x y mask) => (VPSRLWMasked256 x y (VPMOVVec16x16ToM mask)) +(ShiftAllRightMaskedUint16x32 x (MOVQconst [c]) mask) => (VPSRLWMasked512const [int8(c)] x (VPMOVVec16x32ToM mask)) (ShiftAllRightMaskedUint16x32 x y mask) => (VPSRLWMasked512 x y (VPMOVVec16x32ToM mask)) +(ShiftAllRightMaskedUint32x4 x (MOVQconst [c]) mask) => (VPSRLDMasked128const [int8(c)] x (VPMOVVec32x4ToM mask)) (ShiftAllRightMaskedUint32x4 x y mask) => (VPSRLDMasked128 x y (VPMOVVec32x4ToM mask)) +(ShiftAllRightMaskedUint32x8 x (MOVQconst [c]) mask) => (VPSRLDMasked256const [int8(c)] x (VPMOVVec32x8ToM mask)) (ShiftAllRightMaskedUint32x8 x y mask) => (VPSRLDMasked256 x y (VPMOVVec32x8ToM mask)) +(ShiftAllRightMaskedUint32x16 x (MOVQconst [c]) mask) => (VPSRLDMasked512const [int8(c)] x (VPMOVVec32x16ToM mask)) (ShiftAllRightMaskedUint32x16 x y mask) => (VPSRLDMasked512 x y (VPMOVVec32x16ToM mask)) +(ShiftAllRightMaskedUint64x2 x (MOVQconst [c]) mask) => (VPSRLQMasked128const 
[int8(c)] x (VPMOVVec64x2ToM mask)) (ShiftAllRightMaskedUint64x2 x y mask) => (VPSRLQMasked128 x y (VPMOVVec64x2ToM mask)) +(ShiftAllRightMaskedUint64x4 x (MOVQconst [c]) mask) => (VPSRLQMasked256const [int8(c)] x (VPMOVVec64x4ToM mask)) (ShiftAllRightMaskedUint64x4 x y mask) => (VPSRLQMasked256 x y (VPMOVVec64x4ToM mask)) +(ShiftAllRightMaskedUint64x8 x (MOVQconst [c]) mask) => (VPSRLQMasked512const [int8(c)] x (VPMOVVec64x8ToM mask)) (ShiftAllRightMaskedUint64x8 x y mask) => (VPSRLQMasked512 x y (VPMOVVec64x8ToM mask)) (ShiftLeftInt16x8 ...) => (VPSLLVW128 ...) (ShiftLeftInt16x16 ...) => (VPSLLVW256 ...) diff --git a/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go b/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go index adb6dd968f..8b7a7791bc 100644 --- a/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go +++ b/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go @@ -1002,5 +1002,59 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf {name: "VPSHRDQMasked128", argLength: 3, reg: w2kw, asm: "VPSHRDQ", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPSHRDQMasked256", argLength: 3, reg: w2kw, asm: "VPSHRDQ", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSHRDQMasked512", argLength: 3, reg: w2kw, asm: "VPSHRDQ", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSLLW128const", argLength: 1, reg: v11, asm: "VPSLLW", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSLLW256const", argLength: 1, reg: v11, asm: "VPSLLW", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSLLW512const", argLength: 1, reg: w11, asm: "VPSLLW", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSLLD128const", argLength: 1, reg: v11, asm: "VPSLLD", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSLLD256const", argLength: 1, reg: v11, asm: "VPSLLD", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSLLD512const", argLength: 1, reg: w11, asm: "VPSLLD", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSLLQ128const", argLength: 1, reg: v11, asm: "VPSLLQ", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSLLQ256const", argLength: 1, reg: v11, asm: "VPSLLQ", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSLLQ512const", argLength: 1, reg: w11, asm: "VPSLLQ", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSLLWMasked128const", argLength: 2, reg: wkw, asm: "VPSLLW", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSLLWMasked256const", argLength: 2, reg: wkw, asm: "VPSLLW", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSLLWMasked512const", argLength: 2, reg: wkw, asm: "VPSLLW", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSLLDMasked128const", argLength: 2, reg: wkw, asm: "VPSLLD", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSLLDMasked256const", argLength: 2, reg: wkw, asm: "VPSLLD", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSLLDMasked512const", argLength: 2, reg: wkw, asm: "VPSLLD", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSLLQMasked128const", argLength: 2, reg: wkw, asm: "VPSLLQ", aux: 
"Int8", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSLLQMasked256const", argLength: 2, reg: wkw, asm: "VPSLLQ", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSLLQMasked512const", argLength: 2, reg: wkw, asm: "VPSLLQ", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSRLW128const", argLength: 1, reg: v11, asm: "VPSRLW", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSRLW256const", argLength: 1, reg: v11, asm: "VPSRLW", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSRLW512const", argLength: 1, reg: w11, asm: "VPSRLW", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSRLD128const", argLength: 1, reg: v11, asm: "VPSRLD", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSRLD256const", argLength: 1, reg: v11, asm: "VPSRLD", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSRLD512const", argLength: 1, reg: w11, asm: "VPSRLD", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSRLQ128const", argLength: 1, reg: v11, asm: "VPSRLQ", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSRLQ256const", argLength: 1, reg: v11, asm: "VPSRLQ", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSRLQ512const", argLength: 1, reg: w11, asm: "VPSRLQ", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSRAW128const", argLength: 1, reg: v11, asm: "VPSRAW", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSRAW256const", argLength: 1, reg: v11, asm: "VPSRAW", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSRAW512const", argLength: 1, reg: w11, asm: "VPSRAW", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSRAD128const", argLength: 1, reg: v11, asm: "VPSRAD", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSRAD256const", argLength: 1, reg: v11, asm: "VPSRAD", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSRAD512const", argLength: 1, reg: w11, asm: "VPSRAD", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSRAQ128const", argLength: 1, reg: w11, asm: "VPSRAQ", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSRAQ256const", argLength: 1, reg: w11, asm: "VPSRAQ", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSRAQ512const", argLength: 1, reg: w11, asm: "VPSRAQ", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSRLWMasked128const", argLength: 2, reg: wkw, asm: "VPSRLW", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSRLWMasked256const", argLength: 2, reg: wkw, asm: "VPSRLW", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSRLWMasked512const", argLength: 2, reg: wkw, asm: "VPSRLW", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSRLDMasked128const", argLength: 2, reg: wkw, asm: "VPSRLD", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSRLDMasked256const", argLength: 2, reg: wkw, asm: "VPSRLD", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSRLDMasked512const", argLength: 2, 
reg: wkw, asm: "VPSRLD", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSRLQMasked128const", argLength: 2, reg: wkw, asm: "VPSRLQ", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSRLQMasked256const", argLength: 2, reg: wkw, asm: "VPSRLQ", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSRLQMasked512const", argLength: 2, reg: wkw, asm: "VPSRLQ", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSRAWMasked128const", argLength: 2, reg: wkw, asm: "VPSRAW", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSRAWMasked256const", argLength: 2, reg: wkw, asm: "VPSRAW", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSRAWMasked512const", argLength: 2, reg: wkw, asm: "VPSRAW", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSRADMasked128const", argLength: 2, reg: wkw, asm: "VPSRAD", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSRADMasked256const", argLength: 2, reg: wkw, asm: "VPSRAD", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSRADMasked512const", argLength: 2, reg: wkw, asm: "VPSRAD", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VPSRAQMasked128const", argLength: 2, reg: wkw, asm: "VPSRAQ", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VPSRAQMasked256const", argLength: 2, reg: wkw, asm: "VPSRAQ", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VPSRAQMasked512const", argLength: 2, reg: wkw, asm: "VPSRAQ", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false}, } } diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go index a69612f28a..15fcabbb8d 100644 --- a/src/cmd/compile/internal/ssa/opGen.go +++ b/src/cmd/compile/internal/ssa/opGen.go @@ -2221,6 +2221,60 @@ const ( OpAMD64VPSHRDQMasked128 OpAMD64VPSHRDQMasked256 OpAMD64VPSHRDQMasked512 + OpAMD64VPSLLW128const + OpAMD64VPSLLW256const + OpAMD64VPSLLW512const + OpAMD64VPSLLD128const + OpAMD64VPSLLD256const + OpAMD64VPSLLD512const + OpAMD64VPSLLQ128const + OpAMD64VPSLLQ256const + OpAMD64VPSLLQ512const + OpAMD64VPSLLWMasked128const + OpAMD64VPSLLWMasked256const + OpAMD64VPSLLWMasked512const + OpAMD64VPSLLDMasked128const + OpAMD64VPSLLDMasked256const + OpAMD64VPSLLDMasked512const + OpAMD64VPSLLQMasked128const + OpAMD64VPSLLQMasked256const + OpAMD64VPSLLQMasked512const + OpAMD64VPSRLW128const + OpAMD64VPSRLW256const + OpAMD64VPSRLW512const + OpAMD64VPSRLD128const + OpAMD64VPSRLD256const + OpAMD64VPSRLD512const + OpAMD64VPSRLQ128const + OpAMD64VPSRLQ256const + OpAMD64VPSRLQ512const + OpAMD64VPSRAW128const + OpAMD64VPSRAW256const + OpAMD64VPSRAW512const + OpAMD64VPSRAD128const + OpAMD64VPSRAD256const + OpAMD64VPSRAD512const + OpAMD64VPSRAQ128const + OpAMD64VPSRAQ256const + OpAMD64VPSRAQ512const + OpAMD64VPSRLWMasked128const + OpAMD64VPSRLWMasked256const + OpAMD64VPSRLWMasked512const + OpAMD64VPSRLDMasked128const + OpAMD64VPSRLDMasked256const + OpAMD64VPSRLDMasked512const + OpAMD64VPSRLQMasked128const + OpAMD64VPSRLQMasked256const + OpAMD64VPSRLQMasked512const + OpAMD64VPSRAWMasked128const + OpAMD64VPSRAWMasked256const + OpAMD64VPSRAWMasked512const + OpAMD64VPSRADMasked128const + OpAMD64VPSRADMasked256const + OpAMD64VPSRADMasked512const + OpAMD64VPSRAQMasked128const + OpAMD64VPSRAQMasked256const + 
OpAMD64VPSRAQMasked512const OpARMADD OpARMADDconst @@ -34317,6 +34371,789 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPSLLW128const", + auxType: auxInt8, + argLen: 1, + asm: x86.AVPSLLW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSLLW256const", + auxType: auxInt8, + argLen: 1, + asm: x86.AVPSLLW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSLLW512const", + auxType: auxInt8, + argLen: 1, + asm: x86.AVPSLLW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPSLLD128const", + auxType: auxInt8, + argLen: 1, + asm: x86.AVPSLLD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSLLD256const", + auxType: auxInt8, + argLen: 1, + asm: x86.AVPSLLD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSLLD512const", + auxType: auxInt8, + argLen: 1, + asm: x86.AVPSLLD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPSLLQ128const", + auxType: auxInt8, + argLen: 1, + asm: x86.AVPSLLQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSLLQ256const", + auxType: auxInt8, + argLen: 1, + asm: x86.AVPSLLQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSLLQ512const", + auxType: auxInt8, + argLen: 1, + asm: x86.AVPSLLQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPSLLWMasked128const", + auxType: auxInt8, + argLen: 2, + asm: x86.AVPSLLW, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 
X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSLLWMasked256const", + auxType: auxInt8, + argLen: 2, + asm: x86.AVPSLLW, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSLLWMasked512const", + auxType: auxInt8, + argLen: 2, + asm: x86.AVPSLLW, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSLLDMasked128const", + auxType: auxInt8, + argLen: 2, + asm: x86.AVPSLLD, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSLLDMasked256const", + auxType: auxInt8, + argLen: 2, + asm: x86.AVPSLLD, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSLLDMasked512const", + auxType: auxInt8, + argLen: 2, + asm: x86.AVPSLLD, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSLLQMasked128const", + auxType: auxInt8, + argLen: 2, + asm: x86.AVPSLLQ, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSLLQMasked256const", + auxType: auxInt8, + argLen: 2, + asm: x86.AVPSLLQ, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSLLQMasked512const", + auxType: auxInt8, + argLen: 2, + asm: x86.AVPSLLQ, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRLW128const", + auxType: auxInt8, + argLen: 1, + asm: x86.AVPSRLW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRLW256const", + auxType: auxInt8, + argLen: 1, + asm: x86.AVPSRLW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 
X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRLW512const", + auxType: auxInt8, + argLen: 1, + asm: x86.AVPSRLW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPSRLD128const", + auxType: auxInt8, + argLen: 1, + asm: x86.AVPSRLD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRLD256const", + auxType: auxInt8, + argLen: 1, + asm: x86.AVPSRLD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRLD512const", + auxType: auxInt8, + argLen: 1, + asm: x86.AVPSRLD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPSRLQ128const", + auxType: auxInt8, + argLen: 1, + asm: x86.AVPSRLQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRLQ256const", + auxType: auxInt8, + argLen: 1, + asm: x86.AVPSRLQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRLQ512const", + auxType: auxInt8, + argLen: 1, + asm: x86.AVPSRLQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPSRAW128const", + auxType: auxInt8, + argLen: 1, + asm: x86.AVPSRAW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRAW256const", + auxType: auxInt8, + argLen: 1, + asm: x86.AVPSRAW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRAW512const", + auxType: auxInt8, + argLen: 1, + asm: x86.AVPSRAW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 
X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPSRAD128const", + auxType: auxInt8, + argLen: 1, + asm: x86.AVPSRAD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRAD256const", + auxType: auxInt8, + argLen: 1, + asm: x86.AVPSRAD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRAD512const", + auxType: auxInt8, + argLen: 1, + asm: x86.AVPSRAD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPSRAQ128const", + auxType: auxInt8, + argLen: 1, + asm: x86.AVPSRAQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPSRAQ256const", + auxType: auxInt8, + argLen: 1, + asm: x86.AVPSRAQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPSRAQ512const", + auxType: auxInt8, + argLen: 1, + asm: x86.AVPSRAQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VPSRLWMasked128const", + auxType: auxInt8, + argLen: 2, + asm: x86.AVPSRLW, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRLWMasked256const", + auxType: auxInt8, + argLen: 2, + asm: x86.AVPSRLW, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRLWMasked512const", + auxType: auxInt8, + argLen: 2, + asm: x86.AVPSRLW, + reg: regInfo{ + inputs: []inputInfo{ + {1, 
71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRLDMasked128const", + auxType: auxInt8, + argLen: 2, + asm: x86.AVPSRLD, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRLDMasked256const", + auxType: auxInt8, + argLen: 2, + asm: x86.AVPSRLD, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRLDMasked512const", + auxType: auxInt8, + argLen: 2, + asm: x86.AVPSRLD, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRLQMasked128const", + auxType: auxInt8, + argLen: 2, + asm: x86.AVPSRLQ, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRLQMasked256const", + auxType: auxInt8, + argLen: 2, + asm: x86.AVPSRLQ, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRLQMasked512const", + auxType: auxInt8, + argLen: 2, + asm: x86.AVPSRLQ, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRAWMasked128const", + auxType: auxInt8, + argLen: 2, + asm: x86.AVPSRAW, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRAWMasked256const", + auxType: auxInt8, + argLen: 2, + asm: x86.AVPSRAW, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRAWMasked512const", + auxType: auxInt8, + argLen: 2, + asm: x86.AVPSRAW, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: 
"VPSRADMasked128const", + auxType: auxInt8, + argLen: 2, + asm: x86.AVPSRAD, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRADMasked256const", + auxType: auxInt8, + argLen: 2, + asm: x86.AVPSRAD, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRADMasked512const", + auxType: auxInt8, + argLen: 2, + asm: x86.AVPSRAD, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRAQMasked128const", + auxType: auxInt8, + argLen: 2, + asm: x86.AVPSRAQ, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRAQMasked256const", + auxType: auxInt8, + argLen: 2, + asm: x86.AVPSRAQ, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VPSRAQMasked512const", + auxType: auxInt8, + argLen: 2, + asm: x86.AVPSRAQ, + reg: regInfo{ + inputs: []inputInfo{ + {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, { name: "ADD", diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go index f0b25d3c5d..2e564b0c30 100644 --- a/src/cmd/compile/internal/ssa/rewriteAMD64.go +++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go @@ -4451,32 +4451,23 @@ func rewriteValueAMD64(v *Value) bool { v.Op = OpAMD64VPSHLDQ512 return true case OpShiftAllLeftInt16x16: - v.Op = OpAMD64VPSLLW256 - return true + return rewriteValueAMD64_OpShiftAllLeftInt16x16(v) case OpShiftAllLeftInt16x32: - v.Op = OpAMD64VPSLLW512 - return true + return rewriteValueAMD64_OpShiftAllLeftInt16x32(v) case OpShiftAllLeftInt16x8: - v.Op = OpAMD64VPSLLW128 - return true + return rewriteValueAMD64_OpShiftAllLeftInt16x8(v) case OpShiftAllLeftInt32x16: - v.Op = OpAMD64VPSLLD512 - return true + return rewriteValueAMD64_OpShiftAllLeftInt32x16(v) case OpShiftAllLeftInt32x4: - v.Op = OpAMD64VPSLLD128 - return true + return rewriteValueAMD64_OpShiftAllLeftInt32x4(v) case OpShiftAllLeftInt32x8: - v.Op = OpAMD64VPSLLD256 - return true + return rewriteValueAMD64_OpShiftAllLeftInt32x8(v) case OpShiftAllLeftInt64x2: - v.Op = OpAMD64VPSLLQ128 - return true + return rewriteValueAMD64_OpShiftAllLeftInt64x2(v) case OpShiftAllLeftInt64x4: - v.Op = OpAMD64VPSLLQ256 - return true + return rewriteValueAMD64_OpShiftAllLeftInt64x4(v) case OpShiftAllLeftInt64x8: - v.Op = 
OpAMD64VPSLLQ512 - return true + return rewriteValueAMD64_OpShiftAllLeftInt64x8(v) case OpShiftAllLeftMaskedInt16x16: return rewriteValueAMD64_OpShiftAllLeftMaskedInt16x16(v) case OpShiftAllLeftMaskedInt16x32: @@ -4514,32 +4505,23 @@ func rewriteValueAMD64(v *Value) bool { case OpShiftAllLeftMaskedUint64x8: return rewriteValueAMD64_OpShiftAllLeftMaskedUint64x8(v) case OpShiftAllLeftUint16x16: - v.Op = OpAMD64VPSLLW256 - return true + return rewriteValueAMD64_OpShiftAllLeftUint16x16(v) case OpShiftAllLeftUint16x32: - v.Op = OpAMD64VPSLLW512 - return true + return rewriteValueAMD64_OpShiftAllLeftUint16x32(v) case OpShiftAllLeftUint16x8: - v.Op = OpAMD64VPSLLW128 - return true + return rewriteValueAMD64_OpShiftAllLeftUint16x8(v) case OpShiftAllLeftUint32x16: - v.Op = OpAMD64VPSLLD512 - return true + return rewriteValueAMD64_OpShiftAllLeftUint32x16(v) case OpShiftAllLeftUint32x4: - v.Op = OpAMD64VPSLLD128 - return true + return rewriteValueAMD64_OpShiftAllLeftUint32x4(v) case OpShiftAllLeftUint32x8: - v.Op = OpAMD64VPSLLD256 - return true + return rewriteValueAMD64_OpShiftAllLeftUint32x8(v) case OpShiftAllLeftUint64x2: - v.Op = OpAMD64VPSLLQ128 - return true + return rewriteValueAMD64_OpShiftAllLeftUint64x2(v) case OpShiftAllLeftUint64x4: - v.Op = OpAMD64VPSLLQ256 - return true + return rewriteValueAMD64_OpShiftAllLeftUint64x4(v) case OpShiftAllLeftUint64x8: - v.Op = OpAMD64VPSLLQ512 - return true + return rewriteValueAMD64_OpShiftAllLeftUint64x8(v) case OpShiftAllRightConcatInt16x16: v.Op = OpAMD64VPSHRDW256 return true @@ -4631,32 +4613,23 @@ func rewriteValueAMD64(v *Value) bool { v.Op = OpAMD64VPSHRDQ512 return true case OpShiftAllRightInt16x16: - v.Op = OpAMD64VPSRAW256 - return true + return rewriteValueAMD64_OpShiftAllRightInt16x16(v) case OpShiftAllRightInt16x32: - v.Op = OpAMD64VPSRAW512 - return true + return rewriteValueAMD64_OpShiftAllRightInt16x32(v) case OpShiftAllRightInt16x8: - v.Op = OpAMD64VPSRAW128 - return true + return rewriteValueAMD64_OpShiftAllRightInt16x8(v) case OpShiftAllRightInt32x16: - v.Op = OpAMD64VPSRAD512 - return true + return rewriteValueAMD64_OpShiftAllRightInt32x16(v) case OpShiftAllRightInt32x4: - v.Op = OpAMD64VPSRAD128 - return true + return rewriteValueAMD64_OpShiftAllRightInt32x4(v) case OpShiftAllRightInt32x8: - v.Op = OpAMD64VPSRAD256 - return true + return rewriteValueAMD64_OpShiftAllRightInt32x8(v) case OpShiftAllRightInt64x2: - v.Op = OpAMD64VPSRAQ128 - return true + return rewriteValueAMD64_OpShiftAllRightInt64x2(v) case OpShiftAllRightInt64x4: - v.Op = OpAMD64VPSRAQ256 - return true + return rewriteValueAMD64_OpShiftAllRightInt64x4(v) case OpShiftAllRightInt64x8: - v.Op = OpAMD64VPSRAQ512 - return true + return rewriteValueAMD64_OpShiftAllRightInt64x8(v) case OpShiftAllRightMaskedInt16x16: return rewriteValueAMD64_OpShiftAllRightMaskedInt16x16(v) case OpShiftAllRightMaskedInt16x32: @@ -4694,32 +4667,23 @@ func rewriteValueAMD64(v *Value) bool { case OpShiftAllRightMaskedUint64x8: return rewriteValueAMD64_OpShiftAllRightMaskedUint64x8(v) case OpShiftAllRightUint16x16: - v.Op = OpAMD64VPSRLW256 - return true + return rewriteValueAMD64_OpShiftAllRightUint16x16(v) case OpShiftAllRightUint16x32: - v.Op = OpAMD64VPSRLW512 - return true + return rewriteValueAMD64_OpShiftAllRightUint16x32(v) case OpShiftAllRightUint16x8: - v.Op = OpAMD64VPSRLW128 - return true + return rewriteValueAMD64_OpShiftAllRightUint16x8(v) case OpShiftAllRightUint32x16: - v.Op = OpAMD64VPSRLD512 - return true + return rewriteValueAMD64_OpShiftAllRightUint32x16(v) case 
OpShiftAllRightUint32x4: - v.Op = OpAMD64VPSRLD128 - return true + return rewriteValueAMD64_OpShiftAllRightUint32x4(v) case OpShiftAllRightUint32x8: - v.Op = OpAMD64VPSRLD256 - return true + return rewriteValueAMD64_OpShiftAllRightUint32x8(v) case OpShiftAllRightUint64x2: - v.Op = OpAMD64VPSRLQ128 - return true + return rewriteValueAMD64_OpShiftAllRightUint64x2(v) case OpShiftAllRightUint64x4: - v.Op = OpAMD64VPSRLQ256 - return true + return rewriteValueAMD64_OpShiftAllRightUint64x4(v) case OpShiftAllRightUint64x8: - v.Op = OpAMD64VPSRLQ512 - return true + return rewriteValueAMD64_OpShiftAllRightUint64x8(v) case OpShiftLeftConcatInt16x16: v.Op = OpAMD64VPSHLDVW256 return true @@ -50791,11 +50755,261 @@ func rewriteValueAMD64_OpShiftAllLeftConcatMaskedUint64x8(v *Value) bool { return true } } +func rewriteValueAMD64_OpShiftAllLeftInt16x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (ShiftAllLeftInt16x16 x (MOVQconst [c])) + // result: (VPSLLW256const [int8(c)] x) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSLLW256const) + v.AuxInt = int8ToAuxInt(int8(c)) + v.AddArg(x) + return true + } + // match: (ShiftAllLeftInt16x16 x y) + // result: (VPSLLW256 x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VPSLLW256) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpShiftAllLeftInt16x32(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (ShiftAllLeftInt16x32 x (MOVQconst [c])) + // result: (VPSLLW512const [int8(c)] x) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSLLW512const) + v.AuxInt = int8ToAuxInt(int8(c)) + v.AddArg(x) + return true + } + // match: (ShiftAllLeftInt16x32 x y) + // result: (VPSLLW512 x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VPSLLW512) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpShiftAllLeftInt16x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (ShiftAllLeftInt16x8 x (MOVQconst [c])) + // result: (VPSLLW128const [int8(c)] x) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSLLW128const) + v.AuxInt = int8ToAuxInt(int8(c)) + v.AddArg(x) + return true + } + // match: (ShiftAllLeftInt16x8 x y) + // result: (VPSLLW128 x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VPSLLW128) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpShiftAllLeftInt32x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (ShiftAllLeftInt32x16 x (MOVQconst [c])) + // result: (VPSLLD512const [int8(c)] x) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSLLD512const) + v.AuxInt = int8ToAuxInt(int8(c)) + v.AddArg(x) + return true + } + // match: (ShiftAllLeftInt32x16 x y) + // result: (VPSLLD512 x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VPSLLD512) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpShiftAllLeftInt32x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (ShiftAllLeftInt32x4 x (MOVQconst [c])) + // result: (VPSLLD128const [int8(c)] x) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSLLD128const) + v.AuxInt = int8ToAuxInt(int8(c)) + v.AddArg(x) + return true + } + // match: (ShiftAllLeftInt32x4 x y) + // result: (VPSLLD128 x y) + for { + x := v_0 + y := v_1 + 
v.reset(OpAMD64VPSLLD128) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpShiftAllLeftInt32x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (ShiftAllLeftInt32x8 x (MOVQconst [c])) + // result: (VPSLLD256const [int8(c)] x) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSLLD256const) + v.AuxInt = int8ToAuxInt(int8(c)) + v.AddArg(x) + return true + } + // match: (ShiftAllLeftInt32x8 x y) + // result: (VPSLLD256 x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VPSLLD256) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpShiftAllLeftInt64x2(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (ShiftAllLeftInt64x2 x (MOVQconst [c])) + // result: (VPSLLQ128const [int8(c)] x) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSLLQ128const) + v.AuxInt = int8ToAuxInt(int8(c)) + v.AddArg(x) + return true + } + // match: (ShiftAllLeftInt64x2 x y) + // result: (VPSLLQ128 x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VPSLLQ128) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpShiftAllLeftInt64x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (ShiftAllLeftInt64x4 x (MOVQconst [c])) + // result: (VPSLLQ256const [int8(c)] x) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSLLQ256const) + v.AuxInt = int8ToAuxInt(int8(c)) + v.AddArg(x) + return true + } + // match: (ShiftAllLeftInt64x4 x y) + // result: (VPSLLQ256 x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VPSLLQ256) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpShiftAllLeftInt64x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (ShiftAllLeftInt64x8 x (MOVQconst [c])) + // result: (VPSLLQ512const [int8(c)] x) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSLLQ512const) + v.AuxInt = int8ToAuxInt(int8(c)) + v.AddArg(x) + return true + } + // match: (ShiftAllLeftInt64x8 x y) + // result: (VPSLLQ512 x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VPSLLQ512) + v.AddArg2(x, y) + return true + } +} func rewriteValueAMD64_OpShiftAllLeftMaskedInt16x16(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block + // match: (ShiftAllLeftMaskedInt16x16 x (MOVQconst [c]) mask) + // result: (VPSLLWMasked256const [int8(c)] x (VPMOVVec16x16ToM mask)) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSLLWMasked256const) + v.AuxInt = int8ToAuxInt(int8(c)) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } // match: (ShiftAllLeftMaskedInt16x16 x y mask) // result: (VPSLLWMasked256 x y (VPMOVVec16x16ToM mask)) for { @@ -50814,6 +51028,22 @@ func rewriteValueAMD64_OpShiftAllLeftMaskedInt16x32(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block + // match: (ShiftAllLeftMaskedInt16x32 x (MOVQconst [c]) mask) + // result: (VPSLLWMasked512const [int8(c)] x (VPMOVVec16x32ToM mask)) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSLLWMasked512const) + v.AuxInt = int8ToAuxInt(int8(c)) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + 
return true + } // match: (ShiftAllLeftMaskedInt16x32 x y mask) // result: (VPSLLWMasked512 x y (VPMOVVec16x32ToM mask)) for { @@ -50832,6 +51062,22 @@ func rewriteValueAMD64_OpShiftAllLeftMaskedInt16x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block + // match: (ShiftAllLeftMaskedInt16x8 x (MOVQconst [c]) mask) + // result: (VPSLLWMasked128const [int8(c)] x (VPMOVVec16x8ToM mask)) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSLLWMasked128const) + v.AuxInt = int8ToAuxInt(int8(c)) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } // match: (ShiftAllLeftMaskedInt16x8 x y mask) // result: (VPSLLWMasked128 x y (VPMOVVec16x8ToM mask)) for { @@ -50850,6 +51096,22 @@ func rewriteValueAMD64_OpShiftAllLeftMaskedInt32x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block + // match: (ShiftAllLeftMaskedInt32x16 x (MOVQconst [c]) mask) + // result: (VPSLLDMasked512const [int8(c)] x (VPMOVVec32x16ToM mask)) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSLLDMasked512const) + v.AuxInt = int8ToAuxInt(int8(c)) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } // match: (ShiftAllLeftMaskedInt32x16 x y mask) // result: (VPSLLDMasked512 x y (VPMOVVec32x16ToM mask)) for { @@ -50868,6 +51130,22 @@ func rewriteValueAMD64_OpShiftAllLeftMaskedInt32x4(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block + // match: (ShiftAllLeftMaskedInt32x4 x (MOVQconst [c]) mask) + // result: (VPSLLDMasked128const [int8(c)] x (VPMOVVec32x4ToM mask)) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSLLDMasked128const) + v.AuxInt = int8ToAuxInt(int8(c)) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } // match: (ShiftAllLeftMaskedInt32x4 x y mask) // result: (VPSLLDMasked128 x y (VPMOVVec32x4ToM mask)) for { @@ -50886,6 +51164,22 @@ func rewriteValueAMD64_OpShiftAllLeftMaskedInt32x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block + // match: (ShiftAllLeftMaskedInt32x8 x (MOVQconst [c]) mask) + // result: (VPSLLDMasked256const [int8(c)] x (VPMOVVec32x8ToM mask)) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSLLDMasked256const) + v.AuxInt = int8ToAuxInt(int8(c)) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } // match: (ShiftAllLeftMaskedInt32x8 x y mask) // result: (VPSLLDMasked256 x y (VPMOVVec32x8ToM mask)) for { @@ -50904,6 +51198,22 @@ func rewriteValueAMD64_OpShiftAllLeftMaskedInt64x2(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block + // match: (ShiftAllLeftMaskedInt64x2 x (MOVQconst [c]) mask) + // result: (VPSLLQMasked128const [int8(c)] x (VPMOVVec64x2ToM mask)) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSLLQMasked128const) + v.AuxInt = int8ToAuxInt(int8(c)) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } // match: (ShiftAllLeftMaskedInt64x2 x y mask) // result: (VPSLLQMasked128 
x y (VPMOVVec64x2ToM mask)) for { @@ -50922,6 +51232,22 @@ func rewriteValueAMD64_OpShiftAllLeftMaskedInt64x4(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block + // match: (ShiftAllLeftMaskedInt64x4 x (MOVQconst [c]) mask) + // result: (VPSLLQMasked256const [int8(c)] x (VPMOVVec64x4ToM mask)) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSLLQMasked256const) + v.AuxInt = int8ToAuxInt(int8(c)) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } // match: (ShiftAllLeftMaskedInt64x4 x y mask) // result: (VPSLLQMasked256 x y (VPMOVVec64x4ToM mask)) for { @@ -50940,6 +51266,22 @@ func rewriteValueAMD64_OpShiftAllLeftMaskedInt64x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block + // match: (ShiftAllLeftMaskedInt64x8 x (MOVQconst [c]) mask) + // result: (VPSLLQMasked512const [int8(c)] x (VPMOVVec64x8ToM mask)) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSLLQMasked512const) + v.AuxInt = int8ToAuxInt(int8(c)) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } // match: (ShiftAllLeftMaskedInt64x8 x y mask) // result: (VPSLLQMasked512 x y (VPMOVVec64x8ToM mask)) for { @@ -50958,6 +51300,22 @@ func rewriteValueAMD64_OpShiftAllLeftMaskedUint16x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block + // match: (ShiftAllLeftMaskedUint16x16 x (MOVQconst [c]) mask) + // result: (VPSLLWMasked256const [int8(c)] x (VPMOVVec16x16ToM mask)) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSLLWMasked256const) + v.AuxInt = int8ToAuxInt(int8(c)) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } // match: (ShiftAllLeftMaskedUint16x16 x y mask) // result: (VPSLLWMasked256 x y (VPMOVVec16x16ToM mask)) for { @@ -50976,6 +51334,22 @@ func rewriteValueAMD64_OpShiftAllLeftMaskedUint16x32(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block + // match: (ShiftAllLeftMaskedUint16x32 x (MOVQconst [c]) mask) + // result: (VPSLLWMasked512const [int8(c)] x (VPMOVVec16x32ToM mask)) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSLLWMasked512const) + v.AuxInt = int8ToAuxInt(int8(c)) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } // match: (ShiftAllLeftMaskedUint16x32 x y mask) // result: (VPSLLWMasked512 x y (VPMOVVec16x32ToM mask)) for { @@ -50994,6 +51368,22 @@ func rewriteValueAMD64_OpShiftAllLeftMaskedUint16x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block + // match: (ShiftAllLeftMaskedUint16x8 x (MOVQconst [c]) mask) + // result: (VPSLLWMasked128const [int8(c)] x (VPMOVVec16x8ToM mask)) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSLLWMasked128const) + v.AuxInt = int8ToAuxInt(int8(c)) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } // match: (ShiftAllLeftMaskedUint16x8 x y mask) // result: (VPSLLWMasked128 x y (VPMOVVec16x8ToM mask)) for { @@ -51012,6 +51402,22 @@ func 
rewriteValueAMD64_OpShiftAllLeftMaskedUint32x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block + // match: (ShiftAllLeftMaskedUint32x16 x (MOVQconst [c]) mask) + // result: (VPSLLDMasked512const [int8(c)] x (VPMOVVec32x16ToM mask)) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSLLDMasked512const) + v.AuxInt = int8ToAuxInt(int8(c)) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } // match: (ShiftAllLeftMaskedUint32x16 x y mask) // result: (VPSLLDMasked512 x y (VPMOVVec32x16ToM mask)) for { @@ -51030,6 +51436,22 @@ func rewriteValueAMD64_OpShiftAllLeftMaskedUint32x4(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block + // match: (ShiftAllLeftMaskedUint32x4 x (MOVQconst [c]) mask) + // result: (VPSLLDMasked128const [int8(c)] x (VPMOVVec32x4ToM mask)) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSLLDMasked128const) + v.AuxInt = int8ToAuxInt(int8(c)) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } // match: (ShiftAllLeftMaskedUint32x4 x y mask) // result: (VPSLLDMasked128 x y (VPMOVVec32x4ToM mask)) for { @@ -51048,6 +51470,22 @@ func rewriteValueAMD64_OpShiftAllLeftMaskedUint32x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block + // match: (ShiftAllLeftMaskedUint32x8 x (MOVQconst [c]) mask) + // result: (VPSLLDMasked256const [int8(c)] x (VPMOVVec32x8ToM mask)) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSLLDMasked256const) + v.AuxInt = int8ToAuxInt(int8(c)) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } // match: (ShiftAllLeftMaskedUint32x8 x y mask) // result: (VPSLLDMasked256 x y (VPMOVVec32x8ToM mask)) for { @@ -51066,6 +51504,22 @@ func rewriteValueAMD64_OpShiftAllLeftMaskedUint64x2(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block + // match: (ShiftAllLeftMaskedUint64x2 x (MOVQconst [c]) mask) + // result: (VPSLLQMasked128const [int8(c)] x (VPMOVVec64x2ToM mask)) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSLLQMasked128const) + v.AuxInt = int8ToAuxInt(int8(c)) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } // match: (ShiftAllLeftMaskedUint64x2 x y mask) // result: (VPSLLQMasked128 x y (VPMOVVec64x2ToM mask)) for { @@ -51084,6 +51538,22 @@ func rewriteValueAMD64_OpShiftAllLeftMaskedUint64x4(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block + // match: (ShiftAllLeftMaskedUint64x4 x (MOVQconst [c]) mask) + // result: (VPSLLQMasked256const [int8(c)] x (VPMOVVec64x4ToM mask)) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSLLQMasked256const) + v.AuxInt = int8ToAuxInt(int8(c)) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } // match: (ShiftAllLeftMaskedUint64x4 x y mask) // result: (VPSLLQMasked256 x y (VPMOVVec64x4ToM mask)) for { @@ -51102,6 +51572,22 @@ func rewriteValueAMD64_OpShiftAllLeftMaskedUint64x8(v *Value) bool { v_1 := 
v.Args[1] v_0 := v.Args[0] b := v.Block + // match: (ShiftAllLeftMaskedUint64x8 x (MOVQconst [c]) mask) + // result: (VPSLLQMasked512const [int8(c)] x (VPMOVVec64x8ToM mask)) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSLLQMasked512const) + v.AuxInt = int8ToAuxInt(int8(c)) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } // match: (ShiftAllLeftMaskedUint64x8 x y mask) // result: (VPSLLQMasked512 x y (VPMOVVec64x8ToM mask)) for { @@ -51115,68 +51601,302 @@ func rewriteValueAMD64_OpShiftAllLeftMaskedUint64x8(v *Value) bool { return true } } -func rewriteValueAMD64_OpShiftAllRightConcatMaskedInt16x16(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpShiftAllLeftUint16x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllRightConcatMaskedInt16x16 [a] x y mask) - // result: (VPSHRDWMasked256 [a] x y (VPMOVVec16x16ToM mask)) + // match: (ShiftAllLeftUint16x16 x (MOVQconst [c])) + // result: (VPSLLW256const [int8(c)] x) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSLLW256const) + v.AuxInt = int8ToAuxInt(int8(c)) + v.AddArg(x) + return true + } + // match: (ShiftAllLeftUint16x16 x y) + // result: (VPSLLW256 x y) for { - a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHRDWMasked256) - v.AuxInt = int8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpAMD64VPSLLW256) + v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpShiftAllRightConcatMaskedInt16x32(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpShiftAllLeftUint16x32(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllRightConcatMaskedInt16x32 [a] x y mask) - // result: (VPSHRDWMasked512 [a] x y (VPMOVVec16x32ToM mask)) + // match: (ShiftAllLeftUint16x32 x (MOVQconst [c])) + // result: (VPSLLW512const [int8(c)] x) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSLLW512const) + v.AuxInt = int8ToAuxInt(int8(c)) + v.AddArg(x) + return true + } + // match: (ShiftAllLeftUint16x32 x y) + // result: (VPSLLW512 x y) for { - a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHRDWMasked512) - v.AuxInt = int8ToAuxInt(a) - v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpAMD64VPSLLW512) + v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpShiftAllRightConcatMaskedInt16x8(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpShiftAllLeftUint16x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] - b := v.Block - // match: (ShiftAllRightConcatMaskedInt16x8 [a] x y mask) - // result: (VPSHRDWMasked128 [a] x y (VPMOVVec16x8ToM mask)) + // match: (ShiftAllLeftUint16x8 x (MOVQconst [c])) + // result: (VPSLLW128const [int8(c)] x) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSLLW128const) + v.AuxInt = int8ToAuxInt(int8(c)) + v.AddArg(x) + return true + } + // match: (ShiftAllLeftUint16x8 x y) + // result: (VPSLLW128 x y) for { - a := auxIntToInt8(v.AuxInt) x := v_0 y := v_1 - mask := v_2 - v.reset(OpAMD64VPSHRDWMasked128) - v.AuxInt = int8ToAuxInt(a) - v0 := 
b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) - v0.AddArg(mask) - v.AddArg3(x, y, v0) + v.reset(OpAMD64VPSLLW128) + v.AddArg2(x, y) return true } } -func rewriteValueAMD64_OpShiftAllRightConcatMaskedInt32x16(v *Value) bool { - v_2 := v.Args[2] +func rewriteValueAMD64_OpShiftAllLeftUint32x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (ShiftAllLeftUint32x16 x (MOVQconst [c])) + // result: (VPSLLD512const [int8(c)] x) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSLLD512const) + v.AuxInt = int8ToAuxInt(int8(c)) + v.AddArg(x) + return true + } + // match: (ShiftAllLeftUint32x16 x y) + // result: (VPSLLD512 x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VPSLLD512) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpShiftAllLeftUint32x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (ShiftAllLeftUint32x4 x (MOVQconst [c])) + // result: (VPSLLD128const [int8(c)] x) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSLLD128const) + v.AuxInt = int8ToAuxInt(int8(c)) + v.AddArg(x) + return true + } + // match: (ShiftAllLeftUint32x4 x y) + // result: (VPSLLD128 x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VPSLLD128) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpShiftAllLeftUint32x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (ShiftAllLeftUint32x8 x (MOVQconst [c])) + // result: (VPSLLD256const [int8(c)] x) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSLLD256const) + v.AuxInt = int8ToAuxInt(int8(c)) + v.AddArg(x) + return true + } + // match: (ShiftAllLeftUint32x8 x y) + // result: (VPSLLD256 x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VPSLLD256) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpShiftAllLeftUint64x2(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (ShiftAllLeftUint64x2 x (MOVQconst [c])) + // result: (VPSLLQ128const [int8(c)] x) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSLLQ128const) + v.AuxInt = int8ToAuxInt(int8(c)) + v.AddArg(x) + return true + } + // match: (ShiftAllLeftUint64x2 x y) + // result: (VPSLLQ128 x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VPSLLQ128) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpShiftAllLeftUint64x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (ShiftAllLeftUint64x4 x (MOVQconst [c])) + // result: (VPSLLQ256const [int8(c)] x) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSLLQ256const) + v.AuxInt = int8ToAuxInt(int8(c)) + v.AddArg(x) + return true + } + // match: (ShiftAllLeftUint64x4 x y) + // result: (VPSLLQ256 x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VPSLLQ256) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpShiftAllLeftUint64x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (ShiftAllLeftUint64x8 x (MOVQconst [c])) + // result: (VPSLLQ512const [int8(c)] x) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSLLQ512const) + v.AuxInt = int8ToAuxInt(int8(c)) + v.AddArg(x) + return true + } + // match: (ShiftAllLeftUint64x8 x y) + // result: (VPSLLQ512 x y) + for { + x := v_0 + y := v_1 + 
v.reset(OpAMD64VPSLLQ512) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpShiftAllRightConcatMaskedInt16x16(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ShiftAllRightConcatMaskedInt16x16 [a] x y mask) + // result: (VPSHRDWMasked256 [a] x y (VPMOVVec16x16ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSHRDWMasked256) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpShiftAllRightConcatMaskedInt16x32(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ShiftAllRightConcatMaskedInt16x32 [a] x y mask) + // result: (VPSHRDWMasked512 [a] x y (VPMOVVec16x32ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSHRDWMasked512) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpShiftAllRightConcatMaskedInt16x8(v *Value) bool { + v_2 := v.Args[2] + v_1 := v.Args[1] + v_0 := v.Args[0] + b := v.Block + // match: (ShiftAllRightConcatMaskedInt16x8 [a] x y mask) + // result: (VPSHRDWMasked128 [a] x y (VPMOVVec16x8ToM mask)) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + y := v_1 + mask := v_2 + v.reset(OpAMD64VPSHRDWMasked128) + v.AuxInt = int8ToAuxInt(a) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg3(x, y, v0) + return true + } +} +func rewriteValueAMD64_OpShiftAllRightConcatMaskedInt32x16(v *Value) bool { + v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block @@ -51475,11 +52195,261 @@ func rewriteValueAMD64_OpShiftAllRightConcatMaskedUint64x8(v *Value) bool { return true } } +func rewriteValueAMD64_OpShiftAllRightInt16x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (ShiftAllRightInt16x16 x (MOVQconst [c])) + // result: (VPSRAW256const [int8(c)] x) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSRAW256const) + v.AuxInt = int8ToAuxInt(int8(c)) + v.AddArg(x) + return true + } + // match: (ShiftAllRightInt16x16 x y) + // result: (VPSRAW256 x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VPSRAW256) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpShiftAllRightInt16x32(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (ShiftAllRightInt16x32 x (MOVQconst [c])) + // result: (VPSRAW512const [int8(c)] x) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSRAW512const) + v.AuxInt = int8ToAuxInt(int8(c)) + v.AddArg(x) + return true + } + // match: (ShiftAllRightInt16x32 x y) + // result: (VPSRAW512 x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VPSRAW512) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpShiftAllRightInt16x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (ShiftAllRightInt16x8 x (MOVQconst [c])) + // result: (VPSRAW128const [int8(c)] x) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSRAW128const) + v.AuxInt = int8ToAuxInt(int8(c)) + v.AddArg(x) + return true + } + // match: (ShiftAllRightInt16x8 x y) + // result: (VPSRAW128 x y) + for { + x := v_0 
+ y := v_1 + v.reset(OpAMD64VPSRAW128) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpShiftAllRightInt32x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (ShiftAllRightInt32x16 x (MOVQconst [c])) + // result: (VPSRAD512const [int8(c)] x) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSRAD512const) + v.AuxInt = int8ToAuxInt(int8(c)) + v.AddArg(x) + return true + } + // match: (ShiftAllRightInt32x16 x y) + // result: (VPSRAD512 x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VPSRAD512) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpShiftAllRightInt32x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (ShiftAllRightInt32x4 x (MOVQconst [c])) + // result: (VPSRAD128const [int8(c)] x) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSRAD128const) + v.AuxInt = int8ToAuxInt(int8(c)) + v.AddArg(x) + return true + } + // match: (ShiftAllRightInt32x4 x y) + // result: (VPSRAD128 x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VPSRAD128) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpShiftAllRightInt32x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (ShiftAllRightInt32x8 x (MOVQconst [c])) + // result: (VPSRAD256const [int8(c)] x) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSRAD256const) + v.AuxInt = int8ToAuxInt(int8(c)) + v.AddArg(x) + return true + } + // match: (ShiftAllRightInt32x8 x y) + // result: (VPSRAD256 x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VPSRAD256) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpShiftAllRightInt64x2(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (ShiftAllRightInt64x2 x (MOVQconst [c])) + // result: (VPSRAQ128const [int8(c)] x) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSRAQ128const) + v.AuxInt = int8ToAuxInt(int8(c)) + v.AddArg(x) + return true + } + // match: (ShiftAllRightInt64x2 x y) + // result: (VPSRAQ128 x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VPSRAQ128) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpShiftAllRightInt64x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (ShiftAllRightInt64x4 x (MOVQconst [c])) + // result: (VPSRAQ256const [int8(c)] x) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSRAQ256const) + v.AuxInt = int8ToAuxInt(int8(c)) + v.AddArg(x) + return true + } + // match: (ShiftAllRightInt64x4 x y) + // result: (VPSRAQ256 x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VPSRAQ256) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpShiftAllRightInt64x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (ShiftAllRightInt64x8 x (MOVQconst [c])) + // result: (VPSRAQ512const [int8(c)] x) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSRAQ512const) + v.AuxInt = int8ToAuxInt(int8(c)) + v.AddArg(x) + return true + } + // match: (ShiftAllRightInt64x8 x y) + // result: (VPSRAQ512 x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VPSRAQ512) + v.AddArg2(x, y) + return true + } +} func rewriteValueAMD64_OpShiftAllRightMaskedInt16x16(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := 
v.Args[0] b := v.Block + // match: (ShiftAllRightMaskedInt16x16 x (MOVQconst [c]) mask) + // result: (VPSRAWMasked256const [int8(c)] x (VPMOVVec16x16ToM mask)) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSRAWMasked256const) + v.AuxInt = int8ToAuxInt(int8(c)) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } // match: (ShiftAllRightMaskedInt16x16 x y mask) // result: (VPSRAWMasked256 x y (VPMOVVec16x16ToM mask)) for { @@ -51498,6 +52468,22 @@ func rewriteValueAMD64_OpShiftAllRightMaskedInt16x32(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block + // match: (ShiftAllRightMaskedInt16x32 x (MOVQconst [c]) mask) + // result: (VPSRAWMasked512const [int8(c)] x (VPMOVVec16x32ToM mask)) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSRAWMasked512const) + v.AuxInt = int8ToAuxInt(int8(c)) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } // match: (ShiftAllRightMaskedInt16x32 x y mask) // result: (VPSRAWMasked512 x y (VPMOVVec16x32ToM mask)) for { @@ -51516,6 +52502,22 @@ func rewriteValueAMD64_OpShiftAllRightMaskedInt16x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block + // match: (ShiftAllRightMaskedInt16x8 x (MOVQconst [c]) mask) + // result: (VPSRAWMasked128const [int8(c)] x (VPMOVVec16x8ToM mask)) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSRAWMasked128const) + v.AuxInt = int8ToAuxInt(int8(c)) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } // match: (ShiftAllRightMaskedInt16x8 x y mask) // result: (VPSRAWMasked128 x y (VPMOVVec16x8ToM mask)) for { @@ -51534,6 +52536,22 @@ func rewriteValueAMD64_OpShiftAllRightMaskedInt32x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block + // match: (ShiftAllRightMaskedInt32x16 x (MOVQconst [c]) mask) + // result: (VPSRADMasked512const [int8(c)] x (VPMOVVec32x16ToM mask)) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSRADMasked512const) + v.AuxInt = int8ToAuxInt(int8(c)) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } // match: (ShiftAllRightMaskedInt32x16 x y mask) // result: (VPSRADMasked512 x y (VPMOVVec32x16ToM mask)) for { @@ -51552,6 +52570,22 @@ func rewriteValueAMD64_OpShiftAllRightMaskedInt32x4(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block + // match: (ShiftAllRightMaskedInt32x4 x (MOVQconst [c]) mask) + // result: (VPSRADMasked128const [int8(c)] x (VPMOVVec32x4ToM mask)) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSRADMasked128const) + v.AuxInt = int8ToAuxInt(int8(c)) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } // match: (ShiftAllRightMaskedInt32x4 x y mask) // result: (VPSRADMasked128 x y (VPMOVVec32x4ToM mask)) for { @@ -51570,6 +52604,22 @@ func rewriteValueAMD64_OpShiftAllRightMaskedInt32x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block + // match: (ShiftAllRightMaskedInt32x8 x 
(MOVQconst [c]) mask) + // result: (VPSRADMasked256const [int8(c)] x (VPMOVVec32x8ToM mask)) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSRADMasked256const) + v.AuxInt = int8ToAuxInt(int8(c)) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } // match: (ShiftAllRightMaskedInt32x8 x y mask) // result: (VPSRADMasked256 x y (VPMOVVec32x8ToM mask)) for { @@ -51588,6 +52638,22 @@ func rewriteValueAMD64_OpShiftAllRightMaskedInt64x2(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block + // match: (ShiftAllRightMaskedInt64x2 x (MOVQconst [c]) mask) + // result: (VPSRAQMasked128const [int8(c)] x (VPMOVVec64x2ToM mask)) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSRAQMasked128const) + v.AuxInt = int8ToAuxInt(int8(c)) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } // match: (ShiftAllRightMaskedInt64x2 x y mask) // result: (VPSRAQMasked128 x y (VPMOVVec64x2ToM mask)) for { @@ -51606,6 +52672,22 @@ func rewriteValueAMD64_OpShiftAllRightMaskedInt64x4(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block + // match: (ShiftAllRightMaskedInt64x4 x (MOVQconst [c]) mask) + // result: (VPSRAQMasked256const [int8(c)] x (VPMOVVec64x4ToM mask)) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSRAQMasked256const) + v.AuxInt = int8ToAuxInt(int8(c)) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } // match: (ShiftAllRightMaskedInt64x4 x y mask) // result: (VPSRAQMasked256 x y (VPMOVVec64x4ToM mask)) for { @@ -51624,6 +52706,22 @@ func rewriteValueAMD64_OpShiftAllRightMaskedInt64x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block + // match: (ShiftAllRightMaskedInt64x8 x (MOVQconst [c]) mask) + // result: (VPSRAQMasked512const [int8(c)] x (VPMOVVec64x8ToM mask)) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSRAQMasked512const) + v.AuxInt = int8ToAuxInt(int8(c)) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } // match: (ShiftAllRightMaskedInt64x8 x y mask) // result: (VPSRAQMasked512 x y (VPMOVVec64x8ToM mask)) for { @@ -51642,6 +52740,22 @@ func rewriteValueAMD64_OpShiftAllRightMaskedUint16x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block + // match: (ShiftAllRightMaskedUint16x16 x (MOVQconst [c]) mask) + // result: (VPSRLWMasked256const [int8(c)] x (VPMOVVec16x16ToM mask)) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSRLWMasked256const) + v.AuxInt = int8ToAuxInt(int8(c)) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } // match: (ShiftAllRightMaskedUint16x16 x y mask) // result: (VPSRLWMasked256 x y (VPMOVVec16x16ToM mask)) for { @@ -51660,6 +52774,22 @@ func rewriteValueAMD64_OpShiftAllRightMaskedUint16x32(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block + // match: (ShiftAllRightMaskedUint16x32 x (MOVQconst [c]) mask) + // result: (VPSRLWMasked512const [int8(c)] x 
(VPMOVVec16x32ToM mask)) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSRLWMasked512const) + v.AuxInt = int8ToAuxInt(int8(c)) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } // match: (ShiftAllRightMaskedUint16x32 x y mask) // result: (VPSRLWMasked512 x y (VPMOVVec16x32ToM mask)) for { @@ -51678,6 +52808,22 @@ func rewriteValueAMD64_OpShiftAllRightMaskedUint16x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block + // match: (ShiftAllRightMaskedUint16x8 x (MOVQconst [c]) mask) + // result: (VPSRLWMasked128const [int8(c)] x (VPMOVVec16x8ToM mask)) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSRLWMasked128const) + v.AuxInt = int8ToAuxInt(int8(c)) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } // match: (ShiftAllRightMaskedUint16x8 x y mask) // result: (VPSRLWMasked128 x y (VPMOVVec16x8ToM mask)) for { @@ -51696,6 +52842,22 @@ func rewriteValueAMD64_OpShiftAllRightMaskedUint32x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block + // match: (ShiftAllRightMaskedUint32x16 x (MOVQconst [c]) mask) + // result: (VPSRLDMasked512const [int8(c)] x (VPMOVVec32x16ToM mask)) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSRLDMasked512const) + v.AuxInt = int8ToAuxInt(int8(c)) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } // match: (ShiftAllRightMaskedUint32x16 x y mask) // result: (VPSRLDMasked512 x y (VPMOVVec32x16ToM mask)) for { @@ -51714,6 +52876,22 @@ func rewriteValueAMD64_OpShiftAllRightMaskedUint32x4(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block + // match: (ShiftAllRightMaskedUint32x4 x (MOVQconst [c]) mask) + // result: (VPSRLDMasked128const [int8(c)] x (VPMOVVec32x4ToM mask)) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSRLDMasked128const) + v.AuxInt = int8ToAuxInt(int8(c)) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } // match: (ShiftAllRightMaskedUint32x4 x y mask) // result: (VPSRLDMasked128 x y (VPMOVVec32x4ToM mask)) for { @@ -51732,6 +52910,22 @@ func rewriteValueAMD64_OpShiftAllRightMaskedUint32x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block + // match: (ShiftAllRightMaskedUint32x8 x (MOVQconst [c]) mask) + // result: (VPSRLDMasked256const [int8(c)] x (VPMOVVec32x8ToM mask)) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSRLDMasked256const) + v.AuxInt = int8ToAuxInt(int8(c)) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } // match: (ShiftAllRightMaskedUint32x8 x y mask) // result: (VPSRLDMasked256 x y (VPMOVVec32x8ToM mask)) for { @@ -51750,6 +52944,22 @@ func rewriteValueAMD64_OpShiftAllRightMaskedUint64x2(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block + // match: (ShiftAllRightMaskedUint64x2 x (MOVQconst [c]) mask) + // result: (VPSRLQMasked128const [int8(c)] x (VPMOVVec64x2ToM mask)) + for { + x := v_0 + if v_1.Op != 
OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSRLQMasked128const) + v.AuxInt = int8ToAuxInt(int8(c)) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } // match: (ShiftAllRightMaskedUint64x2 x y mask) // result: (VPSRLQMasked128 x y (VPMOVVec64x2ToM mask)) for { @@ -51768,6 +52978,22 @@ func rewriteValueAMD64_OpShiftAllRightMaskedUint64x4(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block + // match: (ShiftAllRightMaskedUint64x4 x (MOVQconst [c]) mask) + // result: (VPSRLQMasked256const [int8(c)] x (VPMOVVec64x4ToM mask)) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSRLQMasked256const) + v.AuxInt = int8ToAuxInt(int8(c)) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } // match: (ShiftAllRightMaskedUint64x4 x y mask) // result: (VPSRLQMasked256 x y (VPMOVVec64x4ToM mask)) for { @@ -51786,6 +53012,22 @@ func rewriteValueAMD64_OpShiftAllRightMaskedUint64x8(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] b := v.Block + // match: (ShiftAllRightMaskedUint64x8 x (MOVQconst [c]) mask) + // result: (VPSRLQMasked512const [int8(c)] x (VPMOVVec64x8ToM mask)) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + mask := v_2 + v.reset(OpAMD64VPSRLQMasked512const) + v.AuxInt = int8ToAuxInt(int8(c)) + v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, types.TypeMask) + v0.AddArg(mask) + v.AddArg2(x, v0) + return true + } // match: (ShiftAllRightMaskedUint64x8 x y mask) // result: (VPSRLQMasked512 x y (VPMOVVec64x8ToM mask)) for { @@ -51799,6 +53041,240 @@ func rewriteValueAMD64_OpShiftAllRightMaskedUint64x8(v *Value) bool { return true } } +func rewriteValueAMD64_OpShiftAllRightUint16x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (ShiftAllRightUint16x16 x (MOVQconst [c])) + // result: (VPSRLW256const [int8(c)] x) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSRLW256const) + v.AuxInt = int8ToAuxInt(int8(c)) + v.AddArg(x) + return true + } + // match: (ShiftAllRightUint16x16 x y) + // result: (VPSRLW256 x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VPSRLW256) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpShiftAllRightUint16x32(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (ShiftAllRightUint16x32 x (MOVQconst [c])) + // result: (VPSRLW512const [int8(c)] x) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSRLW512const) + v.AuxInt = int8ToAuxInt(int8(c)) + v.AddArg(x) + return true + } + // match: (ShiftAllRightUint16x32 x y) + // result: (VPSRLW512 x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VPSRLW512) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpShiftAllRightUint16x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (ShiftAllRightUint16x8 x (MOVQconst [c])) + // result: (VPSRLW128const [int8(c)] x) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSRLW128const) + v.AuxInt = int8ToAuxInt(int8(c)) + v.AddArg(x) + return true + } + // match: (ShiftAllRightUint16x8 x y) + // result: (VPSRLW128 x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VPSRLW128) + 
v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpShiftAllRightUint32x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (ShiftAllRightUint32x16 x (MOVQconst [c])) + // result: (VPSRLD512const [int8(c)] x) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSRLD512const) + v.AuxInt = int8ToAuxInt(int8(c)) + v.AddArg(x) + return true + } + // match: (ShiftAllRightUint32x16 x y) + // result: (VPSRLD512 x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VPSRLD512) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpShiftAllRightUint32x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (ShiftAllRightUint32x4 x (MOVQconst [c])) + // result: (VPSRLD128const [int8(c)] x) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSRLD128const) + v.AuxInt = int8ToAuxInt(int8(c)) + v.AddArg(x) + return true + } + // match: (ShiftAllRightUint32x4 x y) + // result: (VPSRLD128 x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VPSRLD128) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpShiftAllRightUint32x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (ShiftAllRightUint32x8 x (MOVQconst [c])) + // result: (VPSRLD256const [int8(c)] x) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSRLD256const) + v.AuxInt = int8ToAuxInt(int8(c)) + v.AddArg(x) + return true + } + // match: (ShiftAllRightUint32x8 x y) + // result: (VPSRLD256 x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VPSRLD256) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpShiftAllRightUint64x2(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (ShiftAllRightUint64x2 x (MOVQconst [c])) + // result: (VPSRLQ128const [int8(c)] x) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSRLQ128const) + v.AuxInt = int8ToAuxInt(int8(c)) + v.AddArg(x) + return true + } + // match: (ShiftAllRightUint64x2 x y) + // result: (VPSRLQ128 x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VPSRLQ128) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpShiftAllRightUint64x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (ShiftAllRightUint64x4 x (MOVQconst [c])) + // result: (VPSRLQ256const [int8(c)] x) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSRLQ256const) + v.AuxInt = int8ToAuxInt(int8(c)) + v.AddArg(x) + return true + } + // match: (ShiftAllRightUint64x4 x y) + // result: (VPSRLQ256 x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VPSRLQ256) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpShiftAllRightUint64x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (ShiftAllRightUint64x8 x (MOVQconst [c])) + // result: (VPSRLQ512const [int8(c)] x) + for { + x := v_0 + if v_1.Op != OpAMD64MOVQconst { + break + } + c := auxIntToInt64(v_1.AuxInt) + v.reset(OpAMD64VPSRLQ512const) + v.AuxInt = int8ToAuxInt(int8(c)) + v.AddArg(x) + return true + } + // match: (ShiftAllRightUint64x8 x y) + // result: (VPSRLQ512 x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VPSRLQ512) + v.AddArg2(x, y) + return true + } +} func rewriteValueAMD64_OpShiftLeftConcatMaskedInt16x16(v *Value) bool { v_3 := v.Args[3] v_2 := v.Args[2] diff --git 
a/src/simd/simd_test.go b/src/simd/simd_test.go
index 2326addea9..1df27f8757 100644
--- a/src/simd/simd_test.go
+++ b/src/simd/simd_test.go
@@ -206,6 +206,24 @@ func TestPairDotProdAccumulate(t *testing.T) {
 	}
 }
 
+var testShiftAllVal uint64 = 3
+
+func TestShiftAll(t *testing.T) {
+	got := make([]int32, 4)
+	simd.LoadInt32x4Slice([]int32{0b11, 0b11, 0b11, 0b11}).ShiftAllLeft(2).StoreSlice(got)
+	for _, v := range got {
+		if v != 0b1100 {
+			t.Errorf("expect 0b1100, got %b", v)
+		}
+	}
+	simd.LoadInt32x4Slice([]int32{0b11, 0b11, 0b11, 0b11}).ShiftAllLeft(testShiftAllVal).StoreSlice(got)
+	for _, v := range got {
+		if v != 0b11000 {
+			t.Errorf("expect 0b11000, got %b", v)
+		}
+	}
+}
+
 func TestSlicesInt8(t *testing.T) {
 	a := []int8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}
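
Note: each generated rewriteValueAMD64_OpShiftAll* function above comes from a rule pair in src/cmd/compile/internal/ssa/_gen/simdAMD64.rules: a first rule that folds a MOVQconst shift count into the new immediate ("const") opcode, and a fallback rule that keeps the vector-register form for variable counts. A sketch of one such pair, reconstructed from the match:/result: comments in the generated code (the <types.TypeMask> annotation on the mask conversion is an assumption about the rule spelling, not taken from this CL), looks roughly like:

    // Sketch of the generating rule pairs (reconstructed, not verbatim):
    (ShiftAllLeftInt16x8 x (MOVQconst [c])) => (VPSLLW128const [int8(c)] x)
    (ShiftAllLeftInt16x8 x y) => (VPSLLW128 x y)
    (ShiftAllLeftMaskedInt16x8 x (MOVQconst [c]) mask) => (VPSLLWMasked128const [int8(c)] x (VPMOVVec16x8ToM <types.TypeMask> mask))
    (ShiftAllLeftMaskedInt16x8 x y mask) => (VPSLLWMasked128 x y (VPMOVVec16x8ToM <types.TypeMask> mask))

The matchers truncate the 64-bit constant straight to int8 with no range check, so counts outside the imm8 range are presumably expected to be rejected or normalized before this lowering applies. TestShiftAll exercises both paths: ShiftAllLeft(2) should lower to the immediate VPSLLD128const form, while ShiftAllLeft(testShiftAllVal) keeps the non-immediate VPSLLD128 form because the count is not a compile-time constant.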