From: Junyang Shao
Date: Tue, 5 Aug 2025 19:07:51 +0000 (+0000)
Subject: [dev.simd] cmd/compile, simd: (Set|Get)(Lo|Hi)
X-Git-Tag: go1.26rc1~147^2~145
X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=d3cf582f8a;p=gostls13.git

[dev.simd] cmd/compile, simd: (Set|Get)(Lo|Hi)

This CL is generated by CL 693335.

Change-Id: Ie9adda526573f979ec7e4f535033ba29236cc5cb
Reviewed-on: https://go-review.googlesource.com/c/go/+/693355
LUCI-TryBot-Result: Go LUCI
Reviewed-by: David Chase
---

diff --git a/src/cmd/compile/internal/amd64/simdssa.go b/src/cmd/compile/internal/amd64/simdssa.go index e0571d2cc3..7a0a0be58f 100644 --- a/src/cmd/compile/internal/amd64/simdssa.go +++ b/src/cmd/compile/internal/amd64/simdssa.go @@ -685,7 +685,9 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VREDUCEPD256, ssa.OpAMD64VREDUCEPD512, ssa.OpAMD64VEXTRACTF128128, + ssa.OpAMD64VEXTRACTF64X4256, ssa.OpAMD64VEXTRACTI128128, + ssa.OpAMD64VEXTRACTI64X4256, ssa.OpAMD64VPROLD128, ssa.OpAMD64VPROLD256, ssa.OpAMD64VPROLD512, @@ -794,7 +796,9 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VGF2P8AFFINEINVQB256, ssa.OpAMD64VGF2P8AFFINEINVQB512, ssa.OpAMD64VINSERTF128256, + ssa.OpAMD64VINSERTF64X4512, ssa.OpAMD64VINSERTI128256, + ssa.OpAMD64VINSERTI64X4512, ssa.OpAMD64VPSHLDW128, ssa.OpAMD64VPSHLDW256, ssa.OpAMD64VPSHLDW512, diff --git a/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules b/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules index 9a4c82c0af..316db1b841 100644 --- a/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules +++ b/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules @@ -467,16 +467,6 @@ (GaloisFieldMulMaskedUint8x16 x y mask) => (VGF2P8MULBMasked128 x y (VPMOVVec8x16ToM mask)) (GaloisFieldMulMaskedUint8x32 x y mask) => (VGF2P8MULBMasked256 x y (VPMOVVec8x32ToM mask)) (GaloisFieldMulMaskedUint8x64 x y mask) => (VGF2P8MULBMasked512 x y (VPMOVVec8x64ToM mask)) -(Get128Float32x8 ...) => (VEXTRACTF128128 ...) -(Get128Float64x4 ...) => (VEXTRACTF128128 ...) -(Get128Int8x32 ...) => (VEXTRACTI128128 ...) -(Get128Int16x16 ...) => (VEXTRACTI128128 ...) -(Get128Int32x8 ...) => (VEXTRACTI128128 ...) -(Get128Int64x4 ...) => (VEXTRACTI128128 ...) -(Get128Uint8x32 ...) => (VEXTRACTI128128 ...) -(Get128Uint16x16 ...) => (VEXTRACTI128128 ...) -(Get128Uint32x8 ...) => (VEXTRACTI128128 ...) -(Get128Uint64x4 ...) => (VEXTRACTI128128 ...) (GetElemInt8x16 ...) => (VPEXTRB128 ...) (GetElemInt16x8 ...) => (VPEXTRW128 ...) (GetElemInt32x4 ...) => (VPEXTRD128 ...) @@ -485,6 +475,46 @@ (GetElemUint16x8 ...) => (VPEXTRW128 ...) (GetElemUint32x4 ...) => (VPEXTRD128 ...) (GetElemUint64x2 ...) => (VPEXTRQ128 ...)
+(GetHiFloat32x8 x) => (VEXTRACTF128128 [1] x) +(GetHiFloat32x16 x) => (VEXTRACTF64X4256 [1] x) +(GetHiFloat64x4 x) => (VEXTRACTF128128 [1] x) +(GetHiFloat64x8 x) => (VEXTRACTF64X4256 [1] x) +(GetHiInt8x32 x) => (VEXTRACTI128128 [1] x) +(GetHiInt8x64 x) => (VEXTRACTI64X4256 [1] x) +(GetHiInt16x16 x) => (VEXTRACTI128128 [1] x) +(GetHiInt16x32 x) => (VEXTRACTI64X4256 [1] x) +(GetHiInt32x8 x) => (VEXTRACTI128128 [1] x) +(GetHiInt32x16 x) => (VEXTRACTI64X4256 [1] x) +(GetHiInt64x4 x) => (VEXTRACTI128128 [1] x) +(GetHiInt64x8 x) => (VEXTRACTI64X4256 [1] x) +(GetHiUint8x32 x) => (VEXTRACTI128128 [1] x) +(GetHiUint8x64 x) => (VEXTRACTI64X4256 [1] x) +(GetHiUint16x16 x) => (VEXTRACTI128128 [1] x) +(GetHiUint16x32 x) => (VEXTRACTI64X4256 [1] x) +(GetHiUint32x8 x) => (VEXTRACTI128128 [1] x) +(GetHiUint32x16 x) => (VEXTRACTI64X4256 [1] x) +(GetHiUint64x4 x) => (VEXTRACTI128128 [1] x) +(GetHiUint64x8 x) => (VEXTRACTI64X4256 [1] x) +(GetLoFloat32x8 x) => (VEXTRACTF128128 [0] x) +(GetLoFloat32x16 x) => (VEXTRACTF64X4256 [0] x) +(GetLoFloat64x4 x) => (VEXTRACTF128128 [0] x) +(GetLoFloat64x8 x) => (VEXTRACTF64X4256 [0] x) +(GetLoInt8x32 x) => (VEXTRACTI128128 [0] x) +(GetLoInt8x64 x) => (VEXTRACTI64X4256 [0] x) +(GetLoInt16x16 x) => (VEXTRACTI128128 [0] x) +(GetLoInt16x32 x) => (VEXTRACTI64X4256 [0] x) +(GetLoInt32x8 x) => (VEXTRACTI128128 [0] x) +(GetLoInt32x16 x) => (VEXTRACTI64X4256 [0] x) +(GetLoInt64x4 x) => (VEXTRACTI128128 [0] x) +(GetLoInt64x8 x) => (VEXTRACTI64X4256 [0] x) +(GetLoUint8x32 x) => (VEXTRACTI128128 [0] x) +(GetLoUint8x64 x) => (VEXTRACTI64X4256 [0] x) +(GetLoUint16x16 x) => (VEXTRACTI128128 [0] x) +(GetLoUint16x32 x) => (VEXTRACTI64X4256 [0] x) +(GetLoUint32x8 x) => (VEXTRACTI128128 [0] x) +(GetLoUint32x16 x) => (VEXTRACTI64X4256 [0] x) +(GetLoUint64x4 x) => (VEXTRACTI128128 [0] x) +(GetLoUint64x8 x) => (VEXTRACTI64X4256 [0] x) (GreaterFloat32x4 x y) => (VCMPPS128 [14] x y) (GreaterFloat32x8 x y) => (VCMPPS256 [14] x y) (GreaterFloat32x16 x y) => (VPMOVMToVec32x16 (VCMPPS512 [14] x y)) @@ -1327,16 +1357,6 @@ (ScaleMaskedFloat64x2 x y mask) => (VSCALEFPDMasked128 x y (VPMOVVec64x2ToM mask)) (ScaleMaskedFloat64x4 x y mask) => (VSCALEFPDMasked256 x y (VPMOVVec64x4ToM mask)) (ScaleMaskedFloat64x8 x y mask) => (VSCALEFPDMasked512 x y (VPMOVVec64x8ToM mask)) -(Set128Float32x8 ...) => (VINSERTF128256 ...) -(Set128Float64x4 ...) => (VINSERTF128256 ...) -(Set128Int8x32 ...) => (VINSERTI128256 ...) -(Set128Int16x16 ...) => (VINSERTI128256 ...) -(Set128Int32x8 ...) => (VINSERTI128256 ...) -(Set128Int64x4 ...) => (VINSERTI128256 ...) -(Set128Uint8x32 ...) => (VINSERTI128256 ...) -(Set128Uint16x16 ...) => (VINSERTI128256 ...) -(Set128Uint32x8 ...) => (VINSERTI128256 ...) -(Set128Uint64x4 ...) => (VINSERTI128256 ...) (SetElemInt8x16 ...) => (VPINSRB128 ...) (SetElemInt16x8 ...) => (VPINSRW128 ...) (SetElemInt32x4 ...) => (VPINSRD128 ...) @@ -1345,6 +1365,46 @@ (SetElemUint16x8 ...) => (VPINSRW128 ...) (SetElemUint32x4 ...) => (VPINSRD128 ...) (SetElemUint64x2 ...) => (VPINSRQ128 ...) 
+(SetHiFloat32x8 x y) => (VINSERTF128256 [1] x y) +(SetHiFloat32x16 x y) => (VINSERTF64X4512 [1] x y) +(SetHiFloat64x4 x y) => (VINSERTF128256 [1] x y) +(SetHiFloat64x8 x y) => (VINSERTF64X4512 [1] x y) +(SetHiInt8x32 x y) => (VINSERTI128256 [1] x y) +(SetHiInt8x64 x y) => (VINSERTI64X4512 [1] x y) +(SetHiInt16x16 x y) => (VINSERTI128256 [1] x y) +(SetHiInt16x32 x y) => (VINSERTI64X4512 [1] x y) +(SetHiInt32x8 x y) => (VINSERTI128256 [1] x y) +(SetHiInt32x16 x y) => (VINSERTI64X4512 [1] x y) +(SetHiInt64x4 x y) => (VINSERTI128256 [1] x y) +(SetHiInt64x8 x y) => (VINSERTI64X4512 [1] x y) +(SetHiUint8x32 x y) => (VINSERTI128256 [1] x y) +(SetHiUint8x64 x y) => (VINSERTI64X4512 [1] x y) +(SetHiUint16x16 x y) => (VINSERTI128256 [1] x y) +(SetHiUint16x32 x y) => (VINSERTI64X4512 [1] x y) +(SetHiUint32x8 x y) => (VINSERTI128256 [1] x y) +(SetHiUint32x16 x y) => (VINSERTI64X4512 [1] x y) +(SetHiUint64x4 x y) => (VINSERTI128256 [1] x y) +(SetHiUint64x8 x y) => (VINSERTI64X4512 [1] x y) +(SetLoFloat32x8 x y) => (VINSERTF128256 [0] x y) +(SetLoFloat32x16 x y) => (VINSERTF64X4512 [0] x y) +(SetLoFloat64x4 x y) => (VINSERTF128256 [0] x y) +(SetLoFloat64x8 x y) => (VINSERTF64X4512 [0] x y) +(SetLoInt8x32 x y) => (VINSERTI128256 [0] x y) +(SetLoInt8x64 x y) => (VINSERTI64X4512 [0] x y) +(SetLoInt16x16 x y) => (VINSERTI128256 [0] x y) +(SetLoInt16x32 x y) => (VINSERTI64X4512 [0] x y) +(SetLoInt32x8 x y) => (VINSERTI128256 [0] x y) +(SetLoInt32x16 x y) => (VINSERTI64X4512 [0] x y) +(SetLoInt64x4 x y) => (VINSERTI128256 [0] x y) +(SetLoInt64x8 x y) => (VINSERTI64X4512 [0] x y) +(SetLoUint8x32 x y) => (VINSERTI128256 [0] x y) +(SetLoUint8x64 x y) => (VINSERTI64X4512 [0] x y) +(SetLoUint16x16 x y) => (VINSERTI128256 [0] x y) +(SetLoUint16x32 x y) => (VINSERTI64X4512 [0] x y) +(SetLoUint32x8 x y) => (VINSERTI128256 [0] x y) +(SetLoUint32x16 x y) => (VINSERTI64X4512 [0] x y) +(SetLoUint64x4 x y) => (VINSERTI128256 [0] x y) +(SetLoUint64x8 x y) => (VINSERTI64X4512 [0] x y) (ShiftAllLeftInt16x8 x (MOVQconst [c])) => (VPSLLW128const [int8(c)] x) (ShiftAllLeftInt16x8 x y) => (VPSLLW128 x y) (ShiftAllLeftInt16x16 x (MOVQconst [c])) => (VPSLLW256const [int8(c)] x) diff --git a/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go b/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go index 7860a0889e..591f8a5bca 100644 --- a/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go +++ b/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go @@ -912,12 +912,14 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf {name: "VGF2P8AFFINEQBMasked128", argLength: 3, reg: w2kw, asm: "VGF2P8AFFINEQB", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VGF2P8AFFINEQBMasked256", argLength: 3, reg: w2kw, asm: "VGF2P8AFFINEQB", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VGF2P8AFFINEQBMasked512", argLength: 3, reg: w2kw, asm: "VGF2P8AFFINEQB", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false}, - {name: "VEXTRACTF128128", argLength: 1, reg: v11, asm: "VEXTRACTF128", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, - {name: "VEXTRACTI128128", argLength: 1, reg: v11, asm: "VEXTRACTI128", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPEXTRB128", argLength: 1, reg: wgp, asm: "VPEXTRB", aux: "Int8", commutative: false, typ: "int8", resultInArg0: false}, {name: "VPEXTRW128", argLength: 1, reg: wgp, asm: "VPEXTRW", aux: "Int8", commutative: false, typ: "int16", resultInArg0: false}, {name: 
"VPEXTRD128", argLength: 1, reg: vgp, asm: "VPEXTRD", aux: "Int8", commutative: false, typ: "int32", resultInArg0: false}, {name: "VPEXTRQ128", argLength: 1, reg: vgp, asm: "VPEXTRQ", aux: "Int8", commutative: false, typ: "int64", resultInArg0: false}, + {name: "VEXTRACTF128128", argLength: 1, reg: v11, asm: "VEXTRACTF128", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VEXTRACTF64X4256", argLength: 1, reg: w11, asm: "VEXTRACTF64X4", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VEXTRACTI128128", argLength: 1, reg: v11, asm: "VEXTRACTI128", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VEXTRACTI64X4256", argLength: 1, reg: w11, asm: "VEXTRACTI64X4", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPCMPUB128", argLength: 2, reg: w2k, asm: "VPCMPUB", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false}, {name: "VPCMPUB256", argLength: 2, reg: w2k, asm: "VPCMPUB", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false}, {name: "VPCMPUB512", argLength: 2, reg: w2k, asm: "VPCMPUB", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false}, @@ -966,12 +968,14 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf {name: "VPRORQMasked128", argLength: 2, reg: wkw, asm: "VPRORQ", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPRORQMasked256", argLength: 2, reg: wkw, asm: "VPRORQ", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPRORQMasked512", argLength: 2, reg: wkw, asm: "VPRORQ", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false}, - {name: "VINSERTF128256", argLength: 2, reg: v21, asm: "VINSERTF128", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, - {name: "VINSERTI128256", argLength: 2, reg: v21, asm: "VINSERTI128", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPINSRB128", argLength: 2, reg: vgpv, asm: "VPINSRB", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPINSRW128", argLength: 2, reg: vgpv, asm: "VPINSRW", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPINSRD128", argLength: 2, reg: vgpv, asm: "VPINSRD", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPINSRQ128", argLength: 2, reg: vgpv, asm: "VPINSRQ", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, + {name: "VINSERTF128256", argLength: 2, reg: v21, asm: "VINSERTF128", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VINSERTF64X4512", argLength: 2, reg: w21, asm: "VINSERTF64X4", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false}, + {name: "VINSERTI128256", argLength: 2, reg: v21, asm: "VINSERTI128", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, + {name: "VINSERTI64X4512", argLength: 2, reg: w21, asm: "VINSERTI64X4", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false}, {name: "VPSHLDW128", argLength: 2, reg: w21, asm: "VPSHLDW", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPSHLDW256", argLength: 2, reg: w21, asm: "VPSHLDW", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false}, {name: "VPSHLDW512", argLength: 2, reg: w21, asm: "VPSHLDW", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false}, diff --git 
a/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go b/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go index bf85df5e6d..e132b058a4 100644 --- a/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go +++ b/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go @@ -410,6 +410,46 @@ func simdGenericOps() []opData { {name: "GaloisFieldMulUint8x16", argLength: 2, commutative: false}, {name: "GaloisFieldMulUint8x32", argLength: 2, commutative: false}, {name: "GaloisFieldMulUint8x64", argLength: 2, commutative: false}, + {name: "GetHiFloat32x8", argLength: 1, commutative: false}, + {name: "GetHiFloat32x16", argLength: 1, commutative: false}, + {name: "GetHiFloat64x4", argLength: 1, commutative: false}, + {name: "GetHiFloat64x8", argLength: 1, commutative: false}, + {name: "GetHiInt8x32", argLength: 1, commutative: false}, + {name: "GetHiInt8x64", argLength: 1, commutative: false}, + {name: "GetHiInt16x16", argLength: 1, commutative: false}, + {name: "GetHiInt16x32", argLength: 1, commutative: false}, + {name: "GetHiInt32x8", argLength: 1, commutative: false}, + {name: "GetHiInt32x16", argLength: 1, commutative: false}, + {name: "GetHiInt64x4", argLength: 1, commutative: false}, + {name: "GetHiInt64x8", argLength: 1, commutative: false}, + {name: "GetHiUint8x32", argLength: 1, commutative: false}, + {name: "GetHiUint8x64", argLength: 1, commutative: false}, + {name: "GetHiUint16x16", argLength: 1, commutative: false}, + {name: "GetHiUint16x32", argLength: 1, commutative: false}, + {name: "GetHiUint32x8", argLength: 1, commutative: false}, + {name: "GetHiUint32x16", argLength: 1, commutative: false}, + {name: "GetHiUint64x4", argLength: 1, commutative: false}, + {name: "GetHiUint64x8", argLength: 1, commutative: false}, + {name: "GetLoFloat32x8", argLength: 1, commutative: false}, + {name: "GetLoFloat32x16", argLength: 1, commutative: false}, + {name: "GetLoFloat64x4", argLength: 1, commutative: false}, + {name: "GetLoFloat64x8", argLength: 1, commutative: false}, + {name: "GetLoInt8x32", argLength: 1, commutative: false}, + {name: "GetLoInt8x64", argLength: 1, commutative: false}, + {name: "GetLoInt16x16", argLength: 1, commutative: false}, + {name: "GetLoInt16x32", argLength: 1, commutative: false}, + {name: "GetLoInt32x8", argLength: 1, commutative: false}, + {name: "GetLoInt32x16", argLength: 1, commutative: false}, + {name: "GetLoInt64x4", argLength: 1, commutative: false}, + {name: "GetLoInt64x8", argLength: 1, commutative: false}, + {name: "GetLoUint8x32", argLength: 1, commutative: false}, + {name: "GetLoUint8x64", argLength: 1, commutative: false}, + {name: "GetLoUint16x16", argLength: 1, commutative: false}, + {name: "GetLoUint16x32", argLength: 1, commutative: false}, + {name: "GetLoUint32x8", argLength: 1, commutative: false}, + {name: "GetLoUint32x16", argLength: 1, commutative: false}, + {name: "GetLoUint64x4", argLength: 1, commutative: false}, + {name: "GetLoUint64x8", argLength: 1, commutative: false}, {name: "GreaterEqualFloat32x4", argLength: 2, commutative: false}, {name: "GreaterEqualFloat32x8", argLength: 2, commutative: false}, {name: "GreaterEqualFloat32x16", argLength: 2, commutative: false}, @@ -1180,6 +1220,46 @@ func simdGenericOps() []opData { {name: "ScaleMaskedFloat64x2", argLength: 3, commutative: false}, {name: "ScaleMaskedFloat64x4", argLength: 3, commutative: false}, {name: "ScaleMaskedFloat64x8", argLength: 3, commutative: false}, + {name: "SetHiFloat32x8", argLength: 2, commutative: false}, + {name: "SetHiFloat32x16", argLength: 2, commutative: false}, + {name: 
"SetHiFloat64x4", argLength: 2, commutative: false}, + {name: "SetHiFloat64x8", argLength: 2, commutative: false}, + {name: "SetHiInt8x32", argLength: 2, commutative: false}, + {name: "SetHiInt8x64", argLength: 2, commutative: false}, + {name: "SetHiInt16x16", argLength: 2, commutative: false}, + {name: "SetHiInt16x32", argLength: 2, commutative: false}, + {name: "SetHiInt32x8", argLength: 2, commutative: false}, + {name: "SetHiInt32x16", argLength: 2, commutative: false}, + {name: "SetHiInt64x4", argLength: 2, commutative: false}, + {name: "SetHiInt64x8", argLength: 2, commutative: false}, + {name: "SetHiUint8x32", argLength: 2, commutative: false}, + {name: "SetHiUint8x64", argLength: 2, commutative: false}, + {name: "SetHiUint16x16", argLength: 2, commutative: false}, + {name: "SetHiUint16x32", argLength: 2, commutative: false}, + {name: "SetHiUint32x8", argLength: 2, commutative: false}, + {name: "SetHiUint32x16", argLength: 2, commutative: false}, + {name: "SetHiUint64x4", argLength: 2, commutative: false}, + {name: "SetHiUint64x8", argLength: 2, commutative: false}, + {name: "SetLoFloat32x8", argLength: 2, commutative: false}, + {name: "SetLoFloat32x16", argLength: 2, commutative: false}, + {name: "SetLoFloat64x4", argLength: 2, commutative: false}, + {name: "SetLoFloat64x8", argLength: 2, commutative: false}, + {name: "SetLoInt8x32", argLength: 2, commutative: false}, + {name: "SetLoInt8x64", argLength: 2, commutative: false}, + {name: "SetLoInt16x16", argLength: 2, commutative: false}, + {name: "SetLoInt16x32", argLength: 2, commutative: false}, + {name: "SetLoInt32x8", argLength: 2, commutative: false}, + {name: "SetLoInt32x16", argLength: 2, commutative: false}, + {name: "SetLoInt64x4", argLength: 2, commutative: false}, + {name: "SetLoInt64x8", argLength: 2, commutative: false}, + {name: "SetLoUint8x32", argLength: 2, commutative: false}, + {name: "SetLoUint8x64", argLength: 2, commutative: false}, + {name: "SetLoUint16x16", argLength: 2, commutative: false}, + {name: "SetLoUint16x32", argLength: 2, commutative: false}, + {name: "SetLoUint32x8", argLength: 2, commutative: false}, + {name: "SetLoUint32x16", argLength: 2, commutative: false}, + {name: "SetLoUint64x4", argLength: 2, commutative: false}, + {name: "SetLoUint64x8", argLength: 2, commutative: false}, {name: "ShiftAllLeftInt16x8", argLength: 2, commutative: false}, {name: "ShiftAllLeftInt16x16", argLength: 2, commutative: false}, {name: "ShiftAllLeftInt16x32", argLength: 2, commutative: false}, @@ -1624,16 +1704,6 @@ func simdGenericOps() []opData { {name: "GaloisFieldAffineTransformUint8x16", argLength: 2, commutative: false, aux: "Int8"}, {name: "GaloisFieldAffineTransformUint8x32", argLength: 2, commutative: false, aux: "Int8"}, {name: "GaloisFieldAffineTransformUint8x64", argLength: 2, commutative: false, aux: "Int8"}, - {name: "Get128Float32x8", argLength: 1, commutative: false, aux: "Int8"}, - {name: "Get128Float64x4", argLength: 1, commutative: false, aux: "Int8"}, - {name: "Get128Int8x32", argLength: 1, commutative: false, aux: "Int8"}, - {name: "Get128Int16x16", argLength: 1, commutative: false, aux: "Int8"}, - {name: "Get128Int32x8", argLength: 1, commutative: false, aux: "Int8"}, - {name: "Get128Int64x4", argLength: 1, commutative: false, aux: "Int8"}, - {name: "Get128Uint8x32", argLength: 1, commutative: false, aux: "Int8"}, - {name: "Get128Uint16x16", argLength: 1, commutative: false, aux: "Int8"}, - {name: "Get128Uint32x8", argLength: 1, commutative: false, aux: "Int8"}, - {name: "Get128Uint64x4", 
argLength: 1, commutative: false, aux: "Int8"}, {name: "GetElemInt8x16", argLength: 1, commutative: false, aux: "Int8"}, {name: "GetElemInt16x8", argLength: 1, commutative: false, aux: "Int8"}, {name: "GetElemInt32x4", argLength: 1, commutative: false, aux: "Int8"}, @@ -1714,16 +1784,6 @@ func simdGenericOps() []opData { {name: "RoundScaledResidueMaskedFloat64x2", argLength: 2, commutative: false, aux: "Int8"}, {name: "RoundScaledResidueMaskedFloat64x4", argLength: 2, commutative: false, aux: "Int8"}, {name: "RoundScaledResidueMaskedFloat64x8", argLength: 2, commutative: false, aux: "Int8"}, - {name: "Set128Float32x8", argLength: 2, commutative: false, aux: "Int8"}, - {name: "Set128Float64x4", argLength: 2, commutative: false, aux: "Int8"}, - {name: "Set128Int8x32", argLength: 2, commutative: false, aux: "Int8"}, - {name: "Set128Int16x16", argLength: 2, commutative: false, aux: "Int8"}, - {name: "Set128Int32x8", argLength: 2, commutative: false, aux: "Int8"}, - {name: "Set128Int64x4", argLength: 2, commutative: false, aux: "Int8"}, - {name: "Set128Uint8x32", argLength: 2, commutative: false, aux: "Int8"}, - {name: "Set128Uint16x16", argLength: 2, commutative: false, aux: "Int8"}, - {name: "Set128Uint32x8", argLength: 2, commutative: false, aux: "Int8"}, - {name: "Set128Uint64x4", argLength: 2, commutative: false, aux: "Int8"}, {name: "SetElemInt8x16", argLength: 2, commutative: false, aux: "Int8"}, {name: "SetElemInt16x8", argLength: 2, commutative: false, aux: "Int8"}, {name: "SetElemInt32x4", argLength: 2, commutative: false, aux: "Int8"}, diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go index 9ce9220901..b39311cd90 100644 --- a/src/cmd/compile/internal/ssa/opGen.go +++ b/src/cmd/compile/internal/ssa/opGen.go @@ -2131,12 +2131,14 @@ const ( OpAMD64VGF2P8AFFINEQBMasked128 OpAMD64VGF2P8AFFINEQBMasked256 OpAMD64VGF2P8AFFINEQBMasked512 - OpAMD64VEXTRACTF128128 - OpAMD64VEXTRACTI128128 OpAMD64VPEXTRB128 OpAMD64VPEXTRW128 OpAMD64VPEXTRD128 OpAMD64VPEXTRQ128 + OpAMD64VEXTRACTF128128 + OpAMD64VEXTRACTF64X4256 + OpAMD64VEXTRACTI128128 + OpAMD64VEXTRACTI64X4256 OpAMD64VPCMPUB128 OpAMD64VPCMPUB256 OpAMD64VPCMPUB512 @@ -2185,12 +2187,14 @@ const ( OpAMD64VPRORQMasked128 OpAMD64VPRORQMasked256 OpAMD64VPRORQMasked512 - OpAMD64VINSERTF128256 - OpAMD64VINSERTI128256 OpAMD64VPINSRB128 OpAMD64VPINSRW128 OpAMD64VPINSRD128 OpAMD64VPINSRQ128 + OpAMD64VINSERTF128256 + OpAMD64VINSERTF64X4512 + OpAMD64VINSERTI128256 + OpAMD64VINSERTI64X4512 OpAMD64VPSHLDW128 OpAMD64VPSHLDW256 OpAMD64VPSHLDW512 @@ -4967,6 +4971,46 @@ const ( OpGaloisFieldMulUint8x16 OpGaloisFieldMulUint8x32 OpGaloisFieldMulUint8x64 + OpGetHiFloat32x8 + OpGetHiFloat32x16 + OpGetHiFloat64x4 + OpGetHiFloat64x8 + OpGetHiInt8x32 + OpGetHiInt8x64 + OpGetHiInt16x16 + OpGetHiInt16x32 + OpGetHiInt32x8 + OpGetHiInt32x16 + OpGetHiInt64x4 + OpGetHiInt64x8 + OpGetHiUint8x32 + OpGetHiUint8x64 + OpGetHiUint16x16 + OpGetHiUint16x32 + OpGetHiUint32x8 + OpGetHiUint32x16 + OpGetHiUint64x4 + OpGetHiUint64x8 + OpGetLoFloat32x8 + OpGetLoFloat32x16 + OpGetLoFloat64x4 + OpGetLoFloat64x8 + OpGetLoInt8x32 + OpGetLoInt8x64 + OpGetLoInt16x16 + OpGetLoInt16x32 + OpGetLoInt32x8 + OpGetLoInt32x16 + OpGetLoInt64x4 + OpGetLoInt64x8 + OpGetLoUint8x32 + OpGetLoUint8x64 + OpGetLoUint16x16 + OpGetLoUint16x32 + OpGetLoUint32x8 + OpGetLoUint32x16 + OpGetLoUint64x4 + OpGetLoUint64x8 OpGreaterEqualFloat32x4 OpGreaterEqualFloat32x8 OpGreaterEqualFloat32x16 @@ -5737,6 +5781,46 @@ const ( OpScaleMaskedFloat64x2 OpScaleMaskedFloat64x4 
OpScaleMaskedFloat64x8 + OpSetHiFloat32x8 + OpSetHiFloat32x16 + OpSetHiFloat64x4 + OpSetHiFloat64x8 + OpSetHiInt8x32 + OpSetHiInt8x64 + OpSetHiInt16x16 + OpSetHiInt16x32 + OpSetHiInt32x8 + OpSetHiInt32x16 + OpSetHiInt64x4 + OpSetHiInt64x8 + OpSetHiUint8x32 + OpSetHiUint8x64 + OpSetHiUint16x16 + OpSetHiUint16x32 + OpSetHiUint32x8 + OpSetHiUint32x16 + OpSetHiUint64x4 + OpSetHiUint64x8 + OpSetLoFloat32x8 + OpSetLoFloat32x16 + OpSetLoFloat64x4 + OpSetLoFloat64x8 + OpSetLoInt8x32 + OpSetLoInt8x64 + OpSetLoInt16x16 + OpSetLoInt16x32 + OpSetLoInt32x8 + OpSetLoInt32x16 + OpSetLoInt64x4 + OpSetLoInt64x8 + OpSetLoUint8x32 + OpSetLoUint8x64 + OpSetLoUint16x16 + OpSetLoUint16x32 + OpSetLoUint32x8 + OpSetLoUint32x16 + OpSetLoUint64x4 + OpSetLoUint64x8 OpShiftAllLeftInt16x8 OpShiftAllLeftInt16x16 OpShiftAllLeftInt16x32 @@ -6181,16 +6265,6 @@ const ( OpGaloisFieldAffineTransformUint8x16 OpGaloisFieldAffineTransformUint8x32 OpGaloisFieldAffineTransformUint8x64 - OpGet128Float32x8 - OpGet128Float64x4 - OpGet128Int8x32 - OpGet128Int16x16 - OpGet128Int32x8 - OpGet128Int64x4 - OpGet128Uint8x32 - OpGet128Uint16x16 - OpGet128Uint32x8 - OpGet128Uint64x4 OpGetElemInt8x16 OpGetElemInt16x8 OpGetElemInt32x4 @@ -6271,16 +6345,6 @@ const ( OpRoundScaledResidueMaskedFloat64x2 OpRoundScaledResidueMaskedFloat64x4 OpRoundScaledResidueMaskedFloat64x8 - OpSet128Float32x8 - OpSet128Float64x4 - OpSet128Int8x32 - OpSet128Int16x16 - OpSet128Int32x8 - OpSet128Int64x4 - OpSet128Uint8x32 - OpSet128Uint16x16 - OpSet128Uint32x8 - OpSet128Uint64x4 OpSetElemInt8x16 OpSetElemInt16x8 OpSetElemInt32x4 @@ -33034,41 +33098,41 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VEXTRACTF128128", + name: "VPEXTRB128", auxType: auxInt8, argLen: 1, - asm: x86.AVEXTRACTF128, + asm: x86.AVPEXTRB, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 }, outputs: []outputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15 }, }, }, { - name: "VEXTRACTI128128", + name: "VPEXTRW128", auxType: auxInt8, argLen: 1, - asm: x86.AVEXTRACTI128, + asm: x86.AVPEXTRW, reg: regInfo{ inputs: []inputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 }, outputs: []outputInfo{ - {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15 }, }, }, { - name: "VPEXTRB128", + name: "VPEXTRD128", auxType: auxInt8, argLen: 1, - asm: x86.AVPEXTRB, + asm: x86.AVPEXTRD, reg: regInfo{ inputs: []inputInfo{ - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15 @@ -33076,13 +33140,13 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPEXTRW128", + name: "VPEXTRQ128", auxType: auxInt8, argLen: 1, - asm: x86.AVPEXTRW, + asm: x86.AVPEXTRQ, reg: regInfo{ inputs: []inputInfo{ - {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + {0, 
2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15 @@ -33090,30 +33154,58 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPEXTRD128", + name: "VEXTRACTF128128", auxType: auxInt8, argLen: 1, - asm: x86.AVPEXTRD, + asm: x86.AVEXTRACTF128, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ - {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, }, }, { - name: "VPEXTRQ128", + name: "VEXTRACTF64X4256", auxType: auxInt8, argLen: 1, - asm: x86.AVPEXTRQ, + asm: x86.AVEXTRACTF64X4, + reg: regInfo{ + inputs: []inputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VEXTRACTI128128", + auxType: auxInt8, + argLen: 1, + asm: x86.AVEXTRACTI128, reg: regInfo{ inputs: []inputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ - {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15 + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + }, + }, + { + name: "VEXTRACTI64X4256", + auxType: auxInt8, + argLen: 1, + asm: x86.AVEXTRACTI64X4, + reg: regInfo{ + inputs: []inputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 }, }, }, @@ -33826,14 +33918,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VINSERTF128256", + name: "VPINSRB128", auxType: auxInt8, argLen: 2, - asm: x86.AVINSERTF128, + asm: x86.AVPINSRB, reg: regInfo{ inputs: []inputInfo{ + {1, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15 {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -33841,14 +33933,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VINSERTI128256", + name: "VPINSRW128", auxType: auxInt8, argLen: 2, - asm: x86.AVINSERTI128, + asm: x86.AVPINSRW, reg: regInfo{ inputs: []inputInfo{ + {1, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15 {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 - {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -33856,10 +33948,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPINSRB128", + name: "VPINSRD128", auxType: auxInt8, argLen: 2, - asm: x86.AVPINSRB, + asm: x86.AVPINSRD, reg: regInfo{ inputs: []inputInfo{ {1, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15 @@ -33871,10 +33963,10 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPINSRW128", + name: "VPINSRQ128", auxType: auxInt8, argLen: 2, - asm: x86.AVPINSRW, + asm: x86.AVPINSRQ, reg: regInfo{ inputs: []inputInfo{ {1, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15 @@ 
-33886,14 +33978,14 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPINSRD128", + name: "VINSERTF128256", auxType: auxInt8, argLen: 2, - asm: x86.AVPINSRD, + asm: x86.AVINSERTF128, reg: regInfo{ inputs: []inputInfo{ - {1, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15 {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 @@ -33901,20 +33993,50 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "VPINSRQ128", + name: "VINSERTF64X4512", auxType: auxInt8, argLen: 2, - asm: x86.AVPINSRQ, + asm: x86.AVINSERTF64X4, + reg: regInfo{ + inputs: []inputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + {1, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, + { + name: "VINSERTI128256", + auxType: auxInt8, + argLen: 2, + asm: x86.AVINSERTI128, reg: regInfo{ inputs: []inputInfo{ - {1, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15 {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, outputs: []outputInfo{ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 }, }, }, + { + name: "VINSERTI64X4512", + auxType: auxInt8, + argLen: 2, + asm: x86.AVINSERTI64X4, + reg: regInfo{ + inputs: []inputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + {1, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + outputs: []outputInfo{ + {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31 + }, + }, + }, { name: "VPSHLDW128", auxType: auxInt8, @@ -64937,6 +65059,206 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, + { + name: "GetHiFloat32x8", + argLen: 1, + generic: true, + }, + { + name: "GetHiFloat32x16", + argLen: 1, + generic: true, + }, + { + name: "GetHiFloat64x4", + argLen: 1, + generic: true, + }, + { + name: "GetHiFloat64x8", + argLen: 1, + generic: true, + }, + { + name: "GetHiInt8x32", + argLen: 1, + generic: true, + }, + { + name: "GetHiInt8x64", + argLen: 1, + generic: true, + }, + { + name: "GetHiInt16x16", + argLen: 1, + generic: true, + }, + { + name: "GetHiInt16x32", + argLen: 1, + generic: true, + }, + { + name: "GetHiInt32x8", + argLen: 1, + generic: true, + }, + { + name: "GetHiInt32x16", + argLen: 1, + generic: true, + }, + { + name: "GetHiInt64x4", + argLen: 1, + generic: true, + }, + { + name: "GetHiInt64x8", + argLen: 1, + generic: true, + }, + { + name: "GetHiUint8x32", + argLen: 1, + generic: true, + }, + { + name: "GetHiUint8x64", + argLen: 1, + generic: true, + }, + { + name: "GetHiUint16x16", + argLen: 1, + generic: true, + }, + { + name: "GetHiUint16x32", + argLen: 1, + generic: true, + }, + { + name: "GetHiUint32x8", + argLen: 1, + generic: true, + }, + { + name: "GetHiUint32x16", + argLen: 1, + generic: true, + }, + { + name: 
"GetHiUint64x4", + argLen: 1, + generic: true, + }, + { + name: "GetHiUint64x8", + argLen: 1, + generic: true, + }, + { + name: "GetLoFloat32x8", + argLen: 1, + generic: true, + }, + { + name: "GetLoFloat32x16", + argLen: 1, + generic: true, + }, + { + name: "GetLoFloat64x4", + argLen: 1, + generic: true, + }, + { + name: "GetLoFloat64x8", + argLen: 1, + generic: true, + }, + { + name: "GetLoInt8x32", + argLen: 1, + generic: true, + }, + { + name: "GetLoInt8x64", + argLen: 1, + generic: true, + }, + { + name: "GetLoInt16x16", + argLen: 1, + generic: true, + }, + { + name: "GetLoInt16x32", + argLen: 1, + generic: true, + }, + { + name: "GetLoInt32x8", + argLen: 1, + generic: true, + }, + { + name: "GetLoInt32x16", + argLen: 1, + generic: true, + }, + { + name: "GetLoInt64x4", + argLen: 1, + generic: true, + }, + { + name: "GetLoInt64x8", + argLen: 1, + generic: true, + }, + { + name: "GetLoUint8x32", + argLen: 1, + generic: true, + }, + { + name: "GetLoUint8x64", + argLen: 1, + generic: true, + }, + { + name: "GetLoUint16x16", + argLen: 1, + generic: true, + }, + { + name: "GetLoUint16x32", + argLen: 1, + generic: true, + }, + { + name: "GetLoUint32x8", + argLen: 1, + generic: true, + }, + { + name: "GetLoUint32x16", + argLen: 1, + generic: true, + }, + { + name: "GetLoUint64x4", + argLen: 1, + generic: true, + }, + { + name: "GetLoUint64x8", + argLen: 1, + generic: true, + }, { name: "GreaterEqualFloat32x4", argLen: 2, @@ -69073,6 +69395,206 @@ var opcodeTable = [...]opInfo{ argLen: 3, generic: true, }, + { + name: "SetHiFloat32x8", + argLen: 2, + generic: true, + }, + { + name: "SetHiFloat32x16", + argLen: 2, + generic: true, + }, + { + name: "SetHiFloat64x4", + argLen: 2, + generic: true, + }, + { + name: "SetHiFloat64x8", + argLen: 2, + generic: true, + }, + { + name: "SetHiInt8x32", + argLen: 2, + generic: true, + }, + { + name: "SetHiInt8x64", + argLen: 2, + generic: true, + }, + { + name: "SetHiInt16x16", + argLen: 2, + generic: true, + }, + { + name: "SetHiInt16x32", + argLen: 2, + generic: true, + }, + { + name: "SetHiInt32x8", + argLen: 2, + generic: true, + }, + { + name: "SetHiInt32x16", + argLen: 2, + generic: true, + }, + { + name: "SetHiInt64x4", + argLen: 2, + generic: true, + }, + { + name: "SetHiInt64x8", + argLen: 2, + generic: true, + }, + { + name: "SetHiUint8x32", + argLen: 2, + generic: true, + }, + { + name: "SetHiUint8x64", + argLen: 2, + generic: true, + }, + { + name: "SetHiUint16x16", + argLen: 2, + generic: true, + }, + { + name: "SetHiUint16x32", + argLen: 2, + generic: true, + }, + { + name: "SetHiUint32x8", + argLen: 2, + generic: true, + }, + { + name: "SetHiUint32x16", + argLen: 2, + generic: true, + }, + { + name: "SetHiUint64x4", + argLen: 2, + generic: true, + }, + { + name: "SetHiUint64x8", + argLen: 2, + generic: true, + }, + { + name: "SetLoFloat32x8", + argLen: 2, + generic: true, + }, + { + name: "SetLoFloat32x16", + argLen: 2, + generic: true, + }, + { + name: "SetLoFloat64x4", + argLen: 2, + generic: true, + }, + { + name: "SetLoFloat64x8", + argLen: 2, + generic: true, + }, + { + name: "SetLoInt8x32", + argLen: 2, + generic: true, + }, + { + name: "SetLoInt8x64", + argLen: 2, + generic: true, + }, + { + name: "SetLoInt16x16", + argLen: 2, + generic: true, + }, + { + name: "SetLoInt16x32", + argLen: 2, + generic: true, + }, + { + name: "SetLoInt32x8", + argLen: 2, + generic: true, + }, + { + name: "SetLoInt32x16", + argLen: 2, + generic: true, + }, + { + name: "SetLoInt64x4", + argLen: 2, + generic: true, + }, + { + name: "SetLoInt64x8", + argLen: 
2, + generic: true, + }, + { + name: "SetLoUint8x32", + argLen: 2, + generic: true, + }, + { + name: "SetLoUint8x64", + argLen: 2, + generic: true, + }, + { + name: "SetLoUint16x16", + argLen: 2, + generic: true, + }, + { + name: "SetLoUint16x32", + argLen: 2, + generic: true, + }, + { + name: "SetLoUint32x8", + argLen: 2, + generic: true, + }, + { + name: "SetLoUint32x16", + argLen: 2, + generic: true, + }, + { + name: "SetLoUint64x4", + argLen: 2, + generic: true, + }, + { + name: "SetLoUint64x8", + argLen: 2, + generic: true, + }, { name: "ShiftAllLeftInt16x8", argLen: 2, @@ -71389,66 +71911,6 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, - { - name: "Get128Float32x8", - auxType: auxInt8, - argLen: 1, - generic: true, - }, - { - name: "Get128Float64x4", - auxType: auxInt8, - argLen: 1, - generic: true, - }, - { - name: "Get128Int8x32", - auxType: auxInt8, - argLen: 1, - generic: true, - }, - { - name: "Get128Int16x16", - auxType: auxInt8, - argLen: 1, - generic: true, - }, - { - name: "Get128Int32x8", - auxType: auxInt8, - argLen: 1, - generic: true, - }, - { - name: "Get128Int64x4", - auxType: auxInt8, - argLen: 1, - generic: true, - }, - { - name: "Get128Uint8x32", - auxType: auxInt8, - argLen: 1, - generic: true, - }, - { - name: "Get128Uint16x16", - auxType: auxInt8, - argLen: 1, - generic: true, - }, - { - name: "Get128Uint32x8", - auxType: auxInt8, - argLen: 1, - generic: true, - }, - { - name: "Get128Uint64x4", - auxType: auxInt8, - argLen: 1, - generic: true, - }, { name: "GetElemInt8x16", auxType: auxInt8, @@ -71929,66 +72391,6 @@ var opcodeTable = [...]opInfo{ argLen: 2, generic: true, }, - { - name: "Set128Float32x8", - auxType: auxInt8, - argLen: 2, - generic: true, - }, - { - name: "Set128Float64x4", - auxType: auxInt8, - argLen: 2, - generic: true, - }, - { - name: "Set128Int8x32", - auxType: auxInt8, - argLen: 2, - generic: true, - }, - { - name: "Set128Int16x16", - auxType: auxInt8, - argLen: 2, - generic: true, - }, - { - name: "Set128Int32x8", - auxType: auxInt8, - argLen: 2, - generic: true, - }, - { - name: "Set128Int64x4", - auxType: auxInt8, - argLen: 2, - generic: true, - }, - { - name: "Set128Uint8x32", - auxType: auxInt8, - argLen: 2, - generic: true, - }, - { - name: "Set128Uint16x16", - auxType: auxInt8, - argLen: 2, - generic: true, - }, - { - name: "Set128Uint32x8", - auxType: auxInt8, - argLen: 2, - generic: true, - }, - { - name: "Set128Uint64x4", - auxType: auxInt8, - argLen: 2, - generic: true, - }, { name: "SetElemInt8x16", auxType: auxInt8, diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go index e181798245..91fd3fb470 100644 --- a/src/cmd/compile/internal/ssa/rewriteAMD64.go +++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go @@ -1949,36 +1949,6 @@ func rewriteValueAMD64(v *Value) bool { case OpGaloisFieldMulUint8x64: v.Op = OpAMD64VGF2P8MULB512 return true - case OpGet128Float32x8: - v.Op = OpAMD64VEXTRACTF128128 - return true - case OpGet128Float64x4: - v.Op = OpAMD64VEXTRACTF128128 - return true - case OpGet128Int16x16: - v.Op = OpAMD64VEXTRACTI128128 - return true - case OpGet128Int32x8: - v.Op = OpAMD64VEXTRACTI128128 - return true - case OpGet128Int64x4: - v.Op = OpAMD64VEXTRACTI128128 - return true - case OpGet128Int8x32: - v.Op = OpAMD64VEXTRACTI128128 - return true - case OpGet128Uint16x16: - v.Op = OpAMD64VEXTRACTI128128 - return true - case OpGet128Uint32x8: - v.Op = OpAMD64VEXTRACTI128128 - return true - case OpGet128Uint64x4: - v.Op = OpAMD64VEXTRACTI128128 - return 
true - case OpGet128Uint8x32: - v.Op = OpAMD64VEXTRACTI128128 - return true case OpGetCallerPC: v.Op = OpAMD64LoweredGetCallerPC return true @@ -2014,6 +1984,86 @@ func rewriteValueAMD64(v *Value) bool { return true case OpGetG: return rewriteValueAMD64_OpGetG(v) + case OpGetHiFloat32x16: + return rewriteValueAMD64_OpGetHiFloat32x16(v) + case OpGetHiFloat32x8: + return rewriteValueAMD64_OpGetHiFloat32x8(v) + case OpGetHiFloat64x4: + return rewriteValueAMD64_OpGetHiFloat64x4(v) + case OpGetHiFloat64x8: + return rewriteValueAMD64_OpGetHiFloat64x8(v) + case OpGetHiInt16x16: + return rewriteValueAMD64_OpGetHiInt16x16(v) + case OpGetHiInt16x32: + return rewriteValueAMD64_OpGetHiInt16x32(v) + case OpGetHiInt32x16: + return rewriteValueAMD64_OpGetHiInt32x16(v) + case OpGetHiInt32x8: + return rewriteValueAMD64_OpGetHiInt32x8(v) + case OpGetHiInt64x4: + return rewriteValueAMD64_OpGetHiInt64x4(v) + case OpGetHiInt64x8: + return rewriteValueAMD64_OpGetHiInt64x8(v) + case OpGetHiInt8x32: + return rewriteValueAMD64_OpGetHiInt8x32(v) + case OpGetHiInt8x64: + return rewriteValueAMD64_OpGetHiInt8x64(v) + case OpGetHiUint16x16: + return rewriteValueAMD64_OpGetHiUint16x16(v) + case OpGetHiUint16x32: + return rewriteValueAMD64_OpGetHiUint16x32(v) + case OpGetHiUint32x16: + return rewriteValueAMD64_OpGetHiUint32x16(v) + case OpGetHiUint32x8: + return rewriteValueAMD64_OpGetHiUint32x8(v) + case OpGetHiUint64x4: + return rewriteValueAMD64_OpGetHiUint64x4(v) + case OpGetHiUint64x8: + return rewriteValueAMD64_OpGetHiUint64x8(v) + case OpGetHiUint8x32: + return rewriteValueAMD64_OpGetHiUint8x32(v) + case OpGetHiUint8x64: + return rewriteValueAMD64_OpGetHiUint8x64(v) + case OpGetLoFloat32x16: + return rewriteValueAMD64_OpGetLoFloat32x16(v) + case OpGetLoFloat32x8: + return rewriteValueAMD64_OpGetLoFloat32x8(v) + case OpGetLoFloat64x4: + return rewriteValueAMD64_OpGetLoFloat64x4(v) + case OpGetLoFloat64x8: + return rewriteValueAMD64_OpGetLoFloat64x8(v) + case OpGetLoInt16x16: + return rewriteValueAMD64_OpGetLoInt16x16(v) + case OpGetLoInt16x32: + return rewriteValueAMD64_OpGetLoInt16x32(v) + case OpGetLoInt32x16: + return rewriteValueAMD64_OpGetLoInt32x16(v) + case OpGetLoInt32x8: + return rewriteValueAMD64_OpGetLoInt32x8(v) + case OpGetLoInt64x4: + return rewriteValueAMD64_OpGetLoInt64x4(v) + case OpGetLoInt64x8: + return rewriteValueAMD64_OpGetLoInt64x8(v) + case OpGetLoInt8x32: + return rewriteValueAMD64_OpGetLoInt8x32(v) + case OpGetLoInt8x64: + return rewriteValueAMD64_OpGetLoInt8x64(v) + case OpGetLoUint16x16: + return rewriteValueAMD64_OpGetLoUint16x16(v) + case OpGetLoUint16x32: + return rewriteValueAMD64_OpGetLoUint16x32(v) + case OpGetLoUint32x16: + return rewriteValueAMD64_OpGetLoUint32x16(v) + case OpGetLoUint32x8: + return rewriteValueAMD64_OpGetLoUint32x8(v) + case OpGetLoUint64x4: + return rewriteValueAMD64_OpGetLoUint64x4(v) + case OpGetLoUint64x8: + return rewriteValueAMD64_OpGetLoUint64x8(v) + case OpGetLoUint8x32: + return rewriteValueAMD64_OpGetLoUint8x32(v) + case OpGetLoUint8x64: + return rewriteValueAMD64_OpGetLoUint8x64(v) case OpGreaterEqualFloat32x16: return rewriteValueAMD64_OpGreaterEqualFloat32x16(v) case OpGreaterEqualFloat32x4: @@ -4306,36 +4356,6 @@ func rewriteValueAMD64(v *Value) bool { return rewriteValueAMD64_OpSelect1(v) case OpSelectN: return rewriteValueAMD64_OpSelectN(v) - case OpSet128Float32x8: - v.Op = OpAMD64VINSERTF128256 - return true - case OpSet128Float64x4: - v.Op = OpAMD64VINSERTF128256 - return true - case OpSet128Int16x16: - v.Op = OpAMD64VINSERTI128256 - return 
true - case OpSet128Int32x8: - v.Op = OpAMD64VINSERTI128256 - return true - case OpSet128Int64x4: - v.Op = OpAMD64VINSERTI128256 - return true - case OpSet128Int8x32: - v.Op = OpAMD64VINSERTI128256 - return true - case OpSet128Uint16x16: - v.Op = OpAMD64VINSERTI128256 - return true - case OpSet128Uint32x8: - v.Op = OpAMD64VINSERTI128256 - return true - case OpSet128Uint64x4: - v.Op = OpAMD64VINSERTI128256 - return true - case OpSet128Uint8x32: - v.Op = OpAMD64VINSERTI128256 - return true case OpSetElemInt16x8: v.Op = OpAMD64VPINSRW128 return true @@ -4360,6 +4380,86 @@ func rewriteValueAMD64(v *Value) bool { case OpSetElemUint8x16: v.Op = OpAMD64VPINSRB128 return true + case OpSetHiFloat32x16: + return rewriteValueAMD64_OpSetHiFloat32x16(v) + case OpSetHiFloat32x8: + return rewriteValueAMD64_OpSetHiFloat32x8(v) + case OpSetHiFloat64x4: + return rewriteValueAMD64_OpSetHiFloat64x4(v) + case OpSetHiFloat64x8: + return rewriteValueAMD64_OpSetHiFloat64x8(v) + case OpSetHiInt16x16: + return rewriteValueAMD64_OpSetHiInt16x16(v) + case OpSetHiInt16x32: + return rewriteValueAMD64_OpSetHiInt16x32(v) + case OpSetHiInt32x16: + return rewriteValueAMD64_OpSetHiInt32x16(v) + case OpSetHiInt32x8: + return rewriteValueAMD64_OpSetHiInt32x8(v) + case OpSetHiInt64x4: + return rewriteValueAMD64_OpSetHiInt64x4(v) + case OpSetHiInt64x8: + return rewriteValueAMD64_OpSetHiInt64x8(v) + case OpSetHiInt8x32: + return rewriteValueAMD64_OpSetHiInt8x32(v) + case OpSetHiInt8x64: + return rewriteValueAMD64_OpSetHiInt8x64(v) + case OpSetHiUint16x16: + return rewriteValueAMD64_OpSetHiUint16x16(v) + case OpSetHiUint16x32: + return rewriteValueAMD64_OpSetHiUint16x32(v) + case OpSetHiUint32x16: + return rewriteValueAMD64_OpSetHiUint32x16(v) + case OpSetHiUint32x8: + return rewriteValueAMD64_OpSetHiUint32x8(v) + case OpSetHiUint64x4: + return rewriteValueAMD64_OpSetHiUint64x4(v) + case OpSetHiUint64x8: + return rewriteValueAMD64_OpSetHiUint64x8(v) + case OpSetHiUint8x32: + return rewriteValueAMD64_OpSetHiUint8x32(v) + case OpSetHiUint8x64: + return rewriteValueAMD64_OpSetHiUint8x64(v) + case OpSetLoFloat32x16: + return rewriteValueAMD64_OpSetLoFloat32x16(v) + case OpSetLoFloat32x8: + return rewriteValueAMD64_OpSetLoFloat32x8(v) + case OpSetLoFloat64x4: + return rewriteValueAMD64_OpSetLoFloat64x4(v) + case OpSetLoFloat64x8: + return rewriteValueAMD64_OpSetLoFloat64x8(v) + case OpSetLoInt16x16: + return rewriteValueAMD64_OpSetLoInt16x16(v) + case OpSetLoInt16x32: + return rewriteValueAMD64_OpSetLoInt16x32(v) + case OpSetLoInt32x16: + return rewriteValueAMD64_OpSetLoInt32x16(v) + case OpSetLoInt32x8: + return rewriteValueAMD64_OpSetLoInt32x8(v) + case OpSetLoInt64x4: + return rewriteValueAMD64_OpSetLoInt64x4(v) + case OpSetLoInt64x8: + return rewriteValueAMD64_OpSetLoInt64x8(v) + case OpSetLoInt8x32: + return rewriteValueAMD64_OpSetLoInt8x32(v) + case OpSetLoInt8x64: + return rewriteValueAMD64_OpSetLoInt8x64(v) + case OpSetLoUint16x16: + return rewriteValueAMD64_OpSetLoUint16x16(v) + case OpSetLoUint16x32: + return rewriteValueAMD64_OpSetLoUint16x32(v) + case OpSetLoUint32x16: + return rewriteValueAMD64_OpSetLoUint32x16(v) + case OpSetLoUint32x8: + return rewriteValueAMD64_OpSetLoUint32x8(v) + case OpSetLoUint64x4: + return rewriteValueAMD64_OpSetLoUint64x4(v) + case OpSetLoUint64x8: + return rewriteValueAMD64_OpSetLoUint64x8(v) + case OpSetLoUint8x32: + return rewriteValueAMD64_OpSetLoUint8x32(v) + case OpSetLoUint8x64: + return rewriteValueAMD64_OpSetLoUint8x64(v) case OpShiftAllLeftConcatInt16x16: v.Op = OpAMD64VPSHLDW256 
return true @@ -35376,6 +35476,486 @@ func rewriteValueAMD64_OpGetG(v *Value) bool { } return false } +func rewriteValueAMD64_OpGetHiFloat32x16(v *Value) bool { + v_0 := v.Args[0] + // match: (GetHiFloat32x16 x) + // result: (VEXTRACTF64X4256 [1] x) + for { + x := v_0 + v.reset(OpAMD64VEXTRACTF64X4256) + v.AuxInt = int8ToAuxInt(1) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpGetHiFloat32x8(v *Value) bool { + v_0 := v.Args[0] + // match: (GetHiFloat32x8 x) + // result: (VEXTRACTF128128 [1] x) + for { + x := v_0 + v.reset(OpAMD64VEXTRACTF128128) + v.AuxInt = int8ToAuxInt(1) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpGetHiFloat64x4(v *Value) bool { + v_0 := v.Args[0] + // match: (GetHiFloat64x4 x) + // result: (VEXTRACTF128128 [1] x) + for { + x := v_0 + v.reset(OpAMD64VEXTRACTF128128) + v.AuxInt = int8ToAuxInt(1) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpGetHiFloat64x8(v *Value) bool { + v_0 := v.Args[0] + // match: (GetHiFloat64x8 x) + // result: (VEXTRACTF64X4256 [1] x) + for { + x := v_0 + v.reset(OpAMD64VEXTRACTF64X4256) + v.AuxInt = int8ToAuxInt(1) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpGetHiInt16x16(v *Value) bool { + v_0 := v.Args[0] + // match: (GetHiInt16x16 x) + // result: (VEXTRACTI128128 [1] x) + for { + x := v_0 + v.reset(OpAMD64VEXTRACTI128128) + v.AuxInt = int8ToAuxInt(1) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpGetHiInt16x32(v *Value) bool { + v_0 := v.Args[0] + // match: (GetHiInt16x32 x) + // result: (VEXTRACTI64X4256 [1] x) + for { + x := v_0 + v.reset(OpAMD64VEXTRACTI64X4256) + v.AuxInt = int8ToAuxInt(1) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpGetHiInt32x16(v *Value) bool { + v_0 := v.Args[0] + // match: (GetHiInt32x16 x) + // result: (VEXTRACTI64X4256 [1] x) + for { + x := v_0 + v.reset(OpAMD64VEXTRACTI64X4256) + v.AuxInt = int8ToAuxInt(1) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpGetHiInt32x8(v *Value) bool { + v_0 := v.Args[0] + // match: (GetHiInt32x8 x) + // result: (VEXTRACTI128128 [1] x) + for { + x := v_0 + v.reset(OpAMD64VEXTRACTI128128) + v.AuxInt = int8ToAuxInt(1) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpGetHiInt64x4(v *Value) bool { + v_0 := v.Args[0] + // match: (GetHiInt64x4 x) + // result: (VEXTRACTI128128 [1] x) + for { + x := v_0 + v.reset(OpAMD64VEXTRACTI128128) + v.AuxInt = int8ToAuxInt(1) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpGetHiInt64x8(v *Value) bool { + v_0 := v.Args[0] + // match: (GetHiInt64x8 x) + // result: (VEXTRACTI64X4256 [1] x) + for { + x := v_0 + v.reset(OpAMD64VEXTRACTI64X4256) + v.AuxInt = int8ToAuxInt(1) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpGetHiInt8x32(v *Value) bool { + v_0 := v.Args[0] + // match: (GetHiInt8x32 x) + // result: (VEXTRACTI128128 [1] x) + for { + x := v_0 + v.reset(OpAMD64VEXTRACTI128128) + v.AuxInt = int8ToAuxInt(1) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpGetHiInt8x64(v *Value) bool { + v_0 := v.Args[0] + // match: (GetHiInt8x64 x) + // result: (VEXTRACTI64X4256 [1] x) + for { + x := v_0 + v.reset(OpAMD64VEXTRACTI64X4256) + v.AuxInt = int8ToAuxInt(1) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpGetHiUint16x16(v *Value) bool { + v_0 := v.Args[0] + // match: (GetHiUint16x16 x) + // result: (VEXTRACTI128128 [1] x) + for { + x := v_0 + v.reset(OpAMD64VEXTRACTI128128) + v.AuxInt = int8ToAuxInt(1) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpGetHiUint16x32(v *Value) bool { 
+ v_0 := v.Args[0] + // match: (GetHiUint16x32 x) + // result: (VEXTRACTI64X4256 [1] x) + for { + x := v_0 + v.reset(OpAMD64VEXTRACTI64X4256) + v.AuxInt = int8ToAuxInt(1) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpGetHiUint32x16(v *Value) bool { + v_0 := v.Args[0] + // match: (GetHiUint32x16 x) + // result: (VEXTRACTI64X4256 [1] x) + for { + x := v_0 + v.reset(OpAMD64VEXTRACTI64X4256) + v.AuxInt = int8ToAuxInt(1) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpGetHiUint32x8(v *Value) bool { + v_0 := v.Args[0] + // match: (GetHiUint32x8 x) + // result: (VEXTRACTI128128 [1] x) + for { + x := v_0 + v.reset(OpAMD64VEXTRACTI128128) + v.AuxInt = int8ToAuxInt(1) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpGetHiUint64x4(v *Value) bool { + v_0 := v.Args[0] + // match: (GetHiUint64x4 x) + // result: (VEXTRACTI128128 [1] x) + for { + x := v_0 + v.reset(OpAMD64VEXTRACTI128128) + v.AuxInt = int8ToAuxInt(1) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpGetHiUint64x8(v *Value) bool { + v_0 := v.Args[0] + // match: (GetHiUint64x8 x) + // result: (VEXTRACTI64X4256 [1] x) + for { + x := v_0 + v.reset(OpAMD64VEXTRACTI64X4256) + v.AuxInt = int8ToAuxInt(1) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpGetHiUint8x32(v *Value) bool { + v_0 := v.Args[0] + // match: (GetHiUint8x32 x) + // result: (VEXTRACTI128128 [1] x) + for { + x := v_0 + v.reset(OpAMD64VEXTRACTI128128) + v.AuxInt = int8ToAuxInt(1) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpGetHiUint8x64(v *Value) bool { + v_0 := v.Args[0] + // match: (GetHiUint8x64 x) + // result: (VEXTRACTI64X4256 [1] x) + for { + x := v_0 + v.reset(OpAMD64VEXTRACTI64X4256) + v.AuxInt = int8ToAuxInt(1) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpGetLoFloat32x16(v *Value) bool { + v_0 := v.Args[0] + // match: (GetLoFloat32x16 x) + // result: (VEXTRACTF64X4256 [0] x) + for { + x := v_0 + v.reset(OpAMD64VEXTRACTF64X4256) + v.AuxInt = int8ToAuxInt(0) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpGetLoFloat32x8(v *Value) bool { + v_0 := v.Args[0] + // match: (GetLoFloat32x8 x) + // result: (VEXTRACTF128128 [0] x) + for { + x := v_0 + v.reset(OpAMD64VEXTRACTF128128) + v.AuxInt = int8ToAuxInt(0) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpGetLoFloat64x4(v *Value) bool { + v_0 := v.Args[0] + // match: (GetLoFloat64x4 x) + // result: (VEXTRACTF128128 [0] x) + for { + x := v_0 + v.reset(OpAMD64VEXTRACTF128128) + v.AuxInt = int8ToAuxInt(0) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpGetLoFloat64x8(v *Value) bool { + v_0 := v.Args[0] + // match: (GetLoFloat64x8 x) + // result: (VEXTRACTF64X4256 [0] x) + for { + x := v_0 + v.reset(OpAMD64VEXTRACTF64X4256) + v.AuxInt = int8ToAuxInt(0) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpGetLoInt16x16(v *Value) bool { + v_0 := v.Args[0] + // match: (GetLoInt16x16 x) + // result: (VEXTRACTI128128 [0] x) + for { + x := v_0 + v.reset(OpAMD64VEXTRACTI128128) + v.AuxInt = int8ToAuxInt(0) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpGetLoInt16x32(v *Value) bool { + v_0 := v.Args[0] + // match: (GetLoInt16x32 x) + // result: (VEXTRACTI64X4256 [0] x) + for { + x := v_0 + v.reset(OpAMD64VEXTRACTI64X4256) + v.AuxInt = int8ToAuxInt(0) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpGetLoInt32x16(v *Value) bool { + v_0 := v.Args[0] + // match: (GetLoInt32x16 x) + // result: (VEXTRACTI64X4256 [0] x) + for { + x := v_0 + v.reset(OpAMD64VEXTRACTI64X4256) + 
v.AuxInt = int8ToAuxInt(0) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpGetLoInt32x8(v *Value) bool { + v_0 := v.Args[0] + // match: (GetLoInt32x8 x) + // result: (VEXTRACTI128128 [0] x) + for { + x := v_0 + v.reset(OpAMD64VEXTRACTI128128) + v.AuxInt = int8ToAuxInt(0) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpGetLoInt64x4(v *Value) bool { + v_0 := v.Args[0] + // match: (GetLoInt64x4 x) + // result: (VEXTRACTI128128 [0] x) + for { + x := v_0 + v.reset(OpAMD64VEXTRACTI128128) + v.AuxInt = int8ToAuxInt(0) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpGetLoInt64x8(v *Value) bool { + v_0 := v.Args[0] + // match: (GetLoInt64x8 x) + // result: (VEXTRACTI64X4256 [0] x) + for { + x := v_0 + v.reset(OpAMD64VEXTRACTI64X4256) + v.AuxInt = int8ToAuxInt(0) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpGetLoInt8x32(v *Value) bool { + v_0 := v.Args[0] + // match: (GetLoInt8x32 x) + // result: (VEXTRACTI128128 [0] x) + for { + x := v_0 + v.reset(OpAMD64VEXTRACTI128128) + v.AuxInt = int8ToAuxInt(0) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpGetLoInt8x64(v *Value) bool { + v_0 := v.Args[0] + // match: (GetLoInt8x64 x) + // result: (VEXTRACTI64X4256 [0] x) + for { + x := v_0 + v.reset(OpAMD64VEXTRACTI64X4256) + v.AuxInt = int8ToAuxInt(0) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpGetLoUint16x16(v *Value) bool { + v_0 := v.Args[0] + // match: (GetLoUint16x16 x) + // result: (VEXTRACTI128128 [0] x) + for { + x := v_0 + v.reset(OpAMD64VEXTRACTI128128) + v.AuxInt = int8ToAuxInt(0) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpGetLoUint16x32(v *Value) bool { + v_0 := v.Args[0] + // match: (GetLoUint16x32 x) + // result: (VEXTRACTI64X4256 [0] x) + for { + x := v_0 + v.reset(OpAMD64VEXTRACTI64X4256) + v.AuxInt = int8ToAuxInt(0) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpGetLoUint32x16(v *Value) bool { + v_0 := v.Args[0] + // match: (GetLoUint32x16 x) + // result: (VEXTRACTI64X4256 [0] x) + for { + x := v_0 + v.reset(OpAMD64VEXTRACTI64X4256) + v.AuxInt = int8ToAuxInt(0) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpGetLoUint32x8(v *Value) bool { + v_0 := v.Args[0] + // match: (GetLoUint32x8 x) + // result: (VEXTRACTI128128 [0] x) + for { + x := v_0 + v.reset(OpAMD64VEXTRACTI128128) + v.AuxInt = int8ToAuxInt(0) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpGetLoUint64x4(v *Value) bool { + v_0 := v.Args[0] + // match: (GetLoUint64x4 x) + // result: (VEXTRACTI128128 [0] x) + for { + x := v_0 + v.reset(OpAMD64VEXTRACTI128128) + v.AuxInt = int8ToAuxInt(0) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpGetLoUint64x8(v *Value) bool { + v_0 := v.Args[0] + // match: (GetLoUint64x8 x) + // result: (VEXTRACTI64X4256 [0] x) + for { + x := v_0 + v.reset(OpAMD64VEXTRACTI64X4256) + v.AuxInt = int8ToAuxInt(0) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpGetLoUint8x32(v *Value) bool { + v_0 := v.Args[0] + // match: (GetLoUint8x32 x) + // result: (VEXTRACTI128128 [0] x) + for { + x := v_0 + v.reset(OpAMD64VEXTRACTI128128) + v.AuxInt = int8ToAuxInt(0) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpGetLoUint8x64(v *Value) bool { + v_0 := v.Args[0] + // match: (GetLoUint8x64 x) + // result: (VEXTRACTI64X4256 [0] x) + for { + x := v_0 + v.reset(OpAMD64VEXTRACTI64X4256) + v.AuxInt = int8ToAuxInt(0) + v.AddArg(x) + return true + } +} func rewriteValueAMD64_OpGreaterEqualFloat32x16(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ 
-50409,6 +50989,566 @@ func rewriteValueAMD64_OpSelectN(v *Value) bool { } return false } +func rewriteValueAMD64_OpSetHiFloat32x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (SetHiFloat32x16 x y) + // result: (VINSERTF64X4512 [1] x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VINSERTF64X4512) + v.AuxInt = int8ToAuxInt(1) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpSetHiFloat32x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (SetHiFloat32x8 x y) + // result: (VINSERTF128256 [1] x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VINSERTF128256) + v.AuxInt = int8ToAuxInt(1) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpSetHiFloat64x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (SetHiFloat64x4 x y) + // result: (VINSERTF128256 [1] x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VINSERTF128256) + v.AuxInt = int8ToAuxInt(1) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpSetHiFloat64x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (SetHiFloat64x8 x y) + // result: (VINSERTF64X4512 [1] x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VINSERTF64X4512) + v.AuxInt = int8ToAuxInt(1) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpSetHiInt16x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (SetHiInt16x16 x y) + // result: (VINSERTI128256 [1] x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VINSERTI128256) + v.AuxInt = int8ToAuxInt(1) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpSetHiInt16x32(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (SetHiInt16x32 x y) + // result: (VINSERTI64X4512 [1] x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VINSERTI64X4512) + v.AuxInt = int8ToAuxInt(1) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpSetHiInt32x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (SetHiInt32x16 x y) + // result: (VINSERTI64X4512 [1] x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VINSERTI64X4512) + v.AuxInt = int8ToAuxInt(1) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpSetHiInt32x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (SetHiInt32x8 x y) + // result: (VINSERTI128256 [1] x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VINSERTI128256) + v.AuxInt = int8ToAuxInt(1) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpSetHiInt64x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (SetHiInt64x4 x y) + // result: (VINSERTI128256 [1] x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VINSERTI128256) + v.AuxInt = int8ToAuxInt(1) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpSetHiInt64x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (SetHiInt64x8 x y) + // result: (VINSERTI64X4512 [1] x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VINSERTI64X4512) + v.AuxInt = int8ToAuxInt(1) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpSetHiInt8x32(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (SetHiInt8x32 x y) + // result: (VINSERTI128256 [1] x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VINSERTI128256) + v.AuxInt = int8ToAuxInt(1) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpSetHiInt8x64(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (SetHiInt8x64 x y) + // result: (VINSERTI64X4512 [1] x y) + for { + x := v_0 + y := v_1 + 
v.reset(OpAMD64VINSERTI64X4512) + v.AuxInt = int8ToAuxInt(1) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpSetHiUint16x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (SetHiUint16x16 x y) + // result: (VINSERTI128256 [1] x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VINSERTI128256) + v.AuxInt = int8ToAuxInt(1) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpSetHiUint16x32(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (SetHiUint16x32 x y) + // result: (VINSERTI64X4512 [1] x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VINSERTI64X4512) + v.AuxInt = int8ToAuxInt(1) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpSetHiUint32x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (SetHiUint32x16 x y) + // result: (VINSERTI64X4512 [1] x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VINSERTI64X4512) + v.AuxInt = int8ToAuxInt(1) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpSetHiUint32x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (SetHiUint32x8 x y) + // result: (VINSERTI128256 [1] x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VINSERTI128256) + v.AuxInt = int8ToAuxInt(1) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpSetHiUint64x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (SetHiUint64x4 x y) + // result: (VINSERTI128256 [1] x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VINSERTI128256) + v.AuxInt = int8ToAuxInt(1) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpSetHiUint64x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (SetHiUint64x8 x y) + // result: (VINSERTI64X4512 [1] x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VINSERTI64X4512) + v.AuxInt = int8ToAuxInt(1) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpSetHiUint8x32(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (SetHiUint8x32 x y) + // result: (VINSERTI128256 [1] x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VINSERTI128256) + v.AuxInt = int8ToAuxInt(1) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpSetHiUint8x64(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (SetHiUint8x64 x y) + // result: (VINSERTI64X4512 [1] x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VINSERTI64X4512) + v.AuxInt = int8ToAuxInt(1) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpSetLoFloat32x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (SetLoFloat32x16 x y) + // result: (VINSERTF64X4512 [0] x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VINSERTF64X4512) + v.AuxInt = int8ToAuxInt(0) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpSetLoFloat32x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (SetLoFloat32x8 x y) + // result: (VINSERTF128256 [0] x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VINSERTF128256) + v.AuxInt = int8ToAuxInt(0) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpSetLoFloat64x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (SetLoFloat64x4 x y) + // result: (VINSERTF128256 [0] x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VINSERTF128256) + v.AuxInt = int8ToAuxInt(0) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpSetLoFloat64x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (SetLoFloat64x8 x y) + // result: (VINSERTF64X4512 [0] x y) + for 
{ + x := v_0 + y := v_1 + v.reset(OpAMD64VINSERTF64X4512) + v.AuxInt = int8ToAuxInt(0) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpSetLoInt16x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (SetLoInt16x16 x y) + // result: (VINSERTI128256 [0] x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VINSERTI128256) + v.AuxInt = int8ToAuxInt(0) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpSetLoInt16x32(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (SetLoInt16x32 x y) + // result: (VINSERTI64X4512 [0] x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VINSERTI64X4512) + v.AuxInt = int8ToAuxInt(0) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpSetLoInt32x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (SetLoInt32x16 x y) + // result: (VINSERTI64X4512 [0] x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VINSERTI64X4512) + v.AuxInt = int8ToAuxInt(0) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpSetLoInt32x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (SetLoInt32x8 x y) + // result: (VINSERTI128256 [0] x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VINSERTI128256) + v.AuxInt = int8ToAuxInt(0) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpSetLoInt64x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (SetLoInt64x4 x y) + // result: (VINSERTI128256 [0] x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VINSERTI128256) + v.AuxInt = int8ToAuxInt(0) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpSetLoInt64x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (SetLoInt64x8 x y) + // result: (VINSERTI64X4512 [0] x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VINSERTI64X4512) + v.AuxInt = int8ToAuxInt(0) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpSetLoInt8x32(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (SetLoInt8x32 x y) + // result: (VINSERTI128256 [0] x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VINSERTI128256) + v.AuxInt = int8ToAuxInt(0) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpSetLoInt8x64(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (SetLoInt8x64 x y) + // result: (VINSERTI64X4512 [0] x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VINSERTI64X4512) + v.AuxInt = int8ToAuxInt(0) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpSetLoUint16x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (SetLoUint16x16 x y) + // result: (VINSERTI128256 [0] x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VINSERTI128256) + v.AuxInt = int8ToAuxInt(0) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpSetLoUint16x32(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (SetLoUint16x32 x y) + // result: (VINSERTI64X4512 [0] x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VINSERTI64X4512) + v.AuxInt = int8ToAuxInt(0) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpSetLoUint32x16(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (SetLoUint32x16 x y) + // result: (VINSERTI64X4512 [0] x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VINSERTI64X4512) + v.AuxInt = int8ToAuxInt(0) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpSetLoUint32x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (SetLoUint32x8 x y) + // result: (VINSERTI128256 [0] x y) 
+ for { + x := v_0 + y := v_1 + v.reset(OpAMD64VINSERTI128256) + v.AuxInt = int8ToAuxInt(0) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpSetLoUint64x4(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (SetLoUint64x4 x y) + // result: (VINSERTI128256 [0] x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VINSERTI128256) + v.AuxInt = int8ToAuxInt(0) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpSetLoUint64x8(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (SetLoUint64x8 x y) + // result: (VINSERTI64X4512 [0] x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VINSERTI64X4512) + v.AuxInt = int8ToAuxInt(0) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpSetLoUint8x32(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (SetLoUint8x32 x y) + // result: (VINSERTI128256 [0] x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VINSERTI128256) + v.AuxInt = int8ToAuxInt(0) + v.AddArg2(x, y) + return true + } +} +func rewriteValueAMD64_OpSetLoUint8x64(v *Value) bool { + v_1 := v.Args[1] + v_0 := v.Args[0] + // match: (SetLoUint8x64 x y) + // result: (VINSERTI64X4512 [0] x y) + for { + x := v_0 + y := v_1 + v.reset(OpAMD64VINSERTI64X4512) + v.AuxInt = int8ToAuxInt(0) + v.AddArg2(x, y) + return true + } +} func rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt16x16(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] diff --git a/src/cmd/compile/internal/ssagen/simdintrinsics.go b/src/cmd/compile/internal/ssagen/simdintrinsics.go index fb68846347..873bb8e2de 100644 --- a/src/cmd/compile/internal/ssagen/simdintrinsics.go +++ b/src/cmd/compile/internal/ssagen/simdintrinsics.go @@ -478,16 +478,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Uint8x16.GaloisFieldMulMasked", opLen3(ssa.OpGaloisFieldMulMaskedUint8x16, types.TypeVec128), sys.AMD64) addF(simdPackage, "Uint8x32.GaloisFieldMulMasked", opLen3(ssa.OpGaloisFieldMulMaskedUint8x32, types.TypeVec256), sys.AMD64) addF(simdPackage, "Uint8x64.GaloisFieldMulMasked", opLen3(ssa.OpGaloisFieldMulMaskedUint8x64, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float32x8.Get128", opLen1Imm8(ssa.OpGet128Float32x8, types.TypeVec128, 0), sys.AMD64) - addF(simdPackage, "Float64x4.Get128", opLen1Imm8(ssa.OpGet128Float64x4, types.TypeVec128, 0), sys.AMD64) - addF(simdPackage, "Int8x32.Get128", opLen1Imm8(ssa.OpGet128Int8x32, types.TypeVec128, 0), sys.AMD64) - addF(simdPackage, "Int16x16.Get128", opLen1Imm8(ssa.OpGet128Int16x16, types.TypeVec128, 0), sys.AMD64) - addF(simdPackage, "Int32x8.Get128", opLen1Imm8(ssa.OpGet128Int32x8, types.TypeVec128, 0), sys.AMD64) - addF(simdPackage, "Int64x4.Get128", opLen1Imm8(ssa.OpGet128Int64x4, types.TypeVec128, 0), sys.AMD64) - addF(simdPackage, "Uint8x32.Get128", opLen1Imm8(ssa.OpGet128Uint8x32, types.TypeVec128, 0), sys.AMD64) - addF(simdPackage, "Uint16x16.Get128", opLen1Imm8(ssa.OpGet128Uint16x16, types.TypeVec128, 0), sys.AMD64) - addF(simdPackage, "Uint32x8.Get128", opLen1Imm8(ssa.OpGet128Uint32x8, types.TypeVec128, 0), sys.AMD64) - addF(simdPackage, "Uint64x4.Get128", opLen1Imm8(ssa.OpGet128Uint64x4, types.TypeVec128, 0), sys.AMD64) addF(simdPackage, "Int8x16.GetElem", opLen1Imm8(ssa.OpGetElemInt8x16, types.Types[types.TINT8], 0), sys.AMD64) addF(simdPackage, "Int16x8.GetElem", opLen1Imm8(ssa.OpGetElemInt16x8, types.Types[types.TINT16], 0), sys.AMD64) addF(simdPackage, "Int32x4.GetElem", opLen1Imm8(ssa.OpGetElemInt32x4, types.Types[types.TINT32], 0), sys.AMD64) @@ -496,6 +486,46 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Uint16x8.GetElem", opLen1Imm8(ssa.OpGetElemUint16x8, types.Types[types.TUINT16], 0), sys.AMD64) addF(simdPackage, "Uint32x4.GetElem", opLen1Imm8(ssa.OpGetElemUint32x4, types.Types[types.TUINT32], 0), sys.AMD64) addF(simdPackage, "Uint64x2.GetElem", opLen1Imm8(ssa.OpGetElemUint64x2, types.Types[types.TUINT64], 0), sys.AMD64) + addF(simdPackage, "Float32x8.GetHi", opLen1(ssa.OpGetHiFloat32x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Float32x16.GetHi", opLen1(ssa.OpGetHiFloat32x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Float64x4.GetHi", opLen1(ssa.OpGetHiFloat64x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Float64x8.GetHi", opLen1(ssa.OpGetHiFloat64x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int8x32.GetHi", opLen1(ssa.OpGetHiInt8x32, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int8x64.GetHi", opLen1(ssa.OpGetHiInt8x64, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int16x16.GetHi", opLen1(ssa.OpGetHiInt16x16, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int16x32.GetHi", opLen1(ssa.OpGetHiInt16x32, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int32x8.GetHi", opLen1(ssa.OpGetHiInt32x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int32x16.GetHi", opLen1(ssa.OpGetHiInt32x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int64x4.GetHi", opLen1(ssa.OpGetHiInt64x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int64x8.GetHi", opLen1(ssa.OpGetHiInt64x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint8x32.GetHi", opLen1(ssa.OpGetHiUint8x32, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint8x64.GetHi", opLen1(ssa.OpGetHiUint8x64, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint16x16.GetHi", opLen1(ssa.OpGetHiUint16x16, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint16x32.GetHi", opLen1(ssa.OpGetHiUint16x32, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint32x8.GetHi", opLen1(ssa.OpGetHiUint32x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint32x16.GetHi", opLen1(ssa.OpGetHiUint32x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint64x4.GetHi", opLen1(ssa.OpGetHiUint64x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint64x8.GetHi", opLen1(ssa.OpGetHiUint64x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Float32x8.GetLo", opLen1(ssa.OpGetLoFloat32x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Float32x16.GetLo", opLen1(ssa.OpGetLoFloat32x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Float64x4.GetLo", opLen1(ssa.OpGetLoFloat64x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Float64x8.GetLo", opLen1(ssa.OpGetLoFloat64x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int8x32.GetLo", opLen1(ssa.OpGetLoInt8x32, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int8x64.GetLo", opLen1(ssa.OpGetLoInt8x64, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int16x16.GetLo", opLen1(ssa.OpGetLoInt16x16, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int16x32.GetLo", opLen1(ssa.OpGetLoInt16x32, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int32x8.GetLo", opLen1(ssa.OpGetLoInt32x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int32x16.GetLo", opLen1(ssa.OpGetLoInt32x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int64x4.GetLo", opLen1(ssa.OpGetLoInt64x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Int64x8.GetLo", opLen1(ssa.OpGetLoInt64x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint8x32.GetLo", opLen1(ssa.OpGetLoUint8x32, types.TypeVec128), sys.AMD64) + addF(simdPackage, 
"Uint8x64.GetLo", opLen1(ssa.OpGetLoUint8x64, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint16x16.GetLo", opLen1(ssa.OpGetLoUint16x16, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint16x32.GetLo", opLen1(ssa.OpGetLoUint16x32, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint32x8.GetLo", opLen1(ssa.OpGetLoUint32x8, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint32x16.GetLo", opLen1(ssa.OpGetLoUint32x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint64x4.GetLo", opLen1(ssa.OpGetLoUint64x4, types.TypeVec128), sys.AMD64) + addF(simdPackage, "Uint64x8.GetLo", opLen1(ssa.OpGetLoUint64x8, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int8x16.Greater", opLen2(ssa.OpGreaterInt8x16, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int8x32.Greater", opLen2(ssa.OpGreaterInt8x32, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int8x64.Greater", opLen2(ssa.OpGreaterInt8x64, types.TypeVec512), sys.AMD64) @@ -1338,16 +1368,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . addF(simdPackage, "Float64x2.ScaleMasked", opLen3(ssa.OpScaleMaskedFloat64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float64x4.ScaleMasked", opLen3(ssa.OpScaleMaskedFloat64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float64x8.ScaleMasked", opLen3(ssa.OpScaleMaskedFloat64x8, types.TypeVec512), sys.AMD64) - addF(simdPackage, "Float32x8.Set128", opLen2Imm8(ssa.OpSet128Float32x8, types.TypeVec256, 0), sys.AMD64) - addF(simdPackage, "Float64x4.Set128", opLen2Imm8(ssa.OpSet128Float64x4, types.TypeVec256, 0), sys.AMD64) - addF(simdPackage, "Int8x32.Set128", opLen2Imm8(ssa.OpSet128Int8x32, types.TypeVec256, 0), sys.AMD64) - addF(simdPackage, "Int16x16.Set128", opLen2Imm8(ssa.OpSet128Int16x16, types.TypeVec256, 0), sys.AMD64) - addF(simdPackage, "Int32x8.Set128", opLen2Imm8(ssa.OpSet128Int32x8, types.TypeVec256, 0), sys.AMD64) - addF(simdPackage, "Int64x4.Set128", opLen2Imm8(ssa.OpSet128Int64x4, types.TypeVec256, 0), sys.AMD64) - addF(simdPackage, "Uint8x32.Set128", opLen2Imm8(ssa.OpSet128Uint8x32, types.TypeVec256, 0), sys.AMD64) - addF(simdPackage, "Uint16x16.Set128", opLen2Imm8(ssa.OpSet128Uint16x16, types.TypeVec256, 0), sys.AMD64) - addF(simdPackage, "Uint32x8.Set128", opLen2Imm8(ssa.OpSet128Uint32x8, types.TypeVec256, 0), sys.AMD64) - addF(simdPackage, "Uint64x4.Set128", opLen2Imm8(ssa.OpSet128Uint64x4, types.TypeVec256, 0), sys.AMD64) addF(simdPackage, "Int8x16.SetElem", opLen2Imm8(ssa.OpSetElemInt8x16, types.TypeVec128, 0), sys.AMD64) addF(simdPackage, "Int16x8.SetElem", opLen2Imm8(ssa.OpSetElemInt16x8, types.TypeVec128, 0), sys.AMD64) addF(simdPackage, "Int32x4.SetElem", opLen2Imm8(ssa.OpSetElemInt32x4, types.TypeVec128, 0), sys.AMD64) @@ -1356,6 +1376,46 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . 
addF(simdPackage, "Uint16x8.SetElem", opLen2Imm8(ssa.OpSetElemUint16x8, types.TypeVec128, 0), sys.AMD64) addF(simdPackage, "Uint32x4.SetElem", opLen2Imm8(ssa.OpSetElemUint32x4, types.TypeVec128, 0), sys.AMD64) addF(simdPackage, "Uint64x2.SetElem", opLen2Imm8(ssa.OpSetElemUint64x2, types.TypeVec128, 0), sys.AMD64) + addF(simdPackage, "Float32x8.SetHi", opLen2(ssa.OpSetHiFloat32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Float32x16.SetHi", opLen2(ssa.OpSetHiFloat32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Float64x4.SetHi", opLen2(ssa.OpSetHiFloat64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Float64x8.SetHi", opLen2(ssa.OpSetHiFloat64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int8x32.SetHi", opLen2(ssa.OpSetHiInt8x32, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int8x64.SetHi", opLen2(ssa.OpSetHiInt8x64, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int16x16.SetHi", opLen2(ssa.OpSetHiInt16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int16x32.SetHi", opLen2(ssa.OpSetHiInt16x32, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int32x8.SetHi", opLen2(ssa.OpSetHiInt32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int32x16.SetHi", opLen2(ssa.OpSetHiInt32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int64x4.SetHi", opLen2(ssa.OpSetHiInt64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int64x8.SetHi", opLen2(ssa.OpSetHiInt64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint8x32.SetHi", opLen2(ssa.OpSetHiUint8x32, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint8x64.SetHi", opLen2(ssa.OpSetHiUint8x64, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint16x16.SetHi", opLen2(ssa.OpSetHiUint16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint16x32.SetHi", opLen2(ssa.OpSetHiUint16x32, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint32x8.SetHi", opLen2(ssa.OpSetHiUint32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint32x16.SetHi", opLen2(ssa.OpSetHiUint32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint64x4.SetHi", opLen2(ssa.OpSetHiUint64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint64x8.SetHi", opLen2(ssa.OpSetHiUint64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Float32x8.SetLo", opLen2(ssa.OpSetLoFloat32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Float32x16.SetLo", opLen2(ssa.OpSetLoFloat32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Float64x4.SetLo", opLen2(ssa.OpSetLoFloat64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Float64x8.SetLo", opLen2(ssa.OpSetLoFloat64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int8x32.SetLo", opLen2(ssa.OpSetLoInt8x32, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int8x64.SetLo", opLen2(ssa.OpSetLoInt8x64, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int16x16.SetLo", opLen2(ssa.OpSetLoInt16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int16x32.SetLo", opLen2(ssa.OpSetLoInt16x32, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int32x8.SetLo", opLen2(ssa.OpSetLoInt32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int32x16.SetLo", opLen2(ssa.OpSetLoInt32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int64x4.SetLo", opLen2(ssa.OpSetLoInt64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Int64x8.SetLo", opLen2(ssa.OpSetLoInt64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint8x32.SetLo", opLen2(ssa.OpSetLoUint8x32, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint8x64.SetLo", opLen2(ssa.OpSetLoUint8x64, 
types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint16x16.SetLo", opLen2(ssa.OpSetLoUint16x16, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint16x32.SetLo", opLen2(ssa.OpSetLoUint16x32, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint32x8.SetLo", opLen2(ssa.OpSetLoUint32x8, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint32x16.SetLo", opLen2(ssa.OpSetLoUint32x16, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Uint64x4.SetLo", opLen2(ssa.OpSetLoUint64x4, types.TypeVec256), sys.AMD64) + addF(simdPackage, "Uint64x8.SetLo", opLen2(ssa.OpSetLoUint64x8, types.TypeVec512), sys.AMD64) addF(simdPackage, "Int16x8.ShiftAllLeft", opLen2(ssa.OpShiftAllLeftInt16x8, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int16x16.ShiftAllLeft", opLen2(ssa.OpShiftAllLeftInt16x16, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int16x32.ShiftAllLeft", opLen2(ssa.OpShiftAllLeftInt16x32, types.TypeVec512), sys.AMD64) diff --git a/src/simd/ops_amd64.go b/src/simd/ops_amd64.go index 61a708b56e..5eb8fea476 100644 --- a/src/simd/ops_amd64.go +++ b/src/simd/ops_amd64.go @@ -3041,135 +3041,267 @@ func (x Uint8x32) GaloisFieldMulMasked(y Uint8x32, mask Mask8x32) Uint8x32 // Asm: VGF2P8MULB, CPU Feature: AVX512GFNI func (x Uint8x64) GaloisFieldMulMasked(y Uint8x64, mask Mask8x64) Uint8x64 -/* Get128 */ +/* GetElem */ -// Get128 retrieves the upper (1) or lower (0) half of a 256-bit vector, depending on the constant operand. +// GetElem retrieves a single constant-indexed element's value. // // index is expected to be a constant, non-constant value will trigger a runtime panic. // -// Asm: VEXTRACTF128, CPU Feature: AVX -func (x Float32x8) Get128(index uint8) Float32x4 +// Asm: VPEXTRB, CPU Feature: AVX512BW +func (x Int8x16) GetElem(index uint8) int8 -// Get128 retrieves the upper (1) or lower (0) half of a 256-bit vector, depending on the constant operand. +// GetElem retrieves a single constant-indexed element's value. // // index is expected to be a constant, non-constant value will trigger a runtime panic. // -// Asm: VEXTRACTF128, CPU Feature: AVX -func (x Float64x4) Get128(index uint8) Float64x2 +// Asm: VPEXTRW, CPU Feature: AVX512BW +func (x Int16x8) GetElem(index uint8) int16 -// Get128 retrieves the upper (1) or lower (0) half of a 256-bit vector, depending on the constant operand. +// GetElem retrieves a single constant-indexed element's value. // // index is expected to be a constant, non-constant value will trigger a runtime panic. // -// Asm: VEXTRACTI128, CPU Feature: AVX2 -func (x Int8x32) Get128(index uint8) Int8x16 +// Asm: VPEXTRD, CPU Feature: AVX +func (x Int32x4) GetElem(index uint8) int32 -// Get128 retrieves the upper (1) or lower (0) half of a 256-bit vector, depending on the constant operand. +// GetElem retrieves a single constant-indexed element's value. // // index is expected to be a constant, non-constant value will trigger a runtime panic. // -// Asm: VEXTRACTI128, CPU Feature: AVX2 -func (x Int16x16) Get128(index uint8) Int16x8 +// Asm: VPEXTRQ, CPU Feature: AVX +func (x Int64x2) GetElem(index uint8) int64 -// Get128 retrieves the upper (1) or lower (0) half of a 256-bit vector, depending on the constant operand. +// GetElem retrieves a single constant-indexed element's value. // // index is expected to be a constant, non-constant value will trigger a runtime panic. 
// -// Asm: VEXTRACTI128, CPU Feature: AVX2 -func (x Int32x8) Get128(index uint8) Int32x4 +// Asm: VPEXTRB, CPU Feature: AVX512BW +func (x Uint8x16) GetElem(index uint8) uint8 -// Get128 retrieves the upper (1) or lower (0) half of a 256-bit vector, depending on the constant operand. +// GetElem retrieves a single constant-indexed element's value. // // index is expected to be a constant, non-constant value will trigger a runtime panic. // -// Asm: VEXTRACTI128, CPU Feature: AVX2 -func (x Int64x4) Get128(index uint8) Int64x2 +// Asm: VPEXTRW, CPU Feature: AVX512BW +func (x Uint16x8) GetElem(index uint8) uint16 -// Get128 retrieves the upper (1) or lower (0) half of a 256-bit vector, depending on the constant operand. +// GetElem retrieves a single constant-indexed element's value. // // index is expected to be a constant, non-constant value will trigger a runtime panic. // -// Asm: VEXTRACTI128, CPU Feature: AVX2 -func (x Uint8x32) Get128(index uint8) Uint8x16 +// Asm: VPEXTRD, CPU Feature: AVX +func (x Uint32x4) GetElem(index uint8) uint32 -// Get128 retrieves the upper (1) or lower (0) half of a 256-bit vector, depending on the constant operand. +// GetElem retrieves a single constant-indexed element's value. // // index is expected to be a constant, non-constant value will trigger a runtime panic. // +// Asm: VPEXTRQ, CPU Feature: AVX +func (x Uint64x2) GetElem(index uint8) uint64 + +/* GetHi */ + +// GetHi returns the upper half of x. +// +// Asm: VEXTRACTF128, CPU Feature: AVX +func (x Float32x8) GetHi() Float32x4 + +// GetHi returns the upper half of x. +// +// Asm: VEXTRACTF64X4, CPU Feature: AVX512F +func (x Float32x16) GetHi() Float32x8 + +// GetHi returns the upper half of x. +// +// Asm: VEXTRACTF128, CPU Feature: AVX +func (x Float64x4) GetHi() Float64x2 + +// GetHi returns the upper half of x. +// +// Asm: VEXTRACTF64X4, CPU Feature: AVX512F +func (x Float64x8) GetHi() Float64x4 + +// GetHi returns the upper half of x. +// // Asm: VEXTRACTI128, CPU Feature: AVX2 -func (x Uint16x16) Get128(index uint8) Uint16x8 +func (x Int8x32) GetHi() Int8x16 -// Get128 retrieves the upper (1) or lower (0) half of a 256-bit vector, depending on the constant operand. +// GetHi returns the upper half of x. // -// index is expected to be a constant, non-constant value will trigger a runtime panic. +// Asm: VEXTRACTI64X4, CPU Feature: AVX512F +func (x Int8x64) GetHi() Int8x32 + +// GetHi returns the upper half of x. // // Asm: VEXTRACTI128, CPU Feature: AVX2 -func (x Uint32x8) Get128(index uint8) Uint32x4 +func (x Int16x16) GetHi() Int16x8 -// Get128 retrieves the upper (1) or lower (0) half of a 256-bit vector, depending on the constant operand. +// GetHi returns the upper half of x. // -// index is expected to be a constant, non-constant value will trigger a runtime panic. +// Asm: VEXTRACTI64X4, CPU Feature: AVX512F +func (x Int16x32) GetHi() Int16x16 + +// GetHi returns the upper half of x. // // Asm: VEXTRACTI128, CPU Feature: AVX2 -func (x Uint64x4) Get128(index uint8) Uint64x2 +func (x Int32x8) GetHi() Int32x4 -/* GetElem */ +// GetHi returns the upper half of x. +// +// Asm: VEXTRACTI64X4, CPU Feature: AVX512F +func (x Int32x16) GetHi() Int32x8 -// GetElem retrieves a single constant-indexed element's value. +// GetHi returns the upper half of x. // -// index is expected to be a constant, non-constant value will trigger a runtime panic. +// Asm: VEXTRACTI128, CPU Feature: AVX2 +func (x Int64x4) GetHi() Int64x2 + +// GetHi returns the upper half of x. 
// -// Asm: VPEXTRB, CPU Feature: AVX512BW -func (x Int8x16) GetElem(index uint8) int8 +// Asm: VEXTRACTI64X4, CPU Feature: AVX512F +func (x Int64x8) GetHi() Int64x4 -// GetElem retrieves a single constant-indexed element's value. +// GetHi returns the upper half of x. // -// index is expected to be a constant, non-constant value will trigger a runtime panic. +// Asm: VEXTRACTI128, CPU Feature: AVX2 +func (x Uint8x32) GetHi() Uint8x16 + +// GetHi returns the upper half of x. // -// Asm: VPEXTRW, CPU Feature: AVX512BW -func (x Int16x8) GetElem(index uint8) int16 +// Asm: VEXTRACTI64X4, CPU Feature: AVX512F +func (x Uint8x64) GetHi() Uint8x32 -// GetElem retrieves a single constant-indexed element's value. +// GetHi returns the upper half of x. // -// index is expected to be a constant, non-constant value will trigger a runtime panic. +// Asm: VEXTRACTI128, CPU Feature: AVX2 +func (x Uint16x16) GetHi() Uint16x8 + +// GetHi returns the upper half of x. // -// Asm: VPEXTRD, CPU Feature: AVX -func (x Int32x4) GetElem(index uint8) int32 +// Asm: VEXTRACTI64X4, CPU Feature: AVX512F +func (x Uint16x32) GetHi() Uint16x16 -// GetElem retrieves a single constant-indexed element's value. +// GetHi returns the upper half of x. // -// index is expected to be a constant, non-constant value will trigger a runtime panic. +// Asm: VEXTRACTI128, CPU Feature: AVX2 +func (x Uint32x8) GetHi() Uint32x4 + +// GetHi returns the upper half of x. // -// Asm: VPEXTRQ, CPU Feature: AVX -func (x Int64x2) GetElem(index uint8) int64 +// Asm: VEXTRACTI64X4, CPU Feature: AVX512F +func (x Uint32x16) GetHi() Uint32x8 -// GetElem retrieves a single constant-indexed element's value. +// GetHi returns the upper half of x. // -// index is expected to be a constant, non-constant value will trigger a runtime panic. +// Asm: VEXTRACTI128, CPU Feature: AVX2 +func (x Uint64x4) GetHi() Uint64x2 + +// GetHi returns the upper half of x. // -// Asm: VPEXTRB, CPU Feature: AVX512BW -func (x Uint8x16) GetElem(index uint8) uint8 +// Asm: VEXTRACTI64X4, CPU Feature: AVX512F +func (x Uint64x8) GetHi() Uint64x4 -// GetElem retrieves a single constant-indexed element's value. +/* GetLo */ + +// GetLo returns the lower half of x. // -// index is expected to be a constant, non-constant value will trigger a runtime panic. +// Asm: VEXTRACTF128, CPU Feature: AVX +func (x Float32x8) GetLo() Float32x4 + +// GetLo returns the lower half of x. // -// Asm: VPEXTRW, CPU Feature: AVX512BW -func (x Uint16x8) GetElem(index uint8) uint16 +// Asm: VEXTRACTF64X4, CPU Feature: AVX512F +func (x Float32x16) GetLo() Float32x8 -// GetElem retrieves a single constant-indexed element's value. +// GetLo returns the lower half of x. // -// index is expected to be a constant, non-constant value will trigger a runtime panic. +// Asm: VEXTRACTF128, CPU Feature: AVX +func (x Float64x4) GetLo() Float64x2 + +// GetLo returns the lower half of x. // -// Asm: VPEXTRD, CPU Feature: AVX -func (x Uint32x4) GetElem(index uint8) uint32 +// Asm: VEXTRACTF64X4, CPU Feature: AVX512F +func (x Float64x8) GetLo() Float64x4 -// GetElem retrieves a single constant-indexed element's value. +// GetLo returns the lower half of x. // -// index is expected to be a constant, non-constant value will trigger a runtime panic. +// Asm: VEXTRACTI128, CPU Feature: AVX2 +func (x Int8x32) GetLo() Int8x16 + +// GetLo returns the lower half of x. 
// -// Asm: VPEXTRQ, CPU Feature: AVX -func (x Uint64x2) GetElem(index uint8) uint64 +// Asm: VEXTRACTI64X4, CPU Feature: AVX512F +func (x Int8x64) GetLo() Int8x32 + +// GetLo returns the lower half of x. +// +// Asm: VEXTRACTI128, CPU Feature: AVX2 +func (x Int16x16) GetLo() Int16x8 + +// GetLo returns the lower half of x. +// +// Asm: VEXTRACTI64X4, CPU Feature: AVX512F +func (x Int16x32) GetLo() Int16x16 + +// GetLo returns the lower half of x. +// +// Asm: VEXTRACTI128, CPU Feature: AVX2 +func (x Int32x8) GetLo() Int32x4 + +// GetLo returns the lower half of x. +// +// Asm: VEXTRACTI64X4, CPU Feature: AVX512F +func (x Int32x16) GetLo() Int32x8 + +// GetLo returns the lower half of x. +// +// Asm: VEXTRACTI128, CPU Feature: AVX2 +func (x Int64x4) GetLo() Int64x2 + +// GetLo returns the lower half of x. +// +// Asm: VEXTRACTI64X4, CPU Feature: AVX512F +func (x Int64x8) GetLo() Int64x4 + +// GetLo returns the lower half of x. +// +// Asm: VEXTRACTI128, CPU Feature: AVX2 +func (x Uint8x32) GetLo() Uint8x16 + +// GetLo returns the lower half of x. +// +// Asm: VEXTRACTI64X4, CPU Feature: AVX512F +func (x Uint8x64) GetLo() Uint8x32 + +// GetLo returns the lower half of x. +// +// Asm: VEXTRACTI128, CPU Feature: AVX2 +func (x Uint16x16) GetLo() Uint16x8 + +// GetLo returns the lower half of x. +// +// Asm: VEXTRACTI64X4, CPU Feature: AVX512F +func (x Uint16x32) GetLo() Uint16x16 + +// GetLo returns the lower half of x. +// +// Asm: VEXTRACTI128, CPU Feature: AVX2 +func (x Uint32x8) GetLo() Uint32x4 + +// GetLo returns the lower half of x. +// +// Asm: VEXTRACTI64X4, CPU Feature: AVX512F +func (x Uint32x16) GetLo() Uint32x8 + +// GetLo returns the lower half of x. +// +// Asm: VEXTRACTI128, CPU Feature: AVX2 +func (x Uint64x4) GetLo() Uint64x2 + +// GetLo returns the lower half of x. +// +// Asm: VEXTRACTI64X4, CPU Feature: AVX512F +func (x Uint64x8) GetLo() Uint64x4 /* Greater */ @@ -8757,135 +8889,267 @@ func (x Float64x4) ScaleMasked(y Float64x4, mask Mask64x4) Float64x4 // Asm: VSCALEFPD, CPU Feature: AVX512F func (x Float64x8) ScaleMasked(y Float64x8, mask Mask64x8) Float64x8 -/* Set128 */ +/* SetElem */ -// Set128 combines a 128-bit vector with a 256-bit vector, where the constant operand specifies whether the low (0) or high (1) half is receives the smaller vector. +// SetElem sets a single constant-indexed element's value. // // index is expected to be a constant, non-constant value will trigger a runtime panic. // -// Asm: VINSERTF128, CPU Feature: AVX -func (x Float32x8) Set128(index uint8, y Float32x4) Float32x8 +// Asm: VPINSRB, CPU Feature: AVX +func (x Int8x16) SetElem(index uint8, y int8) Int8x16 -// Set128 combines a 128-bit vector with a 256-bit vector, where the constant operand specifies whether the low (0) or high (1) half is receives the smaller vector. +// SetElem sets a single constant-indexed element's value. // // index is expected to be a constant, non-constant value will trigger a runtime panic. // -// Asm: VINSERTF128, CPU Feature: AVX -func (x Float64x4) Set128(index uint8, y Float64x2) Float64x4 +// Asm: VPINSRW, CPU Feature: AVX +func (x Int16x8) SetElem(index uint8, y int16) Int16x8 -// Set128 combines a 128-bit vector with a 256-bit vector, where the constant operand specifies whether the low (0) or high (1) half is receives the smaller vector. +// SetElem sets a single constant-indexed element's value. // // index is expected to be a constant, non-constant value will trigger a runtime panic. 
// -// Asm: VINSERTI128, CPU Feature: AVX2 -func (x Int8x32) Set128(index uint8, y Int8x16) Int8x32 +// Asm: VPINSRD, CPU Feature: AVX +func (x Int32x4) SetElem(index uint8, y int32) Int32x4 -// Set128 combines a 128-bit vector with a 256-bit vector, where the constant operand specifies whether the low (0) or high (1) half is receives the smaller vector. +// SetElem sets a single constant-indexed element's value. // // index is expected to be a constant, non-constant value will trigger a runtime panic. // -// Asm: VINSERTI128, CPU Feature: AVX2 -func (x Int16x16) Set128(index uint8, y Int16x8) Int16x16 +// Asm: VPINSRQ, CPU Feature: AVX +func (x Int64x2) SetElem(index uint8, y int64) Int64x2 -// Set128 combines a 128-bit vector with a 256-bit vector, where the constant operand specifies whether the low (0) or high (1) half is receives the smaller vector. +// SetElem sets a single constant-indexed element's value. // // index is expected to be a constant, non-constant value will trigger a runtime panic. // -// Asm: VINSERTI128, CPU Feature: AVX2 -func (x Int32x8) Set128(index uint8, y Int32x4) Int32x8 +// Asm: VPINSRB, CPU Feature: AVX +func (x Uint8x16) SetElem(index uint8, y uint8) Uint8x16 -// Set128 combines a 128-bit vector with a 256-bit vector, where the constant operand specifies whether the low (0) or high (1) half is receives the smaller vector. +// SetElem sets a single constant-indexed element's value. // // index is expected to be a constant, non-constant value will trigger a runtime panic. // -// Asm: VINSERTI128, CPU Feature: AVX2 -func (x Int64x4) Set128(index uint8, y Int64x2) Int64x4 +// Asm: VPINSRW, CPU Feature: AVX +func (x Uint16x8) SetElem(index uint8, y uint16) Uint16x8 -// Set128 combines a 128-bit vector with a 256-bit vector, where the constant operand specifies whether the low (0) or high (1) half is receives the smaller vector. +// SetElem sets a single constant-indexed element's value. // // index is expected to be a constant, non-constant value will trigger a runtime panic. // -// Asm: VINSERTI128, CPU Feature: AVX2 -func (x Uint8x32) Set128(index uint8, y Uint8x16) Uint8x32 +// Asm: VPINSRD, CPU Feature: AVX +func (x Uint32x4) SetElem(index uint8, y uint32) Uint32x4 -// Set128 combines a 128-bit vector with a 256-bit vector, where the constant operand specifies whether the low (0) or high (1) half is receives the smaller vector. +// SetElem sets a single constant-indexed element's value. // // index is expected to be a constant, non-constant value will trigger a runtime panic. // +// Asm: VPINSRQ, CPU Feature: AVX +func (x Uint64x2) SetElem(index uint8, y uint64) Uint64x2 + +/* SetHi */ + +// SetHi returns x with its upper half set to y. +// +// Asm: VINSERTF128, CPU Feature: AVX +func (x Float32x8) SetHi(y Float32x4) Float32x8 + +// SetHi returns x with its upper half set to y. +// +// Asm: VINSERTF64X4, CPU Feature: AVX512F +func (x Float32x16) SetHi(y Float32x8) Float32x16 + +// SetHi returns x with its upper half set to y. +// +// Asm: VINSERTF128, CPU Feature: AVX +func (x Float64x4) SetHi(y Float64x2) Float64x4 + +// SetHi returns x with its upper half set to y. +// +// Asm: VINSERTF64X4, CPU Feature: AVX512F +func (x Float64x8) SetHi(y Float64x4) Float64x8 + +// SetHi returns x with its upper half set to y. 
+// // Asm: VINSERTI128, CPU Feature: AVX2 -func (x Uint16x16) Set128(index uint8, y Uint16x8) Uint16x16 +func (x Int8x32) SetHi(y Int8x16) Int8x32 -// Set128 combines a 128-bit vector with a 256-bit vector, where the constant operand specifies whether the low (0) or high (1) half is receives the smaller vector. +// SetHi returns x with its upper half set to y. // -// index is expected to be a constant, non-constant value will trigger a runtime panic. +// Asm: VINSERTI64X4, CPU Feature: AVX512F +func (x Int8x64) SetHi(y Int8x32) Int8x64 + +// SetHi returns x with its upper half set to y. // // Asm: VINSERTI128, CPU Feature: AVX2 -func (x Uint32x8) Set128(index uint8, y Uint32x4) Uint32x8 +func (x Int16x16) SetHi(y Int16x8) Int16x16 -// Set128 combines a 128-bit vector with a 256-bit vector, where the constant operand specifies whether the low (0) or high (1) half is receives the smaller vector. +// SetHi returns x with its upper half set to y. // -// index is expected to be a constant, non-constant value will trigger a runtime panic. +// Asm: VINSERTI64X4, CPU Feature: AVX512F +func (x Int16x32) SetHi(y Int16x16) Int16x32 + +// SetHi returns x with its upper half set to y. // // Asm: VINSERTI128, CPU Feature: AVX2 -func (x Uint64x4) Set128(index uint8, y Uint64x2) Uint64x4 +func (x Int32x8) SetHi(y Int32x4) Int32x8 -/* SetElem */ +// SetHi returns x with its upper half set to y. +// +// Asm: VINSERTI64X4, CPU Feature: AVX512F +func (x Int32x16) SetHi(y Int32x8) Int32x16 -// SetElem sets a single constant-indexed element's value. +// SetHi returns x with its upper half set to y. // -// index is expected to be a constant, non-constant value will trigger a runtime panic. +// Asm: VINSERTI128, CPU Feature: AVX2 +func (x Int64x4) SetHi(y Int64x2) Int64x4 + +// SetHi returns x with its upper half set to y. // -// Asm: VPINSRB, CPU Feature: AVX -func (x Int8x16) SetElem(index uint8, y int8) Int8x16 +// Asm: VINSERTI64X4, CPU Feature: AVX512F +func (x Int64x8) SetHi(y Int64x4) Int64x8 -// SetElem sets a single constant-indexed element's value. +// SetHi returns x with its upper half set to y. // -// index is expected to be a constant, non-constant value will trigger a runtime panic. +// Asm: VINSERTI128, CPU Feature: AVX2 +func (x Uint8x32) SetHi(y Uint8x16) Uint8x32 + +// SetHi returns x with its upper half set to y. // -// Asm: VPINSRW, CPU Feature: AVX -func (x Int16x8) SetElem(index uint8, y int16) Int16x8 +// Asm: VINSERTI64X4, CPU Feature: AVX512F +func (x Uint8x64) SetHi(y Uint8x32) Uint8x64 -// SetElem sets a single constant-indexed element's value. +// SetHi returns x with its upper half set to y. // -// index is expected to be a constant, non-constant value will trigger a runtime panic. +// Asm: VINSERTI128, CPU Feature: AVX2 +func (x Uint16x16) SetHi(y Uint16x8) Uint16x16 + +// SetHi returns x with its upper half set to y. // -// Asm: VPINSRD, CPU Feature: AVX -func (x Int32x4) SetElem(index uint8, y int32) Int32x4 +// Asm: VINSERTI64X4, CPU Feature: AVX512F +func (x Uint16x32) SetHi(y Uint16x16) Uint16x32 -// SetElem sets a single constant-indexed element's value. +// SetHi returns x with its upper half set to y. // -// index is expected to be a constant, non-constant value will trigger a runtime panic. +// Asm: VINSERTI128, CPU Feature: AVX2 +func (x Uint32x8) SetHi(y Uint32x4) Uint32x8 + +// SetHi returns x with its upper half set to y. 
// -// Asm: VPINSRQ, CPU Feature: AVX -func (x Int64x2) SetElem(index uint8, y int64) Int64x2 +// Asm: VINSERTI64X4, CPU Feature: AVX512F +func (x Uint32x16) SetHi(y Uint32x8) Uint32x16 -// SetElem sets a single constant-indexed element's value. +// SetHi returns x with its upper half set to y. // -// index is expected to be a constant, non-constant value will trigger a runtime panic. +// Asm: VINSERTI128, CPU Feature: AVX2 +func (x Uint64x4) SetHi(y Uint64x2) Uint64x4 + +// SetHi returns x with its upper half set to y. // -// Asm: VPINSRB, CPU Feature: AVX -func (x Uint8x16) SetElem(index uint8, y uint8) Uint8x16 +// Asm: VINSERTI64X4, CPU Feature: AVX512F +func (x Uint64x8) SetHi(y Uint64x4) Uint64x8 -// SetElem sets a single constant-indexed element's value. +/* SetLo */ + +// SetLo returns x with its lower half set to y. // -// index is expected to be a constant, non-constant value will trigger a runtime panic. +// Asm: VINSERTF128, CPU Feature: AVX +func (x Float32x8) SetLo(y Float32x4) Float32x8 + +// SetLo returns x with its lower half set to y. // -// Asm: VPINSRW, CPU Feature: AVX -func (x Uint16x8) SetElem(index uint8, y uint16) Uint16x8 +// Asm: VINSERTF64X4, CPU Feature: AVX512F +func (x Float32x16) SetLo(y Float32x8) Float32x16 -// SetElem sets a single constant-indexed element's value. +// SetLo returns x with its lower half set to y. // -// index is expected to be a constant, non-constant value will trigger a runtime panic. +// Asm: VINSERTF128, CPU Feature: AVX +func (x Float64x4) SetLo(y Float64x2) Float64x4 + +// SetLo returns x with its lower half set to y. // -// Asm: VPINSRD, CPU Feature: AVX -func (x Uint32x4) SetElem(index uint8, y uint32) Uint32x4 +// Asm: VINSERTF64X4, CPU Feature: AVX512F +func (x Float64x8) SetLo(y Float64x4) Float64x8 -// SetElem sets a single constant-indexed element's value. +// SetLo returns x with its lower half set to y. // -// index is expected to be a constant, non-constant value will trigger a runtime panic. +// Asm: VINSERTI128, CPU Feature: AVX2 +func (x Int8x32) SetLo(y Int8x16) Int8x32 + +// SetLo returns x with its lower half set to y. // -// Asm: VPINSRQ, CPU Feature: AVX -func (x Uint64x2) SetElem(index uint8, y uint64) Uint64x2 +// Asm: VINSERTI64X4, CPU Feature: AVX512F +func (x Int8x64) SetLo(y Int8x32) Int8x64 + +// SetLo returns x with its lower half set to y. +// +// Asm: VINSERTI128, CPU Feature: AVX2 +func (x Int16x16) SetLo(y Int16x8) Int16x16 + +// SetLo returns x with its lower half set to y. +// +// Asm: VINSERTI64X4, CPU Feature: AVX512F +func (x Int16x32) SetLo(y Int16x16) Int16x32 + +// SetLo returns x with its lower half set to y. +// +// Asm: VINSERTI128, CPU Feature: AVX2 +func (x Int32x8) SetLo(y Int32x4) Int32x8 + +// SetLo returns x with its lower half set to y. +// +// Asm: VINSERTI64X4, CPU Feature: AVX512F +func (x Int32x16) SetLo(y Int32x8) Int32x16 + +// SetLo returns x with its lower half set to y. +// +// Asm: VINSERTI128, CPU Feature: AVX2 +func (x Int64x4) SetLo(y Int64x2) Int64x4 + +// SetLo returns x with its lower half set to y. +// +// Asm: VINSERTI64X4, CPU Feature: AVX512F +func (x Int64x8) SetLo(y Int64x4) Int64x8 + +// SetLo returns x with its lower half set to y. +// +// Asm: VINSERTI128, CPU Feature: AVX2 +func (x Uint8x32) SetLo(y Uint8x16) Uint8x32 + +// SetLo returns x with its lower half set to y. +// +// Asm: VINSERTI64X4, CPU Feature: AVX512F +func (x Uint8x64) SetLo(y Uint8x32) Uint8x64 + +// SetLo returns x with its lower half set to y. 
+//
+// Asm: VINSERTI128, CPU Feature: AVX2
+func (x Uint16x16) SetLo(y Uint16x8) Uint16x16
+
+// SetLo returns x with its lower half set to y.
+//
+// Asm: VINSERTI64X4, CPU Feature: AVX512F
+func (x Uint16x32) SetLo(y Uint16x16) Uint16x32
+
+// SetLo returns x with its lower half set to y.
+//
+// Asm: VINSERTI128, CPU Feature: AVX2
+func (x Uint32x8) SetLo(y Uint32x4) Uint32x8
+
+// SetLo returns x with its lower half set to y.
+//
+// Asm: VINSERTI64X4, CPU Feature: AVX512F
+func (x Uint32x16) SetLo(y Uint32x8) Uint32x16
+
+// SetLo returns x with its lower half set to y.
+//
+// Asm: VINSERTI128, CPU Feature: AVX2
+func (x Uint64x4) SetLo(y Uint64x2) Uint64x4
+
+// SetLo returns x with its lower half set to y.
+//
+// Asm: VINSERTI64X4, CPU Feature: AVX512F
+func (x Uint64x8) SetLo(y Uint64x4) Uint64x8
 /* ShiftAllLeft */
diff --git a/src/simd/simd_test.go b/src/simd/simd_test.go
index 1df27f8757..5718347838 100644
--- a/src/simd/simd_test.go
+++ b/src/simd/simd_test.go
@@ -257,93 +257,6 @@ func TestSlicesInt8GetElem(t *testing.T) {
 }
-func TestSlicesInt8Set128(t *testing.T) {
-	a := []int8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
-		17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}
-	v := simd.LoadInt8x16Slice(a) // 1-16
-	u := simd.LoadInt8x32Slice(a) // 1-32
-
-	w := u.Set128(1, v) // 1-16:1-16
-
-	b := make([]int8, 32, 32)
-	w.StoreSlice(b)
-
-	checkSlices(t, a, b[:16])
-	checkSlices(t, a, b[16:])
-}
-
-func TestSlicesInt8Get128(t *testing.T) {
-	a := []int8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
-		17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}
-	u := simd.LoadInt8x32Slice(a) // 1-32
-	v := u.Get128(0) // 1-16
-	w := u.Get128(1) // 17-32
-
-	b := make([]int8, 32, 32)
-	v.StoreSlice(b[:16])
-	w.StoreSlice(b[16:])
-
-	checkSlices(t, a, b)
-}
-
-func TestSlicesFloat32Set128(t *testing.T) {
-	a := []float32{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
-		17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}
-	v := simd.LoadFloat32x4Slice(a) // 1-4
-	u := simd.LoadFloat32x8Slice(a) // 1-4
-
-	w := u.Set128(1, v) // 1-4:1-4
-
-	b := make([]float32, 8, 8)
-	w.StoreSlice(b)
-
-	checkSlices(t, a, b[:4])
-	checkSlices(t, a, b[4:])
-}
-
-func TestSlicesFloat32Get128(t *testing.T) {
-	a := []float32{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
-		17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}
-	u := simd.LoadFloat32x8Slice(a) // 1-8
-	v := u.Get128(0) // 1-4
-	w := u.Get128(1) // 5-8
-
-	b := make([]float32, 8, 8)
-	v.StoreSlice(b[:4])
-	w.StoreSlice(b[4:])
-
-	checkSlices(t, a, b)
-}
-
-func TestSlicesFloat64Set128(t *testing.T) {
-	a := []float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
-		17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}
-	v := simd.LoadFloat64x2Slice(a) // 1-2
-	u := simd.LoadFloat64x4Slice(a) // 1-2
-
-	w := u.Set128(1, v) // 1-2:1-2
-
-	b := make([]float64, 4, 4)
-	w.StoreSlice(b)
-
-	checkSlices(t, a, b[:2])
-	checkSlices(t, a, b[2:])
-}
-
-func TestSlicesFloat64Get128(t *testing.T) {
-	a := []float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
-		17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}
-	u := simd.LoadFloat64x4Slice(a) // 1-4
-	v := u.Get128(0) // 1-2
-	w := u.Get128(1) // 3-4
-
-	b := make([]float64, 4, 4)
-	v.StoreSlice(b[:2])
-	w.StoreSlice(b[2:])
-
-	checkSlices(t, a, b)
-}
-
 func TestSlicesInt8TooShortLoad(t *testing.T) {
 	defer func() {
 		if r := recover(); r != nil {
diff --git a/src/simd/slicepart_amd64.go b/src/simd/slicepart_amd64.go
index 6d0b5a41f2..206d3b98cb 100644
--- a/src/simd/slicepart_amd64.go
+++ b/src/simd/slicepart_amd64.go
@@ -76,9 +76,9 @@ func LoadInt8x32SlicePart(s []int8) Int8x32 {
 		return x
 	}
 	if l > 16 {
-		return x.Set128(0, LoadInt8x16Slice(s)).Set128(1, LoadInt8x16SlicePart(s[16:]))
+		return x.SetLo(LoadInt8x16Slice(s)).SetHi(LoadInt8x16SlicePart(s[16:]))
 	} else {
-		return x.Set128(0, LoadInt8x16SlicePart(s))
+		return x.SetLo(LoadInt8x16SlicePart(s))
 	}
 }
@@ -95,9 +95,9 @@ func LoadInt16x16SlicePart(s []int16) Int16x16 {
 		return x
 	}
 	if l > 8 {
-		return x.Set128(0, LoadInt16x8Slice(s)).Set128(1, LoadInt16x8SlicePart(s[8:]))
+		return x.SetLo(LoadInt16x8Slice(s)).SetHi(LoadInt16x8SlicePart(s[8:]))
 	} else {
-		return x.Set128(0, LoadInt16x8SlicePart(s))
+		return x.SetLo(LoadInt16x8SlicePart(s))
 	}
 }
@@ -114,10 +114,10 @@ func (x Int8x32) StoreSlicePart(s []int8) {
 		return
 	}
 	if l > 16 {
-		x.Get128(0).StoreSlice(s)
-		x.Get128(1).StoreSlicePart(s[16:])
+		x.GetLo().StoreSlice(s)
+		x.GetHi().StoreSlicePart(s[16:])
 	} else { // fits in one
-		x.Get128(0).StoreSlicePart(s)
+		x.GetLo().StoreSlicePart(s)
 	}
 }
@@ -134,10 +134,10 @@ func (x Int16x16) StoreSlicePart(s []int16) {
 		return
 	}
 	if l > 8 {
-		x.Get128(0).StoreSlice(s)
-		x.Get128(1).StoreSlicePart(s[8:])
+		x.GetLo().StoreSlice(s)
+		x.GetHi().StoreSlicePart(s[8:])
 	} else { // fits in one
-		x.Get128(0).StoreSlicePart(s)
+		x.GetLo().StoreSlicePart(s)
	}
 }
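
For reference, a minimal usage sketch of the renamed API (not part of this CL). It assumes the experimental simd package on this branch (import "simd", behind a GOEXPERIMENT gate) and the slice load/store helpers that the removed tests above used, so read it as an illustration of the Get128/Set128 -> (Get|Set)(Lo|Hi) migration rather than committed code.

	package main

	import (
		"fmt"
		"simd"
	)

	func main() {
		a := []int8{
			1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
			17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
		}
		u := simd.LoadInt8x32Slice(a) // one 256-bit vector holding 1..32

		lo := u.GetLo() // was u.Get128(0); holds 1..16
		hi := u.GetHi() // was u.Get128(1); holds 17..32

		c := make([]int8, 16)
		hi.StoreSlice(c)
		fmt.Println(c) // 17 .. 32

		w := u.SetHi(lo) // was u.Set128(1, lo); both halves now hold 1..16

		b := make([]int8, 32)
		w.StoreSlice(b)
		fmt.Println(b[:16], b[16:]) // identical halves, 1 .. 16
	}

As the rewrite rules above show, the Lo/Hi accessors lower to VEXTRACTF128/VEXTRACTI128 (or the AVX-512 VEXTRACT*64X4 forms for 512-bit vectors) with a fixed 0 or 1 immediate, and SetLo/SetHi lower to the matching VINSERT* instructions, so the explicit index argument of Get128/Set128 is no longer part of the API surface.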