Cypherpunks repositories - gostls13.git/commitdiff
[dev.simd] cmd/compile, simd: (Set|Get)(Lo|Hi)
author Junyang Shao <shaojunyang@google.com>
Tue, 5 Aug 2025 19:07:51 +0000 (19:07 +0000)
committer Junyang Shao <shaojunyang@google.com>
Wed, 6 Aug 2025 20:50:50 +0000 (13:50 -0700)
This CL is generated by CL 693335.

Change-Id: Ie9adda526573f979ec7e4f535033ba29236cc5cb
Reviewed-on: https://go-review.googlesource.com/c/go/+/693355
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: David Chase <drchase@google.com>
src/cmd/compile/internal/amd64/simdssa.go
src/cmd/compile/internal/ssa/_gen/simdAMD64.rules
src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go
src/cmd/compile/internal/ssa/_gen/simdgenericOps.go
src/cmd/compile/internal/ssa/opGen.go
src/cmd/compile/internal/ssa/rewriteAMD64.go
src/cmd/compile/internal/ssagen/simdintrinsics.go
src/simd/ops_amd64.go
src/simd/simd_test.go
src/simd/slicepart_amd64.go

index e0571d2cc37a96db8dbc774f3011261c732d08b5..7a0a0be58fa32908318c58eee274c1596626acad 100644 (file)
@@ -685,7 +685,9 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
                ssa.OpAMD64VREDUCEPD256,
                ssa.OpAMD64VREDUCEPD512,
                ssa.OpAMD64VEXTRACTF128128,
+               ssa.OpAMD64VEXTRACTF64X4256,
                ssa.OpAMD64VEXTRACTI128128,
+               ssa.OpAMD64VEXTRACTI64X4256,
                ssa.OpAMD64VPROLD128,
                ssa.OpAMD64VPROLD256,
                ssa.OpAMD64VPROLD512,
@@ -794,7 +796,9 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
                ssa.OpAMD64VGF2P8AFFINEINVQB256,
                ssa.OpAMD64VGF2P8AFFINEINVQB512,
                ssa.OpAMD64VINSERTF128256,
+               ssa.OpAMD64VINSERTF64X4512,
                ssa.OpAMD64VINSERTI128256,
+               ssa.OpAMD64VINSERTI64X4512,
                ssa.OpAMD64VPSHLDW128,
                ssa.OpAMD64VPSHLDW256,
                ssa.OpAMD64VPSHLDW512,
index 9a4c82c0afcd3ee95136be57cffa261247ce004e..316db1b84110686361826fcee37d1d40cc2020db 100644 (file)
 (GaloisFieldMulMaskedUint8x16 x y mask) => (VGF2P8MULBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
 (GaloisFieldMulMaskedUint8x32 x y mask) => (VGF2P8MULBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
 (GaloisFieldMulMaskedUint8x64 x y mask) => (VGF2P8MULBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
-(Get128Float32x8 ...) => (VEXTRACTF128128 ...)
-(Get128Float64x4 ...) => (VEXTRACTF128128 ...)
-(Get128Int8x32 ...) => (VEXTRACTI128128 ...)
-(Get128Int16x16 ...) => (VEXTRACTI128128 ...)
-(Get128Int32x8 ...) => (VEXTRACTI128128 ...)
-(Get128Int64x4 ...) => (VEXTRACTI128128 ...)
-(Get128Uint8x32 ...) => (VEXTRACTI128128 ...)
-(Get128Uint16x16 ...) => (VEXTRACTI128128 ...)
-(Get128Uint32x8 ...) => (VEXTRACTI128128 ...)
-(Get128Uint64x4 ...) => (VEXTRACTI128128 ...)
 (GetElemInt8x16 ...) => (VPEXTRB128 ...)
 (GetElemInt16x8 ...) => (VPEXTRW128 ...)
 (GetElemInt32x4 ...) => (VPEXTRD128 ...)
 (GetElemUint16x8 ...) => (VPEXTRW128 ...)
 (GetElemUint32x4 ...) => (VPEXTRD128 ...)
 (GetElemUint64x2 ...) => (VPEXTRQ128 ...)
+(GetHiFloat32x8 x) => (VEXTRACTF128128 [1] x)
+(GetHiFloat32x16 x) => (VEXTRACTF64X4256 [1] x)
+(GetHiFloat64x4 x) => (VEXTRACTF128128 [1] x)
+(GetHiFloat64x8 x) => (VEXTRACTF64X4256 [1] x)
+(GetHiInt8x32 x) => (VEXTRACTI128128 [1] x)
+(GetHiInt8x64 x) => (VEXTRACTI64X4256 [1] x)
+(GetHiInt16x16 x) => (VEXTRACTI128128 [1] x)
+(GetHiInt16x32 x) => (VEXTRACTI64X4256 [1] x)
+(GetHiInt32x8 x) => (VEXTRACTI128128 [1] x)
+(GetHiInt32x16 x) => (VEXTRACTI64X4256 [1] x)
+(GetHiInt64x4 x) => (VEXTRACTI128128 [1] x)
+(GetHiInt64x8 x) => (VEXTRACTI64X4256 [1] x)
+(GetHiUint8x32 x) => (VEXTRACTI128128 [1] x)
+(GetHiUint8x64 x) => (VEXTRACTI64X4256 [1] x)
+(GetHiUint16x16 x) => (VEXTRACTI128128 [1] x)
+(GetHiUint16x32 x) => (VEXTRACTI64X4256 [1] x)
+(GetHiUint32x8 x) => (VEXTRACTI128128 [1] x)
+(GetHiUint32x16 x) => (VEXTRACTI64X4256 [1] x)
+(GetHiUint64x4 x) => (VEXTRACTI128128 [1] x)
+(GetHiUint64x8 x) => (VEXTRACTI64X4256 [1] x)
+(GetLoFloat32x8 x) => (VEXTRACTF128128 [0] x)
+(GetLoFloat32x16 x) => (VEXTRACTF64X4256 [0] x)
+(GetLoFloat64x4 x) => (VEXTRACTF128128 [0] x)
+(GetLoFloat64x8 x) => (VEXTRACTF64X4256 [0] x)
+(GetLoInt8x32 x) => (VEXTRACTI128128 [0] x)
+(GetLoInt8x64 x) => (VEXTRACTI64X4256 [0] x)
+(GetLoInt16x16 x) => (VEXTRACTI128128 [0] x)
+(GetLoInt16x32 x) => (VEXTRACTI64X4256 [0] x)
+(GetLoInt32x8 x) => (VEXTRACTI128128 [0] x)
+(GetLoInt32x16 x) => (VEXTRACTI64X4256 [0] x)
+(GetLoInt64x4 x) => (VEXTRACTI128128 [0] x)
+(GetLoInt64x8 x) => (VEXTRACTI64X4256 [0] x)
+(GetLoUint8x32 x) => (VEXTRACTI128128 [0] x)
+(GetLoUint8x64 x) => (VEXTRACTI64X4256 [0] x)
+(GetLoUint16x16 x) => (VEXTRACTI128128 [0] x)
+(GetLoUint16x32 x) => (VEXTRACTI64X4256 [0] x)
+(GetLoUint32x8 x) => (VEXTRACTI128128 [0] x)
+(GetLoUint32x16 x) => (VEXTRACTI64X4256 [0] x)
+(GetLoUint64x4 x) => (VEXTRACTI128128 [0] x)
+(GetLoUint64x8 x) => (VEXTRACTI64X4256 [0] x)
 (GreaterFloat32x4 x y) => (VCMPPS128 [14] x y)
 (GreaterFloat32x8 x y) => (VCMPPS256 [14] x y)
 (GreaterFloat32x16 x y) => (VPMOVMToVec32x16 (VCMPPS512 [14] x y))
 (ScaleMaskedFloat64x2 x y mask) => (VSCALEFPDMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
 (ScaleMaskedFloat64x4 x y mask) => (VSCALEFPDMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
 (ScaleMaskedFloat64x8 x y mask) => (VSCALEFPDMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
-(Set128Float32x8 ...) => (VINSERTF128256 ...)
-(Set128Float64x4 ...) => (VINSERTF128256 ...)
-(Set128Int8x32 ...) => (VINSERTI128256 ...)
-(Set128Int16x16 ...) => (VINSERTI128256 ...)
-(Set128Int32x8 ...) => (VINSERTI128256 ...)
-(Set128Int64x4 ...) => (VINSERTI128256 ...)
-(Set128Uint8x32 ...) => (VINSERTI128256 ...)
-(Set128Uint16x16 ...) => (VINSERTI128256 ...)
-(Set128Uint32x8 ...) => (VINSERTI128256 ...)
-(Set128Uint64x4 ...) => (VINSERTI128256 ...)
 (SetElemInt8x16 ...) => (VPINSRB128 ...)
 (SetElemInt16x8 ...) => (VPINSRW128 ...)
 (SetElemInt32x4 ...) => (VPINSRD128 ...)
 (SetElemUint16x8 ...) => (VPINSRW128 ...)
 (SetElemUint32x4 ...) => (VPINSRD128 ...)
 (SetElemUint64x2 ...) => (VPINSRQ128 ...)
+(SetHiFloat32x8 x y) => (VINSERTF128256 [1] x y)
+(SetHiFloat32x16 x y) => (VINSERTF64X4512 [1] x y)
+(SetHiFloat64x4 x y) => (VINSERTF128256 [1] x y)
+(SetHiFloat64x8 x y) => (VINSERTF64X4512 [1] x y)
+(SetHiInt8x32 x y) => (VINSERTI128256 [1] x y)
+(SetHiInt8x64 x y) => (VINSERTI64X4512 [1] x y)
+(SetHiInt16x16 x y) => (VINSERTI128256 [1] x y)
+(SetHiInt16x32 x y) => (VINSERTI64X4512 [1] x y)
+(SetHiInt32x8 x y) => (VINSERTI128256 [1] x y)
+(SetHiInt32x16 x y) => (VINSERTI64X4512 [1] x y)
+(SetHiInt64x4 x y) => (VINSERTI128256 [1] x y)
+(SetHiInt64x8 x y) => (VINSERTI64X4512 [1] x y)
+(SetHiUint8x32 x y) => (VINSERTI128256 [1] x y)
+(SetHiUint8x64 x y) => (VINSERTI64X4512 [1] x y)
+(SetHiUint16x16 x y) => (VINSERTI128256 [1] x y)
+(SetHiUint16x32 x y) => (VINSERTI64X4512 [1] x y)
+(SetHiUint32x8 x y) => (VINSERTI128256 [1] x y)
+(SetHiUint32x16 x y) => (VINSERTI64X4512 [1] x y)
+(SetHiUint64x4 x y) => (VINSERTI128256 [1] x y)
+(SetHiUint64x8 x y) => (VINSERTI64X4512 [1] x y)
+(SetLoFloat32x8 x y) => (VINSERTF128256 [0] x y)
+(SetLoFloat32x16 x y) => (VINSERTF64X4512 [0] x y)
+(SetLoFloat64x4 x y) => (VINSERTF128256 [0] x y)
+(SetLoFloat64x8 x y) => (VINSERTF64X4512 [0] x y)
+(SetLoInt8x32 x y) => (VINSERTI128256 [0] x y)
+(SetLoInt8x64 x y) => (VINSERTI64X4512 [0] x y)
+(SetLoInt16x16 x y) => (VINSERTI128256 [0] x y)
+(SetLoInt16x32 x y) => (VINSERTI64X4512 [0] x y)
+(SetLoInt32x8 x y) => (VINSERTI128256 [0] x y)
+(SetLoInt32x16 x y) => (VINSERTI64X4512 [0] x y)
+(SetLoInt64x4 x y) => (VINSERTI128256 [0] x y)
+(SetLoInt64x8 x y) => (VINSERTI64X4512 [0] x y)
+(SetLoUint8x32 x y) => (VINSERTI128256 [0] x y)
+(SetLoUint8x64 x y) => (VINSERTI64X4512 [0] x y)
+(SetLoUint16x16 x y) => (VINSERTI128256 [0] x y)
+(SetLoUint16x32 x y) => (VINSERTI64X4512 [0] x y)
+(SetLoUint32x8 x y) => (VINSERTI128256 [0] x y)
+(SetLoUint32x16 x y) => (VINSERTI64X4512 [0] x y)
+(SetLoUint64x4 x y) => (VINSERTI128256 [0] x y)
+(SetLoUint64x8 x y) => (VINSERTI64X4512 [0] x y)
 (ShiftAllLeftInt16x8 x (MOVQconst [c])) => (VPSLLW128const [int8(c)] x)
 (ShiftAllLeftInt16x8 x y) => (VPSLLW128 x y)
 (ShiftAllLeftInt16x16 x (MOVQconst [c])) => (VPSLLW256const [int8(c)] x)
index 7860a0889eb65d74e95ceedb59e7281e074c9878..591f8a5bcafb589176ddfef75e56428fc2ff38db 100644 (file)
@@ -912,12 +912,14 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
                {name: "VGF2P8AFFINEQBMasked128", argLength: 3, reg: w2kw, asm: "VGF2P8AFFINEQB", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false},
                {name: "VGF2P8AFFINEQBMasked256", argLength: 3, reg: w2kw, asm: "VGF2P8AFFINEQB", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false},
                {name: "VGF2P8AFFINEQBMasked512", argLength: 3, reg: w2kw, asm: "VGF2P8AFFINEQB", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false},
-               {name: "VEXTRACTF128128", argLength: 1, reg: v11, asm: "VEXTRACTF128", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false},
-               {name: "VEXTRACTI128128", argLength: 1, reg: v11, asm: "VEXTRACTI128", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false},
                {name: "VPEXTRB128", argLength: 1, reg: wgp, asm: "VPEXTRB", aux: "Int8", commutative: false, typ: "int8", resultInArg0: false},
                {name: "VPEXTRW128", argLength: 1, reg: wgp, asm: "VPEXTRW", aux: "Int8", commutative: false, typ: "int16", resultInArg0: false},
                {name: "VPEXTRD128", argLength: 1, reg: vgp, asm: "VPEXTRD", aux: "Int8", commutative: false, typ: "int32", resultInArg0: false},
                {name: "VPEXTRQ128", argLength: 1, reg: vgp, asm: "VPEXTRQ", aux: "Int8", commutative: false, typ: "int64", resultInArg0: false},
+               {name: "VEXTRACTF128128", argLength: 1, reg: v11, asm: "VEXTRACTF128", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VEXTRACTF64X4256", argLength: 1, reg: w11, asm: "VEXTRACTF64X4", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false},
+               {name: "VEXTRACTI128128", argLength: 1, reg: v11, asm: "VEXTRACTI128", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VEXTRACTI64X4256", argLength: 1, reg: w11, asm: "VEXTRACTI64X4", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false},
                {name: "VPCMPUB128", argLength: 2, reg: w2k, asm: "VPCMPUB", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false},
                {name: "VPCMPUB256", argLength: 2, reg: w2k, asm: "VPCMPUB", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false},
                {name: "VPCMPUB512", argLength: 2, reg: w2k, asm: "VPCMPUB", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false},
@@ -966,12 +968,14 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
                {name: "VPRORQMasked128", argLength: 2, reg: wkw, asm: "VPRORQ", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false},
                {name: "VPRORQMasked256", argLength: 2, reg: wkw, asm: "VPRORQ", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false},
                {name: "VPRORQMasked512", argLength: 2, reg: wkw, asm: "VPRORQ", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false},
-               {name: "VINSERTF128256", argLength: 2, reg: v21, asm: "VINSERTF128", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false},
-               {name: "VINSERTI128256", argLength: 2, reg: v21, asm: "VINSERTI128", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false},
                {name: "VPINSRB128", argLength: 2, reg: vgpv, asm: "VPINSRB", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false},
                {name: "VPINSRW128", argLength: 2, reg: vgpv, asm: "VPINSRW", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false},
                {name: "VPINSRD128", argLength: 2, reg: vgpv, asm: "VPINSRD", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false},
                {name: "VPINSRQ128", argLength: 2, reg: vgpv, asm: "VPINSRQ", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VINSERTF128256", argLength: 2, reg: v21, asm: "VINSERTF128", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false},
+               {name: "VINSERTF64X4512", argLength: 2, reg: w21, asm: "VINSERTF64X4", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false},
+               {name: "VINSERTI128256", argLength: 2, reg: v21, asm: "VINSERTI128", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false},
+               {name: "VINSERTI64X4512", argLength: 2, reg: w21, asm: "VINSERTI64X4", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false},
                {name: "VPSHLDW128", argLength: 2, reg: w21, asm: "VPSHLDW", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false},
                {name: "VPSHLDW256", argLength: 2, reg: w21, asm: "VPSHLDW", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false},
                {name: "VPSHLDW512", argLength: 2, reg: w21, asm: "VPSHLDW", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false},
index bf85df5e6dadad73cf9a3f487cc962ed542ef85d..e132b058a4a0b69d9a5516f3164c5a0f973bf894 100644 (file)
@@ -410,6 +410,46 @@ func simdGenericOps() []opData {
                {name: "GaloisFieldMulUint8x16", argLength: 2, commutative: false},
                {name: "GaloisFieldMulUint8x32", argLength: 2, commutative: false},
                {name: "GaloisFieldMulUint8x64", argLength: 2, commutative: false},
+               {name: "GetHiFloat32x8", argLength: 1, commutative: false},
+               {name: "GetHiFloat32x16", argLength: 1, commutative: false},
+               {name: "GetHiFloat64x4", argLength: 1, commutative: false},
+               {name: "GetHiFloat64x8", argLength: 1, commutative: false},
+               {name: "GetHiInt8x32", argLength: 1, commutative: false},
+               {name: "GetHiInt8x64", argLength: 1, commutative: false},
+               {name: "GetHiInt16x16", argLength: 1, commutative: false},
+               {name: "GetHiInt16x32", argLength: 1, commutative: false},
+               {name: "GetHiInt32x8", argLength: 1, commutative: false},
+               {name: "GetHiInt32x16", argLength: 1, commutative: false},
+               {name: "GetHiInt64x4", argLength: 1, commutative: false},
+               {name: "GetHiInt64x8", argLength: 1, commutative: false},
+               {name: "GetHiUint8x32", argLength: 1, commutative: false},
+               {name: "GetHiUint8x64", argLength: 1, commutative: false},
+               {name: "GetHiUint16x16", argLength: 1, commutative: false},
+               {name: "GetHiUint16x32", argLength: 1, commutative: false},
+               {name: "GetHiUint32x8", argLength: 1, commutative: false},
+               {name: "GetHiUint32x16", argLength: 1, commutative: false},
+               {name: "GetHiUint64x4", argLength: 1, commutative: false},
+               {name: "GetHiUint64x8", argLength: 1, commutative: false},
+               {name: "GetLoFloat32x8", argLength: 1, commutative: false},
+               {name: "GetLoFloat32x16", argLength: 1, commutative: false},
+               {name: "GetLoFloat64x4", argLength: 1, commutative: false},
+               {name: "GetLoFloat64x8", argLength: 1, commutative: false},
+               {name: "GetLoInt8x32", argLength: 1, commutative: false},
+               {name: "GetLoInt8x64", argLength: 1, commutative: false},
+               {name: "GetLoInt16x16", argLength: 1, commutative: false},
+               {name: "GetLoInt16x32", argLength: 1, commutative: false},
+               {name: "GetLoInt32x8", argLength: 1, commutative: false},
+               {name: "GetLoInt32x16", argLength: 1, commutative: false},
+               {name: "GetLoInt64x4", argLength: 1, commutative: false},
+               {name: "GetLoInt64x8", argLength: 1, commutative: false},
+               {name: "GetLoUint8x32", argLength: 1, commutative: false},
+               {name: "GetLoUint8x64", argLength: 1, commutative: false},
+               {name: "GetLoUint16x16", argLength: 1, commutative: false},
+               {name: "GetLoUint16x32", argLength: 1, commutative: false},
+               {name: "GetLoUint32x8", argLength: 1, commutative: false},
+               {name: "GetLoUint32x16", argLength: 1, commutative: false},
+               {name: "GetLoUint64x4", argLength: 1, commutative: false},
+               {name: "GetLoUint64x8", argLength: 1, commutative: false},
                {name: "GreaterEqualFloat32x4", argLength: 2, commutative: false},
                {name: "GreaterEqualFloat32x8", argLength: 2, commutative: false},
                {name: "GreaterEqualFloat32x16", argLength: 2, commutative: false},
@@ -1180,6 +1220,46 @@ func simdGenericOps() []opData {
                {name: "ScaleMaskedFloat64x2", argLength: 3, commutative: false},
                {name: "ScaleMaskedFloat64x4", argLength: 3, commutative: false},
                {name: "ScaleMaskedFloat64x8", argLength: 3, commutative: false},
+               {name: "SetHiFloat32x8", argLength: 2, commutative: false},
+               {name: "SetHiFloat32x16", argLength: 2, commutative: false},
+               {name: "SetHiFloat64x4", argLength: 2, commutative: false},
+               {name: "SetHiFloat64x8", argLength: 2, commutative: false},
+               {name: "SetHiInt8x32", argLength: 2, commutative: false},
+               {name: "SetHiInt8x64", argLength: 2, commutative: false},
+               {name: "SetHiInt16x16", argLength: 2, commutative: false},
+               {name: "SetHiInt16x32", argLength: 2, commutative: false},
+               {name: "SetHiInt32x8", argLength: 2, commutative: false},
+               {name: "SetHiInt32x16", argLength: 2, commutative: false},
+               {name: "SetHiInt64x4", argLength: 2, commutative: false},
+               {name: "SetHiInt64x8", argLength: 2, commutative: false},
+               {name: "SetHiUint8x32", argLength: 2, commutative: false},
+               {name: "SetHiUint8x64", argLength: 2, commutative: false},
+               {name: "SetHiUint16x16", argLength: 2, commutative: false},
+               {name: "SetHiUint16x32", argLength: 2, commutative: false},
+               {name: "SetHiUint32x8", argLength: 2, commutative: false},
+               {name: "SetHiUint32x16", argLength: 2, commutative: false},
+               {name: "SetHiUint64x4", argLength: 2, commutative: false},
+               {name: "SetHiUint64x8", argLength: 2, commutative: false},
+               {name: "SetLoFloat32x8", argLength: 2, commutative: false},
+               {name: "SetLoFloat32x16", argLength: 2, commutative: false},
+               {name: "SetLoFloat64x4", argLength: 2, commutative: false},
+               {name: "SetLoFloat64x8", argLength: 2, commutative: false},
+               {name: "SetLoInt8x32", argLength: 2, commutative: false},
+               {name: "SetLoInt8x64", argLength: 2, commutative: false},
+               {name: "SetLoInt16x16", argLength: 2, commutative: false},
+               {name: "SetLoInt16x32", argLength: 2, commutative: false},
+               {name: "SetLoInt32x8", argLength: 2, commutative: false},
+               {name: "SetLoInt32x16", argLength: 2, commutative: false},
+               {name: "SetLoInt64x4", argLength: 2, commutative: false},
+               {name: "SetLoInt64x8", argLength: 2, commutative: false},
+               {name: "SetLoUint8x32", argLength: 2, commutative: false},
+               {name: "SetLoUint8x64", argLength: 2, commutative: false},
+               {name: "SetLoUint16x16", argLength: 2, commutative: false},
+               {name: "SetLoUint16x32", argLength: 2, commutative: false},
+               {name: "SetLoUint32x8", argLength: 2, commutative: false},
+               {name: "SetLoUint32x16", argLength: 2, commutative: false},
+               {name: "SetLoUint64x4", argLength: 2, commutative: false},
+               {name: "SetLoUint64x8", argLength: 2, commutative: false},
                {name: "ShiftAllLeftInt16x8", argLength: 2, commutative: false},
                {name: "ShiftAllLeftInt16x16", argLength: 2, commutative: false},
                {name: "ShiftAllLeftInt16x32", argLength: 2, commutative: false},
@@ -1624,16 +1704,6 @@ func simdGenericOps() []opData {
                {name: "GaloisFieldAffineTransformUint8x16", argLength: 2, commutative: false, aux: "Int8"},
                {name: "GaloisFieldAffineTransformUint8x32", argLength: 2, commutative: false, aux: "Int8"},
                {name: "GaloisFieldAffineTransformUint8x64", argLength: 2, commutative: false, aux: "Int8"},
-               {name: "Get128Float32x8", argLength: 1, commutative: false, aux: "Int8"},
-               {name: "Get128Float64x4", argLength: 1, commutative: false, aux: "Int8"},
-               {name: "Get128Int8x32", argLength: 1, commutative: false, aux: "Int8"},
-               {name: "Get128Int16x16", argLength: 1, commutative: false, aux: "Int8"},
-               {name: "Get128Int32x8", argLength: 1, commutative: false, aux: "Int8"},
-               {name: "Get128Int64x4", argLength: 1, commutative: false, aux: "Int8"},
-               {name: "Get128Uint8x32", argLength: 1, commutative: false, aux: "Int8"},
-               {name: "Get128Uint16x16", argLength: 1, commutative: false, aux: "Int8"},
-               {name: "Get128Uint32x8", argLength: 1, commutative: false, aux: "Int8"},
-               {name: "Get128Uint64x4", argLength: 1, commutative: false, aux: "Int8"},
                {name: "GetElemInt8x16", argLength: 1, commutative: false, aux: "Int8"},
                {name: "GetElemInt16x8", argLength: 1, commutative: false, aux: "Int8"},
                {name: "GetElemInt32x4", argLength: 1, commutative: false, aux: "Int8"},
@@ -1714,16 +1784,6 @@ func simdGenericOps() []opData {
                {name: "RoundScaledResidueMaskedFloat64x2", argLength: 2, commutative: false, aux: "Int8"},
                {name: "RoundScaledResidueMaskedFloat64x4", argLength: 2, commutative: false, aux: "Int8"},
                {name: "RoundScaledResidueMaskedFloat64x8", argLength: 2, commutative: false, aux: "Int8"},
-               {name: "Set128Float32x8", argLength: 2, commutative: false, aux: "Int8"},
-               {name: "Set128Float64x4", argLength: 2, commutative: false, aux: "Int8"},
-               {name: "Set128Int8x32", argLength: 2, commutative: false, aux: "Int8"},
-               {name: "Set128Int16x16", argLength: 2, commutative: false, aux: "Int8"},
-               {name: "Set128Int32x8", argLength: 2, commutative: false, aux: "Int8"},
-               {name: "Set128Int64x4", argLength: 2, commutative: false, aux: "Int8"},
-               {name: "Set128Uint8x32", argLength: 2, commutative: false, aux: "Int8"},
-               {name: "Set128Uint16x16", argLength: 2, commutative: false, aux: "Int8"},
-               {name: "Set128Uint32x8", argLength: 2, commutative: false, aux: "Int8"},
-               {name: "Set128Uint64x4", argLength: 2, commutative: false, aux: "Int8"},
                {name: "SetElemInt8x16", argLength: 2, commutative: false, aux: "Int8"},
                {name: "SetElemInt16x8", argLength: 2, commutative: false, aux: "Int8"},
                {name: "SetElemInt32x4", argLength: 2, commutative: false, aux: "Int8"},
index 9ce9220901cb7d1633c225330014927eb5346ef4..b39311cd90f105eecdca27dbafb94c293d008798 100644 (file)
@@ -2131,12 +2131,14 @@ const (
        OpAMD64VGF2P8AFFINEQBMasked128
        OpAMD64VGF2P8AFFINEQBMasked256
        OpAMD64VGF2P8AFFINEQBMasked512
-       OpAMD64VEXTRACTF128128
-       OpAMD64VEXTRACTI128128
        OpAMD64VPEXTRB128
        OpAMD64VPEXTRW128
        OpAMD64VPEXTRD128
        OpAMD64VPEXTRQ128
+       OpAMD64VEXTRACTF128128
+       OpAMD64VEXTRACTF64X4256
+       OpAMD64VEXTRACTI128128
+       OpAMD64VEXTRACTI64X4256
        OpAMD64VPCMPUB128
        OpAMD64VPCMPUB256
        OpAMD64VPCMPUB512
@@ -2185,12 +2187,14 @@ const (
        OpAMD64VPRORQMasked128
        OpAMD64VPRORQMasked256
        OpAMD64VPRORQMasked512
-       OpAMD64VINSERTF128256
-       OpAMD64VINSERTI128256
        OpAMD64VPINSRB128
        OpAMD64VPINSRW128
        OpAMD64VPINSRD128
        OpAMD64VPINSRQ128
+       OpAMD64VINSERTF128256
+       OpAMD64VINSERTF64X4512
+       OpAMD64VINSERTI128256
+       OpAMD64VINSERTI64X4512
        OpAMD64VPSHLDW128
        OpAMD64VPSHLDW256
        OpAMD64VPSHLDW512
@@ -4967,6 +4971,46 @@ const (
        OpGaloisFieldMulUint8x16
        OpGaloisFieldMulUint8x32
        OpGaloisFieldMulUint8x64
+       OpGetHiFloat32x8
+       OpGetHiFloat32x16
+       OpGetHiFloat64x4
+       OpGetHiFloat64x8
+       OpGetHiInt8x32
+       OpGetHiInt8x64
+       OpGetHiInt16x16
+       OpGetHiInt16x32
+       OpGetHiInt32x8
+       OpGetHiInt32x16
+       OpGetHiInt64x4
+       OpGetHiInt64x8
+       OpGetHiUint8x32
+       OpGetHiUint8x64
+       OpGetHiUint16x16
+       OpGetHiUint16x32
+       OpGetHiUint32x8
+       OpGetHiUint32x16
+       OpGetHiUint64x4
+       OpGetHiUint64x8
+       OpGetLoFloat32x8
+       OpGetLoFloat32x16
+       OpGetLoFloat64x4
+       OpGetLoFloat64x8
+       OpGetLoInt8x32
+       OpGetLoInt8x64
+       OpGetLoInt16x16
+       OpGetLoInt16x32
+       OpGetLoInt32x8
+       OpGetLoInt32x16
+       OpGetLoInt64x4
+       OpGetLoInt64x8
+       OpGetLoUint8x32
+       OpGetLoUint8x64
+       OpGetLoUint16x16
+       OpGetLoUint16x32
+       OpGetLoUint32x8
+       OpGetLoUint32x16
+       OpGetLoUint64x4
+       OpGetLoUint64x8
        OpGreaterEqualFloat32x4
        OpGreaterEqualFloat32x8
        OpGreaterEqualFloat32x16
@@ -5737,6 +5781,46 @@ const (
        OpScaleMaskedFloat64x2
        OpScaleMaskedFloat64x4
        OpScaleMaskedFloat64x8
+       OpSetHiFloat32x8
+       OpSetHiFloat32x16
+       OpSetHiFloat64x4
+       OpSetHiFloat64x8
+       OpSetHiInt8x32
+       OpSetHiInt8x64
+       OpSetHiInt16x16
+       OpSetHiInt16x32
+       OpSetHiInt32x8
+       OpSetHiInt32x16
+       OpSetHiInt64x4
+       OpSetHiInt64x8
+       OpSetHiUint8x32
+       OpSetHiUint8x64
+       OpSetHiUint16x16
+       OpSetHiUint16x32
+       OpSetHiUint32x8
+       OpSetHiUint32x16
+       OpSetHiUint64x4
+       OpSetHiUint64x8
+       OpSetLoFloat32x8
+       OpSetLoFloat32x16
+       OpSetLoFloat64x4
+       OpSetLoFloat64x8
+       OpSetLoInt8x32
+       OpSetLoInt8x64
+       OpSetLoInt16x16
+       OpSetLoInt16x32
+       OpSetLoInt32x8
+       OpSetLoInt32x16
+       OpSetLoInt64x4
+       OpSetLoInt64x8
+       OpSetLoUint8x32
+       OpSetLoUint8x64
+       OpSetLoUint16x16
+       OpSetLoUint16x32
+       OpSetLoUint32x8
+       OpSetLoUint32x16
+       OpSetLoUint64x4
+       OpSetLoUint64x8
        OpShiftAllLeftInt16x8
        OpShiftAllLeftInt16x16
        OpShiftAllLeftInt16x32
@@ -6181,16 +6265,6 @@ const (
        OpGaloisFieldAffineTransformUint8x16
        OpGaloisFieldAffineTransformUint8x32
        OpGaloisFieldAffineTransformUint8x64
-       OpGet128Float32x8
-       OpGet128Float64x4
-       OpGet128Int8x32
-       OpGet128Int16x16
-       OpGet128Int32x8
-       OpGet128Int64x4
-       OpGet128Uint8x32
-       OpGet128Uint16x16
-       OpGet128Uint32x8
-       OpGet128Uint64x4
        OpGetElemInt8x16
        OpGetElemInt16x8
        OpGetElemInt32x4
@@ -6271,16 +6345,6 @@ const (
        OpRoundScaledResidueMaskedFloat64x2
        OpRoundScaledResidueMaskedFloat64x4
        OpRoundScaledResidueMaskedFloat64x8
-       OpSet128Float32x8
-       OpSet128Float64x4
-       OpSet128Int8x32
-       OpSet128Int16x16
-       OpSet128Int32x8
-       OpSet128Int64x4
-       OpSet128Uint8x32
-       OpSet128Uint16x16
-       OpSet128Uint32x8
-       OpSet128Uint64x4
        OpSetElemInt8x16
        OpSetElemInt16x8
        OpSetElemInt32x4
@@ -33034,41 +33098,41 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:    "VEXTRACTF128128",
+               name:    "VPEXTRB128",
                auxType: auxInt8,
                argLen:  1,
-               asm:     x86.AVEXTRACTF128,
+               asm:     x86.AVPEXTRB,
                reg: regInfo{
                        inputs: []inputInfo{
-                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
                        },
                        outputs: []outputInfo{
-                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                               {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
                        },
                },
        },
        {
-               name:    "VEXTRACTI128128",
+               name:    "VPEXTRW128",
                auxType: auxInt8,
                argLen:  1,
-               asm:     x86.AVEXTRACTI128,
+               asm:     x86.AVPEXTRW,
                reg: regInfo{
                        inputs: []inputInfo{
-                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
                        },
                        outputs: []outputInfo{
-                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                               {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
                        },
                },
        },
        {
-               name:    "VPEXTRB128",
+               name:    "VPEXTRD128",
                auxType: auxInt8,
                argLen:  1,
-               asm:     x86.AVPEXTRB,
+               asm:     x86.AVPEXTRD,
                reg: regInfo{
                        inputs: []inputInfo{
-                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
                        },
                        outputs: []outputInfo{
                                {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
@@ -33076,13 +33140,13 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:    "VPEXTRW128",
+               name:    "VPEXTRQ128",
                auxType: auxInt8,
                argLen:  1,
-               asm:     x86.AVPEXTRW,
+               asm:     x86.AVPEXTRQ,
                reg: regInfo{
                        inputs: []inputInfo{
-                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
                        },
                        outputs: []outputInfo{
                                {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
@@ -33090,30 +33154,58 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:    "VPEXTRD128",
+               name:    "VEXTRACTF128128",
                auxType: auxInt8,
                argLen:  1,
-               asm:     x86.AVPEXTRD,
+               asm:     x86.AVEXTRACTF128,
                reg: regInfo{
                        inputs: []inputInfo{
                                {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
                        },
                        outputs: []outputInfo{
-                               {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
                        },
                },
        },
        {
-               name:    "VPEXTRQ128",
+               name:    "VEXTRACTF64X4256",
                auxType: auxInt8,
                argLen:  1,
-               asm:     x86.AVPEXTRQ,
+               asm:     x86.AVEXTRACTF64X4,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:    "VEXTRACTI128128",
+               auxType: auxInt8,
+               argLen:  1,
+               asm:     x86.AVEXTRACTI128,
                reg: regInfo{
                        inputs: []inputInfo{
                                {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
                        },
                        outputs: []outputInfo{
-                               {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+               },
+       },
+       {
+               name:    "VEXTRACTI64X4256",
+               auxType: auxInt8,
+               argLen:  1,
+               asm:     x86.AVEXTRACTI64X4,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
                        },
                },
        },
@@ -33826,14 +33918,14 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:    "VINSERTF128256",
+               name:    "VPINSRB128",
                auxType: auxInt8,
                argLen:  2,
-               asm:     x86.AVINSERTF128,
+               asm:     x86.AVPINSRB,
                reg: regInfo{
                        inputs: []inputInfo{
+                               {1, 49135},      // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
                                {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
-                               {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
                        },
                        outputs: []outputInfo{
                                {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
@@ -33841,14 +33933,14 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:    "VINSERTI128256",
+               name:    "VPINSRW128",
                auxType: auxInt8,
                argLen:  2,
-               asm:     x86.AVINSERTI128,
+               asm:     x86.AVPINSRW,
                reg: regInfo{
                        inputs: []inputInfo{
+                               {1, 49135},      // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
                                {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
-                               {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
                        },
                        outputs: []outputInfo{
                                {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
@@ -33856,10 +33948,10 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:    "VPINSRB128",
+               name:    "VPINSRD128",
                auxType: auxInt8,
                argLen:  2,
-               asm:     x86.AVPINSRB,
+               asm:     x86.AVPINSRD,
                reg: regInfo{
                        inputs: []inputInfo{
                                {1, 49135},      // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
@@ -33871,10 +33963,10 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:    "VPINSRW128",
+               name:    "VPINSRQ128",
                auxType: auxInt8,
                argLen:  2,
-               asm:     x86.AVPINSRW,
+               asm:     x86.AVPINSRQ,
                reg: regInfo{
                        inputs: []inputInfo{
                                {1, 49135},      // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
@@ -33886,14 +33978,14 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:    "VPINSRD128",
+               name:    "VINSERTF128256",
                auxType: auxInt8,
                argLen:  2,
-               asm:     x86.AVPINSRD,
+               asm:     x86.AVINSERTF128,
                reg: regInfo{
                        inputs: []inputInfo{
-                               {1, 49135},      // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
                                {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                               {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
                        },
                        outputs: []outputInfo{
                                {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
@@ -33901,20 +33993,50 @@ var opcodeTable = [...]opInfo{
                },
        },
        {
-               name:    "VPINSRQ128",
+               name:    "VINSERTF64X4512",
                auxType: auxInt8,
                argLen:  2,
-               asm:     x86.AVPINSRQ,
+               asm:     x86.AVINSERTF64X4,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                               {1, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
+       {
+               name:    "VINSERTI128256",
+               auxType: auxInt8,
+               argLen:  2,
+               asm:     x86.AVINSERTI128,
                reg: regInfo{
                        inputs: []inputInfo{
-                               {1, 49135},      // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
                                {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                               {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
                        },
                        outputs: []outputInfo{
                                {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
                        },
                },
        },
+       {
+               name:    "VINSERTI64X4512",
+               auxType: auxInt8,
+               argLen:  2,
+               asm:     x86.AVINSERTI64X4,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                               {1, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+                       outputs: []outputInfo{
+                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+                       },
+               },
+       },
        {
                name:    "VPSHLDW128",
                auxType: auxInt8,
@@ -64937,6 +65059,206 @@ var opcodeTable = [...]opInfo{
                argLen:  2,
                generic: true,
        },
+       {
+               name:    "GetHiFloat32x8",
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "GetHiFloat32x16",
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "GetHiFloat64x4",
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "GetHiFloat64x8",
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "GetHiInt8x32",
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "GetHiInt8x64",
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "GetHiInt16x16",
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "GetHiInt16x32",
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "GetHiInt32x8",
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "GetHiInt32x16",
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "GetHiInt64x4",
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "GetHiInt64x8",
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "GetHiUint8x32",
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "GetHiUint8x64",
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "GetHiUint16x16",
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "GetHiUint16x32",
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "GetHiUint32x8",
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "GetHiUint32x16",
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "GetHiUint64x4",
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "GetHiUint64x8",
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "GetLoFloat32x8",
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "GetLoFloat32x16",
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "GetLoFloat64x4",
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "GetLoFloat64x8",
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "GetLoInt8x32",
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "GetLoInt8x64",
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "GetLoInt16x16",
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "GetLoInt16x32",
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "GetLoInt32x8",
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "GetLoInt32x16",
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "GetLoInt64x4",
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "GetLoInt64x8",
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "GetLoUint8x32",
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "GetLoUint8x64",
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "GetLoUint16x16",
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "GetLoUint16x32",
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "GetLoUint32x8",
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "GetLoUint32x16",
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "GetLoUint64x4",
+               argLen:  1,
+               generic: true,
+       },
+       {
+               name:    "GetLoUint64x8",
+               argLen:  1,
+               generic: true,
+       },
        {
                name:    "GreaterEqualFloat32x4",
                argLen:  2,
@@ -69073,6 +69395,206 @@ var opcodeTable = [...]opInfo{
                argLen:  3,
                generic: true,
        },
+       {
+               name:    "SetHiFloat32x8",
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "SetHiFloat32x16",
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "SetHiFloat64x4",
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "SetHiFloat64x8",
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "SetHiInt8x32",
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "SetHiInt8x64",
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "SetHiInt16x16",
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "SetHiInt16x32",
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "SetHiInt32x8",
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "SetHiInt32x16",
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "SetHiInt64x4",
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "SetHiInt64x8",
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "SetHiUint8x32",
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "SetHiUint8x64",
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "SetHiUint16x16",
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "SetHiUint16x32",
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "SetHiUint32x8",
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "SetHiUint32x16",
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "SetHiUint64x4",
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "SetHiUint64x8",
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "SetLoFloat32x8",
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "SetLoFloat32x16",
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "SetLoFloat64x4",
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "SetLoFloat64x8",
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "SetLoInt8x32",
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "SetLoInt8x64",
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "SetLoInt16x16",
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "SetLoInt16x32",
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "SetLoInt32x8",
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "SetLoInt32x16",
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "SetLoInt64x4",
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "SetLoInt64x8",
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "SetLoUint8x32",
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "SetLoUint8x64",
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "SetLoUint16x16",
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "SetLoUint16x32",
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "SetLoUint32x8",
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "SetLoUint32x16",
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "SetLoUint64x4",
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "SetLoUint64x8",
+               argLen:  2,
+               generic: true,
+       },
        {
                name:    "ShiftAllLeftInt16x8",
                argLen:  2,
@@ -71389,66 +71911,6 @@ var opcodeTable = [...]opInfo{
                argLen:  2,
                generic: true,
        },
-       {
-               name:    "Get128Float32x8",
-               auxType: auxInt8,
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "Get128Float64x4",
-               auxType: auxInt8,
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "Get128Int8x32",
-               auxType: auxInt8,
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "Get128Int16x16",
-               auxType: auxInt8,
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "Get128Int32x8",
-               auxType: auxInt8,
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "Get128Int64x4",
-               auxType: auxInt8,
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "Get128Uint8x32",
-               auxType: auxInt8,
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "Get128Uint16x16",
-               auxType: auxInt8,
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "Get128Uint32x8",
-               auxType: auxInt8,
-               argLen:  1,
-               generic: true,
-       },
-       {
-               name:    "Get128Uint64x4",
-               auxType: auxInt8,
-               argLen:  1,
-               generic: true,
-       },
        {
                name:    "GetElemInt8x16",
                auxType: auxInt8,
@@ -71929,66 +72391,6 @@ var opcodeTable = [...]opInfo{
                argLen:  2,
                generic: true,
        },
-       {
-               name:    "Set128Float32x8",
-               auxType: auxInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "Set128Float64x4",
-               auxType: auxInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "Set128Int8x32",
-               auxType: auxInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "Set128Int16x16",
-               auxType: auxInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "Set128Int32x8",
-               auxType: auxInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "Set128Int64x4",
-               auxType: auxInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "Set128Uint8x32",
-               auxType: auxInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "Set128Uint16x16",
-               auxType: auxInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "Set128Uint32x8",
-               auxType: auxInt8,
-               argLen:  2,
-               generic: true,
-       },
-       {
-               name:    "Set128Uint64x4",
-               auxType: auxInt8,
-               argLen:  2,
-               generic: true,
-       },
        {
                name:    "SetElemInt8x16",
                auxType: auxInt8,
index e181798245882fe914798434f48b92926f40daf0..91fd3fb470f080e9b087c00624ae4f490b8e467a 100644 (file)
@@ -1949,36 +1949,6 @@ func rewriteValueAMD64(v *Value) bool {
        case OpGaloisFieldMulUint8x64:
                v.Op = OpAMD64VGF2P8MULB512
                return true
-       case OpGet128Float32x8:
-               v.Op = OpAMD64VEXTRACTF128128
-               return true
-       case OpGet128Float64x4:
-               v.Op = OpAMD64VEXTRACTF128128
-               return true
-       case OpGet128Int16x16:
-               v.Op = OpAMD64VEXTRACTI128128
-               return true
-       case OpGet128Int32x8:
-               v.Op = OpAMD64VEXTRACTI128128
-               return true
-       case OpGet128Int64x4:
-               v.Op = OpAMD64VEXTRACTI128128
-               return true
-       case OpGet128Int8x32:
-               v.Op = OpAMD64VEXTRACTI128128
-               return true
-       case OpGet128Uint16x16:
-               v.Op = OpAMD64VEXTRACTI128128
-               return true
-       case OpGet128Uint32x8:
-               v.Op = OpAMD64VEXTRACTI128128
-               return true
-       case OpGet128Uint64x4:
-               v.Op = OpAMD64VEXTRACTI128128
-               return true
-       case OpGet128Uint8x32:
-               v.Op = OpAMD64VEXTRACTI128128
-               return true
        case OpGetCallerPC:
                v.Op = OpAMD64LoweredGetCallerPC
                return true
@@ -2014,6 +1984,86 @@ func rewriteValueAMD64(v *Value) bool {
                return true
        case OpGetG:
                return rewriteValueAMD64_OpGetG(v)
+       case OpGetHiFloat32x16:
+               return rewriteValueAMD64_OpGetHiFloat32x16(v)
+       case OpGetHiFloat32x8:
+               return rewriteValueAMD64_OpGetHiFloat32x8(v)
+       case OpGetHiFloat64x4:
+               return rewriteValueAMD64_OpGetHiFloat64x4(v)
+       case OpGetHiFloat64x8:
+               return rewriteValueAMD64_OpGetHiFloat64x8(v)
+       case OpGetHiInt16x16:
+               return rewriteValueAMD64_OpGetHiInt16x16(v)
+       case OpGetHiInt16x32:
+               return rewriteValueAMD64_OpGetHiInt16x32(v)
+       case OpGetHiInt32x16:
+               return rewriteValueAMD64_OpGetHiInt32x16(v)
+       case OpGetHiInt32x8:
+               return rewriteValueAMD64_OpGetHiInt32x8(v)
+       case OpGetHiInt64x4:
+               return rewriteValueAMD64_OpGetHiInt64x4(v)
+       case OpGetHiInt64x8:
+               return rewriteValueAMD64_OpGetHiInt64x8(v)
+       case OpGetHiInt8x32:
+               return rewriteValueAMD64_OpGetHiInt8x32(v)
+       case OpGetHiInt8x64:
+               return rewriteValueAMD64_OpGetHiInt8x64(v)
+       case OpGetHiUint16x16:
+               return rewriteValueAMD64_OpGetHiUint16x16(v)
+       case OpGetHiUint16x32:
+               return rewriteValueAMD64_OpGetHiUint16x32(v)
+       case OpGetHiUint32x16:
+               return rewriteValueAMD64_OpGetHiUint32x16(v)
+       case OpGetHiUint32x8:
+               return rewriteValueAMD64_OpGetHiUint32x8(v)
+       case OpGetHiUint64x4:
+               return rewriteValueAMD64_OpGetHiUint64x4(v)
+       case OpGetHiUint64x8:
+               return rewriteValueAMD64_OpGetHiUint64x8(v)
+       case OpGetHiUint8x32:
+               return rewriteValueAMD64_OpGetHiUint8x32(v)
+       case OpGetHiUint8x64:
+               return rewriteValueAMD64_OpGetHiUint8x64(v)
+       case OpGetLoFloat32x16:
+               return rewriteValueAMD64_OpGetLoFloat32x16(v)
+       case OpGetLoFloat32x8:
+               return rewriteValueAMD64_OpGetLoFloat32x8(v)
+       case OpGetLoFloat64x4:
+               return rewriteValueAMD64_OpGetLoFloat64x4(v)
+       case OpGetLoFloat64x8:
+               return rewriteValueAMD64_OpGetLoFloat64x8(v)
+       case OpGetLoInt16x16:
+               return rewriteValueAMD64_OpGetLoInt16x16(v)
+       case OpGetLoInt16x32:
+               return rewriteValueAMD64_OpGetLoInt16x32(v)
+       case OpGetLoInt32x16:
+               return rewriteValueAMD64_OpGetLoInt32x16(v)
+       case OpGetLoInt32x8:
+               return rewriteValueAMD64_OpGetLoInt32x8(v)
+       case OpGetLoInt64x4:
+               return rewriteValueAMD64_OpGetLoInt64x4(v)
+       case OpGetLoInt64x8:
+               return rewriteValueAMD64_OpGetLoInt64x8(v)
+       case OpGetLoInt8x32:
+               return rewriteValueAMD64_OpGetLoInt8x32(v)
+       case OpGetLoInt8x64:
+               return rewriteValueAMD64_OpGetLoInt8x64(v)
+       case OpGetLoUint16x16:
+               return rewriteValueAMD64_OpGetLoUint16x16(v)
+       case OpGetLoUint16x32:
+               return rewriteValueAMD64_OpGetLoUint16x32(v)
+       case OpGetLoUint32x16:
+               return rewriteValueAMD64_OpGetLoUint32x16(v)
+       case OpGetLoUint32x8:
+               return rewriteValueAMD64_OpGetLoUint32x8(v)
+       case OpGetLoUint64x4:
+               return rewriteValueAMD64_OpGetLoUint64x4(v)
+       case OpGetLoUint64x8:
+               return rewriteValueAMD64_OpGetLoUint64x8(v)
+       case OpGetLoUint8x32:
+               return rewriteValueAMD64_OpGetLoUint8x32(v)
+       case OpGetLoUint8x64:
+               return rewriteValueAMD64_OpGetLoUint8x64(v)
        case OpGreaterEqualFloat32x16:
                return rewriteValueAMD64_OpGreaterEqualFloat32x16(v)
        case OpGreaterEqualFloat32x4:
@@ -4306,36 +4356,6 @@ func rewriteValueAMD64(v *Value) bool {
                return rewriteValueAMD64_OpSelect1(v)
        case OpSelectN:
                return rewriteValueAMD64_OpSelectN(v)
-       case OpSet128Float32x8:
-               v.Op = OpAMD64VINSERTF128256
-               return true
-       case OpSet128Float64x4:
-               v.Op = OpAMD64VINSERTF128256
-               return true
-       case OpSet128Int16x16:
-               v.Op = OpAMD64VINSERTI128256
-               return true
-       case OpSet128Int32x8:
-               v.Op = OpAMD64VINSERTI128256
-               return true
-       case OpSet128Int64x4:
-               v.Op = OpAMD64VINSERTI128256
-               return true
-       case OpSet128Int8x32:
-               v.Op = OpAMD64VINSERTI128256
-               return true
-       case OpSet128Uint16x16:
-               v.Op = OpAMD64VINSERTI128256
-               return true
-       case OpSet128Uint32x8:
-               v.Op = OpAMD64VINSERTI128256
-               return true
-       case OpSet128Uint64x4:
-               v.Op = OpAMD64VINSERTI128256
-               return true
-       case OpSet128Uint8x32:
-               v.Op = OpAMD64VINSERTI128256
-               return true
        case OpSetElemInt16x8:
                v.Op = OpAMD64VPINSRW128
                return true
@@ -4360,6 +4380,86 @@ func rewriteValueAMD64(v *Value) bool {
        case OpSetElemUint8x16:
                v.Op = OpAMD64VPINSRB128
                return true
+       case OpSetHiFloat32x16:
+               return rewriteValueAMD64_OpSetHiFloat32x16(v)
+       case OpSetHiFloat32x8:
+               return rewriteValueAMD64_OpSetHiFloat32x8(v)
+       case OpSetHiFloat64x4:
+               return rewriteValueAMD64_OpSetHiFloat64x4(v)
+       case OpSetHiFloat64x8:
+               return rewriteValueAMD64_OpSetHiFloat64x8(v)
+       case OpSetHiInt16x16:
+               return rewriteValueAMD64_OpSetHiInt16x16(v)
+       case OpSetHiInt16x32:
+               return rewriteValueAMD64_OpSetHiInt16x32(v)
+       case OpSetHiInt32x16:
+               return rewriteValueAMD64_OpSetHiInt32x16(v)
+       case OpSetHiInt32x8:
+               return rewriteValueAMD64_OpSetHiInt32x8(v)
+       case OpSetHiInt64x4:
+               return rewriteValueAMD64_OpSetHiInt64x4(v)
+       case OpSetHiInt64x8:
+               return rewriteValueAMD64_OpSetHiInt64x8(v)
+       case OpSetHiInt8x32:
+               return rewriteValueAMD64_OpSetHiInt8x32(v)
+       case OpSetHiInt8x64:
+               return rewriteValueAMD64_OpSetHiInt8x64(v)
+       case OpSetHiUint16x16:
+               return rewriteValueAMD64_OpSetHiUint16x16(v)
+       case OpSetHiUint16x32:
+               return rewriteValueAMD64_OpSetHiUint16x32(v)
+       case OpSetHiUint32x16:
+               return rewriteValueAMD64_OpSetHiUint32x16(v)
+       case OpSetHiUint32x8:
+               return rewriteValueAMD64_OpSetHiUint32x8(v)
+       case OpSetHiUint64x4:
+               return rewriteValueAMD64_OpSetHiUint64x4(v)
+       case OpSetHiUint64x8:
+               return rewriteValueAMD64_OpSetHiUint64x8(v)
+       case OpSetHiUint8x32:
+               return rewriteValueAMD64_OpSetHiUint8x32(v)
+       case OpSetHiUint8x64:
+               return rewriteValueAMD64_OpSetHiUint8x64(v)
+       case OpSetLoFloat32x16:
+               return rewriteValueAMD64_OpSetLoFloat32x16(v)
+       case OpSetLoFloat32x8:
+               return rewriteValueAMD64_OpSetLoFloat32x8(v)
+       case OpSetLoFloat64x4:
+               return rewriteValueAMD64_OpSetLoFloat64x4(v)
+       case OpSetLoFloat64x8:
+               return rewriteValueAMD64_OpSetLoFloat64x8(v)
+       case OpSetLoInt16x16:
+               return rewriteValueAMD64_OpSetLoInt16x16(v)
+       case OpSetLoInt16x32:
+               return rewriteValueAMD64_OpSetLoInt16x32(v)
+       case OpSetLoInt32x16:
+               return rewriteValueAMD64_OpSetLoInt32x16(v)
+       case OpSetLoInt32x8:
+               return rewriteValueAMD64_OpSetLoInt32x8(v)
+       case OpSetLoInt64x4:
+               return rewriteValueAMD64_OpSetLoInt64x4(v)
+       case OpSetLoInt64x8:
+               return rewriteValueAMD64_OpSetLoInt64x8(v)
+       case OpSetLoInt8x32:
+               return rewriteValueAMD64_OpSetLoInt8x32(v)
+       case OpSetLoInt8x64:
+               return rewriteValueAMD64_OpSetLoInt8x64(v)
+       case OpSetLoUint16x16:
+               return rewriteValueAMD64_OpSetLoUint16x16(v)
+       case OpSetLoUint16x32:
+               return rewriteValueAMD64_OpSetLoUint16x32(v)
+       case OpSetLoUint32x16:
+               return rewriteValueAMD64_OpSetLoUint32x16(v)
+       case OpSetLoUint32x8:
+               return rewriteValueAMD64_OpSetLoUint32x8(v)
+       case OpSetLoUint64x4:
+               return rewriteValueAMD64_OpSetLoUint64x4(v)
+       case OpSetLoUint64x8:
+               return rewriteValueAMD64_OpSetLoUint64x8(v)
+       case OpSetLoUint8x32:
+               return rewriteValueAMD64_OpSetLoUint8x32(v)
+       case OpSetLoUint8x64:
+               return rewriteValueAMD64_OpSetLoUint8x64(v)
        case OpShiftAllLeftConcatInt16x16:
                v.Op = OpAMD64VPSHLDW256
                return true
@@ -35376,6 +35476,486 @@ func rewriteValueAMD64_OpGetG(v *Value) bool {
        }
        return false
 }
+func rewriteValueAMD64_OpGetHiFloat32x16(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (GetHiFloat32x16 x)
+       // result: (VEXTRACTF64X4256 [1] x)
+       for {
+               x := v_0
+               v.reset(OpAMD64VEXTRACTF64X4256)
+               v.AuxInt = int8ToAuxInt(1)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpGetHiFloat32x8(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (GetHiFloat32x8 x)
+       // result: (VEXTRACTF128128 [1] x)
+       for {
+               x := v_0
+               v.reset(OpAMD64VEXTRACTF128128)
+               v.AuxInt = int8ToAuxInt(1)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpGetHiFloat64x4(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (GetHiFloat64x4 x)
+       // result: (VEXTRACTF128128 [1] x)
+       for {
+               x := v_0
+               v.reset(OpAMD64VEXTRACTF128128)
+               v.AuxInt = int8ToAuxInt(1)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpGetHiFloat64x8(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (GetHiFloat64x8 x)
+       // result: (VEXTRACTF64X4256 [1] x)
+       for {
+               x := v_0
+               v.reset(OpAMD64VEXTRACTF64X4256)
+               v.AuxInt = int8ToAuxInt(1)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpGetHiInt16x16(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (GetHiInt16x16 x)
+       // result: (VEXTRACTI128128 [1] x)
+       for {
+               x := v_0
+               v.reset(OpAMD64VEXTRACTI128128)
+               v.AuxInt = int8ToAuxInt(1)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpGetHiInt16x32(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (GetHiInt16x32 x)
+       // result: (VEXTRACTI64X4256 [1] x)
+       for {
+               x := v_0
+               v.reset(OpAMD64VEXTRACTI64X4256)
+               v.AuxInt = int8ToAuxInt(1)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpGetHiInt32x16(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (GetHiInt32x16 x)
+       // result: (VEXTRACTI64X4256 [1] x)
+       for {
+               x := v_0
+               v.reset(OpAMD64VEXTRACTI64X4256)
+               v.AuxInt = int8ToAuxInt(1)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpGetHiInt32x8(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (GetHiInt32x8 x)
+       // result: (VEXTRACTI128128 [1] x)
+       for {
+               x := v_0
+               v.reset(OpAMD64VEXTRACTI128128)
+               v.AuxInt = int8ToAuxInt(1)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpGetHiInt64x4(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (GetHiInt64x4 x)
+       // result: (VEXTRACTI128128 [1] x)
+       for {
+               x := v_0
+               v.reset(OpAMD64VEXTRACTI128128)
+               v.AuxInt = int8ToAuxInt(1)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpGetHiInt64x8(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (GetHiInt64x8 x)
+       // result: (VEXTRACTI64X4256 [1] x)
+       for {
+               x := v_0
+               v.reset(OpAMD64VEXTRACTI64X4256)
+               v.AuxInt = int8ToAuxInt(1)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpGetHiInt8x32(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (GetHiInt8x32 x)
+       // result: (VEXTRACTI128128 [1] x)
+       for {
+               x := v_0
+               v.reset(OpAMD64VEXTRACTI128128)
+               v.AuxInt = int8ToAuxInt(1)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpGetHiInt8x64(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (GetHiInt8x64 x)
+       // result: (VEXTRACTI64X4256 [1] x)
+       for {
+               x := v_0
+               v.reset(OpAMD64VEXTRACTI64X4256)
+               v.AuxInt = int8ToAuxInt(1)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpGetHiUint16x16(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (GetHiUint16x16 x)
+       // result: (VEXTRACTI128128 [1] x)
+       for {
+               x := v_0
+               v.reset(OpAMD64VEXTRACTI128128)
+               v.AuxInt = int8ToAuxInt(1)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpGetHiUint16x32(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (GetHiUint16x32 x)
+       // result: (VEXTRACTI64X4256 [1] x)
+       for {
+               x := v_0
+               v.reset(OpAMD64VEXTRACTI64X4256)
+               v.AuxInt = int8ToAuxInt(1)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpGetHiUint32x16(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (GetHiUint32x16 x)
+       // result: (VEXTRACTI64X4256 [1] x)
+       for {
+               x := v_0
+               v.reset(OpAMD64VEXTRACTI64X4256)
+               v.AuxInt = int8ToAuxInt(1)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpGetHiUint32x8(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (GetHiUint32x8 x)
+       // result: (VEXTRACTI128128 [1] x)
+       for {
+               x := v_0
+               v.reset(OpAMD64VEXTRACTI128128)
+               v.AuxInt = int8ToAuxInt(1)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpGetHiUint64x4(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (GetHiUint64x4 x)
+       // result: (VEXTRACTI128128 [1] x)
+       for {
+               x := v_0
+               v.reset(OpAMD64VEXTRACTI128128)
+               v.AuxInt = int8ToAuxInt(1)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpGetHiUint64x8(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (GetHiUint64x8 x)
+       // result: (VEXTRACTI64X4256 [1] x)
+       for {
+               x := v_0
+               v.reset(OpAMD64VEXTRACTI64X4256)
+               v.AuxInt = int8ToAuxInt(1)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpGetHiUint8x32(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (GetHiUint8x32 x)
+       // result: (VEXTRACTI128128 [1] x)
+       for {
+               x := v_0
+               v.reset(OpAMD64VEXTRACTI128128)
+               v.AuxInt = int8ToAuxInt(1)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpGetHiUint8x64(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (GetHiUint8x64 x)
+       // result: (VEXTRACTI64X4256 [1] x)
+       for {
+               x := v_0
+               v.reset(OpAMD64VEXTRACTI64X4256)
+               v.AuxInt = int8ToAuxInt(1)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpGetLoFloat32x16(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (GetLoFloat32x16 x)
+       // result: (VEXTRACTF64X4256 [0] x)
+       for {
+               x := v_0
+               v.reset(OpAMD64VEXTRACTF64X4256)
+               v.AuxInt = int8ToAuxInt(0)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpGetLoFloat32x8(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (GetLoFloat32x8 x)
+       // result: (VEXTRACTF128128 [0] x)
+       for {
+               x := v_0
+               v.reset(OpAMD64VEXTRACTF128128)
+               v.AuxInt = int8ToAuxInt(0)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpGetLoFloat64x4(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (GetLoFloat64x4 x)
+       // result: (VEXTRACTF128128 [0] x)
+       for {
+               x := v_0
+               v.reset(OpAMD64VEXTRACTF128128)
+               v.AuxInt = int8ToAuxInt(0)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpGetLoFloat64x8(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (GetLoFloat64x8 x)
+       // result: (VEXTRACTF64X4256 [0] x)
+       for {
+               x := v_0
+               v.reset(OpAMD64VEXTRACTF64X4256)
+               v.AuxInt = int8ToAuxInt(0)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpGetLoInt16x16(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (GetLoInt16x16 x)
+       // result: (VEXTRACTI128128 [0] x)
+       for {
+               x := v_0
+               v.reset(OpAMD64VEXTRACTI128128)
+               v.AuxInt = int8ToAuxInt(0)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpGetLoInt16x32(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (GetLoInt16x32 x)
+       // result: (VEXTRACTI64X4256 [0] x)
+       for {
+               x := v_0
+               v.reset(OpAMD64VEXTRACTI64X4256)
+               v.AuxInt = int8ToAuxInt(0)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpGetLoInt32x16(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (GetLoInt32x16 x)
+       // result: (VEXTRACTI64X4256 [0] x)
+       for {
+               x := v_0
+               v.reset(OpAMD64VEXTRACTI64X4256)
+               v.AuxInt = int8ToAuxInt(0)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpGetLoInt32x8(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (GetLoInt32x8 x)
+       // result: (VEXTRACTI128128 [0] x)
+       for {
+               x := v_0
+               v.reset(OpAMD64VEXTRACTI128128)
+               v.AuxInt = int8ToAuxInt(0)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpGetLoInt64x4(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (GetLoInt64x4 x)
+       // result: (VEXTRACTI128128 [0] x)
+       for {
+               x := v_0
+               v.reset(OpAMD64VEXTRACTI128128)
+               v.AuxInt = int8ToAuxInt(0)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpGetLoInt64x8(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (GetLoInt64x8 x)
+       // result: (VEXTRACTI64X4256 [0] x)
+       for {
+               x := v_0
+               v.reset(OpAMD64VEXTRACTI64X4256)
+               v.AuxInt = int8ToAuxInt(0)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpGetLoInt8x32(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (GetLoInt8x32 x)
+       // result: (VEXTRACTI128128 [0] x)
+       for {
+               x := v_0
+               v.reset(OpAMD64VEXTRACTI128128)
+               v.AuxInt = int8ToAuxInt(0)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpGetLoInt8x64(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (GetLoInt8x64 x)
+       // result: (VEXTRACTI64X4256 [0] x)
+       for {
+               x := v_0
+               v.reset(OpAMD64VEXTRACTI64X4256)
+               v.AuxInt = int8ToAuxInt(0)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpGetLoUint16x16(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (GetLoUint16x16 x)
+       // result: (VEXTRACTI128128 [0] x)
+       for {
+               x := v_0
+               v.reset(OpAMD64VEXTRACTI128128)
+               v.AuxInt = int8ToAuxInt(0)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpGetLoUint16x32(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (GetLoUint16x32 x)
+       // result: (VEXTRACTI64X4256 [0] x)
+       for {
+               x := v_0
+               v.reset(OpAMD64VEXTRACTI64X4256)
+               v.AuxInt = int8ToAuxInt(0)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpGetLoUint32x16(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (GetLoUint32x16 x)
+       // result: (VEXTRACTI64X4256 [0] x)
+       for {
+               x := v_0
+               v.reset(OpAMD64VEXTRACTI64X4256)
+               v.AuxInt = int8ToAuxInt(0)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpGetLoUint32x8(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (GetLoUint32x8 x)
+       // result: (VEXTRACTI128128 [0] x)
+       for {
+               x := v_0
+               v.reset(OpAMD64VEXTRACTI128128)
+               v.AuxInt = int8ToAuxInt(0)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpGetLoUint64x4(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (GetLoUint64x4 x)
+       // result: (VEXTRACTI128128 [0] x)
+       for {
+               x := v_0
+               v.reset(OpAMD64VEXTRACTI128128)
+               v.AuxInt = int8ToAuxInt(0)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpGetLoUint64x8(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (GetLoUint64x8 x)
+       // result: (VEXTRACTI64X4256 [0] x)
+       for {
+               x := v_0
+               v.reset(OpAMD64VEXTRACTI64X4256)
+               v.AuxInt = int8ToAuxInt(0)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpGetLoUint8x32(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (GetLoUint8x32 x)
+       // result: (VEXTRACTI128128 [0] x)
+       for {
+               x := v_0
+               v.reset(OpAMD64VEXTRACTI128128)
+               v.AuxInt = int8ToAuxInt(0)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpGetLoUint8x64(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (GetLoUint8x64 x)
+       // result: (VEXTRACTI64X4256 [0] x)
+       for {
+               x := v_0
+               v.reset(OpAMD64VEXTRACTI64X4256)
+               v.AuxInt = int8ToAuxInt(0)
+               v.AddArg(x)
+               return true
+       }
+}
 func rewriteValueAMD64_OpGreaterEqualFloat32x16(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
@@ -50409,6 +50989,566 @@ func rewriteValueAMD64_OpSelectN(v *Value) bool {
        }
        return false
 }
+func rewriteValueAMD64_OpSetHiFloat32x16(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (SetHiFloat32x16 x y)
+       // result: (VINSERTF64X4512 [1] x y)
+       for {
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VINSERTF64X4512)
+               v.AuxInt = int8ToAuxInt(1)
+               v.AddArg2(x, y)
+               return true
+       }
+}
+func rewriteValueAMD64_OpSetHiFloat32x8(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (SetHiFloat32x8 x y)
+       // result: (VINSERTF128256 [1] x y)
+       for {
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VINSERTF128256)
+               v.AuxInt = int8ToAuxInt(1)
+               v.AddArg2(x, y)
+               return true
+       }
+}
+func rewriteValueAMD64_OpSetHiFloat64x4(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (SetHiFloat64x4 x y)
+       // result: (VINSERTF128256 [1] x y)
+       for {
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VINSERTF128256)
+               v.AuxInt = int8ToAuxInt(1)
+               v.AddArg2(x, y)
+               return true
+       }
+}
+func rewriteValueAMD64_OpSetHiFloat64x8(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (SetHiFloat64x8 x y)
+       // result: (VINSERTF64X4512 [1] x y)
+       for {
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VINSERTF64X4512)
+               v.AuxInt = int8ToAuxInt(1)
+               v.AddArg2(x, y)
+               return true
+       }
+}
+func rewriteValueAMD64_OpSetHiInt16x16(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (SetHiInt16x16 x y)
+       // result: (VINSERTI128256 [1] x y)
+       for {
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VINSERTI128256)
+               v.AuxInt = int8ToAuxInt(1)
+               v.AddArg2(x, y)
+               return true
+       }
+}
+func rewriteValueAMD64_OpSetHiInt16x32(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (SetHiInt16x32 x y)
+       // result: (VINSERTI64X4512 [1] x y)
+       for {
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VINSERTI64X4512)
+               v.AuxInt = int8ToAuxInt(1)
+               v.AddArg2(x, y)
+               return true
+       }
+}
+func rewriteValueAMD64_OpSetHiInt32x16(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (SetHiInt32x16 x y)
+       // result: (VINSERTI64X4512 [1] x y)
+       for {
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VINSERTI64X4512)
+               v.AuxInt = int8ToAuxInt(1)
+               v.AddArg2(x, y)
+               return true
+       }
+}
+func rewriteValueAMD64_OpSetHiInt32x8(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (SetHiInt32x8 x y)
+       // result: (VINSERTI128256 [1] x y)
+       for {
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VINSERTI128256)
+               v.AuxInt = int8ToAuxInt(1)
+               v.AddArg2(x, y)
+               return true
+       }
+}
+func rewriteValueAMD64_OpSetHiInt64x4(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (SetHiInt64x4 x y)
+       // result: (VINSERTI128256 [1] x y)
+       for {
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VINSERTI128256)
+               v.AuxInt = int8ToAuxInt(1)
+               v.AddArg2(x, y)
+               return true
+       }
+}
+func rewriteValueAMD64_OpSetHiInt64x8(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (SetHiInt64x8 x y)
+       // result: (VINSERTI64X4512 [1] x y)
+       for {
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VINSERTI64X4512)
+               v.AuxInt = int8ToAuxInt(1)
+               v.AddArg2(x, y)
+               return true
+       }
+}
+func rewriteValueAMD64_OpSetHiInt8x32(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (SetHiInt8x32 x y)
+       // result: (VINSERTI128256 [1] x y)
+       for {
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VINSERTI128256)
+               v.AuxInt = int8ToAuxInt(1)
+               v.AddArg2(x, y)
+               return true
+       }
+}
+func rewriteValueAMD64_OpSetHiInt8x64(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (SetHiInt8x64 x y)
+       // result: (VINSERTI64X4512 [1] x y)
+       for {
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VINSERTI64X4512)
+               v.AuxInt = int8ToAuxInt(1)
+               v.AddArg2(x, y)
+               return true
+       }
+}
+func rewriteValueAMD64_OpSetHiUint16x16(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (SetHiUint16x16 x y)
+       // result: (VINSERTI128256 [1] x y)
+       for {
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VINSERTI128256)
+               v.AuxInt = int8ToAuxInt(1)
+               v.AddArg2(x, y)
+               return true
+       }
+}
+func rewriteValueAMD64_OpSetHiUint16x32(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (SetHiUint16x32 x y)
+       // result: (VINSERTI64X4512 [1] x y)
+       for {
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VINSERTI64X4512)
+               v.AuxInt = int8ToAuxInt(1)
+               v.AddArg2(x, y)
+               return true
+       }
+}
+func rewriteValueAMD64_OpSetHiUint32x16(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (SetHiUint32x16 x y)
+       // result: (VINSERTI64X4512 [1] x y)
+       for {
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VINSERTI64X4512)
+               v.AuxInt = int8ToAuxInt(1)
+               v.AddArg2(x, y)
+               return true
+       }
+}
+func rewriteValueAMD64_OpSetHiUint32x8(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (SetHiUint32x8 x y)
+       // result: (VINSERTI128256 [1] x y)
+       for {
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VINSERTI128256)
+               v.AuxInt = int8ToAuxInt(1)
+               v.AddArg2(x, y)
+               return true
+       }
+}
+func rewriteValueAMD64_OpSetHiUint64x4(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (SetHiUint64x4 x y)
+       // result: (VINSERTI128256 [1] x y)
+       for {
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VINSERTI128256)
+               v.AuxInt = int8ToAuxInt(1)
+               v.AddArg2(x, y)
+               return true
+       }
+}
+func rewriteValueAMD64_OpSetHiUint64x8(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (SetHiUint64x8 x y)
+       // result: (VINSERTI64X4512 [1] x y)
+       for {
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VINSERTI64X4512)
+               v.AuxInt = int8ToAuxInt(1)
+               v.AddArg2(x, y)
+               return true
+       }
+}
+func rewriteValueAMD64_OpSetHiUint8x32(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (SetHiUint8x32 x y)
+       // result: (VINSERTI128256 [1] x y)
+       for {
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VINSERTI128256)
+               v.AuxInt = int8ToAuxInt(1)
+               v.AddArg2(x, y)
+               return true
+       }
+}
+func rewriteValueAMD64_OpSetHiUint8x64(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (SetHiUint8x64 x y)
+       // result: (VINSERTI64X4512 [1] x y)
+       for {
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VINSERTI64X4512)
+               v.AuxInt = int8ToAuxInt(1)
+               v.AddArg2(x, y)
+               return true
+       }
+}
+func rewriteValueAMD64_OpSetLoFloat32x16(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (SetLoFloat32x16 x y)
+       // result: (VINSERTF64X4512 [0] x y)
+       for {
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VINSERTF64X4512)
+               v.AuxInt = int8ToAuxInt(0)
+               v.AddArg2(x, y)
+               return true
+       }
+}
+func rewriteValueAMD64_OpSetLoFloat32x8(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (SetLoFloat32x8 x y)
+       // result: (VINSERTF128256 [0] x y)
+       for {
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VINSERTF128256)
+               v.AuxInt = int8ToAuxInt(0)
+               v.AddArg2(x, y)
+               return true
+       }
+}
+func rewriteValueAMD64_OpSetLoFloat64x4(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (SetLoFloat64x4 x y)
+       // result: (VINSERTF128256 [0] x y)
+       for {
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VINSERTF128256)
+               v.AuxInt = int8ToAuxInt(0)
+               v.AddArg2(x, y)
+               return true
+       }
+}
+func rewriteValueAMD64_OpSetLoFloat64x8(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (SetLoFloat64x8 x y)
+       // result: (VINSERTF64X4512 [0] x y)
+       for {
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VINSERTF64X4512)
+               v.AuxInt = int8ToAuxInt(0)
+               v.AddArg2(x, y)
+               return true
+       }
+}
+func rewriteValueAMD64_OpSetLoInt16x16(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (SetLoInt16x16 x y)
+       // result: (VINSERTI128256 [0] x y)
+       for {
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VINSERTI128256)
+               v.AuxInt = int8ToAuxInt(0)
+               v.AddArg2(x, y)
+               return true
+       }
+}
+func rewriteValueAMD64_OpSetLoInt16x32(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (SetLoInt16x32 x y)
+       // result: (VINSERTI64X4512 [0] x y)
+       for {
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VINSERTI64X4512)
+               v.AuxInt = int8ToAuxInt(0)
+               v.AddArg2(x, y)
+               return true
+       }
+}
+func rewriteValueAMD64_OpSetLoInt32x16(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (SetLoInt32x16 x y)
+       // result: (VINSERTI64X4512 [0] x y)
+       for {
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VINSERTI64X4512)
+               v.AuxInt = int8ToAuxInt(0)
+               v.AddArg2(x, y)
+               return true
+       }
+}
+func rewriteValueAMD64_OpSetLoInt32x8(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (SetLoInt32x8 x y)
+       // result: (VINSERTI128256 [0] x y)
+       for {
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VINSERTI128256)
+               v.AuxInt = int8ToAuxInt(0)
+               v.AddArg2(x, y)
+               return true
+       }
+}
+func rewriteValueAMD64_OpSetLoInt64x4(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (SetLoInt64x4 x y)
+       // result: (VINSERTI128256 [0] x y)
+       for {
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VINSERTI128256)
+               v.AuxInt = int8ToAuxInt(0)
+               v.AddArg2(x, y)
+               return true
+       }
+}
+func rewriteValueAMD64_OpSetLoInt64x8(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (SetLoInt64x8 x y)
+       // result: (VINSERTI64X4512 [0] x y)
+       for {
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VINSERTI64X4512)
+               v.AuxInt = int8ToAuxInt(0)
+               v.AddArg2(x, y)
+               return true
+       }
+}
+func rewriteValueAMD64_OpSetLoInt8x32(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (SetLoInt8x32 x y)
+       // result: (VINSERTI128256 [0] x y)
+       for {
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VINSERTI128256)
+               v.AuxInt = int8ToAuxInt(0)
+               v.AddArg2(x, y)
+               return true
+       }
+}
+func rewriteValueAMD64_OpSetLoInt8x64(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (SetLoInt8x64 x y)
+       // result: (VINSERTI64X4512 [0] x y)
+       for {
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VINSERTI64X4512)
+               v.AuxInt = int8ToAuxInt(0)
+               v.AddArg2(x, y)
+               return true
+       }
+}
+func rewriteValueAMD64_OpSetLoUint16x16(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (SetLoUint16x16 x y)
+       // result: (VINSERTI128256 [0] x y)
+       for {
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VINSERTI128256)
+               v.AuxInt = int8ToAuxInt(0)
+               v.AddArg2(x, y)
+               return true
+       }
+}
+func rewriteValueAMD64_OpSetLoUint16x32(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (SetLoUint16x32 x y)
+       // result: (VINSERTI64X4512 [0] x y)
+       for {
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VINSERTI64X4512)
+               v.AuxInt = int8ToAuxInt(0)
+               v.AddArg2(x, y)
+               return true
+       }
+}
+func rewriteValueAMD64_OpSetLoUint32x16(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (SetLoUint32x16 x y)
+       // result: (VINSERTI64X4512 [0] x y)
+       for {
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VINSERTI64X4512)
+               v.AuxInt = int8ToAuxInt(0)
+               v.AddArg2(x, y)
+               return true
+       }
+}
+func rewriteValueAMD64_OpSetLoUint32x8(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (SetLoUint32x8 x y)
+       // result: (VINSERTI128256 [0] x y)
+       for {
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VINSERTI128256)
+               v.AuxInt = int8ToAuxInt(0)
+               v.AddArg2(x, y)
+               return true
+       }
+}
+func rewriteValueAMD64_OpSetLoUint64x4(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (SetLoUint64x4 x y)
+       // result: (VINSERTI128256 [0] x y)
+       for {
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VINSERTI128256)
+               v.AuxInt = int8ToAuxInt(0)
+               v.AddArg2(x, y)
+               return true
+       }
+}
+func rewriteValueAMD64_OpSetLoUint64x8(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (SetLoUint64x8 x y)
+       // result: (VINSERTI64X4512 [0] x y)
+       for {
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VINSERTI64X4512)
+               v.AuxInt = int8ToAuxInt(0)
+               v.AddArg2(x, y)
+               return true
+       }
+}
+func rewriteValueAMD64_OpSetLoUint8x32(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (SetLoUint8x32 x y)
+       // result: (VINSERTI128256 [0] x y)
+       for {
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VINSERTI128256)
+               v.AuxInt = int8ToAuxInt(0)
+               v.AddArg2(x, y)
+               return true
+       }
+}
+func rewriteValueAMD64_OpSetLoUint8x64(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (SetLoUint8x64 x y)
+       // result: (VINSERTI64X4512 [0] x y)
+       for {
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VINSERTI64X4512)
+               v.AuxInt = int8ToAuxInt(0)
+               v.AddArg2(x, y)
+               return true
+       }
+}
 func rewriteValueAMD64_OpShiftAllLeftConcatMaskedInt16x16(v *Value) bool {
        v_2 := v.Args[2]
        v_1 := v.Args[1]
index fb68846347d27364e4a5aee80d4ebc9e19d71565..873bb8e2de17c90b309481dc7f1b137dd61e5345 100644 (file)
@@ -478,16 +478,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
        addF(simdPackage, "Uint8x16.GaloisFieldMulMasked", opLen3(ssa.OpGaloisFieldMulMaskedUint8x16, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Uint8x32.GaloisFieldMulMasked", opLen3(ssa.OpGaloisFieldMulMaskedUint8x32, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Uint8x64.GaloisFieldMulMasked", opLen3(ssa.OpGaloisFieldMulMaskedUint8x64, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x8.Get128", opLen1Imm8(ssa.OpGet128Float32x8, types.TypeVec128, 0), sys.AMD64)
-       addF(simdPackage, "Float64x4.Get128", opLen1Imm8(ssa.OpGet128Float64x4, types.TypeVec128, 0), sys.AMD64)
-       addF(simdPackage, "Int8x32.Get128", opLen1Imm8(ssa.OpGet128Int8x32, types.TypeVec128, 0), sys.AMD64)
-       addF(simdPackage, "Int16x16.Get128", opLen1Imm8(ssa.OpGet128Int16x16, types.TypeVec128, 0), sys.AMD64)
-       addF(simdPackage, "Int32x8.Get128", opLen1Imm8(ssa.OpGet128Int32x8, types.TypeVec128, 0), sys.AMD64)
-       addF(simdPackage, "Int64x4.Get128", opLen1Imm8(ssa.OpGet128Int64x4, types.TypeVec128, 0), sys.AMD64)
-       addF(simdPackage, "Uint8x32.Get128", opLen1Imm8(ssa.OpGet128Uint8x32, types.TypeVec128, 0), sys.AMD64)
-       addF(simdPackage, "Uint16x16.Get128", opLen1Imm8(ssa.OpGet128Uint16x16, types.TypeVec128, 0), sys.AMD64)
-       addF(simdPackage, "Uint32x8.Get128", opLen1Imm8(ssa.OpGet128Uint32x8, types.TypeVec128, 0), sys.AMD64)
-       addF(simdPackage, "Uint64x4.Get128", opLen1Imm8(ssa.OpGet128Uint64x4, types.TypeVec128, 0), sys.AMD64)
        addF(simdPackage, "Int8x16.GetElem", opLen1Imm8(ssa.OpGetElemInt8x16, types.Types[types.TINT8], 0), sys.AMD64)
        addF(simdPackage, "Int16x8.GetElem", opLen1Imm8(ssa.OpGetElemInt16x8, types.Types[types.TINT16], 0), sys.AMD64)
        addF(simdPackage, "Int32x4.GetElem", opLen1Imm8(ssa.OpGetElemInt32x4, types.Types[types.TINT32], 0), sys.AMD64)
@@ -496,6 +486,46 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
        addF(simdPackage, "Uint16x8.GetElem", opLen1Imm8(ssa.OpGetElemUint16x8, types.Types[types.TUINT16], 0), sys.AMD64)
        addF(simdPackage, "Uint32x4.GetElem", opLen1Imm8(ssa.OpGetElemUint32x4, types.Types[types.TUINT32], 0), sys.AMD64)
        addF(simdPackage, "Uint64x2.GetElem", opLen1Imm8(ssa.OpGetElemUint64x2, types.Types[types.TUINT64], 0), sys.AMD64)
+       addF(simdPackage, "Float32x8.GetHi", opLen1(ssa.OpGetHiFloat32x8, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float32x16.GetHi", opLen1(ssa.OpGetHiFloat32x16, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float64x4.GetHi", opLen1(ssa.OpGetHiFloat64x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float64x8.GetHi", opLen1(ssa.OpGetHiFloat64x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int8x32.GetHi", opLen1(ssa.OpGetHiInt8x32, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int8x64.GetHi", opLen1(ssa.OpGetHiInt8x64, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int16x16.GetHi", opLen1(ssa.OpGetHiInt16x16, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int16x32.GetHi", opLen1(ssa.OpGetHiInt16x32, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int32x8.GetHi", opLen1(ssa.OpGetHiInt32x8, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int32x16.GetHi", opLen1(ssa.OpGetHiInt32x16, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int64x4.GetHi", opLen1(ssa.OpGetHiInt64x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int64x8.GetHi", opLen1(ssa.OpGetHiInt64x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint8x32.GetHi", opLen1(ssa.OpGetHiUint8x32, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint8x64.GetHi", opLen1(ssa.OpGetHiUint8x64, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint16x16.GetHi", opLen1(ssa.OpGetHiUint16x16, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint16x32.GetHi", opLen1(ssa.OpGetHiUint16x32, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint32x8.GetHi", opLen1(ssa.OpGetHiUint32x8, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint32x16.GetHi", opLen1(ssa.OpGetHiUint32x16, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint64x4.GetHi", opLen1(ssa.OpGetHiUint64x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint64x8.GetHi", opLen1(ssa.OpGetHiUint64x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float32x8.GetLo", opLen1(ssa.OpGetLoFloat32x8, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float32x16.GetLo", opLen1(ssa.OpGetLoFloat32x16, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float64x4.GetLo", opLen1(ssa.OpGetLoFloat64x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Float64x8.GetLo", opLen1(ssa.OpGetLoFloat64x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int8x32.GetLo", opLen1(ssa.OpGetLoInt8x32, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int8x64.GetLo", opLen1(ssa.OpGetLoInt8x64, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int16x16.GetLo", opLen1(ssa.OpGetLoInt16x16, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int16x32.GetLo", opLen1(ssa.OpGetLoInt16x32, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int32x8.GetLo", opLen1(ssa.OpGetLoInt32x8, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int32x16.GetLo", opLen1(ssa.OpGetLoInt32x16, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int64x4.GetLo", opLen1(ssa.OpGetLoInt64x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int64x8.GetLo", opLen1(ssa.OpGetLoInt64x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint8x32.GetLo", opLen1(ssa.OpGetLoUint8x32, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint8x64.GetLo", opLen1(ssa.OpGetLoUint8x64, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint16x16.GetLo", opLen1(ssa.OpGetLoUint16x16, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint16x32.GetLo", opLen1(ssa.OpGetLoUint16x32, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint32x8.GetLo", opLen1(ssa.OpGetLoUint32x8, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint32x16.GetLo", opLen1(ssa.OpGetLoUint32x16, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint64x4.GetLo", opLen1(ssa.OpGetLoUint64x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint64x8.GetLo", opLen1(ssa.OpGetLoUint64x8, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Int8x16.Greater", opLen2(ssa.OpGreaterInt8x16, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Int8x32.Greater", opLen2(ssa.OpGreaterInt8x32, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Int8x64.Greater", opLen2(ssa.OpGreaterInt8x64, types.TypeVec512), sys.AMD64)
@@ -1338,16 +1368,6 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
        addF(simdPackage, "Float64x2.ScaleMasked", opLen3(ssa.OpScaleMaskedFloat64x2, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Float64x4.ScaleMasked", opLen3(ssa.OpScaleMaskedFloat64x4, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float64x8.ScaleMasked", opLen3(ssa.OpScaleMaskedFloat64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Float32x8.Set128", opLen2Imm8(ssa.OpSet128Float32x8, types.TypeVec256, 0), sys.AMD64)
-       addF(simdPackage, "Float64x4.Set128", opLen2Imm8(ssa.OpSet128Float64x4, types.TypeVec256, 0), sys.AMD64)
-       addF(simdPackage, "Int8x32.Set128", opLen2Imm8(ssa.OpSet128Int8x32, types.TypeVec256, 0), sys.AMD64)
-       addF(simdPackage, "Int16x16.Set128", opLen2Imm8(ssa.OpSet128Int16x16, types.TypeVec256, 0), sys.AMD64)
-       addF(simdPackage, "Int32x8.Set128", opLen2Imm8(ssa.OpSet128Int32x8, types.TypeVec256, 0), sys.AMD64)
-       addF(simdPackage, "Int64x4.Set128", opLen2Imm8(ssa.OpSet128Int64x4, types.TypeVec256, 0), sys.AMD64)
-       addF(simdPackage, "Uint8x32.Set128", opLen2Imm8(ssa.OpSet128Uint8x32, types.TypeVec256, 0), sys.AMD64)
-       addF(simdPackage, "Uint16x16.Set128", opLen2Imm8(ssa.OpSet128Uint16x16, types.TypeVec256, 0), sys.AMD64)
-       addF(simdPackage, "Uint32x8.Set128", opLen2Imm8(ssa.OpSet128Uint32x8, types.TypeVec256, 0), sys.AMD64)
-       addF(simdPackage, "Uint64x4.Set128", opLen2Imm8(ssa.OpSet128Uint64x4, types.TypeVec256, 0), sys.AMD64)
        addF(simdPackage, "Int8x16.SetElem", opLen2Imm8(ssa.OpSetElemInt8x16, types.TypeVec128, 0), sys.AMD64)
        addF(simdPackage, "Int16x8.SetElem", opLen2Imm8(ssa.OpSetElemInt16x8, types.TypeVec128, 0), sys.AMD64)
        addF(simdPackage, "Int32x4.SetElem", opLen2Imm8(ssa.OpSetElemInt32x4, types.TypeVec128, 0), sys.AMD64)
@@ -1356,6 +1376,46 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
        addF(simdPackage, "Uint16x8.SetElem", opLen2Imm8(ssa.OpSetElemUint16x8, types.TypeVec128, 0), sys.AMD64)
        addF(simdPackage, "Uint32x4.SetElem", opLen2Imm8(ssa.OpSetElemUint32x4, types.TypeVec128, 0), sys.AMD64)
        addF(simdPackage, "Uint64x2.SetElem", opLen2Imm8(ssa.OpSetElemUint64x2, types.TypeVec128, 0), sys.AMD64)
+       addF(simdPackage, "Float32x8.SetHi", opLen2(ssa.OpSetHiFloat32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float32x16.SetHi", opLen2(ssa.OpSetHiFloat32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float64x4.SetHi", opLen2(ssa.OpSetHiFloat64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float64x8.SetHi", opLen2(ssa.OpSetHiFloat64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int8x32.SetHi", opLen2(ssa.OpSetHiInt8x32, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int8x64.SetHi", opLen2(ssa.OpSetHiInt8x64, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int16x16.SetHi", opLen2(ssa.OpSetHiInt16x16, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int16x32.SetHi", opLen2(ssa.OpSetHiInt16x32, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int32x8.SetHi", opLen2(ssa.OpSetHiInt32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int32x16.SetHi", opLen2(ssa.OpSetHiInt32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int64x4.SetHi", opLen2(ssa.OpSetHiInt64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int64x8.SetHi", opLen2(ssa.OpSetHiInt64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint8x32.SetHi", opLen2(ssa.OpSetHiUint8x32, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint8x64.SetHi", opLen2(ssa.OpSetHiUint8x64, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint16x16.SetHi", opLen2(ssa.OpSetHiUint16x16, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint16x32.SetHi", opLen2(ssa.OpSetHiUint16x32, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint32x8.SetHi", opLen2(ssa.OpSetHiUint32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint32x16.SetHi", opLen2(ssa.OpSetHiUint32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint64x4.SetHi", opLen2(ssa.OpSetHiUint64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint64x8.SetHi", opLen2(ssa.OpSetHiUint64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float32x8.SetLo", opLen2(ssa.OpSetLoFloat32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float32x16.SetLo", opLen2(ssa.OpSetLoFloat32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float64x4.SetLo", opLen2(ssa.OpSetLoFloat64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Float64x8.SetLo", opLen2(ssa.OpSetLoFloat64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int8x32.SetLo", opLen2(ssa.OpSetLoInt8x32, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int8x64.SetLo", opLen2(ssa.OpSetLoInt8x64, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int16x16.SetLo", opLen2(ssa.OpSetLoInt16x16, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int16x32.SetLo", opLen2(ssa.OpSetLoInt16x32, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int32x8.SetLo", opLen2(ssa.OpSetLoInt32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int32x16.SetLo", opLen2(ssa.OpSetLoInt32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int64x4.SetLo", opLen2(ssa.OpSetLoInt64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int64x8.SetLo", opLen2(ssa.OpSetLoInt64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint8x32.SetLo", opLen2(ssa.OpSetLoUint8x32, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint8x64.SetLo", opLen2(ssa.OpSetLoUint8x64, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint16x16.SetLo", opLen2(ssa.OpSetLoUint16x16, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint16x32.SetLo", opLen2(ssa.OpSetLoUint16x32, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint32x8.SetLo", opLen2(ssa.OpSetLoUint32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint32x16.SetLo", opLen2(ssa.OpSetLoUint32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint64x4.SetLo", opLen2(ssa.OpSetLoUint64x4, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint64x8.SetLo", opLen2(ssa.OpSetLoUint64x8, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Int16x8.ShiftAllLeft", opLen2(ssa.OpShiftAllLeftInt16x8, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Int16x16.ShiftAllLeft", opLen2(ssa.OpShiftAllLeftInt16x16, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Int16x32.ShiftAllLeft", opLen2(ssa.OpShiftAllLeftInt16x32, types.TypeVec512), sys.AMD64)
index 61a708b56e0cfbf0e4e5b6bce951f0f43121e004..5eb8fea47691a25292109ca490fb7f6550534b50 100644 (file)
@@ -3041,135 +3041,267 @@ func (x Uint8x32) GaloisFieldMulMasked(y Uint8x32, mask Mask8x32) Uint8x32
 // Asm: VGF2P8MULB, CPU Feature: AVX512GFNI
 func (x Uint8x64) GaloisFieldMulMasked(y Uint8x64, mask Mask8x64) Uint8x64
 
-/* Get128 */
+/* GetElem */
 
-// Get128 retrieves the upper (1) or lower (0) half of a 256-bit vector, depending on the constant operand.
+// GetElem retrieves a single constant-indexed element's value.
 //
 // index is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VEXTRACTF128, CPU Feature: AVX
-func (x Float32x8) Get128(index uint8) Float32x4
+// Asm: VPEXTRB, CPU Feature: AVX512BW
+func (x Int8x16) GetElem(index uint8) int8
 
-// Get128 retrieves the upper (1) or lower (0) half of a 256-bit vector, depending on the constant operand.
+// GetElem retrieves a single constant-indexed element's value.
 //
 // index is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VEXTRACTF128, CPU Feature: AVX
-func (x Float64x4) Get128(index uint8) Float64x2
+// Asm: VPEXTRW, CPU Feature: AVX512BW
+func (x Int16x8) GetElem(index uint8) int16
 
-// Get128 retrieves the upper (1) or lower (0) half of a 256-bit vector, depending on the constant operand.
+// GetElem retrieves a single constant-indexed element's value.
 //
 // index is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VEXTRACTI128, CPU Feature: AVX2
-func (x Int8x32) Get128(index uint8) Int8x16
+// Asm: VPEXTRD, CPU Feature: AVX
+func (x Int32x4) GetElem(index uint8) int32
 
-// Get128 retrieves the upper (1) or lower (0) half of a 256-bit vector, depending on the constant operand.
+// GetElem retrieves a single constant-indexed element's value.
 //
 // index is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VEXTRACTI128, CPU Feature: AVX2
-func (x Int16x16) Get128(index uint8) Int16x8
+// Asm: VPEXTRQ, CPU Feature: AVX
+func (x Int64x2) GetElem(index uint8) int64
 
-// Get128 retrieves the upper (1) or lower (0) half of a 256-bit vector, depending on the constant operand.
+// GetElem retrieves a single constant-indexed element's value.
 //
 // index is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VEXTRACTI128, CPU Feature: AVX2
-func (x Int32x8) Get128(index uint8) Int32x4
+// Asm: VPEXTRB, CPU Feature: AVX512BW
+func (x Uint8x16) GetElem(index uint8) uint8
 
-// Get128 retrieves the upper (1) or lower (0) half of a 256-bit vector, depending on the constant operand.
+// GetElem retrieves a single constant-indexed element's value.
 //
 // index is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VEXTRACTI128, CPU Feature: AVX2
-func (x Int64x4) Get128(index uint8) Int64x2
+// Asm: VPEXTRW, CPU Feature: AVX512BW
+func (x Uint16x8) GetElem(index uint8) uint16
 
-// Get128 retrieves the upper (1) or lower (0) half of a 256-bit vector, depending on the constant operand.
+// GetElem retrieves a single constant-indexed element's value.
 //
 // index is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VEXTRACTI128, CPU Feature: AVX2
-func (x Uint8x32) Get128(index uint8) Uint8x16
+// Asm: VPEXTRD, CPU Feature: AVX
+func (x Uint32x4) GetElem(index uint8) uint32
 
-// Get128 retrieves the upper (1) or lower (0) half of a 256-bit vector, depending on the constant operand.
+// GetElem retrieves a single constant-indexed element's value.
 //
 // index is expected to be a constant, non-constant value will trigger a runtime panic.
 //
+// Asm: VPEXTRQ, CPU Feature: AVX
+func (x Uint64x2) GetElem(index uint8) uint64
+
+/* GetHi */
+
+// GetHi returns the upper half of x.
+//
+// Asm: VEXTRACTF128, CPU Feature: AVX
+func (x Float32x8) GetHi() Float32x4
+
+// GetHi returns the upper half of x.
+//
+// Asm: VEXTRACTF64X4, CPU Feature: AVX512F
+func (x Float32x16) GetHi() Float32x8
+
+// GetHi returns the upper half of x.
+//
+// Asm: VEXTRACTF128, CPU Feature: AVX
+func (x Float64x4) GetHi() Float64x2
+
+// GetHi returns the upper half of x.
+//
+// Asm: VEXTRACTF64X4, CPU Feature: AVX512F
+func (x Float64x8) GetHi() Float64x4
+
+// GetHi returns the upper half of x.
+//
 // Asm: VEXTRACTI128, CPU Feature: AVX2
-func (x Uint16x16) Get128(index uint8) Uint16x8
+func (x Int8x32) GetHi() Int8x16
 
-// Get128 retrieves the upper (1) or lower (0) half of a 256-bit vector, depending on the constant operand.
+// GetHi returns the upper half of x.
 //
-// index is expected to be a constant, non-constant value will trigger a runtime panic.
+// Asm: VEXTRACTI64X4, CPU Feature: AVX512F
+func (x Int8x64) GetHi() Int8x32
+
+// GetHi returns the upper half of x.
 //
 // Asm: VEXTRACTI128, CPU Feature: AVX2
-func (x Uint32x8) Get128(index uint8) Uint32x4
+func (x Int16x16) GetHi() Int16x8
 
-// Get128 retrieves the upper (1) or lower (0) half of a 256-bit vector, depending on the constant operand.
+// GetHi returns the upper half of x.
 //
-// index is expected to be a constant, non-constant value will trigger a runtime panic.
+// Asm: VEXTRACTI64X4, CPU Feature: AVX512F
+func (x Int16x32) GetHi() Int16x16
+
+// GetHi returns the upper half of x.
 //
 // Asm: VEXTRACTI128, CPU Feature: AVX2
-func (x Uint64x4) Get128(index uint8) Uint64x2
+func (x Int32x8) GetHi() Int32x4
 
-/* GetElem */
+// GetHi returns the upper half of x.
+//
+// Asm: VEXTRACTI64X4, CPU Feature: AVX512F
+func (x Int32x16) GetHi() Int32x8
 
-// GetElem retrieves a single constant-indexed element's value.
+// GetHi returns the upper half of x.
 //
-// index is expected to be a constant, non-constant value will trigger a runtime panic.
+// Asm: VEXTRACTI128, CPU Feature: AVX2
+func (x Int64x4) GetHi() Int64x2
+
+// GetHi returns the upper half of x.
 //
-// Asm: VPEXTRB, CPU Feature: AVX512BW
-func (x Int8x16) GetElem(index uint8) int8
+// Asm: VEXTRACTI64X4, CPU Feature: AVX512F
+func (x Int64x8) GetHi() Int64x4
 
-// GetElem retrieves a single constant-indexed element's value.
+// GetHi returns the upper half of x.
 //
-// index is expected to be a constant, non-constant value will trigger a runtime panic.
+// Asm: VEXTRACTI128, CPU Feature: AVX2
+func (x Uint8x32) GetHi() Uint8x16
+
+// GetHi returns the upper half of x.
 //
-// Asm: VPEXTRW, CPU Feature: AVX512BW
-func (x Int16x8) GetElem(index uint8) int16
+// Asm: VEXTRACTI64X4, CPU Feature: AVX512F
+func (x Uint8x64) GetHi() Uint8x32
 
-// GetElem retrieves a single constant-indexed element's value.
+// GetHi returns the upper half of x.
 //
-// index is expected to be a constant, non-constant value will trigger a runtime panic.
+// Asm: VEXTRACTI128, CPU Feature: AVX2
+func (x Uint16x16) GetHi() Uint16x8
+
+// GetHi returns the upper half of x.
 //
-// Asm: VPEXTRD, CPU Feature: AVX
-func (x Int32x4) GetElem(index uint8) int32
+// Asm: VEXTRACTI64X4, CPU Feature: AVX512F
+func (x Uint16x32) GetHi() Uint16x16
 
-// GetElem retrieves a single constant-indexed element's value.
+// GetHi returns the upper half of x.
 //
-// index is expected to be a constant, non-constant value will trigger a runtime panic.
+// Asm: VEXTRACTI128, CPU Feature: AVX2
+func (x Uint32x8) GetHi() Uint32x4
+
+// GetHi returns the upper half of x.
 //
-// Asm: VPEXTRQ, CPU Feature: AVX
-func (x Int64x2) GetElem(index uint8) int64
+// Asm: VEXTRACTI64X4, CPU Feature: AVX512F
+func (x Uint32x16) GetHi() Uint32x8
 
-// GetElem retrieves a single constant-indexed element's value.
+// GetHi returns the upper half of x.
 //
-// index is expected to be a constant, non-constant value will trigger a runtime panic.
+// Asm: VEXTRACTI128, CPU Feature: AVX2
+func (x Uint64x4) GetHi() Uint64x2
+
+// GetHi returns the upper half of x.
 //
-// Asm: VPEXTRB, CPU Feature: AVX512BW
-func (x Uint8x16) GetElem(index uint8) uint8
+// Asm: VEXTRACTI64X4, CPU Feature: AVX512F
+func (x Uint64x8) GetHi() Uint64x4
 
-// GetElem retrieves a single constant-indexed element's value.
+/* GetLo */
+
+// GetLo returns the lower half of x.
 //
-// index is expected to be a constant, non-constant value will trigger a runtime panic.
+// Asm: VEXTRACTF128, CPU Feature: AVX
+func (x Float32x8) GetLo() Float32x4
+
+// GetLo returns the lower half of x.
 //
-// Asm: VPEXTRW, CPU Feature: AVX512BW
-func (x Uint16x8) GetElem(index uint8) uint16
+// Asm: VEXTRACTF64X4, CPU Feature: AVX512F
+func (x Float32x16) GetLo() Float32x8
 
-// GetElem retrieves a single constant-indexed element's value.
+// GetLo returns the lower half of x.
 //
-// index is expected to be a constant, non-constant value will trigger a runtime panic.
+// Asm: VEXTRACTF128, CPU Feature: AVX
+func (x Float64x4) GetLo() Float64x2
+
+// GetLo returns the lower half of x.
 //
-// Asm: VPEXTRD, CPU Feature: AVX
-func (x Uint32x4) GetElem(index uint8) uint32
+// Asm: VEXTRACTF64X4, CPU Feature: AVX512F
+func (x Float64x8) GetLo() Float64x4
 
-// GetElem retrieves a single constant-indexed element's value.
+// GetLo returns the lower half of x.
 //
-// index is expected to be a constant, non-constant value will trigger a runtime panic.
+// Asm: VEXTRACTI128, CPU Feature: AVX2
+func (x Int8x32) GetLo() Int8x16
+
+// GetLo returns the lower half of x.
 //
-// Asm: VPEXTRQ, CPU Feature: AVX
-func (x Uint64x2) GetElem(index uint8) uint64
+// Asm: VEXTRACTI64X4, CPU Feature: AVX512F
+func (x Int8x64) GetLo() Int8x32
+
+// GetLo returns the lower half of x.
+//
+// Asm: VEXTRACTI128, CPU Feature: AVX2
+func (x Int16x16) GetLo() Int16x8
+
+// GetLo returns the lower half of x.
+//
+// Asm: VEXTRACTI64X4, CPU Feature: AVX512F
+func (x Int16x32) GetLo() Int16x16
+
+// GetLo returns the lower half of x.
+//
+// Asm: VEXTRACTI128, CPU Feature: AVX2
+func (x Int32x8) GetLo() Int32x4
+
+// GetLo returns the lower half of x.
+//
+// Asm: VEXTRACTI64X4, CPU Feature: AVX512F
+func (x Int32x16) GetLo() Int32x8
+
+// GetLo returns the lower half of x.
+//
+// Asm: VEXTRACTI128, CPU Feature: AVX2
+func (x Int64x4) GetLo() Int64x2
+
+// GetLo returns the lower half of x.
+//
+// Asm: VEXTRACTI64X4, CPU Feature: AVX512F
+func (x Int64x8) GetLo() Int64x4
+
+// GetLo returns the lower half of x.
+//
+// Asm: VEXTRACTI128, CPU Feature: AVX2
+func (x Uint8x32) GetLo() Uint8x16
+
+// GetLo returns the lower half of x.
+//
+// Asm: VEXTRACTI64X4, CPU Feature: AVX512F
+func (x Uint8x64) GetLo() Uint8x32
+
+// GetLo returns the lower half of x.
+//
+// Asm: VEXTRACTI128, CPU Feature: AVX2
+func (x Uint16x16) GetLo() Uint16x8
+
+// GetLo returns the lower half of x.
+//
+// Asm: VEXTRACTI64X4, CPU Feature: AVX512F
+func (x Uint16x32) GetLo() Uint16x16
+
+// GetLo returns the lower half of x.
+//
+// Asm: VEXTRACTI128, CPU Feature: AVX2
+func (x Uint32x8) GetLo() Uint32x4
+
+// GetLo returns the lower half of x.
+//
+// Asm: VEXTRACTI64X4, CPU Feature: AVX512F
+func (x Uint32x16) GetLo() Uint32x8
+
+// GetLo returns the lower half of x.
+//
+// Asm: VEXTRACTI128, CPU Feature: AVX2
+func (x Uint64x4) GetLo() Uint64x2
+
+// GetLo returns the lower half of x.
+//
+// Asm: VEXTRACTI64X4, CPU Feature: AVX512F
+func (x Uint64x8) GetLo() Uint64x4
 
 /* Greater */
 
@@ -8757,135 +8889,267 @@ func (x Float64x4) ScaleMasked(y Float64x4, mask Mask64x4) Float64x4
 // Asm: VSCALEFPD, CPU Feature: AVX512F
 func (x Float64x8) ScaleMasked(y Float64x8, mask Mask64x8) Float64x8
 
-/* Set128 */
+/* SetElem */
 
-// Set128 combines a 128-bit vector with a 256-bit vector, where the constant operand specifies whether the low (0) or high (1) half is receives the smaller vector.
+// SetElem sets a single constant-indexed element's value.
 //
 // index is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VINSERTF128, CPU Feature: AVX
-func (x Float32x8) Set128(index uint8, y Float32x4) Float32x8
+// Asm: VPINSRB, CPU Feature: AVX
+func (x Int8x16) SetElem(index uint8, y int8) Int8x16
 
-// Set128 combines a 128-bit vector with a 256-bit vector, where the constant operand specifies whether the low (0) or high (1) half is receives the smaller vector.
+// SetElem sets a single constant-indexed element's value.
 //
 // index is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VINSERTF128, CPU Feature: AVX
-func (x Float64x4) Set128(index uint8, y Float64x2) Float64x4
+// Asm: VPINSRW, CPU Feature: AVX
+func (x Int16x8) SetElem(index uint8, y int16) Int16x8
 
-// Set128 combines a 128-bit vector with a 256-bit vector, where the constant operand specifies whether the low (0) or high (1) half is receives the smaller vector.
+// SetElem sets a single constant-indexed element's value.
 //
 // index is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VINSERTI128, CPU Feature: AVX2
-func (x Int8x32) Set128(index uint8, y Int8x16) Int8x32
+// Asm: VPINSRD, CPU Feature: AVX
+func (x Int32x4) SetElem(index uint8, y int32) Int32x4
 
-// Set128 combines a 128-bit vector with a 256-bit vector, where the constant operand specifies whether the low (0) or high (1) half is receives the smaller vector.
+// SetElem sets a single constant-indexed element's value.
 //
 // index is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VINSERTI128, CPU Feature: AVX2
-func (x Int16x16) Set128(index uint8, y Int16x8) Int16x16
+// Asm: VPINSRQ, CPU Feature: AVX
+func (x Int64x2) SetElem(index uint8, y int64) Int64x2
 
-// Set128 combines a 128-bit vector with a 256-bit vector, where the constant operand specifies whether the low (0) or high (1) half is receives the smaller vector.
+// SetElem sets a single constant-indexed element's value.
 //
 // index is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VINSERTI128, CPU Feature: AVX2
-func (x Int32x8) Set128(index uint8, y Int32x4) Int32x8
+// Asm: VPINSRB, CPU Feature: AVX
+func (x Uint8x16) SetElem(index uint8, y uint8) Uint8x16
 
-// Set128 combines a 128-bit vector with a 256-bit vector, where the constant operand specifies whether the low (0) or high (1) half is receives the smaller vector.
+// SetElem sets a single constant-indexed element's value.
 //
 // index is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VINSERTI128, CPU Feature: AVX2
-func (x Int64x4) Set128(index uint8, y Int64x2) Int64x4
+// Asm: VPINSRW, CPU Feature: AVX
+func (x Uint16x8) SetElem(index uint8, y uint16) Uint16x8
 
-// Set128 combines a 128-bit vector with a 256-bit vector, where the constant operand specifies whether the low (0) or high (1) half is receives the smaller vector.
+// SetElem sets a single constant-indexed element's value.
 //
 // index is expected to be a constant, non-constant value will trigger a runtime panic.
 //
-// Asm: VINSERTI128, CPU Feature: AVX2
-func (x Uint8x32) Set128(index uint8, y Uint8x16) Uint8x32
+// Asm: VPINSRD, CPU Feature: AVX
+func (x Uint32x4) SetElem(index uint8, y uint32) Uint32x4
 
-// Set128 combines a 128-bit vector with a 256-bit vector, where the constant operand specifies whether the low (0) or high (1) half is receives the smaller vector.
+// SetElem sets a single constant-indexed element's value.
 //
 // index is expected to be a constant, non-constant value will trigger a runtime panic.
 //
+// Asm: VPINSRQ, CPU Feature: AVX
+func (x Uint64x2) SetElem(index uint8, y uint64) Uint64x2
+
+/* SetHi */
+
+// SetHi returns x with its upper half set to y.
+//
+// Asm: VINSERTF128, CPU Feature: AVX
+func (x Float32x8) SetHi(y Float32x4) Float32x8
+
+// SetHi returns x with its upper half set to y.
+//
+// Asm: VINSERTF64X4, CPU Feature: AVX512F
+func (x Float32x16) SetHi(y Float32x8) Float32x16
+
+// SetHi returns x with its upper half set to y.
+//
+// Asm: VINSERTF128, CPU Feature: AVX
+func (x Float64x4) SetHi(y Float64x2) Float64x4
+
+// SetHi returns x with its upper half set to y.
+//
+// Asm: VINSERTF64X4, CPU Feature: AVX512F
+func (x Float64x8) SetHi(y Float64x4) Float64x8
+
+// SetHi returns x with its upper half set to y.
+//
 // Asm: VINSERTI128, CPU Feature: AVX2
-func (x Uint16x16) Set128(index uint8, y Uint16x8) Uint16x16
+func (x Int8x32) SetHi(y Int8x16) Int8x32
 
-// Set128 combines a 128-bit vector with a 256-bit vector, where the constant operand specifies whether the low (0) or high (1) half is receives the smaller vector.
+// SetHi returns x with its upper half set to y.
 //
-// index is expected to be a constant, non-constant value will trigger a runtime panic.
+// Asm: VINSERTI64X4, CPU Feature: AVX512F
+func (x Int8x64) SetHi(y Int8x32) Int8x64
+
+// SetHi returns x with its upper half set to y.
 //
 // Asm: VINSERTI128, CPU Feature: AVX2
-func (x Uint32x8) Set128(index uint8, y Uint32x4) Uint32x8
+func (x Int16x16) SetHi(y Int16x8) Int16x16
 
-// Set128 combines a 128-bit vector with a 256-bit vector, where the constant operand specifies whether the low (0) or high (1) half is receives the smaller vector.
+// SetHi returns x with its upper half set to y.
 //
-// index is expected to be a constant, non-constant value will trigger a runtime panic.
+// Asm: VINSERTI64X4, CPU Feature: AVX512F
+func (x Int16x32) SetHi(y Int16x16) Int16x32
+
+// SetHi returns x with its upper half set to y.
 //
 // Asm: VINSERTI128, CPU Feature: AVX2
-func (x Uint64x4) Set128(index uint8, y Uint64x2) Uint64x4
+func (x Int32x8) SetHi(y Int32x4) Int32x8
 
-/* SetElem */
+// SetHi returns x with its upper half set to y.
+//
+// Asm: VINSERTI64X4, CPU Feature: AVX512F
+func (x Int32x16) SetHi(y Int32x8) Int32x16
 
-// SetElem sets a single constant-indexed element's value.
+// SetHi returns x with its upper half set to y.
 //
-// index is expected to be a constant, non-constant value will trigger a runtime panic.
+// Asm: VINSERTI128, CPU Feature: AVX2
+func (x Int64x4) SetHi(y Int64x2) Int64x4
+
+// SetHi returns x with its upper half set to y.
 //
-// Asm: VPINSRB, CPU Feature: AVX
-func (x Int8x16) SetElem(index uint8, y int8) Int8x16
+// Asm: VINSERTI64X4, CPU Feature: AVX512F
+func (x Int64x8) SetHi(y Int64x4) Int64x8
 
-// SetElem sets a single constant-indexed element's value.
+// SetHi returns x with its upper half set to y.
 //
-// index is expected to be a constant, non-constant value will trigger a runtime panic.
+// Asm: VINSERTI128, CPU Feature: AVX2
+func (x Uint8x32) SetHi(y Uint8x16) Uint8x32
+
+// SetHi returns x with its upper half set to y.
 //
-// Asm: VPINSRW, CPU Feature: AVX
-func (x Int16x8) SetElem(index uint8, y int16) Int16x8
+// Asm: VINSERTI64X4, CPU Feature: AVX512F
+func (x Uint8x64) SetHi(y Uint8x32) Uint8x64
 
-// SetElem sets a single constant-indexed element's value.
+// SetHi returns x with its upper half set to y.
 //
-// index is expected to be a constant, non-constant value will trigger a runtime panic.
+// Asm: VINSERTI128, CPU Feature: AVX2
+func (x Uint16x16) SetHi(y Uint16x8) Uint16x16
+
+// SetHi returns x with its upper half set to y.
 //
-// Asm: VPINSRD, CPU Feature: AVX
-func (x Int32x4) SetElem(index uint8, y int32) Int32x4
+// Asm: VINSERTI64X4, CPU Feature: AVX512F
+func (x Uint16x32) SetHi(y Uint16x16) Uint16x32
 
-// SetElem sets a single constant-indexed element's value.
+// SetHi returns x with its upper half set to y.
 //
-// index is expected to be a constant, non-constant value will trigger a runtime panic.
+// Asm: VINSERTI128, CPU Feature: AVX2
+func (x Uint32x8) SetHi(y Uint32x4) Uint32x8
+
+// SetHi returns x with its upper half set to y.
 //
-// Asm: VPINSRQ, CPU Feature: AVX
-func (x Int64x2) SetElem(index uint8, y int64) Int64x2
+// Asm: VINSERTI64X4, CPU Feature: AVX512F
+func (x Uint32x16) SetHi(y Uint32x8) Uint32x16
 
-// SetElem sets a single constant-indexed element's value.
+// SetHi returns x with its upper half set to y.
 //
-// index is expected to be a constant, non-constant value will trigger a runtime panic.
+// Asm: VINSERTI128, CPU Feature: AVX2
+func (x Uint64x4) SetHi(y Uint64x2) Uint64x4
+
+// SetHi returns x with its upper half set to y.
 //
-// Asm: VPINSRB, CPU Feature: AVX
-func (x Uint8x16) SetElem(index uint8, y uint8) Uint8x16
+// Asm: VINSERTI64X4, CPU Feature: AVX512F
+func (x Uint64x8) SetHi(y Uint64x4) Uint64x8
 
-// SetElem sets a single constant-indexed element's value.
+/* SetLo */
+
+// SetLo returns x with its lower half set to y.
 //
-// index is expected to be a constant, non-constant value will trigger a runtime panic.
+// Asm: VINSERTF128, CPU Feature: AVX
+func (x Float32x8) SetLo(y Float32x4) Float32x8
+
+// SetLo returns x with its lower half set to y.
 //
-// Asm: VPINSRW, CPU Feature: AVX
-func (x Uint16x8) SetElem(index uint8, y uint16) Uint16x8
+// Asm: VINSERTF64X4, CPU Feature: AVX512F
+func (x Float32x16) SetLo(y Float32x8) Float32x16
 
-// SetElem sets a single constant-indexed element's value.
+// SetLo returns x with its lower half set to y.
 //
-// index is expected to be a constant, non-constant value will trigger a runtime panic.
+// Asm: VINSERTF128, CPU Feature: AVX
+func (x Float64x4) SetLo(y Float64x2) Float64x4
+
+// SetLo returns x with its lower half set to y.
 //
-// Asm: VPINSRD, CPU Feature: AVX
-func (x Uint32x4) SetElem(index uint8, y uint32) Uint32x4
+// Asm: VINSERTF64X4, CPU Feature: AVX512F
+func (x Float64x8) SetLo(y Float64x4) Float64x8
 
-// SetElem sets a single constant-indexed element's value.
+// SetLo returns x with its lower half set to y.
 //
-// index is expected to be a constant, non-constant value will trigger a runtime panic.
+// Asm: VINSERTI128, CPU Feature: AVX2
+func (x Int8x32) SetLo(y Int8x16) Int8x32
+
+// SetLo returns x with its lower half set to y.
 //
-// Asm: VPINSRQ, CPU Feature: AVX
-func (x Uint64x2) SetElem(index uint8, y uint64) Uint64x2
+// Asm: VINSERTI64X4, CPU Feature: AVX512F
+func (x Int8x64) SetLo(y Int8x32) Int8x64
+
+// SetLo returns x with its lower half set to y.
+//
+// Asm: VINSERTI128, CPU Feature: AVX2
+func (x Int16x16) SetLo(y Int16x8) Int16x16
+
+// SetLo returns x with its lower half set to y.
+//
+// Asm: VINSERTI64X4, CPU Feature: AVX512F
+func (x Int16x32) SetLo(y Int16x16) Int16x32
+
+// SetLo returns x with its lower half set to y.
+//
+// Asm: VINSERTI128, CPU Feature: AVX2
+func (x Int32x8) SetLo(y Int32x4) Int32x8
+
+// SetLo returns x with its lower half set to y.
+//
+// Asm: VINSERTI64X4, CPU Feature: AVX512F
+func (x Int32x16) SetLo(y Int32x8) Int32x16
+
+// SetLo returns x with its lower half set to y.
+//
+// Asm: VINSERTI128, CPU Feature: AVX2
+func (x Int64x4) SetLo(y Int64x2) Int64x4
+
+// SetLo returns x with its lower half set to y.
+//
+// Asm: VINSERTI64X4, CPU Feature: AVX512F
+func (x Int64x8) SetLo(y Int64x4) Int64x8
+
+// SetLo returns x with its lower half set to y.
+//
+// Asm: VINSERTI128, CPU Feature: AVX2
+func (x Uint8x32) SetLo(y Uint8x16) Uint8x32
+
+// SetLo returns x with its lower half set to y.
+//
+// Asm: VINSERTI64X4, CPU Feature: AVX512F
+func (x Uint8x64) SetLo(y Uint8x32) Uint8x64
+
+// SetLo returns x with its lower half set to y.
+//
+// Asm: VINSERTI128, CPU Feature: AVX2
+func (x Uint16x16) SetLo(y Uint16x8) Uint16x16
+
+// SetLo returns x with its lower half set to y.
+//
+// Asm: VINSERTI64X4, CPU Feature: AVX512F
+func (x Uint16x32) SetLo(y Uint16x16) Uint16x32
+
+// SetLo returns x with its lower half set to y.
+//
+// Asm: VINSERTI128, CPU Feature: AVX2
+func (x Uint32x8) SetLo(y Uint32x4) Uint32x8
+
+// SetLo returns x with its lower half set to y.
+//
+// Asm: VINSERTI64X4, CPU Feature: AVX512F
+func (x Uint32x16) SetLo(y Uint32x8) Uint32x16
+
+// SetLo returns x with its lower half set to y.
+//
+// Asm: VINSERTI128, CPU Feature: AVX2
+func (x Uint64x4) SetLo(y Uint64x2) Uint64x4
+
+// SetLo returns x with its lower half set to y.
+//
+// Asm: VINSERTI64X4, CPU Feature: AVX512F
+func (x Uint64x8) SetLo(y Uint64x4) Uint64x8
 
 /* ShiftAllLeft */
 
index 1df27f875760aa275a78494985d24e23fcc86564..571834783887b455c043a76870d066449e55f2d9 100644 (file)
@@ -257,93 +257,6 @@ func TestSlicesInt8GetElem(t *testing.T) {
 
 }
 
-func TestSlicesInt8Set128(t *testing.T) {
-       a := []int8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
-               17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}
-       v := simd.LoadInt8x16Slice(a) // 1-16
-       u := simd.LoadInt8x32Slice(a) // 1-32
-
-       w := u.Set128(1, v) // 1-16:1-16
-
-       b := make([]int8, 32, 32)
-       w.StoreSlice(b)
-
-       checkSlices(t, a, b[:16])
-       checkSlices(t, a, b[16:])
-}
-
-func TestSlicesInt8Get128(t *testing.T) {
-       a := []int8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
-               17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}
-       u := simd.LoadInt8x32Slice(a) // 1-32
-       v := u.Get128(0)              // 1-16
-       w := u.Get128(1)              // 17-32
-
-       b := make([]int8, 32, 32)
-       v.StoreSlice(b[:16])
-       w.StoreSlice(b[16:])
-
-       checkSlices(t, a, b)
-}
-
-func TestSlicesFloat32Set128(t *testing.T) {
-       a := []float32{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
-               17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}
-       v := simd.LoadFloat32x4Slice(a) // 1-4
-       u := simd.LoadFloat32x8Slice(a) // 1-4
-
-       w := u.Set128(1, v) // 1-4:1-4
-
-       b := make([]float32, 8, 8)
-       w.StoreSlice(b)
-
-       checkSlices(t, a, b[:4])
-       checkSlices(t, a, b[4:])
-}
-
-func TestSlicesFloat32Get128(t *testing.T) {
-       a := []float32{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
-               17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}
-       u := simd.LoadFloat32x8Slice(a) // 1-8
-       v := u.Get128(0)                // 1-4
-       w := u.Get128(1)                // 5-8
-
-       b := make([]float32, 8, 8)
-       v.StoreSlice(b[:4])
-       w.StoreSlice(b[4:])
-
-       checkSlices(t, a, b)
-}
-
-func TestSlicesFloat64Set128(t *testing.T) {
-       a := []float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
-               17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}
-       v := simd.LoadFloat64x2Slice(a) // 1-2
-       u := simd.LoadFloat64x4Slice(a) // 1-2
-
-       w := u.Set128(1, v) // 1-2:1-2
-
-       b := make([]float64, 4, 4)
-       w.StoreSlice(b)
-
-       checkSlices(t, a, b[:2])
-       checkSlices(t, a, b[2:])
-}
-
-func TestSlicesFloat64Get128(t *testing.T) {
-       a := []float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
-               17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}
-       u := simd.LoadFloat64x4Slice(a) // 1-4
-       v := u.Get128(0)                // 1-2
-       w := u.Get128(1)                // 3-4
-
-       b := make([]float64, 4, 4)
-       v.StoreSlice(b[:2])
-       w.StoreSlice(b[2:])
-
-       checkSlices(t, a, b)
-}
-
 func TestSlicesInt8TooShortLoad(t *testing.T) {
        defer func() {
                if r := recover(); r != nil {
index 6d0b5a41f298bcaa563fce915106c0a88776ddf2..206d3b98cb28f7896cab883492c2ff3a3a136e2a 100644 (file)
@@ -76,9 +76,9 @@ func LoadInt8x32SlicePart(s []int8) Int8x32 {
                return x
        }
        if l > 16 {
-               return x.Set128(0, LoadInt8x16Slice(s)).Set128(1, LoadInt8x16SlicePart(s[16:]))
+               return x.SetLo(LoadInt8x16Slice(s)).SetHi(LoadInt8x16SlicePart(s[16:]))
        } else {
-               return x.Set128(0, LoadInt8x16SlicePart(s))
+               return x.SetLo(LoadInt8x16SlicePart(s))
        }
 }
 
@@ -95,9 +95,9 @@ func LoadInt16x16SlicePart(s []int16) Int16x16 {
                return x
        }
        if l > 8 {
-               return x.Set128(0, LoadInt16x8Slice(s)).Set128(1, LoadInt16x8SlicePart(s[8:]))
+               return x.SetLo(LoadInt16x8Slice(s)).SetHi(LoadInt16x8SlicePart(s[8:]))
        } else {
-               return x.Set128(0, LoadInt16x8SlicePart(s))
+               return x.SetLo(LoadInt16x8SlicePart(s))
        }
 }
 
@@ -114,10 +114,10 @@ func (x Int8x32) StoreSlicePart(s []int8) {
                return
        }
        if l > 16 {
-               x.Get128(0).StoreSlice(s)
-               x.Get128(1).StoreSlicePart(s[16:])
+               x.GetLo().StoreSlice(s)
+               x.GetHi().StoreSlicePart(s[16:])
        } else { // fits in one
-               x.Get128(0).StoreSlicePart(s)
+               x.GetLo().StoreSlicePart(s)
        }
 }
 
@@ -134,10 +134,10 @@ func (x Int16x16) StoreSlicePart(s []int16) {
                return
        }
        if l > 8 {
-               x.Get128(0).StoreSlice(s)
-               x.Get128(1).StoreSlicePart(s[8:])
+               x.GetLo().StoreSlice(s)
+               x.GetHi().StoreSlicePart(s[8:])
        } else { // fits in one
-               x.Get128(0).StoreSlicePart(s)
+               x.GetLo().StoreSlicePart(s)
        }
 }