ssa.OpAMD64VGF2P8AFFINEINVQB128,
ssa.OpAMD64VGF2P8AFFINEINVQB256,
ssa.OpAMD64VGF2P8AFFINEINVQB512,
+ ssa.OpAMD64VINSERTF128256,
+ ssa.OpAMD64VINSERTI128256,
ssa.OpAMD64VPSHLDW128,
ssa.OpAMD64VPSHLDW256,
ssa.OpAMD64VPSHLDW512,
(SaturatedUnsignedSignedQuadDotProdAccumulateUint32x4 ...) => (VPDPBUSDS128 ...)
(SaturatedUnsignedSignedQuadDotProdAccumulateUint32x8 ...) => (VPDPBUSDS256 ...)
(SaturatedUnsignedSignedQuadDotProdAccumulateUint32x16 ...) => (VPDPBUSDS512 ...)
+(Set128Float32x8 [a] x y) => (VINSERTF128256 [a] x y)
+(Set128Float64x4 [a] x y) => (VINSERTF128256 [a] x y)
+(Set128Int8x32 [a] x y) => (VINSERTI128256 [a] x y)
+(Set128Int16x16 [a] x y) => (VINSERTI128256 [a] x y)
+(Set128Int32x8 [a] x y) => (VINSERTI128256 [a] x y)
+(Set128Int64x4 [a] x y) => (VINSERTI128256 [a] x y)
+(Set128Uint8x32 [a] x y) => (VINSERTI128256 [a] x y)
+(Set128Uint16x16 [a] x y) => (VINSERTI128256 [a] x y)
+(Set128Uint32x8 [a] x y) => (VINSERTI128256 [a] x y)
+(Set128Uint64x4 [a] x y) => (VINSERTI128256 [a] x y)
(SetElemInt8x16 [a] x y) => (VPINSRB128 [a] x y)
(SetElemInt16x8 [a] x y) => (VPINSRW128 [a] x y)
(SetElemInt32x4 [a] x y) => (VPINSRD128 [a] x y)
{name: "VRNDSCALEPSMasked256", argLength: 2, reg: fpkfp, asm: "VRNDSCALEPS", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VREDUCEPSMasked256", argLength: 2, reg: fpkfp, asm: "VREDUCEPS", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VCMPPSMasked256", argLength: 3, reg: fp2kk, asm: "VCMPPS", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
+ {name: "VINSERTF128256", argLength: 2, reg: fp21, asm: "VINSERTF128", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VROUNDPD128", argLength: 1, reg: fp11, asm: "VROUNDPD", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VRNDSCALEPD128", argLength: 1, reg: fp11, asm: "VRNDSCALEPD", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VREDUCEPD128", argLength: 1, reg: fp11, asm: "VREDUCEPD", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPINSRB128", argLength: 2, reg: fpgpfp, asm: "VPINSRB", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPCMPB256", argLength: 2, reg: fp2k, asm: "VPCMPB", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false},
{name: "VPCMPBMasked256", argLength: 3, reg: fp2kk, asm: "VPCMPB", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
+ {name: "VINSERTI128256", argLength: 2, reg: fp21, asm: "VINSERTI128", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPCMPB512", argLength: 2, reg: fp2k, asm: "VPCMPB", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
{name: "VPCMPBMasked512", argLength: 3, reg: fp2kk, asm: "VPCMPB", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
{name: "VPCMPUW256", argLength: 2, reg: fp2k, asm: "VPCMPUW", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
{name: "MaskedRoundWithPrecisionFloat32x8", argLength: 2, commutative: false, aux: "Int8"},
{name: "MaskedTruncWithPrecisionFloat32x8", argLength: 2, commutative: false, aux: "Int8"},
{name: "RoundWithPrecisionFloat32x8", argLength: 1, commutative: false, aux: "Int8"},
+ {name: "Set128Float32x8", argLength: 2, commutative: false, aux: "Int8"},
{name: "TruncWithPrecisionFloat32x8", argLength: 1, commutative: false, aux: "Int8"},
{name: "CeilWithPrecisionFloat64x2", argLength: 1, commutative: false, aux: "Int8"},
{name: "DiffWithCeilWithPrecisionFloat64x2", argLength: 1, commutative: false, aux: "Int8"},
{name: "MaskedRoundWithPrecisionFloat64x4", argLength: 2, commutative: false, aux: "Int8"},
{name: "MaskedTruncWithPrecisionFloat64x4", argLength: 2, commutative: false, aux: "Int8"},
{name: "RoundWithPrecisionFloat64x4", argLength: 1, commutative: false, aux: "Int8"},
+ {name: "Set128Float64x4", argLength: 2, commutative: false, aux: "Int8"},
{name: "TruncWithPrecisionFloat64x4", argLength: 1, commutative: false, aux: "Int8"},
{name: "CeilWithPrecisionFloat64x8", argLength: 1, commutative: false, aux: "Int8"},
{name: "DiffWithCeilWithPrecisionFloat64x8", argLength: 1, commutative: false, aux: "Int8"},
{name: "TruncWithPrecisionFloat64x8", argLength: 1, commutative: false, aux: "Int8"},
{name: "MaskedShiftAllLeftAndFillUpperFromInt16x16", argLength: 3, commutative: false, aux: "Int8"},
{name: "MaskedShiftAllRightAndFillUpperFromInt16x16", argLength: 3, commutative: false, aux: "Int8"},
+ {name: "Set128Int16x16", argLength: 2, commutative: false, aux: "Int8"},
{name: "ShiftAllLeftAndFillUpperFromInt16x16", argLength: 2, commutative: false, aux: "Int8"},
{name: "ShiftAllRightAndFillUpperFromInt16x16", argLength: 2, commutative: false, aux: "Int8"},
{name: "MaskedShiftAllLeftAndFillUpperFromInt16x32", argLength: 3, commutative: false, aux: "Int8"},
{name: "MaskedShiftAllRightAndFillUpperFromInt32x8", argLength: 3, commutative: false, aux: "Int8"},
{name: "RotateAllLeftInt32x8", argLength: 1, commutative: false, aux: "Int8"},
{name: "RotateAllRightInt32x8", argLength: 1, commutative: false, aux: "Int8"},
+ {name: "Set128Int32x8", argLength: 2, commutative: false, aux: "Int8"},
{name: "ShiftAllLeftAndFillUpperFromInt32x8", argLength: 2, commutative: false, aux: "Int8"},
{name: "ShiftAllRightAndFillUpperFromInt32x8", argLength: 2, commutative: false, aux: "Int8"},
{name: "GetElemInt64x2", argLength: 1, commutative: false, aux: "Int8"},
{name: "MaskedShiftAllRightAndFillUpperFromInt64x4", argLength: 3, commutative: false, aux: "Int8"},
{name: "RotateAllLeftInt64x4", argLength: 1, commutative: false, aux: "Int8"},
{name: "RotateAllRightInt64x4", argLength: 1, commutative: false, aux: "Int8"},
+ {name: "Set128Int64x4", argLength: 2, commutative: false, aux: "Int8"},
{name: "ShiftAllLeftAndFillUpperFromInt64x4", argLength: 2, commutative: false, aux: "Int8"},
{name: "ShiftAllRightAndFillUpperFromInt64x4", argLength: 2, commutative: false, aux: "Int8"},
{name: "MaskedRotateAllLeftInt64x8", argLength: 2, commutative: false, aux: "Int8"},
{name: "ShiftAllRightAndFillUpperFromInt64x8", argLength: 2, commutative: false, aux: "Int8"},
{name: "GetElemInt8x16", argLength: 1, commutative: false, aux: "Int8"},
{name: "SetElemInt8x16", argLength: 2, commutative: false, aux: "Int8"},
+ {name: "Set128Int8x32", argLength: 2, commutative: false, aux: "Int8"},
{name: "MaskedShiftAllLeftAndFillUpperFromUint16x16", argLength: 3, commutative: false, aux: "Int8"},
{name: "MaskedShiftAllRightAndFillUpperFromUint16x16", argLength: 3, commutative: false, aux: "Int8"},
+ {name: "Set128Uint16x16", argLength: 2, commutative: false, aux: "Int8"},
{name: "ShiftAllLeftAndFillUpperFromUint16x16", argLength: 2, commutative: false, aux: "Int8"},
{name: "ShiftAllRightAndFillUpperFromUint16x16", argLength: 2, commutative: false, aux: "Int8"},
{name: "MaskedShiftAllLeftAndFillUpperFromUint16x32", argLength: 3, commutative: false, aux: "Int8"},
{name: "MaskedShiftAllRightAndFillUpperFromUint32x8", argLength: 3, commutative: false, aux: "Int8"},
{name: "RotateAllLeftUint32x8", argLength: 1, commutative: false, aux: "Int8"},
{name: "RotateAllRightUint32x8", argLength: 1, commutative: false, aux: "Int8"},
+ {name: "Set128Uint32x8", argLength: 2, commutative: false, aux: "Int8"},
{name: "ShiftAllLeftAndFillUpperFromUint32x8", argLength: 2, commutative: false, aux: "Int8"},
{name: "ShiftAllRightAndFillUpperFromUint32x8", argLength: 2, commutative: false, aux: "Int8"},
{name: "GetElemUint64x2", argLength: 1, commutative: false, aux: "Int8"},
{name: "MaskedShiftAllRightAndFillUpperFromUint64x4", argLength: 3, commutative: false, aux: "Int8"},
{name: "RotateAllLeftUint64x4", argLength: 1, commutative: false, aux: "Int8"},
{name: "RotateAllRightUint64x4", argLength: 1, commutative: false, aux: "Int8"},
+ {name: "Set128Uint64x4", argLength: 2, commutative: false, aux: "Int8"},
{name: "ShiftAllLeftAndFillUpperFromUint64x4", argLength: 2, commutative: false, aux: "Int8"},
{name: "ShiftAllRightAndFillUpperFromUint64x4", argLength: 2, commutative: false, aux: "Int8"},
{name: "MaskedRotateAllLeftUint64x8", argLength: 2, commutative: false, aux: "Int8"},
{name: "GaloisFieldAffineTransformInversedUint8x32", argLength: 2, commutative: false, aux: "Int8"},
{name: "MaskedGaloisFieldAffineTransformUint8x32", argLength: 3, commutative: false, aux: "Int8"},
{name: "MaskedGaloisFieldAffineTransformInversedUint8x32", argLength: 3, commutative: false, aux: "Int8"},
+ {name: "Set128Uint8x32", argLength: 2, commutative: false, aux: "Int8"},
{name: "GaloisFieldAffineTransformUint8x64", argLength: 2, commutative: false, aux: "Int8"},
{name: "GaloisFieldAffineTransformInversedUint8x64", argLength: 2, commutative: false, aux: "Int8"},
{name: "MaskedGaloisFieldAffineTransformUint8x64", argLength: 3, commutative: false, aux: "Int8"},
OpAMD64VRNDSCALEPSMasked256
OpAMD64VREDUCEPSMasked256
OpAMD64VCMPPSMasked256
+ OpAMD64VINSERTF128256
OpAMD64VROUNDPD128
OpAMD64VRNDSCALEPD128
OpAMD64VREDUCEPD128
OpAMD64VPINSRB128
OpAMD64VPCMPB256
OpAMD64VPCMPBMasked256
+ OpAMD64VINSERTI128256
OpAMD64VPCMPB512
OpAMD64VPCMPBMasked512
OpAMD64VPCMPUW256
OpMaskedRoundWithPrecisionFloat32x8
OpMaskedTruncWithPrecisionFloat32x8
OpRoundWithPrecisionFloat32x8
+ OpSet128Float32x8
OpTruncWithPrecisionFloat32x8
OpCeilWithPrecisionFloat64x2
OpDiffWithCeilWithPrecisionFloat64x2
OpMaskedRoundWithPrecisionFloat64x4
OpMaskedTruncWithPrecisionFloat64x4
OpRoundWithPrecisionFloat64x4
+ OpSet128Float64x4
OpTruncWithPrecisionFloat64x4
OpCeilWithPrecisionFloat64x8
OpDiffWithCeilWithPrecisionFloat64x8
OpTruncWithPrecisionFloat64x8
OpMaskedShiftAllLeftAndFillUpperFromInt16x16
OpMaskedShiftAllRightAndFillUpperFromInt16x16
+ OpSet128Int16x16
OpShiftAllLeftAndFillUpperFromInt16x16
OpShiftAllRightAndFillUpperFromInt16x16
OpMaskedShiftAllLeftAndFillUpperFromInt16x32
OpMaskedShiftAllRightAndFillUpperFromInt32x8
OpRotateAllLeftInt32x8
OpRotateAllRightInt32x8
+ OpSet128Int32x8
OpShiftAllLeftAndFillUpperFromInt32x8
OpShiftAllRightAndFillUpperFromInt32x8
OpGetElemInt64x2
OpMaskedShiftAllRightAndFillUpperFromInt64x4
OpRotateAllLeftInt64x4
OpRotateAllRightInt64x4
+ OpSet128Int64x4
OpShiftAllLeftAndFillUpperFromInt64x4
OpShiftAllRightAndFillUpperFromInt64x4
OpMaskedRotateAllLeftInt64x8
OpShiftAllRightAndFillUpperFromInt64x8
OpGetElemInt8x16
OpSetElemInt8x16
+ OpSet128Int8x32
OpMaskedShiftAllLeftAndFillUpperFromUint16x16
OpMaskedShiftAllRightAndFillUpperFromUint16x16
+ OpSet128Uint16x16
OpShiftAllLeftAndFillUpperFromUint16x16
OpShiftAllRightAndFillUpperFromUint16x16
OpMaskedShiftAllLeftAndFillUpperFromUint16x32
OpMaskedShiftAllRightAndFillUpperFromUint32x8
OpRotateAllLeftUint32x8
OpRotateAllRightUint32x8
+ OpSet128Uint32x8
OpShiftAllLeftAndFillUpperFromUint32x8
OpShiftAllRightAndFillUpperFromUint32x8
OpGetElemUint64x2
OpMaskedShiftAllRightAndFillUpperFromUint64x4
OpRotateAllLeftUint64x4
OpRotateAllRightUint64x4
+ OpSet128Uint64x4
OpShiftAllLeftAndFillUpperFromUint64x4
OpShiftAllRightAndFillUpperFromUint64x4
OpMaskedRotateAllLeftUint64x8
OpGaloisFieldAffineTransformInversedUint8x32
OpMaskedGaloisFieldAffineTransformUint8x32
OpMaskedGaloisFieldAffineTransformInversedUint8x32
+ OpSet128Uint8x32
OpGaloisFieldAffineTransformUint8x64
OpGaloisFieldAffineTransformInversedUint8x64
OpMaskedGaloisFieldAffineTransformUint8x64
},
},
},
+ {
+ name: "VINSERTF128256",
+ auxType: auxInt8,
+ argLen: 2,
+ asm: x86.AVINSERTF128,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
{
name: "VROUNDPD128",
auxType: auxInt8,
},
},
},
+ {
+ name: "VINSERTI128256",
+ auxType: auxInt8,
+ argLen: 2,
+ asm: x86.AVINSERTI128,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ },
+ },
{
name: "VPCMPB512",
auxType: auxInt8,
argLen: 1,
generic: true,
},
+ {
+ name: "Set128Float32x8",
+ auxType: auxInt8,
+ argLen: 2,
+ generic: true,
+ },
{
name: "TruncWithPrecisionFloat32x8",
auxType: auxInt8,
argLen: 1,
generic: true,
},
+ {
+ name: "Set128Float64x4",
+ auxType: auxInt8,
+ argLen: 2,
+ generic: true,
+ },
{
name: "TruncWithPrecisionFloat64x4",
auxType: auxInt8,
argLen: 3,
generic: true,
},
+ {
+ name: "Set128Int16x16",
+ auxType: auxInt8,
+ argLen: 2,
+ generic: true,
+ },
{
name: "ShiftAllLeftAndFillUpperFromInt16x16",
auxType: auxInt8,
argLen: 1,
generic: true,
},
+ {
+ name: "Set128Int32x8",
+ auxType: auxInt8,
+ argLen: 2,
+ generic: true,
+ },
{
name: "ShiftAllLeftAndFillUpperFromInt32x8",
auxType: auxInt8,
argLen: 1,
generic: true,
},
+ {
+ name: "Set128Int64x4",
+ auxType: auxInt8,
+ argLen: 2,
+ generic: true,
+ },
{
name: "ShiftAllLeftAndFillUpperFromInt64x4",
auxType: auxInt8,
argLen: 2,
generic: true,
},
+ {
+ name: "Set128Int8x32",
+ auxType: auxInt8,
+ argLen: 2,
+ generic: true,
+ },
{
name: "MaskedShiftAllLeftAndFillUpperFromUint16x16",
auxType: auxInt8,
argLen: 3,
generic: true,
},
+ {
+ name: "Set128Uint16x16",
+ auxType: auxInt8,
+ argLen: 2,
+ generic: true,
+ },
{
name: "ShiftAllLeftAndFillUpperFromUint16x16",
auxType: auxInt8,
argLen: 1,
generic: true,
},
+ {
+ name: "Set128Uint32x8",
+ auxType: auxInt8,
+ argLen: 2,
+ generic: true,
+ },
{
name: "ShiftAllLeftAndFillUpperFromUint32x8",
auxType: auxInt8,
argLen: 1,
generic: true,
},
+ {
+ name: "Set128Uint64x4",
+ auxType: auxInt8,
+ argLen: 2,
+ generic: true,
+ },
{
name: "ShiftAllLeftAndFillUpperFromUint64x4",
auxType: auxInt8,
argLen: 3,
generic: true,
},
+ {
+ name: "Set128Uint8x32",
+ auxType: auxInt8,
+ argLen: 2,
+ generic: true,
+ },
{
name: "GaloisFieldAffineTransformUint8x64",
auxType: auxInt8,
return rewriteValueAMD64_OpSelect1(v)
case OpSelectN:
return rewriteValueAMD64_OpSelectN(v)
+ case OpSet128Float32x8:
+ return rewriteValueAMD64_OpSet128Float32x8(v)
+ case OpSet128Float64x4:
+ return rewriteValueAMD64_OpSet128Float64x4(v)
+ case OpSet128Int16x16:
+ return rewriteValueAMD64_OpSet128Int16x16(v)
+ case OpSet128Int32x8:
+ return rewriteValueAMD64_OpSet128Int32x8(v)
+ case OpSet128Int64x4:
+ return rewriteValueAMD64_OpSet128Int64x4(v)
+ case OpSet128Int8x32:
+ return rewriteValueAMD64_OpSet128Int8x32(v)
+ case OpSet128Uint16x16:
+ return rewriteValueAMD64_OpSet128Uint16x16(v)
+ case OpSet128Uint32x8:
+ return rewriteValueAMD64_OpSet128Uint32x8(v)
+ case OpSet128Uint64x4:
+ return rewriteValueAMD64_OpSet128Uint64x4(v)
+ case OpSet128Uint8x32:
+ return rewriteValueAMD64_OpSet128Uint8x32(v)
case OpSetElemInt16x8:
return rewriteValueAMD64_OpSetElemInt16x8(v)
case OpSetElemInt32x4:
}
return false
}
+func rewriteValueAMD64_OpSet128Float32x8(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ // match: (Set128Float32x8 [a] x y)
+ // result: (VINSERTF128256 [a] x y)
+ for {
+ a := auxIntToInt8(v.AuxInt)
+ x := v_0
+ y := v_1
+ v.reset(OpAMD64VINSERTF128256)
+ v.AuxInt = int8ToAuxInt(a)
+ v.AddArg2(x, y)
+ return true
+ }
+}
+func rewriteValueAMD64_OpSet128Float64x4(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ // match: (Set128Float64x4 [a] x y)
+ // result: (VINSERTF128256 [a] x y)
+ for {
+ a := auxIntToInt8(v.AuxInt)
+ x := v_0
+ y := v_1
+ v.reset(OpAMD64VINSERTF128256)
+ v.AuxInt = int8ToAuxInt(a)
+ v.AddArg2(x, y)
+ return true
+ }
+}
+func rewriteValueAMD64_OpSet128Int16x16(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ // match: (Set128Int16x16 [a] x y)
+ // result: (VINSERTI128256 [a] x y)
+ for {
+ a := auxIntToInt8(v.AuxInt)
+ x := v_0
+ y := v_1
+ v.reset(OpAMD64VINSERTI128256)
+ v.AuxInt = int8ToAuxInt(a)
+ v.AddArg2(x, y)
+ return true
+ }
+}
+func rewriteValueAMD64_OpSet128Int32x8(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ // match: (Set128Int32x8 [a] x y)
+ // result: (VINSERTI128256 [a] x y)
+ for {
+ a := auxIntToInt8(v.AuxInt)
+ x := v_0
+ y := v_1
+ v.reset(OpAMD64VINSERTI128256)
+ v.AuxInt = int8ToAuxInt(a)
+ v.AddArg2(x, y)
+ return true
+ }
+}
+func rewriteValueAMD64_OpSet128Int64x4(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ // match: (Set128Int64x4 [a] x y)
+ // result: (VINSERTI128256 [a] x y)
+ for {
+ a := auxIntToInt8(v.AuxInt)
+ x := v_0
+ y := v_1
+ v.reset(OpAMD64VINSERTI128256)
+ v.AuxInt = int8ToAuxInt(a)
+ v.AddArg2(x, y)
+ return true
+ }
+}
+func rewriteValueAMD64_OpSet128Int8x32(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ // match: (Set128Int8x32 [a] x y)
+ // result: (VINSERTI128256 [a] x y)
+ for {
+ a := auxIntToInt8(v.AuxInt)
+ x := v_0
+ y := v_1
+ v.reset(OpAMD64VINSERTI128256)
+ v.AuxInt = int8ToAuxInt(a)
+ v.AddArg2(x, y)
+ return true
+ }
+}
+func rewriteValueAMD64_OpSet128Uint16x16(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ // match: (Set128Uint16x16 [a] x y)
+ // result: (VINSERTI128256 [a] x y)
+ for {
+ a := auxIntToInt8(v.AuxInt)
+ x := v_0
+ y := v_1
+ v.reset(OpAMD64VINSERTI128256)
+ v.AuxInt = int8ToAuxInt(a)
+ v.AddArg2(x, y)
+ return true
+ }
+}
+func rewriteValueAMD64_OpSet128Uint32x8(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ // match: (Set128Uint32x8 [a] x y)
+ // result: (VINSERTI128256 [a] x y)
+ for {
+ a := auxIntToInt8(v.AuxInt)
+ x := v_0
+ y := v_1
+ v.reset(OpAMD64VINSERTI128256)
+ v.AuxInt = int8ToAuxInt(a)
+ v.AddArg2(x, y)
+ return true
+ }
+}
+func rewriteValueAMD64_OpSet128Uint64x4(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ // match: (Set128Uint64x4 [a] x y)
+ // result: (VINSERTI128256 [a] x y)
+ for {
+ a := auxIntToInt8(v.AuxInt)
+ x := v_0
+ y := v_1
+ v.reset(OpAMD64VINSERTI128256)
+ v.AuxInt = int8ToAuxInt(a)
+ v.AddArg2(x, y)
+ return true
+ }
+}
+func rewriteValueAMD64_OpSet128Uint8x32(v *Value) bool {
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ // match: (Set128Uint8x32 [a] x y)
+ // result: (VINSERTI128256 [a] x y)
+ for {
+ a := auxIntToInt8(v.AuxInt)
+ x := v_0
+ y := v_1
+ v.reset(OpAMD64VINSERTI128256)
+ v.AuxInt = int8ToAuxInt(a)
+ v.AddArg2(x, y)
+ return true
+ }
+}
func rewriteValueAMD64_OpSetElemInt16x8(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
addF(simdPackage, "Uint32x4.SaturatedUnsignedSignedQuadDotProdAccumulate", opLen3(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateUint32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint32x8.SaturatedUnsignedSignedQuadDotProdAccumulate", opLen3(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateUint32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint32x16.SaturatedUnsignedSignedQuadDotProdAccumulate", opLen3(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateUint32x16, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Float32x8.Set128", opLen2Imm8(ssa.OpSet128Float32x8, types.TypeVec256, 0), sys.AMD64)
+ addF(simdPackage, "Float64x4.Set128", opLen2Imm8(ssa.OpSet128Float64x4, types.TypeVec256, 0), sys.AMD64)
+ addF(simdPackage, "Int8x32.Set128", opLen2Imm8(ssa.OpSet128Int8x32, types.TypeVec256, 0), sys.AMD64)
+ addF(simdPackage, "Int16x16.Set128", opLen2Imm8(ssa.OpSet128Int16x16, types.TypeVec256, 0), sys.AMD64)
+ addF(simdPackage, "Int32x8.Set128", opLen2Imm8(ssa.OpSet128Int32x8, types.TypeVec256, 0), sys.AMD64)
+ addF(simdPackage, "Int64x4.Set128", opLen2Imm8(ssa.OpSet128Int64x4, types.TypeVec256, 0), sys.AMD64)
+ addF(simdPackage, "Uint8x32.Set128", opLen2Imm8(ssa.OpSet128Uint8x32, types.TypeVec256, 0), sys.AMD64)
+ addF(simdPackage, "Uint16x16.Set128", opLen2Imm8(ssa.OpSet128Uint16x16, types.TypeVec256, 0), sys.AMD64)
+ addF(simdPackage, "Uint32x8.Set128", opLen2Imm8(ssa.OpSet128Uint32x8, types.TypeVec256, 0), sys.AMD64)
+ addF(simdPackage, "Uint64x4.Set128", opLen2Imm8(ssa.OpSet128Uint64x4, types.TypeVec256, 0), sys.AMD64)
addF(simdPackage, "Int8x16.SetElem", opLen2Imm8(ssa.OpSetElemInt8x16, types.TypeVec128, 0), sys.AMD64)
addF(simdPackage, "Int16x8.SetElem", opLen2Imm8(ssa.OpSetElemInt16x8, types.TypeVec128, 0), sys.AMD64)
addF(simdPackage, "Int32x4.SetElem", opLen2Imm8(ssa.OpSetElemInt32x4, types.TypeVec128, 0), sys.AMD64)
}
}
+
+func TestSlicesInt8Set128(t *testing.T) {
+ a := []int8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+ 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}
+ v := simd.LoadInt8x16Slice(a) // 1-16
+ u := simd.LoadInt8x32Slice(a) // 1-32
+
+ w := u.Set128(1, v) // 1-16:1-16
+
+ b := make([]int8, 32, 32)
+ w.StoreSlice(b)
+
+ checkInt8Slices(t, a, b[:16])
+ checkInt8Slices(t, a, b[16:])
+}
+
func TestSlicesInt8TooShortLoad(t *testing.T) {
defer func() {
if r := recover(); r != nil {
// RotateAllLeft
// RotateAllRight
// RoundWithPrecision
+// Set128
// SetElem
// ShiftAllLeft
// ShiftAllLeftAndFillUpperFrom
// Asm: VPDPBUSDS, CPU Feature: AVX512EVEX
func (x Uint32x16) SaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int8x64) Uint32x16
+/* Set128 */
+
+// Set128 combines a 128-bit vector with a 256-bit vector, where the constant operand specifies whether the low (0) or high (1) half receives the smaller vector.
+//
+// Asm: VINSERTF128, CPU Feature: AVX
+func (x Float32x8) Set128(imm uint8, y Float32x4) Float32x8
+
+// Set128 combines a 128-bit vector with a 256-bit vector, where the constant operand specifies whether the low (0) or high (1) half receives the smaller vector.
+//
+// Asm: VINSERTF128, CPU Feature: AVX
+func (x Float64x4) Set128(imm uint8, y Float64x2) Float64x4
+
+// Set128 combines a 128-bit vector with a 256-bit vector, where the constant operand specifies whether the low (0) or high (1) half receives the smaller vector.
+//
+// Asm: VINSERTI128, CPU Feature: AVX2
+func (x Int8x32) Set128(imm uint8, y Int8x16) Int8x32
+
+// Set128 combines a 128-bit vector with a 256-bit vector, where the constant operand specifies whether the low (0) or high (1) half receives the smaller vector.
+//
+// Asm: VINSERTI128, CPU Feature: AVX2
+func (x Int16x16) Set128(imm uint8, y Int16x8) Int16x16
+
+// Set128 combines a 128-bit vector with a 256-bit vector, where the constant operand specifies whether the low (0) or high (1) half receives the smaller vector.
+//
+// Asm: VINSERTI128, CPU Feature: AVX2
+func (x Int32x8) Set128(imm uint8, y Int32x4) Int32x8
+
+// Set128 combines a 128-bit vector with a 256-bit vector, where the constant operand specifies whether the low (0) or high (1) half receives the smaller vector.
+//
+// Asm: VINSERTI128, CPU Feature: AVX2
+func (x Int64x4) Set128(imm uint8, y Int64x2) Int64x4
+
+// Set128 combines a 128-bit vector with a 256-bit vector, where the constant operand specifies whether the low (0) or high (1) half receives the smaller vector.
+//
+// Asm: VINSERTI128, CPU Feature: AVX2
+func (x Uint8x32) Set128(imm uint8, y Uint8x16) Uint8x32
+
+// Set128 combines a 128-bit vector with a 256-bit vector, where the constant operand specifies whether the low (0) or high (1) half receives the smaller vector.
+//
+// Asm: VINSERTI128, CPU Feature: AVX2
+func (x Uint16x16) Set128(imm uint8, y Uint16x8) Uint16x16
+
+// Set128 combines a 128-bit vector with a 256-bit vector, where the constant operand specifies whether the low (0) or high (1) half receives the smaller vector.
+//
+// Asm: VINSERTI128, CPU Feature: AVX2
+func (x Uint32x8) Set128(imm uint8, y Uint32x4) Uint32x8
+
+// Set128 combines a 128-bit vector with a 256-bit vector, where the constant operand specifies whether the low (0) or high (1) half receives the smaller vector.
+//
+// Asm: VINSERTI128, CPU Feature: AVX2
+func (x Uint64x4) Set128(imm uint8, y Uint64x2) Uint64x4
+
/* SetElem */
// SetElem sets a single constant-indexed element's value.