Cypherpunks repositories - gostls13.git/commitdiff
[dev.simd] cmd/compile: add EXTRACT[IF]128 instructions
author: David Chase <drchase@google.com>
Wed, 25 Jun 2025 22:20:50 +0000 (18:20 -0400)
committer: David Chase <drchase@google.com>
Tue, 8 Jul 2025 01:37:28 +0000 (18:37 -0700)
This is generated by simdgen CL 684080
and should be submitted after it.

Also includes tests.

Change-Id: I1d680911134d8fb92f4deccae4ec373f3ed9f752
Reviewed-on: https://go-review.googlesource.com/c/go/+/684115
Reviewed-by: Junyang Shao <shaojunyang@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>

src/cmd/compile/internal/amd64/simdssa.go
src/cmd/compile/internal/ssa/_gen/simdAMD64.rules
src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go
src/cmd/compile/internal/ssa/_gen/simdgenericOps.go
src/cmd/compile/internal/ssa/opGen.go
src/cmd/compile/internal/ssa/rewriteAMD64.go
src/cmd/compile/internal/ssagen/simdintrinsics.go
src/simd/simd_test.go
src/simd/simd_wrapped_test.go
src/simd/stubs_amd64.go

index ac2848d1bafa694e43e8a4e60a9f9a603d34eae0..fbb63ccaa14386e7bcef8a938031e9897ecdb59a 100644 (file)
@@ -655,6 +655,8 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
                ssa.OpAMD64VREDUCEPD128,
                ssa.OpAMD64VREDUCEPD256,
                ssa.OpAMD64VREDUCEPD512,
+               ssa.OpAMD64VEXTRACTF128128,
+               ssa.OpAMD64VEXTRACTI128128,
                ssa.OpAMD64VPROLD128,
                ssa.OpAMD64VPROLD256,
                ssa.OpAMD64VPROLD512,
index 6b1078e74127985956e3a4b3e19708bda8a4932b..6ba52a9e9c9f93100c630b3d058be162962d69d3 100644 (file)
 (GaloisFieldMulUint8x16 ...) => (VGF2P8MULB128 ...)
 (GaloisFieldMulUint8x32 ...) => (VGF2P8MULB256 ...)
 (GaloisFieldMulUint8x64 ...) => (VGF2P8MULB512 ...)
+(Get128Float32x8 [a] x) => (VEXTRACTF128128 [a] x)
+(Get128Float64x4 [a] x) => (VEXTRACTF128128 [a] x)
+(Get128Int8x32 [a] x) => (VEXTRACTI128128 [a] x)
+(Get128Int16x16 [a] x) => (VEXTRACTI128128 [a] x)
+(Get128Int32x8 [a] x) => (VEXTRACTI128128 [a] x)
+(Get128Int64x4 [a] x) => (VEXTRACTI128128 [a] x)
+(Get128Uint8x32 [a] x) => (VEXTRACTI128128 [a] x)
+(Get128Uint16x16 [a] x) => (VEXTRACTI128128 [a] x)
+(Get128Uint32x8 [a] x) => (VEXTRACTI128128 [a] x)
+(Get128Uint64x4 [a] x) => (VEXTRACTI128128 [a] x)
 (GetElemInt8x16 [a] x) => (VPEXTRB128 [a] x)
 (GetElemInt16x8 [a] x) => (VPEXTRW128 [a] x)
 (GetElemInt32x4 [a] x) => (VPEXTRD128 [a] x)
index 787d3c5fcbf50c9ff0d8acaddb1bcc89da9bae80..8c895d9f455950555096ac8c90f24bddb4a485db 100644 (file)
@@ -765,6 +765,7 @@ func simdAMD64Ops(fp11, fp21, fp2k, fpkfp, fp2kfp, fp2kk, fp31, fp3kfp, fpgpfp,
                {name: "VRNDSCALEPS256", argLength: 1, reg: fp11, asm: "VRNDSCALEPS", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false},
                {name: "VREDUCEPS256", argLength: 1, reg: fp11, asm: "VREDUCEPS", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false},
                {name: "VCMPPS256", argLength: 2, reg: fp21, asm: "VCMPPS", aux: "Int8", commutative: true, typ: "Vec256", resultInArg0: false},
+               {name: "VEXTRACTF128128", argLength: 1, reg: fp11, asm: "VEXTRACTF128", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false},
                {name: "VRNDSCALEPSMasked256", argLength: 2, reg: fpkfp, asm: "VRNDSCALEPS", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false},
                {name: "VREDUCEPSMasked256", argLength: 2, reg: fpkfp, asm: "VREDUCEPS", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false},
                {name: "VCMPPSMasked256", argLength: 3, reg: fp2kk, asm: "VCMPPS", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
@@ -878,6 +879,7 @@ func simdAMD64Ops(fp11, fp21, fp2k, fpkfp, fp2kfp, fp2kk, fp31, fp3kfp, fpgpfp,
                {name: "VPCMPB128", argLength: 2, reg: fp2k, asm: "VPCMPB", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false},
                {name: "VPCMPBMasked128", argLength: 3, reg: fp2kk, asm: "VPCMPB", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
                {name: "VPINSRB128", argLength: 2, reg: fpgpfp, asm: "VPINSRB", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VEXTRACTI128128", argLength: 1, reg: fp11, asm: "VEXTRACTI128", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false},
                {name: "VPCMPB256", argLength: 2, reg: fp2k, asm: "VPCMPB", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false},
                {name: "VPCMPBMasked256", argLength: 3, reg: fp2kk, asm: "VPCMPB", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
                {name: "VINSERTI128256", argLength: 2, reg: fp21, asm: "VINSERTI128", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false},
index 076a16ebda611024b3fafd993816aa824d16cf8f..c74893b97a210f47b78193ca87311a681b812dc2 100644 (file)
@@ -1502,6 +1502,7 @@ func simdGenericOps() []opData {
                {name: "DiffWithRoundWithPrecisionFloat32x8", argLength: 1, commutative: false, aux: "Int8"},
                {name: "DiffWithTruncWithPrecisionFloat32x8", argLength: 1, commutative: false, aux: "Int8"},
                {name: "FloorWithPrecisionFloat32x8", argLength: 1, commutative: false, aux: "Int8"},
+               {name: "Get128Float32x8", argLength: 1, commutative: false, aux: "Int8"},
                {name: "MaskedCeilWithPrecisionFloat32x8", argLength: 2, commutative: false, aux: "Int8"},
                {name: "MaskedDiffWithCeilWithPrecisionFloat32x8", argLength: 2, commutative: false, aux: "Int8"},
                {name: "MaskedDiffWithFloorWithPrecisionFloat32x8", argLength: 2, commutative: false, aux: "Int8"},
@@ -1535,6 +1536,7 @@ func simdGenericOps() []opData {
                {name: "DiffWithRoundWithPrecisionFloat64x4", argLength: 1, commutative: false, aux: "Int8"},
                {name: "DiffWithTruncWithPrecisionFloat64x4", argLength: 1, commutative: false, aux: "Int8"},
                {name: "FloorWithPrecisionFloat64x4", argLength: 1, commutative: false, aux: "Int8"},
+               {name: "Get128Float64x4", argLength: 1, commutative: false, aux: "Int8"},
                {name: "MaskedCeilWithPrecisionFloat64x4", argLength: 2, commutative: false, aux: "Int8"},
                {name: "MaskedDiffWithCeilWithPrecisionFloat64x4", argLength: 2, commutative: false, aux: "Int8"},
                {name: "MaskedDiffWithFloorWithPrecisionFloat64x4", argLength: 2, commutative: false, aux: "Int8"},
@@ -1562,6 +1564,7 @@ func simdGenericOps() []opData {
                {name: "MaskedTruncWithPrecisionFloat64x8", argLength: 2, commutative: false, aux: "Int8"},
                {name: "RoundWithPrecisionFloat64x8", argLength: 1, commutative: false, aux: "Int8"},
                {name: "TruncWithPrecisionFloat64x8", argLength: 1, commutative: false, aux: "Int8"},
+               {name: "Get128Int16x16", argLength: 1, commutative: false, aux: "Int8"},
                {name: "MaskedShiftAllLeftAndFillUpperFromInt16x16", argLength: 3, commutative: false, aux: "Int8"},
                {name: "MaskedShiftAllRightAndFillUpperFromInt16x16", argLength: 3, commutative: false, aux: "Int8"},
                {name: "Set128Int16x16", argLength: 2, commutative: false, aux: "Int8"},
@@ -1595,6 +1598,7 @@ func simdGenericOps() []opData {
                {name: "SetElemInt32x4", argLength: 2, commutative: false, aux: "Int8"},
                {name: "ShiftAllLeftAndFillUpperFromInt32x4", argLength: 2, commutative: false, aux: "Int8"},
                {name: "ShiftAllRightAndFillUpperFromInt32x4", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "Get128Int32x8", argLength: 1, commutative: false, aux: "Int8"},
                {name: "MaskedRotateAllLeftInt32x8", argLength: 2, commutative: false, aux: "Int8"},
                {name: "MaskedRotateAllRightInt32x8", argLength: 2, commutative: false, aux: "Int8"},
                {name: "MaskedShiftAllLeftAndFillUpperFromInt32x8", argLength: 3, commutative: false, aux: "Int8"},
@@ -1614,6 +1618,7 @@ func simdGenericOps() []opData {
                {name: "SetElemInt64x2", argLength: 2, commutative: false, aux: "Int8"},
                {name: "ShiftAllLeftAndFillUpperFromInt64x2", argLength: 2, commutative: false, aux: "Int8"},
                {name: "ShiftAllRightAndFillUpperFromInt64x2", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "Get128Int64x4", argLength: 1, commutative: false, aux: "Int8"},
                {name: "MaskedRotateAllLeftInt64x4", argLength: 2, commutative: false, aux: "Int8"},
                {name: "MaskedRotateAllRightInt64x4", argLength: 2, commutative: false, aux: "Int8"},
                {name: "MaskedShiftAllLeftAndFillUpperFromInt64x4", argLength: 3, commutative: false, aux: "Int8"},
@@ -1633,7 +1638,9 @@ func simdGenericOps() []opData {
                {name: "ShiftAllRightAndFillUpperFromInt64x8", argLength: 2, commutative: false, aux: "Int8"},
                {name: "GetElemInt8x16", argLength: 1, commutative: false, aux: "Int8"},
                {name: "SetElemInt8x16", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "Get128Int8x32", argLength: 1, commutative: false, aux: "Int8"},
                {name: "Set128Int8x32", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "Get128Uint16x16", argLength: 1, commutative: false, aux: "Int8"},
                {name: "MaskedShiftAllLeftAndFillUpperFromUint16x16", argLength: 3, commutative: false, aux: "Int8"},
                {name: "MaskedShiftAllRightAndFillUpperFromUint16x16", argLength: 3, commutative: false, aux: "Int8"},
                {name: "Set128Uint16x16", argLength: 2, commutative: false, aux: "Int8"},
@@ -1667,6 +1674,7 @@ func simdGenericOps() []opData {
                {name: "SetElemUint32x4", argLength: 2, commutative: false, aux: "Int8"},
                {name: "ShiftAllLeftAndFillUpperFromUint32x4", argLength: 2, commutative: false, aux: "Int8"},
                {name: "ShiftAllRightAndFillUpperFromUint32x4", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "Get128Uint32x8", argLength: 1, commutative: false, aux: "Int8"},
                {name: "MaskedRotateAllLeftUint32x8", argLength: 2, commutative: false, aux: "Int8"},
                {name: "MaskedRotateAllRightUint32x8", argLength: 2, commutative: false, aux: "Int8"},
                {name: "MaskedShiftAllLeftAndFillUpperFromUint32x8", argLength: 3, commutative: false, aux: "Int8"},
@@ -1686,6 +1694,7 @@ func simdGenericOps() []opData {
                {name: "SetElemUint64x2", argLength: 2, commutative: false, aux: "Int8"},
                {name: "ShiftAllLeftAndFillUpperFromUint64x2", argLength: 2, commutative: false, aux: "Int8"},
                {name: "ShiftAllRightAndFillUpperFromUint64x2", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "Get128Uint64x4", argLength: 1, commutative: false, aux: "Int8"},
                {name: "MaskedRotateAllLeftUint64x4", argLength: 2, commutative: false, aux: "Int8"},
                {name: "MaskedRotateAllRightUint64x4", argLength: 2, commutative: false, aux: "Int8"},
                {name: "MaskedShiftAllLeftAndFillUpperFromUint64x4", argLength: 3, commutative: false, aux: "Int8"},
@@ -1711,6 +1720,7 @@ func simdGenericOps() []opData {
                {name: "SetElemUint8x16", argLength: 2, commutative: false, aux: "Int8"},
                {name: "GaloisFieldAffineTransformUint8x32", argLength: 2, commutative: false, aux: "Int8"},
                {name: "GaloisFieldAffineTransformInversedUint8x32", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "Get128Uint8x32", argLength: 1, commutative: false, aux: "Int8"},
                {name: "MaskedGaloisFieldAffineTransformUint8x32", argLength: 3, commutative: false, aux: "Int8"},
                {name: "MaskedGaloisFieldAffineTransformInversedUint8x32", argLength: 3, commutative: false, aux: "Int8"},
                {name: "Set128Uint8x32", argLength: 2, commutative: false, aux: "Int8"},
index ece791ca6cea71e10921ce0a27544684a6a38d99..91380e5e089e313675d084a14bf3fbfa278f1030 100644 (file)
@@ -1958,6 +1958,7 @@ const (
        OpAMD64VRNDSCALEPS256
        OpAMD64VREDUCEPS256
        OpAMD64VCMPPS256
+       OpAMD64VEXTRACTF128128
        OpAMD64VRNDSCALEPSMasked256
        OpAMD64VREDUCEPSMasked256
        OpAMD64VCMPPSMasked256
@@ -2071,6 +2072,7 @@ const (
        OpAMD64VPCMPB128
        OpAMD64VPCMPBMasked128
        OpAMD64VPINSRB128
+       OpAMD64VEXTRACTI128128
        OpAMD64VPCMPB256
        OpAMD64VPCMPBMasked256
        OpAMD64VINSERTI128256
@@ -5837,6 +5839,7 @@ const (
        OpDiffWithRoundWithPrecisionFloat32x8
        OpDiffWithTruncWithPrecisionFloat32x8
        OpFloorWithPrecisionFloat32x8
+       OpGet128Float32x8
        OpMaskedCeilWithPrecisionFloat32x8
        OpMaskedDiffWithCeilWithPrecisionFloat32x8
        OpMaskedDiffWithFloorWithPrecisionFloat32x8
@@ -5870,6 +5873,7 @@ const (
        OpDiffWithRoundWithPrecisionFloat64x4
        OpDiffWithTruncWithPrecisionFloat64x4
        OpFloorWithPrecisionFloat64x4
+       OpGet128Float64x4
        OpMaskedCeilWithPrecisionFloat64x4
        OpMaskedDiffWithCeilWithPrecisionFloat64x4
        OpMaskedDiffWithFloorWithPrecisionFloat64x4
@@ -5897,6 +5901,7 @@ const (
        OpMaskedTruncWithPrecisionFloat64x8
        OpRoundWithPrecisionFloat64x8
        OpTruncWithPrecisionFloat64x8
+       OpGet128Int16x16
        OpMaskedShiftAllLeftAndFillUpperFromInt16x16
        OpMaskedShiftAllRightAndFillUpperFromInt16x16
        OpSet128Int16x16
@@ -5930,6 +5935,7 @@ const (
        OpSetElemInt32x4
        OpShiftAllLeftAndFillUpperFromInt32x4
        OpShiftAllRightAndFillUpperFromInt32x4
+       OpGet128Int32x8
        OpMaskedRotateAllLeftInt32x8
        OpMaskedRotateAllRightInt32x8
        OpMaskedShiftAllLeftAndFillUpperFromInt32x8
@@ -5949,6 +5955,7 @@ const (
        OpSetElemInt64x2
        OpShiftAllLeftAndFillUpperFromInt64x2
        OpShiftAllRightAndFillUpperFromInt64x2
+       OpGet128Int64x4
        OpMaskedRotateAllLeftInt64x4
        OpMaskedRotateAllRightInt64x4
        OpMaskedShiftAllLeftAndFillUpperFromInt64x4
@@ -5968,7 +5975,9 @@ const (
        OpShiftAllRightAndFillUpperFromInt64x8
        OpGetElemInt8x16
        OpSetElemInt8x16
+       OpGet128Int8x32
        OpSet128Int8x32
+       OpGet128Uint16x16
        OpMaskedShiftAllLeftAndFillUpperFromUint16x16
        OpMaskedShiftAllRightAndFillUpperFromUint16x16
        OpSet128Uint16x16
@@ -6002,6 +6011,7 @@ const (
        OpSetElemUint32x4
        OpShiftAllLeftAndFillUpperFromUint32x4
        OpShiftAllRightAndFillUpperFromUint32x4
+       OpGet128Uint32x8
        OpMaskedRotateAllLeftUint32x8
        OpMaskedRotateAllRightUint32x8
        OpMaskedShiftAllLeftAndFillUpperFromUint32x8
@@ -6021,6 +6031,7 @@ const (
        OpSetElemUint64x2
        OpShiftAllLeftAndFillUpperFromUint64x2
        OpShiftAllRightAndFillUpperFromUint64x2
+       OpGet128Uint64x4
        OpMaskedRotateAllLeftUint64x4
        OpMaskedRotateAllRightUint64x4
        OpMaskedShiftAllLeftAndFillUpperFromUint64x4
@@ -6046,6 +6057,7 @@ const (
        OpSetElemUint8x16
        OpGaloisFieldAffineTransformUint8x32
        OpGaloisFieldAffineTransformInversedUint8x32
+       OpGet128Uint8x32
        OpMaskedGaloisFieldAffineTransformUint8x32
        OpMaskedGaloisFieldAffineTransformInversedUint8x32
        OpSet128Uint8x32
@@ -30096,6 +30108,20 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:    "VEXTRACTF128128",
+               auxType: auxInt8,
+               argLen:  1,
+               asm:     x86.AVEXTRACTF128,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+                       outputs: []outputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+               },
+       },
        {
                name:    "VRNDSCALEPSMasked256",
                auxType: auxInt8,
@@ -31820,6 +31846,20 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:    "VEXTRACTI128128",
+               auxType: auxInt8,
+               argLen:  1,
+               asm:     x86.AVEXTRACTI128,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+                       outputs: []outputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+               },
+       },
        {
                name:    "VPCMPB256",
                auxType: auxInt8,
@@ -67706,6 +67746,12 @@ var opcodeTable = [...]opInfo{
                argLen:  1,
                generic: true,
        },
+       {
+               name:    "Get128Float32x8",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
        {
                name:    "MaskedCeilWithPrecisionFloat32x8",
                auxType: auxInt8,
@@ -67904,6 +67950,12 @@ var opcodeTable = [...]opInfo{
                argLen:  1,
                generic: true,
        },
+       {
+               name:    "Get128Float64x4",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
        {
                name:    "MaskedCeilWithPrecisionFloat64x4",
                auxType: auxInt8,
@@ -68066,6 +68118,12 @@ var opcodeTable = [...]opInfo{
                argLen:  1,
                generic: true,
        },
+       {
+               name:    "Get128Int16x16",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
        {
                name:    "MaskedShiftAllLeftAndFillUpperFromInt16x16",
                auxType: auxInt8,
@@ -68264,6 +68322,12 @@ var opcodeTable = [...]opInfo{
                argLen:  2,
                generic: true,
        },
+       {
+               name:    "Get128Int32x8",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
        {
                name:    "MaskedRotateAllLeftInt32x8",
                auxType: auxInt8,
@@ -68378,6 +68442,12 @@ var opcodeTable = [...]opInfo{
                argLen:  2,
                generic: true,
        },
+       {
+               name:    "Get128Int64x4",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
        {
                name:    "MaskedRotateAllLeftInt64x4",
                auxType: auxInt8,
@@ -68492,12 +68562,24 @@ var opcodeTable = [...]opInfo{
                argLen:  2,
                generic: true,
        },
+       {
+               name:    "Get128Int8x32",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
        {
                name:    "Set128Int8x32",
                auxType: auxInt8,
                argLen:  2,
                generic: true,
        },
+       {
+               name:    "Get128Uint16x16",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
        {
                name:    "MaskedShiftAllLeftAndFillUpperFromUint16x16",
                auxType: auxInt8,
@@ -68696,6 +68778,12 @@ var opcodeTable = [...]opInfo{
                argLen:  2,
                generic: true,
        },
+       {
+               name:    "Get128Uint32x8",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
        {
                name:    "MaskedRotateAllLeftUint32x8",
                auxType: auxInt8,
@@ -68810,6 +68898,12 @@ var opcodeTable = [...]opInfo{
                argLen:  2,
                generic: true,
        },
+       {
+               name:    "Get128Uint64x4",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
        {
                name:    "MaskedRotateAllLeftUint64x4",
                auxType: auxInt8,
@@ -68960,6 +69054,12 @@ var opcodeTable = [...]opInfo{
                argLen:  2,
                generic: true,
        },
+       {
+               name:    "Get128Uint8x32",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
        {
                name:    "MaskedGaloisFieldAffineTransformUint8x32",
                auxType: auxInt8,
index 5c1872dcdfd313216a3dbf1b175c890f76be4768..1cf23c4ec5b0e49f3bb43dca1b3e3648ba78802f 100644 (file)
@@ -1388,6 +1388,26 @@ func rewriteValueAMD64(v *Value) bool {
        case OpGaloisFieldMulUint8x64:
                v.Op = OpAMD64VGF2P8MULB512
                return true
+       case OpGet128Float32x8:
+               return rewriteValueAMD64_OpGet128Float32x8(v)
+       case OpGet128Float64x4:
+               return rewriteValueAMD64_OpGet128Float64x4(v)
+       case OpGet128Int16x16:
+               return rewriteValueAMD64_OpGet128Int16x16(v)
+       case OpGet128Int32x8:
+               return rewriteValueAMD64_OpGet128Int32x8(v)
+       case OpGet128Int64x4:
+               return rewriteValueAMD64_OpGet128Int64x4(v)
+       case OpGet128Int8x32:
+               return rewriteValueAMD64_OpGet128Int8x32(v)
+       case OpGet128Uint16x16:
+               return rewriteValueAMD64_OpGet128Uint16x16(v)
+       case OpGet128Uint32x8:
+               return rewriteValueAMD64_OpGet128Uint32x8(v)
+       case OpGet128Uint64x4:
+               return rewriteValueAMD64_OpGet128Uint64x4(v)
+       case OpGet128Uint8x32:
+               return rewriteValueAMD64_OpGet128Uint8x32(v)
        case OpGetCallerPC:
                v.Op = OpAMD64LoweredGetCallerPC
                return true
@@ -30999,6 +31019,136 @@ func rewriteValueAMD64_OpGaloisFieldAffineTransformUint8x64(v *Value) bool {
                return true
        }
 }
+func rewriteValueAMD64_OpGet128Float32x8(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (Get128Float32x8 [a] x)
+       // result: (VEXTRACTF128128 [a] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VEXTRACTF128128)
+               v.AuxInt = int8ToAuxInt(a)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpGet128Float64x4(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (Get128Float64x4 [a] x)
+       // result: (VEXTRACTF128128 [a] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VEXTRACTF128128)
+               v.AuxInt = int8ToAuxInt(a)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpGet128Int16x16(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (Get128Int16x16 [a] x)
+       // result: (VEXTRACTI128128 [a] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VEXTRACTI128128)
+               v.AuxInt = int8ToAuxInt(a)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpGet128Int32x8(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (Get128Int32x8 [a] x)
+       // result: (VEXTRACTI128128 [a] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VEXTRACTI128128)
+               v.AuxInt = int8ToAuxInt(a)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpGet128Int64x4(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (Get128Int64x4 [a] x)
+       // result: (VEXTRACTI128128 [a] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VEXTRACTI128128)
+               v.AuxInt = int8ToAuxInt(a)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpGet128Int8x32(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (Get128Int8x32 [a] x)
+       // result: (VEXTRACTI128128 [a] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VEXTRACTI128128)
+               v.AuxInt = int8ToAuxInt(a)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpGet128Uint16x16(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (Get128Uint16x16 [a] x)
+       // result: (VEXTRACTI128128 [a] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VEXTRACTI128128)
+               v.AuxInt = int8ToAuxInt(a)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpGet128Uint32x8(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (Get128Uint32x8 [a] x)
+       // result: (VEXTRACTI128128 [a] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VEXTRACTI128128)
+               v.AuxInt = int8ToAuxInt(a)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpGet128Uint64x4(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (Get128Uint64x4 [a] x)
+       // result: (VEXTRACTI128128 [a] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VEXTRACTI128128)
+               v.AuxInt = int8ToAuxInt(a)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpGet128Uint8x32(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (Get128Uint8x32 [a] x)
+       // result: (VEXTRACTI128128 [a] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VEXTRACTI128128)
+               v.AuxInt = int8ToAuxInt(a)
+               v.AddArg(x)
+               return true
+       }
+}
 func rewriteValueAMD64_OpGetElemInt16x8(v *Value) bool {
        v_0 := v.Args[0]
        // match: (GetElemInt16x8 [a] x)
index 3d0e6fbd4aa75f7ebee1801596bda04eaa5abb22..27aad1cc0c449d4469e95eb0fbd4db27a286728a 100644 (file)
@@ -235,6 +235,16 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
        addF(simdPackage, "Uint8x16.GaloisFieldMul", opLen2(ssa.OpGaloisFieldMulUint8x16, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Uint8x32.GaloisFieldMul", opLen2(ssa.OpGaloisFieldMulUint8x32, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Uint8x64.GaloisFieldMul", opLen2(ssa.OpGaloisFieldMulUint8x64, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float32x8.Get128", opLen1Imm8(ssa.OpGet128Float32x8, types.TypeVec128, 0), sys.AMD64)
+       addF(simdPackage, "Float64x4.Get128", opLen1Imm8(ssa.OpGet128Float64x4, types.TypeVec128, 0), sys.AMD64)
+       addF(simdPackage, "Int8x32.Get128", opLen1Imm8(ssa.OpGet128Int8x32, types.TypeVec128, 0), sys.AMD64)
+       addF(simdPackage, "Int16x16.Get128", opLen1Imm8(ssa.OpGet128Int16x16, types.TypeVec128, 0), sys.AMD64)
+       addF(simdPackage, "Int32x8.Get128", opLen1Imm8(ssa.OpGet128Int32x8, types.TypeVec128, 0), sys.AMD64)
+       addF(simdPackage, "Int64x4.Get128", opLen1Imm8(ssa.OpGet128Int64x4, types.TypeVec128, 0), sys.AMD64)
+       addF(simdPackage, "Uint8x32.Get128", opLen1Imm8(ssa.OpGet128Uint8x32, types.TypeVec128, 0), sys.AMD64)
+       addF(simdPackage, "Uint16x16.Get128", opLen1Imm8(ssa.OpGet128Uint16x16, types.TypeVec128, 0), sys.AMD64)
+       addF(simdPackage, "Uint32x8.Get128", opLen1Imm8(ssa.OpGet128Uint32x8, types.TypeVec128, 0), sys.AMD64)
+       addF(simdPackage, "Uint64x4.Get128", opLen1Imm8(ssa.OpGet128Uint64x4, types.TypeVec128, 0), sys.AMD64)
        addF(simdPackage, "Int8x16.GetElem", opLen1Imm8(ssa.OpGetElemInt8x16, types.Types[types.TINT8], 0), sys.AMD64)
        addF(simdPackage, "Int16x8.GetElem", opLen1Imm8(ssa.OpGetElemInt16x8, types.Types[types.TINT16], 0), sys.AMD64)
        addF(simdPackage, "Int32x4.GetElem", opLen1Imm8(ssa.OpGetElemInt32x4, types.Types[types.TINT32], 0), sys.AMD64)
index f99938bb9d29e96a0e008e2a57c0bc603499dac4..1b47d2770ccd61be3ad1eb66dc530258a81127f3 100644 (file)
@@ -161,6 +161,22 @@ func checkInt8Slices(t *testing.T, a, b []int8) {
        }
 }
 
+func checkFloat32Slices(t *testing.T, a, b []float32) {
+       for i := range b {
+               if a[i] != b[i] {
+                       t.Errorf("a and b differ at index %d, a=%3.0f, b=%3.0f", i, a[i], b[i])
+               }
+       }
+}
+
+func checkFloat64Slices(t *testing.T, a, b []float64) {
+       for i := range b {
+               if a[i] != b[i] {
+                       t.Errorf("a and b differ at index %d, a=%3.0f, b=%3.0f", i, a[i], b[i])
+               }
+       }
+}
+
 func TestSlicesInt8(t *testing.T) {
        a := []int8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
                17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}
@@ -209,6 +225,78 @@ func TestSlicesInt8Set128(t *testing.T) {
        checkInt8Slices(t, a, b[16:])
 }
 
+func TestSlicesInt8Get128(t *testing.T) {
+       a := []int8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+               17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}
+       u := simd.LoadInt8x32Slice(a) // 1-32
+       v := u.Get128(0)              // 1-16
+       w := u.Get128(1)              // 17-32
+
+       b := make([]int8, 32, 32)
+       v.StoreSlice(b[:16])
+       w.StoreSlice(b[16:])
+
+       checkInt8Slices(t, a, b)
+}
+
+func TestSlicesFloat32Set128(t *testing.T) {
+       a := []float32{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+               17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}
+       v := simd.LoadFloat32x4Slice(a) // 1-4
+       u := simd.LoadFloat32x8Slice(a) // 1-4
+
+       w := u.Set128(1, v) // 1-4:1-4
+
+       b := make([]float32, 8, 8)
+       w.StoreSlice(b)
+
+       checkFloat32Slices(t, a, b[:4])
+       checkFloat32Slices(t, a, b[4:])
+}
+
+func TestSlicesFloat32Get128(t *testing.T) {
+       a := []float32{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+               17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}
+       u := simd.LoadFloat32x8Slice(a) // 1-8
+       v := u.Get128(0)                // 1-4
+       w := u.Get128(1)                // 5-8
+
+       b := make([]float32, 8, 8)
+       v.StoreSlice(b[:4])
+       w.StoreSlice(b[4:])
+
+       checkFloat32Slices(t, a, b)
+}
+
+func TestSlicesFloat64Set128(t *testing.T) {
+       a := []float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+               17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}
+       v := simd.LoadFloat64x2Slice(a) // 1-2
+       u := simd.LoadFloat64x4Slice(a) // 1-2
+
+       w := u.Set128(1, v) // 1-2:1-2
+
+       b := make([]float64, 4, 4)
+       w.StoreSlice(b)
+
+       checkFloat64Slices(t, a, b[:2])
+       checkFloat64Slices(t, a, b[2:])
+}
+
+func TestSlicesFloat64Get128(t *testing.T) {
+       a := []float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+               17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}
+       u := simd.LoadFloat64x4Slice(a) // 1-4
+       v := u.Get128(0)                // 1-2
+       w := u.Get128(1)                // 3-4
+
+       b := make([]float64, 4, 4)
+       v.StoreSlice(b[:2])
+       w.StoreSlice(b[2:])
+
+       checkFloat64Slices(t, a, b)
+}
+
 func TestSlicesInt8TooShortLoad(t *testing.T) {
        defer func() {
                if r := recover(); r != nil {
index 4a8c0957e5b37e9f753dcadf39f3066de996e9b9..b3f18b383772b697b82b69b56e4d480e7d85cb99 100644 (file)
@@ -7954,6 +7954,7 @@ func testUint64x8UnaryMasked(t *testing.T, v0 []uint64, v1 []int64, want []uint6
 // FloorWithPrecision
 // GaloisFieldAffineTransform
 // GaloisFieldAffineTransformInversed
+// Get128
 // GetElem
 // MaskedCeilWithPrecision
 // MaskedDiffWithCeilWithPrecision
index de54a9ada48cb377fe1be855e66b5ecaa05cf31f..3453843d0f7991402daac869414c51bac08e3bdb 100644 (file)
@@ -1198,6 +1198,58 @@ func (x Uint8x32) GaloisFieldMul(y Uint8x32) Uint8x32
 // Asm: VGF2P8MULB, CPU Feature: AVX512EVEX
 func (x Uint8x64) GaloisFieldMul(y Uint8x64) Uint8x64
 
+/* Get128 */
+
+// Get128 retrieves the upper (1) or lower (0) half of a 256-bit vector, depending on the constant operand.
+//
+// Asm: VEXTRACTF128, CPU Feature: AVX
+func (x Float32x8) Get128(imm uint8) Float32x4
+
+// Get128 retrieves the upper (1) or lower (0) half of a 256-bit vector, depending on the constant operand.
+//
+// Asm: VEXTRACTF128, CPU Feature: AVX
+func (x Float64x4) Get128(imm uint8) Float64x2
+
+// Get128 retrieves the upper (1) or lower (0) half of a 256-bit vector, depending on the constant operand.
+//
+// Asm: VEXTRACTI128, CPU Feature: AVX2
+func (x Int8x32) Get128(imm uint8) Int8x16
+
+// Get128 retrieves the upper (1) or lower (0) half of a 256-bit vector, depending on the constant operand.
+//
+// Asm: VEXTRACTI128, CPU Feature: AVX2
+func (x Int16x16) Get128(imm uint8) Int16x8
+
+// Get128 retrieves the upper (1) or lower (0) half of a 256-bit vector, depending on the constant operand.
+//
+// Asm: VEXTRACTI128, CPU Feature: AVX2
+func (x Int32x8) Get128(imm uint8) Int32x4
+
+// Get128 retrieves the upper (1) or lower (0) half of a 256-bit vector, depending on the constant operand.
+//
+// Asm: VEXTRACTI128, CPU Feature: AVX2
+func (x Int64x4) Get128(imm uint8) Int64x2
+
+// Get128 retrieves the upper (1) or lower (0) half of a 256-bit vector, depending on the constant operand.
+//
+// Asm: VEXTRACTI128, CPU Feature: AVX2
+func (x Uint8x32) Get128(imm uint8) Uint8x16
+
+// Get128 retrieves the upper (1) or lower (0) half of a 256-bit vector, depending on the constant operand.
+//
+// Asm: VEXTRACTI128, CPU Feature: AVX2
+func (x Uint16x16) Get128(imm uint8) Uint16x8
+
+// Get128 retrieves the upper (1) or lower (0) half of a 256-bit vector, depending on the constant operand.
+//
+// Asm: VEXTRACTI128, CPU Feature: AVX2
+func (x Uint32x8) Get128(imm uint8) Uint32x4
+
+// Get128 retrieves the upper (1) or lower (0) half of a 256-bit vector, depending on the constant operand.
+//
+// Asm: VEXTRACTI128, CPU Feature: AVX2
+func (x Uint64x4) Get128(imm uint8) Uint64x2
+
 /* GetElem */
 
 // GetElem retrieves a single constant-indexed element's value.