Cypherpunks repositories - gostls13.git/commitdiff
[dev.simd] cmd/compile: add simd VPEXTRA*
author: David Chase <drchase@google.com>
Tue, 24 Jun 2025 22:29:38 +0000 (18:29 -0400)
committer: David Chase <drchase@google.com>
Thu, 26 Jun 2025 20:34:29 +0000 (13:34 -0700)
This CL is generated by simdgen CL 683836
and this CL should be submitted after its
generator.

Change-Id: I1aa893b185826ad1f9fb60b85c75eda31f70623b
Reviewed-on: https://go-review.googlesource.com/c/go/+/683797
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
src/cmd/compile/internal/amd64/simdssa.go
src/cmd/compile/internal/ssa/_gen/simdAMD64.rules
src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go
src/cmd/compile/internal/ssa/_gen/simdgenericOps.go
src/cmd/compile/internal/ssa/opGen.go
src/cmd/compile/internal/ssa/rewriteAMD64.go
src/cmd/compile/internal/ssagen/simdintrinsics.go
src/simd/simd_test.go
src/simd/stubs_amd64.go

index 9364722c3a788db783b043cf95d1c22cefeffaea..5297680357844bcf4ba14774eb1fd79acefab034 100644 (file)
@@ -724,6 +724,12 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
                ssa.OpAMD64VPINSRQ128:
                p = simdFpgpfpImm8(s, v)
 
+       case ssa.OpAMD64VPEXTRB128,
+               ssa.OpAMD64VPEXTRW128,
+               ssa.OpAMD64VPEXTRD128,
+               ssa.OpAMD64VPEXTRQ128:
+               p = simdFpgpImm8(s, v)
+
        default:
                // Unknown reg shape
                return false
index 615686166d1b4dadd357e7a3ac361bbe08180523..bb0476fc20c3e4d25c4383f773de2f6ba9efae33 100644 (file)
 (FusedMultiplySubAddFloat64x2 ...) => (VFMSUBADD213PD128 ...)
 (FusedMultiplySubAddFloat64x4 ...) => (VFMSUBADD213PD256 ...)
 (FusedMultiplySubAddFloat64x8 ...) => (VFMSUBADD213PD512 ...)
+(GetElemInt16x8 [a] x) => (VPEXTRW128 [a] x)
+(GetElemInt32x4 [a] x) => (VPEXTRD128 [a] x)
+(GetElemInt64x2 [a] x) => (VPEXTRQ128 [a] x)
+(GetElemInt8x16 [a] x) => (VPEXTRB128 [a] x)
+(GetElemUint16x8 [a] x) => (VPEXTRW128 [a] x)
+(GetElemUint32x4 [a] x) => (VPEXTRD128 [a] x)
+(GetElemUint64x2 [a] x) => (VPEXTRQ128 [a] x)
+(GetElemUint8x16 [a] x) => (VPEXTRB128 [a] x)
 (GreaterFloat32x16 x y) => (VPMOVMToVec32x16 (VCMPPS512 [6] x y))
 (GreaterFloat32x4 x y) => (VCMPPS128 [6] x y)
 (GreaterFloat32x8 x y) => (VCMPPS256 [6] x y)
index 88d90c2f85aadbc3406612c0fefdf5f002bcbdd1..93b136230d077868b3fd713693036fac6168513c 100644 (file)
@@ -643,16 +643,19 @@ func simdAMD64Ops(fp11, fp21, fp2k, fpkfp, fp2kfp, fp2kk, fp31, fp3kfp, fpgpfp,
                {name: "VPCMPWMasked256", argLength: 3, reg: fp2kk, asm: "VPCMPW", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
                {name: "VPCMPW512", argLength: 2, reg: fp2k, asm: "VPCMPW", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
                {name: "VPCMPWMasked512", argLength: 3, reg: fp2kk, asm: "VPCMPW", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
+               {name: "VPEXTRW128", argLength: 1, reg: fpgp, asm: "VPEXTRW", aux: "Int8", commutative: false, typ: "int16", resultInArg0: false},
                {name: "VPCMPW128", argLength: 2, reg: fp2k, asm: "VPCMPW", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false},
                {name: "VPCMPWMasked128", argLength: 3, reg: fp2kk, asm: "VPCMPW", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
                {name: "VPINSRW128", argLength: 2, reg: fpgpfp, asm: "VPINSRW", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false},
                {name: "VPCMPD512", argLength: 2, reg: fp2k, asm: "VPCMPD", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
                {name: "VPCMPDMasked512", argLength: 3, reg: fp2kk, asm: "VPCMPD", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
+               {name: "VPEXTRD128", argLength: 1, reg: fpgp, asm: "VPEXTRD", aux: "Int8", commutative: false, typ: "int32", resultInArg0: false},
                {name: "VPCMPD128", argLength: 2, reg: fp2k, asm: "VPCMPD", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false},
                {name: "VPCMPDMasked128", argLength: 3, reg: fp2kk, asm: "VPCMPD", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
                {name: "VPINSRD128", argLength: 2, reg: fpgpfp, asm: "VPINSRD", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false},
                {name: "VPCMPD256", argLength: 2, reg: fp2k, asm: "VPCMPD", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false},
                {name: "VPCMPDMasked256", argLength: 3, reg: fp2kk, asm: "VPCMPD", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
+               {name: "VPEXTRQ128", argLength: 1, reg: fpgp, asm: "VPEXTRQ", aux: "Int8", commutative: false, typ: "int64", resultInArg0: false},
                {name: "VPCMPQ128", argLength: 2, reg: fp2k, asm: "VPCMPQ", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false},
                {name: "VPCMPQMasked128", argLength: 3, reg: fp2kk, asm: "VPCMPQ", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
                {name: "VPINSRQ128", argLength: 2, reg: fpgpfp, asm: "VPINSRQ", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false},
@@ -660,6 +663,7 @@ func simdAMD64Ops(fp11, fp21, fp2k, fpkfp, fp2kfp, fp2kk, fp31, fp3kfp, fpgpfp,
                {name: "VPCMPQMasked256", argLength: 3, reg: fp2kk, asm: "VPCMPQ", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
                {name: "VPCMPQ512", argLength: 2, reg: fp2k, asm: "VPCMPQ", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
                {name: "VPCMPQMasked512", argLength: 3, reg: fp2kk, asm: "VPCMPQ", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
+               {name: "VPEXTRB128", argLength: 1, reg: fpgp, asm: "VPEXTRB", aux: "Int8", commutative: false, typ: "int8", resultInArg0: false},
                {name: "VPCMPB128", argLength: 2, reg: fp2k, asm: "VPCMPB", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false},
                {name: "VPCMPBMasked128", argLength: 3, reg: fp2kk, asm: "VPCMPB", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
                {name: "VPINSRB128", argLength: 2, reg: fpgpfp, asm: "VPINSRB", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false},
index ca196cd9e19079de6594cd9c1c654fd19675face..1c33483f4242f045d110436a29c17f3ce5b44f8d 100644 (file)
@@ -1372,13 +1372,21 @@ func simdGenericOps() []opData {
                {name: "RoundWithPrecisionFloat64x8", argLength: 1, commutative: false, aux: "Int8"},
                {name: "TruncSuppressExceptionWithPrecisionFloat64x8", argLength: 1, commutative: false, aux: "Int8"},
                {name: "TruncWithPrecisionFloat64x8", argLength: 1, commutative: false, aux: "Int8"},
+               {name: "GetElemInt16x8", argLength: 1, commutative: false, aux: "Int8"},
                {name: "SetElemInt16x8", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "GetElemInt32x4", argLength: 1, commutative: false, aux: "Int8"},
                {name: "SetElemInt32x4", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "GetElemInt64x2", argLength: 1, commutative: false, aux: "Int8"},
                {name: "SetElemInt64x2", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "GetElemInt8x16", argLength: 1, commutative: false, aux: "Int8"},
                {name: "SetElemInt8x16", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "GetElemUint16x8", argLength: 1, commutative: false, aux: "Int8"},
                {name: "SetElemUint16x8", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "GetElemUint32x4", argLength: 1, commutative: false, aux: "Int8"},
                {name: "SetElemUint32x4", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "GetElemUint64x2", argLength: 1, commutative: false, aux: "Int8"},
                {name: "SetElemUint64x2", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "GetElemUint8x16", argLength: 1, commutative: false, aux: "Int8"},
                {name: "SetElemUint8x16", argLength: 2, commutative: false, aux: "Int8"},
        }
 }
index 121727e1f6b0033b4862321bfac760b5b9cd0604..7a1126d433f93c19a622297e505f238f08ea090f 100644 (file)
@@ -1836,16 +1836,19 @@ const (
        OpAMD64VPCMPWMasked256
        OpAMD64VPCMPW512
        OpAMD64VPCMPWMasked512
+       OpAMD64VPEXTRW128
        OpAMD64VPCMPW128
        OpAMD64VPCMPWMasked128
        OpAMD64VPINSRW128
        OpAMD64VPCMPD512
        OpAMD64VPCMPDMasked512
+       OpAMD64VPEXTRD128
        OpAMD64VPCMPD128
        OpAMD64VPCMPDMasked128
        OpAMD64VPINSRD128
        OpAMD64VPCMPD256
        OpAMD64VPCMPDMasked256
+       OpAMD64VPEXTRQ128
        OpAMD64VPCMPQ128
        OpAMD64VPCMPQMasked128
        OpAMD64VPINSRQ128
@@ -1853,6 +1856,7 @@ const (
        OpAMD64VPCMPQMasked256
        OpAMD64VPCMPQ512
        OpAMD64VPCMPQMasked512
+       OpAMD64VPEXTRB128
        OpAMD64VPCMPB128
        OpAMD64VPCMPBMasked128
        OpAMD64VPINSRB128
@@ -5479,13 +5483,21 @@ const (
        OpRoundWithPrecisionFloat64x8
        OpTruncSuppressExceptionWithPrecisionFloat64x8
        OpTruncWithPrecisionFloat64x8
+       OpGetElemInt16x8
        OpSetElemInt16x8
+       OpGetElemInt32x4
        OpSetElemInt32x4
+       OpGetElemInt64x2
        OpSetElemInt64x2
+       OpGetElemInt8x16
        OpSetElemInt8x16
+       OpGetElemUint16x8
        OpSetElemUint16x8
+       OpGetElemUint32x4
        OpSetElemUint32x4
+       OpGetElemUint64x2
        OpSetElemUint64x2
+       OpGetElemUint8x16
        OpSetElemUint8x16
 )
 
@@ -27718,6 +27730,20 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:    "VPEXTRW128",
+               auxType: auxInt8,
+               argLen:  1,
+               asm:     x86.AVPEXTRW,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+                       outputs: []outputInfo{
+                               {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
+                       },
+               },
+       },
        {
                name:    "VPCMPW128",
                auxType: auxInt8,
@@ -27798,6 +27824,20 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:    "VPEXTRD128",
+               auxType: auxInt8,
+               argLen:  1,
+               asm:     x86.AVPEXTRD,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+                       outputs: []outputInfo{
+                               {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
+                       },
+               },
+       },
        {
                name:    "VPCMPD128",
                auxType: auxInt8,
@@ -27877,6 +27917,20 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:    "VPEXTRQ128",
+               auxType: auxInt8,
+               argLen:  1,
+               asm:     x86.AVPEXTRQ,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+                       outputs: []outputInfo{
+                               {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
+                       },
+               },
+       },
        {
                name:    "VPCMPQ128",
                auxType: auxInt8,
@@ -27989,6 +28043,20 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:    "VPEXTRB128",
+               auxType: auxInt8,
+               argLen:  1,
+               asm:     x86.AVPEXTRB,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+                       outputs: []outputInfo{
+                               {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
+                       },
+               },
+       },
        {
                name:    "VPCMPB128",
                auxType: auxInt8,
@@ -63225,48 +63293,96 @@ var opcodeTable = [...]opInfo{
                argLen:  1,
                generic: true,
        },
+       {
+               name:    "GetElemInt16x8",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
        {
                name:    "SetElemInt16x8",
                auxType: auxInt8,
                argLen:  2,
                generic: true,
        },
+       {
+               name:    "GetElemInt32x4",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
        {
                name:    "SetElemInt32x4",
                auxType: auxInt8,
                argLen:  2,
                generic: true,
        },
+       {
+               name:    "GetElemInt64x2",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
        {
                name:    "SetElemInt64x2",
                auxType: auxInt8,
                argLen:  2,
                generic: true,
        },
+       {
+               name:    "GetElemInt8x16",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
        {
                name:    "SetElemInt8x16",
                auxType: auxInt8,
                argLen:  2,
                generic: true,
        },
+       {
+               name:    "GetElemUint16x8",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
        {
                name:    "SetElemUint16x8",
                auxType: auxInt8,
                argLen:  2,
                generic: true,
        },
+       {
+               name:    "GetElemUint32x4",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
        {
                name:    "SetElemUint32x4",
                auxType: auxInt8,
                argLen:  2,
                generic: true,
        },
+       {
+               name:    "GetElemUint64x2",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
        {
                name:    "SetElemUint64x2",
                auxType: auxInt8,
                argLen:  2,
                generic: true,
        },
+       {
+               name:    "GetElemUint8x16",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
        {
                name:    "SetElemUint8x16",
                auxType: auxInt8,
index 7ac8c22e879359b35885e914732aeb0ceea466a6..668024a00fb52b2ff968ca6d3124b52cff96d41b 100644 (file)
@@ -1448,6 +1448,22 @@ func rewriteValueAMD64(v *Value) bool {
        case OpGetClosurePtr:
                v.Op = OpAMD64LoweredGetClosurePtr
                return true
+       case OpGetElemInt16x8:
+               return rewriteValueAMD64_OpGetElemInt16x8(v)
+       case OpGetElemInt32x4:
+               return rewriteValueAMD64_OpGetElemInt32x4(v)
+       case OpGetElemInt64x2:
+               return rewriteValueAMD64_OpGetElemInt64x2(v)
+       case OpGetElemInt8x16:
+               return rewriteValueAMD64_OpGetElemInt8x16(v)
+       case OpGetElemUint16x8:
+               return rewriteValueAMD64_OpGetElemUint16x8(v)
+       case OpGetElemUint32x4:
+               return rewriteValueAMD64_OpGetElemUint32x4(v)
+       case OpGetElemUint64x2:
+               return rewriteValueAMD64_OpGetElemUint64x2(v)
+       case OpGetElemUint8x16:
+               return rewriteValueAMD64_OpGetElemUint8x16(v)
        case OpGetG:
                return rewriteValueAMD64_OpGetG(v)
        case OpGreaterEqualFloat32x16:
@@ -30549,6 +30565,110 @@ func rewriteValueAMD64_OpFloorWithPrecisionFloat64x8(v *Value) bool {
                return true
        }
 }
+func rewriteValueAMD64_OpGetElemInt16x8(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (GetElemInt16x8 [a] x)
+       // result: (VPEXTRW128 [a] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VPEXTRW128)
+               v.AuxInt = int8ToAuxInt(a)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpGetElemInt32x4(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (GetElemInt32x4 [a] x)
+       // result: (VPEXTRD128 [a] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VPEXTRD128)
+               v.AuxInt = int8ToAuxInt(a)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpGetElemInt64x2(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (GetElemInt64x2 [a] x)
+       // result: (VPEXTRQ128 [a] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VPEXTRQ128)
+               v.AuxInt = int8ToAuxInt(a)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpGetElemInt8x16(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (GetElemInt8x16 [a] x)
+       // result: (VPEXTRB128 [a] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VPEXTRB128)
+               v.AuxInt = int8ToAuxInt(a)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpGetElemUint16x8(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (GetElemUint16x8 [a] x)
+       // result: (VPEXTRW128 [a] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VPEXTRW128)
+               v.AuxInt = int8ToAuxInt(a)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpGetElemUint32x4(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (GetElemUint32x4 [a] x)
+       // result: (VPEXTRD128 [a] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VPEXTRD128)
+               v.AuxInt = int8ToAuxInt(a)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpGetElemUint64x2(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (GetElemUint64x2 [a] x)
+       // result: (VPEXTRQ128 [a] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VPEXTRQ128)
+               v.AuxInt = int8ToAuxInt(a)
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpGetElemUint8x16(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (GetElemUint8x16 [a] x)
+       // result: (VPEXTRB128 [a] x)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VPEXTRB128)
+               v.AuxInt = int8ToAuxInt(a)
+               v.AddArg(x)
+               return true
+       }
+}
 func rewriteValueAMD64_OpGetG(v *Value) bool {
        v_0 := v.Args[0]
        // match: (GetG mem)
index db4d24997918631f093da11a85af9a05808f8462..5d6ae7e3c06b7aad348cf60bab4e4bae72a5a797 100644 (file)
@@ -262,6 +262,14 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
        addF(simdPackage, "Float64x2.FusedMultiplySubAdd", opLen3(ssa.OpFusedMultiplySubAddFloat64x2, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Float64x4.FusedMultiplySubAdd", opLen3(ssa.OpFusedMultiplySubAddFloat64x4, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float64x8.FusedMultiplySubAdd", opLen3(ssa.OpFusedMultiplySubAddFloat64x8, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int8x16.GetElem", opLen1Imm8(ssa.OpGetElemInt8x16, types.Types[types.TINT8], 0), sys.AMD64)
+       addF(simdPackage, "Int16x8.GetElem", opLen1Imm8(ssa.OpGetElemInt16x8, types.Types[types.TINT16], 0), sys.AMD64)
+       addF(simdPackage, "Int32x4.GetElem", opLen1Imm8(ssa.OpGetElemInt32x4, types.Types[types.TINT32], 0), sys.AMD64)
+       addF(simdPackage, "Int64x2.GetElem", opLen1Imm8(ssa.OpGetElemInt64x2, types.Types[types.TINT64], 0), sys.AMD64)
+       addF(simdPackage, "Uint8x16.GetElem", opLen1Imm8(ssa.OpGetElemUint8x16, types.Types[types.TUINT8], 0), sys.AMD64)
+       addF(simdPackage, "Uint16x8.GetElem", opLen1Imm8(ssa.OpGetElemUint16x8, types.Types[types.TUINT16], 0), sys.AMD64)
+       addF(simdPackage, "Uint32x4.GetElem", opLen1Imm8(ssa.OpGetElemUint32x4, types.Types[types.TUINT32], 0), sys.AMD64)
+       addF(simdPackage, "Uint64x2.GetElem", opLen1Imm8(ssa.OpGetElemUint64x2, types.Types[types.TUINT64], 0), sys.AMD64)
        addF(simdPackage, "Int8x16.Greater", opLen2(ssa.OpGreaterInt8x16, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Int8x32.Greater", opLen2(ssa.OpGreaterInt8x32, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Int16x8.Greater", opLen2(ssa.OpGreaterInt16x8, types.TypeVec128), sys.AMD64)
index 6df634b428f5d8baa7487a089009c63ba182c569..084b0af53937e62eac3f61b9cfc0db9084f33f93 100644 (file)
@@ -183,6 +183,16 @@ func TestSlicesInt8SetElem(t *testing.T) {
        checkInt8Slices(t, a, b)
 }
 
+func TestSlicesInt8GetElem(t *testing.T) {
+       a := []int8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+               17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}
+       v := simd.LoadInt8x16Slice(a)
+       e := v.GetElem(2)
+       if e != a[2] {
+               t.Errorf("GetElem(2) = %d != a[2] = %d", e, a[2])
+       }
+
+}
 func TestSlicesInt8TooShortLoad(t *testing.T) {
        defer func() {
                if r := recover(); r != nil {
index 66ff8c545e6978a72fb4c1d727a7a5d6683e40d9..5037e4e024e1c93aa531b686776cf934594275d4 100644 (file)
@@ -1426,6 +1426,48 @@ func (x Float64x4) FusedMultiplySubAdd(y Float64x4, z Float64x4) Float64x4
 // Asm: VFMSUBADD213PD, CPU Feature: AVX512EVEX
 func (x Float64x8) FusedMultiplySubAdd(y Float64x8, z Float64x8) Float64x8
 
+/* GetElem */
+
+// GetElem retrieves a single constant-indexed element's value.
+//
+// Asm: VPEXTRB, CPU Feature: AVX512EVEX
+func (x Int8x16) GetElem(imm8 uint8) int8
+
+// GetElem retrieves a single constant-indexed element's value.
+//
+// Asm: VPEXTRW, CPU Feature: AVX512EVEX
+func (x Int16x8) GetElem(imm8 uint8) int16
+
+// GetElem retrieves a single constant-indexed element's value.
+//
+// Asm: VPEXTRD, CPU Feature: AVX
+func (x Int32x4) GetElem(imm8 uint8) int32
+
+// GetElem retrieves a single constant-indexed element's value.
+//
+// Asm: VPEXTRQ, CPU Feature: AVX
+func (x Int64x2) GetElem(imm8 uint8) int64
+
+// GetElem retrieves a single constant-indexed element's value.
+//
+// Asm: VPEXTRB, CPU Feature: AVX512EVEX
+func (x Uint8x16) GetElem(imm8 uint8) uint8
+
+// GetElem retrieves a single constant-indexed element's value.
+//
+// Asm: VPEXTRW, CPU Feature: AVX512EVEX
+func (x Uint16x8) GetElem(imm8 uint8) uint16
+
+// GetElem retrieves a single constant-indexed element's value.
+//
+// Asm: VPEXTRD, CPU Feature: AVX
+func (x Uint32x4) GetElem(imm8 uint8) uint32
+
+// GetElem retrieves a single constant-indexed element's value.
+//
+// Asm: VPEXTRQ, CPU Feature: AVX
+func (x Uint64x2) GetElem(imm8 uint8) uint64
+
 /* Greater */
 
 // Greater compares for greater than.