Cypherpunks repositories - gostls13.git/commitdiff
[dev.simd] simd, cmd/compile: generated code for VPINSR[BWDQ], and test
author David Chase <drchase@google.com>
Fri, 20 Jun 2025 19:30:55 +0000 (15:30 -0400)
committer David Chase <drchase@google.com>
Mon, 23 Jun 2025 17:24:57 +0000 (10:24 -0700)
This is paired with simdgen CL 683055

Change-Id: I91d2c08a97ddd7cf06dd24478d552b962846131c
Reviewed-on: https://go-review.googlesource.com/c/go/+/683035
Reviewed-by: Junyang Shao <shaojunyang@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
src/cmd/compile/internal/amd64/simdssa.go
src/cmd/compile/internal/ssa/_gen/simdAMD64.rules
src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go
src/cmd/compile/internal/ssa/_gen/simdgenericOps.go
src/cmd/compile/internal/ssa/opGen.go
src/cmd/compile/internal/ssa/rewriteAMD64.go
src/cmd/compile/internal/ssagen/simdintrinsics.go
src/simd/simd_test.go
src/simd/stubs_amd64.go

index 7b47a8dddbfadb5be2c008adc7049be3d05fad07..005a2601653322150244e15a1e9d1f332b99ccf6 100644 (file)
@@ -718,6 +718,12 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
                ssa.OpAMD64VPDPBUSDMasked512:
                p = simdFp3k1fp1ResultInArg0(s, v)
 
+       case ssa.OpAMD64VPINSRB128,
+               ssa.OpAMD64VPINSRW128,
+               ssa.OpAMD64VPINSRD128,
+               ssa.OpAMD64VPINSRQ128:
+               p = simdFp1gp1fp1Imm8(s, v)
+
        default:
                // Unknown reg shape
                return false
index cb57ae31b62c42c99ed049da60d6e173ef80c3cd..615686166d1b4dadd357e7a3ac361bbe08180523 100644 (file)
 (SaturatedUnsignedSignedQuadDotProdAccumulateUint32x16 ...) => (VPDPBUSDS512 ...)
 (SaturatedUnsignedSignedQuadDotProdAccumulateUint32x4 ...) => (VPDPBUSDS128 ...)
 (SaturatedUnsignedSignedQuadDotProdAccumulateUint32x8 ...) => (VPDPBUSDS256 ...)
+(SetElemInt16x8 [a] x y) => (VPINSRW128 [a] x y)
+(SetElemInt32x4 [a] x y) => (VPINSRD128 [a] x y)
+(SetElemInt64x2 [a] x y) => (VPINSRQ128 [a] x y)
+(SetElemInt8x16 [a] x y) => (VPINSRB128 [a] x y)
+(SetElemUint16x8 [a] x y) => (VPINSRW128 [a] x y)
+(SetElemUint32x4 [a] x y) => (VPINSRD128 [a] x y)
+(SetElemUint64x2 [a] x y) => (VPINSRQ128 [a] x y)
+(SetElemUint8x16 [a] x y) => (VPINSRB128 [a] x y)
 (SignInt16x16 ...) => (VPSIGNW256 ...)
 (SignInt16x8 ...) => (VPSIGNW128 ...)
 (SignInt32x4 ...) => (VPSIGND128 ...)
index 259f1eff2332b96a504032f622e433dfd7763896..f4627d068cd960450ee94f17fb38f2f25c06c41b 100644 (file)
@@ -645,20 +645,24 @@ func simdAMD64Ops(fp11, fp21, fp2k1, fp1k1fp1, fp2k1fp1, fp2k1k1, fp31, fp3k1fp1
                {name: "VPCMPWMasked512", argLength: 3, reg: fp2k1k1, asm: "VPCMPW", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
                {name: "VPCMPW128", argLength: 2, reg: fp2k1, asm: "VPCMPW", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false},
                {name: "VPCMPWMasked128", argLength: 3, reg: fp2k1k1, asm: "VPCMPW", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
+               {name: "VPINSRW128", argLength: 2, reg: fp1gp1fp1, asm: "VPINSRW", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false},
                {name: "VPCMPD512", argLength: 2, reg: fp2k1, asm: "VPCMPD", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
                {name: "VPCMPDMasked512", argLength: 3, reg: fp2k1k1, asm: "VPCMPD", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
                {name: "VPCMPD128", argLength: 2, reg: fp2k1, asm: "VPCMPD", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false},
                {name: "VPCMPDMasked128", argLength: 3, reg: fp2k1k1, asm: "VPCMPD", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
+               {name: "VPINSRD128", argLength: 2, reg: fp1gp1fp1, asm: "VPINSRD", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false},
                {name: "VPCMPD256", argLength: 2, reg: fp2k1, asm: "VPCMPD", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false},
                {name: "VPCMPDMasked256", argLength: 3, reg: fp2k1k1, asm: "VPCMPD", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
                {name: "VPCMPQ128", argLength: 2, reg: fp2k1, asm: "VPCMPQ", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false},
                {name: "VPCMPQMasked128", argLength: 3, reg: fp2k1k1, asm: "VPCMPQ", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
+               {name: "VPINSRQ128", argLength: 2, reg: fp1gp1fp1, asm: "VPINSRQ", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false},
                {name: "VPCMPQ256", argLength: 2, reg: fp2k1, asm: "VPCMPQ", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false},
                {name: "VPCMPQMasked256", argLength: 3, reg: fp2k1k1, asm: "VPCMPQ", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
                {name: "VPCMPQ512", argLength: 2, reg: fp2k1, asm: "VPCMPQ", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
                {name: "VPCMPQMasked512", argLength: 3, reg: fp2k1k1, asm: "VPCMPQ", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
                {name: "VPCMPB128", argLength: 2, reg: fp2k1, asm: "VPCMPB", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false},
                {name: "VPCMPBMasked128", argLength: 3, reg: fp2k1k1, asm: "VPCMPB", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
+               {name: "VPINSRB128", argLength: 2, reg: fp1gp1fp1, asm: "VPINSRB", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false},
                {name: "VPCMPB256", argLength: 2, reg: fp2k1, asm: "VPCMPB", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false},
                {name: "VPCMPBMasked256", argLength: 3, reg: fp2k1k1, asm: "VPCMPB", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
                {name: "VPCMPB512", argLength: 2, reg: fp2k1, asm: "VPCMPB", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
index ab9b4ffd98c484e75bb41f90a1be559c22536179..ca196cd9e19079de6594cd9c1c654fd19675face 100644 (file)
@@ -1372,5 +1372,13 @@ func simdGenericOps() []opData {
                {name: "RoundWithPrecisionFloat64x8", argLength: 1, commutative: false, aux: "Int8"},
                {name: "TruncSuppressExceptionWithPrecisionFloat64x8", argLength: 1, commutative: false, aux: "Int8"},
                {name: "TruncWithPrecisionFloat64x8", argLength: 1, commutative: false, aux: "Int8"},
+               {name: "SetElemInt16x8", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "SetElemInt32x4", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "SetElemInt64x2", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "SetElemInt8x16", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "SetElemUint16x8", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "SetElemUint32x4", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "SetElemUint64x2", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "SetElemUint8x16", argLength: 2, commutative: false, aux: "Int8"},
        }
 }
index 4b25da4e506ab057b52e56169e827b92baf3dba0..121727e1f6b0033b4862321bfac760b5b9cd0604 100644 (file)
@@ -1838,20 +1838,24 @@ const (
        OpAMD64VPCMPWMasked512
        OpAMD64VPCMPW128
        OpAMD64VPCMPWMasked128
+       OpAMD64VPINSRW128
        OpAMD64VPCMPD512
        OpAMD64VPCMPDMasked512
        OpAMD64VPCMPD128
        OpAMD64VPCMPDMasked128
+       OpAMD64VPINSRD128
        OpAMD64VPCMPD256
        OpAMD64VPCMPDMasked256
        OpAMD64VPCMPQ128
        OpAMD64VPCMPQMasked128
+       OpAMD64VPINSRQ128
        OpAMD64VPCMPQ256
        OpAMD64VPCMPQMasked256
        OpAMD64VPCMPQ512
        OpAMD64VPCMPQMasked512
        OpAMD64VPCMPB128
        OpAMD64VPCMPBMasked128
+       OpAMD64VPINSRB128
        OpAMD64VPCMPB256
        OpAMD64VPCMPBMasked256
        OpAMD64VPCMPB512
@@ -5475,6 +5479,14 @@ const (
        OpRoundWithPrecisionFloat64x8
        OpTruncSuppressExceptionWithPrecisionFloat64x8
        OpTruncWithPrecisionFloat64x8
+       OpSetElemInt16x8
+       OpSetElemInt32x4
+       OpSetElemInt64x2
+       OpSetElemInt8x16
+       OpSetElemUint16x8
+       OpSetElemUint32x4
+       OpSetElemUint64x2
+       OpSetElemUint8x16
 )
 
 var opcodeTable = [...]opInfo{
@@ -27738,6 +27750,21 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:    "VPINSRW128",
+               auxType: auxInt8,
+               argLen:  2,
+               asm:     x86.AVPINSRW,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 49135},      // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+                       outputs: []outputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+               },
+       },
        {
                name:        "VPCMPD512",
                auxType:     auxInt8,
@@ -27803,6 +27830,21 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:    "VPINSRD128",
+               auxType: auxInt8,
+               argLen:  2,
+               asm:     x86.AVPINSRD,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 49135},      // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+                       outputs: []outputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+               },
+       },
        {
                name:    "VPCMPD256",
                auxType: auxInt8,
@@ -27867,6 +27909,21 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:    "VPINSRQ128",
+               auxType: auxInt8,
+               argLen:  2,
+               asm:     x86.AVPINSRQ,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 49135},      // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+                       outputs: []outputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+               },
+       },
        {
                name:    "VPCMPQ256",
                auxType: auxInt8,
@@ -27964,6 +28021,21 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
+       {
+               name:    "VPINSRB128",
+               auxType: auxInt8,
+               argLen:  2,
+               asm:     x86.AVPINSRB,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {1, 49135},      // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+                       outputs: []outputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+               },
+       },
        {
                name:    "VPCMPB256",
                auxType: auxInt8,
@@ -63153,6 +63225,54 @@ var opcodeTable = [...]opInfo{
                argLen:  1,
                generic: true,
        },
+       {
+               name:    "SetElemInt16x8",
+               auxType: auxInt8,
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "SetElemInt32x4",
+               auxType: auxInt8,
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "SetElemInt64x2",
+               auxType: auxInt8,
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "SetElemInt8x16",
+               auxType: auxInt8,
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "SetElemUint16x8",
+               auxType: auxInt8,
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "SetElemUint32x4",
+               auxType: auxInt8,
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "SetElemUint64x2",
+               auxType: auxInt8,
+               argLen:  2,
+               generic: true,
+       },
+       {
+               name:    "SetElemUint8x16",
+               auxType: auxInt8,
+               argLen:  2,
+               generic: true,
+       },
 }
 
 func (o Op) Asm() obj.As          { return opcodeTable[o].asm }
index c532b2caa3084bcfb66d199ba35437383a05272c..7ac8c22e879359b35885e914732aeb0ceea466a6 100644 (file)
@@ -4038,6 +4038,22 @@ func rewriteValueAMD64(v *Value) bool {
                return rewriteValueAMD64_OpSelect1(v)
        case OpSelectN:
                return rewriteValueAMD64_OpSelectN(v)
+       case OpSetElemInt16x8:
+               return rewriteValueAMD64_OpSetElemInt16x8(v)
+       case OpSetElemInt32x4:
+               return rewriteValueAMD64_OpSetElemInt32x4(v)
+       case OpSetElemInt64x2:
+               return rewriteValueAMD64_OpSetElemInt64x2(v)
+       case OpSetElemInt8x16:
+               return rewriteValueAMD64_OpSetElemInt8x16(v)
+       case OpSetElemUint16x8:
+               return rewriteValueAMD64_OpSetElemUint16x8(v)
+       case OpSetElemUint32x4:
+               return rewriteValueAMD64_OpSetElemUint32x4(v)
+       case OpSetElemUint64x2:
+               return rewriteValueAMD64_OpSetElemUint64x2(v)
+       case OpSetElemUint8x16:
+               return rewriteValueAMD64_OpSetElemUint8x16(v)
        case OpSignExt16to32:
                v.Op = OpAMD64MOVWQSX
                return true
@@ -49462,6 +49478,126 @@ func rewriteValueAMD64_OpSelectN(v *Value) bool {
        }
        return false
 }
+func rewriteValueAMD64_OpSetElemInt16x8(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (SetElemInt16x8 [a] x y)
+       // result: (VPINSRW128 [a] x y)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VPINSRW128)
+               v.AuxInt = int8ToAuxInt(a)
+               v.AddArg2(x, y)
+               return true
+       }
+}
+func rewriteValueAMD64_OpSetElemInt32x4(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (SetElemInt32x4 [a] x y)
+       // result: (VPINSRD128 [a] x y)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VPINSRD128)
+               v.AuxInt = int8ToAuxInt(a)
+               v.AddArg2(x, y)
+               return true
+       }
+}
+func rewriteValueAMD64_OpSetElemInt64x2(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (SetElemInt64x2 [a] x y)
+       // result: (VPINSRQ128 [a] x y)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VPINSRQ128)
+               v.AuxInt = int8ToAuxInt(a)
+               v.AddArg2(x, y)
+               return true
+       }
+}
+func rewriteValueAMD64_OpSetElemInt8x16(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (SetElemInt8x16 [a] x y)
+       // result: (VPINSRB128 [a] x y)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VPINSRB128)
+               v.AuxInt = int8ToAuxInt(a)
+               v.AddArg2(x, y)
+               return true
+       }
+}
+func rewriteValueAMD64_OpSetElemUint16x8(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (SetElemUint16x8 [a] x y)
+       // result: (VPINSRW128 [a] x y)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VPINSRW128)
+               v.AuxInt = int8ToAuxInt(a)
+               v.AddArg2(x, y)
+               return true
+       }
+}
+func rewriteValueAMD64_OpSetElemUint32x4(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (SetElemUint32x4 [a] x y)
+       // result: (VPINSRD128 [a] x y)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VPINSRD128)
+               v.AuxInt = int8ToAuxInt(a)
+               v.AddArg2(x, y)
+               return true
+       }
+}
+func rewriteValueAMD64_OpSetElemUint64x2(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (SetElemUint64x2 [a] x y)
+       // result: (VPINSRQ128 [a] x y)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VPINSRQ128)
+               v.AuxInt = int8ToAuxInt(a)
+               v.AddArg2(x, y)
+               return true
+       }
+}
+func rewriteValueAMD64_OpSetElemUint8x16(v *Value) bool {
+       v_1 := v.Args[1]
+       v_0 := v.Args[0]
+       // match: (SetElemUint8x16 [a] x y)
+       // result: (VPINSRB128 [a] x y)
+       for {
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               y := v_1
+               v.reset(OpAMD64VPINSRB128)
+               v.AuxInt = int8ToAuxInt(a)
+               v.AddArg2(x, y)
+               return true
+       }
+}
 func rewriteValueAMD64_OpSlicemask(v *Value) bool {
        v_0 := v.Args[0]
        b := v.Block
index dea1f649490b249beb6dc16f2e412e48fdc72e3f..db4d24997918631f093da11a85af9a05808f8462 100644 (file)
@@ -1290,6 +1290,14 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
        addF(simdPackage, "Uint32x4.SaturatedUnsignedSignedQuadDotProdAccumulate", opLen3(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateUint32x4, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Uint32x8.SaturatedUnsignedSignedQuadDotProdAccumulate", opLen3(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateUint32x8, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Uint32x16.SaturatedUnsignedSignedQuadDotProdAccumulate", opLen3(ssa.OpSaturatedUnsignedSignedQuadDotProdAccumulateUint32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int8x16.SetElem", opLen2Imm8(ssa.OpSetElemInt8x16, types.TypeVec128, 0), sys.AMD64)
+       addF(simdPackage, "Int16x8.SetElem", opLen2Imm8(ssa.OpSetElemInt16x8, types.TypeVec128, 0), sys.AMD64)
+       addF(simdPackage, "Int32x4.SetElem", opLen2Imm8(ssa.OpSetElemInt32x4, types.TypeVec128, 0), sys.AMD64)
+       addF(simdPackage, "Int64x2.SetElem", opLen2Imm8(ssa.OpSetElemInt64x2, types.TypeVec128, 0), sys.AMD64)
+       addF(simdPackage, "Uint8x16.SetElem", opLen2Imm8(ssa.OpSetElemUint8x16, types.TypeVec128, 0), sys.AMD64)
+       addF(simdPackage, "Uint16x8.SetElem", opLen2Imm8(ssa.OpSetElemUint16x8, types.TypeVec128, 0), sys.AMD64)
+       addF(simdPackage, "Uint32x4.SetElem", opLen2Imm8(ssa.OpSetElemUint32x4, types.TypeVec128, 0), sys.AMD64)
+       addF(simdPackage, "Uint64x2.SetElem", opLen2Imm8(ssa.OpSetElemUint64x2, types.TypeVec128, 0), sys.AMD64)
        addF(simdPackage, "Int8x16.Sign", opLen2(ssa.OpSignInt8x16, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Int8x32.Sign", opLen2(ssa.OpSignInt8x32, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Int16x8.Sign", opLen2(ssa.OpSignInt16x8, types.TypeVec128), sys.AMD64)
index 28e25132e63789e08ad43525463f19867435f773..8658631e45200c8bd5a182c562b950035c02834a 100644 (file)
@@ -230,6 +230,19 @@ func TestSlicesInt8(t *testing.T) {
        checkInt8Slices(t, a, b)
 }
 
+func TestSlicesInt8SetElem(t *testing.T) {
+       a := []int8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+               17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}
+       v := simd.LoadInt8x16Slice(a)
+
+       v = v.SetElem(3, 13)
+       a[3] = 13
+
+       b := make([]int8, 16, 16)
+       v.StoreSlice(b)
+       checkInt8Slices(t, a, b)
+}
+
 func TestSlicesInt8TooShortLoad(t *testing.T) {
        defer func() {
                if r := recover(); r != nil {
index 95d8b99c8404e9353fac1e435d6e79a4dd2c0704..aeb8c6bda7bde10a27778361a1be42e00c5037ae 100644 (file)
@@ -7242,6 +7242,48 @@ func (x Uint32x8) SaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x32, z Int
 // Asm: VPDPBUSDS, CPU Feature: AVX512EVEX
 func (x Uint32x16) SaturatedUnsignedSignedQuadDotProdAccumulate(y Uint8x64, z Int8x64) Uint32x16
 
+/* SetElem */
+
+// SetElem sets a single constant-indexed element's value
+//
+// Asm: VPINSRB, CPU Feature: AVX
+func (x Int8x16) SetElem(imm uint8, y int8) Int8x16
+
+// SetElem sets a single constant-indexed element's value
+//
+// Asm: VPINSRW, CPU Feature: AVX
+func (x Int16x8) SetElem(imm uint8, y int16) Int16x8
+
+// SetElem sets a single constant-indexed element's value
+//
+// Asm: VPINSRD, CPU Feature: AVX
+func (x Int32x4) SetElem(imm uint8, y int8) Int32x4
+
+// SetElem sets a single constant-indexed element's value
+//
+// Asm: VPINSRQ, CPU Feature: AVX
+func (x Int64x2) SetElem(imm uint8, y int64) Int64x2
+
+// SetElem sets a single constant-indexed element's value
+//
+// Asm: VPINSRB, CPU Feature: AVX
+func (x Uint8x16) SetElem(imm uint8, y uint8) Uint8x16
+
+// SetElem sets a single constant-indexed element's value
+//
+// Asm: VPINSRW, CPU Feature: AVX
+func (x Uint16x8) SetElem(imm uint8, y uint16) Uint16x8
+
+// SetElem sets a single constant-indexed element's value
+//
+// Asm: VPINSRD, CPU Feature: AVX
+func (x Uint32x4) SetElem(imm uint8, y uint8) Uint32x4
+
+// SetElem sets a single constant-indexed element's value
+//
+// Asm: VPINSRQ, CPU Feature: AVX
+func (x Uint64x2) SetElem(imm uint8, y uint64) Uint64x2
+
 /* Sign */
 
 // Sign returns the product of the first operand with -1, 0, or 1,