From 7fadfa9638b8b2d7566677456dbd31acbc7c42cc Mon Sep 17 00:00:00 2001 From: David Chase Date: Tue, 24 Jun 2025 18:29:38 -0400 Subject: [PATCH] [dev.simd] cmd/compile: add simd VPEXTR* This CL is generated by simdgen CL 683836 and should be submitted after that generator CL. Change-Id: I1aa893b185826ad1f9fb60b85c75eda31f70623b Reviewed-on: https://go-review.googlesource.com/c/go/+/683797 LUCI-TryBot-Result: Go LUCI Reviewed-by: Cherry Mui --- src/cmd/compile/internal/amd64/simdssa.go | 6 + .../compile/internal/ssa/_gen/simdAMD64.rules | 8 ++ .../compile/internal/ssa/_gen/simdAMD64ops.go | 4 + .../internal/ssa/_gen/simdgenericOps.go | 8 ++ src/cmd/compile/internal/ssa/opGen.go | 116 +++++++++++++++++ src/cmd/compile/internal/ssa/rewriteAMD64.go | 120 ++++++++++++++++++ .../compile/internal/ssagen/simdintrinsics.go | 8 ++ src/simd/simd_test.go | 10 ++ src/simd/stubs_amd64.go | 42 ++++++ 9 files changed, 322 insertions(+) diff --git a/src/cmd/compile/internal/amd64/simdssa.go b/src/cmd/compile/internal/amd64/simdssa.go index 9364722c3a..5297680357 100644 --- a/src/cmd/compile/internal/amd64/simdssa.go +++ b/src/cmd/compile/internal/amd64/simdssa.go @@ -724,6 +724,12 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool { ssa.OpAMD64VPINSRQ128: p = simdFpgpfpImm8(s, v) + case ssa.OpAMD64VPEXTRB128, + ssa.OpAMD64VPEXTRW128, + ssa.OpAMD64VPEXTRD128, + ssa.OpAMD64VPEXTRQ128: + p = simdFpgpImm8(s, v) + default: // Unknown reg shape return false diff --git a/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules b/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules index 615686166d..bb0476fc20 100644 --- a/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules +++ b/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules @@ -251,6 +251,14 @@ (FusedMultiplySubAddFloat64x2 ...) => (VFMSUBADD213PD128 ...) (FusedMultiplySubAddFloat64x4 ...) => (VFMSUBADD213PD256 ...) (FusedMultiplySubAddFloat64x8 ...) => (VFMSUBADD213PD512 ...) 
+(GetElemInt16x8 [a] x) => (VPEXTRW128 [a] x) +(GetElemInt32x4 [a] x) => (VPEXTRD128 [a] x) +(GetElemInt64x2 [a] x) => (VPEXTRQ128 [a] x) +(GetElemInt8x16 [a] x) => (VPEXTRB128 [a] x) +(GetElemUint16x8 [a] x) => (VPEXTRW128 [a] x) +(GetElemUint32x4 [a] x) => (VPEXTRD128 [a] x) +(GetElemUint64x2 [a] x) => (VPEXTRQ128 [a] x) +(GetElemUint8x16 [a] x) => (VPEXTRB128 [a] x) (GreaterFloat32x16 x y) => (VPMOVMToVec32x16 (VCMPPS512 [6] x y)) (GreaterFloat32x4 x y) => (VCMPPS128 [6] x y) (GreaterFloat32x8 x y) => (VCMPPS256 [6] x y) diff --git a/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go b/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go index 88d90c2f85..93b136230d 100644 --- a/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go +++ b/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go @@ -643,16 +643,19 @@ func simdAMD64Ops(fp11, fp21, fp2k, fpkfp, fp2kfp, fp2kk, fp31, fp3kfp, fpgpfp, {name: "VPCMPWMasked256", argLength: 3, reg: fp2kk, asm: "VPCMPW", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, {name: "VPCMPW512", argLength: 2, reg: fp2k, asm: "VPCMPW", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, {name: "VPCMPWMasked512", argLength: 3, reg: fp2kk, asm: "VPCMPW", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, + {name: "VPEXTRW128", argLength: 1, reg: fpgp, asm: "VPEXTRW", aux: "Int8", commutative: false, typ: "int16", resultInArg0: false}, {name: "VPCMPW128", argLength: 2, reg: fp2k, asm: "VPCMPW", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false}, {name: "VPCMPWMasked128", argLength: 3, reg: fp2kk, asm: "VPCMPW", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, {name: "VPINSRW128", argLength: 2, reg: fpgpfp, asm: "VPINSRW", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPCMPD512", argLength: 2, reg: fp2k, asm: "VPCMPD", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, {name: "VPCMPDMasked512", argLength: 3, reg: fp2kk, 
asm: "VPCMPD", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, + {name: "VPEXTRD128", argLength: 1, reg: fpgp, asm: "VPEXTRD", aux: "Int8", commutative: false, typ: "int32", resultInArg0: false}, {name: "VPCMPD128", argLength: 2, reg: fp2k, asm: "VPCMPD", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false}, {name: "VPCMPDMasked128", argLength: 3, reg: fp2kk, asm: "VPCMPD", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, {name: "VPINSRD128", argLength: 2, reg: fpgpfp, asm: "VPINSRD", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, {name: "VPCMPD256", argLength: 2, reg: fp2k, asm: "VPCMPD", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false}, {name: "VPCMPDMasked256", argLength: 3, reg: fp2kk, asm: "VPCMPD", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, + {name: "VPEXTRQ128", argLength: 1, reg: fpgp, asm: "VPEXTRQ", aux: "Int8", commutative: false, typ: "int64", resultInArg0: false}, {name: "VPCMPQ128", argLength: 2, reg: fp2k, asm: "VPCMPQ", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false}, {name: "VPCMPQMasked128", argLength: 3, reg: fp2kk, asm: "VPCMPQ", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, {name: "VPINSRQ128", argLength: 2, reg: fpgpfp, asm: "VPINSRQ", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, @@ -660,6 +663,7 @@ func simdAMD64Ops(fp11, fp21, fp2k, fpkfp, fp2kfp, fp2kk, fp31, fp3kfp, fpgpfp, {name: "VPCMPQMasked256", argLength: 3, reg: fp2kk, asm: "VPCMPQ", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, {name: "VPCMPQ512", argLength: 2, reg: fp2k, asm: "VPCMPQ", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, {name: "VPCMPQMasked512", argLength: 3, reg: fp2kk, asm: "VPCMPQ", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, + {name: "VPEXTRB128", argLength: 1, reg: fpgp, asm: "VPEXTRB", aux: "Int8", commutative: false, typ: 
"int8", resultInArg0: false}, {name: "VPCMPB128", argLength: 2, reg: fp2k, asm: "VPCMPB", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false}, {name: "VPCMPBMasked128", argLength: 3, reg: fp2kk, asm: "VPCMPB", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false}, {name: "VPINSRB128", argLength: 2, reg: fpgpfp, asm: "VPINSRB", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false}, diff --git a/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go b/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go index ca196cd9e1..1c33483f42 100644 --- a/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go +++ b/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go @@ -1372,13 +1372,21 @@ func simdGenericOps() []opData { {name: "RoundWithPrecisionFloat64x8", argLength: 1, commutative: false, aux: "Int8"}, {name: "TruncSuppressExceptionWithPrecisionFloat64x8", argLength: 1, commutative: false, aux: "Int8"}, {name: "TruncWithPrecisionFloat64x8", argLength: 1, commutative: false, aux: "Int8"}, + {name: "GetElemInt16x8", argLength: 1, commutative: false, aux: "Int8"}, {name: "SetElemInt16x8", argLength: 2, commutative: false, aux: "Int8"}, + {name: "GetElemInt32x4", argLength: 1, commutative: false, aux: "Int8"}, {name: "SetElemInt32x4", argLength: 2, commutative: false, aux: "Int8"}, + {name: "GetElemInt64x2", argLength: 1, commutative: false, aux: "Int8"}, {name: "SetElemInt64x2", argLength: 2, commutative: false, aux: "Int8"}, + {name: "GetElemInt8x16", argLength: 1, commutative: false, aux: "Int8"}, {name: "SetElemInt8x16", argLength: 2, commutative: false, aux: "Int8"}, + {name: "GetElemUint16x8", argLength: 1, commutative: false, aux: "Int8"}, {name: "SetElemUint16x8", argLength: 2, commutative: false, aux: "Int8"}, + {name: "GetElemUint32x4", argLength: 1, commutative: false, aux: "Int8"}, {name: "SetElemUint32x4", argLength: 2, commutative: false, aux: "Int8"}, + {name: "GetElemUint64x2", argLength: 1, commutative: false, aux: "Int8"}, 
{name: "SetElemUint64x2", argLength: 2, commutative: false, aux: "Int8"}, + {name: "GetElemUint8x16", argLength: 1, commutative: false, aux: "Int8"}, {name: "SetElemUint8x16", argLength: 2, commutative: false, aux: "Int8"}, } } diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go index 121727e1f6..7a1126d433 100644 --- a/src/cmd/compile/internal/ssa/opGen.go +++ b/src/cmd/compile/internal/ssa/opGen.go @@ -1836,16 +1836,19 @@ const ( OpAMD64VPCMPWMasked256 OpAMD64VPCMPW512 OpAMD64VPCMPWMasked512 + OpAMD64VPEXTRW128 OpAMD64VPCMPW128 OpAMD64VPCMPWMasked128 OpAMD64VPINSRW128 OpAMD64VPCMPD512 OpAMD64VPCMPDMasked512 + OpAMD64VPEXTRD128 OpAMD64VPCMPD128 OpAMD64VPCMPDMasked128 OpAMD64VPINSRD128 OpAMD64VPCMPD256 OpAMD64VPCMPDMasked256 + OpAMD64VPEXTRQ128 OpAMD64VPCMPQ128 OpAMD64VPCMPQMasked128 OpAMD64VPINSRQ128 @@ -1853,6 +1856,7 @@ const ( OpAMD64VPCMPQMasked256 OpAMD64VPCMPQ512 OpAMD64VPCMPQMasked512 + OpAMD64VPEXTRB128 OpAMD64VPCMPB128 OpAMD64VPCMPBMasked128 OpAMD64VPINSRB128 @@ -5479,13 +5483,21 @@ const ( OpRoundWithPrecisionFloat64x8 OpTruncSuppressExceptionWithPrecisionFloat64x8 OpTruncWithPrecisionFloat64x8 + OpGetElemInt16x8 OpSetElemInt16x8 + OpGetElemInt32x4 OpSetElemInt32x4 + OpGetElemInt64x2 OpSetElemInt64x2 + OpGetElemInt8x16 OpSetElemInt8x16 + OpGetElemUint16x8 OpSetElemUint16x8 + OpGetElemUint32x4 OpSetElemUint32x4 + OpGetElemUint64x2 OpSetElemUint64x2 + OpGetElemUint8x16 OpSetElemUint8x16 ) @@ -27718,6 +27730,20 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPEXTRW128", + auxType: auxInt8, + argLen: 1, + asm: x86.AVPEXTRW, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15 + }, + }, + }, { name: "VPCMPW128", auxType: auxInt8, @@ -27798,6 +27824,20 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPEXTRD128", + auxType: auxInt8, + argLen: 1, + asm: 
x86.AVPEXTRD, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15 + }, + }, + }, { name: "VPCMPD128", auxType: auxInt8, @@ -27877,6 +27917,20 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPEXTRQ128", + auxType: auxInt8, + argLen: 1, + asm: x86.AVPEXTRQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15 + }, + }, + }, { name: "VPCMPQ128", auxType: auxInt8, @@ -27989,6 +28043,20 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "VPEXTRB128", + auxType: auxInt8, + argLen: 1, + asm: x86.AVPEXTRB, + reg: regInfo{ + inputs: []inputInfo{ + {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 + }, + outputs: []outputInfo{ + {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15 + }, + }, + }, { name: "VPCMPB128", auxType: auxInt8, @@ -63225,48 +63293,96 @@ var opcodeTable = [...]opInfo{ argLen: 1, generic: true, }, + { + name: "GetElemInt16x8", + auxType: auxInt8, + argLen: 1, + generic: true, + }, { name: "SetElemInt16x8", auxType: auxInt8, argLen: 2, generic: true, }, + { + name: "GetElemInt32x4", + auxType: auxInt8, + argLen: 1, + generic: true, + }, { name: "SetElemInt32x4", auxType: auxInt8, argLen: 2, generic: true, }, + { + name: "GetElemInt64x2", + auxType: auxInt8, + argLen: 1, + generic: true, + }, { name: "SetElemInt64x2", auxType: auxInt8, argLen: 2, generic: true, }, + { + name: "GetElemInt8x16", + auxType: auxInt8, + argLen: 1, + generic: true, + }, { name: "SetElemInt8x16", auxType: auxInt8, argLen: 2, generic: true, }, + { + name: "GetElemUint16x8", + auxType: auxInt8, + argLen: 1, + generic: true, + }, { name: "SetElemUint16x8", auxType: auxInt8, argLen: 2, generic: true, }, + { + name: "GetElemUint32x4", + 
auxType: auxInt8, + argLen: 1, + generic: true, + }, { name: "SetElemUint32x4", auxType: auxInt8, argLen: 2, generic: true, }, + { + name: "GetElemUint64x2", + auxType: auxInt8, + argLen: 1, + generic: true, + }, { name: "SetElemUint64x2", auxType: auxInt8, argLen: 2, generic: true, }, + { + name: "GetElemUint8x16", + auxType: auxInt8, + argLen: 1, + generic: true, + }, { name: "SetElemUint8x16", auxType: auxInt8, diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go index 7ac8c22e87..668024a00f 100644 --- a/src/cmd/compile/internal/ssa/rewriteAMD64.go +++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go @@ -1448,6 +1448,22 @@ func rewriteValueAMD64(v *Value) bool { case OpGetClosurePtr: v.Op = OpAMD64LoweredGetClosurePtr return true + case OpGetElemInt16x8: + return rewriteValueAMD64_OpGetElemInt16x8(v) + case OpGetElemInt32x4: + return rewriteValueAMD64_OpGetElemInt32x4(v) + case OpGetElemInt64x2: + return rewriteValueAMD64_OpGetElemInt64x2(v) + case OpGetElemInt8x16: + return rewriteValueAMD64_OpGetElemInt8x16(v) + case OpGetElemUint16x8: + return rewriteValueAMD64_OpGetElemUint16x8(v) + case OpGetElemUint32x4: + return rewriteValueAMD64_OpGetElemUint32x4(v) + case OpGetElemUint64x2: + return rewriteValueAMD64_OpGetElemUint64x2(v) + case OpGetElemUint8x16: + return rewriteValueAMD64_OpGetElemUint8x16(v) case OpGetG: return rewriteValueAMD64_OpGetG(v) case OpGreaterEqualFloat32x16: @@ -30549,6 +30565,110 @@ func rewriteValueAMD64_OpFloorWithPrecisionFloat64x8(v *Value) bool { return true } } +func rewriteValueAMD64_OpGetElemInt16x8(v *Value) bool { + v_0 := v.Args[0] + // match: (GetElemInt16x8 [a] x) + // result: (VPEXTRW128 [a] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VPEXTRW128) + v.AuxInt = int8ToAuxInt(a) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpGetElemInt32x4(v *Value) bool { + v_0 := v.Args[0] + // match: (GetElemInt32x4 [a] x) + // result: (VPEXTRD128 [a] x) + 
for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VPEXTRD128) + v.AuxInt = int8ToAuxInt(a) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpGetElemInt64x2(v *Value) bool { + v_0 := v.Args[0] + // match: (GetElemInt64x2 [a] x) + // result: (VPEXTRQ128 [a] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VPEXTRQ128) + v.AuxInt = int8ToAuxInt(a) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpGetElemInt8x16(v *Value) bool { + v_0 := v.Args[0] + // match: (GetElemInt8x16 [a] x) + // result: (VPEXTRB128 [a] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VPEXTRB128) + v.AuxInt = int8ToAuxInt(a) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpGetElemUint16x8(v *Value) bool { + v_0 := v.Args[0] + // match: (GetElemUint16x8 [a] x) + // result: (VPEXTRW128 [a] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VPEXTRW128) + v.AuxInt = int8ToAuxInt(a) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpGetElemUint32x4(v *Value) bool { + v_0 := v.Args[0] + // match: (GetElemUint32x4 [a] x) + // result: (VPEXTRD128 [a] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VPEXTRD128) + v.AuxInt = int8ToAuxInt(a) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpGetElemUint64x2(v *Value) bool { + v_0 := v.Args[0] + // match: (GetElemUint64x2 [a] x) + // result: (VPEXTRQ128 [a] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VPEXTRQ128) + v.AuxInt = int8ToAuxInt(a) + v.AddArg(x) + return true + } +} +func rewriteValueAMD64_OpGetElemUint8x16(v *Value) bool { + v_0 := v.Args[0] + // match: (GetElemUint8x16 [a] x) + // result: (VPEXTRB128 [a] x) + for { + a := auxIntToInt8(v.AuxInt) + x := v_0 + v.reset(OpAMD64VPEXTRB128) + v.AuxInt = int8ToAuxInt(a) + v.AddArg(x) + return true + } +} func rewriteValueAMD64_OpGetG(v *Value) bool { v_0 := v.Args[0] // match: (GetG mem) diff --git 
a/src/cmd/compile/internal/ssagen/simdintrinsics.go b/src/cmd/compile/internal/ssagen/simdintrinsics.go index db4d249979..5d6ae7e3c0 100644 --- a/src/cmd/compile/internal/ssagen/simdintrinsics.go +++ b/src/cmd/compile/internal/ssagen/simdintrinsics.go @@ -262,6 +262,14 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies . addF(simdPackage, "Float64x2.FusedMultiplySubAdd", opLen3(ssa.OpFusedMultiplySubAddFloat64x2, types.TypeVec128), sys.AMD64) addF(simdPackage, "Float64x4.FusedMultiplySubAdd", opLen3(ssa.OpFusedMultiplySubAddFloat64x4, types.TypeVec256), sys.AMD64) addF(simdPackage, "Float64x8.FusedMultiplySubAdd", opLen3(ssa.OpFusedMultiplySubAddFloat64x8, types.TypeVec512), sys.AMD64) + addF(simdPackage, "Int8x16.GetElem", opLen1Imm8(ssa.OpGetElemInt8x16, types.Types[types.TINT8], 0), sys.AMD64) + addF(simdPackage, "Int16x8.GetElem", opLen1Imm8(ssa.OpGetElemInt16x8, types.Types[types.TINT16], 0), sys.AMD64) + addF(simdPackage, "Int32x4.GetElem", opLen1Imm8(ssa.OpGetElemInt32x4, types.Types[types.TINT32], 0), sys.AMD64) + addF(simdPackage, "Int64x2.GetElem", opLen1Imm8(ssa.OpGetElemInt64x2, types.Types[types.TINT64], 0), sys.AMD64) + addF(simdPackage, "Uint8x16.GetElem", opLen1Imm8(ssa.OpGetElemUint8x16, types.Types[types.TUINT8], 0), sys.AMD64) + addF(simdPackage, "Uint16x8.GetElem", opLen1Imm8(ssa.OpGetElemUint16x8, types.Types[types.TUINT16], 0), sys.AMD64) + addF(simdPackage, "Uint32x4.GetElem", opLen1Imm8(ssa.OpGetElemUint32x4, types.Types[types.TUINT32], 0), sys.AMD64) + addF(simdPackage, "Uint64x2.GetElem", opLen1Imm8(ssa.OpGetElemUint64x2, types.Types[types.TUINT64], 0), sys.AMD64) addF(simdPackage, "Int8x16.Greater", opLen2(ssa.OpGreaterInt8x16, types.TypeVec128), sys.AMD64) addF(simdPackage, "Int8x32.Greater", opLen2(ssa.OpGreaterInt8x32, types.TypeVec256), sys.AMD64) addF(simdPackage, "Int16x8.Greater", opLen2(ssa.OpGreaterInt16x8, types.TypeVec128), sys.AMD64) diff --git a/src/simd/simd_test.go b/src/simd/simd_test.go 
index 6df634b428..084b0af539 100644 --- a/src/simd/simd_test.go +++ b/src/simd/simd_test.go @@ -183,6 +183,16 @@ func TestSlicesInt8SetElem(t *testing.T) { checkInt8Slices(t, a, b) } +func TestSlicesInt8GetElem(t *testing.T) { + a := []int8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32} + v := simd.LoadInt8x16Slice(a) + e := v.GetElem(2) + if e != a[2] { + t.Errorf("GetElem(2) = %d != a[2] = %d", e, a[2]) + } + +} func TestSlicesInt8TooShortLoad(t *testing.T) { defer func() { if r := recover(); r != nil { diff --git a/src/simd/stubs_amd64.go b/src/simd/stubs_amd64.go index 66ff8c545e..5037e4e024 100644 --- a/src/simd/stubs_amd64.go +++ b/src/simd/stubs_amd64.go @@ -1426,6 +1426,48 @@ func (x Float64x4) FusedMultiplySubAdd(y Float64x4, z Float64x4) Float64x4 // Asm: VFMSUBADD213PD, CPU Feature: AVX512EVEX func (x Float64x8) FusedMultiplySubAdd(y Float64x8, z Float64x8) Float64x8 +/* GetElem */ + +// GetElem retrieves a single constant-indexed element's value. +// +// Asm: VPEXTRB, CPU Feature: AVX512EVEX +func (x Int8x16) GetElem(imm8 uint8) int8 + +// GetElem retrieves a single constant-indexed element's value. +// +// Asm: VPEXTRW, CPU Feature: AVX512EVEX +func (x Int16x8) GetElem(imm8 uint8) int16 + +// GetElem retrieves a single constant-indexed element's value. +// +// Asm: VPEXTRD, CPU Feature: AVX +func (x Int32x4) GetElem(imm8 uint8) int32 + +// GetElem retrieves a single constant-indexed element's value. +// +// Asm: VPEXTRQ, CPU Feature: AVX +func (x Int64x2) GetElem(imm8 uint8) int64 + +// GetElem retrieves a single constant-indexed element's value. +// +// Asm: VPEXTRB, CPU Feature: AVX512EVEX +func (x Uint8x16) GetElem(imm8 uint8) uint8 + +// GetElem retrieves a single constant-indexed element's value. +// +// Asm: VPEXTRW, CPU Feature: AVX512EVEX +func (x Uint16x8) GetElem(imm8 uint8) uint16 + +// GetElem retrieves a single constant-indexed element's value. 
+// +// Asm: VPEXTRD, CPU Feature: AVX +func (x Uint32x4) GetElem(imm8 uint8) uint32 + +// GetElem retrieves a single constant-indexed element's value. +// +// Asm: VPEXTRQ, CPU Feature: AVX +func (x Uint64x2) GetElem(imm8 uint8) uint64 + /* Greater */ // Greater compares for greater than. -- 2.52.0