[dev.simd] cmd/compile: add EXTRACT[IF]128 instructions

author David Chase <drchase@google.com>

Wed, 25 Jun 2025 22:20:50 +0000 (18:20 -0400)

committer David Chase <drchase@google.com>

Tue, 8 Jul 2025 01:37:28 +0000 (18:37 -0700)
author David Chase <drchase@google.com>
Wed, 25 Jun 2025 22:20:50 +0000 (18:20 -0400)
committer David Chase <drchase@google.com>
Tue, 8 Jul 2025 01:37:28 +0000 (18:37 -0700)
diff --git a/src/cmd/compile/internal/amd64/simdssa.go b/src/cmd/compile/internal/amd64/simdssa.go

index ac2848d1bafa694e43e8a4e60a9f9a603d34eae0..fbb63ccaa14386e7bcef8a938031e9897ecdb59a 100644 (file)
--- a/src/cmd/compile/internal/amd64/simdssa.go
+++ b/src/cmd/compile/internal/amd64/simdssa.go
@@ -655,6 +655,8 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
                 ssa.OpAMD64VREDUCEPD128,
                 ssa.OpAMD64VREDUCEPD256,
                 ssa.OpAMD64VREDUCEPD512,
+               ssa.OpAMD64VEXTRACTF128128,
+               ssa.OpAMD64VEXTRACTI128128,
                 ssa.OpAMD64VPROLD128,
                 ssa.OpAMD64VPROLD256,
                 ssa.OpAMD64VPROLD512,
diff --git a/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules b/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules

index 6b1078e74127985956e3a4b3e19708bda8a4932b..6ba52a9e9c9f93100c630b3d058be162962d69d3 100644 (file)
--- a/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules
+++ b/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules
@@ -224,6 +224,16 @@
  (GaloisFieldMulUint8x16 ...) => (VGF2P8MULB128 ...)
  (GaloisFieldMulUint8x32 ...) => (VGF2P8MULB256 ...)
  (GaloisFieldMulUint8x64 ...) => (VGF2P8MULB512 ...)
+(Get128Float32x8 [a] x) => (VEXTRACTF128128 [a] x)
+(Get128Float64x4 [a] x) => (VEXTRACTF128128 [a] x)
+(Get128Int8x32 [a] x) => (VEXTRACTI128128 [a] x)
+(Get128Int16x16 [a] x) => (VEXTRACTI128128 [a] x)
+(Get128Int32x8 [a] x) => (VEXTRACTI128128 [a] x)
+(Get128Int64x4 [a] x) => (VEXTRACTI128128 [a] x)
+(Get128Uint8x32 [a] x) => (VEXTRACTI128128 [a] x)
+(Get128Uint16x16 [a] x) => (VEXTRACTI128128 [a] x)
+(Get128Uint32x8 [a] x) => (VEXTRACTI128128 [a] x)
+(Get128Uint64x4 [a] x) => (VEXTRACTI128128 [a] x)
  (GetElemInt8x16 [a] x) => (VPEXTRB128 [a] x)
  (GetElemInt16x8 [a] x) => (VPEXTRW128 [a] x)
  (GetElemInt32x4 [a] x) => (VPEXTRD128 [a] x)
diff --git a/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go b/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go

index 787d3c5fcbf50c9ff0d8acaddb1bcc89da9bae80..8c895d9f455950555096ac8c90f24bddb4a485db 100644 (file)
--- a/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go
+++ b/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go
@@ -765,6 +765,7 @@ func simdAMD64Ops(fp11, fp21, fp2k, fpkfp, fp2kfp, fp2kk, fp31, fp3kfp, fpgpfp,
                 {name: "VRNDSCALEPS256", argLength: 1, reg: fp11, asm: "VRNDSCALEPS", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false},
                 {name: "VREDUCEPS256", argLength: 1, reg: fp11, asm: "VREDUCEPS", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false},
                 {name: "VCMPPS256", argLength: 2, reg: fp21, asm: "VCMPPS", aux: "Int8", commutative: true, typ: "Vec256", resultInArg0: false},
+               {name: "VEXTRACTF128128", argLength: 1, reg: fp11, asm: "VEXTRACTF128", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false},
                 {name: "VRNDSCALEPSMasked256", argLength: 2, reg: fpkfp, asm: "VRNDSCALEPS", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false},
                 {name: "VREDUCEPSMasked256", argLength: 2, reg: fpkfp, asm: "VREDUCEPS", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false},
                 {name: "VCMPPSMasked256", argLength: 3, reg: fp2kk, asm: "VCMPPS", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
@@ -878,6 +879,7 @@ func simdAMD64Ops(fp11, fp21, fp2k, fpkfp, fp2kfp, fp2kk, fp31, fp3kfp, fpgpfp,
                 {name: "VPCMPB128", argLength: 2, reg: fp2k, asm: "VPCMPB", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false},
                 {name: "VPCMPBMasked128", argLength: 3, reg: fp2kk, asm: "VPCMPB", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
                 {name: "VPINSRB128", argLength: 2, reg: fpgpfp, asm: "VPINSRB", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false},
+               {name: "VEXTRACTI128128", argLength: 1, reg: fp11, asm: "VEXTRACTI128", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false},
                 {name: "VPCMPB256", argLength: 2, reg: fp2k, asm: "VPCMPB", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false},
                 {name: "VPCMPBMasked256", argLength: 3, reg: fp2kk, asm: "VPCMPB", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
                 {name: "VINSERTI128256", argLength: 2, reg: fp21, asm: "VINSERTI128", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false},
diff --git a/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go b/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go

index 076a16ebda611024b3fafd993816aa824d16cf8f..c74893b97a210f47b78193ca87311a681b812dc2 100644 (file)
--- a/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go
+++ b/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go
@@ -1502,6 +1502,7 @@ func simdGenericOps() []opData {
                 {name: "DiffWithRoundWithPrecisionFloat32x8", argLength: 1, commutative: false, aux: "Int8"},
                 {name: "DiffWithTruncWithPrecisionFloat32x8", argLength: 1, commutative: false, aux: "Int8"},
                 {name: "FloorWithPrecisionFloat32x8", argLength: 1, commutative: false, aux: "Int8"},
+               {name: "Get128Float32x8", argLength: 1, commutative: false, aux: "Int8"},
                 {name: "MaskedCeilWithPrecisionFloat32x8", argLength: 2, commutative: false, aux: "Int8"},
                 {name: "MaskedDiffWithCeilWithPrecisionFloat32x8", argLength: 2, commutative: false, aux: "Int8"},
                 {name: "MaskedDiffWithFloorWithPrecisionFloat32x8", argLength: 2, commutative: false, aux: "Int8"},
@@ -1535,6 +1536,7 @@ func simdGenericOps() []opData {
                 {name: "DiffWithRoundWithPrecisionFloat64x4", argLength: 1, commutative: false, aux: "Int8"},
                 {name: "DiffWithTruncWithPrecisionFloat64x4", argLength: 1, commutative: false, aux: "Int8"},
                 {name: "FloorWithPrecisionFloat64x4", argLength: 1, commutative: false, aux: "Int8"},
+               {name: "Get128Float64x4", argLength: 1, commutative: false, aux: "Int8"},
                 {name: "MaskedCeilWithPrecisionFloat64x4", argLength: 2, commutative: false, aux: "Int8"},
                 {name: "MaskedDiffWithCeilWithPrecisionFloat64x4", argLength: 2, commutative: false, aux: "Int8"},
                 {name: "MaskedDiffWithFloorWithPrecisionFloat64x4", argLength: 2, commutative: false, aux: "Int8"},
@@ -1562,6 +1564,7 @@ func simdGenericOps() []opData {
                 {name: "MaskedTruncWithPrecisionFloat64x8", argLength: 2, commutative: false, aux: "Int8"},
                 {name: "RoundWithPrecisionFloat64x8", argLength: 1, commutative: false, aux: "Int8"},
                 {name: "TruncWithPrecisionFloat64x8", argLength: 1, commutative: false, aux: "Int8"},
+               {name: "Get128Int16x16", argLength: 1, commutative: false, aux: "Int8"},
                 {name: "MaskedShiftAllLeftAndFillUpperFromInt16x16", argLength: 3, commutative: false, aux: "Int8"},
                 {name: "MaskedShiftAllRightAndFillUpperFromInt16x16", argLength: 3, commutative: false, aux: "Int8"},
                 {name: "Set128Int16x16", argLength: 2, commutative: false, aux: "Int8"},
@@ -1595,6 +1598,7 @@ func simdGenericOps() []opData {
                 {name: "SetElemInt32x4", argLength: 2, commutative: false, aux: "Int8"},
                 {name: "ShiftAllLeftAndFillUpperFromInt32x4", argLength: 2, commutative: false, aux: "Int8"},
                 {name: "ShiftAllRightAndFillUpperFromInt32x4", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "Get128Int32x8", argLength: 1, commutative: false, aux: "Int8"},
                 {name: "MaskedRotateAllLeftInt32x8", argLength: 2, commutative: false, aux: "Int8"},
                 {name: "MaskedRotateAllRightInt32x8", argLength: 2, commutative: false, aux: "Int8"},
                 {name: "MaskedShiftAllLeftAndFillUpperFromInt32x8", argLength: 3, commutative: false, aux: "Int8"},
@@ -1614,6 +1618,7 @@ func simdGenericOps() []opData {
                 {name: "SetElemInt64x2", argLength: 2, commutative: false, aux: "Int8"},
                 {name: "ShiftAllLeftAndFillUpperFromInt64x2", argLength: 2, commutative: false, aux: "Int8"},
                 {name: "ShiftAllRightAndFillUpperFromInt64x2", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "Get128Int64x4", argLength: 1, commutative: false, aux: "Int8"},
                 {name: "MaskedRotateAllLeftInt64x4", argLength: 2, commutative: false, aux: "Int8"},
                 {name: "MaskedRotateAllRightInt64x4", argLength: 2, commutative: false, aux: "Int8"},
                 {name: "MaskedShiftAllLeftAndFillUpperFromInt64x4", argLength: 3, commutative: false, aux: "Int8"},
@@ -1633,7 +1638,9 @@ func simdGenericOps() []opData {
                 {name: "ShiftAllRightAndFillUpperFromInt64x8", argLength: 2, commutative: false, aux: "Int8"},
                 {name: "GetElemInt8x16", argLength: 1, commutative: false, aux: "Int8"},
                 {name: "SetElemInt8x16", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "Get128Int8x32", argLength: 1, commutative: false, aux: "Int8"},
                 {name: "Set128Int8x32", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "Get128Uint16x16", argLength: 1, commutative: false, aux: "Int8"},
                 {name: "MaskedShiftAllLeftAndFillUpperFromUint16x16", argLength: 3, commutative: false, aux: "Int8"},
                 {name: "MaskedShiftAllRightAndFillUpperFromUint16x16", argLength: 3, commutative: false, aux: "Int8"},
                 {name: "Set128Uint16x16", argLength: 2, commutative: false, aux: "Int8"},
@@ -1667,6 +1674,7 @@ func simdGenericOps() []opData {
                 {name: "SetElemUint32x4", argLength: 2, commutative: false, aux: "Int8"},
                 {name: "ShiftAllLeftAndFillUpperFromUint32x4", argLength: 2, commutative: false, aux: "Int8"},
                 {name: "ShiftAllRightAndFillUpperFromUint32x4", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "Get128Uint32x8", argLength: 1, commutative: false, aux: "Int8"},
                 {name: "MaskedRotateAllLeftUint32x8", argLength: 2, commutative: false, aux: "Int8"},
                 {name: "MaskedRotateAllRightUint32x8", argLength: 2, commutative: false, aux: "Int8"},
                 {name: "MaskedShiftAllLeftAndFillUpperFromUint32x8", argLength: 3, commutative: false, aux: "Int8"},
@@ -1686,6 +1694,7 @@ func simdGenericOps() []opData {
                 {name: "SetElemUint64x2", argLength: 2, commutative: false, aux: "Int8"},
                 {name: "ShiftAllLeftAndFillUpperFromUint64x2", argLength: 2, commutative: false, aux: "Int8"},
                 {name: "ShiftAllRightAndFillUpperFromUint64x2", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "Get128Uint64x4", argLength: 1, commutative: false, aux: "Int8"},
                 {name: "MaskedRotateAllLeftUint64x4", argLength: 2, commutative: false, aux: "Int8"},
                 {name: "MaskedRotateAllRightUint64x4", argLength: 2, commutative: false, aux: "Int8"},
                 {name: "MaskedShiftAllLeftAndFillUpperFromUint64x4", argLength: 3, commutative: false, aux: "Int8"},
@@ -1711,6 +1720,7 @@ func simdGenericOps() []opData {
                 {name: "SetElemUint8x16", argLength: 2, commutative: false, aux: "Int8"},
                 {name: "GaloisFieldAffineTransformUint8x32", argLength: 2, commutative: false, aux: "Int8"},
                 {name: "GaloisFieldAffineTransformInversedUint8x32", argLength: 2, commutative: false, aux: "Int8"},
+               {name: "Get128Uint8x32", argLength: 1, commutative: false, aux: "Int8"},
                 {name: "MaskedGaloisFieldAffineTransformUint8x32", argLength: 3, commutative: false, aux: "Int8"},
                 {name: "MaskedGaloisFieldAffineTransformInversedUint8x32", argLength: 3, commutative: false, aux: "Int8"},
                 {name: "Set128Uint8x32", argLength: 2, commutative: false, aux: "Int8"},
diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go

index ece791ca6cea71e10921ce0a27544684a6a38d99..91380e5e089e313675d084a14bf3fbfa278f1030 100644 (file)
--- a/src/cmd/compile/internal/ssa/opGen.go
+++ b/src/cmd/compile/internal/ssa/opGen.go
@@ -1958,6 +1958,7 @@ const (
         OpAMD64VRNDSCALEPS256
         OpAMD64VREDUCEPS256
         OpAMD64VCMPPS256
+       OpAMD64VEXTRACTF128128
         OpAMD64VRNDSCALEPSMasked256
         OpAMD64VREDUCEPSMasked256
         OpAMD64VCMPPSMasked256
@@ -2071,6 +2072,7 @@ const (
         OpAMD64VPCMPB128
         OpAMD64VPCMPBMasked128
         OpAMD64VPINSRB128
+       OpAMD64VEXTRACTI128128
         OpAMD64VPCMPB256
         OpAMD64VPCMPBMasked256
         OpAMD64VINSERTI128256
@@ -5837,6 +5839,7 @@ const (
         OpDiffWithRoundWithPrecisionFloat32x8
         OpDiffWithTruncWithPrecisionFloat32x8
         OpFloorWithPrecisionFloat32x8
+       OpGet128Float32x8
         OpMaskedCeilWithPrecisionFloat32x8
         OpMaskedDiffWithCeilWithPrecisionFloat32x8
         OpMaskedDiffWithFloorWithPrecisionFloat32x8
@@ -5870,6 +5873,7 @@ const (
         OpDiffWithRoundWithPrecisionFloat64x4
         OpDiffWithTruncWithPrecisionFloat64x4
         OpFloorWithPrecisionFloat64x4
+       OpGet128Float64x4
         OpMaskedCeilWithPrecisionFloat64x4
         OpMaskedDiffWithCeilWithPrecisionFloat64x4
         OpMaskedDiffWithFloorWithPrecisionFloat64x4
@@ -5897,6 +5901,7 @@ const (
         OpMaskedTruncWithPrecisionFloat64x8
         OpRoundWithPrecisionFloat64x8
         OpTruncWithPrecisionFloat64x8
+       OpGet128Int16x16
         OpMaskedShiftAllLeftAndFillUpperFromInt16x16
         OpMaskedShiftAllRightAndFillUpperFromInt16x16
         OpSet128Int16x16
@@ -5930,6 +5935,7 @@ const (
         OpSetElemInt32x4
         OpShiftAllLeftAndFillUpperFromInt32x4
         OpShiftAllRightAndFillUpperFromInt32x4
+       OpGet128Int32x8
         OpMaskedRotateAllLeftInt32x8
         OpMaskedRotateAllRightInt32x8
         OpMaskedShiftAllLeftAndFillUpperFromInt32x8
@@ -5949,6 +5955,7 @@ const (
         OpSetElemInt64x2
         OpShiftAllLeftAndFillUpperFromInt64x2
         OpShiftAllRightAndFillUpperFromInt64x2
+       OpGet128Int64x4
         OpMaskedRotateAllLeftInt64x4
         OpMaskedRotateAllRightInt64x4
         OpMaskedShiftAllLeftAndFillUpperFromInt64x4
@@ -5968,7 +5975,9 @@ const (
         OpShiftAllRightAndFillUpperFromInt64x8
         OpGetElemInt8x16
         OpSetElemInt8x16
+       OpGet128Int8x32
         OpSet128Int8x32
+       OpGet128Uint16x16
         OpMaskedShiftAllLeftAndFillUpperFromUint16x16
         OpMaskedShiftAllRightAndFillUpperFromUint16x16
         OpSet128Uint16x16
@@ -6002,6 +6011,7 @@ const (
         OpSetElemUint32x4
         OpShiftAllLeftAndFillUpperFromUint32x4
         OpShiftAllRightAndFillUpperFromUint32x4
+       OpGet128Uint32x8
         OpMaskedRotateAllLeftUint32x8
         OpMaskedRotateAllRightUint32x8
         OpMaskedShiftAllLeftAndFillUpperFromUint32x8
@@ -6021,6 +6031,7 @@ const (
         OpSetElemUint64x2
         OpShiftAllLeftAndFillUpperFromUint64x2
         OpShiftAllRightAndFillUpperFromUint64x2
+       OpGet128Uint64x4
         OpMaskedRotateAllLeftUint64x4
         OpMaskedRotateAllRightUint64x4
         OpMaskedShiftAllLeftAndFillUpperFromUint64x4
@@ -6046,6 +6057,7 @@ const (
         OpSetElemUint8x16
         OpGaloisFieldAffineTransformUint8x32
         OpGaloisFieldAffineTransformInversedUint8x32
+       OpGet128Uint8x32
         OpMaskedGaloisFieldAffineTransformUint8x32
         OpMaskedGaloisFieldAffineTransformInversedUint8x32
         OpSet128Uint8x32
@@ -30096,6 +30108,20 @@ var opcodeTable = [...]opInfo{
                         },
                 },
         },
+       {
+               name:    "VEXTRACTF128128",
+               auxType: auxInt8,
+               argLen:  1,
+               asm:     x86.AVEXTRACTF128,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+                       outputs: []outputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+               },
+       },
         {
                 name:    "VRNDSCALEPSMasked256",
                 auxType: auxInt8,
@@ -31820,6 +31846,20 @@ var opcodeTable = [...]opInfo{
                         },
                 },
         },
+       {
+               name:    "VEXTRACTI128128",
+               auxType: auxInt8,
+               argLen:  1,
+               asm:     x86.AVEXTRACTI128,
+               reg: regInfo{
+                       inputs: []inputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+                       outputs: []outputInfo{
+                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+                       },
+               },
+       },
         {
                 name:    "VPCMPB256",
                 auxType: auxInt8,
@@ -67706,6 +67746,12 @@ var opcodeTable = [...]opInfo{
                 argLen:  1,
                 generic: true,
         },
+       {
+               name:    "Get128Float32x8",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
         {
                 name:    "MaskedCeilWithPrecisionFloat32x8",
                 auxType: auxInt8,
@@ -67904,6 +67950,12 @@ var opcodeTable = [...]opInfo{
                 argLen:  1,
                 generic: true,
         },
+       {
+               name:    "Get128Float64x4",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
         {
                 name:    "MaskedCeilWithPrecisionFloat64x4",
                 auxType: auxInt8,
@@ -68066,6 +68118,12 @@ var opcodeTable = [...]opInfo{
                 argLen:  1,
                 generic: true,
         },
+       {
+               name:    "Get128Int16x16",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
         {
                 name:    "MaskedShiftAllLeftAndFillUpperFromInt16x16",
                 auxType: auxInt8,
@@ -68264,6 +68322,12 @@ var opcodeTable = [...]opInfo{
                 argLen:  2,
                 generic: true,
         },
+       {
+               name:    "Get128Int32x8",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
         {
                 name:    "MaskedRotateAllLeftInt32x8",
                 auxType: auxInt8,
@@ -68378,6 +68442,12 @@ var opcodeTable = [...]opInfo{
                 argLen:  2,
                 generic: true,
         },
+       {
+               name:    "Get128Int64x4",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
         {
                 name:    "MaskedRotateAllLeftInt64x4",
                 auxType: auxInt8,
@@ -68492,12 +68562,24 @@ var opcodeTable = [...]opInfo{
                 argLen:  2,
                 generic: true,
         },
+       {
+               name:    "Get128Int8x32",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
         {
                 name:    "Set128Int8x32",
                 auxType: auxInt8,
                 argLen:  2,
                 generic: true,
         },
+       {
+               name:    "Get128Uint16x16",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
         {
                 name:    "MaskedShiftAllLeftAndFillUpperFromUint16x16",
                 auxType: auxInt8,
@@ -68696,6 +68778,12 @@ var opcodeTable = [...]opInfo{
                 argLen:  2,
                 generic: true,
         },
+       {
+               name:    "Get128Uint32x8",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
         {
                 name:    "MaskedRotateAllLeftUint32x8",
                 auxType: auxInt8,
@@ -68810,6 +68898,12 @@ var opcodeTable = [...]opInfo{
                 argLen:  2,
                 generic: true,
         },
+       {
+               name:    "Get128Uint64x4",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
         {
                 name:    "MaskedRotateAllLeftUint64x4",
                 auxType: auxInt8,
@@ -68960,6 +69054,12 @@ var opcodeTable = [...]opInfo{
                 argLen:  2,
                 generic: true,
         },
+       {
+               name:    "Get128Uint8x32",
+               auxType: auxInt8,
+               argLen:  1,
+               generic: true,
+       },
         {
                 name:    "MaskedGaloisFieldAffineTransformUint8x32",
                 auxType: auxInt8,
diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go

index 5c1872dcdfd313216a3dbf1b175c890f76be4768..1cf23c4ec5b0e49f3bb43dca1b3e3648ba78802f 100644 (file)
--- a/src/cmd/compile/internal/ssa/rewriteAMD64.go
+++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go
@@ -1388,6 +1388,26 @@ func rewriteValueAMD64(v *Value) bool {
         case OpGaloisFieldMulUint8x64:
                 v.Op = OpAMD64VGF2P8MULB512
                 return true
+       case OpGet128Float32x8:
+               return rewriteValueAMD64_OpGet128Float32x8(v)
+       case OpGet128Float64x4:
+               return rewriteValueAMD64_OpGet128Float64x4(v)
+       case OpGet128Int16x16:
+               return rewriteValueAMD64_OpGet128Int16x16(v)
+       case OpGet128Int32x8:
+               return rewriteValueAMD64_OpGet128Int32x8(v)
+       case OpGet128Int64x4:
+               return rewriteValueAMD64_OpGet128Int64x4(v)
+       case OpGet128Int8x32:
+               return rewriteValueAMD64_OpGet128Int8x32(v)
+       case OpGet128Uint16x16:
+               return rewriteValueAMD64_OpGet128Uint16x16(v)
+       case OpGet128Uint32x8:
+               return rewriteValueAMD64_OpGet128Uint32x8(v)
+       case OpGet128Uint64x4:
+               return rewriteValueAMD64_OpGet128Uint64x4(v)
+       case OpGet128Uint8x32:
+               return rewriteValueAMD64_OpGet128Uint8x32(v)
         case OpGetCallerPC:
                 v.Op = OpAMD64LoweredGetCallerPC
                 return true
@@ -30999,6 +31019,136 @@ func rewriteValueAMD64_OpGaloisFieldAffineTransformUint8x64(v *Value) bool {
                 return true
         }
  }
+func rewriteValueAMD64_OpGet128Float32x8(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (Get128Float32x8 [a] x)
+       // result: (VEXTRACTF128128 [a] x)
+       for { // no match condition: the rule always fires and returns true
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VEXTRACTF128128)
+               v.AuxInt = int8ToAuxInt(a) // same aux value as on the generic op
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpGet128Float64x4(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (Get128Float64x4 [a] x)
+       // result: (VEXTRACTF128128 [a] x)
+       for { // no match condition: the rule always fires and returns true
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VEXTRACTF128128)
+               v.AuxInt = int8ToAuxInt(a) // same aux value as on the generic op
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpGet128Int16x16(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (Get128Int16x16 [a] x)
+       // result: (VEXTRACTI128128 [a] x)
+       for { // no match condition: the rule always fires and returns true
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VEXTRACTI128128)
+               v.AuxInt = int8ToAuxInt(a) // same aux value as on the generic op
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpGet128Int32x8(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (Get128Int32x8 [a] x)
+       // result: (VEXTRACTI128128 [a] x)
+       for { // no match condition: the rule always fires and returns true
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VEXTRACTI128128)
+               v.AuxInt = int8ToAuxInt(a) // same aux value as on the generic op
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpGet128Int64x4(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (Get128Int64x4 [a] x)
+       // result: (VEXTRACTI128128 [a] x)
+       for { // no match condition: the rule always fires and returns true
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VEXTRACTI128128)
+               v.AuxInt = int8ToAuxInt(a) // same aux value as on the generic op
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpGet128Int8x32(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (Get128Int8x32 [a] x)
+       // result: (VEXTRACTI128128 [a] x)
+       for { // no match condition: the rule always fires and returns true
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VEXTRACTI128128)
+               v.AuxInt = int8ToAuxInt(a) // same aux value as on the generic op
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpGet128Uint16x16(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (Get128Uint16x16 [a] x)
+       // result: (VEXTRACTI128128 [a] x)
+       for { // no match condition: the rule always fires and returns true
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VEXTRACTI128128)
+               v.AuxInt = int8ToAuxInt(a) // same aux value as on the generic op
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpGet128Uint32x8(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (Get128Uint32x8 [a] x)
+       // result: (VEXTRACTI128128 [a] x)
+       for { // no match condition: the rule always fires and returns true
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VEXTRACTI128128)
+               v.AuxInt = int8ToAuxInt(a) // same aux value as on the generic op
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpGet128Uint64x4(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (Get128Uint64x4 [a] x)
+       // result: (VEXTRACTI128128 [a] x)
+       for { // no match condition: the rule always fires and returns true
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VEXTRACTI128128)
+               v.AuxInt = int8ToAuxInt(a) // same aux value as on the generic op
+               v.AddArg(x)
+               return true
+       }
+}
+func rewriteValueAMD64_OpGet128Uint8x32(v *Value) bool {
+       v_0 := v.Args[0]
+       // match: (Get128Uint8x32 [a] x)
+       // result: (VEXTRACTI128128 [a] x)
+       for { // no match condition: the rule always fires and returns true
+               a := auxIntToInt8(v.AuxInt)
+               x := v_0
+               v.reset(OpAMD64VEXTRACTI128128)
+               v.AuxInt = int8ToAuxInt(a) // same aux value as on the generic op
+               v.AddArg(x)
+               return true
+       }
+}
  func rewriteValueAMD64_OpGetElemInt16x8(v *Value) bool {
         v_0 := v.Args[0]
         // match: (GetElemInt16x8 [a] x)
diff --git a/src/cmd/compile/internal/ssagen/simdintrinsics.go b/src/cmd/compile/internal/ssagen/simdintrinsics.go

index 3d0e6fbd4aa75f7ebee1801596bda04eaa5abb22..27aad1cc0c449d4469e95eb0fbd4db27a286728a 100644 (file)
--- a/src/cmd/compile/internal/ssagen/simdintrinsics.go
+++ b/src/cmd/compile/internal/ssagen/simdintrinsics.go
@@ -235,6 +235,16 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
         addF(simdPackage, "Uint8x16.GaloisFieldMul", opLen2(ssa.OpGaloisFieldMulUint8x16, types.TypeVec128), sys.AMD64)
         addF(simdPackage, "Uint8x32.GaloisFieldMul", opLen2(ssa.OpGaloisFieldMulUint8x32, types.TypeVec256), sys.AMD64)
         addF(simdPackage, "Uint8x64.GaloisFieldMul", opLen2(ssa.OpGaloisFieldMulUint8x64, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Float32x8.Get128", opLen1Imm8(ssa.OpGet128Float32x8, types.TypeVec128, 0), sys.AMD64)
+       addF(simdPackage, "Float64x4.Get128", opLen1Imm8(ssa.OpGet128Float64x4, types.TypeVec128, 0), sys.AMD64)
+       addF(simdPackage, "Int8x32.Get128", opLen1Imm8(ssa.OpGet128Int8x32, types.TypeVec128, 0), sys.AMD64)
+       addF(simdPackage, "Int16x16.Get128", opLen1Imm8(ssa.OpGet128Int16x16, types.TypeVec128, 0), sys.AMD64)
+       addF(simdPackage, "Int32x8.Get128", opLen1Imm8(ssa.OpGet128Int32x8, types.TypeVec128, 0), sys.AMD64)
+       addF(simdPackage, "Int64x4.Get128", opLen1Imm8(ssa.OpGet128Int64x4, types.TypeVec128, 0), sys.AMD64)
+       addF(simdPackage, "Uint8x32.Get128", opLen1Imm8(ssa.OpGet128Uint8x32, types.TypeVec128, 0), sys.AMD64)
+       addF(simdPackage, "Uint16x16.Get128", opLen1Imm8(ssa.OpGet128Uint16x16, types.TypeVec128, 0), sys.AMD64)
+       addF(simdPackage, "Uint32x8.Get128", opLen1Imm8(ssa.OpGet128Uint32x8, types.TypeVec128, 0), sys.AMD64)
+       addF(simdPackage, "Uint64x4.Get128", opLen1Imm8(ssa.OpGet128Uint64x4, types.TypeVec128, 0), sys.AMD64)
         addF(simdPackage, "Int8x16.GetElem", opLen1Imm8(ssa.OpGetElemInt8x16, types.Types[types.TINT8], 0), sys.AMD64)
         addF(simdPackage, "Int16x8.GetElem", opLen1Imm8(ssa.OpGetElemInt16x8, types.Types[types.TINT16], 0), sys.AMD64)
         addF(simdPackage, "Int32x4.GetElem", opLen1Imm8(ssa.OpGetElemInt32x4, types.Types[types.TINT32], 0), sys.AMD64)
diff --git a/src/simd/simd_test.go b/src/simd/simd_test.go

index f99938bb9d29e96a0e008e2a57c0bc603499dac4..1b47d2770ccd61be3ad1eb66dc530258a81127f3 100644 (file)
--- a/src/simd/simd_test.go
+++ b/src/simd/simd_test.go
@@ -161,6 +161,22 @@ func checkInt8Slices(t *testing.T, a, b []int8) {
         }
  }
  
+func checkFloat32Slices(t *testing.T, a, b []float32) {
+       for i := range b { // only the first len(b) elements are compared; a must be at least that long
+               if a[i] != b[i] {
+                       t.Errorf("a and b differ at index %d, a=%3.0f, b=%3.0f", i, a[i], b[i]) // Errorf, so every differing index is reported
+               }
+       }
+}
+
+func checkFloat64Slices(t *testing.T, a, b []float64) {
+       for i := range b { // only the first len(b) elements are compared; a must be at least that long
+               if a[i] != b[i] {
+                       t.Errorf("a and b differ at index %d, a=%3.0f, b=%3.0f", i, a[i], b[i]) // Errorf, so every differing index is reported
+               }
+       }
+}
+
  func TestSlicesInt8(t *testing.T) {
         a := []int8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
                 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}
@@ -209,6 +225,78 @@ func TestSlicesInt8Set128(t *testing.T) {
         checkInt8Slices(t, a, b[16:])
  }
  
+func TestSlicesInt8Get128(t *testing.T) {
+       a := []int8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+               17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}
+       u := simd.LoadInt8x32Slice(a) // 1-32
+       v := u.Get128(0)              // 1-16
+       w := u.Get128(1)              // 17-32
+
+       b := make([]int8, 32, 32)
+       v.StoreSlice(b[:16])
+       w.StoreSlice(b[16:])
+
+       checkInt8Slices(t, a, b)
+}
+
+func TestSlicesFloat32Set128(t *testing.T) {
+       a := []float32{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+               17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}
+       v := simd.LoadFloat32x4Slice(a) // 1-4
+       u := simd.LoadFloat32x8Slice(a) // 1-8
+
+       w := u.Set128(1, v) // 1-4:1-4
+
+       b := make([]float32, 8, 8)
+       w.StoreSlice(b)
+
+       checkFloat32Slices(t, a, b[:4])
+       checkFloat32Slices(t, a, b[4:])
+}
+
+func TestSlicesFloat32Get128(t *testing.T) {
+       a := []float32{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+               17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}
+       u := simd.LoadFloat32x8Slice(a) // 1-8
+       v := u.Get128(0)                // 1-4
+       w := u.Get128(1)                // 5-8
+
+       b := make([]float32, 8, 8)
+       v.StoreSlice(b[:4])
+       w.StoreSlice(b[4:])
+
+       checkFloat32Slices(t, a, b)
+}
+
+func TestSlicesFloat64Set128(t *testing.T) {
+       a := []float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+               17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}
+       v := simd.LoadFloat64x2Slice(a) // 1-2
+       u := simd.LoadFloat64x4Slice(a) // 1-4
+
+       w := u.Set128(1, v) // 1-2:1-2
+
+       b := make([]float64, 4, 4)
+       w.StoreSlice(b)
+
+       checkFloat64Slices(t, a, b[:2])
+       checkFloat64Slices(t, a, b[2:])
+}
+
+func TestSlicesFloat64Get128(t *testing.T) {
+       a := []float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+               17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}
+       u := simd.LoadFloat64x4Slice(a) // 1-4
+       v := u.Get128(0)                // 1-2
+       w := u.Get128(1)                // 3-4
+
+       b := make([]float64, 4, 4)
+       v.StoreSlice(b[:2])
+       w.StoreSlice(b[2:])
+
+       checkFloat64Slices(t, a, b)
+}
+
  func TestSlicesInt8TooShortLoad(t *testing.T) {
         defer func() {
                 if r := recover(); r != nil {
diff --git a/src/simd/simd_wrapped_test.go b/src/simd/simd_wrapped_test.go

index 4a8c0957e5b37e9f753dcadf39f3066de996e9b9..b3f18b383772b697b82b69b56e4d480e7d85cb99 100644 (file)
--- a/src/simd/simd_wrapped_test.go
+++ b/src/simd/simd_wrapped_test.go
@@ -7954,6 +7954,7 @@ func testUint64x8UnaryMasked(t *testing.T, v0 []uint64, v1 []int64, want []uint6
  // FloorWithPrecision
  // GaloisFieldAffineTransform
  // GaloisFieldAffineTransformInversed
+// Get128
  // GetElem
  // MaskedCeilWithPrecision
  // MaskedDiffWithCeilWithPrecision
diff --git a/src/simd/stubs_amd64.go b/src/simd/stubs_amd64.go

index de54a9ada48cb377fe1be855e66b5ecaa05cf31f..3453843d0f7991402daac869414c51bac08e3bdb 100644 (file)
--- a/src/simd/stubs_amd64.go
+++ b/src/simd/stubs_amd64.go
@@ -1198,6 +1198,58 @@ func (x Uint8x32) GaloisFieldMul(y Uint8x32) Uint8x32
  // Asm: VGF2P8MULB, CPU Feature: AVX512EVEX
  func (x Uint8x64) GaloisFieldMul(y Uint8x64) Uint8x64
  
+/* Get128 */
+
+// Get128 retrieves the upper (1) or lower (0) half of a 256-bit vector, depending on the constant operand.
+//
+// Asm: VEXTRACTF128, CPU Feature: AVX
+func (x Float32x8) Get128(imm uint8) Float32x4
+
+// Get128 retrieves the upper (1) or lower (0) half of a 256-bit vector, depending on the constant operand.
+//
+// Asm: VEXTRACTF128, CPU Feature: AVX
+func (x Float64x4) Get128(imm uint8) Float64x2
+
+// Get128 retrieves the upper (1) or lower (0) half of a 256-bit vector, depending on the constant operand.
+//
+// Asm: VEXTRACTI128, CPU Feature: AVX2
+func (x Int8x32) Get128(imm uint8) Int8x16
+
+// Get128 retrieves the upper (1) or lower (0) half of a 256-bit vector, depending on the constant operand.
+//
+// Asm: VEXTRACTI128, CPU Feature: AVX2
+func (x Int16x16) Get128(imm uint8) Int16x8
+
+// Get128 retrieves the upper (1) or lower (0) half of a 256-bit vector, depending on the constant operand.
+//
+// Asm: VEXTRACTI128, CPU Feature: AVX2
+func (x Int32x8) Get128(imm uint8) Int32x4
+
+// Get128 retrieves the upper (1) or lower (0) half of a 256-bit vector, depending on the constant operand.
+//
+// Asm: VEXTRACTI128, CPU Feature: AVX2
+func (x Int64x4) Get128(imm uint8) Int64x2
+
+// Get128 retrieves the upper (1) or lower (0) half of a 256-bit vector, depending on the constant operand.
+//
+// Asm: VEXTRACTI128, CPU Feature: AVX2
+func (x Uint8x32) Get128(imm uint8) Uint8x16
+
+// Get128 retrieves the upper (1) or lower (0) half of a 256-bit vector, depending on the constant operand.
+//
+// Asm: VEXTRACTI128, CPU Feature: AVX2
+func (x Uint16x16) Get128(imm uint8) Uint16x8
+
+// Get128 retrieves the upper (1) or lower (0) half of a 256-bit vector, depending on the constant operand.
+//
+// Asm: VEXTRACTI128, CPU Feature: AVX2
+func (x Uint32x8) Get128(imm uint8) Uint32x4
+
+// Get128 retrieves the upper (1) or lower (0) half of a 256-bit vector, depending on the constant operand.
+//
+// Asm: VEXTRACTI128, CPU Feature: AVX2
+func (x Uint64x4) Get128(imm uint8) Uint64x2
+
  /* GetElem */
  
  // GetElem retrieves a single constant-indexed element's value.
author	David Chase <drchase@google.com>
	Wed, 25 Jun 2025 22:20:50 +0000 (18:20 -0400)
committer	David Chase <drchase@google.com>
	Tue, 8 Jul 2025 01:37:28 +0000 (18:37 -0700)
src/cmd/compile/internal/amd64/simdssa.go		patch \| blob \| history
src/cmd/compile/internal/ssa/_gen/simdAMD64.rules		patch \| blob \| history
src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go		patch \| blob \| history
src/cmd/compile/internal/ssa/_gen/simdgenericOps.go		patch \| blob \| history
src/cmd/compile/internal/ssa/opGen.go		patch \| blob \| history
src/cmd/compile/internal/ssa/rewriteAMD64.go		patch \| blob \| history
src/cmd/compile/internal/ssagen/simdintrinsics.go		patch \| blob \| history
src/simd/simd_test.go		patch \| blob \| history
src/simd/simd_wrapped_test.go		patch \| blob \| history
src/simd/stubs_amd64.go		patch \| blob \| history