ssa.OpAMD64VPINSRW128:
p = simdVgpvImm8(s, v)
- case ssa.OpAMD64VPEXTRB128,
- ssa.OpAMD64VPEXTRW128,
- ssa.OpAMD64VPEXTRD128,
- ssa.OpAMD64VPEXTRQ128:
+ case ssa.OpAMD64VPEXTRD128,
+ ssa.OpAMD64VPEXTRQ128,
+ ssa.OpAMD64VPEXTRB128,
+ ssa.OpAMD64VPEXTRW128:
p = simdVgpImm8(s, v)
case ssa.OpAMD64VGF2P8AFFINEINVQBMasked128,
(GaloisFieldMulMaskedUint8x16 x y mask) => (VGF2P8MULBMasked128 x y (VPMOVVec8x16ToM <types.TypeMask> mask))
(GaloisFieldMulMaskedUint8x32 x y mask) => (VGF2P8MULBMasked256 x y (VPMOVVec8x32ToM <types.TypeMask> mask))
(GaloisFieldMulMaskedUint8x64 x y mask) => (VGF2P8MULBMasked512 x y (VPMOVVec8x64ToM <types.TypeMask> mask))
+(GetElemFloat32x4 ...) => (VPEXTRD128 ...)
+(GetElemFloat64x2 ...) => (VPEXTRQ128 ...)
(GetElemInt8x16 ...) => (VPEXTRB128 ...)
(GetElemInt16x8 ...) => (VPEXTRW128 ...)
(GetElemInt32x4 ...) => (VPEXTRD128 ...)
{name: "VGF2P8AFFINEQBMasked128", argLength: 3, reg: w2kw, asm: "VGF2P8AFFINEQB", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VGF2P8AFFINEQBMasked256", argLength: 3, reg: w2kw, asm: "VGF2P8AFFINEQB", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VGF2P8AFFINEQBMasked512", argLength: 3, reg: w2kw, asm: "VGF2P8AFFINEQB", aux: "UInt8", commutative: false, typ: "Vec512", resultInArg0: false},
- {name: "VPEXTRB128", argLength: 1, reg: wgp, asm: "VPEXTRB", aux: "UInt8", commutative: false, typ: "int8", resultInArg0: false},
- {name: "VPEXTRW128", argLength: 1, reg: wgp, asm: "VPEXTRW", aux: "UInt8", commutative: false, typ: "int16", resultInArg0: false},
{name: "VPEXTRD128", argLength: 1, reg: vgp, asm: "VPEXTRD", aux: "UInt8", commutative: false, typ: "int32", resultInArg0: false},
{name: "VPEXTRQ128", argLength: 1, reg: vgp, asm: "VPEXTRQ", aux: "UInt8", commutative: false, typ: "int64", resultInArg0: false},
+ {name: "VPEXTRB128", argLength: 1, reg: wgp, asm: "VPEXTRB", aux: "UInt8", commutative: false, typ: "int8", resultInArg0: false},
+ {name: "VPEXTRW128", argLength: 1, reg: wgp, asm: "VPEXTRW", aux: "UInt8", commutative: false, typ: "int16", resultInArg0: false},
{name: "VEXTRACTF128128", argLength: 1, reg: v11, asm: "VEXTRACTF128", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VEXTRACTF64X4256", argLength: 1, reg: w11, asm: "VEXTRACTF64X4", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VEXTRACTI128128", argLength: 1, reg: v11, asm: "VEXTRACTI128", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "GaloisFieldAffineTransformUint8x16", argLength: 2, commutative: false, aux: "UInt8"},
{name: "GaloisFieldAffineTransformUint8x32", argLength: 2, commutative: false, aux: "UInt8"},
{name: "GaloisFieldAffineTransformUint8x64", argLength: 2, commutative: false, aux: "UInt8"},
+ {name: "GetElemFloat32x4", argLength: 1, commutative: false, aux: "UInt8"},
+ {name: "GetElemFloat64x2", argLength: 1, commutative: false, aux: "UInt8"},
{name: "GetElemInt8x16", argLength: 1, commutative: false, aux: "UInt8"},
{name: "GetElemInt16x8", argLength: 1, commutative: false, aux: "UInt8"},
{name: "GetElemInt32x4", argLength: 1, commutative: false, aux: "UInt8"},
OpAMD64VGF2P8AFFINEQBMasked128
OpAMD64VGF2P8AFFINEQBMasked256
OpAMD64VGF2P8AFFINEQBMasked512
- OpAMD64VPEXTRB128
- OpAMD64VPEXTRW128
OpAMD64VPEXTRD128
OpAMD64VPEXTRQ128
+ OpAMD64VPEXTRB128
+ OpAMD64VPEXTRW128
OpAMD64VEXTRACTF128128
OpAMD64VEXTRACTF64X4256
OpAMD64VEXTRACTI128128
OpGaloisFieldAffineTransformUint8x16
OpGaloisFieldAffineTransformUint8x32
OpGaloisFieldAffineTransformUint8x64
+ OpGetElemFloat32x4
+ OpGetElemFloat64x2
OpGetElemInt8x16
OpGetElemInt16x8
OpGetElemInt32x4
},
},
{
- name: "VPEXTRB128",
+ name: "VPEXTRD128",
auxType: auxUInt8,
argLen: 1,
- asm: x86.AVPEXTRB,
+ asm: x86.AVPEXTRD,
reg: regInfo{
inputs: []inputInfo{
- {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
outputs: []outputInfo{
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
},
},
{
- name: "VPEXTRW128",
+ name: "VPEXTRQ128",
auxType: auxUInt8,
argLen: 1,
- asm: x86.AVPEXTRW,
+ asm: x86.AVPEXTRQ,
reg: regInfo{
inputs: []inputInfo{
- {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
outputs: []outputInfo{
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
},
},
{
- name: "VPEXTRD128",
+ name: "VPEXTRB128",
auxType: auxUInt8,
argLen: 1,
- asm: x86.AVPEXTRD,
+ asm: x86.AVPEXTRB,
reg: regInfo{
inputs: []inputInfo{
- {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
},
outputs: []outputInfo{
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
},
},
{
- name: "VPEXTRQ128",
+ name: "VPEXTRW128",
auxType: auxUInt8,
argLen: 1,
- asm: x86.AVPEXTRQ,
+ asm: x86.AVPEXTRW,
reg: regInfo{
inputs: []inputInfo{
- {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
},
outputs: []outputInfo{
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
argLen: 2,
generic: true,
},
+ {
+ name: "GetElemFloat32x4",
+ auxType: auxUInt8,
+ argLen: 1,
+ generic: true,
+ },
+ {
+ name: "GetElemFloat64x2",
+ auxType: auxUInt8,
+ argLen: 1,
+ generic: true,
+ },
{
name: "GetElemInt8x16",
auxType: auxUInt8,
case OpGetClosurePtr:
v.Op = OpAMD64LoweredGetClosurePtr
return true
+ case OpGetElemFloat32x4:
+ v.Op = OpAMD64VPEXTRD128
+ return true
+ case OpGetElemFloat64x2:
+ v.Op = OpAMD64VPEXTRQ128
+ return true
case OpGetElemInt16x8:
v.Op = OpAMD64VPEXTRW128
return true
addF(simdPackage, "Uint8x16.GaloisFieldMulMasked", opLen3(ssa.OpGaloisFieldMulMaskedUint8x16, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint8x32.GaloisFieldMulMasked", opLen3(ssa.OpGaloisFieldMulMaskedUint8x32, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint8x64.GaloisFieldMulMasked", opLen3(ssa.OpGaloisFieldMulMaskedUint8x64, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Float32x4.GetElem", opLen1Imm8(ssa.OpGetElemFloat32x4, types.Types[types.TFLOAT32], 0), sys.AMD64)
+ addF(simdPackage, "Float64x2.GetElem", opLen1Imm8(ssa.OpGetElemFloat64x2, types.Types[types.TFLOAT64], 0), sys.AMD64)
addF(simdPackage, "Int8x16.GetElem", opLen1Imm8(ssa.OpGetElemInt8x16, types.Types[types.TINT8], 0), sys.AMD64)
addF(simdPackage, "Int16x8.GetElem", opLen1Imm8(ssa.OpGetElemInt16x8, types.Types[types.TINT16], 0), sys.AMD64)
addF(simdPackage, "Int32x4.GetElem", opLen1Imm8(ssa.OpGetElemInt32x4, types.Types[types.TINT32], 0), sys.AMD64)
OpsData []opData
OpsDataImm []opData
}
- seen := map[string]struct{}{}
+
regInfoSet := map[string]bool{
"v11": true, "v21": true, "v2k": true, "v2kv": true, "v2kk": true, "vkv": true, "v31": true, "v3kv": true, "vgpv": true, "vgp": true, "vfpv": true, "vfpkv": true,
"w11": true, "w21": true, "w2k": true, "w2kw": true, "w2kk": true, "wkw": true, "w31": true, "w3kw": true, "wgpw": true, "wgp": true, "wfpw": true, "wfpkw": true}
opsData := make([]opData, 0)
opsDataImm := make([]opData, 0)
+
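+ // Determine the "best" version of an instruction to use: for each machine op name,
+ // keep the operation with the fewest OverwriteBase operands; on a tie, the first one seen wins.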
+ best := make(map[string]Operation)
+ var mOpOrder []string
+ countOverrides := func(s []Operand) int {
+ a := 0
+ for _, o := range s {
+ if o.OverwriteBase != nil {
+ a++
+ }
+ }
+ return a
+ }
for _, op := range ops {
- shapeIn, shapeOut, maskType, _, gOp := op.shape()
+ _, _, maskType, _, gOp := op.shape()
asm := machineOpName(maskType, gOp)
+ other, ok := best[asm]
+ if !ok {
+ best[asm] = op
+ mOpOrder = append(mOpOrder, asm)
+ continue
+ }
+ // "op" is better than "other" if it has strictly fewer OverwriteBase operands across In and Out
+ if countOverrides(op.In)+countOverrides(op.Out) < countOverrides(other.In)+countOverrides(other.Out) {
+ best[asm] = op
+ }
+ }
+
+ for _, asm := range mOpOrder {
+ op := best[asm]
+ shapeIn, shapeOut, _, _, gOp := op.shape()
// TODO: all our masked operations are now zeroing, we need to generate machine ops with merging masks, maybe copy
// one here with a name suffix "Merging". The rewrite rules will need them.
- if _, ok := seen[asm]; ok {
- continue
- }
- seen[asm] = struct{}{}
+
regInfo, err := op.regShape()
if err != nil {
panic(err)
NoTypes *string
// If non-nil, all generation in gen_simdGenericOps and gen_simdrules will be skipped.
NoGenericOps *string
- // If non-nil, this string will be attached to the machine ssa op name.
+ // If non-nil, this string will be attached to the machine ssa op name. E.g. "const"
SSAVariant *string
}
base: $b
bits: $e
+- go: GetElem
+ asm: "VPEXTR[DQ]"
+ in:
+ - class: vreg
+ base: int
+ elemBits: $e
+ OverwriteBase: float
+ - *imm
+ out:
+ - class: greg
+ base: int
+ bits: $e
+ OverwriteBase: float
+
- go: "SetHi|SetLo"
asm: "VINSERTI128|VINSERTI64X4"
inVariant: []
/* GetElem */
+// GetElem retrieves a single constant-indexed element's value.
+//
+// index results in better performance when it's a constant; a non-constant value will be translated into a jump table.
+//
+// Asm: VPEXTRD, CPU Feature: AVX
+func (x Float32x4) GetElem(index uint8) float32
+
+// GetElem retrieves a single constant-indexed element's value.
+//
+// index results in better performance when it's a constant; a non-constant value will be translated into a jump table.
+//
+// Asm: VPEXTRQ, CPU Feature: AVX
+func (x Float64x2) GetElem(index uint8) float64
+
// GetElem retrieves a single constant-indexed element's value.
//
// index results in better performance when it's a constant; a non-constant value will be translated into a jump table.