]> Cypherpunks repositories - gostls13.git/commitdiff
[dev.simd] cmd/compile, simd: update DotProd to DotProduct
author Junyang Shao <shaojunyang@google.com>
Fri, 10 Oct 2025 19:18:01 +0000 (19:18 +0000)
committer Junyang Shao <shaojunyang@google.com>
Tue, 14 Oct 2025 19:26:44 +0000 (12:26 -0700)
API naming changes.

This CL also removes AddDotProductPairsSaturated.

Change-Id: I02e6d45268704f3ed4eaf62f0ecb7dc936b42124
Reviewed-on: https://go-review.googlesource.com/c/go/+/710935
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: David Chase <drchase@google.com>
src/cmd/compile/internal/amd64/simdssa.go
src/cmd/compile/internal/ssa/_gen/simdAMD64.rules
src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go
src/cmd/compile/internal/ssa/_gen/simdgenericOps.go
src/cmd/compile/internal/ssa/opGen.go
src/cmd/compile/internal/ssa/rewriteAMD64.go
src/cmd/compile/internal/ssagen/simdintrinsics.go
src/simd/_gen/simdgen/ops/MLOps/categories.yaml
src/simd/_gen/simdgen/ops/MLOps/go.yaml
src/simd/ops_amd64.go

index de9cad8a478a770162c3379b817392924b1b8642..fe2ae019acd0a4ac6d28255f8ddeda1a07c78be8 100644 (file)
@@ -1142,9 +1142,6 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
        case ssa.OpAMD64VPDPWSSD128,
                ssa.OpAMD64VPDPWSSD256,
                ssa.OpAMD64VPDPWSSD512,
-               ssa.OpAMD64VPDPWSSDS128,
-               ssa.OpAMD64VPDPWSSDS256,
-               ssa.OpAMD64VPDPWSSDS512,
                ssa.OpAMD64VPDPBUSD128,
                ssa.OpAMD64VPDPBUSD256,
                ssa.OpAMD64VPDPBUSD512,
@@ -1210,9 +1207,6 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
        case ssa.OpAMD64VPDPWSSDMasked128,
                ssa.OpAMD64VPDPWSSDMasked256,
                ssa.OpAMD64VPDPWSSDMasked512,
-               ssa.OpAMD64VPDPWSSDSMasked128,
-               ssa.OpAMD64VPDPWSSDSMasked256,
-               ssa.OpAMD64VPDPWSSDSMasked512,
                ssa.OpAMD64VPDPBUSDMasked128,
                ssa.OpAMD64VPDPBUSDMasked256,
                ssa.OpAMD64VPDPBUSDMasked512,
@@ -1500,7 +1494,6 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
                p = simdV21load(s, v)
 
        case ssa.OpAMD64VPDPWSSD512load,
-               ssa.OpAMD64VPDPWSSDS512load,
                ssa.OpAMD64VPDPBUSD512load,
                ssa.OpAMD64VPDPBUSDS512load,
                ssa.OpAMD64VFMADD213PS128load,
@@ -1550,9 +1543,6 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
        case ssa.OpAMD64VPDPWSSDMasked128load,
                ssa.OpAMD64VPDPWSSDMasked256load,
                ssa.OpAMD64VPDPWSSDMasked512load,
-               ssa.OpAMD64VPDPWSSDSMasked128load,
-               ssa.OpAMD64VPDPWSSDSMasked256load,
-               ssa.OpAMD64VPDPWSSDSMasked512load,
                ssa.OpAMD64VPDPBUSDMasked128load,
                ssa.OpAMD64VPDPBUSDMasked256load,
                ssa.OpAMD64VPDPBUSDMasked512load,
@@ -1971,9 +1961,6 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
                ssa.OpAMD64VPDPWSSDMasked128,
                ssa.OpAMD64VPDPWSSDMasked256,
                ssa.OpAMD64VPDPWSSDMasked512,
-               ssa.OpAMD64VPDPWSSDSMasked128,
-               ssa.OpAMD64VPDPWSSDSMasked256,
-               ssa.OpAMD64VPDPWSSDSMasked512,
                ssa.OpAMD64VPDPBUSDMasked128,
                ssa.OpAMD64VPDPBUSDMasked256,
                ssa.OpAMD64VPDPBUSDMasked512,
index d9229e958adac0b26d0925988a7a5a0089c0cfd3..9e34d4b8816ca0e15c95ebbb7c5e8b7355de6be6 100644 (file)
 (AddUint64x2 ...) => (VPADDQ128 ...)
 (AddUint64x4 ...) => (VPADDQ256 ...)
 (AddUint64x8 ...) => (VPADDQ512 ...)
-(AddDotProdPairsSaturatedInt32x4 ...) => (VPDPWSSDS128 ...)
-(AddDotProdPairsSaturatedInt32x8 ...) => (VPDPWSSDS256 ...)
-(AddDotProdPairsSaturatedInt32x16 ...) => (VPDPWSSDS512 ...)
-(AddDotProdQuadrupleInt32x4 ...) => (VPDPBUSD128 ...)
-(AddDotProdQuadrupleInt32x8 ...) => (VPDPBUSD256 ...)
-(AddDotProdQuadrupleInt32x16 ...) => (VPDPBUSD512 ...)
-(AddDotProdQuadrupleSaturatedInt32x4 ...) => (VPDPBUSDS128 ...)
-(AddDotProdQuadrupleSaturatedInt32x8 ...) => (VPDPBUSDS256 ...)
-(AddDotProdQuadrupleSaturatedInt32x16 ...) => (VPDPBUSDS512 ...)
+(AddDotProductQuadrupleInt32x4 ...) => (VPDPBUSD128 ...)
+(AddDotProductQuadrupleInt32x8 ...) => (VPDPBUSD256 ...)
+(AddDotProductQuadrupleInt32x16 ...) => (VPDPBUSD512 ...)
+(AddDotProductQuadrupleSaturatedInt32x4 ...) => (VPDPBUSDS128 ...)
+(AddDotProductQuadrupleSaturatedInt32x8 ...) => (VPDPBUSDS256 ...)
+(AddDotProductQuadrupleSaturatedInt32x16 ...) => (VPDPBUSDS512 ...)
 (AddPairsFloat32x4 ...) => (VHADDPS128 ...)
 (AddPairsFloat32x8 ...) => (VHADDPS256 ...)
 (AddPairsFloat64x2 ...) => (VHADDPD128 ...)
 (DivFloat64x2 ...) => (VDIVPD128 ...)
 (DivFloat64x4 ...) => (VDIVPD256 ...)
 (DivFloat64x8 ...) => (VDIVPD512 ...)
-(DotProdPairsInt16x8 ...) => (VPMADDWD128 ...)
-(DotProdPairsInt16x16 ...) => (VPMADDWD256 ...)
-(DotProdPairsInt16x32 ...) => (VPMADDWD512 ...)
-(DotProdPairsSaturatedUint8x16 ...) => (VPMADDUBSW128 ...)
-(DotProdPairsSaturatedUint8x32 ...) => (VPMADDUBSW256 ...)
-(DotProdPairsSaturatedUint8x64 ...) => (VPMADDUBSW512 ...)
+(DotProductPairsInt16x8 ...) => (VPMADDWD128 ...)
+(DotProductPairsInt16x16 ...) => (VPMADDWD256 ...)
+(DotProductPairsInt16x32 ...) => (VPMADDWD512 ...)
+(DotProductPairsSaturatedUint8x16 ...) => (VPMADDUBSW128 ...)
+(DotProductPairsSaturatedUint8x32 ...) => (VPMADDUBSW256 ...)
+(DotProductPairsSaturatedUint8x64 ...) => (VPMADDUBSW512 ...)
 (EqualFloat32x4 x y) => (VCMPPS128 [0] x y)
 (EqualFloat32x8 x y) => (VCMPPS256 [0] x y)
 (EqualFloat32x16 x y) => (VPMOVMToVec32x16 (VCMPPS512 [0] x y))
 (VMOVDQU32Masked512 (VPABSD512 x) mask) => (VPABSDMasked512 x mask)
 (VMOVDQU64Masked512 (VPABSQ512 x) mask) => (VPABSQMasked512 x mask)
 (VMOVDQU32Masked512 (VPDPWSSD512 x y z) mask) => (VPDPWSSDMasked512 x y z mask)
-(VMOVDQU32Masked512 (VPDPWSSDS512 x y z) mask) => (VPDPWSSDSMasked512 x y z mask)
 (VMOVDQU32Masked512 (VPDPBUSD512 x y z) mask) => (VPDPBUSDMasked512 x y z mask)
 (VMOVDQU32Masked512 (VPDPBUSDS512 x y z) mask) => (VPDPBUSDSMasked512 x y z mask)
 (VMOVDQU32Masked512 (VADDPS512 x y) mask) => (VADDPSMasked512 x y mask)
 (VPDPWSSDMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPDPWSSDMasked128load {sym} [off] x y ptr mask mem)
 (VPDPWSSDMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPDPWSSDMasked256load {sym} [off] x y ptr mask mem)
 (VPDPWSSDMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPDPWSSDMasked512load {sym} [off] x y ptr mask mem)
-(VPDPWSSDS512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPDPWSSDS512load {sym} [off] x y ptr mem)
-(VPDPWSSDSMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPDPWSSDSMasked128load {sym} [off] x y ptr mask mem)
-(VPDPWSSDSMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPDPWSSDSMasked256load {sym} [off] x y ptr mask mem)
-(VPDPWSSDSMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPDPWSSDSMasked512load {sym} [off] x y ptr mask mem)
 (VPDPBUSD512 x y l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VPDPBUSD512load {sym} [off] x y ptr mem)
 (VPDPBUSDMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPDPBUSDMasked128load {sym} [off] x y ptr mask mem)
 (VPDPBUSDMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPDPBUSDMasked256load {sym} [off] x y ptr mask mem)
index 680c576bb1468fa9764e7e28aa93873778cd6538..2cdf80c1ba1995ea24c864371abb9ac881cfe774 100644 (file)
@@ -368,12 +368,6 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
                {name: "VPDPWSSDMasked128", argLength: 4, reg: w3kw, asm: "VPDPWSSD", commutative: false, typ: "Vec128", resultInArg0: true},
                {name: "VPDPWSSDMasked256", argLength: 4, reg: w3kw, asm: "VPDPWSSD", commutative: false, typ: "Vec256", resultInArg0: true},
                {name: "VPDPWSSDMasked512", argLength: 4, reg: w3kw, asm: "VPDPWSSD", commutative: false, typ: "Vec512", resultInArg0: true},
-               {name: "VPDPWSSDS128", argLength: 3, reg: v31, asm: "VPDPWSSDS", commutative: false, typ: "Vec128", resultInArg0: true},
-               {name: "VPDPWSSDS256", argLength: 3, reg: v31, asm: "VPDPWSSDS", commutative: false, typ: "Vec256", resultInArg0: true},
-               {name: "VPDPWSSDS512", argLength: 3, reg: w31, asm: "VPDPWSSDS", commutative: false, typ: "Vec512", resultInArg0: true},
-               {name: "VPDPWSSDSMasked128", argLength: 4, reg: w3kw, asm: "VPDPWSSDS", commutative: false, typ: "Vec128", resultInArg0: true},
-               {name: "VPDPWSSDSMasked256", argLength: 4, reg: w3kw, asm: "VPDPWSSDS", commutative: false, typ: "Vec256", resultInArg0: true},
-               {name: "VPDPWSSDSMasked512", argLength: 4, reg: w3kw, asm: "VPDPWSSDS", commutative: false, typ: "Vec512", resultInArg0: true},
                {name: "VPERMB256", argLength: 2, reg: w21, asm: "VPERMB", commutative: false, typ: "Vec256", resultInArg0: false},
                {name: "VPERMB512", argLength: 2, reg: w21, asm: "VPERMB", commutative: false, typ: "Vec512", resultInArg0: false},
                {name: "VPERMBMasked256", argLength: 3, reg: w2kw, asm: "VPERMB", commutative: false, typ: "Vec256", resultInArg0: false},
@@ -1346,10 +1340,6 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
                {name: "VPDPWSSDMasked128load", argLength: 5, reg: w3kwload, asm: "VPDPWSSD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: true},
                {name: "VPDPWSSDMasked256load", argLength: 5, reg: w3kwload, asm: "VPDPWSSD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: true},
                {name: "VPDPWSSDMasked512load", argLength: 5, reg: w3kwload, asm: "VPDPWSSD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: true},
-               {name: "VPDPWSSDS512load", argLength: 4, reg: w31load, asm: "VPDPWSSDS", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: true},
-               {name: "VPDPWSSDSMasked128load", argLength: 5, reg: w3kwload, asm: "VPDPWSSDS", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: true},
-               {name: "VPDPWSSDSMasked256load", argLength: 5, reg: w3kwload, asm: "VPDPWSSDS", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: true},
-               {name: "VPDPWSSDSMasked512load", argLength: 5, reg: w3kwload, asm: "VPDPWSSDS", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: true},
                {name: "VPDPBUSD512load", argLength: 4, reg: w31load, asm: "VPDPBUSD", commutative: false, typ: "Vec512", aux: "SymOff", symEffect: "Read", resultInArg0: true},
                {name: "VPDPBUSDMasked128load", argLength: 5, reg: w3kwload, asm: "VPDPBUSD", commutative: false, typ: "Vec128", aux: "SymOff", symEffect: "Read", resultInArg0: true},
                {name: "VPDPBUSDMasked256load", argLength: 5, reg: w3kwload, asm: "VPDPBUSD", commutative: false, typ: "Vec256", aux: "SymOff", symEffect: "Read", resultInArg0: true},
index 2e9f3ff1c49fd305e28e5f0d85a41f2fe222f7bf..f5eb9075d710b0ff13e6dede5f4ba357f1cb4ba7 100644 (file)
@@ -25,15 +25,12 @@ func simdGenericOps() []opData {
                {name: "AbsInt64x2", argLength: 1, commutative: false},
                {name: "AbsInt64x4", argLength: 1, commutative: false},
                {name: "AbsInt64x8", argLength: 1, commutative: false},
-               {name: "AddDotProdPairsSaturatedInt32x4", argLength: 3, commutative: false},
-               {name: "AddDotProdPairsSaturatedInt32x8", argLength: 3, commutative: false},
-               {name: "AddDotProdPairsSaturatedInt32x16", argLength: 3, commutative: false},
-               {name: "AddDotProdQuadrupleInt32x4", argLength: 3, commutative: false},
-               {name: "AddDotProdQuadrupleInt32x8", argLength: 3, commutative: false},
-               {name: "AddDotProdQuadrupleInt32x16", argLength: 3, commutative: false},
-               {name: "AddDotProdQuadrupleSaturatedInt32x4", argLength: 3, commutative: false},
-               {name: "AddDotProdQuadrupleSaturatedInt32x8", argLength: 3, commutative: false},
-               {name: "AddDotProdQuadrupleSaturatedInt32x16", argLength: 3, commutative: false},
+               {name: "AddDotProductQuadrupleInt32x4", argLength: 3, commutative: false},
+               {name: "AddDotProductQuadrupleInt32x8", argLength: 3, commutative: false},
+               {name: "AddDotProductQuadrupleInt32x16", argLength: 3, commutative: false},
+               {name: "AddDotProductQuadrupleSaturatedInt32x4", argLength: 3, commutative: false},
+               {name: "AddDotProductQuadrupleSaturatedInt32x8", argLength: 3, commutative: false},
+               {name: "AddDotProductQuadrupleSaturatedInt32x16", argLength: 3, commutative: false},
                {name: "AddFloat32x4", argLength: 2, commutative: true},
                {name: "AddFloat32x8", argLength: 2, commutative: true},
                {name: "AddFloat32x16", argLength: 2, commutative: true},
@@ -344,12 +341,12 @@ func simdGenericOps() []opData {
                {name: "DivFloat64x2", argLength: 2, commutative: false},
                {name: "DivFloat64x4", argLength: 2, commutative: false},
                {name: "DivFloat64x8", argLength: 2, commutative: false},
-               {name: "DotProdPairsInt16x8", argLength: 2, commutative: false},
-               {name: "DotProdPairsInt16x16", argLength: 2, commutative: false},
-               {name: "DotProdPairsInt16x32", argLength: 2, commutative: false},
-               {name: "DotProdPairsSaturatedUint8x16", argLength: 2, commutative: false},
-               {name: "DotProdPairsSaturatedUint8x32", argLength: 2, commutative: false},
-               {name: "DotProdPairsSaturatedUint8x64", argLength: 2, commutative: false},
+               {name: "DotProductPairsInt16x8", argLength: 2, commutative: false},
+               {name: "DotProductPairsInt16x16", argLength: 2, commutative: false},
+               {name: "DotProductPairsInt16x32", argLength: 2, commutative: false},
+               {name: "DotProductPairsSaturatedUint8x16", argLength: 2, commutative: false},
+               {name: "DotProductPairsSaturatedUint8x32", argLength: 2, commutative: false},
+               {name: "DotProductPairsSaturatedUint8x64", argLength: 2, commutative: false},
                {name: "EqualFloat32x4", argLength: 2, commutative: true},
                {name: "EqualFloat32x8", argLength: 2, commutative: true},
                {name: "EqualFloat32x16", argLength: 2, commutative: true},
index 30831e828a811c1c88bf05ebbe009da6c3fde02d..6dd7082e100651e3096c7dc2cf5c04f8ea0a857f 100644 (file)
@@ -1608,12 +1608,6 @@ const (
        OpAMD64VPDPWSSDMasked128
        OpAMD64VPDPWSSDMasked256
        OpAMD64VPDPWSSDMasked512
-       OpAMD64VPDPWSSDS128
-       OpAMD64VPDPWSSDS256
-       OpAMD64VPDPWSSDS512
-       OpAMD64VPDPWSSDSMasked128
-       OpAMD64VPDPWSSDSMasked256
-       OpAMD64VPDPWSSDSMasked512
        OpAMD64VPERMB256
        OpAMD64VPERMB512
        OpAMD64VPERMBMasked256
@@ -2586,10 +2580,6 @@ const (
        OpAMD64VPDPWSSDMasked128load
        OpAMD64VPDPWSSDMasked256load
        OpAMD64VPDPWSSDMasked512load
-       OpAMD64VPDPWSSDS512load
-       OpAMD64VPDPWSSDSMasked128load
-       OpAMD64VPDPWSSDSMasked256load
-       OpAMD64VPDPWSSDSMasked512load
        OpAMD64VPDPBUSD512load
        OpAMD64VPDPBUSDMasked128load
        OpAMD64VPDPBUSDMasked256load
@@ -5416,15 +5406,12 @@ const (
        OpAbsInt64x2
        OpAbsInt64x4
        OpAbsInt64x8
-       OpAddDotProdPairsSaturatedInt32x4
-       OpAddDotProdPairsSaturatedInt32x8
-       OpAddDotProdPairsSaturatedInt32x16
-       OpAddDotProdQuadrupleInt32x4
-       OpAddDotProdQuadrupleInt32x8
-       OpAddDotProdQuadrupleInt32x16
-       OpAddDotProdQuadrupleSaturatedInt32x4
-       OpAddDotProdQuadrupleSaturatedInt32x8
-       OpAddDotProdQuadrupleSaturatedInt32x16
+       OpAddDotProductQuadrupleInt32x4
+       OpAddDotProductQuadrupleInt32x8
+       OpAddDotProductQuadrupleInt32x16
+       OpAddDotProductQuadrupleSaturatedInt32x4
+       OpAddDotProductQuadrupleSaturatedInt32x8
+       OpAddDotProductQuadrupleSaturatedInt32x16
        OpAddFloat32x4
        OpAddFloat32x8
        OpAddFloat32x16
@@ -5735,12 +5722,12 @@ const (
        OpDivFloat64x2
        OpDivFloat64x4
        OpDivFloat64x8
-       OpDotProdPairsInt16x8
-       OpDotProdPairsInt16x16
-       OpDotProdPairsInt16x32
-       OpDotProdPairsSaturatedUint8x16
-       OpDotProdPairsSaturatedUint8x32
-       OpDotProdPairsSaturatedUint8x64
+       OpDotProductPairsInt16x8
+       OpDotProductPairsInt16x16
+       OpDotProductPairsInt16x32
+       OpDotProductPairsSaturatedUint8x16
+       OpDotProductPairsSaturatedUint8x32
+       OpDotProductPairsSaturatedUint8x64
        OpEqualFloat32x4
        OpEqualFloat32x8
        OpEqualFloat32x16
@@ -25338,105 +25325,6 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
-       {
-               name:         "VPDPWSSDS128",
-               argLen:       3,
-               resultInArg0: true,
-               asm:          x86.AVPDPWSSDS,
-               reg: regInfo{
-                       inputs: []inputInfo{
-                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
-                               {1, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
-                               {2, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
-                       },
-                       outputs: []outputInfo{
-                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
-                       },
-               },
-       },
-       {
-               name:         "VPDPWSSDS256",
-               argLen:       3,
-               resultInArg0: true,
-               asm:          x86.AVPDPWSSDS,
-               reg: regInfo{
-                       inputs: []inputInfo{
-                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
-                               {1, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
-                               {2, 4294901760}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15
-                       },
-                       outputs: []outputInfo{
-                               {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
-                       },
-               },
-       },
-       {
-               name:         "VPDPWSSDS512",
-               argLen:       3,
-               resultInArg0: true,
-               asm:          x86.AVPDPWSSDS,
-               reg: regInfo{
-                       inputs: []inputInfo{
-                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
-                               {1, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
-                               {2, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
-                       },
-                       outputs: []outputInfo{
-                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
-                       },
-               },
-       },
-       {
-               name:         "VPDPWSSDSMasked128",
-               argLen:       4,
-               resultInArg0: true,
-               asm:          x86.AVPDPWSSDS,
-               reg: regInfo{
-                       inputs: []inputInfo{
-                               {3, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
-                               {0, 281472829161472},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
-                               {1, 281474976645120},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
-                               {2, 281474976645120},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
-                       },
-                       outputs: []outputInfo{
-                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
-                       },
-               },
-       },
-       {
-               name:         "VPDPWSSDSMasked256",
-               argLen:       4,
-               resultInArg0: true,
-               asm:          x86.AVPDPWSSDS,
-               reg: regInfo{
-                       inputs: []inputInfo{
-                               {3, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
-                               {0, 281472829161472},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
-                               {1, 281474976645120},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
-                               {2, 281474976645120},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
-                       },
-                       outputs: []outputInfo{
-                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
-                       },
-               },
-       },
-       {
-               name:         "VPDPWSSDSMasked512",
-               argLen:       4,
-               resultInArg0: true,
-               asm:          x86.AVPDPWSSDS,
-               reg: regInfo{
-                       inputs: []inputInfo{
-                               {3, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
-                               {0, 281472829161472},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
-                               {1, 281474976645120},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
-                               {2, 281474976645120},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
-                       },
-                       outputs: []outputInfo{
-                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
-                       },
-               },
-       },
        {
                name:   "VPERMB256",
                argLen: 2,
@@ -39773,81 +39661,6 @@ var opcodeTable = [...]opInfo{
                        },
                },
        },
-       {
-               name:         "VPDPWSSDS512load",
-               auxType:      auxSymOff,
-               argLen:       4,
-               resultInArg0: true,
-               symEffect:    SymRead,
-               asm:          x86.AVPDPWSSDS,
-               reg: regInfo{
-                       inputs: []inputInfo{
-                               {2, 72057594037977087}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15 SB
-                               {0, 281472829161472},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
-                               {1, 281474976645120},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
-                       },
-                       outputs: []outputInfo{
-                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
-                       },
-               },
-       },
-       {
-               name:         "VPDPWSSDSMasked128load",
-               auxType:      auxSymOff,
-               argLen:       5,
-               resultInArg0: true,
-               symEffect:    SymRead,
-               asm:          x86.AVPDPWSSDS,
-               reg: regInfo{
-                       inputs: []inputInfo{
-                               {3, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
-                               {2, 72057594037977087}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15 SB
-                               {0, 281472829161472},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
-                               {1, 281474976645120},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
-                       },
-                       outputs: []outputInfo{
-                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
-                       },
-               },
-       },
-       {
-               name:         "VPDPWSSDSMasked256load",
-               auxType:      auxSymOff,
-               argLen:       5,
-               resultInArg0: true,
-               symEffect:    SymRead,
-               asm:          x86.AVPDPWSSDS,
-               reg: regInfo{
-                       inputs: []inputInfo{
-                               {3, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
-                               {2, 72057594037977087}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15 SB
-                               {0, 281472829161472},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
-                               {1, 281474976645120},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
-                       },
-                       outputs: []outputInfo{
-                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
-                       },
-               },
-       },
-       {
-               name:         "VPDPWSSDSMasked512load",
-               auxType:      auxSymOff,
-               argLen:       5,
-               resultInArg0: true,
-               symEffect:    SymRead,
-               asm:          x86.AVPDPWSSDS,
-               reg: regInfo{
-                       inputs: []inputInfo{
-                               {3, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
-                               {2, 72057594037977087}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15 SB
-                               {0, 281472829161472},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
-                               {1, 281474976645120},   // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
-                       },
-                       outputs: []outputInfo{
-                               {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
-                       },
-               },
-       },
        {
                name:         "VPDPBUSD512load",
                auxType:      auxSymOff,
@@ -76268,47 +76081,32 @@ var opcodeTable = [...]opInfo{
                generic: true,
        },
        {
-               name:    "AddDotProdPairsSaturatedInt32x4",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "AddDotProdPairsSaturatedInt32x8",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "AddDotProdPairsSaturatedInt32x16",
-               argLen:  3,
-               generic: true,
-       },
-       {
-               name:    "AddDotProdQuadrupleInt32x4",
+               name:    "AddDotProductQuadrupleInt32x4",
                argLen:  3,
                generic: true,
        },
        {
-               name:    "AddDotProdQuadrupleInt32x8",
+               name:    "AddDotProductQuadrupleInt32x8",
                argLen:  3,
                generic: true,
        },
        {
-               name:    "AddDotProdQuadrupleInt32x16",
+               name:    "AddDotProductQuadrupleInt32x16",
                argLen:  3,
                generic: true,
        },
        {
-               name:    "AddDotProdQuadrupleSaturatedInt32x4",
+               name:    "AddDotProductQuadrupleSaturatedInt32x4",
                argLen:  3,
                generic: true,
        },
        {
-               name:    "AddDotProdQuadrupleSaturatedInt32x8",
+               name:    "AddDotProductQuadrupleSaturatedInt32x8",
                argLen:  3,
                generic: true,
        },
        {
-               name:    "AddDotProdQuadrupleSaturatedInt32x16",
+               name:    "AddDotProductQuadrupleSaturatedInt32x16",
                argLen:  3,
                generic: true,
        },
@@ -77935,32 +77733,32 @@ var opcodeTable = [...]opInfo{
                generic: true,
        },
        {
-               name:    "DotProdPairsInt16x8",
+               name:    "DotProductPairsInt16x8",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "DotProdPairsInt16x16",
+               name:    "DotProductPairsInt16x16",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "DotProdPairsInt16x32",
+               name:    "DotProductPairsInt16x32",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "DotProdPairsSaturatedUint8x16",
+               name:    "DotProductPairsSaturatedUint8x16",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "DotProdPairsSaturatedUint8x32",
+               name:    "DotProductPairsSaturatedUint8x32",
                argLen:  2,
                generic: true,
        },
        {
-               name:    "DotProdPairsSaturatedUint8x64",
+               name:    "DotProductPairsSaturatedUint8x64",
                argLen:  2,
                generic: true,
        },
index 908fd71b783a1a3979b9faab7d03b6e94826cd1e..42814029144a6756702a88414bb050bbec26be66 100644 (file)
@@ -949,14 +949,6 @@ func rewriteValueAMD64(v *Value) bool {
                return rewriteValueAMD64_OpAMD64VPDPWSSDMasked256(v)
        case OpAMD64VPDPWSSDMasked512:
                return rewriteValueAMD64_OpAMD64VPDPWSSDMasked512(v)
-       case OpAMD64VPDPWSSDS512:
-               return rewriteValueAMD64_OpAMD64VPDPWSSDS512(v)
-       case OpAMD64VPDPWSSDSMasked128:
-               return rewriteValueAMD64_OpAMD64VPDPWSSDSMasked128(v)
-       case OpAMD64VPDPWSSDSMasked256:
-               return rewriteValueAMD64_OpAMD64VPDPWSSDSMasked256(v)
-       case OpAMD64VPDPWSSDSMasked512:
-               return rewriteValueAMD64_OpAMD64VPDPWSSDSMasked512(v)
        case OpAMD64VPERMD512:
                return rewriteValueAMD64_OpAMD64VPERMD512(v)
        case OpAMD64VPERMDMasked256:
@@ -1871,31 +1863,22 @@ func rewriteValueAMD64(v *Value) bool {
        case OpAdd8:
                v.Op = OpAMD64ADDL
                return true
-       case OpAddDotProdPairsSaturatedInt32x16:
-               v.Op = OpAMD64VPDPWSSDS512
-               return true
-       case OpAddDotProdPairsSaturatedInt32x4:
-               v.Op = OpAMD64VPDPWSSDS128
-               return true
-       case OpAddDotProdPairsSaturatedInt32x8:
-               v.Op = OpAMD64VPDPWSSDS256
-               return true
-       case OpAddDotProdQuadrupleInt32x16:
+       case OpAddDotProductQuadrupleInt32x16:
                v.Op = OpAMD64VPDPBUSD512
                return true
-       case OpAddDotProdQuadrupleInt32x4:
+       case OpAddDotProductQuadrupleInt32x4:
                v.Op = OpAMD64VPDPBUSD128
                return true
-       case OpAddDotProdQuadrupleInt32x8:
+       case OpAddDotProductQuadrupleInt32x8:
                v.Op = OpAMD64VPDPBUSD256
                return true
-       case OpAddDotProdQuadrupleSaturatedInt32x16:
+       case OpAddDotProductQuadrupleSaturatedInt32x16:
                v.Op = OpAMD64VPDPBUSDS512
                return true
-       case OpAddDotProdQuadrupleSaturatedInt32x4:
+       case OpAddDotProductQuadrupleSaturatedInt32x4:
                v.Op = OpAMD64VPDPBUSDS128
                return true
-       case OpAddDotProdQuadrupleSaturatedInt32x8:
+       case OpAddDotProductQuadrupleSaturatedInt32x8:
                v.Op = OpAMD64VPDPBUSDS256
                return true
        case OpAddFloat32x16:
@@ -3064,22 +3047,22 @@ func rewriteValueAMD64(v *Value) bool {
        case OpDivFloat64x8:
                v.Op = OpAMD64VDIVPD512
                return true
-       case OpDotProdPairsInt16x16:
+       case OpDotProductPairsInt16x16:
                v.Op = OpAMD64VPMADDWD256
                return true
-       case OpDotProdPairsInt16x32:
+       case OpDotProductPairsInt16x32:
                v.Op = OpAMD64VPMADDWD512
                return true
-       case OpDotProdPairsInt16x8:
+       case OpDotProductPairsInt16x8:
                v.Op = OpAMD64VPMADDWD128
                return true
-       case OpDotProdPairsSaturatedUint8x16:
+       case OpDotProductPairsSaturatedUint8x16:
                v.Op = OpAMD64VPMADDUBSW128
                return true
-       case OpDotProdPairsSaturatedUint8x32:
+       case OpDotProductPairsSaturatedUint8x32:
                v.Op = OpAMD64VPMADDUBSW256
                return true
-       case OpDotProdPairsSaturatedUint8x64:
+       case OpDotProductPairsSaturatedUint8x64:
                v.Op = OpAMD64VPMADDUBSW512
                return true
        case OpEq16:
@@ -31631,20 +31614,6 @@ func rewriteValueAMD64_OpAMD64VMOVDQU32Masked512(v *Value) bool {
                v.AddArg4(x, y, z, mask)
                return true
        }
-       // match: (VMOVDQU32Masked512 (VPDPWSSDS512 x y z) mask)
-       // result: (VPDPWSSDSMasked512 x y z mask)
-       for {
-               if v_0.Op != OpAMD64VPDPWSSDS512 {
-                       break
-               }
-               z := v_0.Args[2]
-               x := v_0.Args[0]
-               y := v_0.Args[1]
-               mask := v_1
-               v.reset(OpAMD64VPDPWSSDSMasked512)
-               v.AddArg4(x, y, z, mask)
-               return true
-       }
        // match: (VMOVDQU32Masked512 (VPDPBUSD512 x y z) mask)
        // result: (VPDPBUSDMasked512 x y z mask)
        for {
@@ -36686,128 +36655,6 @@ func rewriteValueAMD64_OpAMD64VPDPWSSDMasked512(v *Value) bool {
        }
        return false
 }
-func rewriteValueAMD64_OpAMD64VPDPWSSDS512(v *Value) bool {
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (VPDPWSSDS512 x y l:(VMOVDQUload512 {sym} [off] ptr mem))
-       // cond: canMergeLoad(v, l) && clobber(l)
-       // result: (VPDPWSSDS512load {sym} [off] x y ptr mem)
-       for {
-               x := v_0
-               y := v_1
-               l := v_2
-               if l.Op != OpAMD64VMOVDQUload512 {
-                       break
-               }
-               off := auxIntToInt32(l.AuxInt)
-               sym := auxToSym(l.Aux)
-               mem := l.Args[1]
-               ptr := l.Args[0]
-               if !(canMergeLoad(v, l) && clobber(l)) {
-                       break
-               }
-               v.reset(OpAMD64VPDPWSSDS512load)
-               v.AuxInt = int32ToAuxInt(off)
-               v.Aux = symToAux(sym)
-               v.AddArg4(x, y, ptr, mem)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64VPDPWSSDSMasked128(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (VPDPWSSDSMasked128 x y l:(VMOVDQUload128 {sym} [off] ptr mem) mask)
-       // cond: canMergeLoad(v, l) && clobber(l)
-       // result: (VPDPWSSDSMasked128load {sym} [off] x y ptr mask mem)
-       for {
-               x := v_0
-               y := v_1
-               l := v_2
-               if l.Op != OpAMD64VMOVDQUload128 {
-                       break
-               }
-               off := auxIntToInt32(l.AuxInt)
-               sym := auxToSym(l.Aux)
-               mem := l.Args[1]
-               ptr := l.Args[0]
-               mask := v_3
-               if !(canMergeLoad(v, l) && clobber(l)) {
-                       break
-               }
-               v.reset(OpAMD64VPDPWSSDSMasked128load)
-               v.AuxInt = int32ToAuxInt(off)
-               v.Aux = symToAux(sym)
-               v.AddArg5(x, y, ptr, mask, mem)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64VPDPWSSDSMasked256(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (VPDPWSSDSMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask)
-       // cond: canMergeLoad(v, l) && clobber(l)
-       // result: (VPDPWSSDSMasked256load {sym} [off] x y ptr mask mem)
-       for {
-               x := v_0
-               y := v_1
-               l := v_2
-               if l.Op != OpAMD64VMOVDQUload256 {
-                       break
-               }
-               off := auxIntToInt32(l.AuxInt)
-               sym := auxToSym(l.Aux)
-               mem := l.Args[1]
-               ptr := l.Args[0]
-               mask := v_3
-               if !(canMergeLoad(v, l) && clobber(l)) {
-                       break
-               }
-               v.reset(OpAMD64VPDPWSSDSMasked256load)
-               v.AuxInt = int32ToAuxInt(off)
-               v.Aux = symToAux(sym)
-               v.AddArg5(x, y, ptr, mask, mem)
-               return true
-       }
-       return false
-}
-func rewriteValueAMD64_OpAMD64VPDPWSSDSMasked512(v *Value) bool {
-       v_3 := v.Args[3]
-       v_2 := v.Args[2]
-       v_1 := v.Args[1]
-       v_0 := v.Args[0]
-       // match: (VPDPWSSDSMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask)
-       // cond: canMergeLoad(v, l) && clobber(l)
-       // result: (VPDPWSSDSMasked512load {sym} [off] x y ptr mask mem)
-       for {
-               x := v_0
-               y := v_1
-               l := v_2
-               if l.Op != OpAMD64VMOVDQUload512 {
-                       break
-               }
-               off := auxIntToInt32(l.AuxInt)
-               sym := auxToSym(l.Aux)
-               mem := l.Args[1]
-               ptr := l.Args[0]
-               mask := v_3
-               if !(canMergeLoad(v, l) && clobber(l)) {
-                       break
-               }
-               v.reset(OpAMD64VPDPWSSDSMasked512load)
-               v.AuxInt = int32ToAuxInt(off)
-               v.Aux = symToAux(sym)
-               v.AddArg5(x, y, ptr, mask, mem)
-               return true
-       }
-       return false
-}
 func rewriteValueAMD64_OpAMD64VPERMD512(v *Value) bool {
        v_1 := v.Args[1]
        v_0 := v.Args[0]
index 47be7d67a41edf6b518682420c8493438ab15ffc..d4fb524b2471e8ac4ead85bd0affb78188030167 100644 (file)
@@ -64,15 +64,12 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
        addF(simdPackage, "Uint64x2.Add", opLen2(ssa.OpAddUint64x2, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Uint64x4.Add", opLen2(ssa.OpAddUint64x4, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Uint64x8.Add", opLen2(ssa.OpAddUint64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int32x4.AddDotProdPairsSaturated", opLen3(ssa.OpAddDotProdPairsSaturatedInt32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int32x8.AddDotProdPairsSaturated", opLen3(ssa.OpAddDotProdPairsSaturatedInt32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int32x16.AddDotProdPairsSaturated", opLen3(ssa.OpAddDotProdPairsSaturatedInt32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int8x16.AddDotProdQuadruple", opLen3_31(ssa.OpAddDotProdQuadrupleInt32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int8x32.AddDotProdQuadruple", opLen3_31(ssa.OpAddDotProdQuadrupleInt32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int8x64.AddDotProdQuadruple", opLen3_31(ssa.OpAddDotProdQuadrupleInt32x16, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int8x16.AddDotProdQuadrupleSaturated", opLen3_31(ssa.OpAddDotProdQuadrupleSaturatedInt32x4, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int8x32.AddDotProdQuadrupleSaturated", opLen3_31(ssa.OpAddDotProdQuadrupleSaturatedInt32x8, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int8x64.AddDotProdQuadrupleSaturated", opLen3_31(ssa.OpAddDotProdQuadrupleSaturatedInt32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int8x16.AddDotProductQuadruple", opLen3_31(ssa.OpAddDotProductQuadrupleInt32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int8x32.AddDotProductQuadruple", opLen3_31(ssa.OpAddDotProductQuadrupleInt32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int8x64.AddDotProductQuadruple", opLen3_31(ssa.OpAddDotProductQuadrupleInt32x16, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int8x16.AddDotProductQuadrupleSaturated", opLen3_31(ssa.OpAddDotProductQuadrupleSaturatedInt32x4, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int8x32.AddDotProductQuadrupleSaturated", opLen3_31(ssa.OpAddDotProductQuadrupleSaturatedInt32x8, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int8x64.AddDotProductQuadrupleSaturated", opLen3_31(ssa.OpAddDotProductQuadrupleSaturatedInt32x16, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Float32x4.AddPairs", opLen2(ssa.OpAddPairsFloat32x4, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Float32x8.AddPairs", opLen2(ssa.OpAddPairsFloat32x8, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float64x2.AddPairs", opLen2(ssa.OpAddPairsFloat64x2, types.TypeVec128), sys.AMD64)
@@ -365,12 +362,12 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
        addF(simdPackage, "Float64x2.Div", opLen2(ssa.OpDivFloat64x2, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Float64x4.Div", opLen2(ssa.OpDivFloat64x4, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Float64x8.Div", opLen2(ssa.OpDivFloat64x8, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Int16x8.DotProdPairs", opLen2(ssa.OpDotProdPairsInt16x8, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Int16x16.DotProdPairs", opLen2(ssa.OpDotProdPairsInt16x16, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Int16x32.DotProdPairs", opLen2(ssa.OpDotProdPairsInt16x32, types.TypeVec512), sys.AMD64)
-       addF(simdPackage, "Uint8x16.DotProdPairsSaturated", opLen2(ssa.OpDotProdPairsSaturatedUint8x16, types.TypeVec128), sys.AMD64)
-       addF(simdPackage, "Uint8x32.DotProdPairsSaturated", opLen2(ssa.OpDotProdPairsSaturatedUint8x32, types.TypeVec256), sys.AMD64)
-       addF(simdPackage, "Uint8x64.DotProdPairsSaturated", opLen2(ssa.OpDotProdPairsSaturatedUint8x64, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Int16x8.DotProductPairs", opLen2(ssa.OpDotProductPairsInt16x8, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Int16x16.DotProductPairs", opLen2(ssa.OpDotProductPairsInt16x16, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Int16x32.DotProductPairs", opLen2(ssa.OpDotProductPairsInt16x32, types.TypeVec512), sys.AMD64)
+       addF(simdPackage, "Uint8x16.DotProductPairsSaturated", opLen2(ssa.OpDotProductPairsSaturatedUint8x16, types.TypeVec128), sys.AMD64)
+       addF(simdPackage, "Uint8x32.DotProductPairsSaturated", opLen2(ssa.OpDotProductPairsSaturatedUint8x32, types.TypeVec256), sys.AMD64)
+       addF(simdPackage, "Uint8x64.DotProductPairsSaturated", opLen2(ssa.OpDotProductPairsSaturatedUint8x64, types.TypeVec512), sys.AMD64)
        addF(simdPackage, "Int8x16.Equal", opLen2(ssa.OpEqualInt8x16, types.TypeVec128), sys.AMD64)
        addF(simdPackage, "Int8x32.Equal", opLen2(ssa.OpEqualInt8x32, types.TypeVec256), sys.AMD64)
        addF(simdPackage, "Int8x64.Equal", opLen2(ssa.OpEqualInt8x64, types.TypeVec512), sys.AMD64)
index 772a7b3cf67d92f763a117ada8bd50668ea26c36..0317b42c6afdc00211d6a34053ce25902d7ff46a 100644 (file)
@@ -1,38 +1,34 @@
 !sum
-- go: DotProdPairs
+- go: DotProductPairs
   commutative: false
   documentation: !string |-
     // NAME multiplies the elements and add the pairs together,
     // yielding a vector of half as many elements with twice the input element size.
 # TODO: maybe simplify this name within the receiver-type + method-naming scheme we use.
-- go: DotProdPairsSaturated
+- go: DotProductPairsSaturated
   commutative: false
   documentation: !string |-
     // NAME multiplies the elements and add the pairs together with saturation,
     // yielding a vector of half as many elements with twice the input element size.
-# QuadDotProd, i.e. VPDPBUSD(S) are operations with src/dst on the same register, we are not supporting this as of now.
-# - go: DotProdBroadcast
+# QuadDotProduct, i.e. VPDPBUSD(S) are operations with src/dst on the same register, we are not supporting this as of now.
+# - go: DotProductBroadcast
 #   commutative: true
 # #   documentation: !string |-
 #     // NAME multiplies all elements and broadcasts the sum.
-- go: AddDotProdQuadruple
+- go: AddDotProductQuadruple
   commutative: false
   documentation: !string |-
     // NAME performs dot products on groups of 4 elements of x and y and then adds z.
-- go: AddDotProdQuadrupleSaturated
+- go: AddDotProductQuadrupleSaturated
   commutative: false
   documentation: !string |-
     // NAME multiplies performs dot products on groups of 4 elements of x and y and then adds z.
-- go: AddDotProdPairs
+- go: AddDotProductPairs
   commutative: false
   noTypes: "true"
   noGenericOps: "true"
   documentation: !string |-
     // NAME performs dot products on pairs of elements of y and z and then adds x.
-- go: AddDotProdPairsSaturated
-  commutative: false
-  documentation: !string |-
-    // NAME performs dot products on pairs of elements of y and z and then adds x.
 - go: MulAdd
   commutative: false
   documentation: !string |-
index 5c2009dcf81ffa94c34f39e1853b2380bfbebbdc..162c47ea0ef3f04df0fd68d2ba030a09759f07a0 100644 (file)
@@ -1,5 +1,5 @@
 !sum
-- go: DotProdPairs
+- go: DotProductPairs
   asm: VPMADDWD
   in:
   - &int
@@ -10,7 +10,7 @@
   - &int2 # The elemBits are different
     go: $t2
     base: int
-- go: DotProdPairsSaturated
+- go: DotProductPairsSaturated
   asm: VPMADDUBSW
   in:
   - &uint
@@ -23,7 +23,7 @@
     overwriteElementBits: 8
   out:
   - *int2
-# - go: DotProdBroadcast
+# - go: DotProductBroadcast
 #   asm: VDPP[SD]
 #   in:
 #   - &dpb_src
@@ -33,7 +33,7 @@
 #     const: 127
 #   out:
 #   - *dpb_src
-- go: AddDotProdQuadruple
+- go: AddDotProductQuadruple
   asm: "VPDPBUSD"
   operandOrder: "31" # switch operand 3 and 1
   in:
@@ -51,7 +51,7 @@
     overwriteElementBits: 8
   out:
   - *qdpa_acc
-- go: AddDotProdQuadrupleSaturated
+- go: AddDotProductQuadrupleSaturated
   asm: "VPDPBUSDS"
   operandOrder: "31" # switch operand 3 and 1
   in:
@@ -60,7 +60,7 @@
   - *qdpa_src2
   out:
   - *qdpa_acc
-- go: AddDotProdPairs
+- go: AddDotProductPairs
   asm: "VPDPWSSD"
   in:
   - &pdpa_acc
     overwriteElementBits: 16
   out:
   - *pdpa_acc
-- go: AddDotProdPairsSaturated
-  asm: "VPDPWSSDS"
-  in:
-  - *pdpa_acc
-  - *pdpa_src1
-  - *pdpa_src2
-  out:
-  - *pdpa_acc
 - go: MulAdd
   asm: "VFMADD213PS|VFMADD213PD"
   in:
index 8956c2e0772c0dc69c63b01237ece5f401c94f7d..2331622361725ae971d5075537b09c7df104419d 100644 (file)
@@ -314,56 +314,39 @@ func (x Uint64x4) Add(y Uint64x4) Uint64x4
 // Asm: VPADDQ, CPU Feature: AVX512
 func (x Uint64x8) Add(y Uint64x8) Uint64x8
 
-/* AddDotProdPairsSaturated */
+/* AddDotProductQuadruple */
 
-// AddDotProdPairsSaturated performs dot products on pairs of elements of y and z and then adds x.
-//
-// Asm: VPDPWSSDS, CPU Feature: AVXVNNI
-func (x Int32x4) AddDotProdPairsSaturated(y Int16x8, z Int16x8) Int32x4
-
-// AddDotProdPairsSaturated performs dot products on pairs of elements of y and z and then adds x.
-//
-// Asm: VPDPWSSDS, CPU Feature: AVXVNNI
-func (x Int32x8) AddDotProdPairsSaturated(y Int16x16, z Int16x16) Int32x8
-
-// AddDotProdPairsSaturated performs dot products on pairs of elements of y and z and then adds x.
-//
-// Asm: VPDPWSSDS, CPU Feature: AVX512VNNI
-func (x Int32x16) AddDotProdPairsSaturated(y Int16x32, z Int16x32) Int32x16
-
-/* AddDotProdQuadruple */
-
-// AddDotProdQuadruple performs dot products on groups of 4 elements of x and y and then adds z.
+// AddDotProductQuadruple performs dot products on groups of 4 elements of x and y and then adds z.
 //
 // Asm: VPDPBUSD, CPU Feature: AVXVNNI
-func (x Int8x16) AddDotProdQuadruple(y Uint8x16, z Int32x4) Int32x4
+func (x Int8x16) AddDotProductQuadruple(y Uint8x16, z Int32x4) Int32x4
 
-// AddDotProdQuadruple performs dot products on groups of 4 elements of x and y and then adds z.
+// AddDotProductQuadruple performs dot products on groups of 4 elements of x and y and then adds z.
 //
 // Asm: VPDPBUSD, CPU Feature: AVXVNNI
-func (x Int8x32) AddDotProdQuadruple(y Uint8x32, z Int32x8) Int32x8
+func (x Int8x32) AddDotProductQuadruple(y Uint8x32, z Int32x8) Int32x8
 
-// AddDotProdQuadruple performs dot products on groups of 4 elements of x and y and then adds z.
+// AddDotProductQuadruple performs dot products on groups of 4 elements of x and y and then adds z.
 //
 // Asm: VPDPBUSD, CPU Feature: AVX512VNNI
-func (x Int8x64) AddDotProdQuadruple(y Uint8x64, z Int32x16) Int32x16
+func (x Int8x64) AddDotProductQuadruple(y Uint8x64, z Int32x16) Int32x16
 
-/* AddDotProdQuadrupleSaturated */
+/* AddDotProductQuadrupleSaturated */
 
-// AddDotProdQuadrupleSaturated multiplies performs dot products on groups of 4 elements of x and y and then adds z.
+// AddDotProductQuadrupleSaturated performs dot products on groups of 4 elements of x and y and then adds z.
 //
 // Asm: VPDPBUSDS, CPU Feature: AVXVNNI
-func (x Int8x16) AddDotProdQuadrupleSaturated(y Uint8x16, z Int32x4) Int32x4
+func (x Int8x16) AddDotProductQuadrupleSaturated(y Uint8x16, z Int32x4) Int32x4
 
-// AddDotProdQuadrupleSaturated multiplies performs dot products on groups of 4 elements of x and y and then adds z.
+// AddDotProductQuadrupleSaturated performs dot products on groups of 4 elements of x and y and then adds z.
 //
 // Asm: VPDPBUSDS, CPU Feature: AVXVNNI
-func (x Int8x32) AddDotProdQuadrupleSaturated(y Uint8x32, z Int32x8) Int32x8
+func (x Int8x32) AddDotProductQuadrupleSaturated(y Uint8x32, z Int32x8) Int32x8
 
-// AddDotProdQuadrupleSaturated multiplies performs dot products on groups of 4 elements of x and y and then adds z.
+// AddDotProductQuadrupleSaturated performs dot products on groups of 4 elements of x and y and then adds z.
 //
 // Asm: VPDPBUSDS, CPU Feature: AVX512VNNI
-func (x Int8x64) AddDotProdQuadrupleSaturated(y Uint8x64, z Int32x16) Int32x16
+func (x Int8x64) AddDotProductQuadrupleSaturated(y Uint8x64, z Int32x16) Int32x16
 
 /* AddPairs */
 
@@ -2143,45 +2126,45 @@ func (x Float64x4) Div(y Float64x4) Float64x4
 // Asm: VDIVPD, CPU Feature: AVX512
 func (x Float64x8) Div(y Float64x8) Float64x8
 
-/* DotProdPairs */
+/* DotProductPairs */
 
-// DotProdPairs multiplies the elements and add the pairs together,
+// DotProductPairs multiplies the elements and adds the pairs together,
 // yielding a vector of half as many elements with twice the input element size.
 //
 // Asm: VPMADDWD, CPU Feature: AVX
-func (x Int16x8) DotProdPairs(y Int16x8) Int32x4
+func (x Int16x8) DotProductPairs(y Int16x8) Int32x4
 
-// DotProdPairs multiplies the elements and add the pairs together,
+// DotProductPairs multiplies the elements and adds the pairs together,
 // yielding a vector of half as many elements with twice the input element size.
 //
 // Asm: VPMADDWD, CPU Feature: AVX2
-func (x Int16x16) DotProdPairs(y Int16x16) Int32x8
+func (x Int16x16) DotProductPairs(y Int16x16) Int32x8
 
-// DotProdPairs multiplies the elements and add the pairs together,
+// DotProductPairs multiplies the elements and adds the pairs together,
 // yielding a vector of half as many elements with twice the input element size.
 //
 // Asm: VPMADDWD, CPU Feature: AVX512
-func (x Int16x32) DotProdPairs(y Int16x32) Int32x16
+func (x Int16x32) DotProductPairs(y Int16x32) Int32x16
 
-/* DotProdPairsSaturated */
+/* DotProductPairsSaturated */
 
-// DotProdPairsSaturated multiplies the elements and add the pairs together with saturation,
+// DotProductPairsSaturated multiplies the elements and adds the pairs together with saturation,
 // yielding a vector of half as many elements with twice the input element size.
 //
 // Asm: VPMADDUBSW, CPU Feature: AVX
-func (x Uint8x16) DotProdPairsSaturated(y Int8x16) Int16x8
+func (x Uint8x16) DotProductPairsSaturated(y Int8x16) Int16x8
 
-// DotProdPairsSaturated multiplies the elements and add the pairs together with saturation,
+// DotProductPairsSaturated multiplies the elements and adds the pairs together with saturation,
 // yielding a vector of half as many elements with twice the input element size.
 //
 // Asm: VPMADDUBSW, CPU Feature: AVX2
-func (x Uint8x32) DotProdPairsSaturated(y Int8x32) Int16x16
+func (x Uint8x32) DotProductPairsSaturated(y Int8x32) Int16x16
 
-// DotProdPairsSaturated multiplies the elements and add the pairs together with saturation,
+// DotProductPairsSaturated multiplies the elements and adds the pairs together with saturation,
 // yielding a vector of half as many elements with twice the input element size.
 //
 // Asm: VPMADDUBSW, CPU Feature: AVX512
-func (x Uint8x64) DotProdPairsSaturated(y Int8x64) Int16x32
+func (x Uint8x64) DotProductPairsSaturated(y Int8x64) Int16x32
 
 /* Equal */