]> Cypherpunks repositories - gostls13.git/commitdiff
cmd/internal/obj/x86: add some missing AMD64 instructions
authorIlya Tocar <ilya.tocar@intel.com>
Wed, 19 Oct 2016 17:35:04 +0000 (20:35 +0300)
committerIlya Tocar <ilya.tocar@intel.com>
Thu, 20 Oct 2016 14:03:14 +0000 (14:03 +0000)
Add VBROADCASTSD, BROADCASTSS, MOVDDUP, MOVSHDUP, MOVSLDUP,
VMOVDDUP, VMOVSHDUP, VMOVSLDUP.

Fixes #16007

Change-Id: I9614e58eed6c1b6f299d9b4f0b1a7750aa7c1725
Reviewed-on: https://go-review.googlesource.com/31491
Run-TryBot: Ilya Tocar <ilya.tocar@intel.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: David Chase <drchase@google.com>
src/cmd/asm/internal/asm/testdata/amd64enc.s
src/cmd/internal/obj/x86/a.out.go
src/cmd/internal/obj/x86/anames.go
src/cmd/internal/obj/x86/asm6.go

index 22dfe127b3db0b390006342efe5c046923e511ee..b27faa5a360bc8e58b72652af53801dddffb7e80 100644 (file)
@@ -2551,14 +2551,14 @@ TEXT asmtest(SB),7,$0
        MOVQ (R11), X11                         // 66450f6e1b or 664d0f6e1b or f3450f7e1b
        MOVQ DX, X11                            // 66440f6eda or 664c0f6eda
        MOVQ R11, X11                           // 66450f6edb or 664d0f6edb
-       //TODO: MOVDDUP (BX), X2                // f20f1213
-       //TODO: MOVDDUP (R11), X2               // f2410f1213
-       //TODO: MOVDDUP X2, X2                  // f20f12d2
-       //TODO: MOVDDUP X11, X2                 // f2410f12d3
-       //TODO: MOVDDUP (BX), X11               // f2440f121b
-       //TODO: MOVDDUP (R11), X11              // f2450f121b
-       //TODO: MOVDDUP X2, X11                 // f2440f12da
-       //TODO: MOVDDUP X11, X11                // f2450f12db
+       MOVDDUP (BX), X2                        // f20f1213
+       MOVDDUP (R11), X2                       // f2410f1213
+       MOVDDUP X2, X2                          // f20f12d2
+       MOVDDUP X11, X2                         // f2410f12d3
+       MOVDDUP (BX), X11                       // f2440f121b
+       MOVDDUP (R11), X11                      // f2450f121b
+       MOVDDUP X2, X11                         // f2440f12da
+       MOVDDUP X11, X11                        // f2450f12db
        MOVQ X2, M2                             // f20fd6d2
        MOVQ X11, M2                            // f2410fd6d3
        MOVQ X2, M3                             // f20fd6da
@@ -2697,22 +2697,22 @@ TEXT asmtest(SB),7,$0
        //TODO: MOVSD X11, (BX)                 // f2440f111b
        //TODO: MOVSD X2, (R11)                 // f2410f1113
        //TODO: MOVSD X11, (R11)                // f2450f111b
-       //TODO: MOVSHDUP (BX), X2               // f30f1613
-       //TODO: MOVSHDUP (R11), X2              // f3410f1613
-       //TODO: MOVSHDUP X2, X2                 // f30f16d2
-       //TODO: MOVSHDUP X11, X2                // f3410f16d3
-       //TODO: MOVSHDUP (BX), X11              // f3440f161b
-       //TODO: MOVSHDUP (R11), X11             // f3450f161b
-       //TODO: MOVSHDUP X2, X11                // f3440f16da
-       //TODO: MOVSHDUP X11, X11               // f3450f16db
-       //TODO: MOVSLDUP (BX), X2               // f30f1213
-       //TODO: MOVSLDUP (R11), X2              // f3410f1213
-       //TODO: MOVSLDUP X2, X2                 // f30f12d2
-       //TODO: MOVSLDUP X11, X2                // f3410f12d3
-       //TODO: MOVSLDUP (BX), X11              // f3440f121b
-       //TODO: MOVSLDUP (R11), X11             // f3450f121b
-       //TODO: MOVSLDUP X2, X11                // f3440f12da
-       //TODO: MOVSLDUP X11, X11               // f3450f12db
+       MOVSHDUP (BX), X2                       // f30f1613
+       MOVSHDUP (R11), X2                      // f3410f1613
+       MOVSHDUP X2, X2                         // f30f16d2
+       MOVSHDUP X11, X2                        // f3410f16d3
+       MOVSHDUP (BX), X11                      // f3440f161b
+       MOVSHDUP (R11), X11                     // f3450f161b
+       MOVSHDUP X2, X11                        // f3440f16da
+       MOVSHDUP X11, X11                       // f3450f16db
+       MOVSLDUP (BX), X2                       // f30f1213
+       MOVSLDUP (R11), X2                      // f3410f1213
+       MOVSLDUP X2, X2                         // f30f12d2
+       MOVSLDUP X11, X2                        // f3410f12d3
+       MOVSLDUP (BX), X11                      // f3440f121b
+       MOVSLDUP (R11), X11                     // f3450f121b
+       MOVSLDUP X2, X11                        // f3440f12da
+       MOVSLDUP X11, X11                       // f3450f12db
        MOVSQ                                   // 48a5
        MOVSS (BX), X2                          // f30f1013
        MOVSS (R11), X2                         // f3410f1013
@@ -6116,30 +6116,30 @@ TEXT asmtest(SB),7,$0
        //TODO: VBROADCASTI128 (R11), Y2        // c4c27d5a13
        //TODO: VBROADCASTI128 (BX), Y11        // c4627d5a1b
        //TODO: VBROADCASTI128 (R11), Y11       // c4427d5a1b
-       //TODO: VBROADCASTSD (BX), Y2           // c4e27d1913
-       //TODO: VBROADCASTSD (R11), Y2          // c4c27d1913
-       //TODO: VBROADCASTSD (BX), Y11          // c4627d191b
-       //TODO: VBROADCASTSD (R11), Y11         // c4427d191b
-       //TODO: VBROADCASTSD X2, Y2             // c4e27d19d2
-       //TODO: VBROADCASTSD X11, Y2            // c4c27d19d3
-       //TODO: VBROADCASTSD X2, Y11            // c4627d19da
-       //TODO: VBROADCASTSD X11, Y11           // c4427d19db
-       //TODO: VBROADCASTSS (BX), X2           // c4e2791813
-       //TODO: VBROADCASTSS (R11), X2          // c4c2791813
-       //TODO: VBROADCASTSS (BX), X11          // c46279181b
-       //TODO: VBROADCASTSS (R11), X11         // c44279181b
-       //TODO: VBROADCASTSS X2, X2             // c4e27918d2
-       //TODO: VBROADCASTSS X11, X2            // c4c27918d3
-       //TODO: VBROADCASTSS X2, X11            // c4627918da
-       //TODO: VBROADCASTSS X11, X11           // c4427918db
-       //TODO: VBROADCASTSS (BX), Y2           // c4e27d1813
-       //TODO: VBROADCASTSS (R11), Y2          // c4c27d1813
-       //TODO: VBROADCASTSS (BX), Y11          // c4627d181b
-       //TODO: VBROADCASTSS (R11), Y11         // c4427d181b
-       //TODO: VBROADCASTSS X2, Y2             // c4e27d18d2
-       //TODO: VBROADCASTSS X11, Y2            // c4c27d18d3
-       //TODO: VBROADCASTSS X2, Y11            // c4627d18da
-       //TODO: VBROADCASTSS X11, Y11           // c4427d18db
+       VBROADCASTSD (BX), Y2                   // c4e27d1913
+       VBROADCASTSD (R11), Y2                  // c4c27d1913
+       VBROADCASTSD (BX), Y11                  // c4627d191b
+       VBROADCASTSD (R11), Y11                 // c4427d191b
+       VBROADCASTSD X2, Y2                     // c4e27d19d2
+       VBROADCASTSD X11, Y2                    // c4c27d19d3
+       VBROADCASTSD X2, Y11                    // c4627d19da
+       VBROADCASTSD X11, Y11                   // c4427d19db
+       VBROADCASTSS (BX), X2                   // c4e2791813
+       VBROADCASTSS (R11), X2                  // c4c2791813
+       VBROADCASTSS (BX), X11                  // c46279181b
+       VBROADCASTSS (R11), X11                 // c44279181b
+       VBROADCASTSS X2, X2                     // c4e27918d2
+       VBROADCASTSS X11, X2                    // c4c27918d3
+       VBROADCASTSS X2, X11                    // c4627918da
+       VBROADCASTSS X11, X11                   // c4427918db
+       VBROADCASTSS (BX), Y2                   // c4e27d1813
+       VBROADCASTSS (R11), Y2                  // c4c27d1813
+       VBROADCASTSS (BX), Y11                  // c4627d181b
+       VBROADCASTSS (R11), Y11                 // c4427d181b
+       VBROADCASTSS X2, Y2                     // c4e27d18d2
+       VBROADCASTSS X11, Y2                    // c4c27d18d3
+       VBROADCASTSS X2, Y11                    // c4627d18da
+       VBROADCASTSS X11, Y11                   // c4427d18db
        //TODO: VCMPPD $7, (BX), X9, X2         // c4e131c21307 or c5b1c21307
        //TODO: VCMPPD $7, (R11), X9, X2        // c4c131c21307
        //TODO: VCMPPD $7, X2, X9, X2           // c4e131c2d207 or c5b1c2d207
@@ -7642,22 +7642,22 @@ TEXT asmtest(SB),7,$0
        //TODO: VMOVD (R11), X11                // c441796e1b
        //TODO: VMOVD DX, X11                   // c461796eda or c5796eda
        //TODO: VMOVD R11, X11                  // c441796edb
-       //TODO: VMOVDDUP (BX), X2               // c4e17b1213 or c5fb1213
-       //TODO: VMOVDDUP (R11), X2              // c4c17b1213
-       //TODO: VMOVDDUP X2, X2                 // c4e17b12d2 or c5fb12d2
-       //TODO: VMOVDDUP X11, X2                // c4c17b12d3
-       //TODO: VMOVDDUP (BX), X11              // c4617b121b or c57b121b
-       //TODO: VMOVDDUP (R11), X11             // c4417b121b
-       //TODO: VMOVDDUP X2, X11                // c4617b12da or c57b12da
-       //TODO: VMOVDDUP X11, X11               // c4417b12db
-       //TODO: VMOVDDUP (BX), Y2               // c4e17f1213 or c5ff1213
-       //TODO: VMOVDDUP (R11), Y2              // c4c17f1213
-       //TODO: VMOVDDUP Y2, Y2                 // c4e17f12d2 or c5ff12d2
-       //TODO: VMOVDDUP Y11, Y2                // c4c17f12d3
-       //TODO: VMOVDDUP (BX), Y11              // c4617f121b or c57f121b
-       //TODO: VMOVDDUP (R11), Y11             // c4417f121b
-       //TODO: VMOVDDUP Y2, Y11                // c4617f12da or c57f12da
-       //TODO: VMOVDDUP Y11, Y11               // c4417f12db
+       VMOVDDUP (BX), X2                       // c4e17b1213 or c5fb1213
+       VMOVDDUP (R11), X2                      // c4c17b1213
+       VMOVDDUP X2, X2                         // c4e17b12d2 or c5fb12d2
+       VMOVDDUP X11, X2                        // c4c17b12d3
+       VMOVDDUP (BX), X11                      // c4617b121b or c57b121b
+       VMOVDDUP (R11), X11                     // c4417b121b
+       VMOVDDUP X2, X11                        // c4617b12da or c57b12da
+       VMOVDDUP X11, X11                       // c4417b12db
+       VMOVDDUP (BX), Y2                       // c4e17f1213 or c5ff1213
+       VMOVDDUP (R11), Y2                      // c4c17f1213
+       VMOVDDUP Y2, Y2                         // c4e17f12d2 or c5ff12d2
+       VMOVDDUP Y11, Y2                        // c4c17f12d3
+       VMOVDDUP (BX), Y11                      // c4617f121b or c57f121b
+       VMOVDDUP (R11), Y11                     // c4417f121b
+       VMOVDDUP Y2, Y11                        // c4617f12da or c57f12da
+       VMOVDDUP Y11, Y11                       // c4417f12db
        VMOVDQA (BX), X2                        // c4e1796f13 or c5f96f13
        VMOVDQA (R11), X2                       // c4c1796f13
        VMOVDQA X2, X2                          // c4e1796fd2 or c5f96fd2 or c4e1797fd2 or c5f97fd2
@@ -7826,38 +7826,38 @@ TEXT asmtest(SB),7,$0
        //TODO: VMOVSD X11, X9, X2              // c4c13310d3 or c4613311da or c53311da
        //TODO: VMOVSD X2, X9, X11              // c4613310da or c53310da or c4c13311d3
        //TODO: VMOVSD X11, X9, X11             // c4413310db or c4413311db
-       //TODO: VMOVSHDUP (BX), X2              // c4e17a1613 or c5fa1613
-       //TODO: VMOVSHDUP (R11), X2             // c4c17a1613
-       //TODO: VMOVSHDUP X2, X2                // c4e17a16d2 or c5fa16d2
-       //TODO: VMOVSHDUP X11, X2               // c4c17a16d3
-       //TODO: VMOVSHDUP (BX), X11             // c4617a161b or c57a161b
-       //TODO: VMOVSHDUP (R11), X11            // c4417a161b
-       //TODO: VMOVSHDUP X2, X11               // c4617a16da or c57a16da
-       //TODO: VMOVSHDUP X11, X11              // c4417a16db
-       //TODO: VMOVSHDUP (BX), Y2              // c4e17e1613 or c5fe1613
-       //TODO: VMOVSHDUP (R11), Y2             // c4c17e1613
-       //TODO: VMOVSHDUP Y2, Y2                // c4e17e16d2 or c5fe16d2
-       //TODO: VMOVSHDUP Y11, Y2               // c4c17e16d3
-       //TODO: VMOVSHDUP (BX), Y11             // c4617e161b or c57e161b
-       //TODO: VMOVSHDUP (R11), Y11            // c4417e161b
-       //TODO: VMOVSHDUP Y2, Y11               // c4617e16da or c57e16da
-       //TODO: VMOVSHDUP Y11, Y11              // c4417e16db
-       //TODO: VMOVSLDUP (BX), X2              // c4e17a1213 or c5fa1213
-       //TODO: VMOVSLDUP (R11), X2             // c4c17a1213
-       //TODO: VMOVSLDUP X2, X2                // c4e17a12d2 or c5fa12d2
-       //TODO: VMOVSLDUP X11, X2               // c4c17a12d3
-       //TODO: VMOVSLDUP (BX), X11             // c4617a121b or c57a121b
-       //TODO: VMOVSLDUP (R11), X11            // c4417a121b
-       //TODO: VMOVSLDUP X2, X11               // c4617a12da or c57a12da
-       //TODO: VMOVSLDUP X11, X11              // c4417a12db
-       //TODO: VMOVSLDUP (BX), Y2              // c4e17e1213 or c5fe1213
-       //TODO: VMOVSLDUP (R11), Y2             // c4c17e1213
-       //TODO: VMOVSLDUP Y2, Y2                // c4e17e12d2 or c5fe12d2
-       //TODO: VMOVSLDUP Y11, Y2               // c4c17e12d3
-       //TODO: VMOVSLDUP (BX), Y11             // c4617e121b or c57e121b
-       //TODO: VMOVSLDUP (R11), Y11            // c4417e121b
-       //TODO: VMOVSLDUP Y2, Y11               // c4617e12da or c57e12da
-       //TODO: VMOVSLDUP Y11, Y11              // c4417e12db
+       VMOVSHDUP (BX), X2                      // c4e17a1613 or c5fa1613
+       VMOVSHDUP (R11), X2                     // c4c17a1613
+       VMOVSHDUP X2, X2                        // c4e17a16d2 or c5fa16d2
+       VMOVSHDUP X11, X2                       // c4c17a16d3
+       VMOVSHDUP (BX), X11                     // c4617a161b or c57a161b
+       VMOVSHDUP (R11), X11                    // c4417a161b
+       VMOVSHDUP X2, X11                       // c4617a16da or c57a16da
+       VMOVSHDUP X11, X11                      // c4417a16db
+       VMOVSHDUP (BX), Y2                      // c4e17e1613 or c5fe1613
+       VMOVSHDUP (R11), Y2                     // c4c17e1613
+       VMOVSHDUP Y2, Y2                        // c4e17e16d2 or c5fe16d2
+       VMOVSHDUP Y11, Y2                       // c4c17e16d3
+       VMOVSHDUP (BX), Y11                     // c4617e161b or c57e161b
+       VMOVSHDUP (R11), Y11                    // c4417e161b
+       VMOVSHDUP Y2, Y11                       // c4617e16da or c57e16da
+       VMOVSHDUP Y11, Y11                      // c4417e16db
+       VMOVSLDUP (BX), X2                      // c4e17a1213 or c5fa1213
+       VMOVSLDUP (R11), X2                     // c4c17a1213
+       VMOVSLDUP X2, X2                        // c4e17a12d2 or c5fa12d2
+       VMOVSLDUP X11, X2                       // c4c17a12d3
+       VMOVSLDUP (BX), X11                     // c4617a121b or c57a121b
+       VMOVSLDUP (R11), X11                    // c4417a121b
+       VMOVSLDUP X2, X11                       // c4617a12da or c57a12da
+       VMOVSLDUP X11, X11                      // c4417a12db
+       VMOVSLDUP (BX), Y2                      // c4e17e1213 or c5fe1213
+       VMOVSLDUP (R11), Y2                     // c4c17e1213
+       VMOVSLDUP Y2, Y2                        // c4e17e12d2 or c5fe12d2
+       VMOVSLDUP Y11, Y2                       // c4c17e12d3
+       VMOVSLDUP (BX), Y11                     // c4617e121b or c57e121b
+       VMOVSLDUP (R11), Y11                    // c4417e121b
+       VMOVSLDUP Y2, Y11                       // c4617e12da or c57e12da
+       VMOVSLDUP Y11, Y11                      // c4417e12db
        //TODO: VMOVSS X2, (BX)                 // c4e17a1113 or c5fa1113
        //TODO: VMOVSS X11, (BX)                // c4617a111b or c57a111b
        //TODO: VMOVSS X2, (R11)                // c4c17a1113
index 9e7bbe49009ae4f060706ca6a0e4b42aa4f78772..02f92ed387e3f3db90096cc013453c33491be6d2 100644 (file)
@@ -771,6 +771,9 @@ const (
        AROUNDSS
        AROUNDPD
        AROUNDSD
+       AMOVDDUP
+       AMOVSHDUP
+       AMOVSLDUP
 
        APSHUFD
        APCLMULQDQ
@@ -803,6 +806,11 @@ const (
        AVPERM2I128
        ARORXL
        ARORXQ
+       AVBROADCASTSS
+       AVBROADCASTSD
+       AVMOVDDUP
+       AVMOVSHDUP
+       AVMOVSLDUP
 
        // from 386
        AJCXZW
index 3b30154625843141878d9ff792f2c83ba484b77e..8c5be807957b0a34af91a935a1874bf93d6dd1da 100644 (file)
@@ -708,6 +708,9 @@ var Anames = []string{
        "ROUNDSS",
        "ROUNDPD",
        "ROUNDSD",
+       "MOVDDUP",
+       "MOVSHDUP",
+       "MOVSLDUP",
        "PSHUFD",
        "PCLMULQDQ",
        "VZEROUPPER",
@@ -738,6 +741,11 @@ var Anames = []string{
        "VPERM2I128",
        "RORXL",
        "RORXQ",
+       "VBROADCASTSS",
+       "VBROADCASTSD",
+       "VMOVDDUP",
+       "VMOVSHDUP",
+       "VMOVSLDUP",
        "JCXZW",
        "FCMOVCC",
        "FCMOVCS",
index f7fa9a2edc773d49f8b52ed2e5331ee1aa4f4afa..bf678228226e360bc0375f7183bb6009d073afa2 100644 (file)
@@ -866,6 +866,10 @@ var yvex_vpbroadcast = []ytab{
        {Yxm, Ynone, Yyr, Zvex_rm_v_r, 2},
 }
 
+var yvex_vpbroadcast_sd = []ytab{
+       {Yxm, Ynone, Yyr, Zvex_rm_v_r, 2},
+}
+
 var ymmxmm0f38 = []ytab{
        {Ymm, Ynone, Ymr, Zlitm_r, 3},
        {Yxm, Ynone, Yxr, Zlitm_r, 5},
@@ -1630,6 +1634,9 @@ var optab =
        {APSHUFD, yxshuf, Pq, [23]uint8{0x70, 0}},
        {APCLMULQDQ, yxshuf, Pq, [23]uint8{0x3a, 0x44, 0}},
        {APCMPESTRI, yxshuf, Pq, [23]uint8{0x3a, 0x61, 0}},
+       {AMOVDDUP, yxm, Pf2, [23]uint8{0x12}},
+       {AMOVSHDUP, yxm, Pf3, [23]uint8{0x16}},
+       {AMOVSLDUP, yxm, Pf3, [23]uint8{0x12}},
 
        {AANDNL, yvex_r3, Pvex, [23]uint8{VEX_LZ_0F38_W0, 0xF2}},
        {AANDNQ, yvex_r3, Pvex, [23]uint8{VEX_LZ_0F38_W1, 0xF2}},
@@ -1678,6 +1685,11 @@ var optab =
        {AVPERM2I128, yvex_yyi4, Pvex, [23]uint8{VEX_256_66_0F3A_WIG, 0x46}},
        {ARORXL, yvex_ri3, Pvex, [23]uint8{VEX_LZ_F2_0F3A_W0, 0xf0}},
        {ARORXQ, yvex_ri3, Pvex, [23]uint8{VEX_LZ_F2_0F3A_W1, 0xf0}},
+       {AVBROADCASTSD, yvex_vpbroadcast_sd, Pvex, [23]uint8{VEX_256_66_0F38_W0, 0x19}},
+       {AVBROADCASTSS, yvex_vpbroadcast, Pvex, [23]uint8{VEX_128_66_0F38_W0, 0x18, VEX_256_66_0F38_W0, 0x18}},
+       {AVMOVDDUP, yvex_xy2, Pvex, [23]uint8{VEX_128_F2_0F_WIG, 0x12, VEX_256_F2_0F_WIG, 0x12}},
+       {AVMOVSHDUP, yvex_xy2, Pvex, [23]uint8{VEX_128_F3_0F_WIG, 0x16, VEX_256_F3_0F_WIG, 0x16}},
+       {AVMOVSLDUP, yvex_xy2, Pvex, [23]uint8{VEX_128_F3_0F_WIG, 0x12, VEX_256_F3_0F_WIG, 0x12}},
 
        {AXACQUIRE, ynone, Px, [23]uint8{0xf2}},
        {AXRELEASE, ynone, Px, [23]uint8{0xf3}},
@@ -3379,7 +3391,8 @@ func doasm(ctxt *obj.Link, p *obj.Prog) {
                                log.Fatalf("asmins bad table %v", p)
                        }
                        op = int(o.op[z])
-                       if op == 0x0f {
+                       // In vex case 0x0f is actually VEX_256_F2_0F_WIG
+                       if op == 0x0f && o.prefix != Pvex {
                                ctxt.AsmBuf.Put1(byte(op))
                                z++
                                op = int(o.op[z])