]> Cypherpunks repositories - gostls13.git/commitdiff
cmd/internal/obj/x86: Add initial VEX support.
authorIlya Tocar <ilya.tocar@intel.com>
Tue, 1 Sep 2015 11:56:37 +0000 (14:56 +0300)
committerBrad Fitzpatrick <bradfitz@golang.org>
Mon, 5 Oct 2015 16:00:25 +0000 (16:00 +0000)
Support VZEROUPPER, VMOVNTDQ, VMOVDQU, VMOVDQA.
Use MOVHD* for names, where HD stands for HexaDeca (16).

Change-Id: I9b1ea52e7ef0714a3d2aeb31ec1823fe509a047e
Reviewed-on: https://go-review.googlesource.com/14127
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
src/cmd/internal/obj/link.go
src/cmd/internal/obj/x86/a.out.go
src/cmd/internal/obj/x86/anames.go
src/cmd/internal/obj/x86/asm6.go

index 6066493bafd74035b0c63c76695da4515c67f455..4217d6b0cb4842fd341671bebe9d153bbd2874a1 100644 (file)
@@ -512,6 +512,7 @@ type Link struct {
        Blitrl             *Prog
        Elitrl             *Prog
        Rexflag            int
+       Vexflag            int
        Rep                int
        Repn               int
        Lock               int
index 95868a8ba9b686aed05c0a662269242fe72771ca..7d002e9fc333ded1db15904f04f2faff7721d660 100644 (file)
@@ -742,6 +742,11 @@ const (
        APSHUFD
        APCLMULQDQ
 
+       AVZEROUPPER
+       AMOVHDU
+       AMOVNTHD
+       AMOVHDA
+
        // from 386
        AJCXZW
        AFCMOVCC
index 330e816119e2c5b9f7f77646f72cb3886d59ad70..c075a15c80bd22aa0bb055fba1b802e25cbce079 100644 (file)
@@ -683,6 +683,10 @@ var Anames = []string{
        "ROUNDSD",
        "PSHUFD",
        "PCLMULQDQ",
+       "VZEROUPPER",
+       "MOVHDU",
+       "MOVNTHD",
+       "MOVHDA",
        "JCXZW",
        "FCMOVCC",
        "FCMOVCS",
index 495b35df187378a09e4f24a0a8da322d6a726cce..f3be6cdfc7f425c24011f40aae242e1dff0af366 100644 (file)
@@ -181,6 +181,7 @@ const (
        Zm_r
        Zm2_r
        Zm_r_xm
+       Zm_r_xm_vex
        Zm_r_i_xm
        Zm_r_3d
        Zm_r_xm_nr
@@ -193,6 +194,7 @@ const (
        Zpseudo
        Zr_m
        Zr_m_xm
+       Zr_m_xm_vex
        Zrp_
        Z_ib
        Z_il
@@ -206,21 +208,23 @@ const (
 )
 
 const (
-       Px  = 0
-       Px1 = 1    // symbolic; exact value doesn't matter
-       P32 = 0x32 /* 32-bit only */
-       Pe  = 0x66 /* operand escape */
-       Pm  = 0x0f /* 2byte opcode escape */
-       Pq  = 0xff /* both escapes: 66 0f */
-       Pb  = 0xfe /* byte operands */
-       Pf2 = 0xf2 /* xmm escape 1: f2 0f */
-       Pf3 = 0xf3 /* xmm escape 2: f3 0f */
-       Pq3 = 0x67 /* xmm escape 3: 66 48 0f */
-       Pw  = 0x48 /* Rex.w */
-       Pw8 = 0x90 // symbolic; exact value doesn't matter
-       Py  = 0x80 /* defaults to 64-bit mode */
-       Py1 = 0x81 // symbolic; exact value doesn't matter
-       Py3 = 0x83 // symbolic; exact value doesn't matter
+       Px    = 0
+       Px1   = 1    // symbolic; exact value doesn't matter
+       P32   = 0x32 /* 32-bit only */
+       Pe    = 0x66 /* operand escape */
+       Pm    = 0x0f /* 2byte opcode escape */
+       Pq    = 0xff /* both escapes: 66 0f */
+       Pb    = 0xfe /* byte operands */
+       Pf2   = 0xf2 /* xmm escape 1: f2 0f */
+       Pf3   = 0xf3 /* xmm escape 2: f3 0f */
+       Pq3   = 0x67 /* xmm escape 3: 66 48 0f */
+       Pvex1 = 0xc5 /* 66 escape, vex encoding */
+       Pvex2 = 0xc6 /* f3 escape, vex encoding */
+       Pw    = 0x48 /* Rex.w */
+       Pw8   = 0x90 // symbolic; exact value doesn't matter
+       Py    = 0x80 /* defaults to 64-bit mode */
+       Py1   = 0x81 // symbolic; exact value doesn't matter
+       Py3   = 0x83 // symbolic; exact value doesn't matter
 
        Rxw = 1 << 3 /* =1, 64-bit operand size */
        Rxr = 1 << 2 /* extend modrm reg */
@@ -622,6 +626,10 @@ var yxr_ml = []ytab{
        {Yxr, Ynone, Yml, Zr_m_xm, 1},
 }
 
+var yxr_ml_vex = []ytab{
+       {Yxr, Ynone, Yml, Zr_m_xm_vex, 1},
+}
+
 var ymr = []ytab{
        {Ymr, Ynone, Ymr, Zm_r, 1},
 }
@@ -638,6 +646,11 @@ var yxcmpi = []ytab{
        {Yxm, Yxr, Yi8, Zm_r_i_xm, 2},
 }
 
+var yxmov_vex = []ytab{
+       {Yxm, Ynone, Yxr, Zm_r_xm_vex, 1},
+       {Yxr, Ynone, Yxm, Zr_m_xm_vex, 1},
+}
+
 var yxmov = []ytab{
        {Yxm, Ynone, Yxr, Zm_r_xm, 1},
        {Yxr, Ynone, Yxm, Zr_m_xm, 1},
@@ -1480,6 +1493,10 @@ var optab =
        {AROUNDSS, yaes2, Pq, [23]uint8{0x3a, 0x0a, 0}},
        {APSHUFD, yxshuf, Pq, [23]uint8{0x70, 0}},
        {APCLMULQDQ, yxshuf, Pq, [23]uint8{0x3a, 0x44, 0}},
+       {AVZEROUPPER, ynone, Px, [23]uint8{0xc5, 0xf8, 0x77}},
+       {AMOVHDU, yxmov_vex, Pvex2, [23]uint8{0x6f, 0x7f}},
+       {AMOVNTHD, yxr_ml_vex, Pvex1, [23]uint8{0xe7}},
+       {AMOVHDA, yxmov_vex, Pvex1, [23]uint8{0x6f, 0x7f}},
        {obj.AUSEFIELD, ynop, Px, [23]uint8{0, 0}},
        {obj.ATYPE, nil, 0, [23]uint8{}},
        {obj.AFUNCDATA, yfuncdata, Px, [23]uint8{0, 0}},
@@ -2911,6 +2928,50 @@ var bpduff2 = []byte{
        0x48, 0x8b, 0x6d, 0x00, // MOVQ 0(BP), BP
 }
 
+func vexprefix(ctxt *obj.Link, to *obj.Addr, from *obj.Addr, pref uint8) {
+       rexR := regrex[to.Reg]
+       rexB := regrex[from.Reg]
+       rexX := regrex[from.Index]
+       var prefBit uint8
+       if pref == Pvex1 {
+               prefBit = 1
+       } else if pref == Pvex2 {
+               prefBit = 2
+       } // TODO add Pvex0,Pvex3
+
+       if rexX == 0 && rexB == 0 { // 2-byte vex prefix
+               ctxt.Andptr[0] = 0xc5
+               ctxt.Andptr = ctxt.Andptr[1:]
+
+               if rexR != 0 {
+                       ctxt.Andptr[0] = 0x7c
+               } else {
+                       ctxt.Andptr[0] = 0xfc
+               }
+               ctxt.Andptr[0] |= prefBit
+               ctxt.Andptr = ctxt.Andptr[1:]
+       } else {
+               ctxt.Andptr[0] = 0xc4
+               ctxt.Andptr = ctxt.Andptr[1:]
+
+               ctxt.Andptr[0] = 0x1 // TODO handle different prefix
+               if rexR == 0 {
+                       ctxt.Andptr[0] |= 0x80
+               }
+               if rexX == 0 {
+                       ctxt.Andptr[0] |= 0x40
+               }
+               if rexB == 0 {
+                       ctxt.Andptr[0] |= 0x20
+               }
+               ctxt.Andptr = ctxt.Andptr[1:]
+
+               ctxt.Andptr[0] = 0x7c
+               ctxt.Andptr[0] |= prefBit
+               ctxt.Andptr = ctxt.Andptr[1:]
+       }
+}
+
 func doasm(ctxt *obj.Link, p *obj.Prog) {
        ctxt.Curp = p // TODO
 
@@ -3144,6 +3205,13 @@ func doasm(ctxt *obj.Link, p *obj.Prog) {
                                mediaop(ctxt, o, op, int(yt.zoffset), z)
                                asmand(ctxt, p, &p.From, &p.To)
 
+                       case Zm_r_xm_vex:
+                               ctxt.Vexflag = 1
+                               vexprefix(ctxt, &p.To, &p.From, o.prefix)
+                               ctxt.Andptr[0] = byte(op)
+                               ctxt.Andptr = ctxt.Andptr[1:]
+                               asmand(ctxt, p, &p.From, &p.To)
+
                        case Zm_r_xm_nr:
                                ctxt.Rexflag = 0
                                mediaop(ctxt, o, op, int(yt.zoffset), z)
@@ -3199,6 +3267,13 @@ func doasm(ctxt *obj.Link, p *obj.Prog) {
                                ctxt.Andptr = ctxt.Andptr[1:]
                                asmand(ctxt, p, &p.To, &p.From)
 
+                       case Zr_m_xm_vex:
+                               ctxt.Vexflag = 1
+                               vexprefix(ctxt, &p.From, &p.To, o.prefix)
+                               ctxt.Andptr[0] = byte(op)
+                               ctxt.Andptr = ctxt.Andptr[1:]
+                               asmand(ctxt, p, &p.To, &p.From)
+
                        case Zr_m_xm:
                                mediaop(ctxt, o, op, int(yt.zoffset), z)
                                asmand(ctxt, p, &p.To, &p.From)
@@ -4307,10 +4382,11 @@ func asmins(ctxt *obj.Link, p *obj.Prog) {
        }
 
        ctxt.Rexflag = 0
+       ctxt.Vexflag = 0
        and0 := ctxt.Andptr
        ctxt.Asmode = int(p.Mode)
        doasm(ctxt, p)
-       if ctxt.Rexflag != 0 {
+       if ctxt.Rexflag != 0 && ctxt.Vexflag == 0 {
                /*
                 * as befits the whole approach of the architecture,
                 * the rex prefix must appear before the first opcode byte