From 9dd81d62275a963e3d9c37453fcde42ab929bd96 Mon Sep 17 00:00:00 2001 From: Ilya Tocar Date: Tue, 1 Sep 2015 14:56:37 +0300 Subject: [PATCH] cmd/internal/obj/x86: Add initial VEX support. Support VZEROUPPER, VMOVNTDQ, VMOVDQU, VMOVDQA. Use MOVHD* for names, where HD stands for HexaDeca (16). Change-Id: I9b1ea52e7ef0714a3d2aeb31ec1823fe509a047e Reviewed-on: https://go-review.googlesource.com/14127 Run-TryBot: Brad Fitzpatrick TryBot-Result: Gobot Gobot Reviewed-by: Brad Fitzpatrick --- src/cmd/internal/obj/link.go | 1 + src/cmd/internal/obj/x86/a.out.go | 5 ++ src/cmd/internal/obj/x86/anames.go | 4 ++ src/cmd/internal/obj/x86/asm6.go | 108 ++++++++++++++++++++++++----- 4 files changed, 102 insertions(+), 16 deletions(-) diff --git a/src/cmd/internal/obj/link.go b/src/cmd/internal/obj/link.go index 6066493baf..4217d6b0cb 100644 --- a/src/cmd/internal/obj/link.go +++ b/src/cmd/internal/obj/link.go @@ -512,6 +512,7 @@ type Link struct { Blitrl *Prog Elitrl *Prog Rexflag int + Vexflag int Rep int Repn int Lock int diff --git a/src/cmd/internal/obj/x86/a.out.go b/src/cmd/internal/obj/x86/a.out.go index 95868a8ba9..7d002e9fc3 100644 --- a/src/cmd/internal/obj/x86/a.out.go +++ b/src/cmd/internal/obj/x86/a.out.go @@ -742,6 +742,11 @@ const ( APSHUFD APCLMULQDQ + AVZEROUPPER + AMOVHDU + AMOVNTHD + AMOVHDA + // from 386 AJCXZW AFCMOVCC diff --git a/src/cmd/internal/obj/x86/anames.go b/src/cmd/internal/obj/x86/anames.go index 330e816119..c075a15c80 100644 --- a/src/cmd/internal/obj/x86/anames.go +++ b/src/cmd/internal/obj/x86/anames.go @@ -683,6 +683,10 @@ var Anames = []string{ "ROUNDSD", "PSHUFD", "PCLMULQDQ", + "VZEROUPPER", + "MOVHDU", + "MOVNTHD", + "MOVHDA", "JCXZW", "FCMOVCC", "FCMOVCS", diff --git a/src/cmd/internal/obj/x86/asm6.go b/src/cmd/internal/obj/x86/asm6.go index 495b35df18..f3be6cdfc7 100644 --- a/src/cmd/internal/obj/x86/asm6.go +++ b/src/cmd/internal/obj/x86/asm6.go @@ -181,6 +181,7 @@ const ( Zm_r Zm2_r Zm_r_xm + Zm_r_xm_vex Zm_r_i_xm Zm_r_3d Zm_r_xm_nr @@ -193,6 +194,7 @@ const ( Zpseudo Zr_m Zr_m_xm + Zr_m_xm_vex Zrp_ Z_ib Z_il @@ -206,21 +208,23 @@ const ( ) const ( - Px = 0 - Px1 = 1 // symbolic; exact value doesn't matter - P32 = 0x32 /* 32-bit only */ - Pe = 0x66 /* operand escape */ - Pm = 0x0f /* 2byte opcode escape */ - Pq = 0xff /* both escapes: 66 0f */ - Pb = 0xfe /* byte operands */ - Pf2 = 0xf2 /* xmm escape 1: f2 0f */ - Pf3 = 0xf3 /* xmm escape 2: f3 0f */ - Pq3 = 0x67 /* xmm escape 3: 66 48 0f */ - Pw = 0x48 /* Rex.w */ - Pw8 = 0x90 // symbolic; exact value doesn't matter - Py = 0x80 /* defaults to 64-bit mode */ - Py1 = 0x81 // symbolic; exact value doesn't matter - Py3 = 0x83 // symbolic; exact value doesn't matter + Px = 0 + Px1 = 1 // symbolic; exact value doesn't matter + P32 = 0x32 /* 32-bit only */ + Pe = 0x66 /* operand escape */ + Pm = 0x0f /* 2byte opcode escape */ + Pq = 0xff /* both escapes: 66 0f */ + Pb = 0xfe /* byte operands */ + Pf2 = 0xf2 /* xmm escape 1: f2 0f */ + Pf3 = 0xf3 /* xmm escape 2: f3 0f */ + Pq3 = 0x67 /* xmm escape 3: 66 48 0f */ + Pvex1 = 0xc5 /* 66 escape, vex encoding */ + Pvex2 = 0xc6 /* f3 escape, vex encoding */ + Pw = 0x48 /* Rex.w */ + Pw8 = 0x90 // symbolic; exact value doesn't matter + Py = 0x80 /* defaults to 64-bit mode */ + Py1 = 0x81 // symbolic; exact value doesn't matter + Py3 = 0x83 // symbolic; exact value doesn't matter Rxw = 1 << 3 /* =1, 64-bit operand size */ Rxr = 1 << 2 /* extend modrm reg */ @@ -622,6 +626,10 @@ var yxr_ml = []ytab{ {Yxr, Ynone, Yml, Zr_m_xm, 1}, } +var yxr_ml_vex = []ytab{ + {Yxr, Ynone, Yml, Zr_m_xm_vex, 1}, +} + var ymr = []ytab{ {Ymr, Ynone, Ymr, Zm_r, 1}, } @@ -638,6 +646,11 @@ var yxcmpi = []ytab{ {Yxm, Yxr, Yi8, Zm_r_i_xm, 2}, } +var yxmov_vex = []ytab{ + {Yxm, Ynone, Yxr, Zm_r_xm_vex, 1}, + {Yxr, Ynone, Yxm, Zr_m_xm_vex, 1}, +} + var yxmov = []ytab{ {Yxm, Ynone, Yxr, Zm_r_xm, 1}, {Yxr, Ynone, Yxm, Zr_m_xm, 1}, @@ -1480,6 +1493,10 @@ var optab = {AROUNDSS, yaes2, Pq, [23]uint8{0x3a, 0x0a, 0}}, {APSHUFD, yxshuf, Pq, [23]uint8{0x70, 0}}, {APCLMULQDQ, yxshuf, Pq, [23]uint8{0x3a, 0x44, 0}}, + {AVZEROUPPER, ynone, Px, [23]uint8{0xc5, 0xf8, 0x77}}, + {AMOVHDU, yxmov_vex, Pvex2, [23]uint8{0x6f, 0x7f}}, + {AMOVNTHD, yxr_ml_vex, Pvex1, [23]uint8{0xe7}}, + {AMOVHDA, yxmov_vex, Pvex1, [23]uint8{0x6f, 0x7f}}, {obj.AUSEFIELD, ynop, Px, [23]uint8{0, 0}}, {obj.ATYPE, nil, 0, [23]uint8{}}, {obj.AFUNCDATA, yfuncdata, Px, [23]uint8{0, 0}}, @@ -2911,6 +2928,50 @@ var bpduff2 = []byte{ 0x48, 0x8b, 0x6d, 0x00, // MOVQ 0(BP), BP } +func vexprefix(ctxt *obj.Link, to *obj.Addr, from *obj.Addr, pref uint8) { + rexR := regrex[to.Reg] + rexB := regrex[from.Reg] + rexX := regrex[from.Index] + var prefBit uint8 + if pref == Pvex1 { + prefBit = 1 + } else if pref == Pvex2 { + prefBit = 2 + } // TODO add Pvex0,Pvex3 + + if rexX == 0 && rexB == 0 { // 2-byte vex prefix + ctxt.Andptr[0] = 0xc5 + ctxt.Andptr = ctxt.Andptr[1:] + + if rexR != 0 { + ctxt.Andptr[0] = 0x7c + } else { + ctxt.Andptr[0] = 0xfc + } + ctxt.Andptr[0] |= prefBit + ctxt.Andptr = ctxt.Andptr[1:] + } else { + ctxt.Andptr[0] = 0xc4 + ctxt.Andptr = ctxt.Andptr[1:] + + ctxt.Andptr[0] = 0x1 // TODO handle different prefix + if rexR == 0 { + ctxt.Andptr[0] |= 0x80 + } + if rexX == 0 { + ctxt.Andptr[0] |= 0x40 + } + if rexB == 0 { + ctxt.Andptr[0] |= 0x20 + } + ctxt.Andptr = ctxt.Andptr[1:] + + ctxt.Andptr[0] = 0x7c + ctxt.Andptr[0] |= prefBit + ctxt.Andptr = ctxt.Andptr[1:] + } +} + func doasm(ctxt *obj.Link, p *obj.Prog) { ctxt.Curp = p // TODO @@ -3144,6 +3205,13 @@ func doasm(ctxt *obj.Link, p *obj.Prog) { mediaop(ctxt, o, op, int(yt.zoffset), z) asmand(ctxt, p, &p.From, &p.To) + case Zm_r_xm_vex: + ctxt.Vexflag = 1 + vexprefix(ctxt, &p.To, &p.From, o.prefix) + ctxt.Andptr[0] = byte(op) + ctxt.Andptr = ctxt.Andptr[1:] + asmand(ctxt, p, &p.From, &p.To) + case Zm_r_xm_nr: ctxt.Rexflag = 0 mediaop(ctxt, o, op, int(yt.zoffset), z) @@ -3199,6 +3267,13 @@ func doasm(ctxt *obj.Link, p *obj.Prog) { ctxt.Andptr = ctxt.Andptr[1:] asmand(ctxt, p, &p.To, &p.From) + case Zr_m_xm_vex: + ctxt.Vexflag = 1 + vexprefix(ctxt, &p.From, &p.To, o.prefix) + ctxt.Andptr[0] = byte(op) + ctxt.Andptr = ctxt.Andptr[1:] + asmand(ctxt, p, &p.To, &p.From) + case Zr_m_xm: mediaop(ctxt, o, op, int(yt.zoffset), z) asmand(ctxt, p, &p.To, &p.From) @@ -4307,10 +4382,11 @@ func asmins(ctxt *obj.Link, p *obj.Prog) { } ctxt.Rexflag = 0 + ctxt.Vexflag = 0 and0 := ctxt.Andptr ctxt.Asmode = int(p.Mode) doasm(ctxt, p) - if ctxt.Rexflag != 0 { + if ctxt.Rexflag != 0 && ctxt.Vexflag == 0 { /* * as befits the whole approach of the architecture, * the rex prefix must appear before the first opcode byte -- 2.48.1