From: isharipo Date: Fri, 13 Oct 2017 13:06:39 +0000 (+0300) Subject: cmd/internal/obj: make it possible to have all AVX1/2 insts X-Git-Tag: go1.10beta1~448 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=1e83f883c54a37f637c557287b0ae8062cef3930;p=gostls13.git cmd/internal/obj: make it possible to have all AVX1/2 insts Current AllowedOpCodes is 1024, which is not enough for modern x86. Changed limit to 2048 (though AVX512 will exceed this). Additional Z-cases and ytab tables are added to make it possible to handle missing AVX1 and AVX2 instructions. This CL is required by x86avxgen to work properly: https://go-review.googlesource.com/c/arch/+/66972 Change-Id: I290214bbda554d2cba53349f50dcd34014fe4cee Reviewed-on: https://go-review.googlesource.com/70650 Run-TryBot: Iskander Sharipov TryBot-Result: Gobot Gobot Reviewed-by: Ilya Tocar --- diff --git a/src/cmd/internal/obj/link.go b/src/cmd/internal/obj/link.go index 5041a820df..09f522bdaf 100644 --- a/src/cmd/internal/obj/link.go +++ b/src/cmd/internal/obj/link.go @@ -351,7 +351,7 @@ const ( // Subspaces are aligned to a power of two so opcodes can be masked // with AMask and used as compact array indices. const ( - ABase386 = (1 + iota) << 10 + ABase386 = (1 + iota) << 11 ABaseARM ABaseAMD64 ABasePPC64 @@ -359,7 +359,7 @@ const ( ABaseMIPS ABaseS390X - AllowedOpCodes = 1 << 10 // The number of opcodes available for any given architecture. + AllowedOpCodes = 1 << 11 // The number of opcodes available for any given architecture. AMask = AllowedOpCodes - 1 // AND with this to use the opcode as an array index. ) diff --git a/src/cmd/internal/obj/x86/asm6.go b/src/cmd/internal/obj/x86/asm6.go index 6b5cb29cb4..dbdaebba8d 100644 --- a/src/cmd/internal/obj/x86/asm6.go +++ b/src/cmd/internal/obj/x86/asm6.go @@ -204,11 +204,17 @@ const ( Zil_rr Zbyte Zvex_rm_v_r + Zvex_rm_v_ro Zvex_r_v_rm Zvex_v_rm_r Zvex_i_rm_r Zvex_i_r_v Zvex_i_rm_v_r + Zvex + Zvex_rm_r_vo + Zvex_i_r_rm + Zvex_hr_rm_v_r + Zmax ) @@ -882,6 +888,152 @@ var yvex_vpbroadcast_sd = []ytab{ {Zvex_rm_v_r, 2, argList{Yxm, Yyr}}, } +var yvex_vpextrw = []ytab{ + {Zvex_i_rm_r, 2, argList{Yi8, Yxr, Yrl}}, + {Zvex_i_r_rm, 2, argList{Yi8, Yxr, Yml}}, +} + +var yvex_m = []ytab{ + {Zvex_rm_v_ro, 3, argList{Ym}}, +} + +var yvex_xx3 = []ytab{ + {Zvex_rm_v_r, 2, argList{Yxr, Yxr, Yxr}}, +} + +var yvex_yi3 = []ytab{ + {Zvex_i_r_rm, 2, argList{Yi8, Yyr, Yxm}}, +} + +var yvex_mxy = []ytab{ + {Zvex_rm_v_r, 2, argList{Ym, Yxr}}, + {Zvex_rm_v_r, 2, argList{Ym, Yyr}}, +} + +var yvex_yy3 = []ytab{ + {Zvex_rm_v_r, 2, argList{Yym, Yyr, Yyr}}, +} + +var yvex_xi3 = []ytab{ + {Zvex_i_rm_r, 2, argList{Yi8, Yxm, Yxr}}, +} + +var yvex_vpermpd = []ytab{ + {Zvex_i_rm_r, 2, argList{Yi8, Yym, Yyr}}, +} + +var yvex_vpermilp = []ytab{ + {Zvex_i_rm_r, 2, argList{Yi8, Yxm, Yxr}}, + {Zvex_rm_v_r, 2, argList{Yxm, Yxr, Yxr}}, + {Zvex_i_rm_r, 2, argList{Yi8, Yym, Yyr}}, + {Zvex_rm_v_r, 2, argList{Yym, Yyr, Yyr}}, +} + +var yvex_vcvtps2ph = []ytab{ + {Zvex_i_r_rm, 2, argList{Yi8, Yyr, Yxm}}, + {Zvex_i_r_rm, 2, argList{Yi8, Yxr, Yxm}}, +} + +var yvex_vbroadcastf = []ytab{ + {Zvex_rm_v_r, 2, argList{Ym, Yyr}}, +} + +var yvex_vmovd = []ytab{ + {Zvex_r_v_rm, 2, argList{Yxr, Yml}}, + {Zvex_rm_v_r, 2, argList{Yml, Yxr}}, +} + +var yvex_x2 = []ytab{ + {Zvex_rm_v_r, 2, argList{Yxm, Yxr}}, +} + +var yvex_y2 = []ytab{ + {Zvex_rm_v_r, 2, argList{Yym, Yxr}}, +} + +var yvex = []ytab{ + {Zvex, 2, argList{}}, +} + +var yvex_xx2 = []ytab{ + {Zvex_rm_v_r, 2, argList{Yxr, Yxr}}, +} + +var yvex_vpalignr = []ytab{ + {Zvex_i_rm_v_r, 2, argList{Yu8, Yxm, Yxr, Yxr}}, + {Zvex_i_rm_v_r, 2, argList{Yu8, Yym, Yyr, Yyr}}, +} + +var yvex_rxi4 = []ytab{ + {Zvex_i_rm_v_r, 2, argList{Yu8, Yml, Yxr, Yxr}}, +} + +var yvex_xxi4 = []ytab{ + {Zvex_i_rm_v_r, 2, argList{Yu8, Yxm, Yxr, Yxr}}, +} + +var yvex_xy4 = []ytab{ + {Zvex_hr_rm_v_r, 2, argList{Yxr, Yxm, Yxr, Yxr}}, + {Zvex_hr_rm_v_r, 2, argList{Yyr, Yym, Yyr, Yyr}}, +} + +var yvex_vpbroadcast_ss = []ytab{ + {Zvex_rm_v_r, 2, argList{Ym, Yxr}}, + {Zvex_rm_v_r, 2, argList{Yxr, Yxr}}, + {Zvex_rm_v_r, 2, argList{Ym, Yyr}}, + {Zvex_rm_v_r, 2, argList{Yxr, Yyr}}, +} + +var yvex_vblendvpd = []ytab{ + {Zvex_r_v_rm, 2, argList{Yxr, Yxr, Yml}}, + {Zvex_r_v_rm, 2, argList{Yyr, Yyr, Yml}}, + {Zvex_rm_v_r, 2, argList{Ym, Yxr, Yxr}}, + {Zvex_rm_v_r, 2, argList{Ym, Yyr, Yyr}}, +} + +var yvex_vmov = []ytab{ + {Zvex_r_v_rm, 2, argList{Yxr, Ym}}, + {Zvex_rm_v_r, 2, argList{Ym, Yxr}}, + {Zvex_rm_v_r, 2, argList{Yxr, Yxr, Yxr}}, + {Zvex_rm_v_r, 2, argList{Yxr, Yxr, Yxr}}, +} + +var yvex_vps = []ytab{ + {Zvex_rm_v_r, 2, argList{Yxm, Yxr, Yxr}}, + {Zvex_i_r_v, 3, argList{Yi8, Yxr, Yxr}}, + {Zvex_rm_v_r, 2, argList{Yxm, Yyr, Yyr}}, + {Zvex_i_r_v, 3, argList{Yi8, Yyr, Yyr}}, +} + +var yvex_r2 = []ytab{ + {Zvex_rm_r_vo, 3, argList{Yml, Yrl}}, +} + +var yvex_vpextr = []ytab{ + {Zvex_i_r_rm, 2, argList{Yi8, Yxr, Yml}}, +} + +var yvex_rx3 = []ytab{ + {Zvex_rm_v_r, 2, argList{Yml, Yxr, Yxr}}, +} + +var yvex_vcvtsd2si = []ytab{ + {Zvex_rm_v_r, 2, argList{Yxm, Yrl}}, +} + +var yvex_vmovhpd = []ytab{ + {Zvex_r_v_rm, 2, argList{Yxr, Ym}}, + {Zvex_rm_v_r, 2, argList{Ym, Yxr, Yxr}}, +} + +var yvex_vmovq = []ytab{ + {Zvex_r_v_rm, 2, argList{Yxr, Yml}}, + {Zvex_rm_v_r, 2, argList{Ym, Yxr}}, + {Zvex_rm_v_r, 2, argList{Yml, Yxr}}, + {Zvex_rm_v_r, 2, argList{Yxr, Yxr}}, + {Zvex_r_v_rm, 2, argList{Yxr, Yxm}}, +} + var ymmxmm0f38 = []ytab{ {Zlitm_r, 3, argList{Ymm, Ymr}}, {Zlitm_r, 5, argList{Yxm, Yxr}}, @@ -3617,10 +3769,17 @@ func (asmbuf *AsmBuf) doasm(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog) { asmbuf.Put1(byte(op)) asmbuf.asmand(ctxt, cursym, p, &p.To, &p.From) + case Zvex: + asmbuf.asmvex(ctxt, &p.From, p.GetFrom3(), &p.To, o.op[z], o.op[z+1]) + case Zvex_rm_v_r: asmbuf.asmvex(ctxt, &p.From, p.GetFrom3(), &p.To, o.op[z], o.op[z+1]) asmbuf.asmand(ctxt, cursym, p, &p.From, &p.To) + case Zvex_rm_v_ro: + asmbuf.asmvex(ctxt, &p.From, p.GetFrom3(), &p.To, o.op[z], o.op[z+1]) + asmbuf.asmando(ctxt, cursym, p, &p.From, int(o.op[z+2])) + case Zvex_i_r_v: asmbuf.asmvex(ctxt, p.GetFrom3(), &p.To, nil, o.op[z], o.op[z+1]) regnum := byte(0x7) @@ -3651,6 +3810,21 @@ func (asmbuf *AsmBuf) doasm(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog) { asmbuf.asmvex(ctxt, &p.To, p.GetFrom3(), &p.From, o.op[z], o.op[z+1]) asmbuf.asmand(ctxt, cursym, p, &p.To, &p.From) + case Zvex_rm_r_vo: + asmbuf.asmvex(ctxt, &p.From, &p.To, p.GetFrom3(), o.op[z], o.op[z+1]) + asmbuf.asmando(ctxt, cursym, p, &p.From, int(o.op[z+2])) + + case Zvex_i_r_rm: + asmbuf.asmvex(ctxt, &p.To, nil, p.GetFrom3(), o.op[z], o.op[z+1]) + asmbuf.asmand(ctxt, cursym, p, &p.To, p.GetFrom3()) + asmbuf.Put1(byte(p.From.Offset)) + + case Zvex_hr_rm_v_r: + hr, from, from3, to := unpackOps4(p) + asmbuf.asmvex(ctxt, from, from3, to, o.op[z], o.op[z+1]) + asmbuf.asmand(ctxt, cursym, p, from, to) + asmbuf.Put1(byte(regrex[hr.Reg]+reg[hr.Reg]+1) << 4) + case Zr_m_xm: asmbuf.mediaop(ctxt, o, op, int(yt.zoffset), z) asmbuf.asmand(ctxt, cursym, p, &p.To, &p.From)