--- /dev/null
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This input extends the auto-generated amd64enc.s test suite
+// with manually added tests.
+
+#include "../../../../../runtime/textflag.h"
+
+// asmtest collects the manually written AVX2 gather encoder cases.
+// The trailing comment on each instruction line is that instruction's
+// encoding written as a hex byte string.
+// NOTE(review): several operand groups below are listed twice with the same
+// encodings; confirm whether the repetition is intentional.
+TEXT asmtest(SB),DUPOK|NOSPLIT,$0
+ // AVX2GATHER: basic combinations.
+ // VPGATHERDQ: the VSIB index is an X register for both X and Y mask/destination.
+ VPGATHERDQ Y2, (BP)(X7*2), Y1 // c4e2ed904c7d00
+ VPGATHERDQ X12, (R13)(X14*2), X11 // c40299905c7500
+ VPGATHERDQ Y12, (R13)(X14*2), Y11 // c4029d905c7500
+ VPGATHERDQ Y0, 8(X4*1), Y6 // c4e2fd90342508000000
+ VPGATHERDQ Y0, -8(X4*1), Y6 // c4e2fd903425f8ffffff
+ VPGATHERDQ Y0, 0(X4*1), Y6 // c4e2fd90342500000000
+ VPGATHERDQ Y0, 664(X4*1), Y6 // c4e2fd90342598020000
+ VPGATHERDQ Y0, 8(X4*8), Y6 // c4e2fd9034e508000000
+ VPGATHERDQ Y0, -8(X4*8), Y6 // c4e2fd9034e5f8ffffff
+ VPGATHERDQ Y0, 0(X4*8), Y6 // c4e2fd9034e500000000
+ VPGATHERDQ Y0, 664(X4*8), Y6 // c4e2fd9034e598020000
+ VPGATHERDQ Y0, 8(X14*1), Y6 // c4a2fd90343508000000
+ VPGATHERDQ Y0, -8(X14*1), Y6 // c4a2fd903435f8ffffff
+ VPGATHERDQ Y0, 0(X14*1), Y6 // c4a2fd90343500000000
+ VPGATHERDQ Y0, 664(X14*1), Y6 // c4a2fd90343598020000
+ VPGATHERDQ Y0, 8(X14*8), Y6 // c4a2fd9034f508000000
+ VPGATHERDQ Y0, -8(X14*8), Y6 // c4a2fd9034f5f8ffffff
+ VPGATHERDQ Y0, 0(X14*8), Y6 // c4a2fd9034f500000000
+ VPGATHERDQ Y0, 664(X14*8), Y6 // c4a2fd9034f598020000
+ VPGATHERDQ X2, (BP)(X7*2), X1 // c4e2e9904c7d00
+ VPGATHERDQ Y2, (BP)(X7*2), Y1 // c4e2ed904c7d00
+ VPGATHERDQ X12, (R13)(X14*2), X11 // c40299905c7500
+ VPGATHERDQ Y12, (R13)(X14*2), Y11 // c4029d905c7500
+ VPGATHERDQ Y0, 8(X4*1), Y6 // c4e2fd90342508000000
+ VPGATHERDQ Y0, -8(X4*1), Y6 // c4e2fd903425f8ffffff
+ VPGATHERDQ Y0, 0(X4*1), Y6 // c4e2fd90342500000000
+ VPGATHERDQ Y0, 664(X4*1), Y6 // c4e2fd90342598020000
+ VPGATHERDQ Y0, 8(X4*8), Y6 // c4e2fd9034e508000000
+ VPGATHERDQ Y0, -8(X4*8), Y6 // c4e2fd9034e5f8ffffff
+ VPGATHERDQ Y0, 0(X4*8), Y6 // c4e2fd9034e500000000
+ VPGATHERDQ Y0, 664(X4*8), Y6 // c4e2fd9034e598020000
+ VPGATHERDQ Y0, 8(X14*1), Y6 // c4a2fd90343508000000
+ VPGATHERDQ Y0, -8(X14*1), Y6 // c4a2fd903435f8ffffff
+ VPGATHERDQ Y0, 0(X14*1), Y6 // c4a2fd90343500000000
+ VPGATHERDQ Y0, 664(X14*1), Y6 // c4a2fd90343598020000
+ VPGATHERDQ Y0, 8(X14*8), Y6 // c4a2fd9034f508000000
+ VPGATHERDQ Y0, -8(X14*8), Y6 // c4a2fd9034f5f8ffffff
+ VPGATHERDQ Y0, 0(X14*8), Y6 // c4a2fd9034f500000000
+ VPGATHERDQ Y0, 664(X14*8), Y6 // c4a2fd9034f598020000
+ // VPGATHERQQ: the VSIB index register class (X or Y) matches the mask/destination.
+ VPGATHERQQ X2, (BP)(X7*2), X1 // c4e2e9914c7d00
+ VPGATHERQQ Y2, (BP)(Y7*2), Y1 // c4e2ed914c7d00
+ VPGATHERQQ X12, (R13)(X14*2), X11 // c40299915c7500
+ VPGATHERQQ Y12, (R13)(Y14*2), Y11 // c4029d915c7500
+ VPGATHERQQ X2, (BP)(X7*2), X1 // c4e2e9914c7d00
+ VPGATHERQQ Y2, (BP)(Y7*2), Y1 // c4e2ed914c7d00
+ VPGATHERQQ X12, (R13)(X14*2), X11 // c40299915c7500
+ VPGATHERQQ Y12, (R13)(Y14*2), Y11 // c4029d915c7500
+ // VGATHERDPD: the VSIB index is an X register for both X and Y mask/destination.
+ VGATHERDPD X2, (BP)(X7*2), X1 // c4e2e9924c7d00
+ VGATHERDPD Y2, (BP)(X7*2), Y1 // c4e2ed924c7d00
+ VGATHERDPD X12, (R13)(X14*2), X11 // c40299925c7500
+ VGATHERDPD Y12, (R13)(X14*2), Y11 // c4029d925c7500
+ VGATHERDPD Y0, 8(X4*1), Y6 // c4e2fd92342508000000
+ VGATHERDPD Y0, -8(X4*1), Y6 // c4e2fd923425f8ffffff
+ VGATHERDPD Y0, 0(X4*1), Y6 // c4e2fd92342500000000
+ VGATHERDPD Y0, 664(X4*1), Y6 // c4e2fd92342598020000
+ VGATHERDPD Y0, 8(X4*8), Y6 // c4e2fd9234e508000000
+ VGATHERDPD Y0, -8(X4*8), Y6 // c4e2fd9234e5f8ffffff
+ VGATHERDPD Y0, 0(X4*8), Y6 // c4e2fd9234e500000000
+ VGATHERDPD Y0, 664(X4*8), Y6 // c4e2fd9234e598020000
+ VGATHERDPD Y0, 8(X14*1), Y6 // c4a2fd92343508000000
+ VGATHERDPD Y0, -8(X14*1), Y6 // c4a2fd923435f8ffffff
+ VGATHERDPD Y0, 0(X14*1), Y6 // c4a2fd92343500000000
+ VGATHERDPD Y0, 664(X14*1), Y6 // c4a2fd92343598020000
+ VGATHERDPD Y0, 8(X14*8), Y6 // c4a2fd9234f508000000
+ VGATHERDPD Y0, -8(X14*8), Y6 // c4a2fd9234f5f8ffffff
+ VGATHERDPD Y0, 0(X14*8), Y6 // c4a2fd9234f500000000
+ VGATHERDPD Y0, 664(X14*8), Y6 // c4a2fd9234f598020000
+ VGATHERDPD X2, (BP)(X7*2), X1 // c4e2e9924c7d00
+ VGATHERDPD Y2, (BP)(X7*2), Y1 // c4e2ed924c7d00
+ VGATHERDPD X12, (R13)(X14*2), X11 // c40299925c7500
+ VGATHERDPD Y12, (R13)(X14*2), Y11 // c4029d925c7500
+ VGATHERDPD Y0, 8(X4*1), Y6 // c4e2fd92342508000000
+ VGATHERDPD Y0, -8(X4*1), Y6 // c4e2fd923425f8ffffff
+ VGATHERDPD Y0, 0(X4*1), Y6 // c4e2fd92342500000000
+ VGATHERDPD Y0, 664(X4*1), Y6 // c4e2fd92342598020000
+ VGATHERDPD Y0, 8(X4*8), Y6 // c4e2fd9234e508000000
+ VGATHERDPD Y0, -8(X4*8), Y6 // c4e2fd9234e5f8ffffff
+ VGATHERDPD Y0, 0(X4*8), Y6 // c4e2fd9234e500000000
+ VGATHERDPD Y0, 664(X4*8), Y6 // c4e2fd9234e598020000
+ VGATHERDPD Y0, 8(X14*1), Y6 // c4a2fd92343508000000
+ VGATHERDPD Y0, -8(X14*1), Y6 // c4a2fd923435f8ffffff
+ VGATHERDPD Y0, 0(X14*1), Y6 // c4a2fd92343500000000
+ VGATHERDPD Y0, 664(X14*1), Y6 // c4a2fd92343598020000
+ VGATHERDPD Y0, 8(X14*8), Y6 // c4a2fd9234f508000000
+ VGATHERDPD Y0, -8(X14*8), Y6 // c4a2fd9234f5f8ffffff
+ VGATHERDPD Y0, 0(X14*8), Y6 // c4a2fd9234f500000000
+ VGATHERDPD Y0, 664(X14*8), Y6 // c4a2fd9234f598020000
+ // VGATHERQPD: the VSIB index register class (X or Y) matches the mask/destination.
+ VGATHERQPD X2, (BP)(X7*2), X1 // c4e2e9934c7d00
+ VGATHERQPD Y2, (BP)(Y7*2), Y1 // c4e2ed934c7d00
+ VGATHERQPD X12, (R13)(X14*2), X11 // c40299935c7500
+ VGATHERQPD Y12, (R13)(Y14*2), Y11 // c4029d935c7500
+ VGATHERQPD X2, (BP)(X7*2), X1 // c4e2e9934c7d00
+ VGATHERQPD Y2, (BP)(Y7*2), Y1 // c4e2ed934c7d00
+ VGATHERQPD X12, (R13)(X14*2), X11 // c40299935c7500
+ VGATHERQPD Y12, (R13)(Y14*2), Y11 // c4029d935c7500
+ // VGATHERDPS: the VSIB index register class (X or Y) matches the mask/destination.
+ VGATHERDPS X2, (BP)(X7*2), X1 // c4e269924c7d00
+ VGATHERDPS Y2, (BP)(Y7*2), Y1 // c4e26d924c7d00
+ VGATHERDPS X12, (R13)(X14*2), X11 // c40219925c7500
+ VGATHERDPS Y12, (R13)(Y14*2), Y11 // c4021d925c7500
+ VGATHERDPS X3, 8(X4*1), X6 // c4e26192342508000000
+ VGATHERDPS X3, -8(X4*1), X6 // c4e261923425f8ffffff
+ VGATHERDPS X3, 0(X4*1), X6 // c4e26192342500000000
+ VGATHERDPS X3, 664(X4*1), X6 // c4e26192342598020000
+ VGATHERDPS X3, 8(X4*8), X6 // c4e2619234e508000000
+ VGATHERDPS X3, -8(X4*8), X6 // c4e2619234e5f8ffffff
+ VGATHERDPS X3, 0(X4*8), X6 // c4e2619234e500000000
+ VGATHERDPS X3, 664(X4*8), X6 // c4e2619234e598020000
+ VGATHERDPS X3, 8(X14*1), X6 // c4a26192343508000000
+ VGATHERDPS X3, -8(X14*1), X6 // c4a261923435f8ffffff
+ VGATHERDPS X3, 0(X14*1), X6 // c4a26192343500000000
+ VGATHERDPS X3, 664(X14*1), X6 // c4a26192343598020000
+ VGATHERDPS X3, 8(X14*8), X6 // c4a2619234f508000000
+ VGATHERDPS X3, -8(X14*8), X6 // c4a2619234f5f8ffffff
+ VGATHERDPS X3, 0(X14*8), X6 // c4a2619234f500000000
+ VGATHERDPS X3, 664(X14*8), X6 // c4a2619234f598020000
+ VGATHERDPS X2, (BP)(X7*2), X1 // c4e269924c7d00
+ VGATHERDPS Y2, (BP)(Y7*2), Y1 // c4e26d924c7d00
+ VGATHERDPS X12, (R13)(X14*2), X11 // c40219925c7500
+ VGATHERDPS Y12, (R13)(Y14*2), Y11 // c4021d925c7500
+ VGATHERDPS X5, 8(X4*1), X6 // c4e25192342508000000
+ VGATHERDPS X3, -8(X4*1), X6 // c4e261923425f8ffffff
+ VGATHERDPS X3, 0(X4*1), X6 // c4e26192342500000000
+ VGATHERDPS X3, 664(X4*1), X6 // c4e26192342598020000
+ VGATHERDPS X3, 8(X4*8), X6 // c4e2619234e508000000
+ VGATHERDPS X3, -8(X4*8), X6 // c4e2619234e5f8ffffff
+ VGATHERDPS X3, 0(X4*8), X6 // c4e2619234e500000000
+ VGATHERDPS X3, 664(X4*8), X6 // c4e2619234e598020000
+ VGATHERDPS X3, 8(X14*1), X6 // c4a26192343508000000
+ VGATHERDPS X3, -8(X14*1), X6 // c4a261923435f8ffffff
+ VGATHERDPS X3, 0(X14*1), X6 // c4a26192343500000000
+ VGATHERDPS X3, 664(X14*1), X6 // c4a26192343598020000
+ VGATHERDPS X3, 8(X14*8), X6 // c4a2619234f508000000
+ VGATHERDPS X3, -8(X14*8), X6 // c4a2619234f5f8ffffff
+ VGATHERDPS X3, 0(X14*8), X6 // c4a2619234f500000000
+ VGATHERDPS X3, 664(X14*8), X6 // c4a2619234f598020000
+ // VGATHERQPS: mask and destination stay X registers; the VSIB index is X or Y.
+ VGATHERQPS X2, (BP)(X7*2), X1 // c4e269934c7d00
+ VGATHERQPS X2, (BP)(Y7*2), X1 // c4e26d934c7d00
+ VGATHERQPS X12, (R13)(X14*2), X11 // c40219935c7500
+ VGATHERQPS X12, (R13)(Y14*2), X11 // c4021d935c7500
+ VGATHERQPS X2, (BP)(X7*2), X1 // c4e269934c7d00
+ VGATHERQPS X2, (BP)(Y7*2), X1 // c4e26d934c7d00
+ VGATHERQPS X12, (R13)(X14*2), X11 // c40219935c7500
+ VGATHERQPS X12, (R13)(Y14*2), X11 // c4021d935c7500
+ // VPGATHERDD: the VSIB index register class (X or Y) matches the mask/destination.
+ VPGATHERDD X2, (BP)(X7*2), X1 // c4e269904c7d00
+ VPGATHERDD Y2, (BP)(Y7*2), Y1 // c4e26d904c7d00
+ VPGATHERDD X12, (R13)(X14*2), X11 // c40219905c7500
+ VPGATHERDD Y12, (R13)(Y14*2), Y11 // c4021d905c7500
+ VPGATHERDD X3, 8(X4*1), X6 // c4e26190342508000000
+ VPGATHERDD X3, -8(X4*1), X6 // c4e261903425f8ffffff
+ VPGATHERDD X3, 0(X4*1), X6 // c4e26190342500000000
+ VPGATHERDD X3, 664(X4*1), X6 // c4e26190342598020000
+ VPGATHERDD X3, 8(X4*8), X6 // c4e2619034e508000000
+ VPGATHERDD X3, -8(X4*8), X6 // c4e2619034e5f8ffffff
+ VPGATHERDD X3, 0(X4*8), X6 // c4e2619034e500000000
+ VPGATHERDD X3, 664(X4*8), X6 // c4e2619034e598020000
+ VPGATHERDD X3, 8(X14*1), X6 // c4a26190343508000000
+ VPGATHERDD X3, -8(X14*1), X6 // c4a261903435f8ffffff
+ VPGATHERDD X3, 0(X14*1), X6 // c4a26190343500000000
+ VPGATHERDD X3, 664(X14*1), X6 // c4a26190343598020000
+ VPGATHERDD X3, 8(X14*8), X6 // c4a2619034f508000000
+ VPGATHERDD X3, -8(X14*8), X6 // c4a2619034f5f8ffffff
+ VPGATHERDD X3, 0(X14*8), X6 // c4a2619034f500000000
+ VPGATHERDD X3, 664(X14*8), X6 // c4a2619034f598020000
+ VPGATHERDD X2, (BP)(X7*2), X1 // c4e269904c7d00
+ VPGATHERDD Y2, (BP)(Y7*2), Y1 // c4e26d904c7d00
+ VPGATHERDD X12, (R13)(X14*2), X11 // c40219905c7500
+ VPGATHERDD Y12, (R13)(Y14*2), Y11 // c4021d905c7500
+ VPGATHERDD X3, 8(X4*1), X6 // c4e26190342508000000
+ VPGATHERDD X3, -8(X4*1), X6 // c4e261903425f8ffffff
+ VPGATHERDD X3, 0(X4*1), X6 // c4e26190342500000000
+ VPGATHERDD X3, 664(X4*1), X6 // c4e26190342598020000
+ VPGATHERDD X3, 8(X4*8), X6 // c4e2619034e508000000
+ VPGATHERDD X3, -8(X4*8), X6 // c4e2619034e5f8ffffff
+ VPGATHERDD X3, 0(X4*8), X6 // c4e2619034e500000000
+ VPGATHERDD X3, 664(X4*8), X6 // c4e2619034e598020000
+ VPGATHERDD X3, 8(X14*1), X6 // c4a26190343508000000
+ VPGATHERDD X3, -8(X14*1), X6 // c4a261903435f8ffffff
+ VPGATHERDD X3, 0(X14*1), X6 // c4a26190343500000000
+ VPGATHERDD X3, 664(X14*1), X6 // c4a26190343598020000
+ VPGATHERDD X3, 8(X14*8), X6 // c4a2619034f508000000
+ VPGATHERDD X3, -8(X14*8), X6 // c4a2619034f5f8ffffff
+ VPGATHERDD X3, 0(X14*8), X6 // c4a2619034f500000000
+ VPGATHERDD X3, 664(X14*8), X6 // c4a2619034f598020000
+ // VPGATHERQD: mask and destination stay X registers; the VSIB index is X or Y.
+ VPGATHERQD X2, (BP)(X7*2), X1 // c4e269914c7d00
+ VPGATHERQD X2, (BP)(Y7*2), X1 // c4e26d914c7d00
+ VPGATHERQD X12, (R13)(X14*2), X11 // c40219915c7500
+ VPGATHERQD X12, (R13)(Y14*2), X11 // c4021d915c7500
+ VPGATHERQD X2, (BP)(X7*2), X1 // c4e269914c7d00
+ VPGATHERQD X2, (BP)(Y7*2), X1 // c4e26d914c7d00
+ VPGATHERQD X12, (R13)(X14*2), X11 // c40219915c7500
+ VPGATHERQD X12, (R13)(Y14*2), X11 // c4021d915c7500
+ // VPGATHERQQ without a base register: scales 1 and 4, registers 0-2 and 8-10.
+ VPGATHERQQ X0, 0(X1*1), X2 // c4e2f991140d00000000
+ VPGATHERQQ Y0, 0(Y1*1), Y2 // c4e2fd91140d00000000
+ VPGATHERQQ X8, 0(X9*1), X10 // c422b991140d00000000
+ VPGATHERQQ Y8, 0(Y9*1), Y10 // c422bd91140d00000000
+ VPGATHERQQ X0, 0(X1*4), X2 // c4e2f991148d00000000
+ VPGATHERQQ Y0, 0(Y1*4), Y2 // c4e2fd91148d00000000
+ VPGATHERQQ X8, 0(X9*4), X10 // c422b991148d00000000
+ VPGATHERQQ Y8, 0(Y9*4), Y10 // c422bd91148d00000000
+ // AVX2GATHER: test SP/BP base with different displacements.
+ VPGATHERQQ X0, (SP)(X1*1), X2 // c4e2f991140c
+ VPGATHERQQ X0, 16(SP)(X1*1), X2 // c4e2f991540c10
+ VPGATHERQQ X0, 512(SP)(X1*1), X2 // c4e2f991940c00020000
+ VPGATHERQQ X0, (R12)(X1*1), X2 // c4c2f991140c
+ VPGATHERQQ X0, 16(R12)(X1*1), X2 // c4c2f991540c10
+ VPGATHERQQ X0, 512(R12)(X1*1), X2 // c4c2f991940c00020000
+ VPGATHERQQ X0, (BP)(X1*1), X2 // c4e2f991540d00
+ VPGATHERQQ X0, 16(BP)(X1*1), X2 // c4e2f991540d10
+ VPGATHERQQ X0, 512(BP)(X1*1), X2 // c4e2f991940d00020000
+ VPGATHERQQ X0, (R13)(X1*1), X2 // c4c2f991540d00
+ VPGATHERQQ X0, 16(R13)(X1*1), X2 // c4c2f991540d10
+ VPGATHERQQ X0, 512(R13)(X1*1), X2 // c4c2f991940d00020000
+ VPGATHERQQ Y0, (SP)(Y1*1), Y2 // c4e2fd91140c
+ VPGATHERQQ Y0, 16(SP)(Y1*1), Y2 // c4e2fd91540c10
+ VPGATHERQQ Y0, 512(SP)(Y1*1), Y2 // c4e2fd91940c00020000
+ VPGATHERQQ Y0, (R12)(Y1*1), Y2 // c4c2fd91140c
+ VPGATHERQQ Y0, 16(R12)(Y1*1), Y2 // c4c2fd91540c10
+ VPGATHERQQ Y0, 512(R12)(Y1*1), Y2 // c4c2fd91940c00020000
+ VPGATHERQQ Y0, (BP)(Y1*1), Y2 // c4e2fd91540d00
+ VPGATHERQQ Y0, 16(BP)(Y1*1), Y2 // c4e2fd91540d10
+ VPGATHERQQ Y0, 512(BP)(Y1*1), Y2 // c4e2fd91940d00020000
+ VPGATHERQQ Y0, (R13)(Y1*1), Y2 // c4c2fd91540d00
+ VPGATHERQQ Y0, 16(R13)(Y1*1), Y2 // c4c2fd91540d10
+ VPGATHERQQ Y0, 512(R13)(Y1*1), Y2 // c4c2fd91940d00020000
+ // End of tests.
+ RET
Ymm
Yxr
Yxm
+ Yxvm // VSIB vector array; vm32x/vm64x
Yyr
Yym
+ Yyvm // VSIB vector array; vm32y/vm64y
Ytls
Ytextsize
Yindir
{Zvex_r_v_rm, 2, argList{Yxr, Yxm}},
}
+// yvpgatherdq lists the operand forms referenced by the AVGATHERDPD and
+// AVPGATHERDQ optab entries. Both the X-register form and the Y-register
+// form take an X-register VSIB vector operand (Yxvm) in the middle position.
+var yvpgatherdq = []ytab{
+ {Zvex_v_rm_r, 2, argList{Yxr, Yxvm, Yxr}},
+ {Zvex_v_rm_r, 2, argList{Yyr, Yxvm, Yyr}},
+}
+
+// yvpgatherqq lists the operand forms referenced by the AVGATHERQPD,
+// AVGATHERDPS, AVPGATHERDD and AVPGATHERQQ optab entries. The VSIB vector
+// operand class follows the register class: Yxvm with Yxr, Yyvm with Yyr.
+var yvpgatherqq = []ytab{
+ {Zvex_v_rm_r, 2, argList{Yxr, Yxvm, Yxr}},
+ {Zvex_v_rm_r, 2, argList{Yyr, Yyvm, Yyr}},
+}
+
+// yvgatherqps lists the operand forms referenced by the AVGATHERQPS and
+// AVPGATHERQD optab entries. The first and last operands are X registers in
+// both forms; only the VSIB vector operand varies (Yxvm or Yyvm).
+var yvgatherqps = []ytab{
+ {Zvex_v_rm_r, 2, argList{Yxr, Yxvm, Yxr}},
+ {Zvex_v_rm_r, 2, argList{Yxr, Yyvm, Yxr}},
+}
+
var ymmxmm0f38 = []ytab{
{Zlitm_r, 3, argList{Ymm, Ymr}},
{Zlitm_r, 5, argList{Yxm, Yxr}},
{obj.APCDATA, ypcdata, Px, [23]uint8{0, 0}},
{obj.ADUFFCOPY, yduff, Px, [23]uint8{0xe8}},
{obj.ADUFFZERO, yduff, Px, [23]uint8{0xe8}},
+
+ // AVX2 gather instructions.
+ // Added as part of the VSIB support implementation.
+ // Once x86avxgen can output these, they will be moved to
+ // vex_optabs.go, where they belong.
+ {AVGATHERDPD, yvpgatherdq, Pvex, [23]uint8{
+ vexDDS | vex128 | vex66 | vex0F38 | vexW1, 0x92,
+ vexDDS | vex256 | vex66 | vex0F38 | vexW1, 0x92,
+ }},
+ {AVGATHERQPD, yvpgatherqq, Pvex, [23]uint8{
+ vexDDS | vex128 | vex66 | vex0F38 | vexW1, 0x93,
+ vexDDS | vex256 | vex66 | vex0F38 | vexW1, 0x93,
+ }},
+ {AVGATHERDPS, yvpgatherqq, Pvex, [23]uint8{
+ vexDDS | vex128 | vex66 | vex0F38 | vexW0, 0x92,
+ vexDDS | vex256 | vex66 | vex0F38 | vexW0, 0x92,
+ }},
+ {AVGATHERQPS, yvgatherqps, Pvex, [23]uint8{
+ vexDDS | vex128 | vex66 | vex0F38 | vexW0, 0x93,
+ vexDDS | vex256 | vex66 | vex0F38 | vexW0, 0x93,
+ }},
+ {AVPGATHERDD, yvpgatherqq, Pvex, [23]uint8{
+ vexDDS | vex128 | vex66 | vex0F38 | vexW0, 0x90,
+ vexDDS | vex256 | vex66 | vex0F38 | vexW0, 0x90,
+ }},
+ {AVPGATHERQD, yvgatherqps, Pvex, [23]uint8{
+ vexDDS | vex128 | vex66 | vex0F38 | vexW0, 0x91,
+ vexDDS | vex256 | vex66 | vex0F38 | vexW0, 0x91,
+ }},
+ {AVPGATHERDQ, yvpgatherdq, Pvex, [23]uint8{
+ vexDDS | vex128 | vex66 | vex0F38 | vexW1, 0x90,
+ vexDDS | vex256 | vex66 | vex0F38 | vexW1, 0x90,
+ }},
+ {AVPGATHERQQ, yvpgatherqq, Pvex, [23]uint8{
+ vexDDS | vex128 | vex66 | vex0F38 | vexW1, 0x91,
+ vexDDS | vex256 | vex66 | vex0F38 | vexW1, 0x91,
+ }},
+
{obj.AEND, nil, 0, [23]uint8{}},
{0, nil, 0, [23]uint8{}},
}
// Can't use SP as the index register
return Yxxx
}
+ if a.Index >= REG_X0 && a.Index <= REG_X15 {
+ if ctxt.Arch.Family == sys.I386 && a.Index > REG_X7 {
+ return Yxxx
+ }
+ return Yxvm
+ }
+ if a.Index >= REG_Y0 && a.Index <= REG_Y15 {
+ if ctxt.Arch.Family == sys.I386 && a.Index > REG_Y7 {
+ return Yxxx
+ }
+ return Yyvm
+ }
if ctxt.Arch.Family == sys.AMD64 {
// Offset must fit in a 32-bit signed field (or fit in a 32-bit unsigned field
// where the sign extension doesn't matter).
// At yields the single byte stored at index i of the underlying buffer.
func (a *AsmBuf) At(i int) byte {
	b := a.buf[i]
	return b
}
+// asmidx emits SIB byte.
func (asmbuf *AsmBuf) asmidx(ctxt *obj.Link, scale int, index int, base int) {
var i int
+ // X/Y index register is used in VSIB.
switch index {
default:
goto bad
REG_R12,
REG_R13,
REG_R14,
- REG_R15:
+ REG_R15,
+ REG_X8,
+ REG_X9,
+ REG_X10,
+ REG_X11,
+ REG_X12,
+ REG_X13,
+ REG_X14,
+ REG_X15,
+ REG_Y8,
+ REG_Y9,
+ REG_Y10,
+ REG_Y11,
+ REG_Y12,
+ REG_Y13,
+ REG_Y14,
+ REG_Y15:
if ctxt.Arch.Family == sys.I386 {
goto bad
}
REG_BX,
REG_BP,
REG_SI,
- REG_DI:
+ REG_DI,
+ REG_X0,
+ REG_X1,
+ REG_X2,
+ REG_X3,
+ REG_X4,
+ REG_X5,
+ REG_X6,
+ REG_X7,
+ REG_Y0,
+ REG_Y1,
+ REG_Y2,
+ REG_Y3,
+ REG_Y4,
+ REG_Y5,
+ REG_Y6,
+ REG_Y7:
i = reg[index] << 3
}
asmbuf.Put1(opcode)
}
+// regIndex maps register r to an index that fits in 4 bits.
+//
+// Examples:
+//	REG_X15 => 15
+//	REG_R9  => 9
+//	REG_AX  => 0
+//
+func regIndex(r int16) int {
+	// reg[r] holds the low three bits; the Rxr flag recorded in regrex[r],
+	// moved up by one position, supplies the fourth bit.
+	extBit := (regrex[r] & Rxr) << 1
+	return reg[r] | extBit
+}
+
+// avx2gatherValid reports whether p obeys the AVX2 gather operand rules.
+// A violation is reported through ctxt.Diag and yields false.
+func avx2gatherValid(ctxt *obj.Link, p *obj.Prog) bool {
+	// The instruction faults with #UD when any two of the index, mask and
+	// destination registers coincide, so such operands are rejected here.
+	idx := regIndex(p.GetFrom3().Index)
+	msk := regIndex(p.From.Reg)
+	dst := regIndex(p.To.Reg)
+	if dst != msk && dst != idx && msk != idx {
+		return true
+	}
+
+	ctxt.Diag("mask, index, and destination registers should be distinct: %v", p)
+	return false
+}
+
func (asmbuf *AsmBuf) doasm(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog) {
o := opindex[p.As&obj.AMask]
p.To.Offset = p.GetFrom3().Offset
p.GetFrom3().Offset = 0
}
+
+ case AVGATHERDPD,
+ AVGATHERQPD,
+ AVGATHERDPS,
+ AVGATHERQPS,
+ AVPGATHERDD,
+ AVPGATHERQD,
+ AVPGATHERDQ,
+ AVPGATHERQQ:
+ if !avx2gatherValid(ctxt, p) {
+ return
+ }
}
if p.Ft == 0 {