Cypherpunks repositories - gostls13.git/commitdiff
cmd/asm: complete the support for VDUP on arm64
author: fanzha02 <fannie.zhang@arm.com>
Fri, 4 Dec 2020 06:02:55 +0000 (14:02 +0800)
committer: fannie zhang <Fannie.Zhang@arm.com>
Fri, 19 Mar 2021 01:38:59 +0000 (01:38 +0000)
"VMOV Vn.<T>[index], Vn" is equivalent to "VDUP Vn.<T>[index], Vn", and
the disassembler prefers the latter over the former. However, the
assembler cannot encode this form of VDUP, which leads to an
inconsistency between the assembler and the disassembler.

For example, if we assemble "VMOV V20.S[0], V20" to hex and then
disassemble it, we get "VDUP V20.S[0], V20".

  VMOV V20.S[0], V20 -> 9406045e -> VDUP V20.S[0], V20 -> error

However, the assembler rejects this VDUP form, so the result cannot be
assembled again.

The same applies to "VDUP Rn, Vd.<T>", which the assembler previously
accepted only when spelled as "VMOV Rn, Vd.<T>". This CL completes the
support for VDUP.

This patch is a copy of CL 276092. Co-authored-by: JunchenLi
<junchen.li@arm.com>

Change-Id: I8f8d86cf1911d5b16bb40d189f1dc34b24416aaf
Reviewed-on: https://go-review.googlesource.com/c/go/+/302929
Trust: fannie zhang <Fannie.Zhang@arm.com>
Run-TryBot: fannie zhang <Fannie.Zhang@arm.com>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
src/cmd/asm/internal/asm/testdata/arm64.s
src/cmd/asm/internal/asm/testdata/arm64enc.s
src/cmd/internal/obj/arm64/asm7.go

index d859171103dafacc0ca9e32516dce1f780ae9422..1146c1a7898db2f3b457fe9f072269be2f5db36f 100644 (file)
@@ -596,9 +596,12 @@ TEXT       foo(SB), DUPOK|NOSPLIT, $-8
        VMOV    R20, V1.S[0]          // 811e044e
        VMOV    R20, V1.S[1]          // 811e0c4e
        VMOV    R1, V9.H4             // 290c020e
+       VDUP    R1, V9.H4             // 290c020e
        VMOV    R22, V11.D2           // cb0e084e
+       VDUP    R22, V11.D2           // cb0e084e
        VMOV    V2.B16, V4.B16        // 441ca24e
        VMOV    V20.S[0], V20         // 9406045e
+       VDUP    V20.S[0], V20         // 9406045e
        VMOV    V12.D[0], V12.D[1]    // 8c05186e
        VMOV    V10.S[0], V12.S[1]    // 4c050c6e
        VMOV    V9.H[0], V12.H[1]     // 2c05066e
index f71f7b048472f68662aa281d797e2297c440e4a0..a29862822d30a15b54d313e63676711f6bb7c0f3 100644 (file)
@@ -669,6 +669,7 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$-8
    VCMEQ V24.S4, V13.S4, V12.S4                                // ac8db86e
    VCNT V13.B8, V11.B8                                         // ab59200e
    VMOV V31.B[15], V18                                         // f2071f5e
+   VDUP V31.B[15], V18                                         // f2071f5e
    VDUP V31.B[13], V20.B16                                     // f4071b4e
    VEOR V4.B8, V18.B8, V7.B8                                   // 471e242e
    VEXT $4, V2.B8, V1.B8, V3.B8                                // 2320022e
@@ -700,6 +701,7 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$-8
    //TODO FMOVS.W 71(R29), F28                                 // bc7f44bc
    FMOVS 6160(R4), F23                                         // 971058bd
    VMOV V18.B[10], V27                                         // 5b06155e
+   VDUP V18.B[10], V27                                         // 5b06155e
    VMOV V12.B[2], V28.B[12]                                    // 9c15196e
    VMOV R30, V4.B[13]                                          // c41f1b4e
    VMOV V2.B16, V4.B16                                         // 441ca24e
index 275799aad32963fc4c4d16730e51096620660624..20f1843951e6347510575c6ca84a0fc556a4ca18 100644 (file)
@@ -501,6 +501,8 @@ var optab = []Optab{
        {AVMOV, C_REG, C_NONE, C_NONE, C_ELEM, 78, 4, 0, 0, 0},
        {AVMOV, C_ARNG, C_NONE, C_NONE, C_ARNG, 83, 4, 0, 0, 0},
        {AVDUP, C_ELEM, C_NONE, C_NONE, C_ARNG, 79, 4, 0, 0, 0},
+       {AVDUP, C_ELEM, C_NONE, C_NONE, C_VREG, 80, 4, 0, 0, 0},
+       {AVDUP, C_REG, C_NONE, C_NONE, C_ARNG, 82, 4, 0, 0, 0},
        {AVMOVI, C_ADDCON, C_NONE, C_NONE, C_ARNG, 86, 4, 0, 0, 0},
        {AVFMLA, C_ARNG, C_ARNG, C_NONE, C_ARNG, 72, 4, 0, 0, 0},
        {AVEXT, C_VCON, C_ARNG, C_ARNG, C_ARNG, 94, 4, 0, 0, 0},
@@ -4653,13 +4655,13 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) {
                o1 |= (uint32(Q&1) << 30) | (uint32(imm5&0x1f) << 16)
                o1 |= (uint32(rf&31) << 5) | uint32(rt&31)
 
-       case 80: /* vmov V.<T>[index], Vn */
+       case 80: /* vmov/vdup V.<T>[index], Vn */
                rf := int(p.From.Reg)
                rt := int(p.To.Reg)
                imm5 := 0
                index := int(p.From.Index)
                switch p.As {
-               case AVMOV:
+               case AVMOV, AVDUP:
                        o1 = 1<<30 | 15<<25 | 1<<10
                        switch (p.From.Reg >> 5) & 15 {
                        case ARNG_B:
@@ -4709,7 +4711,7 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) {
                o1 = c.maskOpvldvst(p, o1)
                o1 |= uint32(r&31) << 5
 
-       case 82: /* vmov Rn, Vd.<T> */
+       case 82: /* vmov/vdup Rn, Vd.<T> */
                rf := int(p.From.Reg)
                rt := int(p.To.Reg)
                o1 = 7<<25 | 3<<10
@@ -4737,7 +4739,7 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) {
                        Q = 1
                        imm5 = 2
                default:
-                       c.ctxt.Diag("invalid arrangement on VMOV Rn, Vd.<T>: %v\n", p)
+                       c.ctxt.Diag("invalid arrangement: %v\n", p)
                }
                o1 |= (Q & 1 << 30) | (imm5 & 0x1f << 16)
                o1 |= (uint32(rf&31) << 5) | uint32(rt&31)