From: Meng Zhuo Date: Wed, 2 Oct 2019 03:50:34 +0000 (+0800) Subject: cmd/asm: add VLD[1-4]R vector instructions on arm64 X-Git-Tag: go1.14beta1~881 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=2bf7a925712dca5646f9215cda17c5b61eea14ce;p=gostls13.git cmd/asm: add VLD[1-4]R vector instructions on arm64 This change adds VLD1R, VLD2R, VLD3R, VLD4R Change-Id: Ie19e9ae02fdfc94b9344acde8c9938849efb0bf0 Reviewed-on: https://go-review.googlesource.com/c/go/+/181697 Run-TryBot: Cherry Zhang TryBot-Result: Gobot Gobot Reviewed-by: Cherry Zhang --- diff --git a/src/cmd/asm/internal/asm/testdata/arm64.s b/src/cmd/asm/internal/asm/testdata/arm64.s index 9f19ff1e8d..93f70045b7 100644 --- a/src/cmd/asm/internal/asm/testdata/arm64.s +++ b/src/cmd/asm/internal/asm/testdata/arm64.s @@ -352,6 +352,18 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8 VLD4 (R15), [V10.H4, V11.H4, V12.H4, V13.H4] // ea05400c VLD4.P 32(R24), [V31.B8, V0.B8, V1.B8, V2.B8] // 1f03df0c VLD4.P (R13)(R9), [V14.S2, V15.S2, V16.S2, V17.S2] // VLD4.P (R13)(R9*1), [V14.S2,V15.S2,V16.S2,V17.S2] // ae09c90c + VLD1R (R0), [V0.B16] // 00c0404d + VLD1R.P 16(R0), [V0.B16] // 00c0df4d + VLD1R.P (R15)(R1), [V15.H4] // VLD1R.P (R15)(R1*1), [V15.H4] // efc5c10d + VLD2R (R15), [V15.H4, V16.H4] // efc5600d + VLD2R.P 32(R0), [V0.D2, V1.D2] // 00ccff4d + VLD2R.P (R0)(R5), [V31.D1, V0.D1] // VLD2R.P (R0)(R5*1), [V31.D1, V0.D1] // 1fcce50d + VLD3R (RSP), [V31.S2, V0.S2, V1.S2] // ffeb400d + VLD3R.P 24(R15), [V15.H4, V16.H4, V17.H4] // efe5df0d + VLD3R.P (R15)(R6), [V15.H8, V16.H8, V17.H8] // VLD3R.P (R15)(R6*1), [V15.H8, V16.H8, V17.H8] // efe5c64d + VLD4R (R0), [V0.B8, V1.B8, V2.B8, V3.B8] // 00e0600d + VLD4R.P 64(RSP), [V31.S4, V0.S4, V1.S4, V2.S4] // ffebff4d + VLD4R.P (R15)(R9), [V15.H4, V16.H4, V17.H4, V18.H4] // VLD4R.P (R15)(R9*1), [V15.H4, V16.H4, V17.H4, V18.H4] // efe5e90d VST1.P [V24.S2], 8(R2) // 58789f0c VST1 [V29.S2, V30.S2], (R29) // bdab000c VST1 [V14.H4, V15.H4, V16.H4], (R27) // 6e67000c diff --git a/src/cmd/internal/obj/arm64/a.out.go b/src/cmd/internal/obj/arm64/a.out.go index f793cdc4f9..4e5eb75a22 100644 --- a/src/cmd/internal/obj/arm64/a.out.go +++ b/src/cmd/internal/obj/arm64/a.out.go @@ -956,6 +956,10 @@ const ( AVLD2 AVLD3 AVLD4 + AVLD1R + AVLD2R + AVLD3R + AVLD4R AVORR AVREV32 AVREV64 diff --git a/src/cmd/internal/obj/arm64/anames.go b/src/cmd/internal/obj/arm64/anames.go index 621af6c195..2c277dfb95 100644 --- a/src/cmd/internal/obj/arm64/anames.go +++ b/src/cmd/internal/obj/arm64/anames.go @@ -463,6 +463,10 @@ var Anames = []string{ "VLD2", "VLD3", "VLD4", + "VLD1R", + "VLD2R", + "VLD3R", + "VLD4R", "VORR", "VREV32", "VREV64", diff --git a/src/cmd/internal/obj/arm64/asm7.go b/src/cmd/internal/obj/arm64/asm7.go index 47586ba262..c7fa943e7e 100644 --- a/src/cmd/internal/obj/arm64/asm7.go +++ b/src/cmd/internal/obj/arm64/asm7.go @@ -784,15 +784,9 @@ var optab = []Optab{ {AVLD1, C_ZOREG, C_NONE, C_NONE, C_LIST, 81, 4, 0, 0, 0}, {AVLD1, C_LOREG, C_NONE, C_NONE, C_LIST, 81, 4, 0, 0, C_XPOST}, {AVLD1, C_ROFF, C_NONE, C_NONE, C_LIST, 81, 4, 0, 0, C_XPOST}, - {AVLD2, C_ZOREG, C_NONE, C_NONE, C_LIST, 81, 4, 0, 0, 0}, - {AVLD2, C_LOREG, C_NONE, C_NONE, C_LIST, 81, 4, 0, 0, C_XPOST}, - {AVLD2, C_ROFF, C_NONE, C_NONE, C_LIST, 81, 4, 0, 0, C_XPOST}, - {AVLD3, C_ZOREG, C_NONE, C_NONE, C_LIST, 81, 4, 0, 0, 0}, - {AVLD3, C_LOREG, C_NONE, C_NONE, C_LIST, 81, 4, 0, 0, C_XPOST}, - {AVLD3, C_ROFF, C_NONE, C_NONE, C_LIST, 81, 4, 0, 0, C_XPOST}, - {AVLD4, C_ZOREG, C_NONE, C_NONE, C_LIST, 81, 4, 0, 0, 0}, - {AVLD4, C_LOREG, C_NONE, C_NONE, C_LIST, 81, 4, 0, 0, C_XPOST}, - {AVLD4, C_ROFF, C_NONE, C_NONE, C_LIST, 81, 4, 0, 0, C_XPOST}, + {AVLD1R, C_ZOREG, C_NONE, C_NONE, C_LIST, 81, 4, 0, 0, 0}, + {AVLD1R, C_LOREG, C_NONE, C_NONE, C_LIST, 81, 4, 0, 0, C_XPOST}, + {AVLD1R, C_ROFF, C_NONE, C_NONE, C_LIST, 81, 4, 0, 0, C_XPOST}, {AVLD1, C_LOREG, C_NONE, C_NONE, C_ELEM, 97, 4, 0, 0, C_XPOST}, {AVLD1, C_ROFF, C_NONE, C_NONE, C_ELEM, 97, 4, 0, 0, C_XPOST}, {AVLD1, C_LOREG, C_NONE, C_NONE, C_ELEM, 97, 4, 0, 0, 0}, @@ -2709,13 +2703,18 @@ func buildop(ctxt *obj.Link) { case AVZIP1: oprangeset(AVZIP2, t) + case AVLD1R: + oprangeset(AVLD2, t) + oprangeset(AVLD2R, t) + oprangeset(AVLD3, t) + oprangeset(AVLD3R, t) + oprangeset(AVLD4, t) + oprangeset(AVLD4R, t) + case ASHA1H, AVCNT, AVMOV, AVLD1, - AVLD2, - AVLD3, - AVLD4, AVST1, AVST2, AVST3, @@ -2803,7 +2802,7 @@ func (c *ctxt7) checkindex(p *obj.Prog, index, maxindex int) { func (c *ctxt7) checkoffset(p *obj.Prog, as obj.As) { var offset, list, n, expect int64 switch as { - case AVLD1, AVLD2, AVLD3, AVLD4: + case AVLD1, AVLD2, AVLD3, AVLD4, AVLD1R, AVLD2R, AVLD3R, AVLD4R: offset = p.From.Offset list = p.To.Offset case AVST1, AVST2, AVST3, AVST4: @@ -2836,11 +2835,13 @@ func (c *ctxt7) checkoffset(p *obj.Prog, as obj.As) { switch as { case AVLD1, AVST1: return - case AVLD2, AVST2: + case AVLD1R: + expect = 1 + case AVLD2, AVST2, AVLD2R: expect = 2 - case AVLD3, AVST3: + case AVLD3, AVST3, AVLD3R: expect = 3 - case AVLD4, AVST4: + case AVLD4, AVST4, AVLD4R: expect = 4 } @@ -4344,10 +4345,10 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { } o1 |= (uint32(imm5&0x1f) << 16) | (uint32(rf&31) << 5) | uint32(rt&31) - case 81: /* vld[1-4] (Rn), [Vt1., Vt2., ...] */ + case 81: /* vld[1-4]|vld[1-4]r (Rn), [Vt1., Vt2., ...] */ c.checkoffset(p, p.As) r := int(p.From.Reg) - o1 = 3<<26 | 1<<22 + o1 = c.oprrr(p, p.As) if o.scond == C_XPOST { o1 |= 1 << 23 if p.From.Index == 0 { @@ -4358,7 +4359,7 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { if isRegShiftOrExt(&p.From) { c.ctxt.Diag("invalid extended register op: %v\n", p) } - o1 |= uint32(p.From.Index&31) << 16 + o1 |= uint32(p.From.Index&0x1f) << 16 } } o1 |= uint32(p.To.Offset) @@ -5591,6 +5592,15 @@ func (c *ctxt7) oprrr(p *obj.Prog, a obj.As) uint32 { case AVRBIT: return 0x2E<<24 | 1<<22 | 0x10<<17 | 5<<12 | 2<<10 + + case AVLD1, AVLD2, AVLD3, AVLD4: + return 3<<26 | 1<<22 + + case AVLD1R, AVLD3R: + return 0xD<<24 | 1<<22 + + case AVLD2R, AVLD4R: + return 0xD<<24 | 3<<21 } c.ctxt.Diag("%v: bad rrr %d %v", p, a, a) @@ -6779,6 +6789,10 @@ func (c *ctxt7) maskOpvldvst(p *obj.Prog, o1 uint32) uint32 { o1 &^= 0xf000 // mask out "opcode" field (bit 12-15) switch p.As { + case AVLD1R, AVLD2R: + o1 |= 0xC << 12 + case AVLD3R, AVLD4R: + o1 |= 0xE << 12 case AVLD2, AVST2: o1 |= 8 << 12 case AVLD3, AVST3: