From ad5e941a4559a402fa3f428e612c58ec86471517 Mon Sep 17 00:00:00 2001 From: Guoqi Chen Date: Thu, 30 Oct 2025 19:08:01 +0800 Subject: [PATCH] cmd/internal/obj/loong64: using {xv,v}slli.d to perform copying between vector registers Go asm syntax: VMOVQ Vj, Vd XVMOVQ Xj, Xd Equivalent platform assembler syntax: vslli.d vd, vj, 0x0 xvslli.d xd, xj, 0x0 Change-Id: Ifddc3d4d3fbaa6fee2e079bf2ebfe96a2febaa1c Reviewed-on: https://go-review.googlesource.com/c/go/+/716801 Reviewed-by: Michael Knyszek Reviewed-by: Michael Pratt Reviewed-by: Meidan Li Reviewed-by: sophie zhao LUCI-TryBot-Result: Go LUCI --- .../asm/internal/asm/testdata/loong64enc1.s | 8 +++++++- src/cmd/internal/obj/loong64/asm.go | 19 ++++++++++++++----- src/cmd/internal/obj/loong64/doc.go | 9 +++++++++ 3 files changed, 30 insertions(+), 6 deletions(-) diff --git a/src/cmd/asm/internal/asm/testdata/loong64enc1.s b/src/cmd/asm/internal/asm/testdata/loong64enc1.s index ca86ef8e6c..e0619f8ecd 100644 --- a/src/cmd/asm/internal/asm/testdata/loong64enc1.s +++ b/src/cmd/asm/internal/asm/testdata/loong64enc1.s @@ -533,12 +533,18 @@ lable2: XVMOVQ X28.V[3], X8 // 88ef0377 XVMOVQ X27.V[0], X9 // 69e30377 - //Move vector element to vector. + // Move vector element to vector. VMOVQ V1.B[3], V9.B16 // 298cf772 VMOVQ V2.H[2], V8.H8 // 48c8f772 VMOVQ V3.W[1], V7.W4 // 67e4f772 VMOVQ V4.V[0], V6.V2 // 86f0f772 + // Move vector register to vector register. + VMOVQ V1, V9 // 29002d73 + VMOVQ V2, V8 // 48002d73 + XVMOVQ X3, X7 // 67002d77 + XVMOVQ X4, X6 // 86002d77 + // Load data from memory and broadcast to each element of a vector register: VMOVQ offset(Rj), . 
VMOVQ (R4), V0.B16 // 80008030 VMOVQ 1(R4), V0.B16 // 80048030 diff --git a/src/cmd/internal/obj/loong64/asm.go b/src/cmd/internal/obj/loong64/asm.go index 8e2393bc1c..8769183886 100644 --- a/src/cmd/internal/obj/loong64/asm.go +++ b/src/cmd/internal/obj/loong64/asm.go @@ -58,6 +58,8 @@ var optab = []Optab{ {AMOVW, C_REG, C_NONE, C_NONE, C_REG, C_NONE, 1, 4, 0, 0}, {AMOVV, C_REG, C_NONE, C_NONE, C_REG, C_NONE, 1, 4, 0, 0}, + {AVMOVQ, C_VREG, C_NONE, C_NONE, C_VREG, C_NONE, 1, 4, 0, 0}, + {AXVMOVQ, C_XREG, C_NONE, C_NONE, C_XREG, C_NONE, 1, 4, 0, 0}, {AMOVB, C_REG, C_NONE, C_NONE, C_REG, C_NONE, 12, 4, 0, 0}, {AMOVBU, C_REG, C_NONE, C_NONE, C_REG, C_NONE, 12, 4, 0, 0}, {AMOVWU, C_REG, C_NONE, C_NONE, C_REG, C_NONE, 12, 4, 0, 0}, @@ -2101,12 +2103,19 @@ func (c *ctxt0) asmout(p *obj.Prog, o *Optab, out []uint32) { case 0: // pseudo ops break - case 1: // mov r1,r2 ==> OR r1,r0,r2 - a := AOR - if p.As == AMOVW { - a = ASLL + case 1: // mov rj, rd + switch p.As { + case AMOVW: + o1 = OP_RRR(c.oprrr(ASLL), uint32(REGZERO), uint32(p.From.Reg), uint32(p.To.Reg)) + case AMOVV: + o1 = OP_RRR(c.oprrr(AOR), uint32(REGZERO), uint32(p.From.Reg), uint32(p.To.Reg)) + case AVMOVQ: + o1 = OP_6IRR(c.opirr(AVSLLV), uint32(0), uint32(p.From.Reg), uint32(p.To.Reg)) + case AXVMOVQ: + o1 = OP_6IRR(c.opirr(AXVSLLV), uint32(0), uint32(p.From.Reg), uint32(p.To.Reg)) + default: + c.ctxt.Diag("unexpected encoding\n%v", p) } - o1 = OP_RRR(c.oprrr(a), uint32(REGZERO), uint32(p.From.Reg), uint32(p.To.Reg)) case 2: // add/sub r1,[r2],r3 r := int(p.Reg) diff --git a/src/cmd/internal/obj/loong64/doc.go b/src/cmd/internal/obj/loong64/doc.go index 45f75e6e70..c96501ea81 100644 --- a/src/cmd/internal/obj/loong64/doc.go +++ b/src/cmd/internal/obj/loong64/doc.go @@ -203,6 +203,15 @@ Note: In the following sections 3.1 to 3.6, "ui4" (4-bit unsigned int immediate) VMOVQ Vj.W[index], Vd.W4 | vreplvei.w vd, vj, ui2 | for i in range(4) : VR[vd].w[i] = VR[vj].w[ui2] VMOVQ Vj.V[index], Vd.V2 | vreplvei.d vd, 
vj, ui1 | for i in range(2) : VR[vd].d[i] = VR[vj].d[ui1] +3.7 Move vector register to vector register. + Instruction format: + VMOVQ Vj, Vd + + Mapping between Go and platform assembly: + Go assembly | platform assembly | semantics + VMOVQ Vj, Vd | vslli.d vd, vj, 0x0 | for i in range(2) : VR[vd].D[i] = SLL(VR[vj].D[i], 0) + XVMOVQ Xj, Xd | xvslli.d xd, xj, 0x0 | for i in range(4) : XR[xd].D[i] = SLL(XR[xj].D[i], 0) + 3.7 Load data from memory and broadcast to each element of a vector register. Instruction format: -- 2.52.0