From 882335e2cbe9b123ba5fa4ee7544e7283e41d07c Mon Sep 17 00:00:00 2001 From: Xiaolin Zhao Date: Mon, 25 Aug 2025 15:22:09 +0800 Subject: [PATCH] cmd/internal/obj/loong64: add LDPTR.{W/D} and STPTR.{W/D} instructions support Go asm syntax: MOVWP 4(R4), R5 MOVVP 8(R4), R5 MOVWP R4, 12(R5) MOVVP R4, 16(R5) Equivalent platform assembler syntax: ldptr.w r5, r4, $1 ldptr.d r5, r4, $2 stptr.w r4, r5, $3 stptr.d r4, r5, $4 Change-Id: I50a341cee2d875cb7c5da9db08b23799c9dc6c64 Reviewed-on: https://go-review.googlesource.com/c/go/+/699055 Reviewed-by: abner chenc Reviewed-by: Meidan Li LUCI-TryBot-Result: Go LUCI Reviewed-by: Carlos Amedee Reviewed-by: Cherry Mui --- .../asm/internal/asm/testdata/loong64enc1.s | 22 +++++++++++++ src/cmd/internal/obj/loong64/a.out.go | 4 +++ src/cmd/internal/obj/loong64/anames.go | 2 ++ src/cmd/internal/obj/loong64/asm.go | 33 +++++++++++++++++++ src/cmd/internal/obj/loong64/doc.go | 28 ++++++++++++++++ 5 files changed, 89 insertions(+) diff --git a/src/cmd/asm/internal/asm/testdata/loong64enc1.s b/src/cmd/asm/internal/asm/testdata/loong64enc1.s index 72e6573466..63676cc785 100644 --- a/src/cmd/asm/internal/asm/testdata/loong64enc1.s +++ b/src/cmd/asm/internal/asm/testdata/loong64enc1.s @@ -260,6 +260,28 @@ lable2: MOVV FCC0, R4 // 04dc1401 MOVV R4, FCC0 // 80d81401 + // LDPTR.{W/D} and STPTR.{W/D} instructions + MOVWP R5, -32768(R4) // 85008025 + MOVWP R5, 32764(R4) // 85fc7f25 + MOVWP R5, 32(R4) // 85200025 + MOVWP R5, 4(R4) // 85040025 + MOVWP R5, (R4) // 85000025 + MOVVP R5, -32768(R4) // 85008027 + MOVVP R5, 32764(R4) // 85fc7f27 + MOVVP R5, 32(R4) // 85200027 + MOVVP R5, 4(R4) // 85040027 + MOVVP R5, (R4) // 85000027 + MOVWP -32768(R5), R4 // a4008024 + MOVWP 32764(R5), R4 // a4fc7f24 + MOVWP 32(R5), R4 // a4200024 + MOVWP 4(R5), R4 // a4040024 + MOVWP (R5), R4 // a4000024 + MOVVP -32768(R5), R4 // a4008026 + MOVVP 32764(R5), R4 // a4fc7f26 + MOVVP 32(R5), R4 // a4200026 + MOVVP 4(R5), R4 // a4040026 + MOVVP (R5), R4 // a4000026 + // 
Loong64 atomic memory access instructions AMSWAPB R14, (R13), R12 // ac395c38 AMSWAPH R14, (R13), R12 // acb95c38 diff --git a/src/cmd/internal/obj/loong64/a.out.go b/src/cmd/internal/obj/loong64/a.out.go index f5d20cfabe..8e651cdfef 100644 --- a/src/cmd/internal/obj/loong64/a.out.go +++ b/src/cmd/internal/obj/loong64/a.out.go @@ -666,6 +666,10 @@ const ( ABSTRPICKW ABSTRPICKV + // 2.2.5.3 + AMOVWP + AMOVVP + // 2.2.5.4. Prefetch Instructions APRELD APRELDX diff --git a/src/cmd/internal/obj/loong64/anames.go b/src/cmd/internal/obj/loong64/anames.go index 67b5f2fc80..c629553d55 100644 --- a/src/cmd/internal/obj/loong64/anames.go +++ b/src/cmd/internal/obj/loong64/anames.go @@ -202,6 +202,8 @@ var Anames = []string{ "BSTRINSV", "BSTRPICKW", "BSTRPICKV", + "MOVWP", + "MOVVP", "PRELD", "PRELDX", "CRCWBW", diff --git a/src/cmd/internal/obj/loong64/asm.go b/src/cmd/internal/obj/loong64/asm.go index 5d85585ebe..1b982f6c86 100644 --- a/src/cmd/internal/obj/loong64/asm.go +++ b/src/cmd/internal/obj/loong64/asm.go @@ -212,6 +212,8 @@ var optab = []Optab{ {AMOVV, C_REG, C_NONE, C_NONE, C_TLS_LE, C_NONE, 53, 16, 0, 0}, {AMOVB, C_REG, C_NONE, C_NONE, C_TLS_LE, C_NONE, 53, 16, 0, 0}, {AMOVBU, C_REG, C_NONE, C_NONE, C_TLS_LE, C_NONE, 53, 16, 0, 0}, + {AMOVWP, C_REG, C_NONE, C_NONE, C_SOREG, C_NONE, 73, 4, 0, 0}, + {AMOVWP, C_REG, C_NONE, C_NONE, C_LOREG, C_NONE, 73, 4, 0, 0}, {AMOVW, C_LAUTO, C_NONE, C_NONE, C_REG, C_NONE, 36, 12, REGSP, 0}, {AMOVWU, C_LAUTO, C_NONE, C_NONE, C_REG, C_NONE, 36, 12, REGSP, 0}, @@ -233,6 +235,8 @@ var optab = []Optab{ {AMOVV, C_TLS_LE, C_NONE, C_NONE, C_REG, C_NONE, 54, 16, 0, 0}, {AMOVB, C_TLS_LE, C_NONE, C_NONE, C_REG, C_NONE, 54, 16, 0, 0}, {AMOVBU, C_TLS_LE, C_NONE, C_NONE, C_REG, C_NONE, 54, 16, 0, 0}, + {AMOVWP, C_SOREG, C_NONE, C_NONE, C_REG, C_NONE, 74, 4, 0, 0}, + {AMOVWP, C_LOREG, C_NONE, C_NONE, C_REG, C_NONE, 74, 4, 0, 0}, {AMOVW, C_SACON, C_NONE, C_NONE, C_REG, C_NONE, 3, 4, REGSP, 0}, {AMOVV, C_SACON, C_NONE, C_NONE, C_REG, C_NONE, 3, 
4, REGSP, 0}, @@ -1437,6 +1441,9 @@ func buildop(ctxt *obj.Link) { case AMOVBU: opset(AMOVHU, r0) + case AMOVWP: + opset(AMOVVP, r0) + case AMUL: opset(AMULU, r0) opset(AMULH, r0) @@ -1964,6 +1971,10 @@ func OP_16IRR(op uint32, i uint32, r2 uint32, r3 uint32) uint32 { return op | (i&0xFFFF)<<10 | (r2&0x1F)<<5 | (r3&0x1F)<<0 } +func OP_14IRR(op uint32, i uint32, r2 uint32, r3 uint32) uint32 { + return op | (i&0x3FFF)<<10 | (r2&0x1F)<<5 | (r3&0x1F)<<0 +} + func OP_12IR_5I(op uint32, i1 uint32, r2 uint32, i2 uint32) uint32 { return op | (i1&0xFFF)<<10 | (r2&0x1F)<<5 | (i2&0x1F)<<0 } @@ -2893,6 +2904,20 @@ func (c *ctxt0) asmout(p *obj.Prog, o *Optab, out []uint32) { o3 = OP_12IRR(c.opirr(ALU52ID), uint32(v>>52), uint32(REGTMP), uint32(REGTMP)) } o4 = OP_RRR(c.oprrr(p.As), uint32(REGTMP), uint32(r), uint32(p.To.Reg)) + + case 73: + v := c.regoff(&p.To) + if v&3 != 0 { + c.ctxt.Diag("%v: offset must be a multiple of 4.\n", p) + } + o1 = OP_14IRR(c.opirr(p.As), uint32(v>>2), uint32(p.To.Reg), uint32(p.From.Reg)) + + case 74: + v := c.regoff(&p.From) + if v&3 != 0 { + c.ctxt.Diag("%v: offset must be a multiple of 4.\n", p) + } + o1 = OP_14IRR(c.opirr(-p.As), uint32(v>>2), uint32(p.From.Reg), uint32(p.To.Reg)) } out[0] = o1 @@ -4026,6 +4051,10 @@ func (c *ctxt0) opirr(a obj.As) uint32 { return 0x0ad << 22 case AMOVD: return 0x0af << 22 + case AMOVVP: + return 0x27 << 24 // stptr.d + case AMOVWP: + return 0x25 << 24 // stptr.w case -AMOVB: return 0x0a0 << 22 case -AMOVBU: @@ -4044,6 +4073,10 @@ func (c *ctxt0) opirr(a obj.As) uint32 { return 0x0ac << 22 case -AMOVD: return 0x0ae << 22 + case -AMOVVP: + return 0x26 << 24 // ldptr.d + case -AMOVWP: + return 0x24 << 24 // ldptr.w case -AVMOVQ: return 0x0b0 << 22 // vld case -AXVMOVQ: diff --git a/src/cmd/internal/obj/loong64/doc.go b/src/cmd/internal/obj/loong64/doc.go index 64bb41ae5a..6c8f2618a2 100644 --- a/src/cmd/internal/obj/loong64/doc.go +++ b/src/cmd/internal/obj/loong64/doc.go @@ -289,6 +289,34 @@ Note: In the 
following sections 3.1 to 3.6, "ui4" (4-bit unsigned int immediate) Go assembly | instruction Encoding ALSLV $4, r4, r5, R6 | 002d9486 + +5. Note of special memory access instructions + Instruction format: + MOVWP offset(Rj), Rd + MOVVP offset(Rj), Rd + MOVWP Rd, offset(Rj) + MOVVP Rd, offset(Rj) + + Mapping between Go and platform assembly: + Go assembly | platform assembly + MOVWP offset(Rj), Rd | ldptr.w rd, rj, si14 + MOVVP offset(Rj), Rd | ldptr.d rd, rj, si14 + MOVWP Rd, offset(Rj) | stptr.w rd, rj, si14 + MOVVP Rd, offset(Rj) | stptr.d rd, rj, si14 + + note: In Go assembly, for ease of understanding, offset is a 16-bit immediate number (a multiple of 4) representing + the actual address offset, but in platform assembly, it needs a 14-bit immediate number. + si14 = offset>>2 + + The addressing calculation for the above instructions involves logically left-shifting the 14-bit + immediate number si14 by 2 bits, then sign-extending it, and finally adding it to the value in the + general-purpose register rj to obtain the sum. + + For example: + + Go assembly | platform assembly + MOVWP 8(R4), R5 | ldptr.w r5, r4, $2 + */ package loong64 -- 2.52.0