From: Guoqi Chen Date: Thu, 23 May 2024 05:43:20 +0000 (+0800) Subject: cmd/internal/obj/loong64: add FLDX,FSTX,LDX.STX instructions support X-Git-Tag: go1.24rc1~1275 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=f428c7b729d3d9b37ed4dacddcd7ff88f4213f70;p=gostls13.git cmd/internal/obj/loong64: add FLDX,FSTX,LDX.STX instructions support The LDX.{B,BU,H,HU,W,WU,D},STX.{B,H,W,D}, FLDX.{S,D}, FSTX.{S,D} instruction on Loong64 implements memory access operations using register offset Go asm syntax: MOV{B,BU,H,HU,W,WU,V} (RJ)(RK), RD MOV{B,H,W,V} RD, (RJ)(RK) MOV{F,D} (RJ)(RK), FD MOV{F,D} FD, (RJ)(RK) Equivalent platform assembler syntax: ldx.{b,bu,h,hu,w,wu,d} rd, rj, rk stx.{b,h,w,d} rd, rj, rk fldx.{s,d} fd, rj, rk fstx.{s,d} fd, rj, rk Ref: https://loongson.github.io/LoongArch-Documentation/LoongArch-Vol1-EN.html Change-Id: Ic7d13bf45dab8342f034b6469465e6337a087144 Reviewed-on: https://go-review.googlesource.com/c/go/+/588215 Reviewed-by: sophie zhao Reviewed-by: Michael Knyszek Reviewed-by: Cherry Mui LUCI-TryBot-Result: Go LUCI Reviewed-by: Qiqi Huang Reviewed-by: Meidan Li Auto-Submit: abner chenc --- diff --git a/src/cmd/asm/internal/asm/testdata/loong64enc1.s b/src/cmd/asm/internal/asm/testdata/loong64enc1.s index ba3327d4e8..b24aa2fe7c 100644 --- a/src/cmd/asm/internal/asm/testdata/loong64enc1.s +++ b/src/cmd/asm/internal/asm/testdata/loong64enc1.s @@ -309,3 +309,26 @@ lable2: FTINTRNEWD F0, F2 // 02c81a01 FTINTRNEVF F0, F2 // 02e41a01 FTINTRNEVD F0, F2 // 02e81a01 + + // LDX.{B,BU,H,HU,W,WU,D} instructions + MOVB (R14)(R13), R12 // cc350038 + MOVBU (R14)(R13), R12 // cc352038 + MOVH (R14)(R13), R12 // cc350438 + MOVHU (R14)(R13), R12 // cc352438 + MOVW (R14)(R13), R12 // cc350838 + MOVWU (R14)(R13), R12 // cc352838 + MOVV (R14)(R13), R12 // cc350c38 + + // STX.{B,H,W,D} instructions + MOVB R12, (R14)(R13) // cc351038 + MOVH R12, (R14)(R13) // cc351438 + MOVW R12, (R14)(R13) // cc351838 + MOVV R12, (R14)(R13) // cc351c38 + + // FLDX.{S,D} instructions + MOVF (R14)(R13), F2 // c2353038 + MOVD (R14)(R13), F2 // c2353438 + + // FSTX.{S,D} instructions + MOVF F2, (R14)(R13) // c2353838 + MOVD F2, (R14)(R13) // c2353c38 diff --git a/src/cmd/internal/obj/loong64/a.out.go b/src/cmd/internal/obj/loong64/a.out.go index 22de5e8203..d1cd35b878 100644 --- a/src/cmd/internal/obj/loong64/a.out.go +++ b/src/cmd/internal/obj/loong64/a.out.go @@ -218,6 +218,7 @@ const ( C_ZOREG C_SOREG C_LOREG + C_ROFF // register offset C_ADDR C_TLS_LE C_TLS_IE diff --git a/src/cmd/internal/obj/loong64/asm.go b/src/cmd/internal/obj/loong64/asm.go index 18578fdedc..52fe7b2c89 100644 --- a/src/cmd/internal/obj/loong64/asm.go +++ b/src/cmd/internal/obj/loong64/asm.go @@ -293,6 +293,22 @@ var optab = []Optab{ {AAMSWAPW, C_REG, C_NONE, C_NONE, C_ZOREG, C_REG, 66, 4, 0, 0}, {ANOOP, C_NONE, C_NONE, C_NONE, C_NONE, C_NONE, 49, 4, 0, 0}, + /* store with extended register offset */ + {AMOVB, C_REG, C_NONE, C_NONE, C_ROFF, C_NONE, 20, 4, 0, 0}, + {AMOVW, C_REG, C_NONE, C_NONE, C_ROFF, C_NONE, 20, 4, 0, 0}, + {AMOVV, C_REG, C_NONE, C_NONE, C_ROFF, C_NONE, 20, 4, 0, 0}, + {AMOVF, C_FREG, C_NONE, C_NONE, C_ROFF, C_NONE, 20, 4, 0, 0}, + {AMOVD, C_FREG, C_NONE, C_NONE, C_ROFF, C_NONE, 20, 4, 0, 0}, + + /* load with extended register offset */ + {AMOVB, C_ROFF, C_NONE, C_NONE, C_REG, C_NONE, 21, 4, 0, 0}, + {AMOVBU, C_ROFF, C_NONE, C_NONE, C_REG, C_NONE, 21, 4, 0, 0}, + {AMOVW, C_ROFF, C_NONE, C_NONE, C_REG, C_NONE, 21, 4, 0, 0}, + {AMOVWU, C_ROFF, C_NONE, C_NONE, C_REG, C_NONE, 21, 4, 0, 0}, + {AMOVV, C_ROFF, C_NONE, C_NONE, C_REG, C_NONE, 21, 4, 0, 0}, + {AMOVF, C_ROFF, C_NONE, C_NONE, C_FREG, C_NONE, 21, 4, 0, 0}, + {AMOVD, C_ROFF, C_NONE, C_NONE, C_FREG, C_NONE, 21, 4, 0, 0}, + {obj.APCALIGN, C_SCON, C_NONE, C_NONE, C_NONE, C_NONE, 0, 0, 0, 0}, {obj.APCDATA, C_LCON, C_NONE, C_NONE, C_LCON, C_NONE, 0, 0, 0, 0}, {obj.APCDATA, C_DCON, C_NONE, C_NONE, C_DCON, C_NONE, 0, 0, 0, 0}, @@ -654,6 +670,14 @@ func (c *ctxt0) aclass(a *obj.Addr) int { return C_LAUTO case obj.NAME_NONE: + if a.Index != 0 { + if a.Offset != 0 { + return C_GOK + } + // register offset + return C_ROFF + } + c.instoffset = a.Offset if c.instoffset == 0 { return C_ZOREG @@ -1474,6 +1498,12 @@ func (c *ctxt0) asmout(p *obj.Prog, o *Optab, out []uint32) { o1 = OP_IR(c.opir(ALU12IW), uint32(v>>12), uint32(p.To.Reg)) o2 = OP_12IRR(c.opirr(AOR), uint32(v), uint32(p.To.Reg), uint32(p.To.Reg)) + case 20: // mov Rsrc, (Rbase)(Roff) + o1 = OP_RRR(c.oprrr(p.As), uint32(p.To.Index), uint32(p.To.Reg), uint32(p.From.Reg)) + + case 21: // mov (Rbase)(Roff), Rdst + o1 = OP_RRR(c.oprrr(-p.As), uint32(p.From.Index), uint32(p.From.Reg), uint32(p.To.Reg)) + case 23: // add $lcon,r1,r2 v := c.regoff(&p.From) o1 = OP_IR(c.opir(ALU12IW), uint32(v>>12), uint32(REGTMP)) @@ -1916,6 +1946,36 @@ func (c *ctxt0) oprrr(a obj.As) uint32 { return 0x225 << 15 // fcopysign.s case AFCOPYSGD: return 0x226 << 15 // fcopysign.d + case -AMOVB: + return 0x07000 << 15 // ldx.b + case -AMOVH: + return 0x07008 << 15 // ldx.h + case -AMOVW: + return 0x07010 << 15 // ldx.w + case -AMOVV: + return 0x07018 << 15 // ldx.d + case -AMOVBU: + return 0x07040 << 15 // ldx.bu + case -AMOVHU: + return 0x07048 << 15 // ldx.hu + case -AMOVWU: + return 0x07050 << 15 // ldx.wu + case AMOVB: + return 0x07020 << 15 // stx.b + case AMOVH: + return 0x07028 << 15 // stx.h + case AMOVW: + return 0x07030 << 15 // stx.w + case AMOVV: + return 0x07038 << 15 // stx.d + case -AMOVF: + return 0x07060 << 15 // fldx.s + case -AMOVD: + return 0x07068 << 15 // fldx.d + case AMOVF: + return 0x07070 << 15 // fstx.s + case AMOVD: + return 0x07078 << 15 // fstx.d } if a < 0 { diff --git a/src/cmd/internal/obj/loong64/cnames.go b/src/cmd/internal/obj/loong64/cnames.go index 985a331def..41721aae2d 100644 --- a/src/cmd/internal/obj/loong64/cnames.go +++ b/src/cmd/internal/obj/loong64/cnames.go @@ -30,6 +30,7 @@ var cnames0 = []string{ "ZOREG", "SOREG", "LOREG", + "ROFF", "ADDR", "TLS_LE", "TLS_IE", diff --git a/src/cmd/internal/obj/loong64/doc.go b/src/cmd/internal/obj/loong64/doc.go new file mode 100644 index 0000000000..19a0effcd5 --- /dev/null +++ b/src/cmd/internal/obj/loong64/doc.go @@ -0,0 +1,86 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +/* +Package loong64 implements an LoongArch64 assembler. Go assembly syntax is different from +GNU LoongArch64 syntax, but we can still follow the general rules to map between them. + +# Instructions mnemonics mapping rules + +1. Bit widths represented by various instruction suffixes +V (vlong) = 64 bit +WU (word) = 32 bit unsigned +W (word) = 32 bit +H (half word) = 16 bit +HU = 16 bit unsigned +B (byte) = 8 bit +BU = 8 bit unsigned +F (float) = 32 bit float +D (double) = 64 bit float + +2. Align directive +Go asm supports the PCALIGN directive, which indicates that the next instruction should +be aligned to a specified boundary by padding with NOOP instruction. The alignment value +supported on loong64 must be a power of 2 and in the range of [8, 2048]. + +Examples: + + PCALIGN $16 + MOVV $2, R4 // This instruction is aligned with 16 bytes. + PCALIGN $1024 + MOVV $3, R5 // This instruction is aligned with 1024 bytes. + +# On loong64, auto-align loop heads to 16-byte boundaries + +Examples: + + TEXT ·Add(SB),NOSPLIT|NOFRAME,$0 + +start: + + MOVV $1, R4 // This instruction is aligned with 16 bytes. + MOVV $-1, R5 + BNE R5, start + RET + +# Register mapping rules + +1. All generial-prupose register names are written as Rn. + +2. All floating-poing register names are written as Fn. + +# Argument mapping rules + +1. The operands appear in left-to-right assignment order. + +Go reverses the arguments of most instructions. + +Examples: + + ADDV R11, R12, R13 <=> add.d R13, R12, R11 + LLV (R4), R7 <=> ll.d R7, R4 + OR R5, R6 <=> or R6, R6, R5 + +Special Cases. +Argument order is the same as in the GNU Loong64 syntax: jump instructions, + +Examples: + + BEQ R0, R4, lable1 <=> beq R0, R4, lable1 + JMP lable1 <=> b lable1 + +2. Expressions for special arguments. + +Memory references: a base register and an offset register is written as (Rbase)(Roff). + +Examples: + + MOVB (R4)(R5), R6 <=> ldx.b R6, R4, R5 + MOVV (R4)(R5), R6 <=> ldx.d R6, R4, R5 + MOVD (R4)(R5), F6 <=> fldx.d F6, R4, R5 + MOVB R6, (R4)(R5) <=> stx.b R6, R5, R5 + MOVV R6, (R4)(R5) <=> stx.d R6, R5, R5 + MOVV F6, (R4)(R5) <=> fstx.d F6, R5, R5 +*/ +package loong64