Cypherpunks repositories - gostls13.git/commitdiff
cmd/internal/obj{,/loong64}: instructions and registers for loong64
author Xiaodong Liu <liuxiaodong@loongson.cn>
Sun, 15 Aug 2021 08:01:31 +0000 (16:01 +0800)
committer Gopher Robot <gobot@golang.org>
Wed, 11 May 2022 20:11:34 +0000 (20:11 +0000)
Implemented an assembler for LoongArch64 (loong64 is the short name).
This provides register definitions and instruction encoding as
defined in the LoongArch Instruction Set Manual.

LoongArch Instruction Set Manual:
  https://github.com/loongson/LoongArch-Documentation/releases

Contributors to the linux/loong64 port are:
  Weining Lu <luweining@loongson.cn>
  Lei Wang <wanglei@loongson.cn>
  Lingqin Gong <gonglingqin@loongson.cn>
  Xiaolin Zhao <zhaoxiaolin@loongson.cn>
  Meidan Li <limeidan@loongson.cn>
  Xiaojuan Zhai <zhaixiaojuan@loongson.cn>
  Qiyuan Pu <puqiyuan@loongson.cn>
  Guoqi Chen <chenguoqi@loongson.cn>

This port has been updated to Go 1.15.6:
  https://github.com/loongson/go

Updates #46229

Change-Id: I930d2a19246496e3ca36d55539183c0f9f650ad9
Reviewed-on: https://go-review.googlesource.com/c/go/+/342309
Reviewed-by: David Chase <drchase@google.com>
Auto-Submit: Ian Lance Taylor <iant@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@google.com>
Run-TryBot: Ian Lance Taylor <iant@google.com>

src/cmd/internal/obj/link.go
src/cmd/internal/obj/loong64/a.out.go [new file with mode: 0644]
src/cmd/internal/obj/loong64/anames.go [new file with mode: 0644]
src/cmd/internal/obj/loong64/asm.go [new file with mode: 0644]
src/cmd/internal/obj/loong64/cnames.go [new file with mode: 0644]
src/cmd/internal/obj/loong64/list.go [new file with mode: 0644]
src/cmd/internal/obj/loong64/obj.go [new file with mode: 0644]
src/cmd/internal/obj/util.go

index 5f6c135f8c4001f1f56c7e17d18bbdbdc4a550ba..8a50b2e4fe182f98f201063b7cfbda142a5f3ac4 100644 (file)
@@ -442,6 +442,7 @@ const (
        ABasePPC64
        ABaseARM64
        ABaseMIPS
+       ABaseLoong64
        ABaseRISCV
        ABaseS390X
        ABaseWasm
diff --git a/src/cmd/internal/obj/loong64/a.out.go b/src/cmd/internal/obj/loong64/a.out.go
new file mode 100644 (file)
index 0000000..951eeb7
--- /dev/null
@@ -0,0 +1,414 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package loong64
+
+import (
+       "cmd/internal/obj"
+)
+
+//go:generate go run ../stringer.go -i $GOFILE -o anames.go -p loong64
+
+const (
+       NSNAME = 8
+       NSYM   = 50
+       NREG   = 32 // number of general registers (REG_R0 through REG_R31)
+       NFREG  = 32 // number of floating point registers (REG_F0 through REG_F31)
+)
+
+const (
+       REG_R0 = obj.RBaseLOONG64 + iota // general registers; REG_R0 must be a multiple of 32 (checked in init)
+       REG_R1
+       REG_R2
+       REG_R3
+       REG_R4
+       REG_R5
+       REG_R6
+       REG_R7
+       REG_R8
+       REG_R9
+       REG_R10
+       REG_R11
+       REG_R12
+       REG_R13
+       REG_R14
+       REG_R15
+       REG_R16
+       REG_R17
+       REG_R18
+       REG_R19
+       REG_R20
+       REG_R21
+       REG_R22
+       REG_R23
+       REG_R24
+       REG_R25
+       REG_R26
+       REG_R27
+       REG_R28
+       REG_R29
+       REG_R30
+       REG_R31
+
+       REG_F0 // floating point registers; REG_F0 must be a multiple of 32 (checked in init)
+       REG_F1
+       REG_F2
+       REG_F3
+       REG_F4
+       REG_F5
+       REG_F6
+       REG_F7
+       REG_F8
+       REG_F9
+       REG_F10
+       REG_F11
+       REG_F12
+       REG_F13
+       REG_F14
+       REG_F15
+       REG_F16
+       REG_F17
+       REG_F18
+       REG_F19
+       REG_F20
+       REG_F21
+       REG_F22
+       REG_F23
+       REG_F24
+       REG_F25
+       REG_F26
+       REG_F27
+       REG_F28
+       REG_F29
+       REG_F30
+       REG_F31
+
+       REG_FCSR0 // must be a multiple of 32 (checked in init)
+       REG_FCSR1
+       REG_FCSR2
+       REG_FCSR3 // only four registers are needed; the rest presumably pad the bank to 32 entries so the next bank stays 32-aligned
+       REG_FCSR4
+       REG_FCSR5
+       REG_FCSR6
+       REG_FCSR7
+       REG_FCSR8
+       REG_FCSR9
+       REG_FCSR10
+       REG_FCSR11
+       REG_FCSR12
+       REG_FCSR13
+       REG_FCSR14
+       REG_FCSR15
+       REG_FCSR16
+       REG_FCSR17
+       REG_FCSR18
+       REG_FCSR19
+       REG_FCSR20
+       REG_FCSR21
+       REG_FCSR22
+       REG_FCSR23
+       REG_FCSR24
+       REG_FCSR25
+       REG_FCSR26
+       REG_FCSR27
+       REG_FCSR28
+       REG_FCSR29
+       REG_FCSR30
+       REG_FCSR31
+
+       REG_FCC0 // must be a multiple of 32 (checked in init)
+       REG_FCC1
+       REG_FCC2
+       REG_FCC3
+       REG_FCC4
+       REG_FCC5
+       REG_FCC6
+       REG_FCC7 // only eight registers are needed; the rest presumably pad the bank to 32 entries
+       REG_FCC8
+       REG_FCC9
+       REG_FCC10
+       REG_FCC11
+       REG_FCC12
+       REG_FCC13
+       REG_FCC14
+       REG_FCC15
+       REG_FCC16
+       REG_FCC17
+       REG_FCC18
+       REG_FCC19
+       REG_FCC20
+       REG_FCC21
+       REG_FCC22
+       REG_FCC23
+       REG_FCC24
+       REG_FCC25
+       REG_FCC26
+       REG_FCC27
+       REG_FCC28
+       REG_FCC29
+       REG_FCC30
+       REG_FCC31
+
+       REG_LAST = REG_FCC31 // the last defined register
+
+       REG_SPECIAL = REG_FCSR0
+
+       REGZERO = REG_R0 // set to zero
+       REGLINK = REG_R1
+       REGSP   = REG_R3
+       REGRET  = REG_R19
+       REGARG  = -1      // -1 disables passing the first argument in register
+       REGRT1  = REG_R19 // reserved for runtime, duffzero and duffcopy (same register as REGRET)
+       REGRT2  = REG_R20 // reserved for runtime, duffcopy
+       REGCTXT = REG_R29 // context for closures
+       REGG    = REG_R22 // G in loong64
+       REGTMP  = REG_R30 // used by the assembler
+       FREGRET = REG_F0
+)
+
+var LOONG64DWARFRegisters = map[int16]int16{} // REG_* constant -> DWARF register number; filled in by init for the R and F banks
+
+func init() {
+       // f maps every register r in [from, to] (inclusive) to DWARF register (r - from) + base.
+       f := func(from, to, base int16) {
+               for r := int16(from); r <= to; r++ {
+                       LOONG64DWARFRegisters[r] = (r - from) + base
+               }
+       }
+       f(REG_R0, REG_R31, 0)  // general registers map to 0..31
+       f(REG_F0, REG_F31, 32) // floating point registers map to 32..63
+
+}
+
+const (
+       BIG = 2046 // NOTE(review): purpose not visible here; presumably an offset bound just under the 12-bit signed limit (2047) — confirm against its users
+)
+
+const (
+       // mark flags (one distinct bit each, so they can be OR-ed together)
+       LABEL  = 1 << 0
+       LEAF   = 1 << 1
+       SYNC   = 1 << 2
+       BRANCH = 1 << 3
+)
+
+const (
+       C_NONE = iota // operand classes; these are the values used in the a1/a2/a3 columns of optab
+       C_REG
+       C_FREG
+       C_FCSRREG
+       C_FCCREG
+       C_ZCON
+       C_SCON // 12 bit signed
+       C_UCON // 32 bit signed, low 12 bits 0
+       C_ADD0CON
+       C_AND0CON
+       C_ADDCON // -0x800 <= v < 0
+       C_ANDCON // 0 < v <= 0xFFF
+       C_LCON   // other 32
+       C_DCON   // other 64 (could subdivide further)
+       C_SACON  // $n(REG) where n <= int12
+       C_SECON
+       C_LACON // $n(REG) where int12 < n <= int32
+       C_LECON
+       C_DACON // $n(REG) where int32 < n
+       C_STCON // $tlsvar
+       C_SBRA
+       C_LBRA
+       C_SAUTO
+       C_LAUTO
+       C_SEXT
+       C_LEXT
+       C_ZOREG
+       C_SOREG
+       C_LOREG
+       C_GOK
+       C_ADDR
+       C_TLS
+       C_TEXTSIZE
+
+       C_NCLASS // must be the last; it is the dimension of the xcmp table
+)
+
+const (
+       AABSD = obj.ABaseLoong64 + obj.A_ARCHSPECIFIC + iota // opcode names live in Anames (anames.go), regenerated from this block via go:generate
+       AABSF
+       AADD
+       AADDD
+       AADDF
+       AADDU
+
+       AADDW
+       AAND
+       ABEQ
+       ABGEZ
+       ABLEZ
+       ABGTZ
+       ABLTZ
+       ABFPF
+       ABFPT
+
+       ABNE
+       ABREAK
+       ACLO
+       ACLZ
+
+       ACMPEQD
+       ACMPEQF
+
+       ACMPGED // ACMPGED -> fcmp.sle.d
+       ACMPGEF // ACMPGEF -> fcmp.sle.s
+       ACMPGTD // ACMPGTD -> fcmp.slt.d
+       ACMPGTF // ACMPGTF -> fcmp.slt.s
+
+       ALU12IW
+       ALU32ID
+       ALU52ID
+       APCADDU12I
+       AJIRL
+       ABGE
+       ABLT
+       ABLTU
+       ABGEU
+
+       ADIV
+       ADIVD
+       ADIVF
+       ADIVU
+       ADIVW
+
+       ALL
+       ALLV
+
+       ALUI
+
+       AMOVB
+       AMOVBU
+
+       AMOVD
+       AMOVDF
+       AMOVDW
+       AMOVF
+       AMOVFD
+       AMOVFW
+
+       AMOVH
+       AMOVHU
+       AMOVW
+
+       AMOVWD
+       AMOVWF
+
+       AMOVWL
+       AMOVWR
+
+       AMUL
+       AMULD
+       AMULF
+       AMULU
+       AMULH
+       AMULHU
+       AMULW
+       ANEGD
+       ANEGF
+
+       ANEGW
+       ANEGV
+
+       ANOOP // hardware nop
+       ANOR
+       AOR
+       AREM
+       AREMU
+
+       ARFE
+
+       ASC
+       ASCV
+
+       ASGT
+       ASGTU
+
+       ASLL
+       ASQRTD
+       ASQRTF
+       ASRA
+       ASRL
+       ASUB
+       ASUBD
+       ASUBF
+
+       ASUBU
+       ASUBW
+       ADBAR
+       ASYSCALL
+
+       ATEQ
+       ATNE
+
+       AWORD
+
+       AXOR
+
+       // 64-bit
+       AMOVV
+       AMOVVL
+       AMOVVR
+
+       ASLLV
+       ASRAV
+       ASRLV
+       ADIVV
+       ADIVVU
+
+       AREMV
+       AREMVU
+
+       AMULV
+       AMULVU
+       AMULHV
+       AMULHVU
+       AADDV
+       AADDVU
+       ASUBV
+       ASUBVU
+
+       // 64-bit FP
+       ATRUNCFV
+       ATRUNCDV
+       ATRUNCFW
+       ATRUNCDW
+
+       AMOVWU
+       AMOVFV
+       AMOVDV
+       AMOVVF
+       AMOVVD
+
+       ALAST // end marker; ALAST & obj.AMask is the length of the oprange table
+
+       // aliases for the architecture-independent opcodes
+       AJMP = obj.AJMP
+       AJAL = obj.ACALL
+       ARET = obj.ARET
+)
+
+func init() {
+       // The asm encoder generally assumes that the lowest 5 bits of the
+       // REG_XX constants match the machine instruction encoding, i.e.
+       // the lowest 5 bits are the register number.
+       // Verify here that every register bank starts at a multiple of 32.
+       if REG_R0%32 != 0 {
+               panic("REG_R0 is not a multiple of 32")
+       }
+       if REG_F0%32 != 0 {
+               panic("REG_F0 is not a multiple of 32")
+       }
+       if REG_FCSR0%32 != 0 {
+               panic("REG_FCSR0 is not a multiple of 32")
+       }
+       if REG_FCC0%32 != 0 {
+               panic("REG_FCC0 is not a multiple of 32")
+       }
+}
diff --git a/src/cmd/internal/obj/loong64/anames.go b/src/cmd/internal/obj/loong64/anames.go
new file mode 100644 (file)
index 0000000..48d8a78
--- /dev/null
@@ -0,0 +1,130 @@
+// Code generated by stringer -i a.out.go -o anames.go -p loong64; DO NOT EDIT.
+
+package loong64
+
+import "cmd/internal/obj"
+
+var Anames = []string{
+       obj.A_ARCHSPECIFIC: "ABSD",
+       "ABSF",
+       "ADD",
+       "ADDD",
+       "ADDF",
+       "ADDU",
+       "ADDW",
+       "AND",
+       "BEQ",
+       "BGEZ",
+       "BLEZ",
+       "BGTZ",
+       "BLTZ",
+       "BFPF",
+       "BFPT",
+       "BNE",
+       "BREAK",
+       "CLO",
+       "CLZ",
+       "CMPEQD",
+       "CMPEQF",
+       "CMPGED",
+       "CMPGEF",
+       "CMPGTD",
+       "CMPGTF",
+       "LU12IW",
+       "LU32ID",
+       "LU52ID",
+       "PCADDU12I",
+       "JIRL",
+       "BGE",
+       "BLT",
+       "BLTU",
+       "BGEU",
+       "DIV",
+       "DIVD",
+       "DIVF",
+       "DIVU",
+       "DIVW",
+       "LL",
+       "LLV",
+       "LUI",
+       "MOVB",
+       "MOVBU",
+       "MOVD",
+       "MOVDF",
+       "MOVDW",
+       "MOVF",
+       "MOVFD",
+       "MOVFW",
+       "MOVH",
+       "MOVHU",
+       "MOVW",
+       "MOVWD",
+       "MOVWF",
+       "MOVWL",
+       "MOVWR",
+       "MUL",
+       "MULD",
+       "MULF",
+       "MULU",
+       "MULH",
+       "MULHU",
+       "MULW",
+       "NEGD",
+       "NEGF",
+       "NEGW",
+       "NEGV",
+       "NOOP",
+       "NOR",
+       "OR",
+       "REM",
+       "REMU",
+       "RFE",
+       "SC",
+       "SCV",
+       "SGT",
+       "SGTU",
+       "SLL",
+       "SQRTD",
+       "SQRTF",
+       "SRA",
+       "SRL",
+       "SUB",
+       "SUBD",
+       "SUBF",
+       "SUBU",
+       "SUBW",
+       "DBAR",
+       "SYSCALL",
+       "TEQ",
+       "TNE",
+       "WORD",
+       "XOR",
+       "MOVV",
+       "MOVVL",
+       "MOVVR",
+       "SLLV",
+       "SRAV",
+       "SRLV",
+       "DIVV",
+       "DIVVU",
+       "REMV",
+       "REMVU",
+       "MULV",
+       "MULVU",
+       "MULHV",
+       "MULHVU",
+       "ADDV",
+       "ADDVU",
+       "SUBV",
+       "SUBVU",
+       "TRUNCFV",
+       "TRUNCDV",
+       "TRUNCFW",
+       "TRUNCDW",
+       "MOVWU",
+       "MOVFV",
+       "MOVDV",
+       "MOVVF",
+       "MOVVD",
+       "LAST",
+}
diff --git a/src/cmd/internal/obj/loong64/asm.go b/src/cmd/internal/obj/loong64/asm.go
new file mode 100644 (file)
index 0000000..c5829ad
--- /dev/null
@@ -0,0 +1,1960 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package loong64
+
+import (
+       "cmd/internal/obj"
+       "cmd/internal/objabi"
+       "cmd/internal/sys"
+       "fmt"
+       "log"
+       "sort"
+)
+
+// ctxt0 holds state while assembling a single function.
+// Each function gets a fresh ctxt0.
+// This allows for multiple functions to be safely concurrently assembled.
+type ctxt0 struct {
+       ctxt       *obj.Link     // link context handed to span0; used for diagnostics
+       newprog    obj.ProgAlloc // allocates new Progs, e.g. the jumps span0 inserts around far branches
+       cursym     *obj.LSym     // symbol of the function being assembled; span0 stores the final size in it
+       autosize   int32         // span0 sets this to the TEXT Prog's To.Offset plus ctxt.Arch.FixedFrameSize
+       instoffset int64
+       pc         int64
+}
+
+// Instruction layout.
+
+const (
+       FuncAlign = 4 // NOTE(review): presumably the function alignment in bytes — confirm; every nonzero size in optab is a multiple of 4
+)
+
+type Optab struct {
+       as     obj.As         // opcode this entry describes
+       a1     uint8          // class (C_* constant) of the first operand
+       a2     uint8          // class (C_* constant) of the second operand, C_NONE when absent
+       a3     uint8          // class (C_* constant) of the third operand
+       type_  int8           // case number for this form; span0 treats type_ 6 as a conditional branch
+       size   int8           // amount span0 adds to the pc for this form; 0 for pseudo-instructions
+       param  int16          // register constant such as REGSP, REGZERO or REGLINK, or 0
+       family sys.ArchFamily // 0 or sys.Loong64 in the optab table
+       flag   uint8          // bit flags, e.g. NOTUSETMP
+}
+
+const (
+       NOTUSETMP = 1 << iota // Optab.flag bit: p expands to multiple instructions, but does NOT use REGTMP
+)
+
+var optab = []Optab{ // columns, in Optab field order: as, a1, a2, a3, type_, size, param, family, flag
+       {obj.ATEXT, C_ADDR, C_NONE, C_TEXTSIZE, 0, 0, 0, 0, 0},
+
+       {AMOVW, C_REG, C_NONE, C_REG, 1, 4, 0, 0, 0},
+       {AMOVV, C_REG, C_NONE, C_REG, 1, 4, 0, sys.Loong64, 0},
+       {AMOVB, C_REG, C_NONE, C_REG, 12, 8, 0, 0, NOTUSETMP},
+       {AMOVBU, C_REG, C_NONE, C_REG, 13, 4, 0, 0, 0},
+       {AMOVWU, C_REG, C_NONE, C_REG, 14, 8, 0, sys.Loong64, NOTUSETMP},
+
+       {ASUB, C_REG, C_REG, C_REG, 2, 4, 0, 0, 0},
+       {ASUBV, C_REG, C_REG, C_REG, 2, 4, 0, sys.Loong64, 0},
+       {AADD, C_REG, C_REG, C_REG, 2, 4, 0, 0, 0},
+       {AADDV, C_REG, C_REG, C_REG, 2, 4, 0, sys.Loong64, 0},
+       {AAND, C_REG, C_REG, C_REG, 2, 4, 0, 0, 0},
+       {ASUB, C_REG, C_NONE, C_REG, 2, 4, 0, 0, 0},
+       {ASUBV, C_REG, C_NONE, C_REG, 2, 4, 0, sys.Loong64, 0},
+       {AADD, C_REG, C_NONE, C_REG, 2, 4, 0, 0, 0},
+       {AADDV, C_REG, C_NONE, C_REG, 2, 4, 0, sys.Loong64, 0},
+       {AAND, C_REG, C_NONE, C_REG, 2, 4, 0, 0, 0},
+       {ANEGW, C_REG, C_NONE, C_REG, 2, 4, 0, 0, 0},
+       {ANEGV, C_REG, C_NONE, C_REG, 2, 4, 0, sys.Loong64, 0},
+
+       {ASLL, C_REG, C_NONE, C_REG, 9, 4, 0, 0, 0},
+       {ASLL, C_REG, C_REG, C_REG, 9, 4, 0, 0, 0},
+       {ASLLV, C_REG, C_NONE, C_REG, 9, 4, 0, sys.Loong64, 0},
+       {ASLLV, C_REG, C_REG, C_REG, 9, 4, 0, sys.Loong64, 0},
+       {ACLO, C_REG, C_NONE, C_REG, 9, 4, 0, 0, 0},
+
+       {AADDF, C_FREG, C_NONE, C_FREG, 32, 4, 0, 0, 0},
+       {AADDF, C_FREG, C_REG, C_FREG, 32, 4, 0, 0, 0},
+       {ACMPEQF, C_FREG, C_REG, C_NONE, 32, 4, 0, 0, 0},
+       {AABSF, C_FREG, C_NONE, C_FREG, 33, 4, 0, 0, 0},
+       {AMOVVF, C_FREG, C_NONE, C_FREG, 33, 4, 0, sys.Loong64, 0},
+       {AMOVF, C_FREG, C_NONE, C_FREG, 33, 4, 0, 0, 0},
+       {AMOVD, C_FREG, C_NONE, C_FREG, 33, 4, 0, 0, 0},
+
+       {AMOVW, C_REG, C_NONE, C_SEXT, 7, 4, 0, sys.Loong64, 0},
+       {AMOVWU, C_REG, C_NONE, C_SEXT, 7, 4, 0, sys.Loong64, 0},
+       {AMOVV, C_REG, C_NONE, C_SEXT, 7, 4, 0, sys.Loong64, 0},
+       {AMOVB, C_REG, C_NONE, C_SEXT, 7, 4, 0, sys.Loong64, 0},
+       {AMOVBU, C_REG, C_NONE, C_SEXT, 7, 4, 0, sys.Loong64, 0},
+       {AMOVWL, C_REG, C_NONE, C_SEXT, 7, 4, 0, sys.Loong64, 0},
+       {AMOVVL, C_REG, C_NONE, C_SEXT, 7, 4, 0, sys.Loong64, 0},
+       {AMOVW, C_REG, C_NONE, C_SAUTO, 7, 4, REGSP, 0, 0},
+       {AMOVWU, C_REG, C_NONE, C_SAUTO, 7, 4, REGSP, sys.Loong64, 0},
+       {AMOVV, C_REG, C_NONE, C_SAUTO, 7, 4, REGSP, sys.Loong64, 0},
+       {AMOVB, C_REG, C_NONE, C_SAUTO, 7, 4, REGSP, 0, 0},
+       {AMOVBU, C_REG, C_NONE, C_SAUTO, 7, 4, REGSP, 0, 0},
+       {AMOVWL, C_REG, C_NONE, C_SAUTO, 7, 4, REGSP, 0, 0},
+       {AMOVVL, C_REG, C_NONE, C_SAUTO, 7, 4, REGSP, sys.Loong64, 0},
+       {AMOVW, C_REG, C_NONE, C_SOREG, 7, 4, REGZERO, 0, 0},
+       {AMOVWU, C_REG, C_NONE, C_SOREG, 7, 4, REGZERO, sys.Loong64, 0},
+       {AMOVV, C_REG, C_NONE, C_SOREG, 7, 4, REGZERO, sys.Loong64, 0},
+       {AMOVB, C_REG, C_NONE, C_SOREG, 7, 4, REGZERO, 0, 0},
+       {AMOVBU, C_REG, C_NONE, C_SOREG, 7, 4, REGZERO, 0, 0},
+       {AMOVWL, C_REG, C_NONE, C_SOREG, 7, 4, REGZERO, 0, 0},
+       {AMOVVL, C_REG, C_NONE, C_SOREG, 7, 4, REGZERO, sys.Loong64, 0},
+       {ASC, C_REG, C_NONE, C_SOREG, 7, 4, REGZERO, 0, 0},
+       {ASCV, C_REG, C_NONE, C_SOREG, 7, 4, REGZERO, sys.Loong64, 0},
+
+       {AMOVW, C_SEXT, C_NONE, C_REG, 8, 4, 0, sys.Loong64, 0},
+       {AMOVWU, C_SEXT, C_NONE, C_REG, 8, 4, 0, sys.Loong64, 0},
+       {AMOVV, C_SEXT, C_NONE, C_REG, 8, 4, 0, sys.Loong64, 0},
+       {AMOVB, C_SEXT, C_NONE, C_REG, 8, 4, 0, sys.Loong64, 0},
+       {AMOVBU, C_SEXT, C_NONE, C_REG, 8, 4, 0, sys.Loong64, 0},
+       {AMOVWL, C_SEXT, C_NONE, C_REG, 8, 4, 0, sys.Loong64, 0},
+       {AMOVVL, C_SEXT, C_NONE, C_REG, 8, 4, 0, sys.Loong64, 0},
+       {AMOVW, C_SAUTO, C_NONE, C_REG, 8, 4, REGSP, 0, 0},
+       {AMOVWU, C_SAUTO, C_NONE, C_REG, 8, 4, REGSP, sys.Loong64, 0},
+       {AMOVV, C_SAUTO, C_NONE, C_REG, 8, 4, REGSP, sys.Loong64, 0},
+       {AMOVB, C_SAUTO, C_NONE, C_REG, 8, 4, REGSP, 0, 0},
+       {AMOVBU, C_SAUTO, C_NONE, C_REG, 8, 4, REGSP, 0, 0},
+       {AMOVWL, C_SAUTO, C_NONE, C_REG, 8, 4, REGSP, 0, 0},
+       {AMOVVL, C_SAUTO, C_NONE, C_REG, 8, 4, REGSP, sys.Loong64, 0},
+       {AMOVW, C_SOREG, C_NONE, C_REG, 8, 4, REGZERO, 0, 0},
+       {AMOVWU, C_SOREG, C_NONE, C_REG, 8, 4, REGZERO, sys.Loong64, 0},
+       {AMOVV, C_SOREG, C_NONE, C_REG, 8, 4, REGZERO, sys.Loong64, 0},
+       {AMOVB, C_SOREG, C_NONE, C_REG, 8, 4, REGZERO, 0, 0},
+       {AMOVBU, C_SOREG, C_NONE, C_REG, 8, 4, REGZERO, 0, 0},
+       {AMOVWL, C_SOREG, C_NONE, C_REG, 8, 4, REGZERO, 0, 0},
+       {AMOVVL, C_SOREG, C_NONE, C_REG, 8, 4, REGZERO, sys.Loong64, 0},
+       {ALL, C_SOREG, C_NONE, C_REG, 8, 4, REGZERO, 0, 0},
+       {ALLV, C_SOREG, C_NONE, C_REG, 8, 4, REGZERO, sys.Loong64, 0},
+
+       {AMOVW, C_REG, C_NONE, C_LEXT, 35, 12, 0, sys.Loong64, 0},
+       {AMOVWU, C_REG, C_NONE, C_LEXT, 35, 12, 0, sys.Loong64, 0},
+       {AMOVV, C_REG, C_NONE, C_LEXT, 35, 12, 0, sys.Loong64, 0},
+       {AMOVB, C_REG, C_NONE, C_LEXT, 35, 12, 0, sys.Loong64, 0},
+       {AMOVBU, C_REG, C_NONE, C_LEXT, 35, 12, 0, sys.Loong64, 0},
+       {AMOVW, C_REG, C_NONE, C_LAUTO, 35, 12, REGSP, 0, 0},
+       {AMOVWU, C_REG, C_NONE, C_LAUTO, 35, 12, REGSP, sys.Loong64, 0},
+       {AMOVV, C_REG, C_NONE, C_LAUTO, 35, 12, REGSP, sys.Loong64, 0},
+       {AMOVB, C_REG, C_NONE, C_LAUTO, 35, 12, REGSP, 0, 0},
+       {AMOVBU, C_REG, C_NONE, C_LAUTO, 35, 12, REGSP, 0, 0},
+       {AMOVW, C_REG, C_NONE, C_LOREG, 35, 12, REGZERO, 0, 0},
+       {AMOVWU, C_REG, C_NONE, C_LOREG, 35, 12, REGZERO, sys.Loong64, 0},
+       {AMOVV, C_REG, C_NONE, C_LOREG, 35, 12, REGZERO, sys.Loong64, 0},
+       {AMOVB, C_REG, C_NONE, C_LOREG, 35, 12, REGZERO, 0, 0},
+       {AMOVBU, C_REG, C_NONE, C_LOREG, 35, 12, REGZERO, 0, 0},
+       {ASC, C_REG, C_NONE, C_LOREG, 35, 12, REGZERO, 0, 0},
+       {AMOVW, C_REG, C_NONE, C_ADDR, 50, 8, 0, 0, 0},
+       {AMOVW, C_REG, C_NONE, C_ADDR, 50, 8, 0, sys.Loong64, 0},
+       {AMOVWU, C_REG, C_NONE, C_ADDR, 50, 8, 0, sys.Loong64, 0},
+       {AMOVV, C_REG, C_NONE, C_ADDR, 50, 8, 0, sys.Loong64, 0},
+       {AMOVB, C_REG, C_NONE, C_ADDR, 50, 8, 0, 0, 0},
+       {AMOVB, C_REG, C_NONE, C_ADDR, 50, 8, 0, sys.Loong64, 0},
+       {AMOVBU, C_REG, C_NONE, C_ADDR, 50, 8, 0, 0, 0},
+       {AMOVBU, C_REG, C_NONE, C_ADDR, 50, 8, 0, sys.Loong64, 0},
+       {AMOVW, C_REG, C_NONE, C_TLS, 53, 16, 0, 0, 0},
+       {AMOVWU, C_REG, C_NONE, C_TLS, 53, 16, 0, sys.Loong64, 0},
+       {AMOVV, C_REG, C_NONE, C_TLS, 53, 16, 0, sys.Loong64, 0},
+       {AMOVB, C_REG, C_NONE, C_TLS, 53, 16, 0, 0, 0},
+       {AMOVBU, C_REG, C_NONE, C_TLS, 53, 16, 0, 0, 0},
+
+       {AMOVW, C_LEXT, C_NONE, C_REG, 36, 12, 0, sys.Loong64, 0},
+       {AMOVWU, C_LEXT, C_NONE, C_REG, 36, 12, 0, sys.Loong64, 0},
+       {AMOVV, C_LEXT, C_NONE, C_REG, 36, 12, 0, sys.Loong64, 0},
+       {AMOVB, C_LEXT, C_NONE, C_REG, 36, 12, 0, sys.Loong64, 0},
+       {AMOVBU, C_LEXT, C_NONE, C_REG, 36, 12, 0, sys.Loong64, 0},
+       {AMOVW, C_LAUTO, C_NONE, C_REG, 36, 12, REGSP, 0, 0},
+       {AMOVWU, C_LAUTO, C_NONE, C_REG, 36, 12, REGSP, sys.Loong64, 0},
+       {AMOVV, C_LAUTO, C_NONE, C_REG, 36, 12, REGSP, sys.Loong64, 0},
+       {AMOVB, C_LAUTO, C_NONE, C_REG, 36, 12, REGSP, 0, 0},
+       {AMOVBU, C_LAUTO, C_NONE, C_REG, 36, 12, REGSP, 0, 0},
+       {AMOVW, C_LOREG, C_NONE, C_REG, 36, 12, REGZERO, 0, 0},
+       {AMOVWU, C_LOREG, C_NONE, C_REG, 36, 12, REGZERO, sys.Loong64, 0},
+       {AMOVV, C_LOREG, C_NONE, C_REG, 36, 12, REGZERO, sys.Loong64, 0},
+       {AMOVB, C_LOREG, C_NONE, C_REG, 36, 12, REGZERO, 0, 0},
+       {AMOVBU, C_LOREG, C_NONE, C_REG, 36, 12, REGZERO, 0, 0},
+       {AMOVW, C_ADDR, C_NONE, C_REG, 51, 8, 0, 0, 0},
+       {AMOVW, C_ADDR, C_NONE, C_REG, 51, 8, 0, sys.Loong64, 0},
+       {AMOVWU, C_ADDR, C_NONE, C_REG, 51, 8, 0, sys.Loong64, 0},
+       {AMOVV, C_ADDR, C_NONE, C_REG, 51, 8, 0, sys.Loong64, 0},
+       {AMOVB, C_ADDR, C_NONE, C_REG, 51, 8, 0, 0, 0},
+       {AMOVB, C_ADDR, C_NONE, C_REG, 51, 8, 0, sys.Loong64, 0},
+       {AMOVBU, C_ADDR, C_NONE, C_REG, 51, 8, 0, 0, 0},
+       {AMOVBU, C_ADDR, C_NONE, C_REG, 51, 8, 0, sys.Loong64, 0},
+       {AMOVW, C_TLS, C_NONE, C_REG, 54, 16, 0, 0, 0},
+       {AMOVWU, C_TLS, C_NONE, C_REG, 54, 16, 0, sys.Loong64, 0},
+       {AMOVV, C_TLS, C_NONE, C_REG, 54, 16, 0, sys.Loong64, 0},
+       {AMOVB, C_TLS, C_NONE, C_REG, 54, 16, 0, 0, 0},
+       {AMOVBU, C_TLS, C_NONE, C_REG, 54, 16, 0, 0, 0},
+
+       {AMOVW, C_SECON, C_NONE, C_REG, 3, 4, 0, sys.Loong64, 0},
+       {AMOVV, C_SECON, C_NONE, C_REG, 3, 4, 0, sys.Loong64, 0},
+       {AMOVW, C_SACON, C_NONE, C_REG, 3, 4, REGSP, 0, 0},
+       {AMOVV, C_SACON, C_NONE, C_REG, 3, 4, REGSP, sys.Loong64, 0},
+       {AMOVW, C_LECON, C_NONE, C_REG, 52, 8, 0, 0, NOTUSETMP},
+       {AMOVW, C_LECON, C_NONE, C_REG, 52, 8, 0, sys.Loong64, NOTUSETMP},
+       {AMOVV, C_LECON, C_NONE, C_REG, 52, 8, 0, sys.Loong64, NOTUSETMP},
+
+       {AMOVW, C_LACON, C_NONE, C_REG, 26, 12, REGSP, 0, 0},
+       {AMOVV, C_LACON, C_NONE, C_REG, 26, 12, REGSP, sys.Loong64, 0},
+       {AMOVW, C_ADDCON, C_NONE, C_REG, 3, 4, REGZERO, 0, 0},
+       {AMOVV, C_ADDCON, C_NONE, C_REG, 3, 4, REGZERO, sys.Loong64, 0},
+       {AMOVW, C_ANDCON, C_NONE, C_REG, 3, 4, REGZERO, 0, 0},
+       {AMOVV, C_ANDCON, C_NONE, C_REG, 3, 4, REGZERO, sys.Loong64, 0},
+       {AMOVW, C_STCON, C_NONE, C_REG, 55, 12, 0, 0, 0},
+       {AMOVV, C_STCON, C_NONE, C_REG, 55, 12, 0, sys.Loong64, 0},
+
+       {AMOVW, C_UCON, C_NONE, C_REG, 24, 4, 0, 0, 0},
+       {AMOVV, C_UCON, C_NONE, C_REG, 24, 4, 0, sys.Loong64, 0},
+       {AMOVW, C_LCON, C_NONE, C_REG, 19, 8, 0, 0, NOTUSETMP},
+       {AMOVV, C_LCON, C_NONE, C_REG, 19, 8, 0, sys.Loong64, NOTUSETMP},
+       {AMOVV, C_DCON, C_NONE, C_REG, 59, 16, 0, sys.Loong64, NOTUSETMP},
+
+       {AMUL, C_REG, C_NONE, C_REG, 2, 4, 0, 0, 0},
+       {AMUL, C_REG, C_REG, C_REG, 2, 4, 0, 0, 0},
+       {AMULV, C_REG, C_NONE, C_REG, 2, 4, 0, sys.Loong64, 0},
+       {AMULV, C_REG, C_REG, C_REG, 2, 4, 0, sys.Loong64, 0},
+
+       {AADD, C_ADD0CON, C_REG, C_REG, 4, 4, 0, 0, 0},
+       {AADD, C_ADD0CON, C_NONE, C_REG, 4, 4, 0, 0, 0},
+       {AADD, C_ANDCON, C_REG, C_REG, 10, 8, 0, 0, 0},
+       {AADD, C_ANDCON, C_NONE, C_REG, 10, 8, 0, 0, 0},
+
+       {AADDV, C_ADD0CON, C_REG, C_REG, 4, 4, 0, sys.Loong64, 0},
+       {AADDV, C_ADD0CON, C_NONE, C_REG, 4, 4, 0, sys.Loong64, 0},
+       {AADDV, C_ANDCON, C_REG, C_REG, 10, 8, 0, sys.Loong64, 0},
+       {AADDV, C_ANDCON, C_NONE, C_REG, 10, 8, 0, sys.Loong64, 0},
+
+       {AAND, C_AND0CON, C_REG, C_REG, 4, 4, 0, 0, 0},
+       {AAND, C_AND0CON, C_NONE, C_REG, 4, 4, 0, 0, 0},
+       {AAND, C_ADDCON, C_REG, C_REG, 10, 8, 0, 0, 0},
+       {AAND, C_ADDCON, C_NONE, C_REG, 10, 8, 0, 0, 0},
+
+       {AADD, C_UCON, C_REG, C_REG, 25, 8, 0, 0, 0},
+       {AADD, C_UCON, C_NONE, C_REG, 25, 8, 0, 0, 0},
+       {AADDV, C_UCON, C_REG, C_REG, 25, 8, 0, sys.Loong64, 0},
+       {AADDV, C_UCON, C_NONE, C_REG, 25, 8, 0, sys.Loong64, 0},
+       {AAND, C_UCON, C_REG, C_REG, 25, 8, 0, 0, 0},
+       {AAND, C_UCON, C_NONE, C_REG, 25, 8, 0, 0, 0},
+
+       {AADD, C_LCON, C_NONE, C_REG, 23, 12, 0, 0, 0},
+       {AADDV, C_LCON, C_NONE, C_REG, 23, 12, 0, sys.Loong64, 0},
+       {AAND, C_LCON, C_NONE, C_REG, 23, 12, 0, 0, 0},
+       {AADD, C_LCON, C_REG, C_REG, 23, 12, 0, 0, 0},
+       {AADDV, C_LCON, C_REG, C_REG, 23, 12, 0, sys.Loong64, 0},
+       {AAND, C_LCON, C_REG, C_REG, 23, 12, 0, 0, 0},
+
+       {AADDV, C_DCON, C_NONE, C_REG, 60, 20, 0, sys.Loong64, 0},
+       {AADDV, C_DCON, C_REG, C_REG, 60, 20, 0, sys.Loong64, 0},
+
+       {ASLL, C_SCON, C_REG, C_REG, 16, 4, 0, 0, 0},
+       {ASLL, C_SCON, C_NONE, C_REG, 16, 4, 0, 0, 0},
+
+       {ASLLV, C_SCON, C_REG, C_REG, 16, 4, 0, sys.Loong64, 0},
+       {ASLLV, C_SCON, C_NONE, C_REG, 16, 4, 0, sys.Loong64, 0},
+
+       {ASYSCALL, C_NONE, C_NONE, C_NONE, 5, 4, 0, 0, 0},
+
+       {ABEQ, C_REG, C_REG, C_SBRA, 6, 4, 0, 0, 0},
+       {ABEQ, C_REG, C_NONE, C_SBRA, 6, 4, 0, 0, 0},
+       {ABLEZ, C_REG, C_NONE, C_SBRA, 6, 4, 0, 0, 0},
+       {ABFPT, C_NONE, C_NONE, C_SBRA, 6, 4, 0, 0, NOTUSETMP},
+
+       {AJMP, C_NONE, C_NONE, C_LBRA, 11, 4, 0, 0, 0}, // b
+       {AJAL, C_NONE, C_NONE, C_LBRA, 11, 4, 0, 0, 0}, // bl
+
+       {AJMP, C_NONE, C_NONE, C_ZOREG, 18, 4, REGZERO, 0, 0}, // jirl r0, rj, 0
+       {AJAL, C_NONE, C_NONE, C_ZOREG, 18, 4, REGLINK, 0, 0}, // jirl r1, rj, 0
+
+       {AMOVW, C_SEXT, C_NONE, C_FREG, 27, 4, 0, sys.Loong64, 0},
+       {AMOVF, C_SEXT, C_NONE, C_FREG, 27, 4, 0, sys.Loong64, 0},
+       {AMOVD, C_SEXT, C_NONE, C_FREG, 27, 4, 0, sys.Loong64, 0},
+       {AMOVW, C_SAUTO, C_NONE, C_FREG, 27, 4, REGSP, sys.Loong64, 0},
+       {AMOVF, C_SAUTO, C_NONE, C_FREG, 27, 4, REGSP, 0, 0},
+       {AMOVD, C_SAUTO, C_NONE, C_FREG, 27, 4, REGSP, 0, 0},
+       {AMOVW, C_SOREG, C_NONE, C_FREG, 27, 4, REGZERO, sys.Loong64, 0},
+       {AMOVF, C_SOREG, C_NONE, C_FREG, 27, 4, REGZERO, 0, 0},
+       {AMOVD, C_SOREG, C_NONE, C_FREG, 27, 4, REGZERO, 0, 0},
+
+       {AMOVW, C_LEXT, C_NONE, C_FREG, 27, 12, 0, sys.Loong64, 0},
+       {AMOVF, C_LEXT, C_NONE, C_FREG, 27, 12, 0, sys.Loong64, 0},
+       {AMOVD, C_LEXT, C_NONE, C_FREG, 27, 12, 0, sys.Loong64, 0},
+       {AMOVW, C_LAUTO, C_NONE, C_FREG, 27, 12, REGSP, sys.Loong64, 0},
+       {AMOVF, C_LAUTO, C_NONE, C_FREG, 27, 12, REGSP, 0, 0},
+       {AMOVD, C_LAUTO, C_NONE, C_FREG, 27, 12, REGSP, 0, 0},
+       {AMOVW, C_LOREG, C_NONE, C_FREG, 27, 12, REGZERO, sys.Loong64, 0},
+       {AMOVF, C_LOREG, C_NONE, C_FREG, 27, 12, REGZERO, 0, 0},
+       {AMOVD, C_LOREG, C_NONE, C_FREG, 27, 12, REGZERO, 0, 0},
+       {AMOVF, C_ADDR, C_NONE, C_FREG, 51, 8, 0, 0, 0},
+       {AMOVF, C_ADDR, C_NONE, C_FREG, 51, 8, 0, sys.Loong64, 0},
+       {AMOVD, C_ADDR, C_NONE, C_FREG, 51, 8, 0, 0, 0},
+       {AMOVD, C_ADDR, C_NONE, C_FREG, 51, 8, 0, sys.Loong64, 0},
+
+       {AMOVW, C_FREG, C_NONE, C_SEXT, 28, 4, 0, sys.Loong64, 0},
+       {AMOVF, C_FREG, C_NONE, C_SEXT, 28, 4, 0, sys.Loong64, 0},
+       {AMOVD, C_FREG, C_NONE, C_SEXT, 28, 4, 0, sys.Loong64, 0},
+       {AMOVW, C_FREG, C_NONE, C_SAUTO, 28, 4, REGSP, sys.Loong64, 0},
+       {AMOVF, C_FREG, C_NONE, C_SAUTO, 28, 4, REGSP, 0, 0},
+       {AMOVD, C_FREG, C_NONE, C_SAUTO, 28, 4, REGSP, 0, 0},
+       {AMOVW, C_FREG, C_NONE, C_SOREG, 28, 4, REGZERO, sys.Loong64, 0},
+       {AMOVF, C_FREG, C_NONE, C_SOREG, 28, 4, REGZERO, 0, 0},
+       {AMOVD, C_FREG, C_NONE, C_SOREG, 28, 4, REGZERO, 0, 0},
+
+       {AMOVW, C_FREG, C_NONE, C_LEXT, 28, 12, 0, sys.Loong64, 0},
+       {AMOVF, C_FREG, C_NONE, C_LEXT, 28, 12, 0, sys.Loong64, 0},
+       {AMOVD, C_FREG, C_NONE, C_LEXT, 28, 12, 0, sys.Loong64, 0},
+       {AMOVW, C_FREG, C_NONE, C_LAUTO, 28, 12, REGSP, sys.Loong64, 0},
+       {AMOVF, C_FREG, C_NONE, C_LAUTO, 28, 12, REGSP, 0, 0},
+       {AMOVD, C_FREG, C_NONE, C_LAUTO, 28, 12, REGSP, 0, 0},
+       {AMOVW, C_FREG, C_NONE, C_LOREG, 28, 12, REGZERO, sys.Loong64, 0},
+       {AMOVF, C_FREG, C_NONE, C_LOREG, 28, 12, REGZERO, 0, 0},
+       {AMOVD, C_FREG, C_NONE, C_LOREG, 28, 12, REGZERO, 0, 0},
+       {AMOVF, C_FREG, C_NONE, C_ADDR, 50, 8, 0, 0, 0},
+       {AMOVF, C_FREG, C_NONE, C_ADDR, 50, 8, 0, sys.Loong64, 0},
+       {AMOVD, C_FREG, C_NONE, C_ADDR, 50, 8, 0, 0, 0},
+       {AMOVD, C_FREG, C_NONE, C_ADDR, 50, 8, 0, sys.Loong64, 0},
+
+       {AMOVW, C_REG, C_NONE, C_FREG, 30, 4, 0, 0, 0},
+       {AMOVW, C_FREG, C_NONE, C_REG, 31, 4, 0, 0, 0},
+       {AMOVV, C_REG, C_NONE, C_FREG, 47, 4, 0, sys.Loong64, 0},
+       {AMOVV, C_FREG, C_NONE, C_REG, 48, 4, 0, sys.Loong64, 0},
+
+       {AMOVW, C_ADDCON, C_NONE, C_FREG, 34, 8, 0, sys.Loong64, 0},
+       {AMOVW, C_ANDCON, C_NONE, C_FREG, 34, 8, 0, sys.Loong64, 0},
+
+       {AWORD, C_LCON, C_NONE, C_NONE, 40, 4, 0, 0, 0},
+       {AWORD, C_DCON, C_NONE, C_NONE, 61, 4, 0, 0, 0},
+
+       {ATEQ, C_SCON, C_REG, C_REG, 15, 8, 0, 0, 0},
+       {ATEQ, C_SCON, C_NONE, C_REG, 15, 8, 0, 0, 0},
+
+       {ABREAK, C_REG, C_NONE, C_SEXT, 7, 4, 0, sys.Loong64, 0}, // really CACHE instruction
+       {ABREAK, C_REG, C_NONE, C_SAUTO, 7, 4, REGSP, sys.Loong64, 0},
+       {ABREAK, C_REG, C_NONE, C_SOREG, 7, 4, REGZERO, sys.Loong64, 0},
+       {ABREAK, C_NONE, C_NONE, C_NONE, 5, 4, 0, 0, 0},
+
+       {obj.AUNDEF, C_NONE, C_NONE, C_NONE, 49, 4, 0, 0, 0},
+       {obj.APCDATA, C_LCON, C_NONE, C_LCON, 0, 0, 0, 0, 0},
+       {obj.APCDATA, C_DCON, C_NONE, C_DCON, 0, 0, 0, 0, 0},
+       {obj.AFUNCDATA, C_SCON, C_NONE, C_ADDR, 0, 0, 0, 0, 0},
+       {obj.ANOP, C_NONE, C_NONE, C_NONE, 0, 0, 0, 0, 0},
+       {obj.ANOP, C_LCON, C_NONE, C_NONE, 0, 0, 0, 0, 0}, // nop variants, see #40689
+       {obj.ANOP, C_DCON, C_NONE, C_NONE, 0, 0, 0, 0, 0}, // nop variants, see #40689
+       {obj.ANOP, C_REG, C_NONE, C_NONE, 0, 0, 0, 0, 0},
+       {obj.ANOP, C_FREG, C_NONE, C_NONE, 0, 0, 0, 0, 0},
+       {obj.ADUFFZERO, C_NONE, C_NONE, C_LBRA, 11, 4, 0, 0, 0}, // same as AJMP
+       {obj.ADUFFCOPY, C_NONE, C_NONE, C_LBRA, 11, 4, 0, 0, 0}, // same as AJMP
+
+       {obj.AXXX, C_NONE, C_NONE, C_NONE, 0, 4, 0, 0, 0},
+}
+
+var oprange [ALAST & obj.AMask][]Optab // Optab entries per opcode, indexed by opcode & obj.AMask; span0 reports an error if the AOR slot is still nil
+
+var xcmp [C_NCLASS][C_NCLASS]bool // boolean table indexed by a pair of operand classes (C_* constants)
+
+// span0 lays out and assembles the function cursym.
+//
+// It works in three steps:
+//  1. assign a PC to every Prog, using the size of the optab entry that
+//     oplook selects for it;
+//  2. repeatedly rewrite conditional branches (optab type 6) whose target is
+//     too far away into a short branch around unconditional jumps, until a
+//     full pass makes no further change;
+//  3. grow cursym.P to the final, FuncAlign-rounded size and emit the encoded
+//     instruction words produced by asmout.
+//
+// Finally it asks obj.MarkUnsafePoints to record which instructions must not
+// be asynchronously preempted.
+func span0(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
+       if ctxt.Retpoline {
+               ctxt.Diag("-spectre=ret not supported on loong64")
+               ctxt.Retpoline = false // don't keep printing
+       }
+
+       p := cursym.Func().Text
+       if p == nil || p.Link == nil { // handle external functions and ELF section symbols
+               return
+       }
+
+       // autosize is the frame size from the TEXT directive plus the fixed
+       // frame size of the architecture; aclass uses it to resolve
+       // NAME_AUTO and NAME_PARAM offsets.
+       c := ctxt0{ctxt: ctxt, newprog: newprog, cursym: cursym, autosize: int32(p.To.Offset + ctxt.Arch.FixedFrameSize)}
+
+       if oprange[AOR&obj.AMask] == nil {
+               c.ctxt.Diag("loong64 ops not initialized, call loong64.buildop first")
+       }
+
+       // First pass: assign an initial PC to every instruction.
+       pc := int64(0)
+       p.Pc = pc
+
+       var m int
+       var o *Optab
+       for p = p.Link; p != nil; p = p.Link {
+               p.Pc = pc
+               o = c.oplook(p)
+               m = int(o.size)
+               if m == 0 {
+                       // Only NOP, FUNCDATA and PCDATA may legitimately occupy no space.
+                       if p.As != obj.ANOP && p.As != obj.AFUNCDATA && p.As != obj.APCDATA {
+                               c.ctxt.Diag("zero-width instruction\n%v", p)
+                       }
+                       continue
+               }
+
+               pc += int64(m)
+       }
+
+       c.cursym.Size = pc
+
+       /*
+        * if any procedure is large enough to
+        * generate a large SBRA branch, then
+        * generate extra passes putting branches
+        * around jmps to fix. this is rare.
+        */
+       bflag := 1
+
+       var otxt int64
+       var q *obj.Prog
+       for bflag != 0 {
+               bflag = 0
+               pc = 0
+               for p = c.cursym.Func().Text.Link; p != nil; p = p.Link {
+                       p.Pc = pc
+                       o = c.oplook(p)
+
+                       // very large conditional branches
+                       if o.type_ == 6 && p.To.Target() != nil {
+                               otxt = p.To.Target().Pc - pc
+                               if otxt < -(1<<17)+10 || otxt >= (1<<17)-10 {
+                                       // Insert a JMP to the original target right after p
+                                       // and retarget p at that JMP.
+                                       q = c.newprog()
+                                       q.Link = p.Link
+                                       p.Link = q
+                                       q.As = AJMP
+                                       q.Pos = p.Pos
+                                       q.To.Type = obj.TYPE_BRANCH
+                                       q.To.SetTarget(p.To.Target())
+                                       p.To.SetTarget(q)
+                                       // Insert a second JMP between p and the first one; it
+                                       // skips over the first JMP when p is not taken.
+                                       q = c.newprog()
+                                       q.Link = p.Link
+                                       p.Link = q
+                                       q.As = AJMP
+                                       q.Pos = p.Pos
+                                       q.To.Type = obj.TYPE_BRANCH
+                                       q.To.SetTarget(q.Link.Link)
+
+                                       // NOTE(review): addnop is defined outside this chunk;
+                                       // presumably it inserts a NOP after its argument.
+                                       c.addnop(p.Link)
+                                       c.addnop(p)
+                                       // The layout changed, so another pass is required.
+                                       bflag = 1
+                               }
+                       }
+
+                       m = int(o.size)
+                       if m == 0 {
+                               if p.As != obj.ANOP && p.As != obj.AFUNCDATA && p.As != obj.APCDATA {
+                                       c.ctxt.Diag("zero-width instruction\n%v", p)
+                               }
+                               continue
+                       }
+
+                       pc += int64(m)
+               }
+
+               c.cursym.Size = pc
+       }
+       // Round the size up to a multiple of FuncAlign (assumes FuncAlign is a
+       // power of two).
+       pc += -pc & (FuncAlign - 1)
+       c.cursym.Size = pc
+
+       // lay out the code, emitting code and data relocations.
+
+       c.cursym.Grow(c.cursym.Size)
+
+       bp := c.cursym.P
+       var i int32
+       var out [5]uint32
+       for p := c.cursym.Func().Text.Link; p != nil; p = p.Link {
+               c.pc = p.Pc
+               o = c.oplook(p)
+               if int(o.size) > 4*len(out) {
+                       log.Fatalf("out array in span0 is too small, need at least %d for %v", o.size/4, p)
+               }
+               c.asmout(p, o, out[:])
+               // Copy the o.size/4 encoded words into the symbol's data.
+               for i = 0; i < int32(o.size/4); i++ {
+                       c.ctxt.Arch.ByteOrder.PutUint32(bp, out[i])
+                       bp = bp[4:]
+               }
+       }
+
+       // Mark nonpreemptible instruction sequences.
+       // We use REGTMP as a scratch register during call injection,
+       // so instruction sequences that use REGTMP are unsafe to
+       // preempt asynchronously.
+       obj.MarkUnsafePoints(c.ctxt, c.cursym.Func().Text, c.newprog, c.isUnsafePoint, c.isRestartable)
+}
+
+// isUnsafePoint returns whether p is an unsafe point.
+func (c *ctxt0) isUnsafePoint(p *obj.Prog) bool {
+       // If p explicitly uses REGTMP, it's unsafe to preempt, because the
+       // preemption sequence clobbers REGTMP.
+       return p.From.Reg == REGTMP || p.To.Reg == REGTMP || p.Reg == REGTMP
+}
+
+// isRestartable returns whether p is a multi-instruction sequence that,
+// if preempted, can be restarted.
+func (c *ctxt0) isRestartable(p *obj.Prog) bool {
+       if c.isUnsafePoint(p) {
+               return false
+       }
+       // If p is a multi-instruction sequence with uses REGTMP inserted by
+       // the assembler in order to materialize a large constant/offset, we
+       // can restart p (at the start of the instruction sequence), recompute
+       // the content of REGTMP, upon async preemption. Currently, all cases
+       // of assembler-inserted REGTMP fall into this category.
+       // If p doesn't use REGTMP, it can be simply preempted, so we don't
+       // mark it.
+       o := c.oplook(p)
+       return o.size > 4 && o.flag&NOTUSETMP == 0
+}
+
+func isint32(v int64) bool {
+       return int64(int32(v)) == v
+}
+
+func isuint32(v uint64) bool {
+       return uint64(uint32(v)) == v
+}
+
+func (c *ctxt0) aclass(a *obj.Addr) int {
+       switch a.Type {
+       case obj.TYPE_NONE:
+               return C_NONE
+
+       case obj.TYPE_REG:
+               if REG_R0 <= a.Reg && a.Reg <= REG_R31 {
+                       return C_REG
+               }
+               if REG_F0 <= a.Reg && a.Reg <= REG_F31 {
+                       return C_FREG
+               }
+               if REG_FCSR0 <= a.Reg && a.Reg <= REG_FCSR31 {
+                       return C_FCSRREG
+               }
+               if REG_FCC0 <= a.Reg && a.Reg <= REG_FCC31 {
+                       return C_FCCREG
+               }
+               return C_GOK
+
+       case obj.TYPE_MEM:
+               switch a.Name {
+               case obj.NAME_EXTERN,
+                       obj.NAME_STATIC:
+                       if a.Sym == nil {
+                               break
+                       }
+                       c.instoffset = a.Offset
+                       if a.Sym != nil { // use relocation
+                               if a.Sym.Type == objabi.STLSBSS {
+                                       return C_TLS
+                               }
+                               return C_ADDR
+                       }
+                       return C_LEXT
+
+               case obj.NAME_AUTO:
+                       if a.Reg == REGSP {
+                               // unset base register for better printing, since
+                               // a.Offset is still relative to pseudo-SP.
+                               a.Reg = obj.REG_NONE
+                       }
+                       c.instoffset = int64(c.autosize) + a.Offset
+                       if c.instoffset >= -BIG && c.instoffset < BIG {
+                               return C_SAUTO
+                       }
+                       return C_LAUTO
+
+               case obj.NAME_PARAM:
+                       if a.Reg == REGSP {
+                               // unset base register for better printing, since
+                               // a.Offset is still relative to pseudo-FP.
+                               a.Reg = obj.REG_NONE
+                       }
+                       c.instoffset = int64(c.autosize) + a.Offset + c.ctxt.Arch.FixedFrameSize
+                       if c.instoffset >= -BIG && c.instoffset < BIG {
+                               return C_SAUTO
+                       }
+                       return C_LAUTO
+
+               case obj.NAME_NONE:
+                       c.instoffset = a.Offset
+                       if c.instoffset == 0 {
+                               return C_ZOREG
+                       }
+                       if c.instoffset >= -BIG && c.instoffset < BIG {
+                               return C_SOREG
+                       }
+                       return C_LOREG
+               }
+
+               return C_GOK
+
+       case obj.TYPE_TEXTSIZE:
+               return C_TEXTSIZE
+
+       case obj.TYPE_CONST,
+               obj.TYPE_ADDR:
+               switch a.Name {
+               case obj.NAME_NONE:
+                       c.instoffset = a.Offset
+                       if a.Reg != 0 {
+                               if -BIG <= c.instoffset && c.instoffset <= BIG {
+                                       return C_SACON
+                               }
+                               if isint32(c.instoffset) {
+                                       return C_LACON
+                               }
+                               return C_DACON
+                       }
+
+               case obj.NAME_EXTERN,
+                       obj.NAME_STATIC:
+                       s := a.Sym
+                       if s == nil {
+                               return C_GOK
+                       }
+
+                       c.instoffset = a.Offset
+                       if s.Type == objabi.STLSBSS {
+                               return C_STCON // address of TLS variable
+                       }
+                       return C_LECON
+
+               case obj.NAME_AUTO:
+                       if a.Reg == REGSP {
+                               // unset base register for better printing, since
+                               // a.Offset is still relative to pseudo-SP.
+                               a.Reg = obj.REG_NONE
+                       }
+                       c.instoffset = int64(c.autosize) + a.Offset
+                       if c.instoffset >= -BIG && c.instoffset < BIG {
+                               return C_SACON
+                       }
+                       return C_LACON
+
+               case obj.NAME_PARAM:
+                       if a.Reg == REGSP {
+                               // unset base register for better printing, since
+                               // a.Offset is still relative to pseudo-FP.
+                               a.Reg = obj.REG_NONE
+                       }
+                       c.instoffset = int64(c.autosize) + a.Offset + c.ctxt.Arch.FixedFrameSize
+                       if c.instoffset >= -BIG && c.instoffset < BIG {
+                               return C_SACON
+                       }
+                       return C_LACON
+
+               default:
+                       return C_GOK
+               }
+
+               if c.instoffset != int64(int32(c.instoffset)) {
+                       return C_DCON
+               }
+
+               if c.instoffset >= 0 {
+                       if c.instoffset == 0 {
+                               return C_ZCON
+                       }
+                       if c.instoffset <= 0x7ff {
+                               return C_SCON
+                       }
+                       if c.instoffset <= 0xfff {
+                               return C_ANDCON
+                       }
+                       if c.instoffset&0xfff == 0 && isuint32(uint64(c.instoffset)) { // && (instoffset & (1<<31)) == 0)
+                               return C_UCON
+                       }
+                       if isint32(c.instoffset) || isuint32(uint64(c.instoffset)) {
+                               return C_LCON
+                       }
+                       return C_LCON
+               }
+
+               if c.instoffset >= -0x800 {
+                       return C_ADDCON
+               }
+               if c.instoffset&0xfff == 0 && isint32(c.instoffset) {
+                       return C_UCON
+               }
+               if isint32(c.instoffset) {
+                       return C_LCON
+               }
+               return C_LCON
+
+       case obj.TYPE_BRANCH:
+               return C_SBRA
+       }
+
+       return C_GOK
+}
+
+func prasm(p *obj.Prog) {
+       fmt.Printf("%v\n", p)
+}
+
+func (c *ctxt0) oplook(p *obj.Prog) *Optab {
+       if oprange[AOR&obj.AMask] == nil {
+               c.ctxt.Diag("loong64 ops not initialized, call loong64.buildop first")
+       }
+
+       a1 := int(p.Optab)
+       if a1 != 0 {
+               return &optab[a1-1]
+       }
+       a1 = int(p.From.Class)
+       if a1 == 0 {
+               a1 = c.aclass(&p.From) + 1
+               p.From.Class = int8(a1)
+       }
+
+       a1--
+       a3 := int(p.To.Class)
+       if a3 == 0 {
+               a3 = c.aclass(&p.To) + 1
+               p.To.Class = int8(a3)
+       }
+
+       a3--
+       a2 := C_NONE
+       if p.Reg != 0 {
+               a2 = C_REG
+       }
+
+       ops := oprange[p.As&obj.AMask]
+       c1 := &xcmp[a1]
+       c3 := &xcmp[a3]
+       for i := range ops {
+               op := &ops[i]
+               if int(op.a2) == a2 && c1[op.a1] && c3[op.a3] && (op.family == 0 || c.ctxt.Arch.Family == op.family) {
+                       p.Optab = uint16(cap(optab) - cap(ops) + i + 1)
+                       return op
+               }
+       }
+
+       c.ctxt.Diag("illegal combination %v %v %v %v", p.As, DRconv(a1), DRconv(a2), DRconv(a3))
+       prasm(p)
+       // Turn illegal instruction into an UNDEF, avoid crashing in asmout.
+       return &Optab{obj.AUNDEF, C_NONE, C_NONE, C_NONE, 49, 4, 0, 0, 0}
+}
+
+func cmp(a int, b int) bool {
+       if a == b {
+               return true
+       }
+       switch a {
+       case C_DCON:
+               if b == C_LCON {
+                       return true
+               }
+               fallthrough
+       case C_LCON:
+               if b == C_ZCON || b == C_SCON || b == C_UCON || b == C_ADDCON || b == C_ANDCON {
+                       return true
+               }
+
+       case C_ADD0CON:
+               if b == C_ADDCON {
+                       return true
+               }
+               fallthrough
+
+       case C_ADDCON:
+               if b == C_ZCON || b == C_SCON {
+                       return true
+               }
+
+       case C_AND0CON:
+               if b == C_ANDCON {
+                       return true
+               }
+               fallthrough
+
+       case C_ANDCON:
+               if b == C_ZCON || b == C_SCON {
+                       return true
+               }
+
+       case C_UCON:
+               if b == C_ZCON {
+                       return true
+               }
+
+       case C_SCON:
+               if b == C_ZCON {
+                       return true
+               }
+
+       case C_LACON:
+               if b == C_SACON {
+                       return true
+               }
+
+       case C_LBRA:
+               if b == C_SBRA {
+                       return true
+               }
+
+       case C_LEXT:
+               if b == C_SEXT {
+                       return true
+               }
+
+       case C_LAUTO:
+               if b == C_SAUTO {
+                       return true
+               }
+
+       case C_REG:
+               if b == C_ZCON {
+                       return true
+               }
+
+       case C_LOREG:
+               if b == C_ZOREG || b == C_SOREG {
+                       return true
+               }
+
+       case C_SOREG:
+               if b == C_ZOREG {
+                       return true
+               }
+       }
+
+       return false
+}
+
+// ocmp implements sort.Interface over a slice of optab entries, ordering them
+// by opcode and then by the classes of the first, second and third operand.
+type ocmp []Optab
+
+// Len returns the number of entries.
+func (x ocmp) Len() int { return len(x) }
+
+// Swap exchanges the entries at positions i and j.
+func (x ocmp) Swap(i, j int) {
+       tmp := x[i]
+       x[i] = x[j]
+       x[j] = tmp
+}
+
+// Less reports whether entry i sorts before entry j. Entries that agree on
+// the opcode and on all three operand classes compare as equal.
+func (x ocmp) Less(i, j int) bool {
+       lhs, rhs := &x[i], &x[j]
+       switch {
+       case lhs.as != rhs.as:
+               return lhs.as < rhs.as
+       case lhs.a1 != rhs.a1:
+               return lhs.a1 < rhs.a1
+       case lhs.a2 != rhs.a2:
+               return lhs.a2 < rhs.a2
+       default:
+               return lhs.a3 < rhs.a3
+       }
+}
+
+func opset(a, b0 obj.As) {
+       oprange[a&obj.AMask] = oprange[b0]
+}
+
+// buildop initializes the assembler's lookup tables.
+//
+// It installs a default ctxt.DiagFunc (log.Printf) when none is set, fills
+// the xcmp compatibility matrix from cmp, sorts optab (up to its obj.AXXX
+// sentinel) with ocmp, and points oprange at the run of optab entries for
+// each opcode. Opcodes that share operand combinations with another opcode
+// are aliased to that opcode's entries through opset.
+//
+// Calling buildop again after a successful initialization only (re)checks
+// ctxt.DiagFunc and then returns.
+func buildop(ctxt *obj.Link) {
+       if ctxt.DiagFunc == nil {
+               ctxt.DiagFunc = func(format string, args ...interface{}) {
+                       log.Printf(format, args...)
+               }
+       }
+
+       if oprange[AOR&obj.AMask] != nil {
+               // Already initialized; stop now.
+               // This happens in the cmd/asm tests,
+               // each of which re-initializes the arch.
+               return
+       }
+
+       var n int
+
+       // Precompute cmp for every pair of operand classes.
+       for i := 0; i < C_NCLASS; i++ {
+               for n = 0; n < C_NCLASS; n++ {
+                       if cmp(n, i) {
+                               xcmp[i][n] = true
+                       }
+               }
+       }
+       // Count the entries in front of the obj.AXXX sentinel.
+       for n = 0; optab[n].as != obj.AXXX; n++ {
+       }
+       sort.Sort(ocmp(optab[:n]))
+       for i := 0; i < n; i++ {
+               r := optab[i].as
+               r0 := r & obj.AMask
+               start := i
+               // Advance i past the run of entries for opcode r; the sentinel
+               // entry terminates the last run.
+               for optab[i].as == r {
+                       i++
+               }
+               oprange[r0] = optab[start:i]
+               i-- // compensate for the i++ of the enclosing loop
+
+               // Alias the opcodes that accept the same operand combinations as r.
+               switch r {
+               default:
+                       ctxt.Diag("unknown op in build: %v", r)
+                       ctxt.DiagFlush()
+                       log.Fatalf("bad code")
+
+               case AABSF:
+                       opset(AMOVFD, r0)
+                       opset(AMOVDF, r0)
+                       opset(AMOVWF, r0)
+                       opset(AMOVFW, r0)
+                       opset(AMOVWD, r0)
+                       opset(AMOVDW, r0)
+                       opset(ANEGF, r0)
+                       opset(ANEGD, r0)
+                       opset(AABSD, r0)
+                       opset(ATRUNCDW, r0)
+                       opset(ATRUNCFW, r0)
+                       opset(ASQRTF, r0)
+                       opset(ASQRTD, r0)
+
+               case AMOVVF:
+                       opset(AMOVVD, r0)
+                       opset(AMOVFV, r0)
+                       opset(AMOVDV, r0)
+                       opset(ATRUNCDV, r0)
+                       opset(ATRUNCFV, r0)
+
+               case AADD:
+                       opset(ASGT, r0)
+                       opset(ASGTU, r0)
+                       opset(AADDU, r0)
+
+               case AADDV:
+                       opset(AADDVU, r0)
+
+               case AADDF:
+                       opset(ADIVF, r0)
+                       opset(ADIVD, r0)
+                       opset(AMULF, r0)
+                       opset(AMULD, r0)
+                       opset(ASUBF, r0)
+                       opset(ASUBD, r0)
+                       opset(AADDD, r0)
+
+               case AAND:
+                       opset(AOR, r0)
+                       opset(AXOR, r0)
+
+               case ABEQ:
+                       opset(ABNE, r0)
+                       opset(ABLT, r0)
+                       opset(ABGE, r0)
+                       opset(ABGEU, r0)
+                       opset(ABLTU, r0)
+
+               case ABLEZ:
+                       opset(ABGEZ, r0)
+                       opset(ABLTZ, r0)
+                       opset(ABGTZ, r0)
+
+               case AMOVB:
+                       opset(AMOVH, r0)
+
+               case AMOVBU:
+                       opset(AMOVHU, r0)
+
+               case AMUL:
+                       opset(AMULU, r0)
+                       opset(AMULH, r0)
+                       opset(AMULHU, r0)
+                       opset(AREM, r0)
+                       opset(AREMU, r0)
+                       opset(ADIV, r0)
+                       opset(ADIVU, r0)
+
+               case AMULV:
+                       opset(AMULVU, r0)
+                       opset(AMULHV, r0)
+                       opset(AMULHVU, r0)
+                       opset(AREMV, r0)
+                       opset(AREMVU, r0)
+                       opset(ADIVV, r0)
+                       opset(ADIVVU, r0)
+
+               case ASLL:
+                       opset(ASRL, r0)
+                       opset(ASRA, r0)
+
+               case ASLLV:
+                       opset(ASRAV, r0)
+                       opset(ASRLV, r0)
+
+               case ASUB:
+                       opset(ASUBU, r0)
+                       opset(ANOR, r0)
+
+               case ASUBV:
+                       opset(ASUBVU, r0)
+
+               case ASYSCALL:
+                       opset(ADBAR, r0)
+                       opset(ANOOP, r0)
+
+               case ACMPEQF:
+                       opset(ACMPGTF, r0)
+                       opset(ACMPGTD, r0)
+                       opset(ACMPGEF, r0)
+                       opset(ACMPGED, r0)
+                       opset(ACMPEQD, r0)
+
+               case ABFPT:
+                       opset(ABFPF, r0)
+
+               case AMOVWL:
+                       opset(AMOVWR, r0)
+
+               case AMOVVL:
+                       opset(AMOVVR, r0)
+
+               // Opcodes with their own optab entries and no aliases.
+               case AMOVW,
+                       AMOVD,
+                       AMOVF,
+                       AMOVV,
+                       ABREAK,
+                       ARFE,
+                       AJAL,
+                       AJMP,
+                       AMOVWU,
+                       ALL,
+                       ALLV,
+                       ASC,
+                       ASCV,
+                       ANEGW,
+                       ANEGV,
+                       AWORD,
+                       obj.ANOP,
+                       obj.ATEXT,
+                       obj.AUNDEF,
+                       obj.AFUNCDATA,
+                       obj.APCDATA,
+                       obj.ADUFFZERO,
+                       obj.ADUFFCOPY:
+                       break
+
+               case ACLO:
+                       opset(ACLZ, r0)
+
+               case ATEQ:
+                       opset(ATNE, r0)
+               }
+       }
+}
+
+func OP(x uint32, y uint32) uint32 {
+       return x<<3 | y<<0
+}
+
+func SP(x uint32, y uint32) uint32 {
+       return x<<29 | y<<26
+}
+
+func OP_TEN(x uint32, y uint32) uint32 {
+       return x<<21 | y<<10
+}
+
+// OP_RRR encodes a three-register instruction on top of the opcode bits op.
+// Each register number is truncated to five bits:
+//
+//     r1 -> rk, bits 10-14
+//     r2 -> rj, bits 5-9
+//     r3 -> rd, bits 0-4
+func OP_RRR(op uint32, r1 uint32, r2 uint32, r3 uint32) uint32 {
+       const regMask = 0x1F
+       rk := (r1 & regMask) << 10
+       rj := (r2 & regMask) << 5
+       rd := r3 & regMask
+       return op | rk | rj | rd
+}
+
+// OP_RR encodes a two-register instruction on top of the opcode bits op.
+// Each register number is truncated to five bits:
+//
+//     r2 -> rj, bits 5-9
+//     r3 -> rd, bits 0-4
+func OP_RR(op uint32, r2 uint32, r3 uint32) uint32 {
+       const regMask = 0x1F
+       rj := (r2 & regMask) << 5
+       rd := r3 & regMask
+       return op | rj | rd
+}
+
+func OP_16IR_5I(op uint32, i uint32, r2 uint32) uint32 {
+       return op | (i&0xFFFF)<<10 | (r2&0x7)<<5 | ((i >> 16) & 0x1F)
+}
+
+func OP_16IRR(op uint32, i uint32, r2 uint32, r3 uint32) uint32 {
+       return op | (i&0xFFFF)<<10 | (r2&0x1F)<<5 | (r3&0x1F)<<0
+}
+
+func OP_12IRR(op uint32, i uint32, r2 uint32, r3 uint32) uint32 {
+       return op | (i&0xFFF)<<10 | (r2&0x1F)<<5 | (r3&0x1F)<<0
+}
+
+func OP_IR(op uint32, i uint32, r2 uint32) uint32 {
+       return op | (i&0xFFFFF)<<5 | (r2&0x1F)<<0 // ui20, rd5
+}
+
+// Encoding for the 'b' or 'bl' instruction
+func OP_B_BL(op uint32, i uint32) uint32 {
+       return op | ((i & 0xFFFF) << 10) | ((i >> 16) & 0x3FF)
+}
+
+func (c *ctxt0) asmout(p *obj.Prog, o *Optab, out []uint32) {
+       o1 := uint32(0)
+       o2 := uint32(0)
+       o3 := uint32(0)
+       o4 := uint32(0)
+       o5 := uint32(0)
+
+       add := AADDU
+       add = AADDVU
+
+       switch o.type_ {
+       default:
+               c.ctxt.Diag("unknown type %d %v", o.type_)
+               prasm(p)
+
+       case 0: // pseudo ops
+               break
+
+       case 1: // mov r1,r2 ==> OR r1,r0,r2
+               a := AOR
+               if p.As == AMOVW {
+                       a = ASLL
+               }
+               o1 = OP_RRR(c.oprrr(a), uint32(REGZERO), uint32(p.From.Reg), uint32(p.To.Reg))
+
+       case 2: // add/sub r1,[r2],r3
+               r := int(p.Reg)
+               if p.As == ANEGW || p.As == ANEGV {
+                       r = REGZERO
+               }
+               if r == 0 {
+                       r = int(p.To.Reg)
+               }
+               o1 = OP_RRR(c.oprrr(p.As), uint32(p.From.Reg), uint32(r), uint32(p.To.Reg))
+
+       case 3: // mov $soreg, r ==> or/add $i,o,r
+               v := c.regoff(&p.From)
+
+               r := int(p.From.Reg)
+               if r == 0 {
+                       r = int(o.param)
+               }
+               a := add
+               if o.a1 == C_ANDCON {
+                       a = AOR
+               }
+
+               o1 = OP_12IRR(c.opirr(a), uint32(v), uint32(r), uint32(p.To.Reg))
+
+       case 4: // add $scon,[r1],r2
+               v := c.regoff(&p.From)
+
+               r := int(p.Reg)
+               if r == 0 {
+                       r = int(p.To.Reg)
+               }
+
+               o1 = OP_12IRR(c.opirr(p.As), uint32(v), uint32(r), uint32(p.To.Reg))
+
+       case 5: // syscall
+               o1 = c.oprrr(p.As)
+
+       case 6: // beq r1,[r2],sbra
+               v := int32(0)
+               vcmp := int32(0)
+               if p.To.Target() != nil {
+                       v = int32(p.To.Target().Pc-p.Pc) >> 2
+               }
+               if v < 0 {
+                       vcmp = -v
+               }
+               if (p.As == ABFPT || p.As == ABFPF) && ((uint32(vcmp))>>21)&0x7FF != 0 {
+                       c.ctxt.Diag("21 bit-width, short branch too far\n%v", p)
+               } else if p.As != ABFPT && p.As != ABFPF && (v<<16)>>16 != v {
+                       c.ctxt.Diag("16 bit-width, short branch too far\n%v", p)
+               }
+               if p.As == ABGTZ || p.As == ABLEZ {
+                       o1 = OP_16IRR(c.opirr(p.As), uint32(v), uint32(p.Reg), uint32(p.From.Reg))
+               } else if p.As == ABFPT || p.As == ABFPF {
+                       // BCNEZ cj offset21 ,cj = fcc0
+                       // BCEQZ cj offset21 ,cj = fcc0
+                       o1 = OP_16IR_5I(c.opirr(p.As), uint32(v), uint32(REG_FCC0))
+               } else {
+                       o1 = OP_16IRR(c.opirr(p.As), uint32(v), uint32(p.From.Reg), uint32(p.Reg))
+               }
+
+       case 7: // mov r, soreg
+               r := int(p.To.Reg)
+               if r == 0 {
+                       r = int(o.param)
+               }
+               v := c.regoff(&p.To)
+               o1 = OP_12IRR(c.opirr(p.As), uint32(v), uint32(r), uint32(p.From.Reg))
+
+       case 8: // mov soreg, r
+               r := int(p.From.Reg)
+               if r == 0 {
+                       r = int(o.param)
+               }
+               v := c.regoff(&p.From)
+               o1 = OP_12IRR(c.opirr(-p.As), uint32(v), uint32(r), uint32(p.To.Reg))
+
+       case 9: // sll r1,[r2],r3
+               if p.As != ACLO && p.As != ACLZ {
+                       r := int(p.Reg)
+                       if r == 0 {
+                               r = int(p.To.Reg)
+                       }
+                       o1 = OP_RRR(c.oprrr(p.As), uint32(p.From.Reg), uint32(r), uint32(p.To.Reg))
+               } else { // clo r1,r2
+                       o1 = OP_RR(c.oprr(p.As), uint32(p.From.Reg), uint32(p.To.Reg))
+               }
+
+       case 10: // add $con,[r1],r2 ==> mov $con, t; add t,[r1],r2
+               v := c.regoff(&p.From)
+               a := AOR
+               if v < 0 {
+                       a = AADDU
+               }
+               o1 = OP_12IRR(c.opirr(a), uint32(v), uint32(0), uint32(REGTMP))
+               r := int(p.Reg)
+               if r == 0 {
+                       r = int(p.To.Reg)
+               }
+               o2 = OP_RRR(c.oprrr(p.As), uint32(REGTMP), uint32(r), uint32(p.To.Reg))
+
+       case 11: // jmp lbra
+               v := int32(0)
+               if c.aclass(&p.To) == C_SBRA && p.To.Sym == nil && p.As == AJMP {
+                       // use PC-relative branch for short branches
+                       // BEQ  R0, R0, sbra
+                       if p.To.Target() != nil {
+                               v = int32(p.To.Target().Pc-p.Pc) >> 2
+                       }
+                       if (v<<16)>>16 == v {
+                               o1 = OP_16IRR(c.opirr(ABEQ), uint32(v), uint32(REGZERO), uint32(REGZERO))
+                               break
+                       }
+               }
+               if p.To.Target() == nil {
+                       v = int32(p.Pc) >> 2
+               } else {
+                       v = int32(p.To.Target().Pc) >> 2
+               }
+               o1 = OP_B_BL(c.opirr(p.As), uint32(v))
+               if p.To.Sym == nil {
+                       p.To.Sym = c.cursym.Func().Text.From.Sym
+                       p.To.Offset = p.To.Target().Pc
+               }
+               rel := obj.Addrel(c.cursym)
+               rel.Off = int32(c.pc)
+               rel.Siz = 4
+               rel.Sym = p.To.Sym
+               rel.Add = p.To.Offset
+               rel.Type = objabi.R_CALLLOONG64
+
+       case 12: // movbs r,r
+               // NOTE: this case does not use REGTMP. If it ever does,
+               // remove the NOTUSETMP flag in optab.
+               v := 16
+               if p.As == AMOVB {
+                       v = 24
+               }
+               o1 = OP_16IRR(c.opirr(ASLL), uint32(v), uint32(p.From.Reg), uint32(p.To.Reg))
+               o2 = OP_16IRR(c.opirr(ASRA), uint32(v), uint32(p.To.Reg), uint32(p.To.Reg))
+
+       case 13: // movbu r,r
+               if p.As == AMOVBU {
+                       o1 = OP_12IRR(c.opirr(AAND), uint32(0xff), uint32(p.From.Reg), uint32(p.To.Reg))
+               } else {
+                       // bstrpick.d (msbd=15, lsbd=0)
+                       o1 = (0x33c0 << 10) | ((uint32(p.From.Reg) & 0x1f) << 5) | (uint32(p.To.Reg) & 0x1F)
+               }
+
+       case 14: // movwu r,r
+               // NOTE: this case does not use REGTMP. If it ever does,
+               // remove the NOTUSETMP flag in optab.
+               o1 = OP_16IRR(c.opirr(-ASLLV), uint32(32)&0x3f, uint32(p.From.Reg), uint32(p.To.Reg))
+               o2 = OP_16IRR(c.opirr(-ASRLV), uint32(32)&0x3f, uint32(p.To.Reg), uint32(p.To.Reg))
+
+       case 15: // teq $c r,r
+               v := c.regoff(&p.From)
+               r := int(p.Reg)
+               if r == 0 {
+                       r = REGZERO
+               }
+               /*
+                       teq c, r1, r2
+                       fallthrough
+                       ==>
+                       bne r1, r2, 2
+                       break c
+                       fallthrough
+               */
+               if p.As == ATEQ {
+                       o1 = OP_16IRR(c.opirr(ABNE), uint32(2), uint32(r), uint32(p.To.Reg))
+               } else { // ATNE
+                       o1 = OP_16IRR(c.opirr(ABEQ), uint32(2), uint32(r), uint32(p.To.Reg))
+               }
+               o2 = c.oprrr(ABREAK) | (uint32(v) & 0x7FFF)
+
+       case 16: // sll $c,[r1],r2
+               v := c.regoff(&p.From)
+               r := int(p.Reg)
+               if r == 0 {
+                       r = int(p.To.Reg)
+               }
+
+               // instruction ending with V:6-digit immediate, others:5-digit immediate
+               if v >= 32 && vshift(p.As) {
+                       o1 = OP_16IRR(c.opirr(p.As), uint32(v)&0x3f, uint32(r), uint32(p.To.Reg))
+               } else {
+                       o1 = OP_16IRR(c.opirr(p.As), uint32(v)&0x1f, uint32(r), uint32(p.To.Reg))
+               }
+
+       case 17:
+               o1 = OP_RRR(c.oprrr(p.As), uint32(REGZERO), uint32(p.From.Reg), uint32(p.To.Reg))
+
+       case 18: // jmp [r1],0(r2)
+               r := int(p.Reg)
+               if r == 0 {
+                       r = int(o.param)
+               }
+               o1 = OP_RRR(c.oprrr(p.As), uint32(0), uint32(p.To.Reg), uint32(r))
+               if p.As == obj.ACALL {
+                       rel := obj.Addrel(c.cursym)
+                       rel.Off = int32(c.pc)
+                       rel.Siz = 0
+                       rel.Type = objabi.R_CALLIND
+               }
+
+       case 19: // mov $lcon,r
+               // NOTE: this case does not use REGTMP. If it ever does,
+               // remove the NOTUSETMP flag in optab.
+               v := c.regoff(&p.From)
+               o1 = OP_IR(c.opir(ALU12IW), uint32(v>>12), uint32(p.To.Reg))
+               o2 = OP_12IRR(c.opirr(AOR), uint32(v), uint32(p.To.Reg), uint32(p.To.Reg))
+
+       case 23: // add $lcon,r1,r2
+               v := c.regoff(&p.From)
+               o1 = OP_IR(c.opir(ALU12IW), uint32(v>>12), uint32(REGTMP))
+               o2 = OP_12IRR(c.opirr(AOR), uint32(v), uint32(REGTMP), uint32(REGTMP))
+               r := int(p.Reg)
+               if r == 0 {
+                       r = int(p.To.Reg)
+               }
+               o3 = OP_RRR(c.oprrr(p.As), uint32(REGTMP), uint32(r), uint32(p.To.Reg))
+
+       case 24: // mov $ucon,r
+               v := c.regoff(&p.From)
+               o1 = OP_IR(c.opir(ALU12IW), uint32(v>>12), uint32(p.To.Reg))
+
+       case 25: // add/and $ucon,[r1],r2
+               v := c.regoff(&p.From)
+               o1 = OP_IR(c.opir(ALU12IW), uint32(v>>12), uint32(REGTMP))
+               r := int(p.Reg)
+               if r == 0 {
+                       r = int(p.To.Reg)
+               }
+               o2 = OP_RRR(c.oprrr(p.As), uint32(REGTMP), uint32(r), uint32(p.To.Reg))
+
+       case 26: // mov $lsext/auto/oreg,r
+               v := c.regoff(&p.From)
+               o1 = OP_IR(c.opir(ALU12IW), uint32(v>>12), uint32(REGTMP))
+               o2 = OP_12IRR(c.opirr(AOR), uint32(v), uint32(REGTMP), uint32(REGTMP))
+               r := int(p.From.Reg)
+               if r == 0 {
+                       r = int(o.param)
+               }
+               o3 = OP_RRR(c.oprrr(add), uint32(REGTMP), uint32(r), uint32(p.To.Reg))
+
+       case 27: // mov [sl]ext/auto/oreg,fr
+               v := c.regoff(&p.From)
+               r := int(p.From.Reg)
+               if r == 0 {
+                       r = int(o.param)
+               }
+               a := -AMOVF
+               if p.As == AMOVD {
+                       a = -AMOVD
+               }
+               switch o.size {
+               case 12:
+                       o1 = OP_IR(c.opir(ALU12IW), uint32((v+1<<11)>>12), uint32(REGTMP))
+                       o2 = OP_RRR(c.oprrr(add), uint32(r), uint32(REGTMP), uint32(REGTMP))
+                       o3 = OP_12IRR(c.opirr(a), uint32(v), uint32(REGTMP), uint32(p.To.Reg))
+
+               case 4:
+                       o1 = OP_12IRR(c.opirr(a), uint32(v), uint32(r), uint32(p.To.Reg))
+               }
+
+       case 28: // mov fr,[sl]ext/auto/oreg
+               v := c.regoff(&p.To)
+               r := int(p.To.Reg)
+               if r == 0 {
+                       r = int(o.param)
+               }
+               a := AMOVF
+               if p.As == AMOVD {
+                       a = AMOVD
+               }
+               switch o.size {
+               case 12:
+                       o1 = OP_IR(c.opir(ALU12IW), uint32((v+1<<11)>>12), uint32(REGTMP))
+                       o2 = OP_RRR(c.oprrr(add), uint32(r), uint32(REGTMP), uint32(REGTMP))
+                       o3 = OP_12IRR(c.opirr(a), uint32(v), uint32(REGTMP), uint32(p.From.Reg))
+
+               case 4:
+                       o1 = OP_12IRR(c.opirr(a), uint32(v), uint32(r), uint32(p.From.Reg))
+               }
+
+       case 30: // movw r,fr
+               a := OP_TEN(8, 1321) // movgr2fr.w
+               o1 = OP_RR(a, uint32(p.From.Reg), uint32(p.To.Reg))
+
+       case 31: // movw fr,r
+               a := OP_TEN(8, 1325) // movfr2gr.s
+               o1 = OP_RR(a, uint32(p.From.Reg), uint32(p.To.Reg))
+
+       case 32: // fadd fr1,[fr2],fr3
+               r := int(p.Reg)
+               if r == 0 {
+                       r = int(p.To.Reg)
+               }
+               o1 = OP_RRR(c.oprrr(p.As), uint32(p.From.Reg), uint32(r), uint32(p.To.Reg))
+
+       case 33: // fabs fr1, fr3
+               o1 = OP_RRR(c.oprrr(p.As), uint32(0), uint32(p.From.Reg), uint32(p.To.Reg))
+
+       case 34: // mov $con,fr
+               v := c.regoff(&p.From)
+               a := AADDU
+               if o.a1 == C_ANDCON {
+                       a = AOR
+               }
+               o1 = OP_12IRR(c.opirr(a), uint32(v), uint32(0), uint32(REGTMP))
+               o2 = OP_RR(OP_TEN(8, 1321), uint32(REGTMP), uint32(p.To.Reg)) // movgr2fr.w
+
+       case 35: // mov r,lext/auto/oreg
+               v := c.regoff(&p.To)
+               r := int(p.To.Reg)
+               if r == 0 {
+                       r = int(o.param)
+               }
+               o1 = OP_IR(c.opir(ALU12IW), uint32((v+1<<11)>>12), uint32(REGTMP))
+               o2 = OP_RRR(c.oprrr(add), uint32(r), uint32(REGTMP), uint32(REGTMP))
+               o3 = OP_12IRR(c.opirr(p.As), uint32(v), uint32(REGTMP), uint32(p.From.Reg))
+
+       case 36: // mov lext/auto/oreg,r
+               v := c.regoff(&p.From)
+               r := int(p.From.Reg)
+               if r == 0 {
+                       r = int(o.param)
+               }
+               o1 = OP_IR(c.opir(ALU12IW), uint32((v+1<<11)>>12), uint32(REGTMP))
+               o2 = OP_RRR(c.oprrr(add), uint32(r), uint32(REGTMP), uint32(REGTMP))
+               o3 = OP_12IRR(c.opirr(-p.As), uint32(v), uint32(REGTMP), uint32(p.To.Reg))
+
+       case 40: // word
+               o1 = uint32(c.regoff(&p.From))
+
+       case 47: // movv r,fr
+               a := OP_TEN(8, 1322) // movgr2fr.d
+               o1 = OP_RR(a, uint32(p.From.Reg), uint32(p.To.Reg))
+
+       case 48: // movv fr,r
+               a := OP_TEN(8, 1326) // movfr2gr.d
+               o1 = OP_RR(a, uint32(p.From.Reg), uint32(p.To.Reg))
+
+       case 49: // undef
+               o1 = c.oprrr(ABREAK)
+
+       // relocation operations
+       case 50: // mov r,addr ==> pcaddu12i + sw
+               o1 = OP_IR(c.opir(APCADDU12I), uint32(0), uint32(REGTMP))
+               rel := obj.Addrel(c.cursym)
+               rel.Off = int32(c.pc)
+               rel.Siz = 4
+               rel.Sym = p.To.Sym
+               rel.Add = p.To.Offset
+               rel.Type = objabi.R_ADDRLOONG64U
+
+               o2 = OP_12IRR(c.opirr(p.As), uint32(0), uint32(REGTMP), uint32(p.From.Reg))
+               rel2 := obj.Addrel(c.cursym)
+               rel2.Off = int32(c.pc + 4)
+               rel2.Siz = 4
+               rel2.Sym = p.To.Sym
+               rel2.Add = p.To.Offset
+               rel2.Type = objabi.R_ADDRLOONG64
+
+       case 51: // mov addr,r ==> pcaddu12i + lw
+               o1 = OP_IR(c.opir(APCADDU12I), uint32(0), uint32(REGTMP))
+               rel := obj.Addrel(c.cursym)
+               rel.Off = int32(c.pc)
+               rel.Siz = 4
+               rel.Sym = p.From.Sym
+               rel.Add = p.From.Offset
+               rel.Type = objabi.R_ADDRLOONG64U
+               o2 = OP_12IRR(c.opirr(-p.As), uint32(0), uint32(REGTMP), uint32(p.To.Reg))
+               rel2 := obj.Addrel(c.cursym)
+               rel2.Off = int32(c.pc + 4)
+               rel2.Siz = 4
+               rel2.Sym = p.From.Sym
+               rel2.Add = p.From.Offset
+               rel2.Type = objabi.R_ADDRLOONG64
+
+       case 52: // mov $lext, r
+               // NOTE: this case does not use REGTMP. If it ever does,
+               // remove the NOTUSETMP flag in optab.
+               o1 = OP_IR(c.opir(APCADDU12I), uint32(0), uint32(p.To.Reg))
+               rel := obj.Addrel(c.cursym)
+               rel.Off = int32(c.pc)
+               rel.Siz = 4
+               rel.Sym = p.From.Sym
+               rel.Add = p.From.Offset
+               rel.Type = objabi.R_ADDRLOONG64U
+               o2 = OP_12IRR(c.opirr(add), uint32(0), uint32(p.To.Reg), uint32(p.To.Reg))
+               rel2 := obj.Addrel(c.cursym)
+               rel2.Off = int32(c.pc + 4)
+               rel2.Siz = 4
+               rel2.Sym = p.From.Sym
+               rel2.Add = p.From.Offset
+               rel2.Type = objabi.R_ADDRLOONG64
+
+       case 53: // mov r, tlsvar ==>  lu12i.w + ori + add r2, regtmp + sw o(regtmp)
+               // NOTE: this case does not use REGTMP. If it ever does,
+               // remove the NOTUSETMP flag in optab.
+               o1 = OP_IR(c.opir(ALU12IW), uint32(0), uint32(REGTMP))
+               rel := obj.Addrel(c.cursym)
+               rel.Off = int32(c.pc)
+               rel.Siz = 4
+               rel.Sym = p.To.Sym
+               rel.Add = p.To.Offset
+               rel.Type = objabi.R_ADDRLOONG64TLSU
+               o2 = OP_12IRR(c.opirr(AOR), uint32(0), uint32(REGTMP), uint32(REGTMP))
+               rel2 := obj.Addrel(c.cursym)
+               rel2.Off = int32(c.pc + 4)
+               rel2.Siz = 4
+               rel2.Sym = p.To.Sym
+               rel2.Add = p.To.Offset
+               rel2.Type = objabi.R_ADDRLOONG64TLS
+               o3 = OP_RRR(c.oprrr(AADDV), uint32(REG_R2), uint32(REGTMP), uint32(REGTMP))
+               o4 = OP_12IRR(c.opirr(p.As), uint32(0), uint32(REGTMP), uint32(p.From.Reg))
+
+       case 54: // lu12i.w + ori + add r2, regtmp + lw o(regtmp)
+               // NOTE: this case does not use REGTMP. If it ever does,
+               // remove the NOTUSETMP flag in optab.
+               o1 = OP_IR(c.opir(ALU12IW), uint32(0), uint32(REGTMP))
+               rel := obj.Addrel(c.cursym)
+               rel.Off = int32(c.pc)
+               rel.Siz = 4
+               rel.Sym = p.From.Sym
+               rel.Add = p.From.Offset
+               rel.Type = objabi.R_ADDRLOONG64TLSU
+               o2 = OP_12IRR(c.opirr(AOR), uint32(0), uint32(REGTMP), uint32(REGTMP))
+               rel2 := obj.Addrel(c.cursym)
+               rel2.Off = int32(c.pc + 4)
+               rel2.Siz = 4
+               rel2.Sym = p.From.Sym
+               rel2.Add = p.From.Offset
+               rel2.Type = objabi.R_ADDRLOONG64TLS
+               o3 = OP_RRR(c.oprrr(AADDV), uint32(REG_R2), uint32(REGTMP), uint32(REGTMP))
+               o4 = OP_12IRR(c.opirr(-p.As), uint32(0), uint32(REGTMP), uint32(p.To.Reg))
+
+       case 55: //  lu12i.w + ori + add r2, regtmp
+               // NOTE: this case does not use REGTMP. If it ever does,
+               // remove the NOTUSETMP flag in optab.
+               o1 = OP_IR(c.opir(ALU12IW), uint32(0), uint32(REGTMP))
+               rel := obj.Addrel(c.cursym)
+               rel.Off = int32(c.pc)
+               rel.Siz = 4
+               rel.Sym = p.From.Sym
+               rel.Add = p.From.Offset
+               rel.Type = objabi.R_ADDRLOONG64TLSU
+               o2 = OP_12IRR(c.opirr(AOR), uint32(0), uint32(REGTMP), uint32(REGTMP))
+               rel2 := obj.Addrel(c.cursym)
+               rel2.Off = int32(c.pc + 4)
+               rel2.Siz = 4
+               rel2.Sym = p.From.Sym
+               rel2.Add = p.From.Offset
+               rel2.Type = objabi.R_ADDRLOONG64TLS
+               o3 = OP_RRR(c.oprrr(AADDV), uint32(REG_R2), uint32(REGTMP), uint32(p.To.Reg))
+
+       case 59: // mov $dcon,r
+               // NOTE: this case does not use REGTMP. If it ever does,
+               // remove the NOTUSETMP flag in optab.
+               v := c.vregoff(&p.From)
+               o1 = OP_IR(c.opir(ALU12IW), uint32(v>>12), uint32(p.To.Reg))
+               o2 = OP_12IRR(c.opirr(AOR), uint32(v), uint32(p.To.Reg), uint32(p.To.Reg))
+               o3 = OP_IR(c.opir(ALU32ID), uint32(v>>32), uint32(p.To.Reg))
+               o4 = OP_12IRR(c.opirr(ALU52ID), uint32(v>>52), uint32(p.To.Reg), uint32(p.To.Reg))
+
+       case 60: // add $dcon,r1,r2
+               v := c.vregoff(&p.From)
+               o1 = OP_IR(c.opir(ALU12IW), uint32(v>>12), uint32(REGTMP))
+               o2 = OP_12IRR(c.opirr(AOR), uint32(v), uint32(REGTMP), uint32(REGTMP))
+               o3 = OP_IR(c.opir(ALU32ID), uint32(v>>32), uint32(REGTMP))
+               o4 = OP_12IRR(c.opirr(ALU52ID), uint32(v>>52), uint32(REGTMP), uint32(REGTMP))
+               r := int(p.Reg)
+               if r == 0 {
+                       r = int(p.To.Reg)
+               }
+               o5 = OP_RRR(c.oprrr(p.As), uint32(REGTMP), uint32(r), uint32(p.To.Reg))
+
+       case 61: // word C_DCON
+               o1 = uint32(c.vregoff(&p.From))
+               o2 = uint32(c.vregoff(&p.From) >> 32)
+       }
+
+       out[0] = o1
+       out[1] = o2
+       out[2] = o3
+       out[3] = o4
+       out[4] = o5
+}
+
+// vregoff classifies the operand a with c.aclass and returns the 64-bit
+// value found in c.instoffset afterwards.
+// NOTE(review): aclass is defined elsewhere; presumably it stores the
+// operand's constant/offset in c.instoffset as a side effect - confirm.
+func (c *ctxt0) vregoff(a *obj.Addr) int64 {
+       c.instoffset = 0 // start from zero so a value left by an earlier call cannot leak through
+       c.aclass(a)      // result (the operand class) is deliberately ignored here
+       return c.instoffset
+}
+
+// regoff is the 32-bit variant of vregoff: it returns the same operand
+// value converted to int32, so only the low 32 bits are kept.
+func (c *ctxt0) regoff(a *obj.Addr) int32 {
+       return int32(c.vregoff(a))
+}
+
+func (c *ctxt0) oprrr(a obj.As) uint32 {
+       switch a {
+       case AADD:
+               return 0x20 << 15
+       case AADDU:
+               return 0x20 << 15
+       case ASGT:
+               return 0x24 << 15 // SLT
+       case ASGTU:
+               return 0x25 << 15 // SLTU
+       case AAND:
+               return 0x29 << 15
+       case AOR:
+               return 0x2a << 15
+       case AXOR:
+               return 0x2b << 15
+       case ASUB:
+               return 0x22 << 15
+       case ASUBU, ANEGW:
+               return 0x22 << 15
+       case ANOR:
+               return 0x28 << 15
+       case ASLL:
+               return 0x2e << 15
+       case ASRL:
+               return 0x2f << 15
+       case ASRA:
+               return 0x30 << 15
+       case ASLLV:
+               return 0x31 << 15
+       case ASRLV:
+               return 0x32 << 15
+       case ASRAV:
+               return 0x33 << 15
+       case AADDV:
+               return 0x21 << 15
+       case AADDVU:
+               return 0x21 << 15
+       case ASUBV:
+               return 0x23 << 15
+       case ASUBVU, ANEGV:
+               return 0x23 << 15
+
+       case AMUL:
+               return 0x38 << 15 // mul.w
+       case AMULU:
+               return 0x38 << 15 // mul.w
+       case AMULH:
+               return 0x39 << 15 // mulh.w
+       case AMULHU:
+               return 0x3a << 15 // mulhu.w
+       case AMULV:
+               return 0x3b << 15 // mul.d
+       case AMULVU:
+               return 0x3b << 15 // mul.d
+       case AMULHV:
+               return 0x3c << 15 // mulh.d
+       case AMULHVU:
+               return 0x3d << 15 // mulhu.d
+       case ADIV:
+               return 0x40 << 15 // div.w
+       case ADIVU:
+               return 0x42 << 15 // div.wu
+       case ADIVV:
+               return 0x44 << 15 // div.d
+       case ADIVVU:
+               return 0x46 << 15 // div.du
+       case AREM:
+               return 0x41 << 15 // mod.w
+       case AREMU:
+               return 0x43 << 15 // mod.wu
+       case AREMV:
+               return 0x45 << 15 // mod.d
+       case AREMVU:
+               return 0x47 << 15 // mod.du
+
+       case AJMP:
+               return 0x13 << 26 // jirl r0, rj, 0
+       case AJAL:
+               return (0x13 << 26) | 1 // jirl r1, rj, 0
+
+       case ABREAK:
+               return 0x54 << 15
+       case ASYSCALL:
+               return 0x56 << 15
+       case ADIVF:
+               return 0x20d << 15
+       case ADIVD:
+               return 0x20e << 15
+       case AMULF:
+               return 0x209 << 15
+       case AMULD:
+               return 0x20a << 15
+       case ASUBF:
+               return 0x205 << 15
+       case ASUBD:
+               return 0x206 << 15
+       case AADDF:
+               return 0x201 << 15
+       case AADDD:
+               return 0x202 << 15
+       case ATRUNCFV:
+               return 0x46a9 << 10
+       case ATRUNCDV:
+               return 0x46aa << 10
+       case ATRUNCFW:
+               return 0x46a1 << 10
+       case ATRUNCDW:
+               return 0x46a2 << 10
+       case AMOVFV:
+               return 0x46c9 << 10
+       case AMOVDV:
+               return 0x46ca << 10
+       case AMOVVF:
+               return 0x4746 << 10
+       case AMOVVD:
+               return 0x474a << 10
+       case AMOVFW:
+               return 0x46c1 << 10
+       case AMOVDW:
+               return 0x46c2 << 10
+       case AMOVWF:
+               return 0x4744 << 10
+       case AMOVDF:
+               return 0x4646 << 10
+       case AMOVWD:
+               return 0x4748 << 10
+       case AMOVFD:
+               return 0x4649 << 10
+       case AABSF:
+               return 0x4501 << 10
+       case AABSD:
+               return 0x4502 << 10
+       case AMOVF:
+               return 0x4525 << 10
+       case AMOVD:
+               return 0x4526 << 10
+       case ANEGF:
+               return 0x4505 << 10
+       case ANEGD:
+               return 0x4506 << 10
+       case ACMPEQF:
+               return 0x0c1<<20 | 0x4<<15 // FCMP.CEQ.S
+       case ACMPEQD:
+               return 0x0c2<<20 | 0x4<<15 // FCMP.CEQ.D
+       case ACMPGED:
+               return 0x0c2<<20 | 0x7<<15 // FCMP.SLE.D
+       case ACMPGEF:
+               return 0x0c1<<20 | 0x7<<15 // FCMP.SLE.S
+       case ACMPGTD:
+               return 0x0c2<<20 | 0x3<<15 // FCMP.SLT.D
+       case ACMPGTF:
+               return 0x0c1<<20 | 0x3<<15 // FCMP.SLT.S
+
+       case ASQRTF:
+               return 0x4511 << 10
+       case ASQRTD:
+               return 0x4512 << 10
+
+       case ADBAR:
+               return 0x70e4 << 15
+       case ANOOP:
+               // andi r0, r0, 0
+               return 0x03400000
+       }
+
+       if a < 0 {
+               c.ctxt.Diag("bad rrr opcode -%v", -a)
+       } else {
+               c.ctxt.Diag("bad rrr opcode %v", a)
+       }
+       return 0
+}
+
+func (c *ctxt0) oprr(a obj.As) uint32 {
+       switch a {
+       case ACLO:
+               return 0x4 << 10
+       case ACLZ:
+               return 0x5 << 10
+       }
+
+       c.ctxt.Diag("bad rr opcode %v", a)
+       return 0
+}
+
+func (c *ctxt0) opir(a obj.As) uint32 {
+       switch a {
+       case ALU12IW:
+               return 0x0a << 25
+       case ALU32ID:
+               return 0x0b << 25
+       case APCADDU12I:
+               return 0x0e << 25
+       }
+       return 0
+}
+
+func (c *ctxt0) opirr(a obj.As) uint32 {
+       switch a {
+       case AADD, AADDU:
+               return 0x00a << 22
+       case ASGT:
+               return 0x008 << 22
+       case ASGTU:
+               return 0x009 << 22
+       case AAND:
+               return 0x00d << 22
+       case AOR:
+               return 0x00e << 22
+       case ALU52ID:
+               return 0x00c << 22
+       case AXOR:
+               return 0x00f << 22
+       case ASLL:
+               return 0x00081 << 15
+       case ASRL:
+               return 0x00089 << 15
+       case ASRA:
+               return 0x00091 << 15
+       case AADDV:
+               return 0x00b << 22
+       case AADDVU:
+               return 0x00b << 22
+
+       case AJMP:
+               return 0x14 << 26
+       case AJAL,
+               obj.ADUFFZERO,
+               obj.ADUFFCOPY:
+               return 0x15 << 26
+
+       case AJIRL:
+               return 0x13 << 26
+       case ABLTU:
+               return 0x1a << 26
+       case ABLT, ABLTZ, ABGTZ:
+               return 0x18 << 26
+       case ABGEU:
+               return 0x1b << 26
+       case ABGE, ABGEZ, ABLEZ:
+               return 0x19 << 26
+       case ABEQ:
+               return 0x16 << 26
+       case ABNE:
+               return 0x17 << 26
+       case ABFPT:
+               return 0x12<<26 | 0x1<<8
+       case ABFPF:
+               return 0x12<<26 | 0x0<<8
+
+       case AMOVB,
+               AMOVBU:
+               return 0x0a4 << 22
+       case AMOVH,
+               AMOVHU:
+               return 0x0a5 << 22
+       case AMOVW,
+               AMOVWU:
+               return 0x0a6 << 22
+       case AMOVV:
+               return 0x0a7 << 22
+       case AMOVF:
+               return 0x0ad << 22
+       case AMOVD:
+               return 0x0af << 22
+       case AMOVWL:
+               return 0x0bc << 22
+       case AMOVWR:
+               return 0x0bd << 22
+       case AMOVVL:
+               return 0x0be << 22
+       case AMOVVR:
+               return 0x0bf << 22
+
+       case ABREAK:
+               return 0x018 << 22
+
+       case -AMOVWL:
+               return 0x0b8 << 22
+       case -AMOVWR:
+               return 0x0b9 << 22
+       case -AMOVVL:
+               return 0x0ba << 22
+       case -AMOVVR:
+               return 0x0bb << 22
+       case -AMOVB:
+               return 0x0a0 << 22
+       case -AMOVBU:
+               return 0x0a8 << 22
+       case -AMOVH:
+               return 0x0a1 << 22
+       case -AMOVHU:
+               return 0x0a9 << 22
+       case -AMOVW:
+               return 0x0a2 << 22
+       case -AMOVWU:
+               return 0x0aa << 22
+       case -AMOVV:
+               return 0x0a3 << 22
+       case -AMOVF:
+               return 0x0ac << 22
+       case -AMOVD:
+               return 0x0ae << 22
+
+       case ASLLV,
+               -ASLLV:
+               return 0x0041 << 16
+       case ASRLV,
+               -ASRLV:
+               return 0x0045 << 16
+       case ASRAV,
+               -ASRAV:
+               return 0x0049 << 16
+       case -ALL:
+               return 0x020 << 24
+       case -ALLV:
+               return 0x022 << 24
+       case ASC:
+               return 0x021 << 24
+       case ASCV:
+               return 0x023 << 24
+       }
+
+       if a < 0 {
+               c.ctxt.Diag("bad irr opcode -%v", -a)
+       } else {
+               c.ctxt.Diag("bad irr opcode %v", a)
+       }
+       return 0
+}
+
+func vshift(a obj.As) bool {
+       switch a {
+       case ASLLV,
+               ASRLV,
+               ASRAV:
+               return true
+       }
+       return false
+}
diff --git a/src/cmd/internal/obj/loong64/cnames.go b/src/cmd/internal/obj/loong64/cnames.go
new file mode 100644 (file)
index 0000000..f397077
--- /dev/null
@@ -0,0 +1,43 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package loong64
+
+// cnames0 holds the printable names of the operand classes. DRconv indexes
+// it directly with a class value in the range C_NONE..C_NCLASS, so the
+// entries must stay in the same order as the C_* constants (declared
+// elsewhere in this package; not visible here).
+var cnames0 = []string{
+       "NONE",
+       "REG",
+       "FREG",
+       "FCREG",
+       "FCSRREG",
+       "FCCREG",
+       "ZCON",
+       "SCON",
+       "UCON",
+       "ADD0CON",
+       "AND0CON",
+       "ADDCON",
+       "ANDCON",
+       "LCON",
+       "DCON",
+       "SACON",
+       "SECON",
+       "LACON",
+       "LECON",
+       "DACON",
+       "STCON",
+       "SBRA",
+       "LBRA",
+       "SAUTO",
+       "LAUTO",
+       "SEXT",
+       "LEXT",
+       "ZOREG",
+       "SOREG",
+       "LOREG",
+       "GOK",
+       "ADDR",
+       "TLS",
+       "TEXTSIZE",
+       "NCLASS",
+}
diff --git a/src/cmd/internal/obj/loong64/list.go b/src/cmd/internal/obj/loong64/list.go
new file mode 100644 (file)
index 0000000..4890430
--- /dev/null
@@ -0,0 +1,46 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package loong64
+
+import (
+       "cmd/internal/obj"
+       "fmt"
+)
+
+// init hooks the loong64 back end into package obj: it registers rconv as
+// the register-name formatter and Anames as the opcode name table.
+func init() {
+       obj.RegisterRegister(obj.RBaseLOONG64, REG_LAST+1, rconv)
+       obj.RegisterOpcode(obj.ABaseLoong64, Anames)
+}
+
+func rconv(r int) string {
+       if r == 0 {
+               return "NONE"
+       }
+       if r == REGG {
+               // Special case.
+               return "g"
+       }
+       if REG_R0 <= r && r <= REG_R31 {
+               return fmt.Sprintf("R%d", r-REG_R0)
+       }
+       if REG_F0 <= r && r <= REG_F31 {
+               return fmt.Sprintf("F%d", r-REG_F0)
+       }
+       if REG_FCSR0 <= r && r <= REG_FCSR31 {
+               return fmt.Sprintf("FCSR%d", r-REG_FCSR0)
+       }
+       if REG_FCC0 <= r && r <= REG_FCC31 {
+               return fmt.Sprintf("FCC%d", r-REG_FCC0)
+       }
+       return fmt.Sprintf("Rgok(%d)", r-obj.RBaseLOONG64)
+}
+
+func DRconv(a int) string {
+       s := "C_??"
+       if a >= C_NONE && a <= C_NCLASS {
+               s = cnames0[a]
+       }
+       return s
+}
diff --git a/src/cmd/internal/obj/loong64/obj.go b/src/cmd/internal/obj/loong64/obj.go
new file mode 100644 (file)
index 0000000..f8cc5f2
--- /dev/null
@@ -0,0 +1,701 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package loong64
+
+import (
+       "cmd/internal/obj"
+       "cmd/internal/objabi"
+       "cmd/internal/sys"
+
+       "math"
+)
+
+// progedit canonicalizes a single instruction p in place:
+//
+//   - JMP, JAL, RET, DUFFZERO and DUFFCOPY whose target names a symbol get
+//     a TYPE_BRANCH destination;
+//   - MOVF/MOVD of a floating-point constant becomes MOVW/MOVV from REGZERO
+//     when the constant's bit pattern is all zero, and otherwise a load
+//     from the symbol returned by ctxt.Float32Sym/Float64Sym;
+//   - SUB, SUBU, SUBV and SUBVU of a constant become the matching ADD of
+//     the negated constant.
+//
+// newprog is not used.
+func progedit(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) {
+       // Rewrite JMP/JAL to symbol as TYPE_BRANCH.
+       switch p.As {
+       case AJMP, AJAL, ARET, obj.ADUFFZERO, obj.ADUFFCOPY:
+               if p.To.Sym != nil {
+                       p.To.Type = obj.TYPE_BRANCH
+               }
+       }
+
+       // Rewrite float constants: zero comes from REGZERO, everything
+       // else from a value stored in memory.
+       if p.From.Type == obj.TYPE_FCONST {
+               switch p.As {
+               case AMOVF:
+                       f32 := float32(p.From.Val.(float64))
+                       if math.Float32bits(f32) == 0 {
+                               p.As = AMOVW
+                               p.From.Type = obj.TYPE_REG
+                               p.From.Reg = REGZERO
+                       } else {
+                               p.From.Type = obj.TYPE_MEM
+                               p.From.Sym = ctxt.Float32Sym(f32)
+                               p.From.Name = obj.NAME_EXTERN
+                               p.From.Offset = 0
+                       }
+
+               case AMOVD:
+                       f64 := p.From.Val.(float64)
+                       if math.Float64bits(f64) == 0 {
+                               p.As = AMOVV
+                               p.From.Type = obj.TYPE_REG
+                               p.From.Reg = REGZERO
+                       } else {
+                               p.From.Type = obj.TYPE_MEM
+                               p.From.Sym = ctxt.Float64Sym(f64)
+                               p.From.Name = obj.NAME_EXTERN
+                               p.From.Offset = 0
+                       }
+               }
+       }
+
+       // Rewrite SUB constants into ADD of the negated constant.
+       if p.From.Type != obj.TYPE_CONST {
+               return
+       }
+       var add obj.As
+       switch p.As {
+       case ASUB:
+               add = AADD
+       case ASUBU:
+               add = AADDU
+       case ASUBV:
+               add = AADDV
+       case ASUBVU:
+               add = AADDVU
+       default:
+               return
+       }
+       p.From.Offset = -p.From.Offset
+       p.As = add
+}
+
+func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
+       c := ctxt0{ctxt: ctxt, newprog: newprog, cursym: cursym}
+
+       p := c.cursym.Func().Text
+       textstksiz := p.To.Offset
+
+       if textstksiz < 0 {
+               c.ctxt.Diag("negative frame size %d - did you mean NOFRAME?", textstksiz)
+       }
+       if p.From.Sym.NoFrame() {
+               if textstksiz != 0 {
+                       c.ctxt.Diag("NOFRAME functions must have a frame size of 0, not %d", textstksiz)
+               }
+       }
+
+       c.cursym.Func().Args = p.To.Val.(int32)
+       c.cursym.Func().Locals = int32(textstksiz)
+
+       /*
+        * find leaf subroutines
+        * expand RET
+        */
+
+       for p := c.cursym.Func().Text; p != nil; p = p.Link {
+               switch p.As {
+               case obj.ATEXT:
+                       p.Mark |= LABEL | LEAF | SYNC
+                       if p.Link != nil {
+                               p.Link.Mark |= LABEL
+                       }
+
+               case AMOVW,
+                       AMOVV:
+                       if p.To.Type == obj.TYPE_REG && p.To.Reg >= REG_SPECIAL {
+                               p.Mark |= LABEL | SYNC
+                               break
+                       }
+                       if p.From.Type == obj.TYPE_REG && p.From.Reg >= REG_SPECIAL {
+                               p.Mark |= LABEL | SYNC
+                       }
+
+               case ASYSCALL,
+                       AWORD:
+                       p.Mark |= LABEL | SYNC
+
+               case ANOR:
+                       if p.To.Type == obj.TYPE_REG {
+                               if p.To.Reg == REGZERO {
+                                       p.Mark |= LABEL | SYNC
+                               }
+                       }
+
+               case AJAL,
+                       obj.ADUFFZERO,
+                       obj.ADUFFCOPY:
+                       c.cursym.Func().Text.Mark &^= LEAF
+                       fallthrough
+
+               case AJMP,
+                       ABEQ,
+                       ABGEU,
+                       ABLTU,
+                       ABLTZ,
+                       ABNE,
+                       ABFPT, ABFPF:
+                       p.Mark |= BRANCH
+                       q1 := p.To.Target()
+                       if q1 != nil {
+                               for q1.As == obj.ANOP {
+                                       q1 = q1.Link
+                                       p.To.SetTarget(q1)
+                               }
+
+                               if q1.Mark&LEAF == 0 {
+                                       q1.Mark |= LABEL
+                               }
+                       }
+                       q1 = p.Link
+                       if q1 != nil {
+                               q1.Mark |= LABEL
+                       }
+
+               case ARET:
+                       if p.Link != nil {
+                               p.Link.Mark |= LABEL
+                       }
+               }
+       }
+
+       var mov, add obj.As
+
+       add = AADDV
+       mov = AMOVV
+
+       var q *obj.Prog
+       var q1 *obj.Prog
+       autosize := int32(0)
+       var p1 *obj.Prog
+       var p2 *obj.Prog
+       for p := c.cursym.Func().Text; p != nil; p = p.Link {
+               o := p.As
+               switch o {
+               case obj.ATEXT:
+                       autosize = int32(textstksiz)
+
+                       if p.Mark&LEAF != 0 && autosize == 0 {
+                               // A leaf function with no locals has no frame.
+                               p.From.Sym.Set(obj.AttrNoFrame, true)
+                       }
+
+                       if !p.From.Sym.NoFrame() {
+                               // If there is a stack frame at all, it includes
+                               // space to save the LR.
+                               autosize += int32(c.ctxt.Arch.FixedFrameSize)
+                       }
+
+                       if autosize&4 != 0 {
+                               autosize += 4
+                       }
+
+                       if autosize == 0 && c.cursym.Func().Text.Mark&LEAF == 0 {
+                               if c.cursym.Func().Text.From.Sym.NoSplit() {
+                                       if ctxt.Debugvlog {
+                                               ctxt.Logf("save suppressed in: %s\n", c.cursym.Name)
+                                       }
+
+                                       c.cursym.Func().Text.Mark |= LEAF
+                               }
+                       }
+
+                       p.To.Offset = int64(autosize) - ctxt.Arch.FixedFrameSize
+
+                       if c.cursym.Func().Text.Mark&LEAF != 0 {
+                               c.cursym.Set(obj.AttrLeaf, true)
+                               if p.From.Sym.NoFrame() {
+                                       break
+                               }
+                       }
+
+                       if !p.From.Sym.NoSplit() {
+                               p = c.stacksplit(p, autosize) // emit split check
+                       }
+
+                       q = p
+
+                       if autosize != 0 {
+                               // Make sure to save link register for non-empty frame, even if
+                               // it is a leaf function, so that traceback works.
+                               // Store link register before decrement SP, so if a signal comes
+                               // during the execution of the function prologue, the traceback
+                               // code will not see a half-updated stack frame.
+                               // This sequence is not async preemptible, as if we open a frame
+                               // at the current SP, it will clobber the saved LR.
+                               q = c.ctxt.StartUnsafePoint(q, c.newprog)
+
+                               q = obj.Appendp(q, newprog)
+                               q.As = mov
+                               q.Pos = p.Pos
+                               q.From.Type = obj.TYPE_REG
+                               q.From.Reg = REGLINK
+                               q.To.Type = obj.TYPE_MEM
+                               q.To.Offset = int64(-autosize)
+                               q.To.Reg = REGSP
+
+                               q = obj.Appendp(q, newprog)
+                               q.As = add
+                               q.Pos = p.Pos
+                               q.From.Type = obj.TYPE_CONST
+                               q.From.Offset = int64(-autosize)
+                               q.To.Type = obj.TYPE_REG
+                               q.To.Reg = REGSP
+                               q.Spadj = +autosize
+
+                               q = c.ctxt.EndUnsafePoint(q, c.newprog, -1)
+                       }
+
+                       if c.cursym.Func().Text.From.Sym.Wrapper() && c.cursym.Func().Text.Mark&LEAF == 0 {
+                               // if(g->panic != nil && g->panic->argp == FP) g->panic->argp = bottom-of-frame
+                               //
+                               //      MOV     g_panic(g), R1
+                               //      BEQ     R1, end
+                               //      MOV     panic_argp(R1), R2
+                               //      ADD     $(autosize+FIXED_FRAME), R29, R3
+                               //      BNE     R2, R3, end
+                               //      ADD     $FIXED_FRAME, R29, R2
+                               //      MOV     R2, panic_argp(R1)
+                               // end:
+                               //      NOP
+                               //
+                               // The NOP is needed to give the jumps somewhere to land.
+                               // It is a liblink NOP, not a hardware NOP: it encodes to 0 instruction bytes.
+                               //
+                               // We don't generate this for leafs because that means the wrapped
+                               // function was inlined into the wrapper.
+
+                               q = obj.Appendp(q, newprog)
+
+                               q.As = mov
+                               q.From.Type = obj.TYPE_MEM
+                               q.From.Reg = REGG
+                               q.From.Offset = 4 * int64(c.ctxt.Arch.PtrSize) // G.panic
+                               q.To.Type = obj.TYPE_REG
+                               q.To.Reg = REG_R19
+
+                               q = obj.Appendp(q, newprog)
+                               q.As = ABEQ
+                               q.From.Type = obj.TYPE_REG
+                               q.From.Reg = REG_R19
+                               q.To.Type = obj.TYPE_BRANCH
+                               q.Mark |= BRANCH
+                               p1 = q
+
+                               q = obj.Appendp(q, newprog)
+                               q.As = mov
+                               q.From.Type = obj.TYPE_MEM
+                               q.From.Reg = REG_R19
+                               q.From.Offset = 0 // Panic.argp
+                               q.To.Type = obj.TYPE_REG
+                               q.To.Reg = REG_R4
+
+                               q = obj.Appendp(q, newprog)
+                               q.As = add
+                               q.From.Type = obj.TYPE_CONST
+                               q.From.Offset = int64(autosize) + ctxt.Arch.FixedFrameSize
+                               q.Reg = REGSP
+                               q.To.Type = obj.TYPE_REG
+                               q.To.Reg = REG_R5
+
+                               q = obj.Appendp(q, newprog)
+                               q.As = ABNE
+                               q.From.Type = obj.TYPE_REG
+                               q.From.Reg = REG_R4
+                               q.Reg = REG_R5
+                               q.To.Type = obj.TYPE_BRANCH
+                               q.Mark |= BRANCH
+                               p2 = q
+
+                               q = obj.Appendp(q, newprog)
+                               q.As = add
+                               q.From.Type = obj.TYPE_CONST
+                               q.From.Offset = ctxt.Arch.FixedFrameSize
+                               q.Reg = REGSP
+                               q.To.Type = obj.TYPE_REG
+                               q.To.Reg = REG_R4
+
+                               q = obj.Appendp(q, newprog)
+                               q.As = mov
+                               q.From.Type = obj.TYPE_REG
+                               q.From.Reg = REG_R4
+                               q.To.Type = obj.TYPE_MEM
+                               q.To.Reg = REG_R19
+                               q.To.Offset = 0 // Panic.argp
+
+                               q = obj.Appendp(q, newprog)
+
+                               q.As = obj.ANOP
+                               p1.To.SetTarget(q)
+                               p2.To.SetTarget(q)
+                       }
+
+               case ARET:
+                       if p.From.Type == obj.TYPE_CONST {
+                               ctxt.Diag("using BECOME (%v) is not supported!", p)
+                               break
+                       }
+
+                       retSym := p.To.Sym
+                       p.To.Name = obj.NAME_NONE // clear fields as we may modify p to other instruction
+                       p.To.Sym = nil
+
+                       if c.cursym.Func().Text.Mark&LEAF != 0 {
+                               if autosize == 0 {
+                                       p.As = AJMP
+                                       p.From = obj.Addr{}
+                                       if retSym != nil { // retjmp
+                                               p.To.Type = obj.TYPE_BRANCH
+                                               p.To.Name = obj.NAME_EXTERN
+                                               p.To.Sym = retSym
+                                       } else {
+                                               p.To.Type = obj.TYPE_MEM
+                                               p.To.Reg = REGLINK
+                                               p.To.Offset = 0
+                                       }
+                                       p.Mark |= BRANCH
+                                       break
+                               }
+
+                               p.As = add
+                               p.From.Type = obj.TYPE_CONST
+                               p.From.Offset = int64(autosize)
+                               p.To.Type = obj.TYPE_REG
+                               p.To.Reg = REGSP
+                               p.Spadj = -autosize
+
+                               q = c.newprog()
+                               q.As = AJMP
+                               q.Pos = p.Pos
+                               if retSym != nil { // retjmp
+                                       q.To.Type = obj.TYPE_BRANCH
+                                       q.To.Name = obj.NAME_EXTERN
+                                       q.To.Sym = retSym
+                               } else {
+                                       q.To.Type = obj.TYPE_MEM
+                                       q.To.Offset = 0
+                                       q.To.Reg = REGLINK
+                               }
+                               q.Mark |= BRANCH
+                               q.Spadj = +autosize
+
+                               q.Link = p.Link
+                               p.Link = q
+                               break
+                       }
+
+                       p.As = mov
+                       p.From.Type = obj.TYPE_MEM
+                       p.From.Offset = 0
+                       p.From.Reg = REGSP
+                       p.To.Type = obj.TYPE_REG
+                       p.To.Reg = REGLINK
+
+                       if autosize != 0 {
+                               q = c.newprog()
+                               q.As = add
+                               q.Pos = p.Pos
+                               q.From.Type = obj.TYPE_CONST
+                               q.From.Offset = int64(autosize)
+                               q.To.Type = obj.TYPE_REG
+                               q.To.Reg = REGSP
+                               q.Spadj = -autosize
+
+                               q.Link = p.Link
+                               p.Link = q
+                       }
+
+                       q1 = c.newprog()
+                       q1.As = AJMP
+                       q1.Pos = p.Pos
+                       if retSym != nil { // retjmp
+                               q1.To.Type = obj.TYPE_BRANCH
+                               q1.To.Name = obj.NAME_EXTERN
+                               q1.To.Sym = retSym
+                       } else {
+                               q1.To.Type = obj.TYPE_MEM
+                               q1.To.Offset = 0
+                               q1.To.Reg = REGLINK
+                       }
+                       q1.Mark |= BRANCH
+                       q1.Spadj = +autosize
+
+                       q1.Link = q.Link
+                       q.Link = q1
+
+               case AADD,
+                       AADDU,
+                       AADDV,
+                       AADDVU:
+                       if p.To.Type == obj.TYPE_REG && p.To.Reg == REGSP && p.From.Type == obj.TYPE_CONST {
+                               p.Spadj = int32(-p.From.Offset)
+                       }
+
+               case obj.AGETCALLERPC:
+                       if cursym.Leaf() {
+                               // MOV LR, Rd
+                               p.As = mov
+                               p.From.Type = obj.TYPE_REG
+                               p.From.Reg = REGLINK
+                       } else {
+                               // MOV (RSP), Rd
+                               p.As = mov
+                               p.From.Type = obj.TYPE_MEM
+                               p.From.Reg = REGSP
+                       }
+               }
+       }
+}
+
+// stacksplit appends the stack-overflow check for the current function
+// after p and returns the last Prog it appended, a zero-width obj.ANOP
+// that is the branch target taken when no stack growth is needed.
+//
+// framesize is the frame size used to choose between the small-frame and
+// large-frame forms of the check.
+//
+// The emitted sequence is, in order:
+//   - if c.ctxt.Flag_maymorestack is set, a call to that function with
+//     REGLINK and REGCTXT saved and restored around it;
+//   - a load of the stack guard from g into R19;
+//   - the comparison of SP (adjusted by framesize for large frames)
+//     against the guard, followed by a BNE that skips the slow path;
+//   - the slow path: copy REGLINK to R5, call the appropriate
+//     runtime.morestack variant, then jump back to the guard load.
+//
+// R19, R4 and R5 are used as scratch registers by the emitted code.
+func (c *ctxt0) stacksplit(p *obj.Prog, framesize int32) *obj.Prog {
+       var mov, add obj.As
+
+       add = AADDV
+       mov = AMOVV
+       if c.ctxt.Flag_maymorestack != "" {
+               // Save LR and REGCTXT.
+               // Two pointer-sized slots are reserved below SP for them.
+               frameSize := 2 * c.ctxt.Arch.PtrSize
+
+               p = c.ctxt.StartUnsafePoint(p, c.newprog)
+
+               // MOV  REGLINK, -8/-16(SP)
+               p = obj.Appendp(p, c.newprog)
+               p.As = mov
+               p.From.Type = obj.TYPE_REG
+               p.From.Reg = REGLINK
+               p.To.Type = obj.TYPE_MEM
+               p.To.Offset = int64(-frameSize)
+               p.To.Reg = REGSP
+
+               // MOV  REGCTXT, -4/-8(SP)
+               p = obj.Appendp(p, c.newprog)
+               p.As = mov
+               p.From.Type = obj.TYPE_REG
+               p.From.Reg = REGCTXT
+               p.To.Type = obj.TYPE_MEM
+               p.To.Offset = -int64(c.ctxt.Arch.PtrSize)
+               p.To.Reg = REGSP
+
+               // ADD  $-8/$-16, SP
+               p = obj.Appendp(p, c.newprog)
+               p.As = add
+               p.From.Type = obj.TYPE_CONST
+               p.From.Offset = int64(-frameSize)
+               p.To.Type = obj.TYPE_REG
+               p.To.Reg = REGSP
+               p.Spadj = int32(frameSize)
+
+               // JAL  maymorestack
+               p = obj.Appendp(p, c.newprog)
+               p.As = AJAL
+               p.To.Type = obj.TYPE_BRANCH
+               // See ../x86/obj6.go
+               p.To.Sym = c.ctxt.LookupABI(c.ctxt.Flag_maymorestack, c.cursym.ABI())
+               p.Mark |= BRANCH
+
+               // Restore LR and REGCTXT.
+
+               // MOV  0(SP), REGLINK
+               p = obj.Appendp(p, c.newprog)
+               p.As = mov
+               p.From.Type = obj.TYPE_MEM
+               p.From.Offset = 0
+               p.From.Reg = REGSP
+               p.To.Type = obj.TYPE_REG
+               p.To.Reg = REGLINK
+
+               // MOV  4/8(SP), REGCTXT
+               p = obj.Appendp(p, c.newprog)
+               p.As = mov
+               p.From.Type = obj.TYPE_MEM
+               p.From.Offset = int64(c.ctxt.Arch.PtrSize)
+               p.From.Reg = REGSP
+               p.To.Type = obj.TYPE_REG
+               p.To.Reg = REGCTXT
+
+               // ADD  $8/$16, SP
+               p = obj.Appendp(p, c.newprog)
+               p.As = add
+               p.From.Type = obj.TYPE_CONST
+               p.From.Offset = int64(frameSize)
+               p.To.Type = obj.TYPE_REG
+               p.To.Reg = REGSP
+               p.Spadj = int32(-frameSize)
+
+               p = c.ctxt.EndUnsafePoint(p, c.newprog, -1)
+       }
+
+       // Jump back to here after morestack returns.
+       // The instruction appended next (startPred.Link) becomes the target
+       // of the JMP emitted at the end of the slow path.
+       startPred := p
+
+       // MOV  g_stackguard(g), R19
+       p = obj.Appendp(p, c.newprog)
+
+       p.As = mov
+       p.From.Type = obj.TYPE_MEM
+       p.From.Reg = REGG
+       p.From.Offset = 2 * int64(c.ctxt.Arch.PtrSize) // G.stackguard0
+       if c.cursym.CFunc() {
+               p.From.Offset = 3 * int64(c.ctxt.Arch.PtrSize) // G.stackguard1
+       }
+       p.To.Type = obj.TYPE_REG
+       p.To.Reg = REG_R19
+
+       // Mark the stack bound check and morestack call async nonpreemptible.
+       // If we get preempted here, when resumed the preemption request is
+       // cleared, but we'll still call morestack, which will double the stack
+       // unnecessarily. See issue #35470.
+       p = c.ctxt.StartUnsafePoint(p, c.newprog)
+
+       // q is the underflow-guard branch; it is only emitted for frames
+       // larger than objabi.StackBig and stays nil otherwise.
+       var q *obj.Prog
+       if framesize <= objabi.StackSmall {
+               // small stack: SP < stackguard
+               //      SGTU    SP, stackguard, R19
+               p = obj.Appendp(p, c.newprog)
+
+               p.As = ASGTU
+               p.From.Type = obj.TYPE_REG
+               p.From.Reg = REGSP
+               p.Reg = REG_R19
+               p.To.Type = obj.TYPE_REG
+               p.To.Reg = REG_R19
+       } else {
+               // large stack: SP-framesize < stackguard-StackSmall
+               offset := int64(framesize) - objabi.StackSmall
+               if framesize > objabi.StackBig {
+                       // Such a large stack we need to protect against underflow.
+                       // The runtime guarantees SP > objabi.StackBig, but
+                       // framesize is large enough that SP-framesize may
+                       // underflow, causing a direct comparison with the
+                       // stack guard to incorrectly succeed. We explicitly
+                       // guard against underflow.
+                       //
+                       //      SGTU    $(framesize-StackSmall), SP, R4
+                       //      BNE     R4, label-of-call-to-morestack
+
+                       p = obj.Appendp(p, c.newprog)
+                       p.As = ASGTU
+                       p.From.Type = obj.TYPE_CONST
+                       p.From.Offset = offset
+                       p.Reg = REGSP
+                       p.To.Type = obj.TYPE_REG
+                       p.To.Reg = REG_R4
+
+                       p = obj.Appendp(p, c.newprog)
+                       q = p
+                       p.As = ABNE
+                       p.From.Type = obj.TYPE_REG
+                       p.From.Reg = REG_R4
+                       p.To.Type = obj.TYPE_BRANCH
+                       p.Mark |= BRANCH
+               }
+
+               // ADD  $-(framesize-StackSmall), SP, R4
+               p = obj.Appendp(p, c.newprog)
+
+               p.As = add
+               p.From.Type = obj.TYPE_CONST
+               p.From.Offset = -offset
+               p.Reg = REGSP
+               p.To.Type = obj.TYPE_REG
+               p.To.Reg = REG_R4
+
+               // SGTU R4, stackguard, R19
+               p = obj.Appendp(p, c.newprog)
+               p.As = ASGTU
+               p.From.Type = obj.TYPE_REG
+               p.From.Reg = REG_R4
+               p.Reg = REG_R19
+               p.To.Type = obj.TYPE_REG
+               p.To.Reg = REG_R19
+       }
+
+       // q1: BNE      R19, done
+       // Its target is filled in at the end, once the trailing NOP exists.
+       p = obj.Appendp(p, c.newprog)
+       q1 := p
+
+       p.As = ABNE
+       p.From.Type = obj.TYPE_REG
+       p.From.Reg = REG_R19
+       p.To.Type = obj.TYPE_BRANCH
+       p.Mark |= BRANCH
+
+       // MOV  LINK, R5
+       // NOTE(review): presumably the morestack routines expect the caller's
+       // LR in R5 — confirm against the runtime assembly.
+       p = obj.Appendp(p, c.newprog)
+
+       p.As = mov
+       p.From.Type = obj.TYPE_REG
+       p.From.Reg = REGLINK
+       p.To.Type = obj.TYPE_REG
+       p.To.Reg = REG_R5
+       if q != nil {
+               // The underflow guard branches straight to the start of the
+               // morestack call sequence.
+               q.To.SetTarget(p)
+               p.Mark |= LABEL
+       }
+
+       p = c.ctxt.EmitEntryStackMap(c.cursym, p, c.newprog)
+
+       // JAL  runtime.morestack(SB)
+       p = obj.Appendp(p, c.newprog)
+
+       p.As = AJAL
+       p.To.Type = obj.TYPE_BRANCH
+       if c.cursym.CFunc() {
+               p.To.Sym = c.ctxt.Lookup("runtime.morestackc")
+       } else if !c.cursym.Func().Text.From.Sym.NeedCtxt() {
+               p.To.Sym = c.ctxt.Lookup("runtime.morestack_noctxt")
+       } else {
+               p.To.Sym = c.ctxt.Lookup("runtime.morestack")
+       }
+       p.Mark |= BRANCH
+
+       p = c.ctxt.EndUnsafePoint(p, c.newprog, -1)
+
+       // JMP  start
+       p = obj.Appendp(p, c.newprog)
+
+       p.As = AJMP
+       p.To.Type = obj.TYPE_BRANCH
+       p.To.SetTarget(startPred.Link)
+       startPred.Link.Mark |= LABEL
+       p.Mark |= BRANCH
+
+       // placeholder for q1's jump target
+       p = obj.Appendp(p, c.newprog)
+
+       p.As = obj.ANOP // zero-width place holder
+       q1.To.SetTarget(p)
+
+       return p
+}
+
+func (c *ctxt0) addnop(p *obj.Prog) {
+       q := c.newprog()
+       q.As = ANOOP
+       q.Pos = p.Pos
+       q.Link = p.Link
+       p.Link = q
+}
+
+// Linkloong64 is the obj.LinkArch descriptor for loong64. It ties
+// sys.ArchLoong64 to this package's entry points: buildop for
+// initialization, preprocess and progedit for instruction rewriting,
+// span0 for assembly, and LOONG64DWARFRegisters for the DWARF register
+// mapping.
+var Linkloong64 = obj.LinkArch{
+       Arch:           sys.ArchLoong64,
+       Init:           buildop,
+       Preprocess:     preprocess,
+       Assemble:       span0,
+       Progedit:       progedit,
+       DWARFRegisters: LOONG64DWARFRegisters,
+}
index 4e1a2d19b676a5090a6688b9c2aa210a19ed3d89..55b3dd26edc3daaa9d16deefb4e462cf576c057e 100644 (file)
@@ -507,15 +507,16 @@ var regSpace []regSet
 const (
        // Because of masking operations in the encodings, each register
        // space should start at 0 modulo some power of 2.
-       RBase386   = 1 * 1024
-       RBaseAMD64 = 2 * 1024
-       RBaseARM   = 3 * 1024
-       RBasePPC64 = 4 * 1024  // range [4k, 8k)
-       RBaseARM64 = 8 * 1024  // range [8k, 13k)
-       RBaseMIPS  = 13 * 1024 // range [13k, 14k)
-       RBaseS390X = 14 * 1024 // range [14k, 15k)
-       RBaseRISCV = 15 * 1024 // range [15k, 16k)
-       RBaseWasm  = 16 * 1024
+       RBase386     = 1 * 1024
+       RBaseAMD64   = 2 * 1024
+       RBaseARM     = 3 * 1024
+       RBasePPC64   = 4 * 1024  // range [4k, 8k)
+       RBaseARM64   = 8 * 1024  // range [8k, 13k)
+       RBaseMIPS    = 13 * 1024 // range [13k, 14k)
+       RBaseS390X   = 14 * 1024 // range [14k, 15k)
+       RBaseRISCV   = 15 * 1024 // range [15k, 16k)
+       RBaseWasm    = 16 * 1024
+       RBaseLOONG64 = 17 * 1024
 )
 
 // RegisterRegister binds a pretty-printer (Rconv) for register