}
func genLoong64(g *gen) {
- p := g.p
+ const xReg = "R4" // *xRegState
+
+ p, label := g.p, g.label
mov := "MOVV"
- movf := "MOVD"
add := "ADDV"
sub := "SUBV"
regsize := 8
l.add(mov, reg, regsize)
}
- // Add floating point registers F0-F31.
- for i := 0; i <= 31; i++ {
- reg := fmt.Sprintf("F%d", i)
- l.add(movf, reg, regsize)
- }
-
// Add condition flag registers fcc0-fcc7
sv := ""
rs := ""
mov+" %d(R3), R5\n"+rs,
regsize)
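+ // Create layouts for the LASX (256-bit X), LSX (128-bit V) and FP (64-bit F)
+ // registers. The loop below rewrites the LSX and FP slot offsets to the LASX
+ // ones, so all three layouts address the same save area and only the LASX
+ // layout is handed to writeXRegs.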
+ lasxRegs := layout{sp: xReg}
+ lsxRegs := lasxRegs
+ fpRegs := lasxRegs
+ for i := 0; i <= 31; i++ {
+ lasxRegs.add("XVMOVQ", fmt.Sprintf("X%d", i), 256/8)
+ lsxRegs.add("VMOVQ", fmt.Sprintf("V%d", i), 128/8)
+ fpRegs.add("MOVD", fmt.Sprintf("F%d", i), 64/8)
+ }
+
+ for i := range lsxRegs.regs {
+ for j := range lsxRegs.regs[i].regs {
+ lsxRegs.regs[i].regs[j].pos = lasxRegs.regs[i].regs[j].pos
+ fpRegs.regs[i].regs[j].pos = lasxRegs.regs[i].regs[j].pos
+ }
+ }
+ writeXRegs(g.goarch, &lasxRegs)
+
// allocate frame, save PC of interrupted instruction (in LR)
p(mov+" R1, -%d(R3)", l.stack)
p(sub+" $%d, R3", l.stack)
+ p("// Save GPs")
l.save(g)
+
+ p("// Save extended register state to p.xRegs.scratch")
+ p("MOVV g_m(g), %s", xReg)
+ p("MOVV m_p(%s), %s", xReg, xReg)
+ p("ADDV $(p_xRegs+xRegPerP_scratch), %s, %s", xReg, xReg)
+
+ p("MOVBU internal∕cpu·Loong64+const_offsetLOONG64HasLASX(SB), R5")
+ p("BNE R5, saveLASX")
+
+ p("MOVBU internal∕cpu·Loong64+const_offsetLOONG64HasLSX(SB), R5")
+ p("BNE R5, saveLSX")
+
+ label("saveFP:")
+ fpRegs.save(g)
+ p("JMP preempt")
+
+ label("saveLSX:")
+ lsxRegs.save(g)
+ p("JMP preempt")
+
+ label("saveLASX:")
+ lasxRegs.save(g)
+
+ label("preempt:")
p("CALL ·asyncPreempt2(SB)")
+
+ p("// Restore non-GPs from *p.xRegs.cache")
+ p("MOVV g_m(g), %s", xReg)
+ p("MOVV m_p(%s), %s", xReg, xReg)
+ p("MOVV (p_xRegs+xRegPerP_cache)(%s), %s", xReg, xReg)
+
+ p("MOVBU internal∕cpu·Loong64+const_offsetLOONG64HasLASX(SB), R5")
+ p("BNE R5, restoreLASX")
+
+ p("MOVBU internal∕cpu·Loong64+const_offsetLOONG64HasLSX(SB), R5")
+ p("BNE R5, restoreLSX")
+
+ label("restoreFP:")
+ fpRegs.restore(g)
+ p("JMP restoreGPs")
+
+ label("restoreLSX:")
+ lsxRegs.restore(g)
+ p("JMP restoreGPs")
+
+ label("restoreLASX:")
+ lasxRegs.restore(g)
+
+ p("// Restore GPs")
+ label("restoreGPs:")
l.restore(g)
p(mov+" %d(R3), R1", l.stack) // sigctxt.pushCall has pushed LR (at interrupt) on stack, restore it
#include "textflag.h"
TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0
- MOVV R1, -480(R3)
- SUBV $480, R3
+ MOVV R1, -224(R3)
+ SUBV $224, R3
+ // Save GPs
MOVV R4, 8(R3)
MOVV R5, 16(R3)
MOVV R6, 24(R3)
MOVV R28, 192(R3)
MOVV R29, 200(R3)
MOVV R31, 208(R3)
- MOVD F0, 216(R3)
- MOVD F1, 224(R3)
- MOVD F2, 232(R3)
- MOVD F3, 240(R3)
- MOVD F4, 248(R3)
- MOVD F5, 256(R3)
- MOVD F6, 264(R3)
- MOVD F7, 272(R3)
- MOVD F8, 280(R3)
- MOVD F9, 288(R3)
- MOVD F10, 296(R3)
- MOVD F11, 304(R3)
- MOVD F12, 312(R3)
- MOVD F13, 320(R3)
- MOVD F14, 328(R3)
- MOVD F15, 336(R3)
- MOVD F16, 344(R3)
- MOVD F17, 352(R3)
- MOVD F18, 360(R3)
- MOVD F19, 368(R3)
- MOVD F20, 376(R3)
- MOVD F21, 384(R3)
- MOVD F22, 392(R3)
- MOVD F23, 400(R3)
- MOVD F24, 408(R3)
- MOVD F25, 416(R3)
- MOVD F26, 424(R3)
- MOVD F27, 432(R3)
- MOVD F28, 440(R3)
- MOVD F29, 448(R3)
- MOVD F30, 456(R3)
- MOVD F31, 464(R3)
MOVV FCC0, R4
BSTRINSV $7, R4, $0, R5
MOVV FCC1, R4
BSTRINSV $55, R4, $48, R5
MOVV FCC7, R4
BSTRINSV $63, R4, $56, R5
- MOVV R5, 472(R3)
+ MOVV R5, 216(R3)
+ // Save extended register state to p.xRegs.scratch
+ MOVV g_m(g), R4
+ MOVV m_p(R4), R4
+ ADDV $(p_xRegs+xRegPerP_scratch), R4, R4
+ MOVBU internal∕cpu·Loong64+const_offsetLOONG64HasLASX(SB), R5
+ BNE R5, saveLASX
+ MOVBU internal∕cpu·Loong64+const_offsetLOONG64HasLSX(SB), R5
+ BNE R5, saveLSX
+saveFP:
+ MOVD F0, 0(R4)
+ MOVD F1, 32(R4)
+ MOVD F2, 64(R4)
+ MOVD F3, 96(R4)
+ MOVD F4, 128(R4)
+ MOVD F5, 160(R4)
+ MOVD F6, 192(R4)
+ MOVD F7, 224(R4)
+ MOVD F8, 256(R4)
+ MOVD F9, 288(R4)
+ MOVD F10, 320(R4)
+ MOVD F11, 352(R4)
+ MOVD F12, 384(R4)
+ MOVD F13, 416(R4)
+ MOVD F14, 448(R4)
+ MOVD F15, 480(R4)
+ MOVD F16, 512(R4)
+ MOVD F17, 544(R4)
+ MOVD F18, 576(R4)
+ MOVD F19, 608(R4)
+ MOVD F20, 640(R4)
+ MOVD F21, 672(R4)
+ MOVD F22, 704(R4)
+ MOVD F23, 736(R4)
+ MOVD F24, 768(R4)
+ MOVD F25, 800(R4)
+ MOVD F26, 832(R4)
+ MOVD F27, 864(R4)
+ MOVD F28, 896(R4)
+ MOVD F29, 928(R4)
+ MOVD F30, 960(R4)
+ MOVD F31, 992(R4)
+ JMP preempt
+saveLSX:
+ VMOVQ V0, 0(R4)
+ VMOVQ V1, 32(R4)
+ VMOVQ V2, 64(R4)
+ VMOVQ V3, 96(R4)
+ VMOVQ V4, 128(R4)
+ VMOVQ V5, 160(R4)
+ VMOVQ V6, 192(R4)
+ VMOVQ V7, 224(R4)
+ VMOVQ V8, 256(R4)
+ VMOVQ V9, 288(R4)
+ VMOVQ V10, 320(R4)
+ VMOVQ V11, 352(R4)
+ VMOVQ V12, 384(R4)
+ VMOVQ V13, 416(R4)
+ VMOVQ V14, 448(R4)
+ VMOVQ V15, 480(R4)
+ VMOVQ V16, 512(R4)
+ VMOVQ V17, 544(R4)
+ VMOVQ V18, 576(R4)
+ VMOVQ V19, 608(R4)
+ VMOVQ V20, 640(R4)
+ VMOVQ V21, 672(R4)
+ VMOVQ V22, 704(R4)
+ VMOVQ V23, 736(R4)
+ VMOVQ V24, 768(R4)
+ VMOVQ V25, 800(R4)
+ VMOVQ V26, 832(R4)
+ VMOVQ V27, 864(R4)
+ VMOVQ V28, 896(R4)
+ VMOVQ V29, 928(R4)
+ VMOVQ V30, 960(R4)
+ VMOVQ V31, 992(R4)
+ JMP preempt
+saveLASX:
+ XVMOVQ X0, 0(R4)
+ XVMOVQ X1, 32(R4)
+ XVMOVQ X2, 64(R4)
+ XVMOVQ X3, 96(R4)
+ XVMOVQ X4, 128(R4)
+ XVMOVQ X5, 160(R4)
+ XVMOVQ X6, 192(R4)
+ XVMOVQ X7, 224(R4)
+ XVMOVQ X8, 256(R4)
+ XVMOVQ X9, 288(R4)
+ XVMOVQ X10, 320(R4)
+ XVMOVQ X11, 352(R4)
+ XVMOVQ X12, 384(R4)
+ XVMOVQ X13, 416(R4)
+ XVMOVQ X14, 448(R4)
+ XVMOVQ X15, 480(R4)
+ XVMOVQ X16, 512(R4)
+ XVMOVQ X17, 544(R4)
+ XVMOVQ X18, 576(R4)
+ XVMOVQ X19, 608(R4)
+ XVMOVQ X20, 640(R4)
+ XVMOVQ X21, 672(R4)
+ XVMOVQ X22, 704(R4)
+ XVMOVQ X23, 736(R4)
+ XVMOVQ X24, 768(R4)
+ XVMOVQ X25, 800(R4)
+ XVMOVQ X26, 832(R4)
+ XVMOVQ X27, 864(R4)
+ XVMOVQ X28, 896(R4)
+ XVMOVQ X29, 928(R4)
+ XVMOVQ X30, 960(R4)
+ XVMOVQ X31, 992(R4)
+preempt:
CALL ·asyncPreempt2(SB)
- MOVV 472(R3), R5
+ // Restore non-GPs from *p.xRegs.cache
+ MOVV g_m(g), R4
+ MOVV m_p(R4), R4
+ MOVV (p_xRegs+xRegPerP_cache)(R4), R4
+ MOVBU internal∕cpu·Loong64+const_offsetLOONG64HasLASX(SB), R5
+ BNE R5, restoreLASX
+ MOVBU internal∕cpu·Loong64+const_offsetLOONG64HasLSX(SB), R5
+ BNE R5, restoreLSX
+restoreFP:
+ MOVD 992(R4), F31
+ MOVD 960(R4), F30
+ MOVD 928(R4), F29
+ MOVD 896(R4), F28
+ MOVD 864(R4), F27
+ MOVD 832(R4), F26
+ MOVD 800(R4), F25
+ MOVD 768(R4), F24
+ MOVD 736(R4), F23
+ MOVD 704(R4), F22
+ MOVD 672(R4), F21
+ MOVD 640(R4), F20
+ MOVD 608(R4), F19
+ MOVD 576(R4), F18
+ MOVD 544(R4), F17
+ MOVD 512(R4), F16
+ MOVD 480(R4), F15
+ MOVD 448(R4), F14
+ MOVD 416(R4), F13
+ MOVD 384(R4), F12
+ MOVD 352(R4), F11
+ MOVD 320(R4), F10
+ MOVD 288(R4), F9
+ MOVD 256(R4), F8
+ MOVD 224(R4), F7
+ MOVD 192(R4), F6
+ MOVD 160(R4), F5
+ MOVD 128(R4), F4
+ MOVD 96(R4), F3
+ MOVD 64(R4), F2
+ MOVD 32(R4), F1
+ MOVD 0(R4), F0
+ JMP restoreGPs
+restoreLSX:
+ VMOVQ 992(R4), V31
+ VMOVQ 960(R4), V30
+ VMOVQ 928(R4), V29
+ VMOVQ 896(R4), V28
+ VMOVQ 864(R4), V27
+ VMOVQ 832(R4), V26
+ VMOVQ 800(R4), V25
+ VMOVQ 768(R4), V24
+ VMOVQ 736(R4), V23
+ VMOVQ 704(R4), V22
+ VMOVQ 672(R4), V21
+ VMOVQ 640(R4), V20
+ VMOVQ 608(R4), V19
+ VMOVQ 576(R4), V18
+ VMOVQ 544(R4), V17
+ VMOVQ 512(R4), V16
+ VMOVQ 480(R4), V15
+ VMOVQ 448(R4), V14
+ VMOVQ 416(R4), V13
+ VMOVQ 384(R4), V12
+ VMOVQ 352(R4), V11
+ VMOVQ 320(R4), V10
+ VMOVQ 288(R4), V9
+ VMOVQ 256(R4), V8
+ VMOVQ 224(R4), V7
+ VMOVQ 192(R4), V6
+ VMOVQ 160(R4), V5
+ VMOVQ 128(R4), V4
+ VMOVQ 96(R4), V3
+ VMOVQ 64(R4), V2
+ VMOVQ 32(R4), V1
+ VMOVQ 0(R4), V0
+ JMP restoreGPs
+restoreLASX:
+ XVMOVQ 992(R4), X31
+ XVMOVQ 960(R4), X30
+ XVMOVQ 928(R4), X29
+ XVMOVQ 896(R4), X28
+ XVMOVQ 864(R4), X27
+ XVMOVQ 832(R4), X26
+ XVMOVQ 800(R4), X25
+ XVMOVQ 768(R4), X24
+ XVMOVQ 736(R4), X23
+ XVMOVQ 704(R4), X22
+ XVMOVQ 672(R4), X21
+ XVMOVQ 640(R4), X20
+ XVMOVQ 608(R4), X19
+ XVMOVQ 576(R4), X18
+ XVMOVQ 544(R4), X17
+ XVMOVQ 512(R4), X16
+ XVMOVQ 480(R4), X15
+ XVMOVQ 448(R4), X14
+ XVMOVQ 416(R4), X13
+ XVMOVQ 384(R4), X12
+ XVMOVQ 352(R4), X11
+ XVMOVQ 320(R4), X10
+ XVMOVQ 288(R4), X9
+ XVMOVQ 256(R4), X8
+ XVMOVQ 224(R4), X7
+ XVMOVQ 192(R4), X6
+ XVMOVQ 160(R4), X5
+ XVMOVQ 128(R4), X4
+ XVMOVQ 96(R4), X3
+ XVMOVQ 64(R4), X2
+ XVMOVQ 32(R4), X1
+ XVMOVQ 0(R4), X0
+ // Restore GPs
+restoreGPs:
+ MOVV 216(R3), R5
BSTRPICKV $7, R5, $0, R4
MOVV R4, FCC0
BSTRPICKV $15, R5, $8, R4
MOVV R4, FCC6
BSTRPICKV $63, R5, $56, R4
MOVV R4, FCC7
- MOVD 464(R3), F31
- MOVD 456(R3), F30
- MOVD 448(R3), F29
- MOVD 440(R3), F28
- MOVD 432(R3), F27
- MOVD 424(R3), F26
- MOVD 416(R3), F25
- MOVD 408(R3), F24
- MOVD 400(R3), F23
- MOVD 392(R3), F22
- MOVD 384(R3), F21
- MOVD 376(R3), F20
- MOVD 368(R3), F19
- MOVD 360(R3), F18
- MOVD 352(R3), F17
- MOVD 344(R3), F16
- MOVD 336(R3), F15
- MOVD 328(R3), F14
- MOVD 320(R3), F13
- MOVD 312(R3), F12
- MOVD 304(R3), F11
- MOVD 296(R3), F10
- MOVD 288(R3), F9
- MOVD 280(R3), F8
- MOVD 272(R3), F7
- MOVD 264(R3), F6
- MOVD 256(R3), F5
- MOVD 248(R3), F4
- MOVD 240(R3), F3
- MOVD 232(R3), F2
- MOVD 224(R3), F1
- MOVD 216(R3), F0
MOVV 208(R3), R31
MOVV 200(R3), R29
MOVV 192(R3), R28
MOVV 24(R3), R6
MOVV 16(R3), R5
MOVV 8(R3), R4
- MOVV 480(R3), R1
+ MOVV 224(R3), R1
MOVV (R3), R30
- ADDV $488, R3
+ ADDV $232, R3
JMP (R30)