type xRegs struct {
`)
pos := 0
- for _, reg := range l.regs {
- if reg.pos != pos {
- log.Fatalf("padding not implemented")
- }
- typ := fmt.Sprintf("[%d]byte", reg.size)
- switch {
- case reg.size == 4 && reg.pos%4 == 0:
- typ = "uint32"
- case reg.size == 8 && reg.pos%8 == 0:
- typ = "uint64"
+ for _, seq := range l.regs {
+ for _, r := range seq.regs {
+ if r.pos != pos && !seq.fixedOffset {
+ log.Fatalf("padding not implemented")
+ }
+ typ := fmt.Sprintf("[%d]byte", r.size)
+ switch {
+ case r.size == 4 && r.pos%4 == 0:
+ typ = "uint32"
+ case r.size == 8 && r.pos%8 == 0:
+ typ = "uint64"
+ }
+ fmt.Fprintf(g.w, "\t%s %s\n", r.name, typ)
+ pos += r.size
}
- fmt.Fprintf(g.w, "\t%s %s\n", reg.reg, typ)
- pos += reg.size
}
fmt.Fprintf(g.w, "}\n")
type layout struct {
+ // stack is the running total of bytes reserved so far; it provides the
+ // position assigned to the next register added to the layout.
stack int
- regs []regPos
+ // regs lists the save/restore sequences in save order; one sequence may
+ // cover several registers handled by a single instruction.
+ regs []regSeq
sp string // stack pointer register
}
-type regPos struct {
- pos, size int
+// regInfo describes a single machine register within a sequence.
+type regInfo struct {
+ size int // register size in bytes
+ name string // register name
+
+ // Some register names may require a specific suffix.
+ // In ARM64, a suffix called an "arrangement specifier" can be added to
+ // a register name. For example:
+ //
+ // V0.B16
+ //
+ // In this case, "V0" is the register name, and ".B16" is the suffix.
+ suffix string
+ pos int // position on stack
+}
+
+// Some save/restore operations can involve multiple registers in a single
+// instruction. For example, the LDP/STP instructions in ARM64:
+//
+// LDP 8(RSP), (R0, R1)
+// STP (R0, R1), 8(RSP)
+//
+// In these cases, a pair of registers (R0, R1) is used as a single argument.
+type regSeq struct {
+ // saveOp and restoreOp are the assembler mnemonics used to save and
+ // restore this sequence (e.g. STP/LDP on ARM64).
saveOp string
restoreOp string
- reg string
+ regs []regInfo
+
+ // By default, all registers are saved on the stack, and the stack pointer offset
+ // is calculated based on the size of each register. For example (ARM64):
+ //
+ // STP (R0, R1), 8(RSP)
+ // STP (R2, R3), 24(RSP)
+ //
+ // However, automatic offset calculation may not always be desirable.
+ // In some cases, the offset must remain fixed:
+ //
+ // VST1.P [V0.B16, V1.B16, V2.B16, V3.B16], 64(R0)
+ // VST1.P [V4.B16, V5.B16, V6.B16, V7.B16], 64(R0)
+ //
+ // In this example, R0 is post-incremented after each instruction,
+ // so the offset should not be recalculated. For such cases,
+ // `fixedOffset` is set to true.
+ fixedOffset bool
+
+ // After conversion to a string, register names are separated by commas
+ // and may be wrapped in a custom pair of brackets. For example (ARM64):
+ //
+ // (R0, R1) // wrapped in parentheses
+ // [V0.B16, V1.B16, V2.B16, V3.B16] // wrapped in square brackets
+ brackets [2]string
// If this register requires special save and restore, these
// give those operations with a %d placeholder for the stack
save, restore string
}
-func (l *layout) add(op, reg string, size int) {
- l.regs = append(l.regs, regPos{saveOp: op, restoreOp: op, reg: reg, pos: l.stack, size: size})
+// add appends a one-register sequence that uses op for both save and
+// restore, placed at the current stack offset.
+func (l *layout) add(op, regname string, size int) {
+ l.regs = append(l.regs, regSeq{saveOp: op, restoreOp: op, regs: []regInfo{{size, regname, "", l.stack}}})
l.stack += size
}
-func (l *layout) add2(sop, rop, reg string, size int) {
- l.regs = append(l.regs, regPos{saveOp: sop, restoreOp: rop, reg: reg, pos: l.stack, size: size})
- l.stack += size
+// add2 appends a multi-register sequence saved/restored by a single
+// instruction. Unless fixedOffset is set, each register's stack position
+// is assigned here; note that the appended regSeq aliases the regs slice,
+// so the position writes below are visible through l.regs. With
+// fixedOffset, the caller-provided positions are kept and l.stack is not
+// advanced.
+func (l *layout) add2(sop, rop string, regs []regInfo, brackets [2]string, fixedOffset bool) {
+ l.regs = append(l.regs, regSeq{saveOp: sop, restoreOp: rop, regs: regs, brackets: brackets, fixedOffset: fixedOffset})
+ if !fixedOffset {
+ for i := range regs {
+ regs[i].pos = l.stack
+ l.stack += regs[i].size
+ }
+ }
}
+// addSpecial appends an entry with custom save/restore format strings
+// that take the stack offset as a %d argument (see regSeq.save/restore).
func (l *layout) addSpecial(save, restore string, size int) {
- l.regs = append(l.regs, regPos{save: save, restore: restore, pos: l.stack, size: size})
+ l.regs = append(l.regs, regSeq{save: save, restore: restore, regs: []regInfo{{size, "", "", l.stack}}})
l.stack += size
}
+// String renders the sequence as an assembly operand: a bare register
+// name (plus any suffix) for a single register, or the suffixed names
+// joined by ", " and wrapped in the sequence's brackets, e.g. "(R0, R1)"
+// or "[V0.B16, V1.B16]".
+func (rs *regSeq) String() string {
+ switch len(rs.regs) {
+ case 0:
+ log.Fatal("Register sequence must not be empty!")
+ return "" // unreachable; log.Fatal exits
+ case 1:
+ // Include the suffix so a single register renders consistently with
+ // the multi-register case. Current callers pass an empty suffix here,
+ // so this matches the previous output.
+ return rs.regs[0].name + rs.regs[0].suffix
+ default:
+ names := make([]string, 0, len(rs.regs))
+ for _, r := range rs.regs {
+ names = append(names, r.name+r.suffix)
+ }
+ return rs.brackets[0] + strings.Join(names, ", ") + rs.brackets[1]
+ }
+}
+
+// save emits one save per sequence: either the custom save format string,
+// or "<saveOp> <regs>, <pos>(<sp>)" built from the sequence's registers.
func (l *layout) save(g *gen) {
- for _, reg := range l.regs {
- if reg.save != "" {
- g.p(reg.save, reg.pos)
+ for _, seq := range l.regs {
+ if len(seq.regs) < 1 {
+ log.Fatal("Register sequence must not be empty!")
+ }
+ // When dealing with a sequence of registers, we assume that only the position
+ // of the first register is relevant. For example:
+ //
+ // STP (R0, R1), 8(RSP)
+ // STP (R2, R3), 24(RSP)
+ //
+ // Here, R0.pos is 8. While we can infer that R1.pos is 16, it doesn't need to
+ // be explicitly specified, as the STP instruction calculates it automatically.
+ pos := seq.regs[0].pos
+ if seq.save != "" {
+ g.p(seq.save, pos)
} else {
- g.p("%s %s, %d(%s)", reg.saveOp, reg.reg, reg.pos, l.sp)
+ name := seq.String()
+ g.p("%s %s, %d(%s)", seq.saveOp, name, pos, l.sp)
}
}
}
+// restoreInOrder emits one restore per sequence, walking l.regs
+// back-to-front when reverse is true and front-to-back otherwise.
+// As in save, only the first register's position is used (see the
+// comment there).
-func (l *layout) restore(g *gen) {
- for i := len(l.regs) - 1; i >= 0; i-- {
- reg := l.regs[i]
+func (l *layout) restoreInOrder(g *gen, reverse bool) {
+ var seq []regSeq
+ if reverse {
+ seq = make([]regSeq, 0)
+ for i := len(l.regs) - 1; i >= 0; i-- {
+ seq = append(seq, l.regs[i])
+ }
+ } else {
+ seq = l.regs
+ }
+ for _, reg := range seq {
+ if len(reg.regs) < 1 {
+ log.Fatal("Register sequence must not be empty!")
+ }
+ pos := reg.regs[0].pos
if reg.restore != "" {
- g.p(reg.restore, reg.pos)
+ g.p(reg.restore, pos)
} else {
- g.p("%s %d(%s), %s", reg.restoreOp, reg.pos, l.sp, reg.reg)
+ g.p("%s %d(%s), %s", reg.restoreOp, pos, l.sp, reg.String())
}
}
}
+// restore emits restores in reverse save order, matching the
+// stack-relative layout produced by save.
+func (l *layout) restore(g *gen) {
+ l.restoreInOrder(g, true)
+}
+
+// restoreDirect emits restores in save order; used for fixed-offset
+// sequences whose base register is post-incremented by each instruction
+// (see regSeq.fixedOffset), where reversing would read the wrong slots.
+func (l *layout) restoreDirect(g *gen) {
+ l.restoreInOrder(g, false)
+}
+
+// gen386 copies the Z-register stack positions into the X and Y layouts
+// so all three describe one Go struct, then emits that struct.
func gen386(g *gen) {
p := g.p
// We don't have to do this, but it results in a nice Go type. If we split
// this into multiple types, we probably should stop doing this.
for i := range lXRegs.regs {
- lXRegs.regs[i].pos = lZRegs.regs[i].pos
- lYRegs.regs[i].pos = lZRegs.regs[i].pos
+ for j := range lXRegs.regs[i].regs {
+ lXRegs.regs[i].regs[j].pos = lZRegs.regs[i].regs[j].pos
+ lYRegs.regs[i].regs[j].pos = lZRegs.regs[i].regs[j].pos
+ }
}
writeXRegs(g.goarch, &lZRegs)
}
func genARM64(g *gen) {
+ const vReg = "R0" // *xRegState
p := g.p
// Add integer registers R0-R26
// R27 (REGTMP), R28 (g), R29 (FP), R30 (LR), R31 (SP) are special
i--
continue // R18 is not used, skip
}
- reg := fmt.Sprintf("(R%d, R%d)", i, i+1)
- l.add2("STP", "LDP", reg, 16)
+ regs := []regInfo{
+ {name: fmt.Sprintf("R%d", i), size: 8},
+ {name: fmt.Sprintf("R%d", i+1), size: 8},
+ }
+ l.add2("STP", "LDP", regs, [2]string{"(", ")"}, false)
}
// Add flag registers.
l.addSpecial(
8)
// TODO: FPCR? I don't think we'll change it, so no need to save.
// Add floating point registers F0-F31.
- for i := 0; i < 31; i += 2 {
- reg := fmt.Sprintf("(F%d, F%d)", i, i+1)
- l.add2("FSTPD", "FLDPD", reg, 16)
+ lVRegs := layout{sp: vReg} // Non-GP registers
+ for i := 0; i < 31; i += 4 {
+ regs := []regInfo{
+ {name: fmt.Sprintf("V%d", i), suffix: ".B16", size: 16, pos: 64},
+ {name: fmt.Sprintf("V%d", i+1), suffix: ".B16", size: 16, pos: 64},
+ {name: fmt.Sprintf("V%d", i+2), suffix: ".B16", size: 16, pos: 64},
+ {name: fmt.Sprintf("V%d", i+3), suffix: ".B16", size: 16, pos: 64},
+ }
+ lVRegs.add2("VST1.P", "VLD1.P", regs, [2]string{"[", "]"}, true)
}
+ writeXRegs(g.goarch, &lVRegs)
if l.stack%16 != 0 {
l.stack += 8 // SP needs 16-byte alignment
}
p("MOVD R30, (RSP)")
p("#endif")
+ p("// Save GPs")
l.save(g)
+ p("// Save extended register state to p.xRegs.scratch")
+ p("MOVD g_m(g), %s", vReg)
+ p("MOVD m_p(%s), %s", vReg, vReg)
+ p("ADD $(p_xRegs+xRegPerP_scratch), %s, %s", vReg, vReg)
+ lVRegs.save(g)
p("CALL ·asyncPreempt2(SB)")
+ p("// Restore non-GPs from *p.xRegs.cache")
+ p("MOVD g_m(g), %s", vReg)
+ p("MOVD m_p(%s), %s", vReg, vReg)
+ p("MOVD (p_xRegs+xRegPerP_cache)(%s), %s", vReg, vReg)
+ lVRegs.restoreDirect(g)
+ p("// Restore GPs")
l.restore(g)
p("MOVD %d(RSP), R30", l.stack) // sigctxt.pushCall has pushed LR (at interrupt) on stack, restore it
#include "textflag.h"
TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0
- MOVD R30, -496(RSP)
- SUB $496, RSP
+ MOVD R30, -240(RSP)
+ SUB $240, RSP
MOVD R29, -8(RSP)
SUB $8, RSP, R29
#ifdef GOOS_ios
MOVD R30, (RSP)
#endif
+ // Save GPs
STP (R0, R1), 8(RSP)
STP (R2, R3), 24(RSP)
STP (R4, R5), 40(RSP)
MOVD R0, 216(RSP)
MOVD FPSR, R0
MOVD R0, 224(RSP)
- FSTPD (F0, F1), 232(RSP)
- FSTPD (F2, F3), 248(RSP)
- FSTPD (F4, F5), 264(RSP)
- FSTPD (F6, F7), 280(RSP)
- FSTPD (F8, F9), 296(RSP)
- FSTPD (F10, F11), 312(RSP)
- FSTPD (F12, F13), 328(RSP)
- FSTPD (F14, F15), 344(RSP)
- FSTPD (F16, F17), 360(RSP)
- FSTPD (F18, F19), 376(RSP)
- FSTPD (F20, F21), 392(RSP)
- FSTPD (F22, F23), 408(RSP)
- FSTPD (F24, F25), 424(RSP)
- FSTPD (F26, F27), 440(RSP)
- FSTPD (F28, F29), 456(RSP)
- FSTPD (F30, F31), 472(RSP)
+ // Save extended register state to p.xRegs.scratch
+ MOVD g_m(g), R0
+ MOVD m_p(R0), R0
+ ADD $(p_xRegs+xRegPerP_scratch), R0, R0
+ VST1.P [V0.B16, V1.B16, V2.B16, V3.B16], 64(R0)
+ VST1.P [V4.B16, V5.B16, V6.B16, V7.B16], 64(R0)
+ VST1.P [V8.B16, V9.B16, V10.B16, V11.B16], 64(R0)
+ VST1.P [V12.B16, V13.B16, V14.B16, V15.B16], 64(R0)
+ VST1.P [V16.B16, V17.B16, V18.B16, V19.B16], 64(R0)
+ VST1.P [V20.B16, V21.B16, V22.B16, V23.B16], 64(R0)
+ VST1.P [V24.B16, V25.B16, V26.B16, V27.B16], 64(R0)
+ VST1.P [V28.B16, V29.B16, V30.B16, V31.B16], 64(R0)
CALL ·asyncPreempt2(SB)
- FLDPD 472(RSP), (F30, F31)
- FLDPD 456(RSP), (F28, F29)
- FLDPD 440(RSP), (F26, F27)
- FLDPD 424(RSP), (F24, F25)
- FLDPD 408(RSP), (F22, F23)
- FLDPD 392(RSP), (F20, F21)
- FLDPD 376(RSP), (F18, F19)
- FLDPD 360(RSP), (F16, F17)
- FLDPD 344(RSP), (F14, F15)
- FLDPD 328(RSP), (F12, F13)
- FLDPD 312(RSP), (F10, F11)
- FLDPD 296(RSP), (F8, F9)
- FLDPD 280(RSP), (F6, F7)
- FLDPD 264(RSP), (F4, F5)
- FLDPD 248(RSP), (F2, F3)
- FLDPD 232(RSP), (F0, F1)
+ // Restore non-GPs from *p.xRegs.cache
+ MOVD g_m(g), R0
+ MOVD m_p(R0), R0
+ MOVD (p_xRegs+xRegPerP_cache)(R0), R0
+ VLD1.P 64(R0), [V0.B16, V1.B16, V2.B16, V3.B16]
+ VLD1.P 64(R0), [V4.B16, V5.B16, V6.B16, V7.B16]
+ VLD1.P 64(R0), [V8.B16, V9.B16, V10.B16, V11.B16]
+ VLD1.P 64(R0), [V12.B16, V13.B16, V14.B16, V15.B16]
+ VLD1.P 64(R0), [V16.B16, V17.B16, V18.B16, V19.B16]
+ VLD1.P 64(R0), [V20.B16, V21.B16, V22.B16, V23.B16]
+ VLD1.P 64(R0), [V24.B16, V25.B16, V26.B16, V27.B16]
+ VLD1.P 64(R0), [V28.B16, V29.B16, V30.B16, V31.B16]
+ // Restore GPs
MOVD 224(RSP), R0
MOVD R0, FPSR
MOVD 216(RSP), R0
LDP 40(RSP), (R4, R5)
LDP 24(RSP), (R2, R3)
LDP 8(RSP), (R0, R1)
- MOVD 496(RSP), R30
+ MOVD 240(RSP), R30
MOVD -8(RSP), R29
MOVD (RSP), R27
- ADD $512, RSP
+ ADD $256, RSP
RET (R27)