Implement runtime.duffzero and runtime.duffcopy for riscv64.
Use obj.ADUFFZERO/obj.ADUFFCOPY for medium size, word aligned
zeroing/moving.
Change-Id: I42ec622055630c94cb77e286d8d33dbe7c9f846c
Reviewed-on: https://go-review.googlesource.com/c/go/+/237797
Run-TryBot: Cherry Zhang <cherryyz@google.com>
Reviewed-by: Joel Sing <joel@sing.id.au>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
return p
}
- // TODO(jsing): Add a duff zero implementation for medium sized ranges.
+ if cnt <= int64(128*gc.Widthptr) {
+ p = pp.Appendpp(p, riscv.AADDI, obj.TYPE_CONST, 0, off, obj.TYPE_REG, riscv.REG_A0, 0)
+ p.Reg = riscv.REG_SP
+ p = pp.Appendpp(p, obj.ADUFFZERO, obj.TYPE_NONE, 0, 0, obj.TYPE_MEM, 0, 0)
+ p.To.Name = obj.NAME_EXTERN
+ p.To.Sym = gc.Duffzero
+ p.To.Offset = 8 * (128 - cnt/int64(gc.Widthptr))
+ return p
+ }
// Loop, zeroing pointer width bytes at a time.
// ADD $(off), SP, T0
p.To.Type = obj.TYPE_REG
p.To.Reg = v.Reg()
+ case ssa.OpRISCV64DUFFZERO:
+ p := s.Prog(obj.ADUFFZERO)
+ p.To.Type = obj.TYPE_MEM
+ p.To.Name = obj.NAME_EXTERN
+ p.To.Sym = gc.Duffzero
+ p.To.Offset = v.AuxInt
+
+ case ssa.OpRISCV64DUFFCOPY:
+ p := s.Prog(obj.ADUFFCOPY)
+ p.To.Type = obj.TYPE_MEM
+ p.To.Name = obj.NAME_EXTERN
+ p.To.Sym = gc.Duffcopy
+ p.To.Offset = v.AuxInt
+
default:
v.Fatalf("Unhandled op %v", v.Op)
}
(Zero [4] ptr mem) => (MOVWstore ptr (MOVWconst) mem)
(Zero [8] ptr mem) => (MOVDstore ptr (MOVDconst) mem)
+// Medium zeroing uses a Duff's device
+// 8 and 128 are magic constants, see runtime/mkduff.go
+(Zero [s] {t} ptr mem)
+ && s%8 == 0 && s >= 16 && s <= 8*128
+ && t.Alignment()%8 == 0 && !config.noDuffDevice =>
+ (DUFFZERO [8 * (128 - s/8)] ptr mem)
+
// Generic zeroing uses a loop
(Zero [s] {t} ptr mem) =>
(LoweredZero [t.Alignment()]
(Move [4] dst src mem) => (MOVWstore dst (MOVWload src mem) mem)
(Move [8] dst src mem) => (MOVDstore dst (MOVDload src mem) mem)
+// Medium move uses a Duff's device
+// 16 and 128 are magic constants, see runtime/mkduff.go
+(Move [s] {t} dst src mem)
+ && s%8 == 0 && s >= 16 && s <= 8*128 && t.Alignment()%8 == 0
+ && !config.noDuffDevice && logLargeCopy(v, s) =>
+ (DUFFCOPY [16 * (128 - s/8)] dst src mem)
+
// Generic move uses a loop
(Move [s] {t} dst src mem) && (s <= 16 || logLargeCopy(v, s)) =>
(LoweredMove [t.Alignment()]
{name: "CALLclosure", argLength: 3, reg: callClosure, aux: "CallOff", call: true}, // call function via closure. arg0=codeptr, arg1=closure, arg2=mem, auxint=argsize, returns mem
{name: "CALLinter", argLength: 2, reg: callInter, aux: "CallOff", call: true}, // call fn by pointer. arg0=codeptr, arg1=mem, auxint=argsize, returns mem
+ // duffzero
+ // arg0 = address of memory to zero (in X10, changed as side effect)
+ // arg1 = mem
+ // auxint = offset into duffzero code to start executing
+ // X1 (link register) changed because of function call
+ // returns mem
+ {
+ name: "DUFFZERO",
+ aux: "Int64",
+ argLength: 2,
+ reg: regInfo{
+ inputs: []regMask{regNamed["X10"]},
+ clobbers: regNamed["X1"] | regNamed["X10"],
+ },
+ typ: "Mem",
+ faultOnNilArg0: true,
+ },
+
+ // duffcopy
+ // arg0 = address of dst memory (in X11, changed as side effect)
+ // arg1 = address of src memory (in X10, changed as side effect)
+ // arg2 = mem
+ // auxint = offset into duffcopy code to start executing
+ // X1 (link register) changed because of function call
+ // returns mem
+ {
+ name: "DUFFCOPY",
+ aux: "Int64",
+ argLength: 3,
+ reg: regInfo{
+ inputs: []regMask{regNamed["X11"], regNamed["X10"]},
+ clobbers: regNamed["X1"] | regNamed["X10"] | regNamed["X11"],
+ },
+ typ: "Mem",
+ faultOnNilArg0: true,
+ faultOnNilArg1: true,
+ },
+
// Generic moves and zeros
// general unaligned zeroing
OpRISCV64CALLstatic
OpRISCV64CALLclosure
OpRISCV64CALLinter
+ OpRISCV64DUFFZERO
+ OpRISCV64DUFFCOPY
OpRISCV64LoweredZero
OpRISCV64LoweredMove
OpRISCV64LoweredAtomicLoad8
clobbers: 9223372035781033972, // X3 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 g X28 X29 X30 F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
},
},
+ {
+ name: "DUFFZERO",
+ auxType: auxInt64,
+ argLen: 2,
+ faultOnNilArg0: true,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 512}, // X10
+ },
+ clobbers: 512, // X10
+ },
+ },
+ {
+ name: "DUFFCOPY",
+ auxType: auxInt64,
+ argLen: 3,
+ faultOnNilArg0: true,
+ faultOnNilArg1: true,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 1024}, // X11
+ {1, 512}, // X10
+ },
+ clobbers: 1536, // X10 X11
+ },
+ },
{
name: "LoweredZero",
auxType: auxInt64,
return true
}
// match: (Move [s] {t} dst src mem)
+ // cond: s%8 == 0 && s >= 16 && s <= 8*128 && t.Alignment()%8 == 0 && !config.noDuffDevice && logLargeCopy(v, s)
+ // result: (DUFFCOPY [16 * (128 - s/8)] dst src mem)
+ for {
+ s := auxIntToInt64(v.AuxInt)
+ t := auxToType(v.Aux)
+ dst := v_0
+ src := v_1
+ mem := v_2
+ if !(s%8 == 0 && s >= 16 && s <= 8*128 && t.Alignment()%8 == 0 && !config.noDuffDevice && logLargeCopy(v, s)) {
+ break
+ }
+ v.reset(OpRISCV64DUFFCOPY)
+ v.AuxInt = int64ToAuxInt(16 * (128 - s/8))
+ v.AddArg3(dst, src, mem)
+ return true
+ }
+ // match: (Move [s] {t} dst src mem)
// cond: (s <= 16 || logLargeCopy(v, s))
// result: (LoweredMove [t.Alignment()] dst src (ADDI <src.Type> [s-moveSize(t.Alignment(), config)] src) mem)
for {
return true
}
// match: (Zero [s] {t} ptr mem)
+ // cond: s%8 == 0 && s >= 16 && s <= 8*128 && t.Alignment()%8 == 0 && !config.noDuffDevice
+ // result: (DUFFZERO [8 * (128 - s/8)] ptr mem)
+ for {
+ s := auxIntToInt64(v.AuxInt)
+ t := auxToType(v.Aux)
+ ptr := v_0
+ mem := v_1
+ if !(s%8 == 0 && s >= 16 && s <= 8*128 && t.Alignment()%8 == 0 && !config.noDuffDevice) {
+ break
+ }
+ v.reset(OpRISCV64DUFFZERO)
+ v.AuxInt = int64ToAuxInt(8 * (128 - s/8))
+ v.AddArg2(ptr, mem)
+ return true
+ }
+ // match: (Zero [s] {t} ptr mem)
// result: (LoweredZero [t.Alignment()] ptr (ADD <ptr.Type> ptr (MOVDconst [s-moveSize(t.Alignment(), config)])) mem)
for {
s := auxIntToInt64(v.AuxInt)
// lr is the link register to use for the JALR.
// p must be a CALL, JMP or RET.
func jalrToSym(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc, lr int16) *obj.Prog {
- if p.As != obj.ACALL && p.As != obj.AJMP && p.As != obj.ARET {
+ if p.As != obj.ACALL && p.As != obj.AJMP && p.As != obj.ARET && p.As != obj.ADUFFZERO && p.As != obj.ADUFFCOPY {
ctxt.Diag("unexpected Prog in jalrToSym: %v", p)
return p
}
// CALLs are CALL or JAL(R) with link register LR.
for p := sym.Func().Text; p != nil; p = p.Link {
switch p.As {
- case obj.ACALL:
+ case obj.ACALL, obj.ADUFFZERO, obj.ADUFFCOPY:
return true
case AJAL, AJALR:
if p.From.Type == obj.TYPE_REG && p.From.Reg == REG_LR {
p.From.Reg = REG_SP
}
- case obj.ACALL:
+ case obj.ACALL, obj.ADUFFZERO, obj.ADUFFCOPY:
switch p.To.Type {
case obj.TYPE_MEM:
jalrToSym(ctxt, p, newprog, REG_LR)
obj.APCDATA: pseudoOpEncoding,
obj.ATEXT: pseudoOpEncoding,
obj.ANOP: pseudoOpEncoding,
+ obj.ADUFFZERO: pseudoOpEncoding,
+ obj.ADUFFCOPY: pseudoOpEncoding,
}
// encodingForAs returns the encoding for an obj.As.
--- /dev/null
+// Code generated by mkduff.go; DO NOT EDIT.
+// Run go generate from src/runtime to update.
+// See mkduff.go for comments.
+
+#include "textflag.h"
+
+TEXT runtime·duffzero(SB), NOSPLIT|NOFRAME, $0-0
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ MOV ZERO, (X10)
+ ADD $8, X10
+ RET
+
+TEXT runtime·duffcopy(SB), NOSPLIT|NOFRAME, $0-0
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ MOV (X10), X31
+ ADD $8, X10
+ MOV X31, (X11)
+ ADD $8, X11
+
+ RET
gen("arm64", notags, zeroARM64, copyARM64)
gen("ppc64x", tagsPPC64x, zeroPPC64x, copyPPC64x)
gen("mips64x", tagsMIPS64x, zeroMIPS64x, copyMIPS64x)
+ gen("riscv64", notags, zeroRISCV64, copyRISCV64)
}
func gen(arch string, tags, zero, copy func(io.Writer)) {
}
fmt.Fprintln(w, "\tRET")
}
+
+func zeroRISCV64(w io.Writer) {
+ // ZERO: always zero
+ // X10: ptr to memory to be zeroed
+ // X10 is updated as a side effect.
+ fmt.Fprintln(w, "TEXT runtime·duffzero(SB), NOSPLIT|NOFRAME, $0-0")
+ for i := 0; i < 128; i++ {
+ fmt.Fprintln(w, "\tMOV\tZERO, (X10)")
+ fmt.Fprintln(w, "\tADD\t$8, X10")
+ }
+ fmt.Fprintln(w, "\tRET")
+}
+
+func copyRISCV64(w io.Writer) {
+ // X10: ptr to source memory
+ // X11: ptr to destination memory
+ // X10 and X11 are updated as a side effect
+ fmt.Fprintln(w, "TEXT runtime·duffcopy(SB), NOSPLIT|NOFRAME, $0-0")
+ for i := 0; i < 128; i++ {
+ fmt.Fprintln(w, "\tMOV\t(X10), X31")
+ fmt.Fprintln(w, "\tADD\t$8, X10")
+ fmt.Fprintln(w, "\tMOV\tX31, (X11)")
+ fmt.Fprintln(w, "\tADD\t$8, X11")
+ fmt.Fprintln(w)
+ }
+ fmt.Fprintln(w, "\tRET")
+}