From: Meng Zhuo Date: Mon, 8 Jul 2019 15:45:44 +0000 (+0800) Subject: runtime, cmd/compile: implement and use DUFFCOPY on MIPS64 X-Git-Tag: go1.14beta1~1306 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=307544f427754010d82755a06f8a680da93e5261;p=gostls13.git runtime, cmd/compile: implement and use DUFFCOPY on MIPS64 OS: Linux loongson 3.10.84 mips64el CPU: Loongson 3A3000 quad core name old time/op new time/op delta BinaryTree17 23.5s ± 1% 23.2s ± 0% -1.12% (p=0.008 n=5+5) Fannkuch11 10.2s ± 0% 10.1s ± 0% -0.19% (p=0.008 n=5+5) FmtFprintfEmpty 450ns ± 0% 446ns ± 1% -0.89% (p=0.024 n=5+5) FmtFprintfString 722ns ± 1% 721ns ± 1% ~ (p=0.762 n=5+5) FmtFprintfInt 693ns ± 2% 691ns ± 2% ~ (p=0.889 n=5+5) FmtFprintfIntInt 912ns ± 1% 911ns ± 0% ~ (p=0.722 n=5+5) FmtFprintfPrefixedInt 1.35µs ± 2% 1.35µs ± 2% ~ (p=1.000 n=5+5) FmtFprintfFloat 1.79µs ± 0% 1.78µs ± 0% ~ (p=0.683 n=5+5) FmtManyArgs 3.46µs ± 1% 3.48µs ± 1% ~ (p=0.246 n=5+5) GobDecode 48.8ms ± 1% 48.6ms ± 0% ~ (p=0.222 n=5+5) GobEncode 37.7ms ± 1% 37.4ms ± 1% ~ (p=0.095 n=5+5) Gzip 1.72s ± 1% 1.72s ± 0% ~ (p=0.905 n=5+4) Gunzip 342ms ± 0% 342ms ± 0% ~ (p=0.421 n=5+5) HTTPClientServer 219µs ± 1% 219µs ± 1% ~ (p=1.000 n=5+5) JSONEncode 89.1ms ± 1% 89.4ms ± 1% ~ (p=0.222 n=5+5) JSONDecode 292ms ± 1% 291ms ± 0% ~ (p=0.421 n=5+5) Mandelbrot200 15.7ms ± 0% 15.6ms ± 0% ~ (p=0.690 n=5+5) GoParse 19.5ms ± 1% 19.6ms ± 1% ~ (p=0.310 n=5+5) RegexpMatchEasy0_32 534ns ± 1% 529ns ± 1% ~ (p=0.056 n=5+5) RegexpMatchEasy0_1K 2.75µs ± 0% 2.74µs ± 0% -0.46% (p=0.008 n=5+5) RegexpMatchEasy1_32 572ns ± 2% 565ns ± 3% ~ (p=0.310 n=5+5) RegexpMatchEasy1_1K 4.15µs ± 0% 4.15µs ± 1% ~ (p=0.548 n=5+5) RegexpMatchMedium_32 31.2ns ± 0% 31.1ns ± 0% -0.45% (p=0.016 n=5+4) RegexpMatchMedium_1K 235µs ± 1% 235µs ± 0% ~ (p=1.000 n=5+5) RegexpMatchHard_32 13.9µs ± 1% 13.5µs ± 1% -2.74% (p=0.008 n=5+5) RegexpMatchHard_1K 416µs ± 2% 410µs ± 2% ~ (p=0.056 n=5+5) Revcomp 6.36s ± 0% 6.34s ± 0% -0.31% (p=0.008 n=5+5) Template 352ms ± 1% 353ms ± 0% +0.45% (p=0.032 n=5+5) TimeParse 2.04µs ± 4% 2.01µs ± 0% ~ (p=0.056 n=5+5) TimeFormat 2.97µs ± 0% 2.97µs ± 0% ~ (p=1.000 n=5+5) name old speed new speed delta GobDecode 15.7MB/s ± 1% 15.8MB/s ± 0% ~ (p=0.206 n=5+5) GobEncode 20.4MB/s ± 1% 20.5MB/s ± 1% ~ (p=0.056 n=5+5) Gzip 11.3MB/s ± 1% 11.3MB/s ± 0% ~ (p=0.841 n=5+4) Gunzip 56.7MB/s ± 0% 56.8MB/s ± 0% ~ (p=0.389 n=5+5) JSONEncode 21.8MB/s ± 1% 21.7MB/s ± 1% ~ (p=0.246 n=5+5) JSONDecode 6.66MB/s ± 0% 6.67MB/s ± 0% ~ (p=0.857 n=4+5) GoParse 2.97MB/s ± 1% 2.96MB/s ± 1% ~ (p=0.238 n=5+5) RegexpMatchEasy0_32 59.9MB/s ± 1% 60.5MB/s ± 1% +0.92% (p=0.032 n=5+5) RegexpMatchEasy0_1K 372MB/s ± 0% 374MB/s ± 0% +0.46% (p=0.008 n=5+5) RegexpMatchEasy1_32 56.0MB/s ± 2% 56.7MB/s ± 3% ~ (p=0.310 n=5+5) RegexpMatchEasy1_1K 247MB/s ± 0% 247MB/s ± 1% ~ (p=0.548 n=5+5) RegexpMatchMedium_32 32.0MB/s ± 0% 32.1MB/s ± 0% ~ (p=0.135 n=5+5) RegexpMatchMedium_1K 4.35MB/s ± 1% 4.35MB/s ± 1% ~ (p=0.825 n=5+5) RegexpMatchHard_32 2.30MB/s ± 1% 2.37MB/s ± 1% +2.78% (p=0.008 n=5+5) RegexpMatchHard_1K 2.47MB/s ± 1% 2.50MB/s ± 2% ~ (p=0.095 n=5+5) Revcomp 40.0MB/s ± 0% 40.1MB/s ± 0% +0.31% (p=0.016 n=5+5) Template 5.51MB/s ± 1% 5.49MB/s ± 0% ~ (p=0.190 n=5+5) Change-Id: I540a2e4e7992376ce04f93b332f64fc3b6071237 Reviewed-on: https://go-review.googlesource.com/c/go/+/185078 Reviewed-by: Cherry Zhang Run-TryBot: Cherry Zhang TryBot-Result: Gobot Gobot --- diff --git a/src/cmd/compile/internal/mips64/ssa.go b/src/cmd/compile/internal/mips64/ssa.go index 68eff97dfa..7eae35373d 100644 --- a/src/cmd/compile/internal/mips64/ssa.go +++ b/src/cmd/compile/internal/mips64/ssa.go @@ -426,6 +426,12 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { p4.Reg = mips.REG_R1 p4.To.Type = obj.TYPE_BRANCH gc.Patch(p4, p2) + case ssa.OpMIPS64DUFFCOPY: + p := s.Prog(obj.ADUFFCOPY) + p.To.Type = obj.TYPE_MEM + p.To.Name = obj.NAME_EXTERN + p.To.Sym = gc.Duffcopy + p.To.Offset = v.AuxInt case ssa.OpMIPS64LoweredMove: // SUBV $8, R1 // MOVV 8(R1), Rtmp diff --git a/src/cmd/compile/internal/ssa/gen/MIPS64.rules b/src/cmd/compile/internal/ssa/gen/MIPS64.rules index a3df00aa33..69fe4b721e 100644 --- a/src/cmd/compile/internal/ssa/gen/MIPS64.rules +++ b/src/cmd/compile/internal/ssa/gen/MIPS64.rules @@ -370,6 +370,18 @@ (MOVVstore [8] dst (MOVVload [8] src mem) (MOVVstore dst (MOVVload src mem) mem))) +// medium move uses a duff device +(Move [s] {t} dst src mem) + && s%8 == 0 && s >= 24 && s <= 8*128 && t.(*types.Type).Alignment()%8 == 0 + && !config.noDuffDevice -> + (DUFFCOPY [16 * (128 - s/8)] dst src mem) +// 16 and 128 are magic constants. 16 is the number of bytes to encode: +// MOVV (R1), R23 +// ADDV $8, R1 +// MOVV R23, (R2) +// ADDV $8, R2 +// and 128 is the number of such blocks. See runtime/duff_mips64.s:duffcopy. + // large or unaligned move uses a loop (Move [s] {t} dst src mem) && s > 24 || t.(*types.Type).Alignment()%8 != 0 -> diff --git a/src/cmd/compile/internal/ssa/gen/MIPS64Ops.go b/src/cmd/compile/internal/ssa/gen/MIPS64Ops.go index ba02e0fcb5..e0a920f23d 100644 --- a/src/cmd/compile/internal/ssa/gen/MIPS64Ops.go +++ b/src/cmd/compile/internal/ssa/gen/MIPS64Ops.go @@ -292,6 +292,24 @@ func init() { faultOnNilArg0: true, }, + // duffcopy + // arg0 = address of dst memory (in R2, changed as side effect) + // arg1 = address of src memory (in R1, changed as side effect) + // arg2 = mem + // auxint = offset into duffcopy code to start executing + // returns mem + { + name: "DUFFCOPY", + aux: "Int64", + argLength: 3, + reg: regInfo{ + inputs: []regMask{buildReg("R2"), buildReg("R1")}, + clobbers: buildReg("R1 R2 R31"), + }, + faultOnNilArg0: true, + faultOnNilArg1: true, + }, + // large or unaligned zeroing // arg0 = address of memory to zero (in R1, changed as side effect) // arg1 = address of the last element to zero diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go index d692ed21e8..ef99da2330 100644 --- a/src/cmd/compile/internal/ssa/opGen.go +++ b/src/cmd/compile/internal/ssa/opGen.go @@ -1635,6 +1635,7 @@ const ( OpMIPS64CALLclosure OpMIPS64CALLinter OpMIPS64DUFFZERO + OpMIPS64DUFFCOPY OpMIPS64LoweredZero OpMIPS64LoweredMove OpMIPS64LoweredAtomicLoad8 @@ -21768,6 +21769,20 @@ var opcodeTable = [...]opInfo{ clobbers: 134217730, // R1 R31 }, }, + { + name: "DUFFCOPY", + auxType: auxInt64, + argLen: 3, + faultOnNilArg0: true, + faultOnNilArg1: true, + reg: regInfo{ + inputs: []inputInfo{ + {0, 4}, // R2 + {1, 2}, // R1 + }, + clobbers: 134217734, // R1 R2 R31 + }, + }, { name: "LoweredZero", auxType: auxInt64, diff --git a/src/cmd/compile/internal/ssa/rewriteMIPS64.go b/src/cmd/compile/internal/ssa/rewriteMIPS64.go index 8b75c032cc..a5a58b9bbe 100644 --- a/src/cmd/compile/internal/ssa/rewriteMIPS64.go +++ b/src/cmd/compile/internal/ssa/rewriteMIPS64.go @@ -6969,6 +6969,25 @@ func rewriteValueMIPS64_OpMove_10(v *Value) bool { return true } // match: (Move [s] {t} dst src mem) + // cond: s%8 == 0 && s >= 24 && s <= 8*128 && t.(*types.Type).Alignment()%8 == 0 && !config.noDuffDevice + // result: (DUFFCOPY [16 * (128 - s/8)] dst src mem) + for { + s := v.AuxInt + t := v.Aux + mem := v.Args[2] + dst := v.Args[0] + src := v.Args[1] + if !(s%8 == 0 && s >= 24 && s <= 8*128 && t.(*types.Type).Alignment()%8 == 0 && !config.noDuffDevice) { + break + } + v.reset(OpMIPS64DUFFCOPY) + v.AuxInt = 16 * (128 - s/8) + v.AddArg(dst) + v.AddArg(src) + v.AddArg(mem) + return true + } + // match: (Move [s] {t} dst src mem) // cond: s > 24 || t.(*types.Type).Alignment()%8 != 0 // result: (LoweredMove [t.(*types.Type).Alignment()] dst src (ADDVconst src [s-moveSize(t.(*types.Type).Alignment(), config)]) mem) for { diff --git a/src/runtime/duff_mips64x.s b/src/runtime/duff_mips64x.s index acf0a4e698..c4e04ccc9d 100644 --- a/src/runtime/duff_mips64x.s +++ b/src/runtime/duff_mips64x.s @@ -265,7 +265,645 @@ TEXT runtime·duffzero(SB), NOSPLIT|NOFRAME, $0-0 ADDV $8, R1 RET -// TODO: Implement runtime·duffcopy. -TEXT runtime·duffcopy(SB),NOSPLIT|NOFRAME,$0-0 - MOVV R0, 2(R0) +TEXT runtime·duffcopy(SB), NOSPLIT|NOFRAME, $0-0 + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + + MOVV (R1), R23 + ADDV $8, R1 + MOVV R23, (R2) + ADDV $8, R2 + RET diff --git a/src/runtime/mkduff.go b/src/runtime/mkduff.go index b6fe701497..6ac5e7da44 100644 --- a/src/runtime/mkduff.go +++ b/src/runtime/mkduff.go @@ -216,5 +216,13 @@ func zeroMIPS64x(w io.Writer) { } func copyMIPS64x(w io.Writer) { - fmt.Fprintln(w, "// TODO: Implement runtime·duffcopy.") + fmt.Fprintln(w, "TEXT runtime·duffcopy(SB), NOSPLIT|NOFRAME, $0-0") + for i := 0; i < 128; i++ { + fmt.Fprintln(w, "\tMOVV\t(R1), R23") + fmt.Fprintln(w, "\tADDV\t$8, R1") + fmt.Fprintln(w, "\tMOVV\tR23, (R2)") + fmt.Fprintln(w, "\tADDV\t$8, R2") + fmt.Fprintln(w) + } + fmt.Fprintln(w, "\tRET") }