p3 := gc.Prog(arm64.ABLE)
p3.To.Type = obj.TYPE_BRANCH
gc.Patch(p3, p)
+ case ssa.OpARM64DUFFCOPY:
+ p := gc.Prog(obj.ADUFFCOPY)
+ p.To.Type = obj.TYPE_MEM
+ p.To.Name = obj.NAME_EXTERN
+ p.To.Sym = gc.Linksym(gc.Pkglookup("duffcopy", gc.Runtimepkg))
+ p.To.Offset = v.AuxInt
case ssa.OpARM64LoweredMove:
// MOVD.P 8(R16), Rtmp
// MOVD.P Rtmp, 8(R17)
(OffPtr <src.Type> src [SizeAndAlign(s).Size()-SizeAndAlign(s).Size()%8])
(Move [MakeSizeAndAlign(SizeAndAlign(s).Size()-SizeAndAlign(s).Size()%8, 1).Int64()] dst src mem))
+// medium move uses a duff device
+// 8 and 128 are magic constants, see runtime/mkduff.go
+(Move [s] dst src mem)
+ && SizeAndAlign(s).Size()%8 == 0 && SizeAndAlign(s).Size() > 24 && SizeAndAlign(s).Size() <= 8*128
+ && !config.noDuffDevice ->
+ (DUFFCOPY [8 * (128 - int64(SizeAndAlign(s).Size()/8))] dst src mem)
+
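As an aside (not part of the CL), the auxint above is the byte offset into runtime·duffcopy at which execution starts. Each copy pair emitted by mkduff.go (shown below) is two 4-byte instructions, so 8 bytes of code move 8 bytes of data; skipping 128 - s/8 pairs leaves exactly s/8 pairs to run before the RET. A minimal sketch of the arithmetic, with a hypothetical helper name:

// duffCopyOffset mirrors the auxint expression in the rule above.
// For a 64-byte move, 64/8 = 8 pairs are needed, so entry is at
// 8*(128-8) = 960 bytes into the routine.
func duffCopyOffset(size int64) int64 {
	const pairs = 128 // copy pairs generated by mkduff.go
	n := size / 8     // 8-byte words to move
	return 8 * (pairs - n)
}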
// large move uses a loop
-// DUFFCOPY is not implemented on ARM64 (TODO)
(Move [s] dst src mem)
&& SizeAndAlign(s).Size() > 24 && SizeAndAlign(s).Size()%8 == 0 ->
(LoweredMove
faultOnNilArg0: true,
},
+ // duffcopy
+ // arg0 = address of dst memory (in R17 aka arm64.REGRT2, changed as side effect)
+ // arg1 = address of src memory (in R16 aka arm64.REGRT1, changed as side effect)
+ // arg2 = mem
+ // auxint = offset into duffcopy code to start executing
+ // returns mem
+ // R16, R17 changed as side effect
+ {
+ name: "DUFFCOPY",
+ aux: "Int64",
+ argLength: 3,
+ reg: regInfo{
+ inputs: []regMask{buildReg("R17"), buildReg("R16")},
+ clobbers: buildReg("R16 R17"),
+ },
+ faultOnNilArg0: true,
+ faultOnNilArg1: true,
+ },
+
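For reference (not part of the CL), the buildReg masks above correspond to the literal masks in the generated opGen.go shown further down; R16 sits at bit 16 and R17 at bit 17 of the ARM64 register mask, as the generated comments confirm. A sketch:

// Illustrative constants only; the real values are emitted by the ssa gen tool.
const (
	maskR16    = 1 << 16           // 65536
	maskR17    = 1 << 17           // 131072
	maskR16R17 = maskR16 | maskR17 // 196608, the DUFFCOPY clobber set
)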
// large move
// arg0 = address of dst memory (in R17 aka arm64.REGRT2, changed as side effect)
// arg1 = address of src memory (in R16 aka arm64.REGRT1, changed as side effect)
OpARM64GreaterEqualU
OpARM64DUFFZERO
OpARM64LoweredZero
+ OpARM64DUFFCOPY
OpARM64LoweredMove
OpARM64LoweredGetClosurePtr
OpARM64MOVDconvert
clobbers: 65536, // R16
},
},
+ {
+ name: "DUFFCOPY",
+ auxType: auxInt64,
+ argLen: 3,
+ faultOnNilArg0: true,
+ faultOnNilArg1: true,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 131072}, // R17
+ {1, 65536}, // R16
+ },
+ clobbers: 196608, // R16 R17
+ },
+ },
{
name: "LoweredMove",
argLen: 4,
return true
}
// match: (Move [s] dst src mem)
+ // cond: SizeAndAlign(s).Size()%8 == 0 && SizeAndAlign(s).Size() > 24 && SizeAndAlign(s).Size() <= 8*128 && !config.noDuffDevice
+ // result: (DUFFCOPY [8 * (128 - int64(SizeAndAlign(s).Size()/8))] dst src mem)
+ for {
+ s := v.AuxInt
+ dst := v.Args[0]
+ src := v.Args[1]
+ mem := v.Args[2]
+ if !(SizeAndAlign(s).Size()%8 == 0 && SizeAndAlign(s).Size() > 24 && SizeAndAlign(s).Size() <= 8*128 && !config.noDuffDevice) {
+ break
+ }
+ v.reset(OpARM64DUFFCOPY)
+ v.AuxInt = 8 * (128 - int64(SizeAndAlign(s).Size()/8))
+ v.AddArg(dst)
+ v.AddArg(src)
+ v.AddArg(mem)
+ return true
+ }
+ // match: (Move [s] dst src mem)
// cond: SizeAndAlign(s).Size() > 24 && SizeAndAlign(s).Size()%8 == 0
// result: (LoweredMove dst src (ADDconst <src.Type> src [SizeAndAlign(s).Size()-moveSize(SizeAndAlign(s).Align(), config)]) mem)
for {
case AB:
return 0<<31 | 5<<26 /* imm26 */
- case obj.ADUFFZERO,
- ABL:
+ case obj.ADUFFZERO, obj.ADUFFCOPY, ABL:
return 1<<31 | 5<<26
}
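ADUFFCOPY is assembled the same way as ADUFFZERO and ABL, i.e. as a branch-and-link to the runtime routine. The constants returned above are the fixed AArch64 branch opcodes (sketch for illustration):

// B is encoded as 000101 in bits 31-26, BL as 100101, each followed by imm26.
const (
	opB  = 0<<31 | 5<<26 // 0x14000000: B  <imm26>
	opBL = 1<<31 | 5<<26 // 0x94000000: BL <imm26>, used for ADUFFZERO/ADUFFCOPY/ABL
)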
MOVD.W ZR, 8(R16)
RET
-// TODO: Implement runtime·duffcopy.
+TEXT runtime·duffcopy(SB), NOSPLIT, $0-0
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ MOVD.P 8(R16), R27
+ MOVD.P R27, 8(R17)
+
+ RET
}
func copyARM64(w io.Writer) {
- fmt.Fprintln(w, "// TODO: Implement runtime·duffcopy.")
+ // R16 (aka REGRT1): ptr to source memory
+ // R17 (aka REGRT2): ptr to destination memory
+ // R27 (aka REGTMP): scratch space
+ // R16 and R17 are updated as a side effect
+ fmt.Fprintln(w, "TEXT runtime·duffcopy(SB), NOSPLIT, $0-0")
+ for i := 0; i < 128; i++ {
+ fmt.Fprintln(w, "\tMOVD.P\t8(R16), R27")
+ fmt.Fprintln(w, "\tMOVD.P\tR27, 8(R17)")
+ fmt.Fprintln(w)
+ }
+ fmt.Fprintln(w, "\tRET")
}
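A quick consistency check (a sketch, not part of the CL): the 128 copy pairs emitted above handle at most 128*8 = 1024 bytes, which matches the SizeAndAlign(s).Size() <= 8*128 bound in the Move rewrite rule. After editing copyARM64, duff_arm64.s is regenerated by running mkduff.go (typically via go generate in the runtime package).

// Sketch: the capacity the Move rule relies on.
const maxDuffCopyBytes = 128 * 8 // 1024 bytes, one MOVD.P pair per 8 bytes copied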
func tagsPPC64x(w io.Writer) {