From 24c2ee7b6578c683a608c3d8c41fe626d50e6b61 Mon Sep 17 00:00:00 2001 From: Lynn Boger Date: Wed, 22 Sep 2021 09:37:12 -0500 Subject: [PATCH] cmd/compile: enable reg args and add duffcopy support on ppc64x This adds support for duffcopy on ppc64x and updates the ssa/config.go file to enable register args and recognize the duffDevice is available on ppc64x. Change-Id: Ifc472cc9cc19c9a80e468fb52078c75f7dd44d36 Reviewed-on: https://go-review.googlesource.com/c/go/+/351490 Run-TryBot: Lynn Boger TryBot-Result: Go Bot Trust: Lynn Boger Reviewed-by: Cherry Mui --- src/cmd/compile/internal/ssa/config.go | 5 +- src/cmd/internal/obj/ppc64/a.out.go | 4 +- src/runtime/duff_ppc64x.s | 518 ++++++++++++++++++------- src/runtime/mkduff.go | 12 +- 4 files changed, 399 insertions(+), 140 deletions(-) diff --git a/src/cmd/compile/internal/ssa/config.go b/src/cmd/compile/internal/ssa/config.go index 61d1dea642..5ab7240acf 100644 --- a/src/cmd/compile/internal/ssa/config.go +++ b/src/cmd/compile/internal/ssa/config.go @@ -239,11 +239,10 @@ func NewConfig(arch string, types Types, ctxt *obj.Link, optimize, softfloat boo c.registers = registersPPC64[:] c.gpRegMask = gpRegMaskPPC64 c.fpRegMask = fpRegMaskPPC64 - //c.intParamRegs = paramIntRegPPC64 - //c.floatParamRegs = paramFloatRegPPC64 + c.intParamRegs = paramIntRegPPC64 + c.floatParamRegs = paramFloatRegPPC64 c.FPReg = framepointerRegPPC64 c.LinkReg = linkRegPPC64 - c.noDuffDevice = true // TODO: Resolve PPC64 DuffDevice (has zero, but not copy) c.hasGReg = true case "mips64": c.BigEndian = true diff --git a/src/cmd/internal/obj/ppc64/a.out.go b/src/cmd/internal/obj/ppc64/a.out.go index dda24a0b96..b5696f7993 100644 --- a/src/cmd/internal/obj/ppc64/a.out.go +++ b/src/cmd/internal/obj/ppc64/a.out.go @@ -242,8 +242,8 @@ const ( REGSB = REG_R2 REGRET = REG_R3 REGARG = -1 /* -1 disables passing the first argument in register */ - REGRT1 = REG_R3 /* reserved for runtime, duffzero and duffcopy */ - REGRT2 = REG_R4 /* reserved for runtime, duffcopy */ + REGRT1 = REG_R20 /* reserved for runtime, duffzero and duffcopy */ + REGRT2 = REG_R21 /* reserved for runtime, duffcopy */ REGMIN = REG_R7 /* register variables allocated from here to REGMAX */ REGCTXT = REG_R11 /* context for closures */ REGTLS = REG_R13 /* C ABI TLS base pointer */ diff --git a/src/runtime/duff_ppc64x.s b/src/runtime/duff_ppc64x.s index d4e3b409d2..eeecf13df1 100644 --- a/src/runtime/duff_ppc64x.s +++ b/src/runtime/duff_ppc64x.s @@ -7,136 +7,392 @@ #include "textflag.h" -TEXT runtime·duffzero(SB), NOSPLIT|NOFRAME, $0-0 - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) - MOVDU R0, 8(R3) +TEXT runtime·duffzero(SB), NOSPLIT|NOFRAME, $0-0 + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) + MOVDU R0, 8(R20) RET -TEXT runtime·duffcopy(SB), NOSPLIT|NOFRAME, $0-0 - UNDEF +TEXT runtime·duffcopy(SB), NOSPLIT|NOFRAME, $0-0 + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + MOVDU 8(R20), R5 + MOVDU R5, 8(R21) + RET diff --git a/src/runtime/mkduff.go b/src/runtime/mkduff.go index f036745092..f3c3d8dfb5 100644 --- a/src/runtime/mkduff.go +++ b/src/runtime/mkduff.go @@ -188,17 +188,21 @@ func zeroPPC64x(w io.Writer) { // R0: always zero // R3 (aka REGRT1): ptr to memory to be zeroed - 8 // On return, R3 points to the last zeroed dword. - fmt.Fprintln(w, "TEXT runtime·duffzero(SB), NOSPLIT|NOFRAME, $0-0") + fmt.Fprintln(w, "TEXT runtime·duffzero(SB), NOSPLIT|NOFRAME, $0-0") for i := 0; i < 128; i++ { - fmt.Fprintln(w, "\tMOVDU\tR0, 8(R3)") + fmt.Fprintln(w, "\tMOVDU\tR0, 8(R20)") } fmt.Fprintln(w, "\tRET") } func copyPPC64x(w io.Writer) { // duffcopy is not used on PPC64. - fmt.Fprintln(w, "TEXT runtime·duffcopy(SB), NOSPLIT|NOFRAME, $0-0") - fmt.Fprintln(w, "\tUNDEF") + fmt.Fprintln(w, "TEXT runtime·duffcopy(SB), NOSPLIT|NOFRAME, $0-0") + for i := 0; i < 128; i++ { + fmt.Fprintln(w, "\tMOVDU\t8(R20), R5") + fmt.Fprintln(w, "\tMOVDU\tR5, 8(R21)") + } + fmt.Fprintln(w, "\tRET") } func tagsMIPS64x(w io.Writer) { -- 2.50.0