From e9379a8f8bd5f00463f67fe7e052cbcb262b169a Mon Sep 17 00:00:00 2001 From: "Paul E. Murphy" Date: Tue, 12 Sep 2023 16:06:45 -0500 Subject: [PATCH] cmd/internal/asm/ppc64: support 34b ADD/MOVD $const, Rx For constant signed values which require 34b to represent, the assembler will generate a pli instruction on linux/power10/PPC64 instead of loading a constant. Similarly, ADD is extended to support 34b signed constants. On linux/power10/PPC64, this generates a paddi instruction. For assembler consistency, a second form is added if paddi cannot be used. The second form is provided for assembly writers. Change-Id: I98144306af766b02fbbe36b72856a23cdf51d247 Reviewed-on: https://go-review.googlesource.com/c/go/+/528317 TryBot-Result: Gopher Robot LUCI-TryBot-Result: Go LUCI Reviewed-by: Eli Bendersky Run-TryBot: Paul Murphy Reviewed-by: Michael Pratt Reviewed-by: Lynn Boger --- src/cmd/asm/internal/asm/testdata/ppc64.s | 4 +++ src/cmd/internal/obj/ppc64/asm9.go | 32 ++++++++++++++--------- src/cmd/internal/obj/ppc64/obj9.go | 8 ++++-- 3 files changed, 29 insertions(+), 15 deletions(-) diff --git a/src/cmd/asm/internal/asm/testdata/ppc64.s b/src/cmd/asm/internal/asm/testdata/ppc64.s index 2bf04971ed..6de8b13709 100644 --- a/src/cmd/asm/internal/asm/testdata/ppc64.s +++ b/src/cmd/asm/internal/asm/testdata/ppc64.s @@ -33,6 +33,10 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0 MOVD $2147483649, R5 // 6405800060a50001 or 0600800038a00001 // Hex constant 0xFFFFFFFF80000001 MOVD $-2147483647, R5 // 3ca0800060a50001 or 0603800038a00001 + // Hex constant 0xFFFFFFFE00000000 (load of constant on < power10, pli on >= power10 + MOVD $-8589934592, R5 // 3ca00000e8a50000 or 0602000038a00000 + // Hex constant 0xFFFFFFFE00000000 + ADD $-8589934592, R5 // 3fe0fffe63ff00007bff83e463ff00007cbf2a14 or 0602000038a50000 MOVD 8(R3), R4 // e8830008 MOVD (R3)(R4), R5 // 7ca4182a MOVD (R3)(R0), R5 // 7ca0182a diff --git a/src/cmd/internal/obj/ppc64/asm9.go b/src/cmd/internal/obj/ppc64/asm9.go index 4ce506cbf9..65b8c583d9 100644 --- a/src/cmd/internal/obj/ppc64/asm9.go +++ b/src/cmd/internal/obj/ppc64/asm9.go @@ -545,7 +545,7 @@ type PrefixableOptab struct { // // This requires an ISA 3.1 compatible cpu (e.g Power10), and when linking externally an ELFv2 1.5 compliant. var prefixableOptab = []PrefixableOptab{ - {Optab: Optab{as: AMOVD, a1: C_LCON, a6: C_REG, type_: 19, size: 8}, minGOPPC64: 10, pfxsize: 8}, + {Optab: Optab{as: AMOVD, a1: C_S34CON, a6: C_REG, type_: 19, size: 8}, minGOPPC64: 10, pfxsize: 8}, {Optab: Optab{as: AMOVD, a1: C_ADDR, a6: C_REG, type_: 75, size: 8}, minGOPPC64: 10, pfxsize: 8}, {Optab: Optab{as: AMOVD, a1: C_TLS_LE, a6: C_REG, type_: 79, size: 8}, minGOPPC64: 10, pfxsize: 8}, {Optab: Optab{as: AMOVD, a1: C_TLS_IE, a6: C_REG, type_: 80, size: 12}, minGOPPC64: 10, pfxsize: 12}, @@ -578,6 +578,8 @@ var prefixableOptab = []PrefixableOptab{ {Optab: Optab{as: AADD, a1: C_LCON, a2: C_REG, a6: C_REG, type_: 22, size: 12}, minGOPPC64: 10, pfxsize: 8}, {Optab: Optab{as: AADD, a1: C_LCON, a6: C_REG, type_: 22, size: 12}, minGOPPC64: 10, pfxsize: 8}, + {Optab: Optab{as: AADD, a1: C_S34CON, a2: C_REG, a6: C_REG, type_: 22, size: 20}, minGOPPC64: 10, pfxsize: 8}, + {Optab: Optab{as: AADD, a1: C_S34CON, a6: C_REG, type_: 22, size: 20}, minGOPPC64: 10, pfxsize: 8}, } var oprange [ALAST & obj.AMask][]Optab @@ -2978,7 +2980,7 @@ func asmout(c *ctxt9, p *obj.Prog, o *Optab, out *[5]uint32) { o1 = AOP_IRR(c.opirr(AADDIS), uint32(p.To.Reg), uint32(r), uint32(v)>>16) } - case 22: /* add $lcon/$andcon,r1,r2 ==> oris+ori+add/ori+add */ + case 22: /* add $lcon/$andcon,r1,r2 ==> oris+ori+add/ori+add, add $s34con,r1 ==> addis+ori+slw+ori+add */ if p.To.Reg == REGTMP || p.Reg == REGTMP { c.ctxt.Diag("can't synthesize large constant\n%v", p) } @@ -2990,19 +2992,23 @@ func asmout(c *ctxt9, p *obj.Prog, o *Optab, out *[5]uint32) { if p.From.Sym != nil { c.ctxt.Diag("%v is not supported", p) } - // If operand is ANDCON, generate 2 instructions using - // ORI for unsigned value; with LCON 3 instructions. - if o.size == 8 { - o1 = LOP_IRR(OP_ORI, REGTMP, REGZERO, uint32(int32(d))) - o2 = AOP_RRR(c.oprrr(p.As), uint32(p.To.Reg), REGTMP, uint32(r)) - } else { - o1 = loadu32(REGTMP, d) - o2 = LOP_IRR(OP_ORI, REGTMP, REGTMP, uint32(int32(d))) - o3 = AOP_RRR(c.oprrr(p.As), uint32(p.To.Reg), REGTMP, uint32(r)) - } - if o.ispfx { o1, o2 = pfxadd(int16(p.To.Reg), int16(r), PFX_R_ABS, d) + } else if o.size == 8 { + o1 = LOP_IRR(OP_ORI, REGTMP, REGZERO, uint32(int32(d))) // tmp = uint16(d) + o2 = AOP_RRR(c.oprrr(p.As), uint32(p.To.Reg), REGTMP, uint32(r)) // to = tmp + from + } else if o.size == 12 { + // Note, o1 is ADDIS if d is negative, ORIS otherwise. + o1 = loadu32(REGTMP, d) // tmp = d & 0xFFFF0000 + o2 = LOP_IRR(OP_ORI, REGTMP, REGTMP, uint32(int32(d))) // tmp |= d & 0xFFFF + o3 = AOP_RRR(c.oprrr(p.As), uint32(p.To.Reg), REGTMP, uint32(r)) // to = from + tmp + } else { + // For backwards compatibility with GOPPC64 < 10, generate 34b constants in register. + o1 = LOP_IRR(OP_ADDIS, REGZERO, REGTMP, uint32(d>>32)) // tmp = sign_extend((d>>32)&0xFFFF0000) + o2 = LOP_IRR(OP_ORI, REGTMP, REGTMP, uint32(d>>16)) // tmp |= (d>>16)&0xFFFF + o3 = AOP_RLDIC(OP_RLDICR, REGTMP, REGTMP, 16, 63-16) // tmp <<= 16 + o4 = LOP_IRR(OP_ORI, REGTMP, REGTMP, uint32(uint16(d))) // tmp |= d&0xFFFF + o5 = AOP_RRR(c.oprrr(p.As), uint32(p.To.Reg), REGTMP, uint32(r)) } case 23: /* and $lcon/$addcon,r1,r2 ==> oris+ori+and/addi+and */ diff --git a/src/cmd/internal/obj/ppc64/obj9.go b/src/cmd/internal/obj/ppc64/obj9.go index da95e65079..7cd4dc2396 100644 --- a/src/cmd/internal/obj/ppc64/obj9.go +++ b/src/cmd/internal/obj/ppc64/obj9.go @@ -81,10 +81,14 @@ func progedit(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) { case AMOVD: // 32b constants (signed and unsigned) can be generated via 1 or 2 instructions. - // All others must be placed in memory and loaded. isS32 := int64(int32(p.From.Offset)) == p.From.Offset isU32 := uint64(uint32(p.From.Offset)) == uint64(p.From.Offset) - if p.From.Type == obj.TYPE_CONST && p.From.Name == obj.NAME_NONE && p.From.Reg == 0 && !isS32 && !isU32 { + + // If prefixed instructions are supported, a 34b signed constant can be generated by one pli instruction. + isS34 := pfxEnabled && (p.From.Offset<<30)>>30 == p.From.Offset + + // If the constant cannot be generated with 2 or less instructions, it must be placed in memory and loaded. + if p.From.Type == obj.TYPE_CONST && p.From.Name == obj.NAME_NONE && p.From.Reg == 0 && !isS32 && !isU32 && !isS34 { p.From.Type = obj.TYPE_MEM p.From.Sym = ctxt.Int64Sym(p.From.Offset) p.From.Name = obj.NAME_EXTERN -- 2.50.0