From 72301a9863fb43ff26e9779a086e02cf02031ceb Mon Sep 17 00:00:00 2001 From: "Paul E. Murphy" Date: Tue, 9 Mar 2021 16:55:22 -0600 Subject: [PATCH] cmd/internal/obj: use prefix insn in MOV* opcodes for GOPPC64=power10 As background, Power10 adds prefixed load, store, and add immediate instructions which encode 34-bit signed displacements. Likewise, they also give the option to compute addresses against the PC. This enables using simpler PC relative (PC-rel) relocations instead of maintaining a dedicated pointer (the TOC) to the code/data blob on PPC64/linux. Similarly, there are several Go opcodes where it can be advantageous to use prefixed instructions instead of composite sequences like oris/ori/add to implement "MOVD $<large const>, Rx" or "ADD $<large const>, Rx, Ry", or large offset load/stores like "MOVD <large offset>(Rx), Ry" using the same framework which dynamically configures optab. When selecting prefixed instruction forms, the assembler must also use new relocations. These new relocations are always PC-rel by design, thus code assembled as such has no implicit requirement to maintain a TOC pointer when assembling shared objects. Thus, we can safely avoid situations where some Go objects use a TOC pointer, and some do not. This greatly simplifies linking Go objects. For more details about the challenges of linking TOC and PC-rel compiled code, see the PPC64 ELFv2 ABI. The TOC pointer in R2 is still maintained in those build configurations which previously required it (e.g. buildmode=pie). However, Go code built with PC-rel relocations does not require the TOC pointer. A future change could remove the overhead of maintaining a TOC pointer in those build configurations. This is enabled only for power10/ppc64le/linux. A final noteworthy difference between the prefixed and regular load/store instruction forms is the removal of the DS/DQ form restrictions. That is, the immediate operand does not need to be aligned. 
Updates #44549 Change-Id: If59c216d203c3eed963bfa08855e21771e6ed669 Reviewed-on: https://go-review.googlesource.com/c/go/+/355150 Reviewed-by: Michael Pratt Reviewed-by: Matthew Dempsky TryBot-Result: Gopher Robot Reviewed-by: Lynn Boger Run-TryBot: Paul Murphy --- src/cmd/asm/internal/asm/endtoend_test.go | 12 +- src/cmd/asm/internal/asm/testdata/ppc64.s | 27 +- src/cmd/internal/obj/ppc64/asm9.go | 296 ++++++++++++++++++---- src/cmd/internal/obj/ppc64/doc.go | 29 +++ 4 files changed, 293 insertions(+), 71 deletions(-) diff --git a/src/cmd/asm/internal/asm/endtoend_test.go b/src/cmd/asm/internal/asm/endtoend_test.go index 3928e364ab..ef41667c8e 100644 --- a/src/cmd/asm/internal/asm/endtoend_test.go +++ b/src/cmd/asm/internal/asm/endtoend_test.go @@ -457,10 +457,14 @@ func TestLOONG64Encoder(t *testing.T) { } func TestPPC64EndToEnd(t *testing.T) { - testEndToEnd(t, "ppc64", "ppc64") - - // The assembler accepts all instructions irrespective of the GOPPC64 value. - testEndToEnd(t, "ppc64", "ppc64_p10") + defer func(old int) { buildcfg.GOPPC64 = old }(buildcfg.GOPPC64) + for _, goppc64 := range []int{8, 9, 10} { + t.Logf("GOPPC64=power%d", goppc64) + buildcfg.GOPPC64 = goppc64 + // Some pseudo-ops may assemble differently depending on GOPPC64 + testEndToEnd(t, "ppc64", "ppc64") + testEndToEnd(t, "ppc64", "ppc64_p10") + } } func TestRISCVEndToEnd(t *testing.T) { diff --git a/src/cmd/asm/internal/asm/testdata/ppc64.s b/src/cmd/asm/internal/asm/testdata/ppc64.s index 367d7b77db..72ae796018 100644 --- a/src/cmd/asm/internal/asm/testdata/ppc64.s +++ b/src/cmd/asm/internal/asm/testdata/ppc64.s @@ -20,19 +20,19 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0 MOVD $65536, R6 // 64060001 MOVD $-32767, R5 // 38a08001 MOVD $-32768, R6 // 38c08000 - MOVD $1234567, R5 // 6405001260a5d687 + MOVD $1234567, R5 // 6405001260a5d687 or 0600001238a0d687 MOVW $1, R3 // 38600001 MOVW $-1, R4 // 3880ffff MOVW $65535, R5 // 6005ffff MOVW $65536, R6 // 64060001 MOVW $-32767, R5 // 38a08001 MOVW $-32768, 
R6 // 38c08000 - MOVW $1234567, R5 // 6405001260a5d687 + MOVW $1234567, R5 // 6405001260a5d687 or 0600001238a0d687 // Hex constant 0x80000001 - MOVW $2147483649, R5 // 6405800060a50001 - MOVD $2147483649, R5 // 6405800060a50001 + MOVW $2147483649, R5 // 6405800060a50001 or 0600800038a00001 + MOVD $2147483649, R5 // 6405800060a50001 or 0600800038a00001 // Hex constant 0xFFFFFFFF80000001 - MOVD $-2147483647, R5 // 3ca0800060a50001 + MOVD $-2147483647, R5 // 3ca0800060a50001 or 0603800038a00001 MOVD 8(R3), R4 // e8830008 MOVD (R3)(R4), R5 // 7ca4182a MOVD (R3)(R0), R5 // 7ca0182a @@ -71,8 +71,8 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0 MOVHBR (R3)(R4), R5 // 7ca41e2c MOVHBR (R3)(R0), R5 // 7ca01e2c MOVHBR (R3), R5 // 7ca01e2c - MOVD $foo+4009806848(FP), R5 // 3ca1ef0138a5cc40 - MOVD $foo(SB), R5 // 3ca0000038a50000 + MOVD $foo+4009806848(FP), R5 // 3ca1ef0138a5cc40 or 0600ef0038a1cc40 + MOVD $foo(SB), R5 // 3ca0000038a50000 or 0610000038a00000 MOVDU 8(R3), R4 // e8830009 MOVDU (R3)(R4), R5 // 7ca4186a @@ -156,16 +156,21 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0 ADD $1, R3, R4 // 38830001 ADD $-1, R4 // 3884ffff ADD $-1, R4, R5 // 38a4ffff - ADD $65535, R5 // 601fffff7cbf2a14 - ADD $65535, R5, R6 // 601fffff7cdf2a14 + ADD $65535, R5 // 601fffff7cbf2a14 or 0600000038a5ffff + ADD $65535, R5, R6 // 601fffff7cdf2a14 or 0600000038c5ffff ADD $65536, R6 // 3cc60001 ADD $65536, R6, R7 // 3ce60001 ADD $-32767, R5 // 38a58001 ADD $-32767, R5, R4 // 38858001 ADD $-32768, R6 // 38c68000 ADD $-32768, R6, R5 // 38a68000 - ADD $1234567, R5 // 641f001263ffd6877cbf2a14 - ADD $1234567, R5, R6 // 641f001263ffd6877cdf2a14 + + //TODO: this compiles to add r5,r6,r0. It should be addi r5,r6,0. + // this is OK since r0 == $0, but the latter is preferred. 
+ ADD $0, R6, R5 // 7ca60214 + + ADD $1234567, R5 // 641f001263ffd6877cbf2a14 or 0600001238a5d687 + ADD $1234567, R5, R6 // 641f001263ffd6877cdf2a14 or 0600001238c5d687 ADDEX R3, R5, $3, R6 // 7cc32f54 ADDEX R3, $3, R5, R6 // 7cc32f54 ADDIS $8, R3 // 3c630008 diff --git a/src/cmd/internal/obj/ppc64/asm9.go b/src/cmd/internal/obj/ppc64/asm9.go index 9469edaf4c..40258ca0b2 100644 --- a/src/cmd/internal/obj/ppc64/asm9.go +++ b/src/cmd/internal/obj/ppc64/asm9.go @@ -34,6 +34,7 @@ import ( "cmd/internal/objabi" "encoding/binary" "fmt" + "internal/buildcfg" "log" "math" "math/bits" @@ -58,6 +59,12 @@ const ( r0iszero = 1 ) +const ( + // R bit option in prefixed load/store/add D-form operations + PFX_R_ABS = 0 // Offset is absolute + PFX_R_PCREL = 1 // Offset is relative to PC, RA should be 0 +) + type Optab struct { as obj.As // Opcode a1 uint8 // p.From argument (obj.Addr). p is of type obj.Prog. @@ -108,8 +115,6 @@ var optab = []Optab{ {as: AADD, a1: C_UCON, a6: C_REG, type_: 20, size: 4}, {as: AADD, a1: C_ANDCON, a2: C_REG, a6: C_REG, type_: 22, size: 8}, {as: AADD, a1: C_ANDCON, a6: C_REG, type_: 22, size: 8}, - {as: AADD, a1: C_LCON, a2: C_REG, a6: C_REG, type_: 22, size: 12}, - {as: AADD, a1: C_LCON, a6: C_REG, type_: 22, size: 12}, {as: AADDIS, a1: C_ADDCON, a2: C_REG, a6: C_REG, type_: 20, size: 4}, {as: AADDIS, a1: C_ADDCON, a6: C_REG, type_: 20, size: 4}, {as: AADDC, a1: C_REG, a2: C_REG, a6: C_REG, type_: 2, size: 4}, @@ -211,64 +216,42 @@ var optab = []Optab{ {as: AMOVHBR, a1: C_REG, a6: C_XOREG, type_: 44, size: 4}, {as: AMOVHBR, a1: C_XOREG, a6: C_REG, type_: 45, size: 4}, - {as: AMOVB, a1: C_ADDR, a6: C_REG, type_: 75, size: 12}, - {as: AMOVB, a1: C_LOREG, a6: C_REG, type_: 36, size: 12}, {as: AMOVB, a1: C_SOREG, a6: C_REG, type_: 8, size: 8}, {as: AMOVB, a1: C_XOREG, a6: C_REG, type_: 109, size: 8}, - {as: AMOVB, a1: C_REG, a6: C_ADDR, type_: 74, size: 8}, {as: AMOVB, a1: C_REG, a6: C_SOREG, type_: 7, size: 4}, - {as: AMOVB, a1: C_REG, a6: C_LOREG, type_: 
35, size: 8}, {as: AMOVB, a1: C_REG, a6: C_XOREG, type_: 108, size: 4}, {as: AMOVB, a1: C_REG, a6: C_REG, type_: 13, size: 4}, - {as: AMOVBZ, a1: C_ADDR, a6: C_REG, type_: 75, size: 8}, - {as: AMOVBZ, a1: C_LOREG, a6: C_REG, type_: 36, size: 8}, {as: AMOVBZ, a1: C_SOREG, a6: C_REG, type_: 8, size: 4}, {as: AMOVBZ, a1: C_XOREG, a6: C_REG, type_: 109, size: 4}, - {as: AMOVBZ, a1: C_REG, a6: C_ADDR, type_: 74, size: 8}, {as: AMOVBZ, a1: C_REG, a6: C_SOREG, type_: 7, size: 4}, - {as: AMOVBZ, a1: C_REG, a6: C_LOREG, type_: 35, size: 8}, {as: AMOVBZ, a1: C_REG, a6: C_XOREG, type_: 108, size: 4}, {as: AMOVBZ, a1: C_REG, a6: C_REG, type_: 13, size: 4}, {as: AMOVD, a1: C_ADDCON, a6: C_REG, type_: 3, size: 4}, {as: AMOVD, a1: C_ANDCON, a6: C_REG, type_: 3, size: 4}, {as: AMOVD, a1: C_UCON, a6: C_REG, type_: 3, size: 4}, - {as: AMOVD, a1: C_LCON, a6: C_REG, type_: 19, size: 8}, {as: AMOVD, a1: C_SACON, a6: C_REG, type_: 3, size: 4}, - {as: AMOVD, a1: C_LACON, a6: C_REG, type_: 26, size: 8}, - {as: AMOVD, a1: C_ADDR, a6: C_REG, type_: 75, size: 8}, {as: AMOVD, a1: C_SOREG, a6: C_REG, type_: 8, size: 4}, {as: AMOVD, a1: C_XOREG, a6: C_REG, type_: 109, size: 4}, {as: AMOVD, a1: C_SOREG, a6: C_SPR, type_: 107, size: 8}, - {as: AMOVD, a1: C_LOREG, a6: C_REG, type_: 36, size: 8}, - {as: AMOVD, a1: C_TLS_LE, a6: C_REG, type_: 79, size: 8}, - {as: AMOVD, a1: C_TLS_IE, a6: C_REG, type_: 80, size: 12}, {as: AMOVD, a1: C_SPR, a6: C_REG, type_: 66, size: 4}, - {as: AMOVD, a1: C_REG, a6: C_ADDR, type_: 74, size: 8}, {as: AMOVD, a1: C_REG, a6: C_SOREG, type_: 7, size: 4}, {as: AMOVD, a1: C_REG, a6: C_XOREG, type_: 108, size: 4}, {as: AMOVD, a1: C_SPR, a6: C_SOREG, type_: 106, size: 8}, - {as: AMOVD, a1: C_REG, a6: C_LOREG, type_: 35, size: 8}, {as: AMOVD, a1: C_REG, a6: C_SPR, type_: 66, size: 4}, {as: AMOVD, a1: C_REG, a6: C_REG, type_: 13, size: 4}, {as: AMOVW, a1: C_ADDCON, a6: C_REG, type_: 3, size: 4}, {as: AMOVW, a1: C_ANDCON, a6: C_REG, type_: 3, size: 4}, {as: AMOVW, a1: C_UCON, 
a6: C_REG, type_: 3, size: 4}, - {as: AMOVW, a1: C_LCON, a6: C_REG, type_: 19, size: 8}, {as: AMOVW, a1: C_SACON, a6: C_REG, type_: 3, size: 4}, - {as: AMOVW, a1: C_LACON, a6: C_REG, type_: 26, size: 8}, - {as: AMOVW, a1: C_ADDR, a6: C_REG, type_: 75, size: 8}, {as: AMOVW, a1: C_CREG, a6: C_REG, type_: 68, size: 4}, {as: AMOVW, a1: C_SOREG, a6: C_REG, type_: 8, size: 4}, - {as: AMOVW, a1: C_LOREG, a6: C_REG, type_: 36, size: 8}, {as: AMOVW, a1: C_XOREG, a6: C_REG, type_: 109, size: 4}, {as: AMOVW, a1: C_SPR, a6: C_REG, type_: 66, size: 4}, - {as: AMOVW, a1: C_REG, a6: C_ADDR, type_: 74, size: 8}, {as: AMOVW, a1: C_REG, a6: C_CREG, type_: 69, size: 4}, {as: AMOVW, a1: C_REG, a6: C_SOREG, type_: 7, size: 4}, - {as: AMOVW, a1: C_REG, a6: C_LOREG, type_: 35, size: 8}, {as: AMOVW, a1: C_REG, a6: C_XOREG, type_: 108, size: 4}, {as: AMOVW, a1: C_REG, a6: C_SPR, type_: 66, size: 4}, {as: AMOVW, a1: C_REG, a6: C_REG, type_: 13, size: 4}, @@ -276,14 +259,10 @@ var optab = []Optab{ {as: AFMOVD, a1: C_ADDCON, a6: C_FREG, type_: 24, size: 8}, {as: AFMOVD, a1: C_SOREG, a6: C_FREG, type_: 8, size: 4}, {as: AFMOVD, a1: C_XOREG, a6: C_FREG, type_: 109, size: 4}, - {as: AFMOVD, a1: C_LOREG, a6: C_FREG, type_: 36, size: 8}, {as: AFMOVD, a1: C_ZCON, a6: C_FREG, type_: 24, size: 4}, - {as: AFMOVD, a1: C_ADDR, a6: C_FREG, type_: 75, size: 8}, {as: AFMOVD, a1: C_FREG, a6: C_FREG, type_: 33, size: 4}, {as: AFMOVD, a1: C_FREG, a6: C_SOREG, type_: 7, size: 4}, {as: AFMOVD, a1: C_FREG, a6: C_XOREG, type_: 108, size: 4}, - {as: AFMOVD, a1: C_FREG, a6: C_LOREG, type_: 35, size: 8}, - {as: AFMOVD, a1: C_FREG, a6: C_ADDR, type_: 74, size: 8}, {as: AFMOVSX, a1: C_XOREG, a6: C_FREG, type_: 45, size: 4}, {as: AFMOVSX, a1: C_FREG, a6: C_XOREG, type_: 44, size: 4}, @@ -535,10 +514,71 @@ var optab = []Optab{ {as: obj.APCALIGN, a1: C_LCON, type_: 0, size: 0}, // align code } +// These are opcodes above which may generate different sequences depending on whether prefix opcode support +// is available 
+type PrefixableOptab struct { + Optab + minGOPPC64 int // Minimum GOPPC64 required to support this. + pfxsize int8 // Instruction sequence size when prefixed opcodes are used +} + +// The prefixable optab entry contains the pseudo-opcodes which generate relocations, or may generate +// a more efficient sequence of instructions if a prefixed version exists (e.g. paddi instead of oris/ori/add). +// +// This table is meant to transform all sequences which might be TOC-relative into an equivalent PC-relative +// sequence. It also encompasses several transformations which do not involve relocations; those could be +// separated and applied to AIX and other non-ELF targets. Likewise, the prefixed forms do not have encoding +// restrictions on the offset, so they are also used for static binaries to allow better code generation. For example: +// +// MOVD something-byte-aligned(Rx), Ry +// MOVD 3(Rx), Ry +// +// is allowed when the prefixed forms are used. +// +// This requires an ISA 3.1 compatible cpu (e.g. Power10), and when linking externally an ELFv2 1.5 compliant linker. 
+var prefixableOptab = []PrefixableOptab{ + {Optab: Optab{as: AMOVD, a1: C_LCON, a6: C_REG, type_: 19, size: 8}, minGOPPC64: 10, pfxsize: 8}, + {Optab: Optab{as: AMOVD, a1: C_ADDR, a6: C_REG, type_: 75, size: 8}, minGOPPC64: 10, pfxsize: 8}, + {Optab: Optab{as: AMOVD, a1: C_TLS_LE, a6: C_REG, type_: 79, size: 8}, minGOPPC64: 10, pfxsize: 8}, + {Optab: Optab{as: AMOVD, a1: C_TLS_IE, a6: C_REG, type_: 80, size: 12}, minGOPPC64: 10, pfxsize: 12}, + {Optab: Optab{as: AMOVD, a1: C_LACON, a6: C_REG, type_: 26, size: 8}, minGOPPC64: 10, pfxsize: 8}, + {Optab: Optab{as: AMOVD, a1: C_LOREG, a6: C_REG, type_: 36, size: 8}, minGOPPC64: 10, pfxsize: 8}, + {Optab: Optab{as: AMOVD, a1: C_REG, a6: C_LOREG, type_: 35, size: 8}, minGOPPC64: 10, pfxsize: 8}, + {Optab: Optab{as: AMOVD, a1: C_REG, a6: C_ADDR, type_: 74, size: 8}, minGOPPC64: 10, pfxsize: 8}, + + {Optab: Optab{as: AMOVW, a1: C_LCON, a6: C_REG, type_: 19, size: 8}, minGOPPC64: 10, pfxsize: 8}, + {Optab: Optab{as: AMOVW, a1: C_LACON, a6: C_REG, type_: 26, size: 8}, minGOPPC64: 10, pfxsize: 8}, + {Optab: Optab{as: AMOVW, a1: C_LOREG, a6: C_REG, type_: 36, size: 8}, minGOPPC64: 10, pfxsize: 8}, + {Optab: Optab{as: AMOVW, a1: C_ADDR, a6: C_REG, type_: 75, size: 8}, minGOPPC64: 10, pfxsize: 8}, + {Optab: Optab{as: AMOVW, a1: C_REG, a6: C_LOREG, type_: 35, size: 8}, minGOPPC64: 10, pfxsize: 8}, + {Optab: Optab{as: AMOVW, a1: C_REG, a6: C_ADDR, type_: 74, size: 8}, minGOPPC64: 10, pfxsize: 8}, + + {Optab: Optab{as: AMOVB, a1: C_REG, a6: C_LOREG, type_: 35, size: 8}, minGOPPC64: 10, pfxsize: 8}, + {Optab: Optab{as: AMOVB, a1: C_LOREG, a6: C_REG, type_: 36, size: 12}, minGOPPC64: 10, pfxsize: 12}, + {Optab: Optab{as: AMOVB, a1: C_ADDR, a6: C_REG, type_: 75, size: 12}, minGOPPC64: 10, pfxsize: 12}, + {Optab: Optab{as: AMOVB, a1: C_REG, a6: C_ADDR, type_: 74, size: 8}, minGOPPC64: 10, pfxsize: 8}, + + {Optab: Optab{as: AMOVBZ, a1: C_LOREG, a6: C_REG, type_: 36, size: 8}, minGOPPC64: 10, pfxsize: 8}, + {Optab: Optab{as: AMOVBZ, a1: 
C_ADDR, a6: C_REG, type_: 75, size: 8}, minGOPPC64: 10, pfxsize: 8}, + {Optab: Optab{as: AMOVBZ, a1: C_REG, a6: C_LOREG, type_: 35, size: 8}, minGOPPC64: 10, pfxsize: 8}, + {Optab: Optab{as: AMOVBZ, a1: C_REG, a6: C_ADDR, type_: 74, size: 8}, minGOPPC64: 10, pfxsize: 8}, + + {Optab: Optab{as: AFMOVD, a1: C_LOREG, a6: C_FREG, type_: 36, size: 8}, minGOPPC64: 10, pfxsize: 8}, + {Optab: Optab{as: AFMOVD, a1: C_ADDR, a6: C_FREG, type_: 75, size: 8}, minGOPPC64: 10, pfxsize: 8}, + {Optab: Optab{as: AFMOVD, a1: C_FREG, a6: C_LOREG, type_: 35, size: 8}, minGOPPC64: 10, pfxsize: 8}, + {Optab: Optab{as: AFMOVD, a1: C_FREG, a6: C_ADDR, type_: 74, size: 8}, minGOPPC64: 10, pfxsize: 8}, + + {Optab: Optab{as: AADD, a1: C_LCON, a2: C_REG, a6: C_REG, type_: 22, size: 12}, minGOPPC64: 10, pfxsize: 8}, + {Optab: Optab{as: AADD, a1: C_LCON, a6: C_REG, type_: 22, size: 12}, minGOPPC64: 10, pfxsize: 8}, +} + var oprange [ALAST & obj.AMask][]Optab var xcmp [C_NCLASS][C_NCLASS]bool +var pfxEnabled = false // ISA 3.1 prefixed instructions are supported. +var buildOpCfg = "" // Save the os/cpu/arch tuple used to configure the assembler in buildop + // padding bytes to add to align code as requested. func addpad(pc, a int64, ctxt *obj.Link, cursym *obj.LSym) int { // For 16 and 32 byte alignment, there is a tradeoff @@ -1256,12 +1296,34 @@ func opset(a, b0 obj.As) { // Build the opcode table func buildop(ctxt *obj.Link) { - if oprange[AANDN&obj.AMask] != nil { - // Already initialized; stop now. + // PC-rel relocation support is available only for targets which support + // ELFv2 1.5 (only power10/ppc64le/linux today). + pfxEnabled = buildcfg.GOPPC64 >= 10 && buildcfg.GOOS == "linux" && buildcfg.GOARCH == "ppc64le" + cfg := fmt.Sprintf("power%d/%s/%s", buildcfg.GOPPC64, buildcfg.GOARCH, buildcfg.GOOS) + if cfg == buildOpCfg { + // Already initialized to correct OS/cpu; stop now. // This happens in the cmd/asm tests, // each of which re-initializes the arch. 
return } + buildOpCfg = cfg + + // Configure the optab entries which may generate prefix opcodes. + prefixOptab := make([]Optab, 0, len(prefixableOptab)) + for _, entry := range prefixableOptab { + entry := entry + if pfxEnabled && buildcfg.GOPPC64 >= entry.minGOPPC64 { + // Enable prefix opcode generation and resize. + entry.ispfx = true + entry.size = entry.pfxsize + } + // Use the legacy assembler function if none provided. + if entry.asmout == nil { + entry.asmout = asmout + } + prefixOptab = append(prefixOptab, entry.Optab) + + } for i := 0; i < C_NCLASS; i++ { for n := 0; n < C_NCLASS; n++ { @@ -1278,7 +1340,9 @@ func buildop(ctxt *obj.Link) { } // Append the generated entries, sort, and fill out oprange. optab = append(optab, optabGen...) + optab = append(optab, prefixOptab...) sort.Slice(optab, optabLess) + for i := 0; i < len(optab); { r := optab[i].as r0 := r & obj.AMask @@ -2227,6 +2291,13 @@ func AOP_ISEL(op uint32, t uint32, a uint32, b uint32, bc uint32) uint32 { return op | (t&31)<<21 | (a&31)<<16 | (b&31)<<11 | (bc&0x1F)<<6 } +func AOP_PFX_00_8LS(r, ie uint32) uint32 { + return 1<<26 | 0<<24 | 0<<23 | (r&1)<<20 | (ie & 0x3FFFF) +} +func AOP_PFX_10_MLS(r, ie uint32) uint32 { + return 1<<26 | 2<<24 | 0<<23 | (r&1)<<20 | (ie & 0x3FFFF) +} + const ( /* each rhs is OPVCC(_, _, _, _) */ OP_ADD = 31<<26 | 266<<1 | 0<<10 | 0 @@ -2266,6 +2337,52 @@ const ( OP_EXTSWSLI = 31<<26 | 445<<2 ) +func pfxadd(rt, ra int16, r uint32, imm32 int64) (uint32, uint32) { + return AOP_PFX_10_MLS(r, uint32(imm32>>16)), AOP_IRR(14<<26, uint32(rt), uint32(ra), uint32(imm32)) +} + +func pfxload(a obj.As, reg int16, base int16, r uint32) (uint32, uint32) { + switch a { + case AMOVH: + return AOP_PFX_10_MLS(r, 0), AOP_IRR(42<<26, uint32(reg), uint32(base), 0) + case AMOVW: + return AOP_PFX_00_8LS(r, 0), AOP_IRR(41<<26, uint32(reg), uint32(base), 0) + case AMOVD: + return AOP_PFX_00_8LS(r, 0), AOP_IRR(57<<26, uint32(reg), uint32(base), 0) + case AMOVBZ, AMOVB: + return 
AOP_PFX_10_MLS(r, 0), AOP_IRR(34<<26, uint32(reg), uint32(base), 0) + case AMOVHZ: + return AOP_PFX_10_MLS(r, 0), AOP_IRR(40<<26, uint32(reg), uint32(base), 0) + case AMOVWZ: + return AOP_PFX_10_MLS(r, 0), AOP_IRR(32<<26, uint32(reg), uint32(base), 0) + case AFMOVS: + return AOP_PFX_10_MLS(r, 0), AOP_IRR(48<<26, uint32(reg), uint32(base), 0) + case AFMOVD: + return AOP_PFX_10_MLS(r, 0), AOP_IRR(50<<26, uint32(reg), uint32(base), 0) + } + log.Fatalf("Error no pfxload for %v\n", a) + return 0, 0 +} + +func pfxstore(a obj.As, reg int16, base int16, r uint32) (uint32, uint32) { + switch a { + case AMOVD: + return AOP_PFX_00_8LS(r, 0), AOP_IRR(61<<26, uint32(reg), uint32(base), 0) + case AMOVBZ, AMOVB: + return AOP_PFX_10_MLS(r, 0), AOP_IRR(38<<26, uint32(reg), uint32(base), 0) + case AMOVHZ, AMOVH: + return AOP_PFX_10_MLS(r, 0), AOP_IRR(44<<26, uint32(reg), uint32(base), 0) + case AMOVWZ, AMOVW: + return AOP_PFX_10_MLS(r, 0), AOP_IRR(36<<26, uint32(reg), uint32(base), 0) + case AFMOVS: + return AOP_PFX_10_MLS(r, 0), AOP_IRR(52<<26, uint32(reg), uint32(base), 0) + case AFMOVD: + return AOP_PFX_10_MLS(r, 0), AOP_IRR(54<<26, uint32(reg), uint32(base), 0) + } + log.Fatalf("Error no pfxstore for %v\n", a) + return 0, 0 +} + func oclass(a *obj.Addr) int { return int(a.Class) - 1 } @@ -2324,7 +2441,7 @@ func (c *ctxt9) opform(insn uint32) int { // Encode instructions and create relocation for accessing s+d according to the // instruction op with source or destination (as appropriate) register reg. -func (c *ctxt9) symbolAccess(s *obj.LSym, d int64, reg int16, op uint32, reuse bool) (o1, o2 uint32) { +func (c *ctxt9) symbolAccess(s *obj.LSym, d int64, reg int16, op uint32, reuse bool) (o1, o2 uint32, rel *obj.Reloc) { if c.ctxt.Headtype == objabi.Haix { // Every symbol access must be made via a TOC anchor. 
c.ctxt.Diag("symbolAccess called for %s", s.Name) @@ -2345,7 +2462,7 @@ func (c *ctxt9) symbolAccess(s *obj.LSym, d int64, reg int16, op uint32, reuse b o1 = AOP_IRR(OP_ADDIS, uint32(reg), base, 0) o2 = AOP_IRR(op, uint32(reg), uint32(reg), 0) } - rel := obj.Addrel(c.cursym) + rel = obj.Addrel(c.cursym) rel.Off = int32(c.pc) rel.Siz = 8 rel.Sym = s @@ -2841,8 +2958,12 @@ func asmout(c *ctxt9, p *obj.Prog, o *Optab, out *[5]uint32) { case 19: /* mov $lcon,r ==> cau+or */ d := c.vregoff(&p.From) - o1 = loadu32(int(p.To.Reg), d) - o2 = LOP_IRR(OP_ORI, uint32(p.To.Reg), uint32(p.To.Reg), uint32(int32(d))) + if o.ispfx { + o1, o2 = pfxadd(p.To.Reg, REG_R0, PFX_R_ABS, d) + } else { + o1 = loadu32(int(p.To.Reg), d) + o2 = LOP_IRR(OP_ORI, uint32(p.To.Reg), uint32(p.To.Reg), uint32(int32(d))) + } case 20: /* add $ucon,,r | addis $addcon,r,r */ v := c.regoff(&p.From) @@ -2883,6 +3004,10 @@ func asmout(c *ctxt9, p *obj.Prog, o *Optab, out *[5]uint32) { o3 = AOP_RRR(c.oprrr(p.As), uint32(p.To.Reg), REGTMP, uint32(r)) } + if o.ispfx { + o1, o2 = pfxadd(int16(p.To.Reg), int16(r), PFX_R_ABS, d) + } + case 23: /* and $lcon/$addcon,r1,r2 ==> oris+ori+and/addi+and */ if p.To.Reg == REGTMP || p.Reg == REGTMP { c.ctxt.Diag("can't synthesize large constant\n%v", p) @@ -2962,11 +3087,12 @@ func asmout(c *ctxt9, p *obj.Prog, o *Optab, out *[5]uint32) { case 26: /* mov $lsext/auto/oreg,,r2 ==> addis+addi */ v := c.vregoff(&p.From) r := int(p.From.Reg) + var rel *obj.Reloc switch p.From.Name { case obj.NAME_EXTERN, obj.NAME_STATIC: // Load a 32 bit constant, or relocation depending on if a symbol is attached - o1, o2 = c.symbolAccess(p.From.Sym, v, p.To.Reg, OP_ADDI, true) + o1, o2, rel = c.symbolAccess(p.From.Sym, v, p.To.Reg, OP_ADDI, true) default: if r == 0 { r = c.getimpliedreg(&p.From, p) @@ -2976,6 +3102,15 @@ func asmout(c *ctxt9, p *obj.Prog, o *Optab, out *[5]uint32) { o2 = AOP_IRR(OP_ADDI, uint32(p.To.Reg), uint32(p.To.Reg), uint32(v)) } + if o.ispfx { + if rel == nil { + o1, o2 
= pfxadd(int16(p.To.Reg), int16(r), PFX_R_ABS, v) + } else { + o1, o2 = pfxadd(int16(p.To.Reg), REG_R0, PFX_R_PCREL, 0) + rel.Type = objabi.R_ADDRPOWER_PCREL34 + } + } + case 27: /* subc ra,$simm,rd => subfic rd,ra,$simm */ v := c.regoff(p.GetFrom3()) @@ -3118,12 +3253,18 @@ func asmout(c *ctxt9, p *obj.Prog, o *Optab, out *[5]uint32) { r = c.getimpliedreg(&p.To, p) } // Offsets in DS form stores must be a multiple of 4 - inst := c.opstore(p.As) - if c.opform(inst) == DS_FORM && v&0x3 != 0 { - log.Fatalf("invalid offset for DS form load/store %v", p) + if o.ispfx { + o1, o2 = pfxstore(p.As, p.From.Reg, int16(r), PFX_R_ABS) + o1 |= uint32((v >> 16) & 0x3FFFF) + o2 |= uint32(v & 0xFFFF) + } else { + inst := c.opstore(p.As) + if c.opform(inst) == DS_FORM && v&0x3 != 0 { + log.Fatalf("invalid offset for DS form load/store %v", p) + } + o1 = AOP_IRR(OP_ADDIS, REGTMP, uint32(r), uint32(high16adjusted(v))) + o2 = AOP_IRR(inst, uint32(p.From.Reg), REGTMP, uint32(v)) } - o1 = AOP_IRR(OP_ADDIS, REGTMP, uint32(r), uint32(high16adjusted(v))) - o2 = AOP_IRR(inst, uint32(p.From.Reg), REGTMP, uint32(v)) case 36: /* mov b/bz/h/hz lext/lauto/lreg,r ==> lbz+extsb/lbz/lha/lhz etc */ v := c.regoff(&p.From) @@ -3132,8 +3273,15 @@ func asmout(c *ctxt9, p *obj.Prog, o *Optab, out *[5]uint32) { if r == 0 { r = c.getimpliedreg(&p.From, p) } - o1 = AOP_IRR(OP_ADDIS, uint32(p.To.Reg), uint32(r), uint32(high16adjusted(v))) - o2 = AOP_IRR(c.opload(p.As), uint32(p.To.Reg), uint32(p.To.Reg), uint32(v)) + + if o.ispfx { + o1, o2 = pfxload(p.As, p.To.Reg, int16(r), PFX_R_ABS) + o1 |= uint32((v >> 16) & 0x3FFFF) + o2 |= uint32(v & 0xFFFF) + } else { + o1 = AOP_IRR(OP_ADDIS, uint32(p.To.Reg), uint32(r), uint32(high16adjusted(v))) + o2 = AOP_IRR(c.opload(p.As), uint32(p.To.Reg), uint32(p.To.Reg), uint32(v)) + } // Sign extend MOVB if needed o3 = LOP_RRR(OP_EXTSB, uint32(p.To.Reg), uint32(p.To.Reg), 0) @@ -3484,23 +3632,28 @@ func asmout(c *ctxt9, p *obj.Prog, o *Optab, out *[5]uint32) { /* relocation 
operations */ case 74: + var rel *obj.Reloc v := c.vregoff(&p.To) // Offsets in DS form stores must be a multiple of 4 inst := c.opstore(p.As) - if c.opform(inst) == DS_FORM && v&0x3 != 0 { + + // Can't reuse base for store instructions. + o1, o2, rel = c.symbolAccess(p.To.Sym, v, p.From.Reg, inst, false) + + // Rewrite as a prefixed store if supported. + if o.ispfx { + o1, o2 = pfxstore(p.As, p.From.Reg, REG_R0, PFX_R_PCREL) + rel.Type = objabi.R_ADDRPOWER_PCREL34 + } else if c.opform(inst) == DS_FORM && v&0x3 != 0 { log.Fatalf("invalid offset for DS form load/store %v", p) } - // Can't reuse base for store instructions. - o1, o2 = c.symbolAccess(p.To.Sym, v, p.From.Reg, inst, false) case 75: // 32 bit offset symbol loads (got/toc/addr) + var rel *obj.Reloc v := p.From.Offset // Offsets in DS form loads must be a multiple of 4 inst := c.opload(p.As) - if c.opform(inst) == DS_FORM && v&0x3 != 0 { - log.Fatalf("invalid offset for DS form load/store %v", p) - } switch p.From.Name { case obj.NAME_GOTREF, obj.NAME_TOCREF: if v != 0 { @@ -3508,7 +3661,7 @@ func asmout(c *ctxt9, p *obj.Prog, o *Optab, out *[5]uint32) { } o1 = AOP_IRR(OP_ADDIS, uint32(p.To.Reg), REG_R2, 0) o2 = AOP_IRR(inst, uint32(p.To.Reg), uint32(p.To.Reg), 0) - rel := obj.Addrel(c.cursym) + rel = obj.Addrel(c.cursym) rel.Off = int32(c.pc) rel.Siz = 8 rel.Sym = p.From.Sym @@ -3521,7 +3674,28 @@ func asmout(c *ctxt9, p *obj.Prog, o *Optab, out *[5]uint32) { default: reuseBaseReg := p.As != AFMOVD && p.As != AFMOVS // Reuse To.Reg as base register if not FP move. - o1, o2 = c.symbolAccess(p.From.Sym, v, p.To.Reg, inst, reuseBaseReg) + o1, o2, rel = c.symbolAccess(p.From.Sym, v, p.To.Reg, inst, reuseBaseReg) + } + + // Convert to prefixed forms if supported. 
+ if o.ispfx { + switch rel.Type { + case objabi.R_ADDRPOWER, objabi.R_ADDRPOWER_DS, + objabi.R_ADDRPOWER_TOCREL, objabi.R_ADDRPOWER_TOCREL_DS: + o1, o2 = pfxload(p.As, p.To.Reg, REG_R0, PFX_R_PCREL) + rel.Type = objabi.R_ADDRPOWER_PCREL34 + case objabi.R_POWER_TLS_IE: + o1, o2 = pfxload(p.As, p.To.Reg, REG_R0, PFX_R_PCREL) + rel.Type = objabi.R_POWER_TLS_IE_PCREL34 + case objabi.R_ADDRPOWER_GOT: + o1, o2 = pfxload(p.As, p.To.Reg, REG_R0, PFX_R_PCREL) + rel.Type = objabi.R_ADDRPOWER_GOT_PCREL34 + default: + // We've failed to convert a TOC-relative relocation to a PC-relative one. + log.Fatalf("Unable convert TOC-relative relocation %v to PC-relative", rel.Type) + } + } else if c.opform(inst) == DS_FORM && v&0x3 != 0 { + log.Fatalf("invalid offset for DS form load/store %v", p) } o3 = LOP_RRR(OP_EXTSB, uint32(p.To.Reg), uint32(p.To.Reg), 0) @@ -3530,26 +3704,36 @@ func asmout(c *ctxt9, p *obj.Prog, o *Optab, out *[5]uint32) { if p.From.Offset != 0 { c.ctxt.Diag("invalid offset against tls var %v", p) } - o1 = AOP_IRR(OP_ADDIS, uint32(p.To.Reg), REG_R13, 0) - o2 = AOP_IRR(OP_ADDI, uint32(p.To.Reg), uint32(p.To.Reg), 0) rel := obj.Addrel(c.cursym) rel.Off = int32(c.pc) rel.Siz = 8 rel.Sym = p.From.Sym - rel.Type = objabi.R_POWER_TLS_LE + if !o.ispfx { + o1 = AOP_IRR(OP_ADDIS, uint32(p.To.Reg), REG_R13, 0) + o2 = AOP_IRR(OP_ADDI, uint32(p.To.Reg), uint32(p.To.Reg), 0) + rel.Type = objabi.R_POWER_TLS_LE + } else { + o1, o2 = pfxadd(p.To.Reg, REG_R13, PFX_R_ABS, 0) + rel.Type = objabi.R_POWER_TLS_LE_TPREL34 + } case 80: if p.From.Offset != 0 { c.ctxt.Diag("invalid offset against tls var %v", p) } - o1 = AOP_IRR(OP_ADDIS, uint32(p.To.Reg), REG_R2, 0) - o2 = AOP_IRR(c.opload(AMOVD), uint32(p.To.Reg), uint32(p.To.Reg), 0) - o3 = AOP_RRR(OP_ADD, uint32(p.To.Reg), uint32(p.To.Reg), REG_R13) rel := obj.Addrel(c.cursym) rel.Off = int32(c.pc) rel.Siz = 8 rel.Sym = p.From.Sym rel.Type = objabi.R_POWER_TLS_IE + if !o.ispfx { + o1 = AOP_IRR(OP_ADDIS, uint32(p.To.Reg), REG_R2, 0) + 
o2 = AOP_IRR(c.opload(AMOVD), uint32(p.To.Reg), uint32(p.To.Reg), 0) + } else { + o1, o2 = pfxload(p.As, p.To.Reg, REG_R0, PFX_R_PCREL) + rel.Type = objabi.R_POWER_TLS_IE_PCREL34 + } + o3 = AOP_RRR(OP_ADD, uint32(p.To.Reg), uint32(p.To.Reg), REG_R13) rel = obj.Addrel(c.cursym) rel.Off = int32(c.pc) + 8 rel.Siz = 4 diff --git a/src/cmd/internal/obj/ppc64/doc.go b/src/cmd/internal/obj/ppc64/doc.go index 28340e425d..835182bcc6 100644 --- a/src/cmd/internal/obj/ppc64/doc.go +++ b/src/cmd/internal/obj/ppc64/doc.go @@ -250,5 +250,34 @@ Register names: CRnGT represents CR bit 1 of CR field n. (0-7) CRnEQ represents CR bit 2 of CR field n. (0-7) CRnSO represents CR bit 3 of CR field n. (0-7) + +# GOPPC64 >= power10 and its effects on Go asm + +When GOPPC64=power10 is used to compile a Go program for ppc64le/linux, MOV*, FMOV*, and ADD +opcodes which would require 2 or more machine instructions to emulate a 32 bit constant, or +symbolic reference are implemented using prefixed instructions. + +A user who wishes granular control over the generated machine code is advised to use Go asm +opcodes which explicitly translate to one PPC64 machine instruction. Most common opcodes +are supported. + +Some examples of how pseudo-op assembly changes with GOPPC64: + + Go asm GOPPC64 <= power9 GOPPC64 >= power10 + MOVD mypackage·foo(SB), R3 addis r2, r3, ... pld r3, ... + ld r3, r3, ... + + MOVD 131072(R3), R4 addis r31, r4, 2 pld r4, 131072(r3) + ld r4, 0(R3) + + ADD $131073, R3 lis r31, 2 paddi r3, r3, 131073 + addi r31, 1 + add r3,r31,r3 + + MOVD $131073, R3 lis r3, 2 pli r3, 131073 + addi r3, 1 + + MOVD $mypackage·foo(SB), R3 addis r2, r3, ... pla r3, ... + addi r3, r3, ... */ package ppc64 -- 2.50.0