cmd/internal/obj: use prefix insn in MOV* opcodes for GOPPC64=power10

author Paul E. Murphy <murp@ibm.com>

Tue, 9 Mar 2021 22:55:22 +0000 (16:55 -0600)

committer Paul Murphy <murp@ibm.com>

Tue, 28 Feb 2023 15:06:46 +0000 (15:06 +0000)
author Paul E. Murphy <murp@ibm.com>
Tue, 9 Mar 2021 22:55:22 +0000 (16:55 -0600)
committer Paul Murphy <murp@ibm.com>
Tue, 28 Feb 2023 15:06:46 +0000 (15:06 +0000)
diff --git a/src/cmd/asm/internal/asm/endtoend_test.go b/src/cmd/asm/internal/asm/endtoend_test.go

index 3928e364ab8436af3aca92aa4df9ee1356e43f24..ef41667c8e871cba9604d2de353eff7c26073fae 100644 (file)
--- a/src/cmd/asm/internal/asm/endtoend_test.go
+++ b/src/cmd/asm/internal/asm/endtoend_test.go
@@ -457,10 +457,14 @@ func TestLOONG64Encoder(t *testing.T) {
  }
  
  func TestPPC64EndToEnd(t *testing.T) {
-       testEndToEnd(t, "ppc64", "ppc64")
-
-       // The assembler accepts all instructions irrespective of the GOPPC64 value.
-       testEndToEnd(t, "ppc64", "ppc64_p10")
+       defer func(old int) { buildcfg.GOPPC64 = old }(buildcfg.GOPPC64)
+       for _, goppc64 := range []int{8, 9, 10} {
+               t.Logf("GOPPC64=power%d", goppc64)
+               buildcfg.GOPPC64 = goppc64
+               // Some pseudo-ops may assemble differently depending on GOPPC64
+               testEndToEnd(t, "ppc64", "ppc64")
+               testEndToEnd(t, "ppc64", "ppc64_p10")
+       }
  }
  
  func TestRISCVEndToEnd(t *testing.T) {
diff --git a/src/cmd/asm/internal/asm/testdata/ppc64.s b/src/cmd/asm/internal/asm/testdata/ppc64.s

index 367d7b77db0ba58d07af3fb1dd851dd1757ae8c8..72ae796018f3378dd7a99e83424c112026fa4eed 100644 (file)
--- a/src/cmd/asm/internal/asm/testdata/ppc64.s
+++ b/src/cmd/asm/internal/asm/testdata/ppc64.s
@@ -20,19 +20,19 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0
         MOVD $65536, R6                 // 64060001
         MOVD $-32767, R5                // 38a08001
         MOVD $-32768, R6                // 38c08000
-       MOVD $1234567, R5               // 6405001260a5d687
+       MOVD $1234567, R5               // 6405001260a5d687 or 0600001238a0d687
         MOVW $1, R3                     // 38600001
         MOVW $-1, R4                    // 3880ffff
         MOVW $65535, R5                 // 6005ffff
         MOVW $65536, R6                 // 64060001
         MOVW $-32767, R5                // 38a08001
         MOVW $-32768, R6                // 38c08000
-       MOVW $1234567, R5               // 6405001260a5d687
+       MOVW $1234567, R5               // 6405001260a5d687 or 0600001238a0d687
         // Hex constant 0x80000001
-       MOVW $2147483649, R5            // 6405800060a50001
-       MOVD $2147483649, R5            // 6405800060a50001
+       MOVW $2147483649, R5            // 6405800060a50001 or 0600800038a00001
+       MOVD $2147483649, R5            // 6405800060a50001 or 0600800038a00001
         // Hex constant 0xFFFFFFFF80000001
-       MOVD $-2147483647, R5           // 3ca0800060a50001
+       MOVD $-2147483647, R5           // 3ca0800060a50001 or 0603800038a00001
         MOVD 8(R3), R4                  // e8830008
         MOVD (R3)(R4), R5               // 7ca4182a
         MOVD (R3)(R0), R5               // 7ca0182a
@@ -71,8 +71,8 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0
         MOVHBR (R3)(R4), R5             // 7ca41e2c
         MOVHBR (R3)(R0), R5             // 7ca01e2c
         MOVHBR (R3), R5                 // 7ca01e2c
-       MOVD $foo+4009806848(FP), R5    // 3ca1ef0138a5cc40
-       MOVD $foo(SB), R5               // 3ca0000038a50000
+       MOVD $foo+4009806848(FP), R5    // 3ca1ef0138a5cc40 or 0600ef0038a1cc40
+       MOVD $foo(SB), R5               // 3ca0000038a50000 or 0610000038a00000
  
         MOVDU 8(R3), R4                 // e8830009
         MOVDU (R3)(R4), R5              // 7ca4186a
@@ -156,16 +156,21 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0
         ADD $1, R3, R4                  // 38830001
         ADD $-1, R4                     // 3884ffff
         ADD $-1, R4, R5                 // 38a4ffff
-       ADD $65535, R5                  // 601fffff7cbf2a14
-       ADD $65535, R5, R6              // 601fffff7cdf2a14
+       ADD $65535, R5                  // 601fffff7cbf2a14 or 0600000038a5ffff
+       ADD $65535, R5, R6              // 601fffff7cdf2a14 or 0600000038c5ffff
         ADD $65536, R6                  // 3cc60001
         ADD $65536, R6, R7              // 3ce60001
         ADD $-32767, R5                 // 38a58001
         ADD $-32767, R5, R4             // 38858001
         ADD $-32768, R6                 // 38c68000
         ADD $-32768, R6, R5             // 38a68000
-       ADD $1234567, R5                // 641f001263ffd6877cbf2a14
-       ADD $1234567, R5, R6            // 641f001263ffd6877cdf2a14
+
+       //TODO: this compiles to add r5,r6,r0. It should be addi r5,r6,0.
+       //      this is OK since r0 == $0, but the latter is preferred.
+       ADD $0, R6, R5                  // 7ca60214
+
+       ADD $1234567, R5                // 641f001263ffd6877cbf2a14 or 0600001238a5d687
+       ADD $1234567, R5, R6            // 641f001263ffd6877cdf2a14 or 0600001238c5d687
         ADDEX R3, R5, $3, R6            // 7cc32f54
         ADDEX R3, $3, R5, R6            // 7cc32f54
         ADDIS $8, R3                    // 3c630008
diff --git a/src/cmd/internal/obj/ppc64/asm9.go b/src/cmd/internal/obj/ppc64/asm9.go

index 9469edaf4c8052e38d7aa285049cccefeabe41c7..40258ca0b2b86afd80be49d59b923e2c46e75c0b 100644 (file)
--- a/src/cmd/internal/obj/ppc64/asm9.go
+++ b/src/cmd/internal/obj/ppc64/asm9.go
@@ -34,6 +34,7 @@ import (
         "cmd/internal/objabi"
         "encoding/binary"
         "fmt"
+       "internal/buildcfg"
         "log"
         "math"
         "math/bits"
@@ -58,6 +59,12 @@ const (
         r0iszero = 1
  )
  
+const (
+       // R bit option in prefixed load/store/add D-form operations
+       PFX_R_ABS   = 0 // Offset is absolute
+       PFX_R_PCREL = 1 // Offset is relative to PC, RA should be 0
+)
+
  type Optab struct {
         as    obj.As // Opcode
         a1    uint8  // p.From argument (obj.Addr). p is of type obj.Prog.
@@ -108,8 +115,6 @@ var optab = []Optab{
         {as: AADD, a1: C_UCON, a6: C_REG, type_: 20, size: 4},
         {as: AADD, a1: C_ANDCON, a2: C_REG, a6: C_REG, type_: 22, size: 8},
         {as: AADD, a1: C_ANDCON, a6: C_REG, type_: 22, size: 8},
-       {as: AADD, a1: C_LCON, a2: C_REG, a6: C_REG, type_: 22, size: 12},
-       {as: AADD, a1: C_LCON, a6: C_REG, type_: 22, size: 12},
         {as: AADDIS, a1: C_ADDCON, a2: C_REG, a6: C_REG, type_: 20, size: 4},
         {as: AADDIS, a1: C_ADDCON, a6: C_REG, type_: 20, size: 4},
         {as: AADDC, a1: C_REG, a2: C_REG, a6: C_REG, type_: 2, size: 4},
@@ -211,64 +216,42 @@ var optab = []Optab{
         {as: AMOVHBR, a1: C_REG, a6: C_XOREG, type_: 44, size: 4},
         {as: AMOVHBR, a1: C_XOREG, a6: C_REG, type_: 45, size: 4},
  
-       {as: AMOVB, a1: C_ADDR, a6: C_REG, type_: 75, size: 12},
-       {as: AMOVB, a1: C_LOREG, a6: C_REG, type_: 36, size: 12},
         {as: AMOVB, a1: C_SOREG, a6: C_REG, type_: 8, size: 8},
         {as: AMOVB, a1: C_XOREG, a6: C_REG, type_: 109, size: 8},
-       {as: AMOVB, a1: C_REG, a6: C_ADDR, type_: 74, size: 8},
         {as: AMOVB, a1: C_REG, a6: C_SOREG, type_: 7, size: 4},
-       {as: AMOVB, a1: C_REG, a6: C_LOREG, type_: 35, size: 8},
         {as: AMOVB, a1: C_REG, a6: C_XOREG, type_: 108, size: 4},
         {as: AMOVB, a1: C_REG, a6: C_REG, type_: 13, size: 4},
  
-       {as: AMOVBZ, a1: C_ADDR, a6: C_REG, type_: 75, size: 8},
-       {as: AMOVBZ, a1: C_LOREG, a6: C_REG, type_: 36, size: 8},
         {as: AMOVBZ, a1: C_SOREG, a6: C_REG, type_: 8, size: 4},
         {as: AMOVBZ, a1: C_XOREG, a6: C_REG, type_: 109, size: 4},
-       {as: AMOVBZ, a1: C_REG, a6: C_ADDR, type_: 74, size: 8},
         {as: AMOVBZ, a1: C_REG, a6: C_SOREG, type_: 7, size: 4},
-       {as: AMOVBZ, a1: C_REG, a6: C_LOREG, type_: 35, size: 8},
         {as: AMOVBZ, a1: C_REG, a6: C_XOREG, type_: 108, size: 4},
         {as: AMOVBZ, a1: C_REG, a6: C_REG, type_: 13, size: 4},
  
         {as: AMOVD, a1: C_ADDCON, a6: C_REG, type_: 3, size: 4},
         {as: AMOVD, a1: C_ANDCON, a6: C_REG, type_: 3, size: 4},
         {as: AMOVD, a1: C_UCON, a6: C_REG, type_: 3, size: 4},
-       {as: AMOVD, a1: C_LCON, a6: C_REG, type_: 19, size: 8},
         {as: AMOVD, a1: C_SACON, a6: C_REG, type_: 3, size: 4},
-       {as: AMOVD, a1: C_LACON, a6: C_REG, type_: 26, size: 8},
-       {as: AMOVD, a1: C_ADDR, a6: C_REG, type_: 75, size: 8},
         {as: AMOVD, a1: C_SOREG, a6: C_REG, type_: 8, size: 4},
         {as: AMOVD, a1: C_XOREG, a6: C_REG, type_: 109, size: 4},
         {as: AMOVD, a1: C_SOREG, a6: C_SPR, type_: 107, size: 8},
-       {as: AMOVD, a1: C_LOREG, a6: C_REG, type_: 36, size: 8},
-       {as: AMOVD, a1: C_TLS_LE, a6: C_REG, type_: 79, size: 8},
-       {as: AMOVD, a1: C_TLS_IE, a6: C_REG, type_: 80, size: 12},
         {as: AMOVD, a1: C_SPR, a6: C_REG, type_: 66, size: 4},
-       {as: AMOVD, a1: C_REG, a6: C_ADDR, type_: 74, size: 8},
         {as: AMOVD, a1: C_REG, a6: C_SOREG, type_: 7, size: 4},
         {as: AMOVD, a1: C_REG, a6: C_XOREG, type_: 108, size: 4},
         {as: AMOVD, a1: C_SPR, a6: C_SOREG, type_: 106, size: 8},
-       {as: AMOVD, a1: C_REG, a6: C_LOREG, type_: 35, size: 8},
         {as: AMOVD, a1: C_REG, a6: C_SPR, type_: 66, size: 4},
         {as: AMOVD, a1: C_REG, a6: C_REG, type_: 13, size: 4},
  
         {as: AMOVW, a1: C_ADDCON, a6: C_REG, type_: 3, size: 4},
         {as: AMOVW, a1: C_ANDCON, a6: C_REG, type_: 3, size: 4},
         {as: AMOVW, a1: C_UCON, a6: C_REG, type_: 3, size: 4},
-       {as: AMOVW, a1: C_LCON, a6: C_REG, type_: 19, size: 8},
         {as: AMOVW, a1: C_SACON, a6: C_REG, type_: 3, size: 4},
-       {as: AMOVW, a1: C_LACON, a6: C_REG, type_: 26, size: 8},
-       {as: AMOVW, a1: C_ADDR, a6: C_REG, type_: 75, size: 8},
         {as: AMOVW, a1: C_CREG, a6: C_REG, type_: 68, size: 4},
         {as: AMOVW, a1: C_SOREG, a6: C_REG, type_: 8, size: 4},
-       {as: AMOVW, a1: C_LOREG, a6: C_REG, type_: 36, size: 8},
         {as: AMOVW, a1: C_XOREG, a6: C_REG, type_: 109, size: 4},
         {as: AMOVW, a1: C_SPR, a6: C_REG, type_: 66, size: 4},
-       {as: AMOVW, a1: C_REG, a6: C_ADDR, type_: 74, size: 8},
         {as: AMOVW, a1: C_REG, a6: C_CREG, type_: 69, size: 4},
         {as: AMOVW, a1: C_REG, a6: C_SOREG, type_: 7, size: 4},
-       {as: AMOVW, a1: C_REG, a6: C_LOREG, type_: 35, size: 8},
         {as: AMOVW, a1: C_REG, a6: C_XOREG, type_: 108, size: 4},
         {as: AMOVW, a1: C_REG, a6: C_SPR, type_: 66, size: 4},
         {as: AMOVW, a1: C_REG, a6: C_REG, type_: 13, size: 4},
@@ -276,14 +259,10 @@ var optab = []Optab{
         {as: AFMOVD, a1: C_ADDCON, a6: C_FREG, type_: 24, size: 8},
         {as: AFMOVD, a1: C_SOREG, a6: C_FREG, type_: 8, size: 4},
         {as: AFMOVD, a1: C_XOREG, a6: C_FREG, type_: 109, size: 4},
-       {as: AFMOVD, a1: C_LOREG, a6: C_FREG, type_: 36, size: 8},
         {as: AFMOVD, a1: C_ZCON, a6: C_FREG, type_: 24, size: 4},
-       {as: AFMOVD, a1: C_ADDR, a6: C_FREG, type_: 75, size: 8},
         {as: AFMOVD, a1: C_FREG, a6: C_FREG, type_: 33, size: 4},
         {as: AFMOVD, a1: C_FREG, a6: C_SOREG, type_: 7, size: 4},
         {as: AFMOVD, a1: C_FREG, a6: C_XOREG, type_: 108, size: 4},
-       {as: AFMOVD, a1: C_FREG, a6: C_LOREG, type_: 35, size: 8},
-       {as: AFMOVD, a1: C_FREG, a6: C_ADDR, type_: 74, size: 8},
  
         {as: AFMOVSX, a1: C_XOREG, a6: C_FREG, type_: 45, size: 4},
         {as: AFMOVSX, a1: C_FREG, a6: C_XOREG, type_: 44, size: 4},
@@ -535,10 +514,71 @@ var optab = []Optab{
         {as: obj.APCALIGN, a1: C_LCON, type_: 0, size: 0},   // align code
  }
  
+// PrefixableOptab describes an opcode from the table above which may generate a different instruction
+// sequence depending on whether prefixed opcode support is available.
+type PrefixableOptab struct {
+       Optab
+       minGOPPC64 int  // Minimum GOPPC64 required to support this.
+       pfxsize    int8 // Instruction sequence size when prefixed opcodes are used
+}
+
+// The prefixable optab entry contains the pseudo-opcodes which generate relocations, or may generate
+// a more efficient sequence of instructions if a prefixed version exists (ex. paddi instead of oris/ori/add).
+//
+// This table is meant to transform all sequences which might be TOC-relative into an equivalent PC-relative
+// sequence. It also encompasses several transformations which do not involve relocations, those could be
+// separated and applied to AIX and other non-ELF targets. Likewise, the prefixed forms do not have encoding
+// restrictions on the offset, so they are also used for static binaries to allow better code generation, e.g.
+//
+//     MOVD something-byte-aligned(Rx), Ry
+//     MOVD 3(Rx), Ry
+//
+// is allowed when the prefixed forms are used.
+//
+// This requires an ISA 3.1 compatible cpu (e.g. Power10), and, when linking externally, an ELFv2 1.5 compliant linker.
+var prefixableOptab = []PrefixableOptab{
+       {Optab: Optab{as: AMOVD, a1: C_LCON, a6: C_REG, type_: 19, size: 8}, minGOPPC64: 10, pfxsize: 8},
+       {Optab: Optab{as: AMOVD, a1: C_ADDR, a6: C_REG, type_: 75, size: 8}, minGOPPC64: 10, pfxsize: 8},
+       {Optab: Optab{as: AMOVD, a1: C_TLS_LE, a6: C_REG, type_: 79, size: 8}, minGOPPC64: 10, pfxsize: 8},
+       {Optab: Optab{as: AMOVD, a1: C_TLS_IE, a6: C_REG, type_: 80, size: 12}, minGOPPC64: 10, pfxsize: 12},
+       {Optab: Optab{as: AMOVD, a1: C_LACON, a6: C_REG, type_: 26, size: 8}, minGOPPC64: 10, pfxsize: 8},
+       {Optab: Optab{as: AMOVD, a1: C_LOREG, a6: C_REG, type_: 36, size: 8}, minGOPPC64: 10, pfxsize: 8},
+       {Optab: Optab{as: AMOVD, a1: C_REG, a6: C_LOREG, type_: 35, size: 8}, minGOPPC64: 10, pfxsize: 8},
+       {Optab: Optab{as: AMOVD, a1: C_REG, a6: C_ADDR, type_: 74, size: 8}, minGOPPC64: 10, pfxsize: 8},
+
+       {Optab: Optab{as: AMOVW, a1: C_LCON, a6: C_REG, type_: 19, size: 8}, minGOPPC64: 10, pfxsize: 8},
+       {Optab: Optab{as: AMOVW, a1: C_LACON, a6: C_REG, type_: 26, size: 8}, minGOPPC64: 10, pfxsize: 8},
+       {Optab: Optab{as: AMOVW, a1: C_LOREG, a6: C_REG, type_: 36, size: 8}, minGOPPC64: 10, pfxsize: 8},
+       {Optab: Optab{as: AMOVW, a1: C_ADDR, a6: C_REG, type_: 75, size: 8}, minGOPPC64: 10, pfxsize: 8},
+       {Optab: Optab{as: AMOVW, a1: C_REG, a6: C_LOREG, type_: 35, size: 8}, minGOPPC64: 10, pfxsize: 8},
+       {Optab: Optab{as: AMOVW, a1: C_REG, a6: C_ADDR, type_: 74, size: 8}, minGOPPC64: 10, pfxsize: 8},
+
+       {Optab: Optab{as: AMOVB, a1: C_REG, a6: C_LOREG, type_: 35, size: 8}, minGOPPC64: 10, pfxsize: 8},
+       {Optab: Optab{as: AMOVB, a1: C_LOREG, a6: C_REG, type_: 36, size: 12}, minGOPPC64: 10, pfxsize: 12},
+       {Optab: Optab{as: AMOVB, a1: C_ADDR, a6: C_REG, type_: 75, size: 12}, minGOPPC64: 10, pfxsize: 12},
+       {Optab: Optab{as: AMOVB, a1: C_REG, a6: C_ADDR, type_: 74, size: 8}, minGOPPC64: 10, pfxsize: 8},
+
+       {Optab: Optab{as: AMOVBZ, a1: C_LOREG, a6: C_REG, type_: 36, size: 8}, minGOPPC64: 10, pfxsize: 8},
+       {Optab: Optab{as: AMOVBZ, a1: C_ADDR, a6: C_REG, type_: 75, size: 8}, minGOPPC64: 10, pfxsize: 8},
+       {Optab: Optab{as: AMOVBZ, a1: C_REG, a6: C_LOREG, type_: 35, size: 8}, minGOPPC64: 10, pfxsize: 8},
+       {Optab: Optab{as: AMOVBZ, a1: C_REG, a6: C_ADDR, type_: 74, size: 8}, minGOPPC64: 10, pfxsize: 8},
+
+       {Optab: Optab{as: AFMOVD, a1: C_LOREG, a6: C_FREG, type_: 36, size: 8}, minGOPPC64: 10, pfxsize: 8},
+       {Optab: Optab{as: AFMOVD, a1: C_ADDR, a6: C_FREG, type_: 75, size: 8}, minGOPPC64: 10, pfxsize: 8},
+       {Optab: Optab{as: AFMOVD, a1: C_FREG, a6: C_LOREG, type_: 35, size: 8}, minGOPPC64: 10, pfxsize: 8},
+       {Optab: Optab{as: AFMOVD, a1: C_FREG, a6: C_ADDR, type_: 74, size: 8}, minGOPPC64: 10, pfxsize: 8},
+
+       {Optab: Optab{as: AADD, a1: C_LCON, a2: C_REG, a6: C_REG, type_: 22, size: 12}, minGOPPC64: 10, pfxsize: 8},
+       {Optab: Optab{as: AADD, a1: C_LCON, a6: C_REG, type_: 22, size: 12}, minGOPPC64: 10, pfxsize: 8},
+}
+
  var oprange [ALAST & obj.AMask][]Optab
  
  var xcmp [C_NCLASS][C_NCLASS]bool
  
+var pfxEnabled = false // ISA 3.1 prefixed instructions are supported.
+var buildOpCfg = ""    // Save the os/cpu/arch tuple used to configure the assembler in buildop
+
  // padding bytes to add to align code as requested.
  func addpad(pc, a int64, ctxt *obj.Link, cursym *obj.LSym) int {
         // For 16 and 32 byte alignment, there is a tradeoff
@@ -1256,12 +1296,34 @@ func opset(a, b0 obj.As) {
  
  // Build the opcode table
  func buildop(ctxt *obj.Link) {
-       if oprange[AANDN&obj.AMask] != nil {
-               // Already initialized; stop now.
+       // PC-rel relocation support is available only for targets which support
+       // ELFv2 1.5 (only power10/ppc64le/linux today).
+       pfxEnabled = buildcfg.GOPPC64 >= 10 && buildcfg.GOOS == "linux" && buildcfg.GOARCH == "ppc64le"
+       cfg := fmt.Sprintf("power%d/%s/%s", buildcfg.GOPPC64, buildcfg.GOARCH, buildcfg.GOOS)
+       if cfg == buildOpCfg {
+               // Already initialized to correct OS/cpu; stop now.
                 // This happens in the cmd/asm tests,
                 // each of which re-initializes the arch.
                 return
         }
+       buildOpCfg = cfg
+
+       // Configure the optab entries which may generate prefix opcodes.
+       prefixOptab := make([]Optab, 0, len(prefixableOptab))
+       for _, entry := range prefixableOptab {
+               entry := entry
+               if pfxEnabled && buildcfg.GOPPC64 >= entry.minGOPPC64 {
+                       // Enable prefix opcode generation and resize.
+                       entry.ispfx = true
+                       entry.size = entry.pfxsize
+               }
+               // Use the legacy assembler function if none provided.
+               if entry.asmout == nil {
+                       entry.asmout = asmout
+               }
+               prefixOptab = append(prefixOptab, entry.Optab)
+
+       }
  
         for i := 0; i < C_NCLASS; i++ {
                 for n := 0; n < C_NCLASS; n++ {
@@ -1278,7 +1340,9 @@ func buildop(ctxt *obj.Link) {
         }
         // Append the generated entries, sort, and fill out oprange.
         optab = append(optab, optabGen...)
+       optab = append(optab, prefixOptab...)
         sort.Slice(optab, optabLess)
+
         for i := 0; i < len(optab); {
                 r := optab[i].as
                 r0 := r & obj.AMask
@@ -2227,6 +2291,13 @@ func AOP_ISEL(op uint32, t uint32, a uint32, b uint32, bc uint32) uint32 {
         return op | (t&31)<<21 | (a&31)<<16 | (b&31)<<11 | (bc&0x1F)<<6
  }
  
+func AOP_PFX_00_8LS(r, ie uint32) uint32 {
+       return 1<<26 | 0<<24 | 0<<23 | (r&1)<<20 | (ie & 0x3FFFF)
+}
+func AOP_PFX_10_MLS(r, ie uint32) uint32 {
+       return 1<<26 | 2<<24 | 0<<23 | (r&1)<<20 | (ie & 0x3FFFF)
+}
+
  const (
         /* each rhs is OPVCC(_, _, _, _) */
         OP_ADD      = 31<<26 | 266<<1 | 0<<10 | 0
@@ -2266,6 +2337,52 @@ const (
         OP_EXTSWSLI = 31<<26 | 445<<2
  )
  
+func pfxadd(rt, ra int16, r uint32, imm32 int64) (uint32, uint32) {
+       return AOP_PFX_10_MLS(r, uint32(imm32>>16)), AOP_IRR(14<<26, uint32(rt), uint32(ra), uint32(imm32))
+}
+
+func pfxload(a obj.As, reg int16, base int16, r uint32) (uint32, uint32) {
+       switch a {
+       case AMOVH:
+               return AOP_PFX_10_MLS(r, 0), AOP_IRR(42<<26, uint32(reg), uint32(base), 0)
+       case AMOVW:
+               return AOP_PFX_00_8LS(r, 0), AOP_IRR(41<<26, uint32(reg), uint32(base), 0)
+       case AMOVD:
+               return AOP_PFX_00_8LS(r, 0), AOP_IRR(57<<26, uint32(reg), uint32(base), 0)
+       case AMOVBZ, AMOVB:
+               return AOP_PFX_10_MLS(r, 0), AOP_IRR(34<<26, uint32(reg), uint32(base), 0)
+       case AMOVHZ:
+               return AOP_PFX_10_MLS(r, 0), AOP_IRR(40<<26, uint32(reg), uint32(base), 0)
+       case AMOVWZ:
+               return AOP_PFX_10_MLS(r, 0), AOP_IRR(32<<26, uint32(reg), uint32(base), 0)
+       case AFMOVS:
+               return AOP_PFX_10_MLS(r, 0), AOP_IRR(48<<26, uint32(reg), uint32(base), 0)
+       case AFMOVD:
+               return AOP_PFX_10_MLS(r, 0), AOP_IRR(50<<26, uint32(reg), uint32(base), 0)
+       }
+       log.Fatalf("Error no pfxload for %v\n", a)
+       return 0, 0
+}
+
+func pfxstore(a obj.As, reg int16, base int16, r uint32) (uint32, uint32) {
+       switch a {
+       case AMOVD:
+               return AOP_PFX_00_8LS(r, 0), AOP_IRR(61<<26, uint32(reg), uint32(base), 0)
+       case AMOVBZ, AMOVB:
+               return AOP_PFX_10_MLS(r, 0), AOP_IRR(38<<26, uint32(reg), uint32(base), 0)
+       case AMOVHZ, AMOVH:
+               return AOP_PFX_10_MLS(r, 0), AOP_IRR(44<<26, uint32(reg), uint32(base), 0)
+       case AMOVWZ, AMOVW:
+               return AOP_PFX_10_MLS(r, 0), AOP_IRR(36<<26, uint32(reg), uint32(base), 0)
+       case AFMOVS:
+               return AOP_PFX_10_MLS(r, 0), AOP_IRR(52<<26, uint32(reg), uint32(base), 0)
+       case AFMOVD:
+               return AOP_PFX_10_MLS(r, 0), AOP_IRR(54<<26, uint32(reg), uint32(base), 0)
+       }
+       log.Fatalf("Error no pfxstore for %v\n", a)
+       return 0, 0
+}
+
  func oclass(a *obj.Addr) int {
         return int(a.Class) - 1
  }
@@ -2324,7 +2441,7 @@ func (c *ctxt9) opform(insn uint32) int {
  
  // Encode instructions and create relocation for accessing s+d according to the
  // instruction op with source or destination (as appropriate) register reg.
-func (c *ctxt9) symbolAccess(s *obj.LSym, d int64, reg int16, op uint32, reuse bool) (o1, o2 uint32) {
+func (c *ctxt9) symbolAccess(s *obj.LSym, d int64, reg int16, op uint32, reuse bool) (o1, o2 uint32, rel *obj.Reloc) {
         if c.ctxt.Headtype == objabi.Haix {
                 // Every symbol access must be made via a TOC anchor.
                 c.ctxt.Diag("symbolAccess called for %s", s.Name)
@@ -2345,7 +2462,7 @@ func (c *ctxt9) symbolAccess(s *obj.LSym, d int64, reg int16, op uint32, reuse b
                 o1 = AOP_IRR(OP_ADDIS, uint32(reg), base, 0)
                 o2 = AOP_IRR(op, uint32(reg), uint32(reg), 0)
         }
-       rel := obj.Addrel(c.cursym)
+       rel = obj.Addrel(c.cursym)
         rel.Off = int32(c.pc)
         rel.Siz = 8
         rel.Sym = s
@@ -2841,8 +2958,12 @@ func asmout(c *ctxt9, p *obj.Prog, o *Optab, out *[5]uint32) {
  
         case 19: /* mov $lcon,r ==> cau+or */
                 d := c.vregoff(&p.From)
-               o1 = loadu32(int(p.To.Reg), d)
-               o2 = LOP_IRR(OP_ORI, uint32(p.To.Reg), uint32(p.To.Reg), uint32(int32(d)))
+               if o.ispfx {
+                       o1, o2 = pfxadd(p.To.Reg, REG_R0, PFX_R_ABS, d)
+               } else {
+                       o1 = loadu32(int(p.To.Reg), d)
+                       o2 = LOP_IRR(OP_ORI, uint32(p.To.Reg), uint32(p.To.Reg), uint32(int32(d)))
+               }
  
         case 20: /* add $ucon,,r | addis $addcon,r,r */
                 v := c.regoff(&p.From)
@@ -2883,6 +3004,10 @@ func asmout(c *ctxt9, p *obj.Prog, o *Optab, out *[5]uint32) {
                         o3 = AOP_RRR(c.oprrr(p.As), uint32(p.To.Reg), REGTMP, uint32(r))
                 }
  
+               if o.ispfx {
+                       o1, o2 = pfxadd(int16(p.To.Reg), int16(r), PFX_R_ABS, d)
+               }
+
         case 23: /* and $lcon/$addcon,r1,r2 ==> oris+ori+and/addi+and */
                 if p.To.Reg == REGTMP || p.Reg == REGTMP {
                         c.ctxt.Diag("can't synthesize large constant\n%v", p)
@@ -2962,11 +3087,12 @@ func asmout(c *ctxt9, p *obj.Prog, o *Optab, out *[5]uint32) {
         case 26: /* mov $lsext/auto/oreg,,r2 ==> addis+addi */
                 v := c.vregoff(&p.From)
                 r := int(p.From.Reg)
+               var rel *obj.Reloc
  
                 switch p.From.Name {
                 case obj.NAME_EXTERN, obj.NAME_STATIC:
                         // Load a 32 bit constant, or relocation depending on if a symbol is attached
-                       o1, o2 = c.symbolAccess(p.From.Sym, v, p.To.Reg, OP_ADDI, true)
+                       o1, o2, rel = c.symbolAccess(p.From.Sym, v, p.To.Reg, OP_ADDI, true)
                 default:
                         if r == 0 {
                                 r = c.getimpliedreg(&p.From, p)
@@ -2976,6 +3102,15 @@ func asmout(c *ctxt9, p *obj.Prog, o *Optab, out *[5]uint32) {
                         o2 = AOP_IRR(OP_ADDI, uint32(p.To.Reg), uint32(p.To.Reg), uint32(v))
                 }
  
+               if o.ispfx {
+                       if rel == nil {
+                               o1, o2 = pfxadd(int16(p.To.Reg), int16(r), PFX_R_ABS, v)
+                       } else {
+                               o1, o2 = pfxadd(int16(p.To.Reg), REG_R0, PFX_R_PCREL, 0)
+                               rel.Type = objabi.R_ADDRPOWER_PCREL34
+                       }
+               }
+
         case 27: /* subc ra,$simm,rd => subfic rd,ra,$simm */
                 v := c.regoff(p.GetFrom3())
  
@@ -3118,12 +3253,18 @@ func asmout(c *ctxt9, p *obj.Prog, o *Optab, out *[5]uint32) {
                         r = c.getimpliedreg(&p.To, p)
                 }
                 // Offsets in DS form stores must be a multiple of 4
-               inst := c.opstore(p.As)
-               if c.opform(inst) == DS_FORM && v&0x3 != 0 {
-                       log.Fatalf("invalid offset for DS form load/store %v", p)
+               if o.ispfx {
+                       o1, o2 = pfxstore(p.As, p.From.Reg, int16(r), PFX_R_ABS)
+                       o1 |= uint32((v >> 16) & 0x3FFFF)
+                       o2 |= uint32(v & 0xFFFF)
+               } else {
+                       inst := c.opstore(p.As)
+                       if c.opform(inst) == DS_FORM && v&0x3 != 0 {
+                               log.Fatalf("invalid offset for DS form load/store %v", p)
+                       }
+                       o1 = AOP_IRR(OP_ADDIS, REGTMP, uint32(r), uint32(high16adjusted(v)))
+                       o2 = AOP_IRR(inst, uint32(p.From.Reg), REGTMP, uint32(v))
                 }
-               o1 = AOP_IRR(OP_ADDIS, REGTMP, uint32(r), uint32(high16adjusted(v)))
-               o2 = AOP_IRR(inst, uint32(p.From.Reg), REGTMP, uint32(v))
  
         case 36: /* mov b/bz/h/hz lext/lauto/lreg,r ==> lbz+extsb/lbz/lha/lhz etc */
                 v := c.regoff(&p.From)
@@ -3132,8 +3273,15 @@ func asmout(c *ctxt9, p *obj.Prog, o *Optab, out *[5]uint32) {
                 if r == 0 {
                         r = c.getimpliedreg(&p.From, p)
                 }
-               o1 = AOP_IRR(OP_ADDIS, uint32(p.To.Reg), uint32(r), uint32(high16adjusted(v)))
-               o2 = AOP_IRR(c.opload(p.As), uint32(p.To.Reg), uint32(p.To.Reg), uint32(v))
+
+               if o.ispfx {
+                       o1, o2 = pfxload(p.As, p.To.Reg, int16(r), PFX_R_ABS)
+                       o1 |= uint32((v >> 16) & 0x3FFFF)
+                       o2 |= uint32(v & 0xFFFF)
+               } else {
+                       o1 = AOP_IRR(OP_ADDIS, uint32(p.To.Reg), uint32(r), uint32(high16adjusted(v)))
+                       o2 = AOP_IRR(c.opload(p.As), uint32(p.To.Reg), uint32(p.To.Reg), uint32(v))
+               }
  
                 // Sign extend MOVB if needed
                 o3 = LOP_RRR(OP_EXTSB, uint32(p.To.Reg), uint32(p.To.Reg), 0)
@@ -3484,23 +3632,28 @@ func asmout(c *ctxt9, p *obj.Prog, o *Optab, out *[5]uint32) {
  
         /* relocation operations */
         case 74:
+               var rel *obj.Reloc
                 v := c.vregoff(&p.To)
                 // Offsets in DS form stores must be a multiple of 4
                 inst := c.opstore(p.As)
-               if c.opform(inst) == DS_FORM && v&0x3 != 0 {
+
+               // Can't reuse base for store instructions.
+               o1, o2, rel = c.symbolAccess(p.To.Sym, v, p.From.Reg, inst, false)
+
+               // Rewrite as a prefixed store if supported.
+               if o.ispfx {
+                       o1, o2 = pfxstore(p.As, p.From.Reg, REG_R0, PFX_R_PCREL)
+                       rel.Type = objabi.R_ADDRPOWER_PCREL34
+               } else if c.opform(inst) == DS_FORM && v&0x3 != 0 {
                         log.Fatalf("invalid offset for DS form load/store %v", p)
                 }
-               // Can't reuse base for store instructions.
-               o1, o2 = c.symbolAccess(p.To.Sym, v, p.From.Reg, inst, false)
  
         case 75: // 32 bit offset symbol loads (got/toc/addr)
+               var rel *obj.Reloc
                 v := p.From.Offset
  
                 // Offsets in DS form loads must be a multiple of 4
                 inst := c.opload(p.As)
-               if c.opform(inst) == DS_FORM && v&0x3 != 0 {
-                       log.Fatalf("invalid offset for DS form load/store %v", p)
-               }
                 switch p.From.Name {
                 case obj.NAME_GOTREF, obj.NAME_TOCREF:
                         if v != 0 {
@@ -3508,7 +3661,7 @@ func asmout(c *ctxt9, p *obj.Prog, o *Optab, out *[5]uint32) {
                         }
                         o1 = AOP_IRR(OP_ADDIS, uint32(p.To.Reg), REG_R2, 0)
                         o2 = AOP_IRR(inst, uint32(p.To.Reg), uint32(p.To.Reg), 0)
-                       rel := obj.Addrel(c.cursym)
+                       rel = obj.Addrel(c.cursym)
                         rel.Off = int32(c.pc)
                         rel.Siz = 8
                         rel.Sym = p.From.Sym
@@ -3521,7 +3674,28 @@ func asmout(c *ctxt9, p *obj.Prog, o *Optab, out *[5]uint32) {
                 default:
                         reuseBaseReg := p.As != AFMOVD && p.As != AFMOVS
                         // Reuse To.Reg as base register if not FP move.
-                       o1, o2 = c.symbolAccess(p.From.Sym, v, p.To.Reg, inst, reuseBaseReg)
+                       o1, o2, rel = c.symbolAccess(p.From.Sym, v, p.To.Reg, inst, reuseBaseReg)
+               }
+
+               // Convert to prefixed forms if supported.
+               if o.ispfx {
+                       switch rel.Type {
+                       case objabi.R_ADDRPOWER, objabi.R_ADDRPOWER_DS,
+                               objabi.R_ADDRPOWER_TOCREL, objabi.R_ADDRPOWER_TOCREL_DS:
+                               o1, o2 = pfxload(p.As, p.To.Reg, REG_R0, PFX_R_PCREL)
+                               rel.Type = objabi.R_ADDRPOWER_PCREL34
+                       case objabi.R_POWER_TLS_IE:
+                               o1, o2 = pfxload(p.As, p.To.Reg, REG_R0, PFX_R_PCREL)
+                               rel.Type = objabi.R_POWER_TLS_IE_PCREL34
+                       case objabi.R_ADDRPOWER_GOT:
+                               o1, o2 = pfxload(p.As, p.To.Reg, REG_R0, PFX_R_PCREL)
+                               rel.Type = objabi.R_ADDRPOWER_GOT_PCREL34
+                       default:
+                               // We've failed to convert a TOC-relative relocation to a PC-relative one.
+                               log.Fatalf("Unable convert TOC-relative relocation %v to PC-relative", rel.Type)
+                       }
+               } else if c.opform(inst) == DS_FORM && v&0x3 != 0 {
+                       log.Fatalf("invalid offset for DS form load/store %v", p)
                 }
  
                 o3 = LOP_RRR(OP_EXTSB, uint32(p.To.Reg), uint32(p.To.Reg), 0)
@@ -3530,26 +3704,36 @@ func asmout(c *ctxt9, p *obj.Prog, o *Optab, out *[5]uint32) {
                 if p.From.Offset != 0 {
                         c.ctxt.Diag("invalid offset against tls var %v", p)
                 }
-               o1 = AOP_IRR(OP_ADDIS, uint32(p.To.Reg), REG_R13, 0)
-               o2 = AOP_IRR(OP_ADDI, uint32(p.To.Reg), uint32(p.To.Reg), 0)
                 rel := obj.Addrel(c.cursym)
                 rel.Off = int32(c.pc)
                 rel.Siz = 8
                 rel.Sym = p.From.Sym
-               rel.Type = objabi.R_POWER_TLS_LE
+               if !o.ispfx {
+                       o1 = AOP_IRR(OP_ADDIS, uint32(p.To.Reg), REG_R13, 0)
+                       o2 = AOP_IRR(OP_ADDI, uint32(p.To.Reg), uint32(p.To.Reg), 0)
+                       rel.Type = objabi.R_POWER_TLS_LE
+               } else {
+                       o1, o2 = pfxadd(p.To.Reg, REG_R13, PFX_R_ABS, 0)
+                       rel.Type = objabi.R_POWER_TLS_LE_TPREL34
+               }
  
         case 80:
                 if p.From.Offset != 0 {
                         c.ctxt.Diag("invalid offset against tls var %v", p)
                 }
-               o1 = AOP_IRR(OP_ADDIS, uint32(p.To.Reg), REG_R2, 0)
-               o2 = AOP_IRR(c.opload(AMOVD), uint32(p.To.Reg), uint32(p.To.Reg), 0)
-               o3 = AOP_RRR(OP_ADD, uint32(p.To.Reg), uint32(p.To.Reg), REG_R13)
                 rel := obj.Addrel(c.cursym)
                 rel.Off = int32(c.pc)
                 rel.Siz = 8
                 rel.Sym = p.From.Sym
                 rel.Type = objabi.R_POWER_TLS_IE
+               if !o.ispfx {
+                       o1 = AOP_IRR(OP_ADDIS, uint32(p.To.Reg), REG_R2, 0)
+                       o2 = AOP_IRR(c.opload(AMOVD), uint32(p.To.Reg), uint32(p.To.Reg), 0)
+               } else {
+                       o1, o2 = pfxload(p.As, p.To.Reg, REG_R0, PFX_R_PCREL)
+                       rel.Type = objabi.R_POWER_TLS_IE_PCREL34
+               }
+               o3 = AOP_RRR(OP_ADD, uint32(p.To.Reg), uint32(p.To.Reg), REG_R13)
                 rel = obj.Addrel(c.cursym)
                 rel.Off = int32(c.pc) + 8
                 rel.Siz = 4
diff --git a/src/cmd/internal/obj/ppc64/doc.go b/src/cmd/internal/obj/ppc64/doc.go

index 28340e425d2747a55bbc2b6666f5b8eb349698da..835182bcc602394504a3b457b084304bf56cc48d 100644 (file)
--- a/src/cmd/internal/obj/ppc64/doc.go
+++ b/src/cmd/internal/obj/ppc64/doc.go
@@ -250,5 +250,34 @@ Register names:
         CRnGT represents CR bit 1 of CR field n. (0-7)
         CRnEQ represents CR bit 2 of CR field n. (0-7)
         CRnSO represents CR bit 3 of CR field n. (0-7)
+
+# GOPPC64 >= power10 and its effects on Go asm
+
+When GOPPC64=power10 is used to compile a Go program for ppc64le/linux, MOV*, FMOV*, and ADD
+opcodes which would require 2 or more machine instructions to emulate a 32 bit constant or a
+symbolic reference are implemented using prefixed instructions.
+
+A user who wishes granular control over the generated machine code is advised to use Go asm
+opcodes which explicitly translate to one PPC64 machine instruction. Most common opcodes
+are supported.
+
+Some examples of how pseudo-op assembly changes with GOPPC64:
+
+       Go asm                       GOPPC64 <= power9          GOPPC64 >= power10
+       MOVD mypackage·foo(SB), R3   addis r2, r3, ...          pld r3, ...
+                                    ld    r3, r3, ...
+
+       MOVD 131072(R3), R4          addis r4, r3, 2            pld r4, 131072(r3)
+                                    ld    r4, 0(r4)
+
+       ADD $131073, R3              lis  r31, 2                paddi r3, r3, 131073
+                                    addi r31, 1
+                                    add  r3,r31,r3
+
+       MOVD $131073, R3             lis  r3, 2                 pli r3, 131073
+                                    addi r3, 1
+
+       MOVD $mypackage·foo(SB), R3  addis r2, r3, ...          pla r3, ...
+                                    addi  r3, r3, ...
  */
  package ppc64
author	Paul E. Murphy <murp@ibm.com>
	Tue, 9 Mar 2021 22:55:22 +0000 (16:55 -0600)
committer	Paul Murphy <murp@ibm.com>
	Tue, 28 Feb 2023 15:06:46 +0000 (15:06 +0000)
src/cmd/asm/internal/asm/endtoend_test.go		patch \| blob \| history
src/cmd/asm/internal/asm/testdata/ppc64.s		patch \| blob \| history
src/cmd/internal/obj/ppc64/asm9.go		patch \| blob \| history
src/cmd/internal/obj/ppc64/doc.go		patch \| blob \| history