From d821ae2a9e4e5dafef904d38f75be44656128742 Mon Sep 17 00:00:00 2001
From: Michael Hudson-Doyle <michael.hudson@canonical.com>
Date: Mon, 31 Aug 2015 13:51:37 +1200
Subject: [PATCH] cmd/internal/obj, cmd/link: simplify ppc64 archreloc now that
 the original value is passed to it

And get rid of the stupid game of encoding the instruction in the addend.

Change-Id: Ib4de7515196cbc1e63b4261b01931cf02a44c1e6
Reviewed-on: https://go-review.googlesource.com/14055
Reviewed-by: Russ Cox <rsc@golang.org>
---
 src/cmd/internal/obj/ppc64/asm9.go | 38 ++++++++++-----------
 src/cmd/link/internal/ppc64/asm.go | 55 ++++++++++++++++--------------
 2 files changed, 49 insertions(+), 44 deletions(-)

diff --git a/src/cmd/internal/obj/ppc64/asm9.go b/src/cmd/internal/obj/ppc64/asm9.go
index c7bd835774..993cf178cd 100644
--- a/src/cmd/internal/obj/ppc64/asm9.go
+++ b/src/cmd/internal/obj/ppc64/asm9.go
@@ -1372,13 +1372,13 @@ func oclass(a *obj.Addr) int {
 	return int(a.Class) - 1
 }
 
-// add R_ADDRPOWER relocation to symbol s for the two instructions o1 and o2.
-func addaddrreloc(ctxt *obj.Link, s *obj.LSym, o1 *uint32, o2 *uint32) {
+// add R_ADDRPOWER relocation to symbol s with addend d
+func addaddrreloc(ctxt *obj.Link, s *obj.LSym, d int64) {
 	rel := obj.Addrel(ctxt.Cursym)
 	rel.Off = int32(ctxt.Pc)
 	rel.Siz = 8
 	rel.Sym = s
-	rel.Add = int64(uint64(*o1)<<32 | uint64(uint32(*o2)))
+	rel.Add = d
 	rel.Type = obj.R_ADDRPOWER
 }
 
@@ -1805,9 +1805,9 @@ func asmout(ctxt *obj.Link, p *obj.Prog, o *Optab, out []uint32) {
 			o1 = loadu32(int(p.To.Reg), d)
 			o2 = LOP_IRR(OP_ORI, uint32(p.To.Reg), uint32(p.To.Reg), uint32(int32(d)))
 		} else {
-			o1 = AOP_IRR(OP_ADDIS, REGTMP, REGZERO, uint32(high16adjusted(int32(d))))
-			o2 = AOP_IRR(OP_ADDI, uint32(p.To.Reg), REGTMP, uint32(d))
-			addaddrreloc(ctxt, p.From.Sym, &o1, &o2)
+			o1 = AOP_IRR(OP_ADDIS, REGTMP, REGZERO, 0)
+			o2 = AOP_IRR(OP_ADDI, uint32(p.To.Reg), REGTMP, 0)
+			addaddrreloc(ctxt, p.From.Sym, d)
 		}
 
 	//if(dlm) reloc(&p->from, p->pc, 0);
@@ -2369,29 +2369,29 @@ func asmout(ctxt *obj.Link, p *obj.Prog, o *Optab, out []uint32) {
 		o1 = 0 /* "An instruction consisting entirely of binary 0s is guaranteed
 		   always to be an illegal instruction."  */
 
-		/* relocation operations */
+	/* relocation operations */
 	case 74:
-		v := regoff(ctxt, &p.To)
+		v := vregoff(ctxt, &p.To)
 
-		o1 = AOP_IRR(OP_ADDIS, REGTMP, REGZERO, uint32(high16adjusted(v)))
-		o2 = AOP_IRR(uint32(opstore(ctxt, int(p.As))), uint32(p.From.Reg), REGTMP, uint32(v))
-		addaddrreloc(ctxt, p.To.Sym, &o1, &o2)
+		o1 = AOP_IRR(OP_ADDIS, REGTMP, REGZERO, 0)
+		o2 = AOP_IRR(uint32(opstore(ctxt, int(p.As))), uint32(p.From.Reg), REGTMP, 0)
+		addaddrreloc(ctxt, p.To.Sym, v)
 
 	//if(dlm) reloc(&p->to, p->pc, 1);
 
 	case 75:
-		v := regoff(ctxt, &p.From)
-		o1 = AOP_IRR(OP_ADDIS, REGTMP, REGZERO, uint32(high16adjusted(v)))
-		o2 = AOP_IRR(uint32(opload(ctxt, int(p.As))), uint32(p.To.Reg), REGTMP, uint32(v))
-		addaddrreloc(ctxt, p.From.Sym, &o1, &o2)
+		v := vregoff(ctxt, &p.From)
+		o1 = AOP_IRR(OP_ADDIS, REGTMP, REGZERO, 0)
+		o2 = AOP_IRR(uint32(opload(ctxt, int(p.As))), uint32(p.To.Reg), REGTMP, 0)
+		addaddrreloc(ctxt, p.From.Sym, v)
 
 	//if(dlm) reloc(&p->from, p->pc, 1);
 
 	case 76:
-		v := regoff(ctxt, &p.From)
-		o1 = AOP_IRR(OP_ADDIS, REGTMP, REGZERO, uint32(high16adjusted(v)))
-		o2 = AOP_IRR(uint32(opload(ctxt, int(p.As))), uint32(p.To.Reg), REGTMP, uint32(v))
-		addaddrreloc(ctxt, p.From.Sym, &o1, &o2)
+		v := vregoff(ctxt, &p.From)
+		o1 = AOP_IRR(OP_ADDIS, REGTMP, REGZERO, 0)
+		o2 = AOP_IRR(uint32(opload(ctxt, int(p.As))), uint32(p.To.Reg), REGTMP, 0)
+		addaddrreloc(ctxt, p.From.Sym, v)
 		o3 = LOP_RRR(OP_EXTSB, uint32(p.To.Reg), uint32(p.To.Reg), 0)
 
 		//if(dlm) reloc(&p->from, p->pc, 1);
diff --git a/src/cmd/link/internal/ppc64/asm.go b/src/cmd/link/internal/ppc64/asm.go
index f070921ecf..3798b24f21 100644
--- a/src/cmd/link/internal/ppc64/asm.go
+++ b/src/cmd/link/internal/ppc64/asm.go
@@ -348,26 +348,39 @@ func archreloc(r *ld.Reloc, s *ld.LSym, val *int64) int {
 		return 0
 
 	case obj.R_ADDRPOWER:
-		// r->add is two ppc64 instructions holding an immediate 32-bit constant.
-		// We want to add r->sym's address to that constant.
-		// The encoding of the immediate x<<16 + y,
-		// where x is the low 16 bits of the first instruction and y is the low 16
-		// bits of the second. Both x and y are signed (int16, not uint16).
-		o1 := uint32(r.Add >> 32)
-		o2 := uint32(r.Add)
-		t := ld.Symaddr(r.Sym)
-		if t < 0 {
+		// We are spreading a 31-bit address across two instructions,
+		// putting the high (adjusted) part in the low 16 bits of the
+		// first instruction and the low part in the low 16 bits of the
+		// second instruction.
+		t := ld.Symaddr(r.Sym) + r.Add
+		if t < 0 || t >= 1<<31 {
 			ld.Ctxt.Diag("relocation for %s is too big (>=2G): %d", s.Name, ld.Symaddr(r.Sym))
 		}
-
-		t += int64((o1&0xffff)<<16 + uint32(int32(o2)<<16>>16))
+		var o1, o2 uint32
+		if ld.Ctxt.Arch.ByteOrder == binary.BigEndian {
+			o1 = uint32(*val >> 32)
+			o2 = uint32(*val)
+		} else {
+			o1 = uint32(*val)
+			o2 = uint32(*val >> 32)
+		}
 		if t&0x8000 != 0 {
 			t += 0x10000
 		}
-		o1 = o1&0xffff0000 | (uint32(t)>>16)&0xffff
-		o2 = o2&0xffff0000 | uint32(t)&0xffff
+		// There is an almost-bug here. When R_ADDRPOWER is relocating a
+		// load, the two instructions are addi and then a load. addi and
+		// almost all loads are "D-form" instructions, which have a
+		// 16-bit immediate in the lower 16-bits of the instruction
+		// word. But the load doubleword instruction is a "DS-form"
+		// instruction: the immediate only occupies bits 16-29 of the
+		// instruction and is implicity padded with zeros on the
+		// right. The reason the belows isn't a bug is because we only
+		// ever use immediates that have zeros on in their lower bits
+		// with ld, and we combine the immediate with | so bits 30 and
+		// 31 are preserved.
+		o1 |= (uint32(t) >> 16) & 0xffff
+		o2 |= uint32(t) & 0xffff
 
-		// when laid out, the instruction order must always be o1, o2.
 		if ld.Ctxt.Arch.ByteOrder == binary.BigEndian {
 			*val = int64(o1)<<32 | int64(o2)
 		} else {
@@ -377,12 +390,6 @@ func archreloc(r *ld.Reloc, s *ld.LSym, val *int64) int {
 
 	case obj.R_CALLPOWER:
 		// Bits 6 through 29 = (S + A - P) >> 2
-		var o1 uint32
-		if ld.Ctxt.Arch.ByteOrder == binary.BigEndian {
-			o1 = ld.Be32(s.P[r.Off:])
-		} else {
-			o1 = ld.Le32(s.P[r.Off:])
-		}
 
 		t := ld.Symaddr(r.Sym) + r.Add - (s.Value + int64(r.Off))
 		if t&3 != 0 {
@@ -394,7 +401,7 @@ func archreloc(r *ld.Reloc, s *ld.LSym, val *int64) int {
 			ld.Ctxt.Diag("relocation for %s+%d is too big: %d", r.Sym.Name, r.Off, t)
 		}
 
-		*val = int64(o1&0xfc000003 | uint32(t)&^0xfc000003)
+		*val |= int64(uint32(t) &^ 0xfc000003)
 		return 0
 
 	case obj.R_POWER_TOC: // S + A - .TOC.
@@ -578,10 +585,8 @@ func ensureglinkresolver() *ld.LSym {
 	r.Siz = 8
 	r.Type = obj.R_ADDRPOWER
 
-	// addis r11,0,.plt@ha; addi r11,r11,.plt@l
-	r.Add = 0x3d600000<<32 | 0x396b0000
-
-	glink.Size += 8
+	ld.Adduint32(ld.Ctxt, glink, 0x3d600000) // addis r11,0,.plt@ha
+	ld.Adduint32(ld.Ctxt, glink, 0x396b0000) // addi r11,r11,.plt@l
 
 	// Load r12 = dynamic resolver address and r11 = DSO
 	// identifier from the first two doublewords of the PLT.
-- 
2.51.0