From c7e855658d73b85f345c9a0ac81de42acad7ae9b Mon Sep 17 00:00:00 2001 From: Cherry Zhang Date: Mon, 17 Oct 2016 13:40:18 -0400 Subject: [PATCH] cmd/link: trampoline support for external linking on ARM all.bash passes with -debugtramp=2 (except the unavoidable disassembly test as we change instructions). And successfully build k8s.io/kubernetes/cmd/hyperkube in both internal linking and external linking mode. Fixes #17028. Change-Id: Ic8fac6a394488155c5eba9215662db1c1086e24b Reviewed-on: https://go-review.googlesource.com/31143 Reviewed-by: David Chase --- src/cmd/link/internal/arm/asm.go | 121 ++++++++++++++++++++++++++++--- src/cmd/link/internal/ld/data.go | 3 - src/cmd/link/linkbig_test.go | 28 +++---- 3 files changed, 126 insertions(+), 26 deletions(-) diff --git a/src/cmd/link/internal/arm/asm.go b/src/cmd/link/internal/arm/asm.go index e1f3ed52aa..ee57df11b3 100644 --- a/src/cmd/link/internal/arm/asm.go +++ b/src/cmd/link/internal/arm/asm.go @@ -416,6 +416,17 @@ func signext24(x int64) int32 { return (int32(x) << 8) >> 8 } +// encode an immediate in ARM's imm12 format. copied from ../../../internal/obj/arm/asm5.go +func immrot(v uint32) uint32 { + for i := 0; i < 16; i++ { + if v&^0xff == 0 { + return uint32(i<<8) | v | 1<<25 + } + v = v<<2 | v>>30 + } + return 0 +} + // Convert the direct jump relocation r to refer to a trampoline if the target is too far func trampoline(ctxt *ld.Link, r *ld.Reloc, s *ld.Symbol) { switch r.Type { @@ -424,12 +435,18 @@ func trampoline(ctxt *ld.Link, r *ld.Reloc, s *ld.Symbol) { // low 24-bit encodes the target address t := (ld.Symaddr(r.Sym) + int64(signext24(r.Add&0xffffff)*4) - (s.Value + int64(r.Off))) / 4 if t > 0x7fffff || t < -0x800000 || (*ld.FlagDebugTramp > 1 && s.File != r.Sym.File) { - // direct call too far, need to insert trampoline + // direct call too far, need to insert trampoline. + // look up existing trampolines first. if we found one within the range + // of direct call, we can reuse it. otherwise create a new one. offset := (signext24(r.Add&0xffffff) + 2) * 4 var tramp *ld.Symbol for i := 0; ; i++ { name := r.Sym.Name + fmt.Sprintf("%+d-tramp%d", offset, i) tramp = ctxt.Syms.Lookup(name, int(r.Sym.Version)) + if tramp.Type == obj.SDYNIMPORT { + // don't reuse trampoline defined in other module + continue + } if tramp.Value == 0 { // either the trampoline does not exist -- we need to create one, // or found one the address which is not assigned -- this will be @@ -447,15 +464,16 @@ func trampoline(ctxt *ld.Link, r *ld.Reloc, s *ld.Symbol) { if tramp.Type == 0 { // trampoline does not exist, create one ctxt.AddTramp(tramp) - tramp.Size = 12 // 3 instructions - tramp.P = make([]byte, tramp.Size) - t = ld.Symaddr(r.Sym) + int64(offset) - o1 := uint32(0xe5900000 | 11<<12 | 15<<16) // MOVW (R15), R11 // R15 is actual pc + 8 - o2 := uint32(0xe12fff10 | 11) // JMP (R11) - o3 := uint32(t) // WORD $target - ld.SysArch.ByteOrder.PutUint32(tramp.P, o1) - ld.SysArch.ByteOrder.PutUint32(tramp.P[4:], o2) - ld.SysArch.ByteOrder.PutUint32(tramp.P[8:], o3) + if ctxt.DynlinkingGo() { + if immrot(uint32(offset)) == 0 { + ld.Errorf(s, "odd offset in dynlink direct call: %v+%d", r.Sym, offset) + } + gentrampdyn(tramp, r.Sym, int64(offset)) + } else if ld.Buildmode == ld.BuildmodeCArchive || ld.Buildmode == ld.BuildmodeCShared || ld.Buildmode == ld.BuildmodePIE { + gentramppic(tramp, r.Sym, int64(offset)) + } else { + gentramp(tramp, r.Sym, int64(offset)) + } } // modify reloc to point to tramp, which will be resolved later r.Sym = tramp @@ -467,6 +485,89 @@ func trampoline(ctxt *ld.Link, r *ld.Reloc, s *ld.Symbol) { } } +// generate a trampoline to target+offset +func gentramp(tramp, target *ld.Symbol, offset int64) { + tramp.Size = 12 // 3 instructions + tramp.P = make([]byte, tramp.Size) + t := ld.Symaddr(target) + int64(offset) + o1 := uint32(0xe5900000 | 11<<12 | 15<<16) // MOVW (R15), R11 // R15 is actual pc + 8 + o2 := uint32(0xe12fff10 | 11) // JMP (R11) + o3 := uint32(t) // WORD $target + ld.SysArch.ByteOrder.PutUint32(tramp.P, o1) + ld.SysArch.ByteOrder.PutUint32(tramp.P[4:], o2) + ld.SysArch.ByteOrder.PutUint32(tramp.P[8:], o3) + + if ld.Linkmode == ld.LinkExternal { + r := ld.Addrel(tramp) + r.Off = 8 + r.Type = obj.R_ADDR + r.Siz = 4 + r.Sym = target + r.Add = offset + } +} + +// generate a trampoline to target+offset in position independent code +func gentramppic(tramp, target *ld.Symbol, offset int64) { + tramp.Size = 16 // 4 instructions + tramp.P = make([]byte, tramp.Size) + o1 := uint32(0xe5900000 | 11<<12 | 15<<16 | 4) // MOVW 4(R15), R11 // R15 is actual pc + 8 + o2 := uint32(0xe0800000 | 11<<12 | 15<<16 | 11) // ADD R15, R11, R11 + o3 := uint32(0xe12fff10 | 11) // JMP (R11) + o4 := uint32(0) // WORD $(target-pc) // filled in with relocation + ld.SysArch.ByteOrder.PutUint32(tramp.P, o1) + ld.SysArch.ByteOrder.PutUint32(tramp.P[4:], o2) + ld.SysArch.ByteOrder.PutUint32(tramp.P[8:], o3) + ld.SysArch.ByteOrder.PutUint32(tramp.P[12:], o4) + + r := ld.Addrel(tramp) + r.Off = 12 + r.Type = obj.R_PCREL + r.Siz = 4 + r.Sym = target + r.Add = offset + 4 +} + +// generate a trampoline to target+offset in dynlink mode (using GOT) +func gentrampdyn(tramp, target *ld.Symbol, offset int64) { + tramp.Size = 20 // 5 instructions + o1 := uint32(0xe5900000 | 11<<12 | 15<<16 | 8) // MOVW 8(R15), R11 // R15 is actual pc + 8 + o2 := uint32(0xe0800000 | 11<<12 | 15<<16 | 11) // ADD R15, R11, R11 + o3 := uint32(0xe5900000 | 11<<12 | 11<<16) // MOVW (R11), R11 + o4 := uint32(0xe12fff10 | 11) // JMP (R11) + o5 := uint32(0) // WORD $target@GOT // filled in with relocation + o6 := uint32(0) + if offset != 0 { + // insert an instruction to add offset + tramp.Size = 24 // 6 instructions + o6 = o5 + o5 = o4 + o4 = uint32(0xe2800000 | 11<<12 | 11<<16 | immrot(uint32(offset))) // ADD $offset, R11, R11 + o1 = uint32(0xe5900000 | 11<<12 | 15<<16 | 12) // MOVW 12(R15), R11 + } + tramp.P = make([]byte, tramp.Size) + ld.SysArch.ByteOrder.PutUint32(tramp.P, o1) + ld.SysArch.ByteOrder.PutUint32(tramp.P[4:], o2) + ld.SysArch.ByteOrder.PutUint32(tramp.P[8:], o3) + ld.SysArch.ByteOrder.PutUint32(tramp.P[12:], o4) + ld.SysArch.ByteOrder.PutUint32(tramp.P[16:], o5) + if offset != 0 { + ld.SysArch.ByteOrder.PutUint32(tramp.P[20:], o6) + } + + r := ld.Addrel(tramp) + r.Off = 16 + r.Type = obj.R_GOTPCREL + r.Siz = 4 + r.Sym = target + r.Add = 8 + if offset != 0 { + // increase reloc offset by 4 as we inserted an ADD instruction + r.Off = 20 + r.Add = 12 + } +} + func archreloc(ctxt *ld.Link, r *ld.Reloc, s *ld.Symbol, val *int64) int { if ld.Linkmode == ld.LinkExternal { switch r.Type { diff --git a/src/cmd/link/internal/ld/data.go b/src/cmd/link/internal/ld/data.go index e7cb2523d1..4714d20866 100644 --- a/src/cmd/link/internal/ld/data.go +++ b/src/cmd/link/internal/ld/data.go @@ -330,9 +330,6 @@ func trampoline(ctxt *Link, s *Symbol) { if Thearch.Trampoline == nil { return // no need or no support of trampolines on this arch } - if Linkmode == LinkExternal { - return // currently only support internal linking - } for ri := range s.R { r := &s.R[ri] diff --git a/src/cmd/link/linkbig_test.go b/src/cmd/link/linkbig_test.go index ce251704aa..d793c2f5f2 100644 --- a/src/cmd/link/linkbig_test.go +++ b/src/cmd/link/linkbig_test.go @@ -12,6 +12,7 @@ import ( "bytes" "cmd/internal/obj" "fmt" + "internal/testenv" "io/ioutil" "os" "os/exec" @@ -19,29 +20,33 @@ import ( ) func TestLargeText(t *testing.T) { + if testing.Short() || (obj.GOARCH != "ppc64le" && obj.GOARCH != "ppc64" && obj.GOARCH != "arm") { + t.Skip("Skipping large text section test in short mode or on %s", obj.GOARCH) + } + testenv.MustHaveGoBuild(t) var w bytes.Buffer - - if testing.Short() || (obj.GOARCH != "ppc64le" && obj.GOARCH != "ppc64") { - t.Skip("Skipping large text section test in short mode or if not ppc64x") - } const FN = 4 tmpdir, err := ioutil.TempDir("", "bigtext") defer os.RemoveAll(tmpdir) // Generate the scenario where the total amount of text exceeds the - // limit for the bl instruction, on RISC architectures like ppc64le, + // limit for the jmp/call instruction, on RISC architectures like ppc64le, // which is 2^26. When that happens the call requires special trampolines or // long branches inserted by the linker where supported. - // Multiple .s files are generated instead of one. - + instOnArch := map[string]string{ + "ppc64": "\tMOVD\tR0,R3\n", + "ppc64le": "\tMOVD\tR0,R3\n", + "arm": "\tMOVW\tR0,R1\n", + } + inst := instOnArch[obj.GOARCH] for j := 0; j < FN; j++ { testname := fmt.Sprintf("bigfn%d", j) fmt.Fprintf(&w, "TEXT ·%s(SB),$0\n", testname) for i := 0; i < 2200000; i++ { - fmt.Fprintf(&w, "\tMOVD\tR0,R3\n") + fmt.Fprintf(&w, inst) } fmt.Fprintf(&w, "\tRET\n") err := ioutil.WriteFile(tmpdir+"/"+testname+".s", w.Bytes(), 0666) @@ -64,7 +69,6 @@ func TestLargeText(t *testing.T) { // There are lots of dummy code generated in the .s files just to generate a lot // of text. Link them in but guard their call so their code is not executed but // the main part of the program can be run. - fmt.Fprintf(&w, "\tif os.Getenv(\"LINKTESTARG\") != \"\" {\n") for i := 0; i < FN; i++ { fmt.Fprintf(&w, "\t\tbigfn%d()\n", i) @@ -78,9 +82,8 @@ func TestLargeText(t *testing.T) { } // Build and run with internal linking. - os.Chdir(tmpdir) - cmd := exec.Command("go", "build", "-o", "bigtext") + cmd := exec.Command(testenv.GoToolPath(t), "build", "-o", "bigtext") out, err := cmd.CombinedOutput() if err != nil { t.Fatalf("Build failed for big text program with internal linking: %v, output: %s", err, out) @@ -92,9 +95,8 @@ func TestLargeText(t *testing.T) { } // Build and run with external linking - os.Chdir(tmpdir) - cmd = exec.Command("go", "build", "-o", "bigtext", "-ldflags", "'-linkmode=external'") + cmd = exec.Command(testenv.GoToolPath(t), "build", "-o", "bigtext", "-ldflags", "'-linkmode=external'") out, err = cmd.CombinedOutput() if err != nil { t.Fatalf("Build failed for big text program with external linking: %v, output: %s", err, out) -- 2.48.1