]> Cypherpunks repositories - gostls13.git/commitdiff
cmd/link: write DWARF PC table without seeking
authorDavid Crawshaw <crawshaw@golang.org>
Mon, 29 Feb 2016 22:07:50 +0000 (13:07 -0900)
committerDavid Crawshaw <crawshaw@golang.org>
Wed, 2 Mar 2016 12:19:35 +0000 (12:19 +0000)
This per-symbol table was written with the strategy:

1. record offset and write fake header
2. write body
3. seek back to fake header
4. write real header

This CL collects the per-symbol body into a []byte, then writes the
real header followed by the body to the output file. This saves two
seeks per-symbol and overwriting the fake header.

Small performance improvement (3.5%) in best-of-ten links of godoc:

tip:  real 0m1.132s user 0m1.256s
this: real 0m1.090s user 0m1.210s

I'm not sure if the performance measured here alone justifies it,
but I think this is an easier to read style of code.

Change-Id: I1663901eb7c2ee330591b8b6550cdff0402ed5dc
Reviewed-on: https://go-review.googlesource.com/20074
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>

src/cmd/link/internal/ld/dwarf.go

index 200f4ccd9ede110c980999581c76cb49738b067b..d26689b9bada93b0b37ba7b7e69b1de6d7dbe4d0 100644 (file)
@@ -111,64 +111,47 @@ func addrput(addr int64) {
        }
 }
 
-func uleb128enc(v uint64, dst []byte) int {
-       var c uint8
-
-       length := uint8(0)
+func appendUleb128(b []byte, v uint64) []byte {
        for {
-               c = uint8(v & 0x7f)
+               c := uint8(v & 0x7f)
                v >>= 7
                if v != 0 {
                        c |= 0x80
                }
-               if dst != nil {
-                       dst[0] = byte(c)
-                       dst = dst[1:]
-               }
-               length++
+               b = append(b, c)
                if c&0x80 == 0 {
                        break
                }
        }
-
-       return int(length)
+       return b
 }
 
-func sleb128enc(v int64, dst []byte) int {
-       var c uint8
-       var s uint8
-
-       length := uint8(0)
+func appendSleb128(b []byte, v int64) []byte {
        for {
-               c = uint8(v & 0x7f)
-               s = uint8(v & 0x40)
+               c := uint8(v & 0x7f)
+               s := uint8(v & 0x40)
                v >>= 7
                if (v != -1 || s == 0) && (v != 0 || s != 0) {
                        c |= 0x80
                }
-               if dst != nil {
-                       dst[0] = byte(c)
-                       dst = dst[1:]
-               }
-               length++
+               b = append(b, c)
                if c&0x80 == 0 {
                        break
                }
        }
-
-       return int(length)
+       return b
 }
 
 var encbuf [10]byte
 
 func uleb128put(v int64) {
-       n := uleb128enc(uint64(v), encbuf[:])
-       Cwrite(encbuf[:n])
+       b := appendUleb128(encbuf[:0], uint64(v))
+       Cwrite(b)
 }
 
 func sleb128put(v int64) {
-       n := sleb128enc(v, encbuf[:])
-       Cwrite(encbuf[:n])
+       b := appendSleb128(encbuf[:0], v)
+       Cwrite(b)
 }
 
 /*
@@ -892,12 +875,9 @@ func reversetree(list **DWDie) {
 
 func newmemberoffsetattr(die *DWDie, offs int32) {
        var block [20]byte
-
-       i := 0
-       block[i] = DW_OP_plus_uconst
-       i++
-       i += uleb128enc(uint64(offs), block[i:])
-       newattr(die, DW_AT_data_member_location, DW_CLS_BLOCK, int64(i), block[:i])
+       b := append(block[:0], DW_OP_plus_uconst)
+       b = appendUleb128(b, uint64(offs))
+       newattr(die, DW_AT_data_member_location, DW_CLS_BLOCK, int64(len(b)), b)
 }
 
 // GDB doesn't like DW_FORM_addr for DW_AT_location, so emit a
@@ -1462,20 +1442,15 @@ func putpclcdelta(delta_pc int64, delta_lc int64) {
 
 func newcfaoffsetattr(die *DWDie, offs int32) {
        var block [20]byte
+       b := append(block[:0], DW_OP_call_frame_cfa)
 
-       i := 0
-
-       block[i] = DW_OP_call_frame_cfa
-       i++
        if offs != 0 {
-               block[i] = DW_OP_consts
-               i++
-               i += sleb128enc(int64(offs), block[i:])
-               block[i] = DW_OP_plus
-               i++
+               b = append(b, DW_OP_consts)
+               b = appendSleb128(b, int64(offs))
+               b = append(b, DW_OP_plus)
        }
 
-       newattr(die, DW_AT_location, DW_CLS_BLOCK, int64(i), block[:i])
+       newattr(die, DW_AT_location, DW_CLS_BLOCK, int64(len(b)), b)
 }
 
 func mkvarname(name string, da int) string {
@@ -1719,22 +1694,25 @@ const (
        DATAALIGNMENTFACTOR = -4
 )
 
-func putpccfadelta(deltapc int64, cfa int64) {
-       Cput(DW_CFA_def_cfa_offset_sf)
-       sleb128put(cfa / DATAALIGNMENTFACTOR)
-
-       if deltapc < 0x40 {
-               Cput(uint8(DW_CFA_advance_loc + deltapc))
-       } else if deltapc < 0x100 {
-               Cput(DW_CFA_advance_loc1)
-               Cput(uint8(deltapc))
-       } else if deltapc < 0x10000 {
-               Cput(DW_CFA_advance_loc2)
-               Thearch.Wput(uint16(deltapc))
-       } else {
-               Cput(DW_CFA_advance_loc4)
-               Thearch.Lput(uint32(deltapc))
+// appendPCDeltaCFA appends per-PC CFA deltas to b and returns the final slice.
+func appendPCDeltaCFA(b []byte, deltapc, cfa int64) []byte {
+       b = append(b, DW_CFA_def_cfa_offset_sf)
+       b = appendSleb128(b, cfa/DATAALIGNMENTFACTOR)
+
+       switch {
+       case deltapc < 0x40:
+               b = append(b, uint8(DW_CFA_advance_loc+deltapc))
+       case deltapc < 0x100:
+               b = append(b, DW_CFA_advance_loc1)
+               b = append(b, uint8(deltapc))
+       case deltapc < 0x10000:
+               b = append(b, DW_CFA_advance_loc2)
+               b = Thearch.Append16(b, uint16(deltapc))
+       default:
+               b = append(b, DW_CFA_advance_loc4)
+               b = Thearch.Append32(b, uint32(deltapc))
        }
+       return b
 }
 
 func writeframes() {
@@ -1779,6 +1757,7 @@ func writeframes() {
 
        strnput("", int(pad))
 
+       var deltaBuf []byte
        var pcsp Pciter
        for Ctxt.Cursym = Ctxt.Textp; Ctxt.Cursym != nil; Ctxt.Cursym = Ctxt.Cursym.Next {
                s := Ctxt.Cursym
@@ -1786,14 +1765,9 @@ func writeframes() {
                        continue
                }
 
-               fdeo := Cpos()
-
-               // Emit a FDE, Section 6.4.1, starting wit a placeholder.
-               Thearch.Lput(0) // length, must be multiple of thearch.ptrsize
-               Thearch.Lput(0) // Pointer to the CIE above, at offset 0
-               addrput(0)      // initial location
-               addrput(0)      // address range
-
+               // Emit a FDE, Section 6.4.1.
+               // First build the section contents into a byte buffer.
+               deltaBuf = deltaBuf[:0]
                for pciterinit(Ctxt, &pcsp, &s.Pcln.Pcsp); pcsp.done == 0; pciternext(&pcsp) {
                        nextpc := pcsp.nextpc
 
@@ -1807,31 +1781,30 @@ func writeframes() {
                        }
 
                        if haslinkregister() {
-                               putpccfadelta(int64(nextpc)-int64(pcsp.pc), int64(pcsp.value))
+                               deltaBuf = appendPCDeltaCFA(deltaBuf, int64(nextpc)-int64(pcsp.pc), int64(pcsp.value))
                        } else {
-                               putpccfadelta(int64(nextpc)-int64(pcsp.pc), int64(Thearch.Ptrsize)+int64(pcsp.value))
+                               deltaBuf = appendPCDeltaCFA(deltaBuf, int64(nextpc)-int64(pcsp.pc), int64(Thearch.Ptrsize)+int64(pcsp.value))
                        }
                }
+               pad := int(Rnd(int64(len(deltaBuf)), int64(Thearch.Ptrsize))) - len(deltaBuf)
+               deltaBuf = append(deltaBuf, zeros[:pad]...)
 
-               fdesize := Cpos() - fdeo - 4 // exclude the length field.
-               pad = Rnd(fdesize, int64(Thearch.Ptrsize)) - fdesize
-               strnput("", int(pad))
-               fdesize += pad
-
-               // Emit the FDE header for real, Section 6.4.1.
-               Cseek(fdeo)
-
-               Thearch.Lput(uint32(fdesize))
+               // Emit the FDE header, Section 6.4.1.
+               //      4 bytes: length, must be multiple of thearch.ptrsize
+               //      4 bytes: Pointer to the CIE above, at offset 0
+               //      ptrsize: initial location
+               //      ptrsize: address range
+               Thearch.Lput(uint32(4 + 2*Thearch.Ptrsize + len(deltaBuf))) // length (excludes itself)
                if Linkmode == LinkExternal {
-                       adddwarfrel(framesec, framesym, frameo, 4, 0)
-                       adddwarfrel(framesec, s, frameo, Thearch.Ptrsize, 0)
+                       adddwarfrel(framesec, framesym, frameo, 4, 0)        // CIE offset
+                       adddwarfrel(framesec, s, frameo, Thearch.Ptrsize, 0) // initial location
                } else {
-                       Thearch.Lput(0)
-                       addrput(s.Value)
+                       Thearch.Lput(0)  // CIE offset
+                       addrput(s.Value) // initial location
                }
+               addrput(s.Size) // address range
 
-               addrput(s.Size)
-               Cseek(fdeo + 4 + fdesize)
+               Cwrite(deltaBuf)
        }
 
        Cflush()