]> Cypherpunks repositories - gostls13.git/commitdiff
runtime: add hexdumper
author Austin Clements <austin@google.com>
Tue, 10 Jun 2025 23:19:08 +0000 (19:19 -0400)
committer Gopher Robot <gobot@golang.org>
Tue, 18 Nov 2025 19:41:32 +0000 (11:41 -0800)
Currently, we have a simple hexdumpWords facility for debugging. It's
useful but pretty limited.

This CL adds a much more configurable and capable "hexdumper". It can
be configured for any word size (including bytes), handles unaligned
data, includes an ASCII dump, and accepts data in multiple slices. It
also has a much nicer "mark" facility for annotating the hexdump that
isn't limited to a single character per word.

We use this to improve our existing hexdumps, particularly the new
mark facility. The next CL will integrate hexdumps into debuglog,
which will make use of several other new capabilities.

Also this adds an actual test.

The output looks like:

                       7 6 5 4  3 2 1 0   f e d c  b a 9 8  0123456789abcdef
    000000c00006ef70:                    03000000 00000000          ........
    000000c00006ef80: 00000000 0053da80  000000c0 000bc380  ..S.............
                      ^ <testing.tRunner.func2+0x0>
    000000c00006ef90: 00000000 0053dac0  000000c0 000bc380  ..S.............
                      ^ <testing.tRunner.func1+0x0>
    000000c00006efa0: 000000c0 0006ef90  000000c0 0006ef80  ................
    000000c00006efb0: 000000c0 0006efd0  00000000 0053eb65  ........e.S.....
                                         ^ <testing.(*T).Run.gowrap1+0x25>
    000000c00006efc0: 000000c0 000bc380  00000000 009aaae8  ................
    000000c00006efd0: 00000000 00000000  00000000 00496b01  .........kI.....
                                         ^ <runtime.goexit+0x1>
    000000c00006efe0: 00000000 00000000  00000000 00000000  ................
    000000c00006eff0: 00000000 00000000                     ........

The header gives column labels, indicating the order of bytes within
the following words. The addresses on the left are always 16-byte
aligned so it's easy to combine that address with the column header to
determine the full address of a byte. Annotations are no longer
interleaved with the data, so the data stays in nicely aligned
columns. The annotations are also now much more flexible, including
support for multiple annotations on the same word (not shown).

Change-Id: I27e83800a1f6a7bdd3cc2c59614661a810a57d4d
Reviewed-on: https://go-review.googlesource.com/c/go/+/681375
Reviewed-by: Michael Pratt <mpratt@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Auto-Submit: Austin Clements <austin@google.com>

src/runtime/export_test.go
src/runtime/hexdump.go [new file with mode: 0644]
src/runtime/hexdump_test.go [new file with mode: 0644]
src/runtime/mgcmark.go
src/runtime/mgcsweep.go
src/runtime/print.go
src/runtime/traceback.go

index 8438603b9e603fda92bc9160a08fa39290cdf004..2db8add7e4f66589c963246a7bad5bb8f16f6566 100644 (file)
@@ -2029,3 +2029,36 @@ func (head *ListHeadManual) Pop() unsafe.Pointer {
 func (head *ListHeadManual) Remove(p unsafe.Pointer) {
        head.l.remove(p)
 }
+
+func Hexdumper(base uintptr, wordBytes int, mark func(addr uintptr, start func()), data ...[]byte) string { // Test export: runs a hexdumper over data and returns the dump as a string.
+       buf := make([]byte, 0, 2048)
+       getg().writebuf = buf // The dump is collected through this g's writebuf, which is backed by buf.
+       h := hexdumper{addr: base, addrBytes: 4, wordBytes: uint8(wordBytes)}
+       if mark != nil {
+               h.mark = func(addr uintptr, m hexdumpMarker) { // Adapt mark: the caller receives only m.start, not a hexdumpMarker.
+                       mark(addr, m.start)
+               }
+       }
+       for _, d := range data {
+               h.write(d)
+       }
+       h.close()
+       n := len(getg().writebuf)
+       getg().writebuf = nil
+       if n == cap(buf) { // A completely full buffer presumably means output was cut off.
+               panic("Hexdumper buf too small")
+       }
+       return string(buf[:n])
+}
+
+func HexdumpWords(p, bytes uintptr) string { // Test export: runs hexdumpWords on [p, p+bytes) with no extra marks and returns the dump as a string.
+       buf := make([]byte, 0, 2048)
+       getg().writebuf = buf // The dump is collected through this g's writebuf, which is backed by buf.
+       hexdumpWords(p, bytes, nil)
+       n := len(getg().writebuf)
+       getg().writebuf = nil
+       if n == cap(buf) { // A completely full buffer presumably means output was cut off.
+               panic("HexdumpWords buf too small")
+       }
+       return string(buf[:n])
+}
diff --git a/src/runtime/hexdump.go b/src/runtime/hexdump.go
new file mode 100644 (file)
index 0000000..0d7dbb5
--- /dev/null
@@ -0,0 +1,269 @@
+// Copyright 2025 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import (
+       "internal/goarch"
+       "unsafe"
+)
+
+// hexdumpWords prints a word-oriented hex dump of [p, p+len), holding the print lock throughout.
+//
+// If mark != nil, it is called for each word before the default symbol annotation; see hexdumper.mark for its contract.
+func hexdumpWords(p, len uintptr, mark func(uintptr, hexdumpMarker)) {
+       printlock()
+
+       // Wrap mark with a default annotation that symbolizes words pointing into functions.
+       symMark := func(u uintptr, hm hexdumpMarker) {
+               if mark != nil {
+                       mark(u, hm)
+               }
+
+               // If the word stored at u falls inside a known function, annotate it as <name+offset>.
+               val := *(*uintptr)(unsafe.Pointer(u))
+               fn := findfunc(val)
+               if fn.valid() {
+                       hm.start()
+                       print("<", funcname(fn), "+", hex(val-fn.entry()), ">\n")
+               }
+       }
+
+       h := hexdumper{addr: p, mark: symMark} // addrBytes and wordBytes are left 0, so both default to goarch.PtrSize.
+       h.write(unsafe.Slice((*byte)(unsafe.Pointer(p)), len))
+       h.close()
+       printunlock()
+}
+
+// hexdumper is a Swiss-army knife hex dumper.
+//
+// To use, optionally set addr, addrBytes, wordBytes, and mark, then call write
+// repeatedly, followed by close.
+type hexdumper struct {
+       // addr is the address to print for the first byte of data.
+       addr uintptr
+
+       // addrBytes is the number of bytes of addr to print. If this is 0, it
+       // defaults to goarch.PtrSize. init throws if it exceeds goarch.PtrSize.
+       addrBytes uint8
+
+       // wordBytes is the number of bytes in a word. If wordBytes is 1, this
+       // prints a byte-oriented dump. If it's > 1, this interprets the data as a
+       // sequence of words of the given size. If it's 0, it's treated as
+       // goarch.PtrSize. init throws unless it is a power of two smaller than 16.
+       wordBytes uint8
+
+       // mark is an optional function that can annotate values in the hex dump.
+       //
+       // If non-nil, it is called with the address of every complete, aligned word
+       // in the hex dump, after the line containing that word has been printed.
+       //
+       // If it decides to print an annotation, it must first call m.start(), then
+       // print the annotation, followed by a new line.
+       mark func(addr uintptr, m hexdumpMarker)
+
+       // Below here is internal state, maintained by init, write, and flushLine.
+
+       ready int8 // 0=need to init state; 1=need to print header; 2=ready
+
+       // dataBuf accumulates a line at a time of data, in case it's split across
+       // buffers.
+       dataBuf  [16]byte
+       dataPos  uint8 // Number of leading bytes of dataBuf already filled (or skipped for alignment)
+       dataSkip uint8 // Skip first n bytes of dataBuf on first line
+
+       // toPos maps from byte offset within a line to the visual offset of that byte's hex digits in the data columns.
+       toPos [16]byte
+}
+
+type hexdumpMarker struct { // hexdumpMarker is handed to hexdumper.mark callbacks so they can begin an annotation line.
+       chars int // Number of spaces start writes before the "^ " caret.
+}
+
+func (h *hexdumper) write(data []byte) { // write appends data to the dump, printing each 16-byte line as soon as it is complete.
+       if h.ready == 0 {
+               h.init()
+       }
+
+       // Top up a partially filled line left by init's alignment skip or by an earlier write.
+       if h.dataPos > 0 {
+               n := copy(h.dataBuf[h.dataPos:], data)
+               h.dataPos += uint8(n)
+               data = data[n:]
+               if h.dataPos < uint8(len(h.dataBuf)) {
+                       return
+               }
+               h.flushLine(h.dataBuf[:])
+               h.dataPos = 0
+       }
+
+       // Flush full lines straight from data, without going through dataBuf.
+       for len(data) >= len(h.dataBuf) {
+               h.flushLine(data[:len(h.dataBuf)])
+               data = data[len(h.dataBuf):]
+       }
+
+       // Stash any trailing partial line until the next write or close.
+       h.dataPos = uint8(copy(h.dataBuf[:], data))
+}
+
+func (h *hexdumper) close() { // close prints whatever partial line is still buffered in dataBuf.
+       if h.dataPos > 0 {
+               h.flushLine(h.dataBuf[:h.dataPos])
+       }
+}
+
+func (h *hexdumper) init() { // init applies defaults, validates the configuration, and computes the line layout; write calls it on first use.
+       const bytesPerLine = len(h.dataBuf)
+
+       if h.addrBytes == 0 {
+               h.addrBytes = goarch.PtrSize
+       } else if h.addrBytes < 0 || h.addrBytes > goarch.PtrSize { // NOTE(review): addrBytes is a uint8, so the < 0 test can never be true.
+               throw("invalid addrBytes")
+       }
+
+       if h.wordBytes == 0 {
+               h.wordBytes = goarch.PtrSize
+       }
+       wb := int(h.wordBytes)
+       if wb < 0 || wb >= bytesPerLine || wb&(wb-1) != 0 { // Accepts only 1, 2, 4, or 8. NOTE(review): wb comes from a uint8, so wb < 0 can never be true.
+               throw("invalid wordBytes")
+       }
+
+       // Construct the mapping from byte offset in a line to visual offset.
+       for i := range h.toPos {
+               // First, calculate the "field" within the line. On little-endian, reverse the bytes within each word.
+               field := 0
+               if goarch.BigEndian {
+                       field = i
+               } else {
+                       field = i ^ int(wb-1)
+               }
+               // Translate this field into a visual offset: two hex digits per field, plus a space every 4 fields and another every 8.
+               // "00112233 44556677  8899AABB CCDDEEFF"
+               h.toPos[i] = byte(field*2 + field/4 + field/8)
+       }
+
+       // The first line may need to skip some fields to get to alignment.
+       // Round the starting address down to a multiple of bytesPerLine.
+       nAddr := h.addr &^ uintptr(bytesPerLine-1)
+       // Pretend the bytes before the original addr are already in dataBuf, and remember not to print them.
+       h.dataPos = uint8(h.addr - nAddr)
+       h.dataSkip = uint8(h.addr - nAddr)
+       h.addr = nAddr
+
+       // We're ready to print the header; flushLine does so before the first line.
+       h.ready = 1
+}
+
+func (h *hexdumper) flushLine(data []byte) { // flushLine prints one line (address, hex, ASCII) for data, preceded by the header on first use, then runs mark.
+       const bytesPerLine = len(h.dataBuf)
+
+       const maxAddrChars = 2 * goarch.PtrSize
+       const addrSep = ": "
+       dataStart := int(2*h.addrBytes) + len(addrSep)
+       // dataChars uses the same formula as toPos above. We calculate it with the
+       // "last field", then add the size of the last field.
+       const dataChars = (bytesPerLine-1)*2 + (bytesPerLine-1)/4 + (bytesPerLine-1)/8 + 2
+       const asciiSep = "  "
+       asciiStart := dataStart + dataChars + len(asciiSep)
+       const asciiChars = bytesPerLine
+       nlPos := asciiStart + asciiChars
+
+       var lineBuf [maxAddrChars + len(addrSep) + dataChars + len(asciiSep) + asciiChars + 1]byte
+       clear := func() {
+               for i := range lineBuf {
+                       lineBuf[i] = ' '
+               }
+       }
+       clear()
+
+       if h.ready == 1 {
+               // Print column offsets header: one hex digit above the second digit of each byte.
+               for offset, pos := range h.toPos {
+                       h.fmtHex(lineBuf[dataStart+int(pos+1):][:1], uint64(offset))
+               }
+               // Print ASCII offsets.
+               for offset := range asciiChars {
+                       h.fmtHex(lineBuf[asciiStart+offset:][:1], uint64(offset))
+               }
+               lineBuf[nlPos] = '\n'
+               gwrite(lineBuf[:nlPos+1])
+               clear()
+               h.ready = 2
+       }
+
+       // Format address.
+       h.fmtHex(lineBuf[:2*h.addrBytes], uint64(h.addr))
+       copy(lineBuf[2*h.addrBytes:], addrSep)
+       // Format data in hex and ASCII, leaving the alignment-skipped bytes blank.
+       for offset, b := range data {
+               if offset < int(h.dataSkip) {
+                       continue
+               }
+
+               pos := h.toPos[offset]
+               h.fmtHex(lineBuf[dataStart+int(pos):][:2], uint64(b))
+
+               copy(lineBuf[dataStart+dataChars:], asciiSep) // NOTE(review): loop-invariant; it rewrites the same two spaces on every iteration.
+               ascii := uint8('.')
+               if b >= ' ' && b <= '~' {
+                       ascii = b
+               }
+               lineBuf[asciiStart+offset] = ascii
+       }
+       // Trim buffer: end the line right after the last ASCII character.
+       end := asciiStart + len(data)
+       lineBuf[end] = '\n'
+       buf := lineBuf[:end+1]
+
+       // Print.
+       gwrite(buf)
+
+       // Print marks: offer every complete, non-skipped word on this line to mark.
+       if h.mark != nil {
+               clear() // NOTE(review): lineBuf is not read again after this point.
+               for offset := 0; offset+int(h.wordBytes) <= len(data); offset += int(h.wordBytes) {
+                       if offset < int(h.dataSkip) {
+                               continue
+                       }
+                       addr := h.addr + uintptr(offset)
+                       // Find the position of the left edge of this word
+                       caret := dataStart + int(min(h.toPos[offset], h.toPos[offset+int(h.wordBytes)-1]))
+                       h.mark(addr, hexdumpMarker{caret})
+               }
+       }
+
+       h.addr += uintptr(bytesPerLine)
+       h.dataPos = 0
+       h.dataSkip = 0
+}
+
+// fmtHex formats v in base 16 into buf. It fills all of buf, zero-padding on the left.
+// If buf is too small to represent v, the output will start with '*'.
+func (h *hexdumper) fmtHex(buf []byte, v uint64) {
+       const dig = "0123456789abcdef"
+       i := len(buf) - 1
+       for ; i >= 0; i-- {
+               buf[i] = dig[v%16]
+               v /= 16
+       }
+       if v != 0 {
+               // Indicate that we couldn't fit the whole number.
+               buf[0] = '*'
+       }
+}
+
+func (m hexdumpMarker) start() { // start writes m.chars spaces followed by "^ "; the caller then prints its annotation text.
+       var spaces [64]byte
+       for i := range spaces {
+               spaces[i] = ' '
+       }
+       for m.chars > len(spaces) { // Emit the indent in chunks of at most len(spaces).
+               gwrite(spaces[:])
+               m.chars -= len(spaces)
+       }
+       gwrite(spaces[:m.chars])
+       print("^ ")
+}
diff --git a/src/runtime/hexdump_test.go b/src/runtime/hexdump_test.go
new file mode 100644 (file)
index 0000000..cc44e48
--- /dev/null
@@ -0,0 +1,151 @@
+// Copyright 2025 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime_test
+
+import (
+       "fmt"
+       "internal/abi"
+       "internal/goarch"
+       "runtime"
+       "slices"
+       "strings"
+       "testing"
+       "unsafe"
+)
+
+func TestHexdumper(t *testing.T) {
+       check := func(label, got, want string) {
+               got = strings.TrimRight(got, "\n")
+               want = strings.TrimPrefix(want, "\n")
+               want = strings.TrimRight(want, "\n")
+               if got != want {
+                       t.Errorf("%s: got\n%s\nwant\n%s", label, got, want)
+               }
+       }
+
+       data := make([]byte, 32)
+       for i := range data {
+               data[i] = 0x10 + byte(i)
+       }
+
+       check("basic", runtime.Hexdumper(0, 1, nil, data), `
+           0 1 2 3  4 5 6 7   8 9 a b  c d e f  0123456789abcdef
+00000000: 10111213 14151617  18191a1b 1c1d1e1f  ................
+00000010: 20212223 24252627  28292a2b 2c2d2e2f   !"#$%&'()*+,-./`)
+
+       if !goarch.BigEndian {
+               // Different word sizes. The expected byte order within words is the little-endian one.
+               check("word=4", runtime.Hexdumper(0, 4, nil, data), `
+           3 2 1 0  7 6 5 4   b a 9 8  f e d c  0123456789abcdef
+00000000: 13121110 17161514  1b1a1918 1f1e1d1c  ................
+00000010: 23222120 27262524  2b2a2928 2f2e2d2c   !"#$%&'()*+,-./`)
+               check("word=8", runtime.Hexdumper(0, 8, nil, data), `
+           7 6 5 4  3 2 1 0   f e d c  b a 9 8  0123456789abcdef
+00000000: 17161514 13121110  1f1e1d1c 1b1a1918  ................
+00000010: 27262524 23222120  2f2e2d2c 2b2a2928   !"#$%&'()*+,-./`)
+       }
+
+       // Starting offset: the data begins at address 1, so the first line leaves byte 0 blank.
+       check("offset=1", runtime.Hexdumper(1, 1, nil, data), `
+           0 1 2 3  4 5 6 7   8 9 a b  c d e f  0123456789abcdef
+00000000:   101112 13141516  1718191a 1b1c1d1e   ...............
+00000010: 1f202122 23242526  2728292a 2b2c2d2e  . !"#$%&'()*+,-.
+00000020: 2f                                    /`)
+       if !goarch.BigEndian {
+               // ... combined with a word size
+               check("offset=1 and word=4", runtime.Hexdumper(1, 4, nil, data), `
+           3 2 1 0  7 6 5 4   b a 9 8  f e d c  0123456789abcdef
+00000000: 121110   16151413  1a191817 1e1d1c1b   ...............
+00000010: 2221201f 26252423  2a292827 2e2d2c2b  . !"#$%&'()*+,-.
+00000020:       2f                              /`)
+       }
+
+       // Feed the same data as 2-byte slices at offset 1, so line boundaries fall in the middle of writes.
+       partials := make([][]byte, 0)
+       for i := 0; i < len(data); i += 2 {
+               partials = append(partials, data[i:i+2])
+       }
+       check("partials", runtime.Hexdumper(1, 1, nil, partials...), `
+           0 1 2 3  4 5 6 7   8 9 a b  c d e f  0123456789abcdef
+00000000:   101112 13141516  1718191a 1b1c1d1e   ...............
+00000010: 1f202122 23242526  2728292a 2b2c2d2e  . !"#$%&'()*+,-.
+00000020: 2f                                    /`)
+
+       // Marks: annotate every word whose address is a multiple of 7.
+       check("marks", runtime.Hexdumper(0, 1, func(addr uintptr, start func()) {
+               if addr%7 == 0 {
+                       start()
+                       println("mark")
+               }
+       }, data), `
+           0 1 2 3  4 5 6 7   8 9 a b  c d e f  0123456789abcdef
+00000000: 10111213 14151617  18191a1b 1c1d1e1f  ................
+          ^ mark
+                         ^ mark
+                                          ^ mark
+00000010: 20212223 24252627  28292a2b 2c2d2e2f   !"#$%&'()*+,-./
+                     ^ mark
+                                      ^ mark`)
+       if !goarch.BigEndian {
+               check("marks and word=4", runtime.Hexdumper(0, 4, func(addr uintptr, start func()) {
+                       if addr%7 == 0 {
+                               start()
+                               println("mark")
+                       }
+               }, data), `
+           3 2 1 0  7 6 5 4   b a 9 8  f e d c  0123456789abcdef
+00000000: 13121110 17161514  1b1a1918 1f1e1d1c  ................
+          ^ mark
+00000010: 23222120 27262524  2b2a2928 2f2e2d2c   !"#$%&'()*+,-./
+                                      ^ mark`)
+       }
+}
+
+func TestHexdumpWords(t *testing.T) {
+       if goarch.BigEndian || goarch.PtrSize != 8 {
+               // We could support these, but it's kind of a pain.
+               t.Skip("requires 64-bit little endian")
+       }
+
+       // Most of this is in hexdumper. Here we just test the symbolizer.
+
+       pc := abi.FuncPCABIInternal(TestHexdumpWords)
+       pcs := slices.Repeat([]uintptr{pc}, 3)
+
+       // Make sure pcs doesn't move around on us.
+       var p runtime.Pinner
+       defer p.Unpin()
+       p.Pin(&pcs[0])
+       // Round up to a 16-byte boundary so the hexdump is one full line; pcs has a third word so that (assuming &pcs[0] is 8-byte aligned) 16 bytes from there stay within it.
+       start := uintptr(unsafe.Pointer(&pcs[0]))
+       start = (start + 15) &^ uintptr(15)
+
+       // Do the hex dump.
+       got := runtime.HexdumpWords(start, 16)
+
+       // Construct the expected output: both words hold pc, so both get the same symbol annotation.
+       pcStr := fmt.Sprintf("%016x", pc)
+       pcStr = pcStr[:8] + " " + pcStr[8:] // Add middle space
+       ascii := make([]byte, 8)
+       for i := range ascii {
+               b := byte(pc >> (8 * i))
+               if b >= ' ' && b <= '~' {
+                       ascii[i] = b
+               } else {
+                       ascii[i] = '.'
+               }
+       }
+       want := fmt.Sprintf(`
+                   7 6 5 4  3 2 1 0   f e d c  b a 9 8  0123456789abcdef
+%016x: %s  %s  %s%s
+                  ^ <runtime_test.TestHexdumpWords+0x0>
+                                     ^ <runtime_test.TestHexdumpWords+0x0>
+`, start, pcStr, pcStr, ascii, ascii)
+       want = strings.TrimPrefix(want, "\n")
+
+       if got != want {
+               t.Errorf("got\n%s\nwant\n%s", got, want)
+       }
+}
index c9234c508471f0c363ae353d48290adcb5b5a8f3..714b9a51df4c3a225a5ddf7cd3e33d00e14f0099 100644 (file)
@@ -1524,29 +1524,32 @@ func scanConservative(b, n uintptr, ptrmask *uint8, gcw *gcWork, state *stackSca
        if debugScanConservative {
                printlock()
                print("conservatively scanning [", hex(b), ",", hex(b+n), ")\n")
-               hexdumpWords(b, b+n, func(p uintptr) byte {
+               hexdumpWords(b, n, func(p uintptr, m hexdumpMarker) {
                        if ptrmask != nil {
                                word := (p - b) / goarch.PtrSize
                                bits := *addb(ptrmask, word/8)
                                if (bits>>(word%8))&1 == 0 {
-                                       return '$'
+                                       return
                                }
                        }
 
                        val := *(*uintptr)(unsafe.Pointer(p))
                        if state != nil && state.stack.lo <= val && val < state.stack.hi {
-                               return '@'
+                               m.start()
+                               println("ptr to stack")
+                               return
                        }
 
                        span := spanOfHeap(val)
                        if span == nil {
-                               return ' '
+                               return
                        }
                        idx := span.objIndex(val)
                        if span.isFreeOrNewlyAllocated(idx) {
-                               return ' '
+                               return
                        }
-                       return '*'
+                       m.start()
+                       println("ptr to heap")
                })
                printunlock()
        }
index c3d6afb90a54fe793ceb9673c3195ef18be60dca..4eecb1cfd9379e0076bb2b751fc8c246ff646de5 100644 (file)
@@ -885,7 +885,7 @@ func (s *mspan) reportZombies() {
                        if length > 1024 {
                                length = 1024
                        }
-                       hexdumpWords(addr, addr+length, nil)
+                       hexdumpWords(addr, length, nil)
                }
                mbits.advance()
                abits.advance()
index c01db9d7f9868983a8ed09776dec530cf98e6f59..d2733fb2661f5eebabe05f59f9e08e437b4cd8a5 100644 (file)
@@ -5,7 +5,6 @@
 package runtime
 
 import (
-       "internal/goarch"
        "internal/strconv"
        "unsafe"
 )
@@ -212,43 +211,3 @@ func printeface(e eface) {
 func printiface(i iface) {
        print("(", i.tab, ",", i.data, ")")
 }
-
-// hexdumpWords prints a word-oriented hex dump of [p, end).
-//
-// If mark != nil, it will be called with each printed word's address
-// and should return a character mark to appear just before that
-// word's value. It can return 0 to indicate no mark.
-func hexdumpWords(p, end uintptr, mark func(uintptr) byte) {
-       printlock()
-       var markbuf [1]byte
-       markbuf[0] = ' '
-       minhexdigits = int(unsafe.Sizeof(uintptr(0)) * 2)
-       for i := uintptr(0); p+i < end; i += goarch.PtrSize {
-               if i%16 == 0 {
-                       if i != 0 {
-                               println()
-                       }
-                       print(hex(p+i), ": ")
-               }
-
-               if mark != nil {
-                       markbuf[0] = mark(p + i)
-                       if markbuf[0] == 0 {
-                               markbuf[0] = ' '
-                       }
-               }
-               gwrite(markbuf[:])
-               val := *(*uintptr)(unsafe.Pointer(p + i))
-               print(hex(val))
-               print(" ")
-
-               // Can we symbolize val?
-               fn := findfunc(val)
-               if fn.valid() {
-                       print("<", funcname(fn), "+", hex(val-fn.entry()), "> ")
-               }
-       }
-       minhexdigits = 0
-       println()
-       printunlock()
-}
index 6649f72471629a91f73968b9ebe3cc8afe8e2c39..74aaeba876709608a623c4a2fae3973fe66c6a4d 100644 (file)
@@ -1366,16 +1366,19 @@ func tracebackHexdump(stk stack, frame *stkframe, bad uintptr) {
 
        // Print the hex dump.
        print("stack: frame={sp:", hex(frame.sp), ", fp:", hex(frame.fp), "} stack=[", hex(stk.lo), ",", hex(stk.hi), ")\n")
-       hexdumpWords(lo, hi, func(p uintptr) byte {
-               switch p {
-               case frame.fp:
-                       return '>'
-               case frame.sp:
-                       return '<'
-               case bad:
-                       return '!'
+       hexdumpWords(lo, hi-lo, func(p uintptr, m hexdumpMarker) {
+               if p == frame.fp {
+                       m.start()
+                       println("FP")
+               }
+               if p == frame.sp {
+                       m.start()
+                       println("SP")
+               }
+               if p == bad {
+                       m.start()
+                       println("bad")
                }
-               return 0
        })
 }