Updates #20650 (CL 45099 introduced the feature to x86).
Change-Id: If40cc9d87417a05281d8633f05cd91f6f434b136
Reviewed-on: https://go-review.googlesource.com/80843
Run-TryBot: Cherry Zhang <cherryyz@google.com>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
debug = false
)
-// A ExtInst represents a single decoded instruction parsed
+// An ExtInst represents a single decoded instruction parsed
// from an external disassembler's output.
type ExtInst struct {
addr uint32
return fmt.Sprintf("#%#x", uint32(i))
}
-// A ImmAlt is an alternate encoding of an integer constant.
+// An ImmAlt is an alternate encoding of an integer constant.
type ImmAlt struct {
Val uint8
Rot uint8
debug = false
)
-// A ExtInst represents a single decoded instruction parsed
+// An ExtInst represents a single decoded instruction parsed
// from an external disassembler's output.
type ExtInst struct {
addr uint32
} else {
switch syntax {
case "gnu":
- out = GNUSyntax(inst)
+ out = GNUSyntax(inst, 0, nil)
case "intel":
- out = IntelSyntax(inst)
+ out = IntelSyntax(inst, 0, nil)
case "plan9": // [sic]
out = GoSyntax(inst, 0, nil)
default:
debug = false
)
-// A ExtInst represents a single decoded instruction parsed
+// An ExtInst represents a single decoded instruction parsed
// from an external disassembler's output.
type ExtInst struct {
addr uint32
} else {
switch syntax {
case "gnu":
- text = GNUSyntax(inst)
+ text = GNUSyntax(inst, 0, nil)
case "intel":
- text = IntelSyntax(inst)
+ text = IntelSyntax(inst, 0, nil)
case "plan9": // [sic]
text = GoSyntax(inst, 0, nil)
default:
--- /dev/null
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package x86asm
+
+import (
+ "encoding/hex"
+ "testing"
+)
+
+func testFormattingSymname(addr uint64) (string, uint64) {
+ switch addr {
+ case 0x424080:
+ return "runtime.printint", 0x424080
+ case 0x4c8068:
+ return "main.A", 0x4c8068
+ }
+ return "", 0
+}
+
+func TestFormatting(t *testing.T) {
+ testCases := []struct {
+ PC uint64
+ bytes string
+
+ goSyntax, intelSyntax, gnuSyntax string
+ }{
+ {0x4816b2, "0f8677010000",
+ "JBE 0x48182f",
+ "jbe 0x48182f",
+ "jbe 0x48182f"},
+ {0x45065b, "488b442408",
+ "MOVQ 0x8(SP), AX",
+ "mov rax, qword ptr [rsp+0x8]",
+ "mov 0x8(%rsp),%rax"},
+ {0x450678, "488b05e9790700",
+ "MOVQ main.A(SB), AX",
+ "mov rax, qword ptr [main.A]",
+ "mov main.A,%rax"},
+ {0x450664, "e8173afdff",
+ "CALL runtime.printint(SB)",
+ "call runtime.printint",
+ "callq runtime.printint"},
+ {0x45069b, "488d0575d90100",
+ "LEAQ 0x1d975(IP), AX",
+ "lea rax, ptr [rip+0x1d975]",
+ "lea 0x1d975(%rip),%rax"},
+ }
+
+ for _, testCase := range testCases {
+ t.Logf("%#x %s %s", testCase.PC, testCase.bytes, testCase.goSyntax)
+ bs, _ := hex.DecodeString(testCase.bytes)
+ inst, err := Decode(bs, 64)
+ if err != nil {
+ t.Errorf("decode error %v", err)
+ }
+ if out := GoSyntax(inst, testCase.PC, testFormattingSymname); out != testCase.goSyntax {
+ t.Errorf("GoSyntax: %q", out)
+ }
+ if out := IntelSyntax(inst, testCase.PC, testFormattingSymname); out != testCase.intelSyntax {
+ t.Errorf("IntelSyntax: %q expected: %q", out, testCase.intelSyntax)
+ }
+ if out := GNUSyntax(inst, testCase.PC, testFormattingSymname); out != testCase.gnuSyntax {
+ t.Errorf("GNUSyntax: %q expected: %q", out, testCase.gnuSyntax)
+ }
+ }
+}
// GNUSyntax returns the GNU assembler syntax for the instruction, as defined by GNU binutils.
// This general form is often called ``AT&T syntax'' as a reference to AT&T System V Unix.
-func GNUSyntax(inst Inst) string {
+func GNUSyntax(inst Inst, pc uint64, symname SymLookup) string {
// Rewrite instruction to mimic GNU peculiarities.
// Note that inst has been passed by value and contains
// no pointers, so any changes we make here are local
// and will not propagate back out to the caller.
+ if symname == nil {
+ symname = func(uint64) (string, uint64) { return "", 0 }
+ }
+
// Adjust opcode [sic].
switch inst.Op {
case FDIV, FDIVR, FSUB, FSUBR, FDIVP, FDIVRP, FSUBP, FSUBRP:
if a == Imm(1) && (inst.Opcode>>24)&^1 == 0xD0 {
continue
}
- args = append(args, gnuArg(&inst, a, &usedPrefixes))
+ args = append(args, gnuArg(&inst, pc, symname, a, &usedPrefixes))
}
// The default is to print the arguments in reverse Intel order.
// gnuArg returns the GNU syntax for the argument x from the instruction inst.
// If *usedPrefixes is false and x is a Mem, then the formatting
// includes any segment prefixes and sets *usedPrefixes to true.
-func gnuArg(inst *Inst, x Arg, usedPrefixes *bool) string {
+func gnuArg(inst *Inst, pc uint64, symname SymLookup, x Arg, usedPrefixes *bool) string {
if x == nil {
return "<nil>"
}
}
return gccRegName[x]
case Mem:
+ if s, disp := memArgToSymbol(x, pc, inst.Len, symname); s != "" {
+ suffix := ""
+ if disp != 0 {
+ suffix = fmt.Sprintf("%+d", disp)
+ }
+ return fmt.Sprintf("%s%s", s, suffix)
+ }
seg := ""
var haveCS, haveDS, haveES, haveFS, haveGS, haveSS bool
switch x.Segment {
}
return fmt.Sprintf("%s%s(%s,%s,%d)", seg, disp, base, index, x.Scale)
case Rel:
- return fmt.Sprintf(".%+#x", int32(x))
+ if pc == 0 {
+ return fmt.Sprintf(".%+#x", int64(x))
+ } else {
+ addr := pc + uint64(inst.Len) + uint64(x)
+ if s, base := symname(addr); s != "" && addr == base {
+ return fmt.Sprintf("%s", s)
+ } else {
+ addr := pc + uint64(inst.Len) + uint64(x)
+ return fmt.Sprintf("%#x", addr)
+ }
+ }
case Imm:
+ if s, base := symname(uint64(x)); s != "" {
+ suffix := ""
+ if uint64(x) != base {
+ suffix = fmt.Sprintf("%+d", uint64(x)-base)
+ }
+ return fmt.Sprintf("$%s%s", s, suffix)
+ }
if inst.Mode == 32 {
return fmt.Sprintf("$%#x", uint32(x))
}
)
// IntelSyntax returns the Intel assembler syntax for the instruction, as defined by Intel's XED tool.
-func IntelSyntax(inst Inst) string {
+func IntelSyntax(inst Inst, pc uint64, symname SymLookup) string {
+ if symname == nil {
+ symname = func(uint64) (string, uint64) { return "", 0 }
+ }
+
var iargs []Arg
for _, a := range inst.Args {
if a == nil {
if a == nil {
break
}
- args = append(args, intelArg(&inst, a))
+ args = append(args, intelArg(&inst, pc, symname, a))
}
var op string
return prefix + op
}
-func intelArg(inst *Inst, arg Arg) string {
+func intelArg(inst *Inst, pc uint64, symname SymLookup, arg Arg) string {
switch a := arg.(type) {
case Imm:
+ if s, base := symname(uint64(a)); s != "" {
+ suffix := ""
+ if uint64(a) != base {
+ suffix = fmt.Sprintf("%+d", uint64(a)-base)
+ }
+ return fmt.Sprintf("$%s%s", s, suffix)
+ }
if inst.Mode == 32 {
return fmt.Sprintf("%#x", uint32(a))
}
}
prefix += "ptr "
+ if s, disp := memArgToSymbol(a, pc, inst.Len, symname); s != "" {
+ suffix := ""
+ if disp != 0 {
+ suffix = fmt.Sprintf("%+d", disp)
+ }
+ return prefix + fmt.Sprintf("[%s%s]", s, suffix)
+ }
if a.Segment != 0 {
prefix += strings.ToLower(a.Segment.String()) + ":"
}
prefix += "["
if a.Base != 0 {
- prefix += intelArg(inst, a.Base)
+ prefix += intelArg(inst, pc, symname, a.Base)
}
if a.Scale != 0 && a.Index != 0 {
if a.Base != 0 {
prefix += "+"
}
- prefix += fmt.Sprintf("%s*%d", intelArg(inst, a.Index), a.Scale)
+ prefix += fmt.Sprintf("%s*%d", intelArg(inst, pc, symname, a.Index), a.Scale)
}
if a.Disp != 0 {
if prefix[len(prefix)-1] == '[' && (a.Disp >= 0 || int64(int32(a.Disp)) != a.Disp) {
prefix += "]"
return prefix
case Rel:
- return fmt.Sprintf(".%+#x", int64(a))
+ if pc == 0 {
+ return fmt.Sprintf(".%+#x", int64(a))
+ } else {
+ addr := pc + uint64(inst.Len) + uint64(a)
+ if s, base := symname(addr); s != "" && addr == base {
+ return fmt.Sprintf("%s", s)
+ } else {
+ addr := pc + uint64(inst.Len) + uint64(a)
+ return fmt.Sprintf("%#x", addr)
+ }
+ }
case Reg:
if int(a) < len(intelReg) && intelReg[a] != "" {
switch inst.Op {
"strings"
)
+type SymLookup func(uint64) (string, uint64)
+
// GoSyntax returns the Go assembler syntax for the instruction.
// The syntax was originally defined by Plan 9.
// The pc is the program counter of the instruction, used for expanding
// The symname function queries the symbol table for the program
// being disassembled. Given a target address it returns the name and base
// address of the symbol containing the target, if any; otherwise it returns "", 0.
-func GoSyntax(inst Inst, pc uint64, symname func(uint64) (string, uint64)) string {
+func GoSyntax(inst Inst, pc uint64, symname SymLookup) string {
if symname == nil {
symname = func(uint64) (string, uint64) { return "", 0 }
}
}
return fmt.Sprintf("$%#x", uint64(a))
case Mem:
- if a.Segment == 0 && a.Disp != 0 && a.Base == 0 && (a.Index == 0 || a.Scale == 0) {
- if s, base := symname(uint64(a.Disp)); s != "" {
- suffix := ""
- if uint64(a.Disp) != base {
- suffix = fmt.Sprintf("%+d", uint64(a.Disp)-base)
- }
- return fmt.Sprintf("%s%s(SB)", s, suffix)
+ if s, disp := memArgToSymbol(a, pc, inst.Len, symname); s != "" {
+ suffix := ""
+ if disp != 0 {
+ suffix = fmt.Sprintf("%+d", disp)
}
+ return fmt.Sprintf("%s%s(SB)", s, suffix)
}
s := ""
if a.Segment != 0 {
return arg.String()
}
+func memArgToSymbol(a Mem, pc uint64, instrLen int, symname SymLookup) (string, int64) {
+ if a.Segment != 0 || a.Disp == 0 || a.Index != 0 || a.Scale != 0 {
+ return "", 0
+ }
+
+ var disp uint64
+ switch a.Base {
+ case IP, EIP, RIP:
+ disp = uint64(a.Disp + int64(pc) + int64(instrLen))
+ case 0:
+ disp = uint64(a.Disp)
+ default:
+ return "", 0
+ }
+
+ s, base := symname(disp)
+ return s, int64(disp) - int64(base)
+}
+
var plan9Suffix = [maxOp + 1]bool{
ADC: true,
ADD: true,