Following CL 357330, use jump tables on ARM64.
name                         old time/op  new time/op  delta
Switch8Predictable-4         3.41ns ± 0%  3.21ns ± 0%     ~     (p=0.079 n=4+5)
Switch8Unpredictable-4       12.0ns ± 0%   9.5ns ± 0%  -21.17%  (p=0.000 n=5+4)
Switch32Predictable-4        3.06ns ± 0%  2.82ns ± 0%   -7.78%  (p=0.008 n=5+5)
Switch32Unpredictable-4      13.3ns ± 0%   9.5ns ± 0%  -28.87%  (p=0.016 n=4+5)
SwitchStringPredictable-4    3.71ns ± 0%  3.21ns ± 0%  -13.43%  (p=0.000 n=5+4)
SwitchStringUnpredictable-4  14.8ns ± 0%  15.1ns ± 0%   +2.37%  (p=0.008 n=5+5)
Change-Id: Ia0b85df7ca9273cf70c05eb957225c6e61822fa6
Reviewed-on: https://go-review.googlesource.com/c/go/+/403979
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
Run-TryBot: Cherry Mui <cherryyz@google.com>
Reviewed-by: David Chase <drchase@google.com>
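
For context, this is the shape of switch the CL targets (hypothetical example; the codegen tests at the end of this CL check the emitted MOVD/JMP pair for the same shapes). A dense switch now compiles on arm64 to one indexed load from a jump table plus one indirect jump, instead of a tree of compares:

package demo

// With this CL, a dense switch like the one below is compiled on
// arm64 into an indexed load from a jump table followed by an
// indirect jump, rather than a binary search over the cases.
// Hypothetical example; the decision to use a jump table is made by
// the generic frontend, not by this CL.
func classify(x int) int {
	switch x {
	case 0:
		return 10
	case 1:
		return 11
	case 2:
		return 12
	case 3:
		return 13
	case 4:
		return 14
	case 5:
		return 15
	case 6:
		return 16
	case 7:
		return 17
	}
	return -1
}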
return p
}
-// generate the memory operand for the indexed load/store instructions
-func genIndexedOperand(v *ssa.Value) obj.Addr {
+// generate the memory operand for the indexed load/store instructions.
+// base and idx are registers.
+func genIndexedOperand(op ssa.Op, base, idx int16) obj.Addr {
// Reg: base register, Index: (shifted) index register
- mop := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[0].Reg()}
- switch v.Op {
+ mop := obj.Addr{Type: obj.TYPE_MEM, Reg: base}
+ switch op {
case ssa.OpARM64MOVDloadidx8, ssa.OpARM64MOVDstoreidx8, ssa.OpARM64MOVDstorezeroidx8,
ssa.OpARM64FMOVDloadidx8, ssa.OpARM64FMOVDstoreidx8:
- mop.Index = arm64.REG_LSL | 3<<5 | v.Args[1].Reg()&31
+ mop.Index = arm64.REG_LSL | 3<<5 | idx&31
case ssa.OpARM64MOVWloadidx4, ssa.OpARM64MOVWUloadidx4, ssa.OpARM64MOVWstoreidx4, ssa.OpARM64MOVWstorezeroidx4,
ssa.OpARM64FMOVSloadidx4, ssa.OpARM64FMOVSstoreidx4:
- mop.Index = arm64.REG_LSL | 2<<5 | v.Args[1].Reg()&31
+ mop.Index = arm64.REG_LSL | 2<<5 | idx&31
case ssa.OpARM64MOVHloadidx2, ssa.OpARM64MOVHUloadidx2, ssa.OpARM64MOVHstoreidx2, ssa.OpARM64MOVHstorezeroidx2:
- mop.Index = arm64.REG_LSL | 1<<5 | v.Args[1].Reg()&31
+ mop.Index = arm64.REG_LSL | 1<<5 | idx&31
default: // not shifted
- mop.Index = v.Args[1].Reg()
+ mop.Index = idx
}
return mop
}
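
For reference, the Index field built above packs the register number and shift amount into a single int16. A minimal sketch of the same encoding (hypothetical helper; the compiler inlines this arithmetic directly, and cmd/internal packages cannot be imported outside the Go tree):

package sketch

import "cmd/internal/obj/arm64"

// shiftedIndexReg encodes a shifted index register for an arm64
// indexed addressing mode: the low 5 bits hold the register number,
// bits 5-6 hold the left-shift amount (1, 2, or 3 in the cases
// above), and the arm64.REG_LSL base marks the operand as a shifted
// register. An unshifted index is stored as the bare register
// number, as in the default case above.
func shiftedIndexReg(idx, shift int16) int16 {
	return arm64.REG_LSL | shift<<5 | idx&31
}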
ssa.OpARM64FMOVDloadidx8,
ssa.OpARM64FMOVSloadidx4:
p := s.Prog(v.Op.Asm())
- p.From = genIndexedOperand(v)
+ p.From = genIndexedOperand(v.Op, v.Args[0].Reg(), v.Args[1].Reg())
p.To.Type = obj.TYPE_REG
p.To.Reg = v.Reg()
case ssa.OpARM64LDAR,
ssa.OpARM64MOVDstoreidx8,
ssa.OpARM64FMOVDstoreidx8:
p := s.Prog(v.Op.Asm())
- p.To = genIndexedOperand(v)
+ p.To = genIndexedOperand(v.Op, v.Args[0].Reg(), v.Args[1].Reg())
p.From.Type = obj.TYPE_REG
p.From.Reg = v.Args[2].Reg()
case ssa.OpARM64STP:
ssa.OpARM64MOVWstorezeroidx4,
ssa.OpARM64MOVDstorezeroidx8:
p := s.Prog(v.Op.Asm())
- p.To = genIndexedOperand(v)
+ p.To = genIndexedOperand(v.Op, v.Args[0].Reg(), v.Args[1].Reg())
p.From.Type = obj.TYPE_REG
p.From.Reg = arm64.REGZERO
case ssa.OpARM64MOVQstorezero:
s.CombJump(b, next, &leJumps)
case ssa.BlockARM64GTnoov:
s.CombJump(b, next, >Jumps)
+
+ case ssa.BlockARM64JUMPTABLE:
+ // MOVD (TABLE)(IDX<<3), Rtmp
+ // JMP (Rtmp)
+ p := s.Prog(arm64.AMOVD)
+ p.From = genIndexedOperand(ssa.OpARM64MOVDloadidx8, b.Controls[1].Reg(), b.Controls[0].Reg())
+ p.To.Type = obj.TYPE_REG
+ p.To.Reg = arm64.REGTMP
+ p = s.Prog(obj.AJMP)
+ p.To.Type = obj.TYPE_MEM
+ p.To.Reg = arm64.REGTMP
+ // Save jump tables for later resolution of the target blocks.
+ s.JumpTables = append(s.JumpTables, b)
+
default:
b.Fatalf("branch not implemented: %s", b.LongString())
}
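
s.JumpTables is consumed later in genssa, once the first Prog of each block is known. A sketch of that resolution step, with field names taken from the obj-side hunk below and the shape assumed from the existing amd64 support in CL 357330 (not part of this hunk):

// Sketch: convert each JUMPTABLE block's successors into *obj.Prog
// targets and attach them, together with the table symbol from Aux,
// to the function's LSym so the assembler can write addresses once
// instruction byte offsets are final (see the asm7.go hunk below).
for _, jt := range s.JumpTables {
	targets := make([]*obj.Prog, len(jt.Succs))
	for i, e := range jt.Succs {
		targets[i] = s.bstart[e.Block().ID] // first instruction of target block
	}
	fi := s.pp.CurFunc.LSym.Func()
	fi.JumpTables = append(fi.JumpTables, obj.JumpTable{
		Sym:     jt.Aux.(*obj.LSym),
		Targets: targets,
	})
}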
(If cond yes no) => (TBNZ [0] cond yes no)
+(JumpTable idx) => (JUMPTABLE {makeJumpTableSym(b)} idx (MOVDaddr <typ.Uintptr> {makeJumpTableSym(b)} (SB)))
+
// atomic intrinsics
// Note: these ops do not accept offset.
(AtomicLoad8 ...) => (LDARB ...)
{name: "LEnoov", controls: 1}, // 'LE' but without honoring overflow
{name: "GTnoov", controls: 1}, // 'GT' but without honoring overflow
{name: "GEnoov", controls: 1}, // 'GE' but without honoring overflow
+
+ // JUMPTABLE implements jump tables.
+ // Aux is the symbol (an *obj.LSym) for the jump table.
+ // control[0] is the index into the jump table.
+ // control[1] is the address of the jump table (the address of the symbol stored in Aux).
+ {name: "JUMPTABLE", controls: 2, aux: "Sym"},
}
archs = append(archs, arch{
BlockARM64LEnoov
BlockARM64GTnoov
BlockARM64GEnoov
+ BlockARM64JUMPTABLE
BlockLOONG64EQ
BlockLOONG64NE
BlockARMGTnoov: "GTnoov",
BlockARMGEnoov: "GEnoov",
- BlockARM64EQ: "EQ",
- BlockARM64NE: "NE",
- BlockARM64LT: "LT",
- BlockARM64LE: "LE",
- BlockARM64GT: "GT",
- BlockARM64GE: "GE",
- BlockARM64ULT: "ULT",
- BlockARM64ULE: "ULE",
- BlockARM64UGT: "UGT",
- BlockARM64UGE: "UGE",
- BlockARM64Z: "Z",
- BlockARM64NZ: "NZ",
- BlockARM64ZW: "ZW",
- BlockARM64NZW: "NZW",
- BlockARM64TBZ: "TBZ",
- BlockARM64TBNZ: "TBNZ",
- BlockARM64FLT: "FLT",
- BlockARM64FLE: "FLE",
- BlockARM64FGT: "FGT",
- BlockARM64FGE: "FGE",
- BlockARM64LTnoov: "LTnoov",
- BlockARM64LEnoov: "LEnoov",
- BlockARM64GTnoov: "GTnoov",
- BlockARM64GEnoov: "GEnoov",
+ BlockARM64EQ: "EQ",
+ BlockARM64NE: "NE",
+ BlockARM64LT: "LT",
+ BlockARM64LE: "LE",
+ BlockARM64GT: "GT",
+ BlockARM64GE: "GE",
+ BlockARM64ULT: "ULT",
+ BlockARM64ULE: "ULE",
+ BlockARM64UGT: "UGT",
+ BlockARM64UGE: "UGE",
+ BlockARM64Z: "Z",
+ BlockARM64NZ: "NZ",
+ BlockARM64ZW: "ZW",
+ BlockARM64NZW: "NZW",
+ BlockARM64TBZ: "TBZ",
+ BlockARM64TBNZ: "TBNZ",
+ BlockARM64FLT: "FLT",
+ BlockARM64FLE: "FLE",
+ BlockARM64FGT: "FGT",
+ BlockARM64FGE: "FGE",
+ BlockARM64LTnoov: "LTnoov",
+ BlockARM64LEnoov: "LEnoov",
+ BlockARM64GTnoov: "GTnoov",
+ BlockARM64GEnoov: "GEnoov",
+ BlockARM64JUMPTABLE: "JUMPTABLE",
BlockLOONG64EQ: "EQ",
BlockLOONG64NE: "NE",
return false
}
func rewriteBlockARM64(b *Block) bool {
+ typ := &b.Func.Config.Types
switch b.Kind {
case BlockARM64EQ:
// match: (EQ (CMPWconst [0] x:(ANDconst [c] y)) yes no)
b.AuxInt = int64ToAuxInt(0)
return true
}
+ case BlockJumpTable:
+ // match: (JumpTable idx)
+ // result: (JUMPTABLE {makeJumpTableSym(b)} idx (MOVDaddr <typ.Uintptr> {makeJumpTableSym(b)} (SB)))
+ for {
+ idx := b.Controls[0]
+ v0 := b.NewValue0(b.Pos, OpARM64MOVDaddr, typ.Uintptr)
+ v0.Aux = symToAux(makeJumpTableSym(b))
+ v1 := b.NewValue0(b.Pos, OpSB, typ.Uintptr)
+ v0.AddArg(v1)
+ b.resetWithControl2(BlockARM64JUMPTABLE, idx, v0)
+ b.Aux = symToAux(makeJumpTableSym(b))
+ return true
+ }
case BlockARM64LE:
// match: (LE (CMPWconst [0] x:(ANDconst [c] y)) yes no)
// cond: x.Uses == 1
// so instruction sequences that use REGTMP are unsafe to
// preempt asynchronously.
obj.MarkUnsafePoints(c.ctxt, c.cursym.Func().Text, c.newprog, c.isUnsafePoint, c.isRestartable)
+
+ // Now that we know byte offsets, we can generate jump table entries.
+ for _, jt := range cursym.Func().JumpTables {
+ for i, p := range jt.Targets {
+ // The ith jump table entry points to the p.Pc'th
+ // byte of the function symbol cursym.
+ // TODO: try using relative PCs.
+ jt.Sym.WriteAddr(ctxt, int64(i)*8, 8, cursym, p.Pc)
+ }
+ }
}
// isUnsafePoint returns whether p is an unsafe point.
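
Each entry is an 8-byte absolute address, so at run time the JUMPTABLE block's indexed load reduces to a flat array lookup. A minimal model of what MOVD (TABLE)(IDX<<3) computes, assuming the layout written above (illustrative only):

package model

import "unsafe"

// lookup models the jump table load: entry idx holds the absolute PC
// of target block idx, stored as an 8-byte word by WriteAddr above.
func lookup(table unsafe.Pointer, idx uintptr) uintptr {
	return *(*uintptr)(unsafe.Pointer(uintptr(table) + idx*8))
}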
MinLC: 4,
Alignment: 1,
CanMergeLoads: true,
+ CanJumpTable: true,
HasLR: true,
FixedFrameSize: 8, // LR
}
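
CanJumpTable is what the generic frontend checks before emitting an OpJumpTable at all. Roughly, the gate looks like the sketch below (shape assumed from CL 357330; the real check lives in cmd/compile/internal/walk and also requires optimizations to be enabled):

package gate

import "cmd/compile/internal/ssagen"

// useJumpTable is a hypothetical sketch of the frontend gate: a
// switch is lowered to a jump table only when the target reports
// support and the case set is dense enough. The 8-case minimum
// matches the "8+ cases" comments in the codegen tests below.
func useJumpTable(ncases int) bool {
	return ssagen.Arch.LinkArch.CanJumpTable && ncases >= 8
}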
// use jump tables for 8+ int cases
func square(x int) int {
// amd64:`JMP\s\(.*\)\(.*\)$`
+ // arm64:`MOVD\s\(R.*\)\(R.*<<3\)`,`JMP\s\(R.*\)$`
switch x {
case 1:
return 1
// use jump tables for 8+ string lengths
func length(x string) int {
// amd64:`JMP\s\(.*\)\(.*\)$`
+ // arm64:`MOVD\s\(R.*\)\(R.*<<3\)`,`JMP\s\(R.*\)$`
switch x {
case "a":
return 1