From 540f8c2b50f5def060244853673ccfc94d2d3e43 Mon Sep 17 00:00:00 2001 From: Cherry Mui Date: Mon, 25 Apr 2022 17:18:19 -0400 Subject: [PATCH] cmd/compile: use jump table on ARM64 MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Following CL 357330, use jump tables on ARM64. name old time/op new time/op delta Switch8Predictable-4 3.41ns ± 0% 3.21ns ± 0% ~ (p=0.079 n=4+5) Switch8Unpredictable-4 12.0ns ± 0% 9.5ns ± 0% -21.17% (p=0.000 n=5+4) Switch32Predictable-4 3.06ns ± 0% 2.82ns ± 0% -7.78% (p=0.008 n=5+5) Switch32Unpredictable-4 13.3ns ± 0% 9.5ns ± 0% -28.87% (p=0.016 n=4+5) SwitchStringPredictable-4 3.71ns ± 0% 3.21ns ± 0% -13.43% (p=0.000 n=5+4) SwitchStringUnpredictable-4 14.8ns ± 0% 15.1ns ± 0% +2.37% (p=0.008 n=5+5) Change-Id: Ia0b85df7ca9273cf70c05eb957225c6e61822fa6 Reviewed-on: https://go-review.googlesource.com/c/go/+/403979 TryBot-Result: Gopher Robot Reviewed-by: Keith Randall Run-TryBot: Cherry Mui Reviewed-by: David Chase --- src/cmd/compile/internal/arm64/ssa.go | 37 ++++++++++----- src/cmd/compile/internal/ssa/gen/ARM64.rules | 2 + src/cmd/compile/internal/ssa/gen/ARM64Ops.go | 6 +++ src/cmd/compile/internal/ssa/opGen.go | 50 ++++++++++---------- src/cmd/compile/internal/ssa/rewriteARM64.go | 14 ++++++ src/cmd/internal/obj/arm64/asm7.go | 10 ++++ src/cmd/internal/sys/arch.go | 1 + test/codegen/switch.go | 2 + 8 files changed, 87 insertions(+), 35 deletions(-) diff --git a/src/cmd/compile/internal/arm64/ssa.go b/src/cmd/compile/internal/arm64/ssa.go index 3b6e6f6723..c93e6e6cf8 100644 --- a/src/cmd/compile/internal/arm64/ssa.go +++ b/src/cmd/compile/internal/arm64/ssa.go @@ -99,21 +99,22 @@ func genshift(s *ssagen.State, v *ssa.Value, as obj.As, r0, r1, r int16, typ int return p } -// generate the memory operand for the indexed load/store instructions -func genIndexedOperand(v *ssa.Value) obj.Addr { +// generate the memory operand for the indexed load/store instructions. +// base and idx are registers. 
+func genIndexedOperand(op ssa.Op, base, idx int16) obj.Addr { // Reg: base register, Index: (shifted) index register - mop := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[0].Reg()} - switch v.Op { + mop := obj.Addr{Type: obj.TYPE_MEM, Reg: base} + switch op { case ssa.OpARM64MOVDloadidx8, ssa.OpARM64MOVDstoreidx8, ssa.OpARM64MOVDstorezeroidx8, ssa.OpARM64FMOVDloadidx8, ssa.OpARM64FMOVDstoreidx8: - mop.Index = arm64.REG_LSL | 3<<5 | v.Args[1].Reg()&31 + mop.Index = arm64.REG_LSL | 3<<5 | idx&31 case ssa.OpARM64MOVWloadidx4, ssa.OpARM64MOVWUloadidx4, ssa.OpARM64MOVWstoreidx4, ssa.OpARM64MOVWstorezeroidx4, ssa.OpARM64FMOVSloadidx4, ssa.OpARM64FMOVSstoreidx4: - mop.Index = arm64.REG_LSL | 2<<5 | v.Args[1].Reg()&31 + mop.Index = arm64.REG_LSL | 2<<5 | idx&31 case ssa.OpARM64MOVHloadidx2, ssa.OpARM64MOVHUloadidx2, ssa.OpARM64MOVHstoreidx2, ssa.OpARM64MOVHstorezeroidx2: - mop.Index = arm64.REG_LSL | 1<<5 | v.Args[1].Reg()&31 + mop.Index = arm64.REG_LSL | 1<<5 | idx&31 default: // not shifted - mop.Index = v.Args[1].Reg() + mop.Index = idx } return mop } @@ -465,7 +466,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) { ssa.OpARM64FMOVDloadidx8, ssa.OpARM64FMOVSloadidx4: p := s.Prog(v.Op.Asm()) - p.From = genIndexedOperand(v) + p.From = genIndexedOperand(v.Op, v.Args[0].Reg(), v.Args[1].Reg()) p.To.Type = obj.TYPE_REG p.To.Reg = v.Reg() case ssa.OpARM64LDAR, @@ -504,7 +505,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) { ssa.OpARM64MOVDstoreidx8, ssa.OpARM64FMOVDstoreidx8: p := s.Prog(v.Op.Asm()) - p.To = genIndexedOperand(v) + p.To = genIndexedOperand(v.Op, v.Args[0].Reg(), v.Args[1].Reg()) p.From.Type = obj.TYPE_REG p.From.Reg = v.Args[2].Reg() case ssa.OpARM64STP: @@ -533,7 +534,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) { ssa.OpARM64MOVWstorezeroidx4, ssa.OpARM64MOVDstorezeroidx8: p := s.Prog(v.Op.Asm()) - p.To = genIndexedOperand(v) + p.To = genIndexedOperand(v.Op, v.Args[0].Reg(), v.Args[1].Reg()) p.From.Type = obj.TYPE_REG p.From.Reg = arm64.REGZERO case ssa.OpARM64MOVQstorezero: @@ -1325,6 +1326,20 @@ func ssaGenBlock(s *ssagen.State, b, next *ssa.Block) { s.CombJump(b, next, &leJumps) case ssa.BlockARM64GTnoov: s.CombJump(b, next, >Jumps) + + case ssa.BlockARM64JUMPTABLE: + // MOVD (TABLE)(IDX<<3), Rtmp + // JMP (Rtmp) + p := s.Prog(arm64.AMOVD) + p.From = genIndexedOperand(ssa.OpARM64MOVDloadidx8, b.Controls[1].Reg(), b.Controls[0].Reg()) + p.To.Type = obj.TYPE_REG + p.To.Reg = arm64.REGTMP + p = s.Prog(obj.AJMP) + p.To.Type = obj.TYPE_MEM + p.To.Reg = arm64.REGTMP + // Save jump tables for later resolution of the target blocks. + s.JumpTables = append(s.JumpTables, b) + default: b.Fatalf("branch not implemented: %s", b.LongString()) } diff --git a/src/cmd/compile/internal/ssa/gen/ARM64.rules b/src/cmd/compile/internal/ssa/gen/ARM64.rules index 3614b3208d..07eb4f68e2 100644 --- a/src/cmd/compile/internal/ssa/gen/ARM64.rules +++ b/src/cmd/compile/internal/ssa/gen/ARM64.rules @@ -534,6 +534,8 @@ (If cond yes no) => (TBNZ [0] cond yes no) +(JumpTable idx) => (JUMPTABLE {makeJumpTableSym(b)} idx (MOVDaddr {makeJumpTableSym(b)} (SB))) + // atomic intrinsics // Note: these ops do not accept offset. (AtomicLoad8 ...) => (LDARB ...) 
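
A note on the two hunks above, which are the core of the change: the new rewrite rule lowers the generic JumpTable block into the ARM64-specific JUMPTABLE block, whose two controls are the case index and a MOVDaddr of the table symbol, and the BlockARM64JUMPTABLE case in ssaGenBlock then emits the dispatch named in its comment, an indexed 8-byte load of the target PC into REGTMP (R27 on arm64) followed by an indirect jump. For illustration, a dense integer switch like the sketch below is what reaches this path; it is patterned on the square function in test/codegen/switch.go (annotated later in this patch), and the function name and case bodies are mine, since any switch with 8 or more dense integer cases qualifies:

	// classify is a hypothetical example, not part of this patch.
	// With this CL, the arm64 backend compiles its dispatch to roughly:
	//	MOVD (Rtable)(Ridx<<3), R27 // 8-byte table entries, hence the <<3
	//	JMP  (R27)                  // indirect branch through REGTMP
	func classify(x int) int {
		switch x {
		case 1:
			return 1
		case 2:
			return 4
		case 3:
			return 9
		case 4:
			return 16
		case 5:
			return 25
		case 6:
			return 36
		case 7:
			return 49
		case 8:
			return 64
		}
		return 0
	}
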
diff --git a/src/cmd/compile/internal/ssa/gen/ARM64Ops.go b/src/cmd/compile/internal/ssa/gen/ARM64Ops.go index 2d03c44988..8234bce26e 100644 --- a/src/cmd/compile/internal/ssa/gen/ARM64Ops.go +++ b/src/cmd/compile/internal/ssa/gen/ARM64Ops.go @@ -773,6 +773,12 @@ func init() { {name: "LEnoov", controls: 1}, // 'LE' but without honoring overflow {name: "GTnoov", controls: 1}, // 'GT' but without honoring overflow {name: "GEnoov", controls: 1}, // 'GE' but without honoring overflow + + // JUMPTABLE implements jump tables. + // Aux is the symbol (an *obj.LSym) for the jump table. + // control[0] is the index into the jump table. + // control[1] is the address of the jump table (the address of the symbol stored in Aux). + {name: "JUMPTABLE", controls: 2, aux: "Sym"}, } archs = append(archs, arch{ diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go index ac879faa61..30eca08692 100644 --- a/src/cmd/compile/internal/ssa/opGen.go +++ b/src/cmd/compile/internal/ssa/opGen.go @@ -92,6 +92,7 @@ const ( BlockARM64LEnoov BlockARM64GTnoov BlockARM64GEnoov + BlockARM64JUMPTABLE BlockLOONG64EQ BlockLOONG64NE @@ -217,30 +218,31 @@ var blockString = [...]string{ BlockARMGTnoov: "GTnoov", BlockARMGEnoov: "GEnoov", - BlockARM64EQ: "EQ", - BlockARM64NE: "NE", - BlockARM64LT: "LT", - BlockARM64LE: "LE", - BlockARM64GT: "GT", - BlockARM64GE: "GE", - BlockARM64ULT: "ULT", - BlockARM64ULE: "ULE", - BlockARM64UGT: "UGT", - BlockARM64UGE: "UGE", - BlockARM64Z: "Z", - BlockARM64NZ: "NZ", - BlockARM64ZW: "ZW", - BlockARM64NZW: "NZW", - BlockARM64TBZ: "TBZ", - BlockARM64TBNZ: "TBNZ", - BlockARM64FLT: "FLT", - BlockARM64FLE: "FLE", - BlockARM64FGT: "FGT", - BlockARM64FGE: "FGE", - BlockARM64LTnoov: "LTnoov", - BlockARM64LEnoov: "LEnoov", - BlockARM64GTnoov: "GTnoov", - BlockARM64GEnoov: "GEnoov", + BlockARM64EQ: "EQ", + BlockARM64NE: "NE", + BlockARM64LT: "LT", + BlockARM64LE: "LE", + BlockARM64GT: "GT", + BlockARM64GE: "GE", + BlockARM64ULT: "ULT", + BlockARM64ULE: "ULE", + BlockARM64UGT: "UGT", + BlockARM64UGE: "UGE", + BlockARM64Z: "Z", + BlockARM64NZ: "NZ", + BlockARM64ZW: "ZW", + BlockARM64NZW: "NZW", + BlockARM64TBZ: "TBZ", + BlockARM64TBNZ: "TBNZ", + BlockARM64FLT: "FLT", + BlockARM64FLE: "FLE", + BlockARM64FGT: "FGT", + BlockARM64FGE: "FGE", + BlockARM64LTnoov: "LTnoov", + BlockARM64LEnoov: "LEnoov", + BlockARM64GTnoov: "GTnoov", + BlockARM64GEnoov: "GEnoov", + BlockARM64JUMPTABLE: "JUMPTABLE", BlockLOONG64EQ: "EQ", BlockLOONG64NE: "NE", diff --git a/src/cmd/compile/internal/ssa/rewriteARM64.go b/src/cmd/compile/internal/ssa/rewriteARM64.go index a9af833fbb..efeadf64cc 100644 --- a/src/cmd/compile/internal/ssa/rewriteARM64.go +++ b/src/cmd/compile/internal/ssa/rewriteARM64.go @@ -27491,6 +27491,7 @@ func rewriteValueARM64_OpZero(v *Value) bool { return false } func rewriteBlockARM64(b *Block) bool { + typ := &b.Func.Config.Types switch b.Kind { case BlockARM64EQ: // match: (EQ (CMPWconst [0] x:(ANDconst [c] y)) yes no) @@ -28845,6 +28846,19 @@ func rewriteBlockARM64(b *Block) bool { b.AuxInt = int64ToAuxInt(0) return true } + case BlockJumpTable: + // match: (JumpTable idx) + // result: (JUMPTABLE {makeJumpTableSym(b)} idx (MOVDaddr {makeJumpTableSym(b)} (SB))) + for { + idx := b.Controls[0] + v0 := b.NewValue0(b.Pos, OpARM64MOVDaddr, typ.Uintptr) + v0.Aux = symToAux(makeJumpTableSym(b)) + v1 := b.NewValue0(b.Pos, OpSB, typ.Uintptr) + v0.AddArg(v1) + b.resetWithControl2(BlockARM64JUMPTABLE, idx, v0) + b.Aux = symToAux(makeJumpTableSym(b)) + return true + } case BlockARM64LE: 
// match: (LE (CMPWconst [0] x:(ANDconst [c] y)) yes no) // cond: x.Uses == 1 diff --git a/src/cmd/internal/obj/arm64/asm7.go b/src/cmd/internal/obj/arm64/asm7.go index 57d4e7a8d3..9cf1fcd460 100644 --- a/src/cmd/internal/obj/arm64/asm7.go +++ b/src/cmd/internal/obj/arm64/asm7.go @@ -1222,6 +1222,16 @@ func span7(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) { // so instruction sequences that use REGTMP are unsafe to // preempt asynchronously. obj.MarkUnsafePoints(c.ctxt, c.cursym.Func().Text, c.newprog, c.isUnsafePoint, c.isRestartable) + + // Now that we know byte offsets, we can generate jump table entries. + for _, jt := range cursym.Func().JumpTables { + for i, p := range jt.Targets { + // The ith jumptable entry points to the p.Pc'th + // byte in the function symbol s. + // TODO: try using relative PCs. + jt.Sym.WriteAddr(ctxt, int64(i)*8, 8, cursym, p.Pc) + } + } } // isUnsafePoint returns whether p is an unsafe point. diff --git a/src/cmd/internal/sys/arch.go b/src/cmd/internal/sys/arch.go index 5886b42e51..2e35284137 100644 --- a/src/cmd/internal/sys/arch.go +++ b/src/cmd/internal/sys/arch.go @@ -130,6 +130,7 @@ var ArchARM64 = &Arch{ MinLC: 4, Alignment: 1, CanMergeLoads: true, + CanJumpTable: true, HasLR: true, FixedFrameSize: 8, // LR } diff --git a/test/codegen/switch.go b/test/codegen/switch.go index a6566834a8..af3762869a 100644 --- a/test/codegen/switch.go +++ b/test/codegen/switch.go @@ -24,6 +24,7 @@ func f(x string) int { // use jump tables for 8+ int cases func square(x int) int { // amd64:`JMP\s\(.*\)\(.*\)$` + // arm64:`MOVD\s\(R.*\)\(R.*<<3\)`,`JMP\s\(R.*\)$` switch x { case 1: return 1 @@ -49,6 +50,7 @@ func square(x int) int { // use jump tables for 8+ string lengths func length(x string) int { // amd64:`JMP\s\(.*\)\(.*\)$` + // arm64:`MOVD\s\(R.*\)\(R.*<<3\)`,`JMP\s\(R.*\)$` switch x { case "a": return 1 -- 2.50.0
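
A closing note on the asm7.go hunk: the table can only be filled in at the end of span7, after every Prog has its final byte offset, because each entry must hold a resolved PC. Entry i is written at byte offset i*8 as an 8-byte address, cursym plus Targets[i].Pc, and WriteAddr records a pointer-sized relocation for the linker to resolve (the in-code TODO leaves PC-relative entries as a possible later optimization). Below is a minimal software model of the resulting table-plus-indirect-branch dispatch, with Go functions standing in for code addresses; every name in it is illustrative and not from the patch:

	package main

	import "fmt"

	// table models the jump-table symbol: one 8-byte target per dense
	// case, indexed by case value minus the smallest case value (1 here).
	var table = [8]func() int{
		func() int { return 1 },
		func() int { return 4 },
		func() int { return 9 },
		func() int { return 16 },
		func() int { return 25 },
		func() int { return 36 },
		func() int { return 49 },
		func() int { return 64 },
	}

	func dispatch(x int) int {
		// One unsigned compare covers both ends of the range check,
		// since x-1 wraps to a huge uint when x < 1.
		if uint(x-1) >= uint(len(table)) {
			return 0 // default case
		}
		// Models MOVD (table)(idx<<3), R27 followed by JMP (R27).
		return table[x-1]()
	}

	func main() {
		fmt.Println(dispatch(5)) // 25
	}
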