Cypherpunks repositories - gostls13.git/commitdiff
cmd/compile: use jump table on ARM64
author Cherry Mui <cherryyz@google.com>
Mon, 25 Apr 2022 21:18:19 +0000 (17:18 -0400)
committer Cherry Mui <cherryyz@google.com>
Fri, 13 May 2022 19:51:03 +0000 (19:51 +0000)
Following CL 357330, use jump tables on ARM64.

name                         old time/op  new time/op  delta
Switch8Predictable-4         3.41ns ± 0%  3.21ns ± 0%     ~     (p=0.079 n=4+5)
Switch8Unpredictable-4       12.0ns ± 0%   9.5ns ± 0%  -21.17%  (p=0.000 n=5+4)
Switch32Predictable-4        3.06ns ± 0%  2.82ns ± 0%   -7.78%  (p=0.008 n=5+5)
Switch32Unpredictable-4      13.3ns ± 0%   9.5ns ± 0%  -28.87%  (p=0.016 n=4+5)
SwitchStringPredictable-4    3.71ns ± 0%  3.21ns ± 0%  -13.43%  (p=0.000 n=5+4)
SwitchStringUnpredictable-4  14.8ns ± 0%  15.1ns ± 0%   +2.37%  (p=0.008 n=5+5)

Change-Id: Ia0b85df7ca9273cf70c05eb957225c6e61822fa6
Reviewed-on: https://go-review.googlesource.com/c/go/+/403979
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
Run-TryBot: Cherry Mui <cherryyz@google.com>
Reviewed-by: David Chase <drchase@google.com>
src/cmd/compile/internal/arm64/ssa.go
src/cmd/compile/internal/ssa/gen/ARM64.rules
src/cmd/compile/internal/ssa/gen/ARM64Ops.go
src/cmd/compile/internal/ssa/opGen.go
src/cmd/compile/internal/ssa/rewriteARM64.go
src/cmd/internal/obj/arm64/asm7.go
src/cmd/internal/sys/arch.go
test/codegen/switch.go

index 3b6e6f672309d3d67a8c3289ed7e4debe305de4c..c93e6e6cf8e400dfff914eb5bd6402d10681a903 100644 (file)
@@ -99,21 +99,22 @@ func genshift(s *ssagen.State, v *ssa.Value, as obj.As, r0, r1, r int16, typ int
        return p
 }
 
-// generate the memory operand for the indexed load/store instructions
-func genIndexedOperand(v *ssa.Value) obj.Addr {
+// generate the memory operand for the indexed load/store instructions.
+// base and idx are registers.
+func genIndexedOperand(op ssa.Op, base, idx int16) obj.Addr {
        // Reg: base register, Index: (shifted) index register
-       mop := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[0].Reg()}
-       switch v.Op {
+       mop := obj.Addr{Type: obj.TYPE_MEM, Reg: base}
+       switch op {
        case ssa.OpARM64MOVDloadidx8, ssa.OpARM64MOVDstoreidx8, ssa.OpARM64MOVDstorezeroidx8,
                ssa.OpARM64FMOVDloadidx8, ssa.OpARM64FMOVDstoreidx8:
-               mop.Index = arm64.REG_LSL | 3<<5 | v.Args[1].Reg()&31
+               mop.Index = arm64.REG_LSL | 3<<5 | idx&31
        case ssa.OpARM64MOVWloadidx4, ssa.OpARM64MOVWUloadidx4, ssa.OpARM64MOVWstoreidx4, ssa.OpARM64MOVWstorezeroidx4,
                ssa.OpARM64FMOVSloadidx4, ssa.OpARM64FMOVSstoreidx4:
-               mop.Index = arm64.REG_LSL | 2<<5 | v.Args[1].Reg()&31
+               mop.Index = arm64.REG_LSL | 2<<5 | idx&31
        case ssa.OpARM64MOVHloadidx2, ssa.OpARM64MOVHUloadidx2, ssa.OpARM64MOVHstoreidx2, ssa.OpARM64MOVHstorezeroidx2:
-               mop.Index = arm64.REG_LSL | 1<<5 | v.Args[1].Reg()&31
+               mop.Index = arm64.REG_LSL | 1<<5 | idx&31
        default: // not shifted
-               mop.Index = v.Args[1].Reg()
+               mop.Index = idx
        }
        return mop
 }
@@ -465,7 +466,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
                ssa.OpARM64FMOVDloadidx8,
                ssa.OpARM64FMOVSloadidx4:
                p := s.Prog(v.Op.Asm())
-               p.From = genIndexedOperand(v)
+               p.From = genIndexedOperand(v.Op, v.Args[0].Reg(), v.Args[1].Reg())
                p.To.Type = obj.TYPE_REG
                p.To.Reg = v.Reg()
        case ssa.OpARM64LDAR,
@@ -504,7 +505,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
                ssa.OpARM64MOVDstoreidx8,
                ssa.OpARM64FMOVDstoreidx8:
                p := s.Prog(v.Op.Asm())
-               p.To = genIndexedOperand(v)
+               p.To = genIndexedOperand(v.Op, v.Args[0].Reg(), v.Args[1].Reg())
                p.From.Type = obj.TYPE_REG
                p.From.Reg = v.Args[2].Reg()
        case ssa.OpARM64STP:
@@ -533,7 +534,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
                ssa.OpARM64MOVWstorezeroidx4,
                ssa.OpARM64MOVDstorezeroidx8:
                p := s.Prog(v.Op.Asm())
-               p.To = genIndexedOperand(v)
+               p.To = genIndexedOperand(v.Op, v.Args[0].Reg(), v.Args[1].Reg())
                p.From.Type = obj.TYPE_REG
                p.From.Reg = arm64.REGZERO
        case ssa.OpARM64MOVQstorezero:
@@ -1325,6 +1326,20 @@ func ssaGenBlock(s *ssagen.State, b, next *ssa.Block) {
                s.CombJump(b, next, &leJumps)
        case ssa.BlockARM64GTnoov:
                s.CombJump(b, next, &gtJumps)
+
+       case ssa.BlockARM64JUMPTABLE:
+               // MOVD (TABLE)(IDX<<3), Rtmp
+               // JMP  (Rtmp)
+               p := s.Prog(arm64.AMOVD)
+               p.From = genIndexedOperand(ssa.OpARM64MOVDloadidx8, b.Controls[1].Reg(), b.Controls[0].Reg())
+               p.To.Type = obj.TYPE_REG
+               p.To.Reg = arm64.REGTMP
+               p = s.Prog(obj.AJMP)
+               p.To.Type = obj.TYPE_MEM
+               p.To.Reg = arm64.REGTMP
+               // Save jump tables for later resolution of the target blocks.
+               s.JumpTables = append(s.JumpTables, b)
+
        default:
                b.Fatalf("branch not implemented: %s", b.LongString())
        }
index 3614b3208d1d07283380dbdfac3ec6d8b6e2329e..07eb4f68e280b2d7d92e5fecfe389377040785bb 100644 (file)
 
 (If cond yes no) => (TBNZ [0] cond yes no)
 
+(JumpTable idx) => (JUMPTABLE {makeJumpTableSym(b)} idx (MOVDaddr <typ.Uintptr> {makeJumpTableSym(b)} (SB)))
+
 // atomic intrinsics
 // Note: these ops do not accept offset.
 (AtomicLoad8   ...) => (LDARB ...)
index 2d03c44988696e9e40a20163090b2b32777029cd..8234bce26ea22d4719c4a46bd7823a1660c0315a 100644 (file)
@@ -773,6 +773,12 @@ func init() {
                {name: "LEnoov", controls: 1}, // 'LE' but without honoring overflow
                {name: "GTnoov", controls: 1}, // 'GT' but without honoring overflow
                {name: "GEnoov", controls: 1}, // 'GE' but without honoring overflow
+
+               // JUMPTABLE implements jump tables.
+               // Aux is the symbol (an *obj.LSym) for the jump table.
+               // control[0] is the index into the jump table.
+               // control[1] is the address of the jump table (the address of the symbol stored in Aux).
+               {name: "JUMPTABLE", controls: 2, aux: "Sym"},
        }
 
        archs = append(archs, arch{
index ac879faa615cf2edc5d754a122667727ecf9cc1e..30eca086925d24615a4bd7f6a13146c40a0c61e4 100644 (file)
@@ -92,6 +92,7 @@ const (
        BlockARM64LEnoov
        BlockARM64GTnoov
        BlockARM64GEnoov
+       BlockARM64JUMPTABLE
 
        BlockLOONG64EQ
        BlockLOONG64NE
@@ -217,30 +218,31 @@ var blockString = [...]string{
        BlockARMGTnoov: "GTnoov",
        BlockARMGEnoov: "GEnoov",
 
-       BlockARM64EQ:     "EQ",
-       BlockARM64NE:     "NE",
-       BlockARM64LT:     "LT",
-       BlockARM64LE:     "LE",
-       BlockARM64GT:     "GT",
-       BlockARM64GE:     "GE",
-       BlockARM64ULT:    "ULT",
-       BlockARM64ULE:    "ULE",
-       BlockARM64UGT:    "UGT",
-       BlockARM64UGE:    "UGE",
-       BlockARM64Z:      "Z",
-       BlockARM64NZ:     "NZ",
-       BlockARM64ZW:     "ZW",
-       BlockARM64NZW:    "NZW",
-       BlockARM64TBZ:    "TBZ",
-       BlockARM64TBNZ:   "TBNZ",
-       BlockARM64FLT:    "FLT",
-       BlockARM64FLE:    "FLE",
-       BlockARM64FGT:    "FGT",
-       BlockARM64FGE:    "FGE",
-       BlockARM64LTnoov: "LTnoov",
-       BlockARM64LEnoov: "LEnoov",
-       BlockARM64GTnoov: "GTnoov",
-       BlockARM64GEnoov: "GEnoov",
+       BlockARM64EQ:        "EQ",
+       BlockARM64NE:        "NE",
+       BlockARM64LT:        "LT",
+       BlockARM64LE:        "LE",
+       BlockARM64GT:        "GT",
+       BlockARM64GE:        "GE",
+       BlockARM64ULT:       "ULT",
+       BlockARM64ULE:       "ULE",
+       BlockARM64UGT:       "UGT",
+       BlockARM64UGE:       "UGE",
+       BlockARM64Z:         "Z",
+       BlockARM64NZ:        "NZ",
+       BlockARM64ZW:        "ZW",
+       BlockARM64NZW:       "NZW",
+       BlockARM64TBZ:       "TBZ",
+       BlockARM64TBNZ:      "TBNZ",
+       BlockARM64FLT:       "FLT",
+       BlockARM64FLE:       "FLE",
+       BlockARM64FGT:       "FGT",
+       BlockARM64FGE:       "FGE",
+       BlockARM64LTnoov:    "LTnoov",
+       BlockARM64LEnoov:    "LEnoov",
+       BlockARM64GTnoov:    "GTnoov",
+       BlockARM64GEnoov:    "GEnoov",
+       BlockARM64JUMPTABLE: "JUMPTABLE",
 
        BlockLOONG64EQ:  "EQ",
        BlockLOONG64NE:  "NE",
index a9af833fbb988747fa9fffee6bf42b9f0512cbfe..efeadf64cc8ea893346c03d3ccd4e264fd4f07e0 100644 (file)
@@ -27491,6 +27491,7 @@ func rewriteValueARM64_OpZero(v *Value) bool {
        return false
 }
 func rewriteBlockARM64(b *Block) bool {
+       typ := &b.Func.Config.Types
        switch b.Kind {
        case BlockARM64EQ:
                // match: (EQ (CMPWconst [0] x:(ANDconst [c] y)) yes no)
@@ -28845,6 +28846,19 @@ func rewriteBlockARM64(b *Block) bool {
                        b.AuxInt = int64ToAuxInt(0)
                        return true
                }
+       case BlockJumpTable:
+               // match: (JumpTable idx)
+               // result: (JUMPTABLE {makeJumpTableSym(b)} idx (MOVDaddr <typ.Uintptr> {makeJumpTableSym(b)} (SB)))
+               for {
+                       idx := b.Controls[0]
+                       v0 := b.NewValue0(b.Pos, OpARM64MOVDaddr, typ.Uintptr)
+                       v0.Aux = symToAux(makeJumpTableSym(b))
+                       v1 := b.NewValue0(b.Pos, OpSB, typ.Uintptr)
+                       v0.AddArg(v1)
+                       b.resetWithControl2(BlockARM64JUMPTABLE, idx, v0)
+                       b.Aux = symToAux(makeJumpTableSym(b))
+                       return true
+               }
        case BlockARM64LE:
                // match: (LE (CMPWconst [0] x:(ANDconst [c] y)) yes no)
                // cond: x.Uses == 1
index 57d4e7a8d32dd7c503bbda49f67600474872913d..9cf1fcd460be9b1d23a5ff318d33025a61fc4fbf 100644 (file)
@@ -1222,6 +1222,16 @@ func span7(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
        // so instruction sequences that use REGTMP are unsafe to
        // preempt asynchronously.
        obj.MarkUnsafePoints(c.ctxt, c.cursym.Func().Text, c.newprog, c.isUnsafePoint, c.isRestartable)
+
+       // Now that we know byte offsets, we can generate jump table entries.
+       for _, jt := range cursym.Func().JumpTables {
+               for i, p := range jt.Targets {
+                       // The ith jumptable entry points to the p.Pc'th
+                       // byte in the function symbol cursym.
+                       // TODO: try using relative PCs.
+                       jt.Sym.WriteAddr(ctxt, int64(i)*8, 8, cursym, p.Pc)
+               }
+       }
 }
 
 // isUnsafePoint returns whether p is an unsafe point.
index 5886b42e5155dedf901baf36d756d0233b35aea5..2e35284137833a8cd980c45f5ffb7bc0f2b03273 100644 (file)
@@ -130,6 +130,7 @@ var ArchARM64 = &Arch{
        MinLC:          4,
        Alignment:      1,
        CanMergeLoads:  true,
+       CanJumpTable:   true,
        HasLR:          true,
        FixedFrameSize: 8, // LR
 }
index a6566834a867978f801657e8b9782732f25d095c..af3762869a6b365e005e584600c88c30631f2ae2 100644 (file)
@@ -24,6 +24,7 @@ func f(x string) int {
 // use jump tables for 8+ int cases
 func square(x int) int {
        // amd64:`JMP\s\(.*\)\(.*\)$`
+       // arm64:`MOVD\s\(R.*\)\(R.*<<3\)`,`JMP\s\(R.*\)$`
        switch x {
        case 1:
                return 1
@@ -49,6 +50,7 @@ func square(x int) int {
 // use jump tables for 8+ string lengths
 func length(x string) int {
        // amd64:`JMP\s\(.*\)\(.*\)$`
+       // arm64:`MOVD\s\(R.*\)\(R.*<<3\)`,`JMP\s\(R.*\)$`
        switch x {
        case "a":
                return 1