From 8a5175df35d20aa97afcbea63e86ba14ecafdc88 Mon Sep 17 00:00:00 2001 From: Cherry Zhang Date: Wed, 22 Mar 2017 21:34:12 -0400 Subject: [PATCH] cmd/compile: improve startRegs calculation MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit In register allocation, we calculate what values are used in and after the current block. If a value is used only after a function call, since registers are clobbered in call, we don't need to mark the value live at the entrance of the block. Before this CL it is considered live, and unnecessary copy or load may be generated when resolving merge edge. Fixes #14761. On AMD64: name old time/op new time/op delta BinaryTree17-12 2.84s ± 1% 2.81s ± 1% -1.06% (p=0.000 n=10+9) Fannkuch11-12 3.61s ± 0% 3.55s ± 1% -1.77% (p=0.000 n=10+9) FmtFprintfEmpty-12 50.4ns ± 4% 50.0ns ± 1% ~ (p=0.785 n=9+8) FmtFprintfString-12 80.0ns ± 3% 78.2ns ± 3% -2.35% (p=0.004 n=10+9) FmtFprintfInt-12 81.3ns ± 4% 81.8ns ± 2% ~ (p=0.159 n=10+10) FmtFprintfIntInt-12 120ns ± 4% 118ns ± 2% ~ (p=0.218 n=10+10) FmtFprintfPrefixedInt-12 152ns ± 3% 155ns ± 2% +2.11% (p=0.026 n=10+10) FmtFprintfFloat-12 240ns ± 1% 238ns ± 1% -0.79% (p=0.005 n=9+9) FmtManyArgs-12 504ns ± 1% 510ns ± 1% +1.14% (p=0.000 n=8+9) GobDecode-12 7.00ms ± 1% 6.99ms ± 0% ~ (p=0.497 n=9+10) GobEncode-12 5.47ms ± 1% 5.48ms ± 1% ~ (p=0.218 n=10+10) Gzip-12 258ms ± 2% 256ms ± 1% -0.96% (p=0.043 n=10+9) Gunzip-12 38.6ms ± 0% 38.3ms ± 0% -0.64% (p=0.000 n=9+8) HTTPClientServer-12 90.4µs ± 3% 87.2µs ±11% ~ (p=0.053 n=9+10) JSONEncode-12 15.6ms ± 0% 15.6ms ± 1% ~ (p=0.077 n=9+9) JSONDecode-12 55.1ms ± 1% 54.6ms ± 1% -0.85% (p=0.010 n=10+9) Mandelbrot200-12 4.49ms ± 0% 4.47ms ± 0% -0.25% (p=0.000 n=10+8) GoParse-12 3.38ms ± 0% 3.37ms ± 1% ~ (p=0.315 n=8+10) RegexpMatchEasy0_32-12 82.5ns ± 4% 82.0ns ± 0% ~ (p=0.164 n=10+8) RegexpMatchEasy0_1K-12 203ns ± 1% 202ns ± 1% -0.85% (p=0.000 n=9+10) RegexpMatchEasy1_32-12 82.3ns ± 1% 81.1ns ± 0% -1.39% (p=0.000 n=10+8) RegexpMatchEasy1_1K-12 357ns ± 1% 357ns ± 1% ~ (p=0.697 n=8+9) RegexpMatchMedium_32-12 125ns ± 2% 126ns ± 2% ~ (p=0.197 n=10+10) RegexpMatchMedium_1K-12 39.6µs ± 3% 39.6µs ± 1% ~ (p=0.971 n=10+10) RegexpMatchHard_32-12 1.99µs ± 2% 1.99µs ± 4% ~ (p=0.891 n=10+9) RegexpMatchHard_1K-12 60.1µs ± 3% 60.4µs ± 3% ~ (p=0.684 n=10+10) Revcomp-12 531ms ± 6% 441ms ± 0% -16.94% (p=0.000 n=10+9) Template-12 58.9ms ± 1% 58.7ms ± 1% ~ (p=0.315 n=10+10) TimeParse-12 319ns ± 1% 320ns ± 4% ~ (p=0.215 n=9+9) TimeFormat-12 345ns ± 0% 333ns ± 1% -3.36% (p=0.000 n=9+10) [Geo mean] 52.2µs 51.6µs -1.13% On ARM64: name old time/op new time/op delta BinaryTree17-8 8.53s ± 0% 8.36s ± 0% -1.89% (p=0.000 n=10+10) Fannkuch11-8 6.15s ± 0% 6.10s ± 0% -0.67% (p=0.000 n=10+10) FmtFprintfEmpty-8 117ns ± 0% 117ns ± 0% ~ (all equal) FmtFprintfString-8 192ns ± 0% 192ns ± 0% ~ (all equal) FmtFprintfInt-8 198ns ± 0% 198ns ± 0% ~ (p=0.211 n=10+10) FmtFprintfIntInt-8 289ns ± 0% 291ns ± 0% +0.59% (p=0.000 n=7+10) FmtFprintfPrefixedInt-8 320ns ± 2% 317ns ± 0% ~ (p=0.431 n=10+8) FmtFprintfFloat-8 538ns ± 0% 538ns ± 0% ~ (all equal) FmtManyArgs-8 1.17µs ± 1% 1.18µs ± 1% ~ (p=0.063 n=10+10) GobDecode-8 17.0ms ± 1% 17.2ms ± 1% +0.83% (p=0.000 n=10+10) GobEncode-8 14.2ms ± 0% 14.1ms ± 1% -0.78% (p=0.001 n=9+10) Gzip-8 806ms ± 0% 797ms ± 0% -1.12% (p=0.000 n=6+9) Gunzip-8 131ms ± 0% 130ms ± 0% -0.51% (p=0.000 n=10+9) HTTPClientServer-8 206µs ± 9% 212µs ± 2% ~ (p=0.829 n=10+8) JSONEncode-8 40.1ms ± 0% 40.1ms ± 0% ~ (p=0.136 n=9+9) JSONDecode-8 157ms ± 0% 151ms ± 0% -3.32% (p=0.000 n=9+9) Mandelbrot200-8 10.1ms ± 0% 10.1ms ± 0% -0.05% (p=0.000 n=9+8) GoParse-8 8.43ms ± 0% 8.43ms ± 0% ~ (p=0.912 n=10+10) RegexpMatchEasy0_32-8 228ns ± 1% 227ns ± 0% -0.26% (p=0.026 n=10+9) RegexpMatchEasy0_1K-8 1.92µs ± 0% 1.63µs ± 0% -15.18% (p=0.001 n=7+7) RegexpMatchEasy1_32-8 258ns ± 1% 250ns ± 0% -2.83% (p=0.000 n=10+10) RegexpMatchEasy1_1K-8 2.39µs ± 0% 2.13µs ± 0% -10.94% (p=0.000 n=9+9) RegexpMatchMedium_32-8 352ns ± 0% 351ns ± 0% -0.29% (p=0.004 n=9+10) RegexpMatchMedium_1K-8 104µs ± 0% 105µs ± 0% +0.58% (p=0.000 n=8+9) RegexpMatchHard_32-8 5.84µs ± 0% 5.82µs ± 0% -0.27% (p=0.000 n=9+10) RegexpMatchHard_1K-8 177µs ± 0% 177µs ± 0% -0.07% (p=0.000 n=9+9) Revcomp-8 1.57s ± 1% 1.50s ± 1% -4.60% (p=0.000 n=9+10) Template-8 157ms ± 1% 153ms ± 1% -2.28% (p=0.000 n=10+9) TimeParse-8 779ns ± 1% 770ns ± 1% -1.18% (p=0.013 n=10+10) TimeFormat-8 823ns ± 2% 826ns ± 1% ~ (p=0.324 n=10+9) [Geo mean] 144µs 142µs -1.45% Reduce cmd/go text size by 0.5%. Change-Id: I9288ff983c4a7cf03fc0cb35b9b1750828013117 Reviewed-on: https://go-review.googlesource.com/38457 Run-TryBot: Cherry Zhang TryBot-Result: Gobot Gobot Reviewed-by: David Chase Reviewed-by: Keith Randall --- src/cmd/compile/internal/ssa/regalloc.go | 52 ++++++++++++++++++------ 1 file changed, 39 insertions(+), 13 deletions(-) diff --git a/src/cmd/compile/internal/ssa/regalloc.go b/src/cmd/compile/internal/ssa/regalloc.go index f934edfcac..7361e1392b 100644 --- a/src/cmd/compile/internal/ssa/regalloc.go +++ b/src/cmd/compile/internal/ssa/regalloc.go @@ -241,6 +241,9 @@ type regAllocState struct { // current state of each (preregalloc) Value values []valState + // ID of SP, SB values + sp, sb ID + // For each Value, map from its value ID back to the // preregalloc Value it was derived from. orig []*Value @@ -709,8 +712,8 @@ func (s *regAllocState) compatRegs(t Type) regMask { } func (s *regAllocState) regalloc(f *Func) { - liveSet := f.newSparseSet(f.NumValues()) - defer f.retSparseSet(liveSet) + regValLiveSet := f.newSparseSet(f.NumValues()) // set of values that may be live in register + defer f.retSparseSet(regValLiveSet) var oldSched []*Value var phis []*Value var phiRegs []register @@ -733,32 +736,42 @@ func (s *regAllocState) regalloc(f *Func) { for _, b := range f.Blocks { s.curBlock = b - // Initialize liveSet and uses fields for this block. + // Initialize regValLiveSet and uses fields for this block. // Walk backwards through the block doing liveness analysis. - liveSet.clear() + regValLiveSet.clear() for _, e := range s.live[b.ID] { s.addUse(e.ID, int32(len(b.Values))+e.dist, e.pos) // pseudo-uses from beyond end of block - liveSet.add(e.ID) + regValLiveSet.add(e.ID) } if v := b.Control; v != nil && s.values[v.ID].needReg { s.addUse(v.ID, int32(len(b.Values)), b.Pos) // pseudo-use by control value - liveSet.add(v.ID) + regValLiveSet.add(v.ID) } for i := len(b.Values) - 1; i >= 0; i-- { v := b.Values[i] - liveSet.remove(v.ID) + regValLiveSet.remove(v.ID) if v.Op == OpPhi { // Remove v from the live set, but don't add // any inputs. This is the state the len(b.Preds)>1 // case below desires; it wants to process phis specially. continue } + if opcodeTable[v.Op].call { + // Function call clobbers all the registers but SP and SB. + regValLiveSet.clear() + if s.sp != 0 && s.values[s.sp].uses != nil { + regValLiveSet.add(s.sp) + } + if s.sb != 0 && s.values[s.sb].uses != nil { + regValLiveSet.add(s.sb) + } + } for _, a := range v.Args { if !s.values[a.ID].needReg { continue } s.addUse(a.ID, int32(i), v.Pos) - liveSet.add(a.ID) + regValLiveSet.add(a.ID) } } if s.f.pass.debug > regDebug { @@ -808,7 +821,7 @@ func (s *regAllocState) regalloc(f *Func) { // live but only used by some other successor of p. for r := register(0); r < s.numRegs; r++ { v := s.regs[r].v - if v != nil && !liveSet.contains(v.ID) { + if v != nil && !regValLiveSet.contains(v.ID) { s.freeReg(r) } } @@ -864,7 +877,7 @@ func (s *regAllocState) regalloc(f *Func) { continue } a := v.Args[idx] - if !liveSet.contains(a.ID) { + if !regValLiveSet.contains(a.ID) { // Input is dead beyond the phi, deallocate // anywhere else it might live. s.freeRegs(s.values[a.ID].regs) @@ -932,6 +945,17 @@ func (s *regAllocState) regalloc(f *Func) { s.assignReg(r, v, v) } + // Deallocate any values which are no longer live. Phis are excluded. + for r := register(0); r < s.numRegs; r++ { + if phiUsed>>r&1 != 0 { + continue + } + v := s.regs[r].v + if v != nil && !regValLiveSet.contains(v.ID) { + s.freeReg(r) + } + } + // Save the starting state for use by merge edges. var regList []startReg for r := register(0); r < s.numRegs; r++ { @@ -1034,12 +1058,14 @@ func (s *regAllocState) regalloc(f *Func) { s.assignReg(s.SPReg, v, v) b.Values = append(b.Values, v) s.advanceUses(v) + s.sp = v.ID continue } if v.Op == OpSB { s.assignReg(s.SBReg, v, v) b.Values = append(b.Values, v) s.advanceUses(v) + s.sb = v.ID continue } if v.Op == OpSelect0 || v.Op == OpSelect1 { @@ -1435,16 +1461,16 @@ func (s *regAllocState) regalloc(f *Func) { s.endRegs[b.ID] = regList if checkEnabled { - liveSet.clear() + regValLiveSet.clear() for _, x := range s.live[b.ID] { - liveSet.add(x.ID) + regValLiveSet.add(x.ID) } for r := register(0); r < s.numRegs; r++ { v := s.regs[r].v if v == nil { continue } - if !liveSet.contains(v.ID) { + if !regValLiveSet.contains(v.ID) { s.f.Fatalf("val %s is in reg but not live at end of %s", v, b) } } -- 2.48.1