From: Cherry Zhang Date: Sat, 29 Oct 2016 03:11:04 +0000 (-0400) Subject: cmd/compile: make a copy of Phi input if it is still live X-Git-Tag: go1.8beta1~105 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=348275cda626edf60e1d11a83b2e78d83088ccef;p=gostls13.git cmd/compile: make a copy of Phi input if it is still live Register of Phi input is allocated to the Phi. So if the Phi input is still live after Phi, we may need to use a spill. In this case, copy the Phi input to a spare register to avoid a spill. Originally targeted the code in issue #16187, and this CL indeed removes the spill, but doesn't seem to help on benchmark result. It may help in general, though. On AMD64: name old time/op new time/op delta BinaryTree17-12 2.79s ± 1% 2.76s ± 0% -1.33% (p=0.000 n=10+10) Fannkuch11-12 3.02s ± 0% 3.14s ± 0% +3.99% (p=0.000 n=10+10) FmtFprintfEmpty-12 51.2ns ± 0% 51.4ns ± 3% ~ (p=0.368 n=8+10) FmtFprintfString-12 145ns ± 0% 144ns ± 0% -0.69% (p=0.000 n=6+9) FmtFprintfInt-12 127ns ± 1% 124ns ± 1% -2.79% (p=0.000 n=10+9) FmtFprintfIntInt-12 186ns ± 0% 184ns ± 0% -1.34% (p=0.000 n=10+9) FmtFprintfPrefixedInt-12 196ns ± 0% 194ns ± 0% -0.97% (p=0.000 n=9+9) FmtFprintfFloat-12 293ns ± 2% 287ns ± 0% -2.00% (p=0.000 n=10+9) FmtManyArgs-12 847ns ± 1% 829ns ± 0% -2.17% (p=0.000 n=10+7) GobDecode-12 7.17ms ± 0% 7.18ms ± 0% ~ (p=0.123 n=10+10) GobEncode-12 6.08ms ± 1% 6.08ms ± 0% ~ (p=0.497 n=10+9) Gzip-12 277ms ± 1% 275ms ± 1% -0.47% (p=0.028 n=10+9) Gunzip-12 39.1ms ± 2% 38.2ms ± 1% -2.20% (p=0.000 n=10+9) HTTPClientServer-12 90.9µs ± 4% 87.7µs ± 2% -3.51% (p=0.001 n=9+10) JSONEncode-12 17.3ms ± 1% 16.5ms ± 0% -5.02% (p=0.000 n=9+9) JSONDecode-12 54.6ms ± 1% 54.1ms ± 0% -0.99% (p=0.000 n=9+9) Mandelbrot200-12 4.45ms ± 0% 4.45ms ± 0% -0.02% (p=0.006 n=8+9) GoParse-12 3.44ms ± 0% 3.48ms ± 1% +0.95% (p=0.000 n=10+10) RegexpMatchEasy0_32-12 84.9ns ± 0% 85.0ns ± 0% ~ (p=0.241 n=8+8) RegexpMatchEasy0_1K-12 867ns ± 3% 915ns ±11% +5.55% (p=0.037 n=10+10) RegexpMatchEasy1_32-12 82.7ns ± 5% 83.9ns ± 4% ~ (p=0.161 n=9+10) RegexpMatchEasy1_1K-12 361ns ± 1% 363ns ± 0% ~ (p=0.098 n=10+8) RegexpMatchMedium_32-12 126ns ± 0% 126ns ± 1% ~ (p=0.549 n=8+10) RegexpMatchMedium_1K-12 38.8µs ± 0% 39.1µs ± 0% +0.67% (p=0.000 n=9+8) RegexpMatchHard_32-12 1.95µs ± 0% 1.96µs ± 0% +0.43% (p=0.000 n=9+9) RegexpMatchHard_1K-12 59.0µs ± 0% 59.1µs ± 0% +0.27% (p=0.000 n=10+9) Revcomp-12 436ms ± 1% 431ms ± 1% -1.19% (p=0.005 n=10+10) Template-12 56.7ms ± 1% 57.1ms ± 1% +0.71% (p=0.001 n=10+9) TimeParse-12 312ns ± 0% 310ns ± 0% -0.80% (p=0.000 n=10+9) TimeFormat-12 336ns ± 0% 332ns ± 0% -1.19% (p=0.000 n=8+7) [Geo mean] 59.2µs 58.9µs -0.42% On PPC64: name old time/op new time/op delta BinaryTree17-2 4.67s ± 2% 4.71s ± 1% ~ (p=0.421 n=5+5) Fannkuch11-2 3.92s ± 1% 3.94s ± 0% +0.46% (p=0.032 n=5+5) FmtFprintfEmpty-2 122ns ± 0% 120ns ± 2% -1.80% (p=0.016 n=4+5) FmtFprintfString-2 305ns ± 1% 299ns ± 1% -1.84% (p=0.008 n=5+5) FmtFprintfInt-2 243ns ± 0% 241ns ± 1% -0.66% (p=0.016 n=4+5) FmtFprintfIntInt-2 361ns ± 1% 356ns ± 1% -1.49% (p=0.016 n=5+5) FmtFprintfPrefixedInt-2 355ns ± 1% 357ns ± 1% ~ (p=0.333 n=5+5) FmtFprintfFloat-2 502ns ± 2% 498ns ± 1% ~ (p=0.151 n=5+5) FmtManyArgs-2 1.55µs ± 2% 1.59µs ± 1% +2.52% (p=0.008 n=5+5) GobDecode-2 13.0ms ± 1% 13.0ms ± 1% ~ (p=0.841 n=5+5) GobEncode-2 11.8ms ± 1% 11.8ms ± 1% ~ (p=0.690 n=5+5) Gzip-2 499ms ± 1% 503ms ± 0% ~ (p=0.421 n=5+5) Gunzip-2 86.5ms ± 0% 86.4ms ± 1% ~ (p=0.841 n=5+5) HTTPClientServer-2 68.2µs ± 2% 69.6µs ± 3% ~ (p=0.151 n=5+5) JSONEncode-2 39.0ms ± 1% 37.2ms ± 1% -4.65% (p=0.008 n=5+5) JSONDecode-2 122ms ± 1% 126ms ± 1% +2.63% (p=0.008 n=5+5) Mandelbrot200-2 6.08ms ± 1% 5.89ms ± 1% -3.06% (p=0.008 n=5+5) GoParse-2 5.95ms ± 2% 5.98ms ± 1% ~ (p=0.421 n=5+5) RegexpMatchEasy0_32-2 331ns ± 1% 328ns ± 1% ~ (p=0.056 n=5+5) RegexpMatchEasy0_1K-2 1.45µs ± 0% 1.47µs ± 0% +1.13% (p=0.008 n=5+5) RegexpMatchEasy1_32-2 359ns ± 0% 353ns ± 0% -1.84% (p=0.008 n=5+5) RegexpMatchEasy1_1K-2 1.79µs ± 0% 1.81µs ± 1% +1.16% (p=0.008 n=5+5) RegexpMatchMedium_32-2 420ns ± 2% 413ns ± 0% -1.72% (p=0.008 n=5+5) RegexpMatchMedium_1K-2 70.2µs ± 1% 69.5µs ± 1% -1.09% (p=0.032 n=5+5) RegexpMatchHard_32-2 3.87µs ± 1% 3.65µs ± 0% -5.86% (p=0.008 n=5+5) RegexpMatchHard_1K-2 111µs ± 0% 105µs ± 0% -5.49% (p=0.016 n=5+4) Revcomp-2 1.00s ± 1% 1.01s ± 2% ~ (p=0.151 n=5+5) Template-2 113ms ± 1% 113ms ± 2% ~ (p=0.841 n=5+5) TimeParse-2 555ns ± 0% 550ns ± 1% -0.87% (p=0.032 n=5+5) TimeFormat-2 736ns ± 1% 704ns ± 1% -4.35% (p=0.008 n=5+5) [Geo mean] 120µs 119µs -0.77% Reduce "spilled value remains" by 0.6% in cmd/go on AMD64. Change-Id: If655df343b0f30d1a49ab1ab644f10c698b96f3e Reviewed-on: https://go-review.googlesource.com/32442 Run-TryBot: Cherry Zhang TryBot-Result: Gobot Gobot Reviewed-by: Keith Randall --- diff --git a/src/cmd/compile/internal/ssa/regalloc.go b/src/cmd/compile/internal/ssa/regalloc.go index dc157aed5e..2b66982340 100644 --- a/src/cmd/compile/internal/ssa/regalloc.go +++ b/src/cmd/compile/internal/ssa/regalloc.go @@ -869,7 +869,6 @@ func (s *regAllocState) regalloc(f *Func) { m := s.values[a.ID].regs &^ phiUsed & s.allocatable if m != 0 { r := pickReg(m) - s.freeReg(r) phiUsed |= regMask(1) << r phiRegs = append(phiRegs, r) } else { @@ -878,7 +877,7 @@ func (s *regAllocState) regalloc(f *Func) { } // Second pass - deallocate any phi inputs which are now dead. - for _, v := range phis { + for i, v := range phis { if !s.values[v.ID].needReg { continue } @@ -887,6 +886,31 @@ func (s *regAllocState) regalloc(f *Func) { // Input is dead beyond the phi, deallocate // anywhere else it might live. s.freeRegs(s.values[a.ID].regs) + } else { + // Input is still live. + // Try to move it around before kicking out, if there is a free register. + // We generate a Copy in the predecessor block and record it. It will be + // deleted if never used. + r := phiRegs[i] + if r == noRegister { + continue + } + // Pick a free register. At this point some registers used in the predecessor + // block may have been deallocated. Those are the ones used for Phis. Exclude + // them (and they are not going to be helpful anyway). + m := s.compatRegs(a.Type) &^ s.used &^ phiUsed + if m != 0 && !s.values[a.ID].rematerializeable && countRegs(s.values[a.ID].regs) == 1 { + r2 := pickReg(m) + c := p.NewValue1(a.Line, OpCopy, a.Type, s.regs[r].c) + s.copies[c] = false + if s.f.pass.debug > regDebug { + fmt.Printf("copy %s to %s : %s\n", a, c, s.registers[r2].Name()) + } + s.setOrig(c, a) + s.assignReg(r2, a, c) + s.endRegs[p.ID] = append(s.endRegs[p.ID], endReg{r2, a, c}) + } + s.freeReg(r) } }