From: Ilya Tocar Date: Wed, 19 Oct 2016 17:21:42 +0000 (+0300) Subject: cmd/compile/internal/amd64: break dependency for CVTS[LQ]2S[DS] X-Git-Tag: go1.8beta1~743 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=8589049d5c1cc1ff9856defb6d9097428ad09b82;p=gostls13.git cmd/compile/internal/amd64: break dependency for CVTS[LQ]2S[DS] CVTSL2SS, CVTSQ2SS, CVTSL2SD, CVTSQ2SD preserve upper part of xmm register, introducing false dependency on a previous value. Break it by xoring destination with itself. Increases size of go executable by 320 bytes, but shows nice improvement on go1. Also fixes performance degradation introduced by 1.7. name old time/op new time/op delta BinaryTree17-4 2.20s ± 1% 2.19s ± 0% -0.36% (p=0.000 n=18+16) Fannkuch11-4 2.44s ± 1% 2.45s ± 2% +0.47% (p=0.030 n=20+20) FmtFprintfEmpty-4 40.9ns ± 7% 40.5ns ± 1% ~ (p=0.531 n=20+16) FmtFprintfString-4 111ns ± 2% 111ns ± 1% ~ (p=0.510 n=18+19) FmtFprintfInt-4 98.3ns ± 3% 99.3ns ± 1% +1.01% (p=0.003 n=20+18) FmtFprintfIntInt-4 148ns ± 3% 147ns ± 1% ~ (p=0.919 n=20+17) FmtFprintfPrefixedInt-4 149ns ± 1% 152ns ± 0% +1.73% (p=0.000 n=19+17) FmtFprintfFloat-4 231ns ± 0% 231ns ± 1% ~ (p=0.678 n=18+19) FmtManyArgs-4 667ns ± 1% 672ns ± 1% +0.73% (p=0.005 n=20+20) GobDecode-4 5.60ms ± 0% 5.61ms ± 0% +0.24% (p=0.000 n=20+20) GobEncode-4 4.74ms ± 0% 4.73ms ± 1% -0.20% (p=0.002 n=20+20) Gzip-4 199ms ± 0% 199ms ± 1% +0.35% (p=0.000 n=19+20) Gunzip-4 31.8ms ± 1% 31.5ms ± 1% -0.89% (p=0.000 n=20+20) HTTPClientServer-4 38.1µs ± 1% 38.0µs ± 1% ~ (p=0.117 n=19+18) JSONEncode-4 14.2ms ± 1% 13.4ms ± 0% -5.73% (p=0.000 n=20+20) JSONDecode-4 42.7ms ± 0% 42.7ms ± 1% +0.18% (p=0.019 n=18+19) Mandelbrot200-4 3.26ms ± 0% 2.99ms ± 0% -8.38% (p=0.000 n=19+19) GoParse-4 2.76ms ± 1% 2.76ms ± 1% ~ (p=0.583 n=20+20) RegexpMatchEasy0_32-4 69.5ns ± 0% 69.6ns ± 0% +0.10% (p=0.017 n=16+17) RegexpMatchEasy0_1K-4 703ns ± 0% 708ns ± 3% +0.65% (p=0.000 n=17+18) RegexpMatchEasy1_32-4 68.2ns ± 1% 68.2ns ± 2% ~ (p=0.094 n=18+20) RegexpMatchEasy1_1K-4 288ns ± 1% 288ns ± 0% ~ (p=0.403 n=17+18) RegexpMatchMedium_32-4 104ns ± 2% 103ns ± 1% ~ (p=0.110 n=20+16) RegexpMatchMedium_1K-4 31.7µs ± 3% 31.7µs ± 3% ~ (p=0.091 n=19+20) RegexpMatchHard_32-4 1.59µs ± 2% 1.58µs ± 2% ~ (p=0.083 n=20+20) RegexpMatchHard_1K-4 48.1µs ± 3% 47.9µs ± 2% ~ (p=0.461 n=20+19) Revcomp-4 344ms ± 0% 345ms ± 0% +0.08% (p=0.009 n=18+17) Template-4 44.8ms ± 1% 44.7ms ± 1% ~ (p=0.277 n=20+20) TimeParse-4 258ns ± 0% 258ns ± 0% ~ (all samples are equal) TimeFormat-4 275ns ± 0% 273ns ± 0% -0.64% (p=0.000 n=20+18) name old speed new speed delta GobDecode-4 137MB/s ± 0% 137MB/s ± 0% -0.24% (p=0.000 n=20+20) GobEncode-4 162MB/s ± 0% 162MB/s ± 0% +0.20% (p=0.002 n=20+20) Gzip-4 97.6MB/s ± 0% 97.3MB/s ± 1% -0.35% (p=0.000 n=19+20) Gunzip-4 610MB/s ± 1% 615MB/s ± 1% +0.89% (p=0.000 n=20+20) JSONEncode-4 136MB/s ± 1% 145MB/s ± 0% +6.08% (p=0.000 n=20+20) JSONDecode-4 45.5MB/s ± 0% 45.4MB/s ± 1% -0.17% (p=0.017 n=18+19) GoParse-4 21.0MB/s ± 1% 21.0MB/s ± 1% ~ (p=0.578 n=20+20) RegexpMatchEasy0_32-4 460MB/s ± 0% 460MB/s ± 0% -0.09% (p=0.031 n=16+17) RegexpMatchEasy0_1K-4 1.46GB/s ± 0% 1.45GB/s ± 3% -0.64% (p=0.000 n=17+18) RegexpMatchEasy1_32-4 469MB/s ± 0% 469MB/s ± 2% +0.06% (p=0.043 n=18+20) RegexpMatchEasy1_1K-4 3.55GB/s ± 1% 3.55GB/s ± 0% ~ (p=0.057 n=17+18) RegexpMatchMedium_32-4 9.61MB/s ± 2% 9.64MB/s ± 2% ~ (p=0.856 n=20+20) RegexpMatchMedium_1K-4 32.3MB/s ± 3% 32.3MB/s ± 3% ~ (p=0.085 n=19+20) RegexpMatchHard_32-4 20.1MB/s ± 2% 20.2MB/s ± 2% ~ (p=0.086 n=20+20) RegexpMatchHard_1K-4 21.3MB/s ± 3% 21.4MB/s ± 2% ~ (p=0.578 n=20+20) Revcomp-4 738MB/s ± 0% 737MB/s ± 0% -0.08% (p=0.009 n=18+17) Template-4 43.3MB/s ± 1% 43.4MB/s ± 1% ~ (p=0.274 n=20+20) Fixes #16982 Change-Id: If574d66f39f4183a9b1d5ffff0339909cc73f59d Reviewed-on: https://go-review.googlesource.com/31490 Run-TryBot: Ilya Tocar TryBot-Result: Gobot Gobot Reviewed-by: Keith Randall --- diff --git a/src/cmd/compile/internal/amd64/ssa.go b/src/cmd/compile/internal/amd64/ssa.go index 54c0c0fb5a..3c997f20ae 100644 --- a/src/cmd/compile/internal/amd64/ssa.go +++ b/src/cmd/compile/internal/amd64/ssa.go @@ -637,10 +637,14 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { p.To.Index = i gc.AddAux2(&p.To, v, sc.Off()) case ssa.OpAMD64MOVLQSX, ssa.OpAMD64MOVWQSX, ssa.OpAMD64MOVBQSX, ssa.OpAMD64MOVLQZX, ssa.OpAMD64MOVWQZX, ssa.OpAMD64MOVBQZX, - ssa.OpAMD64CVTSL2SS, ssa.OpAMD64CVTSL2SD, ssa.OpAMD64CVTSQ2SS, ssa.OpAMD64CVTSQ2SD, ssa.OpAMD64CVTTSS2SL, ssa.OpAMD64CVTTSD2SL, ssa.OpAMD64CVTTSS2SQ, ssa.OpAMD64CVTTSD2SQ, ssa.OpAMD64CVTSS2SD, ssa.OpAMD64CVTSD2SS: opregreg(v.Op.Asm(), v.Reg(), v.Args[0].Reg()) + case ssa.OpAMD64CVTSL2SD, ssa.OpAMD64CVTSQ2SD, ssa.OpAMD64CVTSQ2SS, ssa.OpAMD64CVTSL2SS: + r := v.Reg() + // Break false dependency on destination register. + opregreg(x86.AXORPS, r, r) + opregreg(v.Op.Asm(), r, v.Args[0].Reg()) case ssa.OpAMD64DUFFZERO: off := duffStart(v.AuxInt) adj := duffAdj(v.AuxInt)