From cf77dd37e7e7ae8eefe8c21a9ff3f04de989b808 Mon Sep 17 00:00:00 2001 From: =?utf8?q?R=C3=A9my=20Oudompheng?= Date: Thu, 3 Jan 2013 00:44:31 +0100 Subject: [PATCH] cmd/8g: extend elimination of temporaries to SSE2 code. Before: (erf.go:188) TEXT Erf+0(SB),$220 (erf.go:265) TEXT Erfc+0(SB),$204 (lgamma.go:174) TEXT Lgamma+0(SB),$948 After: (erf.go:188) TEXT Erf+0(SB),$84 (erf.go:265) TEXT Erfc+0(SB),$84 (lgamma.go:174) TEXT Lgamma+0(SB),$44 SSE before vs. SSE after: benchmark old ns/op new ns/op delta BenchmarkAcosh 81 49 -39.14% BenchmarkAsinh 109 109 +0.00% BenchmarkAtanh 73 74 +0.68% BenchmarkLgamma 138 42 -69.20% BenchmarkModf 24 15 -36.95% BenchmarkSqrtGo 565 556 -1.59% R=rsc CC=golang-dev https://golang.org/cl/7028048 --- src/cmd/8g/reg.c | 43 ++++++++++++++++++++++++++----------------- 1 file changed, 26 insertions(+), 17 deletions(-) diff --git a/src/cmd/8g/reg.c b/src/cmd/8g/reg.c index 2ae819548b..80230bc866 100644 --- a/src/cmd/8g/reg.c +++ b/src/cmd/8g/reg.c @@ -1845,26 +1845,33 @@ fixtemp(Prog *firstp) for(p=firstp; p!=P; p=p->link) { if(debug['R'] && debug['v']) print("%P\n", p); - if(p->link == P - || !RtoB(p->from.type) - || p->to.type != D_AUTO - || isfloat[p->to.etype]) + if(p->link == P || p->to.type != D_AUTO) continue; - switch(p->as) { - case AMOVB: - if(p->to.width == 1) - break; - case AMOVW: - if(p->to.width == 2) + if(isfloat[p->to.etype] && FtoB(p->from.type)) { + switch(p->as) { + case AMOVSS: + case AMOVSD: break; - case AMOVL: - if(p->to.width == 4) - break; - default: + default: + continue; + } + } else if(!isfloat[p->to.etype] && RtoB(p->from.type)) { + switch(p->as) { + case AMOVB: + if(p->to.width == 1) + break; + case AMOVW: + if(p->to.width == 2) + break; + case AMOVL: + if(p->to.width == 4) + break; + default: + continue; + } + } else continue; - } // p is a MOV reg, mem. - // and it is not a float. p2 = p->link; h = hash32to16(fnv1(p->to.sym)); if(counts[h] != 2) { @@ -1872,7 +1879,9 @@ fixtemp(Prog *firstp) } switch(p2->as) { case ALEAL: - case AFMOVL: + case AFMOVD: + case AFMOVF: + case AFMOVL: case AFMOVW: case AFMOVV: // funny -- 2.48.1