From: Shenghou Ma
Date: Tue, 7 Aug 2012 15:45:50 +0000 (+0800)
Subject: runtime: inline several float64 routines to speed up complex128 division
X-Git-Tag: go1.1rc2~2681
X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=0157c72d133471631c13419f61117b75dcd7c255;p=gostls13.git

runtime: inline several float64 routines to speed up complex128 division

Depends on CL 6197045.

Result obtained on Core i7 620M, Darwin/amd64:
benchmark                       old ns/op    new ns/op    delta
BenchmarkComplex128DivNormal           57           28  -50.78%
BenchmarkComplex128DivNisNaN           49           15  -68.90%
BenchmarkComplex128DivDisNaN           49           15  -67.88%
BenchmarkComplex128DivNisInf           40           12  -68.50%
BenchmarkComplex128DivDisInf           33           13  -61.06%

Result obtained on Core i7 620M, Darwin/386:
benchmark                       old ns/op    new ns/op    delta
BenchmarkComplex128DivNormal           89           50  -44.05%
BenchmarkComplex128DivNisNaN          307          802 +161.24%
BenchmarkComplex128DivDisNaN          309          788 +155.02%
BenchmarkComplex128DivNisInf          278          237  -14.75%
BenchmarkComplex128DivDisInf           46           22  -52.46%

Result obtained on 700MHz OMAP4460, Linux/ARM:
benchmark                       old ns/op    new ns/op    delta
BenchmarkComplex128DivNormal         1557          465  -70.13%
BenchmarkComplex128DivNisNaN         1443          220  -84.75%
BenchmarkComplex128DivDisNaN         1481          218  -85.28%
BenchmarkComplex128DivNisInf          952          216  -77.31%
BenchmarkComplex128DivDisInf          861          231  -73.17%

The 386 version has a performance regression, but as we have
decided to use SSE2 instead of x87 FPU for 386 too (issue 3912),
I won't address this issue.

R=dsymonds, mchaten, iant, dave, mtj, rsc, r
CC=golang-dev
https://golang.org/cl/6024045
---

diff --git a/src/pkg/runtime/complex.c b/src/pkg/runtime/complex.c
index eeb9439405..395e70fe34 100644
--- a/src/pkg/runtime/complex.c
+++ b/src/pkg/runtime/complex.c
@@ -13,28 +13,30 @@ runtime·complex128div(Complex128 n, Complex128 d, Complex128 q)
 	float64 a, b, ratio, denom;
 
 	// Special cases as in C99.
- ninf = runtime·isInf(n.real, 0) || runtime·isInf(n.imag, 0); - dinf = runtime·isInf(d.real, 0) || runtime·isInf(d.imag, 0); + ninf = n.real == runtime·posinf || n.real == runtime·neginf || + n.imag == runtime·posinf || n.imag == runtime·neginf; + dinf = d.real == runtime·posinf || d.real == runtime·neginf || + d.imag == runtime·posinf || d.imag == runtime·neginf; - nnan = !ninf && (runtime·isNaN(n.real) || runtime·isNaN(n.imag)); - dnan = !dinf && (runtime·isNaN(d.real) || runtime·isNaN(d.imag)); + nnan = !ninf && (ISNAN(n.real) || ISNAN(n.imag)); + dnan = !dinf && (ISNAN(d.real) || ISNAN(d.imag)); if(nnan || dnan) { - q.real = runtime·NaN(); - q.imag = runtime·NaN(); - } else if(ninf && !dinf && !dnan) { - q.real = runtime·Inf(0); - q.imag = runtime·Inf(0); - } else if(!ninf && !nnan && dinf) { + q.real = runtime·nan; + q.imag = runtime·nan; + } else if(ninf && !dinf) { + q.real = runtime·posinf; + q.imag = runtime·posinf; + } else if(!ninf && dinf) { q.real = 0; q.imag = 0; } else if(d.real == 0 && d.imag == 0) { if(n.real == 0 && n.imag == 0) { - q.real = runtime·NaN(); - q.imag = runtime·NaN(); + q.real = runtime·nan; + q.imag = runtime·nan; } else { - q.real = runtime·Inf(0); - q.imag = runtime·Inf(0); + q.real = runtime·posinf; + q.imag = runtime·posinf; } } else { // Standard complex arithmetic, factored to avoid unnecessary overflow. diff --git a/src/pkg/runtime/float.c b/src/pkg/runtime/float.c index 4d9f125977..42082e4347 100644 --- a/src/pkg/runtime/float.c +++ b/src/pkg/runtime/float.c @@ -4,170 +4,7 @@ #include "runtime.h" -static uint64 uvnan = 0x7FF8000000000001ULL; -static uint64 uvinf = 0x7FF0000000000000ULL; -static uint64 uvneginf = 0xFFF0000000000000ULL; - -uint32 -runtime·float32tobits(float32 f) -{ - // The obvious cast-and-pointer code is technically - // not valid, and gcc miscompiles it. Use a union instead. 
- union { - float32 f; - uint32 i; - } u; - u.f = f; - return u.i; -} - -uint64 -runtime·float64tobits(float64 f) -{ - // The obvious cast-and-pointer code is technically - // not valid, and gcc miscompiles it. Use a union instead. - union { - float64 f; - uint64 i; - } u; - u.f = f; - return u.i; -} - -float64 -runtime·float64frombits(uint64 i) -{ - // The obvious cast-and-pointer code is technically - // not valid, and gcc miscompiles it. Use a union instead. - union { - float64 f; - uint64 i; - } u; - u.i = i; - return u.f; -} - -float32 -runtime·float32frombits(uint32 i) -{ - // The obvious cast-and-pointer code is technically - // not valid, and gcc miscompiles it. Use a union instead. - union { - float32 f; - uint32 i; - } u; - u.i = i; - return u.f; -} - -bool -runtime·isInf(float64 f, int32 sign) -{ - uint64 x; - - x = runtime·float64tobits(f); - if(sign == 0) - return x == uvinf || x == uvneginf; - if(sign > 0) - return x == uvinf; - return x == uvneginf; -} - -float64 -runtime·NaN(void) -{ - return runtime·float64frombits(uvnan); -} - -bool -runtime·isNaN(float64 f) -{ - uint64 x; - - x = runtime·float64tobits(f); - return ((uint32)(x>>52) & 0x7FF) == 0x7FF && !runtime·isInf(f, 0); -} - -float64 -runtime·Inf(int32 sign) -{ - if(sign >= 0) - return runtime·float64frombits(uvinf); - else - return runtime·float64frombits(uvneginf); -} - -enum -{ - MASK = 0x7ffL, - SHIFT = 64-11-1, - BIAS = 1022L, -}; - -float64 -runtime·frexp(float64 d, int32 *ep) -{ - uint64 x; - - if(d == 0) { - *ep = 0; - return 0; - } - x = runtime·float64tobits(d); - *ep = (int32)((x >> SHIFT) & MASK) - BIAS; - x &= ~((uint64)MASK << SHIFT); - x |= (uint64)BIAS << SHIFT; - return runtime·float64frombits(x); -} - -float64 -runtime·ldexp(float64 d, int32 e) -{ - uint64 x; - - if(d == 0) - return 0; - x = runtime·float64tobits(d); - e += (int32)(x >> SHIFT) & MASK; - if(e <= 0) - return 0; /* underflow */ - if(e >= MASK){ /* overflow */ - if(d < 0) - return runtime·Inf(-1); - return 
runtime·Inf(1); - } - x &= ~((uint64)MASK << SHIFT); - x |= (uint64)e << SHIFT; - return runtime·float64frombits(x); -} - -float64 -runtime·modf(float64 d, float64 *ip) -{ - float64 dd; - uint64 x; - int32 e; - - if(d < 1) { - if(d < 0) { - d = runtime·modf(-d, ip); - *ip = -*ip; - return -d; - } - *ip = 0; - return d; - } - - x = runtime·float64tobits(d); - e = (int32)((x >> SHIFT) & MASK) - BIAS; - - /* - * Keep the top 11+e bits; clear the rest. - */ - if(e <= 64-11) - x &= ~(((uint64)1 << (64LL-11LL-e))-1); - dd = runtime·float64frombits(x); - *ip = dd; - return d - dd; -} - +// used as float64 via runtime· names +uint64 ·nan = 0x7FF8000000000001ULL; +uint64 ·posinf = 0x7FF0000000000000ULL; +uint64 ·neginf = 0xFFF0000000000000ULL; diff --git a/src/pkg/runtime/print.c b/src/pkg/runtime/print.c index b41e28b37a..fe21f1691a 100644 --- a/src/pkg/runtime/print.c +++ b/src/pkg/runtime/print.c @@ -209,15 +209,15 @@ runtime·printfloat(float64 v) int32 e, s, i, n; float64 h; - if(runtime·isNaN(v)) { + if(ISNAN(v)) { gwrite("NaN", 3); return; } - if(runtime·isInf(v, 1)) { + if(v == runtime·posinf) { gwrite("+Inf", 4); return; } - if(runtime·isInf(v, -1)) { + if(v == runtime·neginf) { gwrite("-Inf", 4); return; } diff --git a/src/pkg/runtime/runtime.h b/src/pkg/runtime/runtime.h index dfdb3663c9..c8df87e5e8 100644 --- a/src/pkg/runtime/runtime.h +++ b/src/pkg/runtime/runtime.h @@ -815,3 +815,12 @@ uintptr runtime·memlimit(void); // is forced to deliver the signal to a thread that's actually running. // This is a no-op on other systems. void runtime·setprof(bool); + +// float.c +extern float64 runtime·nan; +extern float64 runtime·posinf; +extern float64 runtime·neginf; +extern uint64 ·nan; +extern uint64 ·posinf; +extern uint64 ·neginf; +#define ISNAN(f) ((f) != (f))