Cypherpunks repositories - gostls13.git/commitdiff
runtime: inline several float64 routines to speed up complex128 division
author Shenghou Ma <minux.ma@gmail.com>
Tue, 7 Aug 2012 15:45:50 +0000 (23:45 +0800)
committer Shenghou Ma <minux.ma@gmail.com>
Tue, 7 Aug 2012 15:45:50 +0000 (23:45 +0800)
Depends on CL 6197045.

Result obtained on Core i7 620M, Darwin/amd64:
benchmark                       old ns/op    new ns/op    delta
BenchmarkComplex128DivNormal           57           28  -50.78%
BenchmarkComplex128DivNisNaN           49           15  -68.90%
BenchmarkComplex128DivDisNaN           49           15  -67.88%
BenchmarkComplex128DivNisInf           40           12  -68.50%
BenchmarkComplex128DivDisInf           33           13  -61.06%

Result obtained on Core i7 620M, Darwin/386:
benchmark                       old ns/op    new ns/op    delta
BenchmarkComplex128DivNormal           89           50  -44.05%
BenchmarkComplex128DivNisNaN          307          802  +161.24%
BenchmarkComplex128DivDisNaN          309          788  +155.02%
BenchmarkComplex128DivNisInf          278          237  -14.75%
BenchmarkComplex128DivDisInf           46           22  -52.46%

Result obtained on 700MHz OMAP4460, Linux/ARM:
benchmark                       old ns/op    new ns/op    delta
BenchmarkComplex128DivNormal         1557          465  -70.13%
BenchmarkComplex128DivNisNaN         1443          220  -84.75%
BenchmarkComplex128DivDisNaN         1481          218  -85.28%
BenchmarkComplex128DivNisInf          952          216  -77.31%
BenchmarkComplex128DivDisInf          861          231  -73.17%

The 386 version has a performance regression, but as we have
decided to use SSE2 instead of x87 FPU for 386 too (issue 3912),
I won't address this issue.

R=dsymonds, mchaten, iant, dave, mtj, rsc, r
CC=golang-dev
https://golang.org/cl/6024045

src/pkg/runtime/complex.c
src/pkg/runtime/float.c
src/pkg/runtime/print.c
src/pkg/runtime/runtime.h

index eeb94394057bcf9fc72425a4683e1b5de9fea99c..395e70fe34b87f635672d834df1263828a036854 100644 (file)
@@ -13,28 +13,30 @@ runtime·complex128div(Complex128 n, Complex128 d, Complex128 q)
        float64 a, b, ratio, denom;
 
        // Special cases as in C99.
-       ninf = runtime·isInf(n.real, 0) || runtime·isInf(n.imag, 0);
-       dinf = runtime·isInf(d.real, 0) || runtime·isInf(d.imag, 0);
+       ninf = n.real == runtime·posinf || n.real == runtime·neginf ||
+              n.imag == runtime·posinf || n.imag == runtime·neginf;
+       dinf = d.real == runtime·posinf || d.real == runtime·neginf ||
+              d.imag == runtime·posinf || d.imag == runtime·neginf;
 
-       nnan = !ninf && (runtime·isNaN(n.real) || runtime·isNaN(n.imag));
-       dnan = !dinf && (runtime·isNaN(d.real) || runtime·isNaN(d.imag));
+       nnan = !ninf && (ISNAN(n.real) || ISNAN(n.imag));
+       dnan = !dinf && (ISNAN(d.real) || ISNAN(d.imag));
 
        if(nnan || dnan) {
-               q.real = runtime·NaN();
-               q.imag = runtime·NaN();
-       } else if(ninf && !dinf && !dnan) {
-               q.real = runtime·Inf(0);
-               q.imag = runtime·Inf(0);
-       } else if(!ninf && !nnan && dinf) {
+               q.real = runtime·nan;
+               q.imag = runtime·nan;
+       } else if(ninf && !dinf) {
+               q.real = runtime·posinf;
+               q.imag = runtime·posinf;
+       } else if(!ninf && dinf) {
                q.real = 0;
                q.imag = 0;
        } else if(d.real == 0 && d.imag == 0) {
                if(n.real == 0 && n.imag == 0) {
-                       q.real = runtime·NaN();
-                       q.imag = runtime·NaN();
+                       q.real = runtime·nan;
+                       q.imag = runtime·nan;
                } else {
-                       q.real = runtime·Inf(0);
-                       q.imag = runtime·Inf(0);
+                       q.real = runtime·posinf;
+                       q.imag = runtime·posinf;
                }
        } else {
                // Standard complex arithmetic, factored to avoid unnecessary overflow.
index 4d9f1259771c31ccb1167f4925d5e39a3d78dcee..42082e434794e4f2892ee897303df51a79184aad 100644 (file)
@@ -4,170 +4,7 @@
 
 #include "runtime.h"
 
-static uint64  uvnan           = 0x7FF8000000000001ULL;
-static uint64  uvinf           = 0x7FF0000000000000ULL;
-static uint64  uvneginf        = 0xFFF0000000000000ULL;
-
-uint32
-runtime·float32tobits(float32 f)
-{
-       // The obvious cast-and-pointer code is technically
-       // not valid, and gcc miscompiles it.  Use a union instead.
-       union {
-               float32 f;
-               uint32 i;
-       } u;
-       u.f = f;
-       return u.i;
-}
-
-uint64
-runtime·float64tobits(float64 f)
-{
-       // The obvious cast-and-pointer code is technically
-       // not valid, and gcc miscompiles it.  Use a union instead.
-       union {
-               float64 f;
-               uint64 i;
-       } u;
-       u.f = f;
-       return u.i;
-}
-
-float64
-runtime·float64frombits(uint64 i)
-{
-       // The obvious cast-and-pointer code is technically
-       // not valid, and gcc miscompiles it.  Use a union instead.
-       union {
-               float64 f;
-               uint64 i;
-       } u;
-       u.i = i;
-       return u.f;
-}
-
-float32
-runtime·float32frombits(uint32 i)
-{
-       // The obvious cast-and-pointer code is technically
-       // not valid, and gcc miscompiles it.  Use a union instead.
-       union {
-               float32 f;
-               uint32 i;
-       } u;
-       u.i = i;
-       return u.f;
-}
-
-bool
-runtime·isInf(float64 f, int32 sign)
-{
-       uint64 x;
-
-       x = runtime·float64tobits(f);
-       if(sign == 0)
-               return x == uvinf || x == uvneginf;
-       if(sign > 0)
-               return x == uvinf;
-       return x == uvneginf;
-}
-
-float64
-runtime·NaN(void)
-{
-       return runtime·float64frombits(uvnan);
-}
-
-bool
-runtime·isNaN(float64 f)
-{
-       uint64 x;
-
-       x = runtime·float64tobits(f);
-       return ((uint32)(x>>52) & 0x7FF) == 0x7FF && !runtime·isInf(f, 0);
-}
-
-float64
-runtime·Inf(int32 sign)
-{
-       if(sign >= 0)
-               return runtime·float64frombits(uvinf);
-       else
-               return runtime·float64frombits(uvneginf);
-}
-
-enum
-{
-       MASK    = 0x7ffL,
-       SHIFT   = 64-11-1,
-       BIAS    = 1022L,
-};
-
-float64
-runtime·frexp(float64 d, int32 *ep)
-{
-       uint64 x;
-
-       if(d == 0) {
-               *ep = 0;
-               return 0;
-       }
-       x = runtime·float64tobits(d);
-       *ep = (int32)((x >> SHIFT) & MASK) - BIAS;
-       x &= ~((uint64)MASK << SHIFT);
-       x |= (uint64)BIAS << SHIFT;
-       return runtime·float64frombits(x);
-}
-
-float64
-runtime·ldexp(float64 d, int32 e)
-{
-       uint64 x;
-
-       if(d == 0)
-               return 0;
-       x = runtime·float64tobits(d);
-       e += (int32)(x >> SHIFT) & MASK;
-       if(e <= 0)
-               return 0;       /* underflow */
-       if(e >= MASK){          /* overflow */
-               if(d < 0)
-                       return runtime·Inf(-1);
-               return runtime·Inf(1);
-       }
-       x &= ~((uint64)MASK << SHIFT);
-       x |= (uint64)e << SHIFT;
-       return runtime·float64frombits(x);
-}
-
-float64
-runtime·modf(float64 d, float64 *ip)
-{
-       float64 dd;
-       uint64 x;
-       int32 e;
-
-       if(d < 1) {
-               if(d < 0) {
-                       d = runtime·modf(-d, ip);
-                       *ip = -*ip;
-                       return -d;
-               }
-               *ip = 0;
-               return d;
-       }
-
-       x = runtime·float64tobits(d);
-       e = (int32)((x >> SHIFT) & MASK) - BIAS;
-
-       /*
-        * Keep the top 11+e bits; clear the rest.
-        */
-       if(e <= 64-11)
-               x &= ~(((uint64)1 << (64LL-11LL-e))-1);
-       dd = runtime·float64frombits(x);
-       *ip = dd;
-       return d - dd;
-}
-
+// used as float64 via runtime· names
+uint64 ·nan           = 0x7FF8000000000001ULL;
+uint64 ·posinf        = 0x7FF0000000000000ULL;
+uint64 ·neginf        = 0xFFF0000000000000ULL;
index b41e28b37a86fd841be292bdc8a9426cbaa1902a..fe21f1691a805c6b7aee54059929929a5d6e5323 100644 (file)
@@ -209,15 +209,15 @@ runtime·printfloat(float64 v)
        int32 e, s, i, n;
        float64 h;
 
-       if(runtime·isNaN(v)) {
+       if(ISNAN(v)) {
                gwrite("NaN", 3);
                return;
        }
-       if(runtime·isInf(v, 1)) {
+       if(v == runtime·posinf) {
                gwrite("+Inf", 4);
                return;
        }
-       if(runtime·isInf(v, -1)) {
+       if(v == runtime·neginf) {
                gwrite("-Inf", 4);
                return;
        }
index dfdb3663c903f3a195e253bb7cfc221afbdf7566..c8df87e5e804067bff7edfe7fe73f00bd9b16659 100644 (file)
@@ -815,3 +815,12 @@ uintptr    runtime·memlimit(void);
 // is forced to deliver the signal to a thread that's actually running.
 // This is a no-op on other systems.
 void   runtime·setprof(bool);
+
+// float.c
+extern float64 runtime·nan;
+extern float64 runtime·posinf;
+extern float64 runtime·neginf;
+extern uint64 ·nan;
+extern uint64 ·posinf;
+extern uint64 ·neginf;
+#define ISNAN(f) ((f) != (f))