]> Cypherpunks repositories - gostls13.git/commitdiff
math/big: more conservative use of lock for divisor table
authorRobert Griesemer <gri@golang.org>
Thu, 11 Oct 2012 23:04:03 +0000 (16:04 -0700)
committerRobert Griesemer <gri@golang.org>
Thu, 11 Oct 2012 23:04:03 +0000 (16:04 -0700)
Minor performance impact running sequentially:

benchmark                      old ns/op    new ns/op    delta
BenchmarkString10Base2               389          391   +0.51%
BenchmarkString100Base2             1530         1534   +0.26%
BenchmarkString1000Base2           11789        11787   -0.02%
BenchmarkString10000Base2         111443       112030   +0.53%
BenchmarkString100000Base2       1017483      1015347   -0.21%
BenchmarkString10Base8               339          344   +1.47%
BenchmarkString100Base8              753          756   +0.40%
BenchmarkString1000Base8            4618         4641   +0.50%
BenchmarkString10000Base8          43217        43534   +0.73%
BenchmarkString100000Base8        397518       400602   +0.78%
BenchmarkString10Base10              630          630   +0.00%
BenchmarkString100Base10            1975         1960   -0.76%
BenchmarkString1000Base10          10179        10174   -0.05%
BenchmarkString10000Base10         44527        44416   -0.25%
BenchmarkString100000Base10     14404694     14425308   +0.14%
BenchmarkString10Base16              283          288   +1.77%
BenchmarkString100Base16             597          598   +0.17%
BenchmarkString1000Base16           3189         3186   -0.09%
BenchmarkString10000Base16         29403        29364   -0.13%
BenchmarkString100000Base16       265657       265587   -0.03%

Note that due to other improvements (faster assembly routines,
better code generation by compiler), these benchmarks now run
up to 37% faster than they used to at the last time measured (1/9/2012).

Minor performance impact for StringPiParallel running in parallel:

Current CL but with Lock/Unlock commented out (removed):

BenchmarkStringPiParallel     5000     343581 ns/op
BenchmarkStringPiParallel-2    10000     184511 ns/op
BenchmarkStringPiParallel-3    10000     129768 ns/op
BenchmarkStringPiParallel-4    10000     102326 ns/op

Current CL:

BenchmarkStringPiParallel     5000     345169 ns/op
BenchmarkStringPiParallel-2    10000     185827 ns/op
BenchmarkStringPiParallel-3    10000     131168 ns/op
BenchmarkStringPiParallel-4    10000     102353 ns/op

Fixes #4218.

R=dvyukov, michael.jones, dave
CC=golang-dev
https://golang.org/cl/6643053

src/pkg/math/big/nat.go
src/pkg/math/big/nat_test.go

index 2d5a5c9587d6878d6d8d4b65124fb0a572843d3b..b2d6cd96c63f0bbd9a833f593844c9f8d36ff121 100644 (file)
@@ -920,8 +920,10 @@ type divisor struct {
        ndigits int // digit length of divisor in terms of output base digits
 }
 
-var cacheBase10 [64]divisor // cached divisors for base 10
-var cacheLock sync.Mutex    // protects cacheBase10
+var cacheBase10 struct {
+       sync.Mutex
+       table [64]divisor // cached divisors for base 10
+}
 
 // expWW computes x**y
 func (z nat) expWW(x, y Word) nat {
@@ -937,34 +939,28 @@ func divisors(m int, b Word, ndigits int, bb Word) []divisor {
 
        // determine k where (bb**leafSize)**(2**k) >= sqrt(x)
        k := 1
-       for words := leafSize; words < m>>1 && k < len(cacheBase10); words <<= 1 {
+       for words := leafSize; words < m>>1 && k < len(cacheBase10.table); words <<= 1 {
                k++
        }
 
-       // create new table of divisors or extend and reuse existing table as appropriate
-       var table []divisor
-       var cached bool
-       switch b {
-       case 10:
-               table = cacheBase10[0:k] // reuse old table for this conversion
-               cached = true
-       default:
-               table = make([]divisor, k) // new table for this conversion
+       // reuse and extend existing table of divisors or create new table as appropriate
+       var table []divisor // for b == 10, table overlaps with cacheBase10.table
+       if b == 10 {
+               cacheBase10.Lock()
+               table = cacheBase10.table[0:k] // reuse old table for this conversion
+       } else {
+               table = make([]divisor, k) // create new table for this conversion
        }
 
        // extend table
        if table[k-1].ndigits == 0 {
-               if cached {
-                       cacheLock.Lock() // begin critical section
-               }
-
                // add new entries as needed
                var larger nat
                for i := 0; i < k; i++ {
                        if table[i].ndigits == 0 {
                                if i == 0 {
-                                       table[i].bbb = nat(nil).expWW(bb, Word(leafSize))
-                                       table[i].ndigits = ndigits * leafSize
+                                       table[0].bbb = nat(nil).expWW(bb, Word(leafSize))
+                                       table[0].ndigits = ndigits * leafSize
                                } else {
                                        table[i].bbb = nat(nil).mul(table[i-1].bbb, table[i-1].bbb)
                                        table[i].ndigits = 2 * table[i-1].ndigits
@@ -980,10 +976,10 @@ func divisors(m int, b Word, ndigits int, bb Word) []divisor {
                                table[i].nbits = table[i].bbb.bitLen()
                        }
                }
+       }
 
-               if cached {
-                       cacheLock.Unlock() // end critical section
-               }
+       if b == 10 {
+               cacheBase10.Unlock()
        }
 
        return table
index 68dd1a96d3c00d3578b92d5c10fb7c0e54d0862e..8dfbf092b44145354d02e9c9f948e500394c22eb 100644 (file)
@@ -409,6 +409,20 @@ func TestScanPi(t *testing.T) {
        }
 }
 
+func TestScanPiParallel(t *testing.T) {
+       const n = 2
+       c := make(chan int)
+       for i := 0; i < n; i++ {
+               go func() {
+                       TestScanPi(t)
+                       c <- 0
+               }()
+       }
+       for i := 0; i < n; i++ {
+               <-c
+       }
+}
+
 func BenchmarkScanPi(b *testing.B) {
        for i := 0; i < b.N; i++ {
                var x nat
@@ -416,6 +430,28 @@ func BenchmarkScanPi(b *testing.B) {
        }
 }
 
+func BenchmarkStringPiParallel(b *testing.B) {
+       var x nat
+       x, _, _ = x.scan(strings.NewReader(pi), 0)
+       if x.decimalString() != pi {
+               panic("benchmark incorrect: conversion failed")
+       }
+       n := runtime.GOMAXPROCS(0)
+       m := b.N / n // n*m <= b.N due to flooring, but the error is neglibible (n is not very large)
+       c := make(chan int, n)
+       for i := 0; i < n; i++ {
+               go func() {
+                       for j := 0; j < m; j++ {
+                               x.decimalString()
+                       }
+                       c <- 0
+               }()
+       }
+       for i := 0; i < n; i++ {
+               <-c
+       }
+}
+
 func BenchmarkScan10Base2(b *testing.B)     { ScanHelper(b, 2, 10, 10) }
 func BenchmarkScan100Base2(b *testing.B)    { ScanHelper(b, 2, 10, 100) }
 func BenchmarkScan1000Base2(b *testing.B)   { ScanHelper(b, 2, 10, 1000) }
@@ -516,7 +552,7 @@ func BenchmarkLeafSize64(b *testing.B) { LeafSizeHelper(b, 10, 64) }
 func LeafSizeHelper(b *testing.B, base Word, size int) {
        b.StopTimer()
        originalLeafSize := leafSize
-       resetTable(cacheBase10[:])
+       resetTable(cacheBase10.table[:])
        leafSize = size
        b.StartTimer()
 
@@ -533,7 +569,7 @@ func LeafSizeHelper(b *testing.B, base Word, size int) {
        }
 
        b.StopTimer()
-       resetTable(cacheBase10[:])
+       resetTable(cacheBase10.table[:])
        leafSize = originalLeafSize
        b.StartTimer()
 }