From cc1890cbe3053953a5967474c8fad5005aba4165 Mon Sep 17 00:00:00 2001 From: Robert Griesemer Date: Mon, 4 Jun 2012 09:48:27 -0700 Subject: [PATCH] math/big: improved karatsuba calibration code, better mul benchmark An attempt to profit from CL 6176043 (fix to superpolinomial runtime of karatsuba multiplication) and determine a better karatsuba threshold. The result indicates that 32 is still a reasonable value. Left the threshold as is (== 32), but made some minor changes to the calibrate code which are worthwhile saving (use of existing benchmarking code for better results, better use of package time). R=golang-dev, rsc CC=golang-dev https://golang.org/cl/6260062 --- src/pkg/math/big/calibrate_test.go | 42 +++++++++++++++--------------- src/pkg/math/big/nat_test.go | 23 +++++++--------- 2 files changed, 31 insertions(+), 34 deletions(-) diff --git a/src/pkg/math/big/calibrate_test.go b/src/pkg/math/big/calibrate_test.go index efe1837bba..f69ffbf5cf 100644 --- a/src/pkg/math/big/calibrate_test.go +++ b/src/pkg/math/big/calibrate_test.go @@ -21,15 +21,17 @@ import ( var calibrate = flag.Bool("calibrate", false, "run calibration test") -// measure returns the time to run f -func measure(f func()) time.Duration { - const N = 100 - start := time.Now() - for i := N; i > 0; i-- { - f() - } - stop := time.Now() - return stop.Sub(start) / N +func karatsubaLoad(b *testing.B) { + BenchmarkMul(b) +} + +// measureKaratsuba returns the time to run a Karatsuba-relevant benchmark +// given Karatsuba threshold th. +func measureKaratsuba(th int) time.Duration { + th, karatsubaThreshold = karatsubaThreshold, th + res := testing.Benchmark(karatsubaLoad) + karatsubaThreshold = th + return time.Duration(res.NsPerOp()) } func computeThresholds() { @@ -37,35 +39,33 @@ func computeThresholds() { fmt.Printf("(run repeatedly for good results)\n") // determine Tk, the work load execution time using basic multiplication - karatsubaThreshold = 1e9 // disable karatsuba - Tb := measure(benchmarkMulLoad) - fmt.Printf("Tb = %dns\n", Tb) + Tb := measureKaratsuba(1e9) // th == 1e9 => Karatsuba multiplication disabled + fmt.Printf("Tb = %10s\n", Tb) // thresholds - n := 8 // any lower values for the threshold lead to very slow multiplies + th := 4 th1 := -1 th2 := -1 var deltaOld time.Duration - for count := -1; count != 0; count-- { + for count := -1; count != 0 && th < 128; count-- { // determine Tk, the work load execution time using Karatsuba multiplication - karatsubaThreshold = n // enable karatsuba - Tk := measure(benchmarkMulLoad) + Tk := measureKaratsuba(th) // improvement over Tb delta := (Tb - Tk) * 100 / Tb - fmt.Printf("n = %3d Tk = %8dns %4d%%", n, Tk, delta) + fmt.Printf("th = %3d Tk = %10s %4d%%", th, Tk, delta) // determine break-even point if Tk < Tb && th1 < 0 { - th1 = n + th1 = th fmt.Print(" break-even point") } // determine diminishing return if 0 < delta && delta < deltaOld && th2 < 0 { - th2 = n + th2 = th fmt.Print(" diminishing return") } deltaOld = delta @@ -74,10 +74,10 @@ func computeThresholds() { // trigger counter if th1 >= 0 && th2 >= 0 && count < 0 { - count = 20 // this many extra measurements after we got both thresholds + count = 10 // this many extra measurements after we got both thresholds } - n++ + th++ } } diff --git a/src/pkg/math/big/nat_test.go b/src/pkg/math/big/nat_test.go index becde5d171..64a8ac07c5 100644 --- a/src/pkg/math/big/nat_test.go +++ b/src/pkg/math/big/nat_test.go @@ -6,6 +6,7 @@ package big import ( "io" + "math/rand" "strings" "testing" ) @@ -135,26 +136,22 @@ func TestMulRangeN(t *testing.T) { } } -var mulArg, mulTmp nat +var rnd = rand.New(rand.NewSource(0x43de683f473542af)) +var mulx = rndNat(1e4) +var muly = rndNat(1e4) -func init() { - const n = 1000 - mulArg = make(nat, n) +func rndNat(n int) nat { + x := make(nat, n) for i := 0; i < n; i++ { - mulArg[i] = _M - } -} - -func benchmarkMulLoad() { - for j := 1; j <= 10; j++ { - x := mulArg[0 : j*100] - mulTmp.mul(x, x) + x[i] = Word(rnd.Int63()<<1 + rnd.Int63n(2)) } + return x } func BenchmarkMul(b *testing.B) { for i := 0; i < b.N; i++ { - benchmarkMulLoad() + var z nat + z.mul(mulx, muly) } } -- 2.48.1