From 8f30d25168cae6380d9ef50528063716261356c4 Mon Sep 17 00:00:00 2001 From: =?utf8?q?R=C3=A9my=20Oudompheng?= Date: Sun, 14 Apr 2019 08:00:38 +0200 Subject: [PATCH] math/big: use nat pool to reduce allocations in mul and sqr This notably allows to reuse temporaries across the karatsubaSqr recursion. benchmark old ns/op new ns/op delta BenchmarkNatMul/10-4 227 228 +0.44% BenchmarkNatMul/100-4 8339 8589 +3.00% BenchmarkNatMul/1000-4 313796 312272 -0.49% BenchmarkNatMul/10000-4 11924720 11873589 -0.43% BenchmarkNatMul/100000-4 503813354 503839058 +0.01% BenchmarkNatSqr/20-4 549 513 -6.56% BenchmarkNatSqr/30-4 945 874 -7.51% BenchmarkNatSqr/50-4 1993 1832 -8.08% BenchmarkNatSqr/80-4 4096 3874 -5.42% BenchmarkNatSqr/100-4 6192 5712 -7.75% BenchmarkNatSqr/200-4 20388 19543 -4.14% BenchmarkNatSqr/300-4 38735 36715 -5.21% BenchmarkNatSqr/500-4 99562 93542 -6.05% BenchmarkNatSqr/800-4 195554 184907 -5.44% BenchmarkNatSqr/1000-4 286302 275053 -3.93% BenchmarkNatSqr/10000-4 9817057 9441641 -3.82% BenchmarkNatSqr/100000-4 390713416 379696789 -2.82% benchmark old allocs new allocs delta BenchmarkNatMul/10-4 1 1 +0.00% BenchmarkNatMul/100-4 1 1 +0.00% BenchmarkNatMul/1000-4 2 1 -50.00% BenchmarkNatMul/10000-4 2 1 -50.00% BenchmarkNatMul/100000-4 9 11 +22.22% BenchmarkNatSqr/20-4 2 1 -50.00% BenchmarkNatSqr/30-4 2 1 -50.00% BenchmarkNatSqr/50-4 2 1 -50.00% BenchmarkNatSqr/80-4 2 1 -50.00% BenchmarkNatSqr/100-4 2 1 -50.00% BenchmarkNatSqr/200-4 2 1 -50.00% BenchmarkNatSqr/300-4 4 1 -75.00% BenchmarkNatSqr/500-4 4 1 -75.00% BenchmarkNatSqr/800-4 10 1 -90.00% BenchmarkNatSqr/1000-4 10 1 -90.00% BenchmarkNatSqr/10000-4 731 1 -99.86% BenchmarkNatSqr/100000-4 19687 6 -99.97% benchmark old bytes new bytes delta BenchmarkNatMul/10-4 192 192 +0.00% BenchmarkNatMul/100-4 4864 4864 +0.00% BenchmarkNatMul/1000-4 57344 49224 -14.16% BenchmarkNatMul/10000-4 565248 498772 -11.76% BenchmarkNatMul/100000-4 5749504 7263720 +26.34% BenchmarkNatSqr/20-4 672 352 -47.62% BenchmarkNatSqr/30-4 992 512 -48.39% BenchmarkNatSqr/50-4 1792 896 -50.00% BenchmarkNatSqr/80-4 2688 1408 -47.62% BenchmarkNatSqr/100-4 3584 1792 -50.00% BenchmarkNatSqr/200-4 6656 3456 -48.08% BenchmarkNatSqr/300-4 24448 16387 -32.97% BenchmarkNatSqr/500-4 36864 24591 -33.29% BenchmarkNatSqr/800-4 69760 40981 -41.25% BenchmarkNatSqr/1000-4 86016 49180 -42.82% BenchmarkNatSqr/10000-4 2524800 487368 -80.70% BenchmarkNatSqr/100000-4 68599808 5876581 -91.43% Change-Id: I8e6e409ae1cb48be9d5aa9b5f428d6cbe487673a Reviewed-on: https://go-review.googlesource.com/c/go/+/172017 Run-TryBot: Brad Fitzpatrick TryBot-Result: Gobot Gobot Reviewed-by: Robert Griesemer --- src/math/big/nat.go | 14 +++++++++++--- src/math/big/nat_test.go | 29 ++++++++++++++++++++++++++++- 2 files changed, 39 insertions(+), 4 deletions(-) diff --git a/src/math/big/nat.go b/src/math/big/nat.go index 22d7a6cac0..3b60232075 100644 --- a/src/math/big/nat.go +++ b/src/math/big/nat.go @@ -463,7 +463,8 @@ func (z nat) mul(x, y nat) nat { // be a larger valid threshold contradicting the assumption about k. // if k < n || m != n { - var t nat + tp := getNat(3 * k) + t := *tp // add x0*y1*b x0 := x0.norm() @@ -484,6 +485,8 @@ func (z nat) mul(x, y nat) nat { t = t.mul(xi, y1) addAt(z, t, i+k) } + + putNat(tp) } return z.norm() @@ -495,7 +498,9 @@ func (z nat) mul(x, y nat) nat { // The (non-normalized) result is placed in z. func basicSqr(z, x nat) { n := len(x) - t := make(nat, 2*n) // temporary variable to hold the products + tp := getNat(2 * n) + t := *tp // temporary variable to hold the products + t.clear() z[1], z[0] = mulWW(x[0], x[0]) // the initial square for i := 1; i < n; i++ { d := x[i] @@ -506,6 +511,7 @@ func basicSqr(z, x nat) { } t[2*n-1] = shlVU(t[1:2*n-1], t[1:2*n-1], 1) // double the j < i products addVV(z, z, t) // combine the result + putNat(tp) } // karatsubaSqr squares x and leaves the result in z. @@ -592,7 +598,8 @@ func (z nat) sqr(x nat) nat { z[2*k:].clear() if k < n { - var t nat + tp := getNat(2 * k) + t := *tp x0 := x0.norm() x1 := x[k:] t = t.mul(x0, x1) @@ -600,6 +607,7 @@ func (z nat) sqr(x nat) nat { addAt(z, t, k) // z = 2*x1*x0*b + x0^2 t = t.sqr(x1) addAt(z, t, 2*k) // z = x1^2*b^2 + 2*x1*x0*b + x0^2 + putNat(tp) } return z.norm() diff --git a/src/math/big/nat_test.go b/src/math/big/nat_test.go index 3c794954dc..bb5e14b5fa 100644 --- a/src/math/big/nat_test.go +++ b/src/math/big/nat_test.go @@ -206,6 +206,29 @@ func BenchmarkMul(b *testing.B) { } } +func benchmarkNatMul(b *testing.B, nwords int) { + x := rndNat(nwords) + y := rndNat(nwords) + var z nat + b.ResetTimer() + for i := 0; i < b.N; i++ { + z.mul(x, y) + } +} + +var mulBenchSizes = []int{10, 100, 1000, 10000, 100000} + +func BenchmarkNatMul(b *testing.B) { + for _, n := range mulBenchSizes { + if isRaceBuilder && n > 1e3 { + continue + } + b.Run(fmt.Sprintf("%d", n), func(b *testing.B) { + benchmarkNatMul(b, n) + }) + } +} + func TestNLZ(t *testing.T) { var x Word = _B >> 1 for i := 0; i <= _W; i++ { @@ -681,7 +704,11 @@ func benchmarkNatSqr(b *testing.B, nwords int) { } } -var sqrBenchSizes = []int{1, 2, 3, 5, 8, 10, 20, 30, 50, 80, 100, 200, 300, 500, 800, 1000} +var sqrBenchSizes = []int{ + 1, 2, 3, 5, 8, 10, 20, 30, 50, 80, + 100, 200, 300, 500, 800, + 1000, 10000, 100000, +} func BenchmarkNatSqr(b *testing.B) { for _, n := range sqrBenchSizes { -- 2.48.1