From 8f30d25168cae6380d9ef50528063716261356c4 Mon Sep 17 00:00:00 2001
From: =?utf8?q?R=C3=A9my=20Oudompheng?= <remyoudompheng@gmail.com>
Date: Sun, 14 Apr 2019 08:00:38 +0200
Subject: [PATCH] math/big: use nat pool to reduce allocations in mul and sqr

This notably allows to reuse temporaries across
the karatsubaSqr recursion.

benchmark                    old ns/op     new ns/op     delta
BenchmarkNatMul/10-4         227           228           +0.44%
BenchmarkNatMul/100-4        8339          8589          +3.00%
BenchmarkNatMul/1000-4       313796        312272        -0.49%
BenchmarkNatMul/10000-4      11924720      11873589      -0.43%
BenchmarkNatMul/100000-4     503813354     503839058     +0.01%
BenchmarkNatSqr/20-4         549           513           -6.56%
BenchmarkNatSqr/30-4         945           874           -7.51%
BenchmarkNatSqr/50-4         1993          1832          -8.08%
BenchmarkNatSqr/80-4         4096          3874          -5.42%
BenchmarkNatSqr/100-4        6192          5712          -7.75%
BenchmarkNatSqr/200-4        20388         19543         -4.14%
BenchmarkNatSqr/300-4        38735         36715         -5.21%
BenchmarkNatSqr/500-4        99562         93542         -6.05%
BenchmarkNatSqr/800-4        195554        184907        -5.44%
BenchmarkNatSqr/1000-4       286302        275053        -3.93%
BenchmarkNatSqr/10000-4      9817057       9441641       -3.82%
BenchmarkNatSqr/100000-4     390713416     379696789     -2.82%

benchmark                    old allocs     new allocs     delta
BenchmarkNatMul/10-4         1              1              +0.00%
BenchmarkNatMul/100-4        1              1              +0.00%
BenchmarkNatMul/1000-4       2              1              -50.00%
BenchmarkNatMul/10000-4      2              1              -50.00%
BenchmarkNatMul/100000-4     9              11             +22.22%
BenchmarkNatSqr/20-4         2              1              -50.00%
BenchmarkNatSqr/30-4         2              1              -50.00%
BenchmarkNatSqr/50-4         2              1              -50.00%
BenchmarkNatSqr/80-4         2              1              -50.00%
BenchmarkNatSqr/100-4        2              1              -50.00%
BenchmarkNatSqr/200-4        2              1              -50.00%
BenchmarkNatSqr/300-4        4              1              -75.00%
BenchmarkNatSqr/500-4        4              1              -75.00%
BenchmarkNatSqr/800-4        10             1              -90.00%
BenchmarkNatSqr/1000-4       10             1              -90.00%
BenchmarkNatSqr/10000-4      731            1              -99.86%
BenchmarkNatSqr/100000-4     19687          6              -99.97%

benchmark                    old bytes     new bytes     delta
BenchmarkNatMul/10-4         192           192           +0.00%
BenchmarkNatMul/100-4        4864          4864          +0.00%
BenchmarkNatMul/1000-4       57344         49224         -14.16%
BenchmarkNatMul/10000-4      565248        498772        -11.76%
BenchmarkNatMul/100000-4     5749504       7263720       +26.34%
BenchmarkNatSqr/20-4         672           352           -47.62%
BenchmarkNatSqr/30-4         992           512           -48.39%
BenchmarkNatSqr/50-4         1792          896           -50.00%
BenchmarkNatSqr/80-4         2688          1408          -47.62%
BenchmarkNatSqr/100-4        3584          1792          -50.00%
BenchmarkNatSqr/200-4        6656          3456          -48.08%
BenchmarkNatSqr/300-4        24448         16387         -32.97%
BenchmarkNatSqr/500-4        36864         24591         -33.29%
BenchmarkNatSqr/800-4        69760         40981         -41.25%
BenchmarkNatSqr/1000-4       86016         49180         -42.82%
BenchmarkNatSqr/10000-4      2524800       487368        -80.70%
BenchmarkNatSqr/100000-4     68599808      5876581       -91.43%

Change-Id: I8e6e409ae1cb48be9d5aa9b5f428d6cbe487673a
Reviewed-on: https://go-review.googlesource.com/c/go/+/172017
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Robert Griesemer <gri@golang.org>
---
 src/math/big/nat.go      | 14 +++++++++++---
 src/math/big/nat_test.go | 29 ++++++++++++++++++++++++++++-
 2 files changed, 39 insertions(+), 4 deletions(-)

diff --git a/src/math/big/nat.go b/src/math/big/nat.go
index 22d7a6cac0..3b60232075 100644
--- a/src/math/big/nat.go
+++ b/src/math/big/nat.go
@@ -463,7 +463,8 @@ func (z nat) mul(x, y nat) nat {
 	// be a larger valid threshold contradicting the assumption about k.
 	//
 	if k < n || m != n {
-		var t nat
+		tp := getNat(3 * k)
+		t := *tp
 
 		// add x0*y1*b
 		x0 := x0.norm()
@@ -484,6 +485,8 @@ func (z nat) mul(x, y nat) nat {
 			t = t.mul(xi, y1)
 			addAt(z, t, i+k)
 		}
+
+		putNat(tp)
 	}
 
 	return z.norm()
@@ -495,7 +498,9 @@ func (z nat) mul(x, y nat) nat {
 // The (non-normalized) result is placed in z.
 func basicSqr(z, x nat) {
 	n := len(x)
-	t := make(nat, 2*n)            // temporary variable to hold the products
+	tp := getNat(2 * n)
+	t := *tp // temporary variable to hold the products
+	t.clear()
 	z[1], z[0] = mulWW(x[0], x[0]) // the initial square
 	for i := 1; i < n; i++ {
 		d := x[i]
@@ -506,6 +511,7 @@ func basicSqr(z, x nat) {
 	}
 	t[2*n-1] = shlVU(t[1:2*n-1], t[1:2*n-1], 1) // double the j < i products
 	addVV(z, z, t)                              // combine the result
+	putNat(tp)
 }
 
 // karatsubaSqr squares x and leaves the result in z.
@@ -592,7 +598,8 @@ func (z nat) sqr(x nat) nat {
 	z[2*k:].clear()
 
 	if k < n {
-		var t nat
+		tp := getNat(2 * k)
+		t := *tp
 		x0 := x0.norm()
 		x1 := x[k:]
 		t = t.mul(x0, x1)
@@ -600,6 +607,7 @@ func (z nat) sqr(x nat) nat {
 		addAt(z, t, k) // z = 2*x1*x0*b + x0^2
 		t = t.sqr(x1)
 		addAt(z, t, 2*k) // z = x1^2*b^2 + 2*x1*x0*b + x0^2
+		putNat(tp)
 	}
 
 	return z.norm()
diff --git a/src/math/big/nat_test.go b/src/math/big/nat_test.go
index 3c794954dc..bb5e14b5fa 100644
--- a/src/math/big/nat_test.go
+++ b/src/math/big/nat_test.go
@@ -206,6 +206,29 @@ func BenchmarkMul(b *testing.B) {
 	}
 }
 
+func benchmarkNatMul(b *testing.B, nwords int) {
+	x := rndNat(nwords)
+	y := rndNat(nwords)
+	var z nat
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		z.mul(x, y)
+	}
+}
+
+var mulBenchSizes = []int{10, 100, 1000, 10000, 100000}
+
+func BenchmarkNatMul(b *testing.B) {
+	for _, n := range mulBenchSizes {
+		if isRaceBuilder && n > 1e3 {
+			continue
+		}
+		b.Run(fmt.Sprintf("%d", n), func(b *testing.B) {
+			benchmarkNatMul(b, n)
+		})
+	}
+}
+
 func TestNLZ(t *testing.T) {
 	var x Word = _B >> 1
 	for i := 0; i <= _W; i++ {
@@ -681,7 +704,11 @@ func benchmarkNatSqr(b *testing.B, nwords int) {
 	}
 }
 
-var sqrBenchSizes = []int{1, 2, 3, 5, 8, 10, 20, 30, 50, 80, 100, 200, 300, 500, 800, 1000}
+var sqrBenchSizes = []int{
+	1, 2, 3, 5, 8, 10, 20, 30, 50, 80,
+	100, 200, 300, 500, 800,
+	1000, 10000, 100000,
+}
 
 func BenchmarkNatSqr(b *testing.B) {
 	for _, n := range sqrBenchSizes {
-- 
2.50.0