This notably allows to reuse temporaries across
the karatsubaSqr recursion.
benchmark old ns/op new ns/op delta
BenchmarkNatMul/10-4 227 228 +0.44%
BenchmarkNatMul/100-4 8339 8589 +3.00%
BenchmarkNatMul/1000-4 313796 312272 -0.49%
BenchmarkNatMul/10000-4
11924720 11873589 -0.43%
BenchmarkNatMul/100000-4
503813354 503839058 +0.01%
BenchmarkNatSqr/20-4 549 513 -6.56%
BenchmarkNatSqr/30-4 945 874 -7.51%
BenchmarkNatSqr/50-4 1993 1832 -8.08%
BenchmarkNatSqr/80-4 4096 3874 -5.42%
BenchmarkNatSqr/100-4 6192 5712 -7.75%
BenchmarkNatSqr/200-4 20388 19543 -4.14%
BenchmarkNatSqr/300-4 38735 36715 -5.21%
BenchmarkNatSqr/500-4 99562 93542 -6.05%
BenchmarkNatSqr/800-4 195554 184907 -5.44%
BenchmarkNatSqr/1000-4 286302 275053 -3.93%
BenchmarkNatSqr/10000-4
9817057 9441641 -3.82%
BenchmarkNatSqr/100000-4
390713416 379696789 -2.82%
benchmark old allocs new allocs delta
BenchmarkNatMul/10-4 1 1 +0.00%
BenchmarkNatMul/100-4 1 1 +0.00%
BenchmarkNatMul/1000-4 2 1 -50.00%
BenchmarkNatMul/10000-4 2 1 -50.00%
BenchmarkNatMul/100000-4 9 11 +22.22%
BenchmarkNatSqr/20-4 2 1 -50.00%
BenchmarkNatSqr/30-4 2 1 -50.00%
BenchmarkNatSqr/50-4 2 1 -50.00%
BenchmarkNatSqr/80-4 2 1 -50.00%
BenchmarkNatSqr/100-4 2 1 -50.00%
BenchmarkNatSqr/200-4 2 1 -50.00%
BenchmarkNatSqr/300-4 4 1 -75.00%
BenchmarkNatSqr/500-4 4 1 -75.00%
BenchmarkNatSqr/800-4 10 1 -90.00%
BenchmarkNatSqr/1000-4 10 1 -90.00%
BenchmarkNatSqr/10000-4 731 1 -99.86%
BenchmarkNatSqr/100000-4 19687 6 -99.97%
benchmark old bytes new bytes delta
BenchmarkNatMul/10-4 192 192 +0.00%
BenchmarkNatMul/100-4 4864 4864 +0.00%
BenchmarkNatMul/1000-4 57344 49224 -14.16%
BenchmarkNatMul/10000-4 565248 498772 -11.76%
BenchmarkNatMul/100000-4
5749504 7263720 +26.34%
BenchmarkNatSqr/20-4 672 352 -47.62%
BenchmarkNatSqr/30-4 992 512 -48.39%
BenchmarkNatSqr/50-4 1792 896 -50.00%
BenchmarkNatSqr/80-4 2688 1408 -47.62%
BenchmarkNatSqr/100-4 3584 1792 -50.00%
BenchmarkNatSqr/200-4 6656 3456 -48.08%
BenchmarkNatSqr/300-4 24448 16387 -32.97%
BenchmarkNatSqr/500-4 36864 24591 -33.29%
BenchmarkNatSqr/800-4 69760 40981 -41.25%
BenchmarkNatSqr/1000-4 86016 49180 -42.82%
BenchmarkNatSqr/10000-4
2524800 487368 -80.70%
BenchmarkNatSqr/100000-4
68599808 5876581 -91.43%
Change-Id: I8e6e409ae1cb48be9d5aa9b5f428d6cbe487673a
Reviewed-on: https://go-review.googlesource.com/c/go/+/172017
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Robert Griesemer <gri@golang.org>
// be a larger valid threshold contradicting the assumption about k.
//
if k < n || m != n {
- var t nat
+ tp := getNat(3 * k)
+ t := *tp
// add x0*y1*b
x0 := x0.norm()
t = t.mul(xi, y1)
addAt(z, t, i+k)
}
+
+ putNat(tp)
}
return z.norm()
// The (non-normalized) result is placed in z.
func basicSqr(z, x nat) {
n := len(x)
- t := make(nat, 2*n) // temporary variable to hold the products
+ tp := getNat(2 * n)
+ t := *tp // temporary variable to hold the products
+ t.clear()
z[1], z[0] = mulWW(x[0], x[0]) // the initial square
for i := 1; i < n; i++ {
d := x[i]
}
t[2*n-1] = shlVU(t[1:2*n-1], t[1:2*n-1], 1) // double the j < i products
addVV(z, z, t) // combine the result
+ putNat(tp)
}
// karatsubaSqr squares x and leaves the result in z.
z[2*k:].clear()
if k < n {
- var t nat
+ tp := getNat(2 * k)
+ t := *tp
x0 := x0.norm()
x1 := x[k:]
t = t.mul(x0, x1)
addAt(z, t, k) // z = 2*x1*x0*b + x0^2
t = t.sqr(x1)
addAt(z, t, 2*k) // z = x1^2*b^2 + 2*x1*x0*b + x0^2
+ putNat(tp)
}
return z.norm()
}
}
+func benchmarkNatMul(b *testing.B, nwords int) {
+ x := rndNat(nwords)
+ y := rndNat(nwords)
+ var z nat
+ b.ResetTimer()
+ for i := 0; i < b.N; i++ {
+ z.mul(x, y)
+ }
+}
+
+var mulBenchSizes = []int{10, 100, 1000, 10000, 100000}
+
+func BenchmarkNatMul(b *testing.B) {
+ for _, n := range mulBenchSizes {
+ if isRaceBuilder && n > 1e3 {
+ continue
+ }
+ b.Run(fmt.Sprintf("%d", n), func(b *testing.B) {
+ benchmarkNatMul(b, n)
+ })
+ }
+}
+
func TestNLZ(t *testing.T) {
var x Word = _B >> 1
for i := 0; i <= _W; i++ {
}
}
-var sqrBenchSizes = []int{1, 2, 3, 5, 8, 10, 20, 30, 50, 80, 100, 200, 300, 500, 800, 1000}
+var sqrBenchSizes = []int{
+ 1, 2, 3, 5, 8, 10, 20, 30, 50, 80,
+ 100, 200, 300, 500, 800,
+ 1000, 10000, 100000,
+}
func BenchmarkNatSqr(b *testing.B) {
for _, n := range sqrBenchSizes {