From: Brian Kessler Date: Thu, 10 Aug 2017 06:17:07 +0000 (-0700) Subject: math/big: implement Lehmer's GCD algorithm X-Git-Tag: go1.10beta1~610 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=1643d4f33a0ed45cef0f6d33aff207ad530f9c94;p=gostls13.git math/big: implement Lehmer's GCD algorithm Updates #15833 Lehmer's GCD algorithm uses single precision calculations to simulate several steps of multiple precision calculations in Euclid's GCD algorithm which leads to a considerable speed up. This implementation uses Collins' simplified testing condition on the single digit cosequences which requires only one quotient and avoids any possibility of overflow. name old time/op new time/op delta GCD10x10/WithoutXY-4 1.82µs ±24% 0.28µs ± 6% -84.40% (p=0.008 n=5+5) GCD10x10/WithXY-4 1.69µs ± 6% 1.71µs ± 6% ~ (p=0.595 n=5+5) GCD10x100/WithoutXY-4 1.87µs ± 2% 0.56µs ± 4% -70.13% (p=0.008 n=5+5) GCD10x100/WithXY-4 2.61µs ± 2% 2.65µs ± 4% ~ (p=0.635 n=5+5) GCD10x1000/WithoutXY-4 2.75µs ± 2% 1.48µs ± 1% -46.06% (p=0.008 n=5+5) GCD10x1000/WithXY-4 5.29µs ± 2% 5.25µs ± 2% ~ (p=0.548 n=5+5) GCD10x10000/WithoutXY-4 10.7µs ± 2% 10.3µs ± 0% -4.38% (p=0.008 n=5+5) GCD10x10000/WithXY-4 22.3µs ± 6% 22.1µs ± 1% ~ (p=1.000 n=5+5) GCD10x100000/WithoutXY-4 93.7µs ± 2% 99.4µs ± 2% +6.09% (p=0.008 n=5+5) GCD10x100000/WithXY-4 196µs ± 2% 199µs ± 2% ~ (p=0.222 n=5+5) GCD100x100/WithoutXY-4 10.1µs ± 2% 2.5µs ± 2% -74.84% (p=0.008 n=5+5) GCD100x100/WithXY-4 21.4µs ± 2% 21.3µs ± 7% ~ (p=0.548 n=5+5) GCD100x1000/WithoutXY-4 11.3µs ± 2% 4.4µs ± 4% -60.87% (p=0.008 n=5+5) GCD100x1000/WithXY-4 24.7µs ± 3% 23.9µs ± 1% ~ (p=0.056 n=5+5) GCD100x10000/WithoutXY-4 26.6µs ± 1% 20.0µs ± 2% -24.82% (p=0.008 n=5+5) GCD100x10000/WithXY-4 78.7µs ± 2% 78.2µs ± 2% ~ (p=0.690 n=5+5) GCD100x100000/WithoutXY-4 174µs ± 2% 171µs ± 1% ~ (p=0.056 n=5+5) GCD100x100000/WithXY-4 563µs ± 4% 561µs ± 2% ~ (p=1.000 n=5+5) GCD1000x1000/WithoutXY-4 120µs ± 5% 29µs ± 3% -75.71% (p=0.008 n=5+5) GCD1000x1000/WithXY-4 355µs ± 4% 358µs ± 2% ~ (p=0.841 n=5+5) GCD1000x10000/WithoutXY-4 140µs ± 2% 49µs ± 2% -65.07% (p=0.008 n=5+5) GCD1000x10000/WithXY-4 626µs ± 3% 628µs ± 9% ~ (p=0.690 n=5+5) GCD1000x100000/WithoutXY-4 340µs ± 4% 259µs ± 6% -23.79% (p=0.008 n=5+5) GCD1000x100000/WithXY-4 3.76ms ± 4% 3.82ms ± 5% ~ (p=0.310 n=5+5) GCD10000x10000/WithoutXY-4 3.11ms ± 3% 0.54ms ± 2% -82.74% (p=0.008 n=5+5) GCD10000x10000/WithXY-4 7.96ms ± 3% 7.69ms ± 3% ~ (p=0.151 n=5+5) GCD10000x100000/WithoutXY-4 3.88ms ± 1% 1.27ms ± 2% -67.21% (p=0.008 n=5+5) GCD10000x100000/WithXY-4 38.1ms ± 2% 38.8ms ± 1% ~ (p=0.095 n=5+5) GCD100000x100000/WithoutXY-4 208ms ± 1% 25ms ± 4% -88.07% (p=0.008 n=5+5) GCD100000x100000/WithXY-4 533ms ± 5% 525ms ± 4% ~ (p=0.548 n=5+5) Change-Id: Ic1e007eb807b93e75f4752e968e98c1f0cb90e43 Reviewed-on: https://go-review.googlesource.com/59450 Run-TryBot: Robert Griesemer TryBot-Result: Gobot Gobot Reviewed-by: Robert Griesemer --- diff --git a/src/math/big/int.go b/src/math/big/int.go index 73d48deb81..c5ff67266a 100644 --- a/src/math/big/int.go +++ b/src/math/big/int.go @@ -476,7 +476,7 @@ func (z *Int) GCD(x, y, a, b *Int) *Int { return z } if x == nil && y == nil { - return z.binaryGCD(a, b) + return z.lehmerGCD(a, b) } A := new(Int).Set(a) @@ -515,64 +515,122 @@ func (z *Int) GCD(x, y, a, b *Int) *Int { return z } -// binaryGCD sets z to the greatest common divisor of a and b, which both must -// be > 0, and returns z. -// See Knuth, The Art of Computer Programming, Vol. 2, Section 4.5.2, Algorithm B. -func (z *Int) binaryGCD(a, b *Int) *Int { - u := z - v := new(Int) - - // use one Euclidean iteration to ensure that u and v are approx. the same size - switch { - case len(a.abs) > len(b.abs): - // must set v before u since u may be alias for a or b (was issue #11284) - v.Rem(a, b) - u.Set(b) - case len(a.abs) < len(b.abs): - v.Rem(b, a) - u.Set(a) - default: - v.Set(b) - u.Set(a) - } - // a, b must not be used anymore (may be aliases with u) +// lehmerGCD sets z to the greatest common divisor of a and b, +// which both must be > 0, and returns z. +// See Knuth, The Art of Computer Programming, Vol. 2, Section 4.5.2, Algorithm L. +// This implementation uses the improved condition by Collins requiring only one +// quotient and avoiding the possibility of single Word overflow. +// See Jebelean, "Improving the multiprecision Euclidean algorithm", +// Design and Implementation of Symbolic Computation Systems, pp 45-58. +func (z *Int) lehmerGCD(a, b *Int) *Int { - // v might be 0 now - if len(v.abs) == 0 { - return u + // ensure a >= b + if a.abs.cmp(b.abs) < 0 { + a, b = b, a } - // u > 0 && v > 0 - // determine largest k such that u = u' << k, v = v' << k - k := u.abs.trailingZeroBits() - if vk := v.abs.trailingZeroBits(); vk < k { - k = vk - } - u.Rsh(u, k) - v.Rsh(v, k) + // don't destroy incoming values of a and b + B := new(Int).Set(b) // must be set first in case b is an alias of z + A := z.Set(a) - // determine t (we know that u > 0) + // temp variables for multiprecision update t := new(Int) - if u.abs[0]&1 != 0 { - // u is odd - t.Neg(v) - } else { - t.Set(u) - } + r := new(Int) + s := new(Int) + w := new(Int) + + // loop invariant A >= B + for len(B.abs) > 1 { + + // initialize the digits + var a1, a2, u0, u1, u2, v0, v1, v2 Word + + m := len(B.abs) // m >= 2 + n := len(A.abs) // n >= m >= 2 + + // extract the top Word of bits from A and B + h := nlz(A.abs[n-1]) + a1 = (A.abs[n-1] << h) | (A.abs[n-2] >> (_W - h)) + // B may have implicit zero words in the high bits if the lengths differ + switch { + case n == m: + a2 = (B.abs[n-1] << h) | (B.abs[n-2] >> (_W - h)) + case n == m+1: + a2 = (B.abs[n-2] >> (_W - h)) + default: + a2 = 0 + } + + // Since we are calculating with full words to avoid overflow, + // we use 'even' to track the sign of the cosequences. + // For even iterations: u0, v1 >= 0 && u1, v0 <= 0 + // For odd iterations: u0, v1 <= 0 && u1, v0 >= 0 + // The first iteration starts with k=1 (odd). + even := false + // variables to track the cosequences + u0, u1, u2 = 0, 1, 0 + v0, v1, v2 = 0, 0, 1 + + // calculate the quotient and cosequences using Collins' stopping condition + for a2 >= v2 && a1-a2 >= v1+v2 { + q := a1 / a2 + a1, a2 = a2, a1-q*a2 + u0, u1, u2 = u1, u2, u1+q*u2 + v0, v1, v2 = v1, v2, v1+q*v2 + even = !even + } + + // multiprecision step + if v0 != 0 { + // simulate the effect of the single precision steps using the cosequences + // A = u0*A + v0*B + // B = u1*A + v1*B + + t.abs = t.abs.setWord(u0) + s.abs = s.abs.setWord(v0) + t.neg = !even + s.neg = even + + t.Mul(A, t) + s.Mul(B, s) + + r.abs = r.abs.setWord(u1) + w.abs = w.abs.setWord(v1) + r.neg = even + w.neg = !even + + r.Mul(A, r) + w.Mul(B, w) + + A.Add(t, s) + B.Add(r, w) - for len(t.abs) > 0 { - // reduce t - t.Rsh(t, t.abs.trailingZeroBits()) - if t.neg { - v, t = t, v - v.neg = len(v.abs) > 0 && !v.neg // 0 has no sign } else { - u, t = t, u + // single-digit calculations failed to simluate any quotients + // do a standard Euclidean step + t.Rem(A, B) + A, B, t = B, t, A } - t.Sub(u, v) } - return z.Lsh(u, k) + if len(B.abs) > 0 { + // standard Euclidean algorithm base case for B a single Word + if len(A.abs) > 1 { + // A is longer than a single Word + t.Rem(A, B) + A, B, t = B, t, A + } + if len(B.abs) > 0 { + // A and B are both a single Word + a1, a2 := A.abs[0], B.abs[0] + for a2 != 0 { + a1, a2 = a2, a1%a2 + } + A.abs[0] = a1 + } + } + *z = *A + return z } // Rand sets z to a pseudo-random number in [0, n) and returns z. diff --git a/src/math/big/int_test.go b/src/math/big/int_test.go index bc2eef5f76..e42917b58e 100644 --- a/src/math/big/int_test.go +++ b/src/math/big/int_test.go @@ -675,6 +675,37 @@ func checkGcd(aBytes, bBytes []byte) bool { return x.Cmp(d) == 0 } +// euclidGCD is a reference implementation of Euclid's GCD +// algorithm for testing against optimized algorithms. +// Requirements: a, b > 0 +func euclidGCD(a, b *Int) *Int { + + A := new(Int).Set(a) + B := new(Int).Set(b) + t := new(Int) + + for len(B.abs) > 0 { + // A, B = B, A % B + t.Rem(A, B) + A, B, t = B, t, A + } + return A +} + +func checkLehmerGcd(aBytes, bBytes []byte) bool { + a := new(Int).SetBytes(aBytes) + b := new(Int).SetBytes(bBytes) + + if a.Sign() <= 0 || b.Sign() <= 0 { + return true // can only test positive arguments + } + + d := new(Int).lehmerGCD(a, b) + d0 := euclidGCD(a, b) + + return d.Cmp(d0) == 0 +} + var gcdTests = []struct { d, x, y, a, b string }{ @@ -708,38 +739,28 @@ func testGcd(t *testing.T, d, x, y, a, b *Int) { D := new(Int).GCD(X, Y, a, b) if D.Cmp(d) != 0 { - t.Errorf("GCD(%s, %s): got d = %s, want %s", a, b, D, d) + t.Errorf("GCD(%s, %s, %s, %s): got d = %s, want %s", x, y, a, b, D, d) } if x != nil && X.Cmp(x) != 0 { - t.Errorf("GCD(%s, %s): got x = %s, want %s", a, b, X, x) + t.Errorf("GCD(%s, %s, %s, %s): got x = %s, want %s", x, y, a, b, X, x) } if y != nil && Y.Cmp(y) != 0 { - t.Errorf("GCD(%s, %s): got y = %s, want %s", a, b, Y, y) - } - - // binaryGCD requires a > 0 && b > 0 - if a.Sign() <= 0 || b.Sign() <= 0 { - return - } - - D.binaryGCD(a, b) - if D.Cmp(d) != 0 { - t.Errorf("binaryGcd(%s, %s): got d = %s, want %s", a, b, D, d) + t.Errorf("GCD(%s, %s, %s, %s): got y = %s, want %s", x, y, a, b, Y, y) } // check results in presence of aliasing (issue #11284) a2 := new(Int).Set(a) b2 := new(Int).Set(b) - a2.binaryGCD(a2, b2) // result is same as 1st argument + a2.GCD(X, Y, a2, b2) // result is same as 1st argument if a2.Cmp(d) != 0 { - t.Errorf("binaryGcd(%s, %s): got d = %s, want %s", a, b, a2, d) + t.Errorf("aliased z = a GCD(%s, %s, %s, %s): got d = %s, want %s", x, y, a, b, a2, d) } a2 = new(Int).Set(a) b2 = new(Int).Set(b) - b2.binaryGCD(a2, b2) // result is same as 2nd argument + b2.GCD(X, Y, a2, b2) // result is same as 2nd argument if b2.Cmp(d) != 0 { - t.Errorf("binaryGcd(%s, %s): got d = %s, want %s", a, b, b2, d) + t.Errorf("aliased z = b GCD(%s, %s, %s, %s): got d = %s, want %s", x, y, a, b, b2, d) } } @@ -760,6 +781,10 @@ func TestGcd(t *testing.T) { if err := quick.Check(checkGcd, nil); err != nil { t.Error(err) } + + if err := quick.Check(checkLehmerGcd, nil); err != nil { + t.Error(err) + } } type intShiftTest struct { diff --git a/src/math/big/rat.go b/src/math/big/rat.go index f0f436e452..b33fc696ad 100644 --- a/src/math/big/rat.go +++ b/src/math/big/rat.go @@ -422,7 +422,7 @@ func (z *Rat) norm() *Rat { neg := z.a.neg z.a.neg = false z.b.neg = false - if f := NewInt(0).binaryGCD(&z.a, &z.b); f.Cmp(intOne) != 0 { + if f := NewInt(0).lehmerGCD(&z.a, &z.b); f.Cmp(intOne) != 0 { z.a.abs, _ = z.a.abs.div(nil, z.a.abs, f.abs) z.b.abs, _ = z.b.abs.div(nil, z.b.abs, f.abs) if z.b.abs.cmp(natOne) == 0 {