math/big: implement Lehmer's extended GCD algorithm

author Brian Kessler <brian.m.kessler@gmail.com>

Tue, 29 Aug 2017 05:38:39 +0000 (22:38 -0700)

committer Robert Griesemer <gri@golang.org>

Tue, 8 May 2018 17:24:36 +0000 (17:24 +0000)
author Brian Kessler <brian.m.kessler@gmail.com>
Tue, 29 Aug 2017 05:38:39 +0000 (22:38 -0700)
committer Robert Griesemer <gri@golang.org>
Tue, 8 May 2018 17:24:36 +0000 (17:24 +0000)
diff --git a/src/math/big/int.go b/src/math/big/int.go

index caebde92fa847a2a88c98da2e9400dd0888de025..b1d09cdad803bdc5fd0f705de5eae1e7f3b505a6 100644 (file)
--- a/src/math/big/int.go
+++ b/src/math/big/int.go
@@ -485,162 +485,223 @@ func (z *Int) GCD(x, y, a, b *Int) *Int {
                 }
                 return z
         }
-       if x == nil && y == nil {
-               return z.lehmerGCD(a, b)
-       }
  
-       A := new(Int).Set(a)
-       B := new(Int).Set(b)
+       return z.lehmerGCD(x, y, a, b)
+}
  
-       X := new(Int)
-       lastX := new(Int).SetInt64(1)
+// lehmerSimulate attempts to simulate several Euclidean update steps
+// using the leading digits of A and B.  It returns u0, u1, v0, v1
+// such that A and B can be updated as:
+//             A = u0*A + v0*B
+//             B = u1*A + v1*B
+// Requirements: A >= B and len(B.abs) >= 2
+// Since we are calculating with full words to avoid overflow,
+// we use 'even' to track the sign of the cosequences.
+// For even iterations: u0, v1 >= 0 && u1, v0 <= 0
+// For odd  iterations: u0, v1 <= 0 && u1, v0 >= 0
+func lehmerSimulate(A, B *Int) (u0, u1, v0, v1 Word, even bool) {
+       // initialize the digits
+       var a1, a2, u2, v2 Word
+
+       m := len(B.abs) // m >= 2
+       n := len(A.abs) // n >= m >= 2
+
+       // extract the top Word of bits from A and B
+       h := nlz(A.abs[n-1])
+       a1 = A.abs[n-1]<<h | A.abs[n-2]>>(_W-h)
+       // B may have implicit zero words in the high bits if the lengths differ
+       switch {
+       case n == m:
+               a2 = B.abs[n-1]<<h | B.abs[n-2]>>(_W-h)
+       case n == m+1:
+               a2 = B.abs[n-2] >> (_W - h)
+       default:
+               a2 = 0
+       }
  
-       q := new(Int)
-       temp := new(Int)
+       // Since we are calculating with full words to avoid overflow,
+       // we use 'even' to track the sign of the cosequences.
+       // For even iterations: u0, v1 >= 0 && u1, v0 <= 0
+       // For odd  iterations: u0, v1 <= 0 && u1, v0 >= 0
+       // The first iteration starts with k=1 (odd).
+       even = false
+       // variables to track the cosequences
+       u0, u1, u2 = 0, 1, 0
+       v0, v1, v2 = 0, 0, 1
+
+       // Calculate the quotient and cosequences using Collins' stopping condition.
+       // Note that overflow of a Word is not possible when computing the remainder
+       // sequence and cosequences since the cosequence size is bounded by the input size.
+       // See section 4.2 of Jebelean for details.
+       for a2 >= v2 && a1-a2 >= v1+v2 {
+               q, r := a1/a2, a1%a2
+               a1, a2 = a2, r
+               u0, u1, u2 = u1, u2, u1+q*u2
+               v0, v1, v2 = v1, v2, v1+q*v2
+               even = !even
+       }
+       return
+}
  
-       r := new(Int)
-       for len(B.abs) > 0 {
-               q, r = q.QuoRem(A, B, r)
+// lehmerUpdate updates the inputs A and B such that:
+//             A = u0*A + v0*B
+//             B = u1*A + v1*B
+// where the signs of u0, u1, v0, v1 are given by even
+// For even == true: u0, v1 >= 0 && u1, v0 <= 0
+// For even == false: u0, v1 <= 0 && u1, v0 >= 0
+// q, r, s, t are temporary variables to avoid allocations in the multiplication
+func lehmerUpdate(A, B, q, r, s, t *Int, u0, u1, v0, v1 Word, even bool) {
+
+       t.abs = t.abs.setWord(u0)
+       s.abs = s.abs.setWord(v0)
+       t.neg = !even
+       s.neg = even
+
+       t.Mul(A, t)
+       s.Mul(B, s)
+
+       r.abs = r.abs.setWord(u1)
+       q.abs = q.abs.setWord(v1)
+       r.neg = even
+       q.neg = !even
+
+       r.Mul(A, r)
+       q.Mul(B, q)
+
+       A.Add(t, s)
+       B.Add(r, q)
+}
  
-               A, B, r = B, r, A
+// euclidUpdate performs a single step of the Euclidean GCD algorithm
+// if extended is true, it also updates the cosequence Ua, Ub
+func euclidUpdate(A, B, Ua, Ub, q, r, s, t *Int, extended bool) {
+       q, r = q.QuoRem(A, B, r)
  
-               temp.Set(X)
-               X.Mul(X, q)
-               X.Sub(lastX, X)
-               lastX.Set(temp)
-       }
+       *A, *B, *r = *B, *r, *A
  
-       if x != nil {
-               *x = *lastX
+       if extended {
+               // Ua, Ub = Ub, Ua - q*Ub
+               t.Set(Ub)
+               s.Mul(Ub, q)
+               Ub.Sub(Ua, s)
+               Ua.Set(t)
         }
-
-       if y != nil {
-               // y = (z - a*x)/b
-               y.Mul(a, lastX)
-               y.Sub(A, y)
-               y.Div(y, b)
-       }
-
-       *z = *A
-       return z
  }
  
  // lehmerGCD sets z to the greatest common divisor of a and b,
  // which both must be > 0, and returns z.
+// If x or y are not nil, their values are set such that z = a*x + b*y.
  // See Knuth, The Art of Computer Programming, Vol. 2, Section 4.5.2, Algorithm L.
  // This implementation uses the improved condition by Collins requiring only one
  // quotient and avoiding the possibility of single Word overflow.
  // See Jebelean, "Improving the multiprecision Euclidean algorithm",
  // Design and Implementation of Symbolic Computation Systems, pp 45-58.
-func (z *Int) lehmerGCD(a, b *Int) *Int {
-       // ensure a >= b
-       if a.abs.cmp(b.abs) < 0 {
-               a, b = b, a
-       }
+// The cosequences are updated according to Algorithm 10.45 from
+// Cohen et al. "Handbook of Elliptic and Hyperelliptic Curve Cryptography" pp 192.
+func (z *Int) lehmerGCD(x, y, a, b *Int) *Int {
+       var A, B, Ua, Ub *Int
+
+       A = new(Int).Set(a)
+       B = new(Int).Set(b)
+
+       extended := x != nil || y != nil
  
-       // don't destroy incoming values of a and b
-       B := new(Int).Set(b) // must be set first in case b is an alias of z
-       A := z.Set(a)
+       if extended {
+               // Ua (Ub) tracks how many times input a has been accumulated into A (B).
+               Ua = new(Int).SetInt64(1)
+               Ub = new(Int)
+       }
  
         // temp variables for multiprecision update
-       t := new(Int)
+       q := new(Int)
         r := new(Int)
         s := new(Int)
-       w := new(Int)
+       t := new(Int)
+
+       // ensure A >= B
+       if A.abs.cmp(B.abs) < 0 {
+               A, B = B, A
+               Ub, Ua = Ua, Ub
+       }
  
         // loop invariant A >= B
         for len(B.abs) > 1 {
-               // initialize the digits
-               var a1, a2, u0, u1, u2, v0, v1, v2 Word
-
-               m := len(B.abs) // m >= 2
-               n := len(A.abs) // n >= m >= 2
-
-               // extract the top Word of bits from A and B
-               h := nlz(A.abs[n-1])
-               a1 = (A.abs[n-1] << h) | (A.abs[n-2] >> (_W - h))
-               // B may have implicit zero words in the high bits if the lengths differ
-               switch {
-               case n == m:
-                       a2 = (B.abs[n-1] << h) | (B.abs[n-2] >> (_W - h))
-               case n == m+1:
-                       a2 = (B.abs[n-2] >> (_W - h))
-               default:
-                       a2 = 0
-               }
+               // Attempt to calculate in single-precision using leading words of A and B.
+               u0, u1, v0, v1, even := lehmerSimulate(A, B)
  
-               // Since we are calculating with full words to avoid overflow,
-               // we use 'even' to track the sign of the cosequences.
-               // For even iterations: u0, v1 >= 0 && u1, v0 <= 0
-               // For odd  iterations: u0, v1 <= 0 && u1, v0 >= 0
-               // The first iteration starts with k=1 (odd).
-               even := false
-               // variables to track the cosequences
-               u0, u1, u2 = 0, 1, 0
-               v0, v1, v2 = 0, 0, 1
-
-               // Calculate the quotient and cosequences using Collins' stopping condition.
-               // Note that overflow of a Word is not possible when computing the remainder
-               // sequence and cosequences since the cosequence size is bounded by the input size.
-               // See section 4.2 of Jebelean for details.
-               for a2 >= v2 && a1-a2 >= v1+v2 {
-                       q := a1 / a2
-                       a1, a2 = a2, a1-q*a2
-                       u0, u1, u2 = u1, u2, u1+q*u2
-                       v0, v1, v2 = v1, v2, v1+q*v2
-                       even = !even
-               }
-
-               // multiprecision step
+               // multiprecision Step
                 if v0 != 0 {
-                       // simulate the effect of the single precision steps using the cosequences
+                       // Simulate the effect of the single-precision steps using the cosequences.
                         // A = u0*A + v0*B
                         // B = u1*A + v1*B
+                       lehmerUpdate(A, B, q, r, s, t, u0, u1, v0, v1, even)
  
-                       t.abs = t.abs.setWord(u0)
-                       s.abs = s.abs.setWord(v0)
-                       t.neg = !even
-                       s.neg = even
-
-                       t.Mul(A, t)
-                       s.Mul(B, s)
-
-                       r.abs = r.abs.setWord(u1)
-                       w.abs = w.abs.setWord(v1)
-                       r.neg = even
-                       w.neg = !even
-
-                       r.Mul(A, r)
-                       w.Mul(B, w)
-
-                       A.Add(t, s)
-                       B.Add(r, w)
+                       if extended {
+                               // Ua = u0*Ua + v0*Ub
+                               // Ub = u1*Ua + v1*Ub
+                               lehmerUpdate(Ua, Ub, q, r, s, t, u0, u1, v0, v1, even)
+                       }
  
                 } else {
-                       // single-digit calculations failed to simulate any quotients
-                       // do a standard Euclidean step
-                       t.Rem(A, B)
-                       A, B, t = B, t, A
+                       // Single-digit calculations failed to simulate any quotients.
+                       // Do a standard Euclidean step.
+                       euclidUpdate(A, B, Ua, Ub, q, r, s, t, extended)
                 }
         }
  
         if len(B.abs) > 0 {
-               // standard Euclidean algorithm base case for B a single Word
+               // extended Euclidean algorithm base case if B is a single Word
                 if len(A.abs) > 1 {
-                       // A is longer than a single Word
-                       t.Rem(A, B)
-                       A, B, t = B, t, A
+                       // A is longer than a single Word, so one update is needed.
+                       euclidUpdate(A, B, Ua, Ub, q, r, s, t, extended)
                 }
                 if len(B.abs) > 0 {
-                       // A and B are both a single Word
-                       a1, a2 := A.abs[0], B.abs[0]
-                       for a2 != 0 {
-                               a1, a2 = a2, a1%a2
+                       // A and B are both a single Word.
+                       aWord, bWord := A.abs[0], B.abs[0]
+                       if extended {
+                               var ua, ub, va, vb Word
+                               ua, ub = 1, 0
+                               va, vb = 0, 1
+                               even := true
+                               for bWord != 0 {
+                                       q, r := aWord/bWord, aWord%bWord
+                                       aWord, bWord = bWord, r
+                                       ua, ub = ub, ua+q*ub
+                                       va, vb = vb, va+q*vb
+                                       even = !even
+                               }
+
+                               t.abs = t.abs.setWord(ua)
+                               s.abs = s.abs.setWord(va)
+                               t.neg = !even
+                               s.neg = even
+
+                               t.Mul(Ua, t)
+                               s.Mul(Ub, s)
+
+                               Ua.Add(t, s)
+                       } else {
+                               for bWord != 0 {
+                                       aWord, bWord = bWord, aWord%bWord
+                               }
                         }
-                       A.abs[0] = a1
+                       A.abs[0] = aWord
                 }
         }
+
+       if x != nil {
+               *x = *Ua
+       }
+
+       if y != nil {
+               // y = (z - a*x)/b
+               y.Mul(a, Ua)
+               y.Sub(A, y)
+               y.Div(y, b)
+       }
+
         *z = *A
+
         return z
  }
  
diff --git a/src/math/big/int_test.go b/src/math/big/int_test.go

index b660d535235dc6d750fb445a16d3c14252761c56..1ef4d150b8aad3603ecd4c8b6db9023136bd7834 100644 (file)
--- a/src/math/big/int_test.go
+++ b/src/math/big/int_test.go
@@ -675,21 +675,43 @@ func checkGcd(aBytes, bBytes []byte) bool {
         return x.Cmp(d) == 0
  }
  
-// euclidGCD is a reference implementation of Euclid's GCD
-// algorithm for testing against optimized algorithms.
+// euclidExtGCD is a reference implementation of Euclid's
+// extended GCD algorithm for testing against optimized algorithms.
  // Requirements: a, b > 0
-func euclidGCD(a, b *Int) *Int {
-
+func euclidExtGCD(a, b *Int) (g, x, y *Int) {
         A := new(Int).Set(a)
         B := new(Int).Set(b)
-       t := new(Int)
  
+       // A = Ua*a + Va*b
+       // B = Ub*a + Vb*b
+       Ua := new(Int).SetInt64(1)
+       Va := new(Int)
+
+       Ub := new(Int)
+       Vb := new(Int).SetInt64(1)
+
+       q := new(Int)
+       temp := new(Int)
+
+       r := new(Int)
         for len(B.abs) > 0 {
-               // A, B = B, A % B
-               t.Rem(A, B)
-               A, B, t = B, t, A
+               q, r = q.QuoRem(A, B, r)
+
+               A, B, r = B, r, A
+
+               // Ua, Ub = Ub, Ua-q*Ub
+               temp.Set(Ub)
+               Ub.Mul(Ub, q)
+               Ub.Sub(Ua, Ub)
+               Ua.Set(temp)
+
+               // Va, Vb = Vb, Va-q*Vb
+               temp.Set(Vb)
+               Vb.Mul(Vb, q)
+               Vb.Sub(Va, Vb)
+               Va.Set(temp)
         }
-       return A
+       return A, Ua, Va
  }
  
  func checkLehmerGcd(aBytes, bBytes []byte) bool {
@@ -700,12 +722,28 @@ func checkLehmerGcd(aBytes, bBytes []byte) bool {
                 return true // can only test positive arguments
         }
  
-       d := new(Int).lehmerGCD(a, b)
-       d0 := euclidGCD(a, b)
+       d := new(Int).lehmerGCD(nil, nil, a, b)
+       d0, _, _ := euclidExtGCD(a, b)
  
         return d.Cmp(d0) == 0
  }
  
+func checkLehmerExtGcd(aBytes, bBytes []byte) bool {
+       a := new(Int).SetBytes(aBytes)
+       b := new(Int).SetBytes(bBytes)
+       x := new(Int)
+       y := new(Int)
+
+       if a.Sign() <= 0 || b.Sign() <= 0 {
+               return true // can only test positive arguments
+       }
+
+       d := new(Int).lehmerGCD(x, y, a, b)
+       d0, x0, y0 := euclidExtGCD(a, b)
+
+       return d.Cmp(d0) == 0 && x.Cmp(x0) == 0 && y.Cmp(y0) == 0
+}
+
  var gcdTests = []struct {
         d, x, y, a, b string
  }{
@@ -785,6 +823,10 @@ func TestGcd(t *testing.T) {
         if err := quick.Check(checkLehmerGcd, nil); err != nil {
                 t.Error(err)
         }
+
+       if err := quick.Check(checkLehmerExtGcd, nil); err != nil {
+               t.Error(err)
+       }
  }
  
  type intShiftTest struct {
diff --git a/src/math/big/rat.go b/src/math/big/rat.go

index b33fc696adf5392a2fa5d18a9190a02f90adc787..46d58fcf365d29c501a0340538989caec9c9bedc 100644 (file)
--- a/src/math/big/rat.go
+++ b/src/math/big/rat.go
@@ -422,7 +422,7 @@ func (z *Rat) norm() *Rat {
                 neg := z.a.neg
                 z.a.neg = false
                 z.b.neg = false
-               if f := NewInt(0).lehmerGCD(&z.a, &z.b); f.Cmp(intOne) != 0 {
+               if f := NewInt(0).lehmerGCD(nil, nil, &z.a, &z.b); f.Cmp(intOne) != 0 {
                         z.a.abs, _ = z.a.abs.div(nil, z.a.abs, f.abs)
                         z.b.abs, _ = z.b.abs.div(nil, z.b.abs, f.abs)
                         if z.b.abs.cmp(natOne) == 0 {
author	Brian Kessler <brian.m.kessler@gmail.com>
	Tue, 29 Aug 2017 05:38:39 +0000 (22:38 -0700)
committer	Robert Griesemer <gri@golang.org>
	Tue, 8 May 2018 17:24:36 +0000 (17:24 +0000)
src/math/big/int.go		patch \| blob \| history
src/math/big/int_test.go		patch \| blob \| history
src/math/big/rat.go		patch \| blob \| history