From: Robert Griesemer <gri@golang.org>
Date: Fri, 22 May 2015 01:13:44 +0000 (-0700)
Subject: cmd/compile/internal/big: update to latest version (run sh vendor.bash)
X-Git-Tag: go1.5beta1~490
X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=db0594b633525e5b88e1b508019edaeb1b44ed2f;p=gostls13.git

cmd/compile/internal/big: update to latest version (run sh vendor.bash)

No manual code changes.

This will permit addressing the compiler aspect of issue #10321 in a
subsequent change.

Change-Id: I3376dc38cafa0ec98bf54de33293015d0183cc82
Reviewed-on: https://go-review.googlesource.com/10354
Reviewed-by: Josh Bleecher Snyder <josharian@gmail.com>
---

diff --git a/src/cmd/compile/internal/big/arith.go b/src/cmd/compile/internal/big/arith.go
index 328c85c4f7..1ff6349d9d 100644
--- a/src/cmd/compile/internal/big/arith.go
+++ b/src/cmd/compile/internal/big/arith.go
@@ -196,7 +196,6 @@ func subVV_g(z, x, y []Word) (c Word) {
 	return
 }
 
-// Argument y must be either 0 or 1.
 // The resulting carry c is either 0 or 1.
 func addVW_g(z, x []Word, y Word) (c Word) {
 	if use_addWW_g {
diff --git a/src/cmd/compile/internal/big/arith_test.go b/src/cmd/compile/internal/big/arith_test.go
index cd92dd7173..f46a494f17 100644
--- a/src/cmd/compile/internal/big/arith_test.go
+++ b/src/cmd/compile/internal/big/arith_test.go
@@ -155,6 +155,7 @@ var sumVW = []argVW{
 	{nat{1}, nat{1}, 0, 0},
 	{nat{0}, nat{_M}, 1, 1},
 	{nat{0, 0, 0, 0}, nat{_M, _M, _M, _M}, 1, 1},
+	{nat{585}, nat{314}, 271, 0},
 }
 
 var prodVW = []argVW{
diff --git a/src/cmd/compile/internal/big/float.go b/src/cmd/compile/internal/big/float.go
index ed55e8e513..dcb72c5754 100644
--- a/src/cmd/compile/internal/big/float.go
+++ b/src/cmd/compile/internal/big/float.go
@@ -65,12 +65,16 @@ type Float struct {
 	exp  int32
 }
 
-// Float operations that would lead to a NaN under IEEE-754 rules cause
-// a run-time panic of ErrNaN type.
+// An ErrNaN panic is raised by a Float operation that would lead to
+// a NaN under IEEE-754 rules. An ErrNaN implements the error interface.
 type ErrNaN struct {
 	msg string
 }
 
+func (err ErrNaN) Error() string {
+	return err.msg
+}
+
 // NewFloat allocates and returns a new Float set to x,
 // with precision 53 and rounding mode ToNearestEven.
 // NewFloat panics with ErrNaN if x is a NaN.
@@ -849,9 +853,6 @@ func (x *Float) Int64() (int64, Accuracy) {
 	panic("unreachable")
 }
 
-// TODO(gri) Float32 and Float64 are very similar internally but for the
-// floatxx parameters and some conversions. Should factor out shared code.
-
 // Float32 returns the float32 value nearest to x. If x is too small to be
 // represented by a float32 (|x| < math.SmallestNonzeroFloat32), the result
 // is (0, Below) or (-0, Above), respectively, depending on the sign of x.
@@ -876,64 +877,70 @@ func (x *Float) Float32() (float32, Accuracy) {
 			emax  = bias              //   127  largest unbiased exponent (normal)
 		)
 
-		// Float mantissae m have an explicit msb and are in the range 0.5 <= m < 1.0.
-		// floatxx mantissae have an implicit msb and are in the range 1.0 <= m < 2.0.
-		// For a given mantissa m, we need to add 1 to a floatxx exponent to get the
-		// corresponding Float exponent.
-		// (see also implementation of math.Ldexp for similar code)
-
-		if x.exp < dmin+1 {
-			// underflow
-			if x.neg {
-				var z float32
-				return -z, Above
+		// Float mantissa m is 0.5 <= m < 1.0; compute exponent for floatxx mantissa.
+		e := x.exp - 1 // exponent for mantissa m with 1.0 <= m < 2.0
+		p := mbits + 1 // precision of normal float
+
+		// If the exponent is too small, we may have a denormal number
+		// in which case we have fewer mantissa bits available: reduce
+		// precision accordingly.
+		if e < emin {
+			p -= emin - int(e)
+			// Make sure we have at least 1 bit so that we don't
+			// lose numbers rounded up to the smallest denormal.
+			if p < 1 {
+				p = 1
 			}
-			return 0.0, Below
 		}
-		// x.exp >= dmin+1
 
+		// round
 		var r Float
-		r.prec = mbits + 1 // +1 for implicit msb
-		if x.exp < emin+1 {
-			// denormal number - round to fewer bits
-			r.prec = uint32(x.exp - dmin)
-		}
+		r.prec = uint32(p)
 		r.Set(x)
+		e = r.exp - 1
 
 		// Rounding may have caused r to overflow to Â±Inf
 		// (rounding never causes underflows to 0).
 		if r.form == inf {
-			r.exp = emax + 2 // cause overflow below
+			e = emax + 1 // cause overflow below
 		}
 
-		if r.exp > emax+1 {
+		// If the exponent is too large, overflow to Â±Inf.
+		if e > emax {
 			// overflow
 			if x.neg {
 				return float32(math.Inf(-1)), Below
 			}
 			return float32(math.Inf(+1)), Above
 		}
-		// dmin+1 <= r.exp <= emax+1
 
-		var s uint32
-		if r.neg {
-			s = 1 << (fbits - 1)
+		// Determine sign, biased exponent, and mantissa.
+		var sign, bexp, mant uint32
+		if x.neg {
+			sign = 1 << (fbits - 1)
 		}
 
-		m := high32(r.mant) >> ebits & (1<<mbits - 1) // cut off msb (implicit 1 bit)
-
 		// Rounding may have caused a denormal number to
 		// become normal. Check again.
-		c := float32(1.0)
-		if r.exp < emin+1 {
+		if e < emin {
 			// denormal number
-			r.exp += mbits
-			c = 1.0 / (1 << mbits) // 2**-mbits
+			if e < dmin {
+				// underflow to Â±0
+				if x.neg {
+					var z float32
+					return -z, Above
+				}
+				return 0.0, Below
+			}
+			// bexp = 0
+			mant = high32(r.mant) >> (fbits - r.prec)
+		} else {
+			// normal number: emin <= e <= emax
+			bexp = uint32(e+bias) << mbits
+			mant = high32(r.mant) >> ebits & (1<<mbits - 1) // cut off msb (implicit 1 bit)
 		}
-		// emin+1 <= r.exp <= emax+1
-		e := uint32(r.exp-emin) << mbits
 
-		return c * math.Float32frombits(s|e|m), r.acc
+		return math.Float32frombits(sign | bexp | mant), r.acc
 
 	case zero:
 		if x.neg {
@@ -976,64 +983,70 @@ func (x *Float) Float64() (float64, Accuracy) {
 			emax  = bias              //  1023  largest unbiased exponent (normal)
 		)
 
-		// Float mantissae m have an explicit msb and are in the range 0.5 <= m < 1.0.
-		// floatxx mantissae have an implicit msb and are in the range 1.0 <= m < 2.0.
-		// For a given mantissa m, we need to add 1 to a floatxx exponent to get the
-		// corresponding Float exponent.
-		// (see also implementation of math.Ldexp for similar code)
-
-		if x.exp < dmin+1 {
-			// underflow
-			if x.neg {
-				var z float64
-				return -z, Above
+		// Float mantissa m is 0.5 <= m < 1.0; compute exponent for floatxx mantissa.
+		e := x.exp - 1 // exponent for mantissa m with 1.0 <= m < 2.0
+		p := mbits + 1 // precision of normal float
+
+		// If the exponent is too small, we may have a denormal number
+		// in which case we have fewer mantissa bits available: reduce
+		// precision accordingly.
+		if e < emin {
+			p -= emin - int(e)
+			// Make sure we have at least 1 bit so that we don't
+			// lose numbers rounded up to the smallest denormal.
+			if p < 1 {
+				p = 1
 			}
-			return 0.0, Below
 		}
-		// x.exp >= dmin+1
 
+		// round
 		var r Float
-		r.prec = mbits + 1 // +1 for implicit msb
-		if x.exp < emin+1 {
-			// denormal number - round to fewer bits
-			r.prec = uint32(x.exp - dmin)
-		}
+		r.prec = uint32(p)
 		r.Set(x)
+		e = r.exp - 1
 
 		// Rounding may have caused r to overflow to Â±Inf
 		// (rounding never causes underflows to 0).
 		if r.form == inf {
-			r.exp = emax + 2 // cause overflow below
+			e = emax + 1 // cause overflow below
 		}
 
-		if r.exp > emax+1 {
+		// If the exponent is too large, overflow to Â±Inf.
+		if e > emax {
 			// overflow
 			if x.neg {
 				return math.Inf(-1), Below
 			}
 			return math.Inf(+1), Above
 		}
-		// dmin+1 <= r.exp <= emax+1
 
-		var s uint64
-		if r.neg {
-			s = 1 << (fbits - 1)
+		// Determine sign, biased exponent, and mantissa.
+		var sign, bexp, mant uint64
+		if x.neg {
+			sign = 1 << (fbits - 1)
 		}
 
-		m := high64(r.mant) >> ebits & (1<<mbits - 1) // cut off msb (implicit 1 bit)
-
 		// Rounding may have caused a denormal number to
 		// become normal. Check again.
-		c := 1.0
-		if r.exp < emin+1 {
+		if e < emin {
 			// denormal number
-			r.exp += mbits
-			c = 1.0 / (1 << mbits) // 2**-mbits
+			if e < dmin {
+				// underflow to Â±0
+				if x.neg {
+					var z float64
+					return -z, Above
+				}
+				return 0.0, Below
+			}
+			// bexp = 0
+			mant = high64(r.mant) >> (fbits - r.prec)
+		} else {
+			// normal number: emin <= e <= emax
+			bexp = uint64(e+bias) << mbits
+			mant = high64(r.mant) >> ebits & (1<<mbits - 1) // cut off msb (implicit 1 bit)
 		}
-		// emin+1 <= r.exp <= emax+1
-		e := uint64(r.exp-emin) << mbits
 
-		return c * math.Float64frombits(s|e|m), r.acc
+		return math.Float64frombits(sign | bexp | mant), r.acc
 
 	case zero:
 		if x.neg {
diff --git a/src/cmd/compile/internal/big/float_test.go b/src/cmd/compile/internal/big/float_test.go
index de79b07aaf..8bd3a9c8c9 100644
--- a/src/cmd/compile/internal/big/float_test.go
+++ b/src/cmd/compile/internal/big/float_test.go
@@ -12,6 +12,9 @@ import (
 	"testing"
 )
 
+// Verify that ErrNaN implements the error interface.
+var _ error = ErrNaN{}
+
 func (x *Float) uint64() uint64 {
 	u, acc := x.Uint64()
 	if acc != Exact {
@@ -200,6 +203,18 @@ func alike(x, y *Float) bool {
 	return x.Cmp(y) == 0 && x.Signbit() == y.Signbit()
 }
 
+func alike32(x, y float32) bool {
+	// we can ignore NaNs
+	return x == y && math.Signbit(float64(x)) == math.Signbit(float64(y))
+
+}
+
+func alike64(x, y float64) bool {
+	// we can ignore NaNs
+	return x == y && math.Signbit(x) == math.Signbit(y)
+
+}
+
 func TestFloatMantExp(t *testing.T) {
 	for _, test := range []struct {
 		x    string
@@ -825,52 +840,69 @@ func TestFloatFloat32(t *testing.T) {
 		out float32
 		acc Accuracy
 	}{
-		{"-Inf", float32(math.Inf(-1)), Exact},
-		{"-0x1.ffffff0p2147483646", float32(-math.Inf(+1)), Below}, // overflow in rounding
-		{"-1e10000", float32(math.Inf(-1)), Below},                 // overflow
-		{"-0x1p128", float32(math.Inf(-1)), Below},                 // overflow
-		{"-0x1.ffffff0p127", float32(-math.Inf(+1)), Below},        // overflow
-		{"-0x1.fffffe8p127", -math.MaxFloat32, Above},
-		{"-0x1.fffffe0p127", -math.MaxFloat32, Exact},
-		{"-12345.000000000000000000001", -12345, Above},
-		{"-12345.0", -12345, Exact},
-		{"-1.000000000000000000001", -1, Above},
-		{"-1", -1, Exact},
-		{"-0x0.000002p-126", -math.SmallestNonzeroFloat32, Exact},
-		{"-0x0.000002p-127", -0, Above}, // underflow
-		{"-1e-1000", -0, Above},         // underflow
 		{"0", 0, Exact},
-		{"1e-1000", 0, Below},         // underflow
-		{"0x0.000002p-127", 0, Below}, // underflow
-		{"0x0.000002p-126", math.SmallestNonzeroFloat32, Exact},
+
+		// underflow
+		{"1e-1000", 0, Below},
+		{"0x0.000002p-127", 0, Below},
+		{"0x.0000010p-126", 0, Below},
+
+		// denormals
+		{"1.401298464e-45", math.SmallestNonzeroFloat32, Above}, // rounded up to smallest denormal
+		{"0x.ffffff8p-149", math.SmallestNonzeroFloat32, Above}, // rounded up to smallest denormal
+		{"0x.0000018p-126", math.SmallestNonzeroFloat32, Above}, // rounded up to smallest denormal
+		{"0x.0000020p-126", math.SmallestNonzeroFloat32, Exact},
+		{"0x.8p-148", math.SmallestNonzeroFloat32, Exact},
+		{"1p-149", math.SmallestNonzeroFloat32, Exact},
+		{"0x.fffffep-126", math.Float32frombits(0x7fffff), Exact}, // largest denormal
+
+		// normals
+		{"0x.ffffffp-126", math.Float32frombits(0x00800000), Above}, // rounded up to smallest normal
+		{"1p-126", math.Float32frombits(0x00800000), Exact},         // smallest normal
+		{"0x1.fffffep-126", math.Float32frombits(0x00ffffff), Exact},
+		{"0x1.ffffffp-126", math.Float32frombits(0x01000000), Above}, // rounded up
 		{"1", 1, Exact},
 		{"1.000000000000000000001", 1, Below},
 		{"12345.0", 12345, Exact},
 		{"12345.000000000000000000001", 12345, Below},
 		{"0x1.fffffe0p127", math.MaxFloat32, Exact},
 		{"0x1.fffffe8p127", math.MaxFloat32, Below},
-		{"0x1.ffffff0p127", float32(math.Inf(+1)), Above},        // overflow
-		{"0x1p128", float32(math.Inf(+1)), Above},                // overflow
-		{"1e10000", float32(math.Inf(+1)), Above},                // overflow
+
+		// overflow
+		{"0x1.ffffff0p127", float32(math.Inf(+1)), Above},
+		{"0x1p128", float32(math.Inf(+1)), Above},
+		{"1e10000", float32(math.Inf(+1)), Above},
 		{"0x1.ffffff0p2147483646", float32(math.Inf(+1)), Above}, // overflow in rounding
-		{"+Inf", float32(math.Inf(+1)), Exact},
+
+		// inf
+		{"Inf", float32(math.Inf(+1)), Exact},
 	} {
-		// conversion should match strconv where syntax is agreeable
-		if f, err := strconv.ParseFloat(test.x, 32); err == nil && float32(f) != test.out {
-			t.Errorf("%s: got %g; want %g (incorrect test data)", test.x, f, test.out)
-		}
+		for i := 0; i < 2; i++ {
+			// test both signs
+			tx, tout, tacc := test.x, test.out, test.acc
+			if i != 0 {
+				tx = "-" + tx
+				tout = -tout
+				tacc = -tacc
+			}
 
-		x := makeFloat(test.x)
-		out, acc := x.Float32()
-		if out != test.out || acc != test.acc {
-			t.Errorf("%s: got %g (%#x, %s); want %g (%#x, %s)", test.x, out, math.Float32bits(out), acc, test.out, math.Float32bits(test.out), test.acc)
-		}
+			// conversion should match strconv where syntax is agreeable
+			if f, err := strconv.ParseFloat(tx, 32); err == nil && !alike32(float32(f), tout) {
+				t.Errorf("%s: got %g; want %g (incorrect test data)", tx, f, tout)
+			}
+
+			x := makeFloat(tx)
+			out, acc := x.Float32()
+			if !alike32(out, tout) || acc != tacc {
+				t.Errorf("%s: got %g (%#x, %s); want %g (%#x, %s)", tx, out, math.Float32bits(out), acc, test.out, math.Float32bits(test.out), tacc)
+			}
 
-		// test that x.SetFloat64(float64(f)).Float32() == f
-		var x2 Float
-		out2, acc2 := x2.SetFloat64(float64(out)).Float32()
-		if out2 != out || acc2 != Exact {
-			t.Errorf("idempotency test: got %g (%s); want %g (Exact)", out2, acc2, out)
+			// test that x.SetFloat64(float64(f)).Float32() == f
+			var x2 Float
+			out2, acc2 := x2.SetFloat64(float64(out)).Float32()
+			if !alike32(out2, out) || acc2 != Exact {
+				t.Errorf("idempotency test: got %g (%s); want %g (Exact)", out2, acc2, out)
+			}
 		}
 	}
 }
@@ -882,35 +914,36 @@ func TestFloatFloat64(t *testing.T) {
 		out float64
 		acc Accuracy
 	}{
-		{"-Inf", math.Inf(-1), Exact},
-		{"-0x1.fffffffffffff8p2147483646", -math.Inf(+1), Below}, // overflow in rounding
-		{"-1e10000", math.Inf(-1), Below},                        // overflow
-		{"-0x1p1024", math.Inf(-1), Below},                       // overflow
-		{"-0x1.fffffffffffff8p1023", -math.Inf(+1), Below},       // overflow
-		{"-0x1.fffffffffffff4p1023", -math.MaxFloat64, Above},
-		{"-0x1.fffffffffffff0p1023", -math.MaxFloat64, Exact},
-		{"-12345.000000000000000000001", -12345, Above},
-		{"-12345.0", -12345, Exact},
-		{"-1.000000000000000000001", -1, Above},
-		{"-1", -1, Exact},
-		{"-0x0.0000000000001p-1022", -math.SmallestNonzeroFloat64, Exact},
-		{"-0x0.0000000000001p-1023", -0, Above}, // underflow
-		{"-1e-1000", -0, Above},                 // underflow
 		{"0", 0, Exact},
-		{"1e-1000", 0, Below},                 // underflow
-		{"0x0.0000000000001p-1023", 0, Below}, // underflow
-		{"0x0.0000000000001p-1022", math.SmallestNonzeroFloat64, Exact},
+
+		// underflow
+		{"1e-1000", 0, Below},
+		{"0x0.0000000000001p-1023", 0, Below},
+		{"0x0.00000000000008p-1022", 0, Below},
+
+		// denormals
+		{"0x0.0000000000000cp-1022", math.SmallestNonzeroFloat64, Above}, // rounded up to smallest denormal
+		{"0x0.0000000000001p-1022", math.SmallestNonzeroFloat64, Exact},  // smallest denormal
+		{"0x.8p-1073", math.SmallestNonzeroFloat64, Exact},
+		{"1p-1074", math.SmallestNonzeroFloat64, Exact},
+		{"0x.fffffffffffffp-1022", math.Float64frombits(0x000fffffffffffff), Exact}, // largest denormal
+
+		// normals
+		{"0x.fffffffffffff8p-1022", math.Float64frombits(0x0010000000000000), Above}, // rounded up to smallest normal
+		{"1p-1022", math.Float64frombits(0x0010000000000000), Exact},                 // smallest normal
 		{"1", 1, Exact},
 		{"1.000000000000000000001", 1, Below},
 		{"12345.0", 12345, Exact},
 		{"12345.000000000000000000001", 12345, Below},
 		{"0x1.fffffffffffff0p1023", math.MaxFloat64, Exact},
 		{"0x1.fffffffffffff4p1023", math.MaxFloat64, Below},
-		{"0x1.fffffffffffff8p1023", math.Inf(+1), Above},       // overflow
-		{"0x1p1024", math.Inf(+1), Above},                      // overflow
-		{"1e10000", math.Inf(+1), Above},                       // overflow
+
+		// overflow
+		{"0x1.fffffffffffff8p1023", math.Inf(+1), Above},
+		{"0x1p1024", math.Inf(+1), Above},
+		{"1e10000", math.Inf(+1), Above},
 		{"0x1.fffffffffffff8p2147483646", math.Inf(+1), Above}, // overflow in rounding
-		{"+Inf", math.Inf(+1), Exact},
+		{"Inf", math.Inf(+1), Exact},
 
 		// selected denormalized values that were handled incorrectly in the past
 		{"0x.fffffffffffffp-1022", smallestNormalFloat64 - math.SmallestNonzeroFloat64, Exact},
@@ -921,22 +954,32 @@ func TestFloatFloat64(t *testing.T) {
 		// http://www.exploringbinary.com/java-hangs-when-converting-2-2250738585072012e-308/
 		{"2.2250738585072012e-308", 2.2250738585072014e-308, Above},
 	} {
-		// conversion should match strconv where syntax is agreeable
-		if f, err := strconv.ParseFloat(test.x, 64); err == nil && f != test.out {
-			t.Errorf("%s: got %g; want %g (incorrect test data)", test.x, f, test.out)
-		}
+		for i := 0; i < 2; i++ {
+			// test both signs
+			tx, tout, tacc := test.x, test.out, test.acc
+			if i != 0 {
+				tx = "-" + tx
+				tout = -tout
+				tacc = -tacc
+			}
 
-		x := makeFloat(test.x)
-		out, acc := x.Float64()
-		if out != test.out || acc != test.acc {
-			t.Errorf("%s: got %g (%#x, %s); want %g (%#x, %s)", test.x, out, math.Float64bits(out), acc, test.out, math.Float64bits(test.out), test.acc)
-		}
+			// conversion should match strconv where syntax is agreeable
+			if f, err := strconv.ParseFloat(tx, 64); err == nil && !alike64(f, tout) {
+				t.Errorf("%s: got %g; want %g (incorrect test data)", tx, f, tout)
+			}
 
-		// test that x.SetFloat64(f).Float64() == f
-		var x2 Float
-		out2, acc2 := x2.SetFloat64(out).Float64()
-		if out2 != out || acc2 != Exact {
-			t.Errorf("idempotency test: got %g (%s); want %g (Exact)", out2, acc2, out)
+			x := makeFloat(tx)
+			out, acc := x.Float64()
+			if !alike64(out, tout) || acc != tacc {
+				t.Errorf("%s: got %g (%#x, %s); want %g (%#x, %s)", tx, out, math.Float64bits(out), acc, test.out, math.Float64bits(test.out), tacc)
+			}
+
+			// test that x.SetFloat64(f).Float64() == f
+			var x2 Float
+			out2, acc2 := x2.SetFloat64(out).Float64()
+			if !alike64(out2, out) || acc2 != Exact {
+				t.Errorf("idempotency test: got %g (%s); want %g (Exact)", out2, acc2, out)
+			}
 		}
 	}
 }
diff --git a/src/cmd/compile/internal/big/int.go b/src/cmd/compile/internal/big/int.go
index 7b419bf688..5e3125375b 100644
--- a/src/cmd/compile/internal/big/int.go
+++ b/src/cmd/compile/internal/big/int.go
@@ -583,6 +583,124 @@ func (z *Int) ModInverse(g, n *Int) *Int {
 	return z
 }
 
+// Jacobi returns the Jacobi symbol (x/y), either +1, -1, or 0.
+// The y argument must be an odd integer.
+func Jacobi(x, y *Int) int {
+	if len(y.abs) == 0 || y.abs[0]&1 == 0 {
+		panic(fmt.Sprintf("big: invalid 2nd argument to Int.Jacobi: need odd integer but got %s", y))
+	}
+
+	// We use the formulation described in chapter 2, section 2.4,
+	// "The Yacas Book of Algorithms":
+	// http://yacas.sourceforge.net/Algo.book.pdf
+
+	var a, b, c Int
+	a.Set(x)
+	b.Set(y)
+	j := 1
+
+	if b.neg {
+		if a.neg {
+			j = -1
+		}
+		b.neg = false
+	}
+
+	for {
+		if b.Cmp(intOne) == 0 {
+			return j
+		}
+		if len(a.abs) == 0 {
+			return 0
+		}
+		a.Mod(&a, &b)
+		if len(a.abs) == 0 {
+			return 0
+		}
+		// a > 0
+
+		// handle factors of 2 in 'a'
+		s := a.abs.trailingZeroBits()
+		if s&1 != 0 {
+			bmod8 := b.abs[0] & 7
+			if bmod8 == 3 || bmod8 == 5 {
+				j = -j
+			}
+		}
+		c.Rsh(&a, s) // a = 2^s*c
+
+		// swap numerator and denominator
+		if b.abs[0]&3 == 3 && c.abs[0]&3 == 3 {
+			j = -j
+		}
+		a.Set(&b)
+		b.Set(&c)
+	}
+}
+
+// ModSqrt sets z to a square root of x mod p if such a square root exists, and
+// returns z. The modulus p must be an odd prime. If x is not a square mod p,
+// ModSqrt leaves z unchanged and returns nil. This function panics if p is
+// not an odd integer.
+func (z *Int) ModSqrt(x, p *Int) *Int {
+	switch Jacobi(x, p) {
+	case -1:
+		return nil // x is not a square mod p
+	case 0:
+		return z.SetInt64(0) // sqrt(0) mod p = 0
+	case 1:
+		break
+	}
+	if x.neg || x.Cmp(p) >= 0 { // ensure 0 <= x < p
+		x = new(Int).Mod(x, p)
+	}
+
+	// Break p-1 into s*2^e such that s is odd.
+	var s Int
+	s.Sub(p, intOne)
+	e := s.abs.trailingZeroBits()
+	s.Rsh(&s, e)
+
+	// find some non-square n
+	var n Int
+	n.SetInt64(2)
+	for Jacobi(&n, p) != -1 {
+		n.Add(&n, intOne)
+	}
+
+	// Core of the Tonelli-Shanks algorithm. Follows the description in
+	// section 6 of "Square roots from 1; 24, 51, 10 to Dan Shanks" by Ezra
+	// Brown:
+	// https://www.maa.org/sites/default/files/pdf/upload_library/22/Polya/07468342.di020786.02p0470a.pdf
+	var y, b, g, t Int
+	y.Add(&s, intOne)
+	y.Rsh(&y, 1)
+	y.Exp(x, &y, p)  // y = x^((s+1)/2)
+	b.Exp(x, &s, p)  // b = x^s
+	g.Exp(&n, &s, p) // g = n^s
+	r := e
+	for {
+		// find the least m such that ord_p(b) = 2^m
+		var m uint
+		t.Set(&b)
+		for t.Cmp(intOne) != 0 {
+			t.Mul(&t, &t).Mod(&t, p)
+			m++
+		}
+
+		if m == 0 {
+			return z.Set(&y)
+		}
+
+		t.SetInt64(0).SetBit(&t, int(r-m-1), 1).Exp(&g, &t, p)
+		// t = g^(2^(r-m-1)) mod p
+		g.Mul(&t, &t).Mod(&g, p) // g = g^(2^(r-m)) mod p
+		y.Mul(&y, &t).Mod(&y, p)
+		b.Mul(&b, &g).Mod(&b, p)
+		r = m
+	}
+}
+
 // Lsh sets z = x << n and returns z.
 func (z *Int) Lsh(x *Int, n uint) *Int {
 	z.abs = z.abs.shl(x.abs, n)
diff --git a/src/cmd/compile/internal/big/int_test.go b/src/cmd/compile/internal/big/int_test.go
index a972a7249b..c19e88addb 100644
--- a/src/cmd/compile/internal/big/int_test.go
+++ b/src/cmd/compile/internal/big/int_test.go
@@ -525,6 +525,7 @@ var expTests = []struct {
 	{"1234", "-1", "1", "0"},
 
 	// misc
+	{"5", "1", "3", "2"},
 	{"5", "-7", "", "1"},
 	{"-5", "-7", "", "1"},
 	{"5", "0", "", "1"},
@@ -703,6 +704,13 @@ var primes = []string{
 	"230975859993204150666423538988557839555560243929065415434980904258310530753006723857139742334640122533598517597674807096648905501653461687601339782814316124971547968912893214002992086353183070342498989426570593",
 	"5521712099665906221540423207019333379125265462121169655563495403888449493493629943498064604536961775110765377745550377067893607246020694972959780839151452457728855382113555867743022746090187341871655890805971735385789993",
 	"203956878356401977405765866929034577280193993314348263094772646453283062722701277632936616063144088173312372882677123879538709400158306567338328279154499698366071906766440037074217117805690872792848149112022286332144876183376326512083574821647933992961249917319836219304274280243803104015000563790123",
+
+	// ECC primes: http://tools.ietf.org/html/draft-ladd-safecurves-02
+	"3618502788666131106986593281521497120414687020801267626233049500247285301239",                                                                                  // Curve1174: 2^251-9
+	"57896044618658097711785492504343953926634992332820282019728792003956564819949",                                                                                 // Curve25519: 2^255-19
+	"9850501549098619803069760025035903451269934817616361666987073351061430442874302652853566563721228910201656997576599",                                           // E-382: 2^382-105
+	"42307582002575910332922579714097346549017899709713998034217522897561970639123926132812109468141778230245837569601494931472367",                                 // Curve41417: 2^414-17
+	"6864797660130609714981900799081393217269435300143305409394463459185543183397656052122559640661454554977296311391480858037121987999716643812574028291115057151", // E-521: 2^521-1
 }
 
 var composites = []string{
@@ -1248,6 +1256,136 @@ func TestModInverse(t *testing.T) {
 	}
 }
 
+// testModSqrt is a helper for TestModSqrt,
+// which checks that ModSqrt can compute a square-root of elt^2.
+func testModSqrt(t *testing.T, elt, mod, sq, sqrt *Int) bool {
+	var sqChk, sqrtChk, sqrtsq Int
+	sq.Mul(elt, elt)
+	sq.Mod(sq, mod)
+	z := sqrt.ModSqrt(sq, mod)
+	if z != sqrt {
+		t.Errorf("ModSqrt returned wrong value %s", z)
+	}
+
+	// test ModSqrt arguments outside the range [0,mod)
+	sqChk.Add(sq, mod)
+	z = sqrtChk.ModSqrt(&sqChk, mod)
+	if z != &sqrtChk || z.Cmp(sqrt) != 0 {
+		t.Errorf("ModSqrt returned inconsistent value %s", z)
+	}
+	sqChk.Sub(sq, mod)
+	z = sqrtChk.ModSqrt(&sqChk, mod)
+	if z != &sqrtChk || z.Cmp(sqrt) != 0 {
+		t.Errorf("ModSqrt returned inconsistent value %s", z)
+	}
+
+	// make sure we actually got a square root
+	if sqrt.Cmp(elt) == 0 {
+		return true // we found the "desired" square root
+	}
+	sqrtsq.Mul(sqrt, sqrt) // make sure we found the "other" one
+	sqrtsq.Mod(&sqrtsq, mod)
+	return sq.Cmp(&sqrtsq) == 0
+}
+
+func TestModSqrt(t *testing.T) {
+	var elt, mod, modx4, sq, sqrt Int
+	r := rand.New(rand.NewSource(9))
+	for i, s := range primes[1:] { // skip 2, use only odd primes
+		mod.SetString(s, 10)
+		modx4.Lsh(&mod, 2)
+
+		// test a few random elements per prime
+		for x := 1; x < 5; x++ {
+			elt.Rand(r, &modx4)
+			elt.Sub(&elt, &mod) // test range [-mod, 3*mod)
+			if !testModSqrt(t, &elt, &mod, &sq, &sqrt) {
+				t.Errorf("#%d: failed (sqrt(e) = %s)", i, &sqrt)
+			}
+		}
+	}
+
+	// exhaustive test for small values
+	for n := 3; n < 100; n++ {
+		mod.SetInt64(int64(n))
+		if !mod.ProbablyPrime(10) {
+			continue
+		}
+		isSquare := make([]bool, n)
+
+		// test all the squares
+		for x := 1; x < n; x++ {
+			elt.SetInt64(int64(x))
+			if !testModSqrt(t, &elt, &mod, &sq, &sqrt) {
+				t.Errorf("#%d: failed (sqrt(%d,%d) = %s)", x, &elt, &mod, &sqrt)
+			}
+			isSquare[sq.Uint64()] = true
+		}
+
+		// test all non-squares
+		for x := 1; x < n; x++ {
+			sq.SetInt64(int64(x))
+			z := sqrt.ModSqrt(&sq, &mod)
+			if !isSquare[x] && z != nil {
+				t.Errorf("#%d: failed (sqrt(%d,%d) = nil)", x, &sqrt, &mod)
+			}
+		}
+	}
+}
+
+func TestJacobi(t *testing.T) {
+	testCases := []struct {
+		x, y   int64
+		result int
+	}{
+		{0, 1, 1},
+		{0, -1, 1},
+		{1, 1, 1},
+		{1, -1, 1},
+		{0, 5, 0},
+		{1, 5, 1},
+		{2, 5, -1},
+		{-2, 5, -1},
+		{2, -5, -1},
+		{-2, -5, 1},
+		{3, 5, -1},
+		{5, 5, 0},
+		{-5, 5, 0},
+		{6, 5, 1},
+		{6, -5, 1},
+		{-6, 5, 1},
+		{-6, -5, -1},
+	}
+
+	var x, y Int
+
+	for i, test := range testCases {
+		x.SetInt64(test.x)
+		y.SetInt64(test.y)
+		expected := test.result
+		actual := Jacobi(&x, &y)
+		if actual != expected {
+			t.Errorf("#%d: Jacobi(%d, %d) = %d, but expected %d", i, test.x, test.y, actual, expected)
+		}
+	}
+}
+
+func TestJacobiPanic(t *testing.T) {
+	const failureMsg = "test failure"
+	defer func() {
+		msg := recover()
+		if msg == nil || msg == failureMsg {
+			panic(msg)
+		}
+		t.Log(msg)
+	}()
+	x := NewInt(1)
+	y := NewInt(2)
+	// Jacobi should panic when the second argument is even.
+	Jacobi(x, y)
+	panic(failureMsg)
+}
+
 var encodingTests = []string{
 	"-539345864568634858364538753846587364875430589374589",
 	"-678645873",
diff --git a/src/cmd/compile/internal/big/nat.go b/src/cmd/compile/internal/big/nat.go
index 2a279d186c..c3eef76fa1 100644
--- a/src/cmd/compile/internal/big/nat.go
+++ b/src/cmd/compile/internal/big/nat.go
@@ -216,6 +216,34 @@ func basicMul(z, x, y nat) {
 	}
 }
 
+// montgomery computes x*y*2^(-n*_W) mod m,
+// assuming k = -1/m mod 2^_W.
+// z is used for storing the result which is returned;
+// z must not alias x, y or m.
+func (z nat) montgomery(x, y, m nat, k Word, n int) nat {
+	var c1, c2 Word
+	z = z.make(n)
+	z.clear()
+	for i := 0; i < n; i++ {
+		d := y[i]
+		c1 += addMulVVW(z, x, d)
+		t := z[0] * k
+		c2 = addMulVVW(z, m, t)
+
+		copy(z, z[1:])
+		z[n-1] = c1 + c2
+		if z[n-1] < c1 {
+			c1 = 1
+		} else {
+			c1 = 0
+		}
+	}
+	if c1 != 0 {
+		subVV(z, z, m)
+	}
+	return z
+}
+
 // Fast version of z[0:n+n>>1].add(z[0:n+n>>1], x[0:n]) w/o bounds checks.
 // Factored out for readability - do not use outside karatsuba.
 func karatsubaAdd(z, x nat, n int) {
@@ -888,6 +916,13 @@ func (z nat) expNN(x, y, m nat) nat {
 	}
 	// y > 0
 
+	// x**1 mod m == x mod m
+	if len(y) == 1 && y[0] == 1 && len(m) != 0 {
+		_, z = z.div(z, x, m)
+		return z
+	}
+	// y > 1
+
 	if len(m) != 0 {
 		// We likely end up being as long as the modulus.
 		z = z.make(len(m))
@@ -898,8 +933,11 @@ func (z nat) expNN(x, y, m nat) nat {
 	// 4-bit, windowed exponentiation. This involves precomputing 14 values
 	// (x^2...x^15) but then reduces the number of multiply-reduces by a
 	// third. Even for a 32-bit exponent, this reduces the number of
-	// operations.
+	// operations. Uses Montgomery method for odd moduli.
 	if len(x) > 1 && len(y) > 1 && len(m) > 0 {
+		if m[0]&1 == 1 {
+			return z.expNNMontgomery(x, y, m)
+		}
 		return z.expNNWindowed(x, y, m)
 	}
 
@@ -1022,6 +1060,87 @@ func (z nat) expNNWindowed(x, y, m nat) nat {
 	return z.norm()
 }
 
+// expNNMontgomery calculates x**y mod m using a fixed, 4-bit window.
+// Uses Montgomery representation.
+func (z nat) expNNMontgomery(x, y, m nat) nat {
+	var zz, one, rr, RR nat
+
+	numWords := len(m)
+
+	// We want the lengths of x and m to be equal.
+	if len(x) > numWords {
+		_, rr = rr.div(rr, x, m)
+	} else if len(x) < numWords {
+		rr = rr.make(numWords)
+		rr.clear()
+		for i := range x {
+			rr[i] = x[i]
+		}
+	} else {
+		rr = x
+	}
+	x = rr
+
+	// Ideally the precomputations would be performed outside, and reused
+	// k0 = -mË-1 mod 2Ë_W. Algorithm from: Dumas, J.G. "On NewtonâRaphson
+	// Iteration for Multiplicative Inverses Modulo Prime Powers".
+	k0 := 2 - m[0]
+	t := m[0] - 1
+	for i := 1; i < _W; i <<= 1 {
+		t *= t
+		k0 *= (t + 1)
+	}
+	k0 = -k0
+
+	// RR = 2Ë(2*_W*len(m)) mod m
+	RR = RR.setWord(1)
+	zz = zz.shl(RR, uint(2*numWords*_W))
+	_, RR = RR.div(RR, zz, m)
+	if len(RR) < numWords {
+		zz = zz.make(numWords)
+		copy(zz, RR)
+		RR = zz
+	}
+	// one = 1, with equal length to that of m
+	one = one.make(numWords)
+	one.clear()
+	one[0] = 1
+
+	const n = 4
+	// powers[i] contains x^i
+	var powers [1 << n]nat
+	powers[0] = powers[0].montgomery(one, RR, m, k0, numWords)
+	powers[1] = powers[1].montgomery(x, RR, m, k0, numWords)
+	for i := 2; i < 1<<n; i++ {
+		powers[i] = powers[i].montgomery(powers[i-1], powers[1], m, k0, numWords)
+	}
+
+	// initialize z = 1 (Montgomery 1)
+	z = z.make(numWords)
+	copy(z, powers[0])
+
+	zz = zz.make(numWords)
+
+	// same windowed exponent, but with Montgomery multiplications
+	for i := len(y) - 1; i >= 0; i-- {
+		yi := y[i]
+		for j := 0; j < _W; j += n {
+			if i != len(y)-1 || j != 0 {
+				zz = zz.montgomery(z, z, m, k0, numWords)
+				z = z.montgomery(zz, zz, m, k0, numWords)
+				zz = zz.montgomery(z, z, m, k0, numWords)
+				z = z.montgomery(zz, zz, m, k0, numWords)
+			}
+			zz = zz.montgomery(z, powers[yi>>(_W-n)], m, k0, numWords)
+			z, zz = zz, z
+			yi <<= n
+		}
+	}
+	// convert to regular number
+	zz = zz.montgomery(z, one, m, k0, numWords)
+	return zz.norm()
+}
+
 // probablyPrime performs reps Miller-Rabin tests to check whether n is prime.
 // If it returns true, n is prime with probability 1 - 1/4^reps.
 // If it returns false, n is not prime.
diff --git a/src/cmd/compile/internal/big/nat_test.go b/src/cmd/compile/internal/big/nat_test.go
index b25a89f731..a15a2bcac0 100644
--- a/src/cmd/compile/internal/big/nat_test.go
+++ b/src/cmd/compile/internal/big/nat_test.go
@@ -332,6 +332,67 @@ func TestTrailingZeroBits(t *testing.T) {
 	}
 }
 
+var montgomeryTests = []struct {
+	x, y, m      string
+	k0           uint64
+	out32, out64 string
+}{
+	{
+		"0xffffffffffffffffffffffffffffffffffffffffffffffffe",
+		"0xffffffffffffffffffffffffffffffffffffffffffffffffe",
+		"0xfffffffffffffffffffffffffffffffffffffffffffffffff",
+		0x0000000000000000,
+		"0xffffffffffffffffffffffffffffffffffffffffff",
+		"0xffffffffffffffffffffffffffffffffff",
+	},
+	{
+		"0x0000000080000000",
+		"0x00000000ffffffff",
+		"0x0000000010000001",
+		0xff0000000fffffff,
+		"0x0000000088000000",
+		"0x0000000007800001",
+	},
+	{
+		"0xffffffffffffffffffffffffffffffff00000000000022222223333333333444444444",
+		"0xffffffffffffffffffffffffffffffff999999999999999aaabbbbbbbbcccccccccccc",
+		"0x33377fffffffffffffffffffffffffffffffffffffffffffff0000000000022222eee1",
+		0xdecc8f1249812adf,
+		"0x22bb05b6d95eaaeca2bb7c05e51f807bce9064b5fbad177161695e4558f9474e91cd79",
+		"0x14beb58d230f85b6d95eaaeca2bb7c05e51f807bce9064b5fb45669afa695f228e48cd",
+	},
+	{
+		"0x10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000ffffffffffffffffffffffffffffffff00000000000022222223333333333444444444",
+		"0x10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000ffffffffffffffffffffffffffffffff999999999999999aaabbbbbbbbcccccccccccc",
+		"0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff33377fffffffffffffffffffffffffffffffffffffffffffff0000000000022222eee1",
+		0xdecc8f1249812adf,
+		"0x5c0d52f451aec609b15da8e5e5626c4eaa88723bdeac9d25ca9b961269400410ca208a16af9c2fb07d7a11c7772cba02c22f9711078d51a3797eb18e691295293284d988e349fa6deba46b25a4ecd9f715",
+		"0x92fcad4b5c0d52f451aec609b15da8e5e5626c4eaa88723bdeac9d25ca9b961269400410ca208a16af9c2fb07d799c32fe2f3cc5422f9711078d51a3797eb18e691295293284d8f5e69caf6decddfe1df6",
+	},
+}
+
+func TestMontgomery(t *testing.T) {
+	for i, test := range montgomeryTests {
+		x := natFromString(test.x)
+		y := natFromString(test.y)
+		m := natFromString(test.m)
+
+		var out nat
+		if _W == 32 {
+			out = natFromString(test.out32)
+		} else {
+			out = natFromString(test.out64)
+		}
+
+		k0 := Word(test.k0 & _M) // mask k0 to ensure that it fits for 32-bit systems.
+		z := nat(nil).montgomery(x, y, m, k0, len(m))
+		z = z.norm()
+		if z.cmp(out) != 0 {
+			t.Errorf("#%d got %s want %s", i, z.decimalString(), out.decimalString())
+		}
+	}
+}
+
 var expNNTests = []struct {
 	x, y, m string
 	out     string