From 70dc28f766194a2b3b050b9c8fab83523164bfbb Mon Sep 17 00:00:00 2001 From: Brian Kessler Date: Thu, 11 Apr 2019 22:54:18 -0600 Subject: [PATCH] math/cmplx: implement Payne-Hanek range reduction Tan has poles along the real axis. In order to accurately calculate the value near these poles, a range reduction by Pi is performed and the result calculated via a Taylor series. The prior implementation of range reduction used Cody-Waite range reduction in three parts. This fails when x is too large to accurately calculate the partial products in the summation accurately. Above this threshold, Payne-Hanek range reduction using a multiple precision value of 1/Pi is required. Additionally, the threshold used in math/trig_reduce.go for Payne-Hanek range reduction was not set conservatively enough. The prior threshold ensured that catastrophic failure did not occur where the argument x would not actually be reduced below Pi/4. However, errors in reduction begin to occur at values much lower when z = ((x - y*PI4A) - y*PI4B) - y*PI4C is not exact because y*PI4A cannot be exactly represented as a float64. reduceThreshold is lowered to the proper value. Fixes #31566 Change-Id: I0f39a4171a5be44f64305f18dc57f6c29f19dba7 Reviewed-on: https://go-review.googlesource.com/c/go/+/172838 Reviewed-by: Rob Pike --- src/go/build/deps_test.go | 2 +- src/math/cmplx/cmath_test.go | 34 ++++++++++ src/math/cmplx/tan.go | 119 ++++++++++++++++++++++++++++++----- src/math/huge_test.go | 16 +++++ src/math/trig_reduce.go | 16 +++-- 5 files changed, 167 insertions(+), 20 deletions(-) diff --git a/src/go/build/deps_test.go b/src/go/build/deps_test.go index c59ac72aa0..efb11814e7 100644 --- a/src/go/build/deps_test.go +++ b/src/go/build/deps_test.go @@ -65,7 +65,7 @@ var pkgDeps = map[string][]string{ // but not Unicode tables. "math": {"internal/cpu", "unsafe", "math/bits"}, "math/bits": {"unsafe"}, - "math/cmplx": {"math"}, + "math/cmplx": {"math", "math/bits"}, "math/rand": {"L0", "math"}, "strconv": {"L0", "unicode/utf8", "math", "math/bits"}, "unicode/utf16": {}, diff --git a/src/math/cmplx/cmath_test.go b/src/math/cmplx/cmath_test.go index 57ba76a767..1b076c881c 100644 --- a/src/math/cmplx/cmath_test.go +++ b/src/math/cmplx/cmath_test.go @@ -291,6 +291,35 @@ var tanh = []complex128{ (-1.0000000491604982429364892e+00 - 2.901873195374433112227349e-08i), } +// huge values along the real axis for testing reducePi in Tan +var hugeIn = []complex128{ + 1 << 28, + 1 << 29, + 1 << 30, + 1 << 35, + -1 << 120, + 1 << 240, + 1 << 300, + -1 << 480, + 1234567891234567 << 180, + -1234567891234567 << 300, +} + +// Results for tanHuge[i] calculated with https://github.com/robpike/ivy +// using 4096 bits of working precision. +var tanHuge = []complex128{ + 5.95641897939639421, + -0.34551069233430392, + -0.78469661331920043, + 0.84276385870875983, + 0.40806638884180424, + -0.37603456702698076, + 4.60901287677810962, + 3.39135965054779932, + -6.76813854009065030, + -0.76417695016604922, +} + // special cases var vcAbsSC = []complex128{ NaN(), @@ -805,6 +834,11 @@ func TestTan(t *testing.T) { t.Errorf("Tan(%g) = %g, want %g", vc[i], f, tan[i]) } } + for i, x := range hugeIn { + if f := Tan(x); !cSoclose(tanHuge[i], f, 3e-15) { + t.Errorf("Tan(%g) = %g, want %g", x, f, tanHuge[i]) + } + } for i := 0; i < len(vcTanSC); i++ { if f := Tan(vcTanSC[i]); !cAlike(tanSC[i], f) { t.Errorf("Tan(%g) = %g, want %g", vcTanSC[i], f, tanSC[i]) diff --git a/src/math/cmplx/tan.go b/src/math/cmplx/tan.go index 0243ea0417..714fb8c45b 100644 --- a/src/math/cmplx/tan.go +++ b/src/math/cmplx/tan.go @@ -4,7 +4,10 @@ package cmplx -import "math" +import ( + "math" + "math/bits" +) // The original C code, the long comment, and the constants // below are from http://netlib.sandia.gov/cephes/c9x-complex/clog.c. @@ -42,7 +45,7 @@ import "math" // cos 2x + cosh 2y // // On the real axis the denominator is zero at odd multiples -// of PI/2. The denominator is evaluated by its Taylor +// of PI/2. The denominator is evaluated by its Taylor // series near these points. // // ctan(z) = -i ctanh(iz). @@ -88,22 +91,110 @@ func Tanh(x complex128) complex128 { return complex(math.Sinh(2*real(x))/d, math.Sin(2*imag(x))/d) } -// Program to subtract nearest integer multiple of PI +// reducePi reduces the input argument x to the range (-Pi/2, Pi/2]. +// x must be greater than or equal to 0. For small arguments it +// uses Cody-Waite reduction in 3 float64 parts based on: +// "Elementary Function Evaluation: Algorithms and Implementation" +// Jean-Michel Muller, 1997. +// For very large arguments it uses Payne-Hanek range reduction based on: +// "ARGUMENT REDUCTION FOR HUGE ARGUMENTS: Good to the Last Bit" +// K. C. Ng et al, March 24, 1992. func reducePi(x float64) float64 { + // reduceThreshold is the maximum value of x where the reduction using + // Cody-Waite reduction still gives accurate results. This threshold + // is set by t*PIn being representable as a float64 without error + // where t is given by t = floor(x * (1 / Pi)) and PIn are the leading partial + // terms of Pi. Since the leading terms, PI1 and PI2 below, have 30 and 32 + // trailing zero bits respectively, t should have less than 30 significant bits. + // t < 1<<30 -> floor(x*(1/Pi)+0.5) < 1<<30 -> x < (1<<30-1) * Pi - 0.5 + // So, conservatively we can take x < 1<<30. + const reduceThreshold float64 = 1 << 30 + if math.Abs(x) < reduceThreshold { + // Use Cody-Waite reduction in three parts. + const ( + // PI1, PI2 and PI3 comprise an extended precision value of PI + // such that PI ~= PI1 + PI2 + PI3. The parts are chosen so + // that PI1 and PI2 have an approximately equal number of trailing + // zero bits. This ensures that t*PI1 and t*PI2 are exact for + // large integer values of t. The full precision PI3 ensures the + // approximation of PI is accurate to 102 bits to handle cancellation + // during subtraction. + PI1 = 3.141592502593994 // 0x400921fb40000000 + PI2 = 1.5099578831723193e-07 // 0x3e84442d00000000 + PI3 = 1.0780605716316238e-14 // 0x3d08469898cc5170 + ) + t := x / math.Pi + t += 0.5 + t = float64(int64(t)) // int64(t) = the multiple + return ((x - t*PI1) - t*PI2) - t*PI3 + } + // Must apply Payne-Hanek range reduction const ( - // extended precision value of PI: - DP1 = 3.14159265160560607910e0 // ?? 0x400921fb54000000 - DP2 = 1.98418714791870343106e-9 // ?? 0x3e210b4610000000 - DP3 = 1.14423774522196636802e-17 // ?? 0x3c6a62633145c06e + mask = 0x7FF + shift = 64 - 11 - 1 + bias = 1023 + fracMask = 1<= 0 { - t += 0.5 - } else { - t -= 0.5 + // Extract out the integer and exponent such that, + // x = ix * 2 ** exp. + ix := math.Float64bits(x) + exp := int(ix>>shift&mask) - bias - shift + ix &= fracMask + ix |= 1 << shift + + // mPi is the binary digits of 1/Pi as a uint64 array, + // that is, 1/Pi = Sum mPi[i]*2^(-64*i). + // 19 64-bit digits give 1216 bits of precision + // to handle the largest possible float64 exponent. + var mPi = [...]uint64{ + 0x0000000000000000, + 0x517cc1b727220a94, + 0xfe13abe8fa9a6ee0, + 0x6db14acc9e21c820, + 0xff28b1d5ef5de2b0, + 0xdb92371d2126e970, + 0x0324977504e8c90e, + 0x7f0ef58e5894d39f, + 0x74411afa975da242, + 0x74ce38135a2fbf20, + 0x9cc8eb1cc1a99cfa, + 0x4e422fc5defc941d, + 0x8ffc4bffef02cc07, + 0xf79788c5ad05368f, + 0xb69b3f6793e584db, + 0xa7a31fb34f2ff516, + 0xba93dd63f5f2f8bd, + 0x9e839cfbc5294975, + 0x35fdafd88fc6ae84, + 0x2b0198237e3db5d5, + } + // Use the exponent to extract the 3 appropriate uint64 digits from mPi, + // B ~ (z0, z1, z2), such that the product leading digit has the exponent -64. + // Note, exp >= 50 since x >= reduceThreshold and exp < 971 for maximum float64. + digit, bitshift := uint(exp+64)/64, uint(exp+64)%64 + z0 := (mPi[digit] << bitshift) | (mPi[digit+1] >> (64 - bitshift)) + z1 := (mPi[digit+1] << bitshift) | (mPi[digit+2] >> (64 - bitshift)) + z2 := (mPi[digit+2] << bitshift) | (mPi[digit+3] >> (64 - bitshift)) + // Multiply mantissa by the digits and extract the upper two digits (hi, lo). + z2hi, _ := bits.Mul64(z2, ix) + z1hi, z1lo := bits.Mul64(z1, ix) + z0lo := z0 * ix + lo, c := bits.Add64(z1lo, z2hi, 0) + hi, _ := bits.Add64(z0lo, z1hi, c) + // Find the magnitude of the fraction. + lz := uint(bits.LeadingZeros64(hi)) + e := uint64(bias - (lz + 1)) + // Clear implicit mantissa bit and shift into place. + hi = (hi << (lz + 1)) | (lo >> (64 - (lz + 1))) + hi >>= 64 - shift + // Include the exponent and convert to a float. + hi |= e << shift + x = math.Float64frombits(hi) + // map to (-Pi/2, Pi/2] + if x > 0.5 { + x-- } - t = float64(int64(t)) // int64(t) = the multiple - return ((x - t*DP1) - t*DP2) - t*DP3 + return math.Pi * x } // Taylor series expansion for cosh(2y) - cos(2x) diff --git a/src/math/huge_test.go b/src/math/huge_test.go index 0b45dbf5b1..9448edc339 100644 --- a/src/math/huge_test.go +++ b/src/math/huge_test.go @@ -16,6 +16,10 @@ import ( // Inputs to test trig_reduce var trigHuge = []float64{ + 1 << 28, + 1 << 29, + 1 << 30, + 1 << 35, 1 << 120, 1 << 240, 1 << 480, @@ -29,6 +33,10 @@ var trigHuge = []float64{ // 102 decimal digits (1 << 120, 1 << 240, 1 << 480, 1234567891234567 << 180) // were confirmed via https://keisan.casio.com/ var cosHuge = []float64{ + -0.16556897949057876, + -0.94517382606089662, + 0.78670712294118812, + -0.76466301249635305, -0.92587902285483787, 0.93601042593353793, -0.28282777640193788, @@ -38,6 +46,10 @@ var cosHuge = []float64{ } var sinHuge = []float64{ + -0.98619821183697566, + 0.32656766301856334, + -0.61732641504604217, + -0.64443035102329113, 0.37782010936075202, -0.35197227524865778, 0.95917070894368716, @@ -47,6 +59,10 @@ var sinHuge = []float64{ } var tanHuge = []float64{ + 5.95641897939639421, + -0.34551069233430392, + -0.78469661331920043, + 0.84276385870875983, -0.40806638884180424, -0.37603456702698076, -3.39135965054779932, diff --git a/src/math/trig_reduce.go b/src/math/trig_reduce.go index 6f8eaba9b9..5cdf4fa013 100644 --- a/src/math/trig_reduce.go +++ b/src/math/trig_reduce.go @@ -8,13 +8,19 @@ import ( "math/bits" ) -// reduceThreshold is the maximum value where the reduction using Pi/4 -// in 3 float64 parts still gives accurate results. Above this -// threshold Payne-Hanek range reduction must be used. -const reduceThreshold = (1 << 52) / (4 / Pi) +// reduceThreshold is the maximum value of x where the reduction using Pi/4 +// in 3 float64 parts still gives accurate results. This threshold +// is set by y*C being representable as a float64 without error +// where y is given by y = floor(x * (4 / Pi)) and C is the leading partial +// terms of 4/Pi. Since the leading terms (PI4A and PI4B in sin.go) have 30 +// and 32 trailing zero bits, y should have less than 30 significant bits. +// y < 1<<30 -> floor(x*4/Pi) < 1<<30 -> x < (1<<30 - 1) * Pi/4 +// So, conservatively we can take x < 1<<29. +// Above this threshold Payne-Hanek range reduction must be used. +const reduceThreshold = 1 << 29 // trigReduce implements Payne-Hanek range reduction by Pi/4 -// for x > 0. It returns the integer part mod 8 (j) and +// for x > 0. It returns the integer part mod 8 (j) and // the fractional part (z) of x / (Pi/4). // The implementation is based on: // "ARGUMENT REDUCTION FOR HUGE ARGUMENTS: Good to the Last Bit" -- 2.50.0