From 19d0b3e81f4a072615f92fd6821c8ed2cee27c9f Mon Sep 17 00:00:00 2001 From: Filippo Valsorda Date: Fri, 3 Jan 2025 14:03:08 +0100 Subject: [PATCH] crypto/rsa: use Div instead of GCD for trial division MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Div is way faster. We could actually test a lot more primes and still gain performance despite the diminishing returns, but necessarily it would have marginal impact overall. fips140: off goos: linux goarch: amd64 pkg: crypto/rsa cpu: AMD Ryzen 7 PRO 8700GE w/ Radeon 780M Graphics │ e325b41ad1 │ 0f611af2e1 │ │ sec/op │ sec/op vs base │ GenerateKey/2048-16 124.19m ± 0% 39.93m ± 0% -67.85% (p=0.000 n=20) Surprisingly, the performance gain is similar on ARM64, which doesn't have intrinsified math.Div. fips140: off goos: darwin goarch: arm64 pkg: crypto/rsa cpu: Apple M2 │ e325b41ad1 │ 6276161a7f │ │ sec/op │ sec/op vs base │ GenerateKey/2048-8 136.49m ± 0% 47.97m ± 1% -64.86% (p=0.000 n=20) Change-Id: I6a6a46560331198312bd09c1cbe4d2b3c370c552 Reviewed-on: https://go-review.googlesource.com/c/go/+/639955 Reviewed-by: Junyang Shao Auto-Submit: Filippo Valsorda Reviewed-by: Russ Cox LUCI-TryBot-Result: Go LUCI Reviewed-by: Daniel McCarney --- src/crypto/internal/fips140/rsa/keygen.go | 53 ++++++++++++++--------- 1 file changed, 32 insertions(+), 21 deletions(-) diff --git a/src/crypto/internal/fips140/rsa/keygen.go b/src/crypto/internal/fips140/rsa/keygen.go index c8314e78df..7c0272239f 100644 --- a/src/crypto/internal/fips140/rsa/keygen.go +++ b/src/crypto/internal/fips140/rsa/keygen.go @@ -223,14 +223,11 @@ func isPrime(w []byte) bool { return false } - primes, err := bigmod.NewNat().SetBytes(productOfPrimes, mr.w) - // If w is too small for productOfPrimes, key generation is - // going to be fast enough anyway. - if err == nil { - _, hasInverse := primes.InverseVarTime(primes, mr.w) - if !hasInverse { - // productOfPrimes doesn't have an inverse mod w, - // so w is divisible by at least one of the primes. + // Before Miller-Rabin, rule out most composites with trial divisions. + for i := 0; i < len(primes); i += 3 { + p1, p2, p3 := primes[i], primes[i+1], primes[i+2] + r := mr.w.Nat().DivShortVarTime(p1 * p2 * p3) + if r%p1 == 0 || r%p2 == 0 || r%p3 == 0 { return false } } @@ -289,20 +286,34 @@ func isPrime(w []byte) bool { } } -// productOfPrimes is the product of the first 74 primes higher than 2. +// primes are the first prime numbers (except 2), such that the product of any +// three primes fits in a uint32. // -// The number of primes was selected to be the highest such that the product fit -// in 512 bits, so to be usable for 1024 bit RSA keys. -// -// Higher values cause fewer Miller-Rabin tests of composites (nothing can help -// with the final test on the actual prime) but make InverseVarTime take longer. -// There are diminishing returns: including the 75th prime would increase the -// success rate of trial division by 0.05%. -var productOfPrimes = []byte{ - 0x10, 0x6a, 0xa9, 0xfb, 0x76, 0x46, 0xfa, 0x6e, 0xb0, 0x81, 0x3c, 0x28, 0xc5, 0xd5, 0xf0, 0x9f, - 0x07, 0x7e, 0xc3, 0xba, 0x23, 0x8b, 0xfb, 0x99, 0xc1, 0xb6, 0x31, 0xa2, 0x03, 0xe8, 0x11, 0x87, - 0x23, 0x3d, 0xb1, 0x17, 0xcb, 0xc3, 0x84, 0x05, 0x6e, 0xf0, 0x46, 0x59, 0xa4, 0xa1, 0x1d, 0xe4, - 0x9f, 0x7e, 0xcb, 0x29, 0xba, 0xda, 0x8f, 0x98, 0x0d, 0xec, 0xec, 0xe9, 0x2e, 0x30, 0xc4, 0x8f, +// More primes cause fewer Miller-Rabin tests of composites (nothing can help +// with the final test on the actual prime) but have diminishing returns: these +// 255 primes catch 84.9% of composites, the next 255 would catch 1.5% more. +// Adding primes can still be marginally useful since they only compete with the +// (much more expensive) first Miller-Rabin round for candidates that were not +// rejected by the previous primes. +var primes = []uint{ + 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, + 59, 61, 67, 71, 73, 79, 83, 89, 97, 101, 103, 107, 109, 113, 127, + 131, 137, 139, 149, 151, 157, 163, 167, 173, 179, 181, 191, 193, 197, 199, + 211, 223, 227, 229, 233, 239, 241, 251, 257, 263, 269, 271, 277, 281, 283, + 293, 307, 311, 313, 317, 331, 337, 347, 349, 353, 359, 367, 373, 379, 383, + 389, 397, 401, 409, 419, 421, 431, 433, 439, 443, 449, 457, 461, 463, 467, + 479, 487, 491, 499, 503, 509, 521, 523, 541, 547, 557, 563, 569, 571, 577, + 587, 593, 599, 601, 607, 613, 617, 619, 631, 641, 643, 647, 653, 659, 661, + 673, 677, 683, 691, 701, 709, 719, 727, 733, 739, 743, 751, 757, 761, 769, + 773, 787, 797, 809, 811, 821, 823, 827, 829, 839, 853, 857, 859, 863, 877, + 881, 883, 887, 907, 911, 919, 929, 937, 941, 947, 953, 967, 971, 977, 983, + 991, 997, 1009, 1013, 1019, 1021, 1031, 1033, 1039, 1049, 1051, 1061, 1063, 1069, + 1087, 1091, 1093, 1097, 1103, 1109, 1117, 1123, 1129, 1151, 1153, 1163, 1171, 1181, + 1187, 1193, 1201, 1213, 1217, 1223, 1229, 1231, 1237, 1249, 1259, 1277, 1279, 1283, + 1289, 1291, 1297, 1301, 1303, 1307, 1319, 1321, 1327, 1361, 1367, 1373, 1381, 1399, + 1409, 1423, 1427, 1429, 1433, 1439, 1447, 1451, 1453, 1459, 1471, 1481, 1483, 1487, + 1489, 1493, 1499, 1511, 1523, 1531, 1543, 1549, 1553, 1559, 1567, 1571, 1579, 1583, + 1597, 1601, 1607, 1609, 1613, 1619, } type millerRabin struct { -- 2.50.0