crypto/internal/nistec: refactor scalar multiplication

author Filippo Valsorda <filippo@golang.org>

Mon, 13 Feb 2023 19:49:38 +0000 (20:49 +0100)

committer Gopher Robot <gobot@golang.org>

Mon, 13 Mar 2023 18:55:22 +0000 (18:55 +0000)
author Filippo Valsorda <filippo@golang.org>
Mon, 13 Feb 2023 19:49:38 +0000 (20:49 +0100)
committer Gopher Robot <gobot@golang.org>
Mon, 13 Mar 2023 18:55:22 +0000 (18:55 +0000)
diff --git a/src/crypto/internal/nistec/nistec_test.go b/src/crypto/internal/nistec/nistec_test.go

index 9103608c18a0f9270bd0cabbf5371a04dd9c1722..1a82b22286f22f2f72f8fcaf28d03bbeb3a92186 100644 (file)
--- a/src/crypto/internal/nistec/nistec_test.go
+++ b/src/crypto/internal/nistec/nistec_test.go
@@ -19,7 +19,7 @@ func TestAllocations(t *testing.T) {
         testenv.SkipIfOptimizationOff(t)
  
         t.Run("P224", func(t *testing.T) {
-               if allocs := testing.AllocsPerRun(100, func() {
+               if allocs := testing.AllocsPerRun(10, func() {
                         p := nistec.NewP224Point().SetGenerator()
                         scalar := make([]byte, 28)
                         rand.Read(scalar)
@@ -38,7 +38,7 @@ func TestAllocations(t *testing.T) {
                 }
         })
         t.Run("P256", func(t *testing.T) {
-               if allocs := testing.AllocsPerRun(100, func() {
+               if allocs := testing.AllocsPerRun(10, func() {
                         p := nistec.NewP256Point().SetGenerator()
                         scalar := make([]byte, 32)
                         rand.Read(scalar)
@@ -57,7 +57,7 @@ func TestAllocations(t *testing.T) {
                 }
         })
         t.Run("P384", func(t *testing.T) {
-               if allocs := testing.AllocsPerRun(100, func() {
+               if allocs := testing.AllocsPerRun(10, func() {
                         p := nistec.NewP384Point().SetGenerator()
                         scalar := make([]byte, 48)
                         rand.Read(scalar)
@@ -76,7 +76,7 @@ func TestAllocations(t *testing.T) {
                 }
         })
         t.Run("P521", func(t *testing.T) {
-               if allocs := testing.AllocsPerRun(100, func() {
+               if allocs := testing.AllocsPerRun(10, func() {
                         p := nistec.NewP521Point().SetGenerator()
                         scalar := make([]byte, 66)
                         rand.Read(scalar)
@@ -133,21 +133,13 @@ func testEquivalents[P nistPoint[P]](t *testing.T, newPoint func() P, c elliptic
         p1 := newPoint().Double(p)
         p2 := newPoint().Add(p, p)
         p3, err := newPoint().ScalarMult(p, two)
-       if err != nil {
-               t.Fatal(err)
-       }
+       fatalIfErr(t, err)
         p4, err := newPoint().ScalarBaseMult(two)
-       if err != nil {
-               t.Fatal(err)
-       }
+       fatalIfErr(t, err)
         p5, err := newPoint().ScalarMult(p, nPlusTwo)
-       if err != nil {
-               t.Fatal(err)
-       }
+       fatalIfErr(t, err)
         p6, err := newPoint().ScalarBaseMult(nPlusTwo)
-       if err != nil {
-               t.Fatal(err)
-       }
+       fatalIfErr(t, err)
  
         if !bytes.Equal(p1.Bytes(), p2.Bytes()) {
                 t.Error("P+P != 2*P")
@@ -230,10 +222,10 @@ func testScalarMult[P nistPoint[P]](t *testing.T, newPoint func() P, c elliptic.
                         checkScalar(t, s.FillBytes(make([]byte, byteLen)))
                 })
         }
-       // Test N+1...N+32 since they risk overlapping with precomputed table values
+       // Test N-32...N+32 since they risk overlapping with precomputed table values
         // in the final additions.
-       for i := int64(2); i <= 32; i++ {
-               t.Run(fmt.Sprintf("N+%d", i), func(t *testing.T) {
+       for i := int64(-32); i <= 32; i++ {
+               t.Run(fmt.Sprintf("N%+d", i), func(t *testing.T) {
                         checkScalar(t, new(big.Int).Add(c.Params().N, big.NewInt(i)).Bytes())
                 })
         }
diff --git a/src/crypto/internal/nistec/p256_asm.go b/src/crypto/internal/nistec/p256_asm.go

index 99a22b833f028c846e07191573530aa6efaa4eb4..aa1ceba6bb45a2071f56020d7e0c9cd0efb05120 100644 (file)
--- a/src/crypto/internal/nistec/p256_asm.go
+++ b/src/crypto/internal/nistec/p256_asm.go
@@ -294,8 +294,9 @@ func p256OrdLittleToBig(res *[32]byte, in *p256OrdElement)
  // [0]P is the point at infinity and it's not stored.
  type p256Table [16]P256Point
  
-// p256Select sets res to the point at index idx in the table.
-// idx must be in [0, 15]. It executes in constant time.
+// p256Select sets res to the point at index idx - 1 in the table.
+// idx must be in [1, 16] or res will be set to an undefined value.
+// It executes in constant time.
  //
  //go:noescape
  func p256Select(res *P256Point, table *p256Table, idx int)
@@ -335,22 +336,25 @@ func init() {
         p256Precomputed = (*[43]p256AffineTable)(*p256PrecomputedPtr)
  }
  
-// p256SelectAffine sets res to the point at index idx in the table.
-// idx must be in [0, 31]. It executes in constant time.
+// p256SelectAffine sets res to the point at index idx - 1 in the table.
+// idx must be in [1, 32] or res will be set to an undefined value.
+// It executes in constant time.
  //
  //go:noescape
  func p256SelectAffine(res *p256AffinePoint, table *p256AffineTable, idx int)
  
  // Point addition with an affine point and constant time conditions.
  // If zero is 0, sets res = in2. If sel is 0, sets res = in1.
-// If sign is not 0, sets res = in1 + -in2. Otherwise, sets res = in1 + in2
+// If sign is not 0, sets res = in1 + -in2. Otherwise, sets res = in1 + in2.
+// If neither sel nor zero are 0 and in1 = in2, or both zero and sel are 0,
+// or in1 is the infinity, res is undefined.
  //
  //go:noescape
  func p256PointAddAffineAsm(res, in1 *P256Point, in2 *p256AffinePoint, sign, sel, zero int)
  
-// Point addition. Sets res = in1 + in2. Returns one if the two input points
-// were equal and zero otherwise. If in1 or in2 are the point at infinity, res
-// and the return value are undefined.
+// Point addition. Sets res = in1 + in2 and returns zero if in1 and in2 are not
+// equal. Otherwise, returns one and res is undefined. If in1 or in2 are the
+// point at infinity, res and the return value are undefined.
  //
  //go:noescape
  func p256PointAddAsm(res, in1, in2 *P256Point) int
@@ -603,58 +607,93 @@ func p256Inverse(out, in *p256Element) {
         p256Mul(out, in, z)
  }
  
-func boothW5(in uint) (int, int) {
-       var s uint = ^((in >> 5) - 1)
-       var d uint = (1 << 6) - in - 1
+// p256OrdRsh returns the 64 least significant bits of x >> n. n must be lower
+// than 256. The value of n leaks through timing side-channels.
+func p256OrdRsh(x *p256OrdElement, n int) uint64 {
+       i := n / 64
+       n = n % 64
+       res := x[i] >> n
+       // Shift in the more significant limb, if present.
+       if i := i + 1; i < len(x) {
+               res |= x[i] << (64 - n)
+       }
+       return res
+}
+
+func boothW5(in uint64) (int, int) {
+       s := ^((in >> 5) - 1)
+       d := (1 << 6) - in - 1
         d = (d & s) | (in & (^s))
         d = (d >> 1) + (d & 1)
         return int(d), int(s & 1)
  }
  
-func boothW6(in uint) (int, int) {
-       var s uint = ^((in >> 6) - 1)
-       var d uint = (1 << 7) - in - 1
+func boothW6(in uint64) (int, int) {
+       s := ^((in >> 6) - 1)
+       d := (1 << 7) - in - 1
         d = (d & s) | (in & (^s))
         d = (d >> 1) + (d & 1)
         return int(d), int(s & 1)
  }
  
  func (p *P256Point) p256BaseMult(scalar *p256OrdElement) {
-       var t0 p256AffinePoint
+       // This function works like p256ScalarMult below, but the table is fixed and
+       // "pre-doubled" for each iteration, so instead of doubling we move to the
+       // next table at each iteration.
  
-       wvalue := (scalar[0] << 1) & 0x7f
-       sel, sign := boothW6(uint(wvalue))
-       p256SelectAffine(&t0, &p256Precomputed[0], sel)
-       p.x, p.y, p.z = t0.x, t0.y, p256One
-       p256NegCond(&p.y, sign)
+       // Start scanning the window from the most significant bits. We move by
+       // 6 bits at a time and need to finish at -1, so -1 + 6 * 42 = 251.
+       index := 251
  
-       index := uint(5)
+       sel, sign := boothW6(p256OrdRsh(scalar, index))
+       // sign is always zero because the boothW6 input here is at
+       // most five bits long, so the top bit is never set.
+       _ = sign
+
+       var t0 p256AffinePoint
+       p256SelectAffine(&t0, &p256Precomputed[(index+1)/6], sel)
+       p.x, p.y, p.z = t0.x, t0.y, p256One
         zero := sel
  
-       for i := 1; i < 43; i++ {
-               if index < 192 {
-                       wvalue = ((scalar[index/64] >> (index % 64)) + (scalar[index/64+1] << (64 - (index % 64)))) & 0x7f
+       for index >= 5 {
+               index -= 6
+
+               if index >= 0 {
+                       sel, sign = boothW6(p256OrdRsh(scalar, index) & 0b1111111)
                 } else {
-                       wvalue = (scalar[index/64] >> (index % 64)) & 0x7f
+                       // Booth encoding considers a virtual zero bit at index -1,
+                       // so we shift left the least significant limb.
+                       wvalue := (scalar[0] << 1) & 0b1111111
+                       sel, sign = boothW6(wvalue)
                 }
-               index += 6
-               sel, sign = boothW6(uint(wvalue))
-               p256SelectAffine(&t0, &p256Precomputed[i], sel)
+
+               table := &p256Precomputed[(index+1)/6]
+               p256SelectAffine(&t0, table, sel)
+
+               // See p256ScalarMult for the behavior of sign, sel, and zero, that here
+               // is all rolled into the p256PointAddAffineAsm function. We also know
+               // that (if sel and zero are not 0) p != t0 for a similar reason.
                 p256PointAddAffineAsm(p, p, &t0, sign, sel, zero)
                 zero |= sel
         }
  
-       // If the whole scalar was zero, set to the point at infinity.
-       p256MovCond(p, p, NewP256Point(), zero)
+       // If zero is 0, the whole scalar was zero, p is undefined,
+       // and the correct result is the infinity.
+       infinity := NewP256Point()
+       p256MovCond(p, p, infinity, zero)
  }
  
  func (p *P256Point) p256ScalarMult(scalar *p256OrdElement) {
-       // precomp is a table of precomputed points that stores powers of p
-       // from p^1 to p^16.
+       // If p is the point at infinity, p256PointAddAsm's behavior below is
+       // undefined. We'll just return the infinity at the end.
+       isInfinity := p.isInfinity()
+
+       // precomp is a table of precomputed points that stores
+       // powers of p from p^1 to p^16.
         var precomp p256Table
         var t0, t1, t2, t3 P256Point
  
-       // Prepare the table
+       // Prepare the table by double and adding.
         precomp[0] = *p // 1
  
         p256PointDoubleAsm(&t0, p)
@@ -693,52 +732,56 @@ func (p *P256Point) p256ScalarMult(scalar *p256OrdElement) {
         precomp[12] = t0 // 13
         precomp[14] = t2 // 15
  
-       // Start scanning the window from top bit
-       index := uint(254)
-       var sel, sign int
+       // Start scanning the window from the most significant bits. We move by
+       // 5 bits at a time and need to finish at -1, so -1 + 5 * 51 = 254.
+       index := 254
  
-       wvalue := (scalar[index/64] >> (index % 64)) & 0x3f
-       sel, _ = boothW5(uint(wvalue))
+       sel, sign := boothW5(p256OrdRsh(scalar, index))
+       // sign is always zero because the boothW5 input here is at
+       // most two bits long, so the top bit is never set.
+       _ = sign
  
         p256Select(p, &precomp, sel)
         zero := sel
  
-       for index > 4 {
+       for index >= 4 {
                 index -= 5
+
                 p256PointDoubleAsm(p, p)
                 p256PointDoubleAsm(p, p)
                 p256PointDoubleAsm(p, p)
                 p256PointDoubleAsm(p, p)
                 p256PointDoubleAsm(p, p)
  
-               if index < 192 {
-                       wvalue = ((scalar[index/64] >> (index % 64)) + (scalar[index/64+1] << (64 - (index % 64)))) & 0x3f
+               if index >= 0 {
+                       sel, sign = boothW5(p256OrdRsh(scalar, index) & 0b111111)
                 } else {
-                       wvalue = (scalar[index/64] >> (index % 64)) & 0x3f
+                       // Booth encoding considers a virtual zero bit at index -1,
+                       // so we shift left the least significant limb.
+                       wvalue := (scalar[0] << 1) & 0b111111
+                       sel, sign = boothW5(wvalue)
                 }
  
-               sel, sign = boothW5(uint(wvalue))
-
                 p256Select(&t0, &precomp, sel)
                 p256NegCond(&t0.y, sign)
+
+               // We don't check the return value of p256PointAddAsm because t0 is
+               // [±1-16]P, while p was just doubled five times and can't have wrapped
+               // around because scalar is less than the group order.
                 p256PointAddAsm(&t1, p, &t0)
+
+               // If sel is 0, t0 was undefined and the correct result is p unmodified.
+               // If zero is 0, all previous sel were 0 and the correct result is t0.
+               // If both are 0, the result doesn't matter as it will be thrown out.
                 p256MovCond(&t1, &t1, p, sel)
                 p256MovCond(p, &t1, &t0, zero)
                 zero |= sel
         }
  
-       p256PointDoubleAsm(p, p)
-       p256PointDoubleAsm(p, p)
-       p256PointDoubleAsm(p, p)
-       p256PointDoubleAsm(p, p)
-       p256PointDoubleAsm(p, p)
-
-       wvalue = (scalar[0] << 1) & 0x3f
-       sel, sign = boothW5(uint(wvalue))
-
-       p256Select(&t0, &precomp, sel)
-       p256NegCond(&t0.y, sign)
-       p256PointAddAsm(&t1, p, &t0)
-       p256MovCond(&t1, &t1, p, sel)
-       p256MovCond(p, &t1, &t0, zero)
+       // If zero is 0, the whole scalar was zero.
+       // If isInfinity is 1, the input point was the infinity.
+       // In both cases, p is undefined and the correct result is the infinity.
+       infinity := NewP256Point()
+       wantInfinity := zero & (isInfinity - 1)
+       p256MovCond(p, p, infinity, wantInfinity)
  }
author	Filippo Valsorda <filippo@golang.org>
	Mon, 13 Feb 2023 19:49:38 +0000 (20:49 +0100)
committer	Gopher Robot <gobot@golang.org>
	Mon, 13 Mar 2023 18:55:22 +0000 (18:55 +0000)
src/crypto/internal/nistec/nistec_test.go		patch \| blob \| history
src/crypto/internal/nistec/p256_asm.go		patch \| blob \| history