"crypto/elliptic/internal/fiat"
"crypto/subtle"
"errors"
+ "sync"
)
var {{.p}}B, _ = new({{.Element}}).SetBytes({{.B}})
var {{.p}}G, _ = New{{.P}}Point().SetBytes({{.G}})
+// {{.p}}ElementLength is the length of an element of the base or scalar field,
+// which have the same bytes length for all NIST P curves.
const {{.p}}ElementLength = {{ .ElementLen }}
// {{.P}}Point is a {{.P}} point. The zero value is NOT valid.
return q
}
+// A {{.p}}Table holds the first 15 multiples of a point at offset -1, so [1]P
+// is at table[0], [15]P is at table[14], and [0]P is implicitly the identity
+// point.
+type {{.p}}Table [15]*{{.P}}Point
+
+// Select selects the n-th multiple of the table base point into p. It works in
+// constant time by iterating over every entry of the table. n must be in [0, 15].
+func (table *{{.p}}Table) Select(p *{{.P}}Point, n uint8) {
+ if n >= 16 {
+ panic("nistec: internal error: {{.p}}Table called with out-of-bounds value")
+ }
+ p.Set(New{{.P}}Point())
+ for i := uint8(1); i < 16; i++ {
+ cond := subtle.ConstantTimeByteEq(i, n)
+ p.Select(table[i-1], p, cond)
+ }
+}
+
// ScalarMult sets p = scalar * q, and returns p.
func (p *{{.P}}Point) ScalarMult(q *{{.P}}Point, scalar []byte) (*{{.P}}Point, error) {
- // table holds the first 16 multiples of q. The explicit new{{.P}}Point calls
- // get inlined, letting the allocations live on the stack.
- var table = [16]*{{.P}}Point{
- New{{.P}}Point(), New{{.P}}Point(), New{{.P}}Point(), New{{.P}}Point(),
- New{{.P}}Point(), New{{.P}}Point(), New{{.P}}Point(), New{{.P}}Point(),
+ // Compute a {{.p}}Table for the base point q. The explicit New{{.P}}Point
+ // calls get inlined, letting the allocations live on the stack.
+ var table = {{.p}}Table{New{{.P}}Point(), New{{.P}}Point(), New{{.P}}Point(),
New{{.P}}Point(), New{{.P}}Point(), New{{.P}}Point(), New{{.P}}Point(),
New{{.P}}Point(), New{{.P}}Point(), New{{.P}}Point(), New{{.P}}Point(),
- }
- for i := 1; i < 16; i++ {
- table[i].Add(table[i-1], q)
+ New{{.P}}Point(), New{{.P}}Point(), New{{.P}}Point(), New{{.P}}Point()}
+ table[0].Set(q)
+ for i := 1; i < 15; i += 2 {
+ table[i].Double(table[i/2])
+ table[i+1].Add(table[i], q)
}
// Instead of doing the classic double-and-add chain, we do it with a
// four-bit window: we double four times, and then add [0-15]P.
t := New{{.P}}Point()
p.Set(New{{.P}}Point())
- for _, byte := range scalar {
- p.Double(p)
- p.Double(p)
- p.Double(p)
- p.Double(p)
-
- for i := uint8(0); i < 16; i++ {
- cond := subtle.ConstantTimeByteEq(byte>>4, i)
- t.Select(table[i], t, cond)
+ for i, byte := range scalar {
+ // No need to double on the first iteration, as p is the identity at
+ // this point, and [N]∞ = ∞.
+ if i != 0 {
+ p.Double(p)
+ p.Double(p)
+ p.Double(p)
+ p.Double(p)
}
+
+ windowValue := byte >> 4
+ table.Select(t, windowValue)
p.Add(p, t)
p.Double(p)
p.Double(p)
p.Double(p)
- for i := uint8(0); i < 16; i++ {
- cond := subtle.ConstantTimeByteEq(byte&0b1111, i)
- t.Select(table[i], t, cond)
- }
+ windowValue = byte & 0b1111
+ table.Select(t, windowValue)
p.Add(p, t)
}
return p, nil
}
+var {{.p}}GeneratorTable *[{{.p}}ElementLength * 2]{{.p}}Table
+var {{.p}}GeneratorTableOnce sync.Once
+
+// generatorTable returns a sequence of {{.p}}Tables. The first table contains
+// multiples of G. Each successive table is the previous table doubled four
+// times.
+func (p *{{.P}}Point) generatorTable() *[{{.p}}ElementLength * 2]{{.p}}Table {
+ {{.p}}GeneratorTableOnce.Do(func() {
+ {{.p}}GeneratorTable = new([{{.p}}ElementLength * 2]{{.p}}Table)
+ base := New{{.P}}Generator()
+ for i := 0; i < {{.p}}ElementLength*2; i++ {
+ {{.p}}GeneratorTable[i][0] = New{{.P}}Point().Set(base)
+ for j := 1; j < 15; j++ {
+ {{.p}}GeneratorTable[i][j] = New{{.P}}Point().Add({{.p}}GeneratorTable[i][j-1], base)
+ }
+ base.Double(base)
+ base.Double(base)
+ base.Double(base)
+ base.Double(base)
+ }
+ })
+ return {{.p}}GeneratorTable
+}
+
// ScalarBaseMult sets p = scalar * B, where B is the canonical generator, and
// returns p.
func (p *{{.P}}Point) ScalarBaseMult(scalar []byte) (*{{.P}}Point, error) {
- return p.ScalarMult(New{{.P}}Generator(), scalar)
+ if len(scalar) != {{.p}}ElementLength {
+ return nil, errors.New("invalid scalar length")
+ }
+ tables := p.generatorTable()
+
+ // This is also a scalar multiplication with a four-bit window like in
+ // ScalarMult, but in this case the doublings are precomputed. The value
+ // [windowValue]G added at iteration k would normally get doubled
+ // (totIterations-k)×4 times, but with a larger precomputation we can
+ // instead add [2^((totIterations-k)×4)][windowValue]G and avoid the
+ // doublings between iterations.
+ t := New{{.P}}Point()
+ p.Set(New{{.P}}Point())
+ tableIndex := len(tables) - 1
+ for _, byte := range scalar {
+ windowValue := byte >> 4
+ tables[tableIndex].Select(t, windowValue)
+ p.Add(p, t)
+ tableIndex--
+
+ windowValue = byte & 0b1111
+ tables[tableIndex].Select(t, windowValue)
+ p.Add(p, t)
+ tableIndex--
+ }
+
+ return p, nil
}
`
"crypto/elliptic/internal/fiat"
"crypto/subtle"
"errors"
+ "sync"
)
var p224B, _ = new(fiat.P224Element).SetBytes([]byte{0xb4, 0x5, 0xa, 0x85, 0xc, 0x4, 0xb3, 0xab, 0xf5, 0x41, 0x32, 0x56, 0x50, 0x44, 0xb0, 0xb7, 0xd7, 0xbf, 0xd8, 0xba, 0x27, 0xb, 0x39, 0x43, 0x23, 0x55, 0xff, 0xb4})
var p224G, _ = NewP224Point().SetBytes([]byte{0x4, 0xb7, 0xe, 0xc, 0xbd, 0x6b, 0xb4, 0xbf, 0x7f, 0x32, 0x13, 0x90, 0xb9, 0x4a, 0x3, 0xc1, 0xd3, 0x56, 0xc2, 0x11, 0x22, 0x34, 0x32, 0x80, 0xd6, 0x11, 0x5c, 0x1d, 0x21, 0xbd, 0x37, 0x63, 0x88, 0xb5, 0xf7, 0x23, 0xfb, 0x4c, 0x22, 0xdf, 0xe6, 0xcd, 0x43, 0x75, 0xa0, 0x5a, 0x7, 0x47, 0x64, 0x44, 0xd5, 0x81, 0x99, 0x85, 0x0, 0x7e, 0x34})
+// p224ElementLength is the length of an element of the base or scalar field,
+// which have the same bytes length for all NIST P curves.
const p224ElementLength = 28
// P224Point is a P224 point. The zero value is NOT valid.
return q
}
+// A p224Table holds the first 15 multiples of a point at offset -1, so [1]P
+// is at table[0], [15]P is at table[14], and [0]P is implicitly the identity
+// point.
+type p224Table [15]*P224Point
+
+// Select selects the n-th multiple of the table base point into p. It works in
+// constant time by iterating over every entry of the table. n must be in [0, 15].
+func (table *p224Table) Select(p *P224Point, n uint8) {
+ if n >= 16 {
+ panic("nistec: internal error: p224Table called with out-of-bounds value")
+ }
+ p.Set(NewP224Point())
+ for i := uint8(1); i < 16; i++ {
+ cond := subtle.ConstantTimeByteEq(i, n)
+ p.Select(table[i-1], p, cond)
+ }
+}
+
// ScalarMult sets p = scalar * q, and returns p.
func (p *P224Point) ScalarMult(q *P224Point, scalar []byte) (*P224Point, error) {
- // table holds the first 16 multiples of q. The explicit newP224Point calls
- // get inlined, letting the allocations live on the stack.
- var table = [16]*P224Point{
- NewP224Point(), NewP224Point(), NewP224Point(), NewP224Point(),
- NewP224Point(), NewP224Point(), NewP224Point(), NewP224Point(),
+ // Compute a p224Table for the base point q. The explicit NewP224Point
+ // calls get inlined, letting the allocations live on the stack.
+ var table = p224Table{NewP224Point(), NewP224Point(), NewP224Point(),
NewP224Point(), NewP224Point(), NewP224Point(), NewP224Point(),
NewP224Point(), NewP224Point(), NewP224Point(), NewP224Point(),
- }
- for i := 1; i < 16; i++ {
- table[i].Add(table[i-1], q)
+ NewP224Point(), NewP224Point(), NewP224Point(), NewP224Point()}
+ table[0].Set(q)
+ for i := 1; i < 15; i += 2 {
+ table[i].Double(table[i/2])
+ table[i+1].Add(table[i], q)
}
// Instead of doing the classic double-and-add chain, we do it with a
// four-bit window: we double four times, and then add [0-15]P.
t := NewP224Point()
p.Set(NewP224Point())
- for _, byte := range scalar {
- p.Double(p)
- p.Double(p)
- p.Double(p)
- p.Double(p)
-
- for i := uint8(0); i < 16; i++ {
- cond := subtle.ConstantTimeByteEq(byte>>4, i)
- t.Select(table[i], t, cond)
+ for i, byte := range scalar {
+ // No need to double on the first iteration, as p is the identity at
+ // this point, and [N]∞ = ∞.
+ if i != 0 {
+ p.Double(p)
+ p.Double(p)
+ p.Double(p)
+ p.Double(p)
}
+
+ windowValue := byte >> 4
+ table.Select(t, windowValue)
p.Add(p, t)
p.Double(p)
p.Double(p)
p.Double(p)
- for i := uint8(0); i < 16; i++ {
- cond := subtle.ConstantTimeByteEq(byte&0b1111, i)
- t.Select(table[i], t, cond)
- }
+ windowValue = byte & 0b1111
+ table.Select(t, windowValue)
p.Add(p, t)
}
return p, nil
}
+var p224GeneratorTable *[p224ElementLength * 2]p224Table
+var p224GeneratorTableOnce sync.Once
+
+// generatorTable returns a sequence of p224Tables. The first table contains
+// multiples of G. Each successive table is the previous table doubled four
+// times.
+func (p *P224Point) generatorTable() *[p224ElementLength * 2]p224Table {
+ p224GeneratorTableOnce.Do(func() {
+ p224GeneratorTable = new([p224ElementLength * 2]p224Table)
+ base := NewP224Generator()
+ for i := 0; i < p224ElementLength*2; i++ {
+ p224GeneratorTable[i][0] = NewP224Point().Set(base)
+ for j := 1; j < 15; j++ {
+ p224GeneratorTable[i][j] = NewP224Point().Add(p224GeneratorTable[i][j-1], base)
+ }
+ base.Double(base)
+ base.Double(base)
+ base.Double(base)
+ base.Double(base)
+ }
+ })
+ return p224GeneratorTable
+}
+
// ScalarBaseMult sets p = scalar * B, where B is the canonical generator, and
// returns p.
func (p *P224Point) ScalarBaseMult(scalar []byte) (*P224Point, error) {
- return p.ScalarMult(NewP224Generator(), scalar)
+ if len(scalar) != p224ElementLength {
+ return nil, errors.New("invalid scalar length")
+ }
+ tables := p.generatorTable()
+
+ // This is also a scalar multiplication with a four-bit window like in
+ // ScalarMult, but in this case the doublings are precomputed. The value
+ // [windowValue]G added at iteration k would normally get doubled
+ // (totIterations-k)×4 times, but with a larger precomputation we can
+ // instead add [2^((totIterations-k)×4)][windowValue]G and avoid the
+ // doublings between iterations.
+ t := NewP224Point()
+ p.Set(NewP224Point())
+ tableIndex := len(tables) - 1
+ for _, byte := range scalar {
+ windowValue := byte >> 4
+ tables[tableIndex].Select(t, windowValue)
+ p.Add(p, t)
+ tableIndex--
+
+ windowValue = byte & 0b1111
+ tables[tableIndex].Select(t, windowValue)
+ p.Add(p, t)
+ tableIndex--
+ }
+
+ return p, nil
}
"crypto/elliptic/internal/fiat"
"crypto/subtle"
"errors"
+ "sync"
)
var p256B, _ = new(fiat.P256Element).SetBytes([]byte{0x5a, 0xc6, 0x35, 0xd8, 0xaa, 0x3a, 0x93, 0xe7, 0xb3, 0xeb, 0xbd, 0x55, 0x76, 0x98, 0x86, 0xbc, 0x65, 0x1d, 0x6, 0xb0, 0xcc, 0x53, 0xb0, 0xf6, 0x3b, 0xce, 0x3c, 0x3e, 0x27, 0xd2, 0x60, 0x4b})
var p256G, _ = NewP256Point().SetBytes([]byte{0x4, 0x6b, 0x17, 0xd1, 0xf2, 0xe1, 0x2c, 0x42, 0x47, 0xf8, 0xbc, 0xe6, 0xe5, 0x63, 0xa4, 0x40, 0xf2, 0x77, 0x3, 0x7d, 0x81, 0x2d, 0xeb, 0x33, 0xa0, 0xf4, 0xa1, 0x39, 0x45, 0xd8, 0x98, 0xc2, 0x96, 0x4f, 0xe3, 0x42, 0xe2, 0xfe, 0x1a, 0x7f, 0x9b, 0x8e, 0xe7, 0xeb, 0x4a, 0x7c, 0xf, 0x9e, 0x16, 0x2b, 0xce, 0x33, 0x57, 0x6b, 0x31, 0x5e, 0xce, 0xcb, 0xb6, 0x40, 0x68, 0x37, 0xbf, 0x51, 0xf5})
+// p256ElementLength is the length of an element of the base or scalar field,
+// which have the same bytes length for all NIST P curves.
const p256ElementLength = 32
// P256Point is a P256 point. The zero value is NOT valid.
return q
}
+// A p256Table holds the first 15 multiples of a point at offset -1, so [1]P
+// is at table[0], [15]P is at table[14], and [0]P is implicitly the identity
+// point.
+type p256Table [15]*P256Point
+
+// Select selects the n-th multiple of the table base point into p. It works in
+// constant time by iterating over every entry of the table. n must be in [0, 15].
+func (table *p256Table) Select(p *P256Point, n uint8) {
+ if n >= 16 {
+ panic("nistec: internal error: p256Table called with out-of-bounds value")
+ }
+ p.Set(NewP256Point())
+ for i := uint8(1); i < 16; i++ {
+ cond := subtle.ConstantTimeByteEq(i, n)
+ p.Select(table[i-1], p, cond)
+ }
+}
+
// ScalarMult sets p = scalar * q, and returns p.
func (p *P256Point) ScalarMult(q *P256Point, scalar []byte) (*P256Point, error) {
- // table holds the first 16 multiples of q. The explicit newP256Point calls
- // get inlined, letting the allocations live on the stack.
- var table = [16]*P256Point{
- NewP256Point(), NewP256Point(), NewP256Point(), NewP256Point(),
- NewP256Point(), NewP256Point(), NewP256Point(), NewP256Point(),
+ // Compute a p256Table for the base point q. The explicit NewP256Point
+ // calls get inlined, letting the allocations live on the stack.
+ var table = p256Table{NewP256Point(), NewP256Point(), NewP256Point(),
NewP256Point(), NewP256Point(), NewP256Point(), NewP256Point(),
NewP256Point(), NewP256Point(), NewP256Point(), NewP256Point(),
- }
- for i := 1; i < 16; i++ {
- table[i].Add(table[i-1], q)
+ NewP256Point(), NewP256Point(), NewP256Point(), NewP256Point()}
+ table[0].Set(q)
+ for i := 1; i < 15; i += 2 {
+ table[i].Double(table[i/2])
+ table[i+1].Add(table[i], q)
}
// Instead of doing the classic double-and-add chain, we do it with a
// four-bit window: we double four times, and then add [0-15]P.
t := NewP256Point()
p.Set(NewP256Point())
- for _, byte := range scalar {
- p.Double(p)
- p.Double(p)
- p.Double(p)
- p.Double(p)
-
- for i := uint8(0); i < 16; i++ {
- cond := subtle.ConstantTimeByteEq(byte>>4, i)
- t.Select(table[i], t, cond)
+ for i, byte := range scalar {
+ // No need to double on the first iteration, as p is the identity at
+ // this point, and [N]∞ = ∞.
+ if i != 0 {
+ p.Double(p)
+ p.Double(p)
+ p.Double(p)
+ p.Double(p)
}
+
+ windowValue := byte >> 4
+ table.Select(t, windowValue)
p.Add(p, t)
p.Double(p)
p.Double(p)
p.Double(p)
- for i := uint8(0); i < 16; i++ {
- cond := subtle.ConstantTimeByteEq(byte&0b1111, i)
- t.Select(table[i], t, cond)
- }
+ windowValue = byte & 0b1111
+ table.Select(t, windowValue)
p.Add(p, t)
}
return p, nil
}
+var p256GeneratorTable *[p256ElementLength * 2]p256Table
+var p256GeneratorTableOnce sync.Once
+
+// generatorTable returns a sequence of p256Tables. The first table contains
+// multiples of G. Each successive table is the previous table doubled four
+// times.
+func (p *P256Point) generatorTable() *[p256ElementLength * 2]p256Table {
+ p256GeneratorTableOnce.Do(func() {
+ p256GeneratorTable = new([p256ElementLength * 2]p256Table)
+ base := NewP256Generator()
+ for i := 0; i < p256ElementLength*2; i++ {
+ p256GeneratorTable[i][0] = NewP256Point().Set(base)
+ for j := 1; j < 15; j++ {
+ p256GeneratorTable[i][j] = NewP256Point().Add(p256GeneratorTable[i][j-1], base)
+ }
+ base.Double(base)
+ base.Double(base)
+ base.Double(base)
+ base.Double(base)
+ }
+ })
+ return p256GeneratorTable
+}
+
// ScalarBaseMult sets p = scalar * B, where B is the canonical generator, and
// returns p.
func (p *P256Point) ScalarBaseMult(scalar []byte) (*P256Point, error) {
- return p.ScalarMult(NewP256Generator(), scalar)
+ if len(scalar) != p256ElementLength {
+ return nil, errors.New("invalid scalar length")
+ }
+ tables := p.generatorTable()
+
+ // This is also a scalar multiplication with a four-bit window like in
+ // ScalarMult, but in this case the doublings are precomputed. The value
+ // [windowValue]G added at iteration k would normally get doubled
+ // (totIterations-k)×4 times, but with a larger precomputation we can
+ // instead add [2^((totIterations-k)×4)][windowValue]G and avoid the
+ // doublings between iterations.
+ t := NewP256Point()
+ p.Set(NewP256Point())
+ tableIndex := len(tables) - 1
+ for _, byte := range scalar {
+ windowValue := byte >> 4
+ tables[tableIndex].Select(t, windowValue)
+ p.Add(p, t)
+ tableIndex--
+
+ windowValue = byte & 0b1111
+ tables[tableIndex].Select(t, windowValue)
+ p.Add(p, t)
+ tableIndex--
+ }
+
+ return p, nil
}
"crypto/elliptic/internal/fiat"
"crypto/subtle"
"errors"
+ "sync"
)
var p384B, _ = new(fiat.P384Element).SetBytes([]byte{0xb3, 0x31, 0x2f, 0xa7, 0xe2, 0x3e, 0xe7, 0xe4, 0x98, 0x8e, 0x5, 0x6b, 0xe3, 0xf8, 0x2d, 0x19, 0x18, 0x1d, 0x9c, 0x6e, 0xfe, 0x81, 0x41, 0x12, 0x3, 0x14, 0x8, 0x8f, 0x50, 0x13, 0x87, 0x5a, 0xc6, 0x56, 0x39, 0x8d, 0x8a, 0x2e, 0xd1, 0x9d, 0x2a, 0x85, 0xc8, 0xed, 0xd3, 0xec, 0x2a, 0xef})
var p384G, _ = NewP384Point().SetBytes([]byte{0x4, 0xaa, 0x87, 0xca, 0x22, 0xbe, 0x8b, 0x5, 0x37, 0x8e, 0xb1, 0xc7, 0x1e, 0xf3, 0x20, 0xad, 0x74, 0x6e, 0x1d, 0x3b, 0x62, 0x8b, 0xa7, 0x9b, 0x98, 0x59, 0xf7, 0x41, 0xe0, 0x82, 0x54, 0x2a, 0x38, 0x55, 0x2, 0xf2, 0x5d, 0xbf, 0x55, 0x29, 0x6c, 0x3a, 0x54, 0x5e, 0x38, 0x72, 0x76, 0xa, 0xb7, 0x36, 0x17, 0xde, 0x4a, 0x96, 0x26, 0x2c, 0x6f, 0x5d, 0x9e, 0x98, 0xbf, 0x92, 0x92, 0xdc, 0x29, 0xf8, 0xf4, 0x1d, 0xbd, 0x28, 0x9a, 0x14, 0x7c, 0xe9, 0xda, 0x31, 0x13, 0xb5, 0xf0, 0xb8, 0xc0, 0xa, 0x60, 0xb1, 0xce, 0x1d, 0x7e, 0x81, 0x9d, 0x7a, 0x43, 0x1d, 0x7c, 0x90, 0xea, 0xe, 0x5f})
+// p384ElementLength is the length of an element of the base or scalar field,
+// which have the same bytes length for all NIST P curves.
const p384ElementLength = 48
// P384Point is a P384 point. The zero value is NOT valid.
return q
}
+// A p384Table holds the first 15 multiples of a point at offset -1, so [1]P
+// is at table[0], [15]P is at table[14], and [0]P is implicitly the identity
+// point.
+type p384Table [15]*P384Point
+
+// Select selects the n-th multiple of the table base point into p. It works in
+// constant time by iterating over every entry of the table. n must be in [0, 15].
+func (table *p384Table) Select(p *P384Point, n uint8) {
+ if n >= 16 {
+ panic("nistec: internal error: p384Table called with out-of-bounds value")
+ }
+ p.Set(NewP384Point())
+ for i := uint8(1); i < 16; i++ {
+ cond := subtle.ConstantTimeByteEq(i, n)
+ p.Select(table[i-1], p, cond)
+ }
+}
+
// ScalarMult sets p = scalar * q, and returns p.
func (p *P384Point) ScalarMult(q *P384Point, scalar []byte) (*P384Point, error) {
- // table holds the first 16 multiples of q. The explicit newP384Point calls
- // get inlined, letting the allocations live on the stack.
- var table = [16]*P384Point{
- NewP384Point(), NewP384Point(), NewP384Point(), NewP384Point(),
- NewP384Point(), NewP384Point(), NewP384Point(), NewP384Point(),
+ // Compute a p384Table for the base point q. The explicit NewP384Point
+ // calls get inlined, letting the allocations live on the stack.
+ var table = p384Table{NewP384Point(), NewP384Point(), NewP384Point(),
NewP384Point(), NewP384Point(), NewP384Point(), NewP384Point(),
NewP384Point(), NewP384Point(), NewP384Point(), NewP384Point(),
- }
- for i := 1; i < 16; i++ {
- table[i].Add(table[i-1], q)
+ NewP384Point(), NewP384Point(), NewP384Point(), NewP384Point()}
+ table[0].Set(q)
+ for i := 1; i < 15; i += 2 {
+ table[i].Double(table[i/2])
+ table[i+1].Add(table[i], q)
}
// Instead of doing the classic double-and-add chain, we do it with a
// four-bit window: we double four times, and then add [0-15]P.
t := NewP384Point()
p.Set(NewP384Point())
- for _, byte := range scalar {
- p.Double(p)
- p.Double(p)
- p.Double(p)
- p.Double(p)
-
- for i := uint8(0); i < 16; i++ {
- cond := subtle.ConstantTimeByteEq(byte>>4, i)
- t.Select(table[i], t, cond)
+ for i, byte := range scalar {
+ // No need to double on the first iteration, as p is the identity at
+ // this point, and [N]∞ = ∞.
+ if i != 0 {
+ p.Double(p)
+ p.Double(p)
+ p.Double(p)
+ p.Double(p)
}
+
+ windowValue := byte >> 4
+ table.Select(t, windowValue)
p.Add(p, t)
p.Double(p)
p.Double(p)
p.Double(p)
- for i := uint8(0); i < 16; i++ {
- cond := subtle.ConstantTimeByteEq(byte&0b1111, i)
- t.Select(table[i], t, cond)
- }
+ windowValue = byte & 0b1111
+ table.Select(t, windowValue)
p.Add(p, t)
}
return p, nil
}
+var p384GeneratorTable *[p384ElementLength * 2]p384Table
+var p384GeneratorTableOnce sync.Once
+
+// generatorTable returns a sequence of p384Tables. The first table contains
+// multiples of G. Each successive table is the previous table doubled four
+// times.
+func (p *P384Point) generatorTable() *[p384ElementLength * 2]p384Table {
+ p384GeneratorTableOnce.Do(func() {
+ p384GeneratorTable = new([p384ElementLength * 2]p384Table)
+ base := NewP384Generator()
+ for i := 0; i < p384ElementLength*2; i++ {
+ p384GeneratorTable[i][0] = NewP384Point().Set(base)
+ for j := 1; j < 15; j++ {
+ p384GeneratorTable[i][j] = NewP384Point().Add(p384GeneratorTable[i][j-1], base)
+ }
+ base.Double(base)
+ base.Double(base)
+ base.Double(base)
+ base.Double(base)
+ }
+ })
+ return p384GeneratorTable
+}
+
// ScalarBaseMult sets p = scalar * B, where B is the canonical generator, and
// returns p.
func (p *P384Point) ScalarBaseMult(scalar []byte) (*P384Point, error) {
- return p.ScalarMult(NewP384Generator(), scalar)
+ if len(scalar) != p384ElementLength {
+ return nil, errors.New("invalid scalar length")
+ }
+ tables := p.generatorTable()
+
+ // This is also a scalar multiplication with a four-bit window like in
+ // ScalarMult, but in this case the doublings are precomputed. The value
+ // [windowValue]G added at iteration k would normally get doubled
+ // (totIterations-k)×4 times, but with a larger precomputation we can
+ // instead add [2^((totIterations-k)×4)][windowValue]G and avoid the
+ // doublings between iterations.
+ t := NewP384Point()
+ p.Set(NewP384Point())
+ tableIndex := len(tables) - 1
+ for _, byte := range scalar {
+ windowValue := byte >> 4
+ tables[tableIndex].Select(t, windowValue)
+ p.Add(p, t)
+ tableIndex--
+
+ windowValue = byte & 0b1111
+ tables[tableIndex].Select(t, windowValue)
+ p.Add(p, t)
+ tableIndex--
+ }
+
+ return p, nil
}
"crypto/elliptic/internal/fiat"
"crypto/subtle"
"errors"
+ "sync"
)
var p521B, _ = new(fiat.P521Element).SetBytes([]byte{0x0, 0x51, 0x95, 0x3e, 0xb9, 0x61, 0x8e, 0x1c, 0x9a, 0x1f, 0x92, 0x9a, 0x21, 0xa0, 0xb6, 0x85, 0x40, 0xee, 0xa2, 0xda, 0x72, 0x5b, 0x99, 0xb3, 0x15, 0xf3, 0xb8, 0xb4, 0x89, 0x91, 0x8e, 0xf1, 0x9, 0xe1, 0x56, 0x19, 0x39, 0x51, 0xec, 0x7e, 0x93, 0x7b, 0x16, 0x52, 0xc0, 0xbd, 0x3b, 0xb1, 0xbf, 0x7, 0x35, 0x73, 0xdf, 0x88, 0x3d, 0x2c, 0x34, 0xf1, 0xef, 0x45, 0x1f, 0xd4, 0x6b, 0x50, 0x3f, 0x0})
var p521G, _ = NewP521Point().SetBytes([]byte{0x4, 0x0, 0xc6, 0x85, 0x8e, 0x6, 0xb7, 0x4, 0x4, 0xe9, 0xcd, 0x9e, 0x3e, 0xcb, 0x66, 0x23, 0x95, 0xb4, 0x42, 0x9c, 0x64, 0x81, 0x39, 0x5, 0x3f, 0xb5, 0x21, 0xf8, 0x28, 0xaf, 0x60, 0x6b, 0x4d, 0x3d, 0xba, 0xa1, 0x4b, 0x5e, 0x77, 0xef, 0xe7, 0x59, 0x28, 0xfe, 0x1d, 0xc1, 0x27, 0xa2, 0xff, 0xa8, 0xde, 0x33, 0x48, 0xb3, 0xc1, 0x85, 0x6a, 0x42, 0x9b, 0xf9, 0x7e, 0x7e, 0x31, 0xc2, 0xe5, 0xbd, 0x66, 0x1, 0x18, 0x39, 0x29, 0x6a, 0x78, 0x9a, 0x3b, 0xc0, 0x4, 0x5c, 0x8a, 0x5f, 0xb4, 0x2c, 0x7d, 0x1b, 0xd9, 0x98, 0xf5, 0x44, 0x49, 0x57, 0x9b, 0x44, 0x68, 0x17, 0xaf, 0xbd, 0x17, 0x27, 0x3e, 0x66, 0x2c, 0x97, 0xee, 0x72, 0x99, 0x5e, 0xf4, 0x26, 0x40, 0xc5, 0x50, 0xb9, 0x1, 0x3f, 0xad, 0x7, 0x61, 0x35, 0x3c, 0x70, 0x86, 0xa2, 0x72, 0xc2, 0x40, 0x88, 0xbe, 0x94, 0x76, 0x9f, 0xd1, 0x66, 0x50})
+// p521ElementLength is the length of an element of the base or scalar field,
+// which have the same bytes length for all NIST P curves.
const p521ElementLength = 66
// P521Point is a P521 point. The zero value is NOT valid.
return q
}
+// A p521Table holds the first 15 multiples of a point at offset -1, so [1]P
+// is at table[0], [15]P is at table[14], and [0]P is implicitly the identity
+// point.
+type p521Table [15]*P521Point
+
+// Select selects the n-th multiple of the table base point into p. It works in
+// constant time by iterating over every entry of the table. n must be in [0, 15].
+func (table *p521Table) Select(p *P521Point, n uint8) {
+ if n >= 16 {
+ panic("nistec: internal error: p521Table called with out-of-bounds value")
+ }
+ p.Set(NewP521Point())
+ for i := uint8(1); i < 16; i++ {
+ cond := subtle.ConstantTimeByteEq(i, n)
+ p.Select(table[i-1], p, cond)
+ }
+}
+
// ScalarMult sets p = scalar * q, and returns p.
func (p *P521Point) ScalarMult(q *P521Point, scalar []byte) (*P521Point, error) {
- // table holds the first 16 multiples of q. The explicit newP521Point calls
- // get inlined, letting the allocations live on the stack.
- var table = [16]*P521Point{
- NewP521Point(), NewP521Point(), NewP521Point(), NewP521Point(),
- NewP521Point(), NewP521Point(), NewP521Point(), NewP521Point(),
+ // Compute a p521Table for the base point q. The explicit NewP521Point
+ // calls get inlined, letting the allocations live on the stack.
+ var table = p521Table{NewP521Point(), NewP521Point(), NewP521Point(),
NewP521Point(), NewP521Point(), NewP521Point(), NewP521Point(),
NewP521Point(), NewP521Point(), NewP521Point(), NewP521Point(),
- }
- for i := 1; i < 16; i++ {
- table[i].Add(table[i-1], q)
+ NewP521Point(), NewP521Point(), NewP521Point(), NewP521Point()}
+ table[0].Set(q)
+ for i := 1; i < 15; i += 2 {
+ table[i].Double(table[i/2])
+ table[i+1].Add(table[i], q)
}
// Instead of doing the classic double-and-add chain, we do it with a
// four-bit window: we double four times, and then add [0-15]P.
t := NewP521Point()
p.Set(NewP521Point())
- for _, byte := range scalar {
- p.Double(p)
- p.Double(p)
- p.Double(p)
- p.Double(p)
-
- for i := uint8(0); i < 16; i++ {
- cond := subtle.ConstantTimeByteEq(byte>>4, i)
- t.Select(table[i], t, cond)
+ for i, byte := range scalar {
+ // No need to double on the first iteration, as p is the identity at
+ // this point, and [N]∞ = ∞.
+ if i != 0 {
+ p.Double(p)
+ p.Double(p)
+ p.Double(p)
+ p.Double(p)
}
+
+ windowValue := byte >> 4
+ table.Select(t, windowValue)
p.Add(p, t)
p.Double(p)
p.Double(p)
p.Double(p)
- for i := uint8(0); i < 16; i++ {
- cond := subtle.ConstantTimeByteEq(byte&0b1111, i)
- t.Select(table[i], t, cond)
- }
+ windowValue = byte & 0b1111
+ table.Select(t, windowValue)
p.Add(p, t)
}
return p, nil
}
+var p521GeneratorTable *[p521ElementLength * 2]p521Table
+var p521GeneratorTableOnce sync.Once
+
+// generatorTable returns a sequence of p521Tables. The first table contains
+// multiples of G. Each successive table is the previous table doubled four
+// times.
+func (p *P521Point) generatorTable() *[p521ElementLength * 2]p521Table {
+ p521GeneratorTableOnce.Do(func() {
+ p521GeneratorTable = new([p521ElementLength * 2]p521Table)
+ base := NewP521Generator()
+ for i := 0; i < p521ElementLength*2; i++ {
+ p521GeneratorTable[i][0] = NewP521Point().Set(base)
+ for j := 1; j < 15; j++ {
+ p521GeneratorTable[i][j] = NewP521Point().Add(p521GeneratorTable[i][j-1], base)
+ }
+ base.Double(base)
+ base.Double(base)
+ base.Double(base)
+ base.Double(base)
+ }
+ })
+ return p521GeneratorTable
+}
+
// ScalarBaseMult sets p = scalar * B, where B is the canonical generator, and
// returns p.
func (p *P521Point) ScalarBaseMult(scalar []byte) (*P521Point, error) {
- return p.ScalarMult(NewP521Generator(), scalar)
+ if len(scalar) != p521ElementLength {
+ return nil, errors.New("invalid scalar length")
+ }
+ tables := p.generatorTable()
+
+ // This is also a scalar multiplication with a four-bit window like in
+ // ScalarMult, but in this case the doublings are precomputed. The value
+ // [windowValue]G added at iteration k would normally get doubled
+ // (totIterations-k)×4 times, but with a larger precomputation we can
+ // instead add [2^((totIterations-k)×4)][windowValue]G and avoid the
+ // doublings between iterations.
+ t := NewP521Point()
+ p.Set(NewP521Point())
+ tableIndex := len(tables) - 1
+ for _, byte := range scalar {
+ windowValue := byte >> 4
+ tables[tableIndex].Select(t, windowValue)
+ p.Add(p, t)
+ tableIndex--
+
+ windowValue = byte & 0b1111
+ tables[tableIndex].Select(t, windowValue)
+ p.Add(p, t)
+ tableIndex--
+ }
+
+ return p, nil
}