Cypherpunks repositories - gostls13.git/commitdiff
crypto/rc4: add simple amd64 asm implementation.
author Adam Langley <agl@golang.org>
Wed, 30 Jan 2013 16:01:19 +0000 (11:01 -0500)
committer Adam Langley <agl@golang.org>
Wed, 30 Jan 2013 16:01:19 +0000 (11:01 -0500)
(Although it's still half the speed of OpenSSL.)

benchmark           old ns/op    new ns/op    delta
BenchmarkRC4_128         1409          398  -71.75%
BenchmarkRC4_1K         10920         2898  -73.46%
BenchmarkRC4_8K        131323        23083  -82.42%

benchmark            old MB/s     new MB/s  speedup
BenchmarkRC4_128        90.83       321.43    3.54x
BenchmarkRC4_1K         93.77       353.28    3.77x
BenchmarkRC4_8K         61.65       350.73    5.69x

R=rsc, remyoudompheng
CC=golang-dev, jgrahamc
https://golang.org/cl/7234055

src/pkg/crypto/rc4/rc4.go
src/pkg/crypto/rc4/rc4_amd64.s [new file with mode: 0644]
src/pkg/crypto/rc4/rc4_asm.go [new file with mode: 0644]
src/pkg/crypto/rc4/rc4_ref.go [new file with mode: 0644]
src/pkg/crypto/rc4/rc4_test.go

diff --git a/src/pkg/crypto/rc4/rc4.go b/src/pkg/crypto/rc4/rc4.go
index 1bb278f74a454db37327eaf89b9a0f175d48a3d6..e0c33fa4b587b92615ebf581353fe7135938eb09 100644 (file)
@@ -42,17 +42,6 @@ func NewCipher(key []byte) (*Cipher, error) {
        return &c, nil
 }
 
-// XORKeyStream sets dst to the result of XORing src with the key stream.
-// Dst and src may be the same slice but otherwise should not overlap.
-func (c *Cipher) XORKeyStream(dst, src []byte) {
-       for i := range src {
-               c.i += 1
-               c.j += c.s[c.i]
-               c.s[c.i], c.s[c.j] = c.s[c.j], c.s[c.i]
-               dst[i] = src[i] ^ c.s[c.s[c.i]+c.s[c.j]]
-       }
-}
-
 // Reset zeros the key data so that it will no longer appear in the
 // process's memory.
 func (c *Cipher) Reset() {
diff --git a/src/pkg/crypto/rc4/rc4_amd64.s b/src/pkg/crypto/rc4/rc4_amd64.s
new file mode 100644 (file)
index 0000000..ffe9ada
--- /dev/null
@@ -0,0 +1,53 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// func xorKeyStream(dst, src *byte, n int, state *[256]byte, i, j *uint8)
+//
+// xorKeyStream XORs n bytes read from src with the RC4 key stream and
+// stores the result at dst, advancing the permutation in state as it goes.
+// The index bytes behind i and j are read on entry and written back on
+// exit. The routine performs no bounds checks of its own: it reads and
+// writes exactly n bytes through the raw src and dst pointers.
+TEXT ·xorKeyStream(SB),7,$0
+       // Load the arguments: DI = dst, SI = src, CX = n, R8 = state.
+       MOVQ dst+0(FP), DI
+       MOVQ src+8(FP), SI
+       MOVQ n+16(FP), CX
+       MOVQ state+24(FP), R8
+
+       // xPtr and yPtr are the i and j arguments of the prototype above
+       // (frame offsets 32 and 40). Load the bytes they point at,
+       // zero-extended, so AX and BX can be used as indices into state.
+       MOVQ xPtr+32(FP), AX
+       MOVBQZX (AX), AX
+       MOVQ yPtr+40(FP), BX
+       MOVBQZX (BX), BX
+
+loop:
+       // Stop once all n bytes have been processed.
+       CMPQ CX, $0
+       JE done
+
+       // c.i += 1
+       // (byte-sized increment, so the index wraps from 255 to 0)
+       INCB AX
+
+       // c.j += c.s[c.i]
+       // (R9 keeps the old c.s[c.i] for the swap below)
+       MOVB (R8)(AX*1), R9
+       ADDB R9, BX
+
+       // R10 = old c.s[c.j], zero-extended.
+       MOVBQZX (R8)(BX*1), R10
+
+       // Swap: c.s[c.i], c.s[c.j] = c.s[c.j], c.s[c.i]
+       MOVB R10, (R8)(AX*1)
+       MOVB R9, (R8)(BX*1)
+
+       // R11 = c.s[c.i]+c.s[c.j]
+       // (byte-sized add, so the sum wraps modulo 256)
+       MOVQ R10, R11
+       ADDB R9, R11
+
+       // Fetch the key stream byte c.s[R11] and XOR it with the next
+       // source byte to produce the next destination byte.
+       MOVB (R8)(R11*1), R11
+       MOVB (SI), R12
+       XORB R11, R12
+       MOVB R12, (DI)
+
+       // Advance both pointers and count down the remaining bytes.
+       INCQ SI
+       INCQ DI
+       DECQ CX
+
+       JMP loop
+done:
+       // Write the updated indices back through the i and j pointers.
+       // R8 is reused as scratch here; state is no longer needed.
+       MOVQ xPtr+32(FP), R8
+       MOVB AX, (R8)
+       MOVQ yPtr+40(FP), R8
+       MOVB BX, (R8)
+
+       RET
diff --git a/src/pkg/crypto/rc4/rc4_asm.go b/src/pkg/crypto/rc4/rc4_asm.go
new file mode 100644 (file)
index 0000000..6bb2439
--- /dev/null
@@ -0,0 +1,18 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build amd64
+
+package rc4
+
+// xorKeyStream is implemented in rc4_amd64.s. It XORs n bytes starting at
+// src with the key stream, stores the result starting at dst, and updates
+// state, *i and *j in place. It does not bounds-check dst or src.
+func xorKeyStream(dst, src *byte, n int, state *[256]byte, i, j *uint8)
+
+// XORKeyStream sets dst to the result of XORing src with the key stream.
+// Dst and src may be the same slice but otherwise should not overlap.
+// It panics if dst is shorter than src.
+func (c *Cipher) XORKeyStream(dst, src []byte) {
+       if len(src) == 0 {
+               return
+       }
+       // xorKeyStream writes len(src) bytes through a raw pointer with no
+       // bounds checking of its own, so confirm here that dst is long
+       // enough; otherwise a short dst would let it write past the end of
+       // the slice instead of panicking.
+       _ = dst[len(src)-1]
+       xorKeyStream(&dst[0], &src[0], len(src), &c.s, &c.i, &c.j)
+}
diff --git a/src/pkg/crypto/rc4/rc4_ref.go b/src/pkg/crypto/rc4/rc4_ref.go
new file mode 100644 (file)
index 0000000..4d47299
--- /dev/null
@@ -0,0 +1,20 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !amd64
+
+package rc4
+
+// XORKeyStream XORs each byte of src with the next byte of the cipher's
+// key stream and writes the result to the same position in dst.
+// Dst and src may be the same slice but otherwise should not overlap.
+func (c *Cipher) XORKeyStream(dst, src []byte) {
+       // Work on local copies of the two indices and store them back once
+       // the whole input has been processed.
+       x, y := c.i, c.j
+       for n := 0; n < len(src); n++ {
+               x++
+               y += c.s[x]
+               c.s[x], c.s[y] = c.s[y], c.s[x]
+               dst[n] = src[n] ^ c.s[c.s[x]+c.s[y]]
+       }
+       c.i, c.j = x, y
+}
diff --git a/src/pkg/crypto/rc4/rc4_test.go b/src/pkg/crypto/rc4/rc4_test.go
index 6265d9408f40bd1daf5a63b3539f1652f301901b..9e12789f7f38ec7e033acd85b7163e895f8d10b9 100644 (file)
@@ -37,6 +37,39 @@ var golden = []rc4Test{
                []byte{0x57, 0x69, 0x6b, 0x69},
                []byte{0x60, 0x44, 0xdb, 0x6d, 0x41, 0xb7},
        },
+       {
+               []byte{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+               []byte{
+                       0xde, 0x18, 0x89, 0x41, 0xa3, 0x37, 0x5d, 0x3a,
+                       0x8a, 0x06, 0x1e, 0x67, 0x57, 0x6e, 0x92, 0x6d,
+                       0xc7, 0x1a, 0x7f, 0xa3, 0xf0, 0xcc, 0xeb, 0x97,
+                       0x45, 0x2b, 0x4d, 0x32, 0x27, 0x96, 0x5f, 0x9e,
+                       0xa8, 0xcc, 0x75, 0x07, 0x6d, 0x9f, 0xb9, 0xc5,
+                       0x41, 0x7a, 0xa5, 0xcb, 0x30, 0xfc, 0x22, 0x19,
+                       0x8b, 0x34, 0x98, 0x2d, 0xbb, 0x62, 0x9e, 0xc0,
+                       0x4b, 0x4f, 0x8b, 0x05, 0xa0, 0x71, 0x08, 0x50,
+                       0x92, 0xa0, 0xc3, 0x58, 0x4a, 0x48, 0xe4, 0xa3,
+                       0x0a, 0x39, 0x7b, 0x8a, 0xcd, 0x1d, 0x00, 0x9e,
+                       0xc8, 0x7d, 0x68, 0x11, 0xf2, 0x2c, 0xf4, 0x9c,
+                       0xa3, 0xe5, 0x93, 0x54, 0xb9, 0x45, 0x15, 0x35,
+                       0xa2, 0x18, 0x7a, 0x86, 0x42, 0x6c, 0xca, 0x7d,
+                       0x5e, 0x82, 0x3e, 0xba, 0x00, 0x44, 0x12, 0x67,
+                       0x12, 0x57, 0xb8, 0xd8, 0x60, 0xae, 0x4c, 0xbd,
+                       0x4c, 0x49, 0x06, 0xbb, 0xc5, 0x35, 0xef, 0xe1,
+                       0x58, 0x7f, 0x08, 0xdb, 0x33, 0x95, 0x5c, 0xdb,
+                       0xcb, 0xad, 0x9b, 0x10, 0xf5, 0x3f, 0xc4, 0xe5,
+                       0x2c, 0x59, 0x15, 0x65, 0x51, 0x84, 0x87, 0xfe,
+                       0x08, 0x4d, 0x0e, 0x3f, 0x03, 0xde, 0xbc, 0xc9,
+                       0xda, 0x1c, 0xe9, 0x0d, 0x08, 0x5c, 0x2d, 0x8a,
+                       0x19, 0xd8, 0x37, 0x30, 0x86, 0x16, 0x36, 0x92,
+                       0x14, 0x2b, 0xd8, 0xfc, 0x5d, 0x7a, 0x73, 0x49,
+                       0x6a, 0x8e, 0x59, 0xee, 0x7e, 0xcf, 0x6b, 0x94,
+                       0x06, 0x63, 0xf4, 0xa6, 0xbe, 0xe6, 0x5b, 0xd2,
+                       0xc8, 0x5c, 0x46, 0x98, 0x6c, 0x1b, 0xef, 0x34,
+                       0x90, 0xd3, 0x7b, 0x38, 0xda, 0x85, 0xd3, 0x2e,
+                       0x97, 0x39, 0xcb, 0x23, 0x4a, 0x2b, 0xe7, 0x40,
+               },
+       },
 }
 
 func TestGolden(t *testing.T) {
@@ -51,9 +84,34 @@ func TestGolden(t *testing.T) {
                c.XORKeyStream(keystream, keystream)
                for j, v := range keystream {
                        if g.keystream[j] != v {
-                               t.Errorf("Failed at golden index %d", i)
+                               t.Errorf("Failed at golden index %d:\n%x\nvs\n%x", i, keystream, g.keystream)
                                break
                        }
                }
        }
 }
+
+// benchmark measures XORKeyStream throughput when encrypting a size-byte
+// buffer in place, using the key of the first golden test vector.
+func benchmark(b *testing.B, size int64) {
+       buf := make([]byte, size)
+       c, err := NewCipher(golden[0].key)
+       if err != nil {
+               // Report the failure through the testing framework rather
+               // than panicking.
+               b.Fatal(err)
+       }
+       b.SetBytes(size)
+       // Keep the buffer allocation and key setup out of the timed region.
+       b.ResetTimer()
+
+       for i := 0; i < b.N; i++ {
+               c.XORKeyStream(buf, buf)
+       }
+}
+
+// BenchmarkRC4_128 benchmarks XORKeyStream on a 128-byte buffer.
+func BenchmarkRC4_128(b *testing.B) {
+       const size = 128
+       benchmark(b, size)
+}
+
+// BenchmarkRC4_1K benchmarks XORKeyStream on a 1024-byte buffer.
+func BenchmarkRC4_1K(b *testing.B) {
+       const size = 1 << 10 // 1024 bytes
+       benchmark(b, size)
+}
+
+// BenchmarkRC4_8K benchmarks XORKeyStream on an 8192-byte buffer.
+func BenchmarkRC4_8K(b *testing.B) {
+       const size = 8 << 10 // 8192 bytes, matching the "8K" in the name
+       benchmark(b, size)
+}