From 475d86b6d98a9d7c30d3933d3097727dddb94320 Mon Sep 17 00:00:00 2001 From: Adam Langley Date: Wed, 30 Jan 2013 11:01:19 -0500 Subject: [PATCH] crypto/rc4: add simple amd64 asm implementation. (Although it's still half the speed of OpenSSL.) benchmark old ns/op new ns/op delta BenchmarkRC4_128 1409 398 -71.75% BenchmarkRC4_1K 10920 2898 -73.46% BenchmarkRC4_8K 131323 23083 -82.42% benchmark old MB/s new MB/s speedup BenchmarkRC4_128 90.83 321.43 3.54x BenchmarkRC4_1K 93.77 353.28 3.77x BenchmarkRC4_8K 61.65 350.73 5.69x R=rsc, remyoudompheng CC=golang-dev, jgrahamc https://golang.org/cl/7234055 --- src/pkg/crypto/rc4/rc4.go | 11 ------- src/pkg/crypto/rc4/rc4_amd64.s | 53 ++++++++++++++++++++++++++++++ src/pkg/crypto/rc4/rc4_asm.go | 18 ++++++++++ src/pkg/crypto/rc4/rc4_ref.go | 20 ++++++++++++ src/pkg/crypto/rc4/rc4_test.go | 60 +++++++++++++++++++++++++++++++++- 5 files changed, 150 insertions(+), 12 deletions(-) create mode 100644 src/pkg/crypto/rc4/rc4_amd64.s create mode 100644 src/pkg/crypto/rc4/rc4_asm.go create mode 100644 src/pkg/crypto/rc4/rc4_ref.go diff --git a/src/pkg/crypto/rc4/rc4.go b/src/pkg/crypto/rc4/rc4.go index 1bb278f74a..e0c33fa4b5 100644 --- a/src/pkg/crypto/rc4/rc4.go +++ b/src/pkg/crypto/rc4/rc4.go @@ -42,17 +42,6 @@ func NewCipher(key []byte) (*Cipher, error) { return &c, nil } -// XORKeyStream sets dst to the result of XORing src with the key stream. -// Dst and src may be the same slice but otherwise should not overlap. -func (c *Cipher) XORKeyStream(dst, src []byte) { - for i := range src { - c.i += 1 - c.j += c.s[c.i] - c.s[c.i], c.s[c.j] = c.s[c.j], c.s[c.i] - dst[i] = src[i] ^ c.s[c.s[c.i]+c.s[c.j]] - } -} - // Reset zeros the key data so that it will no longer appear in the // process's memory. func (c *Cipher) Reset() { diff --git a/src/pkg/crypto/rc4/rc4_amd64.s b/src/pkg/crypto/rc4/rc4_amd64.s new file mode 100644 index 0000000000..ffe9ada85b --- /dev/null +++ b/src/pkg/crypto/rc4/rc4_amd64.s @@ -0,0 +1,53 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// func xorKeyStream(dst, src *byte, n int, state *[256]byte, i, j *uint8) +TEXT ·xorKeyStream(SB),7,$0 + MOVQ dst+0(FP), DI + MOVQ src+8(FP), SI + MOVQ n+16(FP), CX + MOVQ state+24(FP), R8 + + MOVQ xPtr+32(FP), AX + MOVBQZX (AX), AX + MOVQ yPtr+40(FP), BX + MOVBQZX (BX), BX + +loop: + CMPQ CX, $0 + JE done + + // c.i += 1 + INCB AX + + // c.j += c.s[c.i] + MOVB (R8)(AX*1), R9 + ADDB R9, BX + + MOVBQZX (R8)(BX*1), R10 + + MOVB R10, (R8)(AX*1) + MOVB R9, (R8)(BX*1) + + // R11 = c.s[c.i]+c.s[c.j] + MOVQ R10, R11 + ADDB R9, R11 + + MOVB (R8)(R11*1), R11 + MOVB (SI), R12 + XORB R11, R12 + MOVB R12, (DI) + + INCQ SI + INCQ DI + DECQ CX + + JMP loop +done: + MOVQ xPtr+32(FP), R8 + MOVB AX, (R8) + MOVQ yPtr+40(FP), R8 + MOVB BX, (R8) + + RET diff --git a/src/pkg/crypto/rc4/rc4_asm.go b/src/pkg/crypto/rc4/rc4_asm.go new file mode 100644 index 0000000000..6bb24398e2 --- /dev/null +++ b/src/pkg/crypto/rc4/rc4_asm.go @@ -0,0 +1,18 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build amd64 + +package rc4 + +func xorKeyStream(dst, src *byte, n int, state *[256]byte, i, j *uint8) + +// XORKeyStream sets dst to the result of XORing src with the key stream. +// Dst and src may be the same slice but otherwise should not overlap. +func (c *Cipher) XORKeyStream(dst, src []byte) { + if len(src) == 0 { + return + } + xorKeyStream(&dst[0], &src[0], len(src), &c.s, &c.i, &c.j) +} diff --git a/src/pkg/crypto/rc4/rc4_ref.go b/src/pkg/crypto/rc4/rc4_ref.go new file mode 100644 index 0000000000..4d47299cbb --- /dev/null +++ b/src/pkg/crypto/rc4/rc4_ref.go @@ -0,0 +1,20 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !amd64 + +package rc4 + +// XORKeyStream sets dst to the result of XORing src with the key stream. +// Dst and src may be the same slice but otherwise should not overlap. +func (c *Cipher) XORKeyStream(dst, src []byte) { + i, j := c.i, c.j + for k, v := range src { + i += 1 + j += c.s[i] + c.s[i], c.s[j] = c.s[j], c.s[i] + dst[k] = v ^ c.s[c.s[i]+c.s[j]] + } + c.i, c.j = i, j +} diff --git a/src/pkg/crypto/rc4/rc4_test.go b/src/pkg/crypto/rc4/rc4_test.go index 6265d9408f..9e12789f7f 100644 --- a/src/pkg/crypto/rc4/rc4_test.go +++ b/src/pkg/crypto/rc4/rc4_test.go @@ -37,6 +37,39 @@ var golden = []rc4Test{ []byte{0x57, 0x69, 0x6b, 0x69}, []byte{0x60, 0x44, 0xdb, 0x6d, 0x41, 0xb7}, }, + { + []byte{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, + []byte{ + 0xde, 0x18, 0x89, 0x41, 0xa3, 0x37, 0x5d, 0x3a, + 0x8a, 0x06, 0x1e, 0x67, 0x57, 0x6e, 0x92, 0x6d, + 0xc7, 0x1a, 0x7f, 0xa3, 0xf0, 0xcc, 0xeb, 0x97, + 0x45, 0x2b, 0x4d, 0x32, 0x27, 0x96, 0x5f, 0x9e, + 0xa8, 0xcc, 0x75, 0x07, 0x6d, 0x9f, 0xb9, 0xc5, + 0x41, 0x7a, 0xa5, 0xcb, 0x30, 0xfc, 0x22, 0x19, + 0x8b, 0x34, 0x98, 0x2d, 0xbb, 0x62, 0x9e, 0xc0, + 0x4b, 0x4f, 0x8b, 0x05, 0xa0, 0x71, 0x08, 0x50, + 0x92, 0xa0, 0xc3, 0x58, 0x4a, 0x48, 0xe4, 0xa3, + 0x0a, 0x39, 0x7b, 0x8a, 0xcd, 0x1d, 0x00, 0x9e, + 0xc8, 0x7d, 0x68, 0x11, 0xf2, 0x2c, 0xf4, 0x9c, + 0xa3, 0xe5, 0x93, 0x54, 0xb9, 0x45, 0x15, 0x35, + 0xa2, 0x18, 0x7a, 0x86, 0x42, 0x6c, 0xca, 0x7d, + 0x5e, 0x82, 0x3e, 0xba, 0x00, 0x44, 0x12, 0x67, + 0x12, 0x57, 0xb8, 0xd8, 0x60, 0xae, 0x4c, 0xbd, + 0x4c, 0x49, 0x06, 0xbb, 0xc5, 0x35, 0xef, 0xe1, + 0x58, 0x7f, 0x08, 0xdb, 0x33, 0x95, 0x5c, 0xdb, + 0xcb, 0xad, 0x9b, 0x10, 0xf5, 0x3f, 0xc4, 0xe5, + 0x2c, 0x59, 0x15, 0x65, 0x51, 0x84, 0x87, 0xfe, + 0x08, 0x4d, 0x0e, 0x3f, 0x03, 0xde, 0xbc, 0xc9, + 0xda, 0x1c, 0xe9, 0x0d, 0x08, 0x5c, 0x2d, 0x8a, + 0x19, 0xd8, 0x37, 0x30, 0x86, 0x16, 0x36, 0x92, + 0x14, 0x2b, 0xd8, 0xfc, 0x5d, 0x7a, 0x73, 0x49, + 0x6a, 0x8e, 0x59, 0xee, 0x7e, 0xcf, 0x6b, 0x94, + 0x06, 0x63, 0xf4, 0xa6, 0xbe, 0xe6, 0x5b, 0xd2, + 0xc8, 0x5c, 0x46, 0x98, 0x6c, 0x1b, 0xef, 0x34, + 0x90, 0xd3, 0x7b, 0x38, 0xda, 0x85, 0xd3, 0x2e, + 0x97, 0x39, 0xcb, 0x23, 0x4a, 0x2b, 0xe7, 0x40, + }, + }, } func TestGolden(t *testing.T) { @@ -51,9 +84,34 @@ func TestGolden(t *testing.T) { c.XORKeyStream(keystream, keystream) for j, v := range keystream { if g.keystream[j] != v { - t.Errorf("Failed at golden index %d", i) + t.Errorf("Failed at golden index %d:\n%x\nvs\n%x", i, keystream, g.keystream) break } } } } + +func benchmark(b *testing.B, size int64) { + buf := make([]byte, size) + c, err := NewCipher(golden[0].key) + if err != nil { + panic(err) + } + b.SetBytes(size) + + for i := 0; i < b.N; i++ { + c.XORKeyStream(buf, buf) + } +} + +func BenchmarkRC4_128(b *testing.B) { + benchmark(b, 128) +} + +func BenchmarkRC4_1K(b *testing.B) { + benchmark(b, 1024) +} + +func BenchmarkRC4_8K(b *testing.B) { + benchmark(b, 8096) +} -- 2.48.1