build: move package sources from src/pkg to src

Preparation was in CL 134570043. This CL contains only the effect of 'hg mv src/pkg/* src'. For more about the move, see golang.org/s/go14nopkg.
author: Russ Cox <rsc@golang.org> 2014-09-08 00:08:51 -0400
committer: Russ Cox <rsc@golang.org> 2014-09-08 00:08:51 -0400
commit: 8528da672cc093d4dd06732819abc1f7b6b5a46e (patch)
tree: 334be80d4a4c85b77db6f6fdb67cbf0528cba5f5 /src/crypto/rc4
parent: 73bcb69f272cbf34ddcc9daa56427a8683b5a95d (diff)
download: go-8528da672cc093d4dd06732819abc1f7b6b5a46e.tar.gz
8 files changed, 759 insertions, 0 deletions
diff --git a/src/crypto/rc4/rc4.go b/src/crypto/rc4/rc4.go
new file mode 100644
index 000000000..9acb681bf
--- /dev/null
+++ b/src/crypto/rc4/rc4.go
@@ -0,0 +1,69 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package rc4 implements RC4 encryption, as defined in Bruce Schneier's
+// Applied Cryptography.
+package rc4
+
+// BUG(agl): RC4 is in common use but has design weaknesses that make
+// it a poor choice for new protocols.
+
+import "strconv"
+
+// A Cipher is an instance of RC4 using a particular key.
+type Cipher struct {
+	s    [256]uint32
+	i, j uint8
+}
+
+type KeySizeError int
+
+func (k KeySizeError) Error() string {
+	return "crypto/rc4: invalid key size " + strconv.Itoa(int(k))
+}
+
+// NewCipher creates and returns a new Cipher.  The key argument should be the
+// RC4 key, at least 1 byte and at most 256 bytes.
+func NewCipher(key []byte) (*Cipher, error) {
+	k := len(key)
+	if k < 1 || k > 256 {
+		return nil, KeySizeError(k)
+	}
+	var c Cipher
+	for i := 0; i < 256; i++ {
+		c.s[i] = uint32(i)
+	}
+	var j uint8 = 0
+	for i := 0; i < 256; i++ {
+		j += uint8(c.s[i]) + key[i%k]
+		c.s[i], c.s[j] = c.s[j], c.s[i]
+	}
+	return &c, nil
+}
+
+// Reset zeros the key data so that it will no longer appear in the
+// process's memory.
+func (c *Cipher) Reset() {
+	for i := range c.s {
+		c.s[i] = 0
+	}
+	c.i, c.j = 0, 0
+}
+
+// xorKeyStreamGeneric sets dst to the result of XORing src with the
+// key stream.  Dst and src may be the same slice but otherwise should
+// not overlap.
+//
+// This is the pure Go version. rc4_{amd64,386,arm}* contain assembly
+// implementations. This is here for tests and to prevent bitrot.
+func (c *Cipher) xorKeyStreamGeneric(dst, src []byte) {
+	i, j := c.i, c.j
+	for k, v := range src {
+		i += 1
+		j += uint8(c.s[i])
+		c.s[i], c.s[j] = c.s[j], c.s[i]
+		dst[k] = v ^ uint8(c.s[uint8(c.s[i]+c.s[j])])
+	}
+	c.i, c.j = i, j
+}
diff --git a/src/crypto/rc4/rc4_386.s b/src/crypto/rc4/rc4_386.s
new file mode 100644
index 000000000..54221036b
--- /dev/null
+++ b/src/crypto/rc4/rc4_386.s
@@ -0,0 +1,53 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "textflag.h"
+
+// func xorKeyStream(dst, src *byte, n int, state *[256]byte, i, j *uint8)
+TEXT ·xorKeyStream(SB),NOSPLIT,$0
+	MOVL dst+0(FP), DI
+	MOVL src+4(FP), SI
+	MOVL state+12(FP), BP
+
+	MOVL i+16(FP), AX
+	MOVBLZX (AX), AX
+	MOVL j+20(FP), BX
+	MOVBLZX (BX), BX
+	CMPL n+8(FP), $0
+	JEQ done
+
+loop:
+	// i += 1
+	INCB AX
+
+	// j += c.s[i]
+	MOVBLZX (BP)(AX*4), DX
+	ADDB DX, BX
+	MOVBLZX BX, BX
+
+	// c.s[i], c.s[j] = c.s[j], c.s[i]
+	MOVBLZX (BP)(BX*4), CX
+	MOVB CX, (BP)(AX*4)
+	MOVB DX, (BP)(BX*4)
+
+	// *dst = *src ^ c.s[c.s[i]+c.s[j]]
+	ADDB DX, CX
+	MOVBLZX CX, CX
+	MOVB (BP)(CX*4), CX
+	XORB (SI), CX
+	MOVBLZX CX, CX
+	MOVB CX, (DI)
+
+	INCL SI
+	INCL DI
+	DECL n+8(FP)
+	JNE loop
+
+done:
+	MOVL i+16(FP), CX
+	MOVB AX, (CX)
+	MOVL j+20(FP), CX
+	MOVB BX, (CX)
+
+	RET
diff --git a/src/crypto/rc4/rc4_amd64.s b/src/crypto/rc4/rc4_amd64.s
new file mode 100644
index 000000000..57d941c8f
--- /dev/null
+++ b/src/crypto/rc4/rc4_amd64.s
@@ -0,0 +1,179 @@
+// Original source:
+//	http://www.zorinaq.com/papers/rc4-amd64.html
+//	http://www.zorinaq.com/papers/rc4-amd64.tar.bz2
+
+#include "textflag.h"
+
+// Local modifications:
+//
+// Transliterated from GNU to 6a assembly syntax by the Go authors.
+// The comments and spacing are from the original.
+//
+// The new EXTEND macros avoid a bad stall on some systems after 8-bit math.
+//
+// The original code accumulated 64 bits of key stream in an integer
+// register and then XOR'ed the key stream into the data 8 bytes at a time.
+// Modified to accumulate 128 bits of key stream into an XMM register
+// and then XOR the key stream into the data 16 bytes at a time.
+// Approximately doubles throughput.
+
+// NOTE: Changing EXTEND to a no-op makes the code run 1.2x faster on Core i5
+// but makes the code run 2.0x slower on Xeon.
+#define EXTEND(r) MOVBLZX r, r
+
+/*
+** RC4 implementation optimized for AMD64.
+**
+** Author: Marc Bevand <bevand_m (at) epita.fr>
+** Licence: I hereby disclaim the copyright on this code and place it
+** in the public domain.
+**
+** The code has been designed to be easily integrated into openssl:
+** the exported RC4() function can replace the actual implementations
+** openssl already contains. Please note that when linking with openssl,
+** it requires that sizeof(RC4_INT) == 8. So openssl must be compiled
+** with -DRC4_INT='unsigned long'.
+**
+** The throughput achieved by this code is about 320 MBytes/sec, on
+** a 1.8 GHz AMD Opteron (rev C0) processor.
+*/
+
+TEXT ·xorKeyStream(SB),NOSPLIT,$0
+	MOVQ	n+16(FP),	BX		// rbx = ARG(len)
+	MOVQ	src+8(FP),	SI		// in = ARG(in)
+	MOVQ	dst+0(FP),	DI		// out = ARG(out)
+	MOVQ	state+24(FP),	BP		// d = ARG(data)
+	MOVQ	i+32(FP),	AX
+	MOVBQZX	0(AX),		CX		// x = *xp
+	MOVQ	j+40(FP),	AX
+	MOVBQZX	0(AX),		DX		// y = *yp
+
+	LEAQ	(SI)(BX*1),	R9		// limit = in+len
+
+l1:	CMPQ	SI,		R9		// cmp in with in+len
+	JGE	finished			// jump if (in >= in+len)
+
+	INCB	CX
+	EXTEND(CX)
+	TESTL	$15,		CX
+	JZ	wordloop
+
+	MOVBLZX	(BP)(CX*4),	AX
+
+	ADDB	AX,		DX		// y += tx
+	EXTEND(DX)
+	MOVBLZX	(BP)(DX*4),	BX		// ty = d[y]
+	MOVB	BX,		(BP)(CX*4)	// d[x] = ty
+	ADDB	AX,		BX		// val = ty+tx
+	EXTEND(BX)
+	MOVB	AX,		(BP)(DX*4)	// d[y] = tx
+	MOVBLZX	(BP)(BX*4),	R8		// val = d[val]
+	XORB	(SI),		R8		// xor 1 byte
+	MOVB	R8,		(DI)
+	INCQ	SI				// in++
+	INCQ	DI				// out++
+	JMP l1
+
+wordloop:
+	SUBQ	$16,		R9
+	CMPQ	SI,		R9
+	JGT	end
+
+start:
+	ADDQ	$16,		SI		// increment in
+	ADDQ	$16,		DI		// increment out
+
+	// Each KEYROUND generates one byte of key and
+	// inserts it into an XMM register at the given 16-bit index.
+	// The key state array is uint32 words only using the bottom
+	// byte of each word, so the 16-bit OR only copies 8 useful bits.
+	// We accumulate alternating bytes into X0 and X1, and then at
+	// the end we OR X1<<8 into X0 to produce the actual key.
+	//
+	// At the beginning of the loop, CX%16 == 0, so the 16 loads
+	// at state[CX], state[CX+1], ..., state[CX+15] can precompute
+	// (state+CX) as R12 and then become R12[0], R12[1], ... R12[15],
+	// without fear of the byte computation CX+15 wrapping around.
+	//
+	// The first round needs R12[0], the second needs R12[1], and so on.
+	// We can avoid memory stalls by starting the load for round n+1
+	// before the end of round n, using the LOAD macro.
+	LEAQ	(BP)(CX*4),	R12
+
+#define KEYROUND(xmm, load, off, r1, r2, index) \
+	MOVBLZX	(BP)(DX*4),	R8; \
+	MOVB	r1,		(BP)(DX*4); \
+	load((off+1), r2); \
+	MOVB	R8,		(off*4)(R12); \
+	ADDB	r1,		R8; \
+	EXTEND(R8); \
+	PINSRW	$index, (BP)(R8*4), xmm
+
+#define LOAD(off, reg) \
+	MOVBLZX	(off*4)(R12),	reg; \
+	ADDB	reg,		DX; \
+	EXTEND(DX)
+
+#define SKIP(off, reg)
+
+	LOAD(0, AX)
+	KEYROUND(X0, LOAD, 0, AX, BX, 0)
+	KEYROUND(X1, LOAD, 1, BX, AX, 0)
+	KEYROUND(X0, LOAD, 2, AX, BX, 1)
+	KEYROUND(X1, LOAD, 3, BX, AX, 1)
+	KEYROUND(X0, LOAD, 4, AX, BX, 2)
+	KEYROUND(X1, LOAD, 5, BX, AX, 2)
+	KEYROUND(X0, LOAD, 6, AX, BX, 3)
+	KEYROUND(X1, LOAD, 7, BX, AX, 3)
+	KEYROUND(X0, LOAD, 8, AX, BX, 4)
+	KEYROUND(X1, LOAD, 9, BX, AX, 4)
+	KEYROUND(X0, LOAD, 10, AX, BX, 5)
+	KEYROUND(X1, LOAD, 11, BX, AX, 5)
+	KEYROUND(X0, LOAD, 12, AX, BX, 6)
+	KEYROUND(X1, LOAD, 13, BX, AX, 6)
+	KEYROUND(X0, LOAD, 14, AX, BX, 7)
+	KEYROUND(X1, SKIP, 15, BX, AX, 7)
+	
+	ADDB	$16,		CX
+
+	PSLLQ	$8,		X1
+	PXOR	X1,		X0
+	MOVOU	-16(SI),	X2
+	PXOR	X0,		X2
+	MOVOU	X2,		-16(DI)
+
+	CMPQ	SI,		R9		// cmp in with in+len-16
+	JLE	start				// jump if (in <= in+len-16)
+
+end:
+	DECB	CX
+	ADDQ	$16,		R9		// tmp = in+len
+
+	// handle the last bytes, one by one
+l2:	CMPQ	SI,		R9		// cmp in with in+len
+	JGE	finished			// jump if (in >= in+len)
+
+	INCB	CX
+	EXTEND(CX)
+	MOVBLZX	(BP)(CX*4),	AX
+
+	ADDB	AX,		DX		// y += tx
+	EXTEND(DX)
+	MOVBLZX	(BP)(DX*4),	BX		// ty = d[y]
+	MOVB	BX,		(BP)(CX*4)	// d[x] = ty
+	ADDB	AX,		BX		// val = ty+tx
+	EXTEND(BX)
+	MOVB	AX,		(BP)(DX*4)	// d[y] = tx
+	MOVBLZX	(BP)(BX*4),	R8		// val = d[val]
+	XORB	(SI),		R8		// xor 1 byte
+	MOVB	R8,		(DI)
+	INCQ	SI				// in++
+	INCQ	DI				// out++
+	JMP l2
+
+finished:
+	MOVQ	j+40(FP),	BX
+	MOVB	DX, 0(BX)
+	MOVQ	i+32(FP),	AX
+	MOVB	CX, 0(AX)
+	RET
diff --git a/src/crypto/rc4/rc4_amd64p32.s b/src/crypto/rc4/rc4_amd64p32.s
new file mode 100644
index 000000000..970b34e08
--- /dev/null
+++ b/src/crypto/rc4/rc4_amd64p32.s
@@ -0,0 +1,192 @@
+// Original source:
+//	http://www.zorinaq.com/papers/rc4-amd64.html
+//	http://www.zorinaq.com/papers/rc4-amd64.tar.bz2
+
+#include "textflag.h"
+
+// Local modifications:
+//
+// Transliterated from GNU to 6a assembly syntax by the Go authors.
+// The comments and spacing are from the original.
+//
+// The new EXTEND macros avoid a bad stall on some systems after 8-bit math.
+//
+// The original code accumulated 64 bits of key stream in an integer
+// register and then XOR'ed the key stream into the data 8 bytes at a time.
+// Modified to accumulate 128 bits of key stream into an XMM register
+// and then XOR the key stream into the data 16 bytes at a time.
+// Approximately doubles throughput.
+//
+// Converted to amd64p32.
+//
+// To make safe for Native Client, avoid use of BP, R15,
+// and two-register addressing modes.
+
+// NOTE: Changing EXTEND to a no-op makes the code run 1.2x faster on Core i5
+// but makes the code run 2.0x slower on Xeon.
+#define EXTEND(r) MOVBLZX r, r
+
+/*
+** RC4 implementation optimized for AMD64.
+**
+** Author: Marc Bevand <bevand_m (at) epita.fr>
+** Licence: I hereby disclaim the copyright on this code and place it
+** in the public domain.
+**
+** The code has been designed to be easily integrated into openssl:
+** the exported RC4() function can replace the actual implementations
+** openssl already contains. Please note that when linking with openssl,
+** it requires that sizeof(RC4_INT) == 8. So openssl must be compiled
+** with -DRC4_INT='unsigned long'.
+**
+** The throughput achieved by this code is about 320 MBytes/sec, on
+** a 1.8 GHz AMD Opteron (rev C0) processor.
+*/
+
+TEXT ·xorKeyStream(SB),NOSPLIT,$0
+	MOVL	n+8(FP),	BX		// rbx = ARG(len)
+	MOVL	src+4(FP),	SI		// in = ARG(in)
+	MOVL	dst+0(FP),	DI		// out = ARG(out)
+	MOVL	state+12(FP),	R10		// d = ARG(data)
+	MOVL	i+16(FP),	AX
+	MOVBQZX	0(AX),		CX		// x = *xp
+	MOVL	j+20(FP),	AX
+	MOVBQZX	0(AX),		DX		// y = *yp
+
+	LEAQ	(SI)(BX*1),	R9		// limit = in+len
+
+l1:	CMPQ	SI,		R9		// cmp in with in+len
+	JGE	finished			// jump if (in >= in+len)
+
+	INCB	CX
+	EXTEND(CX)
+	TESTL	$15,		CX
+	JZ	wordloop
+	LEAL	(R10)(CX*4), R12
+
+	MOVBLZX	(R12),	AX
+
+	ADDB	AX,		DX		// y += tx
+	EXTEND(DX)
+	LEAL (R10)(DX*4), R11
+	MOVBLZX	(R11),	BX		// ty = d[y]
+	MOVB	BX,		(R12)	// d[x] = ty
+	ADDB	AX,		BX		// val = ty+tx
+	EXTEND(BX)
+	LEAL (R10)(BX*4), R13
+	MOVB	AX,		(R11)	// d[y] = tx
+	MOVBLZX	(R13),	R8		// val = d[val]
+	XORB	(SI),		R8		// xor 1 byte
+	MOVB	R8,		(DI)
+	INCQ	SI				// in++
+	INCQ	DI				// out++
+	JMP l1
+
+wordloop:
+	SUBQ	$16,		R9
+	CMPQ	SI,		R9
+	JGT	end
+
+start:
+	ADDQ	$16,		SI		// increment in
+	ADDQ	$16,		DI		// increment out
+
+	// Each KEYROUND generates one byte of key and
+	// inserts it into an XMM register at the given 16-bit index.
+	// The key state array is uint32 words only using the bottom
+	// byte of each word, so the 16-bit OR only copies 8 useful bits.
+	// We accumulate alternating bytes into X0 and X1, and then at
+	// the end we OR X1<<8 into X0 to produce the actual key.
+	//
+	// At the beginning of the loop, CX%16 == 0, so the 16 loads
+	// at state[CX], state[CX+1], ..., state[CX+15] can precompute
+	// (state+CX) as R12 and then become R12[0], R12[1], ... R12[15],
+	// without fear of the byte computation CX+15 wrapping around.
+	//
+	// The first round needs R12[0], the second needs R12[1], and so on.
+	// We can avoid memory stalls by starting the load for round n+1
+	// before the end of round n, using the LOAD macro.
+	LEAQ	(R10)(CX*4),	R12
+
+#define KEYROUND(xmm, load, off, r1, r2, index) \
+	LEAL (R10)(DX*4), R11; \
+	MOVBLZX	(R11),	R8; \
+	MOVB	r1,		(R11); \
+	load((off+1), r2); \
+	MOVB	R8,		(off*4)(R12); \
+	ADDB	r1,		R8; \
+	EXTEND(R8); \
+	LEAL (R10)(R8*4), R14; \
+	PINSRW	$index, (R14), xmm
+
+#define LOAD(off, reg) \
+	MOVBLZX	(off*4)(R12),	reg; \
+	ADDB	reg,		DX; \
+	EXTEND(DX)
+
+#define SKIP(off, reg)
+
+	LOAD(0, AX)
+	KEYROUND(X0, LOAD, 0, AX, BX, 0)
+	KEYROUND(X1, LOAD, 1, BX, AX, 0)
+	KEYROUND(X0, LOAD, 2, AX, BX, 1)
+	KEYROUND(X1, LOAD, 3, BX, AX, 1)
+	KEYROUND(X0, LOAD, 4, AX, BX, 2)
+	KEYROUND(X1, LOAD, 5, BX, AX, 2)
+	KEYROUND(X0, LOAD, 6, AX, BX, 3)
+	KEYROUND(X1, LOAD, 7, BX, AX, 3)
+	KEYROUND(X0, LOAD, 8, AX, BX, 4)
+	KEYROUND(X1, LOAD, 9, BX, AX, 4)
+	KEYROUND(X0, LOAD, 10, AX, BX, 5)
+	KEYROUND(X1, LOAD, 11, BX, AX, 5)
+	KEYROUND(X0, LOAD, 12, AX, BX, 6)
+	KEYROUND(X1, LOAD, 13, BX, AX, 6)
+	KEYROUND(X0, LOAD, 14, AX, BX, 7)
+	KEYROUND(X1, SKIP, 15, BX, AX, 7)
+	
+	ADDB	$16,		CX
+
+	PSLLQ	$8,		X1
+	PXOR	X1,		X0
+	MOVOU	-16(SI),	X2
+	PXOR	X0,		X2
+	MOVOU	X2,		-16(DI)
+
+	CMPQ	SI,		R9		// cmp in with in+len-16
+	JLE	start				// jump if (in <= in+len-16)
+
+end:
+	DECB	CX
+	ADDQ	$16,		R9		// tmp = in+len
+
+	// handle the last bytes, one by one
+l2:	CMPQ	SI,		R9		// cmp in with in+len
+	JGE	finished			// jump if (in >= in+len)
+
+	INCB	CX
+	EXTEND(CX)
+	LEAL (R10)(CX*4), R12
+	MOVBLZX	(R12),	AX
+
+	ADDB	AX,		DX		// y += tx
+	EXTEND(DX)
+	LEAL (R10)(DX*4), R11
+	MOVBLZX	(R11),	BX		// ty = d[y]
+	MOVB	BX,		(R12)	// d[x] = ty
+	ADDB	AX,		BX		// val = ty+tx
+	EXTEND(BX)
+	LEAL (R10)(BX*4), R13
+	MOVB	AX,		(R11)	// d[y] = tx
+	MOVBLZX	(R13),	R8		// val = d[val]
+	XORB	(SI),		R8		// xor 1 byte
+	MOVB	R8,		(DI)
+	INCQ	SI				// in++
+	INCQ	DI				// out++
+	JMP l2
+
+finished:
+	MOVL	j+20(FP),	BX
+	MOVB	DX, 0(BX)
+	MOVL	i+16(FP),	AX
+	MOVB	CX, 0(AX)
+	RET
diff --git a/src/crypto/rc4/rc4_arm.s b/src/crypto/rc4/rc4_arm.s
new file mode 100644
index 000000000..51be3bf95
--- /dev/null
+++ b/src/crypto/rc4/rc4_arm.s
@@ -0,0 +1,62 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !nacl
+
+#include "textflag.h"
+
+// Registers
+dst = 0
+src = 1
+n = 2
+state = 3
+pi = 4
+pj = 5
+i = 6
+j = 7
+k = 8
+t = 11
+t2 = 12
+
+// func xorKeyStream(dst, src *byte, n int, state *[256]byte, i, j *uint8)
+TEXT ·xorKeyStream(SB),NOSPLIT,$0
+	MOVW 0(FP), R(dst)
+	MOVW 4(FP), R(src)
+	MOVW 8(FP), R(n)
+	MOVW 12(FP), R(state)
+	MOVW 16(FP), R(pi)
+	MOVW 20(FP), R(pj)
+	MOVBU (R(pi)), R(i)
+	MOVBU (R(pj)), R(j)
+	MOVW $0, R(k)
+
+loop:
+	// i += 1; j += state[i]
+	ADD $1, R(i)
+	AND $0xff, R(i)
+	MOVBU R(i)<<2(R(state)), R(t)
+	ADD R(t), R(j)
+	AND $0xff, R(j)
+
+	// swap state[i] <-> state[j]
+	MOVBU R(j)<<2(R(state)), R(t2)
+	MOVB R(t2), R(i)<<2(R(state))
+	MOVB R(t), R(j)<<2(R(state))
+
+	// dst[k] = src[k] ^ state[state[i] + state[j]]
+	ADD R(t2), R(t)
+	AND $0xff, R(t)
+	MOVBU R(t)<<2(R(state)), R(t)
+	MOVBU R(k)<<0(R(src)), R(t2)
+	EOR R(t), R(t2)
+	MOVB R(t2), R(k)<<0(R(dst))
+
+	ADD $1, R(k)
+	CMP R(k), R(n)
+	BNE loop
+
+done:
+	MOVB R(i), (R(pi))
+	MOVB R(j), (R(pj))
+	RET
diff --git a/src/crypto/rc4/rc4_asm.go b/src/crypto/rc4/rc4_asm.go
new file mode 100644
index 000000000..02e5b67d5
--- /dev/null
+++ b/src/crypto/rc4/rc4_asm.go
@@ -0,0 +1,18 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build amd64 amd64p32 arm,!nacl 386
+
+package rc4
+
+func xorKeyStream(dst, src *byte, n int, state *[256]uint32, i, j *uint8)
+
+// XORKeyStream sets dst to the result of XORing src with the key stream.
+// Dst and src may be the same slice but otherwise should not overlap.
+func (c *Cipher) XORKeyStream(dst, src []byte) {
+	if len(src) == 0 {
+		return
+	}
+	xorKeyStream(&dst[0], &src[0], len(src), &c.s, &c.i, &c.j)
+}
diff --git a/src/crypto/rc4/rc4_ref.go b/src/crypto/rc4/rc4_ref.go
new file mode 100644
index 000000000..e34bd34cf
--- /dev/null
+++ b/src/crypto/rc4/rc4_ref.go
@@ -0,0 +1,13 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !amd64,!amd64p32,!arm,!386 arm,nacl
+
+package rc4
+
+// XORKeyStream sets dst to the result of XORing src with the key stream.
+// Dst and src may be the same slice but otherwise should not overlap.
+func (c *Cipher) XORKeyStream(dst, src []byte) {
+	c.xorKeyStreamGeneric(dst, src)
+}
diff --git a/src/crypto/rc4/rc4_test.go b/src/crypto/rc4/rc4_test.go
new file mode 100644
index 000000000..af7988246
--- /dev/null
+++ b/src/crypto/rc4/rc4_test.go
@@ -0,0 +1,173 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package rc4
+
+import (
+	"bytes"
+	"fmt"
+	"testing"
+)
+
+type rc4Test struct {
+	key, keystream []byte
+}
+
+var golden = []rc4Test{
+	// Test vectors from the original cypherpunk posting of ARC4:
+	//   http://groups.google.com/group/sci.crypt/msg/10a300c9d21afca0?pli=1
+	{
+		[]byte{0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef},
+		[]byte{0x74, 0x94, 0xc2, 0xe7, 0x10, 0x4b, 0x08, 0x79},
+	},
+	{
+		[]byte{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		[]byte{0xde, 0x18, 0x89, 0x41, 0xa3, 0x37, 0x5d, 0x3a},
+	},
+	{
+		[]byte{0xef, 0x01, 0x23, 0x45},
+		[]byte{0xd6, 0xa1, 0x41, 0xa7, 0xec, 0x3c, 0x38, 0xdf, 0xbd, 0x61},
+	},
+
+	// Test vectors from the Wikipedia page: http://en.wikipedia.org/wiki/RC4
+	{
+		[]byte{0x4b, 0x65, 0x79},
+		[]byte{0xeb, 0x9f, 0x77, 0x81, 0xb7, 0x34, 0xca, 0x72, 0xa7, 0x19},
+	},
+	{
+		[]byte{0x57, 0x69, 0x6b, 0x69},
+		[]byte{0x60, 0x44, 0xdb, 0x6d, 0x41, 0xb7},
+	},
+	{
+		[]byte{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+		[]byte{
+			0xde, 0x18, 0x89, 0x41, 0xa3, 0x37, 0x5d, 0x3a,
+			0x8a, 0x06, 0x1e, 0x67, 0x57, 0x6e, 0x92, 0x6d,
+			0xc7, 0x1a, 0x7f, 0xa3, 0xf0, 0xcc, 0xeb, 0x97,
+			0x45, 0x2b, 0x4d, 0x32, 0x27, 0x96, 0x5f, 0x9e,
+			0xa8, 0xcc, 0x75, 0x07, 0x6d, 0x9f, 0xb9, 0xc5,
+			0x41, 0x7a, 0xa5, 0xcb, 0x30, 0xfc, 0x22, 0x19,
+			0x8b, 0x34, 0x98, 0x2d, 0xbb, 0x62, 0x9e, 0xc0,
+			0x4b, 0x4f, 0x8b, 0x05, 0xa0, 0x71, 0x08, 0x50,
+			0x92, 0xa0, 0xc3, 0x58, 0x4a, 0x48, 0xe4, 0xa3,
+			0x0a, 0x39, 0x7b, 0x8a, 0xcd, 0x1d, 0x00, 0x9e,
+			0xc8, 0x7d, 0x68, 0x11, 0xf2, 0x2c, 0xf4, 0x9c,
+			0xa3, 0xe5, 0x93, 0x54, 0xb9, 0x45, 0x15, 0x35,
+			0xa2, 0x18, 0x7a, 0x86, 0x42, 0x6c, 0xca, 0x7d,
+			0x5e, 0x82, 0x3e, 0xba, 0x00, 0x44, 0x12, 0x67,
+			0x12, 0x57, 0xb8, 0xd8, 0x60, 0xae, 0x4c, 0xbd,
+			0x4c, 0x49, 0x06, 0xbb, 0xc5, 0x35, 0xef, 0xe1,
+			0x58, 0x7f, 0x08, 0xdb, 0x33, 0x95, 0x5c, 0xdb,
+			0xcb, 0xad, 0x9b, 0x10, 0xf5, 0x3f, 0xc4, 0xe5,
+			0x2c, 0x59, 0x15, 0x65, 0x51, 0x84, 0x87, 0xfe,
+			0x08, 0x4d, 0x0e, 0x3f, 0x03, 0xde, 0xbc, 0xc9,
+			0xda, 0x1c, 0xe9, 0x0d, 0x08, 0x5c, 0x2d, 0x8a,
+			0x19, 0xd8, 0x37, 0x30, 0x86, 0x16, 0x36, 0x92,
+			0x14, 0x2b, 0xd8, 0xfc, 0x5d, 0x7a, 0x73, 0x49,
+			0x6a, 0x8e, 0x59, 0xee, 0x7e, 0xcf, 0x6b, 0x94,
+			0x06, 0x63, 0xf4, 0xa6, 0xbe, 0xe6, 0x5b, 0xd2,
+			0xc8, 0x5c, 0x46, 0x98, 0x6c, 0x1b, 0xef, 0x34,
+			0x90, 0xd3, 0x7b, 0x38, 0xda, 0x85, 0xd3, 0x2e,
+			0x97, 0x39, 0xcb, 0x23, 0x4a, 0x2b, 0xe7, 0x40,
+		},
+	},
+}
+
+func testEncrypt(t *testing.T, desc string, c *Cipher, src, expect []byte) {
+	dst := make([]byte, len(src))
+	c.XORKeyStream(dst, src)
+	for i, v := range dst {
+		if v != expect[i] {
+			t.Fatalf("%s: mismatch at byte %d:\nhave %x\nwant %x", desc, i, dst, expect)
+		}
+	}
+}
+
+func TestGolden(t *testing.T) {
+	for gi, g := range golden {
+		data := make([]byte, len(g.keystream))
+		for i := range data {
+			data[i] = byte(i)
+		}
+
+		expect := make([]byte, len(g.keystream))
+		for i := range expect {
+			expect[i] = byte(i) ^ g.keystream[i]
+		}
+
+		for size := 1; size <= len(g.keystream); size++ {
+			c, err := NewCipher(g.key)
+			if err != nil {
+				t.Fatalf("#%d: NewCipher: %v", gi, err)
+			}
+
+			off := 0
+			for off < len(g.keystream) {
+				n := len(g.keystream) - off
+				if n > size {
+					n = size
+				}
+				desc := fmt.Sprintf("#%d@[%d:%d]", gi, off, off+n)
+				testEncrypt(t, desc, c, data[off:off+n], expect[off:off+n])
+				off += n
+			}
+		}
+	}
+}
+
+func TestBlock(t *testing.T) {
+	testBlock(t, (*Cipher).XORKeyStream)
+}
+
+// Test the pure Go version.
+// Because we have assembly for amd64, 386, and arm, this prevents
+// bitrot of the reference implementations.
+func TestBlockGeneric(t *testing.T) {
+	testBlock(t, (*Cipher).xorKeyStreamGeneric)
+}
+
+func testBlock(t *testing.T, xor func(c *Cipher, dst, src []byte)) {
+	c1a, _ := NewCipher(golden[0].key)
+	c1b, _ := NewCipher(golden[1].key)
+	data1 := make([]byte, 1<<20)
+	for i := range data1 {
+		xor(c1a, data1[i:i+1], data1[i:i+1])
+		xor(c1b, data1[i:i+1], data1[i:i+1])
+	}
+
+	c2a, _ := NewCipher(golden[0].key)
+	c2b, _ := NewCipher(golden[1].key)
+	data2 := make([]byte, 1<<20)
+	xor(c2a, data2, data2)
+	xor(c2b, data2, data2)
+
+	if !bytes.Equal(data1, data2) {
+		t.Fatalf("bad block")
+	}
+}
+
+func benchmark(b *testing.B, size int64) {
+	buf := make([]byte, size)
+	c, err := NewCipher(golden[0].key)
+	if err != nil {
+		panic(err)
+	}
+	b.SetBytes(size)
+
+	for i := 0; i < b.N; i++ {
+		c.XORKeyStream(buf, buf)
+	}
+}
+
+func BenchmarkRC4_128(b *testing.B) {
+	benchmark(b, 128)
+}
+
+func BenchmarkRC4_1K(b *testing.B) {
+	benchmark(b, 1024)
+}
+
+func BenchmarkRC4_8K(b *testing.B) {
+	benchmark(b, 8096)
+}
author	Russ Cox <rsc@golang.org>	2014-09-08 00:08:51 -0400
committer	Russ Cox <rsc@golang.org>	2014-09-08 00:08:51 -0400
commit	8528da672cc093d4dd06732819abc1f7b6b5a46e (patch)
tree	334be80d4a4c85b77db6f6fdb67cbf0528cba5f5 /src/crypto/rc4
parent	73bcb69f272cbf34ddcc9daa56427a8683b5a95d (diff)
download	go-8528da672cc093d4dd06732819abc1f7b6b5a46e.tar.gz