From: Robert Griesemer <gri@golang.org>
Date: Tue, 29 Dec 2009 01:20:33 +0000 (-0800)
Subject: Symmetric changes to md4.go as for md5.go.
X-Git-Tag: weekly.2010-01-05~29
X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=f0fcb2d59fb431d45847f09cf1bbcfc3363047ba;p=gostls13.git

Symmetric changes to md4.go as for md5.go.

Use uint index variables in some cases instead
of int to enable strength reduction; this makes
it possible for the compiler to reduce % into
masks.

Old code: 6g -S md4.go md4block.go | grep "md4block.go:44"
0471 (md4block.go:44) MOVL    AX,BX
0472 (md4block.go:44) MOVL    AX,BP
0473 (md4block.go:44) MOVL    AX,R8
0474 (md4block.go:44) SARL    $31,R8
0475 (md4block.go:44) SHRL    $30,R8
0476 (md4block.go:44) ADDL    R8,BP
0477 (md4block.go:44) SARL    $2,BP
0478 (md4block.go:44) IMULL   $4,BP
0479 (md4block.go:44) SUBL    BP,BX
0480 (md4block.go:44) MOVLQSX BX,BX
0481 (md4block.go:44) LEAQ    shift1+0(SB),BP
0482 (md4block.go:44) CMPL    BX,8(BP)
0483 (md4block.go:44) JCS     ,485
0484 (md4block.go:44) CALL    ,runtime.throwindex+0(SB)
0485 (md4block.go:44) MOVQ    (BP),BP
0486 (md4block.go:44) MOVL    (BP)(BX*4),DI

New code: 6g -S md4.go md4block.go | grep "md4block.go:44"
0471 (md4block.go:44) MOVL    AX,BX
0472 (md4block.go:44) ANDL    $3,BX
0473 (md4block.go:44) MOVLQZX BX,BX
0474 (md4block.go:44) LEAQ    shift1+0(SB),BP
0475 (md4block.go:44) CMPL    BX,8(BP)
0476 (md4block.go:44) JCS     ,478
0477 (md4block.go:44) CALL    ,runtime.throwindex+0(SB)
0478 (md4block.go:44) MOVQ    (BP),BP
0479 (md4block.go:44) MOVL    (BP)(BX*4),DI

R=agl, agl1
CC=golang-dev
https://golang.org/cl/181086
---

diff --git a/src/pkg/crypto/md4/md4.go b/src/pkg/crypto/md4/md4.go
index 650fce313f..6096ab9975 100644
--- a/src/pkg/crypto/md4/md4.go
+++ b/src/pkg/crypto/md4/md4.go
@@ -68,8 +68,8 @@ func (d *digest) Write(p []byte) (nn int, err os.Error) {
 	n := _Block(d, p)
 	p = p[n:]
 	if len(p) > 0 {
-		for i := 0; i < len(p); i++ {
-			d.x[i] = p[i]
+		for i, x := range p {
+			d.x[i] = x
 		}
 		d.nx = len(p)
 	}
@@ -100,16 +100,12 @@ func (d *digest) Sum() []byte {
 
 	p := make([]byte, 16)
 	j := 0
-	for i := 0; i < 4; i++ {
-		s := d.s[i]
-		p[j] = byte(s)
-		j++
-		p[j] = byte(s >> 8)
-		j++
-		p[j] = byte(s >> 16)
-		j++
-		p[j] = byte(s >> 24)
-		j++
+	for _, s := range d.s {
+		p[j+0] = byte(s >> 0)
+		p[j+1] = byte(s >> 8)
+		p[j+2] = byte(s >> 16)
+		p[j+3] = byte(s >> 24)
+		j += 4
 	}
 	return p
 }
diff --git a/src/pkg/crypto/md4/md4block.go b/src/pkg/crypto/md4/md4block.go
index 492e960cae..3fed475f3f 100644
--- a/src/pkg/crypto/md4/md4block.go
+++ b/src/pkg/crypto/md4/md4block.go
@@ -25,9 +25,10 @@ func _Block(dig *digest, p []byte) int {
 	for len(p) >= _Chunk {
 		aa, bb, cc, dd := a, b, c, d
 
+		j := 0
 		for i := 0; i < 16; i++ {
-			j := i * 4
 			X[i] = uint32(p[j]) | uint32(p[j+1])<<8 | uint32(p[j+2])<<16 | uint32(p[j+3])<<24
+			j += 4
 		}
 
 		// If this needs to be made faster in the future,
@@ -37,9 +38,12 @@ func _Block(dig *digest, p []byte) int {
 		// with suitable variable renaming in each
 		// unrolled body, delete the a, b, c, d = d, a, b, c
 		// (or you can let the optimizer do the renaming).
+		//
+		// The index variables are uint so that % by a power
+		// of two can be optimized easily by a compiler.
 
 		// Round 1.
-		for i := 0; i < 16; i++ {
+		for i := uint(0); i < 16; i++ {
 			x := i
 			s := shift1[i%4]
 			f := ((c ^ d) & b) ^ d
@@ -49,7 +53,7 @@ func _Block(dig *digest, p []byte) int {
 		}
 
 		// Round 2.
-		for i := 0; i < 16; i++ {
+		for i := uint(0); i < 16; i++ {
 			x := xIndex2[i]
 			s := shift2[i%4]
 			g := (b & c) | (b & d) | (c & d)
@@ -59,7 +63,7 @@ func _Block(dig *digest, p []byte) int {
 		}
 
 		// Round 3.
-		for i := 0; i < 16; i++ {
+		for i := uint(0); i < 16; i++ {
 			x := xIndex3[i]
 			s := shift3[i%4]
 			h := b ^ c ^ d
diff --git a/src/pkg/crypto/md5/md5block.go b/src/pkg/crypto/md5/md5block.go
index f35096d50c..a887e2e05e 100644
--- a/src/pkg/crypto/md5/md5block.go
+++ b/src/pkg/crypto/md5/md5block.go
@@ -98,9 +98,10 @@ func _Block(dig *digest, p []byte) int {
 	for len(p) >= _Chunk {
 		aa, bb, cc, dd := a, b, c, d
 
+		j := 0
 		for i := 0; i < 16; i++ {
-			j := i * 4
 			X[i] = uint32(p[j]) | uint32(p[j+1])<<8 | uint32(p[j+2])<<16 | uint32(p[j+3])<<24
+			j += 4
 		}
 
 		// If this needs to be made faster in the future,
@@ -110,52 +111,47 @@ func _Block(dig *digest, p []byte) int {
 		// with suitable variable renaming in each
 		// unrolled body, delete the a, b, c, d = d, a, b, c
 		// (or you can let the optimizer do the renaming).
+		//
+		// The index variables are uint so that % by a power
+		// of two can be optimized easily by a compiler.
 
 		// Round 1.
-		for i := 0; i < 16; i++ {
+		for i := uint(0); i < 16; i++ {
 			x := i
-			t := i
 			s := shift1[i%4]
 			f := ((c ^ d) & b) ^ d
-			a += f + X[x] + table[t]
-			a = a<<s | a>>(32-s)
-			a += b
+			a += f + X[x] + table[i]
+			a = a<<s | a>>(32-s) + b
 			a, b, c, d = d, a, b, c
 		}
 
 		// Round 2.
-		for i := 0; i < 16; i++ {
+		for i := uint(0); i < 16; i++ {
 			x := (1 + 5*i) % 16
-			t := 16 + i
 			s := shift2[i%4]
 			g := ((b ^ c) & d) ^ c
-			a += g + X[x] + table[t]
-			a = a<<s | a>>(32-s)
-			a += b
+			a += g + X[x] + table[i+16]
+			a = a<<s | a>>(32-s) + b
 			a, b, c, d = d, a, b, c
 		}
 
 		// Round 3.
-		for i := 0; i < 16; i++ {
+		for i := uint(0); i < 16; i++ {
 			x := (5 + 3*i) % 16
-			t := 32 + i
 			s := shift3[i%4]
 			h := b ^ c ^ d
-			a += h + X[x] + table[t]
-			a = a<<s | a>>(32-s)
-			a += b
+			a += h + X[x] + table[i+32]
+			a = a<<s | a>>(32-s) + b
 			a, b, c, d = d, a, b, c
 		}
 
 		// Round 4.
-		for i := 0; i < 16; i++ {
+		for i := uint(0); i < 16; i++ {
 			x := (7 * i) % 16
 			s := shift4[i%4]
-			t := 48 + i
-			ii := c ^ (b | ^d)
-			a += ii + X[x] + table[t]
-			a = a<<s | a>>(32-s)
-			a += b
+			j := c ^ (b | ^d)
+			a += j + X[x] + table[i+48]
+			a = a<<s | a>>(32-s) + b
 			a, b, c, d = d, a, b, c
 		}