From: Robert Griesemer Date: Tue, 29 Dec 2009 01:20:33 +0000 (-0800) Subject: Symmetric changes to md4.go as for md5.go. X-Git-Tag: weekly.2010-01-05~29 X-Git-Url: http://www.git.cypherpunks.su/?a=commitdiff_plain;h=f0fcb2d59fb431d45847f09cf1bbcfc3363047ba;p=gostls13.git Symmetric changes to md4.go as for md5.go. Use uint index variables in some cases instead of int to enable strength reduction; this makes it possible for the compiler to reduce % into masks. Old code: 6g -S md4.go md4block.go | grep "md4block.go:44" 0471 (md4block.go:44) MOVL AX,BX 0472 (md4block.go:44) MOVL AX,BP 0473 (md4block.go:44) MOVL AX,R8 0474 (md4block.go:44) SARL $31,R8 0475 (md4block.go:44) SHRL $30,R8 0476 (md4block.go:44) ADDL R8,BP 0477 (md4block.go:44) SARL $2,BP 0478 (md4block.go:44) IMULL $4,BP 0479 (md4block.go:44) SUBL BP,BX 0480 (md4block.go:44) MOVLQSX BX,BX 0481 (md4block.go:44) LEAQ shift1+0(SB),BP 0482 (md4block.go:44) CMPL BX,8(BP) 0483 (md4block.go:44) JCS ,485 0484 (md4block.go:44) CALL ,runtime.throwindex+0(SB) 0485 (md4block.go:44) MOVQ (BP),BP 0486 (md4block.go:44) MOVL (BP)(BX*4),DI New code: 6g -S md4.go md4block.go | grep "md4block.go:44" 0471 (md4block.go:44) MOVL AX,BX 0472 (md4block.go:44) ANDL $3,BX 0473 (md4block.go:44) MOVLQZX BX,BX 0474 (md4block.go:44) LEAQ shift1+0(SB),BP 0475 (md4block.go:44) CMPL BX,8(BP) 0476 (md4block.go:44) JCS ,478 0477 (md4block.go:44) CALL ,runtime.throwindex+0(SB) 0478 (md4block.go:44) MOVQ (BP),BP 0479 (md4block.go:44) MOVL (BP)(BX*4),DI R=agl, agl1 CC=golang-dev https://golang.org/cl/181086 --- diff --git a/src/pkg/crypto/md4/md4.go b/src/pkg/crypto/md4/md4.go index 650fce313f..6096ab9975 100644 --- a/src/pkg/crypto/md4/md4.go +++ b/src/pkg/crypto/md4/md4.go @@ -68,8 +68,8 @@ func (d *digest) Write(p []byte) (nn int, err os.Error) { n := _Block(d, p) p = p[n:] if len(p) > 0 { - for i := 0; i < len(p); i++ { - d.x[i] = p[i] + for i, x := range p { + d.x[i] = x } d.nx = len(p) } @@ -100,16 +100,12 @@ func (d *digest) Sum() []byte { p := make([]byte, 16) j := 0 - for i := 0; i < 4; i++ { - s := d.s[i] - p[j] = byte(s) - j++ - p[j] = byte(s >> 8) - j++ - p[j] = byte(s >> 16) - j++ - p[j] = byte(s >> 24) - j++ + for _, s := range d.s { + p[j+0] = byte(s >> 0) + p[j+1] = byte(s >> 8) + p[j+2] = byte(s >> 16) + p[j+3] = byte(s >> 24) + j += 4 } return p } diff --git a/src/pkg/crypto/md4/md4block.go b/src/pkg/crypto/md4/md4block.go index 492e960cae..3fed475f3f 100644 --- a/src/pkg/crypto/md4/md4block.go +++ b/src/pkg/crypto/md4/md4block.go @@ -25,9 +25,10 @@ func _Block(dig *digest, p []byte) int { for len(p) >= _Chunk { aa, bb, cc, dd := a, b, c, d + j := 0 for i := 0; i < 16; i++ { - j := i * 4 X[i] = uint32(p[j]) | uint32(p[j+1])<<8 | uint32(p[j+2])<<16 | uint32(p[j+3])<<24 + j += 4 } // If this needs to be made faster in the future, @@ -37,9 +38,12 @@ func _Block(dig *digest, p []byte) int { // with suitable variable renaming in each // unrolled body, delete the a, b, c, d = d, a, b, c // (or you can let the optimizer do the renaming). + // + // The index variables are uint so that % by a power + // of two can be optimized easily by a compiler. // Round 1. - for i := 0; i < 16; i++ { + for i := uint(0); i < 16; i++ { x := i s := shift1[i%4] f := ((c ^ d) & b) ^ d @@ -49,7 +53,7 @@ func _Block(dig *digest, p []byte) int { } // Round 2. - for i := 0; i < 16; i++ { + for i := uint(0); i < 16; i++ { x := xIndex2[i] s := shift2[i%4] g := (b & c) | (b & d) | (c & d) @@ -59,7 +63,7 @@ func _Block(dig *digest, p []byte) int { } // Round 3. - for i := 0; i < 16; i++ { + for i := uint(0); i < 16; i++ { x := xIndex3[i] s := shift3[i%4] h := b ^ c ^ d diff --git a/src/pkg/crypto/md5/md5block.go b/src/pkg/crypto/md5/md5block.go index f35096d50c..a887e2e05e 100644 --- a/src/pkg/crypto/md5/md5block.go +++ b/src/pkg/crypto/md5/md5block.go @@ -98,9 +98,10 @@ func _Block(dig *digest, p []byte) int { for len(p) >= _Chunk { aa, bb, cc, dd := a, b, c, d + j := 0 for i := 0; i < 16; i++ { - j := i * 4 X[i] = uint32(p[j]) | uint32(p[j+1])<<8 | uint32(p[j+2])<<16 | uint32(p[j+3])<<24 + j += 4 } // If this needs to be made faster in the future, @@ -110,52 +111,47 @@ func _Block(dig *digest, p []byte) int { // with suitable variable renaming in each // unrolled body, delete the a, b, c, d = d, a, b, c // (or you can let the optimizer do the renaming). + // + // The index variables are uint so that % by a power + // of two can be optimized easily by a compiler. // Round 1. - for i := 0; i < 16; i++ { + for i := uint(0); i < 16; i++ { x := i - t := i s := shift1[i%4] f := ((c ^ d) & b) ^ d - a += f + X[x] + table[t] - a = a<>(32-s) - a += b + a += f + X[x] + table[i] + a = a<>(32-s) + b a, b, c, d = d, a, b, c } // Round 2. - for i := 0; i < 16; i++ { + for i := uint(0); i < 16; i++ { x := (1 + 5*i) % 16 - t := 16 + i s := shift2[i%4] g := ((b ^ c) & d) ^ c - a += g + X[x] + table[t] - a = a<>(32-s) - a += b + a += g + X[x] + table[i+16] + a = a<>(32-s) + b a, b, c, d = d, a, b, c } // Round 3. - for i := 0; i < 16; i++ { + for i := uint(0); i < 16; i++ { x := (5 + 3*i) % 16 - t := 32 + i s := shift3[i%4] h := b ^ c ^ d - a += h + X[x] + table[t] - a = a<>(32-s) - a += b + a += h + X[x] + table[i+32] + a = a<>(32-s) + b a, b, c, d = d, a, b, c } // Round 4. - for i := 0; i < 16; i++ { + for i := uint(0); i < 16; i++ { x := (7 * i) % 16 s := shift4[i%4] - t := 48 + i - ii := c ^ (b | ^d) - a += ii + X[x] + table[t] - a = a<>(32-s) - a += b + j := c ^ (b | ^d) + a += j + X[x] + table[i+48] + a = a<>(32-s) + b a, b, c, d = d, a, b, c }