for len(p) >= _Chunk {
aa, bb, cc, dd := a, b, c, d
+ j := 0
for i := 0; i < 16; i++ {
- j := i * 4
X[i] = uint32(p[j]) | uint32(p[j+1])<<8 | uint32(p[j+2])<<16 | uint32(p[j+3])<<24
+ j += 4
}
// If this needs to be made faster in the future,
// with suitable variable renaming in each
// unrolled body, delete the a, b, c, d = d, a, b, c
// (or you can let the optimizer do the renaming).
+ //
+ // The index variables are uint so that % by a power
+ // of two can be optimized easily by a compiler.
// Round 1.
- for i := 0; i < 16; i++ {
+ for i := uint(0); i < 16; i++ {
x := i
s := shift1[i%4]
f := ((c ^ d) & b) ^ d
}
// Round 2.
- for i := 0; i < 16; i++ {
+ for i := uint(0); i < 16; i++ {
x := xIndex2[i]
s := shift2[i%4]
g := (b & c) | (b & d) | (c & d)
}
// Round 3.
- for i := 0; i < 16; i++ {
+ for i := uint(0); i < 16; i++ {
x := xIndex3[i]
s := shift3[i%4]
h := b ^ c ^ d
for len(p) >= _Chunk {
aa, bb, cc, dd := a, b, c, d
+ j := 0
for i := 0; i < 16; i++ {
- j := i * 4
X[i] = uint32(p[j]) | uint32(p[j+1])<<8 | uint32(p[j+2])<<16 | uint32(p[j+3])<<24
+ j += 4
}
// If this needs to be made faster in the future,
// with suitable variable renaming in each
// unrolled body, delete the a, b, c, d = d, a, b, c
// (or you can let the optimizer do the renaming).
+ //
+ // The index variables are uint so that % by a power
+ // of two can be optimized easily by a compiler.
// Round 1.
- for i := 0; i < 16; i++ {
+ for i := uint(0); i < 16; i++ {
x := i
- t := i
s := shift1[i%4]
f := ((c ^ d) & b) ^ d
- a += f + X[x] + table[t]
- a = a<<s | a>>(32-s)
- a += b
+ a += f + X[x] + table[i]
+ a = a<<s | a>>(32-s) + b
a, b, c, d = d, a, b, c
}
// Round 2.
- for i := 0; i < 16; i++ {
+ for i := uint(0); i < 16; i++ {
x := (1 + 5*i) % 16
- t := 16 + i
s := shift2[i%4]
g := ((b ^ c) & d) ^ c
- a += g + X[x] + table[t]
- a = a<<s | a>>(32-s)
- a += b
+ a += g + X[x] + table[i+16]
+ a = a<<s | a>>(32-s) + b
a, b, c, d = d, a, b, c
}
// Round 3.
- for i := 0; i < 16; i++ {
+ for i := uint(0); i < 16; i++ {
x := (5 + 3*i) % 16
- t := 32 + i
s := shift3[i%4]
h := b ^ c ^ d
- a += h + X[x] + table[t]
- a = a<<s | a>>(32-s)
- a += b
+ a += h + X[x] + table[i+32]
+ a = a<<s | a>>(32-s) + b
a, b, c, d = d, a, b, c
}
// Round 4.
- for i := 0; i < 16; i++ {
+ for i := uint(0); i < 16; i++ {
x := (7 * i) % 16
s := shift4[i%4]
- t := 48 + i
- ii := c ^ (b | ^d)
- a += ii + X[x] + table[t]
- a = a<<s | a>>(32-s)
- a += b
+ j := c ^ (b | ^d)
+ a += j + X[x] + table[i+48]
+ a = a<<s | a>>(32-s) + b
a, b, c, d = d, a, b, c
}