for checking for page boundary. Also avoid boundary check
when >=16 bytes are hashed.
benchmark old ns/op new ns/op delta
BenchmarkHashStringSpeed 23 22 -0.43%
BenchmarkHashBytesSpeed 44 42 -3.61%
BenchmarkHashStringArraySpeed 71 68 -4.05%
R=iant, khr
CC=gobot, golang-dev, google
https://golang.org/cl/
9123046
PINSRD $1, CX, X0 // size to next 32 bits of xmm0
MOVO runtime·aeskeysched+0(SB), X2
MOVO runtime·aeskeysched+16(SB), X3
+ CMPL CX, $16
+ JB aessmall
aesloop:
CMPL CX, $16
- JB aesloopend
+ JBE aesloopend
MOVOU (AX), X1
AESENC X2, X0
AESENC X1, X0
SUBL $16, CX
ADDL $16, AX
JMP aesloop
+// 1-16 bytes remaining
aesloopend:
+ // This load may overlap with the previous load above.
+ // We'll hash some bytes twice, but that's ok.
+ MOVOU -16(AX)(CX*1), X1
+ JMP partial
+// 0-15 bytes
+aessmall:
TESTL CX, CX
- JE finalize // no partial block
+ JE finalize // 0 bytes
- TESTL $16, AX
- JNE highpartial
+ CMPB AX, $0xf0
+ JA highpartial
- // address ends in 0xxxx. 16 bytes loaded
- // at this address won't cross a page boundary, so
- // we can load it directly.
+ // 16 bytes loaded at this address won't cross
+ // a page boundary, so we can load it directly.
MOVOU (AX), X1
ADDL CX, CX
PAND masks(SB)(CX*8), X1
JMP partial
highpartial:
- // address ends in 1xxxx. Might be up against
+ // address ends in 1111xxxx. Might be up against
// a page boundary, so load ending at last byte.
// Then shift bytes down using pshufb.
MOVOU -16(AX)(CX*1), X1
PINSRQ $1, CX, X0 // size to high 64 bits of xmm0
MOVO runtime·aeskeysched+0(SB), X2
MOVO runtime·aeskeysched+16(SB), X3
+ CMPQ CX, $16
+ JB aessmall
aesloop:
CMPQ CX, $16
- JB aesloopend
+ JBE aesloopend
MOVOU (AX), X1
AESENC X2, X0
AESENC X1, X0
SUBQ $16, CX
ADDQ $16, AX
JMP aesloop
+// 1-16 bytes remaining
aesloopend:
+ // This load may overlap with the previous load above.
+ // We'll hash some bytes twice, but that's ok.
+ MOVOU -16(AX)(CX*1), X1
+ JMP partial
+// 0-15 bytes
+aessmall:
TESTQ CX, CX
- JE finalize // no partial block
+ JE finalize // 0 bytes
- TESTQ $16, AX
- JNE highpartial
+ CMPB AX, $0xf0
+ JA highpartial
- // address ends in 0xxxx. 16 bytes loaded
- // at this address won't cross a page boundary, so
- // we can load it directly.
+ // 16 bytes loaded at this address won't cross
+ // a page boundary, so we can load it directly.
MOVOU (AX), X1
ADDQ CX, CX
PAND masks(SB)(CX*8), X1
JMP partial
highpartial:
- // address ends in 1xxxx. Might be up against
+ // address ends in 1111xxxx. Might be up against
// a page boundary, so load ending at last byte.
// Then shift bytes down using pshufb.
MOVOU -16(AX)(CX*1), X1
}
}
+type chunk [17]byte
+
+func BenchmarkHashBytesSpeed(b *testing.B) {
+ // a bunch of chunks, each with a different alignment mod 16
+ var chunks [size]chunk
+ // initialize each to a different value
+ for i := 0; i < size; i++ {
+ chunks[i][0] = byte(i)
+ }
+ // put into a map
+ m := make(map[chunk]int, size)
+ for i, c := range chunks {
+ m[c] = i
+ }
+ idx := 0
+ b.ResetTimer()
+ for i := 0; i < b.N; i++ {
+ if m[chunks[idx]] != idx {
+ b.Error("bad map entry for chunk")
+ }
+ idx++
+ if idx == size {
+ idx = 0
+ }
+ }
+}
+
func BenchmarkHashInt32Speed(b *testing.B) {
ints := make([]int32, size)
for i := 0; i < size; i++ {