compress/flate: eliminate most common bounds checks

author Klaus Post <klauspost@gmail.com>
Sun, 10 Apr 2016 11:43:24 +0000 (13:43 +0200)
committer Brad Fitzpatrick <bradfitz@golang.org>
Sun, 10 Apr 2016 15:53:54 +0000 (15:53 +0000)
diff --git a/src/compress/flate/deflate.go b/src/compress/flate/deflate.go

index 3bb8b5e02ae66e169e114c5e53ff2ae587cb1d78..d8bbffbc66c366d07d2b0ff86e417f4afc032ef2 100644 (file)
--- a/src/compress/flate/deflate.go
+++ b/src/compress/flate/deflate.go
@@ -73,8 +73,8 @@ type compressor struct {
         // hashPrev[hashHead[hashValue] & windowMask] contains the previous index
         // with the same hash value.
         chainHead  int
-       hashHead   []uint32
-       hashPrev   []uint32
+       hashHead   [hashSize]uint32
+       hashPrev   [windowSize]uint32
         hashOffset int
  
         // input window: unprocessed data is window[index:windowEnd]
@@ -188,12 +188,13 @@ func (d *compressor) fillWindow(b []byte) {
                 var newH uint32
                 for i, val := range dst {
                         di := i + index
-                       newH = val & hashMask
+                       newH = val
+                       hh := &d.hashHead[newH&hashMask]
                         // Get previous value with the same hash.
                         // Our chain should point to the previous value.
-                       d.hashPrev[di&windowMask] = d.hashHead[newH]
+                       d.hashPrev[di&windowMask] = *hh
                         // Set the head of the hash chain to us.
-                       d.hashHead[newH] = uint32(di + d.hashOffset)
+                       *hh = uint32(di + d.hashOffset)
                 }
                 d.hash = newH
         }
@@ -293,6 +294,7 @@ func bulkHash4(b []byte, dst []uint32) {
  // bytes in size.
  func matchLen(a, b []byte, max int) int {
         a = a[:max]
+       b = b[:len(a)]
         for i, av := range a {
                 if b[i] != av {
                         return i
@@ -302,8 +304,6 @@ func matchLen(a, b []byte, max int) int {
  }
  
  func (d *compressor) initDeflate() {
-       d.hashHead = make([]uint32, hashSize)
-       d.hashPrev = make([]uint32, windowSize)
         d.window = make([]byte, 2*windowSize)
         d.hashOffset = 1
         d.tokens = make([]token, 0, maxFlateBlockTokens+1)
@@ -358,9 +358,10 @@ Loop:
                 if d.index < d.maxInsertIndex {
                         // Update the hash
                         d.hash = hash4(d.window[d.index : d.index+minMatchLength])
-                       d.chainHead = int(d.hashHead[d.hash])
+                       hh := &d.hashHead[d.hash&hashMask]
+                       d.chainHead = int(*hh)
                         d.hashPrev[d.index&windowMask] = uint32(d.chainHead)
-                       d.hashHead[d.hash] = uint32(d.index + d.hashOffset)
+                       *hh = uint32(d.index + d.hashOffset)
                 }
                 prevLength := d.length
                 prevOffset := d.offset
@@ -404,9 +405,10 @@ Loop:
                                                 d.hash = hash4(d.window[d.index : d.index+minMatchLength])
                                                 // Get previous value with the same hash.
                                                 // Our chain should point to the previous value.
-                                               d.hashPrev[d.index&windowMask] = d.hashHead[d.hash]
+                                               hh := &d.hashHead[d.hash&hashMask]
+                                               d.hashPrev[d.index&windowMask] = *hh
                                                 // Set the head of the hash chain to us.
-                                               d.hashHead[d.hash] = uint32(d.index + d.hashOffset)
+                                               *hh = uint32(d.index + d.hashOffset)
                                         }
                                 }
                                 if d.fastSkipHashing == skipNever {
@@ -531,9 +533,6 @@ func (d *compressor) init(w io.Writer, level int) (err error) {
         return nil
  }
  
-// hzeroes is used for zeroing the hash slice.
-var hzeroes [256]uint32
-
  func (d *compressor) reset(w io.Writer) {
         d.w.reset(w)
         d.sync = false
@@ -543,15 +542,13 @@ func (d *compressor) reset(w io.Writer) {
                 d.windowEnd = 0
         default:
                 d.chainHead = -1
-               for s := d.hashHead; len(s) > 0; {
-                       n := copy(s, hzeroes[:])
-                       s = s[n:]
+               for i := range d.hashHead {
+                       d.hashHead[i] = 0
                 }
-               for s := d.hashPrev; len(s) > 0; s = s[len(hzeroes):] {
-                       copy(s, hzeroes[:])
+               for i := range d.hashPrev {
+                       d.hashPrev[i] = 0
                 }
                 d.hashOffset = 1
-
                 d.index, d.windowEnd = 0, 0
                 d.blockStart, d.byteAvailable = 0, false
                 d.tokens = d.tokens[:0]
diff --git a/src/compress/flate/huffman_bit_writer.go b/src/compress/flate/huffman_bit_writer.go

index b99f86ea1377e3cacd1da37c2d3c6372fae11ea4..23f242f88ebc08eefbf0896d97f319d3f6ae86c4 100644 (file)
--- a/src/compress/flate/huffman_bit_writer.go
+++ b/src/compress/flate/huffman_bit_writer.go
@@ -84,11 +84,11 @@ type huffmanBitWriter struct {
         bits            uint64
         nbits           uint
         bytes           [bufferSize]byte
+       codegenFreq     [codegenCodeCount]int32
         nbytes          int
         literalFreq     []int32
         offsetFreq      []int32
         codegen         []uint8
-       codegenFreq     []int32
         literalEncoding *huffmanEncoder
         offsetEncoding  *huffmanEncoder
         codegenEncoding *huffmanEncoder
@@ -101,7 +101,6 @@ func newHuffmanBitWriter(w io.Writer) *huffmanBitWriter {
                 literalFreq:     make([]int32, maxNumLit),
                 offsetFreq:      make([]int32, offsetCodeCount),
                 codegen:         make([]uint8, maxNumLit+offsetCodeCount+1),
-               codegenFreq:     make([]int32, codegenCodeCount),
                 literalEncoding: newHuffmanEncoder(maxNumLit),
                 codegenEncoding: newHuffmanEncoder(codegenCodeCount),
                 offsetEncoding:  newHuffmanEncoder(offsetCodeCount),
@@ -143,12 +142,13 @@ func (w *huffmanBitWriter) writeBits(b int32, nb uint) {
                 w.bits >>= 48
                 w.nbits -= 48
                 n := w.nbytes
-               w.bytes[n+0] = byte(bits)
-               w.bytes[n+1] = byte(bits >> 8)
-               w.bytes[n+2] = byte(bits >> 16)
-               w.bytes[n+3] = byte(bits >> 24)
-               w.bytes[n+4] = byte(bits >> 32)
-               w.bytes[n+5] = byte(bits >> 40)
+               bytes := w.bytes[n : n+6]
+               bytes[0] = byte(bits)
+               bytes[1] = byte(bits >> 8)
+               bytes[2] = byte(bits >> 16)
+               bytes[3] = byte(bits >> 24)
+               bytes[4] = byte(bits >> 32)
+               bytes[5] = byte(bits >> 40)
                 n += 6
                 if n >= bufferFlushSize {
                         _, w.err = w.w.Write(w.bytes[:n])
@@ -293,12 +293,13 @@ func (w *huffmanBitWriter) writeCode(c hcode) {
                 w.bits >>= 48
                 w.nbits -= 48
                 n := w.nbytes
-               w.bytes[n+0] = byte(bits)
-               w.bytes[n+1] = byte(bits >> 8)
-               w.bytes[n+2] = byte(bits >> 16)
-               w.bytes[n+3] = byte(bits >> 24)
-               w.bytes[n+4] = byte(bits >> 32)
-               w.bytes[n+5] = byte(bits >> 40)
+               bytes := w.bytes[n : n+6]
+               bytes[0] = byte(bits)
+               bytes[1] = byte(bits >> 8)
+               bytes[2] = byte(bits >> 16)
+               bytes[3] = byte(bits >> 24)
+               bytes[4] = byte(bits >> 32)
+               bytes[5] = byte(bits >> 40)
                 n += 6
                 if n >= bufferFlushSize {
                         _, w.err = w.w.Write(w.bytes[:n])
@@ -428,13 +429,13 @@ func (w *huffmanBitWriter) writeBlock(tokens []token, eof bool, input []byte) {
         // Generate codegen and codegenFrequencies, which indicates how to encode
         // the literalEncoding and the offsetEncoding.
         w.generateCodegen(numLiterals, numOffsets, w.literalEncoding, w.offsetEncoding)
-       w.codegenEncoding.generate(w.codegenFreq, 7)
+       w.codegenEncoding.generate(w.codegenFreq[:], 7)
         numCodegens = len(w.codegenFreq)
         for numCodegens > 4 && w.codegenFreq[codegenOrder[numCodegens-1]] == 0 {
                 numCodegens--
         }
         dynamicHeader := int64(3+5+5+4+(3*numCodegens)) +
-               w.codegenEncoding.bitLength(w.codegenFreq) +
+               w.codegenEncoding.bitLength(w.codegenFreq[:]) +
                 int64(extraBits) +
                 int64(w.codegenFreq[16]*2) +
                 int64(w.codegenFreq[17]*3) +
@@ -482,7 +483,7 @@ func (w *huffmanBitWriter) writeBlockDynamic(tokens []token, eof bool, input []b
         // Generate codegen and codegenFrequencies, which indicates how to encode
         // the literalEncoding and the offsetEncoding.
         w.generateCodegen(numLiterals, numOffsets, w.literalEncoding, w.offsetEncoding)
-       w.codegenEncoding.generate(w.codegenFreq, 7)
+       w.codegenEncoding.generate(w.codegenFreq[:], 7)
         numCodegens := len(w.codegenFreq)
         for numCodegens > 4 && w.codegenFreq[codegenOrder[numCodegens-1]] == 0 {
                 numCodegens--
@@ -609,13 +610,13 @@ func (w *huffmanBitWriter) writeBlockHuff(eof bool, input []byte) {
         // Generate codegen and codegenFrequencies, which indicates how to encode
         // the literalEncoding and the offsetEncoding.
         w.generateCodegen(numLiterals, numOffsets, w.literalEncoding, huffOffset)
-       w.codegenEncoding.generate(w.codegenFreq, 7)
+       w.codegenEncoding.generate(w.codegenFreq[:], 7)
         numCodegens = len(w.codegenFreq)
         for numCodegens > 4 && w.codegenFreq[codegenOrder[numCodegens-1]] == 0 {
                 numCodegens--
         }
         headerSize := int64(3+5+5+4+(3*numCodegens)) +
-               w.codegenEncoding.bitLength(w.codegenFreq) +
+               w.codegenEncoding.bitLength(w.codegenFreq[:]) +
                 int64(w.codegenFreq[16]*2) +
                 int64(w.codegenFreq[17]*3) +
                 int64(w.codegenFreq[18]*7)
@@ -639,7 +640,7 @@ func (w *huffmanBitWriter) writeBlockHuff(eof bool, input []byte) {
  
         // Huffman.
         w.writeDynamicHeader(numLiterals, numOffsets, numCodegens, eof)
-       encoding := w.literalEncoding.codes
+       encoding := w.literalEncoding.codes[:257]
         n := w.nbytes
         for _, t := range input {
                 // Bitwriting inlined, ~30% speedup
@@ -653,12 +654,13 @@ func (w *huffmanBitWriter) writeBlockHuff(eof bool, input []byte) {
                 bits := w.bits
                 w.bits >>= 48
                 w.nbits -= 48
-               w.bytes[n+0] = byte(bits)
-               w.bytes[n+1] = byte(bits >> 8)
-               w.bytes[n+2] = byte(bits >> 16)
-               w.bytes[n+3] = byte(bits >> 24)
-               w.bytes[n+4] = byte(bits >> 32)
-               w.bytes[n+5] = byte(bits >> 40)
+               bytes := w.bytes[n : n+6]
+               bytes[0] = byte(bits)
+               bytes[1] = byte(bits >> 8)
+               bytes[2] = byte(bits >> 16)
+               bytes[3] = byte(bits >> 24)
+               bytes[4] = byte(bits >> 32)
+               bytes[5] = byte(bits >> 40)
                 n += 6
                 if n < bufferFlushSize {
                         continue
@@ -677,6 +679,7 @@ func (w *huffmanBitWriter) writeBlockHuff(eof bool, input []byte) {
  //
  // len(h) must be >= 256, and h's elements must be all zeroes.
  func histogram(b []byte, h []int32) {
+       h = h[:256]
         for _, t := range b {
                 h[t]++
         }
diff --git a/src/compress/flate/huffman_code.go b/src/compress/flate/huffman_code.go

index b0328c6e08aa7367457bae6456dada7478c916c4..20fb19090d61b5ffa94f47682505b3f1ff5ed25a 100644 (file)
--- a/src/compress/flate/huffman_code.go
+++ b/src/compress/flate/huffman_code.go
@@ -96,8 +96,8 @@ func generateFixedLiteralEncoding() *huffmanEncoder {
  func generateFixedOffsetEncoding() *huffmanEncoder {
         h := newHuffmanEncoder(30)
         codes := h.codes
-       for ch := uint16(0); ch < 30; ch++ {
-               codes[ch] = hcode{code: reverseBits(ch, 5), len: 5}
+       for ch := range codes {
+               codes[ch] = hcode{code: reverseBits(uint16(ch), 5), len: 5}
         }
         return h
  }
author	Klaus Post <klauspost@gmail.com>
	Sun, 10 Apr 2016 11:43:24 +0000 (13:43 +0200)
committer	Brad Fitzpatrick <bradfitz@golang.org>
	Sun, 10 Apr 2016 15:53:54 +0000 (15:53 +0000)
src/compress/flate/deflate.go		patch | blob | history
src/compress/flate/huffman_bit_writer.go		patch | blob | history
src/compress/flate/huffman_code.go		patch | blob | history