logWindowSize = 15
windowSize = 1 << logWindowSize
windowMask = windowSize - 1
- logMaxOffsetSize = 15 // Standard DEFLATE
- minMatchLength = 4 // The smallest match that the compressor looks for
- maxMatchLength = 258 // The longest match for the compressor
- minOffsetSize = 1 // The shortest offset that makes any sense
- // The maximum number of tokens we put into a single flat block, just to
+ // The LZ77 step produces a sequence of literal tokens and <length, offset>
+ // pair tokens. The offset is also known as distance. The underlying wire
+ // format limits the range of lengths and offsets. For example, there are
+ // 256 legitimate lengths: those in the range [3, 258]. This package's
+ // compressor uses a higher minimum match length, enabling optimizations
+ // such as finding matches via 32-bit loads and compares.
+ baseMatchLength = 3 // The smallest match length per the RFC section 3.2.5
+ minMatchLength = 4 // The smallest match length that the compressor actually emits
+ maxMatchLength = 258 // The largest match length
+ baseMatchOffset = 1 // The smallest match offset
+ maxMatchOffset = 1 << 15 // The largest match offset
+
+ // The maximum number of tokens we put into a single flate block, just to
// stop things from getting too large.
maxFlateBlockTokens = 1 << 14
maxStoreBlockSize = 65535
// There was a match at the previous step, and the current match is
// not better. Output the previous match.
if d.fastSkipHashing != skipNever {
- d.tokens = append(d.tokens, matchToken(uint32(d.length-3), uint32(d.offset-minOffsetSize)))
+ d.tokens = append(d.tokens, matchToken(uint32(d.length-baseMatchLength), uint32(d.offset-baseMatchOffset)))
} else {
- d.tokens = append(d.tokens, matchToken(uint32(prevLength-3), uint32(prevOffset-minOffsetSize)))
+ d.tokens = append(d.tokens, matchToken(uint32(prevLength-baseMatchLength), uint32(prevOffset-baseMatchOffset)))
}
// Insert in the hash table all strings up to the end of the match.
// index and index-1 are already inserted. If there is not enough
// based on Snappy's LZ77-style encoder: github.com/golang/snappy
const (
- maxOffset = 1 << logMaxOffsetSize // Maximum deflate offset.
-
tableBits = 14 // Bits used in the table.
tableSize = 1 << tableBits // Size of the table.
tableMask = tableSize - 1 // Mask for table indices. Redundant, but can eliminate bounds checks.
candidate = int(table[nextHash&tableMask])
table[nextHash&tableMask] = uint16(s)
nextHash = hash(load32(src, nextS))
- if s-candidate < maxOffset && load32(src, s) == load32(src, candidate) {
+ // TODO: < should be <=, and add a test for that.
+ if s-candidate < maxMatchOffset && load32(src, s) == load32(src, candidate) {
break
}
}
}
// matchToken is flate's equivalent of Snappy's emitCopy.
- dst = append(dst, matchToken(uint32(s-base-3), uint32(base-candidate-minOffsetSize)))
+ dst = append(dst, matchToken(uint32(s-base-baseMatchLength), uint32(base-candidate-baseMatchOffset)))
nextEmit = s
if s >= sLimit {
goto emitRemainder
currHash := hash(uint32(x >> 8))
candidate = int(table[currHash&tableMask])
table[currHash&tableMask] = uint16(s)
- if s-candidate >= maxOffset || uint32(x>>8) != load32(src, candidate) {
+ // TODO: >= should be >, and add a test for that.
+ if s-candidate >= maxMatchOffset || uint32(x>>8) != load32(src, candidate) {
nextHash = hash(uint32(x >> 16))
s++
break
)
const (
- maxCodeLen = 16 // max length of Huffman code
- maxHist = 32768 // max history required
+ maxCodeLen = 16 // max length of Huffman code
// The next three numbers come from the RFC section 3.2.7, with the
// additional proviso in section 3.2.5 which implies that distance codes
// 30 and 31 should never occur in compressed data.
dict: f.dict,
step: (*decompressor).nextBlock,
}
- f.dict.init(maxHist, nil)
+ f.dict.init(maxMatchOffset, nil)
return nil
}
f.bits = new([maxNumLit + maxNumDist]int)
f.codebits = new([numCodes]int)
f.step = (*decompressor).nextBlock
- f.dict.init(maxHist, nil)
+ f.dict.init(maxMatchOffset, nil)
return &f
}
f.bits = new([maxNumLit + maxNumDist]int)
f.codebits = new([numCodes]int)
f.step = (*decompressor).nextBlock
- f.dict.init(maxHist, dict)
+ f.dict.init(maxMatchOffset, dict)
return &f
}