const (
NoCompression = 0
BestSpeed = 1
- fastCompression = 3
BestCompression = 9
DefaultCompression = -1
logWindowSize = 15
windowSize = 1 << logWindowSize
windowMask = windowSize - 1
logMaxOffsetSize = 15 // Standard DEFLATE
- minMatchLength = 3 // The smallest match that the compressor looks for
+ minMatchLength = 4 // The smallest match that the compressor looks for (the DEFLATE format minimum is 3)
maxMatchLength = 258 // The longest match for the compressor
minOffsetSize = 1 // The shortest offset that makes any sense
// The maximum number of tokens we put into a single flate block, just to
// stop things from getting too large.
maxFlateBlockTokens = 1 << 14
maxStoreBlockSize = 65535
- hashBits = 17
+ hashBits = 17 // After 17 performance degrades
hashSize = 1 << hashBits
hashMask = (1 << hashBits) - 1
- hashShift = (hashBits + minMatchLength - 1) / minMatchLength
maxHashOffset = 1 << 24
skipNever = math.MaxInt32
)
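+// With logWindowSize = 15 the sliding window is 1<<15 = 32768 bytes
+// (windowMask = 0x7fff), and hashBits = 17 gives 1<<17 = 131072 hash-chain
+// heads; both masks are simply size-1, since both sizes are powers of two.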
type compressionLevel struct {
- good, lazy, nice, chain, fastSkipHashing int
+ level, good, lazy, nice, chain, fastSkipHashing int
}
var levels = []compressionLevel{
{}, // 0
// For levels 1-3 we don't bother trying with lazy matches
- {3, 0, 8, 4, 4},
- {3, 0, 16, 8, 5},
- {3, 0, 32, 32, 6},
+ {1, 4, 0, 8, 4, 4},
+ {2, 4, 0, 16, 8, 5},
+ {3, 4, 0, 32, 32, 6},
// Levels 4-9 use increasingly more lazy matching
// and increasingly stringent conditions for "good enough".
- {4, 4, 16, 16, skipNever},
- {8, 16, 32, 32, skipNever},
- {8, 16, 128, 128, skipNever},
- {8, 32, 128, 256, skipNever},
- {32, 128, 258, 1024, skipNever},
- {32, 258, 258, 4096, skipNever},
+ {4, 4, 4, 16, 16, skipNever},
+ {5, 8, 16, 32, 32, skipNever},
+ {6, 8, 16, 128, 128, skipNever},
+ {7, 8, 32, 128, 256, skipNever},
+ {8, 32, 128, 258, 1024, skipNever},
+ {9, 32, 258, 258, 4096, skipNever},
}
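+// How these knobs are used, as a sketch (the conventional zlib-style
+// meaning, which this file follows): findMatch walks at most "chain"
+// hash-chain candidates and stops early once a match reaches "nice" bytes;
+// when the previous match is already "good" bytes long the candidate budget
+// is quartered; and lazy matching is only attempted while the current match
+// is shorter than "lazy". At the default level 6, for example, the search
+// visits at most 128 candidates and settles for any 128-byte match.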
type compressor struct {
compressionLevel
- w *huffmanBitWriter
+ w *huffmanBitWriter
+ bulkHasher func([]byte, []uint32)
// compression algorithm
fill func(*compressor, []byte) int // copy data to window
// hashPrev[hashHead[hashValue] & windowMask] contains the previous index
// with the same hash value.
chainHead int
- hashHead []int
- hashPrev []int
+ hashHead []uint32
+ hashPrev []uint32
hashOffset int
// input window: unprocessed data is window[index:windowEnd]
// deflate state
length int
offset int
- hash int
+ hash uint32
maxInsertIndex int
err error
+
+ // hashMatch must be able to contain hashes for the maximum match length.
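+ // (Sized maxMatchLength-1 presumably so that every position starting
+ // inside a maximal match can be bulk-hashed in one pass; fillWindow's
+ // 256-entry chunks fit within that bound as well.)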
+ hashMatch [maxMatchLength - 1]uint32
}
func (d *compressor) fillDeflate(b []byte) int {
d.hashOffset -= delta
d.chainHead -= delta
for i, v := range d.hashPrev {
- if v > delta {
- d.hashPrev[i] -= delta
+ if int(v) > delta {
+ d.hashPrev[i] = uint32(int(v) - delta)
} else {
d.hashPrev[i] = 0
}
}
for i, v := range d.hashHead {
- if v > delta {
- d.hashHead[i] -= delta
+ if int(v) > delta {
+ d.hashHead[i] = uint32(int(v) - delta)
} else {
d.hashHead[i] = 0
}
return n
}
-func (d *compressor) writeBlock(tokens []token, index int, eof bool) error {
- if index > 0 || eof {
+func (d *compressor) writeBlock(tokens []token, index int) error {
+ if index > 0 {
var window []byte
if d.blockStart <= index {
window = d.window[d.blockStart:index]
}
d.blockStart = index
- d.w.writeBlock(tokens, eof, window)
+ d.w.writeBlock(tokens, false, window)
return d.w.err
}
return nil
}
+// fillWindow will fill the current window with the supplied
+// dictionary and calculate all hashes.
+// This is much faster than doing a full encode.
+// Should only be used after a reset.
+func (d *compressor) fillWindow(b []byte) {
+ // Do not fill window if we are in store-only mode.
+ if d.compressionLevel.level == 0 {
+ return
+ }
+ if d.index != 0 || d.windowEnd != 0 {
+ panic("internal error: fillWindow called with stale data")
+ }
+
+ // If we are given too much, cut it.
+ if len(b) > windowSize {
+ b = b[len(b)-windowSize:]
+ }
+ // Add all to window.
+ n := copy(d.window, b)
+
+ // Calculate 256 hashes at a time (more L1 cache hits)
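+ // For example, n = 300 gives loops = (300+252)/256 = 2: chunk 0 hashes
+ // window[0:259] into 256 slots and chunk 1 hashes window[256:300] into 41,
+ // i.e. n-minMatchLength+1 = 297 hashes in total. Adjacent chunks overlap
+ // by minMatchLength-1 bytes so each can complete its last 4-byte hash.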
+ loops := (n + 256 - minMatchLength) / 256
+ for j := 0; j < loops; j++ {
+ index := j * 256
+ end := index + 256 + minMatchLength - 1
+ if end > n {
+ end = n
+ }
+ toCheck := d.window[index:end]
+ dstSize := len(toCheck) - minMatchLength + 1
+
+ if dstSize <= 0 {
+ continue
+ }
+
+ dst := d.hashMatch[:dstSize]
+ d.bulkHasher(toCheck, dst)
+ var newH uint32
+ for i, val := range dst {
+ di := i + index
+ newH = val & hashMask
+ // Get previous value with the same hash.
+ // Our chain should point to the previous value.
+ d.hashPrev[di&windowMask] = d.hashHead[newH]
+ // Set the head of the hash chain to us.
+ d.hashHead[newH] = uint32(di + d.hashOffset)
+ }
+ d.hash = newH
+ }
+ // Update window information.
+ d.windowEnd = n
+ d.index = n
+}
+
// Try to find a match starting at pos whose length is greater than prevLength.
// We only look at d.chain possibilities before giving up.
func (d *compressor) findMatch(pos int, prevHead int, prevLength int, lookahead int) (length, offset int, ok bool) {
tries >>= 2
}
- w0 := win[pos]
- w1 := win[pos+1]
wEnd := win[pos+length]
+ wPos := win[pos:]
minIndex := pos - windowSize
for i := prevHead; tries > 0; tries-- {
- if w0 == win[i] && w1 == win[i+1] && wEnd == win[i+length] {
- // The hash function ensures that if win[i] and win[i+1] match, win[i+2] matches
- n := 3
- for pos+n < len(win) && win[i+n] == win[pos+n] {
- n++
- }
- if n > length && (n > 3 || pos-i <= 4096) {
+ if wEnd == win[i+length] {
+ n := matchLen(win[i:], wPos, minMatchLook)
+ if n > length && (n > minMatchLength || pos-i <= 4096) {
length = n
offset = pos - i
ok = true
// hashPrev[i & windowMask] has already been overwritten, so stop now.
break
}
- if i = d.hashPrev[i&windowMask] - d.hashOffset; i < minIndex || i < 0 {
+ i = int(d.hashPrev[i&windowMask]) - d.hashOffset
+ if i < minIndex || i < 0 {
break
}
}
return d.w.err
}
+const hashmul = 0x1e35a7bd
+
+// hash4 returns a hash representation of the first 4 bytes
+// of the supplied slice.
+// The caller must ensure that len(b) >= 4.
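+// For instance, hash4([]byte("abcd")) loads the bytes big-endian as
+// 0x61626364, multiplies by hashmul and keeps the top hashBits bits,
+// so each position costs one multiply and one shift.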
+func hash4(b []byte) uint32 {
+ return ((uint32(b[3]) | uint32(b[2])<<8 | uint32(b[1])<<16 | uint32(b[0])<<24) * hashmul) >> (32 - hashBits)
+}
+
+// bulkHash4 computes one hash per 4-byte window of b, using the same
+// algorithm as hash4. dst must hold at least len(b)-minMatchLength+1 values.
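+// A typical call is therefore (sketch):
+//
+//   dst := make([]uint32, len(b)-minMatchLength+1)
+//   bulkHash4(b, dst)
+//
+// After the initial 4-byte load, each step shifts in a single new byte,
+// so every input byte is loaded once rather than four times.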
+func bulkHash4(b []byte, dst []uint32) {
+ if len(b) < minMatchLength {
+ return
+ }
+ hb := uint32(b[3]) | uint32(b[2])<<8 | uint32(b[1])<<16 | uint32(b[0])<<24
+ dst[0] = (hb * hashmul) >> (32 - hashBits)
+ end := len(b) - minMatchLength + 1
+ for i := 1; i < end; i++ {
+ hb = (hb << 8) | uint32(b[i+3])
+ dst[i] = (hb * hashmul) >> (32 - hashBits)
+ }
+}
+
+// matchLen returns the number of matching bytes in a and b
+// up to length 'max'. Both slices must be at least 'max'
+// bytes in size.
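+// For example (illustrative), matchLen([]byte("abcx"), []byte("abcy"), 4)
+// returns 3, and max is returned when the slices agree on all max bytes;
+// the a[:max] re-slice is what bounds the loop, hence the precondition.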
+func matchLen(a, b []byte, max int) int {
+ a = a[:max]
+ for i, av := range a {
+ if b[i] != av {
+ return i
+ }
+ }
+ return max
+}
+
func (d *compressor) initDeflate() {
- d.hashHead = make([]int, hashSize)
- d.hashPrev = make([]int, windowSize)
+ d.hashHead = make([]uint32, hashSize)
+ d.hashPrev = make([]uint32, windowSize)
d.window = make([]byte, 2*windowSize)
d.hashOffset = 1
d.tokens = make([]token, 0, maxFlateBlockTokens+1)
d.index = 0
d.hash = 0
d.chainHead = -1
+ d.bulkHasher = bulkHash4
}
func (d *compressor) deflate() {
d.maxInsertIndex = d.windowEnd - (minMatchLength - 1)
if d.index < d.maxInsertIndex {
- d.hash = int(d.window[d.index])<<hashShift + int(d.window[d.index+1])
+ d.hash = hash4(d.window[d.index : d.index+minMatchLength])
}
Loop:
d.byteAvailable = false
}
if len(d.tokens) > 0 {
- if d.err = d.writeBlock(d.tokens, d.index, false); d.err != nil {
+ if d.err = d.writeBlock(d.tokens, d.index); d.err != nil {
return
}
d.tokens = d.tokens[:0]
}
if d.index < d.maxInsertIndex {
// Update the hash
- d.hash = (d.hash<<hashShift + int(d.window[d.index+2])) & hashMask
- d.chainHead = d.hashHead[d.hash]
- d.hashPrev[d.index&windowMask] = d.chainHead
- d.hashHead[d.hash] = d.index + d.hashOffset
+ d.hash = hash4(d.window[d.index : d.index+minMatchLength])
+ d.chainHead = int(d.hashHead[d.hash])
+ d.hashPrev[d.index&windowMask] = uint32(d.chainHead)
+ d.hashHead[d.hash] = uint32(d.index + d.hashOffset)
}
prevLength := d.length
prevOffset := d.offset
// There was a match at the previous step, and the current match is
// not better. Output the previous match.
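+ // Token match lengths are encoded relative to 3, the DEFLATE format's
+ // minimum match length (RFC 1951), so the literal 3 below is deliberate
+ // and not minMatchLength.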
if d.fastSkipHashing != skipNever {
- d.tokens = append(d.tokens, matchToken(uint32(d.length-minMatchLength), uint32(d.offset-minOffsetSize)))
+ d.tokens = append(d.tokens, matchToken(uint32(d.length-3), uint32(d.offset-minOffsetSize)))
} else {
- d.tokens = append(d.tokens, matchToken(uint32(prevLength-minMatchLength), uint32(prevOffset-minOffsetSize)))
+ d.tokens = append(d.tokens, matchToken(uint32(prevLength-3), uint32(prevOffset-minOffsetSize)))
}
// Insert in the hash table all strings up to the end of the match.
// index and index-1 are already inserted. If there is not enough
}
for d.index++; d.index < newIndex; d.index++ {
if d.index < d.maxInsertIndex {
- d.hash = (d.hash<<hashShift + int(d.window[d.index+2])) & hashMask
+ d.hash = hash4(d.window[d.index : d.index+minMatchLength])
// Get previous value with the same hash.
// Our chain should point to the previous value.
d.hashPrev[d.index&windowMask] = d.hashHead[d.hash]
// Set the head of the hash chain to us.
- d.hashHead[d.hash] = d.index + d.hashOffset
+ d.hashHead[d.hash] = uint32(d.index + d.hashOffset)
}
}
if d.fastSkipHashing == skipNever {
// For matches this long, we don't bother inserting each individual
// item into the table.
d.index += d.length
if d.index < d.maxInsertIndex {
- d.hash = (int(d.window[d.index])<<hashShift + int(d.window[d.index+1]))
+ d.hash = hash4(d.window[d.index : d.index+minMatchLength])
}
}
if len(d.tokens) == maxFlateBlockTokens {
// The block includes the current character
- if d.err = d.writeBlock(d.tokens, d.index, false); d.err != nil {
+ if d.err = d.writeBlock(d.tokens, d.index); d.err != nil {
return
}
d.tokens = d.tokens[:0]
}
d.tokens = append(d.tokens, literalToken(uint32(d.window[i])))
if len(d.tokens) == maxFlateBlockTokens {
- if d.err = d.writeBlock(d.tokens, i+1, false); d.err != nil {
+ if d.err = d.writeBlock(d.tokens, i+1); d.err != nil {
return
}
d.tokens = d.tokens[:0]
return 0, d.err
}
n = len(b)
- b = b[d.fill(d, b):]
for len(b) > 0 {
d.step(d)
b = b[d.fill(d, b):]
return nil
}
-var zeroes [32]int
-var bzeroes [256]byte
+// hzeroes is used for bulk-zeroing the hash chain slices.
+var hzeroes [256]uint32
func (d *compressor) reset(w io.Writer) {
d.w.reset(w)
d.sync = false
d.err = nil
- switch d.compressionLevel.chain {
- case 0:
- // level was NoCompression.
- for i := range d.window {
- d.window[i] = 0
- }
+ switch d.compressionLevel.level {
+ case NoCompression:
d.windowEnd = 0
default:
d.chainHead = -1
for s := d.hashHead; len(s) > 0; {
- n := copy(s, zeroes[:])
+ n := copy(s, hzeroes[:])
s = s[n:]
}
- for s := d.hashPrev; len(s) > 0; s = s[len(zeroes):] {
- copy(s, zeroes[:])
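+ // len(hashPrev) == windowSize is a multiple of len(hzeroes), so the
+ // fixed stride below never over-slices.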
+ for s := d.hashPrev; len(s) > 0; s = s[len(hzeroes):] {
+ copy(s, hzeroes[:])
}
d.hashOffset = 1
d.index, d.windowEnd = 0, 0
- for s := d.window; len(s) > 0; {
- n := copy(s, bzeroes[:])
- s = s[n:]
- }
d.blockStart, d.byteAvailable = 0, false
-
- d.tokens = d.tokens[:maxFlateBlockTokens+1]
- for i := 0; i <= maxFlateBlockTokens; i++ {
- d.tokens[i] = 0
- }
d.tokens = d.tokens[:0]
d.length = minMatchLength - 1
d.offset = 0
// can only be decompressed by a Reader initialized with the
// same dictionary.
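+// Illustrative round trip (the reader must be primed with the same
+// dictionary, e.g. via this package's NewReaderDict):
+//
+//   var buf bytes.Buffer
+//   zw, _ := NewWriterDict(&buf, BestCompression, dict)
+//   zw.Write(data)
+//   zw.Close()
+//   zr := NewReaderDict(&buf, dict)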
func NewWriterDict(w io.Writer, level int, dict []byte) (*Writer, error) {
- dw := &dictWriter{w, false}
+ dw := &dictWriter{w}
zw, err := NewWriter(dw, level)
if err != nil {
return nil, err
}
- zw.Write(dict)
- zw.Flush()
- dw.enabled = true
+ zw.d.fillWindow(dict)
zw.dict = append(zw.dict, dict...) // duplicate dictionary for Reset method.
return zw, err
}
type dictWriter struct {
- w io.Writer
- enabled bool
+ w io.Writer
}
func (w *dictWriter) Write(b []byte) (n int, err error) {
- if w.enabled {
- return w.w.Write(b)
- }
- return len(b), nil
+ return w.w.Write(b)
}
// A Writer takes data written to it and writes the compressed
// w was created with NewWriterDict
dw.w = dst
w.d.reset(dw)
- dw.enabled = false
- w.Write(w.dict)
- w.Flush()
- dw.enabled = true
+ w.d.fillWindow(w.dict)
} else {
// w was created with NewWriter
w.d.reset(dst)
return result
}
+func TestBulkHash4(t *testing.T) {
+ for _, x := range deflateTests {
+ y := x.out
+ if len(y) < minMatchLength {
+ continue
+ }
+ y = append(y, y...)
+ for j := 4; j < len(y); j++ {
+ y := y[:j]
+ dst := make([]uint32, len(y)-minMatchLength+1)
+ for i := range dst {
+ dst[i] = uint32(i + 100)
+ }
+ bulkHash4(y, dst)
+ for i, got := range dst {
+ want := hash4(y[i:])
+ if got != want && got == uint32(i)+100 {
+ t.Errorf("Len:%d Index:%d, want 0x%08x but not modified", len(y), i, want)
+ } else if got != want {
+ t.Errorf("Len:%d Index:%d, got 0x%08x want:0x%08x", len(y), i, got, want)
+ }
+ }
+ }
+ }
+}
+
func TestDeflate(t *testing.T) {
for _, h := range deflateTests {
var buf bytes.Buffer
w.Write(h.in)
w.Close()
if !bytes.Equal(buf.Bytes(), h.out) {
- t.Errorf("Deflate(%d, %x) = %x, want %x", h.level, h.in, buf.Bytes(), h.out)
+ t.Errorf("Deflate(%d, %x) = \n%#v, want \n%#v", h.level, h.in, buf.Bytes(), h.out)
}
}
}
t.Errorf("level: %d, len(compress(data)) = %d > limit = %d", level, buffer.Len(), limit)
return
}
+ if limit > 0 {
+ t.Logf("level: %d, size: %.2f%%, %d bytes", level, float64(buffer.Len()*100)/float64(limit), buffer.Len())
+ }
r := NewReader(&buffer)
out, err := ioutil.ReadAll(r)
if err != nil {
// DeepEqual doesn't compare functions.
w.d.fill, wref.d.fill = nil, nil
w.d.step, wref.d.step = nil, nil
+ w.d.bulkHasher, wref.d.bulkHasher = nil, nil
+ // hashMatch is always overwritten when used.
+ copy(w.d.hashMatch[:], wref.d.hashMatch[:])
+ if len(w.d.tokens) != 0 {
+ t.Errorf("level %d Writer not reset after Reset. %d tokens were present", level, len(w.d.tokens))
+ }
+ // As long as the length is 0, we don't care about the content.
+ w.d.tokens = wref.d.tokens
+
+ // We don't care if there are values in the window, as long as d.index is 0.
+ w.d.window = wref.d.window
if !reflect.DeepEqual(w, wref) {
t.Errorf("level %d Writer not reset after Reset", level)
}
w.Write(b)
}
w.Close()
- out1 := buf.String()
+ out1 := buf.Bytes()
buf2 := new(bytes.Buffer)
w.Reset(buf2)
w.Write(b)
}
w.Close()
- out2 := buf2.String()
+ out2 := buf2.Bytes()
- if out1 != out2 {
- t.Errorf("got %q, expected %q", out2, out1)
+ if len(out1) != len(out2) {
+ t.Errorf("got %d, expected %d bytes", len(out2), len(out1))
+ return
+ }
+ if !bytes.Equal(out1, out2) {
+ mm := 0
+ for i, b := range out1[:len(out2)] {
+ if b != out2[i] {
+ t.Errorf("mismatch index %d: %#02x, expected %#02x", i, out2[i], b)
+ mm++
+ if mm == 10 {
+ t.Fatal("Stopping")
+ }
+ }
+ }
}
t.Logf("got %d bytes", len(out1))
}