import (
"bytes"
- "encoding/binary"
"exp/regexp"
+ "gob"
"io"
"os"
"sort"
return &Index{data, qsufsort(data)}
}
+// Read and Write slice the data into successive portions of length gobN,
+// so gob can allocate smaller buffers for its I/O.
+const gobN = 1 << 16 // slightly better than say 1 << 20 (BenchmarkSaveRestore)
+
// Read reads the index from r into x; x must not be nil.
func (x *Index) Read(r io.Reader) os.Error {
- var n int32
- if err := binary.Read(r, binary.LittleEndian, &n); err != nil {
+ d := gob.NewDecoder(r)
+ var n int
+ if err := d.Decode(&n); err != nil {
return err
}
- if 2*n < int32(cap(x.data)) || int32(cap(x.data)) < n {
+ if 2*n < cap(x.data) || cap(x.data) < n {
// new data is significantly smaller or larger then
// existing buffers - allocate new ones
x.data = make([]byte, n)
x.data = x.data[0:n]
x.sa = x.sa[0:n]
}
-
- if err := binary.Read(r, binary.LittleEndian, x.data); err != nil {
- return err
- }
- if err := binary.Read(r, binary.LittleEndian, x.sa); err != nil {
- return err
+ for i := 0; i < n; {
+ j := i + gobN
+ if j > n {
+ j = n
+ }
+ // data holds next piece of x.data; its length is updated by Decode
+ data := x.data[i:j]
+ if err := d.Decode(&data); err != nil {
+ return err
+ }
+ if len(data) != j-i {
+ return os.NewError("suffixarray.Read: inconsistent data format")
+ }
+ // sa holds next piece of x.data; its length is updated by Decode
+ sa := x.sa[i:j]
+ if err := d.Decode(&sa); err != nil {
+ return err
+ }
+ if len(sa) != j-i {
+ return os.NewError("suffixarray.Read: inconsistent data format")
+ }
+ i = j
}
-
return nil
}
// Write writes the index x to w.
func (x *Index) Write(w io.Writer) os.Error {
- n := int32(len(x.data))
- if err := binary.Write(w, binary.LittleEndian, n); err != nil {
- return err
- }
- if err := binary.Write(w, binary.LittleEndian, x.data); err != nil {
+ e := gob.NewEncoder(w)
+ n := len(x.data)
+ if err := e.Encode(n); err != nil {
return err
}
- if err := binary.Write(w, binary.LittleEndian, x.sa); err != nil {
- return err
+ for i := 0; i < n; {
+ j := i + gobN
+ if j > n {
+ j = n
+ }
+ if err := e.Encode(x.data[i:j]); err != nil {
+ return err
+ }
+ if err := e.Encode(x.sa[i:j]); err != nil {
+ return err
+ }
+ i = j
}
return nil
}
import (
"bytes"
"exp/regexp"
+ "rand"
"sort"
"strings"
"testing"
testLookups(t, &tc, x, -1)
}
}
+
+func BenchmarkSaveRestore(b *testing.B) {
+ b.StopTimer()
+ r := rand.New(rand.NewSource(0x5a77a1)) // guarantee always same sequence
+ data := make([]byte, 10<<20) // 10MB index data
+ for i := range data {
+ data[i] = byte(r.Intn(256))
+ }
+ x := New(data)
+ testSaveRestore(nil, nil, x) // verify correctness
+ buf := bytes.NewBuffer(make([]byte, len(data))) // avoid frequent growing
+ b.StartTimer()
+ for i := 0; i < b.N; i++ {
+ x.Write(buf)
+ var y Index
+ y.Read(buf)
+ }
+}