From cf84e4f1d6b6210cf5a893556c9878f56082b37c6ced327404c22497de973c49 Mon Sep 17 00:00:00 2001 From: Sergey Matveev Date: Sun, 15 Dec 2024 19:26:37 +0300 Subject: [PATCH] Streaming BLOBs --- gyac/blob.go | 31 +++++++++-------------------- gyac/cmd/test-vector-anys/main.go | 11 ++++++----- gyac/dec.go | 9 +++++++-- gyac/enc.go | 33 +++++++++++++++---------------- 4 files changed, 38 insertions(+), 46 deletions(-) diff --git a/gyac/blob.go b/gyac/blob.go index 23e0d38..3aa4cda 100644 --- a/gyac/blob.go +++ b/gyac/blob.go @@ -15,31 +15,18 @@ package gyac -import "fmt" +import ( + "fmt" + "io" +) -// BLOB object, that keeps data splitted on ChunkLen chunks. +// BLOB object. You have to set its ChunkLen > 0 and data is read from R. type Blob struct { - Chunks [][]byte - ChunkLen int + R io.Reader + ChunkLen int + DecodedLen int64 // filled up after decoding } func (blob *Blob) String() string { - var l int - for _, chunk := range blob.Chunks { - l += len(chunk) - } - return fmt.Sprintf("BLOB(%d, %d)", blob.ChunkLen, l) -} - -func MakeBlob(chunkLen int, data []byte) (blob Blob) { - blob.ChunkLen = chunkLen - n := len(data) / chunkLen - for i := 0; i < n; i++ { - blob.Chunks = append(blob.Chunks, data[i*chunkLen:(i+1)*chunkLen]) - } - left := len(data) - n*chunkLen - if left > 0 { - blob.Chunks = append(blob.Chunks, data[len(data)-left:]) - } - return + return fmt.Sprintf("BLOB(%d, %d)", blob.ChunkLen, blob.DecodedLen) } diff --git a/gyac/cmd/test-vector-anys/main.go b/gyac/cmd/test-vector-anys/main.go index 03cfc9b..c307203 100644 --- a/gyac/cmd/test-vector-anys/main.go +++ b/gyac/cmd/test-vector-anys/main.go @@ -6,6 +6,7 @@ import ( "fmt" "log" "math/big" + "strings" "time" "github.com/google/uuid" @@ -79,15 +80,15 @@ func main() { "utf8": "привет мир", }, "blob": []any{ - gyac.MakeBlob(12, []byte{'5'}), - gyac.MakeBlob(12, bytes.Repeat([]byte{'6'}, 12)), - gyac.MakeBlob(12, bytes.Repeat([]byte{'7'}, 13)), - gyac.MakeBlob(5, []byte("1234567890-")), + gyac.Blob{ChunkLen: 12, R: strings.NewReader("5")}, + gyac.Blob{ChunkLen: 12, R: strings.NewReader(strings.Repeat("6", 12))}, + gyac.Blob{ChunkLen: 12, R: strings.NewReader(strings.Repeat("7", 13))}, + gyac.Blob{ChunkLen: 5, R: strings.NewReader("1234567890-")}, }, "empties": []any{ []any{}, map[string]any{}, - gyac.MakeBlob(123, []byte{}), + gyac.Blob{ChunkLen: 123, R: bytes.NewReader(nil)}, uuid.Nil, atom.Raw{T: atom.TAI64, V: mustHexDec("0000000000000000")}, }, diff --git a/gyac/dec.go b/gyac/dec.go index 1ddafb5..5b51597 100644 --- a/gyac/dec.go +++ b/gyac/dec.go @@ -16,6 +16,7 @@ package gyac import ( + "bytes" "errors" "io" @@ -150,6 +151,7 @@ func decode( v := Blob{ChunkLen: chunkLen} var sub Item var subRead int64 + var chunks []io.Reader BlobCycle: for { sub, subRead, err = decode(r, false, recursionDepth+1) @@ -165,7 +167,8 @@ func decode( if err != nil { return } - v.Chunks = append(v.Chunks, buf) + chunks = append(chunks, bytes.NewReader(buf)) + v.DecodedLen += int64(chunkLen) case types.Bin: b := sub.V.([]byte) if len(b) >= chunkLen { @@ -173,7 +176,8 @@ func decode( return } if len(b) != 0 { - v.Chunks = append(v.Chunks, b) + chunks = append(chunks, bytes.NewReader(b)) + v.DecodedLen += int64(len(b)) } break BlobCycle default: @@ -181,6 +185,7 @@ func decode( return } } + v.R = io.MultiReader(chunks...) item.V = v return } diff --git a/gyac/enc.go b/gyac/enc.go index 6565077..6506fb3 100644 --- a/gyac/enc.go +++ b/gyac/enc.go @@ -92,27 +92,26 @@ func (item Item) Encode(w io.Writer) (written int64, err error) { if err != nil { return } - var n int64 - for _, chunk := range blob.Chunks { - if len(chunk) == blob.ChunkLen { - n, err = atom.ChunkEncode(w, chunk) - written += n - if err != nil { - return + chunk := make([]byte, blob.ChunkLen) + var n int + var n64 int64 + for { + n, err = io.ReadFull(blob.R, chunk) + if err != nil { + if err == io.ErrUnexpectedEOF || err == io.EOF { + chunk = chunk[:n] + break } + return } - } - if len(blob.Chunks) == 0 { - n, err = atom.BinEncode(w, []byte{}) - } else { - last := blob.Chunks[len(blob.Chunks)-1] - if len(last) == blob.ChunkLen { - n, err = atom.BinEncode(w, []byte{}) - } else { - n, err = atom.BinEncode(w, last) + n64, err = atom.ChunkEncode(w, chunk) + written += n64 + if err != nil { + return } } - written += n + n64, err = atom.BinEncode(w, chunk) + written += n64 case types.TAI64: return atom.TAI64Encode(w, item.V.([]byte)) case types.Bin: -- 2.48.1