]> Cypherpunks repositories - gostls13.git/commitdiff
archive/zip: set utf-8 flag
authorYasuhiro Matsumoto <mattn.jp@gmail.com>
Wed, 5 Apr 2017 08:32:09 +0000 (17:32 +0900)
committerJoe Tsai <thebrokentoaster@gmail.com>
Fri, 26 May 2017 17:08:18 +0000 (17:08 +0000)
See: https://pkware.cachefly.net/webdocs/APPNOTE/APPNOTE-6.3.0.TXT

Document says:
> If general purpose bit 11 is set, the filename and comment must support The
> Unicode Standard, Version 4.1.0 or greater using the character encoding form
> defined by the UTF-8 storage specification.

Since Go encode the filename to UTF-8, general purpose bit 11 should be set.

Change-Id: Ica4af02b4dc695e9a5c015ae360e70171efb6ee3
Reviewed-on: https://go-review.googlesource.com/39570
Reviewed-by: Joe Tsai <thebrokentoaster@gmail.com>
Run-TryBot: Joe Tsai <thebrokentoaster@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>

src/archive/zip/writer.go
src/archive/zip/writer_test.go

index 8940e25560e4501703a739e1321421deba2cffec..f46a03c37e64fdc36b64257abf05fa90e82d1d87 100644 (file)
@@ -11,6 +11,7 @@ import (
        "hash"
        "hash/crc32"
        "io"
+       "unicode/utf8"
 )
 
 // TODO(adg): support zip file comments
@@ -201,6 +202,20 @@ func (w *Writer) Create(name string) (io.Writer, error) {
        return w.CreateHeader(header)
 }
 
+func hasValidUTF8(s string) bool {
+       n := 0
+       for _, r := range s {
+               // By default, ZIP uses CP437, which is only identical to ASCII for the printable characters.
+               if r < 0x20 || r >= 0x7f {
+                       if !utf8.ValidRune(r) {
+                               return false
+                       }
+                       n++
+               }
+       }
+       return n > 0
+}
+
 // CreateHeader adds a file to the zip file using the provided FileHeader
 // for the file metadata.
 // It returns a Writer to which the file contents should be written.
@@ -221,6 +236,10 @@ func (w *Writer) CreateHeader(fh *FileHeader) (io.Writer, error) {
 
        fh.Flags |= 0x8 // we will write a data descriptor
 
+       if hasValidUTF8(fh.Name) || hasValidUTF8(fh.Comment) {
+               fh.Flags |= 0x800 // filename or comment have valid utf-8 string
+       }
+
        fh.CreatorVersion = fh.CreatorVersion&0xff00 | zipVersion20 // preserve compatibility byte
        fh.ReaderVersion = zipVersion20
 
index 84b1d9e06294e924bb100febce689e37eebaba1b..92fb6ecf0ed11e7decce45b72b77058994259e73 100644 (file)
@@ -87,6 +87,69 @@ func TestWriter(t *testing.T) {
        }
 }
 
+func TestWriterUTF8(t *testing.T) {
+       var utf8Tests = []struct {
+               name    string
+               comment string
+               expect  uint16
+       }{
+               {
+                       name:    "hi, hello",
+                       comment: "in the world",
+                       expect:  0x8,
+               },
+               {
+                       name:    "hi, こんにちわ",
+                       comment: "in the world",
+                       expect:  0x808,
+               },
+               {
+                       name:    "hi, hello",
+                       comment: "in the 世界",
+                       expect:  0x808,
+               },
+               {
+                       name:    "hi, こんにちわ",
+                       comment: "in the 世界",
+                       expect:  0x808,
+               },
+       }
+
+       // write a zip file
+       buf := new(bytes.Buffer)
+       w := NewWriter(buf)
+
+       for _, test := range utf8Tests {
+               h := &FileHeader{
+                       Name:    test.name,
+                       Comment: test.comment,
+                       Method:  Deflate,
+               }
+               w, err := w.CreateHeader(h)
+               if err != nil {
+                       t.Fatal(err)
+               }
+               w.Write([]byte{})
+       }
+
+       if err := w.Close(); err != nil {
+               t.Fatal(err)
+       }
+
+       // read it back
+       r, err := NewReader(bytes.NewReader(buf.Bytes()), int64(buf.Len()))
+       if err != nil {
+               t.Fatal(err)
+       }
+       for i, test := range utf8Tests {
+               got := r.File[i].Flags
+               t.Logf("name %v, comment %v", test.name, test.comment)
+               if got != test.expect {
+                       t.Fatalf("Flags: got %v, want %v", got, test.expect)
+               }
+       }
+}
+
 func TestWriterOffset(t *testing.T) {
        largeData := make([]byte, 1<<17)
        for i := range largeData {