// must be considered UTF-8 encoding (i.e., not compatible with CP-437, ASCII,
// or any other common encoding).
func detectUTF8(s string) (valid, require bool) {
- for _, r := range s {
+ for i := 0; i < len(s); {
+ r, size := utf8.DecodeRuneInString(s[i:])
+ i += size
// Officially, ZIP uses CP-437, but many readers use the system's
// local character encoding. Most encoding are compatible with a large
// subset of CP-437, which itself is ASCII-like.
// Forbid 0x7e and 0x5c since EUC-KR and Shift-JIS replace those
// characters with localized currency and overline characters.
if r < 0x20 || r > 0x7d || r == 0x5c {
- if !utf8.ValidRune(r) || r == utf8.RuneError {
+ if !utf8.ValidRune(r) || (r == utf8.RuneError && size == 1) {
return false, false
}
require = true
var utf8Tests = []struct {
name string
comment string
- expect uint16
nonUTF8 bool
+ flags uint16
}{
{
name: "hi, hello",
comment: "in the world",
- expect: 0x8,
+ flags: 0x8,
},
{
name: "hi, こんにちわ",
comment: "in the world",
- expect: 0x808,
+ flags: 0x808,
},
{
name: "hi, こんにちわ",
comment: "in the world",
nonUTF8: true,
- expect: 0x8,
+ flags: 0x8,
},
{
name: "hi, hello",
comment: "in the 世界",
- expect: 0x808,
+ flags: 0x808,
},
{
name: "hi, こんにちわ",
comment: "in the 世界",
- expect: 0x808,
+ flags: 0x808,
+ },
+ {
+ name: "the replacement rune is �",
+ comment: "the replacement rune is �",
+ flags: 0x808,
},
{
// Name is Japanese encoded in Shift JIS.
name: "\x93\xfa\x96{\x8c\xea.txt",
comment: "in the 世界",
- expect: 0x008, // UTF-8 must not be set
+ flags: 0x008, // UTF-8 must not be set
},
}
t.Fatal(err)
}
for i, test := range utf8Tests {
- got := r.File[i].Flags
- t.Logf("name %v, comment %v", test.name, test.comment)
- if got != test.expect {
- t.Fatalf("Flags: got %v, want %v", got, test.expect)
+ flags := r.File[i].Flags
+ if flags != test.flags {
+ t.Errorf("CreateHeader(name=%q comment=%q nonUTF8=%v): flags=%#x, want %#x", test.name, test.comment, test.nonUTF8, flags, test.flags)
}
}
}