os, syscall: support ill-formed UTF-16 strings on Windows

author qmuntal <quimmuntal@gmail.com>

Fri, 5 May 2023 16:17:18 +0000 (18:17 +0200)

committer Quim Muntal <quimmuntal@gmail.com>

Mon, 15 May 2023 09:26:16 +0000 (09:26 +0000)
author qmuntal <quimmuntal@gmail.com>
Fri, 5 May 2023 16:17:18 +0000 (18:17 +0200)
committer Quim Muntal <quimmuntal@gmail.com>
Mon, 15 May 2023 09:26:16 +0000 (09:26 +0000)
diff --git a/src/internal/syscall/execenv/execenv_windows.go b/src/internal/syscall/execenv/execenv_windows.go

index 46ba12efc521b12fb05e6a36d96a08ce8eb10714..2a89ed1f58f1c42c04ec88b384cfc8d6436f77aa 100644 (file)
--- a/src/internal/syscall/execenv/execenv_windows.go
+++ b/src/internal/syscall/execenv/execenv_windows.go
@@ -9,7 +9,6 @@ package execenv
  import (
         "internal/syscall/windows"
         "syscall"
-       "unicode/utf16"
         "unsafe"
  )
  
@@ -41,7 +40,7 @@ func Default(sys *syscall.SysProcAttr) (env []string, err error) {
                 }
  
                 entry := unsafe.Slice(blockp, (uintptr(end)-uintptr(unsafe.Pointer(blockp)))/2)
-               env = append(env, string(utf16.Decode(entry)))
+               env = append(env, syscall.UTF16ToString(entry))
                 blockp = (*uint16)(unsafe.Add(end, size))
         }
         return
diff --git a/src/internal/syscall/windows/registry/value.go b/src/internal/syscall/windows/registry/value.go

index 025574015f69e8ac7bd901268a7330bc07d54a8d..7dfee0330fe3515784780269f87710adba7000cc 100644 (file)
--- a/src/internal/syscall/windows/registry/value.go
+++ b/src/internal/syscall/windows/registry/value.go
@@ -217,7 +217,7 @@ func (k Key) GetStringsValue(name string) (val []string, valtype uint32, err err
         from := 0
         for i, c := range p {
                 if c == 0 {
-                       val = append(val, string(utf16.Decode(p[from:i])))
+                       val = append(val, syscall.UTF16ToString(p[from:i]))
                         from = i + 1
                 }
         }
diff --git a/src/internal/syscall/windows/syscall_windows.go b/src/internal/syscall/windows/syscall_windows.go

index cfe4695258bae3a66dfce42682bf4387f6c46328..53d32a14a01437665300fe61b000274980d26b65 100644 (file)
--- a/src/internal/syscall/windows/syscall_windows.go
+++ b/src/internal/syscall/windows/syscall_windows.go
@@ -7,7 +7,6 @@ package windows
  import (
         "sync"
         "syscall"
-       "unicode/utf16"
         "unsafe"
  )
  
@@ -17,17 +16,13 @@ func UTF16PtrToString(p *uint16) string {
         if p == nil {
                 return ""
         }
-       // Find NUL terminator.
         end := unsafe.Pointer(p)
         n := 0
         for *(*uint16)(end) != 0 {
                 end = unsafe.Pointer(uintptr(end) + unsafe.Sizeof(*p))
                 n++
         }
-       // Turn *uint16 into []uint16.
-       s := unsafe.Slice(p, n)
-       // Decode []uint16 into string.
-       return string(utf16.Decode(s))
+       return syscall.UTF16ToString(unsafe.Slice(p, n))
  }
  
  const (
diff --git a/src/os/dir_windows.go b/src/os/dir_windows.go

index cee05cc7292857296c5053456880e7339e635a4c..7792d030404a8997aee0c942fc706884f3849b09 100644 (file)
--- a/src/os/dir_windows.go
+++ b/src/os/dir_windows.go
@@ -11,7 +11,6 @@ import (
         "runtime"
         "sync"
         "syscall"
-       "unicode/utf16"
         "unsafe"
  )
  
@@ -104,7 +103,7 @@ func (file *File) readdir(n int, mode readdirMode) (names []string, dirents []Di
                                 d.bufp = 0
                         }
                         nameslice := unsafe.Slice(&info.FileName[0], info.FileNameLength/2)
-                       name := string(utf16.Decode(nameslice))
+                       name := syscall.UTF16ToString(nameslice)
                         if name == "." || name == ".." { // Useless names
                                 continue
                         }
diff --git a/src/os/exec/lp_windows_test.go b/src/os/exec/lp_windows_test.go

index 50d522948a7957df42576c9d5dd009d52f067920..4d85a5f41573e15d0782ab1827f1ee94e4162a8a 100644 (file)
--- a/src/os/exec/lp_windows_test.go
+++ b/src/os/exec/lp_windows_test.go
@@ -587,7 +587,6 @@ package main
  import (
         "os"
         "syscall"
-       "unicode/utf16"
         "unsafe"
  )
  
@@ -599,7 +598,7 @@ func getMyName() (string, error) {
         if n == 0 {
                 return "", err
         }
-       return string(utf16.Decode(b[0:n])), nil
+       return syscall.UTF16ToString(b[0:n]), nil
  }
  
  func main() {
diff --git a/src/os/file_windows.go b/src/os/file_windows.go

index f5a436e2353a3cf821ca0a29836f68c3296c3ea5..37db3f931c69c04a2934f70bba2c196b5bad1e4c 100644 (file)
--- a/src/os/file_windows.go
+++ b/src/os/file_windows.go
@@ -11,7 +11,6 @@ import (
         "runtime"
         "sync"
         "syscall"
-       "unicode/utf16"
         "unsafe"
  )
  
@@ -259,7 +258,7 @@ func tempDir() string {
                         // Otherwise remove terminating \.
                         n--
                 }
-               return string(utf16.Decode(b[:n]))
+               return syscall.UTF16ToString(b[:n])
         }
  }
  
diff --git a/src/os/os_windows_test.go b/src/os/os_windows_test.go

index 21a8c21d1e466b83a1cf2fcd123dad4ccf728766..fbc8cc1b9f8fdcd85fd970fd98e3c634720034a6 100644 (file)
--- a/src/os/os_windows_test.go
+++ b/src/os/os_windows_test.go
@@ -18,6 +18,7 @@ import (
         "path/filepath"
         "reflect"
         "runtime"
+       "slices"
         "sort"
         "strings"
         "syscall"
@@ -1377,3 +1378,71 @@ func TestAppExecLinkStat(t *testing.T) {
                 t.Errorf("exec.LookPath(%q) = %q; want %q", pythonPath, p, pythonPath)
         }
  }
+
+func TestIllformedUTF16FileName(t *testing.T) {
+       dir := t.TempDir()
+       const sep = string(os.PathSeparator)
+       if !strings.HasSuffix(dir, sep) {
+               dir += sep
+       }
+
+       // This UTF-16 file name is ill-formed as it contains low surrogates that are not preceded by high surrogates ([1:5]).
+       namew := []uint16{0x2e, 0xdc6d, 0xdc73, 0xdc79, 0xdc73, 0x30, 0x30, 0x30, 0x31, 0}
+
+       // Create a file whose name contains unpaired surrogates.
+       // Use syscall.CreateFile instead of os.Create to simulate a file that is created by
+       // a non-Go program so the file name hasn't gone through syscall.UTF16FromString.
+       dirw := utf16.Encode([]rune(dir))
+       pathw := append(dirw, namew...)
+       fd, err := syscall.CreateFile(&pathw[0], syscall.GENERIC_ALL, 0, nil, syscall.CREATE_NEW, 0, 0)
+       if err != nil {
+               t.Fatal(err)
+       }
+       syscall.CloseHandle(fd)
+
+       name := syscall.UTF16ToString(namew)
+       path := filepath.Join(dir, name)
+       // Verify that os.Lstat can query the file.
+       fi, err := os.Lstat(path)
+       if err != nil {
+               t.Fatal(err)
+       }
+       if got := fi.Name(); got != name {
+               t.Errorf("got %q, want %q", got, name)
+       }
+       // Verify that File.Readdirnames lists the file.
+       f, err := os.Open(dir)
+       if err != nil {
+               t.Fatal(err)
+       }
+       files, err := f.Readdirnames(0)
+       f.Close()
+       if err != nil {
+               t.Fatal(err)
+       }
+       if !slices.Contains(files, name) {
+               t.Error("file not listed")
+       }
+       // Verify that os.RemoveAll can remove the directory
+       // and that it doesn't hang.
+       err = os.RemoveAll(dir)
+       if err != nil {
+               t.Error(err)
+       }
+}
+
+func TestUTF16Alloc(t *testing.T) {
+       allowsPerRun := func(want int, f func()) {
+               t.Helper()
+               got := int(testing.AllocsPerRun(5, f))
+               if got != want {
+                       t.Errorf("got %d allocs, want %d", got, want)
+               }
+       }
+       allowsPerRun(1, func() {
+               syscall.UTF16ToString([]uint16{'a', 'b', 'c'})
+       })
+       allowsPerRun(1, func() {
+               syscall.UTF16FromString("abc")
+       })
+}
diff --git a/src/syscall/env_windows.go b/src/syscall/env_windows.go

index 94364f930c6fe41ba1269514c4ac409871e30f33..20d74b51e0107274f97b57a026af6d5b9eeb5127 100644 (file)
--- a/src/syscall/env_windows.go
+++ b/src/syscall/env_windows.go
@@ -7,7 +7,6 @@
  package syscall
  
  import (
-       "unicode/utf16"
         "unsafe"
  )
  
@@ -24,7 +23,7 @@ func Getenv(key string) (value string, found bool) {
                         return "", false
                 }
                 if n <= uint32(len(b)) {
-                       return string(utf16.Decode(b[:n])), true
+                       return UTF16ToString(b[:n]), true
                 }
         }
  }
@@ -90,7 +89,7 @@ func Environ() []string {
                 }
  
                 entry := unsafe.Slice(envp, (uintptr(end)-uintptr(unsafe.Pointer(envp)))/size)
-               r = append(r, string(utf16.Decode(entry)))
+               r = append(r, UTF16ToString(entry))
                 envp = (*uint16)(unsafe.Add(end, size))
         }
         return r
diff --git a/src/syscall/export_windows_test.go b/src/syscall/export_windows_test.go

index a72a1ee391f535dfbe00b014696a7459203553af..eccf1bccace4d9585a452504455638c5fa203f42 100644 (file)
--- a/src/syscall/export_windows_test.go
+++ b/src/syscall/export_windows_test.go
@@ -9,3 +9,6 @@ var UpdateProcThreadAttribute = updateProcThreadAttribute
  var DeleteProcThreadAttributeList = deleteProcThreadAttributeList
  
  const PROC_THREAD_ATTRIBUTE_HANDLE_LIST = _PROC_THREAD_ATTRIBUTE_HANDLE_LIST
+
+var EncodeWTF16 = encodeWTF16
+var DecodeWTF16 = decodeWTF16
diff --git a/src/syscall/syscall_windows.go b/src/syscall/syscall_windows.go

index 1f7753663b5aa0cd0251deecf8f046e91b6dbc2b..c3fa415832ed835ffe33d326b938ef7f91502513 100644 (file)
--- a/src/syscall/syscall_windows.go
+++ b/src/syscall/syscall_windows.go
@@ -14,7 +14,6 @@ import (
         "internal/race"
         "runtime"
         "sync"
-       "unicode/utf16"
         "unsafe"
  )
  
@@ -37,7 +36,8 @@ func StringToUTF16(s string) []uint16 {
  
  // UTF16FromString returns the UTF-16 encoding of the UTF-8 string
  // s, with a terminating NUL added. If s contains a NUL byte at any
-// location, it returns (nil, EINVAL).
+// location, it returns (nil, EINVAL). Unpaired surrogates
+// are encoded using WTF-8.
  func UTF16FromString(s string) ([]uint16, error) {
         if bytealg.IndexByteString(s, 0) != -1 {
                 return nil, EINVAL
@@ -49,22 +49,37 @@ func UTF16FromString(s string) ([]uint16, error) {
         // equal than the number of UTF-16 code units.
         // Also account for the terminating NUL character.
         buf := make([]uint16, 0, len(s)+1)
-       for _, r := range s {
-               buf = utf16.AppendRune(buf, r)
-       }
-       return utf16.AppendRune(buf, '\x00'), nil
+       buf = encodeWTF16(s, buf)
+       return append(buf, 0), nil
  }
  
  // UTF16ToString returns the UTF-8 encoding of the UTF-16 sequence s,
-// with a terminating NUL removed.
+// with a terminating NUL removed. Unpaired surrogates are decoded
+// using WTF-8 instead of UTF-8 encoding.
  func UTF16ToString(s []uint16) string {
+       maxLen := 0
         for i, v := range s {
                 if v == 0 {
                         s = s[0:i]
                         break
                 }
+               switch {
+               case v <= rune1Max:
+                       maxLen += 1
+               case v <= rune2Max:
+                       maxLen += 2
+               default:
+                       // r is a non-surrogate that decodes to 3 bytes,
+                       // or is an unpaired surrogate (also 3 bytes in WTF-8),
+                       // or is one half of a valid surrogate pair.
+                       // If it is half of a pair, we will add 3 for the second surrogate
+                       // (total of 6) and overestimate by 2 bytes for the pair,
+                       // since the resulting rune only requires 4 bytes.
+                       maxLen += 3
+               }
         }
-       return string(utf16.Decode(s))
+       buf := decodeWTF16(s, make([]byte, 0, maxLen))
+       return unsafe.String(unsafe.SliceData(buf), len(buf))
  }
  
  // utf16PtrToString is like UTF16ToString, but takes *uint16
@@ -73,17 +88,13 @@ func utf16PtrToString(p *uint16) string {
         if p == nil {
                 return ""
         }
-       // Find NUL terminator.
         end := unsafe.Pointer(p)
         n := 0
         for *(*uint16)(end) != 0 {
                 end = unsafe.Pointer(uintptr(end) + unsafe.Sizeof(*p))
                 n++
         }
-       // Turn *uint16 into []uint16.
-       s := unsafe.Slice(p, n)
-       // Decode []uint16 into string.
-       return string(utf16.Decode(s))
+       return UTF16ToString(unsafe.Slice(p, n))
  }
  
  // StringToUTF16Ptr returns pointer to the UTF-16 encoding of
@@ -97,6 +108,7 @@ func StringToUTF16Ptr(s string) *uint16 { return &StringToUTF16(s)[0] }
  // UTF16PtrFromString returns pointer to the UTF-16 encoding of
  // the UTF-8 string s, with a terminating NUL added. If s
  // contains a NUL byte at any location, it returns (nil, EINVAL).
+// Unpaired surrogates are encoded using WTF-8.
  func UTF16PtrFromString(s string) (*uint16, error) {
         a, err := UTF16FromString(s)
         if err != nil {
@@ -143,7 +155,7 @@ func (e Errno) Error() string {
         // trim terminating \r and \n
         for ; n > 0 && (b[n-1] == '\n' || b[n-1] == '\r'); n-- {
         }
-       return string(utf16.Decode(b[:n]))
+       return UTF16ToString(b[:n])
  }
  
  const (
@@ -525,7 +537,7 @@ func Getwd() (wd string, err error) {
         if e != nil {
                 return "", e
         }
-       return string(utf16.Decode(b[0:n])), nil
+       return UTF16ToString(b[0:n]), nil
  }
  
  func Chdir(path string) (err error) {
@@ -573,13 +585,13 @@ func Rename(oldpath, newpath string) (err error) {
  }
  
  func ComputerName() (name string, err error) {
-       var n uint32 = MAX_COMPUTERNAME_LENGTH + 1
-       b := make([]uint16, n)
+       b := make([]uint16, MAX_COMPUTERNAME_LENGTH+1)
+       var n uint32
         e := GetComputerName(&b[0], &n)
         if e != nil {
                 return "", e
         }
-       return string(utf16.Decode(b[0:n])), nil
+       return UTF16ToString(b[:n]), nil
  }
  
  func Ftruncate(fd Handle, length int64) (err error) {
diff --git a/src/syscall/wtf8_windows.go b/src/syscall/wtf8_windows.go

new file mode 100644 (file)

index 0000000..f166021
--- /dev/null
+++ b/src/syscall/wtf8_windows.go
@@ -0,0 +1,92 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Windows UTF-16 strings can contain unpaired surrogates, which can't be
+// decoded into a valid UTF-8 string. This file defines a set of functions
+// that can be used to encode and decode potentially ill-formed UTF-16 strings
+// by using the [the WTF-8 encoding](https://simonsapin.github.io/wtf-8/).
+//
+// WTF-8 is a strict superset of UTF-8, i.e. any string that is
+// well-formed in UTF-8 is also well-formed in WTF-8 and the content
+// is unchanged. Also, the conversion never fails and is lossless.
+//
+// The benefit of using WTF-8 instead of UTF-8 when decoding a UTF-16 string
+// is that the conversion is lossless even for ill-formed UTF-16 strings.
+// This property allows to read an ill-formed UTF-16 string, convert it
+// to a Go string, and convert it back to the same original UTF-16 string.
+//
+// See go.dev/issues/59971 for more info.
+
+package syscall
+
+import (
+       "unicode/utf16"
+       "unicode/utf8"
+)
+
+const (
+       surr1 = 0xd800
+       surr2 = 0xdc00
+       surr3 = 0xe000
+
+       tx    = 0b10000000
+       t3    = 0b11100000
+       maskx = 0b00111111
+       mask3 = 0b00001111
+
+       rune1Max = 1<<7 - 1
+       rune2Max = 1<<11 - 1
+)
+
+// encodeWTF16 returns the potentially ill-formed
+// UTF-16 encoding of s.
+func encodeWTF16(s string, buf []uint16) []uint16 {
+       for i := 0; i < len(s); {
+               // Cannot use 'for range s' because it expects valid
+               // UTF-8 runes.
+               r, size := utf8.DecodeRuneInString(s[i:])
+               if r == utf8.RuneError {
+                       // Check if s[i:] contains a valid WTF-8 encoded surrogate.
+                       if sc := s[i:]; len(sc) >= 3 && sc[0] == 0xED && 0xA0 <= sc[1] && sc[1] <= 0xBF && 0x80 <= sc[2] && sc[2] <= 0xBF {
+                               r = rune(sc[0]&mask3)<<12 + rune(sc[1]&maskx)<<6 + rune(sc[2]&maskx)
+                               buf = append(buf, uint16(r))
+                               i += 3
+                               continue
+                       }
+               }
+               i += size
+               buf = utf16.AppendRune(buf, r)
+       }
+       return buf
+}
+
+// decodeWTF16 returns the WTF-8 encoding of
+// the potentially ill-formed UTF-16 s.
+func decodeWTF16(s []uint16, buf []byte) []byte {
+       for i := 0; i < len(s); i++ {
+               var ar rune
+               switch r := s[i]; {
+               case r < surr1, surr3 <= r:
+                       // normal rune
+                       ar = rune(r)
+               case surr1 <= r && r < surr2 && i+1 < len(s) &&
+                       surr2 <= s[i+1] && s[i+1] < surr3:
+                       // valid surrogate sequence
+                       ar = utf16.DecodeRune(rune(r), rune(s[i+1]))
+                       i++
+               default:
+                       // WTF-8 fallback.
+                       // This only handles the 3-byte case of utf8.AppendRune,
+                       // as surrogates always fall in that case.
+                       ar = rune(r)
+                       if ar > utf8.MaxRune {
+                               ar = utf8.RuneError
+                       }
+                       buf = append(buf, t3|byte(ar>>12), tx|byte(ar>>6)&maskx, tx|byte(ar)&maskx)
+                       continue
+               }
+               buf = utf8.AppendRune(buf, ar)
+       }
+       return buf
+}
diff --git a/src/syscall/wtf8_windows_test.go b/src/syscall/wtf8_windows_test.go

new file mode 100644 (file)

index 0000000..077f718
--- /dev/null
+++ b/src/syscall/wtf8_windows_test.go
@@ -0,0 +1,200 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package syscall_test
+
+import (
+       "fmt"
+       "slices"
+       "syscall"
+       "testing"
+       "unicode/utf16"
+       "unicode/utf8"
+       "unsafe"
+)
+
+var wtf8tests = []struct {
+       str  string
+       wstr []uint16
+}{
+       {
+               str:  "\x00",
+               wstr: []uint16{0x00},
+       },
+       {
+               str:  "\x5C",
+               wstr: []uint16{0x5C},
+       },
+       {
+               str:  "\x7F",
+               wstr: []uint16{0x7F},
+       },
+
+       // 2-byte
+       {
+               str:  "\xC2\x80",
+               wstr: []uint16{0x80},
+       },
+       {
+               str:  "\xD7\x8A",
+               wstr: []uint16{0x05CA},
+       },
+       {
+               str:  "\xDF\xBF",
+               wstr: []uint16{0x07FF},
+       },
+
+       // 3-byte
+       {
+               str:  "\xE0\xA0\x80",
+               wstr: []uint16{0x0800},
+       },
+       {
+               str:  "\xE2\xB0\xBC",
+               wstr: []uint16{0x2C3C},
+       },
+       {
+               str:  "\xEF\xBF\xBF",
+               wstr: []uint16{0xFFFF},
+       },
+       // unmatched surrogate halves
+       // high surrogates: 0xD800 to 0xDBFF
+       {
+               str:  "\xED\xA0\x80",
+               wstr: []uint16{0xD800},
+       },
+       {
+               // "High surrogate followed by another high surrogate"
+               str:  "\xED\xA0\x80\xED\xA0\x80",
+               wstr: []uint16{0xD800, 0xD800},
+       },
+       {
+               // "High surrogate followed by a symbol that is not a surrogate"
+               str:  string([]byte{0xED, 0xA0, 0x80, 0xA}),
+               wstr: []uint16{0xD800, 0xA},
+       },
+       {
+               // "Unmatched high surrogate, followed by a surrogate pair, followed by an unmatched high surrogate"
+               str:  string([]byte{0xED, 0xA0, 0x80, 0xF0, 0x9D, 0x8C, 0x86, 0xED, 0xA0, 0x80}),
+               wstr: []uint16{0xD800, 0xD834, 0xDF06, 0xD800},
+       },
+       {
+               str:  "\xED\xA6\xAF",
+               wstr: []uint16{0xD9AF},
+       },
+       {
+               str:  "\xED\xAF\xBF",
+               wstr: []uint16{0xDBFF},
+       },
+       // low surrogates: 0xDC00 to 0xDFFF
+       {
+               str:  "\xED\xB0\x80",
+               wstr: []uint16{0xDC00},
+       },
+       {
+               // "Low surrogate followed by another low surrogate"
+               str:  "\xED\xB0\x80\xED\xB0\x80",
+               wstr: []uint16{0xDC00, 0xDC00},
+       },
+       {
+               // "Low surrogate followed by a symbol that is not a surrogate"
+               str:  string([]byte{0xED, 0xB0, 0x80, 0xA}),
+               wstr: []uint16{0xDC00, 0xA},
+       },
+       {
+               // "Unmatched low surrogate, followed by a surrogate pair, followed by an unmatched low surrogate"
+               str:  string([]byte{0xED, 0xB0, 0x80, 0xF0, 0x9D, 0x8C, 0x86, 0xED, 0xB0, 0x80}),
+               wstr: []uint16{0xDC00, 0xD834, 0xDF06, 0xDC00},
+       },
+       {
+               str:  "\xED\xBB\xAE",
+               wstr: []uint16{0xDEEE},
+       },
+       {
+               str:  "\xED\xBF\xBF",
+               wstr: []uint16{0xDFFF},
+       },
+
+       // 4-byte
+       {
+               str:  "\xF0\x90\x80\x80",
+               wstr: []uint16{0xD800, 0xDC00},
+       },
+       {
+               str:  "\xF0\x9D\x8C\x86",
+               wstr: []uint16{0xD834, 0xDF06},
+       },
+       {
+               str:  "\xF4\x8F\xBF\xBF",
+               wstr: []uint16{0xDBFF, 0xDFFF},
+       },
+}
+
+func TestWTF16Rountrip(t *testing.T) {
+       for _, tt := range wtf8tests {
+               t.Run(fmt.Sprintf("%X", tt.str), func(t *testing.T) {
+                       got := syscall.EncodeWTF16(tt.str, nil)
+                       got2 := string(syscall.DecodeWTF16(got, nil))
+                       if got2 != tt.str {
+                               t.Errorf("got:\n%s\nwant:\n%s", got2, tt.str)
+                       }
+               })
+       }
+}
+
+func TestWTF16Golden(t *testing.T) {
+       for _, tt := range wtf8tests {
+               t.Run(fmt.Sprintf("%X", tt.str), func(t *testing.T) {
+                       got := syscall.EncodeWTF16(tt.str, nil)
+                       if !slices.Equal(got, tt.wstr) {
+                               t.Errorf("got:\n%v\nwant:\n%v", got, tt.wstr)
+                       }
+               })
+       }
+}
+
+func FuzzEncodeWTF16(f *testing.F) {
+       for _, tt := range wtf8tests {
+               f.Add(tt.str)
+       }
+       f.Fuzz(func(t *testing.T, b string) {
+               // test that there are no panics
+               got := syscall.EncodeWTF16(b, nil)
+               syscall.DecodeWTF16(got, nil)
+               if utf8.ValidString(b) {
+                       // if the input is a valid UTF-8 string, then
+                       // test that syscall.EncodeWTF16 behaves as
+                       // utf16.Encode
+                       want := utf16.Encode([]rune(b))
+                       if !slices.Equal(got, want) {
+                               t.Errorf("got:\n%v\nwant:\n%v", got, want)
+                       }
+               }
+       })
+}
+
+func FuzzDecodeWTF16(f *testing.F) {
+       for _, tt := range wtf8tests {
+               b := unsafe.Slice((*uint8)(unsafe.Pointer(unsafe.SliceData(tt.wstr))), len(tt.wstr)*2)
+               f.Add(b)
+       }
+       f.Fuzz(func(t *testing.T, b []byte) {
+               u16 := unsafe.Slice((*uint16)(unsafe.Pointer(unsafe.SliceData(b))), len(b)/2)
+               got := syscall.DecodeWTF16(u16, nil)
+               if utf8.Valid(got) {
+                       // if the input is a valid UTF-8 string, then
+                       // test that syscall.DecodeWTF16 behaves as
+                       // utf16.Decode
+                       want := utf16.Decode(u16)
+                       if string(got) != string(want) {
+                               t.Errorf("got:\n%s\nwant:\n%s", string(got), string(want))
+                       }
+               }
+               // WTF-8 should always roundtrip
+               got2 := syscall.EncodeWTF16(string(got), nil)
+               if !slices.Equal(got2, u16) {
+                       t.Errorf("got:\n%v\nwant:\n%v", got2, u16)
+               }
+       })
+}
author	qmuntal <quimmuntal@gmail.com>
	Fri, 5 May 2023 16:17:18 +0000 (18:17 +0200)
committer	Quim Muntal <quimmuntal@gmail.com>
	Mon, 15 May 2023 09:26:16 +0000 (09:26 +0000)
src/internal/syscall/execenv/execenv_windows.go		patch \| blob \| history
src/internal/syscall/windows/registry/value.go		patch \| blob \| history
src/internal/syscall/windows/syscall_windows.go		patch \| blob \| history
src/os/dir_windows.go		patch \| blob \| history
src/os/exec/lp_windows_test.go		patch \| blob \| history
src/os/file_windows.go		patch \| blob \| history
src/os/os_windows_test.go		patch \| blob \| history
src/syscall/env_windows.go		patch \| blob \| history
src/syscall/export_windows_test.go		patch \| blob \| history
src/syscall/syscall_windows.go		patch \| blob \| history
src/syscall/wtf8_windows.go	[new file with mode: 0644]	patch \| blob
src/syscall/wtf8_windows_test.go	[new file with mode: 0644]	patch \| blob