]> Cypherpunks repositories - gostls13.git/commitdiff
os: use handle based APIs to read directories on windows
authorqmuntal <quimmuntal@gmail.com>
Tue, 22 Nov 2022 17:46:35 +0000 (18:46 +0100)
committerQuim Muntal <quimmuntal@gmail.com>
Tue, 24 Jan 2023 13:26:00 +0000 (13:26 +0000)
This CL updates File.readdir() on windows so it uses
GetFileInformationByHandleEx with FILE_ID_BOTH_DIR_INFO
instead of Find* APIs. The former is more performant because
it allows us to buffer IO calls and reduces the number of system calls,
passing from 1 per file to 1 every ~100 files
(depending on the size of the file name and the size of the buffer).

This change improve performance of File.ReadDir by 20-30%.

name        old time/op    new time/op    delta
ReadDir-12     562µs ±14%     385µs ± 9%  -31.60%  (p=0.000 n=9+9)

name        old alloc/op   new alloc/op   delta
ReadDir-12    29.7kB ± 0%    29.5kB ± 0%   -0.88%  (p=0.000 n=8+10)

name        old allocs/op  new allocs/op  delta
ReadDir-12       399 ± 0%       397 ± 0%   -0.50%  (p=0.000 n=10+10)

This change also speeds up calls to os.SameFile when using FileStats
returned from File.readdir(), as their file ID can be inferred while
reading the directory.

Change-Id: Id56a338ee66c39656b564105cac131099218fb5d
Reviewed-on: https://go-review.googlesource.com/c/go/+/452995
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Damien Neil <dneil@google.com>
Reviewed-by: Alex Brainman <alex.brainman@gmail.com>
Run-TryBot: Quim Muntal <quimmuntal@gmail.com>
Reviewed-by: Bryan Mills <bcmills@google.com>
src/internal/syscall/windows/syscall_windows.go
src/internal/syscall/windows/zsyscall_windows.go
src/os/dir_windows.go
src/os/file_windows.go
src/os/types_windows.go

index 8ace2a27e79fcb5f84fa87b959469468050de263..311d083f45c14014343afebfc6a1c0c0736bb9ea 100644 (file)
@@ -367,3 +367,23 @@ func LoadGetFinalPathNameByHandle() error {
 //sys  DestroyEnvironmentBlock(block *uint16) (err error) = userenv.DestroyEnvironmentBlock
 
 //sys  RtlGenRandom(buf []byte) (err error) = advapi32.SystemFunction036
+
+type FILE_ID_BOTH_DIR_INFO struct {
+       NextEntryOffset uint32
+       FileIndex       uint32
+       CreationTime    syscall.Filetime
+       LastAccessTime  syscall.Filetime
+       LastWriteTime   syscall.Filetime
+       ChangeTime      syscall.Filetime
+       EndOfFile       uint64
+       AllocationSize  uint64
+       FileAttributes  uint32
+       FileNameLength  uint32
+       EaSize          uint32
+       ShortNameLength uint32
+       ShortName       [12]uint16
+       FileID          uint64
+       FileName        [1]uint16
+}
+
+//sys  GetVolumeInformationByHandle(file syscall.Handle, volumeNameBuffer *uint16, volumeNameSize uint32, volumeNameSerialNumber *uint32, maximumComponentLength *uint32, fileSystemFlags *uint32, fileSystemNameBuffer *uint16, fileSystemNameSize uint32) (err error) = GetVolumeInformationByHandleW
index afd64e318e516daea2e2d429bd172a934b252200..fd2f255b9b4f9146b1c90b483b5d6e2c79b02f33 100644 (file)
@@ -45,38 +45,39 @@ var (
        moduserenv  = syscall.NewLazyDLL(sysdll.Add("userenv.dll"))
        modws2_32   = syscall.NewLazyDLL(sysdll.Add("ws2_32.dll"))
 
-       procAdjustTokenPrivileges        = modadvapi32.NewProc("AdjustTokenPrivileges")
-       procDuplicateTokenEx             = modadvapi32.NewProc("DuplicateTokenEx")
-       procImpersonateSelf              = modadvapi32.NewProc("ImpersonateSelf")
-       procLookupPrivilegeValueW        = modadvapi32.NewProc("LookupPrivilegeValueW")
-       procOpenThreadToken              = modadvapi32.NewProc("OpenThreadToken")
-       procRevertToSelf                 = modadvapi32.NewProc("RevertToSelf")
-       procSetTokenInformation          = modadvapi32.NewProc("SetTokenInformation")
-       procSystemFunction036            = modadvapi32.NewProc("SystemFunction036")
-       procGetAdaptersAddresses         = modiphlpapi.NewProc("GetAdaptersAddresses")
-       procGetACP                       = modkernel32.NewProc("GetACP")
-       procGetComputerNameExW           = modkernel32.NewProc("GetComputerNameExW")
-       procGetConsoleCP                 = modkernel32.NewProc("GetConsoleCP")
-       procGetCurrentThread             = modkernel32.NewProc("GetCurrentThread")
-       procGetFileInformationByHandleEx = modkernel32.NewProc("GetFileInformationByHandleEx")
-       procGetFinalPathNameByHandleW    = modkernel32.NewProc("GetFinalPathNameByHandleW")
-       procGetModuleFileNameW           = modkernel32.NewProc("GetModuleFileNameW")
-       procLockFileEx                   = modkernel32.NewProc("LockFileEx")
-       procModule32FirstW               = modkernel32.NewProc("Module32FirstW")
-       procModule32NextW                = modkernel32.NewProc("Module32NextW")
-       procMoveFileExW                  = modkernel32.NewProc("MoveFileExW")
-       procMultiByteToWideChar          = modkernel32.NewProc("MultiByteToWideChar")
-       procSetFileInformationByHandle   = modkernel32.NewProc("SetFileInformationByHandle")
-       procUnlockFileEx                 = modkernel32.NewProc("UnlockFileEx")
-       procVirtualQuery                 = modkernel32.NewProc("VirtualQuery")
-       procNetShareAdd                  = modnetapi32.NewProc("NetShareAdd")
-       procNetShareDel                  = modnetapi32.NewProc("NetShareDel")
-       procNetUserGetLocalGroups        = modnetapi32.NewProc("NetUserGetLocalGroups")
-       procGetProcessMemoryInfo         = modpsapi.NewProc("GetProcessMemoryInfo")
-       procCreateEnvironmentBlock       = moduserenv.NewProc("CreateEnvironmentBlock")
-       procDestroyEnvironmentBlock      = moduserenv.NewProc("DestroyEnvironmentBlock")
-       procGetProfilesDirectoryW        = moduserenv.NewProc("GetProfilesDirectoryW")
-       procWSASocketW                   = modws2_32.NewProc("WSASocketW")
+       procAdjustTokenPrivileges         = modadvapi32.NewProc("AdjustTokenPrivileges")
+       procDuplicateTokenEx              = modadvapi32.NewProc("DuplicateTokenEx")
+       procImpersonateSelf               = modadvapi32.NewProc("ImpersonateSelf")
+       procLookupPrivilegeValueW         = modadvapi32.NewProc("LookupPrivilegeValueW")
+       procOpenThreadToken               = modadvapi32.NewProc("OpenThreadToken")
+       procRevertToSelf                  = modadvapi32.NewProc("RevertToSelf")
+       procSetTokenInformation           = modadvapi32.NewProc("SetTokenInformation")
+       procSystemFunction036             = modadvapi32.NewProc("SystemFunction036")
+       procGetAdaptersAddresses          = modiphlpapi.NewProc("GetAdaptersAddresses")
+       procGetACP                        = modkernel32.NewProc("GetACP")
+       procGetComputerNameExW            = modkernel32.NewProc("GetComputerNameExW")
+       procGetConsoleCP                  = modkernel32.NewProc("GetConsoleCP")
+       procGetCurrentThread              = modkernel32.NewProc("GetCurrentThread")
+       procGetFileInformationByHandleEx  = modkernel32.NewProc("GetFileInformationByHandleEx")
+       procGetFinalPathNameByHandleW     = modkernel32.NewProc("GetFinalPathNameByHandleW")
+       procGetModuleFileNameW            = modkernel32.NewProc("GetModuleFileNameW")
+       procGetVolumeInformationByHandleW = modkernel32.NewProc("GetVolumeInformationByHandleW")
+       procLockFileEx                    = modkernel32.NewProc("LockFileEx")
+       procModule32FirstW                = modkernel32.NewProc("Module32FirstW")
+       procModule32NextW                 = modkernel32.NewProc("Module32NextW")
+       procMoveFileExW                   = modkernel32.NewProc("MoveFileExW")
+       procMultiByteToWideChar           = modkernel32.NewProc("MultiByteToWideChar")
+       procSetFileInformationByHandle    = modkernel32.NewProc("SetFileInformationByHandle")
+       procUnlockFileEx                  = modkernel32.NewProc("UnlockFileEx")
+       procVirtualQuery                  = modkernel32.NewProc("VirtualQuery")
+       procNetShareAdd                   = modnetapi32.NewProc("NetShareAdd")
+       procNetShareDel                   = modnetapi32.NewProc("NetShareDel")
+       procNetUserGetLocalGroups         = modnetapi32.NewProc("NetUserGetLocalGroups")
+       procGetProcessMemoryInfo          = modpsapi.NewProc("GetProcessMemoryInfo")
+       procCreateEnvironmentBlock        = moduserenv.NewProc("CreateEnvironmentBlock")
+       procDestroyEnvironmentBlock       = moduserenv.NewProc("DestroyEnvironmentBlock")
+       procGetProfilesDirectoryW         = moduserenv.NewProc("GetProfilesDirectoryW")
+       procWSASocketW                    = modws2_32.NewProc("WSASocketW")
 )
 
 func adjustTokenPrivileges(token syscall.Token, disableAllPrivileges bool, newstate *TOKEN_PRIVILEGES, buflen uint32, prevstate *TOKEN_PRIVILEGES, returnlen *uint32) (ret uint32, err error) {
@@ -219,6 +220,14 @@ func GetModuleFileName(module syscall.Handle, fn *uint16, len uint32) (n uint32,
        return
 }
 
+func GetVolumeInformationByHandle(file syscall.Handle, volumeNameBuffer *uint16, volumeNameSize uint32, volumeNameSerialNumber *uint32, maximumComponentLength *uint32, fileSystemFlags *uint32, fileSystemNameBuffer *uint16, fileSystemNameSize uint32) (err error) {
+       r1, _, e1 := syscall.Syscall9(procGetVolumeInformationByHandleW.Addr(), 8, uintptr(file), uintptr(unsafe.Pointer(volumeNameBuffer)), uintptr(volumeNameSize), uintptr(unsafe.Pointer(volumeNameSerialNumber)), uintptr(unsafe.Pointer(maximumComponentLength)), uintptr(unsafe.Pointer(fileSystemFlags)), uintptr(unsafe.Pointer(fileSystemNameBuffer)), uintptr(fileSystemNameSize), 0)
+       if r1 == 0 {
+               err = errnoErr(e1)
+       }
+       return
+}
+
 func LockFileEx(file syscall.Handle, flags uint32, reserved uint32, bytesLow uint32, bytesHigh uint32, overlapped *syscall.Overlapped) (err error) {
        r1, _, e1 := syscall.Syscall6(procLockFileEx.Addr(), 6, uintptr(file), uintptr(flags), uintptr(reserved), uintptr(bytesLow), uintptr(bytesHigh), uintptr(unsafe.Pointer(overlapped)))
        if r1 == 0 {
index 445e4f7c4ff66858060cc7b7dab4f1614cd65eb1..ab120546c0a978bd9c768fb6ba97cd75863d6c5a 100644 (file)
 package os
 
 import (
+       "internal/syscall/windows"
        "io"
        "runtime"
+       "sync"
        "syscall"
+       "unicode/utf16"
+       "unsafe"
 )
 
+// Auxiliary information if the File describes a directory
+type dirInfo struct {
+       // buf is a slice pointer so the slice header
+       // does not escape to the heap when returning
+       // buf to dirBufPool.
+       buf  *[]byte // buffer for directory I/O
+       bufp int     // location of next record in buf
+       vol  uint32
+}
+
+const (
+       // dirBufSize is the size of the dirInfo buffer.
+       // The buffer must be big enough to hold at least a single entry.
+       // The filename alone can be 512 bytes (MAX_PATH*2), and the fixed part of
+       // the FILE_ID_BOTH_DIR_INFO structure is 105 bytes, so dirBufSize
+       // should not be set below 1024 bytes (512+105+safety buffer).
+       // Windows 8.1 and earlier only works with buffer sizes up to 64 kB.
+       dirBufSize = 64 * 1024 // 64kB
+)
+
+var dirBufPool = sync.Pool{
+       New: func() any {
+               // The buffer must be at least a block long.
+               buf := make([]byte, dirBufSize)
+               return &buf
+       },
+}
+
+func (d *dirInfo) close() {
+       if d.buf != nil {
+               dirBufPool.Put(d.buf)
+               d.buf = nil
+       }
+}
+
 func (file *File) readdir(n int, mode readdirMode) (names []string, dirents []DirEntry, infos []FileInfo, err error) {
        // If this file has no dirinfo, create one.
-       needdata := true
+       var infoClass uint32 = windows.FileIdBothDirectoryInfo
        if file.dirinfo == nil {
-               needdata = false
-               file.dirinfo, err = openDir(file.name)
+               // vol is used by os.SameFile.
+               // It is safe to query it once and reuse the value.
+               // Hard links are not allowed to reference files in other volumes.
+               // Junctions and symbolic links can reference files and directories in other volumes,
+               // but the reparse point should still live in the parent volume.
+               var vol uint32
+               err = windows.GetVolumeInformationByHandle(file.pfd.Sysfd, nil, 0, &vol, nil, nil, nil, 0)
+               runtime.KeepAlive(file)
                if err != nil {
                        err = &PathError{Op: "readdir", Path: file.name, Err: err}
                        return
                }
+               infoClass = windows.FileIdBothDirectoryRestartInfo
+               file.dirinfo = new(dirInfo)
+               file.dirinfo.buf = dirBufPool.Get().(*[]byte)
+               file.dirinfo.vol = vol
        }
+       d := file.dirinfo
        wantAll := n <= 0
        if wantAll {
                n = -1
        }
-       d := &file.dirinfo.data
-       for n != 0 && !file.dirinfo.isempty {
-               if needdata {
-                       e := syscall.FindNextFile(file.dirinfo.h, d)
+       for n != 0 {
+               // Refill the buffer if necessary
+               if d.bufp == 0 {
+                       err = windows.GetFileInformationByHandleEx(file.pfd.Sysfd, infoClass, (*byte)(unsafe.Pointer(&(*d.buf)[0])), uint32(len(*d.buf)))
                        runtime.KeepAlive(file)
-                       if e != nil {
-                               if e == syscall.ERROR_NO_MORE_FILES {
+                       if err != nil {
+                               if err == syscall.ERROR_NO_MORE_FILES {
                                        break
+                               }
+                               if s, _ := file.Stat(); s != nil && !s.IsDir() {
+                                       err = &PathError{Op: "readdir", Path: file.name, Err: syscall.ENOTDIR}
                                } else {
-                                       err = &PathError{Op: "FindNextFile", Path: file.name, Err: e}
-                                       return
+                                       err = &PathError{Op: "GetFileInformationByHandleEx", Path: file.name, Err: err}
                                }
+                               return
                        }
+                       infoClass = windows.FileIdBothDirectoryInfo
                }
-               needdata = true
-               name := syscall.UTF16ToString(d.FileName[0:])
-               if name == "." || name == ".." { // Useless names
-                       continue
-               }
-               if mode == readdirName {
-                       names = append(names, name)
-               } else {
-                       f := newFileStatFromWin32finddata(d)
-                       f.name = name
-                       f.path = file.dirinfo.path
-                       f.appendNameToPath = true
-                       if mode == readdirDirEntry {
-                               dirents = append(dirents, dirEntry{f})
+               // Drain the buffer
+               var islast bool
+               for n != 0 && !islast {
+                       info := (*windows.FILE_ID_BOTH_DIR_INFO)(unsafe.Pointer(&(*d.buf)[d.bufp]))
+                       d.bufp += int(info.NextEntryOffset)
+                       islast = info.NextEntryOffset == 0
+                       if islast {
+                               d.bufp = 0
+                       }
+                       nameslice := unsafe.Slice(&info.FileName[0], info.FileNameLength/2)
+                       name := string(utf16.Decode(nameslice))
+                       if name == "." || name == ".." { // Useless names
+                               continue
+                       }
+                       if mode == readdirName {
+                               names = append(names, name)
                        } else {
-                               infos = append(infos, f)
+                               f := newFileStatFromFileIDBothDirInfo(info)
+                               f.name = name
+                               f.vol = d.vol
+                               // f.path is used by os.SameFile to decide if it needs
+                               // to fetch vol, idxhi and idxlo. But these are already set,
+                               // so set f.path to "" to prevent os.SameFile doing it again.
+                               f.path = ""
+                               if mode == readdirDirEntry {
+                                       dirents = append(dirents, dirEntry{f})
+                               } else {
+                                       infos = append(infos, f)
+                               }
                        }
+                       n--
                }
-               n--
        }
        if !wantAll && len(names)+len(dirents)+len(infos) == 0 {
                return nil, nil, nil, io.EOF
index d94b78f52478d1926ac52b87fbe0695c94045f56..a48feca855a3afa58131dab8f3cf75fab62cb92e 100644 (file)
@@ -84,18 +84,6 @@ func NewFile(fd uintptr, name string) *File {
        return newFile(h, name, "file")
 }
 
-// Auxiliary information if the File describes a directory
-type dirInfo struct {
-       h       syscall.Handle // search handle created with FindFirstFile
-       data    syscall.Win32finddata
-       path    string
-       isempty bool // set if FindFirstFile returns ERROR_FILE_NOT_FOUND
-}
-
-func (d *dirInfo) close() error {
-       return syscall.FindClose(d.h)
-}
-
 func epipecheck(file *File, e error) {
 }
 
@@ -103,63 +91,6 @@ func epipecheck(file *File, e error) {
 // On Unix-like systems, it is "/dev/null"; on Windows, "NUL".
 const DevNull = "NUL"
 
-func openDir(name string) (d *dirInfo, e error) {
-       var mask string
-
-       path := fixLongPath(name)
-
-       if len(path) == 2 && path[1] == ':' { // it is a drive letter, like C:
-               mask = path + `*`
-       } else if len(path) > 0 {
-               lc := path[len(path)-1]
-               if lc == '/' || lc == '\\' {
-                       mask = path + `*`
-               } else {
-                       mask = path + `\*`
-               }
-       } else {
-               mask = `\*`
-       }
-       maskp, e := syscall.UTF16PtrFromString(mask)
-       if e != nil {
-               return nil, e
-       }
-       d = new(dirInfo)
-       d.h, e = syscall.FindFirstFile(maskp, &d.data)
-       if e != nil {
-               // FindFirstFile returns ERROR_FILE_NOT_FOUND when
-               // no matching files can be found. Then, if directory
-               // exists, we should proceed.
-               // If FindFirstFile failed because name does not point
-               // to a directory, we should return ENOTDIR.
-               var fa syscall.Win32FileAttributeData
-               pathp, e1 := syscall.UTF16PtrFromString(path)
-               if e1 != nil {
-                       return nil, e
-               }
-               e1 = syscall.GetFileAttributesEx(pathp, syscall.GetFileExInfoStandard, (*byte)(unsafe.Pointer(&fa)))
-               if e1 != nil {
-                       return nil, e
-               }
-               if fa.FileAttributes&syscall.FILE_ATTRIBUTE_DIRECTORY == 0 {
-                       return nil, syscall.ENOTDIR
-               }
-               if e != syscall.ERROR_FILE_NOT_FOUND {
-                       return nil, e
-               }
-               d.isempty = true
-       }
-       d.path = path
-       if !isAbs(d.path) {
-               d.path, e = syscall.FullPath(d.path)
-               if e != nil {
-                       d.close()
-                       return nil, e
-               }
-       }
-       return d, nil
-}
-
 // openFileNolog is the Windows implementation of OpenFile.
 func openFileNolog(name string, flag int, perm FileMode) (*File, error) {
        if name == "" {
index 9a3d508783940ab64a718abd72b124cee17d8d89..d1623f7b17edc645e27ff26ad822fc0d23eb869e 100644 (file)
@@ -16,7 +16,7 @@ import (
 type fileStat struct {
        name string
 
-       // from ByHandleFileInformation, Win32FileAttributeData and Win32finddata
+       // from ByHandleFileInformation, Win32FileAttributeData, Win32finddata, and GetFileInformationByHandleEx
        FileAttributes uint32
        CreationTime   syscall.Filetime
        LastAccessTime syscall.Filetime
@@ -24,7 +24,7 @@ type fileStat struct {
        FileSizeHigh   uint32
        FileSizeLow    uint32
 
-       // from Win32finddata
+       // from Win32finddata and GetFileInformationByHandleEx
        ReparseTag uint32
 
        // what syscall.GetFileType returns
@@ -32,11 +32,10 @@ type fileStat struct {
 
        // used to implement SameFile
        sync.Mutex
-       path             string
-       vol              uint32
-       idxhi            uint32
-       idxlo            uint32
-       appendNameToPath bool
+       path  string
+       vol   uint32
+       idxhi uint32
+       idxlo uint32
 }
 
 // newFileStatFromGetFileInformationByHandle calls GetFileInformationByHandle
@@ -80,6 +79,26 @@ func newFileStatFromGetFileInformationByHandle(path string, h syscall.Handle) (f
        }, nil
 }
 
+// newFileStatFromFileIDBothDirInfo copies all required information
+// from windows.FILE_ID_BOTH_DIR_INFO d into the newly created fileStat.
+func newFileStatFromFileIDBothDirInfo(d *windows.FILE_ID_BOTH_DIR_INFO) *fileStat {
+       // The FILE_ID_BOTH_DIR_INFO MSDN documentations isn't completely correct.
+       // FileAttributes can contain any file attributes that is currently set on the file,
+       // not just the ones documented.
+       // EaSize contains the reparse tag if the file is a reparse point.
+       return &fileStat{
+               FileAttributes: d.FileAttributes,
+               CreationTime:   d.CreationTime,
+               LastAccessTime: d.LastAccessTime,
+               LastWriteTime:  d.LastWriteTime,
+               FileSizeHigh:   uint32(d.EndOfFile >> 32),
+               FileSizeLow:    uint32(d.EndOfFile),
+               ReparseTag:     d.EaSize,
+               idxhi:          uint32(d.FileID >> 32),
+               idxlo:          uint32(d.FileID),
+       }
+}
+
 // newFileStatFromWin32finddata copies all required information
 // from syscall.Win32finddata d into the newly created fileStat.
 func newFileStatFromWin32finddata(d *syscall.Win32finddata) *fileStat {
@@ -169,13 +188,7 @@ func (fs *fileStat) loadFileId() error {
                // already done
                return nil
        }
-       var path string
-       if fs.appendNameToPath {
-               path = fs.path + `\` + fs.name
-       } else {
-               path = fs.path
-       }
-       pathp, err := syscall.UTF16PtrFromString(path)
+       pathp, err := syscall.UTF16PtrFromString(fs.path)
        if err != nil {
                return err
        }