Use substitute names to obtain the target of a reparse point.
The print name can be empty for some reparse points (e.g. mount points created by the Box cloud storage driver and directory junctions created by junction.exe). The print name is intended mostly for presenting a "simple" path to the user, not for actually locating the file. The substitute name is the actionable replacement path, but it is in NT path format and can potentially point to unmounted volumes and UNC resources. The implementation attempts to convert the NT path to a Win32 path by matching commonly known patterns against the NT path. If no pattern matches, we create a Win32 path by replacing the NT path prefix with "\\?\". Related to https://github.com/boostorg/filesystem/issues/187.
This commit is contained in:
parent
16bd89b7c0
commit
a252f15f06
@ -53,6 +53,7 @@
|
||||
<li><b>New:</b> Added <code>weakly_canonical</code> overloads taking <code>base</code> path as an argument.</li>
|
||||
<li><b>Breaking change:</b> <code>path::filename</code>, <code>path::stem</code> and <code>path::extension</code> no longer consider the root name of the path as a filename if the path only consists of a root name. For example, <code>path("C:").filename()</code> used to return "C:" on Windows and will return an empty path now.</li>
|
||||
<li><b>New:</b> Improved support for various path prefixes on Windows. Added support for local device prefix ("\\.\") and experimental support for NT path prefix ("\??\"). The prefixes will be included in the root name of a path.</li>
|
||||
<li>In <code>read_symlink</code> on Windows, corrected reparse point handling. The operation would return an empty path for some mount points (for example, created by the <a href="https://www.box.com/">Box</a> cloud storage driver) and directory junction points that had empty print names. The new implementation now parses the substitute name of the reparse point and attempts to reconstruct a Win32 path from it. (<a href="https://github.com/boostorg/filesystem/issues/187">#187</a>)</li>
|
||||
<li>Reworked <code>path::lexically_normal</code> implementation to eliminate some cases of duplicate dot (".") elements in the normalized paths.</li>
|
||||
</ul>
|
||||
|
||||
|
@ -201,14 +201,32 @@ typedef struct _REPARSE_DATA_BUFFER
|
||||
USHORT Reserved;
|
||||
union
|
||||
{
|
||||
/*
|
||||
* In SymbolicLink and MountPoint reparse points, there are two names.
|
||||
* SubstituteName is the effective replacement path for the reparse point.
|
||||
* This is what should be used for path traversal.
|
||||
* PrintName is intended for presentation to the user and may omit some
|
||||
* elements of the path or be absent entirely.
|
||||
*
|
||||
* Examples of substitute and print names:
|
||||
* mklink /D ldrive c:\
|
||||
* SubstituteName: "\??\c:\"
|
||||
* PrintName: "c:\"
|
||||
*
|
||||
* mklink /J ldrive c:\
|
||||
* SubstituteName: "\??\C:\"
|
||||
* PrintName: "c:\"
|
||||
*
|
||||
* junction ldrive c:\
|
||||
* SubstituteName: "\??\C:\"
|
||||
* PrintName: ""
|
||||
*
|
||||
* box.com mounted cloud storage
|
||||
* SubstituteName: "\??\Volume{<UUID>}\"
|
||||
* PrintName: ""
|
||||
*/
|
||||
struct
|
||||
{
|
||||
/*
|
||||
* Example of distinction between substitute and print names:
|
||||
* mklink /d ldrive c:\
|
||||
* SubstituteName: c:\\??\
|
||||
* PrintName: c:\
|
||||
*/
|
||||
USHORT SubstituteNameOffset;
|
||||
USHORT SubstituteNameLength;
|
||||
USHORT PrintNameOffset;
|
||||
@ -419,8 +437,6 @@ uintmax_t remove_all_aux(path const& p, fs::file_type type, error_code* ec)
|
||||
// //
|
||||
//--------------------------------------------------------------------------------------//
|
||||
|
||||
BOOST_CONSTEXPR_OR_CONST char dot = '.';
|
||||
|
||||
#if !defined(BOOST_FILESYSTEM_HAS_STATX) && defined(BOOST_FILESYSTEM_HAS_STATX_SYSCALL)
|
||||
//! A wrapper for the statx syscall
|
||||
inline int statx(int dirfd, const char* path, int flags, unsigned int mask, struct ::statx* stx) BOOST_NOEXCEPT
|
||||
@ -851,7 +867,7 @@ inline fs::file_type query_file_type(path const& p, error_code* ec)
|
||||
return fs::detail::symlink_status(p, ec).type();
|
||||
}
|
||||
|
||||
#else
|
||||
#else // defined(BOOST_POSIX_API)
|
||||
|
||||
//--------------------------------------------------------------------------------------//
|
||||
// //
|
||||
@ -859,8 +875,6 @@ inline fs::file_type query_file_type(path const& p, error_code* ec)
|
||||
// //
|
||||
//--------------------------------------------------------------------------------------//
|
||||
|
||||
BOOST_CONSTEXPR_OR_CONST wchar_t dot = L'.';
|
||||
|
||||
// Windows CE has no environment variables
|
||||
#if !defined(UNDER_CE)
|
||||
inline std::wstring wgetenv(const wchar_t* name)
|
||||
@ -1013,10 +1027,156 @@ inline BOOL resize_file_api(const wchar_t* p, uintmax_t size)
|
||||
return h.handle != INVALID_HANDLE_VALUE && ::SetFilePointerEx(h.handle, sz, 0, FILE_BEGIN) && ::SetEndOfFile(h.handle);
|
||||
}
|
||||
|
||||
//! Converts NT path to a Win32 path
|
||||
inline path convert_nt_path_to_win32_path(const wchar_t* nt_path, std::size_t size)
|
||||
{
|
||||
// https://googleprojectzero.blogspot.com/2016/02/the-definitive-guide-on-win32-to-nt.html
|
||||
// https://stackoverflow.com/questions/23041983/path-prefixes-and
|
||||
//
|
||||
// NT paths can be used to identify practically any named objects, devices, files, local and remote shares, etc.
|
||||
// The path starts with a leading backslash and consists of one or more path elements separated with backslashes.
|
||||
// The set of characters allowed in NT path elements is significantly larger than that of Win32 paths - basically,
|
||||
// any character except the backslash is allowed. Path elements are case-insensitive.
|
||||
//
|
||||
// NT paths that start with the "\??\" prefix are used to indicate the current user's session namespace. The prefix
|
||||
// indicates to the NT object manager to lookup the object relative to "\Sessions\0\DosDevices\[Logon Authentication ID]".
|
||||
//
|
||||
// There is also a special "\Global??\" prefix that refers to the system logon. User's session directory shadows
|
||||
// the system logon directory, so that when the referenced object is not found in the user's namespace,
|
||||
// system logon is looked up instead.
|
||||
//
|
||||
// There is a symlink "Global" in the user's session namespace that refers to the global namespace, so "\??\Global"
|
||||
// effectively resolves to "\Global??". This allows Win32 applications to directly refer to the system objects,
|
||||
// even if shadowed by the current user's logon object.
|
||||
//
|
||||
// NT paths can be used to reference not only local filesystems, but also devices and remote shares identifiable via
|
||||
// UNC paths. For this, there is a special "UNC" device (which is a symlink to "\Device\Mup") in the system logon
|
||||
// namespace, so "\??\UNC\host\share" (or "\??\Global\UNC\host\share", or "\Global??\UNC\host\share") is equivalent
|
||||
// to "\\host\share".
|
||||
//
|
||||
// NT paths are not universally accepted by Win32 applications and APIs. For example, Far supports paths starting
|
||||
// with "\??\" and "\??\Global\" but not with "\Global??\". As of Win10 21H1, File Explorer, cmd.exe and PowerShell
|
||||
// don't support any of these. Given this, and that NT paths have a different set of allowed characters from Win32 paths,
|
||||
// we should normally avoid exposing NT paths to users that expect Win32 paths.
|
||||
//
|
||||
// In Boost.Filesystem we only deal with NT paths that come from reparse points, such as symlinks and mount points,
|
||||
// including directory junctions. It was observed that reparse points created by junction.exe and mklink use the "\??\"
|
||||
// prefix for directory junctions and absolute symlink and unqualified relative path for relative symlinks.
|
||||
// Absolute paths are using drive letters for mounted drives (e.g. "\??\C:\directory"), although it is possible
|
||||
// to create a junction to an directory using a different way of identifying the filesystem (e.g.
|
||||
// "\??\Volume{00000000-0000-0000-0000-000000000000}\directory").
|
||||
// mklink does not support creating junctions pointing to a UNC path. junction.exe does create a junction that
|
||||
// uses a seemingly invalid syntax like "\??\\\host\share", i.e. it basically does not expect an UNC path. It is not known
|
||||
// if reparse points that refer to a UNC path are considered valid.
|
||||
// There are reparse points created as mount points for local and remote filsystems (for example, a cloud storage mounted
|
||||
// in the local filesystem). Such mount points have the form of "\??\Volume{00000000-0000-0000-0000-000000000000}\",
|
||||
// "\??\Harddisk0Partition1\" or "\??\HarddiskVolume1\".
|
||||
// Reparse points that refer directly to a global namespace (through "\??\Global\" or "\Global??\" prefixes) or
|
||||
// devices (e.g. "\Device\HarddiskVolume1") have not been observed so far.
|
||||
|
||||
path win32_path;
|
||||
std::size_t pos = 0u;
|
||||
bool global_namespace = false;
|
||||
|
||||
// Check for the "\??\" prefix
|
||||
if (size >= 4u &&
|
||||
nt_path[0] == path::preferred_separator &&
|
||||
nt_path[1] == questionmark &&
|
||||
nt_path[2] == questionmark &&
|
||||
nt_path[3] == path::preferred_separator)
|
||||
{
|
||||
pos = 4u;
|
||||
|
||||
// Check "Global"
|
||||
if ((size - pos) >= 6u &&
|
||||
(nt_path[pos] == L'G' || nt_path[pos] == L'g') &&
|
||||
(nt_path[pos + 1] == L'l' || nt_path[pos + 1] == L'L') &&
|
||||
(nt_path[pos + 2] == L'o' || nt_path[pos + 2] == L'O') &&
|
||||
(nt_path[pos + 3] == L'b' || nt_path[pos + 3] == L'B') &&
|
||||
(nt_path[pos + 4] == L'a' || nt_path[pos + 4] == L'A') &&
|
||||
(nt_path[pos + 5] == L'l' || nt_path[pos + 5] == L'L'))
|
||||
{
|
||||
if ((size - pos) == 6u)
|
||||
{
|
||||
pos += 6u;
|
||||
global_namespace = true;
|
||||
}
|
||||
else if (detail::is_directory_separator(nt_path[pos + 6u]))
|
||||
{
|
||||
pos += 7u;
|
||||
global_namespace = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
// Check for the "\Global??\" prefix
|
||||
else if (size >= 10u &&
|
||||
nt_path[0] == path::preferred_separator &&
|
||||
(nt_path[1] == L'G' || nt_path[1] == L'g') &&
|
||||
(nt_path[2] == L'l' || nt_path[2] == L'L') &&
|
||||
(nt_path[3] == L'o' || nt_path[3] == L'O') &&
|
||||
(nt_path[4] == L'b' || nt_path[4] == L'B') &&
|
||||
(nt_path[5] == L'a' || nt_path[5] == L'A') &&
|
||||
(nt_path[6] == L'l' || nt_path[6] == L'L') &&
|
||||
nt_path[7] == questionmark &&
|
||||
nt_path[8] == questionmark &&
|
||||
nt_path[9] == path::preferred_separator)
|
||||
{
|
||||
pos = 10u;
|
||||
global_namespace = true;
|
||||
}
|
||||
|
||||
if (pos > 0u)
|
||||
{
|
||||
if ((size - pos) >= 2u &&
|
||||
(
|
||||
// Check if the following is a drive letter
|
||||
(
|
||||
detail::is_letter(nt_path[pos]) && nt_path[pos + 1u] == colon &&
|
||||
((size - pos) == 2u || detail::is_directory_separator(nt_path[pos + 2u]))
|
||||
) ||
|
||||
// Check for an "incorrect" syntax for UNC path junction points
|
||||
(
|
||||
detail::is_directory_separator(nt_path[pos]) && detail::is_directory_separator(nt_path[pos + 1u]) &&
|
||||
((size - pos) == 2u || !detail::is_directory_separator(nt_path[pos + 2u]))
|
||||
)
|
||||
))
|
||||
{
|
||||
// Strip the NT path prefix
|
||||
goto done;
|
||||
}
|
||||
|
||||
static const wchar_t win32_path_prefix[4u] = { path::preferred_separator, path::preferred_separator, questionmark, path::preferred_separator };
|
||||
|
||||
// Check for a UNC path
|
||||
if ((size - pos) >= 4u &&
|
||||
(nt_path[pos] == L'U' || nt_path[pos] == L'u') &&
|
||||
(nt_path[pos + 1] == L'N' || nt_path[pos + 1] == L'n') &&
|
||||
(nt_path[pos + 2] == L'C' || nt_path[pos + 2] == L'c') &&
|
||||
nt_path[pos + 3] == path::preferred_separator)
|
||||
{
|
||||
win32_path.assign(win32_path_prefix, win32_path_prefix + 2);
|
||||
pos += 4u;
|
||||
goto done;
|
||||
}
|
||||
|
||||
// This is some other NT path, possibly a volume mount point. Replace the NT prefix with a Win32 filesystem prefix "\\?\".
|
||||
win32_path.assign(win32_path_prefix, win32_path_prefix + 4);
|
||||
if (global_namespace)
|
||||
{
|
||||
static const wchar_t win32_path_global_prefix[7u] = { L'G', L'l', L'o', L'b', L'a', L'l', path::preferred_separator };
|
||||
win32_path.concat(win32_path_global_prefix, win32_path_global_prefix + 7);
|
||||
}
|
||||
}
|
||||
|
||||
done:
|
||||
win32_path.concat(nt_path + pos, nt_path + size);
|
||||
return win32_path;
|
||||
}
|
||||
|
||||
// Windows kernel32.dll functions that may or may not be present
|
||||
// must be accessed through pointers
|
||||
|
||||
typedef BOOL(WINAPI* PtrCreateHardLinkW)(
|
||||
typedef BOOL (WINAPI* PtrCreateHardLinkW)(
|
||||
/*__in*/ LPCWSTR lpFileName,
|
||||
/*__in*/ LPCWSTR lpExistingFileName,
|
||||
/*__reserved*/ LPSECURITY_ATTRIBUTES lpSecurityAttributes);
|
||||
@ -1025,7 +1185,7 @@ PtrCreateHardLinkW create_hard_link_api = PtrCreateHardLinkW(
|
||||
boost::winapi::get_proc_address(
|
||||
boost::winapi::GetModuleHandleW(L"kernel32.dll"), "CreateHardLinkW"));
|
||||
|
||||
typedef BOOLEAN(WINAPI* PtrCreateSymbolicLinkW)(
|
||||
typedef BOOLEAN (WINAPI* PtrCreateSymbolicLinkW)(
|
||||
/*__in*/ LPCWSTR lpSymlinkFileName,
|
||||
/*__in*/ LPCWSTR lpTargetFileName,
|
||||
/*__in*/ DWORD dwFlags);
|
||||
@ -1034,7 +1194,7 @@ PtrCreateSymbolicLinkW create_symbolic_link_api = PtrCreateSymbolicLinkW(
|
||||
boost::winapi::get_proc_address(
|
||||
boost::winapi::GetModuleHandleW(L"kernel32.dll"), "CreateSymbolicLinkW"));
|
||||
|
||||
#endif
|
||||
#endif // defined(BOOST_POSIX_API)
|
||||
|
||||
//#ifdef BOOST_WINDOWS_API
|
||||
//
|
||||
@ -2710,14 +2870,14 @@ path read_symlink(path const& p, system::error_code* ec)
|
||||
{
|
||||
case IO_REPARSE_TAG_MOUNT_POINT:
|
||||
buffer = buf->rdb.MountPointReparseBuffer.PathBuffer;
|
||||
offset = buf->rdb.MountPointReparseBuffer.PrintNameOffset;
|
||||
len = buf->rdb.MountPointReparseBuffer.PrintNameLength;
|
||||
offset = buf->rdb.MountPointReparseBuffer.SubstituteNameOffset;
|
||||
len = buf->rdb.MountPointReparseBuffer.SubstituteNameLength;
|
||||
break;
|
||||
|
||||
case IO_REPARSE_TAG_SYMLINK:
|
||||
buffer = buf->rdb.SymbolicLinkReparseBuffer.PathBuffer;
|
||||
offset = buf->rdb.SymbolicLinkReparseBuffer.PrintNameOffset;
|
||||
len = buf->rdb.SymbolicLinkReparseBuffer.PrintNameLength;
|
||||
offset = buf->rdb.SymbolicLinkReparseBuffer.SubstituteNameOffset;
|
||||
len = buf->rdb.SymbolicLinkReparseBuffer.SubstituteNameLength;
|
||||
// Note: iff info.rdb.SymbolicLinkReparseBuffer.Flags & SYMLINK_FLAG_RELATIVE
|
||||
// -> resulting path is relative to the source
|
||||
break;
|
||||
@ -2727,9 +2887,7 @@ path read_symlink(path const& p, system::error_code* ec)
|
||||
return symlink_path;
|
||||
}
|
||||
|
||||
symlink_path.assign(
|
||||
buffer + offset / sizeof(wchar_t),
|
||||
buffer + (offset + len) / sizeof(wchar_t));
|
||||
symlink_path = convert_nt_path_to_win32_path(buffer + offset / sizeof(wchar_t), len / sizeof(wchar_t));
|
||||
}
|
||||
#endif
|
||||
|
||||
|
14
src/path.cpp
14
src/path.cpp
@ -23,6 +23,7 @@
|
||||
|
||||
#ifdef BOOST_WINDOWS_API
|
||||
#include "windows_file_codecvt.hpp"
|
||||
#include "windows_tools.hpp"
|
||||
#include <windows.h>
|
||||
#elif defined(macintosh) || defined(__APPLE__) || defined(__APPLE_CC__) || defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__HAIKU__)
|
||||
#include <boost/filesystem/detail/utf8_codecvt_facet.hpp>
|
||||
@ -63,17 +64,12 @@ using boost::filesystem::path_detail::substring;
|
||||
const wchar_t separators[] = L"/\\";
|
||||
const wchar_t separator_string[] = L"/";
|
||||
const wchar_t preferred_separator_string[] = L"\\";
|
||||
BOOST_CONSTEXPR_OR_CONST wchar_t colon = L':';
|
||||
BOOST_CONSTEXPR_OR_CONST wchar_t questionmark = L'?';
|
||||
|
||||
inline bool is_letter(wchar_t c)
|
||||
{
|
||||
return (c >= L'A' && c <= L'Z') || (c >= L'a' && c <= L'z');
|
||||
}
|
||||
using boost::filesystem::detail::colon;
|
||||
using boost::filesystem::detail::questionmark;
|
||||
|
||||
inline bool is_alnum(wchar_t c)
|
||||
{
|
||||
return is_letter(c) || (c >= L'0' && c <= L'9');
|
||||
return boost::filesystem::detail::is_letter(c) || (c >= L'0' && c <= L'9');
|
||||
}
|
||||
|
||||
inline bool is_device_name_char(wchar_t c)
|
||||
@ -632,7 +628,7 @@ size_type find_root_directory_start(string_type const& path, size_type size, siz
|
||||
// case "c:" or "prn:"
|
||||
// Note: There is ambiguity in a "c:x" path interpretation. It could either mean a file "x" located at the current directory for drive C:,
|
||||
// or an alternative stream "x" of a file "c". Windows API resolve this as the former, and so do we.
|
||||
if ((size - pos) >= 2 && is_letter(path[pos]))
|
||||
if ((size - pos) >= 2 && fs::detail::is_letter(path[pos]))
|
||||
{
|
||||
size_type i = pos + 1;
|
||||
for (; i < size; ++i)
|
||||
|
@ -13,6 +13,7 @@
|
||||
#ifndef BOOST_FILESYSTEM_SRC_WINDOWS_TOOLS_HPP_
|
||||
#define BOOST_FILESYSTEM_SRC_WINDOWS_TOOLS_HPP_
|
||||
|
||||
#include <boost/filesystem/config.hpp>
|
||||
#include <boost/filesystem/path.hpp>
|
||||
#include <boost/filesystem/file_status.hpp>
|
||||
|
||||
@ -22,6 +23,14 @@ namespace boost {
|
||||
namespace filesystem {
|
||||
namespace detail {
|
||||
|
||||
BOOST_INLINE_VARIABLE BOOST_CONSTEXPR_OR_CONST wchar_t colon = L':';
|
||||
BOOST_INLINE_VARIABLE BOOST_CONSTEXPR_OR_CONST wchar_t questionmark = L'?';
|
||||
|
||||
//! Returns \c true if \a c is an ASCII Latin letter, i.e. lies within 'a'-'z' or 'A'-'Z'
inline bool is_letter(wchar_t c)
{
    if (c >= L'a' && c <= L'z')
        return true;

    return c >= L'A' && c <= L'Z';
}
|
||||
|
||||
inline bool equal_extension(wchar_t const* p, wchar_t const (&x1)[5], wchar_t const (&x2)[5])
|
||||
{
|
||||
return (p[0] == x1[0] || p[0] == x2[0]) &&
|
||||
|
Loading…
Reference in New Issue
Block a user