Skip to content

Make path::Display output user-style paths instead of verbatim #90547

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 20 additions & 1 deletion library/std/src/path.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2841,6 +2841,11 @@ impl fmt::Debug for Path {
/// println!("{}", path.display());
/// ```
///
/// # Windows
///
/// Verbatim paths (those starting with `\\?\`) may be displayed in their more
/// familiar, non-verbatim form when that conversion is lossless.
/// For example, `\\?\C:\Program Files\Rust` may display as `C:\Program Files\Rust`.
///
/// [`Display`]: fmt::Display
/// [`format!`]: crate::format
#[stable(feature = "rust1", since = "1.0.0")]
Expand All @@ -2858,7 +2863,21 @@ impl fmt::Debug for Display<'_> {
#[stable(feature = "rust1", since = "1.0.0")]
impl fmt::Display for Display<'_> {
    /// Writes the wrapped path to `f`.
    ///
    /// On Windows, a verbatim (`\\?\`) path accepted by
    /// `crate::sys::path::try_from_verbatim` is written in its user-style
    /// form; every other path is written unchanged. On other platforms the
    /// path is always written unchanged.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        #[cfg(not(windows))]
        let path = self.path;
        #[cfg(windows)]
        let path = if let Some((root, subpath)) =
            crate::sys::path::try_from_verbatim(self.path.as_u8_slice())
        {
            // For a UNC root the returned subpath starts at the server name,
            // so the leading `\\` of the user-style path is written here.
            if root.is_unc() {
                f.write_str(r"\\")?;
            }
            // SAFETY: The path will only be split after an ASCII root.
            unsafe { Path::from_u8_slice(subpath) }
        } else {
            self.path
        };
        path.inner.display(f)
    }
}

Expand Down
141 changes: 141 additions & 0 deletions library/std/src/path/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1090,6 +1090,147 @@ pub fn test_decompositions_windows() {
file_prefix: Some(".x")
);
}
#[test]
#[cfg(windows)]
pub fn windows_display_user_paths() {
    /// Asserts that displaying `input` produces exactly `expected`.
    fn check(input: &str, expected: &str) {
        assert_eq!(Path::new(input).display().to_string(), expected);
    }

    // Paths that must be displayed exactly as they were given.
    const UNCHANGED: &[&str] = &[
        // Non-verbatim paths are never altered.
        r"path\to\file",
        r".\path\to\file",
        r"..\path\to\file",
        r"C:\path\to\file",
        r"\\server\share\path\to\file",
        r"\\.\server\share\path\to\file",
        r"//?\UNC\server\share\path\to\file",
        // `\\?\C:` is an absolute path whereas `C:` is a "drive relative" path.
        r"\\?\C:",
        // Only drive and UNC paths are converted; device paths are left alone.
        r"\\?\pipe\name",
        // Empty components are nonsensical but representable in verbatim paths.
        r"\\?\C:\path\\to\file",
        r"\\?\UNC\server\share\path\\to\file",
        // Verbatim `.` and `..` components have no user path equivalent.
        r"\\?\C:\path\..\file",
        r"\\?\C:\path\.\file",
        r"\\?\C:\path\file\.",
        r"\\?\C:\path\file\..",
        r"\\?\UNC\server\share\path\..\file",
        r"\\?\UNC\server\share\path\.\file",
        r"\\?\UNC\server\share\path\file\.",
        r"\\?\UNC\server\share\path\file\..",
        // User paths have all trailing dots and spaces stripped.
        r"\\?\C:\path\to\file..",
        r"\\?\C:\path\to\file ",
        r"\\?\C:\path\to\file.. ..",
        r"\\?\UNC\server\share\path\to\file..",
        r"\\?\UNC\server\share\path\to\file ",
        r"\\?\UNC\server\share\path\to\file.. ..",
        // Non-verbatim paths strip a single trailing dot from an interior
        // component (two trailing dots are fine; see the converted cases).
        r"\\?\C:\path\to.\file",
        r"\\?\UNC\server\share\path\to.\file",
        // Verbatim `/` has no user path equivalent.
        r"\\?\C:\path/to\file",
        r"\\?\UNC\server\share\path/to\file",
        // Legacy DOS device names are converted to `\\.\` paths, but only for
        // drive paths.
        r"\\?\C:\path\to\AUX",
        r"\\?\C:\path\to\NUL",
        r"\\?\C:\path\to\PRN",
        r"\\?\C:\path\to\CON",
        r"\\?\C:\path\to\CONIN$",
        r"\\?\C:\path\to\CONOUT$",
        r"\\?\C:\path\to\COM1",
        r"\\?\C:\path\to\COM2",
        r"\\?\C:\path\to\COM3",
        r"\\?\C:\path\to\COM4",
        r"\\?\C:\path\to\COM5",
        r"\\?\C:\path\to\COM6",
        r"\\?\C:\path\to\COM7",
        r"\\?\C:\path\to\COM8",
        r"\\?\C:\path\to\COM9",
        r"\\?\C:\path\to\LPT1",
        r"\\?\C:\path\to\LPT2",
        r"\\?\C:\path\to\LPT3",
        r"\\?\C:\path\to\LPT4",
        r"\\?\C:\path\to\LPT5",
        r"\\?\C:\path\to\LPT6",
        r"\\?\C:\path\to\LPT7",
        r"\\?\C:\path\to\LPT8",
        r"\\?\C:\path\to\LPT9",
        // Yes, these are superscript digits. Legend has it that someone once
        // used the wrong "is a digit" function and stability guarantees now
        // prevent changing it.
        r"\\?\C:\path\to\COM²",
        r"\\?\C:\path\to\COM³",
        r"\\?\C:\path\to\COM¹",
        r"\\?\C:\path\to\LPT²",
        r"\\?\C:\path\to\LPT³",
        r"\\?\C:\path\to\LPT¹",
        // DOS device names are case-insensitive.
        r"\\?\C:\aux",
        r"\\?\C:\CoM4",
        r"\\?\C:\cOnOuT$",
        // Everything after a dot is ignored when parsing the device name...
        r"\\?\C:\path\to\LPT¹.txt",
        // ...and so are spaces...
        r"\\?\C:\path\to\LPT¹ ",
        // ...and the two rules can be combined.
        r"\\?\C:\path\to\LPT¹ .txt",
        // The server and share names of a UNC path are never changed (see the
        // converted cases), except that a forward slash is a path separator.
        r"\\?\UNC\ser/ver\share\",
        r"\\?\UNC\server\sh/are\",
    ];

    // Verbatim paths paired with the user path they must be displayed as.
    const CONVERTED: &[(&str, &str)] = &[
        // The simple cases.
        (r"\\?\C:\path", r"C:\path"),
        (r"\\?\C:\", r"C:\"),
        (r"\\?\UNC\server\share", r"\\server\share"),
        (r"\\?\UNC\server\share\", r"\\server\share\"),
        (r"\\?\UNC\server\share\path", r"\\server\share\path"),
        // Two trailing dots in an interior component are not stripped by
        // non-verbatim paths (unlike a single dot). Don't ask.
        (r"\\?\C:\path\to..\file", r"C:\path\to..\file"),
        (r"\\?\UNC\server\share\path\to..\file", r"\\server\share\path\to..\file"),
        // Dots elsewhere are fine.
        (r"\\?\C:\path\.to\file", r"C:\path\.to\file"),
        (r"\\?\C:\path\..to\file", r"C:\path\..to\file"),
        (r"\\?\C:\path\t.o\file", r"C:\path\t.o\file"),
        (r"\\?\C:\path\t..o\file", r"C:\path\t..o\file"),
        (r"\\?\UNC\server\share\path\.to\file", r"\\server\share\path\.to\file"),
        (r"\\?\UNC\server\share\path\..to\file", r"\\server\share\path\..to\file"),
        (r"\\?\UNC\server\share\path\t.o\file", r"\\server\share\path\t.o\file"),
        (r"\\?\UNC\server\share\path\t..o\file", r"\\server\share\path\t..o\file"),
        // UNC paths can have DOS devices even when not verbatim.
        (r"\\?\UNC\server\share\AUX", r"\\server\share\AUX"),
        // In UNC paths, the server name and share name are never changed, even
        // without the verbatim prefix.
        (r"\\?\UNC\..\share.\", r"\\..\share.\"),
    ];

    for &path in UNCHANGED {
        check(path, path);
    }
    for &(verbatim, user) in CONVERTED {
        check(verbatim, user);
    }
}

#[test]
pub fn test_stem_ext() {
Expand Down
105 changes: 105 additions & 0 deletions library/std/src/sys/windows/path.rs
Original file line number Diff line number Diff line change
Expand Up @@ -242,3 +242,108 @@ pub(crate) fn maybe_verbatim(path: &Path) -> io::Result<Vec<u16>> {
)?;
Ok(path)
}

/// The kind of root found at the start of a verbatim path that is a candidate
/// for conversion to a user path.
#[derive(PartialEq, Eq, Clone, Copy)]
pub(crate) enum Root {
    /// A drive root, i.e. the path continues with `X:\` after the `\\?\` prefix.
    Drive,
    /// A UNC root, i.e. the path continues with `UNC\` after the `\\?\` prefix.
    Unc,
    // There is no `Device` variant: only drive and UNC paths are converted.
}

impl Root {
    /// Returns `true` if this root is [`Root::Unc`].
    pub fn is_unc(self) -> bool {
        matches!(self, Root::Unc)
    }
}

/// Tries to split a verbatim path into a [`Root`] and a subpath that can be
/// shown as a user path without changing its meaning.
///
/// Returns `None` when `path` does not start with `\\?\`, when what follows is
/// neither a drive (`X:\`) nor `UNC\`, or when any component would be
/// interpreted differently outside of a verbatim path.
///
/// For a drive root the returned subpath still begins with the drive letter;
/// for a UNC root it begins right after `UNC\`.
pub(crate) fn try_from_verbatim(path: &[u8]) -> Option<(Root, &[u8])> {
    let rest = path.strip_prefix(br"\\?\")?;

    // Work out which kind of root follows the verbatim prefix.
    let (root, subpath) = match rest {
        [drive, b':', b'\\', ..] if drive.is_ascii_alphabetic() => (Root::Drive, rest),
        [b'U', b'N', b'C', b'\\', after_unc @ ..] => (Root::Unc, after_unc),
        _ => return None,
    };

    // The first two components of a UNC subpath are only checked for `/`;
    // they are exempt from the remaining checks.
    let exempt = match root {
        Root::Unc => 2,
        Root::Drive => 0,
    };

    // Each component keeps its trailing `\` (if it has one).
    let mut last_component = None;
    for (index, component) in subpath.split_inclusive(|&b| b == b'\\').enumerate() {
        // A forward slash is rejected in every component.
        if component.contains(&b'/') {
            return None;
        }
        if index < exempt {
            continue;
        }
        // Empty, `.` and `..` components are rejected.
        let is_empty_or_dots =
            matches!(component, [b'\\'] | [b'.', b'\\'] | [b'.', b'.', b'\\']);
        // So is a component ending in exactly one dot before its separator.
        let ends_with_single_dot =
            matches!(component, [.., before, b'.', b'\\'] if *before != b'.');
        if is_empty_or_dots || ends_with_single_dot {
            return None;
        }
        last_component = Some(component);
    }

    match last_component {
        // A final component ending in a dot or a space is rejected.
        Some(name) if name.ends_with(b".") || name.ends_with(b" ") => None,
        // DOS device names are only rejected for drive paths.
        Some(name) if root == Root::Drive && is_dos_device(name) => None,
        _ => Some((root, subpath)),
    }
}

/// Returns true if the filename is the name of a DOS device.
///
/// The device name is matched ASCII case-insensitively at the start of
/// `filename`. It may be followed by any number of spaces and then either the
/// end of the name or a `.`; whatever comes after that `.` is ignored.
fn is_dos_device(filename: &[u8]) -> bool {
    // "¹", "²" and "³" are each encoded in UTF-8 as 0xc2 followed by one of
    // the bytes below.
    const SUPER_LEAD: u8 = 0xc2;
    const SUPER_1: u8 = 0xb9; // ¹
    const SUPER_2: u8 = 0xb2; // ²
    const SUPER_3: u8 = 0xb3; // ³

    /// ASCII case-insensitive version of `name.starts_with(prefix)`.
    fn has_prefix(name: &[u8], prefix: &[u8]) -> bool {
        name.get(..prefix.len()).map_or(false, |head| head.eq_ignore_ascii_case(prefix))
    }

    // Number of leading bytes taken up by the device name itself.
    let device_len = if has_prefix(filename, b"CONOUT$") {
        7
    } else if has_prefix(filename, b"CONIN$") {
        6
    } else if [&b"AUX"[..], b"NUL", b"PRN", b"CON"].iter().any(|name| has_prefix(filename, name)) {
        3
    } else if has_prefix(filename, b"COM") || has_prefix(filename, b"LPT") {
        // `COM` and `LPT` need a digit: either ASCII 1-9 or a superscript 1-3.
        match &filename[3..] {
            [b'1'..=b'9', ..] => 4,
            [SUPER_LEAD, SUPER_1 | SUPER_2 | SUPER_3, ..] => 5,
            _ => return false,
        }
    } else {
        return false;
    };

    // Skip spaces; the first other byte (if any) must be a `.`.
    let after_spaces = filename[device_len..].iter().find(|&&b| b != b' ');
    matches!(after_spaces, None | Some(b'.'))
}