diff --git a/library/std/src/path.rs b/library/std/src/path.rs index dc0c735a06c6f..ce1901fb4f7c6 100644 --- a/library/std/src/path.rs +++ b/library/std/src/path.rs @@ -2841,6 +2841,11 @@ impl fmt::Debug for Path { /// println!("{}", path.display()); /// ``` /// +/// # Windows +/// +/// Verbatim paths that convert losslessly may be displayed in their more familiar form. +/// For example, `\\?\C:\Program Files\Rust` may display as `C:\Program Files\Rust`. +/// /// [`Display`]: fmt::Display /// [`format!`]: crate::format #[stable(feature = "rust1", since = "1.0.0")] @@ -2858,7 +2863,21 @@ impl fmt::Debug for Display<'_> { #[stable(feature = "rust1", since = "1.0.0")] impl fmt::Display for Display<'_> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - self.path.inner.display(f) + #[cfg(not(windows))] + let path = self.path; + #[cfg(windows)] + let path = if let Some((root, subpath)) = + crate::sys::path::try_from_verbatim(self.path.as_u8_slice()) + { + if root.is_unc() { + f.write_str(r"\\")?; + } + // SAFETY: Only an ASCII prefix is split off, so `subpath` starts on a character boundary. 
+ unsafe { Path::from_u8_slice(subpath) } + } else { + self.path + }; + path.inner.display(f) } } diff --git a/library/std/src/path/tests.rs b/library/std/src/path/tests.rs index 0a16ff2a721ce..288f02a58a326 100644 --- a/library/std/src/path/tests.rs +++ b/library/std/src/path/tests.rs @@ -1090,6 +1090,147 @@ pub fn test_decompositions_windows() { file_prefix: Some(".x") ); } +#[test] +#[cfg(windows)] +pub fn windows_display_user_paths() { + fn check(path: &str, expected: &str) { + assert_eq!(&Path::new(path).display().to_string(), expected); + } + fn unchanged(path: &str) { + check(path, path); + } + + // Make sure non-verbatim paths aren't changed. + unchanged(r"path\to\file"); + unchanged(r".\path\to\file"); + unchanged(r"..\path\to\file"); + unchanged(r"C:\path\to\file"); + unchanged(r"\\server\share\path\to\file"); + unchanged(r"\\.\server\share\path\to\file"); + unchanged(r"//?\UNC\server\share\path\to\file"); + + // The simple cases. + check(r"\\?\C:\path", r"C:\path"); + check(r"\\?\C:\", r"C:\"); + check(r"\\?\UNC\server\share", r"\\server\share"); + check(r"\\?\UNC\server\share\", r"\\server\share\"); + check(r"\\?\UNC\server\share\path", r"\\server\share\path"); + + // `\\?\C:` is an absolute path while `C:` is a "drive relative" path. + unchanged(r"\\?\C:"); + + // We only change drive and UNC paths, not device paths. + unchanged(r"\\?\pipe\name"); + + // Empty components are nonsensical but can be represented in verbatim paths. + unchanged(r"\\?\C:\path\\to\file"); + unchanged(r"\\?\UNC\server\share\path\\to\file"); + + // Verbatim `.` and `..` components have no user path equivalent. 
+ unchanged(r"\\?\C:\path\..\file"); + unchanged(r"\\?\C:\path\.\file"); + unchanged(r"\\?\C:\path\file\."); + unchanged(r"\\?\C:\path\file\.."); + + unchanged(r"\\?\UNC\server\share\path\..\file"); + unchanged(r"\\?\UNC\server\share\path\.\file"); + unchanged(r"\\?\UNC\server\share\path\file\."); + unchanged(r"\\?\UNC\server\share\path\file\.."); + + // All trailing dots and spaces are stripped from user paths. + unchanged(r"\\?\C:\path\to\file.."); + unchanged(r"\\?\C:\path\to\file "); + unchanged(r"\\?\C:\path\to\file.. .."); + + unchanged(r"\\?\UNC\server\share\path\to\file.."); + unchanged(r"\\?\UNC\server\share\path\to\file "); + unchanged(r"\\?\UNC\server\share\path\to\file.. .."); + + // A single trailing dot in an interior component will be stripped by + // non-verbatim paths... + unchanged(r"\\?\C:\path\to.\file"); + unchanged(r"\\?\UNC\server\share\path\to.\file"); + + // ...but two dots won't be. Don't ask. + check(r"\\?\C:\path\to..\file", r"C:\path\to..\file"); + check(r"\\?\UNC\server\share\path\to..\file", r"\\server\share\path\to..\file"); + + // Dots elsewhere are fine. + check(r"\\?\C:\path\.to\file", r"C:\path\.to\file"); + check(r"\\?\C:\path\..to\file", r"C:\path\..to\file"); + check(r"\\?\C:\path\t.o\file", r"C:\path\t.o\file"); + check(r"\\?\C:\path\t..o\file", r"C:\path\t..o\file"); + + check(r"\\?\UNC\server\share\path\.to\file", r"\\server\share\path\.to\file"); + check(r"\\?\UNC\server\share\path\..to\file", r"\\server\share\path\..to\file"); + check(r"\\?\UNC\server\share\path\t.o\file", r"\\server\share\path\t.o\file"); + check(r"\\?\UNC\server\share\path\t..o\file", r"\\server\share\path\t..o\file"); + + // Verbatim `/` has no user path equivalent. + unchanged(r"\\?\C:\path/to\file"); + unchanged(r"\\?\UNC\server\share\path/to\file"); + + // Legacy DOS device names are converted to `\\.\` paths, but only for drive paths. 
+ unchanged(r"\\?\C:\path\to\AUX"); + unchanged(r"\\?\C:\path\to\NUL"); + unchanged(r"\\?\C:\path\to\PRN"); + unchanged(r"\\?\C:\path\to\CON"); + unchanged(r"\\?\C:\path\to\CONIN$"); + unchanged(r"\\?\C:\path\to\CONOUT$"); + + unchanged(r"\\?\C:\path\to\COM1"); + unchanged(r"\\?\C:\path\to\COM2"); + unchanged(r"\\?\C:\path\to\COM3"); + unchanged(r"\\?\C:\path\to\COM4"); + unchanged(r"\\?\C:\path\to\COM5"); + unchanged(r"\\?\C:\path\to\COM6"); + unchanged(r"\\?\C:\path\to\COM7"); + unchanged(r"\\?\C:\path\to\COM8"); + unchanged(r"\\?\C:\path\to\COM9"); + + unchanged(r"\\?\C:\path\to\LPT1"); + unchanged(r"\\?\C:\path\to\LPT2"); + unchanged(r"\\?\C:\path\to\LPT3"); + unchanged(r"\\?\C:\path\to\LPT4"); + unchanged(r"\\?\C:\path\to\LPT5"); + unchanged(r"\\?\C:\path\to\LPT6"); + unchanged(r"\\?\C:\path\to\LPT7"); + unchanged(r"\\?\C:\path\to\LPT8"); + unchanged(r"\\?\C:\path\to\LPT9"); + + // Yes, these are superscript digits. The legend goes that someone once used + // the wrong "is a digit" function and now it can't be changed due to + // stability guarantees. + unchanged(r"\\?\C:\path\to\COM²"); + unchanged(r"\\?\C:\path\to\COM³"); + unchanged(r"\\?\C:\path\to\COM¹"); + unchanged(r"\\?\C:\path\to\LPT²"); + unchanged(r"\\?\C:\path\to\LPT³"); + unchanged(r"\\?\C:\path\to\LPT¹"); + + // DOS device names are case-insensitive. + unchanged(r"\\?\C:\aux"); + unchanged(r"\\?\C:\CoM4"); + unchanged(r"\\?\C:\cOnOuT$"); + + // Everything after a dot is ignored for the sake of parsing the device name. + unchanged(r"\\?\C:\path\to\LPT¹.txt"); + // Spaces are ignored too. + unchanged(r"\\?\C:\path\to\LPT¹ "); + // And these two rules can be combined. + unchanged(r"\\?\C:\path\to\LPT¹ .txt"); + + // UNC paths can have DOS devices even when not verbatim. + check(r"\\?\UNC\server\share\AUX", r"\\server\share\AUX"); + + // In UNC paths, the server name and share name are never changed, even + // without the verbatim prefix... + check(r"\\?\UNC\..\share.\", r"\\..\share.\"); + + // ... 
aside from the forward slash which is a path separator. + unchanged(r"\\?\UNC\ser/ver\share\"); + unchanged(r"\\?\UNC\server\sh/are\"); +} #[test] pub fn test_stem_ext() { diff --git a/library/std/src/sys/windows/path.rs b/library/std/src/sys/windows/path.rs index 460c1eff7788d..88b50c3006296 100644 --- a/library/std/src/sys/windows/path.rs +++ b/library/std/src/sys/windows/path.rs @@ -242,3 +242,108 @@ pub(crate) fn maybe_verbatim(path: &Path) -> io::Result> { )?; Ok(path) } + +#[derive(PartialEq, Eq, Clone, Copy)] +pub(crate) enum Root { + Drive, + Unc, + //Device, +} +impl Root { + pub fn is_unc(self) -> bool { + self == Root::Unc + } +} + +/// If the given verbatim path can be losslessly converted to a user path, +/// then this returns the [`Root`] type and the subpath. +/// Otherwise it returns `None`. +pub(crate) fn try_from_verbatim(path: &[u8]) -> Option<(Root, &[u8])> { + if !path.starts_with(br"\\?\") { + return None; + } + // Parse the root type. + let (root, subpath) = match path[4..] { + ref subpath @ [drive @ _, b':', b'\\', ..] if drive.is_ascii_alphabetic() => { + (Root::Drive, subpath) + } + [b'U', b'N', b'C', b'\\', ref subpath @ ..] => (Root::Unc, subpath), + _ => return None, + }; + + let mut components = subpath.split_inclusive(|&b| b == b'\\'); + let mut filename = None; + if root == Root::Unc { + // The server and share components are only checked for `/`. + for component in components.by_ref().take(2) { + if component.contains(&b'/') { + return None; + } + } + } + for component in components { + if component.contains(&b'/') { + return None; + } + match component { + br"\" | br".\" | br"..\" => return None, + // Ends with one and only one dot. + [.., b @ _, b'.', b'\\'] if *b != b'.' 
=> return None, + _ => {} + } + filename.replace(component); + } + if let Some(name) = filename { + if matches!(name.last(), Some(b'.') | Some(b' ')) + || (root == Root::Drive && is_dos_device(name)) + { + return None; + } + } + Some((root, subpath)) +} + +/// Returns true if the filename is the name of a DOS device (ASCII case is ignored). +fn is_dos_device(filename: &[u8]) -> bool { + // The UTF-8 encoding of "²", "³" and "¹" is two bytes and starts with 0xc2. + const SUPER: u8 = 0xc2; + const SUPER_2: u8 = 0xb2; // ² + const SUPER_3: u8 = 0xb3; // ³ + const SUPER_1: u8 = 0xb9; // ¹ + + let upper = { + let mut upper = [0u8; 7]; + for (a, &b) in upper.iter_mut().zip(filename.iter()) { + *a = b.to_ascii_uppercase(); + } + upper + }; + let tail = match &upper[..3] { + b"AUX" | b"NUL" | b"PRN" => &filename[3..], + b"CON" => match &upper[3..] { + // Disambiguate `CON`, `CONIN$` and `CONOUT$`. + b"OUT$" => &filename[7..], + [b'I', b'N', b'$', _] => &filename[6..], + _ => &filename[3..], + }, + b"COM" | b"LPT" => match upper[3] { + // Match digit + b'1'..=b'9' => &filename[4..], + // Test for the two-byte superscript digits. + SUPER if matches!(upper[4], SUPER_1 | SUPER_2 | SUPER_3) => &filename[5..], + _ => return false, + }, + _ => return false, + }; + // Trailing spaces are ignored. + // A `.` marks the end of the device name. + // Anything else means this is not a device name. + let mut iter = tail.iter(); + loop { + match iter.next() { + None | Some(b'.') => break true, + Some(b' ') => continue, + _ => break false, + } + } +}