Rollup merge of rust-lang#141864 - Berrysoft:cygwin-path, r=ChrisDenton

jhpratt · web-flow · commit 39a0bdc2df25 · 2025-06-17T03:00:38.000+02:00
Handle win32 separator for cygwin paths

This PR handles an issue: Cygwin actually supports Win32 paths, so we need to handle the Win32 prefix and separators.

r? ``@mati865``

cc ``@jeremyd2019``

~~Not sure if I should handle the prefix like the windows target... Cygwin *does* support win32 paths directly going through the APIs, but I think it's not the recommended way.~~

Here I just use `cygwin_conv_path` because it handles both Cygwin and Win32 paths correctly and converts them into absolute POSIX paths.

UPDATE: Windows path prefix is handled.
diff --git a/library/std/src/path.rs b/library/std/src/path.rs
@@ -1316,8 +1316,17 @@ impl PathBuf {
             need_sep = false
         }
 
+        let need_clear = if cfg!(target_os = "cygwin") {
+            // If path is absolute and its prefix is none, it is like `/foo`,
+            // and will be handled below.
+            path.prefix().is_some()
+        } else {
+            // On Unix: prefix is always None.
+            path.is_absolute() || path.prefix().is_some()
+        };
+
         // absolute `path` replaces `self`
-        if path.is_absolute() || path.prefix().is_some() {
+        if need_clear {
             self.inner.truncate(0);
 
         // verbatim paths need . and .. removed
@@ -3643,6 +3652,11 @@ impl Error for NormalizeError {}
 /// paths, this is currently equivalent to calling
 /// [`GetFullPathNameW`][windows-path].
 ///
+/// On Cygwin, this is currently equivalent to calling [`cygwin_conv_path`][cygwin-path]
+/// with mode `CCP_WIN_A_TO_POSIX`, and then processing the result as on other POSIX platforms.
+/// If a Windows path is given, it will be converted to an absolute POSIX path without
+/// keeping `..`.
+///
 /// Note that these [may change in the future][changes].
 ///
 /// # Errors
@@ -3700,6 +3714,7 @@ impl Error for NormalizeError {}
 /// [changes]: io#platform-specific-behavior
 /// [posix-semantics]: https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap04.html#tag_04_13
 /// [windows-path]: https://docs.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-getfullpathnamew
+/// [cygwin-path]: https://cygwin.com/cygwin-api/func-cygwin-conv-path.html
 #[stable(feature = "absolute_path", since = "1.79.0")]
 pub fn absolute<P: AsRef<Path>>(path: P) -> io::Result<PathBuf> {
     let path = path.as_ref();
diff --git a/library/std/src/sys/path/cygwin.rs b/library/std/src/sys/path/cygwin.rs
@@ -0,0 +1,92 @@
+use crate::ffi::OsString;
+use crate::os::unix::ffi::OsStringExt;
+use crate::path::{Path, PathBuf};
+use crate::sys::common::small_c_string::run_path_with_cstr;
+use crate::sys::cvt;
+use crate::{io, ptr};
+
+#[inline]
+pub fn is_sep_byte(b: u8) -> bool {
+    b == b'/' || b == b'\\'
+}
+
+/// Cygwin always prefers `/` over `\`, and it always converts all `/` to `\`
+/// internally when calling Win32 APIs. Therefore, the server component of path
+/// `\\?\UNC\localhost/share` is `localhost/share` on Win32, but `localhost`
+/// on Cygwin.
+#[inline]
+pub fn is_verbatim_sep(b: u8) -> bool {
+    b == b'/' || b == b'\\'
+}
+
+pub use super::windows_prefix::parse_prefix;
+
+pub const MAIN_SEP_STR: &str = "/";
+pub const MAIN_SEP: char = '/';
+
+unsafe extern "C" {
+    // Doc: https://cygwin.com/cygwin-api/func-cygwin-conv-path.html
+    // Src: https://github.com/cygwin/cygwin/blob/718a15ba50e0d01c79800bd658c2477f9a603540/winsup/cygwin/path.cc#L3902
+    // Safety:
+    // * `what` should be `CCP_WIN_A_TO_POSIX` here
+    // * `from` is a null-terminated UTF-8 path
+    // * `to` is a buffer whose size is `size`.
+    //
+    // Converts a path to an absolute POSIX path, whether the input is a Win32 path or a POSIX path.
+    fn cygwin_conv_path(
+        what: libc::c_uint,
+        from: *const libc::c_char,
+        to: *mut u8,
+        size: libc::size_t,
+    ) -> libc::ssize_t;
+}
+
+const CCP_WIN_A_TO_POSIX: libc::c_uint = 2;
+
+/// Make a POSIX path absolute.
+pub(crate) fn absolute(path: &Path) -> io::Result<PathBuf> {
+    run_path_with_cstr(path, &|path| {
+        let conv = CCP_WIN_A_TO_POSIX;
+        let size = cvt(unsafe { cygwin_conv_path(conv, path.as_ptr(), ptr::null_mut(), 0) })?;
+        // If success, size should not be 0.
+        debug_assert!(size >= 1);
+        let size = size as usize;
+        let mut buffer = Vec::with_capacity(size);
+        cvt(unsafe { cygwin_conv_path(conv, path.as_ptr(), buffer.as_mut_ptr(), size) })?;
+        unsafe {
+            buffer.set_len(size - 1);
+        }
+        Ok(PathBuf::from(OsString::from_vec(buffer)))
+    })
+    .map(|path| {
+        if path.prefix().is_some() {
+            return path;
+        }
+
+        // From unix.rs
+        let mut components = path.components();
+        let path_os = path.as_os_str().as_encoded_bytes();
+
+        let mut normalized = if path_os.starts_with(b"//") && !path_os.starts_with(b"///") {
+            components.next();
+            PathBuf::from("//")
+        } else {
+            PathBuf::new()
+        };
+        normalized.extend(components);
+
+        if path_os.ends_with(b"/") {
+            normalized.push("");
+        }
+
+        normalized
+    })
+}
+
+pub(crate) fn is_absolute(path: &Path) -> bool {
+    if path.as_os_str().as_encoded_bytes().starts_with(b"\\") {
+        path.has_root() && path.prefix().is_some()
+    } else {
+        path.has_root()
+    }
+}
diff --git a/library/std/src/sys/path/mod.rs b/library/std/src/sys/path/mod.rs
@@ -1,6 +1,7 @@
 cfg_if::cfg_if! {
     if #[cfg(target_os = "windows")] {
         mod windows;
+        mod windows_prefix;
         pub use windows::*;
     } else if #[cfg(all(target_vendor = "fortanix", target_env = "sgx"))] {
         mod sgx;
@@ -11,6 +12,10 @@ cfg_if::cfg_if! {
     } else if #[cfg(target_os = "uefi")] {
         mod uefi;
         pub use uefi::*;
+    } else if #[cfg(target_os = "cygwin")] {
+        mod cygwin;
+        mod windows_prefix;
+        pub use cygwin::*;
     } else {
         mod unix;
         pub use unix::*;
diff --git a/library/std/src/sys/path/windows.rs b/library/std/src/sys/path/windows.rs
@@ -1,12 +1,14 @@
 use crate::ffi::{OsStr, OsString};
-use crate::path::{Path, PathBuf, Prefix};
+use crate::path::{Path, PathBuf};
 use crate::sys::api::utf16;
 use crate::sys::pal::{c, fill_utf16_buf, os2path, to_u16s};
 use crate::{io, ptr};
 
 #[cfg(test)]
 mod tests;
 
+pub use super::windows_prefix::parse_prefix;
+
 pub const MAIN_SEP_STR: &str = "\\";
 pub const MAIN_SEP: char = '\\';
 
@@ -77,177 +79,6 @@ pub(crate) fn append_suffix(path: PathBuf, suffix: &OsStr) -> PathBuf {
     path.into()
 }
 
-struct PrefixParser<'a, const LEN: usize> {
-    path: &'a OsStr,
-    prefix: [u8; LEN],
-}
-
-impl<'a, const LEN: usize> PrefixParser<'a, LEN> {
-    #[inline]
-    fn get_prefix(path: &OsStr) -> [u8; LEN] {
-        let mut prefix = [0; LEN];
-        // SAFETY: Only ASCII characters are modified.
-        for (i, &ch) in path.as_encoded_bytes().iter().take(LEN).enumerate() {
-            prefix[i] = if ch == b'/' { b'\\' } else { ch };
-        }
-        prefix
-    }
-
-    fn new(path: &'a OsStr) -> Self {
-        Self { path, prefix: Self::get_prefix(path) }
-    }
-
-    fn as_slice(&self) -> PrefixParserSlice<'a, '_> {
-        PrefixParserSlice {
-            path: self.path,
-            prefix: &self.prefix[..LEN.min(self.path.len())],
-            index: 0,
-        }
-    }
-}
-
-struct PrefixParserSlice<'a, 'b> {
-    path: &'a OsStr,
-    prefix: &'b [u8],
-    index: usize,
-}
-
-impl<'a> PrefixParserSlice<'a, '_> {
-    fn strip_prefix(&self, prefix: &str) -> Option<Self> {
-        self.prefix[self.index..]
-            .starts_with(prefix.as_bytes())
-            .then_some(Self { index: self.index + prefix.len(), ..*self })
-    }
-
-    fn prefix_bytes(&self) -> &'a [u8] {
-        &self.path.as_encoded_bytes()[..self.index]
-    }
-
-    fn finish(self) -> &'a OsStr {
-        // SAFETY: The unsafety here stems from converting between &OsStr and
-        // &[u8] and back. This is safe to do because (1) we only look at ASCII
-        // contents of the encoding and (2) new &OsStr values are produced only
-        // from ASCII-bounded slices of existing &OsStr values.
-        unsafe { OsStr::from_encoded_bytes_unchecked(&self.path.as_encoded_bytes()[self.index..]) }
-    }
-}
-
-pub fn parse_prefix(path: &OsStr) -> Option<Prefix<'_>> {
-    use Prefix::{DeviceNS, Disk, UNC, Verbatim, VerbatimDisk, VerbatimUNC};
-
-    let parser = PrefixParser::<8>::new(path);
-    let parser = parser.as_slice();
-    if let Some(parser) = parser.strip_prefix(r"\\") {
-        // \\
-
-        // The meaning of verbatim paths can change when they use a different
-        // separator.
-        if let Some(parser) = parser.strip_prefix(r"?\")
-            && !parser.prefix_bytes().iter().any(|&x| x == b'/')
-        {
-            // \\?\
-            if let Some(parser) = parser.strip_prefix(r"UNC\") {
-                // \\?\UNC\server\share
-
-                let path = parser.finish();
-                let (server, path) = parse_next_component(path, true);
-                let (share, _) = parse_next_component(path, true);
-
-                Some(VerbatimUNC(server, share))
-            } else {
-                let path = parser.finish();
-
-                // in verbatim paths only recognize an exact drive prefix
-                if let Some(drive) = parse_drive_exact(path) {
-                    // \\?\C:
-                    Some(VerbatimDisk(drive))
-                } else {
-                    // \\?\prefix
-                    let (prefix, _) = parse_next_component(path, true);
-                    Some(Verbatim(prefix))
-                }
-            }
-        } else if let Some(parser) = parser.strip_prefix(r".\") {
-            // \\.\COM42
-            let path = parser.finish();
-            let (prefix, _) = parse_next_component(path, false);
-            Some(DeviceNS(prefix))
-        } else {
-            let path = parser.finish();
-            let (server, path) = parse_next_component(path, false);
-            let (share, _) = parse_next_component(path, false);
-
-            if !server.is_empty() && !share.is_empty() {
-                // \\server\share
-                Some(UNC(server, share))
-            } else {
-                // no valid prefix beginning with "\\" recognized
-                None
-            }
-        }
-    } else {
-        // If it has a drive like `C:` then it's a disk.
-        // Otherwise there is no prefix.
-        parse_drive(path).map(Disk)
-    }
-}
-
-// Parses a drive prefix, e.g. "C:" and "C:\whatever"
-fn parse_drive(path: &OsStr) -> Option<u8> {
-    // In most DOS systems, it is not possible to have more than 26 drive letters.
-    // See <https://en.wikipedia.org/wiki/Drive_letter_assignment#Common_assignments>.
-    fn is_valid_drive_letter(drive: &u8) -> bool {
-        drive.is_ascii_alphabetic()
-    }
-
-    match path.as_encoded_bytes() {
-        [drive, b':', ..] if is_valid_drive_letter(drive) => Some(drive.to_ascii_uppercase()),
-        _ => None,
-    }
-}
-
-// Parses a drive prefix exactly, e.g. "C:"
-fn parse_drive_exact(path: &OsStr) -> Option<u8> {
-    // only parse two bytes: the drive letter and the drive separator
-    if path.as_encoded_bytes().get(2).map(|&x| is_sep_byte(x)).unwrap_or(true) {
-        parse_drive(path)
-    } else {
-        None
-    }
-}
-
-// Parse the next path component.
-//
-// Returns the next component and the rest of the path excluding the component and separator.
-// Does not recognize `/` as a separator character if `verbatim` is true.
-fn parse_next_component(path: &OsStr, verbatim: bool) -> (&OsStr, &OsStr) {
-    let separator = if verbatim { is_verbatim_sep } else { is_sep_byte };
-
-    match path.as_encoded_bytes().iter().position(|&x| separator(x)) {
-        Some(separator_start) => {
-            let separator_end = separator_start + 1;
-
-            let component = &path.as_encoded_bytes()[..separator_start];
-
-            // Panic safe
-            // The max `separator_end` is `bytes.len()` and `bytes[bytes.len()..]` is a valid index.
-            let path = &path.as_encoded_bytes()[separator_end..];
-
-            // SAFETY: `path` is a valid wtf8 encoded slice and each of the separators ('/', '\')
-            // is encoded in a single byte, therefore `bytes[separator_start]` and
-            // `bytes[separator_end]` must be code point boundaries and thus
-            // `bytes[..separator_start]` and `bytes[separator_end..]` are valid wtf8 slices.
-            unsafe {
-                (
-                    OsStr::from_encoded_bytes_unchecked(component),
-                    OsStr::from_encoded_bytes_unchecked(path),
-                )
-            }
-        }
-        None => (path, OsStr::new("")),
-    }
-}
-
 /// Returns a UTF-16 encoded path capable of bypassing the legacy `MAX_PATH` limits.
 ///
 /// This path may or may not have a verbatim prefix.
diff --git a/library/std/src/sys/path/windows/tests.rs b/library/std/src/sys/path/windows/tests.rs
@@ -1,4 +1,6 @@
+use super::super::windows_prefix::*;
 use super::*;
+use crate::path::Prefix;
 
 #[test]
 fn test_parse_next_component() {
diff --git a/library/std/src/sys/path/windows_prefix.rs b/library/std/src/sys/path/windows_prefix.rs
diff --git a/library/std/tests/path.rs b/library/std/tests/path.rs