diff --git a/library/alloc/src/ffi/mod.rs b/library/alloc/src/ffi/mod.rs index 4f9dc40a3cfc9..0880e8a340f4e 100644 --- a/library/alloc/src/ffi/mod.rs +++ b/library/alloc/src/ffi/mod.rs @@ -89,3 +89,19 @@ pub use self::c_str::{FromVecWithNulError, IntoStringError, NulError}; #[unstable(feature = "c_str_module", issue = "112134")] pub mod c_str; + +#[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" +)] +#[doc(hidden)] +pub mod os_str; + +#[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" +)] +#[doc(hidden)] +pub mod wtf8; diff --git a/library/alloc/src/ffi/os_str.rs b/library/alloc/src/ffi/os_str.rs new file mode 100644 index 0000000000000..521b0366df24a --- /dev/null +++ b/library/alloc/src/ffi/os_str.rs @@ -0,0 +1,1272 @@ +//! The [`OsStr`] and [`OsString`] types and associated utilities. + +#[cfg(test)] +mod tests; + +use core::ffi::os_str::{OsStr, Slice}; +use core::hash::{Hash, Hasher}; +use core::{cmp, fmt, ops}; + +use crate::borrow::{Borrow, Cow, ToOwned}; +use crate::boxed::Box; +use crate::collections::TryReserveError; +use crate::rc::Rc; +use crate::str::FromStr; +use crate::string::String; +use crate::sync::Arc; +use crate::vec::Vec; + +mod private { + /// This trait being unreachable from outside the crate + /// prevents outside implementations of our extension traits. + /// This allows adding more trait methods in the future. + #[unstable(feature = "sealed", issue = "none")] + pub trait Sealed {} +} + +#[cfg(any(target_os = "windows", target_os = "uefi"))] +#[stable(feature = "rust1", since = "1.0.0")] +pub mod os_str_ext_windows; + +#[cfg(not(any(target_os = "windows", target_os = "uefi")))] +#[stable(feature = "rust1", since = "1.0.0")] +pub mod os_str_ext_unix; + +#[cfg(any(target_os = "windows", target_os = "uefi"))] +mod wtf8; +#[cfg(any(target_os = "windows", target_os = "uefi"))] +#[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" +)] +#[doc(hidden)] +pub use wtf8::Buf; + +#[cfg(not(any(target_os = "windows", target_os = "uefi")))] +mod bytes; +#[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" +)] +#[cfg(not(any(target_os = "windows", target_os = "uefi")))] +#[doc(hidden)] +pub use bytes::Buf; + +/// A type that can represent owned, mutable platform-native strings, but is +/// cheaply inter-convertible with Rust strings. +/// +/// The need for this type arises from the fact that: +/// +/// * On Unix systems, strings are often arbitrary sequences of non-zero +/// bytes, in many cases interpreted as UTF-8. +/// +/// * On Windows, strings are often arbitrary sequences of non-zero 16-bit +/// values, interpreted as UTF-16 when it is valid to do so. +/// +/// * In Rust, strings are always valid UTF-8, which may contain zeros. +/// +/// `OsString` and [`OsStr`] bridge this gap by simultaneously representing Rust +/// and platform-native string values, and in particular allowing a Rust string +/// to be converted into an "OS" string with no cost if possible. A consequence +/// of this is that `OsString` instances are *not* `NUL` terminated; in order +/// to pass to e.g., Unix system call, you should create a [`CStr`]. +/// +/// `OsString` is to &[OsStr] as [`String`] is to &[str]: the former +/// in each pair are owned strings; the latter are borrowed +/// references. +/// +/// Note, `OsString` and [`OsStr`] internally do not necessarily hold strings in +/// the form native to the platform; While on Unix, strings are stored as a +/// sequence of 8-bit values, on Windows, where strings are 16-bit value based +/// as just discussed, strings are also actually stored as a sequence of 8-bit +/// values, encoded in a less-strict variant of UTF-8. This is useful to +/// understand when handling capacity and length values. +/// +/// # Capacity of `OsString` +/// +/// Capacity uses units of UTF-8 bytes for OS strings which were created from valid unicode, and +/// uses units of bytes in an unspecified encoding for other contents. On a given target, all +/// `OsString` and `OsStr` values use the same units for capacity, so the following will work: +/// ``` +/// use std::ffi::{OsStr, OsString}; +/// +/// fn concat_os_strings(a: &OsStr, b: &OsStr) -> OsString { +/// let mut ret = OsString::with_capacity(a.len() + b.len()); // This will allocate +/// ret.push(a); // This will not allocate further +/// ret.push(b); // This will not allocate further +/// ret +/// } +/// ``` +/// +/// # Creating an `OsString` +/// +/// **From a Rust string**: `OsString` implements +/// [From]<[String]>, so you can use my_string.[into]\() to +/// create an `OsString` from a normal Rust string. +/// +/// **From slices:** Just like you can start with an empty Rust +/// [`String`] and then [`String::push_str`] some &[str] +/// sub-string slices into it, you can create an empty `OsString` with +/// the [`OsString::new`] method and then push string slices into it with the +/// [`OsString::push`] method. +/// +/// # Extracting a borrowed reference to the whole OS string +/// +/// You can use the [`OsString::as_os_str`] method to get an &[OsStr] from +/// an `OsString`; this is effectively a borrowed reference to the +/// whole string. +/// +/// # Conversions +/// +/// See the [module's toplevel documentation about conversions][conversions] for a discussion on +/// the traits which `OsString` implements for [conversions] from/to native representations. +/// +/// [`CStr`]: crate::ffi::CStr +/// [conversions]: super#conversions +/// [into]: Into::into +#[cfg_attr(not(test), rustc_diagnostic_item = "OsString")] +#[stable(feature = "rust1", since = "1.0.0")] +pub struct OsString { + inner: Buf, +} + +/// Allows extension traits within `std`. +#[unstable(feature = "sealed", issue = "none")] +impl private::Sealed for OsString {} + +impl OsString { + /// Construct [`OsString`] from [`Buf`]. + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + #[must_use] + #[inline] + #[doc(hidden)] + pub fn from_inner(inner: Buf) -> Self { + Self { inner } + } + + /// Constructs a new empty `OsString`. + /// + /// # Examples + /// + /// ``` + /// use std::ffi::OsString; + /// + /// let os_string = OsString::new(); + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + #[must_use] + #[inline] + pub fn new() -> OsString { + OsString { inner: Buf::from_string(String::new()) } + } + + /// Converts bytes to an `OsString` without checking that the bytes contains + /// valid [`OsStr`]-encoded data. + /// + /// The byte encoding is an unspecified, platform-specific, self-synchronizing superset of UTF-8. + /// By being a self-synchronizing superset of UTF-8, this encoding is also a superset of 7-bit + /// ASCII. + /// + /// See the [module's toplevel documentation about conversions][conversions] for safe, + /// cross-platform [conversions] from/to native representations. + /// + /// # Safety + /// + /// As the encoding is unspecified, callers must pass in bytes that originated as a mixture of + /// validated UTF-8 and bytes from [`OsStr::as_encoded_bytes`] from within the same Rust version + /// built for the same target platform. For example, reconstructing an `OsString` from bytes sent + /// over the network or stored in a file will likely violate these safety rules. + /// + /// Due to the encoding being self-synchronizing, the bytes from [`OsStr::as_encoded_bytes`] can be + /// split either immediately before or immediately after any valid non-empty UTF-8 substring. + /// + /// # Example + /// + /// ``` + /// use std::ffi::OsStr; + /// + /// let os_str = OsStr::new("Mary had a little lamb"); + /// let bytes = os_str.as_encoded_bytes(); + /// let words = bytes.split(|b| *b == b' '); + /// let words: Vec<&OsStr> = words.map(|word| { + /// // SAFETY: + /// // - Each `word` only contains content that originated from `OsStr::as_encoded_bytes` + /// // - Only split with ASCII whitespace which is a non-empty UTF-8 substring + /// unsafe { OsStr::from_encoded_bytes_unchecked(word) } + /// }).collect(); + /// ``` + /// + /// [conversions]: super#conversions + #[inline] + #[stable(feature = "os_str_bytes", since = "1.74.0")] + pub unsafe fn from_encoded_bytes_unchecked(bytes: Vec) -> Self { + OsString { inner: unsafe { Buf::from_encoded_bytes_unchecked(bytes) } } + } + + /// Converts to an [`OsStr`] slice. + /// + /// # Examples + /// + /// ``` + /// use std::ffi::{OsString, OsStr}; + /// + /// let os_string = OsString::from("foo"); + /// let os_str = OsStr::new("foo"); + /// assert_eq!(os_string.as_os_str(), os_str); + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + #[must_use] + #[inline] + pub fn as_os_str(&self) -> &OsStr { + self + } + + /// Converts the `OsString` into a byte slice. To convert the byte slice back into an + /// `OsString`, use the [`OsStr::from_encoded_bytes_unchecked`] function. + /// + /// The byte encoding is an unspecified, platform-specific, self-synchronizing superset of UTF-8. + /// By being a self-synchronizing superset of UTF-8, this encoding is also a superset of 7-bit + /// ASCII. + /// + /// Note: As the encoding is unspecified, any sub-slice of bytes that is not valid UTF-8 should + /// be treated as opaque and only comparable within the same Rust version built for the same + /// target platform. For example, sending the bytes over the network or storing it in a file + /// will likely result in incompatible data. See [`OsString`] for more encoding details + /// and [`std::ffi`] for platform-specific, specified conversions. + /// + /// [`std::ffi`]: crate::ffi + #[inline] + #[stable(feature = "os_str_bytes", since = "1.74.0")] + pub fn into_encoded_bytes(self) -> Vec { + self.inner.into_encoded_bytes() + } + + /// Converts the `OsString` into a [`String`] if it contains valid Unicode data. + /// + /// On failure, ownership of the original `OsString` is returned. + /// + /// # Examples + /// + /// ``` + /// use std::ffi::OsString; + /// + /// let os_string = OsString::from("foo"); + /// let string = os_string.into_string(); + /// assert_eq!(string, Ok(String::from("foo"))); + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + #[inline] + pub fn into_string(self) -> Result { + self.inner.into_string().map_err(|buf| OsString { inner: buf }) + } + + /// Extends the string with the given &[OsStr] slice. + /// + /// # Examples + /// + /// ``` + /// use std::ffi::OsString; + /// + /// let mut os_string = OsString::from("foo"); + /// os_string.push("bar"); + /// assert_eq!(&os_string, "foobar"); + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + #[inline] + #[rustc_confusables("append", "put")] + pub fn push>(&mut self, s: T) { + self.inner.push_slice(s.as_ref().as_inner()) + } + + /// Creates a new `OsString` with at least the given capacity. + /// + /// The string will be able to hold at least `capacity` length units of other + /// OS strings without reallocating. This method is allowed to allocate for + /// more units than `capacity`. If `capacity` is 0, the string will not + /// allocate. + /// + /// See the main `OsString` documentation information about encoding and capacity units. + /// + /// # Examples + /// + /// ``` + /// use std::ffi::OsString; + /// + /// let mut os_string = OsString::with_capacity(10); + /// let capacity = os_string.capacity(); + /// + /// // This push is done without reallocating + /// os_string.push("foo"); + /// + /// assert_eq!(capacity, os_string.capacity()); + /// ``` + #[stable(feature = "osstring_simple_functions", since = "1.9.0")] + #[must_use] + #[inline] + pub fn with_capacity(capacity: usize) -> OsString { + OsString { inner: Buf::with_capacity(capacity) } + } + + /// Truncates the `OsString` to zero length. + /// + /// # Examples + /// + /// ``` + /// use std::ffi::OsString; + /// + /// let mut os_string = OsString::from("foo"); + /// assert_eq!(&os_string, "foo"); + /// + /// os_string.clear(); + /// assert_eq!(&os_string, ""); + /// ``` + #[stable(feature = "osstring_simple_functions", since = "1.9.0")] + #[inline] + pub fn clear(&mut self) { + self.inner.clear() + } + + /// Returns the capacity this `OsString` can hold without reallocating. + /// + /// See the main `OsString` documentation information about encoding and capacity units. + /// + /// # Examples + /// + /// ``` + /// use std::ffi::OsString; + /// + /// let os_string = OsString::with_capacity(10); + /// assert!(os_string.capacity() >= 10); + /// ``` + #[stable(feature = "osstring_simple_functions", since = "1.9.0")] + #[must_use] + #[inline] + pub fn capacity(&self) -> usize { + self.inner.capacity() + } + + /// Reserves capacity for at least `additional` more capacity to be inserted + /// in the given `OsString`. Does nothing if the capacity is + /// already sufficient. + /// + /// The collection may reserve more space to speculatively avoid frequent reallocations. + /// + /// See the main `OsString` documentation information about encoding and capacity units. + /// + /// # Examples + /// + /// ``` + /// use std::ffi::OsString; + /// + /// let mut s = OsString::new(); + /// s.reserve(10); + /// assert!(s.capacity() >= 10); + /// ``` + #[stable(feature = "osstring_simple_functions", since = "1.9.0")] + #[inline] + pub fn reserve(&mut self, additional: usize) { + self.inner.reserve(additional) + } + + /// Tries to reserve capacity for at least `additional` more length units + /// in the given `OsString`. The string may reserve more space to speculatively avoid + /// frequent reallocations. After calling `try_reserve`, capacity will be + /// greater than or equal to `self.len() + additional` if it returns `Ok(())`. + /// Does nothing if capacity is already sufficient. This method preserves + /// the contents even if an error occurs. + /// + /// See the main `OsString` documentation information about encoding and capacity units. + /// + /// # Errors + /// + /// If the capacity overflows, or the allocator reports a failure, then an error + /// is returned. + /// + /// # Examples + /// + /// ``` + /// use std::ffi::{OsStr, OsString}; + /// use std::collections::TryReserveError; + /// + /// fn process_data(data: &str) -> Result { + /// let mut s = OsString::new(); + /// + /// // Pre-reserve the memory, exiting if we can't + /// s.try_reserve(OsStr::new(data).len())?; + /// + /// // Now we know this can't OOM in the middle of our complex work + /// s.push(data); + /// + /// Ok(s) + /// } + /// # process_data("123").expect("why is the test harness OOMing on 3 bytes?"); + /// ``` + #[stable(feature = "try_reserve_2", since = "1.63.0")] + #[inline] + pub fn try_reserve(&mut self, additional: usize) -> Result<(), TryReserveError> { + self.inner.try_reserve(additional) + } + + /// Reserves the minimum capacity for at least `additional` more capacity to + /// be inserted in the given `OsString`. Does nothing if the capacity is + /// already sufficient. + /// + /// Note that the allocator may give the collection more space than it + /// requests. Therefore, capacity can not be relied upon to be precisely + /// minimal. Prefer [`reserve`] if future insertions are expected. + /// + /// [`reserve`]: OsString::reserve + /// + /// See the main `OsString` documentation information about encoding and capacity units. + /// + /// # Examples + /// + /// ``` + /// use std::ffi::OsString; + /// + /// let mut s = OsString::new(); + /// s.reserve_exact(10); + /// assert!(s.capacity() >= 10); + /// ``` + #[stable(feature = "osstring_simple_functions", since = "1.9.0")] + #[inline] + pub fn reserve_exact(&mut self, additional: usize) { + self.inner.reserve_exact(additional) + } + + /// Tries to reserve the minimum capacity for at least `additional` + /// more length units in the given `OsString`. After calling + /// `try_reserve_exact`, capacity will be greater than or equal to + /// `self.len() + additional` if it returns `Ok(())`. + /// Does nothing if the capacity is already sufficient. + /// + /// Note that the allocator may give the `OsString` more space than it + /// requests. Therefore, capacity can not be relied upon to be precisely + /// minimal. Prefer [`try_reserve`] if future insertions are expected. + /// + /// [`try_reserve`]: OsString::try_reserve + /// + /// See the main `OsString` documentation information about encoding and capacity units. + /// + /// # Errors + /// + /// If the capacity overflows, or the allocator reports a failure, then an error + /// is returned. + /// + /// # Examples + /// + /// ``` + /// use std::ffi::{OsStr, OsString}; + /// use std::collections::TryReserveError; + /// + /// fn process_data(data: &str) -> Result { + /// let mut s = OsString::new(); + /// + /// // Pre-reserve the memory, exiting if we can't + /// s.try_reserve_exact(OsStr::new(data).len())?; + /// + /// // Now we know this can't OOM in the middle of our complex work + /// s.push(data); + /// + /// Ok(s) + /// } + /// # process_data("123").expect("why is the test harness OOMing on 3 bytes?"); + /// ``` + #[stable(feature = "try_reserve_2", since = "1.63.0")] + #[inline] + pub fn try_reserve_exact(&mut self, additional: usize) -> Result<(), TryReserveError> { + self.inner.try_reserve_exact(additional) + } + + /// Shrinks the capacity of the `OsString` to match its length. + /// + /// See the main `OsString` documentation information about encoding and capacity units. + /// + /// # Examples + /// + /// ``` + /// use std::ffi::OsString; + /// + /// let mut s = OsString::from("foo"); + /// + /// s.reserve(100); + /// assert!(s.capacity() >= 100); + /// + /// s.shrink_to_fit(); + /// assert_eq!(3, s.capacity()); + /// ``` + #[stable(feature = "osstring_shrink_to_fit", since = "1.19.0")] + #[inline] + pub fn shrink_to_fit(&mut self) { + self.inner.shrink_to_fit() + } + + /// Shrinks the capacity of the `OsString` with a lower bound. + /// + /// The capacity will remain at least as large as both the length + /// and the supplied value. + /// + /// If the current capacity is less than the lower limit, this is a no-op. + /// + /// See the main `OsString` documentation information about encoding and capacity units. + /// + /// # Examples + /// + /// ``` + /// use std::ffi::OsString; + /// + /// let mut s = OsString::from("foo"); + /// + /// s.reserve(100); + /// assert!(s.capacity() >= 100); + /// + /// s.shrink_to(10); + /// assert!(s.capacity() >= 10); + /// s.shrink_to(0); + /// assert!(s.capacity() >= 3); + /// ``` + #[inline] + #[stable(feature = "shrink_to", since = "1.56.0")] + pub fn shrink_to(&mut self, min_capacity: usize) { + self.inner.shrink_to(min_capacity) + } + + /// Converts this `OsString` into a boxed [`OsStr`]. + /// + /// # Examples + /// + /// ``` + /// use std::ffi::{OsString, OsStr}; + /// + /// let s = OsString::from("hello"); + /// + /// let b: Box = s.into_boxed_os_str(); + /// ``` + #[must_use = "`self` will be dropped if the result is not used"] + #[stable(feature = "into_boxed_os_str", since = "1.20.0")] + pub fn into_boxed_os_str(self) -> Box { + let rw = Box::into_raw(self.inner.into_box()) as *mut OsStr; + unsafe { Box::from_raw(rw) } + } + + /// Consumes and leaks the `OsString`, returning a mutable reference to the contents, + /// `&'a mut OsStr`. + /// + /// The caller has free choice over the returned lifetime, including 'static. + /// Indeed, this function is ideally used for data that lives for the remainder of + /// the program’s life, as dropping the returned reference will cause a memory leak. + /// + /// It does not reallocate or shrink the `OsString`, so the leaked allocation may include + /// unused capacity that is not part of the returned slice. If you want to discard excess + /// capacity, call [`into_boxed_os_str`], and then [`Box::leak`] instead. + /// However, keep in mind that trimming the capacity may result in a reallocation and copy. + /// + /// [`into_boxed_os_str`]: Self::into_boxed_os_str + #[unstable(feature = "os_string_pathbuf_leak", issue = "125965")] + #[inline] + pub fn leak<'a>(self) -> &'a mut OsStr { + OsStr::from_inner_mut(self.inner.leak()) + } + + /// Provides plumbing to core `Vec::truncate`. + /// More well behaving alternative to allowing outer types + /// full mutable access to the core `Vec`. + #[inline] + #[allow(unused)] + pub(crate) fn truncate(&mut self, len: usize) { + self.inner.truncate(len); + } + + /// Provides plumbing to core `Vec::extend_from_slice`. + /// More well behaving alternative to allowing outer types + /// full mutable access to the core `Vec`. + #[inline] + #[allow(unused)] + pub(crate) fn extend_from_slice(&mut self, other: &[u8]) { + self.inner.extend_from_slice(other); + } +} + +impl OsStr { + /// Returns a copy of this string where each character is mapped to its + /// ASCII upper case equivalent. + /// + /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z', + /// but non-ASCII letters are unchanged. + /// + /// To uppercase the value in-place, use [`OsStr::make_ascii_uppercase`]. + /// + /// # Examples + /// + /// ``` + /// use std::ffi::OsString; + /// let s = OsString::from("Grüße, Jürgen ❤"); + /// + /// assert_eq!("GRüßE, JüRGEN ❤", s.to_ascii_uppercase()); + /// ``` + #[rustc_allow_incoherent_impl] + #[must_use = "to uppercase the value in-place, use `make_ascii_uppercase`"] + #[stable(feature = "osstring_ascii", since = "1.53.0")] + pub fn to_ascii_uppercase(&self) -> OsString { + OsString::from_inner(self.as_inner().to_ascii_uppercase()) + } + + /// Returns a copy of this string where each character is mapped to its + /// ASCII lower case equivalent. + /// + /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z', + /// but non-ASCII letters are unchanged. + /// + /// To lowercase the value in-place, use [`OsStr::make_ascii_lowercase`]. + /// + /// # Examples + /// + /// ``` + /// use std::ffi::OsString; + /// let s = OsString::from("Grüße, Jürgen ❤"); + /// + /// assert_eq!("grüße, jürgen ❤", s.to_ascii_lowercase()); + /// ``` + #[rustc_allow_incoherent_impl] + #[must_use = "to lowercase the value in-place, use `make_ascii_lowercase`"] + #[stable(feature = "osstring_ascii", since = "1.53.0")] + pub fn to_ascii_lowercase(&self) -> OsString { + OsString::from_inner(self.as_inner().to_ascii_lowercase()) + } + + /// Converts a [Box]<[OsStr]> into an [`OsString`] without copying or allocating. + #[rustc_allow_incoherent_impl] + #[stable(feature = "into_boxed_os_str", since = "1.20.0")] + #[must_use = "`self` will be dropped if the result is not used"] + pub fn into_os_string(self: Box) -> OsString { + let boxed = unsafe { Box::from_raw(Box::into_raw(self) as *mut Slice) }; + OsString { inner: Buf::from_box(boxed) } + } + + /// Converts an `OsStr` to a [Cow]<[str]>. + /// + /// Any non-Unicode sequences are replaced with + /// [`U+FFFD REPLACEMENT CHARACTER`][U+FFFD]. + /// + /// [U+FFFD]: crate::char::REPLACEMENT_CHARACTER + /// + /// # Examples + /// + /// Calling `to_string_lossy` on an `OsStr` with invalid unicode: + /// + /// ``` + /// // Note, due to differences in how Unix and Windows represent strings, + /// // we are forced to complicate this example, setting up example `OsStr`s + /// // with different source data and via different platform extensions. + /// // Understand that in reality you could end up with such example invalid + /// // sequences simply through collecting user command line arguments, for + /// // example. + /// + /// #[cfg(unix)] { + /// use std::ffi::OsStr; + /// use std::os::unix::ffi::OsStrExt; + /// + /// // Here, the values 0x66 and 0x6f correspond to 'f' and 'o' + /// // respectively. The value 0x80 is a lone continuation byte, invalid + /// // in a UTF-8 sequence. + /// let source = [0x66, 0x6f, 0x80, 0x6f]; + /// let os_str = OsStr::from_bytes(&source[..]); + /// + /// assert_eq!(os_str.to_string_lossy(), "fo�o"); + /// } + /// #[cfg(windows)] { + /// use std::ffi::OsString; + /// use std::os::windows::prelude::*; + /// + /// // Here the values 0x0066 and 0x006f correspond to 'f' and 'o' + /// // respectively. The value 0xD800 is a lone surrogate half, invalid + /// // in a UTF-16 sequence. + /// let source = [0x0066, 0x006f, 0xD800, 0x006f]; + /// let os_string = OsString::from_wide(&source[..]); + /// let os_str = os_string.as_os_str(); + /// + /// assert_eq!(os_str.to_string_lossy(), "fo�o"); + /// } + /// ``` + #[rustc_allow_incoherent_impl] + #[stable(feature = "rust1", since = "1.0.0")] + #[must_use = "this returns the result of the operation, \ + without modifying the original"] + #[inline] + pub fn to_string_lossy(&self) -> Cow<'_, str> { + self.as_inner().to_string_lossy() + } + + /// Copies the slice into an owned [`OsString`]. + /// + /// # Examples + /// + /// ``` + /// use std::ffi::{OsStr, OsString}; + /// + /// let os_str = OsStr::new("foo"); + /// let os_string = os_str.to_os_string(); + /// assert_eq!(os_string, OsString::from("foo")); + /// ``` + #[rustc_allow_incoherent_impl] + #[stable(feature = "rust1", since = "1.0.0")] + #[must_use = "this returns the result of the operation, \ + without modifying the original"] + #[inline] + pub fn to_os_string(&self) -> OsString { + OsString { inner: self.as_inner().to_owned() } + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl From for OsString { + /// Converts a [`String`] into an [`OsString`]. + /// + /// This conversion does not allocate or copy memory. + #[inline] + fn from(s: String) -> OsString { + OsString { inner: Buf::from_string(s) } + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl> From<&T> for OsString { + /// Copies any value implementing [AsRef]<[OsStr]> + /// into a newly allocated [`OsString`]. + fn from(s: &T) -> OsString { + s.as_ref().to_os_string() + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl ops::Index for OsString { + type Output = OsStr; + + #[inline] + fn index(&self, _index: ops::RangeFull) -> &OsStr { + OsStr::from_inner(self.inner.as_slice()) + } +} + +#[stable(feature = "mut_osstr", since = "1.44.0")] +impl ops::IndexMut for OsString { + #[inline] + fn index_mut(&mut self, _index: ops::RangeFull) -> &mut OsStr { + OsStr::from_inner_mut(self.inner.as_mut_slice()) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl ops::Deref for OsString { + type Target = OsStr; + + #[inline] + fn deref(&self) -> &OsStr { + &self[..] + } +} + +#[stable(feature = "mut_osstr", since = "1.44.0")] +impl ops::DerefMut for OsString { + #[inline] + fn deref_mut(&mut self) -> &mut OsStr { + &mut self[..] + } +} + +#[stable(feature = "osstring_default", since = "1.9.0")] +impl Default for OsString { + /// Constructs an empty `OsString`. + #[inline] + fn default() -> OsString { + OsString::new() + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl Clone for OsString { + #[inline] + fn clone(&self) -> Self { + OsString { inner: self.inner.clone() } + } + + /// Clones the contents of `source` into `self`. + /// + /// This method is preferred over simply assigning `source.clone()` to `self`, + /// as it avoids reallocation if possible. + #[inline] + fn clone_from(&mut self, source: &Self) { + self.inner.clone_from(&source.inner) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl fmt::Debug for OsString { + fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Debug::fmt(&**self, formatter) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl PartialEq for OsString { + #[inline] + fn eq(&self, other: &OsString) -> bool { + &**self == &**other + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl PartialEq for OsString { + #[inline] + fn eq(&self, other: &str) -> bool { + &**self == other + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl PartialEq for str { + #[inline] + fn eq(&self, other: &OsString) -> bool { + &**other == self + } +} + +#[stable(feature = "os_str_str_ref_eq", since = "1.29.0")] +impl PartialEq<&str> for OsString { + #[inline] + fn eq(&self, other: &&str) -> bool { + **self == **other + } +} + +#[stable(feature = "os_str_str_ref_eq", since = "1.29.0")] +impl<'a> PartialEq for &'a str { + #[inline] + fn eq(&self, other: &OsString) -> bool { + **other == **self + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl Eq for OsString {} + +#[stable(feature = "rust1", since = "1.0.0")] +impl PartialOrd for OsString { + #[inline] + fn partial_cmp(&self, other: &OsString) -> Option { + (&**self).partial_cmp(&**other) + } + #[inline] + fn lt(&self, other: &OsString) -> bool { + &**self < &**other + } + #[inline] + fn le(&self, other: &OsString) -> bool { + &**self <= &**other + } + #[inline] + fn gt(&self, other: &OsString) -> bool { + &**self > &**other + } + #[inline] + fn ge(&self, other: &OsString) -> bool { + &**self >= &**other + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl PartialOrd for OsString { + #[inline] + fn partial_cmp(&self, other: &str) -> Option { + (&**self).partial_cmp(other) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl Ord for OsString { + #[inline] + fn cmp(&self, other: &OsString) -> cmp::Ordering { + (&**self).cmp(&**other) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl Hash for OsString { + #[inline] + fn hash(&self, state: &mut H) { + (&**self).hash(state) + } +} + +#[stable(feature = "os_string_fmt_write", since = "1.64.0")] +impl fmt::Write for OsString { + fn write_str(&mut self, s: &str) -> fmt::Result { + self.push(s); + Ok(()) + } +} + +#[stable(feature = "box_from_os_str", since = "1.17.0")] +impl From<&OsStr> for Box { + /// Copies the string into a newly allocated [Box]<[OsStr]>. + #[inline] + fn from(s: &OsStr) -> Box { + let rw = Box::into_raw(s.as_inner().into_box()) as *mut OsStr; + unsafe { Box::from_raw(rw) } + } +} + +#[stable(feature = "box_from_cow", since = "1.45.0")] +impl From> for Box { + /// Converts a `Cow<'a, OsStr>` into a [Box]<[OsStr]>, + /// by copying the contents if they are borrowed. + #[inline] + fn from(cow: Cow<'_, OsStr>) -> Box { + match cow { + Cow::Borrowed(s) => Box::from(s), + Cow::Owned(s) => Box::from(s), + } + } +} + +#[stable(feature = "os_string_from_box", since = "1.18.0")] +impl From> for OsString { + /// Converts a [Box]<[OsStr]> into an [`OsString`] without copying or + /// allocating. + #[inline] + fn from(boxed: Box) -> OsString { + boxed.into_os_string() + } +} + +#[stable(feature = "box_from_os_string", since = "1.20.0")] +impl From for Box { + /// Converts an [`OsString`] into a [Box]<[OsStr]> without copying or allocating. + #[inline] + fn from(s: OsString) -> Box { + s.into_boxed_os_str() + } +} + +#[stable(feature = "more_box_slice_clone", since = "1.29.0")] +impl Clone for Box { + #[inline] + fn clone(&self) -> Self { + self.to_os_string().into_boxed_os_str() + } +} + +#[stable(feature = "shared_from_slice2", since = "1.24.0")] +impl From for Arc { + /// Converts an [`OsString`] into an [Arc]<[OsStr]> by moving the [`OsString`] + /// data into a new [`Arc`] buffer. + #[inline] + fn from(s: OsString) -> Arc { + let arc = s.inner.into_arc(); + unsafe { Arc::from_raw(Arc::into_raw(arc) as *const OsStr) } + } +} + +#[stable(feature = "shared_from_slice2", since = "1.24.0")] +impl From<&OsStr> for Arc { + /// Copies the string into a newly allocated [Arc]<[OsStr]>. + #[inline] + fn from(s: &OsStr) -> Arc { + let arc = s.as_inner().into_arc(); + unsafe { Arc::from_raw(Arc::into_raw(arc) as *const OsStr) } + } +} + +#[stable(feature = "shared_from_slice2", since = "1.24.0")] +impl From for Rc { + /// Converts an [`OsString`] into an [Rc]<[OsStr]> by moving the [`OsString`] + /// data into a new [`Rc`] buffer. + #[inline] + fn from(s: OsString) -> Rc { + let rc = s.inner.into_rc(); + unsafe { Rc::from_raw(Rc::into_raw(rc) as *const OsStr) } + } +} + +#[stable(feature = "shared_from_slice2", since = "1.24.0")] +impl From<&OsStr> for Rc { + /// Copies the string into a newly allocated [Rc]<[OsStr]>. + #[inline] + fn from(s: &OsStr) -> Rc { + let rc = s.as_inner().into_rc(); + unsafe { Rc::from_raw(Rc::into_raw(rc) as *const OsStr) } + } +} + +#[stable(feature = "cow_from_osstr", since = "1.28.0")] +impl<'a> From for Cow<'a, OsStr> { + /// Moves the string into a [`Cow::Owned`]. + #[inline] + fn from(s: OsString) -> Cow<'a, OsStr> { + Cow::Owned(s) + } +} + +#[stable(feature = "cow_from_osstr", since = "1.28.0")] +impl<'a> From<&'a OsStr> for Cow<'a, OsStr> { + /// Converts the string reference into a [`Cow::Borrowed`]. + #[inline] + fn from(s: &'a OsStr) -> Cow<'a, OsStr> { + Cow::Borrowed(s) + } +} + +#[stable(feature = "cow_from_osstr", since = "1.28.0")] +impl<'a> From<&'a OsString> for Cow<'a, OsStr> { + /// Converts the string reference into a [`Cow::Borrowed`]. + #[inline] + fn from(s: &'a OsString) -> Cow<'a, OsStr> { + Cow::Borrowed(s.as_os_str()) + } +} + +#[stable(feature = "osstring_from_cow_osstr", since = "1.28.0")] +impl<'a> From> for OsString { + /// Converts a `Cow<'a, OsStr>` into an [`OsString`], + /// by copying the contents if they are borrowed. + #[inline] + fn from(s: Cow<'a, OsStr>) -> Self { + s.into_owned() + } +} + +#[stable(feature = "box_default_extra", since = "1.17.0")] +impl Default for Box { + #[inline] + fn default() -> Box { + let rw = Box::into_raw(Slice::empty_box()) as *mut OsStr; + unsafe { Box::from_raw(rw) } + } +} + +macro_rules! impl_cmp { + ($lhs:ty, $rhs: ty) => { + #[allow(unused_lifetimes)] + #[stable(feature = "cmp_os_str", since = "1.8.0")] + impl<'a, 'b> PartialEq<$rhs> for $lhs { + #[inline] + fn eq(&self, other: &$rhs) -> bool { + ::eq(self, other) + } + } + + #[allow(unused_lifetimes)] + #[stable(feature = "cmp_os_str", since = "1.8.0")] + impl<'a, 'b> PartialEq<$lhs> for $rhs { + #[inline] + fn eq(&self, other: &$lhs) -> bool { + ::eq(self, other) + } + } + + #[allow(unused_lifetimes)] + #[stable(feature = "cmp_os_str", since = "1.8.0")] + impl<'a, 'b> PartialOrd<$rhs> for $lhs { + #[inline] + fn partial_cmp(&self, other: &$rhs) -> Option { + ::partial_cmp(self, other) + } + } + + #[allow(unused_lifetimes)] + #[stable(feature = "cmp_os_str", since = "1.8.0")] + impl<'a, 'b> PartialOrd<$lhs> for $rhs { + #[inline] + fn partial_cmp(&self, other: &$lhs) -> Option { + ::partial_cmp(self, other) + } + } + }; +} + +impl_cmp!(OsString, OsStr); +impl_cmp!(OsString, &'a OsStr); +impl_cmp!(Cow<'a, OsStr>, OsStr); +impl_cmp!(Cow<'a, OsStr>, &'b OsStr); +impl_cmp!(Cow<'a, OsStr>, OsString); + +#[unstable(feature = "slice_concat_ext", issue = "27747")] +impl> crate::slice::Join<&OsStr> for [S] { + type Output = OsString; + + fn join(slice: &Self, sep: &OsStr) -> OsString { + let Some((first, suffix)) = slice.split_first() else { + return OsString::new(); + }; + let first_owned = first.borrow().to_owned(); + suffix.iter().fold(first_owned, |mut a, b| { + a.push(sep); + a.push(b.borrow()); + a + }) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl Borrow for OsString { + #[inline] + fn borrow(&self) -> &OsStr { + &self[..] + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl ToOwned for OsStr { + type Owned = OsString; + #[inline] + fn to_owned(&self) -> OsString { + self.to_os_string() + } + #[inline] + fn clone_into(&self, target: &mut OsString) { + self.as_inner().clone_into(&mut target.inner) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl AsRef for OsString { + #[inline] + fn as_ref(&self) -> &OsStr { + self + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl AsRef for String { + #[inline] + fn as_ref(&self) -> &OsStr { + (&**self).as_ref() + } +} + +#[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" +)] +#[doc(hidden)] +impl From for OsString { + #[inline] + fn from(buf: Buf) -> OsString { + OsString { inner: buf } + } +} + +#[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" +)] +#[doc(hidden)] +impl Into for OsString { + #[inline] + fn into(self) -> Buf { + self.inner + } +} + +#[stable(feature = "osstring_from_str", since = "1.45.0")] +impl FromStr for OsString { + type Err = core::convert::Infallible; + + #[inline] + fn from_str(s: &str) -> Result { + Ok(OsString::from(s)) + } +} + +#[stable(feature = "osstring_extend", since = "1.52.0")] +impl Extend for OsString { + #[inline] + fn extend>(&mut self, iter: T) { + for s in iter { + self.push(&s); + } + } +} + +#[stable(feature = "osstring_extend", since = "1.52.0")] +impl<'a> Extend<&'a OsStr> for OsString { + #[inline] + fn extend>(&mut self, iter: T) { + for s in iter { + self.push(s); + } + } +} + +#[stable(feature = "osstring_extend", since = "1.52.0")] +impl<'a> Extend> for OsString { + #[inline] + fn extend>>(&mut self, iter: T) { + for s in iter { + self.push(&s); + } + } +} + +#[stable(feature = "osstring_extend", since = "1.52.0")] +impl FromIterator for OsString { + #[inline] + fn from_iter>(iter: I) -> Self { + let mut iterator = iter.into_iter(); + + // Because we're iterating over `OsString`s, we can avoid at least + // one allocation by getting the first string from the iterator + // and appending to it all the subsequent strings. + match iterator.next() { + None => OsString::new(), + Some(mut buf) => { + buf.extend(iterator); + buf + } + } + } +} + +#[stable(feature = "osstring_extend", since = "1.52.0")] +impl<'a> FromIterator<&'a OsStr> for OsString { + #[inline] + fn from_iter>(iter: I) -> Self { + let mut buf = Self::new(); + for s in iter { + buf.push(s); + } + buf + } +} + +#[stable(feature = "osstring_extend", since = "1.52.0")] +impl<'a> FromIterator> for OsString { + #[inline] + fn from_iter>>(iter: I) -> Self { + let mut iterator = iter.into_iter(); + + // Because we're iterating over `OsString`s, we can avoid at least + // one allocation by getting the first owned string from the iterator + // and appending to it all the subsequent strings. + match iterator.next() { + None => OsString::new(), + Some(Cow::Owned(mut buf)) => { + buf.extend(iterator); + buf + } + Some(Cow::Borrowed(buf)) => { + let mut buf = OsString::from(buf); + buf.extend(iterator); + buf + } + } + } +} diff --git a/library/alloc/src/ffi/os_str/bytes.rs b/library/alloc/src/ffi/os_str/bytes.rs new file mode 100644 index 0000000000000..55fe072c7be6e --- /dev/null +++ b/library/alloc/src/ffi/os_str/bytes.rs @@ -0,0 +1,435 @@ +#![allow(missing_docs)] +#![allow(missing_debug_implementations)] + +//! The underlying OsString/OsStr implementation on Unix and many other +//! systems: just a `Vec`/`[u8]`. + +use core::ffi::os_str::Slice; +use core::{fmt, mem, str}; + +use crate::borrow::{Cow, ToOwned}; +use crate::boxed::Box; +use crate::collections::TryReserveError; +use crate::rc::Rc; +use crate::string::String; +use crate::sync::Arc; +use crate::vec::Vec; + +#[cfg(test)] +mod tests; + +#[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" +)] +#[derive(Hash)] +#[repr(transparent)] +pub struct Buf { + pub inner: Vec, +} + +#[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" +)] +impl fmt::Debug for Buf { + fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Debug::fmt(self.as_slice(), formatter) + } +} + +#[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" +)] +impl fmt::Display for Buf { + fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Display::fmt(self.as_slice(), formatter) + } +} + +#[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" +)] +impl Clone for Buf { + #[inline] + fn clone(&self) -> Self { + Buf { inner: self.inner.clone() } + } + + #[inline] + fn clone_from(&mut self, source: &Self) { + self.inner.clone_from(&source.inner) + } +} + +#[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" +)] +impl Into> for Buf { + fn into(self) -> Vec { + self.inner + } +} + +#[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" +)] +impl AsRef<[u8]> for Buf { + #[inline] + fn as_ref(&self) -> &[u8] { + &self.inner + } +} + +impl Buf { + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + #[inline] + pub fn into_encoded_bytes(self) -> Vec { + self.inner + } + + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + #[inline] + pub unsafe fn from_encoded_bytes_unchecked(s: Vec) -> Self { + Self { inner: s } + } + + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + pub fn from_string(s: String) -> Buf { + Buf { inner: s.into_bytes() } + } + + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + #[inline] + pub fn with_capacity(capacity: usize) -> Buf { + Buf { inner: Vec::with_capacity(capacity) } + } + + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + #[inline] + pub fn clear(&mut self) { + self.inner.clear() + } + + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + #[inline] + pub fn capacity(&self) -> usize { + self.inner.capacity() + } + + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + #[inline] + pub fn reserve(&mut self, additional: usize) { + self.inner.reserve(additional) + } + + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + #[inline] + pub fn try_reserve(&mut self, additional: usize) -> Result<(), TryReserveError> { + self.inner.try_reserve(additional) + } + + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + #[inline] + pub fn reserve_exact(&mut self, additional: usize) { + self.inner.reserve_exact(additional) + } + + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + #[inline] + pub fn try_reserve_exact(&mut self, additional: usize) -> Result<(), TryReserveError> { + self.inner.try_reserve_exact(additional) + } + + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + #[inline] + pub fn shrink_to_fit(&mut self) { + self.inner.shrink_to_fit() + } + + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + #[inline] + pub fn shrink_to(&mut self, min_capacity: usize) { + self.inner.shrink_to(min_capacity) + } + + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + #[inline] + pub fn as_slice(&self) -> &Slice { + // SAFETY: Slice just wraps [u8], + // and &*self.inner is &[u8], therefore + // transmuting &[u8] to &Slice is safe. + unsafe { mem::transmute(&*self.inner) } + } + + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + #[inline] + pub fn as_mut_slice(&mut self) -> &mut Slice { + // SAFETY: Slice just wraps [u8], + // and &mut *self.inner is &mut [u8], therefore + // transmuting &mut [u8] to &mut Slice is safe. + unsafe { mem::transmute(&mut *self.inner) } + } + + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + pub fn into_string(self) -> Result { + String::from_utf8(self.inner).map_err(|p| Buf { inner: p.into_bytes() }) + } + + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + pub fn push_slice(&mut self, s: &Slice) { + self.inner.extend_from_slice(&s.inner) + } + + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + #[inline] + pub fn leak<'a>(self) -> &'a mut Slice { + unsafe { mem::transmute(self.inner.leak()) } + } + + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + #[inline] + pub fn into_box(self) -> Box { + unsafe { mem::transmute(self.inner.into_boxed_slice()) } + } + + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + #[inline] + pub fn from_box(boxed: Box) -> Buf { + let inner: Box<[u8]> = unsafe { mem::transmute(boxed) }; + Buf { inner: inner.into_vec() } + } + + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + #[inline] + pub fn into_arc(&self) -> Arc { + self.as_slice().into_arc() + } + + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + #[inline] + pub fn into_rc(&self) -> Rc { + self.as_slice().into_rc() + } + + /// Provides plumbing to core `Vec::truncate`. + /// More well behaving alternative to allowing outer types + /// full mutable access to the core `Vec`. + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + #[inline] + pub(crate) fn truncate(&mut self, len: usize) { + self.inner.truncate(len); + } + + /// Provides plumbing to core `Vec::extend_from_slice`. + /// More well behaving alternative to allowing outer types + /// full mutable access to the core `Vec`. + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + #[inline] + pub(crate) fn extend_from_slice(&mut self, other: &[u8]) { + self.inner.extend_from_slice(other); + } +} + +impl Slice { + #[rustc_allow_incoherent_impl] + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + pub fn to_string_lossy(&self) -> Cow<'_, str> { + String::from_utf8_lossy(&self.inner) + } + + #[rustc_allow_incoherent_impl] + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + pub fn to_owned(&self) -> Buf { + Buf { inner: self.inner.to_vec() } + } + + #[rustc_allow_incoherent_impl] + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + pub fn clone_into(&self, buf: &mut Buf) { + self.inner.clone_into(&mut buf.inner) + } + + #[rustc_allow_incoherent_impl] + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + #[inline] + pub fn into_box(&self) -> Box { + let boxed: Box<[u8]> = self.inner.into(); + unsafe { mem::transmute(boxed) } + } + + #[rustc_allow_incoherent_impl] + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + pub fn empty_box() -> Box { + let boxed: Box<[u8]> = Default::default(); + unsafe { mem::transmute(boxed) } + } + + #[rustc_allow_incoherent_impl] + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + #[inline] + pub fn into_arc(&self) -> Arc { + let arc: Arc<[u8]> = Arc::from(&self.inner); + unsafe { Arc::from_raw(Arc::into_raw(arc) as *const Slice) } + } + + #[rustc_allow_incoherent_impl] + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + #[inline] + pub fn into_rc(&self) -> Rc { + let rc: Rc<[u8]> = Rc::from(&self.inner); + unsafe { Rc::from_raw(Rc::into_raw(rc) as *const Slice) } + } + + #[rustc_allow_incoherent_impl] + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + #[inline] + pub fn to_ascii_lowercase(&self) -> Buf { + Buf { inner: self.inner.to_ascii_lowercase() } + } + + #[rustc_allow_incoherent_impl] + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + #[inline] + pub fn to_ascii_uppercase(&self) -> Buf { + Buf { inner: self.inner.to_ascii_uppercase() } + } +} diff --git a/library/alloc/src/ffi/os_str/os_str_ext_unix.rs b/library/alloc/src/ffi/os_str/os_str_ext_unix.rs new file mode 100644 index 0000000000000..fd8891624b8f3 --- /dev/null +++ b/library/alloc/src/ffi/os_str/os_str_ext_unix.rs @@ -0,0 +1,35 @@ +//! [`OsStringExt`] for unix. + +use super::{private, Buf, OsString}; +use crate::vec::Vec; + +/// Platform-specific extensions to [`OsString`]. +/// +/// This trait is sealed: it cannot be implemented outside the standard library. +/// This is so that future additional methods are not breaking changes. +#[stable(feature = "rust1", since = "1.0.0")] +pub trait OsStringExt: private::Sealed { + /// Creates an [`OsString`] from a byte vector. + /// + /// See the module documentation for an example. + #[stable(feature = "rust1", since = "1.0.0")] + fn from_vec(vec: Vec) -> Self; + + /// Yields the underlying byte vector of this [`OsString`]. + /// + /// See the module documentation for an example. + #[stable(feature = "rust1", since = "1.0.0")] + fn into_vec(self) -> Vec; +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl OsStringExt for OsString { + #[inline] + fn from_vec(vec: Vec) -> OsString { + From::from(Buf { inner: vec }) + } + #[inline] + fn into_vec(self) -> Vec { + self.inner.inner + } +} diff --git a/library/alloc/src/ffi/os_str/os_str_ext_windows.rs b/library/alloc/src/ffi/os_str/os_str_ext_windows.rs new file mode 100644 index 0000000000000..fa2e0a8d25f26 --- /dev/null +++ b/library/alloc/src/ffi/os_str/os_str_ext_windows.rs @@ -0,0 +1,38 @@ +//! [`OsStringExt`] for windows. + +use super::{private, Buf, OsString}; +use crate::ffi::wtf8::Wtf8Buf; + +/// Windows-specific extensions to [`OsString`]. +/// +/// This trait is sealed: it cannot be implemented outside the standard library. +/// This is so that future additional methods are not breaking changes. +#[stable(feature = "rust1", since = "1.0.0")] +pub trait OsStringExt: private::Sealed { + /// Creates an `OsString` from a potentially ill-formed UTF-16 slice of + /// 16-bit code units. + /// + /// This is lossless: calling [`OsStrExt::encode_wide`] on the resulting string + /// will always return the original code units. + /// + /// # Examples + /// + /// ``` + /// use std::ffi::OsString; + /// use std::os::windows::prelude::*; + /// + /// // UTF-16 encoding for "Unicode". + /// let source = [0x0055, 0x006E, 0x0069, 0x0063, 0x006F, 0x0064, 0x0065]; + /// + /// let string = OsString::from_wide(&source[..]); + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + fn from_wide(wide: &[u16]) -> Self; +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl OsStringExt for OsString { + fn from_wide(wide: &[u16]) -> OsString { + From::from(Buf { inner: Wtf8Buf::from_wide(wide) }) + } +} diff --git a/library/std/src/ffi/os_str/tests.rs b/library/alloc/src/ffi/os_str/tests.rs similarity index 73% rename from library/std/src/ffi/os_str/tests.rs rename to library/alloc/src/ffi/os_str/tests.rs index 67147934b4db3..5b3e3dadb737e 100644 --- a/library/std/src/ffi/os_str/tests.rs +++ b/library/alloc/src/ffi/os_str/tests.rs @@ -1,28 +1,28 @@ +#![feature(os_str_slice)] + use super::*; -use crate::mem::MaybeUninit; -use crate::ptr; #[test] fn test_os_string_with_capacity() { let os_string = OsString::with_capacity(0); - assert_eq!(0, os_string.inner.into_inner().capacity()); + assert_eq!(0, os_string.inner.inner.capacity()); let os_string = OsString::with_capacity(10); - assert_eq!(10, os_string.inner.into_inner().capacity()); + assert_eq!(10, os_string.inner.inner.capacity()); let mut os_string = OsString::with_capacity(0); os_string.push("abc"); - assert!(os_string.inner.into_inner().capacity() >= 3); + assert!(os_string.inner.inner.capacity() >= 3); } #[test] fn test_os_string_clear() { let mut os_string = OsString::from("abc"); - assert_eq!(3, os_string.inner.as_inner().len()); + assert_eq!(3, os_string.inner.inner.len()); os_string.clear(); assert_eq!(&os_string, ""); - assert_eq!(0, os_string.inner.as_inner().len()); + assert_eq!(0, os_string.inner.inner.len()); } #[test] @@ -146,7 +146,7 @@ fn into_boxed() { let orig = "Hello, world!"; let os_str = OsStr::new(orig); let boxed: Box = Box::from(os_str); - let os_string = os_str.to_owned().into_boxed_os_str().into_os_string(); + let os_string = OsStr::into_os_string(os_str.to_owned().into_boxed_os_str()); assert_eq!(os_str, &*boxed); assert_eq!(&*boxed, &*os_string); assert_eq!(&*os_string, os_str); @@ -185,54 +185,13 @@ fn into_rc() { assert_eq!(&*arc2, os_str); } -#[test] -fn slice_encoded_bytes() { - let os_str = OsStr::new("123θგ🦀"); - // ASCII - let digits = os_str.slice_encoded_bytes(..3); - assert_eq!(digits, "123"); - let three = os_str.slice_encoded_bytes(2..3); - assert_eq!(three, "3"); - // 2-byte UTF-8 - let theta = os_str.slice_encoded_bytes(3..5); - assert_eq!(theta, "θ"); - // 3-byte UTF-8 - let gani = os_str.slice_encoded_bytes(5..8); - assert_eq!(gani, "გ"); - // 4-byte UTF-8 - let crab = os_str.slice_encoded_bytes(8..); - assert_eq!(crab, "🦀"); -} - -#[test] -#[should_panic] -fn slice_out_of_bounds() { - let crab = OsStr::new("🦀"); - let _ = crab.slice_encoded_bytes(..5); -} - -#[test] -#[should_panic] -fn slice_mid_char() { - let crab = OsStr::new("🦀"); - let _ = crab.slice_encoded_bytes(..2); -} - -#[cfg(unix)] -#[test] -#[should_panic(expected = "byte index 1 is not an OsStr boundary")] -fn slice_invalid_data() { - use crate::os::unix::ffi::OsStrExt; - - let os_string = OsStr::from_bytes(b"\xFF\xFF"); - let _ = os_string.slice_encoded_bytes(1..); -} - #[cfg(unix)] #[test] #[should_panic(expected = "byte index 1 is not an OsStr boundary")] fn slice_partial_utf8() { - use crate::os::unix::ffi::{OsStrExt, OsStringExt}; + use core::ffi::os_str::os_str_ext_unix::OsStrExt; + + use os_str_ext_unix::OsStringExt; let part_crab = OsStr::from_bytes(&"🦀".as_bytes()[..3]); let mut os_string = OsString::from_vec(vec![0xFF]); @@ -243,7 +202,9 @@ fn slice_partial_utf8() { #[cfg(unix)] #[test] fn slice_invalid_edge() { - use crate::os::unix::ffi::{OsStrExt, OsStringExt}; + use core::ffi::os_str::os_str_ext_unix::OsStrExt; + + use os_str_ext_unix::OsStringExt; let os_string = OsStr::from_bytes(b"a\xFFa"); assert_eq!(os_string.slice_encoded_bytes(..1), "a"); @@ -265,7 +226,7 @@ fn slice_invalid_edge() { #[test] #[should_panic(expected = "byte index 3 lies between surrogate codepoints")] fn slice_between_surrogates() { - use crate::os::windows::ffi::OsStringExt; + use crate::ffi::os_str::os_str_ext_windows::OsStringExt; let os_string = OsString::from_wide(&[0xD800, 0xD800]); assert_eq!(os_string.as_encoded_bytes(), &[0xED, 0xA0, 0x80, 0xED, 0xA0, 0x80]); @@ -275,7 +236,7 @@ fn slice_between_surrogates() { #[cfg(windows)] #[test] fn slice_surrogate_edge() { - use crate::os::windows::ffi::OsStringExt; + use crate::ffi::os_str::os_str_ext_windows::OsStringExt; let surrogate = OsString::from_wide(&[0xD800]); let mut pre_crab = surrogate.clone(); @@ -288,18 +249,3 @@ fn slice_surrogate_edge() { assert_eq!(post_crab.slice_encoded_bytes(..4), "🦀"); assert_eq!(post_crab.slice_encoded_bytes(4..), surrogate); } - -#[test] -fn clone_to_uninit() { - let a = OsStr::new("hello.txt"); - - let mut storage = vec![MaybeUninit::::uninit(); size_of_val::(a)]; - unsafe { a.clone_to_uninit(ptr::from_mut::<[_]>(storage.as_mut_slice()) as *mut OsStr) }; - assert_eq!(a.as_encoded_bytes(), unsafe { MaybeUninit::slice_assume_init_ref(&storage) }); - - let mut b: Box = OsStr::new("world.exe").into(); - assert_eq!(size_of_val::(a), size_of_val::(&b)); - assert_ne!(a, &*b); - unsafe { a.clone_to_uninit(ptr::from_mut::(&mut b)) }; - assert_eq!(a, &*b); -} diff --git a/library/alloc/src/ffi/os_str/wtf8.rs b/library/alloc/src/ffi/os_str/wtf8.rs new file mode 100644 index 0000000000000..004ba6ca9094f --- /dev/null +++ b/library/alloc/src/ffi/os_str/wtf8.rs @@ -0,0 +1,416 @@ +#![allow(missing_docs)] +#![allow(missing_debug_implementations)] + +//! The underlying OsString/OsStr implementation on Windows is a +//! wrapper around the "WTF-8" encoding; see the `wtf8` module for more. +use core::ffi::os_str::Slice; +use core::ffi::wtf8::Wtf8; +use core::{fmt, mem}; + +use crate::borrow::Cow; +use crate::boxed::Box; +use crate::collections::TryReserveError; +use crate::ffi::wtf8::Wtf8Buf; +use crate::rc::Rc; +use crate::string::String; +use crate::sync::Arc; +use crate::vec::Vec; + +#[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" +)] +#[derive(Clone, Hash)] +pub struct Buf { + pub inner: Wtf8Buf, +} + +#[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" +)] +impl Into for Buf { + fn into(self) -> Wtf8Buf { + self.inner + } +} + +#[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" +)] +impl From for Buf { + fn from(inner: Wtf8Buf) -> Self { + Buf { inner } + } +} + +#[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" +)] +impl AsRef for Buf { + #[inline] + fn as_ref(&self) -> &Wtf8 { + &self.inner + } +} + +#[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" +)] +impl fmt::Debug for Buf { + fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Debug::fmt(self.as_slice(), formatter) + } +} + +#[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" +)] +impl fmt::Display for Buf { + fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Display::fmt(self.as_slice(), formatter) + } +} + +impl Buf { + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + #[inline] + pub fn into_encoded_bytes(self) -> Vec { + self.inner.into_bytes() + } + + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + #[inline] + pub unsafe fn from_encoded_bytes_unchecked(s: Vec) -> Self { + unsafe { Self { inner: Wtf8Buf::from_bytes_unchecked(s) } } + } + + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + pub fn with_capacity(capacity: usize) -> Buf { + Buf { inner: Wtf8Buf::with_capacity(capacity) } + } + + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + pub fn clear(&mut self) { + self.inner.clear() + } + + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + pub fn capacity(&self) -> usize { + self.inner.capacity() + } + + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + pub fn from_string(s: String) -> Buf { + Buf { inner: Wtf8Buf::from_string(s) } + } + + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + pub fn as_slice(&self) -> &Slice { + // SAFETY: Slice is just a wrapper for Wtf8, + // and self.inner.as_slice() returns &Wtf8. + // Therefore, transmuting &Wtf8 to &Slice is safe. + unsafe { mem::transmute(self.inner.as_slice()) } + } + + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + pub fn as_mut_slice(&mut self) -> &mut Slice { + // SAFETY: Slice is just a wrapper for Wtf8, + // and self.inner.as_mut_slice() returns &mut Wtf8. + // Therefore, transmuting &mut Wtf8 to &mut Slice is safe. + // Additionally, care should be taken to ensure the slice + // is always valid Wtf8. + unsafe { mem::transmute(self.inner.as_mut_slice()) } + } + + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + pub fn into_string(self) -> Result { + self.inner.into_string().map_err(|buf| Buf { inner: buf }) + } + + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + pub fn push_slice(&mut self, s: &Slice) { + self.inner.push_wtf8(&s.inner) + } + + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + pub fn reserve(&mut self, additional: usize) { + self.inner.reserve(additional) + } + + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + pub fn try_reserve(&mut self, additional: usize) -> Result<(), TryReserveError> { + self.inner.try_reserve(additional) + } + + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + pub fn reserve_exact(&mut self, additional: usize) { + self.inner.reserve_exact(additional) + } + + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + pub fn try_reserve_exact(&mut self, additional: usize) -> Result<(), TryReserveError> { + self.inner.try_reserve_exact(additional) + } + + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + pub fn shrink_to_fit(&mut self) { + self.inner.shrink_to_fit() + } + + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + #[inline] + pub fn shrink_to(&mut self, min_capacity: usize) { + self.inner.shrink_to(min_capacity) + } + + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + #[inline] + pub fn leak<'a>(self) -> &'a mut Slice { + unsafe { mem::transmute(self.inner.leak()) } + } + + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + #[inline] + pub fn into_box(self) -> Box { + unsafe { mem::transmute(self.inner.into_box()) } + } + + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + #[inline] + pub fn from_box(boxed: Box) -> Buf { + let inner: Box = unsafe { mem::transmute(boxed) }; + Buf { inner: Wtf8Buf::from_box(inner) } + } + + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + #[inline] + pub fn into_arc(&self) -> Arc { + self.as_slice().into_arc() + } + + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + #[inline] + pub fn into_rc(&self) -> Rc { + self.as_slice().into_rc() + } + + /// Provides plumbing to core `Vec::truncate`. + /// More well behaving alternative to allowing outer types + /// full mutable access to the core `Vec`. + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + #[inline] + pub(crate) fn truncate(&mut self, len: usize) { + self.inner.truncate(len); + } + + /// Provides plumbing to core `Vec::extend_from_slice`. + /// More well behaving alternative to allowing outer types + /// full mutable access to the core `Vec`. + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + #[inline] + pub(crate) fn extend_from_slice(&mut self, other: &[u8]) { + self.inner.extend_from_slice(other); + } +} + +impl Slice { + #[rustc_allow_incoherent_impl] + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + pub fn to_string_lossy(&self) -> Cow<'_, str> { + self.inner.to_string_lossy() + } + + #[rustc_allow_incoherent_impl] + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + pub fn to_owned(&self) -> Buf { + Buf { inner: self.inner.to_owned() } + } + + #[rustc_allow_incoherent_impl] + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + pub fn clone_into(&self, buf: &mut Buf) { + self.inner.clone_into(&mut buf.inner) + } + + #[rustc_allow_incoherent_impl] + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + #[inline] + pub fn into_box(&self) -> Box { + unsafe { mem::transmute(self.inner.into_box()) } + } + + #[rustc_allow_incoherent_impl] + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + pub fn empty_box() -> Box { + unsafe { mem::transmute(Wtf8::empty_box()) } + } + + #[rustc_allow_incoherent_impl] + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + #[inline] + pub fn into_arc(&self) -> Arc { + let arc = self.inner.into_arc(); + unsafe { Arc::from_raw(Arc::into_raw(arc) as *const Slice) } + } + + #[rustc_allow_incoherent_impl] + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + #[inline] + pub fn into_rc(&self) -> Rc { + let rc = self.inner.into_rc(); + unsafe { Rc::from_raw(Rc::into_raw(rc) as *const Slice) } + } + + #[rustc_allow_incoherent_impl] + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + #[inline] + pub fn to_ascii_lowercase(&self) -> Buf { + Buf { inner: self.inner.to_ascii_lowercase() } + } + + #[rustc_allow_incoherent_impl] + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + #[inline] + pub fn to_ascii_uppercase(&self) -> Buf { + Buf { inner: self.inner.to_ascii_uppercase() } + } +} diff --git a/library/alloc/src/ffi/wtf8.rs b/library/alloc/src/ffi/wtf8.rs new file mode 100644 index 0000000000000..c9d9f0dc0c0fb --- /dev/null +++ b/library/alloc/src/ffi/wtf8.rs @@ -0,0 +1,703 @@ +#![allow(missing_docs)] +#![allow(missing_debug_implementations)] + +//! Implementation of [the WTF-8 encoding](https://simonsapin.github.io/wtf-8/). +//! +//! This library uses Rust’s type system to maintain +//! [well-formedness](https://simonsapin.github.io/wtf-8/#well-formed), +//! like the `String` and `&str` types do for UTF-8. +//! +//! Since [WTF-8 must not be used +//! for interchange](https://simonsapin.github.io/wtf-8/#intended-audience), +//! this library deliberately does not provide access to the underlying bytes +//! of WTF-8 strings, +//! nor can it decode WTF-8 from arbitrary bytes. +//! WTF-8 strings can be obtained from UTF-8, UTF-16, or code points. + +// this module is imported from @SimonSapin's repo and has tons of dead code on +// unix (it's mostly used on windows), so don't worry about dead code here. +#![allow(dead_code)] + +#[cfg(test)] +mod tests; + +use core::char::encode_utf8_raw; +use core::ffi::wtf8::*; +use core::hash::{Hash, Hasher}; +use core::{fmt, mem, ops, str}; + +use crate::borrow::{Cow, ToOwned}; +use crate::boxed::Box; +use crate::collections::TryReserveError; +use crate::rc::Rc; +use crate::string::String; +use crate::sync::Arc; +use crate::vec::Vec; + +/// An owned, growable string of well-formed WTF-8 data. +/// +/// Similar to `String`, but can additionally contain surrogate code points +/// if they’re not in a surrogate pair. +#[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" +)] +#[derive(Eq, PartialEq, Ord, PartialOrd, Clone)] +pub struct Wtf8Buf { + bytes: Vec, + + /// Do we know that `bytes` holds a valid UTF-8 encoding? We can easily + /// know this if we're constructed from a `String` or `&str`. + /// + /// It is possible for `bytes` to have valid UTF-8 without this being + /// set, such as when we're concatenating `&Wtf8`'s and surrogates become + /// paired, as we don't bother to rescan the entire string. + is_known_utf8: bool, +} + +#[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" +)] +impl ops::Deref for Wtf8Buf { + type Target = Wtf8; + + fn deref(&self) -> &Wtf8 { + self.as_slice() + } +} + +#[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" +)] +impl ops::DerefMut for Wtf8Buf { + fn deref_mut(&mut self) -> &mut Wtf8 { + self.as_mut_slice() + } +} + +/// Format the string with double quotes, +/// and surrogates as `\u` followed by four hexadecimal digits. +/// Example: `"a\u{D800}"` for a string with code points [U+0061, U+D800] +#[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" +)] +impl fmt::Debug for Wtf8Buf { + #[inline] + fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Debug::fmt(&**self, formatter) + } +} + +impl Wtf8Buf { + /// Creates a new, empty WTF-8 string. + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + #[inline] + pub fn new() -> Wtf8Buf { + Wtf8Buf { bytes: Vec::new(), is_known_utf8: true } + } + + /// Creates a new, empty WTF-8 string with pre-allocated capacity for `capacity` bytes. + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + #[inline] + pub fn with_capacity(capacity: usize) -> Wtf8Buf { + Wtf8Buf { bytes: Vec::with_capacity(capacity), is_known_utf8: true } + } + + /// Creates a WTF-8 string from a WTF-8 byte vec. + /// + /// Since the byte vec is not checked for valid WTF-8, this functions is + /// marked unsafe. + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + #[inline] + pub unsafe fn from_bytes_unchecked(value: Vec) -> Wtf8Buf { + Wtf8Buf { bytes: value, is_known_utf8: false } + } + + /// Creates a WTF-8 string from a UTF-8 `String`. + /// + /// This takes ownership of the `String` and does not copy. + /// + /// Since WTF-8 is a superset of UTF-8, this always succeeds. + #[inline] + pub fn from_string(string: String) -> Wtf8Buf { + Wtf8Buf { bytes: string.into_bytes(), is_known_utf8: true } + } + + /// Creates a WTF-8 string from a UTF-8 `&str` slice. + /// + /// This copies the content of the slice. + /// + /// Since WTF-8 is a superset of UTF-8, this always succeeds. + #[inline] + pub fn from_str(str: &str) -> Wtf8Buf { + Wtf8Buf { bytes: <[_]>::to_vec(str.as_bytes()), is_known_utf8: true } + } + + pub fn clear(&mut self) { + self.bytes.clear(); + self.is_known_utf8 = true; + } + + /// Creates a WTF-8 string from a potentially ill-formed UTF-16 slice of 16-bit code units. + /// + /// This is lossless: calling `.encode_wide()` on the resulting string + /// will always return the original code units. + pub fn from_wide(v: &[u16]) -> Wtf8Buf { + let mut string = Wtf8Buf::with_capacity(v.len()); + for item in char::decode_utf16(v.iter().cloned()) { + match item { + Ok(ch) => string.push_char(ch), + Err(surrogate) => { + let surrogate = surrogate.unpaired_surrogate(); + // Surrogates are known to be in the code point range. + let code_point = unsafe { CodePoint::from_u32_unchecked(surrogate as u32) }; + // The string will now contain an unpaired surrogate. + string.is_known_utf8 = false; + // Skip the WTF-8 concatenation check, + // surrogate pairs are already decoded by decode_utf16 + string.push_code_point_unchecked(code_point); + } + } + } + string + } + + /// Copied from String::push + /// This does **not** include the WTF-8 concatenation check or `is_known_utf8` check. + fn push_code_point_unchecked(&mut self, code_point: CodePoint) { + let mut bytes = [0; 4]; + let bytes = encode_utf8_raw(code_point.to_u32(), &mut bytes); + self.bytes.extend_from_slice(bytes) + } + + #[inline] + pub fn as_slice(&self) -> &Wtf8 { + unsafe { Wtf8::from_bytes_unchecked(&self.bytes) } + } + + #[inline] + pub fn as_mut_slice(&mut self) -> &mut Wtf8 { + // Safety: `Wtf8` doesn't expose any way to mutate the bytes that would + // cause them to change from well-formed UTF-8 to ill-formed UTF-8, + // which would break the assumptions of the `is_known_utf8` field. + unsafe { Wtf8::from_mut_bytes_unchecked(&mut self.bytes) } + } + + /// Reserves capacity for at least `additional` more bytes to be inserted + /// in the given `Wtf8Buf`. + /// The collection may reserve more space to avoid frequent reallocations. + /// + /// # Panics + /// + /// Panics if the new capacity overflows `usize`. + #[inline] + pub fn reserve(&mut self, additional: usize) { + self.bytes.reserve(additional) + } + + /// Tries to reserve capacity for at least `additional` more length units + /// in the given `Wtf8Buf`. The `Wtf8Buf` may reserve more space to avoid + /// frequent reallocations. After calling `try_reserve`, capacity will be + /// greater than or equal to `self.len() + additional`. Does nothing if + /// capacity is already sufficient. This method preserves the contents even + /// if an error occurs. + /// + /// # Errors + /// + /// If the capacity overflows, or the allocator reports a failure, then an error + /// is returned. + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + #[inline] + pub fn try_reserve(&mut self, additional: usize) -> Result<(), TryReserveError> { + self.bytes.try_reserve(additional) + } + + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + #[inline] + pub fn reserve_exact(&mut self, additional: usize) { + self.bytes.reserve_exact(additional) + } + + /// Tries to reserve the minimum capacity for exactly `additional` + /// length units in the given `Wtf8Buf`. After calling + /// `try_reserve_exact`, capacity will be greater than or equal to + /// `self.len() + additional` if it returns `Ok(())`. + /// Does nothing if the capacity is already sufficient. + /// + /// Note that the allocator may give the `Wtf8Buf` more space than it + /// requests. Therefore, capacity can not be relied upon to be precisely + /// minimal. Prefer [`try_reserve`] if future insertions are expected. + /// + /// [`try_reserve`]: Wtf8Buf::try_reserve + /// + /// # Errors + /// + /// If the capacity overflows, or the allocator reports a failure, then an error + /// is returned. + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + #[inline] + pub fn try_reserve_exact(&mut self, additional: usize) -> Result<(), TryReserveError> { + self.bytes.try_reserve_exact(additional) + } + + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + #[inline] + pub fn shrink_to_fit(&mut self) { + self.bytes.shrink_to_fit() + } + + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + #[inline] + pub fn shrink_to(&mut self, min_capacity: usize) { + self.bytes.shrink_to(min_capacity) + } + + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + #[inline] + pub fn leak<'a>(self) -> &'a mut Wtf8 { + unsafe { Wtf8::from_mut_bytes_unchecked(self.bytes.leak()) } + } + + /// Returns the number of bytes that this string buffer can hold without reallocating. + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + #[inline] + pub fn capacity(&self) -> usize { + self.bytes.capacity() + } + + /// Append a UTF-8 slice at the end of the string. + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + #[inline] + pub fn push_str(&mut self, other: &str) { + self.bytes.extend_from_slice(other.as_bytes()) + } + + /// Append a WTF-8 slice at the end of the string. + /// + /// This replaces newly paired surrogates at the boundary + /// with a supplementary code point, + /// like concatenating ill-formed UTF-16 strings effectively would. + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + #[inline] + pub fn push_wtf8(&mut self, other: &Wtf8) { + match ((&*self).final_lead_surrogate(), other.initial_trail_surrogate()) { + // Replace newly paired surrogates by a supplementary code point. + (Some(lead), Some(trail)) => { + let len_without_lead_surrogate = self.len() - 3; + self.bytes.truncate(len_without_lead_surrogate); + let other_without_trail_surrogate = &other.as_bytes()[3..]; + // 4 bytes for the supplementary code point + self.bytes.reserve(4 + other_without_trail_surrogate.len()); + self.push_char(decode_surrogate_pair(lead, trail)); + self.bytes.extend_from_slice(other_without_trail_surrogate); + } + _ => { + // If we'll be pushing a string containing a surrogate, we may + // no longer have UTF-8. + if other.next_surrogate(0).is_some() { + self.is_known_utf8 = false; + } + + self.bytes.extend_from_slice(other.as_bytes()); + } + } + } + + /// Append a Unicode scalar value at the end of the string. + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + #[inline] + pub fn push_char(&mut self, c: char) { + self.push_code_point_unchecked(CodePoint::from_char(c)) + } + + /// Append a code point at the end of the string. + /// + /// This replaces newly paired surrogates at the boundary + /// with a supplementary code point, + /// like concatenating ill-formed UTF-16 strings effectively would. + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + #[inline] + pub fn push(&mut self, code_point: CodePoint) { + if let Some(trail) = code_point.to_trail_surrogate() { + if let Some(lead) = (&*self).final_lead_surrogate() { + let len_without_lead_surrogate = self.len() - 3; + self.bytes.truncate(len_without_lead_surrogate); + self.push_char(decode_surrogate_pair(lead, trail)); + return; + } + + // We're pushing a trailing surrogate. + self.is_known_utf8 = false; + } else if code_point.to_lead_surrogate().is_some() { + // We're pushing a leading surrogate. + self.is_known_utf8 = false; + } + + // No newly paired surrogates at the boundary. + self.push_code_point_unchecked(code_point) + } + + /// Shortens a string to the specified length. + /// + /// # Panics + /// + /// Panics if `new_len` > current length, + /// or if `new_len` is not a code point boundary. + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + #[inline] + pub fn truncate(&mut self, new_len: usize) { + assert!(is_code_point_boundary(self, new_len)); + self.bytes.truncate(new_len) + } + + /// Consumes the WTF-8 string and tries to convert it to a vec of bytes. + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + #[inline] + pub fn into_bytes(self) -> Vec { + self.bytes + } + + /// Consumes the WTF-8 string and tries to convert it to UTF-8. + /// + /// This does not copy the data. + /// + /// If the contents are not well-formed UTF-8 + /// (that is, if the string contains surrogates), + /// the original WTF-8 string is returned instead. + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + pub fn into_string(self) -> Result { + if self.is_known_utf8 || self.next_surrogate(0).is_none() { + Ok(unsafe { String::from_utf8_unchecked(self.bytes) }) + } else { + Err(self) + } + } + + /// Consumes the WTF-8 string and converts it lossily to UTF-8. + /// + /// This does not copy the data (but may overwrite parts of it in place). + /// + /// Surrogates are replaced with `"\u{FFFD}"` (the replacement character “�”) + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + pub fn into_string_lossy(mut self) -> String { + // Fast path: If we already have UTF-8, we can return it immediately. + if self.is_known_utf8 { + return unsafe { String::from_utf8_unchecked(self.bytes) }; + } + + let mut pos = 0; + loop { + match self.next_surrogate(pos) { + Some((surrogate_pos, _)) => { + pos = surrogate_pos + 3; + self.bytes[surrogate_pos..pos] + .copy_from_slice(UTF8_REPLACEMENT_CHARACTER.as_bytes()); + } + None => return unsafe { String::from_utf8_unchecked(self.bytes) }, + } + } + } + + /// Converts this `Wtf8Buf` into a boxed `Wtf8`. + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + #[inline] + pub fn into_box(self) -> Box { + // SAFETY: relies on `Wtf8` being `repr(transparent)`. + unsafe { mem::transmute(self.bytes.into_boxed_slice()) } + } + + /// Converts a `Box` into a `Wtf8Buf`. + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + pub fn from_box(boxed: Box) -> Wtf8Buf { + let bytes: Box<[u8]> = unsafe { mem::transmute(boxed) }; + Wtf8Buf { bytes: bytes.into_vec(), is_known_utf8: false } + } + + /// Provides plumbing to core `Vec::extend_from_slice`. + /// More well behaving alternative to allowing outer types + /// full mutable access to the core `Vec`. + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + #[inline] + pub(crate) fn extend_from_slice(&mut self, other: &[u8]) { + self.bytes.extend_from_slice(other); + self.is_known_utf8 = false; + } +} + +/// Creates a new WTF-8 string from an iterator of code points. +/// +/// This replaces surrogate code point pairs with supplementary code points, +/// like concatenating ill-formed UTF-16 strings effectively would. +#[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" +)] +impl FromIterator for Wtf8Buf { + fn from_iter>(iter: T) -> Wtf8Buf { + let mut string = Wtf8Buf::new(); + string.extend(iter); + string + } +} + +/// Append code points from an iterator to the string. +/// +/// This replaces surrogate code point pairs with supplementary code points, +/// like concatenating ill-formed UTF-16 strings effectively would. +#[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" +)] +impl Extend for Wtf8Buf { + fn extend>(&mut self, iter: T) { + let iterator = iter.into_iter(); + let (low, _high) = iterator.size_hint(); + // Lower bound of one byte per code point (ASCII only) + self.bytes.reserve(low); + iterator.for_each(move |code_point| self.push(code_point)); + } + + #[inline] + fn extend_one(&mut self, code_point: CodePoint) { + self.push(code_point); + } + + #[inline] + fn extend_reserve(&mut self, additional: usize) { + // Lower bound of one byte per code point (ASCII only) + self.bytes.reserve(additional); + } +} + +impl Wtf8 { + /// Creates an owned `Wtf8Buf` from a borrowed `Wtf8`. + #[rustc_allow_incoherent_impl] + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + pub fn to_owned(&self) -> Wtf8Buf { + Wtf8Buf { bytes: self.as_bytes().to_vec(), is_known_utf8: false } + } + + /// Lossily converts the string to UTF-8. + /// Returns a UTF-8 `&str` slice if the contents are well-formed in UTF-8. + /// + /// Surrogates are replaced with `"\u{FFFD}"` (the replacement character “�”). + /// + /// This only copies the data if necessary (if it contains any surrogate). + #[rustc_allow_incoherent_impl] + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + pub fn to_string_lossy(&self) -> Cow<'_, str> { + let surrogate_pos = match self.next_surrogate(0) { + None => return Cow::Borrowed(unsafe { str::from_utf8_unchecked(self.as_bytes()) }), + Some((pos, _)) => pos, + }; + let wtf8_bytes = self.as_bytes(); + let mut utf8_bytes = Vec::with_capacity(self.len()); + utf8_bytes.extend_from_slice(&wtf8_bytes[..surrogate_pos]); + utf8_bytes.extend_from_slice(UTF8_REPLACEMENT_CHARACTER.as_bytes()); + let mut pos = surrogate_pos + 3; + loop { + match self.next_surrogate(pos) { + Some((surrogate_pos, _)) => { + utf8_bytes.extend_from_slice(&wtf8_bytes[pos..surrogate_pos]); + utf8_bytes.extend_from_slice(UTF8_REPLACEMENT_CHARACTER.as_bytes()); + pos = surrogate_pos + 3; + } + None => { + utf8_bytes.extend_from_slice(&wtf8_bytes[pos..]); + return Cow::Owned(unsafe { String::from_utf8_unchecked(utf8_bytes) }); + } + } + } + } + + #[rustc_allow_incoherent_impl] + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + pub fn clone_into(&self, buf: &mut Wtf8Buf) { + buf.is_known_utf8 = false; + self.as_bytes().clone_into(&mut buf.bytes); + } + + /// Boxes this `Wtf8`. + #[rustc_allow_incoherent_impl] + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + #[inline] + pub fn into_box(&self) -> Box { + let boxed: Box<[u8]> = self.as_bytes().into(); + unsafe { mem::transmute(boxed) } + } + + /// Creates a boxed, empty `Wtf8`. + #[rustc_allow_incoherent_impl] + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + pub fn empty_box() -> Box { + let boxed: Box<[u8]> = Default::default(); + unsafe { mem::transmute(boxed) } + } + + #[rustc_allow_incoherent_impl] + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + #[inline] + pub fn into_arc(&self) -> Arc { + let arc: Arc<[u8]> = Arc::from(self.as_bytes()); + unsafe { Arc::from_raw(Arc::into_raw(arc) as *const Wtf8) } + } + + #[rustc_allow_incoherent_impl] + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + #[inline] + pub fn into_rc(&self) -> Rc { + let rc: Rc<[u8]> = Rc::from(self.as_bytes()); + unsafe { Rc::from_raw(Rc::into_raw(rc) as *const Wtf8) } + } + + #[rustc_allow_incoherent_impl] + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + #[inline] + pub fn to_ascii_lowercase(&self) -> Wtf8Buf { + Wtf8Buf { bytes: self.as_bytes().to_ascii_lowercase(), is_known_utf8: false } + } + + #[rustc_allow_incoherent_impl] + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + #[inline] + pub fn to_ascii_uppercase(&self) -> Wtf8Buf { + Wtf8Buf { bytes: self.as_bytes().to_ascii_uppercase(), is_known_utf8: false } + } +} + +impl Hash for Wtf8Buf { + #[inline] + fn hash(&self, state: &mut H) { + state.write(&self.bytes); + 0xfeu8.hash(state) + } +} diff --git a/library/alloc/src/ffi/wtf8/tests.rs b/library/alloc/src/ffi/wtf8/tests.rs new file mode 100644 index 0000000000000..364a555c0849d --- /dev/null +++ b/library/alloc/src/ffi/wtf8/tests.rs @@ -0,0 +1,618 @@ +use alloc::vec::Vec; + +use super::*; + +#[test] +fn wtf8buf_new() { + assert_eq!(Wtf8Buf::new().bytes, b""); +} + +#[test] +fn wtf8buf_from_str() { + assert_eq!(Wtf8Buf::from_str("").bytes, b""); + assert_eq!(Wtf8Buf::from_str("aé 💩").bytes, b"a\xC3\xA9 \xF0\x9F\x92\xA9"); +} + +#[test] +fn wtf8buf_from_string() { + assert_eq!(Wtf8Buf::from_string(String::from("")).bytes, b""); + assert_eq!(Wtf8Buf::from_string(String::from("aé 💩")).bytes, b"a\xC3\xA9 \xF0\x9F\x92\xA9"); +} + +#[test] +fn wtf8buf_from_wide() { + let buf = Wtf8Buf::from_wide(&[]); + assert_eq!(buf.bytes, b""); + assert!(buf.is_known_utf8); + + let buf = Wtf8Buf::from_wide(&[0x61, 0xE9, 0x20, 0xD83D, 0xDCA9]); + assert_eq!(buf.bytes, b"a\xC3\xA9 \xF0\x9F\x92\xA9"); + assert!(buf.is_known_utf8); + + let buf = Wtf8Buf::from_wide(&[0x61, 0xE9, 0x20, 0xD83D, 0xD83D, 0xDCA9]); + assert_eq!(buf.bytes, b"a\xC3\xA9 \xED\xA0\xBD\xF0\x9F\x92\xA9"); + assert!(!buf.is_known_utf8); + + let buf = Wtf8Buf::from_wide(&[0xD800]); + assert_eq!(buf.bytes, b"\xED\xA0\x80"); + assert!(!buf.is_known_utf8); + + let buf = Wtf8Buf::from_wide(&[0xDBFF]); + assert_eq!(buf.bytes, b"\xED\xAF\xBF"); + assert!(!buf.is_known_utf8); + + let buf = Wtf8Buf::from_wide(&[0xDC00]); + assert_eq!(buf.bytes, b"\xED\xB0\x80"); + assert!(!buf.is_known_utf8); + + let buf = Wtf8Buf::from_wide(&[0xDFFF]); + assert_eq!(buf.bytes, b"\xED\xBF\xBF"); + assert!(!buf.is_known_utf8); +} + +#[test] +fn wtf8buf_push_str() { + let mut string = Wtf8Buf::new(); + assert_eq!(string.bytes, b""); + assert!(string.is_known_utf8); + + string.push_str("aé 💩"); + assert_eq!(string.bytes, b"a\xC3\xA9 \xF0\x9F\x92\xA9"); + assert!(string.is_known_utf8); +} + +#[test] +fn wtf8buf_push_char() { + let mut string = Wtf8Buf::from_str("aé "); + assert_eq!(string.bytes, b"a\xC3\xA9 "); + assert!(string.is_known_utf8); + + string.push_char('💩'); + assert_eq!(string.bytes, b"a\xC3\xA9 \xF0\x9F\x92\xA9"); + assert!(string.is_known_utf8); +} + +#[test] +fn wtf8buf_push() { + let mut string = Wtf8Buf::from_str("aé "); + assert_eq!(string.bytes, b"a\xC3\xA9 "); + assert!(string.is_known_utf8); + + string.push(CodePoint::from_char('💩')); + assert_eq!(string.bytes, b"a\xC3\xA9 \xF0\x9F\x92\xA9"); + assert!(string.is_known_utf8); + + fn c(value: u32) -> CodePoint { + CodePoint::from_u32(value).unwrap() + } + + let mut string = Wtf8Buf::new(); + string.push(c(0xD83D)); // lead + assert!(!string.is_known_utf8); + string.push(c(0xDCA9)); // trail + assert_eq!(string.bytes, b"\xF0\x9F\x92\xA9"); // Magic! + + let mut string = Wtf8Buf::new(); + string.push(c(0xD83D)); // lead + assert!(!string.is_known_utf8); + string.push(c(0x20)); // not surrogate + string.push(c(0xDCA9)); // trail + assert_eq!(string.bytes, b"\xED\xA0\xBD \xED\xB2\xA9"); + + let mut string = Wtf8Buf::new(); + string.push(c(0xD800)); // lead + assert!(!string.is_known_utf8); + string.push(c(0xDBFF)); // lead + assert_eq!(string.bytes, b"\xED\xA0\x80\xED\xAF\xBF"); + + let mut string = Wtf8Buf::new(); + string.push(c(0xD800)); // lead + assert!(!string.is_known_utf8); + string.push(c(0xE000)); // not surrogate + assert_eq!(string.bytes, b"\xED\xA0\x80\xEE\x80\x80"); + + let mut string = Wtf8Buf::new(); + string.push(c(0xD7FF)); // not surrogate + assert!(string.is_known_utf8); + string.push(c(0xDC00)); // trail + assert!(!string.is_known_utf8); + assert_eq!(string.bytes, b"\xED\x9F\xBF\xED\xB0\x80"); + + let mut string = Wtf8Buf::new(); + string.push(c(0x61)); // not surrogate, < 3 bytes + assert!(string.is_known_utf8); + string.push(c(0xDC00)); // trail + assert!(!string.is_known_utf8); + assert_eq!(string.bytes, b"\x61\xED\xB0\x80"); + + let mut string = Wtf8Buf::new(); + string.push(c(0xDC00)); // trail + assert!(!string.is_known_utf8); + assert_eq!(string.bytes, b"\xED\xB0\x80"); +} + +#[test] +fn wtf8buf_push_wtf8() { + let mut string = Wtf8Buf::from_str("aé"); + assert_eq!(string.bytes, b"a\xC3\xA9"); + string.push_wtf8(Wtf8::from_str(" 💩")); + assert_eq!(string.bytes, b"a\xC3\xA9 \xF0\x9F\x92\xA9"); + assert!(string.is_known_utf8); + + fn w(v: &[u8]) -> &Wtf8 { + unsafe { Wtf8::from_bytes_unchecked(v) } + } + + let mut string = Wtf8Buf::new(); + string.push_wtf8(w(b"\xED\xA0\xBD")); // lead + string.push_wtf8(w(b"\xED\xB2\xA9")); // trail + assert_eq!(string.bytes, b"\xF0\x9F\x92\xA9"); // Magic! + + let mut string = Wtf8Buf::new(); + string.push_wtf8(w(b"\xED\xA0\xBD")); // lead + string.push_wtf8(w(b" ")); // not surrogate + string.push_wtf8(w(b"\xED\xB2\xA9")); // trail + assert_eq!(string.bytes, b"\xED\xA0\xBD \xED\xB2\xA9"); + assert!(!string.is_known_utf8); + + let mut string = Wtf8Buf::new(); + string.push_wtf8(w(b"\xED\xA0\x80")); // lead + string.push_wtf8(w(b"\xED\xAF\xBF")); // lead + assert_eq!(string.bytes, b"\xED\xA0\x80\xED\xAF\xBF"); + assert!(!string.is_known_utf8); + + let mut string = Wtf8Buf::new(); + string.push_wtf8(w(b"\xED\xA0\x80")); // lead + string.push_wtf8(w(b"\xEE\x80\x80")); // not surrogate + assert_eq!(string.bytes, b"\xED\xA0\x80\xEE\x80\x80"); + assert!(!string.is_known_utf8); + + let mut string = Wtf8Buf::new(); + string.push_wtf8(w(b"\xED\x9F\xBF")); // not surrogate + string.push_wtf8(w(b"\xED\xB0\x80")); // trail + assert_eq!(string.bytes, b"\xED\x9F\xBF\xED\xB0\x80"); + assert!(!string.is_known_utf8); + + let mut string = Wtf8Buf::new(); + string.push_wtf8(w(b"a")); // not surrogate, < 3 bytes + string.push_wtf8(w(b"\xED\xB0\x80")); // trail + assert_eq!(string.bytes, b"\x61\xED\xB0\x80"); + assert!(!string.is_known_utf8); + + let mut string = Wtf8Buf::new(); + string.push_wtf8(w(b"\xED\xB0\x80")); // trail + assert_eq!(string.bytes, b"\xED\xB0\x80"); + assert!(!string.is_known_utf8); +} + +#[test] +fn wtf8buf_truncate() { + let mut string = Wtf8Buf::from_str("aé"); + assert!(string.is_known_utf8); + + string.truncate(3); + assert_eq!(string.bytes, b"a\xC3\xA9"); + assert!(string.is_known_utf8); + + string.truncate(1); + assert_eq!(string.bytes, b"a"); + assert!(string.is_known_utf8); + + string.truncate(0); + assert_eq!(string.bytes, b""); + assert!(string.is_known_utf8); +} + +#[test] +fn wtf8buf_truncate_around_non_bmp() { + let mut string = Wtf8Buf::from_str("💩"); + assert!(string.is_known_utf8); + + string.truncate(4); + assert_eq!(string.bytes, b"\xF0\x9F\x92\xA9"); + assert!(string.is_known_utf8); + + string.truncate(0); + assert_eq!(string.bytes, b""); + assert!(string.is_known_utf8); +} + +#[test] +#[should_panic] +fn wtf8buf_truncate_fail_code_point_boundary() { + let mut string = Wtf8Buf::from_str("aé"); + string.truncate(2); +} + +#[test] +#[should_panic] +fn wtf8buf_truncate_fail_longer() { + let mut string = Wtf8Buf::from_str("aé"); + string.truncate(4); +} + +#[test] +#[should_panic] +fn wtf8buf_truncate_splitting_non_bmp3() { + let mut string = Wtf8Buf::from_str("💩"); + assert!(string.is_known_utf8); + string.truncate(3); +} + +#[test] +#[should_panic] +fn wtf8buf_truncate_splitting_non_bmp2() { + let mut string = Wtf8Buf::from_str("💩"); + assert!(string.is_known_utf8); + string.truncate(2); +} + +#[test] +#[should_panic] +fn wtf8buf_truncate_splitting_non_bmp1() { + let mut string = Wtf8Buf::from_str("💩"); + assert!(string.is_known_utf8); + string.truncate(1); +} + +#[test] +fn wtf8buf_into_string() { + let mut string = Wtf8Buf::from_str("aé 💩"); + assert!(string.is_known_utf8); + assert_eq!(string.clone().into_string(), Ok(String::from("aé 💩"))); + string.push(CodePoint::from_u32(0xD800).unwrap()); + assert!(!string.is_known_utf8); + assert_eq!(string.clone().into_string(), Err(string)); +} + +#[test] +fn wtf8buf_into_string_lossy() { + let mut string = Wtf8Buf::from_str("aé 💩"); + assert_eq!(string.clone().into_string_lossy(), String::from("aé 💩")); + string.push(CodePoint::from_u32(0xD800).unwrap()); + assert_eq!(string.clone().into_string_lossy(), String::from("aé 💩�")); +} + +#[test] +fn wtf8buf_from_iterator() { + fn f(values: &[u32]) -> Wtf8Buf { + values.iter().map(|&c| CodePoint::from_u32(c).unwrap()).collect::() + } + assert_eq!( + f(&[0x61, 0xE9, 0x20, 0x1F4A9]), + Wtf8Buf { bytes: b"a\xC3\xA9 \xF0\x9F\x92\xA9".to_vec(), is_known_utf8: true } + ); + + assert_eq!(f(&[0xD83D, 0xDCA9]).bytes, b"\xF0\x9F\x92\xA9"); // Magic! + assert_eq!( + f(&[0xD83D, 0x20, 0xDCA9]), + Wtf8Buf { bytes: b"\xED\xA0\xBD \xED\xB2\xA9".to_vec(), is_known_utf8: false } + ); + assert_eq!( + f(&[0xD800, 0xDBFF]), + Wtf8Buf { bytes: b"\xED\xA0\x80\xED\xAF\xBF".to_vec(), is_known_utf8: false } + ); + assert_eq!( + f(&[0xD800, 0xE000]), + Wtf8Buf { bytes: b"\xED\xA0\x80\xEE\x80\x80".to_vec(), is_known_utf8: false } + ); + assert_eq!( + f(&[0xD7FF, 0xDC00]), + Wtf8Buf { bytes: b"\xED\x9F\xBF\xED\xB0\x80".to_vec(), is_known_utf8: false } + ); + assert_eq!( + f(&[0x61, 0xDC00]), + Wtf8Buf { bytes: b"\x61\xED\xB0\x80".to_vec(), is_known_utf8: false } + ); + assert_eq!(f(&[0xDC00]), Wtf8Buf { bytes: b"\xED\xB0\x80".to_vec(), is_known_utf8: false }); +} + +#[test] +fn wtf8buf_extend() { + fn e(initial: &[u32], extended: &[u32]) -> Wtf8Buf { + fn c(value: &u32) -> CodePoint { + CodePoint::from_u32(*value).unwrap() + } + let mut string = initial.iter().map(c).collect::(); + string.extend(extended.iter().map(c)); + string + } + + assert_eq!( + e(&[0x61, 0xE9], &[0x20, 0x1F4A9]), + Wtf8Buf { bytes: b"a\xC3\xA9 \xF0\x9F\x92\xA9".to_vec(), is_known_utf8: true } + ); + + assert_eq!(e(&[0xD83D], &[0xDCA9]).bytes, b"\xF0\x9F\x92\xA9"); // Magic! + assert_eq!( + e(&[0xD83D, 0x20], &[0xDCA9]), + Wtf8Buf { bytes: b"\xED\xA0\xBD \xED\xB2\xA9".to_vec(), is_known_utf8: false } + ); + assert_eq!( + e(&[0xD800], &[0xDBFF]), + Wtf8Buf { bytes: b"\xED\xA0\x80\xED\xAF\xBF".to_vec(), is_known_utf8: false } + ); + assert_eq!( + e(&[0xD800], &[0xE000]), + Wtf8Buf { bytes: b"\xED\xA0\x80\xEE\x80\x80".to_vec(), is_known_utf8: false } + ); + assert_eq!( + e(&[0xD7FF], &[0xDC00]), + Wtf8Buf { bytes: b"\xED\x9F\xBF\xED\xB0\x80".to_vec(), is_known_utf8: false } + ); + assert_eq!( + e(&[0x61], &[0xDC00]), + Wtf8Buf { bytes: b"\x61\xED\xB0\x80".to_vec(), is_known_utf8: false } + ); + assert_eq!( + e(&[], &[0xDC00]), + Wtf8Buf { bytes: b"\xED\xB0\x80".to_vec(), is_known_utf8: false } + ); +} + +#[test] +fn wtf8buf_show() { + let mut string = Wtf8Buf::from_str("a\té \u{7f}💩\r"); + string.push(CodePoint::from_u32(0xD800).unwrap()); + assert_eq!(format!("{string:?}"), "\"a\\té \\u{7f}\u{1f4a9}\\r\\u{d800}\""); +} + +#[test] +fn wtf8buf_as_slice() { + assert_eq!(Wtf8Buf::from_str("aé").as_slice(), Wtf8::from_str("aé")); +} + +#[test] +fn wtf8buf_show_str() { + let text = "a\té 💩\r"; + let string = Wtf8Buf::from_str(text); + assert_eq!(format!("{text:?}"), format!("{string:?}")); +} + +#[test] +fn wtf8_code_points() { + fn c(value: u32) -> CodePoint { + CodePoint::from_u32(value).unwrap() + } + fn cp(string: &Wtf8Buf) -> Vec> { + string.code_points().map(|c| c.to_char()).collect::>() + } + let mut string = Wtf8Buf::from_str("é "); + assert_eq!(cp(&string), [Some('é'), Some(' ')]); + string.push(c(0xD83D)); + assert_eq!(cp(&string), [Some('é'), Some(' '), None]); + string.push(c(0xDCA9)); + assert_eq!(cp(&string), [Some('é'), Some(' '), Some('💩')]); +} + +#[test] +fn wtf8_as_str() { + assert_eq!(Wtf8::from_str("").as_str(), Ok("")); + assert_eq!(Wtf8::from_str("aé 💩").as_str(), Ok("aé 💩")); + let mut string = Wtf8Buf::new(); + string.push(CodePoint::from_u32(0xD800).unwrap()); + assert!(string.as_str().is_err()); +} + +#[test] +fn wtf8_to_string_lossy() { + assert_eq!(Wtf8::from_str("").to_string_lossy(), Cow::Borrowed("")); + assert_eq!(Wtf8::from_str("aé 💩").to_string_lossy(), Cow::Borrowed("aé 💩")); + let mut string = Wtf8Buf::from_str("aé 💩"); + string.push(CodePoint::from_u32(0xD800).unwrap()); + let expected: Cow<'_, str> = Cow::Owned(String::from("aé 💩�")); + assert_eq!(string.to_string_lossy(), expected); +} + +#[test] +fn wtf8_display() { + fn d(b: &[u8]) -> String { + (&unsafe { Wtf8::from_bytes_unchecked(b) }).to_string() + } + + assert_eq!("", d("".as_bytes())); + assert_eq!("aé 💩", d("aé 💩".as_bytes())); + + let mut string = Wtf8Buf::from_str("aé 💩"); + string.push(CodePoint::from_u32(0xD800).unwrap()); + assert_eq!("aé 💩�", d(string.as_inner())); +} + +#[test] +fn wtf8_encode_wide() { + let mut string = Wtf8Buf::from_str("aé "); + string.push(CodePoint::from_u32(0xD83D).unwrap()); + string.push_char('💩'); + assert_eq!( + string.encode_wide().collect::>(), + vec![0x61, 0xE9, 0x20, 0xD83D, 0xD83D, 0xDCA9] + ); +} + +#[test] +fn wtf8_encode_wide_size_hint() { + let string = Wtf8Buf::from_str("\u{12345}"); + let mut iter = string.encode_wide(); + assert_eq!((1, Some(8)), iter.size_hint()); + iter.next().unwrap(); + assert_eq!((1, Some(1)), iter.size_hint()); + iter.next().unwrap(); + assert_eq!((0, Some(0)), iter.size_hint()); + assert!(iter.next().is_none()); +} + +#[test] +fn wtf8_clone_into() { + let mut string = Wtf8Buf::new(); + Wtf8::from_str("green").clone_into(&mut string); + assert_eq!(string.bytes, b"green"); + + let mut string = Wtf8Buf::from_str("green"); + Wtf8::from_str("").clone_into(&mut string); + assert_eq!(string.bytes, b""); + + let mut string = Wtf8Buf::from_str("red"); + Wtf8::from_str("green").clone_into(&mut string); + assert_eq!(string.bytes, b"green"); + + let mut string = Wtf8Buf::from_str("green"); + Wtf8::from_str("red").clone_into(&mut string); + assert_eq!(string.bytes, b"red"); + + let mut string = Wtf8Buf::from_str("green"); + assert!(string.is_known_utf8); + unsafe { Wtf8::from_bytes_unchecked(b"\xED\xA0\x80").clone_into(&mut string) }; + assert_eq!(string.bytes, b"\xED\xA0\x80"); + assert!(!string.is_known_utf8); +} + +#[test] +fn wtf8_to_ascii_lowercase() { + let lowercase = Wtf8::from_str("").to_ascii_lowercase(); + assert_eq!(lowercase.bytes, b""); + + let lowercase = Wtf8::from_str("GrEeN gRaPeS! 🍇").to_ascii_lowercase(); + assert_eq!(lowercase.bytes, b"green grapes! \xf0\x9f\x8d\x87"); + + let lowercase = unsafe { Wtf8::from_bytes_unchecked(b"\xED\xA0\x80").to_ascii_lowercase() }; + assert_eq!(lowercase.bytes, b"\xED\xA0\x80"); + assert!(!lowercase.is_known_utf8); +} + +#[test] +fn wtf8_to_ascii_uppercase() { + let uppercase = Wtf8::from_str("").to_ascii_uppercase(); + assert_eq!(uppercase.bytes, b""); + + let uppercase = Wtf8::from_str("GrEeN gRaPeS! 🍇").to_ascii_uppercase(); + assert_eq!(uppercase.bytes, b"GREEN GRAPES! \xf0\x9f\x8d\x87"); + + let uppercase = unsafe { Wtf8::from_bytes_unchecked(b"\xED\xA0\x80").to_ascii_uppercase() }; + assert_eq!(uppercase.bytes, b"\xED\xA0\x80"); + assert!(!uppercase.is_known_utf8); +} + +#[test] +fn wtf8_make_ascii_lowercase() { + let mut lowercase = Wtf8Buf::from_str(""); + lowercase.make_ascii_lowercase(); + assert_eq!(lowercase.bytes, b""); + + let mut lowercase = Wtf8Buf::from_str("GrEeN gRaPeS! 🍇"); + lowercase.make_ascii_lowercase(); + assert_eq!(lowercase.bytes, b"green grapes! \xf0\x9f\x8d\x87"); + + let mut lowercase = unsafe { Wtf8::from_bytes_unchecked(b"\xED\xA0\x80").to_owned() }; + lowercase.make_ascii_lowercase(); + assert_eq!(lowercase.bytes, b"\xED\xA0\x80"); + assert!(!lowercase.is_known_utf8); +} + +#[test] +fn wtf8_make_ascii_uppercase() { + let mut uppercase = Wtf8Buf::from_str(""); + uppercase.make_ascii_uppercase(); + assert_eq!(uppercase.bytes, b""); + + let mut uppercase = Wtf8Buf::from_str("GrEeN gRaPeS! 🍇"); + uppercase.make_ascii_uppercase(); + assert_eq!(uppercase.bytes, b"GREEN GRAPES! \xf0\x9f\x8d\x87"); + + let mut uppercase = unsafe { Wtf8::from_bytes_unchecked(b"\xED\xA0\x80").to_owned() }; + uppercase.make_ascii_uppercase(); + assert_eq!(uppercase.bytes, b"\xED\xA0\x80"); + assert!(!uppercase.is_known_utf8); +} + +#[test] +fn wtf8_to_owned() { + let string = unsafe { Wtf8::from_bytes_unchecked(b"\xED\xA0\x80").to_owned() }; + assert_eq!(string.bytes, b"\xED\xA0\x80"); + assert!(!string.is_known_utf8); +} + +#[test] +fn wtf8_valid_utf8_boundaries() { + let mut string = Wtf8Buf::from_str("aé 💩"); + string.push(CodePoint::from_u32(0xD800).unwrap()); + string.push(CodePoint::from_u32(0xD800).unwrap()); + check_utf8_boundary(&string, 0); + check_utf8_boundary(&string, 1); + check_utf8_boundary(&string, 3); + check_utf8_boundary(&string, 4); + check_utf8_boundary(&string, 8); + check_utf8_boundary(&string, 14); + assert_eq!(string.len(), 14); + + string.push_char('a'); + check_utf8_boundary(&string, 14); + check_utf8_boundary(&string, 15); + + let mut string = Wtf8Buf::from_str("a"); + string.push(CodePoint::from_u32(0xD800).unwrap()); + check_utf8_boundary(&string, 1); + + let mut string = Wtf8Buf::from_str("\u{D7FF}"); + string.push(CodePoint::from_u32(0xD800).unwrap()); + check_utf8_boundary(&string, 3); + + let mut string = Wtf8Buf::new(); + string.push(CodePoint::from_u32(0xD800).unwrap()); + string.push_char('\u{D7FF}'); + check_utf8_boundary(&string, 3); +} + +#[test] +#[should_panic(expected = "byte index 4 is out of bounds")] +fn wtf8_utf8_boundary_out_of_bounds() { + let string = Wtf8::from_str("aé"); + check_utf8_boundary(&string, 4); +} + +#[test] +#[should_panic(expected = "byte index 1 is not a codepoint boundary")] +fn wtf8_utf8_boundary_inside_codepoint() { + let string = Wtf8::from_str("é"); + check_utf8_boundary(&string, 1); +} + +#[test] +#[should_panic(expected = "byte index 1 is not a codepoint boundary")] +fn wtf8_utf8_boundary_inside_surrogate() { + let mut string = Wtf8Buf::new(); + string.push(CodePoint::from_u32(0xD800).unwrap()); + check_utf8_boundary(&string, 1); +} + +#[test] +#[should_panic(expected = "byte index 3 lies between surrogate codepoints")] +fn wtf8_utf8_boundary_between_surrogates() { + let mut string = Wtf8Buf::new(); + string.push(CodePoint::from_u32(0xD800).unwrap()); + string.push(CodePoint::from_u32(0xD800).unwrap()); + check_utf8_boundary(&string, 3); +} + +#[test] +fn wobbled_wtf8_plus_bytes_isnt_utf8() { + let mut string: Wtf8Buf = unsafe { Wtf8::from_bytes_unchecked(b"\xED\xA0\x80").to_owned() }; + assert!(!string.is_known_utf8); + string.extend_from_slice(b"some utf-8"); + assert!(!string.is_known_utf8); +} + +#[test] +fn wobbled_wtf8_plus_str_isnt_utf8() { + let mut string: Wtf8Buf = unsafe { Wtf8::from_bytes_unchecked(b"\xED\xA0\x80").to_owned() }; + assert!(!string.is_known_utf8); + string.push_str("some utf-8"); + assert!(!string.is_known_utf8); +} + +#[test] +fn unwobbly_wtf8_plus_utf8_is_utf8() { + let mut string: Wtf8Buf = Wtf8Buf::from_str("hello world"); + assert!(string.is_known_utf8); + string.push_str("some utf-8"); + assert!(string.is_known_utf8); +} diff --git a/library/alloc/src/lib.rs b/library/alloc/src/lib.rs index 7aaa4e73df72c..5124f849cca72 100644 --- a/library/alloc/src/lib.rs +++ b/library/alloc/src/lib.rs @@ -103,6 +103,7 @@ #![feature(async_closure)] #![feature(async_fn_traits)] #![feature(async_iterator)] +#![feature(char_internals)] #![feature(clone_to_uninit)] #![feature(coerce_unsized)] #![feature(const_align_of_val)] @@ -134,7 +135,10 @@ #![feature(local_waker)] #![feature(maybe_uninit_slice)] #![feature(maybe_uninit_uninit_array_transpose)] +#![feature(os_str_internals)] +#![feature(os_str_slice)] #![feature(panic_internals)] +#![feature(path_internals)] #![feature(pattern)] #![feature(pin_coerce_unsized_trait)] #![feature(ptr_internals)] @@ -252,6 +256,14 @@ pub mod task; mod tests; pub mod vec; +#[unstable( + feature = "path_internals", + reason = "internal details of the implementation of path", + issue = "none" +)] +#[doc(hidden)] +pub mod path; + #[doc(hidden)] #[unstable(feature = "liballoc_internals", issue = "none", reason = "implementation detail")] pub mod __export { diff --git a/library/alloc/src/path.rs b/library/alloc/src/path.rs new file mode 100644 index 0000000000000..85bfb8718c53e --- /dev/null +++ b/library/alloc/src/path.rs @@ -0,0 +1,1295 @@ +#![deny(unsafe_op_in_unsafe_fn)] + +#[cfg(test)] +mod tests; + +use core::ffi::os_str::OsStr; +use core::hash::{Hash, Hasher}; +use core::ops::{self, Deref}; +use core::path::*; +use core::str::FromStr; +use core::{cmp, fmt}; + +use crate::borrow::{Borrow, Cow, ToOwned}; +use crate::boxed::Box; +use crate::collections::TryReserveError; +use crate::ffi::os_str::OsString; +use crate::rc::Rc; +use crate::string::String; +use crate::sync::Arc; +use crate::vec::Vec; + +//////////////////////////////////////////////////////////////////////////////// +// GENERAL NOTES +//////////////////////////////////////////////////////////////////////////////// +// +// Parsing in this module is done by directly transmuting OsStr to [u8] slices, +// taking advantage of the fact that OsStr always encodes ASCII characters +// as-is. Eventually, this transmutation should be replaced by direct uses of +// OsStr APIs for parsing, but it will take a while for those to become +// available. + +//////////////////////////////////////////////////////////////////////////////// +// Basic types and traits +//////////////////////////////////////////////////////////////////////////////// + +/// An owned, mutable path (akin to [`String`]). +/// +/// This type provides methods like [`push`] and [`set_extension`] that mutate +/// the path in place. It also implements [`Deref`] to [`Path`], meaning that +/// all methods on [`Path`] slices are available on `PathBuf` values as well. +/// +/// [`push`]: PathBuf::push +/// [`set_extension`]: PathBuf::set_extension +/// +/// More details about the overall approach can be found in +/// the [module documentation](self). +/// +/// # Examples +/// +/// You can use [`push`] to build up a `PathBuf` from +/// components: +/// +/// ``` +/// use std::path::PathBuf; +/// +/// let mut path = PathBuf::new(); +/// +/// path.push(r"C:\"); +/// path.push("windows"); +/// path.push("system32"); +/// +/// path.set_extension("dll"); +/// ``` +/// +/// However, [`push`] is best used for dynamic situations. This is a better way +/// to do this when you know all of the components ahead of time: +/// +/// ``` +/// use std::path::PathBuf; +/// +/// let path: PathBuf = [r"C:\", "windows", "system32.dll"].iter().collect(); +/// ``` +/// +/// We can still do better than this! Since these are all strings, we can use +/// `From::from`: +/// +/// ``` +/// use std::path::PathBuf; +/// +/// let path = PathBuf::from(r"C:\windows\system32.dll"); +/// ``` +/// +/// Which method works best depends on what kind of situation you're in. +#[cfg_attr(not(test), rustc_diagnostic_item = "PathBuf")] +#[stable(feature = "rust1", since = "1.0.0")] +pub struct PathBuf { + inner: OsString, +} + +impl PathBuf { + /// Allocates an empty `PathBuf`. + /// + /// # Examples + /// + /// ``` + /// use std::path::PathBuf; + /// + /// let path = PathBuf::new(); + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + #[must_use] + #[inline] + pub fn new() -> PathBuf { + PathBuf { inner: OsString::new() } + } + + /// Creates a new `PathBuf` with a given capacity used to create the + /// internal [`OsString`]. See [`with_capacity`] defined on [`OsString`]. + /// + /// # Examples + /// + /// ``` + /// use std::path::PathBuf; + /// + /// let mut path = PathBuf::with_capacity(10); + /// let capacity = path.capacity(); + /// + /// // This push is done without reallocating + /// path.push(r"C:\"); + /// + /// assert_eq!(capacity, path.capacity()); + /// ``` + /// + /// [`with_capacity`]: OsString::with_capacity + #[stable(feature = "path_buf_capacity", since = "1.44.0")] + #[must_use] + #[inline] + pub fn with_capacity(capacity: usize) -> PathBuf { + PathBuf { inner: OsString::with_capacity(capacity) } + } + + /// Coerces to a [`Path`] slice. + /// + /// # Examples + /// + /// ``` + /// use std::path::{Path, PathBuf}; + /// + /// let p = PathBuf::from("/test"); + /// assert_eq!(Path::new("/test"), p.as_path()); + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + #[must_use] + #[inline] + pub fn as_path(&self) -> &Path { + self + } + + /// Consumes and leaks the `PathBuf`, returning a mutable reference to the contents, + /// `&'a mut Path`. + /// + /// The caller has free choice over the returned lifetime, including 'static. + /// Indeed, this function is ideally used for data that lives for the remainder of + /// the program’s life, as dropping the returned reference will cause a memory leak. + /// + /// It does not reallocate or shrink the `PathBuf`, so the leaked allocation may include + /// unused capacity that is not part of the returned slice. If you want to discard excess + /// capacity, call [`into_boxed_path`], and then [`Box::leak`] instead. + /// However, keep in mind that trimming the capacity may result in a reallocation and copy. + /// + /// [`into_boxed_path`]: Self::into_boxed_path + #[unstable(feature = "os_string_pathbuf_leak", issue = "125965")] + #[inline] + pub fn leak<'a>(self) -> &'a mut Path { + Path::from_inner_mut(self.inner.leak()) + } + + /// Extends `self` with `path`. + /// + /// If `path` is absolute, it replaces the current path. + /// + /// On Windows: + /// + /// * if `path` has a root but no prefix (e.g., `\windows`), it + /// replaces everything except for the prefix (if any) of `self`. + /// * if `path` has a prefix but no root, it replaces `self`. + /// * if `self` has a verbatim prefix (e.g. `\\?\C:\windows`) + /// and `path` is not empty, the new path is normalized: all references + /// to `.` and `..` are removed. + /// + /// Consider using [`Path::join`] if you need a new `PathBuf` instead of + /// using this function on a cloned `PathBuf`. + /// + /// # Examples + /// + /// Pushing a relative path extends the existing path: + /// + /// ``` + /// use std::path::PathBuf; + /// + /// let mut path = PathBuf::from("/tmp"); + /// path.push("file.bk"); + /// assert_eq!(path, PathBuf::from("/tmp/file.bk")); + /// ``` + /// + /// Pushing an absolute path replaces the existing path: + /// + /// ``` + /// use std::path::PathBuf; + /// + /// let mut path = PathBuf::from("/tmp"); + /// path.push("/etc"); + /// assert_eq!(path, PathBuf::from("/etc")); + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + #[rustc_confusables("append", "put")] + pub fn push>(&mut self, path: P) { + self._push(path.as_ref()) + } + + fn _push(&mut self, path: &Path) { + // in general, a separator is needed if the rightmost byte is not a separator + let buf = self.inner.as_encoded_bytes(); + let mut need_sep = buf.last().map(|c| !is_sep_byte(*c)).unwrap_or(false); + + // in the special case of `C:` on Windows, do *not* add a separator + let comps = self.components(); + + if comps.prefix_len() > 0 + && comps.prefix_len() == comps.path_left_to_parse().len() + && comps.prefix().unwrap().is_drive() + { + need_sep = false + } + + // absolute `path` replaces `self` + if path.is_absolute() || path.prefix().is_some() { + self.inner.truncate(0); + + // verbatim paths need . and .. removed + } else if comps.prefix_verbatim() && !path.as_os_str().is_empty() { + let mut buf: Vec<_> = comps.collect(); + for c in path.components() { + match c { + Component::RootDir => { + buf.truncate(1); + buf.push(c); + } + Component::CurDir => (), + Component::ParentDir => { + if let Some(Component::Normal(_)) = buf.last() { + buf.pop(); + } + } + _ => buf.push(c), + } + } + + let mut res = OsString::new(); + let mut need_sep = false; + + for c in buf { + if need_sep && c != Component::RootDir { + res.push(MAIN_SEP_STR); + } + res.push(c.as_os_str()); + + need_sep = match c { + Component::RootDir => false, + Component::Prefix(prefix) => { + !prefix.kind().is_drive() && prefix.kind().len() > 0 + } + _ => true, + } + } + + self.inner = res; + return; + + // `path` has a root but no prefix, e.g., `\windows` (Windows only) + } else if path.has_root() { + let prefix_len = self.components().prefix_remaining(); + self.inner.truncate(prefix_len); + + // `path` is a pure relative path + } else if need_sep { + self.inner.push(MAIN_SEP_STR); + } + + self.inner.push(path); + } + + /// Truncates `self` to [`self.parent`]. + /// + /// Returns `false` and does nothing if [`self.parent`] is [`None`]. + /// Otherwise, returns `true`. + /// + /// [`self.parent`]: Path::parent + /// + /// # Examples + /// + /// ``` + /// use std::path::{Path, PathBuf}; + /// + /// let mut p = PathBuf::from("/spirited/away.rs"); + /// + /// p.pop(); + /// assert_eq!(Path::new("/spirited"), p); + /// p.pop(); + /// assert_eq!(Path::new("/"), p); + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + pub fn pop(&mut self) -> bool { + match self.parent().map(|p| p.as_u8_slice().len()) { + Some(len) => { + self.inner.truncate(len); + true + } + None => false, + } + } + + /// Updates [`self.file_name`] to `file_name`. + /// + /// If [`self.file_name`] was [`None`], this is equivalent to pushing + /// `file_name`. + /// + /// Otherwise it is equivalent to calling [`pop`] and then pushing + /// `file_name`. The new path will be a sibling of the original path. + /// (That is, it will have the same parent.) + /// + /// [`self.file_name`]: Path::file_name + /// [`pop`]: PathBuf::pop + /// + /// # Examples + /// + /// ``` + /// use std::path::PathBuf; + /// + /// let mut buf = PathBuf::from("/"); + /// assert!(buf.file_name() == None); + /// + /// buf.set_file_name("foo.txt"); + /// assert!(buf == PathBuf::from("/foo.txt")); + /// assert!(buf.file_name().is_some()); + /// + /// buf.set_file_name("bar.txt"); + /// assert!(buf == PathBuf::from("/bar.txt")); + /// + /// buf.set_file_name("baz"); + /// assert!(buf == PathBuf::from("/baz")); + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + pub fn set_file_name>(&mut self, file_name: S) { + self._set_file_name(file_name.as_ref()) + } + + fn _set_file_name(&mut self, file_name: &OsStr) { + if self.file_name().is_some() { + let popped = self.pop(); + debug_assert!(popped); + } + self.push(file_name); + } + + /// Updates [`self.extension`] to `Some(extension)` or to `None` if + /// `extension` is empty. + /// + /// Returns `false` and does nothing if [`self.file_name`] is [`None`], + /// returns `true` and updates the extension otherwise. + /// + /// If [`self.extension`] is [`None`], the extension is added; otherwise + /// it is replaced. + /// + /// If `extension` is the empty string, [`self.extension`] will be [`None`] + /// afterwards, not `Some("")`. + /// + /// # Panics + /// + /// Panics if the passed extension contains a path separator (see + /// [`is_separator`]). + /// + /// # Caveats + /// + /// The new `extension` may contain dots and will be used in its entirety, + /// but only the part after the final dot will be reflected in + /// [`self.extension`]. + /// + /// If the file stem contains internal dots and `extension` is empty, part + /// of the old file stem will be considered the new [`self.extension`]. + /// + /// See the examples below. + /// + /// [`self.file_name`]: Path::file_name + /// [`self.extension`]: Path::extension + /// + /// # Examples + /// + /// ``` + /// use std::path::{Path, PathBuf}; + /// + /// let mut p = PathBuf::from("/feel/the"); + /// + /// p.set_extension("force"); + /// assert_eq!(Path::new("/feel/the.force"), p.as_path()); + /// + /// p.set_extension("dark.side"); + /// assert_eq!(Path::new("/feel/the.dark.side"), p.as_path()); + /// + /// p.set_extension("cookie"); + /// assert_eq!(Path::new("/feel/the.dark.cookie"), p.as_path()); + /// + /// p.set_extension(""); + /// assert_eq!(Path::new("/feel/the.dark"), p.as_path()); + /// + /// p.set_extension(""); + /// assert_eq!(Path::new("/feel/the"), p.as_path()); + /// + /// p.set_extension(""); + /// assert_eq!(Path::new("/feel/the"), p.as_path()); + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + pub fn set_extension>(&mut self, extension: S) -> bool { + self._set_extension(extension.as_ref()) + } + + fn _set_extension(&mut self, extension: &OsStr) -> bool { + for &b in extension.as_encoded_bytes() { + if b < 128 { + if is_separator(b as char) { + panic!("extension cannot contain path separators: {:?}", extension); + } + } + } + + let file_stem = match self.file_stem() { + None => return false, + Some(f) => f.as_encoded_bytes(), + }; + + // truncate until right after the file stem + let end_file_stem = file_stem[file_stem.len()..].as_ptr().addr(); + let start = self.inner.as_encoded_bytes().as_ptr().addr(); + self.inner.truncate(end_file_stem.wrapping_sub(start)); + + // add the new extension, if any + let new = extension; + if !new.is_empty() { + self.inner.reserve_exact(new.len() + 1); + self.inner.push(OsStr::new(".")); + self.inner.push(new); + } + + true + } + + /// Append [`self.extension`] with `extension`. + /// + /// Returns `false` and does nothing if [`self.file_name`] is [`None`], + /// returns `true` and updates the extension otherwise. + /// + /// # Caveats + /// + /// The appended `extension` may contain dots and will be used in its entirety, + /// but only the part after the final dot will be reflected in + /// [`self.extension`]. + /// + /// See the examples below. + /// + /// [`self.file_name`]: Path::file_name + /// [`self.extension`]: Path::extension + /// + /// # Examples + /// + /// ``` + /// #![feature(path_add_extension)] + /// + /// use std::path::{Path, PathBuf}; + /// + /// let mut p = PathBuf::from("/feel/the"); + /// + /// p.add_extension("formatted"); + /// assert_eq!(Path::new("/feel/the.formatted"), p.as_path()); + /// + /// p.add_extension("dark.side"); + /// assert_eq!(Path::new("/feel/the.formatted.dark.side"), p.as_path()); + /// + /// p.set_extension("cookie"); + /// assert_eq!(Path::new("/feel/the.formatted.dark.cookie"), p.as_path()); + /// + /// p.set_extension(""); + /// assert_eq!(Path::new("/feel/the.formatted.dark"), p.as_path()); + /// + /// p.add_extension(""); + /// assert_eq!(Path::new("/feel/the.formatted.dark"), p.as_path()); + /// ``` + #[unstable(feature = "path_add_extension", issue = "127292")] + pub fn add_extension>(&mut self, extension: S) -> bool { + self._add_extension(extension.as_ref()) + } + + fn _add_extension(&mut self, extension: &OsStr) -> bool { + let file_name = match self.file_name() { + None => return false, + Some(f) => f.as_encoded_bytes(), + }; + + let new = extension; + if !new.is_empty() { + // truncate until right after the file name + // this is necessary for trimming the trailing slash + let end_file_name = file_name[file_name.len()..].as_ptr().addr(); + let start = self.inner.as_encoded_bytes().as_ptr().addr(); + self.inner.truncate(end_file_name.wrapping_sub(start)); + + // append the new extension + self.inner.reserve_exact(new.len() + 1); + self.inner.push(OsStr::new(".")); + self.inner.push(new); + } + + true + } + + /// Yields a mutable reference to the underlying [`OsString`] instance. + /// + /// # Examples + /// + /// ``` + /// use std::path::{Path, PathBuf}; + /// + /// let mut path = PathBuf::from("/foo"); + /// + /// path.push("bar"); + /// assert_eq!(path, Path::new("/foo/bar")); + /// + /// // OsString's `push` does not add a separator. + /// path.as_mut_os_string().push("baz"); + /// assert_eq!(path, Path::new("/foo/barbaz")); + /// ``` + #[stable(feature = "path_as_mut_os_str", since = "1.70.0")] + #[must_use] + #[inline] + pub fn as_mut_os_string(&mut self) -> &mut OsString { + &mut self.inner + } + + /// Consumes the `PathBuf`, yielding its internal [`OsString`] storage. + /// + /// # Examples + /// + /// ``` + /// use std::path::PathBuf; + /// + /// let p = PathBuf::from("/the/head"); + /// let os_str = p.into_os_string(); + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + #[must_use = "`self` will be dropped if the result is not used"] + #[inline] + pub fn into_os_string(self) -> OsString { + self.inner + } + + /// Converts this `PathBuf` into a [boxed](Box) [`Path`]. + #[stable(feature = "into_boxed_path", since = "1.20.0")] + #[must_use = "`self` will be dropped if the result is not used"] + #[inline] + pub fn into_boxed_path(self) -> Box { + let rw = Box::into_raw(self.inner.into_boxed_os_str()) as *mut Path; + unsafe { Box::from_raw(rw) } + } + + /// Invokes [`capacity`] on the underlying instance of [`OsString`]. + /// + /// [`capacity`]: OsString::capacity + #[stable(feature = "path_buf_capacity", since = "1.44.0")] + #[must_use] + #[inline] + pub fn capacity(&self) -> usize { + self.inner.capacity() + } + + /// Invokes [`clear`] on the underlying instance of [`OsString`]. + /// + /// [`clear`]: OsString::clear + #[stable(feature = "path_buf_capacity", since = "1.44.0")] + #[inline] + pub fn clear(&mut self) { + self.inner.clear() + } + + /// Invokes [`reserve`] on the underlying instance of [`OsString`]. + /// + /// [`reserve`]: OsString::reserve + #[stable(feature = "path_buf_capacity", since = "1.44.0")] + #[inline] + pub fn reserve(&mut self, additional: usize) { + self.inner.reserve(additional) + } + + /// Invokes [`try_reserve`] on the underlying instance of [`OsString`]. + /// + /// [`try_reserve`]: OsString::try_reserve + #[stable(feature = "try_reserve_2", since = "1.63.0")] + #[inline] + pub fn try_reserve(&mut self, additional: usize) -> Result<(), TryReserveError> { + self.inner.try_reserve(additional) + } + + /// Invokes [`reserve_exact`] on the underlying instance of [`OsString`]. + /// + /// [`reserve_exact`]: OsString::reserve_exact + #[stable(feature = "path_buf_capacity", since = "1.44.0")] + #[inline] + pub fn reserve_exact(&mut self, additional: usize) { + self.inner.reserve_exact(additional) + } + + /// Invokes [`try_reserve_exact`] on the underlying instance of [`OsString`]. + /// + /// [`try_reserve_exact`]: OsString::try_reserve_exact + #[stable(feature = "try_reserve_2", since = "1.63.0")] + #[inline] + pub fn try_reserve_exact(&mut self, additional: usize) -> Result<(), TryReserveError> { + self.inner.try_reserve_exact(additional) + } + + /// Invokes [`shrink_to_fit`] on the underlying instance of [`OsString`]. + /// + /// [`shrink_to_fit`]: OsString::shrink_to_fit + #[stable(feature = "path_buf_capacity", since = "1.44.0")] + #[inline] + pub fn shrink_to_fit(&mut self) { + self.inner.shrink_to_fit() + } + + /// Invokes [`shrink_to`] on the underlying instance of [`OsString`]. + /// + /// [`shrink_to`]: OsString::shrink_to + #[stable(feature = "shrink_to", since = "1.56.0")] + #[inline] + pub fn shrink_to(&mut self, min_capacity: usize) { + self.inner.shrink_to(min_capacity) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl Clone for PathBuf { + #[inline] + fn clone(&self) -> Self { + PathBuf { inner: self.inner.clone() } + } + + /// Clones the contents of `source` into `self`. + /// + /// This method is preferred over simply assigning `source.clone()` to `self`, + /// as it avoids reallocation if possible. + #[inline] + fn clone_from(&mut self, source: &Self) { + self.inner.clone_from(&source.inner) + } +} + +#[stable(feature = "box_from_path", since = "1.17.0")] +impl From<&Path> for Box { + /// Creates a boxed [`Path`] from a reference. + /// + /// This will allocate and clone `path` to it. + fn from(path: &Path) -> Box { + let boxed: Box = path.as_os_str().into(); + let rw = Box::into_raw(boxed) as *mut Path; + unsafe { Box::from_raw(rw) } + } +} + +#[stable(feature = "box_from_cow", since = "1.45.0")] +impl From> for Box { + /// Creates a boxed [`Path`] from a clone-on-write pointer. + /// + /// Converting from a `Cow::Owned` does not clone or allocate. + #[inline] + fn from(cow: Cow<'_, Path>) -> Box { + match cow { + Cow::Borrowed(path) => Box::from(path), + Cow::Owned(path) => Box::from(path), + } + } +} + +#[stable(feature = "path_buf_from_box", since = "1.18.0")] +impl From> for PathBuf { + /// Converts a [Box]<[Path]> into a [`PathBuf`]. + /// + /// This conversion does not allocate or copy memory. + #[inline] + fn from(boxed: Box) -> PathBuf { + boxed.into_path_buf() + } +} + +#[stable(feature = "box_from_path_buf", since = "1.20.0")] +impl From for Box { + /// Converts a [`PathBuf`] into a [Box]<[Path]>. + /// + /// This conversion currently should not allocate memory, + /// but this behavior is not guaranteed on all platforms or in all future versions. + #[inline] + fn from(p: PathBuf) -> Box { + p.into_boxed_path() + } +} + +#[stable(feature = "more_box_slice_clone", since = "1.29.0")] +impl Clone for Box { + #[inline] + fn clone(&self) -> Self { + self.to_path_buf().into_boxed_path() + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl> From<&T> for PathBuf { + /// Converts a borrowed [`OsStr`] to a [`PathBuf`]. + /// + /// Allocates a [`PathBuf`] and copies the data into it. + #[inline] + fn from(s: &T) -> PathBuf { + PathBuf::from(s.as_ref().to_os_string()) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl From for PathBuf { + /// Converts an [`OsString`] into a [`PathBuf`]. + /// + /// This conversion does not allocate or copy memory. + #[inline] + fn from(s: OsString) -> PathBuf { + PathBuf { inner: s } + } +} + +#[stable(feature = "from_path_buf_for_os_string", since = "1.14.0")] +impl From for OsString { + /// Converts a [`PathBuf`] into an [`OsString`] + /// + /// This conversion does not allocate or copy memory. + #[inline] + fn from(path_buf: PathBuf) -> OsString { + path_buf.inner + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl From for PathBuf { + /// Converts a [`String`] into a [`PathBuf`] + /// + /// This conversion does not allocate or copy memory. + #[inline] + fn from(s: String) -> PathBuf { + PathBuf::from(OsString::from(s)) + } +} + +#[stable(feature = "path_from_str", since = "1.32.0")] +impl FromStr for PathBuf { + type Err = core::convert::Infallible; + + #[inline] + fn from_str(s: &str) -> Result { + Ok(PathBuf::from(s)) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl> FromIterator

for PathBuf { + fn from_iter>(iter: I) -> PathBuf { + let mut buf = PathBuf::new(); + buf.extend(iter); + buf + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl> Extend

for PathBuf { + fn extend>(&mut self, iter: I) { + iter.into_iter().for_each(move |p| self.push(p.as_ref())); + } + + #[inline] + fn extend_one(&mut self, p: P) { + self.push(p.as_ref()); + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl fmt::Debug for PathBuf { + fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Debug::fmt(&**self, formatter) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl ops::Deref for PathBuf { + type Target = Path; + #[inline] + fn deref(&self) -> &Path { + Path::new(&self.inner) + } +} + +#[stable(feature = "path_buf_deref_mut", since = "1.68.0")] +impl ops::DerefMut for PathBuf { + #[inline] + fn deref_mut(&mut self) -> &mut Path { + Path::from_inner_mut(&mut self.inner) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl Borrow for PathBuf { + #[inline] + fn borrow(&self) -> &Path { + self.deref() + } +} + +#[stable(feature = "default_for_pathbuf", since = "1.17.0")] +impl Default for PathBuf { + #[inline] + fn default() -> Self { + PathBuf::new() + } +} + +#[stable(feature = "cow_from_path", since = "1.6.0")] +impl<'a> From<&'a Path> for Cow<'a, Path> { + /// Creates a clone-on-write pointer from a reference to + /// [`Path`]. + /// + /// This conversion does not clone or allocate. + #[inline] + fn from(s: &'a Path) -> Cow<'a, Path> { + Cow::Borrowed(s) + } +} + +#[stable(feature = "cow_from_path", since = "1.6.0")] +impl<'a> From for Cow<'a, Path> { + /// Creates a clone-on-write pointer from an owned + /// instance of [`PathBuf`]. + /// + /// This conversion does not clone or allocate. + #[inline] + fn from(s: PathBuf) -> Cow<'a, Path> { + Cow::Owned(s) + } +} + +#[stable(feature = "cow_from_pathbuf_ref", since = "1.28.0")] +impl<'a> From<&'a PathBuf> for Cow<'a, Path> { + /// Creates a clone-on-write pointer from a reference to + /// [`PathBuf`]. + /// + /// This conversion does not clone or allocate. + #[inline] + fn from(p: &'a PathBuf) -> Cow<'a, Path> { + Cow::Borrowed(p.as_path()) + } +} + +#[stable(feature = "pathbuf_from_cow_path", since = "1.28.0")] +impl<'a> From> for PathBuf { + /// Converts a clone-on-write pointer to an owned path. + /// + /// Converting from a `Cow::Owned` does not clone or allocate. + #[inline] + fn from(p: Cow<'a, Path>) -> Self { + p.into_owned() + } +} + +#[stable(feature = "shared_from_slice2", since = "1.24.0")] +impl From for Arc { + /// Converts a [`PathBuf`] into an [Arc]<[Path]> by moving the [`PathBuf`] data + /// into a new [`Arc`] buffer. + #[inline] + fn from(s: PathBuf) -> Arc { + let arc: Arc = Arc::from(s.into_os_string()); + unsafe { Arc::from_raw(Arc::into_raw(arc) as *const Path) } + } +} + +#[stable(feature = "shared_from_slice2", since = "1.24.0")] +impl From<&Path> for Arc { + /// Converts a [`Path`] into an [`Arc`] by copying the [`Path`] data into a new [`Arc`] buffer. + #[inline] + fn from(s: &Path) -> Arc { + let arc: Arc = Arc::from(s.as_os_str()); + unsafe { Arc::from_raw(Arc::into_raw(arc) as *const Path) } + } +} + +#[stable(feature = "shared_from_slice2", since = "1.24.0")] +impl From for Rc { + /// Converts a [`PathBuf`] into an [Rc]<[Path]> by moving the [`PathBuf`] data into + /// a new [`Rc`] buffer. + #[inline] + fn from(s: PathBuf) -> Rc { + let rc: Rc = Rc::from(s.into_os_string()); + unsafe { Rc::from_raw(Rc::into_raw(rc) as *const Path) } + } +} + +#[stable(feature = "shared_from_slice2", since = "1.24.0")] +impl From<&Path> for Rc { + /// Converts a [`Path`] into an [`Rc`] by copying the [`Path`] data into a new [`Rc`] buffer. + #[inline] + fn from(s: &Path) -> Rc { + let rc: Rc = Rc::from(s.as_os_str()); + unsafe { Rc::from_raw(Rc::into_raw(rc) as *const Path) } + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl ToOwned for Path { + type Owned = PathBuf; + #[inline] + fn to_owned(&self) -> PathBuf { + self.to_path_buf() + } + #[inline] + fn clone_into(&self, target: &mut PathBuf) { + self.as_os_str().clone_into(&mut target.inner); + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl PartialEq for PathBuf { + #[inline] + fn eq(&self, other: &PathBuf) -> bool { + self.components() == other.components() + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl Hash for PathBuf { + fn hash(&self, h: &mut H) { + self.as_path().hash(h) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl Eq for PathBuf {} + +#[stable(feature = "rust1", since = "1.0.0")] +impl PartialOrd for PathBuf { + #[inline] + fn partial_cmp(&self, other: &PathBuf) -> Option { + Some(compare_components(self.components(), other.components())) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl Ord for PathBuf { + #[inline] + fn cmp(&self, other: &PathBuf) -> cmp::Ordering { + compare_components(self.components(), other.components()) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl AsRef for PathBuf { + #[inline] + fn as_ref(&self) -> &OsStr { + &self.inner[..] + } +} + +impl Path { + /// Converts a `Path` to a [`Cow`]. + /// + /// Any non-Unicode sequences are replaced with + /// [`U+FFFD REPLACEMENT CHARACTER`][U+FFFD]. + /// + /// [U+FFFD]: super::char::REPLACEMENT_CHARACTER + /// + /// # Examples + /// + /// Calling `to_string_lossy` on a `Path` with valid unicode: + /// + /// ``` + /// use std::path::Path; + /// + /// let path = Path::new("foo.txt"); + /// assert_eq!(path.to_string_lossy(), "foo.txt"); + /// ``` + /// + /// Had `path` contained invalid unicode, the `to_string_lossy` call might + /// have returned `"fo�.txt"`. + #[rustc_allow_incoherent_impl] + #[stable(feature = "rust1", since = "1.0.0")] + #[must_use = "this returns the result of the operation, \ + without modifying the original"] + #[inline] + pub fn to_string_lossy(&self) -> Cow<'_, str> { + self.as_os_str().to_string_lossy() + } + + /// Converts a `Path` to an owned [`PathBuf`]. + /// + /// # Examples + /// + /// ``` + /// use std::path::{Path, PathBuf}; + /// + /// let path_buf = Path::new("foo.txt").to_path_buf(); + /// assert_eq!(path_buf, PathBuf::from("foo.txt")); + /// ``` + #[rustc_allow_incoherent_impl] + #[rustc_conversion_suggestion] + #[must_use = "this returns the result of the operation, \ + without modifying the original"] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn to_path_buf(&self) -> PathBuf { + PathBuf::from(self.as_os_str().to_os_string()) + } + + /// Creates an owned [`PathBuf`] with `path` adjoined to `self`. + /// + /// If `path` is absolute, it replaces the current path. + /// + /// See [`PathBuf::push`] for more details on what it means to adjoin a path. + /// + /// # Examples + /// + /// ``` + /// use std::path::{Path, PathBuf}; + /// + /// assert_eq!(Path::new("/etc").join("passwd"), PathBuf::from("/etc/passwd")); + /// assert_eq!(Path::new("/etc").join("/bin/sh"), PathBuf::from("/bin/sh")); + /// ``` + #[rustc_allow_incoherent_impl] + #[stable(feature = "rust1", since = "1.0.0")] + #[must_use] + pub fn join>(&self, path: P) -> PathBuf { + self._join(path.as_ref()) + } + + #[rustc_allow_incoherent_impl] + fn _join(&self, path: &Path) -> PathBuf { + let mut buf = self.to_path_buf(); + buf.push(path); + buf + } + + /// Creates an owned [`PathBuf`] like `self` but with the given file name. + /// + /// See [`PathBuf::set_file_name`] for more details. + /// + /// # Examples + /// + /// ``` + /// use std::path::{Path, PathBuf}; + /// + /// let path = Path::new("/tmp/foo.png"); + /// assert_eq!(path.with_file_name("bar"), PathBuf::from("/tmp/bar")); + /// assert_eq!(path.with_file_name("bar.txt"), PathBuf::from("/tmp/bar.txt")); + /// + /// let path = Path::new("/tmp"); + /// assert_eq!(path.with_file_name("var"), PathBuf::from("/var")); + /// ``` + #[rustc_allow_incoherent_impl] + #[stable(feature = "rust1", since = "1.0.0")] + #[must_use] + pub fn with_file_name>(&self, file_name: S) -> PathBuf { + self._with_file_name(file_name.as_ref()) + } + + #[rustc_allow_incoherent_impl] + fn _with_file_name(&self, file_name: &OsStr) -> PathBuf { + let mut buf = self.to_path_buf(); + buf.set_file_name(file_name); + buf + } + + /// Creates an owned [`PathBuf`] like `self` but with the given extension. + /// + /// See [`PathBuf::set_extension`] for more details. + /// + /// # Examples + /// + /// ``` + /// use std::path::{Path, PathBuf}; + /// + /// let path = Path::new("foo.rs"); + /// assert_eq!(path.with_extension("txt"), PathBuf::from("foo.txt")); + /// + /// let path = Path::new("foo.tar.gz"); + /// assert_eq!(path.with_extension(""), PathBuf::from("foo.tar")); + /// assert_eq!(path.with_extension("xz"), PathBuf::from("foo.tar.xz")); + /// assert_eq!(path.with_extension("").with_extension("txt"), PathBuf::from("foo.txt")); + /// ``` + #[rustc_allow_incoherent_impl] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn with_extension>(&self, extension: S) -> PathBuf { + self._with_extension(extension.as_ref()) + } + + #[rustc_allow_incoherent_impl] + fn _with_extension(&self, extension: &OsStr) -> PathBuf { + let self_len = self.as_os_str().len(); + let self_bytes = self.as_os_str().as_encoded_bytes(); + + let (new_capacity, slice_to_copy) = match self.extension() { + None => { + // Enough capacity for the extension and the dot + let capacity = self_len + extension.len() + 1; + let whole_path = self_bytes; + (capacity, whole_path) + } + Some(previous_extension) => { + let capacity = self_len + extension.len() - previous_extension.len(); + let path_till_dot = &self_bytes[..self_len - previous_extension.len()]; + (capacity, path_till_dot) + } + }; + + let mut new_path = PathBuf::with_capacity(new_capacity); + new_path.inner.extend_from_slice(slice_to_copy); + new_path.set_extension(extension); + new_path + } + + /// Creates an owned [`PathBuf`] like `self` but with the extension added. + /// + /// See [`PathBuf::add_extension`] for more details. + /// + /// # Examples + /// + /// ``` + /// #![feature(path_add_extension)] + /// + /// use std::path::{Path, PathBuf}; + /// + /// let path = Path::new("foo.rs"); + /// assert_eq!(path.with_added_extension("txt"), PathBuf::from("foo.rs.txt")); + /// + /// let path = Path::new("foo.tar.gz"); + /// assert_eq!(path.with_added_extension(""), PathBuf::from("foo.tar.gz")); + /// assert_eq!(path.with_added_extension("xz"), PathBuf::from("foo.tar.gz.xz")); + /// assert_eq!(path.with_added_extension("").with_added_extension("txt"), PathBuf::from("foo.tar.gz.txt")); + /// ``` + #[rustc_allow_incoherent_impl] + #[unstable(feature = "path_add_extension", issue = "127292")] + pub fn with_added_extension>(&self, extension: S) -> PathBuf { + let mut new_path = self.to_path_buf(); + new_path.add_extension(extension); + new_path + } + + /// Converts a [`Box`](Box) into a [`PathBuf`] without copying or + /// allocating. + #[rustc_allow_incoherent_impl] + #[stable(feature = "into_boxed_path", since = "1.20.0")] + #[must_use = "`self` will be dropped if the result is not used"] + pub fn into_path_buf(self: Box) -> PathBuf { + let rw = Box::into_raw(self) as *mut OsStr; + let inner = unsafe { Box::from_raw(rw) }; + PathBuf { inner: OsString::from(inner) } + } +} + +#[stable(feature = "cow_os_str_as_ref_path", since = "1.8.0")] +impl AsRef for Cow<'_, OsStr> { + #[inline] + fn as_ref(&self) -> &Path { + Path::new(self) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl AsRef for OsString { + #[inline] + fn as_ref(&self) -> &Path { + Path::new(self) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl AsRef for String { + #[inline] + fn as_ref(&self) -> &Path { + Path::new(self) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl AsRef for PathBuf { + #[inline] + fn as_ref(&self) -> &Path { + self + } +} + +#[stable(feature = "path_into_iter", since = "1.6.0")] +impl<'a> IntoIterator for &'a PathBuf { + type Item = &'a OsStr; + type IntoIter = Iter<'a>; + #[inline] + fn into_iter(self) -> Iter<'a> { + self.iter() + } +} + +macro_rules! impl_cmp { + (<$($life:lifetime),*> $lhs:ty, $rhs: ty) => { + #[stable(feature = "partialeq_path", since = "1.6.0")] + impl<$($life),*> PartialEq<$rhs> for $lhs { + #[inline] + fn eq(&self, other: &$rhs) -> bool { + ::eq(self, other) + } + } + + #[stable(feature = "partialeq_path", since = "1.6.0")] + impl<$($life),*> PartialEq<$lhs> for $rhs { + #[inline] + fn eq(&self, other: &$lhs) -> bool { + ::eq(self, other) + } + } + + #[stable(feature = "cmp_path", since = "1.8.0")] + impl<$($life),*> PartialOrd<$rhs> for $lhs { + #[inline] + fn partial_cmp(&self, other: &$rhs) -> Option { + ::partial_cmp(self, other) + } + } + + #[stable(feature = "cmp_path", since = "1.8.0")] + impl<$($life),*> PartialOrd<$lhs> for $rhs { + #[inline] + fn partial_cmp(&self, other: &$lhs) -> Option { + ::partial_cmp(self, other) + } + } + }; +} + +impl_cmp!(<> PathBuf, Path); +impl_cmp!(<'a> PathBuf, &'a Path); +impl_cmp!(<'a> Cow<'a, Path>, Path); +impl_cmp!(<'a, 'b> Cow<'a, Path>, &'b Path); +impl_cmp!(<'a> Cow<'a, Path>, PathBuf); + +macro_rules! impl_cmp_os_str { + (<$($life:lifetime),*> $lhs:ty, $rhs: ty) => { + #[stable(feature = "cmp_path", since = "1.8.0")] + impl<$($life),*> PartialEq<$rhs> for $lhs { + #[inline] + fn eq(&self, other: &$rhs) -> bool { + ::eq(self, other.as_ref()) + } + } + + #[stable(feature = "cmp_path", since = "1.8.0")] + impl<$($life),*> PartialEq<$lhs> for $rhs { + #[inline] + fn eq(&self, other: &$lhs) -> bool { + ::eq(self.as_ref(), other) + } + } + + #[stable(feature = "cmp_path", since = "1.8.0")] + impl<$($life),*> PartialOrd<$rhs> for $lhs { + #[inline] + fn partial_cmp(&self, other: &$rhs) -> Option { + ::partial_cmp(self, other.as_ref()) + } + } + + #[stable(feature = "cmp_path", since = "1.8.0")] + impl<$($life),*> PartialOrd<$lhs> for $rhs { + #[inline] + fn partial_cmp(&self, other: &$lhs) -> Option { + ::partial_cmp(self.as_ref(), other) + } + } + }; +} + +impl_cmp_os_str!(<> PathBuf, OsStr); +impl_cmp_os_str!(<'a> PathBuf, &'a OsStr); +impl_cmp_os_str!(<'a> PathBuf, Cow<'a, OsStr>); +impl_cmp_os_str!(<> PathBuf, OsString); +impl_cmp_os_str!(<'a> Path, Cow<'a, OsStr>); +impl_cmp_os_str!(<> Path, OsString); +impl_cmp_os_str!(<'a, 'b> &'a Path, Cow<'b, OsStr>); +impl_cmp_os_str!(<'a> &'a Path, OsString); +impl_cmp_os_str!(<'a> Cow<'a, Path>, OsStr); +impl_cmp_os_str!(<'a, 'b> Cow<'a, Path>, &'b OsStr); +impl_cmp_os_str!(<'a> Cow<'a, Path>, OsString); diff --git a/library/alloc/src/path/tests.rs b/library/alloc/src/path/tests.rs new file mode 100644 index 0000000000000..f69fd040c6b42 --- /dev/null +++ b/library/alloc/src/path/tests.rs @@ -0,0 +1,1689 @@ +use core::hint::black_box; +use std::collections::{BTreeSet, HashSet}; + +use super::*; + +#[allow(unknown_lints, unused_macro_rules)] +macro_rules! t ( + ($path:expr, iter: $iter:expr) => ( + { + let path = Path::new($path); + + // Forward iteration + let comps = path.iter() + .map(|p| p.to_string_lossy().into_owned()) + .collect::>(); + let exp: &[&str] = &$iter; + let exps = exp.iter().map(|s| s.to_string()).collect::>(); + assert!(comps == exps, "iter: Expected {:?}, found {:?}", + exps, comps); + + // Reverse iteration + let comps = Path::new($path).iter().rev() + .map(|p| p.to_string_lossy().into_owned()) + .collect::>(); + let exps = exps.into_iter().rev().collect::>(); + assert!(comps == exps, "iter().rev(): Expected {:?}, found {:?}", + exps, comps); + } + ); + + ($path:expr, has_root: $has_root:expr, is_absolute: $is_absolute:expr) => ( + { + let path = Path::new($path); + + let act_root = path.has_root(); + assert!(act_root == $has_root, "has_root: Expected {:?}, found {:?}", + $has_root, act_root); + + let act_abs = path.is_absolute(); + assert!(act_abs == $is_absolute, "is_absolute: Expected {:?}, found {:?}", + $is_absolute, act_abs); + } + ); + + ($path:expr, parent: $parent:expr, file_name: $file:expr) => ( + { + let path = Path::new($path); + + let parent = path.parent().map(|p| p.to_str().unwrap()); + let exp_parent: Option<&str> = $parent; + assert!(parent == exp_parent, "parent: Expected {:?}, found {:?}", + exp_parent, parent); + + let file = path.file_name().map(|p| p.to_str().unwrap()); + let exp_file: Option<&str> = $file; + assert!(file == exp_file, "file_name: Expected {:?}, found {:?}", + exp_file, file); + } + ); + + ($path:expr, file_stem: $file_stem:expr, extension: $extension:expr) => ( + { + let path = Path::new($path); + + let stem = path.file_stem().map(|p| p.to_str().unwrap()); + let exp_stem: Option<&str> = $file_stem; + assert!(stem == exp_stem, "file_stem: Expected {:?}, found {:?}", + exp_stem, stem); + + let ext = path.extension().map(|p| p.to_str().unwrap()); + let exp_ext: Option<&str> = $extension; + assert!(ext == exp_ext, "extension: Expected {:?}, found {:?}", + exp_ext, ext); + } + ); + + ($path:expr, file_prefix: $file_prefix:expr, extension: $extension:expr) => ( + { + let path = Path::new($path); + + let prefix = path.file_prefix().map(|p| p.to_str().unwrap()); + let exp_prefix: Option<&str> = $file_prefix; + assert!(prefix == exp_prefix, "file_prefix: Expected {:?}, found {:?}", + exp_prefix, prefix); + + let ext = path.extension().map(|p| p.to_str().unwrap()); + let exp_ext: Option<&str> = $extension; + assert!(ext == exp_ext, "extension: Expected {:?}, found {:?}", + exp_ext, ext); + } + ); + + ($path:expr, iter: $iter:expr, + has_root: $has_root:expr, is_absolute: $is_absolute:expr, + parent: $parent:expr, file_name: $file:expr, + file_stem: $file_stem:expr, extension: $extension:expr, + file_prefix: $file_prefix:expr) => ( + { + t!($path, iter: $iter); + t!($path, has_root: $has_root, is_absolute: $is_absolute); + t!($path, parent: $parent, file_name: $file); + t!($path, file_stem: $file_stem, extension: $extension); + t!($path, file_prefix: $file_prefix, extension: $extension); + } + ); +); + +#[test] +fn into() { + use crate::borrow::Cow; + + let static_path = Path::new("/home/foo"); + let static_cow_path: Cow<'static, Path> = static_path.into(); + let pathbuf = PathBuf::from("/home/foo"); + + { + let path: &Path = &pathbuf; + let borrowed_cow_path: Cow<'_, Path> = path.into(); + + assert_eq!(static_cow_path, borrowed_cow_path); + } + + let owned_cow_path: Cow<'static, Path> = pathbuf.into(); + + assert_eq!(static_cow_path, owned_cow_path); +} + +#[test] +fn test_pathbuf_leak() { + let string = "/have/a/cake".to_owned(); + let (len, cap) = (string.len(), string.capacity()); + let buf = PathBuf::from(string); + let leaked = buf.leak(); + assert_eq!(leaked.as_os_str().as_encoded_bytes(), b"/have/a/cake"); + unsafe { drop(String::from_raw_parts(leaked.as_mut_os_str() as *mut OsStr as _, len, cap)) } +} + +#[test] +#[cfg(unix)] +pub fn test_decompositions_unix() { + t!("", + iter: [], + has_root: false, + is_absolute: false, + parent: None, + file_name: None, + file_stem: None, + extension: None, + file_prefix: None + ); + + t!("foo", + iter: ["foo"], + has_root: false, + is_absolute: false, + parent: Some(""), + file_name: Some("foo"), + file_stem: Some("foo"), + extension: None, + file_prefix: Some("foo") + ); + + t!("/", + iter: ["/"], + has_root: true, + is_absolute: true, + parent: None, + file_name: None, + file_stem: None, + extension: None, + file_prefix: None + ); + + t!("/foo", + iter: ["/", "foo"], + has_root: true, + is_absolute: true, + parent: Some("/"), + file_name: Some("foo"), + file_stem: Some("foo"), + extension: None, + file_prefix: Some("foo") + ); + + t!("foo/", + iter: ["foo"], + has_root: false, + is_absolute: false, + parent: Some(""), + file_name: Some("foo"), + file_stem: Some("foo"), + extension: None, + file_prefix: Some("foo") + ); + + t!("/foo/", + iter: ["/", "foo"], + has_root: true, + is_absolute: true, + parent: Some("/"), + file_name: Some("foo"), + file_stem: Some("foo"), + extension: None, + file_prefix: Some("foo") + ); + + t!("foo/bar", + iter: ["foo", "bar"], + has_root: false, + is_absolute: false, + parent: Some("foo"), + file_name: Some("bar"), + file_stem: Some("bar"), + extension: None, + file_prefix: Some("bar") + ); + + t!("/foo/bar", + iter: ["/", "foo", "bar"], + has_root: true, + is_absolute: true, + parent: Some("/foo"), + file_name: Some("bar"), + file_stem: Some("bar"), + extension: None, + file_prefix: Some("bar") + ); + + t!("///foo///", + iter: ["/", "foo"], + has_root: true, + is_absolute: true, + parent: Some("/"), + file_name: Some("foo"), + file_stem: Some("foo"), + extension: None, + file_prefix: Some("foo") + ); + + t!("///foo///bar", + iter: ["/", "foo", "bar"], + has_root: true, + is_absolute: true, + parent: Some("///foo"), + file_name: Some("bar"), + file_stem: Some("bar"), + extension: None, + file_prefix: Some("bar") + ); + + t!("./.", + iter: ["."], + has_root: false, + is_absolute: false, + parent: Some(""), + file_name: None, + file_stem: None, + extension: None, + file_prefix: None + ); + + t!("/..", + iter: ["/", ".."], + has_root: true, + is_absolute: true, + parent: Some("/"), + file_name: None, + file_stem: None, + extension: None, + file_prefix: None + ); + + t!("../", + iter: [".."], + has_root: false, + is_absolute: false, + parent: Some(""), + file_name: None, + file_stem: None, + extension: None, + file_prefix: None + ); + + t!("foo/.", + iter: ["foo"], + has_root: false, + is_absolute: false, + parent: Some(""), + file_name: Some("foo"), + file_stem: Some("foo"), + extension: None, + file_prefix: Some("foo") + ); + + t!("foo/..", + iter: ["foo", ".."], + has_root: false, + is_absolute: false, + parent: Some("foo"), + file_name: None, + file_stem: None, + extension: None, + file_prefix: None + ); + + t!("foo/./", + iter: ["foo"], + has_root: false, + is_absolute: false, + parent: Some(""), + file_name: Some("foo"), + file_stem: Some("foo"), + extension: None, + file_prefix: Some("foo") + ); + + t!("foo/./bar", + iter: ["foo", "bar"], + has_root: false, + is_absolute: false, + parent: Some("foo"), + file_name: Some("bar"), + file_stem: Some("bar"), + extension: None, + file_prefix: Some("bar") + ); + + t!("foo/../", + iter: ["foo", ".."], + has_root: false, + is_absolute: false, + parent: Some("foo"), + file_name: None, + file_stem: None, + extension: None, + file_prefix: None + ); + + t!("foo/../bar", + iter: ["foo", "..", "bar"], + has_root: false, + is_absolute: false, + parent: Some("foo/.."), + file_name: Some("bar"), + file_stem: Some("bar"), + extension: None, + file_prefix: Some("bar") + ); + + t!("./a", + iter: [".", "a"], + has_root: false, + is_absolute: false, + parent: Some("."), + file_name: Some("a"), + file_stem: Some("a"), + extension: None, + file_prefix: Some("a") + ); + + t!(".", + iter: ["."], + has_root: false, + is_absolute: false, + parent: Some(""), + file_name: None, + file_stem: None, + extension: None, + file_prefix: None + ); + + t!("./", + iter: ["."], + has_root: false, + is_absolute: false, + parent: Some(""), + file_name: None, + file_stem: None, + extension: None, + file_prefix: None + ); + + t!("a/b", + iter: ["a", "b"], + has_root: false, + is_absolute: false, + parent: Some("a"), + file_name: Some("b"), + file_stem: Some("b"), + extension: None, + file_prefix: Some("b") + ); + + t!("a//b", + iter: ["a", "b"], + has_root: false, + is_absolute: false, + parent: Some("a"), + file_name: Some("b"), + file_stem: Some("b"), + extension: None, + file_prefix: Some("b") + ); + + t!("a/./b", + iter: ["a", "b"], + has_root: false, + is_absolute: false, + parent: Some("a"), + file_name: Some("b"), + file_stem: Some("b"), + extension: None, + file_prefix: Some("b") + ); + + t!("a/b/c", + iter: ["a", "b", "c"], + has_root: false, + is_absolute: false, + parent: Some("a/b"), + file_name: Some("c"), + file_stem: Some("c"), + extension: None, + file_prefix: Some("c") + ); + + t!(".foo", + iter: [".foo"], + has_root: false, + is_absolute: false, + parent: Some(""), + file_name: Some(".foo"), + file_stem: Some(".foo"), + extension: None, + file_prefix: Some(".foo") + ); + + t!("a/.foo", + iter: ["a", ".foo"], + has_root: false, + is_absolute: false, + parent: Some("a"), + file_name: Some(".foo"), + file_stem: Some(".foo"), + extension: None, + file_prefix: Some(".foo") + ); + + t!("a/.rustfmt.toml", + iter: ["a", ".rustfmt.toml"], + has_root: false, + is_absolute: false, + parent: Some("a"), + file_name: Some(".rustfmt.toml"), + file_stem: Some(".rustfmt"), + extension: Some("toml"), + file_prefix: Some(".rustfmt") + ); + + t!("a/.x.y.z", + iter: ["a", ".x.y.z"], + has_root: false, + is_absolute: false, + parent: Some("a"), + file_name: Some(".x.y.z"), + file_stem: Some(".x.y"), + extension: Some("z"), + file_prefix: Some(".x") + ); +} + +#[test] +#[cfg(windows)] +pub fn test_decompositions_windows() { + t!("", + iter: [], + has_root: false, + is_absolute: false, + parent: None, + file_name: None, + file_stem: None, + extension: None, + file_prefix: None + ); + + t!("foo", + iter: ["foo"], + has_root: false, + is_absolute: false, + parent: Some(""), + file_name: Some("foo"), + file_stem: Some("foo"), + extension: None, + file_prefix: Some("foo") + ); + + t!("/", + iter: ["\\"], + has_root: true, + is_absolute: false, + parent: None, + file_name: None, + file_stem: None, + extension: None, + file_prefix: None + ); + + t!("\\", + iter: ["\\"], + has_root: true, + is_absolute: false, + parent: None, + file_name: None, + file_stem: None, + extension: None, + file_prefix: None + ); + + t!("c:", + iter: ["c:"], + has_root: false, + is_absolute: false, + parent: None, + file_name: None, + file_stem: None, + extension: None, + file_prefix: None + ); + + t!("c:\\", + iter: ["c:", "\\"], + has_root: true, + is_absolute: true, + parent: None, + file_name: None, + file_stem: None, + extension: None, + file_prefix: None + ); + + t!("c:/", + iter: ["c:", "\\"], + has_root: true, + is_absolute: true, + parent: None, + file_name: None, + file_stem: None, + extension: None, + file_prefix: None + ); + + t!("/foo", + iter: ["\\", "foo"], + has_root: true, + is_absolute: false, + parent: Some("/"), + file_name: Some("foo"), + file_stem: Some("foo"), + extension: None, + file_prefix: Some("foo") + ); + + t!("foo/", + iter: ["foo"], + has_root: false, + is_absolute: false, + parent: Some(""), + file_name: Some("foo"), + file_stem: Some("foo"), + extension: None, + file_prefix: Some("foo") + ); + + t!("/foo/", + iter: ["\\", "foo"], + has_root: true, + is_absolute: false, + parent: Some("/"), + file_name: Some("foo"), + file_stem: Some("foo"), + extension: None, + file_prefix: Some("foo") + ); + + t!("foo/bar", + iter: ["foo", "bar"], + has_root: false, + is_absolute: false, + parent: Some("foo"), + file_name: Some("bar"), + file_stem: Some("bar"), + extension: None, + file_prefix: Some("bar") + ); + + t!("/foo/bar", + iter: ["\\", "foo", "bar"], + has_root: true, + is_absolute: false, + parent: Some("/foo"), + file_name: Some("bar"), + file_stem: Some("bar"), + extension: None, + file_prefix: Some("bar") + ); + + t!("///foo///", + iter: ["\\", "foo"], + has_root: true, + is_absolute: false, + parent: Some("/"), + file_name: Some("foo"), + file_stem: Some("foo"), + extension: None, + file_prefix: Some("foo") + ); + + t!("///foo///bar", + iter: ["\\", "foo", "bar"], + has_root: true, + is_absolute: false, + parent: Some("///foo"), + file_name: Some("bar"), + file_stem: Some("bar"), + extension: None, + file_prefix: Some("bar") + ); + + t!("./.", + iter: ["."], + has_root: false, + is_absolute: false, + parent: Some(""), + file_name: None, + file_stem: None, + extension: None, + file_prefix: None + ); + + t!("/..", + iter: ["\\", ".."], + has_root: true, + is_absolute: false, + parent: Some("/"), + file_name: None, + file_stem: None, + extension: None, + file_prefix: None + ); + + t!("../", + iter: [".."], + has_root: false, + is_absolute: false, + parent: Some(""), + file_name: None, + file_stem: None, + extension: None, + file_prefix: None + ); + + t!("foo/.", + iter: ["foo"], + has_root: false, + is_absolute: false, + parent: Some(""), + file_name: Some("foo"), + file_stem: Some("foo"), + extension: None, + file_prefix: Some("foo") + ); + + t!("foo/..", + iter: ["foo", ".."], + has_root: false, + is_absolute: false, + parent: Some("foo"), + file_name: None, + file_stem: None, + extension: None, + file_prefix: None + ); + + t!("foo/./", + iter: ["foo"], + has_root: false, + is_absolute: false, + parent: Some(""), + file_name: Some("foo"), + file_stem: Some("foo"), + extension: None, + file_prefix: Some("foo") + ); + + t!("foo/./bar", + iter: ["foo", "bar"], + has_root: false, + is_absolute: false, + parent: Some("foo"), + file_name: Some("bar"), + file_stem: Some("bar"), + extension: None, + file_prefix: Some("bar") + ); + + t!("foo/../", + iter: ["foo", ".."], + has_root: false, + is_absolute: false, + parent: Some("foo"), + file_name: None, + file_stem: None, + extension: None, + file_prefix: None + ); + + t!("foo/../bar", + iter: ["foo", "..", "bar"], + has_root: false, + is_absolute: false, + parent: Some("foo/.."), + file_name: Some("bar"), + file_stem: Some("bar"), + extension: None, + file_prefix: Some("bar") + ); + + t!("./a", + iter: [".", "a"], + has_root: false, + is_absolute: false, + parent: Some("."), + file_name: Some("a"), + file_stem: Some("a"), + extension: None, + file_prefix: Some("a") + ); + + t!(".", + iter: ["."], + has_root: false, + is_absolute: false, + parent: Some(""), + file_name: None, + file_stem: None, + extension: None, + file_prefix: None + ); + + t!("./", + iter: ["."], + has_root: false, + is_absolute: false, + parent: Some(""), + file_name: None, + file_stem: None, + extension: None, + file_prefix: None + ); + + t!("a/b", + iter: ["a", "b"], + has_root: false, + is_absolute: false, + parent: Some("a"), + file_name: Some("b"), + file_stem: Some("b"), + extension: None, + file_prefix: Some("b") + ); + + t!("a//b", + iter: ["a", "b"], + has_root: false, + is_absolute: false, + parent: Some("a"), + file_name: Some("b"), + file_stem: Some("b"), + extension: None, + file_prefix: Some("b") + ); + + t!("a/./b", + iter: ["a", "b"], + has_root: false, + is_absolute: false, + parent: Some("a"), + file_name: Some("b"), + file_stem: Some("b"), + extension: None, + file_prefix: Some("b") + ); + + t!("a/b/c", + iter: ["a", "b", "c"], + has_root: false, + is_absolute: false, + parent: Some("a/b"), + file_name: Some("c"), + file_stem: Some("c"), + extension: None, + file_prefix: Some("c") + ); + + t!("a\\b\\c", + iter: ["a", "b", "c"], + has_root: false, + is_absolute: false, + parent: Some("a\\b"), + file_name: Some("c"), + file_stem: Some("c"), + extension: None, + file_prefix: Some("c") + ); + + t!("\\a", + iter: ["\\", "a"], + has_root: true, + is_absolute: false, + parent: Some("\\"), + file_name: Some("a"), + file_stem: Some("a"), + extension: None, + file_prefix: Some("a") + ); + + t!("c:\\foo.txt", + iter: ["c:", "\\", "foo.txt"], + has_root: true, + is_absolute: true, + parent: Some("c:\\"), + file_name: Some("foo.txt"), + file_stem: Some("foo"), + extension: Some("txt"), + file_prefix: Some("foo") + ); + + t!("\\\\server\\share\\foo.txt", + iter: ["\\\\server\\share", "\\", "foo.txt"], + has_root: true, + is_absolute: true, + parent: Some("\\\\server\\share\\"), + file_name: Some("foo.txt"), + file_stem: Some("foo"), + extension: Some("txt"), + file_prefix: Some("foo") + ); + + t!("\\\\server\\share", + iter: ["\\\\server\\share", "\\"], + has_root: true, + is_absolute: true, + parent: None, + file_name: None, + file_stem: None, + extension: None, + file_prefix: None + ); + + t!("\\\\server", + iter: ["\\", "server"], + has_root: true, + is_absolute: false, + parent: Some("\\"), + file_name: Some("server"), + file_stem: Some("server"), + extension: None, + file_prefix: Some("server") + ); + + t!("\\\\?\\bar\\foo.txt", + iter: ["\\\\?\\bar", "\\", "foo.txt"], + has_root: true, + is_absolute: true, + parent: Some("\\\\?\\bar\\"), + file_name: Some("foo.txt"), + file_stem: Some("foo"), + extension: Some("txt"), + file_prefix: Some("foo") + ); + + t!("\\\\?\\bar", + iter: ["\\\\?\\bar"], + has_root: true, + is_absolute: true, + parent: None, + file_name: None, + file_stem: None, + extension: None, + file_prefix: None + ); + + t!("\\\\?\\", + iter: ["\\\\?\\"], + has_root: true, + is_absolute: true, + parent: None, + file_name: None, + file_stem: None, + extension: None, + file_prefix: None + ); + + t!("\\\\?\\UNC\\server\\share\\foo.txt", + iter: ["\\\\?\\UNC\\server\\share", "\\", "foo.txt"], + has_root: true, + is_absolute: true, + parent: Some("\\\\?\\UNC\\server\\share\\"), + file_name: Some("foo.txt"), + file_stem: Some("foo"), + extension: Some("txt"), + file_prefix: Some("foo") + ); + + t!("\\\\?\\UNC\\server", + iter: ["\\\\?\\UNC\\server"], + has_root: true, + is_absolute: true, + parent: None, + file_name: None, + file_stem: None, + extension: None, + file_prefix: None + ); + + t!("\\\\?\\UNC\\", + iter: ["\\\\?\\UNC\\"], + has_root: true, + is_absolute: true, + parent: None, + file_name: None, + file_stem: None, + extension: None, + file_prefix: None + ); + + t!("\\\\?\\C:\\foo.txt", + iter: ["\\\\?\\C:", "\\", "foo.txt"], + has_root: true, + is_absolute: true, + parent: Some("\\\\?\\C:\\"), + file_name: Some("foo.txt"), + file_stem: Some("foo"), + extension: Some("txt"), + file_prefix: Some("foo") + ); + + t!("\\\\?\\C:\\", + iter: ["\\\\?\\C:", "\\"], + has_root: true, + is_absolute: true, + parent: None, + file_name: None, + file_stem: None, + extension: None, + file_prefix: None + ); + + t!("\\\\?\\C:", + iter: ["\\\\?\\C:"], + has_root: true, + is_absolute: true, + parent: None, + file_name: None, + file_stem: None, + extension: None, + file_prefix: None + ); + + t!("\\\\?\\foo/bar", + iter: ["\\\\?\\foo/bar"], + has_root: true, + is_absolute: true, + parent: None, + file_name: None, + file_stem: None, + extension: None, + file_prefix: None + ); + + t!("\\\\?\\C:/foo/bar", + iter: ["\\\\?\\C:", "\\", "foo/bar"], + has_root: true, + is_absolute: true, + parent: Some("\\\\?\\C:/"), + file_name: Some("foo/bar"), + file_stem: Some("foo/bar"), + extension: None, + file_prefix: Some("foo/bar") + ); + + t!("\\\\.\\foo\\bar", + iter: ["\\\\.\\foo", "\\", "bar"], + has_root: true, + is_absolute: true, + parent: Some("\\\\.\\foo\\"), + file_name: Some("bar"), + file_stem: Some("bar"), + extension: None, + file_prefix: Some("bar") + ); + + t!("\\\\.\\foo", + iter: ["\\\\.\\foo", "\\"], + has_root: true, + is_absolute: true, + parent: None, + file_name: None, + file_stem: None, + extension: None, + file_prefix: None + ); + + t!("\\\\.\\foo/bar", + iter: ["\\\\.\\foo", "\\", "bar"], + has_root: true, + is_absolute: true, + parent: Some("\\\\.\\foo/"), + file_name: Some("bar"), + file_stem: Some("bar"), + extension: None, + file_prefix: Some("bar") + ); + + t!("\\\\.\\foo\\bar/baz", + iter: ["\\\\.\\foo", "\\", "bar", "baz"], + has_root: true, + is_absolute: true, + parent: Some("\\\\.\\foo\\bar"), + file_name: Some("baz"), + file_stem: Some("baz"), + extension: None, + file_prefix: Some("baz") + ); + + t!("\\\\.\\", + iter: ["\\\\.\\", "\\"], + has_root: true, + is_absolute: true, + parent: None, + file_name: None, + file_stem: None, + extension: None, + file_prefix: None + ); + + t!("\\\\?\\a\\b\\", + iter: ["\\\\?\\a", "\\", "b"], + has_root: true, + is_absolute: true, + parent: Some("\\\\?\\a\\"), + file_name: Some("b"), + file_stem: Some("b"), + extension: None, + file_prefix: Some("b") + ); + + t!("\\\\?\\C:\\foo.txt.zip", + iter: ["\\\\?\\C:", "\\", "foo.txt.zip"], + has_root: true, + is_absolute: true, + parent: Some("\\\\?\\C:\\"), + file_name: Some("foo.txt.zip"), + file_stem: Some("foo.txt"), + extension: Some("zip"), + file_prefix: Some("foo") + ); + + t!("\\\\?\\C:\\.foo.txt.zip", + iter: ["\\\\?\\C:", "\\", ".foo.txt.zip"], + has_root: true, + is_absolute: true, + parent: Some("\\\\?\\C:\\"), + file_name: Some(".foo.txt.zip"), + file_stem: Some(".foo.txt"), + extension: Some("zip"), + file_prefix: Some(".foo") + ); + + t!("\\\\?\\C:\\.foo", + iter: ["\\\\?\\C:", "\\", ".foo"], + has_root: true, + is_absolute: true, + parent: Some("\\\\?\\C:\\"), + file_name: Some(".foo"), + file_stem: Some(".foo"), + extension: None, + file_prefix: Some(".foo") + ); + + t!("a/.x.y.z", + iter: ["a", ".x.y.z"], + has_root: false, + is_absolute: false, + parent: Some("a"), + file_name: Some(".x.y.z"), + file_stem: Some(".x.y"), + extension: Some("z"), + file_prefix: Some(".x") + ); +} + +#[test] +pub fn test_stem_ext() { + t!("foo", + file_stem: Some("foo"), + extension: None + ); + + t!("foo.", + file_stem: Some("foo"), + extension: Some("") + ); + + t!(".foo", + file_stem: Some(".foo"), + extension: None + ); + + t!("foo.txt", + file_stem: Some("foo"), + extension: Some("txt") + ); + + t!("foo.bar.txt", + file_stem: Some("foo.bar"), + extension: Some("txt") + ); + + t!("foo.bar.", + file_stem: Some("foo.bar"), + extension: Some("") + ); + + t!(".", file_stem: None, extension: None); + + t!("..", file_stem: None, extension: None); + + t!(".x.y.z", file_stem: Some(".x.y"), extension: Some("z")); + + t!("..x.y.z", file_stem: Some("..x.y"), extension: Some("z")); + + t!("", file_stem: None, extension: None); +} + +#[test] +pub fn test_prefix_ext() { + t!("foo", + file_prefix: Some("foo"), + extension: None + ); + + t!("foo.", + file_prefix: Some("foo"), + extension: Some("") + ); + + t!(".foo", + file_prefix: Some(".foo"), + extension: None + ); + + t!("foo.txt", + file_prefix: Some("foo"), + extension: Some("txt") + ); + + t!("foo.bar.txt", + file_prefix: Some("foo"), + extension: Some("txt") + ); + + t!("foo.bar.", + file_prefix: Some("foo"), + extension: Some("") + ); + + t!(".", file_prefix: None, extension: None); + + t!("..", file_prefix: None, extension: None); + + t!(".x.y.z", file_prefix: Some(".x"), extension: Some("z")); + + t!("..x.y.z", file_prefix: Some("."), extension: Some("z")); + + t!("", file_prefix: None, extension: None); +} + +#[test] +pub fn test_push() { + macro_rules! tp ( + ($path:expr, $push:expr, $expected:expr) => ({ + let mut actual = PathBuf::from($path); + actual.push($push); + assert!(actual.to_str() == Some($expected), + "pushing {:?} onto {:?}: Expected {:?}, got {:?}", + $push, $path, $expected, actual.to_str().unwrap()); + }); + ); + + if cfg!(unix) || cfg!(all(target_env = "sgx", target_vendor = "fortanix")) { + tp!("", "foo", "foo"); + tp!("foo", "bar", "foo/bar"); + tp!("foo/", "bar", "foo/bar"); + tp!("foo//", "bar", "foo//bar"); + tp!("foo/.", "bar", "foo/./bar"); + tp!("foo./.", "bar", "foo././bar"); + tp!("foo", "", "foo/"); + tp!("foo", ".", "foo/."); + tp!("foo", "..", "foo/.."); + tp!("foo", "/", "/"); + tp!("/foo/bar", "/", "/"); + tp!("/foo/bar", "/baz", "/baz"); + tp!("/foo/bar", "./baz", "/foo/bar/./baz"); + } else { + tp!("", "foo", "foo"); + tp!("foo", "bar", r"foo\bar"); + tp!("foo/", "bar", r"foo/bar"); + tp!(r"foo\", "bar", r"foo\bar"); + tp!("foo//", "bar", r"foo//bar"); + tp!(r"foo\\", "bar", r"foo\\bar"); + tp!("foo/.", "bar", r"foo/.\bar"); + tp!("foo./.", "bar", r"foo./.\bar"); + tp!(r"foo\.", "bar", r"foo\.\bar"); + tp!(r"foo.\.", "bar", r"foo.\.\bar"); + tp!("foo", "", "foo\\"); + tp!("foo", ".", r"foo\."); + tp!("foo", "..", r"foo\.."); + tp!("foo", "/", "/"); + tp!("foo", r"\", r"\"); + tp!("/foo/bar", "/", "/"); + tp!(r"\foo\bar", r"\", r"\"); + tp!("/foo/bar", "/baz", "/baz"); + tp!("/foo/bar", r"\baz", r"\baz"); + tp!("/foo/bar", "./baz", r"/foo/bar\./baz"); + tp!("/foo/bar", r".\baz", r"/foo/bar\.\baz"); + + tp!("c:\\", "windows", "c:\\windows"); + tp!("c:", "windows", "c:windows"); + + tp!("a\\b\\c", "d", "a\\b\\c\\d"); + tp!("\\a\\b\\c", "d", "\\a\\b\\c\\d"); + tp!("a\\b", "c\\d", "a\\b\\c\\d"); + tp!("a\\b", "\\c\\d", "\\c\\d"); + tp!("a\\b", ".", "a\\b\\."); + tp!("a\\b", "..\\c", "a\\b\\..\\c"); + tp!("a\\b", "C:a.txt", "C:a.txt"); + tp!("a\\b", "C:\\a.txt", "C:\\a.txt"); + tp!("C:\\a", "C:\\b.txt", "C:\\b.txt"); + tp!("C:\\a\\b\\c", "C:d", "C:d"); + tp!("C:a\\b\\c", "C:d", "C:d"); + tp!("C:", r"a\b\c", r"C:a\b\c"); + tp!("C:", r"..\a", r"C:..\a"); + tp!("\\\\server\\share\\foo", "bar", "\\\\server\\share\\foo\\bar"); + tp!("\\\\server\\share\\foo", "C:baz", "C:baz"); + tp!("\\\\?\\C:\\a\\b", "C:c\\d", "C:c\\d"); + tp!("\\\\?\\C:a\\b", "C:c\\d", "C:c\\d"); + tp!("\\\\?\\C:\\a\\b", "C:\\c\\d", "C:\\c\\d"); + tp!("\\\\?\\foo\\bar", "baz", "\\\\?\\foo\\bar\\baz"); + tp!("\\\\?\\UNC\\server\\share\\foo", "bar", "\\\\?\\UNC\\server\\share\\foo\\bar"); + tp!("\\\\?\\UNC\\server\\share", "C:\\a", "C:\\a"); + tp!("\\\\?\\UNC\\server\\share", "C:a", "C:a"); + + // Note: modified from old path API + tp!("\\\\?\\UNC\\server", "foo", "\\\\?\\UNC\\server\\foo"); + + tp!("C:\\a", "\\\\?\\UNC\\server\\share", "\\\\?\\UNC\\server\\share"); + tp!("\\\\.\\foo\\bar", "baz", "\\\\.\\foo\\bar\\baz"); + tp!("\\\\.\\foo\\bar", "C:a", "C:a"); + // again, not sure about the following, but I'm assuming \\.\ should be verbatim + tp!("\\\\.\\foo", "..\\bar", "\\\\.\\foo\\..\\bar"); + + tp!("\\\\?\\C:", "foo", "\\\\?\\C:\\foo"); // this is a weird one + + tp!(r"\\?\C:\bar", "../foo", r"\\?\C:\foo"); + tp!(r"\\?\C:\bar", "../../foo", r"\\?\C:\foo"); + tp!(r"\\?\C:\", "../foo", r"\\?\C:\foo"); + tp!(r"\\?\C:", r"D:\foo/./", r"D:\foo/./"); + tp!(r"\\?\C:", r"\\?\D:\foo\.\", r"\\?\D:\foo\.\"); + tp!(r"\\?\A:\x\y", "/foo", r"\\?\A:\foo"); + tp!(r"\\?\A:", r"..\foo\.", r"\\?\A:\foo"); + tp!(r"\\?\A:\x\y", r".\foo\.", r"\\?\A:\x\y\foo"); + tp!(r"\\?\A:\x\y", r"", r"\\?\A:\x\y\"); + } +} + +#[test] +pub fn test_pop() { + macro_rules! tp ( + ($path:expr, $expected:expr, $output:expr) => ({ + let mut actual = PathBuf::from($path); + let output = actual.pop(); + assert!(actual.to_str() == Some($expected) && output == $output, + "popping from {:?}: Expected {:?}/{:?}, got {:?}/{:?}", + $path, $expected, $output, + actual.to_str().unwrap(), output); + }); + ); + + tp!("", "", false); + tp!("/", "/", false); + tp!("foo", "", true); + tp!(".", "", true); + tp!("/foo", "/", true); + tp!("/foo/bar", "/foo", true); + tp!("foo/bar", "foo", true); + tp!("foo/.", "", true); + tp!("foo//bar", "foo", true); + + if cfg!(windows) { + tp!("a\\b\\c", "a\\b", true); + tp!("\\a", "\\", true); + tp!("\\", "\\", false); + + tp!("C:\\a\\b", "C:\\a", true); + tp!("C:\\a", "C:\\", true); + tp!("C:\\", "C:\\", false); + tp!("C:a\\b", "C:a", true); + tp!("C:a", "C:", true); + tp!("C:", "C:", false); + tp!("\\\\server\\share\\a\\b", "\\\\server\\share\\a", true); + tp!("\\\\server\\share\\a", "\\\\server\\share\\", true); + tp!("\\\\server\\share", "\\\\server\\share", false); + tp!("\\\\?\\a\\b\\c", "\\\\?\\a\\b", true); + tp!("\\\\?\\a\\b", "\\\\?\\a\\", true); + tp!("\\\\?\\a", "\\\\?\\a", false); + tp!("\\\\?\\C:\\a\\b", "\\\\?\\C:\\a", true); + tp!("\\\\?\\C:\\a", "\\\\?\\C:\\", true); + tp!("\\\\?\\C:\\", "\\\\?\\C:\\", false); + tp!("\\\\?\\UNC\\server\\share\\a\\b", "\\\\?\\UNC\\server\\share\\a", true); + tp!("\\\\?\\UNC\\server\\share\\a", "\\\\?\\UNC\\server\\share\\", true); + tp!("\\\\?\\UNC\\server\\share", "\\\\?\\UNC\\server\\share", false); + tp!("\\\\.\\a\\b\\c", "\\\\.\\a\\b", true); + tp!("\\\\.\\a\\b", "\\\\.\\a\\", true); + tp!("\\\\.\\a", "\\\\.\\a", false); + + tp!("\\\\?\\a\\b\\", "\\\\?\\a\\", true); + } +} + +#[test] +pub fn test_set_file_name() { + macro_rules! tfn ( + ($path:expr, $file:expr, $expected:expr) => ({ + let mut p = PathBuf::from($path); + p.set_file_name($file); + assert!(p.to_str() == Some($expected), + "setting file name of {:?} to {:?}: Expected {:?}, got {:?}", + $path, $file, $expected, + p.to_str().unwrap()); + }); + ); + + tfn!("foo", "foo", "foo"); + tfn!("foo", "bar", "bar"); + tfn!("foo", "", ""); + tfn!("", "foo", "foo"); + if cfg!(unix) || cfg!(all(target_env = "sgx", target_vendor = "fortanix")) { + tfn!(".", "foo", "./foo"); + tfn!("foo/", "bar", "bar"); + tfn!("foo/.", "bar", "bar"); + tfn!("..", "foo", "../foo"); + tfn!("foo/..", "bar", "foo/../bar"); + tfn!("/", "foo", "/foo"); + } else { + tfn!(".", "foo", r".\foo"); + tfn!(r"foo\", "bar", r"bar"); + tfn!(r"foo\.", "bar", r"bar"); + tfn!("..", "foo", r"..\foo"); + tfn!(r"foo\..", "bar", r"foo\..\bar"); + tfn!(r"\", "foo", r"\foo"); + } +} + +#[test] +pub fn test_set_extension() { + macro_rules! tfe ( + ($path:expr, $ext:expr, $expected:expr, $output:expr) => ({ + let mut p = PathBuf::from($path); + let output = p.set_extension($ext); + assert!(p.to_str() == Some($expected) && output == $output, + "setting extension of {:?} to {:?}: Expected {:?}/{:?}, got {:?}/{:?}", + $path, $ext, $expected, $output, + p.to_str().unwrap(), output); + }); + ); + + tfe!("foo", "txt", "foo.txt", true); + tfe!("foo.bar", "txt", "foo.txt", true); + tfe!("foo.bar.baz", "txt", "foo.bar.txt", true); + tfe!(".test", "txt", ".test.txt", true); + tfe!("foo.txt", "", "foo", true); + tfe!("foo", "", "foo", true); + tfe!("", "foo", "", false); + tfe!(".", "foo", ".", false); + tfe!("foo/", "bar", "foo.bar", true); + tfe!("foo/.", "bar", "foo.bar", true); + tfe!("..", "foo", "..", false); + tfe!("foo/..", "bar", "foo/..", false); + tfe!("/", "foo", "/", false); +} + +#[test] +pub fn test_add_extension() { + macro_rules! tfe ( + ($path:expr, $ext:expr, $expected:expr, $output:expr) => ({ + let mut p = PathBuf::from($path); + let output = p.add_extension($ext); + assert!(p.to_str() == Some($expected) && output == $output, + "adding extension of {:?} to {:?}: Expected {:?}/{:?}, got {:?}/{:?}", + $path, $ext, $expected, $output, + p.to_str().unwrap(), output); + }); + ); + + tfe!("foo", "txt", "foo.txt", true); + tfe!("foo.bar", "txt", "foo.bar.txt", true); + tfe!("foo.bar.baz", "txt", "foo.bar.baz.txt", true); + tfe!(".test", "txt", ".test.txt", true); + tfe!("foo.txt", "", "foo.txt", true); + tfe!("foo", "", "foo", true); + tfe!("", "foo", "", false); + tfe!(".", "foo", ".", false); + tfe!("foo/", "bar", "foo.bar", true); + tfe!("foo/.", "bar", "foo.bar", true); + tfe!("..", "foo", "..", false); + tfe!("foo/..", "bar", "foo/..", false); + tfe!("/", "foo", "/", false); + + // edge cases + tfe!("/foo.ext////", "bar", "/foo.ext.bar", true); +} + +#[test] +pub fn test_with_extension() { + macro_rules! twe ( + ($input:expr, $extension:expr, $expected:expr) => ({ + let input = Path::new($input); + let output = input.with_extension($extension); + + assert!( + output.to_str() == Some($expected), + "calling Path::new({:?}).with_extension({:?}): Expected {:?}, got {:?}", + $input, $extension, $expected, output, + ); + }); + ); + + twe!("foo", "txt", "foo.txt"); + twe!("foo.bar", "txt", "foo.txt"); + twe!("foo.bar.baz", "txt", "foo.bar.txt"); + twe!(".test", "txt", ".test.txt"); + twe!("foo.txt", "", "foo"); + twe!("foo", "", "foo"); + twe!("", "foo", ""); + twe!(".", "foo", "."); + twe!("foo/", "bar", "foo.bar"); + twe!("foo/.", "bar", "foo.bar"); + twe!("..", "foo", ".."); + twe!("foo/..", "bar", "foo/.."); + twe!("/", "foo", "/"); + + // New extension is smaller than file name + twe!("aaa_aaa_aaa", "bbb_bbb", "aaa_aaa_aaa.bbb_bbb"); + // New extension is greater than file name + twe!("bbb_bbb", "aaa_aaa_aaa", "bbb_bbb.aaa_aaa_aaa"); + + // New extension is smaller than previous extension + twe!("ccc.aaa_aaa_aaa", "bbb_bbb", "ccc.bbb_bbb"); + // New extension is greater than previous extension + twe!("ccc.bbb_bbb", "aaa_aaa_aaa", "ccc.aaa_aaa_aaa"); +} + +#[test] +pub fn test_with_added_extension() { + macro_rules! twe ( + ($input:expr, $extension:expr, $expected:expr) => ({ + let input = Path::new($input); + let output = input.with_added_extension($extension); + + assert!( + output.to_str() == Some($expected), + "calling Path::new({:?}).with_added_extension({:?}): Expected {:?}, got {:?}", + $input, $extension, $expected, output, + ); + }); + ); + + twe!("foo", "txt", "foo.txt"); + twe!("foo.bar", "txt", "foo.bar.txt"); + twe!("foo.bar.baz", "txt", "foo.bar.baz.txt"); + twe!(".test", "txt", ".test.txt"); + twe!("foo.txt", "", "foo.txt"); + twe!("foo", "", "foo"); + twe!("", "foo", ""); + twe!(".", "foo", "."); + twe!("foo/", "bar", "foo.bar"); + twe!("foo/.", "bar", "foo.bar"); + twe!("..", "foo", ".."); + twe!("foo/..", "bar", "foo/.."); + twe!("/", "foo", "/"); + + // edge cases + twe!("/foo.ext////", "bar", "/foo.ext.bar"); + + // New extension is smaller than file name + twe!("aaa_aaa_aaa", "bbb_bbb", "aaa_aaa_aaa.bbb_bbb"); + // New extension is greater than file name + twe!("bbb_bbb", "aaa_aaa_aaa", "bbb_bbb.aaa_aaa_aaa"); + + // New extension is smaller than previous extension + twe!("ccc.aaa_aaa_aaa", "bbb_bbb", "ccc.aaa_aaa_aaa.bbb_bbb"); + // New extension is greater than previous extension + twe!("ccc.bbb_bbb", "aaa_aaa_aaa", "ccc.bbb_bbb.aaa_aaa_aaa"); +} + +#[test] +fn test_eq_receivers() { + use crate::borrow::Cow; + + let borrowed: &Path = Path::new("foo/bar"); + let mut owned: PathBuf = PathBuf::new(); + owned.push("foo"); + owned.push("bar"); + let borrowed_cow: Cow<'_, Path> = borrowed.into(); + let owned_cow: Cow<'_, Path> = owned.clone().into(); + + macro_rules! t { + ($($current:expr),+) => { + $( + assert_eq!($current, borrowed); + assert_eq!($current, owned); + assert_eq!($current, borrowed_cow); + assert_eq!($current, owned_cow); + )+ + } + } + + t!(borrowed, owned, borrowed_cow, owned_cow); +} + +#[test] +fn into_boxed() { + let orig: &str = "some/sort/of/path"; + let path = Path::new(orig); + let boxed: Box = Box::from(path); + let path_buf = path.to_owned().into_boxed_path().into_path_buf(); + assert_eq!(path, &*boxed); + assert_eq!(&*boxed, &*path_buf); + assert_eq!(&*path_buf, path); +} + +#[test] +fn test_clone_into() { + let mut path_buf = PathBuf::from("supercalifragilisticexpialidocious"); + let path = Path::new("short"); + path.clone_into(&mut path_buf); + assert_eq!(path, path_buf); + assert!(path_buf.into_os_string().capacity() >= 15); +} + +#[test] +fn into_rc() { + let orig = "hello/world"; + let path = Path::new(orig); + let rc: Rc = Rc::from(path); + let arc: Arc = Arc::from(path); + + assert_eq!(&*rc, path); + assert_eq!(&*arc, path); + + let rc2: Rc = Rc::from(path.to_owned()); + let arc2: Arc = Arc::from(path.to_owned()); + + assert_eq!(&*rc2, path); + assert_eq!(&*arc2, path); +} + +#[test] +#[should_panic = "path separator"] +fn test_extension_path_sep() { + let mut path = PathBuf::from("path/to/file"); + path.set_extension("d/../../../../../etc/passwd"); +} + +#[test] +#[should_panic = "path separator"] +#[cfg(windows)] +fn test_extension_path_sep_alternate() { + let mut path = PathBuf::from("path/to/file"); + path.set_extension("d\\test"); +} + +#[test] +#[cfg(not(windows))] +fn test_extension_path_sep_alternate() { + let mut path = PathBuf::from("path/to/file"); + path.set_extension("d\\test"); + assert_eq!(path, Path::new("path/to/file.d\\test")); +} + +#[bench] +#[cfg_attr(miri, ignore)] // Miri isn't fast... +fn bench_path_cmp_fast_path_buf_sort(b: &mut test::Bencher) { + let prefix = "my/home"; + let mut paths: Vec<_> = + (0..1000).map(|num| PathBuf::from(prefix).join(format!("file {num}.rs"))).collect(); + + paths.sort(); + + b.iter(|| { + black_box(paths.as_mut_slice()).sort_unstable(); + }); +} + +#[bench] +#[cfg_attr(miri, ignore)] // Miri isn't fast... +fn bench_path_cmp_fast_path_long(b: &mut test::Bencher) { + let prefix = "/my/home/is/my/castle/and/my/castle/has/a/rusty/workbench/"; + let paths: Vec<_> = + (0..1000).map(|num| PathBuf::from(prefix).join(format!("file {num}.rs"))).collect(); + + let mut set = BTreeSet::new(); + + paths.iter().for_each(|p| { + set.insert(p.as_path()); + }); + + b.iter(|| { + set.remove(paths[500].as_path()); + set.insert(paths[500].as_path()); + }); +} + +#[bench] +#[cfg_attr(miri, ignore)] // Miri isn't fast... +fn bench_path_cmp_fast_path_short(b: &mut test::Bencher) { + let prefix = "my/home"; + let paths: Vec<_> = + (0..1000).map(|num| PathBuf::from(prefix).join(format!("file {num}.rs"))).collect(); + + let mut set = BTreeSet::new(); + + paths.iter().for_each(|p| { + set.insert(p.as_path()); + }); + + b.iter(|| { + set.remove(paths[500].as_path()); + set.insert(paths[500].as_path()); + }); +} + +#[bench] +#[cfg_attr(miri, ignore)] // Miri isn't fast... +fn bench_path_hashset(b: &mut test::Bencher) { + let prefix = "/my/home/is/my/castle/and/my/castle/has/a/rusty/workbench/"; + let paths: Vec<_> = + (0..1000).map(|num| PathBuf::from(prefix).join(format!("file {num}.rs"))).collect(); + + let mut set = HashSet::new(); + + paths.iter().for_each(|p| { + set.insert(p.as_path()); + }); + + b.iter(|| { + set.remove(paths[500].as_path()); + set.insert(black_box(paths[500].as_path())) + }); +} + +#[bench] +#[cfg_attr(miri, ignore)] // Miri isn't fast... +fn bench_path_hashset_miss(b: &mut test::Bencher) { + let prefix = "/my/home/is/my/castle/and/my/castle/has/a/rusty/workbench/"; + let paths: Vec<_> = + (0..1000).map(|num| PathBuf::from(prefix).join(format!("file {num}.rs"))).collect(); + + let mut set = HashSet::new(); + + paths.iter().for_each(|p| { + set.insert(p.as_path()); + }); + + let probe = PathBuf::from(prefix).join("other"); + + b.iter(|| set.remove(black_box(probe.as_path()))); +} diff --git a/library/core/src/ffi/mod.rs b/library/core/src/ffi/mod.rs index ec1f9052a1564..b0d7993530118 100644 --- a/library/core/src/ffi/mod.rs +++ b/library/core/src/ffi/mod.rs @@ -23,6 +23,22 @@ use crate::fmt; #[unstable(feature = "c_str_module", issue = "112134")] pub mod c_str; +#[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" +)] +#[doc(hidden)] +pub mod os_str; + +#[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" +)] +#[doc(hidden)] +pub mod wtf8; + #[unstable( feature = "c_variadic", issue = "44930", diff --git a/library/core/src/ffi/os_str.rs b/library/core/src/ffi/os_str.rs new file mode 100644 index 0000000000000..f010d7f0e7f0c --- /dev/null +++ b/library/core/src/ffi/os_str.rs @@ -0,0 +1,626 @@ +//! [`OsStr`] abd their related types. + +use crate::clone::CloneToUninit; +use crate::hash::{Hash, Hasher}; +use crate::ops::{self, Range}; +use crate::ptr::addr_of_mut; +use crate::{cmp, fmt, slice}; + +mod private { + /// This trait being unreachable from outside the crate + /// prevents outside implementations of our extension traits. + /// This allows adding more trait methods in the future. + #[unstable(feature = "sealed", issue = "none")] + pub trait Sealed {} +} + +#[cfg(any(target_os = "windows", target_os = "uefi"))] +mod wtf8; + +#[cfg(any(target_os = "windows", target_os = "uefi"))] +#[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" +)] +#[doc(hidden)] +pub use wtf8::Slice; + +#[cfg(not(any(target_os = "windows", target_os = "uefi")))] +mod bytes; + +#[cfg(not(any(target_os = "windows", target_os = "uefi")))] +#[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" +)] +#[doc(hidden)] +pub use bytes::Slice; + +#[cfg(any(target_os = "windows", target_os = "uefi"))] +#[stable(feature = "rust1", since = "1.0.0")] +pub mod os_str_ext_windows; + +#[cfg(not(any(target_os = "windows", target_os = "uefi")))] +#[stable(feature = "rust1", since = "1.0.0")] +pub mod os_str_ext_unix; + +/// Borrowed reference to an OS string (see [`OsString`]). +/// +/// This type represents a borrowed reference to a string in the operating system's preferred +/// representation. +/// +/// `&OsStr` is to [`OsString`] as &[str] is to [`String`]: the +/// former in each pair are borrowed references; the latter are owned strings. +/// +/// See the [module's toplevel documentation about conversions][conversions] for a discussion on +/// the traits which `OsStr` implements for [conversions] from/to native representations. +/// +/// [conversions]: super#conversions +#[cfg_attr(not(test), rustc_diagnostic_item = "OsStr")] +#[stable(feature = "rust1", since = "1.0.0")] +// `OsStr::from_inner` current implementation relies +// on `OsStr` being layout-compatible with `Slice`. +// However, `OsStr` layout is considered an implementation detail and must not be relied upon. +#[repr(transparent)] +#[rustc_has_incoherent_inherent_impls] +pub struct OsStr { + inner: Slice, +} + +/// Allows extension traits within `std`. +#[unstable(feature = "sealed", issue = "none")] +impl private::Sealed for OsStr {} + +impl OsStr { + /// Coerces into an `OsStr` slice. + /// + /// # Examples + /// + /// ``` + /// use std::ffi::OsStr; + /// + /// let os_str = OsStr::new("foo"); + /// ``` + #[inline] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn new + ?Sized>(s: &S) -> &OsStr { + s.as_ref() + } + + /// Converts a slice of bytes to an OS string slice without checking that the string contains + /// valid `OsStr`-encoded data. + /// + /// The byte encoding is an unspecified, platform-specific, self-synchronizing superset of UTF-8. + /// By being a self-synchronizing superset of UTF-8, this encoding is also a superset of 7-bit + /// ASCII. + /// + /// See the [module's toplevel documentation about conversions][conversions] for safe, + /// cross-platform [conversions] from/to native representations. + /// + /// # Safety + /// + /// As the encoding is unspecified, callers must pass in bytes that originated as a mixture of + /// validated UTF-8 and bytes from [`OsStr::as_encoded_bytes`] from within the same Rust version + /// built for the same target platform. For example, reconstructing an `OsStr` from bytes sent + /// over the network or stored in a file will likely violate these safety rules. + /// + /// Due to the encoding being self-synchronizing, the bytes from [`OsStr::as_encoded_bytes`] can be + /// split either immediately before or immediately after any valid non-empty UTF-8 substring. + /// + /// # Example + /// + /// ``` + /// use std::ffi::OsStr; + /// + /// let os_str = OsStr::new("Mary had a little lamb"); + /// let bytes = os_str.as_encoded_bytes(); + /// let words = bytes.split(|b| *b == b' '); + /// let words: Vec<&OsStr> = words.map(|word| { + /// // SAFETY: + /// // - Each `word` only contains content that originated from `OsStr::as_encoded_bytes` + /// // - Only split with ASCII whitespace which is a non-empty UTF-8 substring + /// unsafe { OsStr::from_encoded_bytes_unchecked(word) } + /// }).collect(); + /// ``` + /// + /// [conversions]: super#conversions + #[inline] + #[stable(feature = "os_str_bytes", since = "1.74.0")] + pub unsafe fn from_encoded_bytes_unchecked(bytes: &[u8]) -> &Self { + // SAFETY: unsafe fn + Self::from_inner(unsafe { Slice::from_encoded_bytes_unchecked(bytes) }) + } + + /// Create immutable [`OsStr`] from [`Slice`] + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + #[inline] + #[doc(hidden)] + pub fn from_inner(inner: &Slice) -> &OsStr { + // SAFETY: OsStr is just a wrapper of Slice, + // therefore converting &Slice to &OsStr is safe. + unsafe { &*(inner as *const Slice as *const OsStr) } + } + + /// Create mutable [`OsStr`] from [`Slice`] + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + #[inline] + #[doc(hidden)] + pub fn from_inner_mut(inner: &mut Slice) -> &mut OsStr { + // SAFETY: OsStr is just a wrapper of Slice, + // therefore converting &mut Slice to &mut OsStr is safe. + // Any method that mutates OsStr must be careful not to + // break platform-specific encoding, in particular Wtf8 on Windows. + unsafe { &mut *(inner as *mut Slice as *mut OsStr) } + } + + /// Yields a &[str] slice if the `OsStr` is valid Unicode. + /// + /// This conversion may entail doing a check for UTF-8 validity. + /// + /// # Examples + /// + /// ``` + /// use std::ffi::OsStr; + /// + /// let os_str = OsStr::new("foo"); + /// assert_eq!(os_str.to_str(), Some("foo")); + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + #[must_use = "this returns the result of the operation, \ + without modifying the original"] + #[inline] + pub fn to_str(&self) -> Option<&str> { + self.inner.to_str().ok() + } + + /// Checks whether the `OsStr` is empty. + /// + /// # Examples + /// + /// ``` + /// use std::ffi::OsStr; + /// + /// let os_str = OsStr::new(""); + /// assert!(os_str.is_empty()); + /// + /// let os_str = OsStr::new("foo"); + /// assert!(!os_str.is_empty()); + /// ``` + #[stable(feature = "osstring_simple_functions", since = "1.9.0")] + #[must_use] + #[inline] + pub fn is_empty(&self) -> bool { + self.inner.inner.is_empty() + } + + /// Returns the length of this `OsStr`. + /// + /// Note that this does **not** return the number of bytes in the string in + /// OS string form. + /// + /// The length returned is that of the underlying storage used by `OsStr`. + /// As discussed in the [`OsString`] introduction, [`OsString`] and `OsStr` + /// store strings in a form best suited for cheap inter-conversion between + /// native-platform and Rust string forms, which may differ significantly + /// from both of them, including in storage size and encoding. + /// + /// This number is simply useful for passing to other methods, like + /// [`OsString::with_capacity`] to avoid reallocations. + /// + /// See the main `OsString` documentation information about encoding and capacity units. + /// + /// # Examples + /// + /// ``` + /// use std::ffi::OsStr; + /// + /// let os_str = OsStr::new(""); + /// assert_eq!(os_str.len(), 0); + /// + /// let os_str = OsStr::new("foo"); + /// assert_eq!(os_str.len(), 3); + /// ``` + #[stable(feature = "osstring_simple_functions", since = "1.9.0")] + #[must_use] + #[inline] + pub fn len(&self) -> usize { + self.inner.inner.len() + } + + /// Converts an OS string slice to a byte slice. To convert the byte slice back into an OS + /// string slice, use the [`OsStr::from_encoded_bytes_unchecked`] function. + /// + /// The byte encoding is an unspecified, platform-specific, self-synchronizing superset of UTF-8. + /// By being a self-synchronizing superset of UTF-8, this encoding is also a superset of 7-bit + /// ASCII. + /// + /// Note: As the encoding is unspecified, any sub-slice of bytes that is not valid UTF-8 should + /// be treated as opaque and only comparable within the same Rust version built for the same + /// target platform. For example, sending the slice over the network or storing it in a file + /// will likely result in incompatible byte slices. See [`OsString`] for more encoding details + /// and [`std::ffi`] for platform-specific, specified conversions. + /// + /// [`std::ffi`]: crate::ffi + #[inline] + #[stable(feature = "os_str_bytes", since = "1.74.0")] + pub fn as_encoded_bytes(&self) -> &[u8] { + self.inner.as_encoded_bytes() + } + + /// Takes a substring based on a range that corresponds to the return value of + /// [`OsStr::as_encoded_bytes`]. + /// + /// The range's start and end must lie on valid `OsStr` boundaries. + /// A valid `OsStr` boundary is one of: + /// - The start of the string + /// - The end of the string + /// - Immediately before a valid non-empty UTF-8 substring + /// - Immediately after a valid non-empty UTF-8 substring + /// + /// # Panics + /// + /// Panics if `range` does not lie on valid `OsStr` boundaries or if it + /// exceeds the end of the string. + /// + /// # Example + /// + /// ``` + /// #![feature(os_str_slice)] + /// + /// use std::ffi::OsStr; + /// + /// let os_str = OsStr::new("foo=bar"); + /// let bytes = os_str.as_encoded_bytes(); + /// if let Some(index) = bytes.iter().position(|b| *b == b'=') { + /// let key = os_str.slice_encoded_bytes(..index); + /// let value = os_str.slice_encoded_bytes(index + 1..); + /// assert_eq!(key, "foo"); + /// assert_eq!(value, "bar"); + /// } + /// ``` + #[unstable(feature = "os_str_slice", issue = "118485")] + pub fn slice_encoded_bytes>(&self, range: R) -> &Self { + let encoded_bytes = self.as_encoded_bytes(); + let Range { start, end } = slice::range(range, ..encoded_bytes.len()); + + // `check_public_boundary` should panic if the index does not lie on an + // `OsStr` boundary as described above. It's possible to do this in an + // encoding-agnostic way, but details of the internal encoding might + // permit a more efficient implementation. + self.inner.check_public_boundary(start); + self.inner.check_public_boundary(end); + + // SAFETY: `slice::range` ensures that `start` and `end` are valid + let slice = unsafe { encoded_bytes.get_unchecked(start..end) }; + + // SAFETY: `slice` comes from `self` and we validated the boundaries + unsafe { Self::from_encoded_bytes_unchecked(slice) } + } + + /// Converts this string to its ASCII lower case equivalent in-place. + /// + /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z', + /// but non-ASCII letters are unchanged. + /// + /// To return a new lowercased value without modifying the existing one, use + /// [`OsStr::to_ascii_lowercase`]. + /// + /// # Examples + /// + /// ``` + /// use std::ffi::OsString; + /// + /// let mut s = OsString::from("GRÜßE, JÜRGEN ❤"); + /// + /// s.make_ascii_lowercase(); + /// + /// assert_eq!("grÜße, jÜrgen ❤", s); + /// ``` + #[stable(feature = "osstring_ascii", since = "1.53.0")] + #[inline] + pub fn make_ascii_lowercase(&mut self) { + self.inner.make_ascii_lowercase() + } + + /// Converts this string to its ASCII upper case equivalent in-place. + /// + /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z', + /// but non-ASCII letters are unchanged. + /// + /// To return a new uppercased value without modifying the existing one, use + /// [`OsStr::to_ascii_uppercase`]. + /// + /// # Examples + /// + /// ``` + /// use std::ffi::OsString; + /// + /// let mut s = OsString::from("Grüße, Jürgen ❤"); + /// + /// s.make_ascii_uppercase(); + /// + /// assert_eq!("GRüßE, JüRGEN ❤", s); + /// ``` + #[stable(feature = "osstring_ascii", since = "1.53.0")] + #[inline] + pub fn make_ascii_uppercase(&mut self) { + self.inner.make_ascii_uppercase() + } + + /// Checks if all characters in this string are within the ASCII range. + /// + /// # Examples + /// + /// ``` + /// use std::ffi::OsString; + /// + /// let ascii = OsString::from("hello!\n"); + /// let non_ascii = OsString::from("Grüße, Jürgen ❤"); + /// + /// assert!(ascii.is_ascii()); + /// assert!(!non_ascii.is_ascii()); + /// ``` + #[stable(feature = "osstring_ascii", since = "1.53.0")] + #[must_use] + #[inline] + pub fn is_ascii(&self) -> bool { + self.inner.is_ascii() + } + + /// Checks that two strings are an ASCII case-insensitive match. + /// + /// Same as `to_ascii_lowercase(a) == to_ascii_lowercase(b)`, + /// but without allocating and copying temporaries. + /// + /// # Examples + /// + /// ``` + /// use std::ffi::OsString; + /// + /// assert!(OsString::from("Ferris").eq_ignore_ascii_case("FERRIS")); + /// assert!(OsString::from("Ferrös").eq_ignore_ascii_case("FERRöS")); + /// assert!(!OsString::from("Ferrös").eq_ignore_ascii_case("FERRÖS")); + /// ``` + #[stable(feature = "osstring_ascii", since = "1.53.0")] + pub fn eq_ignore_ascii_case>(&self, other: S) -> bool { + self.inner.eq_ignore_ascii_case(&other.as_ref().inner) + } + + /// Returns an object that implements [`Display`] for safely printing an + /// [`OsStr`] that may contain non-Unicode data. This may perform lossy + /// conversion, depending on the platform. If you would like an + /// implementation which escapes the [`OsStr`] please use [`Debug`] + /// instead. + /// + /// [`Display`]: fmt::Display + /// [`Debug`]: fmt::Debug + /// + /// # Examples + /// + /// ``` + /// #![feature(os_str_display)] + /// use std::ffi::OsStr; + /// + /// let s = OsStr::new("Hello, world!"); + /// println!("{}", s.display()); + /// ``` + #[unstable(feature = "os_str_display", issue = "120048")] + #[must_use = "this does not display the `OsStr`; \ + it returns an object that can be displayed"] + #[inline] + pub fn display(&self) -> Display<'_> { + Display { os_str: self } + } + + /// Get inner slice + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + #[inline] + #[doc(hidden)] + pub fn as_inner(&self) -> &Slice { + &self.inner + } +} + +#[unstable(feature = "clone_to_uninit", issue = "126799")] +unsafe impl CloneToUninit for OsStr { + #[inline] + #[cfg_attr(debug_assertions, track_caller)] + unsafe fn clone_to_uninit(&self, dst: *mut Self) { + // SAFETY: we're just a wrapper around a platform-specific Slice + unsafe { self.inner.clone_to_uninit(addr_of_mut!((*dst).inner)) } + } +} + +#[stable(feature = "str_tryfrom_osstr_impl", since = "1.72.0")] +impl<'a> TryFrom<&'a OsStr> for &'a str { + type Error = crate::str::Utf8Error; + + /// Tries to convert an `&OsStr` to a `&str`. + /// + /// ``` + /// use std::ffi::OsStr; + /// + /// let os_str = OsStr::new("foo"); + /// let as_str = <&str>::try_from(os_str).unwrap(); + /// assert_eq!(as_str, "foo"); + /// ``` + fn try_from(value: &'a OsStr) -> Result { + value.inner.to_str() + } +} + +#[stable(feature = "osstring_default", since = "1.9.0")] +impl Default for &OsStr { + /// Creates an empty `OsStr`. + #[inline] + fn default() -> Self { + OsStr::new("") + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl PartialEq for OsStr { + #[inline] + fn eq(&self, other: &OsStr) -> bool { + self.as_encoded_bytes().eq(other.as_encoded_bytes()) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl PartialEq for OsStr { + #[inline] + fn eq(&self, other: &str) -> bool { + *self == *OsStr::new(other) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl PartialEq for str { + #[inline] + fn eq(&self, other: &OsStr) -> bool { + *other == *OsStr::new(self) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl Eq for OsStr {} + +#[stable(feature = "rust1", since = "1.0.0")] +impl PartialOrd for OsStr { + #[inline] + fn partial_cmp(&self, other: &OsStr) -> Option { + self.as_encoded_bytes().partial_cmp(other.as_encoded_bytes()) + } + #[inline] + fn lt(&self, other: &OsStr) -> bool { + self.as_encoded_bytes().lt(other.as_encoded_bytes()) + } + #[inline] + fn le(&self, other: &OsStr) -> bool { + self.as_encoded_bytes().le(other.as_encoded_bytes()) + } + #[inline] + fn gt(&self, other: &OsStr) -> bool { + self.as_encoded_bytes().gt(other.as_encoded_bytes()) + } + #[inline] + fn ge(&self, other: &OsStr) -> bool { + self.as_encoded_bytes().ge(other.as_encoded_bytes()) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl PartialOrd for OsStr { + #[inline] + fn partial_cmp(&self, other: &str) -> Option { + self.partial_cmp(OsStr::new(other)) + } +} + +// FIXME (#19470): cannot provide PartialOrd for str until we +// have more flexible coherence rules. + +#[stable(feature = "rust1", since = "1.0.0")] +impl Ord for OsStr { + #[inline] + fn cmp(&self, other: &OsStr) -> cmp::Ordering { + self.as_encoded_bytes().cmp(other.as_encoded_bytes()) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl Hash for OsStr { + #[inline] + fn hash(&self, state: &mut H) { + self.as_encoded_bytes().hash(state) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl fmt::Debug for OsStr { + fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Debug::fmt(&self.inner, formatter) + } +} + +/// Helper struct for safely printing an [`OsStr`] with [`format!`] and `{}`. +/// +/// An [`OsStr`] might contain non-Unicode data. This `struct` implements the +/// [`Display`] trait in a way that mitigates that. It is created by the +/// [`display`](OsStr::display) method on [`OsStr`]. This may perform lossy +/// conversion, depending on the platform. If you would like an implementation +/// which escapes the [`OsStr`] please use [`Debug`] instead. +/// +/// # Examples +/// +/// ``` +/// #![feature(os_str_display)] +/// use std::ffi::OsStr; +/// +/// let s = OsStr::new("Hello, world!"); +/// println!("{}", s.display()); +/// ``` +/// +/// [`Display`]: fmt::Display +/// [`format!`]: crate::format +#[unstable(feature = "os_str_display", issue = "120048")] +pub struct Display<'a> { + os_str: &'a OsStr, +} + +#[unstable(feature = "os_str_display", issue = "120048")] +impl fmt::Debug for Display<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Debug::fmt(&self.os_str, f) + } +} + +#[unstable(feature = "os_str_display", issue = "120048")] +impl fmt::Display for Display<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Display::fmt(&self.os_str.inner, f) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl AsRef for OsStr { + #[inline] + fn as_ref(&self) -> &OsStr { + self + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl AsRef for str { + #[inline] + fn as_ref(&self) -> &OsStr { + OsStr::from_inner(Slice::from_str(self)) + } +} + +#[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" +)] +#[doc(hidden)] +impl AsRef for OsStr { + #[inline] + fn as_ref(&self) -> &Slice { + &self.inner + } +} diff --git a/library/core/src/ffi/os_str/bytes.rs b/library/core/src/ffi/os_str/bytes.rs new file mode 100644 index 0000000000000..d51045878cd23 --- /dev/null +++ b/library/core/src/ffi/os_str/bytes.rs @@ -0,0 +1,202 @@ +#![allow(missing_docs)] +#![allow(missing_debug_implementations)] + +//! The underlying OsString/OsStr implementation on Unix and many other +//! systems: just a `Vec`/`[u8]`. + +use crate::clone::CloneToUninit; +use crate::fmt::Write; +use crate::ptr::addr_of_mut; +use crate::{fmt, mem, str}; + +#[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" +)] +#[repr(transparent)] +#[rustc_has_incoherent_inherent_impls] +pub struct Slice { + pub inner: [u8], +} + +#[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" +)] +impl fmt::Debug for Slice { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Debug::fmt(&self.inner.utf8_chunks().debug(), f) + } +} + +#[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" +)] +impl fmt::Display for Slice { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + // If we're the empty string then our iterator won't actually yield + // anything, so perform the formatting manually + if self.inner.is_empty() { + return "".fmt(f); + } + + for chunk in self.inner.utf8_chunks() { + let valid = chunk.valid(); + // If we successfully decoded the whole chunk as a valid string then + // we can return a direct formatting of the string which will also + // respect various formatting flags if possible. + if chunk.invalid().is_empty() { + return valid.fmt(f); + } + + f.write_str(valid)?; + f.write_char(char::REPLACEMENT_CHARACTER)?; + } + Ok(()) + } +} + +impl Slice { + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + #[inline] + pub fn as_encoded_bytes(&self) -> &[u8] { + &self.inner + } + + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + #[inline] + pub unsafe fn from_encoded_bytes_unchecked(s: &[u8]) -> &Slice { + // SAFETY: Slice is just a wrapper of [u8] + unsafe { mem::transmute(s) } + } + + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + #[track_caller] + #[inline] + pub fn check_public_boundary(&self, index: usize) { + if index == 0 || index == self.inner.len() { + return; + } + if index < self.inner.len() + && (self.inner[index - 1].is_ascii() || self.inner[index].is_ascii()) + { + return; + } + + slow_path(&self.inner, index); + + /// We're betting that typical splits will involve an ASCII character. + /// + /// Putting the expensive checks in a separate function generates notably + /// better assembly. + #[track_caller] + #[inline(never)] + fn slow_path(bytes: &[u8], index: usize) { + let (before, after) = bytes.split_at(index); + + // UTF-8 takes at most 4 bytes per codepoint, so we don't + // need to check more than that. + let after = after.get(..4).unwrap_or(after); + match str::from_utf8(after) { + Ok(_) => return, + Err(err) if err.valid_up_to() != 0 => return, + Err(_) => (), + } + + for len in 2..=4.min(index) { + let before = &before[index - len..]; + if str::from_utf8(before).is_ok() { + return; + } + } + + panic!("byte index {index} is not an OsStr boundary"); + } + } + + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + #[inline] + pub fn from_str(s: &str) -> &Slice { + // SAFETY: Slice is just a wrapper of [u8] + unsafe { Slice::from_encoded_bytes_unchecked(s.as_bytes()) } + } + + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + pub fn to_str(&self) -> Result<&str, crate::str::Utf8Error> { + str::from_utf8(&self.inner) + } + + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + #[inline] + pub fn make_ascii_lowercase(&mut self) { + self.inner.make_ascii_lowercase() + } + + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + #[inline] + pub fn make_ascii_uppercase(&mut self) { + self.inner.make_ascii_uppercase() + } + + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + #[inline] + pub fn is_ascii(&self) -> bool { + self.inner.is_ascii() + } + + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + #[inline] + pub fn eq_ignore_ascii_case(&self, other: &Self) -> bool { + self.inner.eq_ignore_ascii_case(&other.inner) + } +} + +#[unstable(feature = "clone_to_uninit", issue = "126799")] +unsafe impl CloneToUninit for Slice { + #[inline] + #[cfg_attr(debug_assertions, track_caller)] + unsafe fn clone_to_uninit(&self, dst: *mut Self) { + // SAFETY: we're just a wrapper around [u8] + unsafe { self.inner.clone_to_uninit(addr_of_mut!((*dst).inner)) } + } +} diff --git a/library/core/src/ffi/os_str/os_str_ext_unix.rs b/library/core/src/ffi/os_str/os_str_ext_unix.rs new file mode 100644 index 0000000000000..f2040c62ff77c --- /dev/null +++ b/library/core/src/ffi/os_str/os_str_ext_unix.rs @@ -0,0 +1,36 @@ +//! [`OsStrExt`] for unix. + +use super::{private, OsStr}; +use crate::mem; + +/// Platform-specific extensions to [`OsStr`]. +/// +/// This trait is sealed: it cannot be implemented outside the standard library. +/// This is so that future additional methods are not breaking changes. +#[stable(feature = "rust1", since = "1.0.0")] +pub trait OsStrExt: private::Sealed { + #[stable(feature = "rust1", since = "1.0.0")] + /// Creates an [`OsStr`] from a byte slice. + /// + /// See the module documentation for an example. + fn from_bytes(slice: &[u8]) -> &Self; + + /// Gets the underlying byte view of the [`OsStr`] slice. + /// + /// See the module documentation for an example. + #[stable(feature = "rust1", since = "1.0.0")] + fn as_bytes(&self) -> &[u8]; +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl OsStrExt for OsStr { + #[inline] + fn from_bytes(slice: &[u8]) -> &OsStr { + // SAFETY: OsStr is just a wrapper of [u8] + unsafe { mem::transmute(slice) } + } + #[inline] + fn as_bytes(&self) -> &[u8] { + &self.inner.inner + } +} diff --git a/library/core/src/ffi/os_str/os_str_ext_windows.rs b/library/core/src/ffi/os_str/os_str_ext_windows.rs new file mode 100644 index 0000000000000..6205b74872e55 --- /dev/null +++ b/library/core/src/ffi/os_str/os_str_ext_windows.rs @@ -0,0 +1,44 @@ +//! [`OsStrExt`] for windows + +use super::{private, OsStr}; +#[stable(feature = "rust1", since = "1.0.0")] +pub use crate::ffi::wtf8::EncodeWide; + +/// Windows-specific extensions to [`OsStr`]. +/// +/// This trait is sealed: it cannot be implemented outside the standard library. +/// This is so that future additional methods are not breaking changes. +#[stable(feature = "rust1", since = "1.0.0")] +pub trait OsStrExt: private::Sealed { + /// Re-encodes an `OsStr` as a wide character sequence, i.e., potentially + /// ill-formed UTF-16. + /// + /// This is lossless: calling [`OsStringExt::from_wide`] and then + /// `encode_wide` on the result will yield the original code units. + /// Note that the encoding does not add a final null terminator. + /// + /// # Examples + /// + /// ``` + /// use std::ffi::OsString; + /// use std::os::windows::prelude::*; + /// + /// // UTF-16 encoding for "Unicode". + /// let source = [0x0055, 0x006E, 0x0069, 0x0063, 0x006F, 0x0064, 0x0065]; + /// + /// let string = OsString::from_wide(&source[..]); + /// + /// let result: Vec = string.encode_wide().collect(); + /// assert_eq!(&source[..], &result[..]); + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + fn encode_wide(&self) -> EncodeWide<'_>; +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl OsStrExt for OsStr { + #[inline] + fn encode_wide(&self) -> EncodeWide<'_> { + self.inner.inner.encode_wide() + } +} diff --git a/library/core/src/ffi/os_str/wtf8.rs b/library/core/src/ffi/os_str/wtf8.rs new file mode 100644 index 0000000000000..4e3ff8d2400bd --- /dev/null +++ b/library/core/src/ffi/os_str/wtf8.rs @@ -0,0 +1,145 @@ +#![allow(missing_docs)] +#![allow(missing_debug_implementations)] + +//! The underlying OsString/OsStr implementation on Windows is a +//! wrapper around the "WTF-8" encoding; see the `wtf8` module for more. +use crate::clone::CloneToUninit; +use crate::ffi::wtf8::{check_utf8_boundary, Wtf8}; +use crate::ptr::addr_of_mut; +use crate::{fmt, mem}; + +#[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" +)] +#[repr(transparent)] +#[rustc_has_incoherent_inherent_impls] +pub struct Slice { + pub inner: Wtf8, +} + +#[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" +)] +impl fmt::Debug for Slice { + fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Debug::fmt(&self.inner, formatter) + } +} + +#[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" +)] +impl fmt::Display for Slice { + fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Display::fmt(&self.inner, formatter) + } +} + +impl Slice { + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + #[inline] + pub fn as_encoded_bytes(&self) -> &[u8] { + self.inner.as_bytes() + } + + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + #[inline] + pub unsafe fn from_encoded_bytes_unchecked(s: &[u8]) -> &Slice { + // SAFETY:: Slice is just a wrapper of Wtf8 + unsafe { mem::transmute(Wtf8::from_bytes_unchecked(s)) } + } + + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + #[track_caller] + pub fn check_public_boundary(&self, index: usize) { + check_utf8_boundary(&self.inner, index); + } + + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + #[inline] + pub fn from_str(s: &str) -> &Slice { + // SAFETY: Slice is just a wrapper of wtf8 + unsafe { mem::transmute(Wtf8::from_str(s)) } + } + + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + pub fn to_str(&self) -> Result<&str, crate::str::Utf8Error> { + self.inner.as_str() + } + + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + #[inline] + pub fn make_ascii_lowercase(&mut self) { + self.inner.make_ascii_lowercase() + } + + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + #[inline] + pub fn make_ascii_uppercase(&mut self) { + self.inner.make_ascii_uppercase() + } + + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + #[inline] + pub fn is_ascii(&self) -> bool { + self.inner.is_ascii() + } + + #[unstable( + feature = "os_str_internals", + reason = "internal details of the implementation of os str", + issue = "none" + )] + #[inline] + pub fn eq_ignore_ascii_case(&self, other: &Self) -> bool { + self.inner.eq_ignore_ascii_case(&other.inner) + } +} + +#[unstable(feature = "clone_to_uninit", issue = "126799")] +unsafe impl CloneToUninit for Slice { + #[inline] + #[cfg_attr(debug_assertions, track_caller)] + unsafe fn clone_to_uninit(&self, dst: *mut Self) { + // SAFETY: we're just a wrapper around Wtf8 + unsafe { self.inner.clone_to_uninit(addr_of_mut!((*dst).inner)) } + } +} diff --git a/library/core/src/ffi/wtf8.rs b/library/core/src/ffi/wtf8.rs new file mode 100644 index 0000000000000..00b7abfbac1a8 --- /dev/null +++ b/library/core/src/ffi/wtf8.rs @@ -0,0 +1,595 @@ +#![allow(missing_docs)] +#![allow(missing_debug_implementations)] + +//! Implementation of [the WTF-8 encoding](https://simonsapin.github.io/wtf-8/). +//! +//! This library uses Rust’s type system to maintain +//! [well-formedness](https://simonsapin.github.io/wtf-8/#well-formed), +//! like the `String` and `&str` types do for UTF-8. +//! +//! Since [WTF-8 must not be used +//! for interchange](https://simonsapin.github.io/wtf-8/#intended-audience), +//! this library deliberately does not provide access to the underlying bytes +//! of WTF-8 strings, +//! nor can it decode WTF-8 from arbitrary bytes. +//! WTF-8 strings can be obtained from UTF-8, UTF-16, or code points. + +// this module is imported from @SimonSapin's repo and has tons of dead code on +// unix (it's mostly used on windows), so don't worry about dead code here. +#![allow(dead_code)] + +use crate::char::encode_utf16_raw; +use crate::clone::CloneToUninit; +use crate::hash::{Hash, Hasher}; +use crate::iter::FusedIterator; +use crate::ptr::addr_of_mut; +use crate::str::next_code_point; +use crate::{fmt, ops, slice, str}; + +pub const UTF8_REPLACEMENT_CHARACTER: &str = "\u{FFFD}"; + +/// A Unicode code point: from U+0000 to U+10FFFF. +/// +/// Compares with the `char` type, +/// which represents a Unicode scalar value: +/// a code point that is not a surrogate (U+D800 to U+DFFF). +#[derive(Eq, PartialEq, Ord, PartialOrd, Clone, Copy)] +pub struct CodePoint { + value: u32, +} + +/// Format the code point as `U+` followed by four to six hexadecimal digits. +/// Example: `U+1F4A9` +impl fmt::Debug for CodePoint { + #[inline] + fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(formatter, "U+{:04X}", self.value) + } +} + +impl CodePoint { + /// Unsafely creates a new `CodePoint` without checking the value. + /// + /// Only use when `value` is known to be less than or equal to 0x10FFFF. + #[inline] + pub unsafe fn from_u32_unchecked(value: u32) -> CodePoint { + CodePoint { value } + } + + /// Creates a new `CodePoint` if the value is a valid code point. + /// + /// Returns `None` if `value` is above 0x10FFFF. + #[inline] + pub fn from_u32(value: u32) -> Option { + match value { + 0..=0x10FFFF => Some(CodePoint { value }), + _ => None, + } + } + + /// Creates a new `CodePoint` from a `char`. + /// + /// Since all Unicode scalar values are code points, this always succeeds. + #[inline] + pub fn from_char(value: char) -> CodePoint { + CodePoint { value: value as u32 } + } + + /// Returns the numeric value of the code point. + #[inline] + pub fn to_u32(&self) -> u32 { + self.value + } + + /// Returns the numeric value of the code point if it is a leading surrogate. + #[inline] + pub fn to_lead_surrogate(&self) -> Option { + match self.value { + lead @ 0xD800..=0xDBFF => Some(lead as u16), + _ => None, + } + } + + /// Returns the numeric value of the code point if it is a trailing surrogate. + #[inline] + pub fn to_trail_surrogate(&self) -> Option { + match self.value { + trail @ 0xDC00..=0xDFFF => Some(trail as u16), + _ => None, + } + } + + /// Optionally returns a Unicode scalar value for the code point. + /// + /// Returns `None` if the code point is a surrogate (from U+D800 to U+DFFF). + #[inline] + pub fn to_char(&self) -> Option { + match self.value { + 0xD800..=0xDFFF => None, + // SAFETY: self.value is valid char + _ => Some(unsafe { char::from_u32_unchecked(self.value) }), + } + } + + /// Returns a Unicode scalar value for the code point. + /// + /// Returns `'\u{FFFD}'` (the replacement character “�”) + /// if the code point is a surrogate (from U+D800 to U+DFFF). + #[inline] + pub fn to_char_lossy(&self) -> char { + self.to_char().unwrap_or('\u{FFFD}') + } +} + +/// A borrowed slice of well-formed WTF-8 data. +/// +/// Similar to `&str`, but can additionally contain surrogate code points +/// if they’re not in a surrogate pair. +#[derive(Eq, Ord, PartialEq, PartialOrd)] +#[repr(transparent)] +#[rustc_has_incoherent_inherent_impls] +pub struct Wtf8 { + bytes: [u8], +} + +impl AsRef<[u8]> for Wtf8 { + #[inline] + fn as_ref(&self) -> &[u8] { + &self.bytes + } +} + +/// Format the slice with double quotes, +/// and surrogates as `\u` followed by four hexadecimal digits. +/// Example: `"a\u{D800}"` for a slice with code points [U+0061, U+D800] +impl fmt::Debug for Wtf8 { + fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { + fn write_str_escaped(f: &mut fmt::Formatter<'_>, s: &str) -> fmt::Result { + use crate::fmt::Write; + for c in s.chars().flat_map(|c| c.escape_debug()) { + f.write_char(c)? + } + Ok(()) + } + + formatter.write_str("\"")?; + let mut pos = 0; + while let Some((surrogate_pos, surrogate)) = self.next_surrogate(pos) { + // SAFETY: self.bytes[pos..surrogate_pos] is valid utf-8 + write_str_escaped(formatter, unsafe { + str::from_utf8_unchecked(&self.bytes[pos..surrogate_pos]) + })?; + write!(formatter, "\\u{{{:x}}}", surrogate)?; + pos = surrogate_pos + 3; + } + // SAFETY: self.bytes[pos..] is valid utf-8 + write_str_escaped(formatter, unsafe { str::from_utf8_unchecked(&self.bytes[pos..]) })?; + formatter.write_str("\"") + } +} + +impl fmt::Display for Wtf8 { + fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { + let wtf8_bytes = &self.bytes; + let mut pos = 0; + loop { + match self.next_surrogate(pos) { + Some((surrogate_pos, _)) => { + // SAFETY: self.bytes[pos..surrogate_pos] is valid utf-8 + formatter.write_str(unsafe { + str::from_utf8_unchecked(&wtf8_bytes[pos..surrogate_pos]) + })?; + formatter.write_str(UTF8_REPLACEMENT_CHARACTER)?; + pos = surrogate_pos + 3; + } + None => { + // SAFETY: self.bytes[pos..] is valid utf-8 + let s = unsafe { str::from_utf8_unchecked(&wtf8_bytes[pos..]) }; + if pos == 0 { + return s.fmt(formatter); + } else { + return formatter.write_str(s); + } + } + } + } + } +} + +impl Wtf8 { + /// Creates a WTF-8 slice from a UTF-8 `&str` slice. + /// + /// Since WTF-8 is a superset of UTF-8, this always succeeds. + #[inline] + pub fn from_str(value: &str) -> &Wtf8 { + // SAFETY: value is valid utf-8 + unsafe { Wtf8::from_bytes_unchecked(value.as_bytes()) } + } + + /// Creates a WTF-8 slice from a WTF-8 byte slice. + /// + /// Since the byte slice is not checked for valid WTF-8, this functions is + /// marked unsafe. + #[inline] + pub unsafe fn from_bytes_unchecked(value: &[u8]) -> &Wtf8 { + // SAFETY: start with &[u8], end with fancy &[u8] + unsafe { &*(value as *const [u8] as *const Wtf8) } + } + + /// Creates a mutable WTF-8 slice from a mutable WTF-8 byte slice. + /// + /// Since the byte slice is not checked for valid WTF-8, this functions is + /// marked unsafe. + #[inline] + pub unsafe fn from_mut_bytes_unchecked(value: &mut [u8]) -> &mut Wtf8 { + // SAFETY: start with &mut [u8], end with fancy &mut [u8] + unsafe { &mut *(value as *mut [u8] as *mut Wtf8) } + } + + /// Returns the length, in WTF-8 bytes. + #[inline] + pub fn len(&self) -> usize { + self.bytes.len() + } + + #[inline] + pub fn is_empty(&self) -> bool { + self.bytes.is_empty() + } + + /// Returns the code point at `position` if it is in the ASCII range, + /// or `b'\xFF'` otherwise. + /// + /// # Panics + /// + /// Panics if `position` is beyond the end of the string. + #[inline] + pub fn ascii_byte_at(&self, position: usize) -> u8 { + match self.bytes[position] { + ascii_byte @ 0x00..=0x7F => ascii_byte, + _ => 0xFF, + } + } + + /// Returns an iterator for the string’s code points. + #[inline] + pub fn code_points(&self) -> Wtf8CodePoints<'_> { + Wtf8CodePoints { bytes: self.bytes.iter() } + } + + /// Access raw bytes of WTF-8 data + #[inline] + pub fn as_bytes(&self) -> &[u8] { + &self.bytes + } + + /// Tries to convert the string to UTF-8 and return a `&str` slice. + /// + /// Returns `None` if the string contains surrogates. + /// + /// This does not copy the data. + #[inline] + pub fn as_str(&self) -> Result<&str, str::Utf8Error> { + str::from_utf8(&self.bytes) + } + + /// Converts the WTF-8 string to potentially ill-formed UTF-16 + /// and return an iterator of 16-bit code units. + /// + /// This is lossless: + /// calling `Wtf8Buf::from_ill_formed_utf16` on the resulting code units + /// would always return the original WTF-8 string. + #[inline] + pub fn encode_wide(&self) -> EncodeWide<'_> { + EncodeWide { code_points: self.code_points(), extra: 0 } + } + + /// Next suroogate + #[inline] + pub fn next_surrogate(&self, mut pos: usize) -> Option<(usize, u16)> { + let mut iter = self.bytes[pos..].iter(); + loop { + let b = *iter.next()?; + if b < 0x80 { + pos += 1; + } else if b < 0xE0 { + iter.next(); + pos += 2; + } else if b == 0xED { + match (iter.next(), iter.next()) { + (Some(&b2), Some(&b3)) if b2 >= 0xA0 => { + return Some((pos, decode_surrogate(b2, b3))); + } + _ => pos += 3, + } + } else if b < 0xF0 { + iter.next(); + iter.next(); + pos += 3; + } else { + iter.next(); + iter.next(); + iter.next(); + pos += 4; + } + } + } + + /// Get final suroogate + #[inline] + pub fn final_lead_surrogate(&self) -> Option { + match self.bytes { + [.., 0xED, b2 @ 0xA0..=0xAF, b3] => Some(decode_surrogate(b2, b3)), + _ => None, + } + } + + /// Get initial trai suroogate + #[inline] + pub fn initial_trail_surrogate(&self) -> Option { + match self.bytes { + [0xED, b2 @ 0xB0..=0xBF, b3, ..] => Some(decode_surrogate(b2, b3)), + _ => None, + } + } + + #[inline] + pub fn make_ascii_lowercase(&mut self) { + self.bytes.make_ascii_lowercase() + } + + #[inline] + pub fn make_ascii_uppercase(&mut self) { + self.bytes.make_ascii_uppercase() + } + + #[inline] + pub fn is_ascii(&self) -> bool { + self.bytes.is_ascii() + } + + #[inline] + pub fn eq_ignore_ascii_case(&self, other: &Self) -> bool { + self.bytes.eq_ignore_ascii_case(&other.bytes) + } +} + +/// Returns a slice of the given string for the byte range \[`begin`..`end`). +/// +/// # Panics +/// +/// Panics when `begin` and `end` do not point to code point boundaries, +/// or point beyond the end of the string. +impl ops::Index> for Wtf8 { + type Output = Wtf8; + + #[inline] + fn index(&self, range: ops::Range) -> &Wtf8 { + // is_code_point_boundary checks that the index is in [0, .len()] + if range.start <= range.end + && is_code_point_boundary(self, range.start) + && is_code_point_boundary(self, range.end) + { + // SAFETY: start and end is at boundary + unsafe { slice_unchecked(self, range.start, range.end) } + } else { + slice_error_fail(self, range.start, range.end) + } + } +} + +/// Returns a slice of the given string from byte `begin` to its end. +/// +/// # Panics +/// +/// Panics when `begin` is not at a code point boundary, +/// or is beyond the end of the string. +impl ops::Index> for Wtf8 { + type Output = Wtf8; + + #[inline] + fn index(&self, range: ops::RangeFrom) -> &Wtf8 { + // is_code_point_boundary checks that the index is in [0, .len()] + if is_code_point_boundary(self, range.start) { + // SAFETY: start is at boundary + unsafe { slice_unchecked(self, range.start, self.len()) } + } else { + slice_error_fail(self, range.start, self.len()) + } + } +} + +/// Returns a slice of the given string from its beginning to byte `end`. +/// +/// # Panics +/// +/// Panics when `end` is not at a code point boundary, +/// or is beyond the end of the string. +impl ops::Index> for Wtf8 { + type Output = Wtf8; + + #[inline] + fn index(&self, range: ops::RangeTo) -> &Wtf8 { + // is_code_point_boundary checks that the index is in [0, .len()] + if is_code_point_boundary(self, range.end) { + // SAFETY: end is at boundary + unsafe { slice_unchecked(self, 0, range.end) } + } else { + slice_error_fail(self, 0, range.end) + } + } +} + +impl ops::Index for Wtf8 { + type Output = Wtf8; + + #[inline] + fn index(&self, _range: ops::RangeFull) -> &Wtf8 { + self + } +} + +#[inline] +pub fn decode_surrogate(second_byte: u8, third_byte: u8) -> u16 { + // The first byte is assumed to be 0xED + 0xD800 | (second_byte as u16 & 0x3F) << 6 | third_byte as u16 & 0x3F +} + +#[inline] +pub fn decode_surrogate_pair(lead: u16, trail: u16) -> char { + let code_point = 0x10000 + ((((lead - 0xD800) as u32) << 10) | (trail - 0xDC00) as u32); + // SAFETY: code_point is valid char + unsafe { char::from_u32_unchecked(code_point) } +} + +/// Copied from str::is_char_boundary +#[inline] +pub fn is_code_point_boundary(slice: &Wtf8, index: usize) -> bool { + if index == 0 { + return true; + } + match slice.bytes.get(index) { + None => index == slice.len(), + Some(&b) => (b as i8) >= -0x40, + } +} + +/// Verify that `index` is at the edge of either a valid UTF-8 codepoint +/// (i.e. a codepoint that's not a surrogate) or of the whole string. +/// +/// These are the cases currently permitted by `OsStr::slice_encoded_bytes`. +/// Splitting between surrogates is valid as far as WTF-8 is concerned, but +/// we do not permit it in the public API because WTF-8 is considered an +/// implementation detail. +#[track_caller] +#[inline] +pub fn check_utf8_boundary(slice: &Wtf8, index: usize) { + if index == 0 { + return; + } + match slice.bytes.get(index) { + Some(0xED) => (), // Might be a surrogate + Some(&b) if (b as i8) >= -0x40 => return, + Some(_) => panic!("byte index {index} is not a codepoint boundary"), + None if index == slice.len() => return, + None => panic!("byte index {index} is out of bounds"), + } + if slice.bytes[index + 1] >= 0xA0 { + // There's a surrogate after index. Now check before index. + if index >= 3 && slice.bytes[index - 3] == 0xED && slice.bytes[index - 2] >= 0xA0 { + panic!("byte index {index} lies between surrogate codepoints"); + } + } +} + +/// Copied from core::str::raw::slice_unchecked +#[inline] +pub unsafe fn slice_unchecked(s: &Wtf8, begin: usize, end: usize) -> &Wtf8 { + // SAFETY: memory layout of a &[u8] and &Wtf8 are the same + unsafe { + let len = end - begin; + let start = s.as_bytes().as_ptr().add(begin); + Wtf8::from_bytes_unchecked(slice::from_raw_parts(start, len)) + } +} + +/// Copied from core::str::raw::slice_error_fail +#[inline(never)] +pub fn slice_error_fail(s: &Wtf8, begin: usize, end: usize) -> ! { + assert!(begin <= end); + panic!("index {begin} and/or {end} in `{s:?}` do not lie on character boundary"); +} + +/// Iterator for the code points of a WTF-8 string. +/// +/// Created with the method `.code_points()`. +#[derive(Clone)] +pub struct Wtf8CodePoints<'a> { + bytes: slice::Iter<'a, u8>, +} + +impl<'a> Iterator for Wtf8CodePoints<'a> { + type Item = CodePoint; + + #[inline] + fn next(&mut self) -> Option { + // SAFETY: `self.bytes` has been created from a WTF-8 string + unsafe { next_code_point(&mut self.bytes).map(|c| CodePoint { value: c }) } + } + + #[inline] + fn size_hint(&self) -> (usize, Option) { + let len = self.bytes.len(); + (len.saturating_add(3) / 4, Some(len)) + } +} + +/// Generates a wide character sequence for potentially ill-formed UTF-16. +#[stable(feature = "rust1", since = "1.0.0")] +#[derive(Clone)] +pub struct EncodeWide<'a> { + code_points: Wtf8CodePoints<'a>, + extra: u16, +} + +// Copied from libunicode/u_str.rs +#[stable(feature = "rust1", since = "1.0.0")] +impl<'a> Iterator for EncodeWide<'a> { + type Item = u16; + + #[inline] + fn next(&mut self) -> Option { + if self.extra != 0 { + let tmp = self.extra; + self.extra = 0; + return Some(tmp); + } + + let mut buf = [0; 2]; + self.code_points.next().map(|code_point| { + let n = encode_utf16_raw(code_point.value, &mut buf).len(); + if n == 2 { + self.extra = buf[1]; + } + buf[0] + }) + } + + #[inline] + fn size_hint(&self) -> (usize, Option) { + let (low, high) = self.code_points.size_hint(); + let ext = (self.extra != 0) as usize; + // every code point gets either one u16 or two u16, + // so this iterator is between 1 or 2 times as + // long as the underlying iterator. + (low + ext, high.and_then(|n| n.checked_mul(2)).and_then(|n| n.checked_add(ext))) + } +} + +#[stable(feature = "encode_wide_fused_iterator", since = "1.62.0")] +impl FusedIterator for EncodeWide<'_> {} + +impl Hash for CodePoint { + #[inline] + fn hash(&self, state: &mut H) { + self.value.hash(state) + } +} + +impl Hash for Wtf8 { + #[inline] + fn hash(&self, state: &mut H) { + state.write(&self.bytes); + 0xfeu8.hash(state) + } +} + +#[unstable(feature = "clone_to_uninit", issue = "126799")] +unsafe impl CloneToUninit for Wtf8 { + #[inline] + #[cfg_attr(debug_assertions, track_caller)] + unsafe fn clone_to_uninit(&self, dst: *mut Self) { + // SAFETY: we're just a wrapper around [u8] + unsafe { self.bytes.clone_to_uninit(addr_of_mut!((*dst).bytes)) } + } +} diff --git a/library/core/src/lib.rs b/library/core/src/lib.rs index e60bcf3aa5db7..299fda449635a 100644 --- a/library/core/src/lib.rs +++ b/library/core/src/lib.rs @@ -407,6 +407,13 @@ pub mod slice; pub mod str; pub mod time; +#[unstable( + feature = "path_internals", + reason = "internal details of the implementation of path", + issue = "none" +)] +#[doc(hidden)] +pub mod path; pub mod unicode; /* Async */ diff --git a/library/core/src/path.rs b/library/core/src/path.rs new file mode 100644 index 0000000000000..8b3d2a28457cc --- /dev/null +++ b/library/core/src/path.rs @@ -0,0 +1,2033 @@ +#![deny(unsafe_op_in_unsafe_fn)] + +use crate::clone::CloneToUninit; +use crate::error::Error; +use crate::ffi::os_str::{self, OsStr}; +use crate::hash::{Hash, Hasher}; +use crate::iter::FusedIterator; +use crate::{cmp, fmt}; + +#[cfg(target_os = "windows")] +mod windows; +#[unstable( + feature = "path_internals", + reason = "internal details of the implementation of path", + issue = "none" +)] +#[doc(hidden)] +#[cfg(target_os = "windows")] +pub use windows::*; + +#[cfg(all(target_vendor = "fortanix", target_env = "sgx"))] +mod sgx; +#[unstable( + feature = "path_internals", + reason = "internal details of the implementation of path", + issue = "none" +)] +#[doc(hidden)] +#[cfg(all(target_vendor = "fortanix", target_env = "sgx"))] +pub use sgx::*; + +#[cfg(any(target_os = "uefi", target_os = "solid_asp3",))] +mod unsupported_backslash; +#[unstable( + feature = "path_internals", + reason = "internal details of the implementation of path", + issue = "none" +)] +#[doc(hidden)] +#[cfg(any(target_os = "uefi", target_os = "solid_asp3",))] +pub use unsupported_backslash::*; + +#[cfg(unix)] +mod unix; +#[unstable( + feature = "path_internals", + reason = "internal details of the implementation of path", + issue = "none" +)] +#[doc(hidden)] +#[cfg(unix)] +pub use unix::*; + +//////////////////////////////////////////////////////////////////////////////// +// GENERAL NOTES +//////////////////////////////////////////////////////////////////////////////// +// +// Parsing in this module is done by directly transmuting OsStr to [u8] slices, +// taking advantage of the fact that OsStr always encodes ASCII characters +// as-is. Eventually, this transmutation should be replaced by direct uses of +// OsStr APIs for parsing, but it will take a while for those to become +// available. + +//////////////////////////////////////////////////////////////////////////////// +// Windows Prefixes +//////////////////////////////////////////////////////////////////////////////// + +/// Windows path prefixes, e.g., `C:` or `\\server\share`. +/// +/// Windows uses a variety of path prefix styles, including references to drive +/// volumes (like `C:`), network shared folders (like `\\server\share`), and +/// others. In addition, some path prefixes are "verbatim" (i.e., prefixed with +/// `\\?\`), in which case `/` is *not* treated as a separator and essentially +/// no normalization is performed. +/// +/// # Examples +/// +/// ``` +/// use std::path::{Component, Path, Prefix}; +/// use std::path::Prefix::*; +/// use std::ffi::OsStr; +/// +/// fn get_path_prefix(s: &str) -> Prefix<'_> { +/// let path = Path::new(s); +/// match path.components().next().unwrap() { +/// Component::Prefix(prefix_component) => prefix_component.kind(), +/// _ => panic!(), +/// } +/// } +/// +/// # if cfg!(windows) { +/// assert_eq!(Verbatim(OsStr::new("pictures")), +/// get_path_prefix(r"\\?\pictures\kittens")); +/// assert_eq!(VerbatimUNC(OsStr::new("server"), OsStr::new("share")), +/// get_path_prefix(r"\\?\UNC\server\share")); +/// assert_eq!(VerbatimDisk(b'C'), get_path_prefix(r"\\?\c:\")); +/// assert_eq!(DeviceNS(OsStr::new("BrainInterface")), +/// get_path_prefix(r"\\.\BrainInterface")); +/// assert_eq!(UNC(OsStr::new("server"), OsStr::new("share")), +/// get_path_prefix(r"\\server\share")); +/// assert_eq!(Disk(b'C'), get_path_prefix(r"C:\Users\Rust\Pictures\Ferris")); +/// # } +/// ``` +#[derive(Copy, Clone, Debug, Hash, PartialOrd, Ord, PartialEq, Eq)] +#[stable(feature = "rust1", since = "1.0.0")] +pub enum Prefix<'a> { + /// Verbatim prefix, e.g., `\\?\cat_pics`. + /// + /// Verbatim prefixes consist of `\\?\` immediately followed by the given + /// component. + #[stable(feature = "rust1", since = "1.0.0")] + Verbatim(#[stable(feature = "rust1", since = "1.0.0")] &'a OsStr), + + /// Verbatim prefix using Windows' _**U**niform **N**aming **C**onvention_, + /// e.g., `\\?\UNC\server\share`. + /// + /// Verbatim UNC prefixes consist of `\\?\UNC\` immediately followed by the + /// server's hostname and a share name. + #[stable(feature = "rust1", since = "1.0.0")] + VerbatimUNC( + #[stable(feature = "rust1", since = "1.0.0")] &'a OsStr, + #[stable(feature = "rust1", since = "1.0.0")] &'a OsStr, + ), + + /// Verbatim disk prefix, e.g., `\\?\C:`. + /// + /// Verbatim disk prefixes consist of `\\?\` immediately followed by the + /// drive letter and `:`. + #[stable(feature = "rust1", since = "1.0.0")] + VerbatimDisk(#[stable(feature = "rust1", since = "1.0.0")] u8), + + /// Device namespace prefix, e.g., `\\.\COM42`. + /// + /// Device namespace prefixes consist of `\\.\` (possibly using `/` + /// instead of `\`), immediately followed by the device name. + #[stable(feature = "rust1", since = "1.0.0")] + DeviceNS(#[stable(feature = "rust1", since = "1.0.0")] &'a OsStr), + + /// Prefix using Windows' _**U**niform **N**aming **C**onvention_, e.g. + /// `\\server\share`. + /// + /// UNC prefixes consist of the server's hostname and a share name. + #[stable(feature = "rust1", since = "1.0.0")] + UNC( + #[stable(feature = "rust1", since = "1.0.0")] &'a OsStr, + #[stable(feature = "rust1", since = "1.0.0")] &'a OsStr, + ), + + /// Prefix `C:` for the given disk drive. + #[stable(feature = "rust1", since = "1.0.0")] + Disk(#[stable(feature = "rust1", since = "1.0.0")] u8), +} + +impl<'a> Prefix<'a> { + /// Length + #[unstable( + feature = "path_internals", + reason = "internal details of the implementation of path", + issue = "none" + )] + #[doc(hidden)] + #[inline] + pub fn len(&self) -> usize { + use self::Prefix::*; + fn os_str_len(s: &OsStr) -> usize { + s.as_encoded_bytes().len() + } + match *self { + Verbatim(x) => 4 + os_str_len(x), + VerbatimUNC(x, y) => { + 8 + os_str_len(x) + if os_str_len(y) > 0 { 1 + os_str_len(y) } else { 0 } + } + VerbatimDisk(_) => 6, + UNC(x, y) => 2 + os_str_len(x) + if os_str_len(y) > 0 { 1 + os_str_len(y) } else { 0 }, + DeviceNS(x) => 4 + os_str_len(x), + Disk(_) => 2, + } + } + + /// Determines if the prefix is verbatim, i.e., begins with `\\?\`. + /// + /// # Examples + /// + /// ``` + /// use std::path::Prefix::*; + /// use std::ffi::OsStr; + /// + /// assert!(Verbatim(OsStr::new("pictures")).is_verbatim()); + /// assert!(VerbatimUNC(OsStr::new("server"), OsStr::new("share")).is_verbatim()); + /// assert!(VerbatimDisk(b'C').is_verbatim()); + /// assert!(!DeviceNS(OsStr::new("BrainInterface")).is_verbatim()); + /// assert!(!UNC(OsStr::new("server"), OsStr::new("share")).is_verbatim()); + /// assert!(!Disk(b'C').is_verbatim()); + /// ``` + #[inline] + #[must_use] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn is_verbatim(&self) -> bool { + use self::Prefix::*; + matches!(*self, Verbatim(_) | VerbatimDisk(_) | VerbatimUNC(..)) + } + + /// Is drive + #[unstable( + feature = "path_internals", + reason = "internal details of the implementation of path", + issue = "none" + )] + #[doc(hidden)] + #[inline] + pub fn is_drive(&self) -> bool { + matches!(*self, Prefix::Disk(_)) + } + + #[inline] + fn has_implicit_root(&self) -> bool { + !self.is_drive() + } +} + +//////////////////////////////////////////////////////////////////////////////// +// Exposed parsing helpers +//////////////////////////////////////////////////////////////////////////////// + +/// Determines whether the character is one of the permitted path +/// separators for the current platform. +/// +/// # Examples +/// +/// ``` +/// use std::path; +/// +/// assert!(path::is_separator('/')); // '/' works for both Unix and Windows +/// assert!(!path::is_separator('❤')); +/// ``` +#[must_use] +#[stable(feature = "rust1", since = "1.0.0")] +pub fn is_separator(c: char) -> bool { + c.is_ascii() && is_sep_byte(c as u8) +} + +/// The primary separator of path components for the current platform. +/// +/// For example, `/` on Unix and `\` on Windows. +#[stable(feature = "rust1", since = "1.0.0")] +pub const MAIN_SEPARATOR: char = MAIN_SEP; + +/// The primary separator of path components for the current platform. +/// +/// For example, `/` on Unix and `\` on Windows. +#[stable(feature = "main_separator_str", since = "1.68.0")] +pub const MAIN_SEPARATOR_STR: &str = MAIN_SEP_STR; + +//////////////////////////////////////////////////////////////////////////////// +// Misc helpers +//////////////////////////////////////////////////////////////////////////////// + +// Iterate through `iter` while it matches `prefix`; return `None` if `prefix` +// is not a prefix of `iter`, otherwise return `Some(iter_after_prefix)` giving +// `iter` after having exhausted `prefix`. +fn iter_after<'a, 'b, I, J>(mut iter: I, mut prefix: J) -> Option +where + I: Iterator> + Clone, + J: Iterator>, +{ + loop { + let mut iter_next = iter.clone(); + match (iter_next.next(), prefix.next()) { + (Some(ref x), Some(ref y)) if x == y => (), + (Some(_), Some(_)) => return None, + (Some(_), None) => return Some(iter), + (None, None) => return Some(iter), + (None, Some(_)) => return None, + } + iter = iter_next; + } +} + +// Detect scheme on Redox +fn has_redox_scheme(s: &[u8]) -> bool { + cfg!(target_os = "redox") && s.contains(&b':') +} + +//////////////////////////////////////////////////////////////////////////////// +// Cross-platform, iterator-independent parsing +//////////////////////////////////////////////////////////////////////////////// + +/// Says whether the first byte after the prefix is a separator. +fn has_physical_root(s: &[u8], prefix: Option>) -> bool { + let path = if let Some(p) = prefix { &s[p.len()..] } else { s }; + !path.is_empty() && is_sep_byte(path[0]) +} + +// basic workhorse for splitting stem and extension +fn rsplit_file_at_dot(file: &OsStr) -> (Option<&OsStr>, Option<&OsStr>) { + if file.as_encoded_bytes() == b".." { + return (Some(file), None); + } + + let mut iter = file.as_encoded_bytes().rsplitn(2, |b| *b == b'.'); + let after = iter.next(); + let before = iter.next(); + if before == Some(b"") { + (Some(file), None) + } else { + // SAFETY: + // The unsafety here stems from converting between &OsStr and &[u8] + // and back. This is safe to do because (1) we only look at ASCII + // contents of the encoding and (2) new &OsStr values are produced + // only from ASCII-bounded slices of existing &OsStr values. + unsafe { + ( + before.map(|s| OsStr::from_encoded_bytes_unchecked(s)), + after.map(|s| OsStr::from_encoded_bytes_unchecked(s)), + ) + } + } +} + +fn split_file_at_dot(file: &OsStr) -> (&OsStr, Option<&OsStr>) { + let slice = file.as_encoded_bytes(); + if slice == b".." { + return (file, None); + } + + let i = match slice[1..].iter().position(|b| *b == b'.') { + Some(i) => i + 1, + None => return (file, None), + }; + let before = &slice[..i]; + let after = &slice[i + 1..]; + // SAFETY: + // The unsafety here stems from converting between &OsStr and &[u8] + // and back. This is safe to do because (1) we only look at ASCII + // contents of the encoding and (2) new &OsStr values are produced + // only from ASCII-bounded slices of existing &OsStr values. + unsafe { + ( + OsStr::from_encoded_bytes_unchecked(before), + Some(OsStr::from_encoded_bytes_unchecked(after)), + ) + } +} + +//////////////////////////////////////////////////////////////////////////////// +// The core iterators +//////////////////////////////////////////////////////////////////////////////// + +/// Component parsing works by a double-ended state machine; the cursors at the +/// front and back of the path each keep track of what parts of the path have +/// been consumed so far. +/// +/// Going front to back, a path is made up of a prefix, a starting +/// directory component, and a body (of normal components) +#[derive(Copy, Clone, PartialEq, PartialOrd, Debug)] +enum State { + Prefix = 0, // c: + StartDir = 1, // / or . or nothing + Body = 2, // foo/bar/baz + Done = 3, +} + +/// A structure wrapping a Windows path prefix as well as its unparsed string +/// representation. +/// +/// In addition to the parsed [`Prefix`] information returned by [`kind`], +/// `PrefixComponent` also holds the raw and unparsed [`OsStr`] slice, +/// returned by [`as_os_str`]. +/// +/// Instances of this `struct` can be obtained by matching against the +/// [`Prefix` variant] on [`Component`]. +/// +/// Does not occur on Unix. +/// +/// # Examples +/// +/// ``` +/// # if cfg!(windows) { +/// use std::path::{Component, Path, Prefix}; +/// use std::ffi::OsStr; +/// +/// let path = Path::new(r"c:\you\later\"); +/// match path.components().next().unwrap() { +/// Component::Prefix(prefix_component) => { +/// assert_eq!(Prefix::Disk(b'C'), prefix_component.kind()); +/// assert_eq!(OsStr::new("c:"), prefix_component.as_os_str()); +/// } +/// _ => unreachable!(), +/// } +/// # } +/// ``` +/// +/// [`as_os_str`]: PrefixComponent::as_os_str +/// [`kind`]: PrefixComponent::kind +/// [`Prefix` variant]: Component::Prefix +#[stable(feature = "rust1", since = "1.0.0")] +#[derive(Copy, Clone, Eq, Debug)] +pub struct PrefixComponent<'a> { + /// The prefix as an unparsed `OsStr` slice. + raw: &'a OsStr, + + /// The parsed prefix data. + parsed: Prefix<'a>, +} + +impl<'a> PrefixComponent<'a> { + /// Returns the parsed prefix data. + /// + /// See [`Prefix`]'s documentation for more information on the different + /// kinds of prefixes. + #[stable(feature = "rust1", since = "1.0.0")] + #[must_use] + #[inline] + pub fn kind(&self) -> Prefix<'a> { + self.parsed + } + + /// Returns the raw [`OsStr`] slice for this prefix. + #[stable(feature = "rust1", since = "1.0.0")] + #[must_use] + #[inline] + pub fn as_os_str(&self) -> &'a OsStr { + self.raw + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl<'a> PartialEq for PrefixComponent<'a> { + #[inline] + fn eq(&self, other: &PrefixComponent<'a>) -> bool { + self.parsed == other.parsed + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl<'a> PartialOrd for PrefixComponent<'a> { + #[inline] + fn partial_cmp(&self, other: &PrefixComponent<'a>) -> Option { + PartialOrd::partial_cmp(&self.parsed, &other.parsed) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl Ord for PrefixComponent<'_> { + #[inline] + fn cmp(&self, other: &Self) -> cmp::Ordering { + Ord::cmp(&self.parsed, &other.parsed) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl Hash for PrefixComponent<'_> { + fn hash(&self, h: &mut H) { + self.parsed.hash(h); + } +} + +/// A single component of a path. +/// +/// A `Component` roughly corresponds to a substring between path separators +/// (`/` or `\`). +/// +/// This `enum` is created by iterating over [`Components`], which in turn is +/// created by the [`components`](Path::components) method on [`Path`]. +/// +/// # Examples +/// +/// ```rust +/// use std::path::{Component, Path}; +/// +/// let path = Path::new("/tmp/foo/bar.txt"); +/// let components = path.components().collect::>(); +/// assert_eq!(&components, &[ +/// Component::RootDir, +/// Component::Normal("tmp".as_ref()), +/// Component::Normal("foo".as_ref()), +/// Component::Normal("bar.txt".as_ref()), +/// ]); +/// ``` +#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)] +#[stable(feature = "rust1", since = "1.0.0")] +pub enum Component<'a> { + /// A Windows path prefix, e.g., `C:` or `\\server\share`. + /// + /// There is a large variety of prefix types, see [`Prefix`]'s documentation + /// for more. + /// + /// Does not occur on Unix. + #[stable(feature = "rust1", since = "1.0.0")] + Prefix(#[stable(feature = "rust1", since = "1.0.0")] PrefixComponent<'a>), + + /// The root directory component, appears after any prefix and before anything else. + /// + /// It represents a separator that designates that a path starts from root. + #[stable(feature = "rust1", since = "1.0.0")] + RootDir, + + /// A reference to the current directory, i.e., `.`. + #[stable(feature = "rust1", since = "1.0.0")] + CurDir, + + /// A reference to the parent directory, i.e., `..`. + #[stable(feature = "rust1", since = "1.0.0")] + ParentDir, + + /// A normal component, e.g., `a` and `b` in `a/b`. + /// + /// This variant is the most common one, it represents references to files + /// or directories. + #[stable(feature = "rust1", since = "1.0.0")] + Normal(#[stable(feature = "rust1", since = "1.0.0")] &'a OsStr), +} + +impl<'a> Component<'a> { + /// Extracts the underlying [`OsStr`] slice. + /// + /// # Examples + /// + /// ``` + /// use std::path::Path; + /// + /// let path = Path::new("./tmp/foo/bar.txt"); + /// let components: Vec<_> = path.components().map(|comp| comp.as_os_str()).collect(); + /// assert_eq!(&components, &[".", "tmp", "foo", "bar.txt"]); + /// ``` + #[must_use = "`self` will be dropped if the result is not used"] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn as_os_str(self) -> &'a OsStr { + match self { + Component::Prefix(p) => p.as_os_str(), + Component::RootDir => OsStr::new(MAIN_SEP_STR), + Component::CurDir => OsStr::new("."), + Component::ParentDir => OsStr::new(".."), + Component::Normal(path) => path, + } + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl AsRef for Component<'_> { + #[inline] + fn as_ref(&self) -> &OsStr { + self.as_os_str() + } +} + +#[stable(feature = "path_component_asref", since = "1.25.0")] +impl AsRef for Component<'_> { + #[inline] + fn as_ref(&self) -> &Path { + self.as_os_str().as_ref() + } +} + +/// An iterator over the [`Component`]s of a [`Path`]. +/// +/// This `struct` is created by the [`components`] method on [`Path`]. +/// See its documentation for more. +/// +/// # Examples +/// +/// ``` +/// use std::path::Path; +/// +/// let path = Path::new("/tmp/foo/bar.txt"); +/// +/// for component in path.components() { +/// println!("{component:?}"); +/// } +/// ``` +/// +/// [`components`]: Path::components +#[derive(Clone)] +#[must_use = "iterators are lazy and do nothing unless consumed"] +#[stable(feature = "rust1", since = "1.0.0")] +pub struct Components<'a> { + // The path left to parse components from + path: &'a [u8], + + // The prefix as it was originally parsed, if any + prefix: Option>, + + // true if path *physically* has a root separator; for most Windows + // prefixes, it may have a "logical" root separator for the purposes of + // normalization, e.g., \\server\share == \\server\share\. + has_physical_root: bool, + + // The iterator is double-ended, and these two states keep track of what has + // been produced from either end + front: State, + back: State, +} + +/// An iterator over the [`Component`]s of a [`Path`], as [`OsStr`] slices. +/// +/// This `struct` is created by the [`iter`] method on [`Path`]. +/// See its documentation for more. +/// +/// [`iter`]: Path::iter +#[derive(Clone)] +#[must_use = "iterators are lazy and do nothing unless consumed"] +#[stable(feature = "rust1", since = "1.0.0")] +pub struct Iter<'a> { + inner: Components<'a>, +} + +#[stable(feature = "path_components_debug", since = "1.13.0")] +impl fmt::Debug for Components<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + struct DebugHelper<'a>(&'a Path); + + impl fmt::Debug for DebugHelper<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_list().entries(self.0.components()).finish() + } + } + + f.debug_tuple("Components").field(&DebugHelper(self.as_path())).finish() + } +} + +impl<'a> Components<'a> { + // The path left to parse components from + #[unstable( + feature = "path_internals", + reason = "internal details of the implementation of path", + issue = "none" + )] + #[doc(hidden)] + #[inline] + pub fn path_left_to_parse(&self) -> &'a [u8] { + self.path + } + + // The prefix as it was originally parsed, if any + #[unstable( + feature = "path_internals", + reason = "internal details of the implementation of path", + issue = "none" + )] + #[doc(hidden)] + #[inline] + pub fn prefix(&self) -> Option> { + self.prefix + } + + // how long is the prefix, if any? + #[unstable( + feature = "path_internals", + reason = "internal details of the implementation of path", + issue = "none" + )] + #[doc(hidden)] + #[inline] + pub fn prefix_len(&self) -> usize { + self.prefix.as_ref().map(Prefix::len).unwrap_or(0) + } + + /// Internal method + #[unstable( + feature = "path_internals", + reason = "internal details of the implementation of path", + issue = "none" + )] + #[doc(hidden)] + #[inline] + pub fn prefix_verbatim(&self) -> bool { + self.prefix.as_ref().map(Prefix::is_verbatim).unwrap_or(false) + } + + /// how much of the prefix is left from the point of view of iteration? + #[unstable( + feature = "path_internals", + reason = "internal details of the implementation of path", + issue = "none" + )] + #[doc(hidden)] + #[inline] + pub fn prefix_remaining(&self) -> usize { + if self.front == State::Prefix { self.prefix_len() } else { 0 } + } + + // Given the iteration so far, how much of the pre-State::Body path is left? + #[inline] + fn len_before_body(&self) -> usize { + let root = if self.front <= State::StartDir && self.has_physical_root { 1 } else { 0 }; + let cur_dir = if self.front <= State::StartDir && self.include_cur_dir() { 1 } else { 0 }; + self.prefix_remaining() + root + cur_dir + } + + // is the iteration complete? + #[inline] + fn finished(&self) -> bool { + self.front == State::Done || self.back == State::Done || self.front > self.back + } + + #[inline] + fn is_sep_byte(&self, b: u8) -> bool { + if self.prefix_verbatim() { is_verbatim_sep(b) } else { is_sep_byte(b) } + } + + /// Extracts a slice corresponding to the portion of the path remaining for iteration. + /// + /// # Examples + /// + /// ``` + /// use std::path::Path; + /// + /// let mut components = Path::new("/tmp/foo/bar.txt").components(); + /// components.next(); + /// components.next(); + /// + /// assert_eq!(Path::new("foo/bar.txt"), components.as_path()); + /// ``` + #[must_use] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn as_path(&self) -> &'a Path { + let mut comps = self.clone(); + if comps.front == State::Body { + comps.trim_left(); + } + if comps.back == State::Body { + comps.trim_right(); + } + // SAFETY: comps.path is valid `OsStr` + unsafe { Path::from_u8_slice(comps.path) } + } + + /// Is the *original* path rooted? + fn has_root(&self) -> bool { + if self.has_physical_root { + return true; + } + if let Some(p) = self.prefix { + if p.has_implicit_root() { + return true; + } + } + false + } + + /// Should the normalized path include a leading . ? + fn include_cur_dir(&self) -> bool { + if self.has_root() { + return false; + } + let mut iter = self.path[self.prefix_remaining()..].iter(); + match (iter.next(), iter.next()) { + (Some(&b'.'), None) => true, + (Some(&b'.'), Some(&b)) => self.is_sep_byte(b), + _ => false, + } + } + + // parse a given byte sequence following the OsStr encoding into the + // corresponding path component + unsafe fn parse_single_component<'b>(&self, comp: &'b [u8]) -> Option> { + match comp { + b"." if self.prefix_verbatim() => Some(Component::CurDir), + b"." => None, // . components are normalized away, except at + // the beginning of a path, which is treated + // separately via `include_cur_dir` + b".." => Some(Component::ParentDir), + b"" => None, + // SAFETY: comp is valid `OsStr` + _ => Some(Component::Normal(unsafe { OsStr::from_encoded_bytes_unchecked(comp) })), + } + } + + // parse a component from the left, saying how many bytes to consume to + // remove the component + fn parse_next_component(&self) -> (usize, Option>) { + debug_assert!(self.front == State::Body); + let (extra, comp) = match self.path.iter().position(|b| self.is_sep_byte(*b)) { + None => (0, self.path), + Some(i) => (1, &self.path[..i]), + }; + // SAFETY: `comp` is a valid substring, since it is split on a separator. + (comp.len() + extra, unsafe { self.parse_single_component(comp) }) + } + + // parse a component from the right, saying how many bytes to consume to + // remove the component + fn parse_next_component_back(&self) -> (usize, Option>) { + debug_assert!(self.back == State::Body); + let start = self.len_before_body(); + let (extra, comp) = match self.path[start..].iter().rposition(|b| self.is_sep_byte(*b)) { + None => (0, &self.path[start..]), + Some(i) => (1, &self.path[start + i + 1..]), + }; + // SAFETY: `comp` is a valid substring, since it is split on a separator. + (comp.len() + extra, unsafe { self.parse_single_component(comp) }) + } + + // trim away repeated separators (i.e., empty components) on the left + fn trim_left(&mut self) { + while !self.path.is_empty() { + let (size, comp) = self.parse_next_component(); + if comp.is_some() { + return; + } else { + self.path = &self.path[size..]; + } + } + } + + // trim away repeated separators (i.e., empty components) on the right + fn trim_right(&mut self) { + while self.path.len() > self.len_before_body() { + let (size, comp) = self.parse_next_component_back(); + if comp.is_some() { + return; + } else { + self.path = &self.path[..self.path.len() - size]; + } + } + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl AsRef for Components<'_> { + #[inline] + fn as_ref(&self) -> &Path { + self.as_path() + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl AsRef for Components<'_> { + #[inline] + fn as_ref(&self) -> &OsStr { + self.as_path().as_os_str() + } +} + +#[stable(feature = "path_iter_debug", since = "1.13.0")] +impl fmt::Debug for Iter<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + struct DebugHelper<'a>(&'a Path); + + impl fmt::Debug for DebugHelper<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_list().entries(self.0.iter()).finish() + } + } + + f.debug_tuple("Iter").field(&DebugHelper(self.as_path())).finish() + } +} + +impl<'a> Iter<'a> { + /// Extracts a slice corresponding to the portion of the path remaining for iteration. + /// + /// # Examples + /// + /// ``` + /// use std::path::Path; + /// + /// let mut iter = Path::new("/tmp/foo/bar.txt").iter(); + /// iter.next(); + /// iter.next(); + /// + /// assert_eq!(Path::new("foo/bar.txt"), iter.as_path()); + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + #[must_use] + #[inline] + pub fn as_path(&self) -> &'a Path { + self.inner.as_path() + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl AsRef for Iter<'_> { + #[inline] + fn as_ref(&self) -> &Path { + self.as_path() + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl AsRef for Iter<'_> { + #[inline] + fn as_ref(&self) -> &OsStr { + self.as_path().as_os_str() + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl<'a> Iterator for Iter<'a> { + type Item = &'a OsStr; + + #[inline] + fn next(&mut self) -> Option<&'a OsStr> { + self.inner.next().map(Component::as_os_str) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl<'a> DoubleEndedIterator for Iter<'a> { + #[inline] + fn next_back(&mut self) -> Option<&'a OsStr> { + self.inner.next_back().map(Component::as_os_str) + } +} + +#[stable(feature = "fused", since = "1.26.0")] +impl FusedIterator for Iter<'_> {} + +#[stable(feature = "rust1", since = "1.0.0")] +impl<'a> Iterator for Components<'a> { + type Item = Component<'a>; + + fn next(&mut self) -> Option> { + while !self.finished() { + match self.front { + State::Prefix if self.prefix_len() > 0 => { + self.front = State::StartDir; + debug_assert!(self.prefix_len() <= self.path.len()); + let raw = &self.path[..self.prefix_len()]; + self.path = &self.path[self.prefix_len()..]; + return Some(Component::Prefix(PrefixComponent { + // SAFETY: raw is valid `OsStr` + raw: unsafe { OsStr::from_encoded_bytes_unchecked(raw) }, + parsed: self.prefix.unwrap(), + })); + } + State::Prefix => { + self.front = State::StartDir; + } + State::StartDir => { + self.front = State::Body; + if self.has_physical_root { + debug_assert!(!self.path.is_empty()); + self.path = &self.path[1..]; + return Some(Component::RootDir); + } else if let Some(p) = self.prefix { + if p.has_implicit_root() && !p.is_verbatim() { + return Some(Component::RootDir); + } + } else if self.include_cur_dir() { + debug_assert!(!self.path.is_empty()); + self.path = &self.path[1..]; + return Some(Component::CurDir); + } + } + State::Body if !self.path.is_empty() => { + let (size, comp) = self.parse_next_component(); + self.path = &self.path[size..]; + if comp.is_some() { + return comp; + } + } + State::Body => { + self.front = State::Done; + } + State::Done => unreachable!(), + } + } + None + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl<'a> DoubleEndedIterator for Components<'a> { + fn next_back(&mut self) -> Option> { + while !self.finished() { + match self.back { + State::Body if self.path.len() > self.len_before_body() => { + let (size, comp) = self.parse_next_component_back(); + self.path = &self.path[..self.path.len() - size]; + if comp.is_some() { + return comp; + } + } + State::Body => { + self.back = State::StartDir; + } + State::StartDir => { + self.back = State::Prefix; + if self.has_physical_root { + self.path = &self.path[..self.path.len() - 1]; + return Some(Component::RootDir); + } else if let Some(p) = self.prefix { + if p.has_implicit_root() && !p.is_verbatim() { + return Some(Component::RootDir); + } + } else if self.include_cur_dir() { + self.path = &self.path[..self.path.len() - 1]; + return Some(Component::CurDir); + } + } + State::Prefix if self.prefix_len() > 0 => { + self.back = State::Done; + return Some(Component::Prefix(PrefixComponent { + // SAFETY: self.path is valid `OsStr` + raw: unsafe { OsStr::from_encoded_bytes_unchecked(self.path) }, + parsed: self.prefix.unwrap(), + })); + } + State::Prefix => { + self.back = State::Done; + return None; + } + State::Done => unreachable!(), + } + } + None + } +} + +#[stable(feature = "fused", since = "1.26.0")] +impl FusedIterator for Components<'_> {} + +#[stable(feature = "rust1", since = "1.0.0")] +impl<'a> PartialEq for Components<'a> { + #[inline] + fn eq(&self, other: &Components<'a>) -> bool { + let Components { path: _, front: _, back: _, has_physical_root: _, prefix: _ } = self; + + // Fast path for exact matches, e.g. for hashmap lookups. + // Don't explicitly compare the prefix or has_physical_root fields since they'll + // either be covered by the `path` buffer or are only relevant for `prefix_verbatim()`. + if self.path.len() == other.path.len() + && self.front == other.front + && self.back == State::Body + && other.back == State::Body + && self.prefix_verbatim() == other.prefix_verbatim() + { + // possible future improvement: this could bail out earlier if there were a + // reverse memcmp/bcmp comparing back to front + if self.path == other.path { + return true; + } + } + + // compare back to front since absolute paths often share long prefixes + Iterator::eq(self.clone().rev(), other.clone().rev()) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl Eq for Components<'_> {} + +#[stable(feature = "rust1", since = "1.0.0")] +impl<'a> PartialOrd for Components<'a> { + #[inline] + fn partial_cmp(&self, other: &Components<'a>) -> Option { + Some(compare_components(self.clone(), other.clone())) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl Ord for Components<'_> { + #[inline] + fn cmp(&self, other: &Self) -> cmp::Ordering { + compare_components(self.clone(), other.clone()) + } +} + +#[unstable( + feature = "path_internals", + reason = "internal details of the implementation of path", + issue = "none" +)] +#[doc(hidden)] +pub fn compare_components(mut left: Components<'_>, mut right: Components<'_>) -> cmp::Ordering { + // Fast path for long shared prefixes + // + // - compare raw bytes to find first mismatch + // - backtrack to find separator before mismatch to avoid ambiguous parsings of '.' or '..' characters + // - if found update state to only do a component-wise comparison on the remainder, + // otherwise do it on the full path + // + // The fast path isn't taken for paths with a PrefixComponent to avoid backtracking into + // the middle of one + if left.prefix.is_none() && right.prefix.is_none() && left.front == right.front { + // possible future improvement: a [u8]::first_mismatch simd implementation + let first_difference = match left.path.iter().zip(right.path).position(|(&a, &b)| a != b) { + None if left.path.len() == right.path.len() => return cmp::Ordering::Equal, + None => left.path.len().min(right.path.len()), + Some(diff) => diff, + }; + + if let Some(previous_sep) = + left.path[..first_difference].iter().rposition(|&b| left.is_sep_byte(b)) + { + let mismatched_component_start = previous_sep + 1; + left.path = &left.path[mismatched_component_start..]; + left.front = State::Body; + right.path = &right.path[mismatched_component_start..]; + right.front = State::Body; + } + } + + Iterator::cmp(left, right) +} + +/// An iterator over [`Path`] and its ancestors. +/// +/// This `struct` is created by the [`ancestors`] method on [`Path`]. +/// See its documentation for more. +/// +/// # Examples +/// +/// ``` +/// use std::path::Path; +/// +/// let path = Path::new("/foo/bar"); +/// +/// for ancestor in path.ancestors() { +/// println!("{}", ancestor.display()); +/// } +/// ``` +/// +/// [`ancestors`]: Path::ancestors +#[derive(Copy, Clone, Debug)] +#[must_use = "iterators are lazy and do nothing unless consumed"] +#[stable(feature = "path_ancestors", since = "1.28.0")] +pub struct Ancestors<'a> { + next: Option<&'a Path>, +} + +#[stable(feature = "path_ancestors", since = "1.28.0")] +impl<'a> Iterator for Ancestors<'a> { + type Item = &'a Path; + + #[inline] + fn next(&mut self) -> Option { + let next = self.next; + self.next = next.and_then(Path::parent); + next + } +} + +#[stable(feature = "path_ancestors", since = "1.28.0")] +impl FusedIterator for Ancestors<'_> {} + +//////////////////////////////////////////////////////////////////////////////// +// Basic types and traits +//////////////////////////////////////////////////////////////////////////////// + +/// A slice of a path (akin to [`str`]). +/// +/// This type supports a number of operations for inspecting a path, including +/// breaking the path into its components (separated by `/` on Unix and by either +/// `/` or `\` on Windows), extracting the file name, determining whether the path +/// is absolute, and so on. +/// +/// This is an *unsized* type, meaning that it must always be used behind a +/// pointer like `&` or [`Box`]. For an owned version of this type, +/// see [`PathBuf`]. +/// +/// More details about the overall approach can be found in +/// the [module documentation](self). +/// +/// # Examples +/// +/// ``` +/// use std::path::Path; +/// use std::ffi::OsStr; +/// +/// // Note: this example does work on Windows +/// let path = Path::new("./foo/bar.txt"); +/// +/// let parent = path.parent(); +/// assert_eq!(parent, Some(Path::new("./foo"))); +/// +/// let file_stem = path.file_stem(); +/// assert_eq!(file_stem, Some(OsStr::new("bar"))); +/// +/// let extension = path.extension(); +/// assert_eq!(extension, Some(OsStr::new("txt"))); +/// ``` +#[cfg_attr(not(test), rustc_diagnostic_item = "Path")] +#[stable(feature = "rust1", since = "1.0.0")] +#[rustc_has_incoherent_inherent_impls] +// `Path::new` current implementation relies +// on `Path` being layout-compatible with `OsStr`. +// However, `Path` layout is considered an implementation detail and must not be relied upon. +#[repr(transparent)] +pub struct Path { + inner: OsStr, +} + +/// An error returned from [`Path::strip_prefix`] if the prefix was not found. +/// +/// This `struct` is created by the [`strip_prefix`] method on [`Path`]. +/// See its documentation for more. +/// +/// [`strip_prefix`]: Path::strip_prefix +#[derive(Debug, Clone, PartialEq, Eq)] +#[stable(since = "1.7.0", feature = "strip_prefix")] +pub struct StripPrefixError(()); + +impl Path { + // The following (private!) function allows construction of a path from a u8 + // slice, which is only safe when it is known to follow the OsStr encoding. + unsafe fn from_u8_slice(s: &[u8]) -> &Path { + // SAFETY: s must be a valid `OsStr` + unsafe { Path::new(OsStr::from_encoded_bytes_unchecked(s)) } + } + + #[unstable( + feature = "path_internals", + reason = "internal details of the implementation of path", + issue = "none" + )] + #[doc(hidden)] + /// The following (private!) function reveals the byte encoding used for OsStr. + pub fn as_u8_slice(&self) -> &[u8] { + self.inner.as_encoded_bytes() + } + + /// Directly wraps a string slice as a `Path` slice. + /// + /// This is a cost-free conversion. + /// + /// # Examples + /// + /// ``` + /// use std::path::Path; + /// + /// Path::new("foo.txt"); + /// ``` + /// + /// You can create `Path`s from `String`s, or even other `Path`s: + /// + /// ``` + /// use std::path::Path; + /// + /// let string = String::from("foo.txt"); + /// let from_string = Path::new(&string); + /// let from_path = Path::new(&from_string); + /// assert_eq!(from_string, from_path); + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + pub fn new + ?Sized>(s: &S) -> &Path { + // SAFETY: `Path` is just a wrapper of `OsStr` + unsafe { &*(s.as_ref() as *const OsStr as *const Path) } + } + + /// From mutable `OsStr` + #[unstable( + feature = "path_internals", + reason = "internal details of the implementation of path", + issue = "none" + )] + #[doc(hidden)] + pub fn from_inner_mut(inner: &mut OsStr) -> &mut Path { + // SAFETY: Path is just a wrapper around OsStr, + // therefore converting &mut OsStr to &mut Path is safe. + unsafe { &mut *(inner as *mut OsStr as *mut Path) } + } + + /// Yields the underlying [`OsStr`] slice. + /// + /// # Examples + /// + /// ``` + /// use std::path::Path; + /// + /// let os_str = Path::new("foo.txt").as_os_str(); + /// assert_eq!(os_str, std::ffi::OsStr::new("foo.txt")); + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + #[must_use] + #[inline] + pub fn as_os_str(&self) -> &OsStr { + &self.inner + } + + /// Yields a mutable reference to the underlying [`OsStr`] slice. + /// + /// # Examples + /// + /// ``` + /// use std::path::{Path, PathBuf}; + /// + /// let mut path = PathBuf::from("Foo.TXT"); + /// + /// assert_ne!(path, Path::new("foo.txt")); + /// + /// path.as_mut_os_str().make_ascii_lowercase(); + /// assert_eq!(path, Path::new("foo.txt")); + /// ``` + #[stable(feature = "path_as_mut_os_str", since = "1.70.0")] + #[must_use] + #[inline] + pub fn as_mut_os_str(&mut self) -> &mut OsStr { + &mut self.inner + } + + /// Yields a [`&str`] slice if the `Path` is valid unicode. + /// + /// This conversion may entail doing a check for UTF-8 validity. + /// Note that validation is performed because non-UTF-8 strings are + /// perfectly valid for some OS. + /// + /// [`&str`]: str + /// + /// # Examples + /// + /// ``` + /// use std::path::Path; + /// + /// let path = Path::new("foo.txt"); + /// assert_eq!(path.to_str(), Some("foo.txt")); + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + #[must_use = "this returns the result of the operation, \ + without modifying the original"] + #[inline] + pub fn to_str(&self) -> Option<&str> { + self.inner.to_str() + } + + /// Returns `true` if the `Path` is absolute, i.e., if it is independent of + /// the current directory. + /// + /// * On Unix, a path is absolute if it starts with the root, so + /// `is_absolute` and [`has_root`] are equivalent. + /// + /// * On Windows, a path is absolute if it has a prefix and starts with the + /// root: `c:\windows` is absolute, while `c:temp` and `\temp` are not. + /// + /// # Examples + /// + /// ``` + /// use std::path::Path; + /// + /// assert!(!Path::new("foo.txt").is_absolute()); + /// ``` + /// + /// [`has_root`]: Path::has_root + #[stable(feature = "rust1", since = "1.0.0")] + #[must_use] + #[allow(deprecated)] + pub fn is_absolute(&self) -> bool { + if cfg!(target_os = "redox") { + // FIXME: Allow Redox prefixes + self.has_root() || has_redox_scheme(self.as_u8_slice()) + } else { + self.has_root() && (cfg!(any(unix, target_os = "wasi")) || self.prefix().is_some()) + } + } + + /// Returns `true` if the `Path` is relative, i.e., not absolute. + /// + /// See [`is_absolute`]'s documentation for more details. + /// + /// # Examples + /// + /// ``` + /// use std::path::Path; + /// + /// assert!(Path::new("foo.txt").is_relative()); + /// ``` + /// + /// [`is_absolute`]: Path::is_absolute + #[stable(feature = "rust1", since = "1.0.0")] + #[must_use] + #[inline] + pub fn is_relative(&self) -> bool { + !self.is_absolute() + } + + /// Prefix + #[unstable( + feature = "path_internals", + reason = "internal details of the implementation of path", + issue = "none" + )] + #[doc(hidden)] + pub fn prefix(&self) -> Option> { + self.components().prefix + } + + /// Returns `true` if the `Path` has a root. + /// + /// * On Unix, a path has a root if it begins with `/`. + /// + /// * On Windows, a path has a root if it: + /// * has no prefix and begins with a separator, e.g., `\windows` + /// * has a prefix followed by a separator, e.g., `c:\windows` but not `c:windows` + /// * has any non-disk prefix, e.g., `\\server\share` + /// + /// # Examples + /// + /// ``` + /// use std::path::Path; + /// + /// assert!(Path::new("/etc/passwd").has_root()); + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + #[must_use] + #[inline] + pub fn has_root(&self) -> bool { + self.components().has_root() + } + + /// Returns the `Path` without its final component, if there is one. + /// + /// This means it returns `Some("")` for relative paths with one component. + /// + /// Returns [`None`] if the path terminates in a root or prefix, or if it's + /// the empty string. + /// + /// # Examples + /// + /// ``` + /// use std::path::Path; + /// + /// let path = Path::new("/foo/bar"); + /// let parent = path.parent().unwrap(); + /// assert_eq!(parent, Path::new("/foo")); + /// + /// let grand_parent = parent.parent().unwrap(); + /// assert_eq!(grand_parent, Path::new("/")); + /// assert_eq!(grand_parent.parent(), None); + /// + /// let relative_path = Path::new("foo/bar"); + /// let parent = relative_path.parent(); + /// assert_eq!(parent, Some(Path::new("foo"))); + /// let grand_parent = parent.and_then(Path::parent); + /// assert_eq!(grand_parent, Some(Path::new(""))); + /// let great_grand_parent = grand_parent.and_then(Path::parent); + /// assert_eq!(great_grand_parent, None); + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + #[doc(alias = "dirname")] + #[must_use] + pub fn parent(&self) -> Option<&Path> { + let mut comps = self.components(); + let comp = comps.next_back(); + comp.and_then(|p| match p { + Component::Normal(_) | Component::CurDir | Component::ParentDir => { + Some(comps.as_path()) + } + _ => None, + }) + } + + /// Produces an iterator over `Path` and its ancestors. + /// + /// The iterator will yield the `Path` that is returned if the [`parent`] method is used zero + /// or more times. If the [`parent`] method returns [`None`], the iterator will do likewise. + /// The iterator will always yield at least one value, namely `Some(&self)`. Next it will yield + /// `&self.parent()`, `&self.parent().and_then(Path::parent)` and so on. + /// + /// # Examples + /// + /// ``` + /// use std::path::Path; + /// + /// let mut ancestors = Path::new("/foo/bar").ancestors(); + /// assert_eq!(ancestors.next(), Some(Path::new("/foo/bar"))); + /// assert_eq!(ancestors.next(), Some(Path::new("/foo"))); + /// assert_eq!(ancestors.next(), Some(Path::new("/"))); + /// assert_eq!(ancestors.next(), None); + /// + /// let mut ancestors = Path::new("../foo/bar").ancestors(); + /// assert_eq!(ancestors.next(), Some(Path::new("../foo/bar"))); + /// assert_eq!(ancestors.next(), Some(Path::new("../foo"))); + /// assert_eq!(ancestors.next(), Some(Path::new(".."))); + /// assert_eq!(ancestors.next(), Some(Path::new(""))); + /// assert_eq!(ancestors.next(), None); + /// ``` + /// + /// [`parent`]: Path::parent + #[stable(feature = "path_ancestors", since = "1.28.0")] + #[inline] + pub fn ancestors(&self) -> Ancestors<'_> { + Ancestors { next: Some(&self) } + } + + /// Returns the final component of the `Path`, if there is one. + /// + /// If the path is a normal file, this is the file name. If it's the path of a directory, this + /// is the directory name. + /// + /// Returns [`None`] if the path terminates in `..`. + /// + /// # Examples + /// + /// ``` + /// use std::path::Path; + /// use std::ffi::OsStr; + /// + /// assert_eq!(Some(OsStr::new("bin")), Path::new("/usr/bin/").file_name()); + /// assert_eq!(Some(OsStr::new("foo.txt")), Path::new("tmp/foo.txt").file_name()); + /// assert_eq!(Some(OsStr::new("foo.txt")), Path::new("foo.txt/.").file_name()); + /// assert_eq!(Some(OsStr::new("foo.txt")), Path::new("foo.txt/.//").file_name()); + /// assert_eq!(None, Path::new("foo.txt/..").file_name()); + /// assert_eq!(None, Path::new("/").file_name()); + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + #[doc(alias = "basename")] + #[must_use] + pub fn file_name(&self) -> Option<&OsStr> { + self.components().next_back().and_then(|p| match p { + Component::Normal(p) => Some(p), + _ => None, + }) + } + + /// Returns a path that, when joined onto `base`, yields `self`. + /// + /// # Errors + /// + /// If `base` is not a prefix of `self` (i.e., [`starts_with`] + /// returns `false`), returns [`Err`]. + /// + /// [`starts_with`]: Path::starts_with + /// + /// # Examples + /// + /// ``` + /// use std::path::{Path, PathBuf}; + /// + /// let path = Path::new("/test/haha/foo.txt"); + /// + /// assert_eq!(path.strip_prefix("/"), Ok(Path::new("test/haha/foo.txt"))); + /// assert_eq!(path.strip_prefix("/test"), Ok(Path::new("haha/foo.txt"))); + /// assert_eq!(path.strip_prefix("/test/"), Ok(Path::new("haha/foo.txt"))); + /// assert_eq!(path.strip_prefix("/test/haha/foo.txt"), Ok(Path::new(""))); + /// assert_eq!(path.strip_prefix("/test/haha/foo.txt/"), Ok(Path::new(""))); + /// + /// assert!(path.strip_prefix("test").is_err()); + /// assert!(path.strip_prefix("/haha").is_err()); + /// + /// let prefix = PathBuf::from("/test/"); + /// assert_eq!(path.strip_prefix(prefix), Ok(Path::new("haha/foo.txt"))); + /// ``` + #[stable(since = "1.7.0", feature = "path_strip_prefix")] + pub fn strip_prefix

(&self, base: P) -> Result<&Path, StripPrefixError> + where + P: AsRef, + { + self._strip_prefix(base.as_ref()) + } + + fn _strip_prefix(&self, base: &Path) -> Result<&Path, StripPrefixError> { + iter_after(self.components(), base.components()) + .map(|c| c.as_path()) + .ok_or(StripPrefixError(())) + } + + /// Determines whether `base` is a prefix of `self`. + /// + /// Only considers whole path components to match. + /// + /// # Examples + /// + /// ``` + /// use std::path::Path; + /// + /// let path = Path::new("/etc/passwd"); + /// + /// assert!(path.starts_with("/etc")); + /// assert!(path.starts_with("/etc/")); + /// assert!(path.starts_with("/etc/passwd")); + /// assert!(path.starts_with("/etc/passwd/")); // extra slash is okay + /// assert!(path.starts_with("/etc/passwd///")); // multiple extra slashes are okay + /// + /// assert!(!path.starts_with("/e")); + /// assert!(!path.starts_with("/etc/passwd.txt")); + /// + /// assert!(!Path::new("/etc/foo.rs").starts_with("/etc/foo")); + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + #[must_use] + pub fn starts_with>(&self, base: P) -> bool { + self._starts_with(base.as_ref()) + } + + fn _starts_with(&self, base: &Path) -> bool { + iter_after(self.components(), base.components()).is_some() + } + + /// Determines whether `child` is a suffix of `self`. + /// + /// Only considers whole path components to match. + /// + /// # Examples + /// + /// ``` + /// use std::path::Path; + /// + /// let path = Path::new("/etc/resolv.conf"); + /// + /// assert!(path.ends_with("resolv.conf")); + /// assert!(path.ends_with("etc/resolv.conf")); + /// assert!(path.ends_with("/etc/resolv.conf")); + /// + /// assert!(!path.ends_with("/resolv.conf")); + /// assert!(!path.ends_with("conf")); // use .extension() instead + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + #[must_use] + pub fn ends_with>(&self, child: P) -> bool { + self._ends_with(child.as_ref()) + } + + fn _ends_with(&self, child: &Path) -> bool { + iter_after(self.components().rev(), child.components().rev()).is_some() + } + + /// Extracts the stem (non-extension) portion of [`self.file_name`]. + /// + /// [`self.file_name`]: Path::file_name + /// + /// The stem is: + /// + /// * [`None`], if there is no file name; + /// * The entire file name if there is no embedded `.`; + /// * The entire file name if the file name begins with `.` and has no other `.`s within; + /// * Otherwise, the portion of the file name before the final `.` + /// + /// # Examples + /// + /// ``` + /// use std::path::Path; + /// + /// assert_eq!("foo", Path::new("foo.rs").file_stem().unwrap()); + /// assert_eq!("foo.tar", Path::new("foo.tar.gz").file_stem().unwrap()); + /// ``` + /// + /// # See Also + /// This method is similar to [`Path::file_prefix`], which extracts the portion of the file name + /// before the *first* `.` + /// + /// [`Path::file_prefix`]: Path::file_prefix + /// + #[stable(feature = "rust1", since = "1.0.0")] + #[must_use] + pub fn file_stem(&self) -> Option<&OsStr> { + self.file_name().map(rsplit_file_at_dot).and_then(|(before, after)| before.or(after)) + } + + /// Extracts the prefix of [`self.file_name`]. + /// + /// The prefix is: + /// + /// * [`None`], if there is no file name; + /// * The entire file name if there is no embedded `.`; + /// * The portion of the file name before the first non-beginning `.`; + /// * The entire file name if the file name begins with `.` and has no other `.`s within; + /// * The portion of the file name before the second `.` if the file name begins with `.` + /// + /// [`self.file_name`]: Path::file_name + /// + /// # Examples + /// + /// ``` + /// # #![feature(path_file_prefix)] + /// use std::path::Path; + /// + /// assert_eq!("foo", Path::new("foo.rs").file_prefix().unwrap()); + /// assert_eq!("foo", Path::new("foo.tar.gz").file_prefix().unwrap()); + /// ``` + /// + /// # See Also + /// This method is similar to [`Path::file_stem`], which extracts the portion of the file name + /// before the *last* `.` + /// + /// [`Path::file_stem`]: Path::file_stem + /// + #[unstable(feature = "path_file_prefix", issue = "86319")] + #[must_use] + pub fn file_prefix(&self) -> Option<&OsStr> { + self.file_name().map(split_file_at_dot).and_then(|(before, _after)| Some(before)) + } + + /// Extracts the extension (without the leading dot) of [`self.file_name`], if possible. + /// + /// The extension is: + /// + /// * [`None`], if there is no file name; + /// * [`None`], if there is no embedded `.`; + /// * [`None`], if the file name begins with `.` and has no other `.`s within; + /// * Otherwise, the portion of the file name after the final `.` + /// + /// [`self.file_name`]: Path::file_name + /// + /// # Examples + /// + /// ``` + /// use std::path::Path; + /// + /// assert_eq!("rs", Path::new("foo.rs").extension().unwrap()); + /// assert_eq!("gz", Path::new("foo.tar.gz").extension().unwrap()); + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + #[must_use] + pub fn extension(&self) -> Option<&OsStr> { + self.file_name().map(rsplit_file_at_dot).and_then(|(before, after)| before.and(after)) + } + + /// Produces an iterator over the [`Component`]s of the path. + /// + /// When parsing the path, there is a small amount of normalization: + /// + /// * Repeated separators are ignored, so `a/b` and `a//b` both have + /// `a` and `b` as components. + /// + /// * Occurrences of `.` are normalized away, except if they are at the + /// beginning of the path. For example, `a/./b`, `a/b/`, `a/b/.` and + /// `a/b` all have `a` and `b` as components, but `./a/b` starts with + /// an additional [`CurDir`] component. + /// + /// * A trailing slash is normalized away, `/a/b` and `/a/b/` are equivalent. + /// + /// Note that no other normalization takes place; in particular, `a/c` + /// and `a/b/../c` are distinct, to account for the possibility that `b` + /// is a symbolic link (so its parent isn't `a`). + /// + /// # Examples + /// + /// ``` + /// use std::path::{Path, Component}; + /// use std::ffi::OsStr; + /// + /// let mut components = Path::new("/tmp/foo.txt").components(); + /// + /// assert_eq!(components.next(), Some(Component::RootDir)); + /// assert_eq!(components.next(), Some(Component::Normal(OsStr::new("tmp")))); + /// assert_eq!(components.next(), Some(Component::Normal(OsStr::new("foo.txt")))); + /// assert_eq!(components.next(), None) + /// ``` + /// + /// [`CurDir`]: Component::CurDir + #[stable(feature = "rust1", since = "1.0.0")] + pub fn components(&self) -> Components<'_> { + let prefix = parse_prefix(self.as_os_str()); + Components { + path: self.as_u8_slice(), + prefix, + has_physical_root: has_physical_root(self.as_u8_slice(), prefix) + || has_redox_scheme(self.as_u8_slice()), + front: State::Prefix, + back: State::Body, + } + } + + /// Produces an iterator over the path's components viewed as [`OsStr`] + /// slices. + /// + /// For more information about the particulars of how the path is separated + /// into components, see [`components`]. + /// + /// [`components`]: Path::components + /// + /// # Examples + /// + /// ``` + /// use std::path::{self, Path}; + /// use std::ffi::OsStr; + /// + /// let mut it = Path::new("/tmp/foo.txt").iter(); + /// assert_eq!(it.next(), Some(OsStr::new(&path::MAIN_SEPARATOR.to_string()))); + /// assert_eq!(it.next(), Some(OsStr::new("tmp"))); + /// assert_eq!(it.next(), Some(OsStr::new("foo.txt"))); + /// assert_eq!(it.next(), None) + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + #[inline] + pub fn iter(&self) -> Iter<'_> { + Iter { inner: self.components() } + } + + /// Returns an object that implements [`Display`] for safely printing paths + /// that may contain non-Unicode data. This may perform lossy conversion, + /// depending on the platform. If you would like an implementation which + /// escapes the path please use [`Debug`] instead. + /// + /// [`Display`]: fmt::Display + /// [`Debug`]: fmt::Debug + /// + /// # Examples + /// + /// ``` + /// use std::path::Path; + /// + /// let path = Path::new("/tmp/foo.rs"); + /// + /// println!("{}", path.display()); + /// ``` + #[stable(feature = "rust1", since = "1.0.0")] + #[must_use = "this does not display the path, \ + it returns an object that can be displayed"] + #[inline] + pub fn display(&self) -> Display<'_> { + Display { inner: self.inner.display() } + } +} + +#[unstable(feature = "clone_to_uninit", issue = "126799")] +unsafe impl CloneToUninit for Path { + #[inline] + #[cfg_attr(debug_assertions, track_caller)] + unsafe fn clone_to_uninit(&self, dst: *mut Self) { + // SAFETY: Path is just a wrapper around OsStr + unsafe { self.inner.clone_to_uninit(core::ptr::addr_of_mut!((*dst).inner)) } + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl AsRef for Path { + #[inline] + fn as_ref(&self) -> &OsStr { + &self.inner + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl fmt::Debug for Path { + fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Debug::fmt(&self.inner, formatter) + } +} + +/// Helper struct for safely printing paths with [`format!`] and `{}`. +/// +/// A [`Path`] might contain non-Unicode data. This `struct` implements the +/// [`Display`] trait in a way that mitigates that. It is created by the +/// [`display`](Path::display) method on [`Path`]. This may perform lossy +/// conversion, depending on the platform. If you would like an implementation +/// which escapes the path please use [`Debug`] instead. +/// +/// # Examples +/// +/// ``` +/// use std::path::Path; +/// +/// let path = Path::new("/tmp/foo.rs"); +/// +/// println!("{}", path.display()); +/// ``` +/// +/// [`Display`]: fmt::Display +/// [`format!`]: crate::format +#[stable(feature = "rust1", since = "1.0.0")] +pub struct Display<'a> { + inner: os_str::Display<'a>, +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl fmt::Debug for Display<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Debug::fmt(&self.inner, f) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl fmt::Display for Display<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Display::fmt(&self.inner, f) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl PartialEq for Path { + #[inline] + fn eq(&self, other: &Path) -> bool { + self.components() == other.components() + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl Hash for Path { + fn hash(&self, h: &mut H) { + let bytes = self.as_u8_slice(); + let (prefix_len, verbatim) = match parse_prefix(&self.inner) { + Some(prefix) => { + prefix.hash(h); + (prefix.len(), prefix.is_verbatim()) + } + None => (0, false), + }; + let bytes = &bytes[prefix_len..]; + + let mut component_start = 0; + // track some extra state to avoid prefix collisions. + // ["foo", "bar"] and ["foobar"], will have the same payload bytes + // but result in different chunk_bits + let mut chunk_bits: usize = 0; + + for i in 0..bytes.len() { + let is_sep = if verbatim { is_verbatim_sep(bytes[i]) } else { is_sep_byte(bytes[i]) }; + if is_sep { + if i > component_start { + let to_hash = &bytes[component_start..i]; + chunk_bits = chunk_bits.wrapping_add(to_hash.len()); + chunk_bits = chunk_bits.rotate_right(2); + h.write(to_hash); + } + + // skip over separator and optionally a following CurDir item + // since components() would normalize these away. + component_start = i + 1; + + let tail = &bytes[component_start..]; + + if !verbatim { + component_start += match tail { + [b'.'] => 1, + [b'.', sep @ _, ..] if is_sep_byte(*sep) => 1, + _ => 0, + }; + } + } + } + + if component_start < bytes.len() { + let to_hash = &bytes[component_start..]; + chunk_bits = chunk_bits.wrapping_add(to_hash.len()); + chunk_bits = chunk_bits.rotate_right(2); + h.write(to_hash); + } + + h.write_usize(chunk_bits); + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl Eq for Path {} + +#[stable(feature = "rust1", since = "1.0.0")] +impl PartialOrd for Path { + #[inline] + fn partial_cmp(&self, other: &Path) -> Option { + Some(compare_components(self.components(), other.components())) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl Ord for Path { + #[inline] + fn cmp(&self, other: &Path) -> cmp::Ordering { + compare_components(self.components(), other.components()) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl AsRef for Path { + #[inline] + fn as_ref(&self) -> &Path { + self + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl AsRef for OsStr { + #[inline] + fn as_ref(&self) -> &Path { + Path::new(self) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl AsRef for str { + #[inline] + fn as_ref(&self) -> &Path { + Path::new(self) + } +} + +#[stable(feature = "path_into_iter", since = "1.6.0")] +impl<'a> IntoIterator for &'a Path { + type Item = &'a OsStr; + type IntoIter = Iter<'a>; + #[inline] + fn into_iter(self) -> Iter<'a> { + self.iter() + } +} + +macro_rules! impl_cmp_os_str { + (<$($life:lifetime),*> $lhs:ty, $rhs: ty) => { + #[stable(feature = "cmp_path", since = "1.8.0")] + impl<$($life),*> PartialEq<$rhs> for $lhs { + #[inline] + fn eq(&self, other: &$rhs) -> bool { + ::eq(self, other.as_ref()) + } + } + + #[stable(feature = "cmp_path", since = "1.8.0")] + impl<$($life),*> PartialEq<$lhs> for $rhs { + #[inline] + fn eq(&self, other: &$lhs) -> bool { + ::eq(self.as_ref(), other) + } + } + + #[stable(feature = "cmp_path", since = "1.8.0")] + impl<$($life),*> PartialOrd<$rhs> for $lhs { + #[inline] + fn partial_cmp(&self, other: &$rhs) -> Option { + ::partial_cmp(self, other.as_ref()) + } + } + + #[stable(feature = "cmp_path", since = "1.8.0")] + impl<$($life),*> PartialOrd<$lhs> for $rhs { + #[inline] + fn partial_cmp(&self, other: &$lhs) -> Option { + ::partial_cmp(self.as_ref(), other) + } + } + }; +} + +impl_cmp_os_str!(<> Path, OsStr); +impl_cmp_os_str!(<'a> Path, &'a OsStr); +impl_cmp_os_str!(<'a> &'a Path, OsStr); + +#[stable(since = "1.7.0", feature = "strip_prefix")] +impl fmt::Display for StripPrefixError { + #[allow(deprecated, deprecated_in_future)] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.description().fmt(f) + } +} + +#[stable(since = "1.7.0", feature = "strip_prefix")] +impl Error for StripPrefixError { + #[allow(deprecated)] + fn description(&self) -> &str { + "prefix not found" + } +} diff --git a/library/core/src/path/sgx.rs b/library/core/src/path/sgx.rs new file mode 100644 index 0000000000000..17467c4254ff8 --- /dev/null +++ b/library/core/src/path/sgx.rs @@ -0,0 +1,18 @@ +use super::*; + +#[inline] +pub fn is_sep_byte(b: u8) -> bool { + b == b'/' +} + +#[inline] +pub fn is_verbatim_sep(b: u8) -> bool { + b == b'/' +} + +pub fn parse_prefix(_: &OsStr) -> Option> { + None +} + +pub const MAIN_SEP_STR: &str = "/"; +pub const MAIN_SEP: char = '/'; diff --git a/library/core/src/path/unix.rs b/library/core/src/path/unix.rs new file mode 100644 index 0000000000000..3ba7dd4862975 --- /dev/null +++ b/library/core/src/path/unix.rs @@ -0,0 +1,19 @@ +use super::*; + +#[inline] +pub fn is_sep_byte(b: u8) -> bool { + b == b'/' +} + +#[inline] +pub fn is_verbatim_sep(b: u8) -> bool { + b == b'/' +} + +#[inline] +pub fn parse_prefix(_: &OsStr) -> Option> { + None +} + +pub const MAIN_SEP_STR: &str = "/"; +pub const MAIN_SEP: char = '/'; diff --git a/library/core/src/path/unsupported_backslash.rs b/library/core/src/path/unsupported_backslash.rs new file mode 100644 index 0000000000000..dd89a4d77d579 --- /dev/null +++ b/library/core/src/path/unsupported_backslash.rs @@ -0,0 +1,18 @@ +use super::*; + +#[inline] +pub fn is_sep_byte(b: u8) -> bool { + b == b'\\' +} + +#[inline] +pub fn is_verbatim_sep(b: u8) -> bool { + b == b'\\' +} + +pub fn parse_prefix(_: &OsStr) -> Option> { + None +} + +pub const MAIN_SEP_STR: &str = "\\"; +pub const MAIN_SEP: char = '\\'; diff --git a/library/core/src/path/windows.rs b/library/core/src/path/windows.rs new file mode 100644 index 0000000000000..099f988436fb3 --- /dev/null +++ b/library/core/src/path/windows.rs @@ -0,0 +1,187 @@ +use super::*; + +pub const MAIN_SEP_STR: &str = "\\"; +pub const MAIN_SEP: char = '\\'; + +#[inline] +pub fn is_sep_byte(b: u8) -> bool { + b == b'/' || b == b'\\' +} + +#[inline] +pub fn is_verbatim_sep(b: u8) -> bool { + b == b'\\' +} + +#[allow(missing_debug_implementations)] +pub struct PrefixParser<'a, const LEN: usize> { + pub path: &'a OsStr, + pub prefix: [u8; LEN], +} + +impl<'a, const LEN: usize> PrefixParser<'a, LEN> { + #[inline] + pub fn get_prefix(path: &OsStr) -> [u8; LEN] { + let mut prefix = [0; LEN]; + // SAFETY: Only ASCII characters are modified. + for (i, &ch) in path.as_encoded_bytes().iter().take(LEN).enumerate() { + prefix[i] = if ch == b'/' { b'\\' } else { ch }; + } + prefix + } + + pub fn new(path: &'a OsStr) -> Self { + Self { path, prefix: Self::get_prefix(path) } + } + + pub fn as_slice(&self) -> PrefixParserSlice<'a, '_> { + PrefixParserSlice { + path: self.path, + prefix: &self.prefix[..LEN.min(self.path.len())], + index: 0, + } + } +} + +#[allow(missing_debug_implementations)] +pub struct PrefixParserSlice<'a, 'b> { + pub path: &'a OsStr, + pub prefix: &'b [u8], + pub index: usize, +} + +impl<'a> PrefixParserSlice<'a, '_> { + pub fn strip_prefix(&self, prefix: &str) -> Option { + self.prefix[self.index..] + .starts_with(prefix.as_bytes()) + .then_some(Self { index: self.index + prefix.len(), ..*self }) + } + + pub fn prefix_bytes(&self) -> &'a [u8] { + &self.path.as_encoded_bytes()[..self.index] + } + + pub fn finish(self) -> &'a OsStr { + // SAFETY: The unsafety here stems from converting between &OsStr and + // &[u8] and back. This is safe to do because (1) we only look at ASCII + // contents of the encoding and (2) new &OsStr values are produced only + // from ASCII-bounded slices of existing &OsStr values. + unsafe { OsStr::from_encoded_bytes_unchecked(&self.path.as_encoded_bytes()[self.index..]) } + } +} + +pub fn parse_prefix(path: &OsStr) -> Option> { + use Prefix::{DeviceNS, Disk, Verbatim, VerbatimDisk, VerbatimUNC, UNC}; + + let parser = PrefixParser::<8>::new(path); + let parser = parser.as_slice(); + if let Some(parser) = parser.strip_prefix(r"\\") { + // \\ + + // The meaning of verbatim paths can change when they use a different + // separator. + if let Some(parser) = parser.strip_prefix(r"?\") + && !parser.prefix_bytes().iter().any(|&x| x == b'/') + { + // \\?\ + if let Some(parser) = parser.strip_prefix(r"UNC\") { + // \\?\UNC\server\share + + let path = parser.finish(); + let (server, path) = parse_next_component(path, true); + let (share, _) = parse_next_component(path, true); + + Some(VerbatimUNC(server, share)) + } else { + let path = parser.finish(); + + // in verbatim paths only recognize an exact drive prefix + if let Some(drive) = parse_drive_exact(path) { + // \\?\C: + Some(VerbatimDisk(drive)) + } else { + // \\?\prefix + let (prefix, _) = parse_next_component(path, true); + Some(Verbatim(prefix)) + } + } + } else if let Some(parser) = parser.strip_prefix(r".\") { + // \\.\COM42 + let path = parser.finish(); + let (prefix, _) = parse_next_component(path, false); + Some(DeviceNS(prefix)) + } else { + let path = parser.finish(); + let (server, path) = parse_next_component(path, false); + let (share, _) = parse_next_component(path, false); + + if !server.is_empty() && !share.is_empty() { + // \\server\share + Some(UNC(server, share)) + } else { + // no valid prefix beginning with "\\" recognized + None + } + } + } else { + // If it has a drive like `C:` then it's a disk. + // Otherwise there is no prefix. + parse_drive(path).map(Disk) + } +} + +// Parses a drive prefix, e.g. "C:" and "C:\whatever" +pub fn parse_drive(path: &OsStr) -> Option { + // In most DOS systems, it is not possible to have more than 26 drive letters. + // See . + fn is_valid_drive_letter(drive: &u8) -> bool { + drive.is_ascii_alphabetic() + } + + match path.as_encoded_bytes() { + [drive, b':', ..] if is_valid_drive_letter(drive) => Some(drive.to_ascii_uppercase()), + _ => None, + } +} + +// Parses a drive prefix exactly, e.g. "C:" +pub fn parse_drive_exact(path: &OsStr) -> Option { + // only parse two bytes: the drive letter and the drive separator + if path.as_encoded_bytes().get(2).map(|&x| is_sep_byte(x)).unwrap_or(true) { + parse_drive(path) + } else { + None + } +} + +// Parse the next path component. +// +// Returns the next component and the rest of the path excluding the component and separator. +// Does not recognize `/` as a separator character if `verbatim` is true. +pub fn parse_next_component(path: &OsStr, verbatim: bool) -> (&OsStr, &OsStr) { + let separator = if verbatim { is_verbatim_sep } else { is_sep_byte }; + + match path.as_encoded_bytes().iter().position(|&x| separator(x)) { + Some(separator_start) => { + let separator_end = separator_start + 1; + + let component = &path.as_encoded_bytes()[..separator_start]; + + // Panic safe + // The max `separator_end` is `bytes.len()` and `bytes[bytes.len()..]` is a valid index. + let path = &path.as_encoded_bytes()[separator_end..]; + + // SAFETY: `path` is a valid wtf8 encoded slice and each of the separators ('/', '\') + // is encoded in a single byte, therefore `bytes[separator_start]` and + // `bytes[separator_end]` must be code point boundaries and thus + // `bytes[..separator_start]` and `bytes[separator_end..]` are valid wtf8 slices. + unsafe { + ( + OsStr::from_encoded_bytes_unchecked(component), + OsStr::from_encoded_bytes_unchecked(path), + ) + } + } + None => (path, OsStr::new("")), + } +} diff --git a/library/core/tests/ffi.rs b/library/core/tests/ffi.rs index 2b33fbd95f073..4ee1c2754761e 100644 --- a/library/core/tests/ffi.rs +++ b/library/core/tests/ffi.rs @@ -1 +1,4 @@ +mod bytes; mod cstr; +mod os_str; +mod wtf8; diff --git a/library/std/src/sys/os_str/bytes/tests.rs b/library/core/tests/ffi/bytes.rs similarity index 87% rename from library/std/src/sys/os_str/bytes/tests.rs rename to library/core/tests/ffi/bytes.rs index e2a99045e41f6..ef62776ca2f02 100644 --- a/library/std/src/sys/os_str/bytes/tests.rs +++ b/library/core/tests/ffi/bytes.rs @@ -1,4 +1,6 @@ -use super::*; +#![feature(os_str_internals)] + +use core::ffi::os_str::Slice; #[test] fn slice_debug_output() { diff --git a/library/core/tests/ffi/os_str.rs b/library/core/tests/ffi/os_str.rs new file mode 100644 index 0000000000000..61eb07065c160 --- /dev/null +++ b/library/core/tests/ffi/os_str.rs @@ -0,0 +1,43 @@ +use core::ffi::os_str::OsStr; +use core::mem::MaybeUninit; +use core::ptr; + +#[test] +fn test_os_str_default() { + let os_str: &OsStr = Default::default(); + assert_eq!("", os_str); +} + +#[test] +fn slice_encoded_bytes() { + let os_str = OsStr::new("123θგ🦀"); + // ASCII + let digits = os_str.slice_encoded_bytes(..3); + assert_eq!(digits, "123"); + let three = os_str.slice_encoded_bytes(2..3); + assert_eq!(three, "3"); + // 2-byte UTF-8 + let theta = os_str.slice_encoded_bytes(3..5); + assert_eq!(theta, "θ"); + // 3-byte UTF-8 + let gani = os_str.slice_encoded_bytes(5..8); + assert_eq!(gani, "გ"); + // 4-byte UTF-8 + let crab = os_str.slice_encoded_bytes(8..); + assert_eq!(crab, "🦀"); +} + +#[test] +fn clone_to_uninit() { + let a = OsStr::new("hello.txt"); + + let mut storage = vec![MaybeUninit::::uninit(); size_of_val::(a)]; + unsafe { a.clone_to_uninit(ptr::from_mut::<[_]>(storage.as_mut_slice()) as *mut OsStr) }; + assert_eq!(a.as_encoded_bytes(), unsafe { MaybeUninit::slice_assume_init_ref(&storage) }); + + let mut b: Box = OsStr::new("world.exe").into(); + assert_eq!(size_of_val::(a), size_of_val::(&b)); + assert_ne!(a, &*b); + unsafe { a.clone_to_uninit(ptr::from_mut::(&mut b)) }; + assert_eq!(a, &*b); +} diff --git a/library/core/tests/ffi/wtf8.rs b/library/core/tests/ffi/wtf8.rs new file mode 100644 index 0000000000000..46c4004a2fefc --- /dev/null +++ b/library/core/tests/ffi/wtf8.rs @@ -0,0 +1,152 @@ +#![feature(os_str_internals)] + +use core::ffi::wtf8::*; + +#[test] +fn code_point_from_u32() { + assert!(CodePoint::from_u32(0).is_some()); + assert!(CodePoint::from_u32(0xD800).is_some()); + assert!(CodePoint::from_u32(0x10FFFF).is_some()); + assert!(CodePoint::from_u32(0x110000).is_none()); +} + +#[test] +fn code_point_to_u32() { + fn c(value: u32) -> CodePoint { + CodePoint::from_u32(value).unwrap() + } + assert_eq!(c(0).to_u32(), 0); + assert_eq!(c(0xD800).to_u32(), 0xD800); + assert_eq!(c(0x10FFFF).to_u32(), 0x10FFFF); +} + +#[test] +fn code_point_to_lead_surrogate() { + fn c(value: u32) -> CodePoint { + CodePoint::from_u32(value).unwrap() + } + assert_eq!(c(0).to_lead_surrogate(), None); + assert_eq!(c(0xE9).to_lead_surrogate(), None); + assert_eq!(c(0xD800).to_lead_surrogate(), Some(0xD800)); + assert_eq!(c(0xDBFF).to_lead_surrogate(), Some(0xDBFF)); + assert_eq!(c(0xDC00).to_lead_surrogate(), None); + assert_eq!(c(0xDFFF).to_lead_surrogate(), None); + assert_eq!(c(0x1F4A9).to_lead_surrogate(), None); + assert_eq!(c(0x10FFFF).to_lead_surrogate(), None); +} + +#[test] +fn code_point_to_trail_surrogate() { + fn c(value: u32) -> CodePoint { + CodePoint::from_u32(value).unwrap() + } + assert_eq!(c(0).to_trail_surrogate(), None); + assert_eq!(c(0xE9).to_trail_surrogate(), None); + assert_eq!(c(0xD800).to_trail_surrogate(), None); + assert_eq!(c(0xDBFF).to_trail_surrogate(), None); + assert_eq!(c(0xDC00).to_trail_surrogate(), Some(0xDC00)); + assert_eq!(c(0xDFFF).to_trail_surrogate(), Some(0xDFFF)); + assert_eq!(c(0x1F4A9).to_trail_surrogate(), None); + assert_eq!(c(0x10FFFF).to_trail_surrogate(), None); +} + +#[test] +fn code_point_from_char() { + assert_eq!(CodePoint::from_char('a').to_u32(), 0x61); + assert_eq!(CodePoint::from_char('💩').to_u32(), 0x1F4A9); +} + +#[test] +fn code_point_to_string() { + assert_eq!(format!("{:?}", CodePoint::from_char('a')), "U+0061"); + assert_eq!(format!("{:?}", CodePoint::from_char('💩')), "U+1F4A9"); +} + +#[test] +fn code_point_to_char() { + fn c(value: u32) -> CodePoint { + CodePoint::from_u32(value).unwrap() + } + assert_eq!(c(0x61).to_char(), Some('a')); + assert_eq!(c(0x1F4A9).to_char(), Some('💩')); + assert_eq!(c(0xD800).to_char(), None); +} + +#[test] +fn code_point_to_char_lossy() { + fn c(value: u32) -> CodePoint { + CodePoint::from_u32(value).unwrap() + } + assert_eq!(c(0x61).to_char_lossy(), 'a'); + assert_eq!(c(0x1F4A9).to_char_lossy(), '💩'); + assert_eq!(c(0xD800).to_char_lossy(), '\u{FFFD}'); +} + +#[test] +fn wtf8_from_str() { + assert_eq!(&Wtf8::from_str("").bytes, b""); + assert_eq!(&Wtf8::from_str("aé 💩").bytes, b"a\xC3\xA9 \xF0\x9F\x92\xA9"); +} + +#[test] +fn wtf8_len() { + assert_eq!(Wtf8::from_str("").len(), 0); + assert_eq!(Wtf8::from_str("aé 💩").len(), 8); +} + +#[test] +fn wtf8_slice() { + assert_eq!(&Wtf8::from_str("aé 💩")[1..4].bytes, b"\xC3\xA9 "); +} + +#[test] +#[should_panic] +fn wtf8_slice_not_code_point_boundary() { + let _ = &Wtf8::from_str("aé 💩")[2..4]; +} + +#[test] +fn wtf8_slice_from() { + assert_eq!(&Wtf8::from_str("aé 💩")[1..].bytes, b"\xC3\xA9 \xF0\x9F\x92\xA9"); +} + +#[test] +#[should_panic] +fn wtf8_slice_from_not_code_point_boundary() { + let _ = &Wtf8::from_str("aé 💩")[2..]; +} + +#[test] +fn wtf8_slice_to() { + assert_eq!(&Wtf8::from_str("aé 💩")[..4].bytes, b"a\xC3\xA9 "); +} + +#[test] +#[should_panic] +fn wtf8_slice_to_not_code_point_boundary() { + let _ = &Wtf8::from_str("aé 💩")[5..]; +} + +#[test] +fn wtf8_ascii_byte_at() { + let slice = Wtf8::from_str("aé 💩"); + assert_eq!(slice.ascii_byte_at(0), b'a'); + assert_eq!(slice.ascii_byte_at(1), b'\xFF'); + assert_eq!(slice.ascii_byte_at(2), b'\xFF'); + assert_eq!(slice.ascii_byte_at(3), b' '); + assert_eq!(slice.ascii_byte_at(4), b'\xFF'); +} + +#[test] +#[should_panic(expected = "byte index 4 is out of bounds")] +fn wtf8_utf8_boundary_out_of_bounds() { + let string = Wtf8::from_str("aé"); + check_utf8_boundary(&string, 4); +} + +#[test] +#[should_panic(expected = "byte index 1 is not a codepoint boundary")] +fn wtf8_utf8_boundary_inside_codepoint() { + let string = Wtf8::from_str("é"); + check_utf8_boundary(&string, 1); +} diff --git a/library/core/tests/path.rs b/library/core/tests/path.rs new file mode 100644 index 0000000000000..7ac0d9c3ddf8d --- /dev/null +++ b/library/core/tests/path.rs @@ -0,0 +1,326 @@ +#[cfg(target_os = "windows")] +mod windows; + +use crate::collections::{BTreeSet, HashSet}; +use crate::hash::DefaultHasher; +use crate::hint::black_box; +use crate::mem::MaybeUninit; +use crate::path::*; +use crate::ptr; + +#[test] +pub fn test_compare() { + use crate::hash::{DefaultHasher, Hash, Hasher}; + + fn hash(t: T) -> u64 { + let mut s = DefaultHasher::new(); + t.hash(&mut s); + s.finish() + } + + macro_rules! tc ( + ($path1:expr, $path2:expr, eq: $eq:expr, + starts_with: $starts_with:expr, ends_with: $ends_with:expr, + relative_from: $relative_from:expr) => ({ + let path1 = Path::new($path1); + let path2 = Path::new($path2); + + let eq = path1 == path2; + assert!(eq == $eq, "{:?} == {:?}, expected {:?}, got {:?}", + $path1, $path2, $eq, eq); + assert!($eq == (hash(path1) == hash(path2)), + "{:?} == {:?}, expected {:?}, got {} and {}", + $path1, $path2, $eq, hash(path1), hash(path2)); + + let starts_with = path1.starts_with(path2); + assert!(starts_with == $starts_with, + "{:?}.starts_with({:?}), expected {:?}, got {:?}", $path1, $path2, + $starts_with, starts_with); + + let ends_with = path1.ends_with(path2); + assert!(ends_with == $ends_with, + "{:?}.ends_with({:?}), expected {:?}, got {:?}", $path1, $path2, + $ends_with, ends_with); + + let relative_from = path1.strip_prefix(path2) + .map(|p| p.to_str().unwrap()) + .ok(); + let exp: Option<&str> = $relative_from; + assert!(relative_from == exp, + "{:?}.strip_prefix({:?}), expected {:?}, got {:?}", + $path1, $path2, exp, relative_from); + }); + ); + + tc!("", "", + eq: true, + starts_with: true, + ends_with: true, + relative_from: Some("") + ); + + tc!("foo", "", + eq: false, + starts_with: true, + ends_with: true, + relative_from: Some("foo") + ); + + tc!("", "foo", + eq: false, + starts_with: false, + ends_with: false, + relative_from: None + ); + + tc!("foo", "foo", + eq: true, + starts_with: true, + ends_with: true, + relative_from: Some("") + ); + + tc!("foo/", "foo", + eq: true, + starts_with: true, + ends_with: true, + relative_from: Some("") + ); + + tc!("foo//", "foo", + eq: true, + starts_with: true, + ends_with: true, + relative_from: Some("") + ); + + tc!("foo///", "foo", + eq: true, + starts_with: true, + ends_with: true, + relative_from: Some("") + ); + + tc!("foo/.", "foo", + eq: true, + starts_with: true, + ends_with: true, + relative_from: Some("") + ); + + tc!("foo/./bar", "foo/bar", + eq: true, + starts_with: true, + ends_with: true, + relative_from: Some("") + ); + + tc!("foo/.//bar", "foo/bar", + eq: true, + starts_with: true, + ends_with: true, + relative_from: Some("") + ); + + tc!("foo//./bar", "foo/bar", + eq: true, + starts_with: true, + ends_with: true, + relative_from: Some("") + ); + + tc!("foo/bar", "foo", + eq: false, + starts_with: true, + ends_with: false, + relative_from: Some("bar") + ); + + tc!("foo/bar", "foobar", + eq: false, + starts_with: false, + ends_with: false, + relative_from: None + ); + + tc!("foo/bar/baz", "foo/bar", + eq: false, + starts_with: true, + ends_with: false, + relative_from: Some("baz") + ); + + tc!("foo/bar", "foo/bar/baz", + eq: false, + starts_with: false, + ends_with: false, + relative_from: None + ); + + tc!("./foo/bar/", ".", + eq: false, + starts_with: true, + ends_with: false, + relative_from: Some("foo/bar") + ); + + if cfg!(windows) { + tc!(r"C:\src\rust\cargo-test\test\Cargo.toml", + r"c:\src\rust\cargo-test\test", + eq: false, + starts_with: true, + ends_with: false, + relative_from: Some("Cargo.toml") + ); + + tc!(r"c:\foo", r"C:\foo", + eq: true, + starts_with: true, + ends_with: true, + relative_from: Some("") + ); + + tc!(r"C:\foo\.\bar.txt", r"C:\foo\bar.txt", + eq: true, + starts_with: true, + ends_with: true, + relative_from: Some("") + ); + + tc!(r"C:\foo\.", r"C:\foo", + eq: true, + starts_with: true, + ends_with: true, + relative_from: Some("") + ); + + tc!(r"\\?\C:\foo\.\bar.txt", r"\\?\C:\foo\bar.txt", + eq: false, + starts_with: false, + ends_with: false, + relative_from: None + ); + } +} + +#[test] +fn test_components_debug() { + let path = Path::new("/tmp"); + + let mut components = path.components(); + + let expected = "Components([RootDir, Normal(\"tmp\")])"; + let actual = format!("{components:?}"); + assert_eq!(expected, actual); + + let _ = components.next().unwrap(); + let expected = "Components([Normal(\"tmp\")])"; + let actual = format!("{components:?}"); + assert_eq!(expected, actual); + + let _ = components.next().unwrap(); + let expected = "Components([])"; + let actual = format!("{components:?}"); + assert_eq!(expected, actual); +} + +#[cfg(unix)] +#[test] +fn test_iter_debug() { + let path = Path::new("/tmp"); + + let mut iter = path.iter(); + + let expected = "Iter([\"/\", \"tmp\"])"; + let actual = format!("{iter:?}"); + assert_eq!(expected, actual); + + let _ = iter.next().unwrap(); + let expected = "Iter([\"tmp\"])"; + let actual = format!("{iter:?}"); + assert_eq!(expected, actual); + + let _ = iter.next().unwrap(); + let expected = "Iter([])"; + let actual = format!("{iter:?}"); + assert_eq!(expected, actual); +} + +#[test] +fn display_format_flags() { + assert_eq!(format!("a{:#<5}b", Path::new("").display()), "a#####b"); + assert_eq!(format!("a{:#<5}b", Path::new("a").display()), "aa####b"); +} + +#[test] +fn test_ord() { + macro_rules! ord( + ($ord:ident, $left:expr, $right:expr) => ({ + use crate::cmp::Ordering; + + let left = Path::new($left); + let right = Path::new($right); + assert_eq!(left.cmp(&right), Ordering::$ord); + if (core::cmp::Ordering::$ord == Ordering::Equal) { + assert_eq!(left, right); + + let mut hasher = DefaultHasher::new(); + left.hash(&mut hasher); + let left_hash = hasher.finish(); + hasher = DefaultHasher::new(); + right.hash(&mut hasher); + let right_hash = hasher.finish(); + + assert_eq!(left_hash, right_hash, "hashes for {:?} and {:?} must match", left, right); + } else { + assert_ne!(left, right); + } + }); + ); + + ord!(Less, "1", "2"); + ord!(Less, "/foo/bar", "/foo./bar"); + ord!(Less, "foo/bar", "foo/bar."); + ord!(Equal, "foo/./bar", "foo/bar/"); + ord!(Equal, "foo/bar", "foo/bar/"); + ord!(Equal, "foo/bar", "foo/bar/."); + ord!(Equal, "foo/bar", "foo/bar//"); +} + +#[bench] +fn bench_hash_path_short(b: &mut test::Bencher) { + let mut hasher = DefaultHasher::new(); + let path = Path::new("explorer.exe"); + + b.iter(|| black_box(path).hash(&mut hasher)); + + black_box(hasher.finish()); +} + +#[bench] +fn bench_hash_path_long(b: &mut test::Bencher) { + let mut hasher = DefaultHasher::new(); + let path = + Path::new("/aaaaa/aaaaaa/./../aaaaaaaa/bbbbbbbbbbbbb/ccccccccccc/ddddddddd/eeeeeee.fff"); + + b.iter(|| black_box(path).hash(&mut hasher)); + + black_box(hasher.finish()); +} + +#[test] +fn clone_to_uninit() { + let a = Path::new("hello.txt"); + + let mut storage = vec![MaybeUninit::::uninit(); size_of_val::(a)]; + unsafe { a.clone_to_uninit(ptr::from_mut::<[_]>(storage.as_mut_slice()) as *mut Path) }; + assert_eq!(a.as_os_str().as_encoded_bytes(), unsafe { + MaybeUninit::slice_assume_init_ref(&storage) + }); + + let mut b: Box = Path::new("world.exe").into(); + assert_eq!(size_of_val::(a), size_of_val::(&b)); + assert_ne!(a, &*b); + unsafe { a.clone_to_uninit(ptr::from_mut::(&mut b)) }; + assert_eq!(a, &*b); +} diff --git a/library/std/src/sys/path/windows/tests.rs b/library/core/tests/path/windows.rs similarity index 99% rename from library/std/src/sys/path/windows/tests.rs rename to library/core/tests/path/windows.rs index 623c6236166da..28a08ba3c7b11 100644 --- a/library/std/src/sys/path/windows/tests.rs +++ b/library/core/tests/path/windows.rs @@ -1,4 +1,4 @@ -use super::*; +use crate::path::*; #[test] fn test_parse_next_component() { diff --git a/library/std/src/ffi/os_str.rs b/library/std/src/ffi/os_str.rs index 918eec2d0d8ef..a8e7b65e5b8de 100644 --- a/library/std/src/ffi/os_str.rs +++ b/library/std/src/ffi/os_str.rs @@ -1,1742 +1,4 @@ //! The [`OsStr`] and [`OsString`] types and associated utilities. -#[cfg(test)] -mod tests; - -use core::clone::CloneToUninit; - -use crate::borrow::{Borrow, Cow}; -use crate::collections::TryReserveError; -use crate::hash::{Hash, Hasher}; -use crate::ops::{self, Range}; -use crate::ptr::addr_of_mut; -use crate::rc::Rc; -use crate::str::FromStr; -use crate::sync::Arc; -use crate::sys::os_str::{Buf, Slice}; -use crate::sys_common::{AsInner, FromInner, IntoInner}; -use crate::{cmp, fmt, slice}; - -/// A type that can represent owned, mutable platform-native strings, but is -/// cheaply inter-convertible with Rust strings. -/// -/// The need for this type arises from the fact that: -/// -/// * On Unix systems, strings are often arbitrary sequences of non-zero -/// bytes, in many cases interpreted as UTF-8. -/// -/// * On Windows, strings are often arbitrary sequences of non-zero 16-bit -/// values, interpreted as UTF-16 when it is valid to do so. -/// -/// * In Rust, strings are always valid UTF-8, which may contain zeros. -/// -/// `OsString` and [`OsStr`] bridge this gap by simultaneously representing Rust -/// and platform-native string values, and in particular allowing a Rust string -/// to be converted into an "OS" string with no cost if possible. A consequence -/// of this is that `OsString` instances are *not* `NUL` terminated; in order -/// to pass to e.g., Unix system call, you should create a [`CStr`]. -/// -/// `OsString` is to &[OsStr] as [`String`] is to &[str]: the former -/// in each pair are owned strings; the latter are borrowed -/// references. -/// -/// Note, `OsString` and [`OsStr`] internally do not necessarily hold strings in -/// the form native to the platform; While on Unix, strings are stored as a -/// sequence of 8-bit values, on Windows, where strings are 16-bit value based -/// as just discussed, strings are also actually stored as a sequence of 8-bit -/// values, encoded in a less-strict variant of UTF-8. This is useful to -/// understand when handling capacity and length values. -/// -/// # Capacity of `OsString` -/// -/// Capacity uses units of UTF-8 bytes for OS strings which were created from valid unicode, and -/// uses units of bytes in an unspecified encoding for other contents. On a given target, all -/// `OsString` and `OsStr` values use the same units for capacity, so the following will work: -/// ``` -/// use std::ffi::{OsStr, OsString}; -/// -/// fn concat_os_strings(a: &OsStr, b: &OsStr) -> OsString { -/// let mut ret = OsString::with_capacity(a.len() + b.len()); // This will allocate -/// ret.push(a); // This will not allocate further -/// ret.push(b); // This will not allocate further -/// ret -/// } -/// ``` -/// -/// # Creating an `OsString` -/// -/// **From a Rust string**: `OsString` implements -/// [From]<[String]>, so you can use my_string.[into]\() to -/// create an `OsString` from a normal Rust string. -/// -/// **From slices:** Just like you can start with an empty Rust -/// [`String`] and then [`String::push_str`] some &[str] -/// sub-string slices into it, you can create an empty `OsString` with -/// the [`OsString::new`] method and then push string slices into it with the -/// [`OsString::push`] method. -/// -/// # Extracting a borrowed reference to the whole OS string -/// -/// You can use the [`OsString::as_os_str`] method to get an &[OsStr] from -/// an `OsString`; this is effectively a borrowed reference to the -/// whole string. -/// -/// # Conversions -/// -/// See the [module's toplevel documentation about conversions][conversions] for a discussion on -/// the traits which `OsString` implements for [conversions] from/to native representations. -/// -/// [`CStr`]: crate::ffi::CStr -/// [conversions]: super#conversions -/// [into]: Into::into -#[cfg_attr(not(test), rustc_diagnostic_item = "OsString")] -#[stable(feature = "rust1", since = "1.0.0")] -pub struct OsString { - inner: Buf, -} - -/// Allows extension traits within `std`. -#[unstable(feature = "sealed", issue = "none")] -impl crate::sealed::Sealed for OsString {} - -/// Borrowed reference to an OS string (see [`OsString`]). -/// -/// This type represents a borrowed reference to a string in the operating system's preferred -/// representation. -/// -/// `&OsStr` is to [`OsString`] as &[str] is to [`String`]: the -/// former in each pair are borrowed references; the latter are owned strings. -/// -/// See the [module's toplevel documentation about conversions][conversions] for a discussion on -/// the traits which `OsStr` implements for [conversions] from/to native representations. -/// -/// [conversions]: super#conversions -#[cfg_attr(not(test), rustc_diagnostic_item = "OsStr")] -#[stable(feature = "rust1", since = "1.0.0")] -// `OsStr::from_inner` current implementation relies -// on `OsStr` being layout-compatible with `Slice`. -// However, `OsStr` layout is considered an implementation detail and must not be relied upon. -#[repr(transparent)] -pub struct OsStr { - inner: Slice, -} - -/// Allows extension traits within `std`. -#[unstable(feature = "sealed", issue = "none")] -impl crate::sealed::Sealed for OsStr {} - -impl OsString { - /// Constructs a new empty `OsString`. - /// - /// # Examples - /// - /// ``` - /// use std::ffi::OsString; - /// - /// let os_string = OsString::new(); - /// ``` - #[stable(feature = "rust1", since = "1.0.0")] - #[must_use] - #[inline] - pub fn new() -> OsString { - OsString { inner: Buf::from_string(String::new()) } - } - - /// Converts bytes to an `OsString` without checking that the bytes contains - /// valid [`OsStr`]-encoded data. - /// - /// The byte encoding is an unspecified, platform-specific, self-synchronizing superset of UTF-8. - /// By being a self-synchronizing superset of UTF-8, this encoding is also a superset of 7-bit - /// ASCII. - /// - /// See the [module's toplevel documentation about conversions][conversions] for safe, - /// cross-platform [conversions] from/to native representations. - /// - /// # Safety - /// - /// As the encoding is unspecified, callers must pass in bytes that originated as a mixture of - /// validated UTF-8 and bytes from [`OsStr::as_encoded_bytes`] from within the same Rust version - /// built for the same target platform. For example, reconstructing an `OsString` from bytes sent - /// over the network or stored in a file will likely violate these safety rules. - /// - /// Due to the encoding being self-synchronizing, the bytes from [`OsStr::as_encoded_bytes`] can be - /// split either immediately before or immediately after any valid non-empty UTF-8 substring. - /// - /// # Example - /// - /// ``` - /// use std::ffi::OsStr; - /// - /// let os_str = OsStr::new("Mary had a little lamb"); - /// let bytes = os_str.as_encoded_bytes(); - /// let words = bytes.split(|b| *b == b' '); - /// let words: Vec<&OsStr> = words.map(|word| { - /// // SAFETY: - /// // - Each `word` only contains content that originated from `OsStr::as_encoded_bytes` - /// // - Only split with ASCII whitespace which is a non-empty UTF-8 substring - /// unsafe { OsStr::from_encoded_bytes_unchecked(word) } - /// }).collect(); - /// ``` - /// - /// [conversions]: super#conversions - #[inline] - #[stable(feature = "os_str_bytes", since = "1.74.0")] - pub unsafe fn from_encoded_bytes_unchecked(bytes: Vec) -> Self { - OsString { inner: unsafe { Buf::from_encoded_bytes_unchecked(bytes) } } - } - - /// Converts to an [`OsStr`] slice. - /// - /// # Examples - /// - /// ``` - /// use std::ffi::{OsString, OsStr}; - /// - /// let os_string = OsString::from("foo"); - /// let os_str = OsStr::new("foo"); - /// assert_eq!(os_string.as_os_str(), os_str); - /// ``` - #[stable(feature = "rust1", since = "1.0.0")] - #[must_use] - #[inline] - pub fn as_os_str(&self) -> &OsStr { - self - } - - /// Converts the `OsString` into a byte slice. To convert the byte slice back into an - /// `OsString`, use the [`OsStr::from_encoded_bytes_unchecked`] function. - /// - /// The byte encoding is an unspecified, platform-specific, self-synchronizing superset of UTF-8. - /// By being a self-synchronizing superset of UTF-8, this encoding is also a superset of 7-bit - /// ASCII. - /// - /// Note: As the encoding is unspecified, any sub-slice of bytes that is not valid UTF-8 should - /// be treated as opaque and only comparable within the same Rust version built for the same - /// target platform. For example, sending the bytes over the network or storing it in a file - /// will likely result in incompatible data. See [`OsString`] for more encoding details - /// and [`std::ffi`] for platform-specific, specified conversions. - /// - /// [`std::ffi`]: crate::ffi - #[inline] - #[stable(feature = "os_str_bytes", since = "1.74.0")] - pub fn into_encoded_bytes(self) -> Vec { - self.inner.into_encoded_bytes() - } - - /// Converts the `OsString` into a [`String`] if it contains valid Unicode data. - /// - /// On failure, ownership of the original `OsString` is returned. - /// - /// # Examples - /// - /// ``` - /// use std::ffi::OsString; - /// - /// let os_string = OsString::from("foo"); - /// let string = os_string.into_string(); - /// assert_eq!(string, Ok(String::from("foo"))); - /// ``` - #[stable(feature = "rust1", since = "1.0.0")] - #[inline] - pub fn into_string(self) -> Result { - self.inner.into_string().map_err(|buf| OsString { inner: buf }) - } - - /// Extends the string with the given &[OsStr] slice. - /// - /// # Examples - /// - /// ``` - /// use std::ffi::OsString; - /// - /// let mut os_string = OsString::from("foo"); - /// os_string.push("bar"); - /// assert_eq!(&os_string, "foobar"); - /// ``` - #[stable(feature = "rust1", since = "1.0.0")] - #[inline] - #[rustc_confusables("append", "put")] - pub fn push>(&mut self, s: T) { - self.inner.push_slice(&s.as_ref().inner) - } - - /// Creates a new `OsString` with at least the given capacity. - /// - /// The string will be able to hold at least `capacity` length units of other - /// OS strings without reallocating. This method is allowed to allocate for - /// more units than `capacity`. If `capacity` is 0, the string will not - /// allocate. - /// - /// See the main `OsString` documentation information about encoding and capacity units. - /// - /// # Examples - /// - /// ``` - /// use std::ffi::OsString; - /// - /// let mut os_string = OsString::with_capacity(10); - /// let capacity = os_string.capacity(); - /// - /// // This push is done without reallocating - /// os_string.push("foo"); - /// - /// assert_eq!(capacity, os_string.capacity()); - /// ``` - #[stable(feature = "osstring_simple_functions", since = "1.9.0")] - #[must_use] - #[inline] - pub fn with_capacity(capacity: usize) -> OsString { - OsString { inner: Buf::with_capacity(capacity) } - } - - /// Truncates the `OsString` to zero length. - /// - /// # Examples - /// - /// ``` - /// use std::ffi::OsString; - /// - /// let mut os_string = OsString::from("foo"); - /// assert_eq!(&os_string, "foo"); - /// - /// os_string.clear(); - /// assert_eq!(&os_string, ""); - /// ``` - #[stable(feature = "osstring_simple_functions", since = "1.9.0")] - #[inline] - pub fn clear(&mut self) { - self.inner.clear() - } - - /// Returns the capacity this `OsString` can hold without reallocating. - /// - /// See the main `OsString` documentation information about encoding and capacity units. - /// - /// # Examples - /// - /// ``` - /// use std::ffi::OsString; - /// - /// let os_string = OsString::with_capacity(10); - /// assert!(os_string.capacity() >= 10); - /// ``` - #[stable(feature = "osstring_simple_functions", since = "1.9.0")] - #[must_use] - #[inline] - pub fn capacity(&self) -> usize { - self.inner.capacity() - } - - /// Reserves capacity for at least `additional` more capacity to be inserted - /// in the given `OsString`. Does nothing if the capacity is - /// already sufficient. - /// - /// The collection may reserve more space to speculatively avoid frequent reallocations. - /// - /// See the main `OsString` documentation information about encoding and capacity units. - /// - /// # Examples - /// - /// ``` - /// use std::ffi::OsString; - /// - /// let mut s = OsString::new(); - /// s.reserve(10); - /// assert!(s.capacity() >= 10); - /// ``` - #[stable(feature = "osstring_simple_functions", since = "1.9.0")] - #[inline] - pub fn reserve(&mut self, additional: usize) { - self.inner.reserve(additional) - } - - /// Tries to reserve capacity for at least `additional` more length units - /// in the given `OsString`. The string may reserve more space to speculatively avoid - /// frequent reallocations. After calling `try_reserve`, capacity will be - /// greater than or equal to `self.len() + additional` if it returns `Ok(())`. - /// Does nothing if capacity is already sufficient. This method preserves - /// the contents even if an error occurs. - /// - /// See the main `OsString` documentation information about encoding and capacity units. - /// - /// # Errors - /// - /// If the capacity overflows, or the allocator reports a failure, then an error - /// is returned. - /// - /// # Examples - /// - /// ``` - /// use std::ffi::{OsStr, OsString}; - /// use std::collections::TryReserveError; - /// - /// fn process_data(data: &str) -> Result { - /// let mut s = OsString::new(); - /// - /// // Pre-reserve the memory, exiting if we can't - /// s.try_reserve(OsStr::new(data).len())?; - /// - /// // Now we know this can't OOM in the middle of our complex work - /// s.push(data); - /// - /// Ok(s) - /// } - /// # process_data("123").expect("why is the test harness OOMing on 3 bytes?"); - /// ``` - #[stable(feature = "try_reserve_2", since = "1.63.0")] - #[inline] - pub fn try_reserve(&mut self, additional: usize) -> Result<(), TryReserveError> { - self.inner.try_reserve(additional) - } - - /// Reserves the minimum capacity for at least `additional` more capacity to - /// be inserted in the given `OsString`. Does nothing if the capacity is - /// already sufficient. - /// - /// Note that the allocator may give the collection more space than it - /// requests. Therefore, capacity can not be relied upon to be precisely - /// minimal. Prefer [`reserve`] if future insertions are expected. - /// - /// [`reserve`]: OsString::reserve - /// - /// See the main `OsString` documentation information about encoding and capacity units. - /// - /// # Examples - /// - /// ``` - /// use std::ffi::OsString; - /// - /// let mut s = OsString::new(); - /// s.reserve_exact(10); - /// assert!(s.capacity() >= 10); - /// ``` - #[stable(feature = "osstring_simple_functions", since = "1.9.0")] - #[inline] - pub fn reserve_exact(&mut self, additional: usize) { - self.inner.reserve_exact(additional) - } - - /// Tries to reserve the minimum capacity for at least `additional` - /// more length units in the given `OsString`. After calling - /// `try_reserve_exact`, capacity will be greater than or equal to - /// `self.len() + additional` if it returns `Ok(())`. - /// Does nothing if the capacity is already sufficient. - /// - /// Note that the allocator may give the `OsString` more space than it - /// requests. Therefore, capacity can not be relied upon to be precisely - /// minimal. Prefer [`try_reserve`] if future insertions are expected. - /// - /// [`try_reserve`]: OsString::try_reserve - /// - /// See the main `OsString` documentation information about encoding and capacity units. - /// - /// # Errors - /// - /// If the capacity overflows, or the allocator reports a failure, then an error - /// is returned. - /// - /// # Examples - /// - /// ``` - /// use std::ffi::{OsStr, OsString}; - /// use std::collections::TryReserveError; - /// - /// fn process_data(data: &str) -> Result { - /// let mut s = OsString::new(); - /// - /// // Pre-reserve the memory, exiting if we can't - /// s.try_reserve_exact(OsStr::new(data).len())?; - /// - /// // Now we know this can't OOM in the middle of our complex work - /// s.push(data); - /// - /// Ok(s) - /// } - /// # process_data("123").expect("why is the test harness OOMing on 3 bytes?"); - /// ``` - #[stable(feature = "try_reserve_2", since = "1.63.0")] - #[inline] - pub fn try_reserve_exact(&mut self, additional: usize) -> Result<(), TryReserveError> { - self.inner.try_reserve_exact(additional) - } - - /// Shrinks the capacity of the `OsString` to match its length. - /// - /// See the main `OsString` documentation information about encoding and capacity units. - /// - /// # Examples - /// - /// ``` - /// use std::ffi::OsString; - /// - /// let mut s = OsString::from("foo"); - /// - /// s.reserve(100); - /// assert!(s.capacity() >= 100); - /// - /// s.shrink_to_fit(); - /// assert_eq!(3, s.capacity()); - /// ``` - #[stable(feature = "osstring_shrink_to_fit", since = "1.19.0")] - #[inline] - pub fn shrink_to_fit(&mut self) { - self.inner.shrink_to_fit() - } - - /// Shrinks the capacity of the `OsString` with a lower bound. - /// - /// The capacity will remain at least as large as both the length - /// and the supplied value. - /// - /// If the current capacity is less than the lower limit, this is a no-op. - /// - /// See the main `OsString` documentation information about encoding and capacity units. - /// - /// # Examples - /// - /// ``` - /// use std::ffi::OsString; - /// - /// let mut s = OsString::from("foo"); - /// - /// s.reserve(100); - /// assert!(s.capacity() >= 100); - /// - /// s.shrink_to(10); - /// assert!(s.capacity() >= 10); - /// s.shrink_to(0); - /// assert!(s.capacity() >= 3); - /// ``` - #[inline] - #[stable(feature = "shrink_to", since = "1.56.0")] - pub fn shrink_to(&mut self, min_capacity: usize) { - self.inner.shrink_to(min_capacity) - } - - /// Converts this `OsString` into a boxed [`OsStr`]. - /// - /// # Examples - /// - /// ``` - /// use std::ffi::{OsString, OsStr}; - /// - /// let s = OsString::from("hello"); - /// - /// let b: Box = s.into_boxed_os_str(); - /// ``` - #[must_use = "`self` will be dropped if the result is not used"] - #[stable(feature = "into_boxed_os_str", since = "1.20.0")] - pub fn into_boxed_os_str(self) -> Box { - let rw = Box::into_raw(self.inner.into_box()) as *mut OsStr; - unsafe { Box::from_raw(rw) } - } - - /// Consumes and leaks the `OsString`, returning a mutable reference to the contents, - /// `&'a mut OsStr`. - /// - /// The caller has free choice over the returned lifetime, including 'static. - /// Indeed, this function is ideally used for data that lives for the remainder of - /// the program’s life, as dropping the returned reference will cause a memory leak. - /// - /// It does not reallocate or shrink the `OsString`, so the leaked allocation may include - /// unused capacity that is not part of the returned slice. If you want to discard excess - /// capacity, call [`into_boxed_os_str`], and then [`Box::leak`] instead. - /// However, keep in mind that trimming the capacity may result in a reallocation and copy. - /// - /// [`into_boxed_os_str`]: Self::into_boxed_os_str - #[unstable(feature = "os_string_pathbuf_leak", issue = "125965")] - #[inline] - pub fn leak<'a>(self) -> &'a mut OsStr { - OsStr::from_inner_mut(self.inner.leak()) - } - - /// Provides plumbing to core `Vec::truncate`. - /// More well behaving alternative to allowing outer types - /// full mutable access to the core `Vec`. - #[inline] - pub(crate) fn truncate(&mut self, len: usize) { - self.inner.truncate(len); - } - - /// Provides plumbing to core `Vec::extend_from_slice`. - /// More well behaving alternative to allowing outer types - /// full mutable access to the core `Vec`. - #[inline] - pub(crate) fn extend_from_slice(&mut self, other: &[u8]) { - self.inner.extend_from_slice(other); - } -} - -#[stable(feature = "rust1", since = "1.0.0")] -impl From for OsString { - /// Converts a [`String`] into an [`OsString`]. - /// - /// This conversion does not allocate or copy memory. - #[inline] - fn from(s: String) -> OsString { - OsString { inner: Buf::from_string(s) } - } -} - -#[stable(feature = "rust1", since = "1.0.0")] -impl> From<&T> for OsString { - /// Copies any value implementing [AsRef]<[OsStr]> - /// into a newly allocated [`OsString`]. - fn from(s: &T) -> OsString { - s.as_ref().to_os_string() - } -} - -#[stable(feature = "rust1", since = "1.0.0")] -impl ops::Index for OsString { - type Output = OsStr; - - #[inline] - fn index(&self, _index: ops::RangeFull) -> &OsStr { - OsStr::from_inner(self.inner.as_slice()) - } -} - -#[stable(feature = "mut_osstr", since = "1.44.0")] -impl ops::IndexMut for OsString { - #[inline] - fn index_mut(&mut self, _index: ops::RangeFull) -> &mut OsStr { - OsStr::from_inner_mut(self.inner.as_mut_slice()) - } -} - -#[stable(feature = "rust1", since = "1.0.0")] -impl ops::Deref for OsString { - type Target = OsStr; - - #[inline] - fn deref(&self) -> &OsStr { - &self[..] - } -} - -#[stable(feature = "mut_osstr", since = "1.44.0")] -impl ops::DerefMut for OsString { - #[inline] - fn deref_mut(&mut self) -> &mut OsStr { - &mut self[..] - } -} - -#[stable(feature = "osstring_default", since = "1.9.0")] -impl Default for OsString { - /// Constructs an empty `OsString`. - #[inline] - fn default() -> OsString { - OsString::new() - } -} - -#[stable(feature = "rust1", since = "1.0.0")] -impl Clone for OsString { - #[inline] - fn clone(&self) -> Self { - OsString { inner: self.inner.clone() } - } - - /// Clones the contents of `source` into `self`. - /// - /// This method is preferred over simply assigning `source.clone()` to `self`, - /// as it avoids reallocation if possible. - #[inline] - fn clone_from(&mut self, source: &Self) { - self.inner.clone_from(&source.inner) - } -} - -#[stable(feature = "rust1", since = "1.0.0")] -impl fmt::Debug for OsString { - fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { - fmt::Debug::fmt(&**self, formatter) - } -} - -#[stable(feature = "rust1", since = "1.0.0")] -impl PartialEq for OsString { - #[inline] - fn eq(&self, other: &OsString) -> bool { - &**self == &**other - } -} - -#[stable(feature = "rust1", since = "1.0.0")] -impl PartialEq for OsString { - #[inline] - fn eq(&self, other: &str) -> bool { - &**self == other - } -} - -#[stable(feature = "rust1", since = "1.0.0")] -impl PartialEq for str { - #[inline] - fn eq(&self, other: &OsString) -> bool { - &**other == self - } -} - -#[stable(feature = "os_str_str_ref_eq", since = "1.29.0")] -impl PartialEq<&str> for OsString { - #[inline] - fn eq(&self, other: &&str) -> bool { - **self == **other - } -} - -#[stable(feature = "os_str_str_ref_eq", since = "1.29.0")] -impl<'a> PartialEq for &'a str { - #[inline] - fn eq(&self, other: &OsString) -> bool { - **other == **self - } -} - -#[stable(feature = "rust1", since = "1.0.0")] -impl Eq for OsString {} - -#[stable(feature = "rust1", since = "1.0.0")] -impl PartialOrd for OsString { - #[inline] - fn partial_cmp(&self, other: &OsString) -> Option { - (&**self).partial_cmp(&**other) - } - #[inline] - fn lt(&self, other: &OsString) -> bool { - &**self < &**other - } - #[inline] - fn le(&self, other: &OsString) -> bool { - &**self <= &**other - } - #[inline] - fn gt(&self, other: &OsString) -> bool { - &**self > &**other - } - #[inline] - fn ge(&self, other: &OsString) -> bool { - &**self >= &**other - } -} - -#[stable(feature = "rust1", since = "1.0.0")] -impl PartialOrd for OsString { - #[inline] - fn partial_cmp(&self, other: &str) -> Option { - (&**self).partial_cmp(other) - } -} - -#[stable(feature = "rust1", since = "1.0.0")] -impl Ord for OsString { - #[inline] - fn cmp(&self, other: &OsString) -> cmp::Ordering { - (&**self).cmp(&**other) - } -} - -#[stable(feature = "rust1", since = "1.0.0")] -impl Hash for OsString { - #[inline] - fn hash(&self, state: &mut H) { - (&**self).hash(state) - } -} - -#[stable(feature = "os_string_fmt_write", since = "1.64.0")] -impl fmt::Write for OsString { - fn write_str(&mut self, s: &str) -> fmt::Result { - self.push(s); - Ok(()) - } -} - -impl OsStr { - /// Coerces into an `OsStr` slice. - /// - /// # Examples - /// - /// ``` - /// use std::ffi::OsStr; - /// - /// let os_str = OsStr::new("foo"); - /// ``` - #[inline] - #[stable(feature = "rust1", since = "1.0.0")] - pub fn new + ?Sized>(s: &S) -> &OsStr { - s.as_ref() - } - - /// Converts a slice of bytes to an OS string slice without checking that the string contains - /// valid `OsStr`-encoded data. - /// - /// The byte encoding is an unspecified, platform-specific, self-synchronizing superset of UTF-8. - /// By being a self-synchronizing superset of UTF-8, this encoding is also a superset of 7-bit - /// ASCII. - /// - /// See the [module's toplevel documentation about conversions][conversions] for safe, - /// cross-platform [conversions] from/to native representations. - /// - /// # Safety - /// - /// As the encoding is unspecified, callers must pass in bytes that originated as a mixture of - /// validated UTF-8 and bytes from [`OsStr::as_encoded_bytes`] from within the same Rust version - /// built for the same target platform. For example, reconstructing an `OsStr` from bytes sent - /// over the network or stored in a file will likely violate these safety rules. - /// - /// Due to the encoding being self-synchronizing, the bytes from [`OsStr::as_encoded_bytes`] can be - /// split either immediately before or immediately after any valid non-empty UTF-8 substring. - /// - /// # Example - /// - /// ``` - /// use std::ffi::OsStr; - /// - /// let os_str = OsStr::new("Mary had a little lamb"); - /// let bytes = os_str.as_encoded_bytes(); - /// let words = bytes.split(|b| *b == b' '); - /// let words: Vec<&OsStr> = words.map(|word| { - /// // SAFETY: - /// // - Each `word` only contains content that originated from `OsStr::as_encoded_bytes` - /// // - Only split with ASCII whitespace which is a non-empty UTF-8 substring - /// unsafe { OsStr::from_encoded_bytes_unchecked(word) } - /// }).collect(); - /// ``` - /// - /// [conversions]: super#conversions - #[inline] - #[stable(feature = "os_str_bytes", since = "1.74.0")] - pub unsafe fn from_encoded_bytes_unchecked(bytes: &[u8]) -> &Self { - Self::from_inner(unsafe { Slice::from_encoded_bytes_unchecked(bytes) }) - } - - #[inline] - fn from_inner(inner: &Slice) -> &OsStr { - // SAFETY: OsStr is just a wrapper of Slice, - // therefore converting &Slice to &OsStr is safe. - unsafe { &*(inner as *const Slice as *const OsStr) } - } - - #[inline] - fn from_inner_mut(inner: &mut Slice) -> &mut OsStr { - // SAFETY: OsStr is just a wrapper of Slice, - // therefore converting &mut Slice to &mut OsStr is safe. - // Any method that mutates OsStr must be careful not to - // break platform-specific encoding, in particular Wtf8 on Windows. - unsafe { &mut *(inner as *mut Slice as *mut OsStr) } - } - - /// Yields a &[str] slice if the `OsStr` is valid Unicode. - /// - /// This conversion may entail doing a check for UTF-8 validity. - /// - /// # Examples - /// - /// ``` - /// use std::ffi::OsStr; - /// - /// let os_str = OsStr::new("foo"); - /// assert_eq!(os_str.to_str(), Some("foo")); - /// ``` - #[stable(feature = "rust1", since = "1.0.0")] - #[must_use = "this returns the result of the operation, \ - without modifying the original"] - #[inline] - pub fn to_str(&self) -> Option<&str> { - self.inner.to_str().ok() - } - - /// Converts an `OsStr` to a [Cow]<[str]>. - /// - /// Any non-Unicode sequences are replaced with - /// [`U+FFFD REPLACEMENT CHARACTER`][U+FFFD]. - /// - /// [U+FFFD]: crate::char::REPLACEMENT_CHARACTER - /// - /// # Examples - /// - /// Calling `to_string_lossy` on an `OsStr` with invalid unicode: - /// - /// ``` - /// // Note, due to differences in how Unix and Windows represent strings, - /// // we are forced to complicate this example, setting up example `OsStr`s - /// // with different source data and via different platform extensions. - /// // Understand that in reality you could end up with such example invalid - /// // sequences simply through collecting user command line arguments, for - /// // example. - /// - /// #[cfg(unix)] { - /// use std::ffi::OsStr; - /// use std::os::unix::ffi::OsStrExt; - /// - /// // Here, the values 0x66 and 0x6f correspond to 'f' and 'o' - /// // respectively. The value 0x80 is a lone continuation byte, invalid - /// // in a UTF-8 sequence. - /// let source = [0x66, 0x6f, 0x80, 0x6f]; - /// let os_str = OsStr::from_bytes(&source[..]); - /// - /// assert_eq!(os_str.to_string_lossy(), "fo�o"); - /// } - /// #[cfg(windows)] { - /// use std::ffi::OsString; - /// use std::os::windows::prelude::*; - /// - /// // Here the values 0x0066 and 0x006f correspond to 'f' and 'o' - /// // respectively. The value 0xD800 is a lone surrogate half, invalid - /// // in a UTF-16 sequence. - /// let source = [0x0066, 0x006f, 0xD800, 0x006f]; - /// let os_string = OsString::from_wide(&source[..]); - /// let os_str = os_string.as_os_str(); - /// - /// assert_eq!(os_str.to_string_lossy(), "fo�o"); - /// } - /// ``` - #[stable(feature = "rust1", since = "1.0.0")] - #[must_use = "this returns the result of the operation, \ - without modifying the original"] - #[inline] - pub fn to_string_lossy(&self) -> Cow<'_, str> { - self.inner.to_string_lossy() - } - - /// Copies the slice into an owned [`OsString`]. - /// - /// # Examples - /// - /// ``` - /// use std::ffi::{OsStr, OsString}; - /// - /// let os_str = OsStr::new("foo"); - /// let os_string = os_str.to_os_string(); - /// assert_eq!(os_string, OsString::from("foo")); - /// ``` - #[stable(feature = "rust1", since = "1.0.0")] - #[must_use = "this returns the result of the operation, \ - without modifying the original"] - #[inline] - pub fn to_os_string(&self) -> OsString { - OsString { inner: self.inner.to_owned() } - } - - /// Checks whether the `OsStr` is empty. - /// - /// # Examples - /// - /// ``` - /// use std::ffi::OsStr; - /// - /// let os_str = OsStr::new(""); - /// assert!(os_str.is_empty()); - /// - /// let os_str = OsStr::new("foo"); - /// assert!(!os_str.is_empty()); - /// ``` - #[stable(feature = "osstring_simple_functions", since = "1.9.0")] - #[must_use] - #[inline] - pub fn is_empty(&self) -> bool { - self.inner.inner.is_empty() - } - - /// Returns the length of this `OsStr`. - /// - /// Note that this does **not** return the number of bytes in the string in - /// OS string form. - /// - /// The length returned is that of the underlying storage used by `OsStr`. - /// As discussed in the [`OsString`] introduction, [`OsString`] and `OsStr` - /// store strings in a form best suited for cheap inter-conversion between - /// native-platform and Rust string forms, which may differ significantly - /// from both of them, including in storage size and encoding. - /// - /// This number is simply useful for passing to other methods, like - /// [`OsString::with_capacity`] to avoid reallocations. - /// - /// See the main `OsString` documentation information about encoding and capacity units. - /// - /// # Examples - /// - /// ``` - /// use std::ffi::OsStr; - /// - /// let os_str = OsStr::new(""); - /// assert_eq!(os_str.len(), 0); - /// - /// let os_str = OsStr::new("foo"); - /// assert_eq!(os_str.len(), 3); - /// ``` - #[stable(feature = "osstring_simple_functions", since = "1.9.0")] - #[must_use] - #[inline] - pub fn len(&self) -> usize { - self.inner.inner.len() - } - - /// Converts a [Box]<[OsStr]> into an [`OsString`] without copying or allocating. - #[stable(feature = "into_boxed_os_str", since = "1.20.0")] - #[must_use = "`self` will be dropped if the result is not used"] - pub fn into_os_string(self: Box) -> OsString { - let boxed = unsafe { Box::from_raw(Box::into_raw(self) as *mut Slice) }; - OsString { inner: Buf::from_box(boxed) } - } - - /// Converts an OS string slice to a byte slice. To convert the byte slice back into an OS - /// string slice, use the [`OsStr::from_encoded_bytes_unchecked`] function. - /// - /// The byte encoding is an unspecified, platform-specific, self-synchronizing superset of UTF-8. - /// By being a self-synchronizing superset of UTF-8, this encoding is also a superset of 7-bit - /// ASCII. - /// - /// Note: As the encoding is unspecified, any sub-slice of bytes that is not valid UTF-8 should - /// be treated as opaque and only comparable within the same Rust version built for the same - /// target platform. For example, sending the slice over the network or storing it in a file - /// will likely result in incompatible byte slices. See [`OsString`] for more encoding details - /// and [`std::ffi`] for platform-specific, specified conversions. - /// - /// [`std::ffi`]: crate::ffi - #[inline] - #[stable(feature = "os_str_bytes", since = "1.74.0")] - pub fn as_encoded_bytes(&self) -> &[u8] { - self.inner.as_encoded_bytes() - } - - /// Takes a substring based on a range that corresponds to the return value of - /// [`OsStr::as_encoded_bytes`]. - /// - /// The range's start and end must lie on valid `OsStr` boundaries. - /// A valid `OsStr` boundary is one of: - /// - The start of the string - /// - The end of the string - /// - Immediately before a valid non-empty UTF-8 substring - /// - Immediately after a valid non-empty UTF-8 substring - /// - /// # Panics - /// - /// Panics if `range` does not lie on valid `OsStr` boundaries or if it - /// exceeds the end of the string. - /// - /// # Example - /// - /// ``` - /// #![feature(os_str_slice)] - /// - /// use std::ffi::OsStr; - /// - /// let os_str = OsStr::new("foo=bar"); - /// let bytes = os_str.as_encoded_bytes(); - /// if let Some(index) = bytes.iter().position(|b| *b == b'=') { - /// let key = os_str.slice_encoded_bytes(..index); - /// let value = os_str.slice_encoded_bytes(index + 1..); - /// assert_eq!(key, "foo"); - /// assert_eq!(value, "bar"); - /// } - /// ``` - #[unstable(feature = "os_str_slice", issue = "118485")] - pub fn slice_encoded_bytes>(&self, range: R) -> &Self { - let encoded_bytes = self.as_encoded_bytes(); - let Range { start, end } = slice::range(range, ..encoded_bytes.len()); - - // `check_public_boundary` should panic if the index does not lie on an - // `OsStr` boundary as described above. It's possible to do this in an - // encoding-agnostic way, but details of the internal encoding might - // permit a more efficient implementation. - self.inner.check_public_boundary(start); - self.inner.check_public_boundary(end); - - // SAFETY: `slice::range` ensures that `start` and `end` are valid - let slice = unsafe { encoded_bytes.get_unchecked(start..end) }; - - // SAFETY: `slice` comes from `self` and we validated the boundaries - unsafe { Self::from_encoded_bytes_unchecked(slice) } - } - - /// Converts this string to its ASCII lower case equivalent in-place. - /// - /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z', - /// but non-ASCII letters are unchanged. - /// - /// To return a new lowercased value without modifying the existing one, use - /// [`OsStr::to_ascii_lowercase`]. - /// - /// # Examples - /// - /// ``` - /// use std::ffi::OsString; - /// - /// let mut s = OsString::from("GRÜßE, JÜRGEN ❤"); - /// - /// s.make_ascii_lowercase(); - /// - /// assert_eq!("grÜße, jÜrgen ❤", s); - /// ``` - #[stable(feature = "osstring_ascii", since = "1.53.0")] - #[inline] - pub fn make_ascii_lowercase(&mut self) { - self.inner.make_ascii_lowercase() - } - - /// Converts this string to its ASCII upper case equivalent in-place. - /// - /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z', - /// but non-ASCII letters are unchanged. - /// - /// To return a new uppercased value without modifying the existing one, use - /// [`OsStr::to_ascii_uppercase`]. - /// - /// # Examples - /// - /// ``` - /// use std::ffi::OsString; - /// - /// let mut s = OsString::from("Grüße, Jürgen ❤"); - /// - /// s.make_ascii_uppercase(); - /// - /// assert_eq!("GRüßE, JüRGEN ❤", s); - /// ``` - #[stable(feature = "osstring_ascii", since = "1.53.0")] - #[inline] - pub fn make_ascii_uppercase(&mut self) { - self.inner.make_ascii_uppercase() - } - - /// Returns a copy of this string where each character is mapped to its - /// ASCII lower case equivalent. - /// - /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z', - /// but non-ASCII letters are unchanged. - /// - /// To lowercase the value in-place, use [`OsStr::make_ascii_lowercase`]. - /// - /// # Examples - /// - /// ``` - /// use std::ffi::OsString; - /// let s = OsString::from("Grüße, Jürgen ❤"); - /// - /// assert_eq!("grüße, jürgen ❤", s.to_ascii_lowercase()); - /// ``` - #[must_use = "to lowercase the value in-place, use `make_ascii_lowercase`"] - #[stable(feature = "osstring_ascii", since = "1.53.0")] - pub fn to_ascii_lowercase(&self) -> OsString { - OsString::from_inner(self.inner.to_ascii_lowercase()) - } - - /// Returns a copy of this string where each character is mapped to its - /// ASCII upper case equivalent. - /// - /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z', - /// but non-ASCII letters are unchanged. - /// - /// To uppercase the value in-place, use [`OsStr::make_ascii_uppercase`]. - /// - /// # Examples - /// - /// ``` - /// use std::ffi::OsString; - /// let s = OsString::from("Grüße, Jürgen ❤"); - /// - /// assert_eq!("GRüßE, JüRGEN ❤", s.to_ascii_uppercase()); - /// ``` - #[must_use = "to uppercase the value in-place, use `make_ascii_uppercase`"] - #[stable(feature = "osstring_ascii", since = "1.53.0")] - pub fn to_ascii_uppercase(&self) -> OsString { - OsString::from_inner(self.inner.to_ascii_uppercase()) - } - - /// Checks if all characters in this string are within the ASCII range. - /// - /// # Examples - /// - /// ``` - /// use std::ffi::OsString; - /// - /// let ascii = OsString::from("hello!\n"); - /// let non_ascii = OsString::from("Grüße, Jürgen ❤"); - /// - /// assert!(ascii.is_ascii()); - /// assert!(!non_ascii.is_ascii()); - /// ``` - #[stable(feature = "osstring_ascii", since = "1.53.0")] - #[must_use] - #[inline] - pub fn is_ascii(&self) -> bool { - self.inner.is_ascii() - } - - /// Checks that two strings are an ASCII case-insensitive match. - /// - /// Same as `to_ascii_lowercase(a) == to_ascii_lowercase(b)`, - /// but without allocating and copying temporaries. - /// - /// # Examples - /// - /// ``` - /// use std::ffi::OsString; - /// - /// assert!(OsString::from("Ferris").eq_ignore_ascii_case("FERRIS")); - /// assert!(OsString::from("Ferrös").eq_ignore_ascii_case("FERRöS")); - /// assert!(!OsString::from("Ferrös").eq_ignore_ascii_case("FERRÖS")); - /// ``` - #[stable(feature = "osstring_ascii", since = "1.53.0")] - pub fn eq_ignore_ascii_case>(&self, other: S) -> bool { - self.inner.eq_ignore_ascii_case(&other.as_ref().inner) - } - - /// Returns an object that implements [`Display`] for safely printing an - /// [`OsStr`] that may contain non-Unicode data. This may perform lossy - /// conversion, depending on the platform. If you would like an - /// implementation which escapes the [`OsStr`] please use [`Debug`] - /// instead. - /// - /// [`Display`]: fmt::Display - /// [`Debug`]: fmt::Debug - /// - /// # Examples - /// - /// ``` - /// #![feature(os_str_display)] - /// use std::ffi::OsStr; - /// - /// let s = OsStr::new("Hello, world!"); - /// println!("{}", s.display()); - /// ``` - #[unstable(feature = "os_str_display", issue = "120048")] - #[must_use = "this does not display the `OsStr`; \ - it returns an object that can be displayed"] - #[inline] - pub fn display(&self) -> Display<'_> { - Display { os_str: self } - } -} - -#[stable(feature = "box_from_os_str", since = "1.17.0")] -impl From<&OsStr> for Box { - /// Copies the string into a newly allocated [Box]<[OsStr]>. - #[inline] - fn from(s: &OsStr) -> Box { - let rw = Box::into_raw(s.inner.into_box()) as *mut OsStr; - unsafe { Box::from_raw(rw) } - } -} - -#[stable(feature = "box_from_cow", since = "1.45.0")] -impl From> for Box { - /// Converts a `Cow<'a, OsStr>` into a [Box]<[OsStr]>, - /// by copying the contents if they are borrowed. - #[inline] - fn from(cow: Cow<'_, OsStr>) -> Box { - match cow { - Cow::Borrowed(s) => Box::from(s), - Cow::Owned(s) => Box::from(s), - } - } -} - -#[stable(feature = "os_string_from_box", since = "1.18.0")] -impl From> for OsString { - /// Converts a [Box]<[OsStr]> into an [`OsString`] without copying or - /// allocating. - #[inline] - fn from(boxed: Box) -> OsString { - boxed.into_os_string() - } -} - -#[stable(feature = "box_from_os_string", since = "1.20.0")] -impl From for Box { - /// Converts an [`OsString`] into a [Box]<[OsStr]> without copying or allocating. - #[inline] - fn from(s: OsString) -> Box { - s.into_boxed_os_str() - } -} - -#[stable(feature = "more_box_slice_clone", since = "1.29.0")] -impl Clone for Box { - #[inline] - fn clone(&self) -> Self { - self.to_os_string().into_boxed_os_str() - } -} - -#[unstable(feature = "clone_to_uninit", issue = "126799")] -unsafe impl CloneToUninit for OsStr { - #[inline] - #[cfg_attr(debug_assertions, track_caller)] - unsafe fn clone_to_uninit(&self, dst: *mut Self) { - // SAFETY: we're just a wrapper around a platform-specific Slice - unsafe { self.inner.clone_to_uninit(addr_of_mut!((*dst).inner)) } - } -} - -#[stable(feature = "shared_from_slice2", since = "1.24.0")] -impl From for Arc { - /// Converts an [`OsString`] into an [Arc]<[OsStr]> by moving the [`OsString`] - /// data into a new [`Arc`] buffer. - #[inline] - fn from(s: OsString) -> Arc { - let arc = s.inner.into_arc(); - unsafe { Arc::from_raw(Arc::into_raw(arc) as *const OsStr) } - } -} - -#[stable(feature = "shared_from_slice2", since = "1.24.0")] -impl From<&OsStr> for Arc { - /// Copies the string into a newly allocated [Arc]<[OsStr]>. - #[inline] - fn from(s: &OsStr) -> Arc { - let arc = s.inner.into_arc(); - unsafe { Arc::from_raw(Arc::into_raw(arc) as *const OsStr) } - } -} - -#[stable(feature = "shared_from_slice2", since = "1.24.0")] -impl From for Rc { - /// Converts an [`OsString`] into an [Rc]<[OsStr]> by moving the [`OsString`] - /// data into a new [`Rc`] buffer. - #[inline] - fn from(s: OsString) -> Rc { - let rc = s.inner.into_rc(); - unsafe { Rc::from_raw(Rc::into_raw(rc) as *const OsStr) } - } -} - -#[stable(feature = "shared_from_slice2", since = "1.24.0")] -impl From<&OsStr> for Rc { - /// Copies the string into a newly allocated [Rc]<[OsStr]>. - #[inline] - fn from(s: &OsStr) -> Rc { - let rc = s.inner.into_rc(); - unsafe { Rc::from_raw(Rc::into_raw(rc) as *const OsStr) } - } -} - -#[stable(feature = "cow_from_osstr", since = "1.28.0")] -impl<'a> From for Cow<'a, OsStr> { - /// Moves the string into a [`Cow::Owned`]. - #[inline] - fn from(s: OsString) -> Cow<'a, OsStr> { - Cow::Owned(s) - } -} - -#[stable(feature = "cow_from_osstr", since = "1.28.0")] -impl<'a> From<&'a OsStr> for Cow<'a, OsStr> { - /// Converts the string reference into a [`Cow::Borrowed`]. - #[inline] - fn from(s: &'a OsStr) -> Cow<'a, OsStr> { - Cow::Borrowed(s) - } -} - -#[stable(feature = "cow_from_osstr", since = "1.28.0")] -impl<'a> From<&'a OsString> for Cow<'a, OsStr> { - /// Converts the string reference into a [`Cow::Borrowed`]. - #[inline] - fn from(s: &'a OsString) -> Cow<'a, OsStr> { - Cow::Borrowed(s.as_os_str()) - } -} - -#[stable(feature = "osstring_from_cow_osstr", since = "1.28.0")] -impl<'a> From> for OsString { - /// Converts a `Cow<'a, OsStr>` into an [`OsString`], - /// by copying the contents if they are borrowed. - #[inline] - fn from(s: Cow<'a, OsStr>) -> Self { - s.into_owned() - } -} - -#[stable(feature = "str_tryfrom_osstr_impl", since = "1.72.0")] -impl<'a> TryFrom<&'a OsStr> for &'a str { - type Error = crate::str::Utf8Error; - - /// Tries to convert an `&OsStr` to a `&str`. - /// - /// ``` - /// use std::ffi::OsStr; - /// - /// let os_str = OsStr::new("foo"); - /// let as_str = <&str>::try_from(os_str).unwrap(); - /// assert_eq!(as_str, "foo"); - /// ``` - fn try_from(value: &'a OsStr) -> Result { - value.inner.to_str() - } -} - -#[stable(feature = "box_default_extra", since = "1.17.0")] -impl Default for Box { - #[inline] - fn default() -> Box { - let rw = Box::into_raw(Slice::empty_box()) as *mut OsStr; - unsafe { Box::from_raw(rw) } - } -} - -#[stable(feature = "osstring_default", since = "1.9.0")] -impl Default for &OsStr { - /// Creates an empty `OsStr`. - #[inline] - fn default() -> Self { - OsStr::new("") - } -} - -#[stable(feature = "rust1", since = "1.0.0")] -impl PartialEq for OsStr { - #[inline] - fn eq(&self, other: &OsStr) -> bool { - self.as_encoded_bytes().eq(other.as_encoded_bytes()) - } -} - -#[stable(feature = "rust1", since = "1.0.0")] -impl PartialEq for OsStr { - #[inline] - fn eq(&self, other: &str) -> bool { - *self == *OsStr::new(other) - } -} - -#[stable(feature = "rust1", since = "1.0.0")] -impl PartialEq for str { - #[inline] - fn eq(&self, other: &OsStr) -> bool { - *other == *OsStr::new(self) - } -} - -#[stable(feature = "rust1", since = "1.0.0")] -impl Eq for OsStr {} - -#[stable(feature = "rust1", since = "1.0.0")] -impl PartialOrd for OsStr { - #[inline] - fn partial_cmp(&self, other: &OsStr) -> Option { - self.as_encoded_bytes().partial_cmp(other.as_encoded_bytes()) - } - #[inline] - fn lt(&self, other: &OsStr) -> bool { - self.as_encoded_bytes().lt(other.as_encoded_bytes()) - } - #[inline] - fn le(&self, other: &OsStr) -> bool { - self.as_encoded_bytes().le(other.as_encoded_bytes()) - } - #[inline] - fn gt(&self, other: &OsStr) -> bool { - self.as_encoded_bytes().gt(other.as_encoded_bytes()) - } - #[inline] - fn ge(&self, other: &OsStr) -> bool { - self.as_encoded_bytes().ge(other.as_encoded_bytes()) - } -} - -#[stable(feature = "rust1", since = "1.0.0")] -impl PartialOrd for OsStr { - #[inline] - fn partial_cmp(&self, other: &str) -> Option { - self.partial_cmp(OsStr::new(other)) - } -} - -// FIXME (#19470): cannot provide PartialOrd for str until we -// have more flexible coherence rules. - -#[stable(feature = "rust1", since = "1.0.0")] -impl Ord for OsStr { - #[inline] - fn cmp(&self, other: &OsStr) -> cmp::Ordering { - self.as_encoded_bytes().cmp(other.as_encoded_bytes()) - } -} - -macro_rules! impl_cmp { - ($lhs:ty, $rhs: ty) => { - #[stable(feature = "cmp_os_str", since = "1.8.0")] - impl<'a, 'b> PartialEq<$rhs> for $lhs { - #[inline] - fn eq(&self, other: &$rhs) -> bool { - ::eq(self, other) - } - } - - #[stable(feature = "cmp_os_str", since = "1.8.0")] - impl<'a, 'b> PartialEq<$lhs> for $rhs { - #[inline] - fn eq(&self, other: &$lhs) -> bool { - ::eq(self, other) - } - } - - #[stable(feature = "cmp_os_str", since = "1.8.0")] - impl<'a, 'b> PartialOrd<$rhs> for $lhs { - #[inline] - fn partial_cmp(&self, other: &$rhs) -> Option { - ::partial_cmp(self, other) - } - } - - #[stable(feature = "cmp_os_str", since = "1.8.0")] - impl<'a, 'b> PartialOrd<$lhs> for $rhs { - #[inline] - fn partial_cmp(&self, other: &$lhs) -> Option { - ::partial_cmp(self, other) - } - } - }; -} - -impl_cmp!(OsString, OsStr); -impl_cmp!(OsString, &'a OsStr); -impl_cmp!(Cow<'a, OsStr>, OsStr); -impl_cmp!(Cow<'a, OsStr>, &'b OsStr); -impl_cmp!(Cow<'a, OsStr>, OsString); - -#[stable(feature = "rust1", since = "1.0.0")] -impl Hash for OsStr { - #[inline] - fn hash(&self, state: &mut H) { - self.as_encoded_bytes().hash(state) - } -} - -#[stable(feature = "rust1", since = "1.0.0")] -impl fmt::Debug for OsStr { - fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { - fmt::Debug::fmt(&self.inner, formatter) - } -} - -/// Helper struct for safely printing an [`OsStr`] with [`format!`] and `{}`. -/// -/// An [`OsStr`] might contain non-Unicode data. This `struct` implements the -/// [`Display`] trait in a way that mitigates that. It is created by the -/// [`display`](OsStr::display) method on [`OsStr`]. This may perform lossy -/// conversion, depending on the platform. If you would like an implementation -/// which escapes the [`OsStr`] please use [`Debug`] instead. -/// -/// # Examples -/// -/// ``` -/// #![feature(os_str_display)] -/// use std::ffi::OsStr; -/// -/// let s = OsStr::new("Hello, world!"); -/// println!("{}", s.display()); -/// ``` -/// -/// [`Display`]: fmt::Display -/// [`format!`]: crate::format -#[unstable(feature = "os_str_display", issue = "120048")] -pub struct Display<'a> { - os_str: &'a OsStr, -} - -#[unstable(feature = "os_str_display", issue = "120048")] -impl fmt::Debug for Display<'_> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - fmt::Debug::fmt(&self.os_str, f) - } -} - -#[unstable(feature = "os_str_display", issue = "120048")] -impl fmt::Display for Display<'_> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - fmt::Display::fmt(&self.os_str.inner, f) - } -} - -#[unstable(feature = "slice_concat_ext", issue = "27747")] -impl> alloc::slice::Join<&OsStr> for [S] { - type Output = OsString; - - fn join(slice: &Self, sep: &OsStr) -> OsString { - let Some((first, suffix)) = slice.split_first() else { - return OsString::new(); - }; - let first_owned = first.borrow().to_owned(); - suffix.iter().fold(first_owned, |mut a, b| { - a.push(sep); - a.push(b.borrow()); - a - }) - } -} - -#[stable(feature = "rust1", since = "1.0.0")] -impl Borrow for OsString { - #[inline] - fn borrow(&self) -> &OsStr { - &self[..] - } -} - -#[stable(feature = "rust1", since = "1.0.0")] -impl ToOwned for OsStr { - type Owned = OsString; - #[inline] - fn to_owned(&self) -> OsString { - self.to_os_string() - } - #[inline] - fn clone_into(&self, target: &mut OsString) { - self.inner.clone_into(&mut target.inner) - } -} - -#[stable(feature = "rust1", since = "1.0.0")] -impl AsRef for OsStr { - #[inline] - fn as_ref(&self) -> &OsStr { - self - } -} - -#[stable(feature = "rust1", since = "1.0.0")] -impl AsRef for OsString { - #[inline] - fn as_ref(&self) -> &OsStr { - self - } -} - -#[stable(feature = "rust1", since = "1.0.0")] -impl AsRef for str { - #[inline] - fn as_ref(&self) -> &OsStr { - OsStr::from_inner(Slice::from_str(self)) - } -} - -#[stable(feature = "rust1", since = "1.0.0")] -impl AsRef for String { - #[inline] - fn as_ref(&self) -> &OsStr { - (&**self).as_ref() - } -} - -impl FromInner for OsString { - #[inline] - fn from_inner(buf: Buf) -> OsString { - OsString { inner: buf } - } -} - -impl IntoInner for OsString { - #[inline] - fn into_inner(self) -> Buf { - self.inner - } -} - -impl AsInner for OsStr { - #[inline] - fn as_inner(&self) -> &Slice { - &self.inner - } -} - -#[stable(feature = "osstring_from_str", since = "1.45.0")] -impl FromStr for OsString { - type Err = core::convert::Infallible; - - #[inline] - fn from_str(s: &str) -> Result { - Ok(OsString::from(s)) - } -} - -#[stable(feature = "osstring_extend", since = "1.52.0")] -impl Extend for OsString { - #[inline] - fn extend>(&mut self, iter: T) { - for s in iter { - self.push(&s); - } - } -} - -#[stable(feature = "osstring_extend", since = "1.52.0")] -impl<'a> Extend<&'a OsStr> for OsString { - #[inline] - fn extend>(&mut self, iter: T) { - for s in iter { - self.push(s); - } - } -} - -#[stable(feature = "osstring_extend", since = "1.52.0")] -impl<'a> Extend> for OsString { - #[inline] - fn extend>>(&mut self, iter: T) { - for s in iter { - self.push(&s); - } - } -} - -#[stable(feature = "osstring_extend", since = "1.52.0")] -impl FromIterator for OsString { - #[inline] - fn from_iter>(iter: I) -> Self { - let mut iterator = iter.into_iter(); - - // Because we're iterating over `OsString`s, we can avoid at least - // one allocation by getting the first string from the iterator - // and appending to it all the subsequent strings. - match iterator.next() { - None => OsString::new(), - Some(mut buf) => { - buf.extend(iterator); - buf - } - } - } -} - -#[stable(feature = "osstring_extend", since = "1.52.0")] -impl<'a> FromIterator<&'a OsStr> for OsString { - #[inline] - fn from_iter>(iter: I) -> Self { - let mut buf = Self::new(); - for s in iter { - buf.push(s); - } - buf - } -} - -#[stable(feature = "osstring_extend", since = "1.52.0")] -impl<'a> FromIterator> for OsString { - #[inline] - fn from_iter>>(iter: I) -> Self { - let mut iterator = iter.into_iter(); - - // Because we're iterating over `OsString`s, we can avoid at least - // one allocation by getting the first owned string from the iterator - // and appending to it all the subsequent strings. - match iterator.next() { - None => OsString::new(), - Some(Cow::Owned(mut buf)) => { - buf.extend(iterator); - buf - } - Some(Cow::Borrowed(buf)) => { - let mut buf = OsString::from(buf); - buf.extend(iterator); - buf - } - } - } -} +pub use alloc::ffi::os_str::OsString; +pub use core::ffi::os_str::{Display, OsStr}; diff --git a/library/std/src/lib.rs b/library/std/src/lib.rs index 606d75668c4d5..08658e09a515f 100644 --- a/library/std/src/lib.rs +++ b/library/std/src/lib.rs @@ -307,6 +307,10 @@ #![feature(never_type)] #![feature(no_sanitize)] #![feature(optimize_attribute)] +#![feature(os_str_display)] +#![feature(os_str_internals)] +#![feature(os_string_pathbuf_leak)] +#![feature(path_internals)] #![feature(prelude_import)] #![feature(rustc_attrs)] #![feature(rustdoc_internals)] diff --git a/library/std/src/os/unix/ffi/os_str.rs b/library/std/src/os/unix/ffi/os_str.rs index 650f712bc6eef..68974e8874d2f 100644 --- a/library/std/src/os/unix/ffi/os_str.rs +++ b/library/std/src/os/unix/ffi/os_str.rs @@ -1,70 +1,7 @@ -use crate::ffi::{OsStr, OsString}; -use crate::mem; -use crate::sealed::Sealed; -use crate::sys::os_str::Buf; -use crate::sys_common::{AsInner, FromInner, IntoInner}; - // Note: this file is currently reused in other `std::os::{platform}::ffi` modules to reduce duplication. // Keep this in mind when applying changes to this file that only apply to `unix`. -/// Platform-specific extensions to [`OsString`]. -/// -/// This trait is sealed: it cannot be implemented outside the standard library. -/// This is so that future additional methods are not breaking changes. -#[stable(feature = "rust1", since = "1.0.0")] -pub trait OsStringExt: Sealed { - /// Creates an [`OsString`] from a byte vector. - /// - /// See the module documentation for an example. - #[stable(feature = "rust1", since = "1.0.0")] - fn from_vec(vec: Vec) -> Self; - - /// Yields the underlying byte vector of this [`OsString`]. - /// - /// See the module documentation for an example. - #[stable(feature = "rust1", since = "1.0.0")] - fn into_vec(self) -> Vec; -} - #[stable(feature = "rust1", since = "1.0.0")] -impl OsStringExt for OsString { - #[inline] - fn from_vec(vec: Vec) -> OsString { - FromInner::from_inner(Buf { inner: vec }) - } - #[inline] - fn into_vec(self) -> Vec { - self.into_inner().inner - } -} - -/// Platform-specific extensions to [`OsStr`]. -/// -/// This trait is sealed: it cannot be implemented outside the standard library. -/// This is so that future additional methods are not breaking changes. -#[stable(feature = "rust1", since = "1.0.0")] -pub trait OsStrExt: Sealed { - #[stable(feature = "rust1", since = "1.0.0")] - /// Creates an [`OsStr`] from a byte slice. - /// - /// See the module documentation for an example. - fn from_bytes(slice: &[u8]) -> &Self; - - /// Gets the underlying byte view of the [`OsStr`] slice. - /// - /// See the module documentation for an example. - #[stable(feature = "rust1", since = "1.0.0")] - fn as_bytes(&self) -> &[u8]; -} - +pub use alloc::ffi::os_str::os_str_ext_unix::OsStringExt; #[stable(feature = "rust1", since = "1.0.0")] -impl OsStrExt for OsStr { - #[inline] - fn from_bytes(slice: &[u8]) -> &OsStr { - unsafe { mem::transmute(slice) } - } - #[inline] - fn as_bytes(&self) -> &[u8] { - &self.as_inner().inner - } -} +pub use core::ffi::os_str::os_str_ext_unix::OsStrExt; diff --git a/library/std/src/os/windows/ffi.rs b/library/std/src/os/windows/ffi.rs index 496443dbbc3ac..31e7e674a21cf 100644 --- a/library/std/src/os/windows/ffi.rs +++ b/library/std/src/os/windows/ffi.rs @@ -53,83 +53,7 @@ #![stable(feature = "rust1", since = "1.0.0")] -use crate::ffi::{OsStr, OsString}; -use crate::sealed::Sealed; -use crate::sys::os_str::Buf; #[stable(feature = "rust1", since = "1.0.0")] -pub use crate::sys_common::wtf8::EncodeWide; -use crate::sys_common::wtf8::Wtf8Buf; -use crate::sys_common::{AsInner, FromInner}; - -/// Windows-specific extensions to [`OsString`]. -/// -/// This trait is sealed: it cannot be implemented outside the standard library. -/// This is so that future additional methods are not breaking changes. -#[stable(feature = "rust1", since = "1.0.0")] -pub trait OsStringExt: Sealed { - /// Creates an `OsString` from a potentially ill-formed UTF-16 slice of - /// 16-bit code units. - /// - /// This is lossless: calling [`OsStrExt::encode_wide`] on the resulting string - /// will always return the original code units. - /// - /// # Examples - /// - /// ``` - /// use std::ffi::OsString; - /// use std::os::windows::prelude::*; - /// - /// // UTF-16 encoding for "Unicode". - /// let source = [0x0055, 0x006E, 0x0069, 0x0063, 0x006F, 0x0064, 0x0065]; - /// - /// let string = OsString::from_wide(&source[..]); - /// ``` - #[stable(feature = "rust1", since = "1.0.0")] - fn from_wide(wide: &[u16]) -> Self; -} - -#[stable(feature = "rust1", since = "1.0.0")] -impl OsStringExt for OsString { - fn from_wide(wide: &[u16]) -> OsString { - FromInner::from_inner(Buf { inner: Wtf8Buf::from_wide(wide) }) - } -} - -/// Windows-specific extensions to [`OsStr`]. -/// -/// This trait is sealed: it cannot be implemented outside the standard library. -/// This is so that future additional methods are not breaking changes. -#[stable(feature = "rust1", since = "1.0.0")] -pub trait OsStrExt: Sealed { - /// Re-encodes an `OsStr` as a wide character sequence, i.e., potentially - /// ill-formed UTF-16. - /// - /// This is lossless: calling [`OsStringExt::from_wide`] and then - /// `encode_wide` on the result will yield the original code units. - /// Note that the encoding does not add a final null terminator. - /// - /// # Examples - /// - /// ``` - /// use std::ffi::OsString; - /// use std::os::windows::prelude::*; - /// - /// // UTF-16 encoding for "Unicode". - /// let source = [0x0055, 0x006E, 0x0069, 0x0063, 0x006F, 0x0064, 0x0065]; - /// - /// let string = OsString::from_wide(&source[..]); - /// - /// let result: Vec = string.encode_wide().collect(); - /// assert_eq!(&source[..], &result[..]); - /// ``` - #[stable(feature = "rust1", since = "1.0.0")] - fn encode_wide(&self) -> EncodeWide<'_>; -} - +pub use alloc::ffi::os_str::os_str_ext_windows::OsStringExt; #[stable(feature = "rust1", since = "1.0.0")] -impl OsStrExt for OsStr { - #[inline] - fn encode_wide(&self) -> EncodeWide<'_> { - self.as_inner().inner.encode_wide() - } -} +pub use core::ffi::os_str::os_str_ext_windows::{EncodeWide, OsStrExt}; diff --git a/library/std/src/path.rs b/library/std/src/path.rs index 9eaa0e01c2c00..e0df47dcecbb3 100644 --- a/library/std/src/path.rs +++ b/library/std/src/path.rs @@ -70,2778 +70,27 @@ #[cfg(test)] mod tests; -use core::clone::CloneToUninit; - -use crate::borrow::{Borrow, Cow}; -use crate::collections::TryReserveError; -use crate::error::Error; -use crate::ffi::{os_str, OsStr, OsString}; -use crate::hash::{Hash, Hasher}; -use crate::iter::FusedIterator; -use crate::ops::{self, Deref}; -use crate::rc::Rc; -use crate::str::FromStr; -use crate::sync::Arc; -use crate::sys::path::{is_sep_byte, is_verbatim_sep, parse_prefix, MAIN_SEP_STR}; -use crate::{cmp, fmt, fs, io, sys}; - -//////////////////////////////////////////////////////////////////////////////// -// GENERAL NOTES -//////////////////////////////////////////////////////////////////////////////// -// -// Parsing in this module is done by directly transmuting OsStr to [u8] slices, -// taking advantage of the fact that OsStr always encodes ASCII characters -// as-is. Eventually, this transmutation should be replaced by direct uses of -// OsStr APIs for parsing, but it will take a while for those to become -// available. - -//////////////////////////////////////////////////////////////////////////////// -// Windows Prefixes -//////////////////////////////////////////////////////////////////////////////// - -/// Windows path prefixes, e.g., `C:` or `\\server\share`. -/// -/// Windows uses a variety of path prefix styles, including references to drive -/// volumes (like `C:`), network shared folders (like `\\server\share`), and -/// others. In addition, some path prefixes are "verbatim" (i.e., prefixed with -/// `\\?\`), in which case `/` is *not* treated as a separator and essentially -/// no normalization is performed. -/// -/// # Examples -/// -/// ``` -/// use std::path::{Component, Path, Prefix}; -/// use std::path::Prefix::*; -/// use std::ffi::OsStr; -/// -/// fn get_path_prefix(s: &str) -> Prefix<'_> { -/// let path = Path::new(s); -/// match path.components().next().unwrap() { -/// Component::Prefix(prefix_component) => prefix_component.kind(), -/// _ => panic!(), -/// } -/// } -/// -/// # if cfg!(windows) { -/// assert_eq!(Verbatim(OsStr::new("pictures")), -/// get_path_prefix(r"\\?\pictures\kittens")); -/// assert_eq!(VerbatimUNC(OsStr::new("server"), OsStr::new("share")), -/// get_path_prefix(r"\\?\UNC\server\share")); -/// assert_eq!(VerbatimDisk(b'C'), get_path_prefix(r"\\?\c:\")); -/// assert_eq!(DeviceNS(OsStr::new("BrainInterface")), -/// get_path_prefix(r"\\.\BrainInterface")); -/// assert_eq!(UNC(OsStr::new("server"), OsStr::new("share")), -/// get_path_prefix(r"\\server\share")); -/// assert_eq!(Disk(b'C'), get_path_prefix(r"C:\Users\Rust\Pictures\Ferris")); -/// # } -/// ``` -#[derive(Copy, Clone, Debug, Hash, PartialOrd, Ord, PartialEq, Eq)] -#[stable(feature = "rust1", since = "1.0.0")] -pub enum Prefix<'a> { - /// Verbatim prefix, e.g., `\\?\cat_pics`. - /// - /// Verbatim prefixes consist of `\\?\` immediately followed by the given - /// component. - #[stable(feature = "rust1", since = "1.0.0")] - Verbatim(#[stable(feature = "rust1", since = "1.0.0")] &'a OsStr), - - /// Verbatim prefix using Windows' _**U**niform **N**aming **C**onvention_, - /// e.g., `\\?\UNC\server\share`. - /// - /// Verbatim UNC prefixes consist of `\\?\UNC\` immediately followed by the - /// server's hostname and a share name. - #[stable(feature = "rust1", since = "1.0.0")] - VerbatimUNC( - #[stable(feature = "rust1", since = "1.0.0")] &'a OsStr, - #[stable(feature = "rust1", since = "1.0.0")] &'a OsStr, - ), - - /// Verbatim disk prefix, e.g., `\\?\C:`. - /// - /// Verbatim disk prefixes consist of `\\?\` immediately followed by the - /// drive letter and `:`. - #[stable(feature = "rust1", since = "1.0.0")] - VerbatimDisk(#[stable(feature = "rust1", since = "1.0.0")] u8), - - /// Device namespace prefix, e.g., `\\.\COM42`. - /// - /// Device namespace prefixes consist of `\\.\` (possibly using `/` - /// instead of `\`), immediately followed by the device name. - #[stable(feature = "rust1", since = "1.0.0")] - DeviceNS(#[stable(feature = "rust1", since = "1.0.0")] &'a OsStr), - - /// Prefix using Windows' _**U**niform **N**aming **C**onvention_, e.g. - /// `\\server\share`. - /// - /// UNC prefixes consist of the server's hostname and a share name. - #[stable(feature = "rust1", since = "1.0.0")] - UNC( - #[stable(feature = "rust1", since = "1.0.0")] &'a OsStr, - #[stable(feature = "rust1", since = "1.0.0")] &'a OsStr, - ), - - /// Prefix `C:` for the given disk drive. - #[stable(feature = "rust1", since = "1.0.0")] - Disk(#[stable(feature = "rust1", since = "1.0.0")] u8), -} - -impl<'a> Prefix<'a> { - #[inline] - fn len(&self) -> usize { - use self::Prefix::*; - fn os_str_len(s: &OsStr) -> usize { - s.as_encoded_bytes().len() - } - match *self { - Verbatim(x) => 4 + os_str_len(x), - VerbatimUNC(x, y) => { - 8 + os_str_len(x) + if os_str_len(y) > 0 { 1 + os_str_len(y) } else { 0 } - } - VerbatimDisk(_) => 6, - UNC(x, y) => 2 + os_str_len(x) + if os_str_len(y) > 0 { 1 + os_str_len(y) } else { 0 }, - DeviceNS(x) => 4 + os_str_len(x), - Disk(_) => 2, - } - } - - /// Determines if the prefix is verbatim, i.e., begins with `\\?\`. - /// - /// # Examples - /// - /// ``` - /// use std::path::Prefix::*; - /// use std::ffi::OsStr; - /// - /// assert!(Verbatim(OsStr::new("pictures")).is_verbatim()); - /// assert!(VerbatimUNC(OsStr::new("server"), OsStr::new("share")).is_verbatim()); - /// assert!(VerbatimDisk(b'C').is_verbatim()); - /// assert!(!DeviceNS(OsStr::new("BrainInterface")).is_verbatim()); - /// assert!(!UNC(OsStr::new("server"), OsStr::new("share")).is_verbatim()); - /// assert!(!Disk(b'C').is_verbatim()); - /// ``` - #[inline] - #[must_use] - #[stable(feature = "rust1", since = "1.0.0")] - pub fn is_verbatim(&self) -> bool { - use self::Prefix::*; - matches!(*self, Verbatim(_) | VerbatimDisk(_) | VerbatimUNC(..)) - } - - #[inline] - fn is_drive(&self) -> bool { - matches!(*self, Prefix::Disk(_)) - } - - #[inline] - fn has_implicit_root(&self) -> bool { - !self.is_drive() - } -} - -//////////////////////////////////////////////////////////////////////////////// -// Exposed parsing helpers -//////////////////////////////////////////////////////////////////////////////// - -/// Determines whether the character is one of the permitted path -/// separators for the current platform. -/// -/// # Examples -/// -/// ``` -/// use std::path; -/// -/// assert!(path::is_separator('/')); // '/' works for both Unix and Windows -/// assert!(!path::is_separator('❤')); -/// ``` -#[must_use] -#[stable(feature = "rust1", since = "1.0.0")] -pub fn is_separator(c: char) -> bool { - c.is_ascii() && is_sep_byte(c as u8) -} - -/// The primary separator of path components for the current platform. -/// -/// For example, `/` on Unix and `\` on Windows. -#[stable(feature = "rust1", since = "1.0.0")] -pub const MAIN_SEPARATOR: char = crate::sys::path::MAIN_SEP; - -/// The primary separator of path components for the current platform. -/// -/// For example, `/` on Unix and `\` on Windows. -#[stable(feature = "main_separator_str", since = "1.68.0")] -pub const MAIN_SEPARATOR_STR: &str = crate::sys::path::MAIN_SEP_STR; - -//////////////////////////////////////////////////////////////////////////////// -// Misc helpers -//////////////////////////////////////////////////////////////////////////////// - -// Iterate through `iter` while it matches `prefix`; return `None` if `prefix` -// is not a prefix of `iter`, otherwise return `Some(iter_after_prefix)` giving -// `iter` after having exhausted `prefix`. -fn iter_after<'a, 'b, I, J>(mut iter: I, mut prefix: J) -> Option -where - I: Iterator> + Clone, - J: Iterator>, -{ - loop { - let mut iter_next = iter.clone(); - match (iter_next.next(), prefix.next()) { - (Some(ref x), Some(ref y)) if x == y => (), - (Some(_), Some(_)) => return None, - (Some(_), None) => return Some(iter), - (None, None) => return Some(iter), - (None, Some(_)) => return None, - } - iter = iter_next; - } -} - -// Detect scheme on Redox -fn has_redox_scheme(s: &[u8]) -> bool { - cfg!(target_os = "redox") && s.contains(&b':') -} - -//////////////////////////////////////////////////////////////////////////////// -// Cross-platform, iterator-independent parsing -//////////////////////////////////////////////////////////////////////////////// - -/// Says whether the first byte after the prefix is a separator. -fn has_physical_root(s: &[u8], prefix: Option>) -> bool { - let path = if let Some(p) = prefix { &s[p.len()..] } else { s }; - !path.is_empty() && is_sep_byte(path[0]) -} - -// basic workhorse for splitting stem and extension -fn rsplit_file_at_dot(file: &OsStr) -> (Option<&OsStr>, Option<&OsStr>) { - if file.as_encoded_bytes() == b".." { - return (Some(file), None); - } - - // The unsafety here stems from converting between &OsStr and &[u8] - // and back. This is safe to do because (1) we only look at ASCII - // contents of the encoding and (2) new &OsStr values are produced - // only from ASCII-bounded slices of existing &OsStr values. - let mut iter = file.as_encoded_bytes().rsplitn(2, |b| *b == b'.'); - let after = iter.next(); - let before = iter.next(); - if before == Some(b"") { - (Some(file), None) - } else { - unsafe { - ( - before.map(|s| OsStr::from_encoded_bytes_unchecked(s)), - after.map(|s| OsStr::from_encoded_bytes_unchecked(s)), - ) - } - } -} - -fn split_file_at_dot(file: &OsStr) -> (&OsStr, Option<&OsStr>) { - let slice = file.as_encoded_bytes(); - if slice == b".." { - return (file, None); - } - - // The unsafety here stems from converting between &OsStr and &[u8] - // and back. This is safe to do because (1) we only look at ASCII - // contents of the encoding and (2) new &OsStr values are produced - // only from ASCII-bounded slices of existing &OsStr values. - let i = match slice[1..].iter().position(|b| *b == b'.') { - Some(i) => i + 1, - None => return (file, None), - }; - let before = &slice[..i]; - let after = &slice[i + 1..]; - unsafe { - ( - OsStr::from_encoded_bytes_unchecked(before), - Some(OsStr::from_encoded_bytes_unchecked(after)), - ) - } -} - -//////////////////////////////////////////////////////////////////////////////// -// The core iterators -//////////////////////////////////////////////////////////////////////////////// - -/// Component parsing works by a double-ended state machine; the cursors at the -/// front and back of the path each keep track of what parts of the path have -/// been consumed so far. -/// -/// Going front to back, a path is made up of a prefix, a starting -/// directory component, and a body (of normal components) -#[derive(Copy, Clone, PartialEq, PartialOrd, Debug)] -enum State { - Prefix = 0, // c: - StartDir = 1, // / or . or nothing - Body = 2, // foo/bar/baz - Done = 3, -} - -/// A structure wrapping a Windows path prefix as well as its unparsed string -/// representation. -/// -/// In addition to the parsed [`Prefix`] information returned by [`kind`], -/// `PrefixComponent` also holds the raw and unparsed [`OsStr`] slice, -/// returned by [`as_os_str`]. -/// -/// Instances of this `struct` can be obtained by matching against the -/// [`Prefix` variant] on [`Component`]. -/// -/// Does not occur on Unix. -/// -/// # Examples -/// -/// ``` -/// # if cfg!(windows) { -/// use std::path::{Component, Path, Prefix}; -/// use std::ffi::OsStr; -/// -/// let path = Path::new(r"c:\you\later\"); -/// match path.components().next().unwrap() { -/// Component::Prefix(prefix_component) => { -/// assert_eq!(Prefix::Disk(b'C'), prefix_component.kind()); -/// assert_eq!(OsStr::new("c:"), prefix_component.as_os_str()); -/// } -/// _ => unreachable!(), -/// } -/// # } -/// ``` -/// -/// [`as_os_str`]: PrefixComponent::as_os_str -/// [`kind`]: PrefixComponent::kind -/// [`Prefix` variant]: Component::Prefix -#[stable(feature = "rust1", since = "1.0.0")] -#[derive(Copy, Clone, Eq, Debug)] -pub struct PrefixComponent<'a> { - /// The prefix as an unparsed `OsStr` slice. - raw: &'a OsStr, - - /// The parsed prefix data. - parsed: Prefix<'a>, -} - -impl<'a> PrefixComponent<'a> { - /// Returns the parsed prefix data. - /// - /// See [`Prefix`]'s documentation for more information on the different - /// kinds of prefixes. - #[stable(feature = "rust1", since = "1.0.0")] - #[must_use] - #[inline] - pub fn kind(&self) -> Prefix<'a> { - self.parsed - } - - /// Returns the raw [`OsStr`] slice for this prefix. - #[stable(feature = "rust1", since = "1.0.0")] - #[must_use] - #[inline] - pub fn as_os_str(&self) -> &'a OsStr { - self.raw - } -} - -#[stable(feature = "rust1", since = "1.0.0")] -impl<'a> PartialEq for PrefixComponent<'a> { - #[inline] - fn eq(&self, other: &PrefixComponent<'a>) -> bool { - self.parsed == other.parsed - } -} - -#[stable(feature = "rust1", since = "1.0.0")] -impl<'a> PartialOrd for PrefixComponent<'a> { - #[inline] - fn partial_cmp(&self, other: &PrefixComponent<'a>) -> Option { - PartialOrd::partial_cmp(&self.parsed, &other.parsed) - } -} - -#[stable(feature = "rust1", since = "1.0.0")] -impl Ord for PrefixComponent<'_> { - #[inline] - fn cmp(&self, other: &Self) -> cmp::Ordering { - Ord::cmp(&self.parsed, &other.parsed) - } -} - -#[stable(feature = "rust1", since = "1.0.0")] -impl Hash for PrefixComponent<'_> { - fn hash(&self, h: &mut H) { - self.parsed.hash(h); - } -} - -/// A single component of a path. -/// -/// A `Component` roughly corresponds to a substring between path separators -/// (`/` or `\`). -/// -/// This `enum` is created by iterating over [`Components`], which in turn is -/// created by the [`components`](Path::components) method on [`Path`]. -/// -/// # Examples -/// -/// ```rust -/// use std::path::{Component, Path}; -/// -/// let path = Path::new("/tmp/foo/bar.txt"); -/// let components = path.components().collect::>(); -/// assert_eq!(&components, &[ -/// Component::RootDir, -/// Component::Normal("tmp".as_ref()), -/// Component::Normal("foo".as_ref()), -/// Component::Normal("bar.txt".as_ref()), -/// ]); -/// ``` -#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)] -#[stable(feature = "rust1", since = "1.0.0")] -pub enum Component<'a> { - /// A Windows path prefix, e.g., `C:` or `\\server\share`. - /// - /// There is a large variety of prefix types, see [`Prefix`]'s documentation - /// for more. - /// - /// Does not occur on Unix. - #[stable(feature = "rust1", since = "1.0.0")] - Prefix(#[stable(feature = "rust1", since = "1.0.0")] PrefixComponent<'a>), - - /// The root directory component, appears after any prefix and before anything else. - /// - /// It represents a separator that designates that a path starts from root. - #[stable(feature = "rust1", since = "1.0.0")] - RootDir, - - /// A reference to the current directory, i.e., `.`. - #[stable(feature = "rust1", since = "1.0.0")] - CurDir, - - /// A reference to the parent directory, i.e., `..`. - #[stable(feature = "rust1", since = "1.0.0")] - ParentDir, - - /// A normal component, e.g., `a` and `b` in `a/b`. - /// - /// This variant is the most common one, it represents references to files - /// or directories. - #[stable(feature = "rust1", since = "1.0.0")] - Normal(#[stable(feature = "rust1", since = "1.0.0")] &'a OsStr), -} - -impl<'a> Component<'a> { - /// Extracts the underlying [`OsStr`] slice. - /// - /// # Examples - /// - /// ``` - /// use std::path::Path; - /// - /// let path = Path::new("./tmp/foo/bar.txt"); - /// let components: Vec<_> = path.components().map(|comp| comp.as_os_str()).collect(); - /// assert_eq!(&components, &[".", "tmp", "foo", "bar.txt"]); - /// ``` - #[must_use = "`self` will be dropped if the result is not used"] - #[stable(feature = "rust1", since = "1.0.0")] - pub fn as_os_str(self) -> &'a OsStr { - match self { - Component::Prefix(p) => p.as_os_str(), - Component::RootDir => OsStr::new(MAIN_SEP_STR), - Component::CurDir => OsStr::new("."), - Component::ParentDir => OsStr::new(".."), - Component::Normal(path) => path, - } - } -} - -#[stable(feature = "rust1", since = "1.0.0")] -impl AsRef for Component<'_> { - #[inline] - fn as_ref(&self) -> &OsStr { - self.as_os_str() - } -} - -#[stable(feature = "path_component_asref", since = "1.25.0")] -impl AsRef for Component<'_> { - #[inline] - fn as_ref(&self) -> &Path { - self.as_os_str().as_ref() - } -} - -/// An iterator over the [`Component`]s of a [`Path`]. -/// -/// This `struct` is created by the [`components`] method on [`Path`]. -/// See its documentation for more. -/// -/// # Examples -/// -/// ``` -/// use std::path::Path; -/// -/// let path = Path::new("/tmp/foo/bar.txt"); -/// -/// for component in path.components() { -/// println!("{component:?}"); -/// } -/// ``` -/// -/// [`components`]: Path::components -#[derive(Clone)] -#[must_use = "iterators are lazy and do nothing unless consumed"] -#[stable(feature = "rust1", since = "1.0.0")] -pub struct Components<'a> { - // The path left to parse components from - path: &'a [u8], - - // The prefix as it was originally parsed, if any - prefix: Option>, - - // true if path *physically* has a root separator; for most Windows - // prefixes, it may have a "logical" root separator for the purposes of - // normalization, e.g., \\server\share == \\server\share\. - has_physical_root: bool, - - // The iterator is double-ended, and these two states keep track of what has - // been produced from either end - front: State, - back: State, -} - -/// An iterator over the [`Component`]s of a [`Path`], as [`OsStr`] slices. -/// -/// This `struct` is created by the [`iter`] method on [`Path`]. -/// See its documentation for more. -/// -/// [`iter`]: Path::iter -#[derive(Clone)] -#[must_use = "iterators are lazy and do nothing unless consumed"] -#[stable(feature = "rust1", since = "1.0.0")] -pub struct Iter<'a> { - inner: Components<'a>, -} - -#[stable(feature = "path_components_debug", since = "1.13.0")] -impl fmt::Debug for Components<'_> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - struct DebugHelper<'a>(&'a Path); - - impl fmt::Debug for DebugHelper<'_> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_list().entries(self.0.components()).finish() - } - } - - f.debug_tuple("Components").field(&DebugHelper(self.as_path())).finish() - } -} - -impl<'a> Components<'a> { - // how long is the prefix, if any? - #[inline] - fn prefix_len(&self) -> usize { - self.prefix.as_ref().map(Prefix::len).unwrap_or(0) - } - - #[inline] - fn prefix_verbatim(&self) -> bool { - self.prefix.as_ref().map(Prefix::is_verbatim).unwrap_or(false) - } - - /// how much of the prefix is left from the point of view of iteration? - #[inline] - fn prefix_remaining(&self) -> usize { - if self.front == State::Prefix { self.prefix_len() } else { 0 } - } - - // Given the iteration so far, how much of the pre-State::Body path is left? - #[inline] - fn len_before_body(&self) -> usize { - let root = if self.front <= State::StartDir && self.has_physical_root { 1 } else { 0 }; - let cur_dir = if self.front <= State::StartDir && self.include_cur_dir() { 1 } else { 0 }; - self.prefix_remaining() + root + cur_dir - } - - // is the iteration complete? - #[inline] - fn finished(&self) -> bool { - self.front == State::Done || self.back == State::Done || self.front > self.back - } - - #[inline] - fn is_sep_byte(&self, b: u8) -> bool { - if self.prefix_verbatim() { is_verbatim_sep(b) } else { is_sep_byte(b) } - } - - /// Extracts a slice corresponding to the portion of the path remaining for iteration. - /// - /// # Examples - /// - /// ``` - /// use std::path::Path; - /// - /// let mut components = Path::new("/tmp/foo/bar.txt").components(); - /// components.next(); - /// components.next(); - /// - /// assert_eq!(Path::new("foo/bar.txt"), components.as_path()); - /// ``` - #[must_use] - #[stable(feature = "rust1", since = "1.0.0")] - pub fn as_path(&self) -> &'a Path { - let mut comps = self.clone(); - if comps.front == State::Body { - comps.trim_left(); - } - if comps.back == State::Body { - comps.trim_right(); - } - unsafe { Path::from_u8_slice(comps.path) } - } - - /// Is the *original* path rooted? - fn has_root(&self) -> bool { - if self.has_physical_root { - return true; - } - if let Some(p) = self.prefix { - if p.has_implicit_root() { - return true; - } - } - false - } - - /// Should the normalized path include a leading . ? - fn include_cur_dir(&self) -> bool { - if self.has_root() { - return false; - } - let mut iter = self.path[self.prefix_remaining()..].iter(); - match (iter.next(), iter.next()) { - (Some(&b'.'), None) => true, - (Some(&b'.'), Some(&b)) => self.is_sep_byte(b), - _ => false, - } - } - - // parse a given byte sequence following the OsStr encoding into the - // corresponding path component - unsafe fn parse_single_component<'b>(&self, comp: &'b [u8]) -> Option> { - match comp { - b"." if self.prefix_verbatim() => Some(Component::CurDir), - b"." => None, // . components are normalized away, except at - // the beginning of a path, which is treated - // separately via `include_cur_dir` - b".." => Some(Component::ParentDir), - b"" => None, - _ => Some(Component::Normal(unsafe { OsStr::from_encoded_bytes_unchecked(comp) })), - } - } - - // parse a component from the left, saying how many bytes to consume to - // remove the component - fn parse_next_component(&self) -> (usize, Option>) { - debug_assert!(self.front == State::Body); - let (extra, comp) = match self.path.iter().position(|b| self.is_sep_byte(*b)) { - None => (0, self.path), - Some(i) => (1, &self.path[..i]), - }; - // SAFETY: `comp` is a valid substring, since it is split on a separator. - (comp.len() + extra, unsafe { self.parse_single_component(comp) }) - } - - // parse a component from the right, saying how many bytes to consume to - // remove the component - fn parse_next_component_back(&self) -> (usize, Option>) { - debug_assert!(self.back == State::Body); - let start = self.len_before_body(); - let (extra, comp) = match self.path[start..].iter().rposition(|b| self.is_sep_byte(*b)) { - None => (0, &self.path[start..]), - Some(i) => (1, &self.path[start + i + 1..]), - }; - // SAFETY: `comp` is a valid substring, since it is split on a separator. - (comp.len() + extra, unsafe { self.parse_single_component(comp) }) - } - - // trim away repeated separators (i.e., empty components) on the left - fn trim_left(&mut self) { - while !self.path.is_empty() { - let (size, comp) = self.parse_next_component(); - if comp.is_some() { - return; - } else { - self.path = &self.path[size..]; - } - } - } - - // trim away repeated separators (i.e., empty components) on the right - fn trim_right(&mut self) { - while self.path.len() > self.len_before_body() { - let (size, comp) = self.parse_next_component_back(); - if comp.is_some() { - return; - } else { - self.path = &self.path[..self.path.len() - size]; - } - } - } -} - -#[stable(feature = "rust1", since = "1.0.0")] -impl AsRef for Components<'_> { - #[inline] - fn as_ref(&self) -> &Path { - self.as_path() - } -} - -#[stable(feature = "rust1", since = "1.0.0")] -impl AsRef for Components<'_> { - #[inline] - fn as_ref(&self) -> &OsStr { - self.as_path().as_os_str() - } -} - -#[stable(feature = "path_iter_debug", since = "1.13.0")] -impl fmt::Debug for Iter<'_> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - struct DebugHelper<'a>(&'a Path); - - impl fmt::Debug for DebugHelper<'_> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_list().entries(self.0.iter()).finish() - } - } - - f.debug_tuple("Iter").field(&DebugHelper(self.as_path())).finish() - } -} - -impl<'a> Iter<'a> { - /// Extracts a slice corresponding to the portion of the path remaining for iteration. - /// - /// # Examples - /// - /// ``` - /// use std::path::Path; - /// - /// let mut iter = Path::new("/tmp/foo/bar.txt").iter(); - /// iter.next(); - /// iter.next(); - /// - /// assert_eq!(Path::new("foo/bar.txt"), iter.as_path()); - /// ``` - #[stable(feature = "rust1", since = "1.0.0")] - #[must_use] - #[inline] - pub fn as_path(&self) -> &'a Path { - self.inner.as_path() - } -} - -#[stable(feature = "rust1", since = "1.0.0")] -impl AsRef for Iter<'_> { - #[inline] - fn as_ref(&self) -> &Path { - self.as_path() - } -} - -#[stable(feature = "rust1", since = "1.0.0")] -impl AsRef for Iter<'_> { - #[inline] - fn as_ref(&self) -> &OsStr { - self.as_path().as_os_str() - } -} - -#[stable(feature = "rust1", since = "1.0.0")] -impl<'a> Iterator for Iter<'a> { - type Item = &'a OsStr; - - #[inline] - fn next(&mut self) -> Option<&'a OsStr> { - self.inner.next().map(Component::as_os_str) - } -} - -#[stable(feature = "rust1", since = "1.0.0")] -impl<'a> DoubleEndedIterator for Iter<'a> { - #[inline] - fn next_back(&mut self) -> Option<&'a OsStr> { - self.inner.next_back().map(Component::as_os_str) - } -} - -#[stable(feature = "fused", since = "1.26.0")] -impl FusedIterator for Iter<'_> {} - -#[stable(feature = "rust1", since = "1.0.0")] -impl<'a> Iterator for Components<'a> { - type Item = Component<'a>; - - fn next(&mut self) -> Option> { - while !self.finished() { - match self.front { - State::Prefix if self.prefix_len() > 0 => { - self.front = State::StartDir; - debug_assert!(self.prefix_len() <= self.path.len()); - let raw = &self.path[..self.prefix_len()]; - self.path = &self.path[self.prefix_len()..]; - return Some(Component::Prefix(PrefixComponent { - raw: unsafe { OsStr::from_encoded_bytes_unchecked(raw) }, - parsed: self.prefix.unwrap(), - })); - } - State::Prefix => { - self.front = State::StartDir; - } - State::StartDir => { - self.front = State::Body; - if self.has_physical_root { - debug_assert!(!self.path.is_empty()); - self.path = &self.path[1..]; - return Some(Component::RootDir); - } else if let Some(p) = self.prefix { - if p.has_implicit_root() && !p.is_verbatim() { - return Some(Component::RootDir); - } - } else if self.include_cur_dir() { - debug_assert!(!self.path.is_empty()); - self.path = &self.path[1..]; - return Some(Component::CurDir); - } - } - State::Body if !self.path.is_empty() => { - let (size, comp) = self.parse_next_component(); - self.path = &self.path[size..]; - if comp.is_some() { - return comp; - } - } - State::Body => { - self.front = State::Done; - } - State::Done => unreachable!(), - } - } - None - } -} - -#[stable(feature = "rust1", since = "1.0.0")] -impl<'a> DoubleEndedIterator for Components<'a> { - fn next_back(&mut self) -> Option> { - while !self.finished() { - match self.back { - State::Body if self.path.len() > self.len_before_body() => { - let (size, comp) = self.parse_next_component_back(); - self.path = &self.path[..self.path.len() - size]; - if comp.is_some() { - return comp; - } - } - State::Body => { - self.back = State::StartDir; - } - State::StartDir => { - self.back = State::Prefix; - if self.has_physical_root { - self.path = &self.path[..self.path.len() - 1]; - return Some(Component::RootDir); - } else if let Some(p) = self.prefix { - if p.has_implicit_root() && !p.is_verbatim() { - return Some(Component::RootDir); - } - } else if self.include_cur_dir() { - self.path = &self.path[..self.path.len() - 1]; - return Some(Component::CurDir); - } - } - State::Prefix if self.prefix_len() > 0 => { - self.back = State::Done; - return Some(Component::Prefix(PrefixComponent { - raw: unsafe { OsStr::from_encoded_bytes_unchecked(self.path) }, - parsed: self.prefix.unwrap(), - })); - } - State::Prefix => { - self.back = State::Done; - return None; - } - State::Done => unreachable!(), - } - } - None - } -} - -#[stable(feature = "fused", since = "1.26.0")] -impl FusedIterator for Components<'_> {} - -#[stable(feature = "rust1", since = "1.0.0")] -impl<'a> PartialEq for Components<'a> { - #[inline] - fn eq(&self, other: &Components<'a>) -> bool { - let Components { path: _, front: _, back: _, has_physical_root: _, prefix: _ } = self; - - // Fast path for exact matches, e.g. for hashmap lookups. - // Don't explicitly compare the prefix or has_physical_root fields since they'll - // either be covered by the `path` buffer or are only relevant for `prefix_verbatim()`. - if self.path.len() == other.path.len() - && self.front == other.front - && self.back == State::Body - && other.back == State::Body - && self.prefix_verbatim() == other.prefix_verbatim() - { - // possible future improvement: this could bail out earlier if there were a - // reverse memcmp/bcmp comparing back to front - if self.path == other.path { - return true; - } - } - - // compare back to front since absolute paths often share long prefixes - Iterator::eq(self.clone().rev(), other.clone().rev()) - } -} - -#[stable(feature = "rust1", since = "1.0.0")] -impl Eq for Components<'_> {} - -#[stable(feature = "rust1", since = "1.0.0")] -impl<'a> PartialOrd for Components<'a> { - #[inline] - fn partial_cmp(&self, other: &Components<'a>) -> Option { - Some(compare_components(self.clone(), other.clone())) - } -} - -#[stable(feature = "rust1", since = "1.0.0")] -impl Ord for Components<'_> { - #[inline] - fn cmp(&self, other: &Self) -> cmp::Ordering { - compare_components(self.clone(), other.clone()) - } -} - -fn compare_components(mut left: Components<'_>, mut right: Components<'_>) -> cmp::Ordering { - // Fast path for long shared prefixes - // - // - compare raw bytes to find first mismatch - // - backtrack to find separator before mismatch to avoid ambiguous parsings of '.' or '..' characters - // - if found update state to only do a component-wise comparison on the remainder, - // otherwise do it on the full path - // - // The fast path isn't taken for paths with a PrefixComponent to avoid backtracking into - // the middle of one - if left.prefix.is_none() && right.prefix.is_none() && left.front == right.front { - // possible future improvement: a [u8]::first_mismatch simd implementation - let first_difference = match left.path.iter().zip(right.path).position(|(&a, &b)| a != b) { - None if left.path.len() == right.path.len() => return cmp::Ordering::Equal, - None => left.path.len().min(right.path.len()), - Some(diff) => diff, - }; - - if let Some(previous_sep) = - left.path[..first_difference].iter().rposition(|&b| left.is_sep_byte(b)) - { - let mismatched_component_start = previous_sep + 1; - left.path = &left.path[mismatched_component_start..]; - left.front = State::Body; - right.path = &right.path[mismatched_component_start..]; - right.front = State::Body; - } - } - - Iterator::cmp(left, right) -} - -/// An iterator over [`Path`] and its ancestors. -/// -/// This `struct` is created by the [`ancestors`] method on [`Path`]. -/// See its documentation for more. -/// -/// # Examples -/// -/// ``` -/// use std::path::Path; -/// -/// let path = Path::new("/foo/bar"); -/// -/// for ancestor in path.ancestors() { -/// println!("{}", ancestor.display()); -/// } -/// ``` -/// -/// [`ancestors`]: Path::ancestors -#[derive(Copy, Clone, Debug)] -#[must_use = "iterators are lazy and do nothing unless consumed"] -#[stable(feature = "path_ancestors", since = "1.28.0")] -pub struct Ancestors<'a> { - next: Option<&'a Path>, -} - -#[stable(feature = "path_ancestors", since = "1.28.0")] -impl<'a> Iterator for Ancestors<'a> { - type Item = &'a Path; - - #[inline] - fn next(&mut self) -> Option { - let next = self.next; - self.next = next.and_then(Path::parent); - next - } -} - -#[stable(feature = "path_ancestors", since = "1.28.0")] -impl FusedIterator for Ancestors<'_> {} - -//////////////////////////////////////////////////////////////////////////////// -// Basic types and traits -//////////////////////////////////////////////////////////////////////////////// - -/// An owned, mutable path (akin to [`String`]). -/// -/// This type provides methods like [`push`] and [`set_extension`] that mutate -/// the path in place. It also implements [`Deref`] to [`Path`], meaning that -/// all methods on [`Path`] slices are available on `PathBuf` values as well. -/// -/// [`push`]: PathBuf::push -/// [`set_extension`]: PathBuf::set_extension -/// -/// More details about the overall approach can be found in -/// the [module documentation](self). -/// -/// # Examples -/// -/// You can use [`push`] to build up a `PathBuf` from -/// components: -/// -/// ``` -/// use std::path::PathBuf; -/// -/// let mut path = PathBuf::new(); -/// -/// path.push(r"C:\"); -/// path.push("windows"); -/// path.push("system32"); -/// -/// path.set_extension("dll"); -/// ``` -/// -/// However, [`push`] is best used for dynamic situations. This is a better way -/// to do this when you know all of the components ahead of time: -/// -/// ``` -/// use std::path::PathBuf; -/// -/// let path: PathBuf = [r"C:\", "windows", "system32.dll"].iter().collect(); -/// ``` -/// -/// We can still do better than this! Since these are all strings, we can use -/// `From::from`: -/// -/// ``` -/// use std::path::PathBuf; -/// -/// let path = PathBuf::from(r"C:\windows\system32.dll"); -/// ``` -/// -/// Which method works best depends on what kind of situation you're in. -#[cfg_attr(not(test), rustc_diagnostic_item = "PathBuf")] -#[stable(feature = "rust1", since = "1.0.0")] -pub struct PathBuf { - inner: OsString, -} - -impl PathBuf { - /// Allocates an empty `PathBuf`. - /// - /// # Examples - /// - /// ``` - /// use std::path::PathBuf; - /// - /// let path = PathBuf::new(); - /// ``` - #[stable(feature = "rust1", since = "1.0.0")] - #[must_use] - #[inline] - pub fn new() -> PathBuf { - PathBuf { inner: OsString::new() } - } - - /// Creates a new `PathBuf` with a given capacity used to create the - /// internal [`OsString`]. See [`with_capacity`] defined on [`OsString`]. - /// - /// # Examples - /// - /// ``` - /// use std::path::PathBuf; - /// - /// let mut path = PathBuf::with_capacity(10); - /// let capacity = path.capacity(); - /// - /// // This push is done without reallocating - /// path.push(r"C:\"); - /// - /// assert_eq!(capacity, path.capacity()); - /// ``` - /// - /// [`with_capacity`]: OsString::with_capacity - #[stable(feature = "path_buf_capacity", since = "1.44.0")] - #[must_use] - #[inline] - pub fn with_capacity(capacity: usize) -> PathBuf { - PathBuf { inner: OsString::with_capacity(capacity) } - } - - /// Coerces to a [`Path`] slice. - /// - /// # Examples - /// - /// ``` - /// use std::path::{Path, PathBuf}; - /// - /// let p = PathBuf::from("/test"); - /// assert_eq!(Path::new("/test"), p.as_path()); - /// ``` - #[stable(feature = "rust1", since = "1.0.0")] - #[must_use] - #[inline] - pub fn as_path(&self) -> &Path { - self - } - - /// Consumes and leaks the `PathBuf`, returning a mutable reference to the contents, - /// `&'a mut Path`. - /// - /// The caller has free choice over the returned lifetime, including 'static. - /// Indeed, this function is ideally used for data that lives for the remainder of - /// the program’s life, as dropping the returned reference will cause a memory leak. - /// - /// It does not reallocate or shrink the `PathBuf`, so the leaked allocation may include - /// unused capacity that is not part of the returned slice. If you want to discard excess - /// capacity, call [`into_boxed_path`], and then [`Box::leak`] instead. - /// However, keep in mind that trimming the capacity may result in a reallocation and copy. - /// - /// [`into_boxed_path`]: Self::into_boxed_path - #[unstable(feature = "os_string_pathbuf_leak", issue = "125965")] - #[inline] - pub fn leak<'a>(self) -> &'a mut Path { - Path::from_inner_mut(self.inner.leak()) - } - - /// Extends `self` with `path`. - /// - /// If `path` is absolute, it replaces the current path. - /// - /// On Windows: - /// - /// * if `path` has a root but no prefix (e.g., `\windows`), it - /// replaces everything except for the prefix (if any) of `self`. - /// * if `path` has a prefix but no root, it replaces `self`. - /// * if `self` has a verbatim prefix (e.g. `\\?\C:\windows`) - /// and `path` is not empty, the new path is normalized: all references - /// to `.` and `..` are removed. - /// - /// Consider using [`Path::join`] if you need a new `PathBuf` instead of - /// using this function on a cloned `PathBuf`. - /// - /// # Examples - /// - /// Pushing a relative path extends the existing path: - /// - /// ``` - /// use std::path::PathBuf; - /// - /// let mut path = PathBuf::from("/tmp"); - /// path.push("file.bk"); - /// assert_eq!(path, PathBuf::from("/tmp/file.bk")); - /// ``` - /// - /// Pushing an absolute path replaces the existing path: - /// - /// ``` - /// use std::path::PathBuf; - /// - /// let mut path = PathBuf::from("/tmp"); - /// path.push("/etc"); - /// assert_eq!(path, PathBuf::from("/etc")); - /// ``` - #[stable(feature = "rust1", since = "1.0.0")] - #[rustc_confusables("append", "put")] - pub fn push>(&mut self, path: P) { - self._push(path.as_ref()) - } - - fn _push(&mut self, path: &Path) { - // in general, a separator is needed if the rightmost byte is not a separator - let buf = self.inner.as_encoded_bytes(); - let mut need_sep = buf.last().map(|c| !is_sep_byte(*c)).unwrap_or(false); - - // in the special case of `C:` on Windows, do *not* add a separator - let comps = self.components(); - - if comps.prefix_len() > 0 - && comps.prefix_len() == comps.path.len() - && comps.prefix.unwrap().is_drive() - { - need_sep = false - } - - // absolute `path` replaces `self` - if path.is_absolute() || path.prefix().is_some() { - self.inner.truncate(0); - - // verbatim paths need . and .. removed - } else if comps.prefix_verbatim() && !path.inner.is_empty() { - let mut buf: Vec<_> = comps.collect(); - for c in path.components() { - match c { - Component::RootDir => { - buf.truncate(1); - buf.push(c); - } - Component::CurDir => (), - Component::ParentDir => { - if let Some(Component::Normal(_)) = buf.last() { - buf.pop(); - } - } - _ => buf.push(c), - } - } - - let mut res = OsString::new(); - let mut need_sep = false; - - for c in buf { - if need_sep && c != Component::RootDir { - res.push(MAIN_SEP_STR); - } - res.push(c.as_os_str()); - - need_sep = match c { - Component::RootDir => false, - Component::Prefix(prefix) => { - !prefix.parsed.is_drive() && prefix.parsed.len() > 0 - } - _ => true, - } - } - - self.inner = res; - return; - - // `path` has a root but no prefix, e.g., `\windows` (Windows only) - } else if path.has_root() { - let prefix_len = self.components().prefix_remaining(); - self.inner.truncate(prefix_len); - - // `path` is a pure relative path - } else if need_sep { - self.inner.push(MAIN_SEP_STR); - } - - self.inner.push(path); - } - - /// Truncates `self` to [`self.parent`]. - /// - /// Returns `false` and does nothing if [`self.parent`] is [`None`]. - /// Otherwise, returns `true`. - /// - /// [`self.parent`]: Path::parent - /// - /// # Examples - /// - /// ``` - /// use std::path::{Path, PathBuf}; - /// - /// let mut p = PathBuf::from("/spirited/away.rs"); - /// - /// p.pop(); - /// assert_eq!(Path::new("/spirited"), p); - /// p.pop(); - /// assert_eq!(Path::new("/"), p); - /// ``` - #[stable(feature = "rust1", since = "1.0.0")] - pub fn pop(&mut self) -> bool { - match self.parent().map(|p| p.as_u8_slice().len()) { - Some(len) => { - self.inner.truncate(len); - true - } - None => false, - } - } - - /// Updates [`self.file_name`] to `file_name`. - /// - /// If [`self.file_name`] was [`None`], this is equivalent to pushing - /// `file_name`. - /// - /// Otherwise it is equivalent to calling [`pop`] and then pushing - /// `file_name`. The new path will be a sibling of the original path. - /// (That is, it will have the same parent.) - /// - /// [`self.file_name`]: Path::file_name - /// [`pop`]: PathBuf::pop - /// - /// # Examples - /// - /// ``` - /// use std::path::PathBuf; - /// - /// let mut buf = PathBuf::from("/"); - /// assert!(buf.file_name() == None); - /// - /// buf.set_file_name("foo.txt"); - /// assert!(buf == PathBuf::from("/foo.txt")); - /// assert!(buf.file_name().is_some()); - /// - /// buf.set_file_name("bar.txt"); - /// assert!(buf == PathBuf::from("/bar.txt")); - /// - /// buf.set_file_name("baz"); - /// assert!(buf == PathBuf::from("/baz")); - /// ``` - #[stable(feature = "rust1", since = "1.0.0")] - pub fn set_file_name>(&mut self, file_name: S) { - self._set_file_name(file_name.as_ref()) - } - - fn _set_file_name(&mut self, file_name: &OsStr) { - if self.file_name().is_some() { - let popped = self.pop(); - debug_assert!(popped); - } - self.push(file_name); - } - - /// Updates [`self.extension`] to `Some(extension)` or to `None` if - /// `extension` is empty. - /// - /// Returns `false` and does nothing if [`self.file_name`] is [`None`], - /// returns `true` and updates the extension otherwise. - /// - /// If [`self.extension`] is [`None`], the extension is added; otherwise - /// it is replaced. - /// - /// If `extension` is the empty string, [`self.extension`] will be [`None`] - /// afterwards, not `Some("")`. - /// - /// # Panics - /// - /// Panics if the passed extension contains a path separator (see - /// [`is_separator`]). - /// - /// # Caveats - /// - /// The new `extension` may contain dots and will be used in its entirety, - /// but only the part after the final dot will be reflected in - /// [`self.extension`]. - /// - /// If the file stem contains internal dots and `extension` is empty, part - /// of the old file stem will be considered the new [`self.extension`]. - /// - /// See the examples below. - /// - /// [`self.file_name`]: Path::file_name - /// [`self.extension`]: Path::extension - /// - /// # Examples - /// - /// ``` - /// use std::path::{Path, PathBuf}; - /// - /// let mut p = PathBuf::from("/feel/the"); - /// - /// p.set_extension("force"); - /// assert_eq!(Path::new("/feel/the.force"), p.as_path()); - /// - /// p.set_extension("dark.side"); - /// assert_eq!(Path::new("/feel/the.dark.side"), p.as_path()); - /// - /// p.set_extension("cookie"); - /// assert_eq!(Path::new("/feel/the.dark.cookie"), p.as_path()); - /// - /// p.set_extension(""); - /// assert_eq!(Path::new("/feel/the.dark"), p.as_path()); - /// - /// p.set_extension(""); - /// assert_eq!(Path::new("/feel/the"), p.as_path()); - /// - /// p.set_extension(""); - /// assert_eq!(Path::new("/feel/the"), p.as_path()); - /// ``` - #[stable(feature = "rust1", since = "1.0.0")] - pub fn set_extension>(&mut self, extension: S) -> bool { - self._set_extension(extension.as_ref()) - } - - fn _set_extension(&mut self, extension: &OsStr) -> bool { - for &b in extension.as_encoded_bytes() { - if b < 128 { - if is_separator(b as char) { - panic!("extension cannot contain path separators: {:?}", extension); - } - } - } - - let file_stem = match self.file_stem() { - None => return false, - Some(f) => f.as_encoded_bytes(), - }; - - // truncate until right after the file stem - let end_file_stem = file_stem[file_stem.len()..].as_ptr().addr(); - let start = self.inner.as_encoded_bytes().as_ptr().addr(); - self.inner.truncate(end_file_stem.wrapping_sub(start)); - - // add the new extension, if any - let new = extension; - if !new.is_empty() { - self.inner.reserve_exact(new.len() + 1); - self.inner.push(OsStr::new(".")); - self.inner.push(new); - } - - true - } - - /// Append [`self.extension`] with `extension`. - /// - /// Returns `false` and does nothing if [`self.file_name`] is [`None`], - /// returns `true` and updates the extension otherwise. - /// - /// # Caveats - /// - /// The appended `extension` may contain dots and will be used in its entirety, - /// but only the part after the final dot will be reflected in - /// [`self.extension`]. - /// - /// See the examples below. - /// - /// [`self.file_name`]: Path::file_name - /// [`self.extension`]: Path::extension - /// - /// # Examples - /// - /// ``` - /// #![feature(path_add_extension)] - /// - /// use std::path::{Path, PathBuf}; - /// - /// let mut p = PathBuf::from("/feel/the"); - /// - /// p.add_extension("formatted"); - /// assert_eq!(Path::new("/feel/the.formatted"), p.as_path()); - /// - /// p.add_extension("dark.side"); - /// assert_eq!(Path::new("/feel/the.formatted.dark.side"), p.as_path()); - /// - /// p.set_extension("cookie"); - /// assert_eq!(Path::new("/feel/the.formatted.dark.cookie"), p.as_path()); - /// - /// p.set_extension(""); - /// assert_eq!(Path::new("/feel/the.formatted.dark"), p.as_path()); - /// - /// p.add_extension(""); - /// assert_eq!(Path::new("/feel/the.formatted.dark"), p.as_path()); - /// ``` - #[unstable(feature = "path_add_extension", issue = "127292")] - pub fn add_extension>(&mut self, extension: S) -> bool { - self._add_extension(extension.as_ref()) - } - - fn _add_extension(&mut self, extension: &OsStr) -> bool { - let file_name = match self.file_name() { - None => return false, - Some(f) => f.as_encoded_bytes(), - }; - - let new = extension; - if !new.is_empty() { - // truncate until right after the file name - // this is necessary for trimming the trailing slash - let end_file_name = file_name[file_name.len()..].as_ptr().addr(); - let start = self.inner.as_encoded_bytes().as_ptr().addr(); - self.inner.truncate(end_file_name.wrapping_sub(start)); - - // append the new extension - self.inner.reserve_exact(new.len() + 1); - self.inner.push(OsStr::new(".")); - self.inner.push(new); - } - - true - } - - /// Yields a mutable reference to the underlying [`OsString`] instance. - /// - /// # Examples - /// - /// ``` - /// use std::path::{Path, PathBuf}; - /// - /// let mut path = PathBuf::from("/foo"); - /// - /// path.push("bar"); - /// assert_eq!(path, Path::new("/foo/bar")); - /// - /// // OsString's `push` does not add a separator. - /// path.as_mut_os_string().push("baz"); - /// assert_eq!(path, Path::new("/foo/barbaz")); - /// ``` - #[stable(feature = "path_as_mut_os_str", since = "1.70.0")] - #[must_use] - #[inline] - pub fn as_mut_os_string(&mut self) -> &mut OsString { - &mut self.inner - } - - /// Consumes the `PathBuf`, yielding its internal [`OsString`] storage. - /// - /// # Examples - /// - /// ``` - /// use std::path::PathBuf; - /// - /// let p = PathBuf::from("/the/head"); - /// let os_str = p.into_os_string(); - /// ``` - #[stable(feature = "rust1", since = "1.0.0")] - #[must_use = "`self` will be dropped if the result is not used"] - #[inline] - pub fn into_os_string(self) -> OsString { - self.inner - } - - /// Converts this `PathBuf` into a [boxed](Box) [`Path`]. - #[stable(feature = "into_boxed_path", since = "1.20.0")] - #[must_use = "`self` will be dropped if the result is not used"] - #[inline] - pub fn into_boxed_path(self) -> Box { - let rw = Box::into_raw(self.inner.into_boxed_os_str()) as *mut Path; - unsafe { Box::from_raw(rw) } - } - - /// Invokes [`capacity`] on the underlying instance of [`OsString`]. - /// - /// [`capacity`]: OsString::capacity - #[stable(feature = "path_buf_capacity", since = "1.44.0")] - #[must_use] - #[inline] - pub fn capacity(&self) -> usize { - self.inner.capacity() - } - - /// Invokes [`clear`] on the underlying instance of [`OsString`]. - /// - /// [`clear`]: OsString::clear - #[stable(feature = "path_buf_capacity", since = "1.44.0")] - #[inline] - pub fn clear(&mut self) { - self.inner.clear() - } - - /// Invokes [`reserve`] on the underlying instance of [`OsString`]. - /// - /// [`reserve`]: OsString::reserve - #[stable(feature = "path_buf_capacity", since = "1.44.0")] - #[inline] - pub fn reserve(&mut self, additional: usize) { - self.inner.reserve(additional) - } - - /// Invokes [`try_reserve`] on the underlying instance of [`OsString`]. - /// - /// [`try_reserve`]: OsString::try_reserve - #[stable(feature = "try_reserve_2", since = "1.63.0")] - #[inline] - pub fn try_reserve(&mut self, additional: usize) -> Result<(), TryReserveError> { - self.inner.try_reserve(additional) - } - - /// Invokes [`reserve_exact`] on the underlying instance of [`OsString`]. - /// - /// [`reserve_exact`]: OsString::reserve_exact - #[stable(feature = "path_buf_capacity", since = "1.44.0")] - #[inline] - pub fn reserve_exact(&mut self, additional: usize) { - self.inner.reserve_exact(additional) - } - - /// Invokes [`try_reserve_exact`] on the underlying instance of [`OsString`]. - /// - /// [`try_reserve_exact`]: OsString::try_reserve_exact - #[stable(feature = "try_reserve_2", since = "1.63.0")] - #[inline] - pub fn try_reserve_exact(&mut self, additional: usize) -> Result<(), TryReserveError> { - self.inner.try_reserve_exact(additional) - } - - /// Invokes [`shrink_to_fit`] on the underlying instance of [`OsString`]. - /// - /// [`shrink_to_fit`]: OsString::shrink_to_fit - #[stable(feature = "path_buf_capacity", since = "1.44.0")] - #[inline] - pub fn shrink_to_fit(&mut self) { - self.inner.shrink_to_fit() - } - - /// Invokes [`shrink_to`] on the underlying instance of [`OsString`]. - /// - /// [`shrink_to`]: OsString::shrink_to - #[stable(feature = "shrink_to", since = "1.56.0")] - #[inline] - pub fn shrink_to(&mut self, min_capacity: usize) { - self.inner.shrink_to(min_capacity) - } -} - -#[stable(feature = "rust1", since = "1.0.0")] -impl Clone for PathBuf { - #[inline] - fn clone(&self) -> Self { - PathBuf { inner: self.inner.clone() } - } - - /// Clones the contents of `source` into `self`. - /// - /// This method is preferred over simply assigning `source.clone()` to `self`, - /// as it avoids reallocation if possible. - #[inline] - fn clone_from(&mut self, source: &Self) { - self.inner.clone_from(&source.inner) - } -} - -#[stable(feature = "box_from_path", since = "1.17.0")] -impl From<&Path> for Box { - /// Creates a boxed [`Path`] from a reference. - /// - /// This will allocate and clone `path` to it. - fn from(path: &Path) -> Box { - let boxed: Box = path.inner.into(); - let rw = Box::into_raw(boxed) as *mut Path; - unsafe { Box::from_raw(rw) } - } -} - -#[stable(feature = "box_from_cow", since = "1.45.0")] -impl From> for Box { - /// Creates a boxed [`Path`] from a clone-on-write pointer. - /// - /// Converting from a `Cow::Owned` does not clone or allocate. - #[inline] - fn from(cow: Cow<'_, Path>) -> Box { - match cow { - Cow::Borrowed(path) => Box::from(path), - Cow::Owned(path) => Box::from(path), - } - } -} - -#[stable(feature = "path_buf_from_box", since = "1.18.0")] -impl From> for PathBuf { - /// Converts a [Box]<[Path]> into a [`PathBuf`]. - /// - /// This conversion does not allocate or copy memory. - #[inline] - fn from(boxed: Box) -> PathBuf { - boxed.into_path_buf() - } -} - -#[stable(feature = "box_from_path_buf", since = "1.20.0")] -impl From for Box { - /// Converts a [`PathBuf`] into a [Box]<[Path]>. - /// - /// This conversion currently should not allocate memory, - /// but this behavior is not guaranteed on all platforms or in all future versions. - #[inline] - fn from(p: PathBuf) -> Box { - p.into_boxed_path() - } -} - -#[stable(feature = "more_box_slice_clone", since = "1.29.0")] -impl Clone for Box { - #[inline] - fn clone(&self) -> Self { - self.to_path_buf().into_boxed_path() - } -} - -#[stable(feature = "rust1", since = "1.0.0")] -impl> From<&T> for PathBuf { - /// Converts a borrowed [`OsStr`] to a [`PathBuf`]. - /// - /// Allocates a [`PathBuf`] and copies the data into it. - #[inline] - fn from(s: &T) -> PathBuf { - PathBuf::from(s.as_ref().to_os_string()) - } -} - -#[stable(feature = "rust1", since = "1.0.0")] -impl From for PathBuf { - /// Converts an [`OsString`] into a [`PathBuf`]. - /// - /// This conversion does not allocate or copy memory. - #[inline] - fn from(s: OsString) -> PathBuf { - PathBuf { inner: s } - } -} - -#[stable(feature = "from_path_buf_for_os_string", since = "1.14.0")] -impl From for OsString { - /// Converts a [`PathBuf`] into an [`OsString`] - /// - /// This conversion does not allocate or copy memory. - #[inline] - fn from(path_buf: PathBuf) -> OsString { - path_buf.inner - } -} - -#[stable(feature = "rust1", since = "1.0.0")] -impl From for PathBuf { - /// Converts a [`String`] into a [`PathBuf`] - /// - /// This conversion does not allocate or copy memory. - #[inline] - fn from(s: String) -> PathBuf { - PathBuf::from(OsString::from(s)) - } -} - -#[stable(feature = "path_from_str", since = "1.32.0")] -impl FromStr for PathBuf { - type Err = core::convert::Infallible; - - #[inline] - fn from_str(s: &str) -> Result { - Ok(PathBuf::from(s)) - } -} - -#[stable(feature = "rust1", since = "1.0.0")] -impl> FromIterator

for PathBuf { - fn from_iter>(iter: I) -> PathBuf { - let mut buf = PathBuf::new(); - buf.extend(iter); - buf - } -} - -#[stable(feature = "rust1", since = "1.0.0")] -impl> Extend

for PathBuf { - fn extend>(&mut self, iter: I) { - iter.into_iter().for_each(move |p| self.push(p.as_ref())); - } - - #[inline] - fn extend_one(&mut self, p: P) { - self.push(p.as_ref()); - } -} - -#[stable(feature = "rust1", since = "1.0.0")] -impl fmt::Debug for PathBuf { - fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { - fmt::Debug::fmt(&**self, formatter) - } -} - -#[stable(feature = "rust1", since = "1.0.0")] -impl ops::Deref for PathBuf { - type Target = Path; - #[inline] - fn deref(&self) -> &Path { - Path::new(&self.inner) - } -} - -#[stable(feature = "path_buf_deref_mut", since = "1.68.0")] -impl ops::DerefMut for PathBuf { - #[inline] - fn deref_mut(&mut self) -> &mut Path { - Path::from_inner_mut(&mut self.inner) - } -} - -#[stable(feature = "rust1", since = "1.0.0")] -impl Borrow for PathBuf { - #[inline] - fn borrow(&self) -> &Path { - self.deref() - } -} - -#[stable(feature = "default_for_pathbuf", since = "1.17.0")] -impl Default for PathBuf { - #[inline] - fn default() -> Self { - PathBuf::new() - } -} - -#[stable(feature = "cow_from_path", since = "1.6.0")] -impl<'a> From<&'a Path> for Cow<'a, Path> { - /// Creates a clone-on-write pointer from a reference to - /// [`Path`]. - /// - /// This conversion does not clone or allocate. - #[inline] - fn from(s: &'a Path) -> Cow<'a, Path> { - Cow::Borrowed(s) - } -} - -#[stable(feature = "cow_from_path", since = "1.6.0")] -impl<'a> From for Cow<'a, Path> { - /// Creates a clone-on-write pointer from an owned - /// instance of [`PathBuf`]. - /// - /// This conversion does not clone or allocate. - #[inline] - fn from(s: PathBuf) -> Cow<'a, Path> { - Cow::Owned(s) - } -} - -#[stable(feature = "cow_from_pathbuf_ref", since = "1.28.0")] -impl<'a> From<&'a PathBuf> for Cow<'a, Path> { - /// Creates a clone-on-write pointer from a reference to - /// [`PathBuf`]. - /// - /// This conversion does not clone or allocate. - #[inline] - fn from(p: &'a PathBuf) -> Cow<'a, Path> { - Cow::Borrowed(p.as_path()) - } -} - -#[stable(feature = "pathbuf_from_cow_path", since = "1.28.0")] -impl<'a> From> for PathBuf { - /// Converts a clone-on-write pointer to an owned path. - /// - /// Converting from a `Cow::Owned` does not clone or allocate. - #[inline] - fn from(p: Cow<'a, Path>) -> Self { - p.into_owned() - } -} - -#[stable(feature = "shared_from_slice2", since = "1.24.0")] -impl From for Arc { - /// Converts a [`PathBuf`] into an [Arc]<[Path]> by moving the [`PathBuf`] data - /// into a new [`Arc`] buffer. - #[inline] - fn from(s: PathBuf) -> Arc { - let arc: Arc = Arc::from(s.into_os_string()); - unsafe { Arc::from_raw(Arc::into_raw(arc) as *const Path) } - } -} - -#[stable(feature = "shared_from_slice2", since = "1.24.0")] -impl From<&Path> for Arc { - /// Converts a [`Path`] into an [`Arc`] by copying the [`Path`] data into a new [`Arc`] buffer. - #[inline] - fn from(s: &Path) -> Arc { - let arc: Arc = Arc::from(s.as_os_str()); - unsafe { Arc::from_raw(Arc::into_raw(arc) as *const Path) } - } -} - -#[stable(feature = "shared_from_slice2", since = "1.24.0")] -impl From for Rc { - /// Converts a [`PathBuf`] into an [Rc]<[Path]> by moving the [`PathBuf`] data into - /// a new [`Rc`] buffer. - #[inline] - fn from(s: PathBuf) -> Rc { - let rc: Rc = Rc::from(s.into_os_string()); - unsafe { Rc::from_raw(Rc::into_raw(rc) as *const Path) } - } -} - -#[stable(feature = "shared_from_slice2", since = "1.24.0")] -impl From<&Path> for Rc { - /// Converts a [`Path`] into an [`Rc`] by copying the [`Path`] data into a new [`Rc`] buffer. - #[inline] - fn from(s: &Path) -> Rc { - let rc: Rc = Rc::from(s.as_os_str()); - unsafe { Rc::from_raw(Rc::into_raw(rc) as *const Path) } - } -} - -#[stable(feature = "rust1", since = "1.0.0")] -impl ToOwned for Path { - type Owned = PathBuf; - #[inline] - fn to_owned(&self) -> PathBuf { - self.to_path_buf() - } - #[inline] - fn clone_into(&self, target: &mut PathBuf) { - self.inner.clone_into(&mut target.inner); - } -} - -#[stable(feature = "rust1", since = "1.0.0")] -impl PartialEq for PathBuf { - #[inline] - fn eq(&self, other: &PathBuf) -> bool { - self.components() == other.components() - } -} - -#[stable(feature = "rust1", since = "1.0.0")] -impl Hash for PathBuf { - fn hash(&self, h: &mut H) { - self.as_path().hash(h) - } -} - -#[stable(feature = "rust1", since = "1.0.0")] -impl Eq for PathBuf {} - -#[stable(feature = "rust1", since = "1.0.0")] -impl PartialOrd for PathBuf { - #[inline] - fn partial_cmp(&self, other: &PathBuf) -> Option { - Some(compare_components(self.components(), other.components())) - } -} - -#[stable(feature = "rust1", since = "1.0.0")] -impl Ord for PathBuf { - #[inline] - fn cmp(&self, other: &PathBuf) -> cmp::Ordering { - compare_components(self.components(), other.components()) - } -} - -#[stable(feature = "rust1", since = "1.0.0")] -impl AsRef for PathBuf { - #[inline] - fn as_ref(&self) -> &OsStr { - &self.inner[..] - } -} - -/// A slice of a path (akin to [`str`]). -/// -/// This type supports a number of operations for inspecting a path, including -/// breaking the path into its components (separated by `/` on Unix and by either -/// `/` or `\` on Windows), extracting the file name, determining whether the path -/// is absolute, and so on. -/// -/// This is an *unsized* type, meaning that it must always be used behind a -/// pointer like `&` or [`Box`]. For an owned version of this type, -/// see [`PathBuf`]. -/// -/// More details about the overall approach can be found in -/// the [module documentation](self). -/// -/// # Examples -/// -/// ``` -/// use std::path::Path; -/// use std::ffi::OsStr; -/// -/// // Note: this example does work on Windows -/// let path = Path::new("./foo/bar.txt"); -/// -/// let parent = path.parent(); -/// assert_eq!(parent, Some(Path::new("./foo"))); -/// -/// let file_stem = path.file_stem(); -/// assert_eq!(file_stem, Some(OsStr::new("bar"))); -/// -/// let extension = path.extension(); -/// assert_eq!(extension, Some(OsStr::new("txt"))); -/// ``` -#[cfg_attr(not(test), rustc_diagnostic_item = "Path")] -#[stable(feature = "rust1", since = "1.0.0")] -// `Path::new` current implementation relies -// on `Path` being layout-compatible with `OsStr`. -// However, `Path` layout is considered an implementation detail and must not be relied upon. -#[repr(transparent)] -pub struct Path { - inner: OsStr, -} - -/// An error returned from [`Path::strip_prefix`] if the prefix was not found. -/// -/// This `struct` is created by the [`strip_prefix`] method on [`Path`]. -/// See its documentation for more. -/// -/// [`strip_prefix`]: Path::strip_prefix -#[derive(Debug, Clone, PartialEq, Eq)] -#[stable(since = "1.7.0", feature = "strip_prefix")] -pub struct StripPrefixError(()); - -impl Path { - // The following (private!) function allows construction of a path from a u8 - // slice, which is only safe when it is known to follow the OsStr encoding. - unsafe fn from_u8_slice(s: &[u8]) -> &Path { - unsafe { Path::new(OsStr::from_encoded_bytes_unchecked(s)) } - } - // The following (private!) function reveals the byte encoding used for OsStr. - fn as_u8_slice(&self) -> &[u8] { - self.inner.as_encoded_bytes() - } - - /// Directly wraps a string slice as a `Path` slice. - /// - /// This is a cost-free conversion. - /// - /// # Examples - /// - /// ``` - /// use std::path::Path; - /// - /// Path::new("foo.txt"); - /// ``` - /// - /// You can create `Path`s from `String`s, or even other `Path`s: - /// - /// ``` - /// use std::path::Path; - /// - /// let string = String::from("foo.txt"); - /// let from_string = Path::new(&string); - /// let from_path = Path::new(&from_string); - /// assert_eq!(from_string, from_path); - /// ``` - #[stable(feature = "rust1", since = "1.0.0")] - pub fn new + ?Sized>(s: &S) -> &Path { - unsafe { &*(s.as_ref() as *const OsStr as *const Path) } - } - - fn from_inner_mut(inner: &mut OsStr) -> &mut Path { - // SAFETY: Path is just a wrapper around OsStr, - // therefore converting &mut OsStr to &mut Path is safe. - unsafe { &mut *(inner as *mut OsStr as *mut Path) } - } - - /// Yields the underlying [`OsStr`] slice. - /// - /// # Examples - /// - /// ``` - /// use std::path::Path; - /// - /// let os_str = Path::new("foo.txt").as_os_str(); - /// assert_eq!(os_str, std::ffi::OsStr::new("foo.txt")); - /// ``` - #[stable(feature = "rust1", since = "1.0.0")] - #[must_use] - #[inline] - pub fn as_os_str(&self) -> &OsStr { - &self.inner - } - - /// Yields a mutable reference to the underlying [`OsStr`] slice. - /// - /// # Examples - /// - /// ``` - /// use std::path::{Path, PathBuf}; - /// - /// let mut path = PathBuf::from("Foo.TXT"); - /// - /// assert_ne!(path, Path::new("foo.txt")); - /// - /// path.as_mut_os_str().make_ascii_lowercase(); - /// assert_eq!(path, Path::new("foo.txt")); - /// ``` - #[stable(feature = "path_as_mut_os_str", since = "1.70.0")] - #[must_use] - #[inline] - pub fn as_mut_os_str(&mut self) -> &mut OsStr { - &mut self.inner - } - - /// Yields a [`&str`] slice if the `Path` is valid unicode. - /// - /// This conversion may entail doing a check for UTF-8 validity. - /// Note that validation is performed because non-UTF-8 strings are - /// perfectly valid for some OS. - /// - /// [`&str`]: str - /// - /// # Examples - /// - /// ``` - /// use std::path::Path; - /// - /// let path = Path::new("foo.txt"); - /// assert_eq!(path.to_str(), Some("foo.txt")); - /// ``` - #[stable(feature = "rust1", since = "1.0.0")] - #[must_use = "this returns the result of the operation, \ - without modifying the original"] - #[inline] - pub fn to_str(&self) -> Option<&str> { - self.inner.to_str() - } - - /// Converts a `Path` to a [`Cow`]. - /// - /// Any non-Unicode sequences are replaced with - /// [`U+FFFD REPLACEMENT CHARACTER`][U+FFFD]. - /// - /// [U+FFFD]: super::char::REPLACEMENT_CHARACTER - /// - /// # Examples - /// - /// Calling `to_string_lossy` on a `Path` with valid unicode: - /// - /// ``` - /// use std::path::Path; - /// - /// let path = Path::new("foo.txt"); - /// assert_eq!(path.to_string_lossy(), "foo.txt"); - /// ``` - /// - /// Had `path` contained invalid unicode, the `to_string_lossy` call might - /// have returned `"fo�.txt"`. - #[stable(feature = "rust1", since = "1.0.0")] - #[must_use = "this returns the result of the operation, \ - without modifying the original"] - #[inline] - pub fn to_string_lossy(&self) -> Cow<'_, str> { - self.inner.to_string_lossy() - } - - /// Converts a `Path` to an owned [`PathBuf`]. - /// - /// # Examples - /// - /// ``` - /// use std::path::{Path, PathBuf}; - /// - /// let path_buf = Path::new("foo.txt").to_path_buf(); - /// assert_eq!(path_buf, PathBuf::from("foo.txt")); - /// ``` - #[rustc_conversion_suggestion] - #[must_use = "this returns the result of the operation, \ - without modifying the original"] - #[stable(feature = "rust1", since = "1.0.0")] - pub fn to_path_buf(&self) -> PathBuf { - PathBuf::from(self.inner.to_os_string()) - } - - /// Returns `true` if the `Path` is absolute, i.e., if it is independent of - /// the current directory. - /// - /// * On Unix, a path is absolute if it starts with the root, so - /// `is_absolute` and [`has_root`] are equivalent. - /// - /// * On Windows, a path is absolute if it has a prefix and starts with the - /// root: `c:\windows` is absolute, while `c:temp` and `\temp` are not. - /// - /// # Examples - /// - /// ``` - /// use std::path::Path; - /// - /// assert!(!Path::new("foo.txt").is_absolute()); - /// ``` - /// - /// [`has_root`]: Path::has_root - #[stable(feature = "rust1", since = "1.0.0")] - #[must_use] - #[allow(deprecated)] - pub fn is_absolute(&self) -> bool { - if cfg!(target_os = "redox") { - // FIXME: Allow Redox prefixes - self.has_root() || has_redox_scheme(self.as_u8_slice()) - } else { - self.has_root() && (cfg!(any(unix, target_os = "wasi")) || self.prefix().is_some()) - } - } - - /// Returns `true` if the `Path` is relative, i.e., not absolute. - /// - /// See [`is_absolute`]'s documentation for more details. - /// - /// # Examples - /// - /// ``` - /// use std::path::Path; - /// - /// assert!(Path::new("foo.txt").is_relative()); - /// ``` - /// - /// [`is_absolute`]: Path::is_absolute - #[stable(feature = "rust1", since = "1.0.0")] - #[must_use] - #[inline] - pub fn is_relative(&self) -> bool { - !self.is_absolute() - } - - fn prefix(&self) -> Option> { - self.components().prefix - } - - /// Returns `true` if the `Path` has a root. - /// - /// * On Unix, a path has a root if it begins with `/`. - /// - /// * On Windows, a path has a root if it: - /// * has no prefix and begins with a separator, e.g., `\windows` - /// * has a prefix followed by a separator, e.g., `c:\windows` but not `c:windows` - /// * has any non-disk prefix, e.g., `\\server\share` - /// - /// # Examples - /// - /// ``` - /// use std::path::Path; - /// - /// assert!(Path::new("/etc/passwd").has_root()); - /// ``` - #[stable(feature = "rust1", since = "1.0.0")] - #[must_use] - #[inline] - pub fn has_root(&self) -> bool { - self.components().has_root() - } - - /// Returns the `Path` without its final component, if there is one. - /// - /// This means it returns `Some("")` for relative paths with one component. - /// - /// Returns [`None`] if the path terminates in a root or prefix, or if it's - /// the empty string. - /// - /// # Examples - /// - /// ``` - /// use std::path::Path; - /// - /// let path = Path::new("/foo/bar"); - /// let parent = path.parent().unwrap(); - /// assert_eq!(parent, Path::new("/foo")); - /// - /// let grand_parent = parent.parent().unwrap(); - /// assert_eq!(grand_parent, Path::new("/")); - /// assert_eq!(grand_parent.parent(), None); - /// - /// let relative_path = Path::new("foo/bar"); - /// let parent = relative_path.parent(); - /// assert_eq!(parent, Some(Path::new("foo"))); - /// let grand_parent = parent.and_then(Path::parent); - /// assert_eq!(grand_parent, Some(Path::new(""))); - /// let great_grand_parent = grand_parent.and_then(Path::parent); - /// assert_eq!(great_grand_parent, None); - /// ``` - #[stable(feature = "rust1", since = "1.0.0")] - #[doc(alias = "dirname")] - #[must_use] - pub fn parent(&self) -> Option<&Path> { - let mut comps = self.components(); - let comp = comps.next_back(); - comp.and_then(|p| match p { - Component::Normal(_) | Component::CurDir | Component::ParentDir => { - Some(comps.as_path()) - } - _ => None, - }) - } - - /// Produces an iterator over `Path` and its ancestors. - /// - /// The iterator will yield the `Path` that is returned if the [`parent`] method is used zero - /// or more times. If the [`parent`] method returns [`None`], the iterator will do likewise. - /// The iterator will always yield at least one value, namely `Some(&self)`. Next it will yield - /// `&self.parent()`, `&self.parent().and_then(Path::parent)` and so on. - /// - /// # Examples - /// - /// ``` - /// use std::path::Path; - /// - /// let mut ancestors = Path::new("/foo/bar").ancestors(); - /// assert_eq!(ancestors.next(), Some(Path::new("/foo/bar"))); - /// assert_eq!(ancestors.next(), Some(Path::new("/foo"))); - /// assert_eq!(ancestors.next(), Some(Path::new("/"))); - /// assert_eq!(ancestors.next(), None); - /// - /// let mut ancestors = Path::new("../foo/bar").ancestors(); - /// assert_eq!(ancestors.next(), Some(Path::new("../foo/bar"))); - /// assert_eq!(ancestors.next(), Some(Path::new("../foo"))); - /// assert_eq!(ancestors.next(), Some(Path::new(".."))); - /// assert_eq!(ancestors.next(), Some(Path::new(""))); - /// assert_eq!(ancestors.next(), None); - /// ``` - /// - /// [`parent`]: Path::parent - #[stable(feature = "path_ancestors", since = "1.28.0")] - #[inline] - pub fn ancestors(&self) -> Ancestors<'_> { - Ancestors { next: Some(&self) } - } - - /// Returns the final component of the `Path`, if there is one. - /// - /// If the path is a normal file, this is the file name. If it's the path of a directory, this - /// is the directory name. - /// - /// Returns [`None`] if the path terminates in `..`. - /// - /// # Examples - /// - /// ``` - /// use std::path::Path; - /// use std::ffi::OsStr; - /// - /// assert_eq!(Some(OsStr::new("bin")), Path::new("/usr/bin/").file_name()); - /// assert_eq!(Some(OsStr::new("foo.txt")), Path::new("tmp/foo.txt").file_name()); - /// assert_eq!(Some(OsStr::new("foo.txt")), Path::new("foo.txt/.").file_name()); - /// assert_eq!(Some(OsStr::new("foo.txt")), Path::new("foo.txt/.//").file_name()); - /// assert_eq!(None, Path::new("foo.txt/..").file_name()); - /// assert_eq!(None, Path::new("/").file_name()); - /// ``` - #[stable(feature = "rust1", since = "1.0.0")] - #[doc(alias = "basename")] - #[must_use] - pub fn file_name(&self) -> Option<&OsStr> { - self.components().next_back().and_then(|p| match p { - Component::Normal(p) => Some(p), - _ => None, - }) - } - - /// Returns a path that, when joined onto `base`, yields `self`. - /// - /// # Errors - /// - /// If `base` is not a prefix of `self` (i.e., [`starts_with`] - /// returns `false`), returns [`Err`]. - /// - /// [`starts_with`]: Path::starts_with - /// - /// # Examples - /// - /// ``` - /// use std::path::{Path, PathBuf}; - /// - /// let path = Path::new("/test/haha/foo.txt"); - /// - /// assert_eq!(path.strip_prefix("/"), Ok(Path::new("test/haha/foo.txt"))); - /// assert_eq!(path.strip_prefix("/test"), Ok(Path::new("haha/foo.txt"))); - /// assert_eq!(path.strip_prefix("/test/"), Ok(Path::new("haha/foo.txt"))); - /// assert_eq!(path.strip_prefix("/test/haha/foo.txt"), Ok(Path::new(""))); - /// assert_eq!(path.strip_prefix("/test/haha/foo.txt/"), Ok(Path::new(""))); - /// - /// assert!(path.strip_prefix("test").is_err()); - /// assert!(path.strip_prefix("/haha").is_err()); - /// - /// let prefix = PathBuf::from("/test/"); - /// assert_eq!(path.strip_prefix(prefix), Ok(Path::new("haha/foo.txt"))); - /// ``` - #[stable(since = "1.7.0", feature = "path_strip_prefix")] - pub fn strip_prefix

(&self, base: P) -> Result<&Path, StripPrefixError> - where - P: AsRef, - { - self._strip_prefix(base.as_ref()) - } - - fn _strip_prefix(&self, base: &Path) -> Result<&Path, StripPrefixError> { - iter_after(self.components(), base.components()) - .map(|c| c.as_path()) - .ok_or(StripPrefixError(())) - } - - /// Determines whether `base` is a prefix of `self`. - /// - /// Only considers whole path components to match. - /// - /// # Examples - /// - /// ``` - /// use std::path::Path; - /// - /// let path = Path::new("/etc/passwd"); - /// - /// assert!(path.starts_with("/etc")); - /// assert!(path.starts_with("/etc/")); - /// assert!(path.starts_with("/etc/passwd")); - /// assert!(path.starts_with("/etc/passwd/")); // extra slash is okay - /// assert!(path.starts_with("/etc/passwd///")); // multiple extra slashes are okay - /// - /// assert!(!path.starts_with("/e")); - /// assert!(!path.starts_with("/etc/passwd.txt")); - /// - /// assert!(!Path::new("/etc/foo.rs").starts_with("/etc/foo")); - /// ``` - #[stable(feature = "rust1", since = "1.0.0")] - #[must_use] - pub fn starts_with>(&self, base: P) -> bool { - self._starts_with(base.as_ref()) - } - - fn _starts_with(&self, base: &Path) -> bool { - iter_after(self.components(), base.components()).is_some() - } - - /// Determines whether `child` is a suffix of `self`. - /// - /// Only considers whole path components to match. - /// - /// # Examples - /// - /// ``` - /// use std::path::Path; - /// - /// let path = Path::new("/etc/resolv.conf"); - /// - /// assert!(path.ends_with("resolv.conf")); - /// assert!(path.ends_with("etc/resolv.conf")); - /// assert!(path.ends_with("/etc/resolv.conf")); - /// - /// assert!(!path.ends_with("/resolv.conf")); - /// assert!(!path.ends_with("conf")); // use .extension() instead - /// ``` - #[stable(feature = "rust1", since = "1.0.0")] - #[must_use] - pub fn ends_with>(&self, child: P) -> bool { - self._ends_with(child.as_ref()) - } - - fn _ends_with(&self, child: &Path) -> bool { - iter_after(self.components().rev(), child.components().rev()).is_some() - } - - /// Extracts the stem (non-extension) portion of [`self.file_name`]. - /// - /// [`self.file_name`]: Path::file_name - /// - /// The stem is: - /// - /// * [`None`], if there is no file name; - /// * The entire file name if there is no embedded `.`; - /// * The entire file name if the file name begins with `.` and has no other `.`s within; - /// * Otherwise, the portion of the file name before the final `.` - /// - /// # Examples - /// - /// ``` - /// use std::path::Path; - /// - /// assert_eq!("foo", Path::new("foo.rs").file_stem().unwrap()); - /// assert_eq!("foo.tar", Path::new("foo.tar.gz").file_stem().unwrap()); - /// ``` - /// - /// # See Also - /// This method is similar to [`Path::file_prefix`], which extracts the portion of the file name - /// before the *first* `.` - /// - /// [`Path::file_prefix`]: Path::file_prefix - /// - #[stable(feature = "rust1", since = "1.0.0")] - #[must_use] - pub fn file_stem(&self) -> Option<&OsStr> { - self.file_name().map(rsplit_file_at_dot).and_then(|(before, after)| before.or(after)) - } - - /// Extracts the prefix of [`self.file_name`]. - /// - /// The prefix is: - /// - /// * [`None`], if there is no file name; - /// * The entire file name if there is no embedded `.`; - /// * The portion of the file name before the first non-beginning `.`; - /// * The entire file name if the file name begins with `.` and has no other `.`s within; - /// * The portion of the file name before the second `.` if the file name begins with `.` - /// - /// [`self.file_name`]: Path::file_name - /// - /// # Examples - /// - /// ``` - /// # #![feature(path_file_prefix)] - /// use std::path::Path; - /// - /// assert_eq!("foo", Path::new("foo.rs").file_prefix().unwrap()); - /// assert_eq!("foo", Path::new("foo.tar.gz").file_prefix().unwrap()); - /// ``` - /// - /// # See Also - /// This method is similar to [`Path::file_stem`], which extracts the portion of the file name - /// before the *last* `.` - /// - /// [`Path::file_stem`]: Path::file_stem - /// - #[unstable(feature = "path_file_prefix", issue = "86319")] - #[must_use] - pub fn file_prefix(&self) -> Option<&OsStr> { - self.file_name().map(split_file_at_dot).and_then(|(before, _after)| Some(before)) - } - - /// Extracts the extension (without the leading dot) of [`self.file_name`], if possible. - /// - /// The extension is: - /// - /// * [`None`], if there is no file name; - /// * [`None`], if there is no embedded `.`; - /// * [`None`], if the file name begins with `.` and has no other `.`s within; - /// * Otherwise, the portion of the file name after the final `.` - /// - /// [`self.file_name`]: Path::file_name - /// - /// # Examples - /// - /// ``` - /// use std::path::Path; - /// - /// assert_eq!("rs", Path::new("foo.rs").extension().unwrap()); - /// assert_eq!("gz", Path::new("foo.tar.gz").extension().unwrap()); - /// ``` - #[stable(feature = "rust1", since = "1.0.0")] - #[must_use] - pub fn extension(&self) -> Option<&OsStr> { - self.file_name().map(rsplit_file_at_dot).and_then(|(before, after)| before.and(after)) - } - - /// Creates an owned [`PathBuf`] with `path` adjoined to `self`. - /// - /// If `path` is absolute, it replaces the current path. - /// - /// See [`PathBuf::push`] for more details on what it means to adjoin a path. - /// - /// # Examples - /// - /// ``` - /// use std::path::{Path, PathBuf}; - /// - /// assert_eq!(Path::new("/etc").join("passwd"), PathBuf::from("/etc/passwd")); - /// assert_eq!(Path::new("/etc").join("/bin/sh"), PathBuf::from("/bin/sh")); - /// ``` - #[stable(feature = "rust1", since = "1.0.0")] - #[must_use] - pub fn join>(&self, path: P) -> PathBuf { - self._join(path.as_ref()) - } - - fn _join(&self, path: &Path) -> PathBuf { - let mut buf = self.to_path_buf(); - buf.push(path); - buf - } - - /// Creates an owned [`PathBuf`] like `self` but with the given file name. - /// - /// See [`PathBuf::set_file_name`] for more details. - /// - /// # Examples - /// - /// ``` - /// use std::path::{Path, PathBuf}; - /// - /// let path = Path::new("/tmp/foo.png"); - /// assert_eq!(path.with_file_name("bar"), PathBuf::from("/tmp/bar")); - /// assert_eq!(path.with_file_name("bar.txt"), PathBuf::from("/tmp/bar.txt")); - /// - /// let path = Path::new("/tmp"); - /// assert_eq!(path.with_file_name("var"), PathBuf::from("/var")); - /// ``` - #[stable(feature = "rust1", since = "1.0.0")] - #[must_use] - pub fn with_file_name>(&self, file_name: S) -> PathBuf { - self._with_file_name(file_name.as_ref()) - } - - fn _with_file_name(&self, file_name: &OsStr) -> PathBuf { - let mut buf = self.to_path_buf(); - buf.set_file_name(file_name); - buf - } - - /// Creates an owned [`PathBuf`] like `self` but with the given extension. - /// - /// See [`PathBuf::set_extension`] for more details. - /// - /// # Examples - /// - /// ``` - /// use std::path::{Path, PathBuf}; - /// - /// let path = Path::new("foo.rs"); - /// assert_eq!(path.with_extension("txt"), PathBuf::from("foo.txt")); - /// - /// let path = Path::new("foo.tar.gz"); - /// assert_eq!(path.with_extension(""), PathBuf::from("foo.tar")); - /// assert_eq!(path.with_extension("xz"), PathBuf::from("foo.tar.xz")); - /// assert_eq!(path.with_extension("").with_extension("txt"), PathBuf::from("foo.txt")); - /// ``` - #[stable(feature = "rust1", since = "1.0.0")] - pub fn with_extension>(&self, extension: S) -> PathBuf { - self._with_extension(extension.as_ref()) - } - - fn _with_extension(&self, extension: &OsStr) -> PathBuf { - let self_len = self.as_os_str().len(); - let self_bytes = self.as_os_str().as_encoded_bytes(); - - let (new_capacity, slice_to_copy) = match self.extension() { - None => { - // Enough capacity for the extension and the dot - let capacity = self_len + extension.len() + 1; - let whole_path = self_bytes; - (capacity, whole_path) - } - Some(previous_extension) => { - let capacity = self_len + extension.len() - previous_extension.len(); - let path_till_dot = &self_bytes[..self_len - previous_extension.len()]; - (capacity, path_till_dot) - } - }; - - let mut new_path = PathBuf::with_capacity(new_capacity); - new_path.inner.extend_from_slice(slice_to_copy); - new_path.set_extension(extension); - new_path - } - - /// Creates an owned [`PathBuf`] like `self` but with the extension added. - /// - /// See [`PathBuf::add_extension`] for more details. - /// - /// # Examples - /// - /// ``` - /// #![feature(path_add_extension)] - /// - /// use std::path::{Path, PathBuf}; - /// - /// let path = Path::new("foo.rs"); - /// assert_eq!(path.with_added_extension("txt"), PathBuf::from("foo.rs.txt")); - /// - /// let path = Path::new("foo.tar.gz"); - /// assert_eq!(path.with_added_extension(""), PathBuf::from("foo.tar.gz")); - /// assert_eq!(path.with_added_extension("xz"), PathBuf::from("foo.tar.gz.xz")); - /// assert_eq!(path.with_added_extension("").with_added_extension("txt"), PathBuf::from("foo.tar.gz.txt")); - /// ``` - #[unstable(feature = "path_add_extension", issue = "127292")] - pub fn with_added_extension>(&self, extension: S) -> PathBuf { - let mut new_path = self.to_path_buf(); - new_path.add_extension(extension); - new_path - } - - /// Produces an iterator over the [`Component`]s of the path. - /// - /// When parsing the path, there is a small amount of normalization: - /// - /// * Repeated separators are ignored, so `a/b` and `a//b` both have - /// `a` and `b` as components. - /// - /// * Occurrences of `.` are normalized away, except if they are at the - /// beginning of the path. For example, `a/./b`, `a/b/`, `a/b/.` and - /// `a/b` all have `a` and `b` as components, but `./a/b` starts with - /// an additional [`CurDir`] component. - /// - /// * A trailing slash is normalized away, `/a/b` and `/a/b/` are equivalent. - /// - /// Note that no other normalization takes place; in particular, `a/c` - /// and `a/b/../c` are distinct, to account for the possibility that `b` - /// is a symbolic link (so its parent isn't `a`). - /// - /// # Examples - /// - /// ``` - /// use std::path::{Path, Component}; - /// use std::ffi::OsStr; - /// - /// let mut components = Path::new("/tmp/foo.txt").components(); - /// - /// assert_eq!(components.next(), Some(Component::RootDir)); - /// assert_eq!(components.next(), Some(Component::Normal(OsStr::new("tmp")))); - /// assert_eq!(components.next(), Some(Component::Normal(OsStr::new("foo.txt")))); - /// assert_eq!(components.next(), None) - /// ``` - /// - /// [`CurDir`]: Component::CurDir - #[stable(feature = "rust1", since = "1.0.0")] - pub fn components(&self) -> Components<'_> { - let prefix = parse_prefix(self.as_os_str()); - Components { - path: self.as_u8_slice(), - prefix, - has_physical_root: has_physical_root(self.as_u8_slice(), prefix) - || has_redox_scheme(self.as_u8_slice()), - front: State::Prefix, - back: State::Body, - } - } +#[stable(feature = "rust1", since = "1.0.0")] +pub use alloc::path::PathBuf; +#[stable(feature = "rust1", since = "1.0.0")] +pub use core::path::{ + is_separator, Ancestors, Component, Components, Display, Iter, Path, Prefix, PrefixComponent, + StripPrefixError, MAIN_SEPARATOR, MAIN_SEPARATOR_STR, +}; - /// Produces an iterator over the path's components viewed as [`OsStr`] - /// slices. - /// - /// For more information about the particulars of how the path is separated - /// into components, see [`components`]. - /// - /// [`components`]: Path::components - /// - /// # Examples - /// - /// ``` - /// use std::path::{self, Path}; - /// use std::ffi::OsStr; - /// - /// let mut it = Path::new("/tmp/foo.txt").iter(); - /// assert_eq!(it.next(), Some(OsStr::new(&path::MAIN_SEPARATOR.to_string()))); - /// assert_eq!(it.next(), Some(OsStr::new("tmp"))); - /// assert_eq!(it.next(), Some(OsStr::new("foo.txt"))); - /// assert_eq!(it.next(), None) - /// ``` - #[stable(feature = "rust1", since = "1.0.0")] - #[inline] - pub fn iter(&self) -> Iter<'_> { - Iter { inner: self.components() } - } +use crate::{fs, io, sys}; - /// Returns an object that implements [`Display`] for safely printing paths - /// that may contain non-Unicode data. This may perform lossy conversion, - /// depending on the platform. If you would like an implementation which - /// escapes the path please use [`Debug`] instead. - /// - /// [`Display`]: fmt::Display - /// [`Debug`]: fmt::Debug - /// - /// # Examples - /// - /// ``` - /// use std::path::Path; - /// - /// let path = Path::new("/tmp/foo.rs"); - /// - /// println!("{}", path.display()); - /// ``` - #[stable(feature = "rust1", since = "1.0.0")] - #[must_use = "this does not display the path, \ - it returns an object that can be displayed"] - #[inline] - pub fn display(&self) -> Display<'_> { - Display { inner: self.inner.display() } - } +//////////////////////////////////////////////////////////////////////////////// +// GENERAL NOTES +//////////////////////////////////////////////////////////////////////////////// +// +// Parsing in this module is done by directly transmuting OsStr to [u8] slices, +// taking advantage of the fact that OsStr always encodes ASCII characters +// as-is. Eventually, this transmutation should be replaced by direct uses of +// OsStr APIs for parsing, but it will take a while for those to become +// available. +impl Path { /// Queries the file system to get information about a file, directory, etc. /// /// This function will traverse symbolic links to query information about the @@ -2858,6 +107,7 @@ impl Path { /// let metadata = path.metadata().expect("metadata call failed"); /// println!("{:?}", metadata.file_type()); /// ``` + #[rustc_allow_incoherent_impl] #[stable(feature = "path_ext", since = "1.5.0")] #[inline] pub fn metadata(&self) -> io::Result { @@ -2877,6 +127,7 @@ impl Path { /// let metadata = path.symlink_metadata().expect("symlink_metadata call failed"); /// println!("{:?}", metadata.file_type()); /// ``` + #[rustc_allow_incoherent_impl] #[stable(feature = "path_ext", since = "1.5.0")] #[inline] pub fn symlink_metadata(&self) -> io::Result { @@ -2896,6 +147,7 @@ impl Path { /// let path = Path::new("/foo/test/../test/bar.rs"); /// assert_eq!(path.canonicalize().unwrap(), PathBuf::from("/foo/test/bar.rs")); /// ``` + #[rustc_allow_incoherent_impl] #[stable(feature = "path_ext", since = "1.5.0")] #[inline] pub fn canonicalize(&self) -> io::Result { @@ -2914,6 +166,7 @@ impl Path { /// let path = Path::new("/laputa/sky_castle.rs"); /// let path_link = path.read_link().expect("read_link call failed"); /// ``` + #[rustc_allow_incoherent_impl] #[stable(feature = "path_ext", since = "1.5.0")] #[inline] pub fn read_link(&self) -> io::Result { @@ -2939,6 +192,7 @@ impl Path { /// } /// } /// ``` + #[rustc_allow_incoherent_impl] #[stable(feature = "path_ext", since = "1.5.0")] #[inline] pub fn read_dir(&self) -> io::Result { @@ -2969,6 +223,7 @@ impl Path { /// check errors, call [`Path::try_exists`]. /// /// [`try_exists()`]: Self::try_exists + #[rustc_allow_incoherent_impl] #[stable(feature = "path_ext", since = "1.5.0")] #[must_use] #[inline] @@ -3002,6 +257,7 @@ impl Path { /// ``` /// /// [`exists()`]: Self::exists + #[rustc_allow_incoherent_impl] #[stable(feature = "path_try_exists", since = "1.63.0")] #[inline] pub fn try_exists(&self) -> io::Result { @@ -3035,6 +291,7 @@ impl Path { /// it. Only using `is_file` can break workflows like `diff <( prog_a )` on /// a Unix-like system for example. See [`fs::File::open`] or /// [`fs::OpenOptions::open`] for more information. + #[rustc_allow_incoherent_impl] #[stable(feature = "path_ext", since = "1.5.0")] #[must_use] pub fn is_file(&self) -> bool { @@ -3062,6 +319,7 @@ impl Path { /// This is a convenience function that coerces errors to false. If you want to /// check errors, call [`fs::metadata`] and handle its [`Result`]. Then call /// [`fs::Metadata::is_dir`] if it was [`Ok`]. + #[rustc_allow_incoherent_impl] #[stable(feature = "path_ext", since = "1.5.0")] #[must_use] pub fn is_dir(&self) -> bool { @@ -3078,6 +336,7 @@ impl Path { /// /// # Examples /// + #[rustc_allow_incoherent_impl] #[cfg_attr(unix, doc = "```no_run")] #[cfg_attr(not(unix), doc = "```ignore")] /// use std::path::Path; @@ -3099,348 +358,6 @@ impl Path { pub fn is_symlink(&self) -> bool { fs::symlink_metadata(self).map(|m| m.is_symlink()).unwrap_or(false) } - - /// Converts a [`Box`](Box) into a [`PathBuf`] without copying or - /// allocating. - #[stable(feature = "into_boxed_path", since = "1.20.0")] - #[must_use = "`self` will be dropped if the result is not used"] - pub fn into_path_buf(self: Box) -> PathBuf { - let rw = Box::into_raw(self) as *mut OsStr; - let inner = unsafe { Box::from_raw(rw) }; - PathBuf { inner: OsString::from(inner) } - } -} - -#[unstable(feature = "clone_to_uninit", issue = "126799")] -unsafe impl CloneToUninit for Path { - #[inline] - #[cfg_attr(debug_assertions, track_caller)] - unsafe fn clone_to_uninit(&self, dst: *mut Self) { - // SAFETY: Path is just a wrapper around OsStr - unsafe { self.inner.clone_to_uninit(core::ptr::addr_of_mut!((*dst).inner)) } - } -} - -#[stable(feature = "rust1", since = "1.0.0")] -impl AsRef for Path { - #[inline] - fn as_ref(&self) -> &OsStr { - &self.inner - } -} - -#[stable(feature = "rust1", since = "1.0.0")] -impl fmt::Debug for Path { - fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { - fmt::Debug::fmt(&self.inner, formatter) - } -} - -/// Helper struct for safely printing paths with [`format!`] and `{}`. -/// -/// A [`Path`] might contain non-Unicode data. This `struct` implements the -/// [`Display`] trait in a way that mitigates that. It is created by the -/// [`display`](Path::display) method on [`Path`]. This may perform lossy -/// conversion, depending on the platform. If you would like an implementation -/// which escapes the path please use [`Debug`] instead. -/// -/// # Examples -/// -/// ``` -/// use std::path::Path; -/// -/// let path = Path::new("/tmp/foo.rs"); -/// -/// println!("{}", path.display()); -/// ``` -/// -/// [`Display`]: fmt::Display -/// [`format!`]: crate::format -#[stable(feature = "rust1", since = "1.0.0")] -pub struct Display<'a> { - inner: os_str::Display<'a>, -} - -#[stable(feature = "rust1", since = "1.0.0")] -impl fmt::Debug for Display<'_> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - fmt::Debug::fmt(&self.inner, f) - } -} - -#[stable(feature = "rust1", since = "1.0.0")] -impl fmt::Display for Display<'_> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - fmt::Display::fmt(&self.inner, f) - } -} - -#[stable(feature = "rust1", since = "1.0.0")] -impl PartialEq for Path { - #[inline] - fn eq(&self, other: &Path) -> bool { - self.components() == other.components() - } -} - -#[stable(feature = "rust1", since = "1.0.0")] -impl Hash for Path { - fn hash(&self, h: &mut H) { - let bytes = self.as_u8_slice(); - let (prefix_len, verbatim) = match parse_prefix(&self.inner) { - Some(prefix) => { - prefix.hash(h); - (prefix.len(), prefix.is_verbatim()) - } - None => (0, false), - }; - let bytes = &bytes[prefix_len..]; - - let mut component_start = 0; - // track some extra state to avoid prefix collisions. - // ["foo", "bar"] and ["foobar"], will have the same payload bytes - // but result in different chunk_bits - let mut chunk_bits: usize = 0; - - for i in 0..bytes.len() { - let is_sep = if verbatim { is_verbatim_sep(bytes[i]) } else { is_sep_byte(bytes[i]) }; - if is_sep { - if i > component_start { - let to_hash = &bytes[component_start..i]; - chunk_bits = chunk_bits.wrapping_add(to_hash.len()); - chunk_bits = chunk_bits.rotate_right(2); - h.write(to_hash); - } - - // skip over separator and optionally a following CurDir item - // since components() would normalize these away. - component_start = i + 1; - - let tail = &bytes[component_start..]; - - if !verbatim { - component_start += match tail { - [b'.'] => 1, - [b'.', sep @ _, ..] if is_sep_byte(*sep) => 1, - _ => 0, - }; - } - } - } - - if component_start < bytes.len() { - let to_hash = &bytes[component_start..]; - chunk_bits = chunk_bits.wrapping_add(to_hash.len()); - chunk_bits = chunk_bits.rotate_right(2); - h.write(to_hash); - } - - h.write_usize(chunk_bits); - } -} - -#[stable(feature = "rust1", since = "1.0.0")] -impl Eq for Path {} - -#[stable(feature = "rust1", since = "1.0.0")] -impl PartialOrd for Path { - #[inline] - fn partial_cmp(&self, other: &Path) -> Option { - Some(compare_components(self.components(), other.components())) - } -} - -#[stable(feature = "rust1", since = "1.0.0")] -impl Ord for Path { - #[inline] - fn cmp(&self, other: &Path) -> cmp::Ordering { - compare_components(self.components(), other.components()) - } -} - -#[stable(feature = "rust1", since = "1.0.0")] -impl AsRef for Path { - #[inline] - fn as_ref(&self) -> &Path { - self - } -} - -#[stable(feature = "rust1", since = "1.0.0")] -impl AsRef for OsStr { - #[inline] - fn as_ref(&self) -> &Path { - Path::new(self) - } -} - -#[stable(feature = "cow_os_str_as_ref_path", since = "1.8.0")] -impl AsRef for Cow<'_, OsStr> { - #[inline] - fn as_ref(&self) -> &Path { - Path::new(self) - } -} - -#[stable(feature = "rust1", since = "1.0.0")] -impl AsRef for OsString { - #[inline] - fn as_ref(&self) -> &Path { - Path::new(self) - } -} - -#[stable(feature = "rust1", since = "1.0.0")] -impl AsRef for str { - #[inline] - fn as_ref(&self) -> &Path { - Path::new(self) - } -} - -#[stable(feature = "rust1", since = "1.0.0")] -impl AsRef for String { - #[inline] - fn as_ref(&self) -> &Path { - Path::new(self) - } -} - -#[stable(feature = "rust1", since = "1.0.0")] -impl AsRef for PathBuf { - #[inline] - fn as_ref(&self) -> &Path { - self - } -} - -#[stable(feature = "path_into_iter", since = "1.6.0")] -impl<'a> IntoIterator for &'a PathBuf { - type Item = &'a OsStr; - type IntoIter = Iter<'a>; - #[inline] - fn into_iter(self) -> Iter<'a> { - self.iter() - } -} - -#[stable(feature = "path_into_iter", since = "1.6.0")] -impl<'a> IntoIterator for &'a Path { - type Item = &'a OsStr; - type IntoIter = Iter<'a>; - #[inline] - fn into_iter(self) -> Iter<'a> { - self.iter() - } -} - -macro_rules! impl_cmp { - (<$($life:lifetime),*> $lhs:ty, $rhs: ty) => { - #[stable(feature = "partialeq_path", since = "1.6.0")] - impl<$($life),*> PartialEq<$rhs> for $lhs { - #[inline] - fn eq(&self, other: &$rhs) -> bool { - ::eq(self, other) - } - } - - #[stable(feature = "partialeq_path", since = "1.6.0")] - impl<$($life),*> PartialEq<$lhs> for $rhs { - #[inline] - fn eq(&self, other: &$lhs) -> bool { - ::eq(self, other) - } - } - - #[stable(feature = "cmp_path", since = "1.8.0")] - impl<$($life),*> PartialOrd<$rhs> for $lhs { - #[inline] - fn partial_cmp(&self, other: &$rhs) -> Option { - ::partial_cmp(self, other) - } - } - - #[stable(feature = "cmp_path", since = "1.8.0")] - impl<$($life),*> PartialOrd<$lhs> for $rhs { - #[inline] - fn partial_cmp(&self, other: &$lhs) -> Option { - ::partial_cmp(self, other) - } - } - }; -} - -impl_cmp!(<> PathBuf, Path); -impl_cmp!(<'a> PathBuf, &'a Path); -impl_cmp!(<'a> Cow<'a, Path>, Path); -impl_cmp!(<'a, 'b> Cow<'a, Path>, &'b Path); -impl_cmp!(<'a> Cow<'a, Path>, PathBuf); - -macro_rules! impl_cmp_os_str { - (<$($life:lifetime),*> $lhs:ty, $rhs: ty) => { - #[stable(feature = "cmp_path", since = "1.8.0")] - impl<$($life),*> PartialEq<$rhs> for $lhs { - #[inline] - fn eq(&self, other: &$rhs) -> bool { - ::eq(self, other.as_ref()) - } - } - - #[stable(feature = "cmp_path", since = "1.8.0")] - impl<$($life),*> PartialEq<$lhs> for $rhs { - #[inline] - fn eq(&self, other: &$lhs) -> bool { - ::eq(self.as_ref(), other) - } - } - - #[stable(feature = "cmp_path", since = "1.8.0")] - impl<$($life),*> PartialOrd<$rhs> for $lhs { - #[inline] - fn partial_cmp(&self, other: &$rhs) -> Option { - ::partial_cmp(self, other.as_ref()) - } - } - - #[stable(feature = "cmp_path", since = "1.8.0")] - impl<$($life),*> PartialOrd<$lhs> for $rhs { - #[inline] - fn partial_cmp(&self, other: &$lhs) -> Option { - ::partial_cmp(self.as_ref(), other) - } - } - }; -} - -impl_cmp_os_str!(<> PathBuf, OsStr); -impl_cmp_os_str!(<'a> PathBuf, &'a OsStr); -impl_cmp_os_str!(<'a> PathBuf, Cow<'a, OsStr>); -impl_cmp_os_str!(<> PathBuf, OsString); -impl_cmp_os_str!(<> Path, OsStr); -impl_cmp_os_str!(<'a> Path, &'a OsStr); -impl_cmp_os_str!(<'a> Path, Cow<'a, OsStr>); -impl_cmp_os_str!(<> Path, OsString); -impl_cmp_os_str!(<'a> &'a Path, OsStr); -impl_cmp_os_str!(<'a, 'b> &'a Path, Cow<'b, OsStr>); -impl_cmp_os_str!(<'a> &'a Path, OsString); -impl_cmp_os_str!(<'a> Cow<'a, Path>, OsStr); -impl_cmp_os_str!(<'a, 'b> Cow<'a, Path>, &'b OsStr); -impl_cmp_os_str!(<'a> Cow<'a, Path>, OsString); - -#[stable(since = "1.7.0", feature = "strip_prefix")] -impl fmt::Display for StripPrefixError { - #[allow(deprecated, deprecated_in_future)] - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - self.description().fmt(f) - } -} - -#[stable(since = "1.7.0", feature = "strip_prefix")] -impl Error for StripPrefixError { - #[allow(deprecated)] - fn description(&self) -> &str { - "prefix not found" - } } /// Makes the path absolute without accessing the filesystem. diff --git a/library/std/src/path/tests.rs b/library/std/src/path/tests.rs index 6436872087d6c..58b78ea6c4015 100644 --- a/library/std/src/path/tests.rs +++ b/library/std/src/path/tests.rs @@ -1,1862 +1,4 @@ -use core::hint::black_box; - use super::*; -use crate::collections::{BTreeSet, HashSet}; -use crate::hash::DefaultHasher; -use crate::mem::MaybeUninit; -use crate::ptr; - -#[allow(unknown_lints, unused_macro_rules)] -macro_rules! t ( - ($path:expr, iter: $iter:expr) => ( - { - let path = Path::new($path); - - // Forward iteration - let comps = path.iter() - .map(|p| p.to_string_lossy().into_owned()) - .collect::>(); - let exp: &[&str] = &$iter; - let exps = exp.iter().map(|s| s.to_string()).collect::>(); - assert!(comps == exps, "iter: Expected {:?}, found {:?}", - exps, comps); - - // Reverse iteration - let comps = Path::new($path).iter().rev() - .map(|p| p.to_string_lossy().into_owned()) - .collect::>(); - let exps = exps.into_iter().rev().collect::>(); - assert!(comps == exps, "iter().rev(): Expected {:?}, found {:?}", - exps, comps); - } - ); - - ($path:expr, has_root: $has_root:expr, is_absolute: $is_absolute:expr) => ( - { - let path = Path::new($path); - - let act_root = path.has_root(); - assert!(act_root == $has_root, "has_root: Expected {:?}, found {:?}", - $has_root, act_root); - - let act_abs = path.is_absolute(); - assert!(act_abs == $is_absolute, "is_absolute: Expected {:?}, found {:?}", - $is_absolute, act_abs); - } - ); - - ($path:expr, parent: $parent:expr, file_name: $file:expr) => ( - { - let path = Path::new($path); - - let parent = path.parent().map(|p| p.to_str().unwrap()); - let exp_parent: Option<&str> = $parent; - assert!(parent == exp_parent, "parent: Expected {:?}, found {:?}", - exp_parent, parent); - - let file = path.file_name().map(|p| p.to_str().unwrap()); - let exp_file: Option<&str> = $file; - assert!(file == exp_file, "file_name: Expected {:?}, found {:?}", - exp_file, file); - } - ); - - ($path:expr, file_stem: $file_stem:expr, extension: $extension:expr) => ( - { - let path = Path::new($path); - - let stem = path.file_stem().map(|p| p.to_str().unwrap()); - let exp_stem: Option<&str> = $file_stem; - assert!(stem == exp_stem, "file_stem: Expected {:?}, found {:?}", - exp_stem, stem); - - let ext = path.extension().map(|p| p.to_str().unwrap()); - let exp_ext: Option<&str> = $extension; - assert!(ext == exp_ext, "extension: Expected {:?}, found {:?}", - exp_ext, ext); - } - ); - - ($path:expr, file_prefix: $file_prefix:expr, extension: $extension:expr) => ( - { - let path = Path::new($path); - - let prefix = path.file_prefix().map(|p| p.to_str().unwrap()); - let exp_prefix: Option<&str> = $file_prefix; - assert!(prefix == exp_prefix, "file_prefix: Expected {:?}, found {:?}", - exp_prefix, prefix); - - let ext = path.extension().map(|p| p.to_str().unwrap()); - let exp_ext: Option<&str> = $extension; - assert!(ext == exp_ext, "extension: Expected {:?}, found {:?}", - exp_ext, ext); - } - ); - - ($path:expr, iter: $iter:expr, - has_root: $has_root:expr, is_absolute: $is_absolute:expr, - parent: $parent:expr, file_name: $file:expr, - file_stem: $file_stem:expr, extension: $extension:expr, - file_prefix: $file_prefix:expr) => ( - { - t!($path, iter: $iter); - t!($path, has_root: $has_root, is_absolute: $is_absolute); - t!($path, parent: $parent, file_name: $file); - t!($path, file_stem: $file_stem, extension: $extension); - t!($path, file_prefix: $file_prefix, extension: $extension); - } - ); -); - -#[test] -fn into() { - use crate::borrow::Cow; - - let static_path = Path::new("/home/foo"); - let static_cow_path: Cow<'static, Path> = static_path.into(); - let pathbuf = PathBuf::from("/home/foo"); - - { - let path: &Path = &pathbuf; - let borrowed_cow_path: Cow<'_, Path> = path.into(); - - assert_eq!(static_cow_path, borrowed_cow_path); - } - - let owned_cow_path: Cow<'static, Path> = pathbuf.into(); - - assert_eq!(static_cow_path, owned_cow_path); -} - -#[test] -fn test_pathbuf_leak() { - let string = "/have/a/cake".to_owned(); - let (len, cap) = (string.len(), string.capacity()); - let buf = PathBuf::from(string); - let leaked = buf.leak(); - assert_eq!(leaked.as_os_str().as_encoded_bytes(), b"/have/a/cake"); - unsafe { drop(String::from_raw_parts(leaked.as_mut_os_str() as *mut OsStr as _, len, cap)) } -} - -#[test] -#[cfg(unix)] -pub fn test_decompositions_unix() { - t!("", - iter: [], - has_root: false, - is_absolute: false, - parent: None, - file_name: None, - file_stem: None, - extension: None, - file_prefix: None - ); - - t!("foo", - iter: ["foo"], - has_root: false, - is_absolute: false, - parent: Some(""), - file_name: Some("foo"), - file_stem: Some("foo"), - extension: None, - file_prefix: Some("foo") - ); - - t!("/", - iter: ["/"], - has_root: true, - is_absolute: true, - parent: None, - file_name: None, - file_stem: None, - extension: None, - file_prefix: None - ); - - t!("/foo", - iter: ["/", "foo"], - has_root: true, - is_absolute: true, - parent: Some("/"), - file_name: Some("foo"), - file_stem: Some("foo"), - extension: None, - file_prefix: Some("foo") - ); - - t!("foo/", - iter: ["foo"], - has_root: false, - is_absolute: false, - parent: Some(""), - file_name: Some("foo"), - file_stem: Some("foo"), - extension: None, - file_prefix: Some("foo") - ); - - t!("/foo/", - iter: ["/", "foo"], - has_root: true, - is_absolute: true, - parent: Some("/"), - file_name: Some("foo"), - file_stem: Some("foo"), - extension: None, - file_prefix: Some("foo") - ); - - t!("foo/bar", - iter: ["foo", "bar"], - has_root: false, - is_absolute: false, - parent: Some("foo"), - file_name: Some("bar"), - file_stem: Some("bar"), - extension: None, - file_prefix: Some("bar") - ); - - t!("/foo/bar", - iter: ["/", "foo", "bar"], - has_root: true, - is_absolute: true, - parent: Some("/foo"), - file_name: Some("bar"), - file_stem: Some("bar"), - extension: None, - file_prefix: Some("bar") - ); - - t!("///foo///", - iter: ["/", "foo"], - has_root: true, - is_absolute: true, - parent: Some("/"), - file_name: Some("foo"), - file_stem: Some("foo"), - extension: None, - file_prefix: Some("foo") - ); - - t!("///foo///bar", - iter: ["/", "foo", "bar"], - has_root: true, - is_absolute: true, - parent: Some("///foo"), - file_name: Some("bar"), - file_stem: Some("bar"), - extension: None, - file_prefix: Some("bar") - ); - - t!("./.", - iter: ["."], - has_root: false, - is_absolute: false, - parent: Some(""), - file_name: None, - file_stem: None, - extension: None, - file_prefix: None - ); - - t!("/..", - iter: ["/", ".."], - has_root: true, - is_absolute: true, - parent: Some("/"), - file_name: None, - file_stem: None, - extension: None, - file_prefix: None - ); - - t!("../", - iter: [".."], - has_root: false, - is_absolute: false, - parent: Some(""), - file_name: None, - file_stem: None, - extension: None, - file_prefix: None - ); - - t!("foo/.", - iter: ["foo"], - has_root: false, - is_absolute: false, - parent: Some(""), - file_name: Some("foo"), - file_stem: Some("foo"), - extension: None, - file_prefix: Some("foo") - ); - - t!("foo/..", - iter: ["foo", ".."], - has_root: false, - is_absolute: false, - parent: Some("foo"), - file_name: None, - file_stem: None, - extension: None, - file_prefix: None - ); - - t!("foo/./", - iter: ["foo"], - has_root: false, - is_absolute: false, - parent: Some(""), - file_name: Some("foo"), - file_stem: Some("foo"), - extension: None, - file_prefix: Some("foo") - ); - - t!("foo/./bar", - iter: ["foo", "bar"], - has_root: false, - is_absolute: false, - parent: Some("foo"), - file_name: Some("bar"), - file_stem: Some("bar"), - extension: None, - file_prefix: Some("bar") - ); - - t!("foo/../", - iter: ["foo", ".."], - has_root: false, - is_absolute: false, - parent: Some("foo"), - file_name: None, - file_stem: None, - extension: None, - file_prefix: None - ); - - t!("foo/../bar", - iter: ["foo", "..", "bar"], - has_root: false, - is_absolute: false, - parent: Some("foo/.."), - file_name: Some("bar"), - file_stem: Some("bar"), - extension: None, - file_prefix: Some("bar") - ); - - t!("./a", - iter: [".", "a"], - has_root: false, - is_absolute: false, - parent: Some("."), - file_name: Some("a"), - file_stem: Some("a"), - extension: None, - file_prefix: Some("a") - ); - - t!(".", - iter: ["."], - has_root: false, - is_absolute: false, - parent: Some(""), - file_name: None, - file_stem: None, - extension: None, - file_prefix: None - ); - - t!("./", - iter: ["."], - has_root: false, - is_absolute: false, - parent: Some(""), - file_name: None, - file_stem: None, - extension: None, - file_prefix: None - ); - - t!("a/b", - iter: ["a", "b"], - has_root: false, - is_absolute: false, - parent: Some("a"), - file_name: Some("b"), - file_stem: Some("b"), - extension: None, - file_prefix: Some("b") - ); - - t!("a//b", - iter: ["a", "b"], - has_root: false, - is_absolute: false, - parent: Some("a"), - file_name: Some("b"), - file_stem: Some("b"), - extension: None, - file_prefix: Some("b") - ); - - t!("a/./b", - iter: ["a", "b"], - has_root: false, - is_absolute: false, - parent: Some("a"), - file_name: Some("b"), - file_stem: Some("b"), - extension: None, - file_prefix: Some("b") - ); - - t!("a/b/c", - iter: ["a", "b", "c"], - has_root: false, - is_absolute: false, - parent: Some("a/b"), - file_name: Some("c"), - file_stem: Some("c"), - extension: None, - file_prefix: Some("c") - ); - - t!(".foo", - iter: [".foo"], - has_root: false, - is_absolute: false, - parent: Some(""), - file_name: Some(".foo"), - file_stem: Some(".foo"), - extension: None, - file_prefix: Some(".foo") - ); - - t!("a/.foo", - iter: ["a", ".foo"], - has_root: false, - is_absolute: false, - parent: Some("a"), - file_name: Some(".foo"), - file_stem: Some(".foo"), - extension: None, - file_prefix: Some(".foo") - ); - - t!("a/.rustfmt.toml", - iter: ["a", ".rustfmt.toml"], - has_root: false, - is_absolute: false, - parent: Some("a"), - file_name: Some(".rustfmt.toml"), - file_stem: Some(".rustfmt"), - extension: Some("toml"), - file_prefix: Some(".rustfmt") - ); - - t!("a/.x.y.z", - iter: ["a", ".x.y.z"], - has_root: false, - is_absolute: false, - parent: Some("a"), - file_name: Some(".x.y.z"), - file_stem: Some(".x.y"), - extension: Some("z"), - file_prefix: Some(".x") - ); -} - -#[test] -#[cfg(windows)] -pub fn test_decompositions_windows() { - t!("", - iter: [], - has_root: false, - is_absolute: false, - parent: None, - file_name: None, - file_stem: None, - extension: None, - file_prefix: None - ); - - t!("foo", - iter: ["foo"], - has_root: false, - is_absolute: false, - parent: Some(""), - file_name: Some("foo"), - file_stem: Some("foo"), - extension: None, - file_prefix: Some("foo") - ); - - t!("/", - iter: ["\\"], - has_root: true, - is_absolute: false, - parent: None, - file_name: None, - file_stem: None, - extension: None, - file_prefix: None - ); - - t!("\\", - iter: ["\\"], - has_root: true, - is_absolute: false, - parent: None, - file_name: None, - file_stem: None, - extension: None, - file_prefix: None - ); - - t!("c:", - iter: ["c:"], - has_root: false, - is_absolute: false, - parent: None, - file_name: None, - file_stem: None, - extension: None, - file_prefix: None - ); - - t!("c:\\", - iter: ["c:", "\\"], - has_root: true, - is_absolute: true, - parent: None, - file_name: None, - file_stem: None, - extension: None, - file_prefix: None - ); - - t!("c:/", - iter: ["c:", "\\"], - has_root: true, - is_absolute: true, - parent: None, - file_name: None, - file_stem: None, - extension: None, - file_prefix: None - ); - - t!("/foo", - iter: ["\\", "foo"], - has_root: true, - is_absolute: false, - parent: Some("/"), - file_name: Some("foo"), - file_stem: Some("foo"), - extension: None, - file_prefix: Some("foo") - ); - - t!("foo/", - iter: ["foo"], - has_root: false, - is_absolute: false, - parent: Some(""), - file_name: Some("foo"), - file_stem: Some("foo"), - extension: None, - file_prefix: Some("foo") - ); - - t!("/foo/", - iter: ["\\", "foo"], - has_root: true, - is_absolute: false, - parent: Some("/"), - file_name: Some("foo"), - file_stem: Some("foo"), - extension: None, - file_prefix: Some("foo") - ); - - t!("foo/bar", - iter: ["foo", "bar"], - has_root: false, - is_absolute: false, - parent: Some("foo"), - file_name: Some("bar"), - file_stem: Some("bar"), - extension: None, - file_prefix: Some("bar") - ); - - t!("/foo/bar", - iter: ["\\", "foo", "bar"], - has_root: true, - is_absolute: false, - parent: Some("/foo"), - file_name: Some("bar"), - file_stem: Some("bar"), - extension: None, - file_prefix: Some("bar") - ); - - t!("///foo///", - iter: ["\\", "foo"], - has_root: true, - is_absolute: false, - parent: Some("/"), - file_name: Some("foo"), - file_stem: Some("foo"), - extension: None, - file_prefix: Some("foo") - ); - - t!("///foo///bar", - iter: ["\\", "foo", "bar"], - has_root: true, - is_absolute: false, - parent: Some("///foo"), - file_name: Some("bar"), - file_stem: Some("bar"), - extension: None, - file_prefix: Some("bar") - ); - - t!("./.", - iter: ["."], - has_root: false, - is_absolute: false, - parent: Some(""), - file_name: None, - file_stem: None, - extension: None, - file_prefix: None - ); - - t!("/..", - iter: ["\\", ".."], - has_root: true, - is_absolute: false, - parent: Some("/"), - file_name: None, - file_stem: None, - extension: None, - file_prefix: None - ); - - t!("../", - iter: [".."], - has_root: false, - is_absolute: false, - parent: Some(""), - file_name: None, - file_stem: None, - extension: None, - file_prefix: None - ); - - t!("foo/.", - iter: ["foo"], - has_root: false, - is_absolute: false, - parent: Some(""), - file_name: Some("foo"), - file_stem: Some("foo"), - extension: None, - file_prefix: Some("foo") - ); - - t!("foo/..", - iter: ["foo", ".."], - has_root: false, - is_absolute: false, - parent: Some("foo"), - file_name: None, - file_stem: None, - extension: None, - file_prefix: None - ); - - t!("foo/./", - iter: ["foo"], - has_root: false, - is_absolute: false, - parent: Some(""), - file_name: Some("foo"), - file_stem: Some("foo"), - extension: None, - file_prefix: Some("foo") - ); - - t!("foo/./bar", - iter: ["foo", "bar"], - has_root: false, - is_absolute: false, - parent: Some("foo"), - file_name: Some("bar"), - file_stem: Some("bar"), - extension: None, - file_prefix: Some("bar") - ); - - t!("foo/../", - iter: ["foo", ".."], - has_root: false, - is_absolute: false, - parent: Some("foo"), - file_name: None, - file_stem: None, - extension: None, - file_prefix: None - ); - - t!("foo/../bar", - iter: ["foo", "..", "bar"], - has_root: false, - is_absolute: false, - parent: Some("foo/.."), - file_name: Some("bar"), - file_stem: Some("bar"), - extension: None, - file_prefix: Some("bar") - ); - - t!("./a", - iter: [".", "a"], - has_root: false, - is_absolute: false, - parent: Some("."), - file_name: Some("a"), - file_stem: Some("a"), - extension: None, - file_prefix: Some("a") - ); - - t!(".", - iter: ["."], - has_root: false, - is_absolute: false, - parent: Some(""), - file_name: None, - file_stem: None, - extension: None, - file_prefix: None - ); - - t!("./", - iter: ["."], - has_root: false, - is_absolute: false, - parent: Some(""), - file_name: None, - file_stem: None, - extension: None, - file_prefix: None - ); - - t!("a/b", - iter: ["a", "b"], - has_root: false, - is_absolute: false, - parent: Some("a"), - file_name: Some("b"), - file_stem: Some("b"), - extension: None, - file_prefix: Some("b") - ); - - t!("a//b", - iter: ["a", "b"], - has_root: false, - is_absolute: false, - parent: Some("a"), - file_name: Some("b"), - file_stem: Some("b"), - extension: None, - file_prefix: Some("b") - ); - - t!("a/./b", - iter: ["a", "b"], - has_root: false, - is_absolute: false, - parent: Some("a"), - file_name: Some("b"), - file_stem: Some("b"), - extension: None, - file_prefix: Some("b") - ); - - t!("a/b/c", - iter: ["a", "b", "c"], - has_root: false, - is_absolute: false, - parent: Some("a/b"), - file_name: Some("c"), - file_stem: Some("c"), - extension: None, - file_prefix: Some("c") - ); - - t!("a\\b\\c", - iter: ["a", "b", "c"], - has_root: false, - is_absolute: false, - parent: Some("a\\b"), - file_name: Some("c"), - file_stem: Some("c"), - extension: None, - file_prefix: Some("c") - ); - - t!("\\a", - iter: ["\\", "a"], - has_root: true, - is_absolute: false, - parent: Some("\\"), - file_name: Some("a"), - file_stem: Some("a"), - extension: None, - file_prefix: Some("a") - ); - - t!("c:\\foo.txt", - iter: ["c:", "\\", "foo.txt"], - has_root: true, - is_absolute: true, - parent: Some("c:\\"), - file_name: Some("foo.txt"), - file_stem: Some("foo"), - extension: Some("txt"), - file_prefix: Some("foo") - ); - - t!("\\\\server\\share\\foo.txt", - iter: ["\\\\server\\share", "\\", "foo.txt"], - has_root: true, - is_absolute: true, - parent: Some("\\\\server\\share\\"), - file_name: Some("foo.txt"), - file_stem: Some("foo"), - extension: Some("txt"), - file_prefix: Some("foo") - ); - - t!("\\\\server\\share", - iter: ["\\\\server\\share", "\\"], - has_root: true, - is_absolute: true, - parent: None, - file_name: None, - file_stem: None, - extension: None, - file_prefix: None - ); - - t!("\\\\server", - iter: ["\\", "server"], - has_root: true, - is_absolute: false, - parent: Some("\\"), - file_name: Some("server"), - file_stem: Some("server"), - extension: None, - file_prefix: Some("server") - ); - - t!("\\\\?\\bar\\foo.txt", - iter: ["\\\\?\\bar", "\\", "foo.txt"], - has_root: true, - is_absolute: true, - parent: Some("\\\\?\\bar\\"), - file_name: Some("foo.txt"), - file_stem: Some("foo"), - extension: Some("txt"), - file_prefix: Some("foo") - ); - - t!("\\\\?\\bar", - iter: ["\\\\?\\bar"], - has_root: true, - is_absolute: true, - parent: None, - file_name: None, - file_stem: None, - extension: None, - file_prefix: None - ); - - t!("\\\\?\\", - iter: ["\\\\?\\"], - has_root: true, - is_absolute: true, - parent: None, - file_name: None, - file_stem: None, - extension: None, - file_prefix: None - ); - - t!("\\\\?\\UNC\\server\\share\\foo.txt", - iter: ["\\\\?\\UNC\\server\\share", "\\", "foo.txt"], - has_root: true, - is_absolute: true, - parent: Some("\\\\?\\UNC\\server\\share\\"), - file_name: Some("foo.txt"), - file_stem: Some("foo"), - extension: Some("txt"), - file_prefix: Some("foo") - ); - - t!("\\\\?\\UNC\\server", - iter: ["\\\\?\\UNC\\server"], - has_root: true, - is_absolute: true, - parent: None, - file_name: None, - file_stem: None, - extension: None, - file_prefix: None - ); - - t!("\\\\?\\UNC\\", - iter: ["\\\\?\\UNC\\"], - has_root: true, - is_absolute: true, - parent: None, - file_name: None, - file_stem: None, - extension: None, - file_prefix: None - ); - - t!("\\\\?\\C:\\foo.txt", - iter: ["\\\\?\\C:", "\\", "foo.txt"], - has_root: true, - is_absolute: true, - parent: Some("\\\\?\\C:\\"), - file_name: Some("foo.txt"), - file_stem: Some("foo"), - extension: Some("txt"), - file_prefix: Some("foo") - ); - - t!("\\\\?\\C:\\", - iter: ["\\\\?\\C:", "\\"], - has_root: true, - is_absolute: true, - parent: None, - file_name: None, - file_stem: None, - extension: None, - file_prefix: None - ); - - t!("\\\\?\\C:", - iter: ["\\\\?\\C:"], - has_root: true, - is_absolute: true, - parent: None, - file_name: None, - file_stem: None, - extension: None, - file_prefix: None - ); - - t!("\\\\?\\foo/bar", - iter: ["\\\\?\\foo/bar"], - has_root: true, - is_absolute: true, - parent: None, - file_name: None, - file_stem: None, - extension: None, - file_prefix: None - ); - - t!("\\\\?\\C:/foo/bar", - iter: ["\\\\?\\C:", "\\", "foo/bar"], - has_root: true, - is_absolute: true, - parent: Some("\\\\?\\C:/"), - file_name: Some("foo/bar"), - file_stem: Some("foo/bar"), - extension: None, - file_prefix: Some("foo/bar") - ); - - t!("\\\\.\\foo\\bar", - iter: ["\\\\.\\foo", "\\", "bar"], - has_root: true, - is_absolute: true, - parent: Some("\\\\.\\foo\\"), - file_name: Some("bar"), - file_stem: Some("bar"), - extension: None, - file_prefix: Some("bar") - ); - - t!("\\\\.\\foo", - iter: ["\\\\.\\foo", "\\"], - has_root: true, - is_absolute: true, - parent: None, - file_name: None, - file_stem: None, - extension: None, - file_prefix: None - ); - - t!("\\\\.\\foo/bar", - iter: ["\\\\.\\foo", "\\", "bar"], - has_root: true, - is_absolute: true, - parent: Some("\\\\.\\foo/"), - file_name: Some("bar"), - file_stem: Some("bar"), - extension: None, - file_prefix: Some("bar") - ); - - t!("\\\\.\\foo\\bar/baz", - iter: ["\\\\.\\foo", "\\", "bar", "baz"], - has_root: true, - is_absolute: true, - parent: Some("\\\\.\\foo\\bar"), - file_name: Some("baz"), - file_stem: Some("baz"), - extension: None, - file_prefix: Some("baz") - ); - - t!("\\\\.\\", - iter: ["\\\\.\\", "\\"], - has_root: true, - is_absolute: true, - parent: None, - file_name: None, - file_stem: None, - extension: None, - file_prefix: None - ); - - t!("\\\\?\\a\\b\\", - iter: ["\\\\?\\a", "\\", "b"], - has_root: true, - is_absolute: true, - parent: Some("\\\\?\\a\\"), - file_name: Some("b"), - file_stem: Some("b"), - extension: None, - file_prefix: Some("b") - ); - - t!("\\\\?\\C:\\foo.txt.zip", - iter: ["\\\\?\\C:", "\\", "foo.txt.zip"], - has_root: true, - is_absolute: true, - parent: Some("\\\\?\\C:\\"), - file_name: Some("foo.txt.zip"), - file_stem: Some("foo.txt"), - extension: Some("zip"), - file_prefix: Some("foo") - ); - - t!("\\\\?\\C:\\.foo.txt.zip", - iter: ["\\\\?\\C:", "\\", ".foo.txt.zip"], - has_root: true, - is_absolute: true, - parent: Some("\\\\?\\C:\\"), - file_name: Some(".foo.txt.zip"), - file_stem: Some(".foo.txt"), - extension: Some("zip"), - file_prefix: Some(".foo") - ); - - t!("\\\\?\\C:\\.foo", - iter: ["\\\\?\\C:", "\\", ".foo"], - has_root: true, - is_absolute: true, - parent: Some("\\\\?\\C:\\"), - file_name: Some(".foo"), - file_stem: Some(".foo"), - extension: None, - file_prefix: Some(".foo") - ); - - t!("a/.x.y.z", - iter: ["a", ".x.y.z"], - has_root: false, - is_absolute: false, - parent: Some("a"), - file_name: Some(".x.y.z"), - file_stem: Some(".x.y"), - extension: Some("z"), - file_prefix: Some(".x") - ); -} - -#[test] -pub fn test_stem_ext() { - t!("foo", - file_stem: Some("foo"), - extension: None - ); - - t!("foo.", - file_stem: Some("foo"), - extension: Some("") - ); - - t!(".foo", - file_stem: Some(".foo"), - extension: None - ); - - t!("foo.txt", - file_stem: Some("foo"), - extension: Some("txt") - ); - - t!("foo.bar.txt", - file_stem: Some("foo.bar"), - extension: Some("txt") - ); - - t!("foo.bar.", - file_stem: Some("foo.bar"), - extension: Some("") - ); - - t!(".", file_stem: None, extension: None); - - t!("..", file_stem: None, extension: None); - - t!(".x.y.z", file_stem: Some(".x.y"), extension: Some("z")); - - t!("..x.y.z", file_stem: Some("..x.y"), extension: Some("z")); - - t!("", file_stem: None, extension: None); -} - -#[test] -pub fn test_prefix_ext() { - t!("foo", - file_prefix: Some("foo"), - extension: None - ); - - t!("foo.", - file_prefix: Some("foo"), - extension: Some("") - ); - - t!(".foo", - file_prefix: Some(".foo"), - extension: None - ); - - t!("foo.txt", - file_prefix: Some("foo"), - extension: Some("txt") - ); - - t!("foo.bar.txt", - file_prefix: Some("foo"), - extension: Some("txt") - ); - - t!("foo.bar.", - file_prefix: Some("foo"), - extension: Some("") - ); - - t!(".", file_prefix: None, extension: None); - - t!("..", file_prefix: None, extension: None); - - t!(".x.y.z", file_prefix: Some(".x"), extension: Some("z")); - - t!("..x.y.z", file_prefix: Some("."), extension: Some("z")); - - t!("", file_prefix: None, extension: None); -} - -#[test] -pub fn test_push() { - macro_rules! tp ( - ($path:expr, $push:expr, $expected:expr) => ({ - let mut actual = PathBuf::from($path); - actual.push($push); - assert!(actual.to_str() == Some($expected), - "pushing {:?} onto {:?}: Expected {:?}, got {:?}", - $push, $path, $expected, actual.to_str().unwrap()); - }); - ); - - if cfg!(unix) || cfg!(all(target_env = "sgx", target_vendor = "fortanix")) { - tp!("", "foo", "foo"); - tp!("foo", "bar", "foo/bar"); - tp!("foo/", "bar", "foo/bar"); - tp!("foo//", "bar", "foo//bar"); - tp!("foo/.", "bar", "foo/./bar"); - tp!("foo./.", "bar", "foo././bar"); - tp!("foo", "", "foo/"); - tp!("foo", ".", "foo/."); - tp!("foo", "..", "foo/.."); - tp!("foo", "/", "/"); - tp!("/foo/bar", "/", "/"); - tp!("/foo/bar", "/baz", "/baz"); - tp!("/foo/bar", "./baz", "/foo/bar/./baz"); - } else { - tp!("", "foo", "foo"); - tp!("foo", "bar", r"foo\bar"); - tp!("foo/", "bar", r"foo/bar"); - tp!(r"foo\", "bar", r"foo\bar"); - tp!("foo//", "bar", r"foo//bar"); - tp!(r"foo\\", "bar", r"foo\\bar"); - tp!("foo/.", "bar", r"foo/.\bar"); - tp!("foo./.", "bar", r"foo./.\bar"); - tp!(r"foo\.", "bar", r"foo\.\bar"); - tp!(r"foo.\.", "bar", r"foo.\.\bar"); - tp!("foo", "", "foo\\"); - tp!("foo", ".", r"foo\."); - tp!("foo", "..", r"foo\.."); - tp!("foo", "/", "/"); - tp!("foo", r"\", r"\"); - tp!("/foo/bar", "/", "/"); - tp!(r"\foo\bar", r"\", r"\"); - tp!("/foo/bar", "/baz", "/baz"); - tp!("/foo/bar", r"\baz", r"\baz"); - tp!("/foo/bar", "./baz", r"/foo/bar\./baz"); - tp!("/foo/bar", r".\baz", r"/foo/bar\.\baz"); - - tp!("c:\\", "windows", "c:\\windows"); - tp!("c:", "windows", "c:windows"); - - tp!("a\\b\\c", "d", "a\\b\\c\\d"); - tp!("\\a\\b\\c", "d", "\\a\\b\\c\\d"); - tp!("a\\b", "c\\d", "a\\b\\c\\d"); - tp!("a\\b", "\\c\\d", "\\c\\d"); - tp!("a\\b", ".", "a\\b\\."); - tp!("a\\b", "..\\c", "a\\b\\..\\c"); - tp!("a\\b", "C:a.txt", "C:a.txt"); - tp!("a\\b", "C:\\a.txt", "C:\\a.txt"); - tp!("C:\\a", "C:\\b.txt", "C:\\b.txt"); - tp!("C:\\a\\b\\c", "C:d", "C:d"); - tp!("C:a\\b\\c", "C:d", "C:d"); - tp!("C:", r"a\b\c", r"C:a\b\c"); - tp!("C:", r"..\a", r"C:..\a"); - tp!("\\\\server\\share\\foo", "bar", "\\\\server\\share\\foo\\bar"); - tp!("\\\\server\\share\\foo", "C:baz", "C:baz"); - tp!("\\\\?\\C:\\a\\b", "C:c\\d", "C:c\\d"); - tp!("\\\\?\\C:a\\b", "C:c\\d", "C:c\\d"); - tp!("\\\\?\\C:\\a\\b", "C:\\c\\d", "C:\\c\\d"); - tp!("\\\\?\\foo\\bar", "baz", "\\\\?\\foo\\bar\\baz"); - tp!("\\\\?\\UNC\\server\\share\\foo", "bar", "\\\\?\\UNC\\server\\share\\foo\\bar"); - tp!("\\\\?\\UNC\\server\\share", "C:\\a", "C:\\a"); - tp!("\\\\?\\UNC\\server\\share", "C:a", "C:a"); - - // Note: modified from old path API - tp!("\\\\?\\UNC\\server", "foo", "\\\\?\\UNC\\server\\foo"); - - tp!("C:\\a", "\\\\?\\UNC\\server\\share", "\\\\?\\UNC\\server\\share"); - tp!("\\\\.\\foo\\bar", "baz", "\\\\.\\foo\\bar\\baz"); - tp!("\\\\.\\foo\\bar", "C:a", "C:a"); - // again, not sure about the following, but I'm assuming \\.\ should be verbatim - tp!("\\\\.\\foo", "..\\bar", "\\\\.\\foo\\..\\bar"); - - tp!("\\\\?\\C:", "foo", "\\\\?\\C:\\foo"); // this is a weird one - - tp!(r"\\?\C:\bar", "../foo", r"\\?\C:\foo"); - tp!(r"\\?\C:\bar", "../../foo", r"\\?\C:\foo"); - tp!(r"\\?\C:\", "../foo", r"\\?\C:\foo"); - tp!(r"\\?\C:", r"D:\foo/./", r"D:\foo/./"); - tp!(r"\\?\C:", r"\\?\D:\foo\.\", r"\\?\D:\foo\.\"); - tp!(r"\\?\A:\x\y", "/foo", r"\\?\A:\foo"); - tp!(r"\\?\A:", r"..\foo\.", r"\\?\A:\foo"); - tp!(r"\\?\A:\x\y", r".\foo\.", r"\\?\A:\x\y\foo"); - tp!(r"\\?\A:\x\y", r"", r"\\?\A:\x\y\"); - } -} - -#[test] -pub fn test_pop() { - macro_rules! tp ( - ($path:expr, $expected:expr, $output:expr) => ({ - let mut actual = PathBuf::from($path); - let output = actual.pop(); - assert!(actual.to_str() == Some($expected) && output == $output, - "popping from {:?}: Expected {:?}/{:?}, got {:?}/{:?}", - $path, $expected, $output, - actual.to_str().unwrap(), output); - }); - ); - - tp!("", "", false); - tp!("/", "/", false); - tp!("foo", "", true); - tp!(".", "", true); - tp!("/foo", "/", true); - tp!("/foo/bar", "/foo", true); - tp!("foo/bar", "foo", true); - tp!("foo/.", "", true); - tp!("foo//bar", "foo", true); - - if cfg!(windows) { - tp!("a\\b\\c", "a\\b", true); - tp!("\\a", "\\", true); - tp!("\\", "\\", false); - - tp!("C:\\a\\b", "C:\\a", true); - tp!("C:\\a", "C:\\", true); - tp!("C:\\", "C:\\", false); - tp!("C:a\\b", "C:a", true); - tp!("C:a", "C:", true); - tp!("C:", "C:", false); - tp!("\\\\server\\share\\a\\b", "\\\\server\\share\\a", true); - tp!("\\\\server\\share\\a", "\\\\server\\share\\", true); - tp!("\\\\server\\share", "\\\\server\\share", false); - tp!("\\\\?\\a\\b\\c", "\\\\?\\a\\b", true); - tp!("\\\\?\\a\\b", "\\\\?\\a\\", true); - tp!("\\\\?\\a", "\\\\?\\a", false); - tp!("\\\\?\\C:\\a\\b", "\\\\?\\C:\\a", true); - tp!("\\\\?\\C:\\a", "\\\\?\\C:\\", true); - tp!("\\\\?\\C:\\", "\\\\?\\C:\\", false); - tp!("\\\\?\\UNC\\server\\share\\a\\b", "\\\\?\\UNC\\server\\share\\a", true); - tp!("\\\\?\\UNC\\server\\share\\a", "\\\\?\\UNC\\server\\share\\", true); - tp!("\\\\?\\UNC\\server\\share", "\\\\?\\UNC\\server\\share", false); - tp!("\\\\.\\a\\b\\c", "\\\\.\\a\\b", true); - tp!("\\\\.\\a\\b", "\\\\.\\a\\", true); - tp!("\\\\.\\a", "\\\\.\\a", false); - - tp!("\\\\?\\a\\b\\", "\\\\?\\a\\", true); - } -} - -#[test] -pub fn test_set_file_name() { - macro_rules! tfn ( - ($path:expr, $file:expr, $expected:expr) => ({ - let mut p = PathBuf::from($path); - p.set_file_name($file); - assert!(p.to_str() == Some($expected), - "setting file name of {:?} to {:?}: Expected {:?}, got {:?}", - $path, $file, $expected, - p.to_str().unwrap()); - }); - ); - - tfn!("foo", "foo", "foo"); - tfn!("foo", "bar", "bar"); - tfn!("foo", "", ""); - tfn!("", "foo", "foo"); - if cfg!(unix) || cfg!(all(target_env = "sgx", target_vendor = "fortanix")) { - tfn!(".", "foo", "./foo"); - tfn!("foo/", "bar", "bar"); - tfn!("foo/.", "bar", "bar"); - tfn!("..", "foo", "../foo"); - tfn!("foo/..", "bar", "foo/../bar"); - tfn!("/", "foo", "/foo"); - } else { - tfn!(".", "foo", r".\foo"); - tfn!(r"foo\", "bar", r"bar"); - tfn!(r"foo\.", "bar", r"bar"); - tfn!("..", "foo", r"..\foo"); - tfn!(r"foo\..", "bar", r"foo\..\bar"); - tfn!(r"\", "foo", r"\foo"); - } -} - -#[test] -pub fn test_set_extension() { - macro_rules! tfe ( - ($path:expr, $ext:expr, $expected:expr, $output:expr) => ({ - let mut p = PathBuf::from($path); - let output = p.set_extension($ext); - assert!(p.to_str() == Some($expected) && output == $output, - "setting extension of {:?} to {:?}: Expected {:?}/{:?}, got {:?}/{:?}", - $path, $ext, $expected, $output, - p.to_str().unwrap(), output); - }); - ); - - tfe!("foo", "txt", "foo.txt", true); - tfe!("foo.bar", "txt", "foo.txt", true); - tfe!("foo.bar.baz", "txt", "foo.bar.txt", true); - tfe!(".test", "txt", ".test.txt", true); - tfe!("foo.txt", "", "foo", true); - tfe!("foo", "", "foo", true); - tfe!("", "foo", "", false); - tfe!(".", "foo", ".", false); - tfe!("foo/", "bar", "foo.bar", true); - tfe!("foo/.", "bar", "foo.bar", true); - tfe!("..", "foo", "..", false); - tfe!("foo/..", "bar", "foo/..", false); - tfe!("/", "foo", "/", false); -} - -#[test] -pub fn test_add_extension() { - macro_rules! tfe ( - ($path:expr, $ext:expr, $expected:expr, $output:expr) => ({ - let mut p = PathBuf::from($path); - let output = p.add_extension($ext); - assert!(p.to_str() == Some($expected) && output == $output, - "adding extension of {:?} to {:?}: Expected {:?}/{:?}, got {:?}/{:?}", - $path, $ext, $expected, $output, - p.to_str().unwrap(), output); - }); - ); - - tfe!("foo", "txt", "foo.txt", true); - tfe!("foo.bar", "txt", "foo.bar.txt", true); - tfe!("foo.bar.baz", "txt", "foo.bar.baz.txt", true); - tfe!(".test", "txt", ".test.txt", true); - tfe!("foo.txt", "", "foo.txt", true); - tfe!("foo", "", "foo", true); - tfe!("", "foo", "", false); - tfe!(".", "foo", ".", false); - tfe!("foo/", "bar", "foo.bar", true); - tfe!("foo/.", "bar", "foo.bar", true); - tfe!("..", "foo", "..", false); - tfe!("foo/..", "bar", "foo/..", false); - tfe!("/", "foo", "/", false); - - // edge cases - tfe!("/foo.ext////", "bar", "/foo.ext.bar", true); -} - -#[test] -pub fn test_with_extension() { - macro_rules! twe ( - ($input:expr, $extension:expr, $expected:expr) => ({ - let input = Path::new($input); - let output = input.with_extension($extension); - - assert!( - output.to_str() == Some($expected), - "calling Path::new({:?}).with_extension({:?}): Expected {:?}, got {:?}", - $input, $extension, $expected, output, - ); - }); - ); - - twe!("foo", "txt", "foo.txt"); - twe!("foo.bar", "txt", "foo.txt"); - twe!("foo.bar.baz", "txt", "foo.bar.txt"); - twe!(".test", "txt", ".test.txt"); - twe!("foo.txt", "", "foo"); - twe!("foo", "", "foo"); - twe!("", "foo", ""); - twe!(".", "foo", "."); - twe!("foo/", "bar", "foo.bar"); - twe!("foo/.", "bar", "foo.bar"); - twe!("..", "foo", ".."); - twe!("foo/..", "bar", "foo/.."); - twe!("/", "foo", "/"); - - // New extension is smaller than file name - twe!("aaa_aaa_aaa", "bbb_bbb", "aaa_aaa_aaa.bbb_bbb"); - // New extension is greater than file name - twe!("bbb_bbb", "aaa_aaa_aaa", "bbb_bbb.aaa_aaa_aaa"); - - // New extension is smaller than previous extension - twe!("ccc.aaa_aaa_aaa", "bbb_bbb", "ccc.bbb_bbb"); - // New extension is greater than previous extension - twe!("ccc.bbb_bbb", "aaa_aaa_aaa", "ccc.aaa_aaa_aaa"); -} - -#[test] -pub fn test_with_added_extension() { - macro_rules! twe ( - ($input:expr, $extension:expr, $expected:expr) => ({ - let input = Path::new($input); - let output = input.with_added_extension($extension); - - assert!( - output.to_str() == Some($expected), - "calling Path::new({:?}).with_added_extension({:?}): Expected {:?}, got {:?}", - $input, $extension, $expected, output, - ); - }); - ); - - twe!("foo", "txt", "foo.txt"); - twe!("foo.bar", "txt", "foo.bar.txt"); - twe!("foo.bar.baz", "txt", "foo.bar.baz.txt"); - twe!(".test", "txt", ".test.txt"); - twe!("foo.txt", "", "foo.txt"); - twe!("foo", "", "foo"); - twe!("", "foo", ""); - twe!(".", "foo", "."); - twe!("foo/", "bar", "foo.bar"); - twe!("foo/.", "bar", "foo.bar"); - twe!("..", "foo", ".."); - twe!("foo/..", "bar", "foo/.."); - twe!("/", "foo", "/"); - - // edge cases - twe!("/foo.ext////", "bar", "/foo.ext.bar"); - - // New extension is smaller than file name - twe!("aaa_aaa_aaa", "bbb_bbb", "aaa_aaa_aaa.bbb_bbb"); - // New extension is greater than file name - twe!("bbb_bbb", "aaa_aaa_aaa", "bbb_bbb.aaa_aaa_aaa"); - - // New extension is smaller than previous extension - twe!("ccc.aaa_aaa_aaa", "bbb_bbb", "ccc.aaa_aaa_aaa.bbb_bbb"); - // New extension is greater than previous extension - twe!("ccc.bbb_bbb", "aaa_aaa_aaa", "ccc.bbb_bbb.aaa_aaa_aaa"); -} - -#[test] -fn test_eq_receivers() { - use crate::borrow::Cow; - - let borrowed: &Path = Path::new("foo/bar"); - let mut owned: PathBuf = PathBuf::new(); - owned.push("foo"); - owned.push("bar"); - let borrowed_cow: Cow<'_, Path> = borrowed.into(); - let owned_cow: Cow<'_, Path> = owned.clone().into(); - - macro_rules! t { - ($($current:expr),+) => { - $( - assert_eq!($current, borrowed); - assert_eq!($current, owned); - assert_eq!($current, borrowed_cow); - assert_eq!($current, owned_cow); - )+ - } - } - - t!(borrowed, owned, borrowed_cow, owned_cow); -} - -#[test] -pub fn test_compare() { - use crate::hash::{DefaultHasher, Hash, Hasher}; - - fn hash(t: T) -> u64 { - let mut s = DefaultHasher::new(); - t.hash(&mut s); - s.finish() - } - - macro_rules! tc ( - ($path1:expr, $path2:expr, eq: $eq:expr, - starts_with: $starts_with:expr, ends_with: $ends_with:expr, - relative_from: $relative_from:expr) => ({ - let path1 = Path::new($path1); - let path2 = Path::new($path2); - - let eq = path1 == path2; - assert!(eq == $eq, "{:?} == {:?}, expected {:?}, got {:?}", - $path1, $path2, $eq, eq); - assert!($eq == (hash(path1) == hash(path2)), - "{:?} == {:?}, expected {:?}, got {} and {}", - $path1, $path2, $eq, hash(path1), hash(path2)); - - let starts_with = path1.starts_with(path2); - assert!(starts_with == $starts_with, - "{:?}.starts_with({:?}), expected {:?}, got {:?}", $path1, $path2, - $starts_with, starts_with); - - let ends_with = path1.ends_with(path2); - assert!(ends_with == $ends_with, - "{:?}.ends_with({:?}), expected {:?}, got {:?}", $path1, $path2, - $ends_with, ends_with); - - let relative_from = path1.strip_prefix(path2) - .map(|p| p.to_str().unwrap()) - .ok(); - let exp: Option<&str> = $relative_from; - assert!(relative_from == exp, - "{:?}.strip_prefix({:?}), expected {:?}, got {:?}", - $path1, $path2, exp, relative_from); - }); - ); - - tc!("", "", - eq: true, - starts_with: true, - ends_with: true, - relative_from: Some("") - ); - - tc!("foo", "", - eq: false, - starts_with: true, - ends_with: true, - relative_from: Some("foo") - ); - - tc!("", "foo", - eq: false, - starts_with: false, - ends_with: false, - relative_from: None - ); - - tc!("foo", "foo", - eq: true, - starts_with: true, - ends_with: true, - relative_from: Some("") - ); - - tc!("foo/", "foo", - eq: true, - starts_with: true, - ends_with: true, - relative_from: Some("") - ); - - tc!("foo//", "foo", - eq: true, - starts_with: true, - ends_with: true, - relative_from: Some("") - ); - - tc!("foo///", "foo", - eq: true, - starts_with: true, - ends_with: true, - relative_from: Some("") - ); - - tc!("foo/.", "foo", - eq: true, - starts_with: true, - ends_with: true, - relative_from: Some("") - ); - - tc!("foo/./bar", "foo/bar", - eq: true, - starts_with: true, - ends_with: true, - relative_from: Some("") - ); - - tc!("foo/.//bar", "foo/bar", - eq: true, - starts_with: true, - ends_with: true, - relative_from: Some("") - ); - - tc!("foo//./bar", "foo/bar", - eq: true, - starts_with: true, - ends_with: true, - relative_from: Some("") - ); - - tc!("foo/bar", "foo", - eq: false, - starts_with: true, - ends_with: false, - relative_from: Some("bar") - ); - - tc!("foo/bar", "foobar", - eq: false, - starts_with: false, - ends_with: false, - relative_from: None - ); - - tc!("foo/bar/baz", "foo/bar", - eq: false, - starts_with: true, - ends_with: false, - relative_from: Some("baz") - ); - - tc!("foo/bar", "foo/bar/baz", - eq: false, - starts_with: false, - ends_with: false, - relative_from: None - ); - - tc!("./foo/bar/", ".", - eq: false, - starts_with: true, - ends_with: false, - relative_from: Some("foo/bar") - ); - - if cfg!(windows) { - tc!(r"C:\src\rust\cargo-test\test\Cargo.toml", - r"c:\src\rust\cargo-test\test", - eq: false, - starts_with: true, - ends_with: false, - relative_from: Some("Cargo.toml") - ); - - tc!(r"c:\foo", r"C:\foo", - eq: true, - starts_with: true, - ends_with: true, - relative_from: Some("") - ); - - tc!(r"C:\foo\.\bar.txt", r"C:\foo\bar.txt", - eq: true, - starts_with: true, - ends_with: true, - relative_from: Some("") - ); - - tc!(r"C:\foo\.", r"C:\foo", - eq: true, - starts_with: true, - ends_with: true, - relative_from: Some("") - ); - - tc!(r"\\?\C:\foo\.\bar.txt", r"\\?\C:\foo\bar.txt", - eq: false, - starts_with: false, - ends_with: false, - relative_from: None - ); - } -} - -#[test] -fn test_components_debug() { - let path = Path::new("/tmp"); - - let mut components = path.components(); - - let expected = "Components([RootDir, Normal(\"tmp\")])"; - let actual = format!("{components:?}"); - assert_eq!(expected, actual); - - let _ = components.next().unwrap(); - let expected = "Components([Normal(\"tmp\")])"; - let actual = format!("{components:?}"); - assert_eq!(expected, actual); - - let _ = components.next().unwrap(); - let expected = "Components([])"; - let actual = format!("{components:?}"); - assert_eq!(expected, actual); -} - -#[cfg(unix)] -#[test] -fn test_iter_debug() { - let path = Path::new("/tmp"); - - let mut iter = path.iter(); - - let expected = "Iter([\"/\", \"tmp\"])"; - let actual = format!("{iter:?}"); - assert_eq!(expected, actual); - - let _ = iter.next().unwrap(); - let expected = "Iter([\"tmp\"])"; - let actual = format!("{iter:?}"); - assert_eq!(expected, actual); - - let _ = iter.next().unwrap(); - let expected = "Iter([])"; - let actual = format!("{iter:?}"); - assert_eq!(expected, actual); -} - -#[test] -fn into_boxed() { - let orig: &str = "some/sort/of/path"; - let path = Path::new(orig); - let boxed: Box = Box::from(path); - let path_buf = path.to_owned().into_boxed_path().into_path_buf(); - assert_eq!(path, &*boxed); - assert_eq!(&*boxed, &*path_buf); - assert_eq!(&*path_buf, path); -} - -#[test] -fn test_clone_into() { - let mut path_buf = PathBuf::from("supercalifragilisticexpialidocious"); - let path = Path::new("short"); - path.clone_into(&mut path_buf); - assert_eq!(path, path_buf); - assert!(path_buf.into_os_string().capacity() >= 15); -} - -#[test] -fn display_format_flags() { - assert_eq!(format!("a{:#<5}b", Path::new("").display()), "a#####b"); - assert_eq!(format!("a{:#<5}b", Path::new("a").display()), "aa####b"); -} - -#[test] -fn into_rc() { - let orig = "hello/world"; - let path = Path::new(orig); - let rc: Rc = Rc::from(path); - let arc: Arc = Arc::from(path); - - assert_eq!(&*rc, path); - assert_eq!(&*arc, path); - - let rc2: Rc = Rc::from(path.to_owned()); - let arc2: Arc = Arc::from(path.to_owned()); - - assert_eq!(&*rc2, path); - assert_eq!(&*arc2, path); -} - -#[test] -fn test_ord() { - macro_rules! ord( - ($ord:ident, $left:expr, $right:expr) => ({ - use core::cmp::Ordering; - - let left = Path::new($left); - let right = Path::new($right); - assert_eq!(left.cmp(&right), Ordering::$ord); - if (core::cmp::Ordering::$ord == Ordering::Equal) { - assert_eq!(left, right); - - let mut hasher = DefaultHasher::new(); - left.hash(&mut hasher); - let left_hash = hasher.finish(); - hasher = DefaultHasher::new(); - right.hash(&mut hasher); - let right_hash = hasher.finish(); - - assert_eq!(left_hash, right_hash, "hashes for {:?} and {:?} must match", left, right); - } else { - assert_ne!(left, right); - } - }); - ); - - ord!(Less, "1", "2"); - ord!(Less, "/foo/bar", "/foo./bar"); - ord!(Less, "foo/bar", "foo/bar."); - ord!(Equal, "foo/./bar", "foo/bar/"); - ord!(Equal, "foo/bar", "foo/bar/"); - ord!(Equal, "foo/bar", "foo/bar/."); - ord!(Equal, "foo/bar", "foo/bar//"); -} #[test] #[cfg(unix)] @@ -1884,7 +26,8 @@ fn test_unix_absolute() { // Test leading `.` and `..` components let curdir = crate::env::current_dir().unwrap(); assert_eq!(absolute("./a").unwrap().as_os_str(), curdir.join("a").as_os_str()); - assert_eq!(absolute("../a").unwrap().as_os_str(), curdir.join("../a").as_os_str()); // return /pwd/../a + assert_eq!(absolute("../a").unwrap().as_os_str(), curdir.join("../a").as_os_str()); + // return /pwd/../a } #[test] @@ -1923,153 +66,3 @@ fn test_windows_absolute() { ); assert_eq!(absolute(r"COM1").unwrap().as_os_str(), Path::new(r"\\.\COM1").as_os_str()); } - -#[test] -#[should_panic = "path separator"] -fn test_extension_path_sep() { - let mut path = PathBuf::from("path/to/file"); - path.set_extension("d/../../../../../etc/passwd"); -} - -#[test] -#[should_panic = "path separator"] -#[cfg(windows)] -fn test_extension_path_sep_alternate() { - let mut path = PathBuf::from("path/to/file"); - path.set_extension("d\\test"); -} - -#[test] -#[cfg(not(windows))] -fn test_extension_path_sep_alternate() { - let mut path = PathBuf::from("path/to/file"); - path.set_extension("d\\test"); - assert_eq!(path, Path::new("path/to/file.d\\test")); -} - -#[bench] -#[cfg_attr(miri, ignore)] // Miri isn't fast... -fn bench_path_cmp_fast_path_buf_sort(b: &mut test::Bencher) { - let prefix = "my/home"; - let mut paths: Vec<_> = - (0..1000).map(|num| PathBuf::from(prefix).join(format!("file {num}.rs"))).collect(); - - paths.sort(); - - b.iter(|| { - black_box(paths.as_mut_slice()).sort_unstable(); - }); -} - -#[bench] -#[cfg_attr(miri, ignore)] // Miri isn't fast... -fn bench_path_cmp_fast_path_long(b: &mut test::Bencher) { - let prefix = "/my/home/is/my/castle/and/my/castle/has/a/rusty/workbench/"; - let paths: Vec<_> = - (0..1000).map(|num| PathBuf::from(prefix).join(format!("file {num}.rs"))).collect(); - - let mut set = BTreeSet::new(); - - paths.iter().for_each(|p| { - set.insert(p.as_path()); - }); - - b.iter(|| { - set.remove(paths[500].as_path()); - set.insert(paths[500].as_path()); - }); -} - -#[bench] -#[cfg_attr(miri, ignore)] // Miri isn't fast... -fn bench_path_cmp_fast_path_short(b: &mut test::Bencher) { - let prefix = "my/home"; - let paths: Vec<_> = - (0..1000).map(|num| PathBuf::from(prefix).join(format!("file {num}.rs"))).collect(); - - let mut set = BTreeSet::new(); - - paths.iter().for_each(|p| { - set.insert(p.as_path()); - }); - - b.iter(|| { - set.remove(paths[500].as_path()); - set.insert(paths[500].as_path()); - }); -} - -#[bench] -#[cfg_attr(miri, ignore)] // Miri isn't fast... -fn bench_path_hashset(b: &mut test::Bencher) { - let prefix = "/my/home/is/my/castle/and/my/castle/has/a/rusty/workbench/"; - let paths: Vec<_> = - (0..1000).map(|num| PathBuf::from(prefix).join(format!("file {num}.rs"))).collect(); - - let mut set = HashSet::new(); - - paths.iter().for_each(|p| { - set.insert(p.as_path()); - }); - - b.iter(|| { - set.remove(paths[500].as_path()); - set.insert(black_box(paths[500].as_path())) - }); -} - -#[bench] -#[cfg_attr(miri, ignore)] // Miri isn't fast... -fn bench_path_hashset_miss(b: &mut test::Bencher) { - let prefix = "/my/home/is/my/castle/and/my/castle/has/a/rusty/workbench/"; - let paths: Vec<_> = - (0..1000).map(|num| PathBuf::from(prefix).join(format!("file {num}.rs"))).collect(); - - let mut set = HashSet::new(); - - paths.iter().for_each(|p| { - set.insert(p.as_path()); - }); - - let probe = PathBuf::from(prefix).join("other"); - - b.iter(|| set.remove(black_box(probe.as_path()))); -} - -#[bench] -fn bench_hash_path_short(b: &mut test::Bencher) { - let mut hasher = DefaultHasher::new(); - let path = Path::new("explorer.exe"); - - b.iter(|| black_box(path).hash(&mut hasher)); - - black_box(hasher.finish()); -} - -#[bench] -fn bench_hash_path_long(b: &mut test::Bencher) { - let mut hasher = DefaultHasher::new(); - let path = - Path::new("/aaaaa/aaaaaa/./../aaaaaaaa/bbbbbbbbbbbbb/ccccccccccc/ddddddddd/eeeeeee.fff"); - - b.iter(|| black_box(path).hash(&mut hasher)); - - black_box(hasher.finish()); -} - -#[test] -fn clone_to_uninit() { - let a = Path::new("hello.txt"); - - let mut storage = vec![MaybeUninit::::uninit(); size_of_val::(a)]; - unsafe { a.clone_to_uninit(ptr::from_mut::<[_]>(storage.as_mut_slice()) as *mut Path) }; - assert_eq!(a.as_os_str().as_encoded_bytes(), unsafe { - MaybeUninit::slice_assume_init_ref(&storage) - }); - - let mut b: Box = Path::new("world.exe").into(); - assert_eq!(size_of_val::(a), size_of_val::(&b)); - assert_ne!(a, &*b); - unsafe { a.clone_to_uninit(ptr::from_mut::(&mut b)) }; - assert_eq!(a, &*b); -} diff --git a/library/std/src/sys/mod.rs b/library/std/src/sys/mod.rs index 1ef17dd530fd2..9cb01770f92e0 100644 --- a/library/std/src/sys/mod.rs +++ b/library/std/src/sys/mod.rs @@ -12,7 +12,6 @@ pub mod anonymous_pipe; pub mod backtrace; pub mod cmath; pub mod exit_guard; -pub mod os_str; pub mod path; pub mod sync; pub mod thread_local; diff --git a/library/std/src/sys/os_str/bytes.rs b/library/std/src/sys/os_str/bytes.rs deleted file mode 100644 index 992767211d083..0000000000000 --- a/library/std/src/sys/os_str/bytes.rs +++ /dev/null @@ -1,360 +0,0 @@ -//! The underlying OsString/OsStr implementation on Unix and many other -//! systems: just a `Vec`/`[u8]`. - -use core::clone::CloneToUninit; -use core::ptr::addr_of_mut; - -use crate::borrow::Cow; -use crate::collections::TryReserveError; -use crate::fmt::Write; -use crate::rc::Rc; -use crate::sync::Arc; -use crate::sys_common::{AsInner, IntoInner}; -use crate::{fmt, mem, str}; - -#[cfg(test)] -mod tests; - -#[derive(Hash)] -#[repr(transparent)] -pub struct Buf { - pub inner: Vec, -} - -#[repr(transparent)] -pub struct Slice { - pub inner: [u8], -} - -impl fmt::Debug for Slice { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - fmt::Debug::fmt(&self.inner.utf8_chunks().debug(), f) - } -} - -impl fmt::Display for Slice { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - // If we're the empty string then our iterator won't actually yield - // anything, so perform the formatting manually - if self.inner.is_empty() { - return "".fmt(f); - } - - for chunk in self.inner.utf8_chunks() { - let valid = chunk.valid(); - // If we successfully decoded the whole chunk as a valid string then - // we can return a direct formatting of the string which will also - // respect various formatting flags if possible. - if chunk.invalid().is_empty() { - return valid.fmt(f); - } - - f.write_str(valid)?; - f.write_char(char::REPLACEMENT_CHARACTER)?; - } - Ok(()) - } -} - -impl fmt::Debug for Buf { - fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { - fmt::Debug::fmt(self.as_slice(), formatter) - } -} - -impl fmt::Display for Buf { - fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { - fmt::Display::fmt(self.as_slice(), formatter) - } -} - -impl Clone for Buf { - #[inline] - fn clone(&self) -> Self { - Buf { inner: self.inner.clone() } - } - - #[inline] - fn clone_from(&mut self, source: &Self) { - self.inner.clone_from(&source.inner) - } -} - -impl IntoInner> for Buf { - fn into_inner(self) -> Vec { - self.inner - } -} - -impl AsInner<[u8]> for Buf { - #[inline] - fn as_inner(&self) -> &[u8] { - &self.inner - } -} - -impl Buf { - #[inline] - pub fn into_encoded_bytes(self) -> Vec { - self.inner - } - - #[inline] - pub unsafe fn from_encoded_bytes_unchecked(s: Vec) -> Self { - Self { inner: s } - } - - pub fn from_string(s: String) -> Buf { - Buf { inner: s.into_bytes() } - } - - #[inline] - pub fn with_capacity(capacity: usize) -> Buf { - Buf { inner: Vec::with_capacity(capacity) } - } - - #[inline] - pub fn clear(&mut self) { - self.inner.clear() - } - - #[inline] - pub fn capacity(&self) -> usize { - self.inner.capacity() - } - - #[inline] - pub fn reserve(&mut self, additional: usize) { - self.inner.reserve(additional) - } - - #[inline] - pub fn try_reserve(&mut self, additional: usize) -> Result<(), TryReserveError> { - self.inner.try_reserve(additional) - } - - #[inline] - pub fn reserve_exact(&mut self, additional: usize) { - self.inner.reserve_exact(additional) - } - - #[inline] - pub fn try_reserve_exact(&mut self, additional: usize) -> Result<(), TryReserveError> { - self.inner.try_reserve_exact(additional) - } - - #[inline] - pub fn shrink_to_fit(&mut self) { - self.inner.shrink_to_fit() - } - - #[inline] - pub fn shrink_to(&mut self, min_capacity: usize) { - self.inner.shrink_to(min_capacity) - } - - #[inline] - pub fn as_slice(&self) -> &Slice { - // SAFETY: Slice just wraps [u8], - // and &*self.inner is &[u8], therefore - // transmuting &[u8] to &Slice is safe. - unsafe { mem::transmute(&*self.inner) } - } - - #[inline] - pub fn as_mut_slice(&mut self) -> &mut Slice { - // SAFETY: Slice just wraps [u8], - // and &mut *self.inner is &mut [u8], therefore - // transmuting &mut [u8] to &mut Slice is safe. - unsafe { mem::transmute(&mut *self.inner) } - } - - pub fn into_string(self) -> Result { - String::from_utf8(self.inner).map_err(|p| Buf { inner: p.into_bytes() }) - } - - pub fn push_slice(&mut self, s: &Slice) { - self.inner.extend_from_slice(&s.inner) - } - - #[inline] - pub fn leak<'a>(self) -> &'a mut Slice { - unsafe { mem::transmute(self.inner.leak()) } - } - - #[inline] - pub fn into_box(self) -> Box { - unsafe { mem::transmute(self.inner.into_boxed_slice()) } - } - - #[inline] - pub fn from_box(boxed: Box) -> Buf { - let inner: Box<[u8]> = unsafe { mem::transmute(boxed) }; - Buf { inner: inner.into_vec() } - } - - #[inline] - pub fn into_arc(&self) -> Arc { - self.as_slice().into_arc() - } - - #[inline] - pub fn into_rc(&self) -> Rc { - self.as_slice().into_rc() - } - - /// Provides plumbing to core `Vec::truncate`. - /// More well behaving alternative to allowing outer types - /// full mutable access to the core `Vec`. - #[inline] - pub(crate) fn truncate(&mut self, len: usize) { - self.inner.truncate(len); - } - - /// Provides plumbing to core `Vec::extend_from_slice`. - /// More well behaving alternative to allowing outer types - /// full mutable access to the core `Vec`. - #[inline] - pub(crate) fn extend_from_slice(&mut self, other: &[u8]) { - self.inner.extend_from_slice(other); - } -} - -impl Slice { - #[inline] - pub fn as_encoded_bytes(&self) -> &[u8] { - &self.inner - } - - #[inline] - pub unsafe fn from_encoded_bytes_unchecked(s: &[u8]) -> &Slice { - unsafe { mem::transmute(s) } - } - - #[track_caller] - #[inline] - pub fn check_public_boundary(&self, index: usize) { - if index == 0 || index == self.inner.len() { - return; - } - if index < self.inner.len() - && (self.inner[index - 1].is_ascii() || self.inner[index].is_ascii()) - { - return; - } - - slow_path(&self.inner, index); - - /// We're betting that typical splits will involve an ASCII character. - /// - /// Putting the expensive checks in a separate function generates notably - /// better assembly. - #[track_caller] - #[inline(never)] - fn slow_path(bytes: &[u8], index: usize) { - let (before, after) = bytes.split_at(index); - - // UTF-8 takes at most 4 bytes per codepoint, so we don't - // need to check more than that. - let after = after.get(..4).unwrap_or(after); - match str::from_utf8(after) { - Ok(_) => return, - Err(err) if err.valid_up_to() != 0 => return, - Err(_) => (), - } - - for len in 2..=4.min(index) { - let before = &before[index - len..]; - if str::from_utf8(before).is_ok() { - return; - } - } - - panic!("byte index {index} is not an OsStr boundary"); - } - } - - #[inline] - pub fn from_str(s: &str) -> &Slice { - unsafe { Slice::from_encoded_bytes_unchecked(s.as_bytes()) } - } - - pub fn to_str(&self) -> Result<&str, crate::str::Utf8Error> { - str::from_utf8(&self.inner) - } - - pub fn to_string_lossy(&self) -> Cow<'_, str> { - String::from_utf8_lossy(&self.inner) - } - - pub fn to_owned(&self) -> Buf { - Buf { inner: self.inner.to_vec() } - } - - pub fn clone_into(&self, buf: &mut Buf) { - self.inner.clone_into(&mut buf.inner) - } - - #[inline] - pub fn into_box(&self) -> Box { - let boxed: Box<[u8]> = self.inner.into(); - unsafe { mem::transmute(boxed) } - } - - pub fn empty_box() -> Box { - let boxed: Box<[u8]> = Default::default(); - unsafe { mem::transmute(boxed) } - } - - #[inline] - pub fn into_arc(&self) -> Arc { - let arc: Arc<[u8]> = Arc::from(&self.inner); - unsafe { Arc::from_raw(Arc::into_raw(arc) as *const Slice) } - } - - #[inline] - pub fn into_rc(&self) -> Rc { - let rc: Rc<[u8]> = Rc::from(&self.inner); - unsafe { Rc::from_raw(Rc::into_raw(rc) as *const Slice) } - } - - #[inline] - pub fn make_ascii_lowercase(&mut self) { - self.inner.make_ascii_lowercase() - } - - #[inline] - pub fn make_ascii_uppercase(&mut self) { - self.inner.make_ascii_uppercase() - } - - #[inline] - pub fn to_ascii_lowercase(&self) -> Buf { - Buf { inner: self.inner.to_ascii_lowercase() } - } - - #[inline] - pub fn to_ascii_uppercase(&self) -> Buf { - Buf { inner: self.inner.to_ascii_uppercase() } - } - - #[inline] - pub fn is_ascii(&self) -> bool { - self.inner.is_ascii() - } - - #[inline] - pub fn eq_ignore_ascii_case(&self, other: &Self) -> bool { - self.inner.eq_ignore_ascii_case(&other.inner) - } -} - -#[unstable(feature = "clone_to_uninit", issue = "126799")] -unsafe impl CloneToUninit for Slice { - #[inline] - #[cfg_attr(debug_assertions, track_caller)] - unsafe fn clone_to_uninit(&self, dst: *mut Self) { - // SAFETY: we're just a wrapper around [u8] - unsafe { self.inner.clone_to_uninit(addr_of_mut!((*dst).inner)) } - } -} diff --git a/library/std/src/sys/os_str/mod.rs b/library/std/src/sys/os_str/mod.rs deleted file mode 100644 index 345e661586d03..0000000000000 --- a/library/std/src/sys/os_str/mod.rs +++ /dev/null @@ -1,14 +0,0 @@ -#![forbid(unsafe_op_in_unsafe_fn)] - -cfg_if::cfg_if! { - if #[cfg(any( - target_os = "windows", - target_os = "uefi", - ))] { - mod wtf8; - pub use wtf8::{Buf, Slice}; - } else { - mod bytes; - pub use bytes::{Buf, Slice}; - } -} diff --git a/library/std/src/sys/os_str/wtf8.rs b/library/std/src/sys/os_str/wtf8.rs deleted file mode 100644 index 433237aa6e7bf..0000000000000 --- a/library/std/src/sys/os_str/wtf8.rs +++ /dev/null @@ -1,283 +0,0 @@ -//! The underlying OsString/OsStr implementation on Windows is a -//! wrapper around the "WTF-8" encoding; see the `wtf8` module for more. -use core::clone::CloneToUninit; -use core::ptr::addr_of_mut; - -use crate::borrow::Cow; -use crate::collections::TryReserveError; -use crate::rc::Rc; -use crate::sync::Arc; -use crate::sys_common::wtf8::{check_utf8_boundary, Wtf8, Wtf8Buf}; -use crate::sys_common::{AsInner, FromInner, IntoInner}; -use crate::{fmt, mem}; - -#[derive(Clone, Hash)] -pub struct Buf { - pub inner: Wtf8Buf, -} - -impl IntoInner for Buf { - fn into_inner(self) -> Wtf8Buf { - self.inner - } -} - -impl FromInner for Buf { - fn from_inner(inner: Wtf8Buf) -> Self { - Buf { inner } - } -} - -impl AsInner for Buf { - #[inline] - fn as_inner(&self) -> &Wtf8 { - &self.inner - } -} - -impl fmt::Debug for Buf { - fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { - fmt::Debug::fmt(self.as_slice(), formatter) - } -} - -impl fmt::Display for Buf { - fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { - fmt::Display::fmt(self.as_slice(), formatter) - } -} - -#[repr(transparent)] -pub struct Slice { - pub inner: Wtf8, -} - -impl fmt::Debug for Slice { - fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { - fmt::Debug::fmt(&self.inner, formatter) - } -} - -impl fmt::Display for Slice { - fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { - fmt::Display::fmt(&self.inner, formatter) - } -} - -impl Buf { - #[inline] - pub fn into_encoded_bytes(self) -> Vec { - self.inner.into_bytes() - } - - #[inline] - pub unsafe fn from_encoded_bytes_unchecked(s: Vec) -> Self { - unsafe { Self { inner: Wtf8Buf::from_bytes_unchecked(s) } } - } - - pub fn with_capacity(capacity: usize) -> Buf { - Buf { inner: Wtf8Buf::with_capacity(capacity) } - } - - pub fn clear(&mut self) { - self.inner.clear() - } - - pub fn capacity(&self) -> usize { - self.inner.capacity() - } - - pub fn from_string(s: String) -> Buf { - Buf { inner: Wtf8Buf::from_string(s) } - } - - pub fn as_slice(&self) -> &Slice { - // SAFETY: Slice is just a wrapper for Wtf8, - // and self.inner.as_slice() returns &Wtf8. - // Therefore, transmuting &Wtf8 to &Slice is safe. - unsafe { mem::transmute(self.inner.as_slice()) } - } - - pub fn as_mut_slice(&mut self) -> &mut Slice { - // SAFETY: Slice is just a wrapper for Wtf8, - // and self.inner.as_mut_slice() returns &mut Wtf8. - // Therefore, transmuting &mut Wtf8 to &mut Slice is safe. - // Additionally, care should be taken to ensure the slice - // is always valid Wtf8. - unsafe { mem::transmute(self.inner.as_mut_slice()) } - } - - pub fn into_string(self) -> Result { - self.inner.into_string().map_err(|buf| Buf { inner: buf }) - } - - pub fn push_slice(&mut self, s: &Slice) { - self.inner.push_wtf8(&s.inner) - } - - pub fn reserve(&mut self, additional: usize) { - self.inner.reserve(additional) - } - - pub fn try_reserve(&mut self, additional: usize) -> Result<(), TryReserveError> { - self.inner.try_reserve(additional) - } - - pub fn reserve_exact(&mut self, additional: usize) { - self.inner.reserve_exact(additional) - } - - pub fn try_reserve_exact(&mut self, additional: usize) -> Result<(), TryReserveError> { - self.inner.try_reserve_exact(additional) - } - - pub fn shrink_to_fit(&mut self) { - self.inner.shrink_to_fit() - } - - #[inline] - pub fn shrink_to(&mut self, min_capacity: usize) { - self.inner.shrink_to(min_capacity) - } - - #[inline] - pub fn leak<'a>(self) -> &'a mut Slice { - unsafe { mem::transmute(self.inner.leak()) } - } - - #[inline] - pub fn into_box(self) -> Box { - unsafe { mem::transmute(self.inner.into_box()) } - } - - #[inline] - pub fn from_box(boxed: Box) -> Buf { - let inner: Box = unsafe { mem::transmute(boxed) }; - Buf { inner: Wtf8Buf::from_box(inner) } - } - - #[inline] - pub fn into_arc(&self) -> Arc { - self.as_slice().into_arc() - } - - #[inline] - pub fn into_rc(&self) -> Rc { - self.as_slice().into_rc() - } - - /// Provides plumbing to core `Vec::truncate`. - /// More well behaving alternative to allowing outer types - /// full mutable access to the core `Vec`. - #[inline] - pub(crate) fn truncate(&mut self, len: usize) { - self.inner.truncate(len); - } - - /// Provides plumbing to core `Vec::extend_from_slice`. - /// More well behaving alternative to allowing outer types - /// full mutable access to the core `Vec`. - #[inline] - pub(crate) fn extend_from_slice(&mut self, other: &[u8]) { - self.inner.extend_from_slice(other); - } -} - -impl Slice { - #[inline] - pub fn as_encoded_bytes(&self) -> &[u8] { - self.inner.as_bytes() - } - - #[inline] - pub unsafe fn from_encoded_bytes_unchecked(s: &[u8]) -> &Slice { - unsafe { mem::transmute(Wtf8::from_bytes_unchecked(s)) } - } - - #[track_caller] - pub fn check_public_boundary(&self, index: usize) { - check_utf8_boundary(&self.inner, index); - } - - #[inline] - pub fn from_str(s: &str) -> &Slice { - unsafe { mem::transmute(Wtf8::from_str(s)) } - } - - pub fn to_str(&self) -> Result<&str, crate::str::Utf8Error> { - self.inner.as_str() - } - - pub fn to_string_lossy(&self) -> Cow<'_, str> { - self.inner.to_string_lossy() - } - - pub fn to_owned(&self) -> Buf { - Buf { inner: self.inner.to_owned() } - } - - pub fn clone_into(&self, buf: &mut Buf) { - self.inner.clone_into(&mut buf.inner) - } - - #[inline] - pub fn into_box(&self) -> Box { - unsafe { mem::transmute(self.inner.into_box()) } - } - - pub fn empty_box() -> Box { - unsafe { mem::transmute(Wtf8::empty_box()) } - } - - #[inline] - pub fn into_arc(&self) -> Arc { - let arc = self.inner.into_arc(); - unsafe { Arc::from_raw(Arc::into_raw(arc) as *const Slice) } - } - - #[inline] - pub fn into_rc(&self) -> Rc { - let rc = self.inner.into_rc(); - unsafe { Rc::from_raw(Rc::into_raw(rc) as *const Slice) } - } - - #[inline] - pub fn make_ascii_lowercase(&mut self) { - self.inner.make_ascii_lowercase() - } - - #[inline] - pub fn make_ascii_uppercase(&mut self) { - self.inner.make_ascii_uppercase() - } - - #[inline] - pub fn to_ascii_lowercase(&self) -> Buf { - Buf { inner: self.inner.to_ascii_lowercase() } - } - - #[inline] - pub fn to_ascii_uppercase(&self) -> Buf { - Buf { inner: self.inner.to_ascii_uppercase() } - } - - #[inline] - pub fn is_ascii(&self) -> bool { - self.inner.is_ascii() - } - - #[inline] - pub fn eq_ignore_ascii_case(&self, other: &Self) -> bool { - self.inner.eq_ignore_ascii_case(&other.inner) - } -} - -#[unstable(feature = "clone_to_uninit", issue = "126799")] -unsafe impl CloneToUninit for Slice { - #[inline] - #[cfg_attr(debug_assertions, track_caller)] - unsafe fn clone_to_uninit(&self, dst: *mut Self) { - // SAFETY: we're just a wrapper around Wtf8 - unsafe { self.inner.clone_to_uninit(addr_of_mut!((*dst).inner)) } - } -} diff --git a/library/std/src/sys/pal/unix/fs.rs b/library/std/src/sys/pal/unix/fs.rs index 7fa147c9754b9..5e12e58fca83a 100644 --- a/library/std/src/sys/pal/unix/fs.rs +++ b/library/std/src/sys/pal/unix/fs.rs @@ -4,6 +4,9 @@ #[cfg(test)] mod tests; +use alloc::ffi::os_str::os_str_ext_unix::OsStringExt; +use core::ffi::os_str::os_str_ext_unix::OsStrExt; + #[cfg(all(target_os = "linux", target_env = "gnu"))] use libc::c_char; #[cfg(any( diff --git a/library/std/src/sys/pal/unix/os.rs b/library/std/src/sys/pal/unix/os.rs index a785b97ac8dc5..8514a1fbb9c8b 100644 --- a/library/std/src/sys/pal/unix/os.rs +++ b/library/std/src/sys/pal/unix/os.rs @@ -5,6 +5,8 @@ #[cfg(test)] mod tests; +use alloc::ffi::os_str::os_str_ext_unix::OsStringExt; +use core::ffi::os_str::os_str_ext_unix::OsStrExt; use core::slice::memchr; use libc::{c_char, c_int, c_void}; diff --git a/library/std/src/sys/pal/unix/process/process_common.rs b/library/std/src/sys/pal/unix/process/process_common.rs index fec825054195a..af96db64da2f9 100644 --- a/library/std/src/sys/pal/unix/process/process_common.rs +++ b/library/std/src/sys/pal/unix/process/process_common.rs @@ -1,6 +1,9 @@ #[cfg(all(test, not(target_os = "emscripten")))] mod tests; +use alloc::ffi::os_str::os_str_ext_unix::OsStringExt; +use core::ffi::os_str::os_str_ext_unix::OsStrExt; + use libc::{c_char, c_int, gid_t, pid_t, uid_t, EXIT_FAILURE, EXIT_SUCCESS}; use crate::collections::BTreeMap; diff --git a/library/std/src/sys/pal/windows/args.rs b/library/std/src/sys/pal/windows/args.rs index 66e75a8357149..efd6b63030b84 100644 --- a/library/std/src/sys/pal/windows/args.rs +++ b/library/std/src/sys/pal/windows/args.rs @@ -15,7 +15,6 @@ use crate::sys::path::get_long_path; use crate::sys::process::ensure_no_nuls; use crate::sys::{c, to_u16s}; use crate::sys_common::wstr::WStrUnits; -use crate::sys_common::AsInner; use crate::{fmt, io, iter, vec}; /// This is the const equivalent to `NonZero::new(n).unwrap()` diff --git a/library/std/src/sys/path/mod.rs b/library/std/src/sys/path/mod.rs index 24a94ec782824..2203e3e942d36 100644 --- a/library/std/src/sys/path/mod.rs +++ b/library/std/src/sys/path/mod.rs @@ -1,18 +1,18 @@ cfg_if::cfg_if! { if #[cfg(target_os = "windows")] { mod windows; - pub use windows::*; + pub(crate) use windows::*; } else if #[cfg(all(target_vendor = "fortanix", target_env = "sgx"))] { mod sgx; - pub use sgx::*; + pub(crate) use sgx::*; } else if #[cfg(any( target_os = "uefi", target_os = "solid_asp3", ))] { mod unsupported_backslash; - pub use unsupported_backslash::*; + pub(crate) use unsupported_backslash::*; } else { mod unix; - pub use unix::*; + pub(crate) use unix::*; } } diff --git a/library/std/src/sys/path/sgx.rs b/library/std/src/sys/path/sgx.rs index c805c15e70245..31c1ce3af7e9c 100644 --- a/library/std/src/sys/path/sgx.rs +++ b/library/std/src/sys/path/sgx.rs @@ -1,25 +1,7 @@ -use crate::ffi::OsStr; use crate::io; -use crate::path::{Path, PathBuf, Prefix}; +use crate::path::{Path, PathBuf}; use crate::sys::unsupported; -#[inline] -pub fn is_sep_byte(b: u8) -> bool { - b == b'/' -} - -#[inline] -pub fn is_verbatim_sep(b: u8) -> bool { - b == b'/' -} - -pub fn parse_prefix(_: &OsStr) -> Option> { - None -} - -pub const MAIN_SEP_STR: &str = "/"; -pub const MAIN_SEP: char = '/'; - pub(crate) fn absolute(_path: &Path) -> io::Result { unsupported() } diff --git a/library/std/src/sys/path/unix.rs b/library/std/src/sys/path/unix.rs index 2a7c025c3c46a..4e13c23cb3b67 100644 --- a/library/std/src/sys/path/unix.rs +++ b/library/std/src/sys/path/unix.rs @@ -1,25 +1,6 @@ -use crate::ffi::OsStr; -use crate::path::{Path, PathBuf, Prefix}; +use crate::path::{Path, PathBuf}; use crate::{env, io}; -#[inline] -pub fn is_sep_byte(b: u8) -> bool { - b == b'/' -} - -#[inline] -pub fn is_verbatim_sep(b: u8) -> bool { - b == b'/' -} - -#[inline] -pub fn parse_prefix(_: &OsStr) -> Option> { - None -} - -pub const MAIN_SEP_STR: &str = "/"; -pub const MAIN_SEP: char = '/'; - /// Make a POSIX path absolute without changing its semantics. pub(crate) fn absolute(path: &Path) -> io::Result { // This is mostly a wrapper around collecting `Path::components`, with diff --git a/library/std/src/sys/path/unsupported_backslash.rs b/library/std/src/sys/path/unsupported_backslash.rs index 855f443678c6c..427d24353ed6d 100644 --- a/library/std/src/sys/path/unsupported_backslash.rs +++ b/library/std/src/sys/path/unsupported_backslash.rs @@ -1,26 +1,8 @@ #![forbid(unsafe_op_in_unsafe_fn)] -use crate::ffi::OsStr; use crate::io; -use crate::path::{Path, PathBuf, Prefix}; +use crate::path::{Path, PathBuf}; use crate::sys::unsupported; -#[inline] -pub fn is_sep_byte(b: u8) -> bool { - b == b'\\' -} - -#[inline] -pub fn is_verbatim_sep(b: u8) -> bool { - b == b'\\' -} - -pub fn parse_prefix(_: &OsStr) -> Option> { - None -} - -pub const MAIN_SEP_STR: &str = "\\"; -pub const MAIN_SEP: char = '\\'; - pub(crate) fn absolute(_path: &Path) -> io::Result { unsupported() } diff --git a/library/std/src/sys/path/windows.rs b/library/std/src/sys/path/windows.rs index 21841eb18cc0e..8dd1ce4479870 100644 --- a/library/std/src/sys/path/windows.rs +++ b/library/std/src/sys/path/windows.rs @@ -1,24 +1,10 @@ +use core::path::*; + use crate::ffi::{OsStr, OsString}; -use crate::path::{Path, PathBuf, Prefix}; +use crate::path::PathBuf; use crate::sys::pal::{c, fill_utf16_buf, os2path, to_u16s}; use crate::{io, ptr}; -#[cfg(test)] -mod tests; - -pub const MAIN_SEP_STR: &str = "\\"; -pub const MAIN_SEP: char = '\\'; - -#[inline] -pub fn is_sep_byte(b: u8) -> bool { - b == b'/' || b == b'\\' -} - -#[inline] -pub fn is_verbatim_sep(b: u8) -> bool { - b == b'\\' -} - /// Returns true if `path` looks like a lone filename. pub(crate) fn is_file_name(path: &OsStr) -> bool { !path.as_encoded_bytes().iter().copied().any(is_sep_byte) diff --git a/library/std/src/sys_common/mod.rs b/library/std/src/sys_common/mod.rs index 1c884f107beeb..66e4208e1f2ab 100644 --- a/library/std/src/sys_common/mod.rs +++ b/library/std/src/sys_common/mod.rs @@ -25,7 +25,6 @@ pub mod io; pub mod lazy_box; pub mod process; pub mod wstr; -pub mod wtf8; cfg_if::cfg_if! { if #[cfg(any( diff --git a/library/std/src/sys_common/wtf8.rs b/library/std/src/sys_common/wtf8.rs deleted file mode 100644 index 063451ad54e1c..0000000000000 --- a/library/std/src/sys_common/wtf8.rs +++ /dev/null @@ -1,1060 +0,0 @@ -//! Implementation of [the WTF-8 encoding](https://simonsapin.github.io/wtf-8/). -//! -//! This library uses Rust’s type system to maintain -//! [well-formedness](https://simonsapin.github.io/wtf-8/#well-formed), -//! like the `String` and `&str` types do for UTF-8. -//! -//! Since [WTF-8 must not be used -//! for interchange](https://simonsapin.github.io/wtf-8/#intended-audience), -//! this library deliberately does not provide access to the underlying bytes -//! of WTF-8 strings, -//! nor can it decode WTF-8 from arbitrary bytes. -//! WTF-8 strings can be obtained from UTF-8, UTF-16, or code points. - -// this module is imported from @SimonSapin's repo and has tons of dead code on -// unix (it's mostly used on windows), so don't worry about dead code here. -#![allow(dead_code)] - -#[cfg(test)] -mod tests; - -use core::char::{encode_utf16_raw, encode_utf8_raw}; -use core::clone::CloneToUninit; -use core::str::next_code_point; - -use crate::borrow::Cow; -use crate::collections::TryReserveError; -use crate::hash::{Hash, Hasher}; -use crate::iter::FusedIterator; -use crate::ptr::addr_of_mut; -use crate::rc::Rc; -use crate::sync::Arc; -use crate::sys_common::AsInner; -use crate::{fmt, mem, ops, slice, str}; - -const UTF8_REPLACEMENT_CHARACTER: &str = "\u{FFFD}"; - -/// A Unicode code point: from U+0000 to U+10FFFF. -/// -/// Compares with the `char` type, -/// which represents a Unicode scalar value: -/// a code point that is not a surrogate (U+D800 to U+DFFF). -#[derive(Eq, PartialEq, Ord, PartialOrd, Clone, Copy)] -pub struct CodePoint { - value: u32, -} - -/// Format the code point as `U+` followed by four to six hexadecimal digits. -/// Example: `U+1F4A9` -impl fmt::Debug for CodePoint { - #[inline] - fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(formatter, "U+{:04X}", self.value) - } -} - -impl CodePoint { - /// Unsafely creates a new `CodePoint` without checking the value. - /// - /// Only use when `value` is known to be less than or equal to 0x10FFFF. - #[inline] - pub unsafe fn from_u32_unchecked(value: u32) -> CodePoint { - CodePoint { value } - } - - /// Creates a new `CodePoint` if the value is a valid code point. - /// - /// Returns `None` if `value` is above 0x10FFFF. - #[inline] - pub fn from_u32(value: u32) -> Option { - match value { - 0..=0x10FFFF => Some(CodePoint { value }), - _ => None, - } - } - - /// Creates a new `CodePoint` from a `char`. - /// - /// Since all Unicode scalar values are code points, this always succeeds. - #[inline] - pub fn from_char(value: char) -> CodePoint { - CodePoint { value: value as u32 } - } - - /// Returns the numeric value of the code point. - #[inline] - pub fn to_u32(&self) -> u32 { - self.value - } - - /// Returns the numeric value of the code point if it is a leading surrogate. - #[inline] - pub fn to_lead_surrogate(&self) -> Option { - match self.value { - lead @ 0xD800..=0xDBFF => Some(lead as u16), - _ => None, - } - } - - /// Returns the numeric value of the code point if it is a trailing surrogate. - #[inline] - pub fn to_trail_surrogate(&self) -> Option { - match self.value { - trail @ 0xDC00..=0xDFFF => Some(trail as u16), - _ => None, - } - } - - /// Optionally returns a Unicode scalar value for the code point. - /// - /// Returns `None` if the code point is a surrogate (from U+D800 to U+DFFF). - #[inline] - pub fn to_char(&self) -> Option { - match self.value { - 0xD800..=0xDFFF => None, - _ => Some(unsafe { char::from_u32_unchecked(self.value) }), - } - } - - /// Returns a Unicode scalar value for the code point. - /// - /// Returns `'\u{FFFD}'` (the replacement character “�”) - /// if the code point is a surrogate (from U+D800 to U+DFFF). - #[inline] - pub fn to_char_lossy(&self) -> char { - self.to_char().unwrap_or('\u{FFFD}') - } -} - -/// An owned, growable string of well-formed WTF-8 data. -/// -/// Similar to `String`, but can additionally contain surrogate code points -/// if they’re not in a surrogate pair. -#[derive(Eq, PartialEq, Ord, PartialOrd, Clone)] -pub struct Wtf8Buf { - bytes: Vec, - - /// Do we know that `bytes` holds a valid UTF-8 encoding? We can easily - /// know this if we're constructed from a `String` or `&str`. - /// - /// It is possible for `bytes` to have valid UTF-8 without this being - /// set, such as when we're concatenating `&Wtf8`'s and surrogates become - /// paired, as we don't bother to rescan the entire string. - is_known_utf8: bool, -} - -impl ops::Deref for Wtf8Buf { - type Target = Wtf8; - - fn deref(&self) -> &Wtf8 { - self.as_slice() - } -} - -impl ops::DerefMut for Wtf8Buf { - fn deref_mut(&mut self) -> &mut Wtf8 { - self.as_mut_slice() - } -} - -/// Format the string with double quotes, -/// and surrogates as `\u` followed by four hexadecimal digits. -/// Example: `"a\u{D800}"` for a string with code points [U+0061, U+D800] -impl fmt::Debug for Wtf8Buf { - #[inline] - fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { - fmt::Debug::fmt(&**self, formatter) - } -} - -impl Wtf8Buf { - /// Creates a new, empty WTF-8 string. - #[inline] - pub fn new() -> Wtf8Buf { - Wtf8Buf { bytes: Vec::new(), is_known_utf8: true } - } - - /// Creates a new, empty WTF-8 string with pre-allocated capacity for `capacity` bytes. - #[inline] - pub fn with_capacity(capacity: usize) -> Wtf8Buf { - Wtf8Buf { bytes: Vec::with_capacity(capacity), is_known_utf8: true } - } - - /// Creates a WTF-8 string from a WTF-8 byte vec. - /// - /// Since the byte vec is not checked for valid WTF-8, this functions is - /// marked unsafe. - #[inline] - pub unsafe fn from_bytes_unchecked(value: Vec) -> Wtf8Buf { - Wtf8Buf { bytes: value, is_known_utf8: false } - } - - /// Creates a WTF-8 string from a UTF-8 `String`. - /// - /// This takes ownership of the `String` and does not copy. - /// - /// Since WTF-8 is a superset of UTF-8, this always succeeds. - #[inline] - pub fn from_string(string: String) -> Wtf8Buf { - Wtf8Buf { bytes: string.into_bytes(), is_known_utf8: true } - } - - /// Creates a WTF-8 string from a UTF-8 `&str` slice. - /// - /// This copies the content of the slice. - /// - /// Since WTF-8 is a superset of UTF-8, this always succeeds. - #[inline] - pub fn from_str(str: &str) -> Wtf8Buf { - Wtf8Buf { bytes: <[_]>::to_vec(str.as_bytes()), is_known_utf8: true } - } - - pub fn clear(&mut self) { - self.bytes.clear(); - self.is_known_utf8 = true; - } - - /// Creates a WTF-8 string from a potentially ill-formed UTF-16 slice of 16-bit code units. - /// - /// This is lossless: calling `.encode_wide()` on the resulting string - /// will always return the original code units. - pub fn from_wide(v: &[u16]) -> Wtf8Buf { - let mut string = Wtf8Buf::with_capacity(v.len()); - for item in char::decode_utf16(v.iter().cloned()) { - match item { - Ok(ch) => string.push_char(ch), - Err(surrogate) => { - let surrogate = surrogate.unpaired_surrogate(); - // Surrogates are known to be in the code point range. - let code_point = unsafe { CodePoint::from_u32_unchecked(surrogate as u32) }; - // The string will now contain an unpaired surrogate. - string.is_known_utf8 = false; - // Skip the WTF-8 concatenation check, - // surrogate pairs are already decoded by decode_utf16 - string.push_code_point_unchecked(code_point); - } - } - } - string - } - - /// Copied from String::push - /// This does **not** include the WTF-8 concatenation check or `is_known_utf8` check. - fn push_code_point_unchecked(&mut self, code_point: CodePoint) { - let mut bytes = [0; 4]; - let bytes = encode_utf8_raw(code_point.value, &mut bytes); - self.bytes.extend_from_slice(bytes) - } - - #[inline] - pub fn as_slice(&self) -> &Wtf8 { - unsafe { Wtf8::from_bytes_unchecked(&self.bytes) } - } - - #[inline] - pub fn as_mut_slice(&mut self) -> &mut Wtf8 { - // Safety: `Wtf8` doesn't expose any way to mutate the bytes that would - // cause them to change from well-formed UTF-8 to ill-formed UTF-8, - // which would break the assumptions of the `is_known_utf8` field. - unsafe { Wtf8::from_mut_bytes_unchecked(&mut self.bytes) } - } - - /// Reserves capacity for at least `additional` more bytes to be inserted - /// in the given `Wtf8Buf`. - /// The collection may reserve more space to avoid frequent reallocations. - /// - /// # Panics - /// - /// Panics if the new capacity overflows `usize`. - #[inline] - pub fn reserve(&mut self, additional: usize) { - self.bytes.reserve(additional) - } - - /// Tries to reserve capacity for at least `additional` more length units - /// in the given `Wtf8Buf`. The `Wtf8Buf` may reserve more space to avoid - /// frequent reallocations. After calling `try_reserve`, capacity will be - /// greater than or equal to `self.len() + additional`. Does nothing if - /// capacity is already sufficient. This method preserves the contents even - /// if an error occurs. - /// - /// # Errors - /// - /// If the capacity overflows, or the allocator reports a failure, then an error - /// is returned. - #[inline] - pub fn try_reserve(&mut self, additional: usize) -> Result<(), TryReserveError> { - self.bytes.try_reserve(additional) - } - - #[inline] - pub fn reserve_exact(&mut self, additional: usize) { - self.bytes.reserve_exact(additional) - } - - /// Tries to reserve the minimum capacity for exactly `additional` - /// length units in the given `Wtf8Buf`. After calling - /// `try_reserve_exact`, capacity will be greater than or equal to - /// `self.len() + additional` if it returns `Ok(())`. - /// Does nothing if the capacity is already sufficient. - /// - /// Note that the allocator may give the `Wtf8Buf` more space than it - /// requests. Therefore, capacity can not be relied upon to be precisely - /// minimal. Prefer [`try_reserve`] if future insertions are expected. - /// - /// [`try_reserve`]: Wtf8Buf::try_reserve - /// - /// # Errors - /// - /// If the capacity overflows, or the allocator reports a failure, then an error - /// is returned. - #[inline] - pub fn try_reserve_exact(&mut self, additional: usize) -> Result<(), TryReserveError> { - self.bytes.try_reserve_exact(additional) - } - - #[inline] - pub fn shrink_to_fit(&mut self) { - self.bytes.shrink_to_fit() - } - - #[inline] - pub fn shrink_to(&mut self, min_capacity: usize) { - self.bytes.shrink_to(min_capacity) - } - - #[inline] - pub fn leak<'a>(self) -> &'a mut Wtf8 { - unsafe { Wtf8::from_mut_bytes_unchecked(self.bytes.leak()) } - } - - /// Returns the number of bytes that this string buffer can hold without reallocating. - #[inline] - pub fn capacity(&self) -> usize { - self.bytes.capacity() - } - - /// Append a UTF-8 slice at the end of the string. - #[inline] - pub fn push_str(&mut self, other: &str) { - self.bytes.extend_from_slice(other.as_bytes()) - } - - /// Append a WTF-8 slice at the end of the string. - /// - /// This replaces newly paired surrogates at the boundary - /// with a supplementary code point, - /// like concatenating ill-formed UTF-16 strings effectively would. - #[inline] - pub fn push_wtf8(&mut self, other: &Wtf8) { - match ((&*self).final_lead_surrogate(), other.initial_trail_surrogate()) { - // Replace newly paired surrogates by a supplementary code point. - (Some(lead), Some(trail)) => { - let len_without_lead_surrogate = self.len() - 3; - self.bytes.truncate(len_without_lead_surrogate); - let other_without_trail_surrogate = &other.bytes[3..]; - // 4 bytes for the supplementary code point - self.bytes.reserve(4 + other_without_trail_surrogate.len()); - self.push_char(decode_surrogate_pair(lead, trail)); - self.bytes.extend_from_slice(other_without_trail_surrogate); - } - _ => { - // If we'll be pushing a string containing a surrogate, we may - // no longer have UTF-8. - if other.next_surrogate(0).is_some() { - self.is_known_utf8 = false; - } - - self.bytes.extend_from_slice(&other.bytes); - } - } - } - - /// Append a Unicode scalar value at the end of the string. - #[inline] - pub fn push_char(&mut self, c: char) { - self.push_code_point_unchecked(CodePoint::from_char(c)) - } - - /// Append a code point at the end of the string. - /// - /// This replaces newly paired surrogates at the boundary - /// with a supplementary code point, - /// like concatenating ill-formed UTF-16 strings effectively would. - #[inline] - pub fn push(&mut self, code_point: CodePoint) { - if let Some(trail) = code_point.to_trail_surrogate() { - if let Some(lead) = (&*self).final_lead_surrogate() { - let len_without_lead_surrogate = self.len() - 3; - self.bytes.truncate(len_without_lead_surrogate); - self.push_char(decode_surrogate_pair(lead, trail)); - return; - } - - // We're pushing a trailing surrogate. - self.is_known_utf8 = false; - } else if code_point.to_lead_surrogate().is_some() { - // We're pushing a leading surrogate. - self.is_known_utf8 = false; - } - - // No newly paired surrogates at the boundary. - self.push_code_point_unchecked(code_point) - } - - /// Shortens a string to the specified length. - /// - /// # Panics - /// - /// Panics if `new_len` > current length, - /// or if `new_len` is not a code point boundary. - #[inline] - pub fn truncate(&mut self, new_len: usize) { - assert!(is_code_point_boundary(self, new_len)); - self.bytes.truncate(new_len) - } - - /// Consumes the WTF-8 string and tries to convert it to a vec of bytes. - #[inline] - pub fn into_bytes(self) -> Vec { - self.bytes - } - - /// Consumes the WTF-8 string and tries to convert it to UTF-8. - /// - /// This does not copy the data. - /// - /// If the contents are not well-formed UTF-8 - /// (that is, if the string contains surrogates), - /// the original WTF-8 string is returned instead. - pub fn into_string(self) -> Result { - if self.is_known_utf8 || self.next_surrogate(0).is_none() { - Ok(unsafe { String::from_utf8_unchecked(self.bytes) }) - } else { - Err(self) - } - } - - /// Consumes the WTF-8 string and converts it lossily to UTF-8. - /// - /// This does not copy the data (but may overwrite parts of it in place). - /// - /// Surrogates are replaced with `"\u{FFFD}"` (the replacement character “�”) - pub fn into_string_lossy(mut self) -> String { - // Fast path: If we already have UTF-8, we can return it immediately. - if self.is_known_utf8 { - return unsafe { String::from_utf8_unchecked(self.bytes) }; - } - - let mut pos = 0; - loop { - match self.next_surrogate(pos) { - Some((surrogate_pos, _)) => { - pos = surrogate_pos + 3; - self.bytes[surrogate_pos..pos] - .copy_from_slice(UTF8_REPLACEMENT_CHARACTER.as_bytes()); - } - None => return unsafe { String::from_utf8_unchecked(self.bytes) }, - } - } - } - - /// Converts this `Wtf8Buf` into a boxed `Wtf8`. - #[inline] - pub fn into_box(self) -> Box { - // SAFETY: relies on `Wtf8` being `repr(transparent)`. - unsafe { mem::transmute(self.bytes.into_boxed_slice()) } - } - - /// Converts a `Box` into a `Wtf8Buf`. - pub fn from_box(boxed: Box) -> Wtf8Buf { - let bytes: Box<[u8]> = unsafe { mem::transmute(boxed) }; - Wtf8Buf { bytes: bytes.into_vec(), is_known_utf8: false } - } - - /// Provides plumbing to core `Vec::extend_from_slice`. - /// More well behaving alternative to allowing outer types - /// full mutable access to the core `Vec`. - #[inline] - pub(crate) fn extend_from_slice(&mut self, other: &[u8]) { - self.bytes.extend_from_slice(other); - self.is_known_utf8 = false; - } -} - -/// Creates a new WTF-8 string from an iterator of code points. -/// -/// This replaces surrogate code point pairs with supplementary code points, -/// like concatenating ill-formed UTF-16 strings effectively would. -impl FromIterator for Wtf8Buf { - fn from_iter>(iter: T) -> Wtf8Buf { - let mut string = Wtf8Buf::new(); - string.extend(iter); - string - } -} - -/// Append code points from an iterator to the string. -/// -/// This replaces surrogate code point pairs with supplementary code points, -/// like concatenating ill-formed UTF-16 strings effectively would. -impl Extend for Wtf8Buf { - fn extend>(&mut self, iter: T) { - let iterator = iter.into_iter(); - let (low, _high) = iterator.size_hint(); - // Lower bound of one byte per code point (ASCII only) - self.bytes.reserve(low); - iterator.for_each(move |code_point| self.push(code_point)); - } - - #[inline] - fn extend_one(&mut self, code_point: CodePoint) { - self.push(code_point); - } - - #[inline] - fn extend_reserve(&mut self, additional: usize) { - // Lower bound of one byte per code point (ASCII only) - self.bytes.reserve(additional); - } -} - -/// A borrowed slice of well-formed WTF-8 data. -/// -/// Similar to `&str`, but can additionally contain surrogate code points -/// if they’re not in a surrogate pair. -#[derive(Eq, Ord, PartialEq, PartialOrd)] -#[repr(transparent)] -pub struct Wtf8 { - bytes: [u8], -} - -impl AsInner<[u8]> for Wtf8 { - #[inline] - fn as_inner(&self) -> &[u8] { - &self.bytes - } -} - -/// Format the slice with double quotes, -/// and surrogates as `\u` followed by four hexadecimal digits. -/// Example: `"a\u{D800}"` for a slice with code points [U+0061, U+D800] -impl fmt::Debug for Wtf8 { - fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { - fn write_str_escaped(f: &mut fmt::Formatter<'_>, s: &str) -> fmt::Result { - use crate::fmt::Write; - for c in s.chars().flat_map(|c| c.escape_debug()) { - f.write_char(c)? - } - Ok(()) - } - - formatter.write_str("\"")?; - let mut pos = 0; - while let Some((surrogate_pos, surrogate)) = self.next_surrogate(pos) { - write_str_escaped(formatter, unsafe { - str::from_utf8_unchecked(&self.bytes[pos..surrogate_pos]) - })?; - write!(formatter, "\\u{{{:x}}}", surrogate)?; - pos = surrogate_pos + 3; - } - write_str_escaped(formatter, unsafe { str::from_utf8_unchecked(&self.bytes[pos..]) })?; - formatter.write_str("\"") - } -} - -impl fmt::Display for Wtf8 { - fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { - let wtf8_bytes = &self.bytes; - let mut pos = 0; - loop { - match self.next_surrogate(pos) { - Some((surrogate_pos, _)) => { - formatter.write_str(unsafe { - str::from_utf8_unchecked(&wtf8_bytes[pos..surrogate_pos]) - })?; - formatter.write_str(UTF8_REPLACEMENT_CHARACTER)?; - pos = surrogate_pos + 3; - } - None => { - let s = unsafe { str::from_utf8_unchecked(&wtf8_bytes[pos..]) }; - if pos == 0 { return s.fmt(formatter) } else { return formatter.write_str(s) } - } - } - } - } -} - -impl Wtf8 { - /// Creates a WTF-8 slice from a UTF-8 `&str` slice. - /// - /// Since WTF-8 is a superset of UTF-8, this always succeeds. - #[inline] - pub fn from_str(value: &str) -> &Wtf8 { - unsafe { Wtf8::from_bytes_unchecked(value.as_bytes()) } - } - - /// Creates a WTF-8 slice from a WTF-8 byte slice. - /// - /// Since the byte slice is not checked for valid WTF-8, this functions is - /// marked unsafe. - #[inline] - pub unsafe fn from_bytes_unchecked(value: &[u8]) -> &Wtf8 { - // SAFETY: start with &[u8], end with fancy &[u8] - unsafe { &*(value as *const [u8] as *const Wtf8) } - } - - /// Creates a mutable WTF-8 slice from a mutable WTF-8 byte slice. - /// - /// Since the byte slice is not checked for valid WTF-8, this functions is - /// marked unsafe. - #[inline] - unsafe fn from_mut_bytes_unchecked(value: &mut [u8]) -> &mut Wtf8 { - // SAFETY: start with &mut [u8], end with fancy &mut [u8] - unsafe { &mut *(value as *mut [u8] as *mut Wtf8) } - } - - /// Returns the length, in WTF-8 bytes. - #[inline] - pub fn len(&self) -> usize { - self.bytes.len() - } - - #[inline] - pub fn is_empty(&self) -> bool { - self.bytes.is_empty() - } - - /// Returns the code point at `position` if it is in the ASCII range, - /// or `b'\xFF'` otherwise. - /// - /// # Panics - /// - /// Panics if `position` is beyond the end of the string. - #[inline] - pub fn ascii_byte_at(&self, position: usize) -> u8 { - match self.bytes[position] { - ascii_byte @ 0x00..=0x7F => ascii_byte, - _ => 0xFF, - } - } - - /// Returns an iterator for the string’s code points. - #[inline] - pub fn code_points(&self) -> Wtf8CodePoints<'_> { - Wtf8CodePoints { bytes: self.bytes.iter() } - } - - /// Access raw bytes of WTF-8 data - #[inline] - pub fn as_bytes(&self) -> &[u8] { - &self.bytes - } - - /// Tries to convert the string to UTF-8 and return a `&str` slice. - /// - /// Returns `None` if the string contains surrogates. - /// - /// This does not copy the data. - #[inline] - pub fn as_str(&self) -> Result<&str, str::Utf8Error> { - str::from_utf8(&self.bytes) - } - - /// Creates an owned `Wtf8Buf` from a borrowed `Wtf8`. - pub fn to_owned(&self) -> Wtf8Buf { - Wtf8Buf { bytes: self.bytes.to_vec(), is_known_utf8: false } - } - - /// Lossily converts the string to UTF-8. - /// Returns a UTF-8 `&str` slice if the contents are well-formed in UTF-8. - /// - /// Surrogates are replaced with `"\u{FFFD}"` (the replacement character “�”). - /// - /// This only copies the data if necessary (if it contains any surrogate). - pub fn to_string_lossy(&self) -> Cow<'_, str> { - let surrogate_pos = match self.next_surrogate(0) { - None => return Cow::Borrowed(unsafe { str::from_utf8_unchecked(&self.bytes) }), - Some((pos, _)) => pos, - }; - let wtf8_bytes = &self.bytes; - let mut utf8_bytes = Vec::with_capacity(self.len()); - utf8_bytes.extend_from_slice(&wtf8_bytes[..surrogate_pos]); - utf8_bytes.extend_from_slice(UTF8_REPLACEMENT_CHARACTER.as_bytes()); - let mut pos = surrogate_pos + 3; - loop { - match self.next_surrogate(pos) { - Some((surrogate_pos, _)) => { - utf8_bytes.extend_from_slice(&wtf8_bytes[pos..surrogate_pos]); - utf8_bytes.extend_from_slice(UTF8_REPLACEMENT_CHARACTER.as_bytes()); - pos = surrogate_pos + 3; - } - None => { - utf8_bytes.extend_from_slice(&wtf8_bytes[pos..]); - return Cow::Owned(unsafe { String::from_utf8_unchecked(utf8_bytes) }); - } - } - } - } - - /// Converts the WTF-8 string to potentially ill-formed UTF-16 - /// and return an iterator of 16-bit code units. - /// - /// This is lossless: - /// calling `Wtf8Buf::from_ill_formed_utf16` on the resulting code units - /// would always return the original WTF-8 string. - #[inline] - pub fn encode_wide(&self) -> EncodeWide<'_> { - EncodeWide { code_points: self.code_points(), extra: 0 } - } - - #[inline] - fn next_surrogate(&self, mut pos: usize) -> Option<(usize, u16)> { - let mut iter = self.bytes[pos..].iter(); - loop { - let b = *iter.next()?; - if b < 0x80 { - pos += 1; - } else if b < 0xE0 { - iter.next(); - pos += 2; - } else if b == 0xED { - match (iter.next(), iter.next()) { - (Some(&b2), Some(&b3)) if b2 >= 0xA0 => { - return Some((pos, decode_surrogate(b2, b3))); - } - _ => pos += 3, - } - } else if b < 0xF0 { - iter.next(); - iter.next(); - pos += 3; - } else { - iter.next(); - iter.next(); - iter.next(); - pos += 4; - } - } - } - - #[inline] - fn final_lead_surrogate(&self) -> Option { - match self.bytes { - [.., 0xED, b2 @ 0xA0..=0xAF, b3] => Some(decode_surrogate(b2, b3)), - _ => None, - } - } - - #[inline] - fn initial_trail_surrogate(&self) -> Option { - match self.bytes { - [0xED, b2 @ 0xB0..=0xBF, b3, ..] => Some(decode_surrogate(b2, b3)), - _ => None, - } - } - - pub fn clone_into(&self, buf: &mut Wtf8Buf) { - buf.is_known_utf8 = false; - self.bytes.clone_into(&mut buf.bytes); - } - - /// Boxes this `Wtf8`. - #[inline] - pub fn into_box(&self) -> Box { - let boxed: Box<[u8]> = self.bytes.into(); - unsafe { mem::transmute(boxed) } - } - - /// Creates a boxed, empty `Wtf8`. - pub fn empty_box() -> Box { - let boxed: Box<[u8]> = Default::default(); - unsafe { mem::transmute(boxed) } - } - - #[inline] - pub fn into_arc(&self) -> Arc { - let arc: Arc<[u8]> = Arc::from(&self.bytes); - unsafe { Arc::from_raw(Arc::into_raw(arc) as *const Wtf8) } - } - - #[inline] - pub fn into_rc(&self) -> Rc { - let rc: Rc<[u8]> = Rc::from(&self.bytes); - unsafe { Rc::from_raw(Rc::into_raw(rc) as *const Wtf8) } - } - - #[inline] - pub fn make_ascii_lowercase(&mut self) { - self.bytes.make_ascii_lowercase() - } - - #[inline] - pub fn make_ascii_uppercase(&mut self) { - self.bytes.make_ascii_uppercase() - } - - #[inline] - pub fn to_ascii_lowercase(&self) -> Wtf8Buf { - Wtf8Buf { bytes: self.bytes.to_ascii_lowercase(), is_known_utf8: false } - } - - #[inline] - pub fn to_ascii_uppercase(&self) -> Wtf8Buf { - Wtf8Buf { bytes: self.bytes.to_ascii_uppercase(), is_known_utf8: false } - } - - #[inline] - pub fn is_ascii(&self) -> bool { - self.bytes.is_ascii() - } - - #[inline] - pub fn eq_ignore_ascii_case(&self, other: &Self) -> bool { - self.bytes.eq_ignore_ascii_case(&other.bytes) - } -} - -/// Returns a slice of the given string for the byte range \[`begin`..`end`). -/// -/// # Panics -/// -/// Panics when `begin` and `end` do not point to code point boundaries, -/// or point beyond the end of the string. -impl ops::Index> for Wtf8 { - type Output = Wtf8; - - #[inline] - fn index(&self, range: ops::Range) -> &Wtf8 { - // is_code_point_boundary checks that the index is in [0, .len()] - if range.start <= range.end - && is_code_point_boundary(self, range.start) - && is_code_point_boundary(self, range.end) - { - unsafe { slice_unchecked(self, range.start, range.end) } - } else { - slice_error_fail(self, range.start, range.end) - } - } -} - -/// Returns a slice of the given string from byte `begin` to its end. -/// -/// # Panics -/// -/// Panics when `begin` is not at a code point boundary, -/// or is beyond the end of the string. -impl ops::Index> for Wtf8 { - type Output = Wtf8; - - #[inline] - fn index(&self, range: ops::RangeFrom) -> &Wtf8 { - // is_code_point_boundary checks that the index is in [0, .len()] - if is_code_point_boundary(self, range.start) { - unsafe { slice_unchecked(self, range.start, self.len()) } - } else { - slice_error_fail(self, range.start, self.len()) - } - } -} - -/// Returns a slice of the given string from its beginning to byte `end`. -/// -/// # Panics -/// -/// Panics when `end` is not at a code point boundary, -/// or is beyond the end of the string. -impl ops::Index> for Wtf8 { - type Output = Wtf8; - - #[inline] - fn index(&self, range: ops::RangeTo) -> &Wtf8 { - // is_code_point_boundary checks that the index is in [0, .len()] - if is_code_point_boundary(self, range.end) { - unsafe { slice_unchecked(self, 0, range.end) } - } else { - slice_error_fail(self, 0, range.end) - } - } -} - -impl ops::Index for Wtf8 { - type Output = Wtf8; - - #[inline] - fn index(&self, _range: ops::RangeFull) -> &Wtf8 { - self - } -} - -#[inline] -fn decode_surrogate(second_byte: u8, third_byte: u8) -> u16 { - // The first byte is assumed to be 0xED - 0xD800 | (second_byte as u16 & 0x3F) << 6 | third_byte as u16 & 0x3F -} - -#[inline] -fn decode_surrogate_pair(lead: u16, trail: u16) -> char { - let code_point = 0x10000 + ((((lead - 0xD800) as u32) << 10) | (trail - 0xDC00) as u32); - unsafe { char::from_u32_unchecked(code_point) } -} - -/// Copied from str::is_char_boundary -#[inline] -pub fn is_code_point_boundary(slice: &Wtf8, index: usize) -> bool { - if index == 0 { - return true; - } - match slice.bytes.get(index) { - None => index == slice.len(), - Some(&b) => (b as i8) >= -0x40, - } -} - -/// Verify that `index` is at the edge of either a valid UTF-8 codepoint -/// (i.e. a codepoint that's not a surrogate) or of the whole string. -/// -/// These are the cases currently permitted by `OsStr::slice_encoded_bytes`. -/// Splitting between surrogates is valid as far as WTF-8 is concerned, but -/// we do not permit it in the public API because WTF-8 is considered an -/// implementation detail. -#[track_caller] -#[inline] -pub fn check_utf8_boundary(slice: &Wtf8, index: usize) { - if index == 0 { - return; - } - match slice.bytes.get(index) { - Some(0xED) => (), // Might be a surrogate - Some(&b) if (b as i8) >= -0x40 => return, - Some(_) => panic!("byte index {index} is not a codepoint boundary"), - None if index == slice.len() => return, - None => panic!("byte index {index} is out of bounds"), - } - if slice.bytes[index + 1] >= 0xA0 { - // There's a surrogate after index. Now check before index. - if index >= 3 && slice.bytes[index - 3] == 0xED && slice.bytes[index - 2] >= 0xA0 { - panic!("byte index {index} lies between surrogate codepoints"); - } - } -} - -/// Copied from core::str::raw::slice_unchecked -#[inline] -pub unsafe fn slice_unchecked(s: &Wtf8, begin: usize, end: usize) -> &Wtf8 { - // SAFETY: memory layout of a &[u8] and &Wtf8 are the same - unsafe { - let len = end - begin; - let start = s.as_bytes().as_ptr().add(begin); - Wtf8::from_bytes_unchecked(slice::from_raw_parts(start, len)) - } -} - -/// Copied from core::str::raw::slice_error_fail -#[inline(never)] -pub fn slice_error_fail(s: &Wtf8, begin: usize, end: usize) -> ! { - assert!(begin <= end); - panic!("index {begin} and/or {end} in `{s:?}` do not lie on character boundary"); -} - -/// Iterator for the code points of a WTF-8 string. -/// -/// Created with the method `.code_points()`. -#[derive(Clone)] -pub struct Wtf8CodePoints<'a> { - bytes: slice::Iter<'a, u8>, -} - -impl<'a> Iterator for Wtf8CodePoints<'a> { - type Item = CodePoint; - - #[inline] - fn next(&mut self) -> Option { - // SAFETY: `self.bytes` has been created from a WTF-8 string - unsafe { next_code_point(&mut self.bytes).map(|c| CodePoint { value: c }) } - } - - #[inline] - fn size_hint(&self) -> (usize, Option) { - let len = self.bytes.len(); - (len.saturating_add(3) / 4, Some(len)) - } -} - -/// Generates a wide character sequence for potentially ill-formed UTF-16. -#[stable(feature = "rust1", since = "1.0.0")] -#[derive(Clone)] -pub struct EncodeWide<'a> { - code_points: Wtf8CodePoints<'a>, - extra: u16, -} - -// Copied from libunicode/u_str.rs -#[stable(feature = "rust1", since = "1.0.0")] -impl<'a> Iterator for EncodeWide<'a> { - type Item = u16; - - #[inline] - fn next(&mut self) -> Option { - if self.extra != 0 { - let tmp = self.extra; - self.extra = 0; - return Some(tmp); - } - - let mut buf = [0; 2]; - self.code_points.next().map(|code_point| { - let n = encode_utf16_raw(code_point.value, &mut buf).len(); - if n == 2 { - self.extra = buf[1]; - } - buf[0] - }) - } - - #[inline] - fn size_hint(&self) -> (usize, Option) { - let (low, high) = self.code_points.size_hint(); - let ext = (self.extra != 0) as usize; - // every code point gets either one u16 or two u16, - // so this iterator is between 1 or 2 times as - // long as the underlying iterator. - (low + ext, high.and_then(|n| n.checked_mul(2)).and_then(|n| n.checked_add(ext))) - } -} - -#[stable(feature = "encode_wide_fused_iterator", since = "1.62.0")] -impl FusedIterator for EncodeWide<'_> {} - -impl Hash for CodePoint { - #[inline] - fn hash(&self, state: &mut H) { - self.value.hash(state) - } -} - -impl Hash for Wtf8Buf { - #[inline] - fn hash(&self, state: &mut H) { - state.write(&self.bytes); - 0xfeu8.hash(state) - } -} - -impl Hash for Wtf8 { - #[inline] - fn hash(&self, state: &mut H) { - state.write(&self.bytes); - 0xfeu8.hash(state) - } -} - -#[unstable(feature = "clone_to_uninit", issue = "126799")] -unsafe impl CloneToUninit for Wtf8 { - #[inline] - #[cfg_attr(debug_assertions, track_caller)] - unsafe fn clone_to_uninit(&self, dst: *mut Self) { - // SAFETY: we're just a wrapper around [u8] - unsafe { self.bytes.clone_to_uninit(addr_of_mut!((*dst).bytes)) } - } -} diff --git a/src/tools/tidy/src/pal.rs b/src/tools/tidy/src/pal.rs index c650fd0eec6d8..d66a137d4160a 100644 --- a/src/tools/tidy/src/pal.rs +++ b/src/tools/tidy/src/pal.rs @@ -50,6 +50,9 @@ const EXCEPTION_PATHS: &[&str] = &[ "library/core/src/ffi/va_list.rs", // We placed a linkage against Windows libraries here "library/core/src/ffi/mod.rs", + "library/core/src/ffi/os_str.rs", + "library/core/src/path.rs", + "library/alloc/src/ffi/os_str.rs", "library/std/src/sys", // Platform-specific code for std lives here. "library/std/src/os", // Platform-specific public interfaces // Temporary `std` exceptions