|  | 
|  | 1 | +use super::{is_sep_byte, is_verbatim_sep}; | 
|  | 2 | +use crate::ffi::OsStr; | 
|  | 3 | +use crate::path::Prefix; | 
|  | 4 | + | 
|  | 5 | +struct PrefixParser<'a, const LEN: usize> { | 
|  | 6 | +    path: &'a OsStr, | 
|  | 7 | +    prefix: [u8; LEN], | 
|  | 8 | +} | 
|  | 9 | + | 
|  | 10 | +impl<'a, const LEN: usize> PrefixParser<'a, LEN> { | 
|  | 11 | +    #[inline] | 
|  | 12 | +    fn get_prefix(path: &OsStr) -> [u8; LEN] { | 
|  | 13 | +        let mut prefix = [0; LEN]; | 
|  | 14 | +        // SAFETY: Only ASCII characters are modified. | 
|  | 15 | +        for (i, &ch) in path.as_encoded_bytes().iter().take(LEN).enumerate() { | 
|  | 16 | +            prefix[i] = if ch == b'/' { b'\\' } else { ch }; | 
|  | 17 | +        } | 
|  | 18 | +        prefix | 
|  | 19 | +    } | 
|  | 20 | + | 
|  | 21 | +    fn new(path: &'a OsStr) -> Self { | 
|  | 22 | +        Self { path, prefix: Self::get_prefix(path) } | 
|  | 23 | +    } | 
|  | 24 | + | 
|  | 25 | +    fn as_slice(&self) -> PrefixParserSlice<'a, '_> { | 
|  | 26 | +        PrefixParserSlice { | 
|  | 27 | +            path: self.path, | 
|  | 28 | +            prefix: &self.prefix[..LEN.min(self.path.len())], | 
|  | 29 | +            index: 0, | 
|  | 30 | +        } | 
|  | 31 | +    } | 
|  | 32 | +} | 
|  | 33 | + | 
// A cursor over a path: `prefix` is the separator-normalized copy of the
// leading bytes and `index` is how many of those bytes have been consumed.
struct PrefixParserSlice<'a, 'b> {
    // The original, unmodified path.
    path: &'a OsStr,
    // Normalized leading bytes used for matching prefix literals.
    prefix: &'b [u8],
    // Number of bytes consumed so far; applies to both `path` and `prefix`.
    index: usize,
}

impl<'a> PrefixParserSlice<'a, '_> {
    // Returns a cursor advanced past `prefix` if the unconsumed part of the
    // normalized bytes starts with it, and `None` otherwise.
    fn strip_prefix(&self, prefix: &str) -> Option<Self> {
        let expected = prefix.as_bytes();
        if self.prefix[self.index..].starts_with(expected) {
            Some(Self {
                path: self.path,
                prefix: self.prefix,
                index: self.index + expected.len(),
            })
        } else {
            None
        }
    }

    // Returns the bytes consumed so far, taken from the original path (so
    // separators appear exactly as the caller wrote them).
    fn prefix_bytes(&self) -> &'a [u8] {
        let consumed = self.index;
        &self.path.as_encoded_bytes()[..consumed]
    }

    // Consumes the cursor and returns the part of the original path that has
    // not been consumed yet.
    fn finish(self) -> &'a OsStr {
        let remainder = &self.path.as_encoded_bytes()[self.index..];
        // SAFETY: The unsafety here stems from converting between &OsStr and
        // &[u8] and back. This is safe to do because (1) we only look at ASCII
        // contents of the encoding and (2) new &OsStr values are produced only
        // from ASCII-bounded slices of existing &OsStr values.
        unsafe { OsStr::from_encoded_bytes_unchecked(remainder) }
    }
}
|  | 59 | + | 
|  | 60 | +pub fn parse_prefix(path: &OsStr) -> Option<Prefix<'_>> { | 
|  | 61 | +    use Prefix::{DeviceNS, Disk, UNC, Verbatim, VerbatimDisk, VerbatimUNC}; | 
|  | 62 | + | 
|  | 63 | +    let parser = PrefixParser::<8>::new(path); | 
|  | 64 | +    let parser = parser.as_slice(); | 
|  | 65 | +    if let Some(parser) = parser.strip_prefix(r"\\") { | 
|  | 66 | +        // \\ | 
|  | 67 | + | 
|  | 68 | +        // The meaning of verbatim paths can change when they use a different | 
|  | 69 | +        // separator. | 
|  | 70 | +        if let Some(parser) = parser.strip_prefix(r"?\") | 
|  | 71 | +            // Cygwin allows `/` in verbatim paths. | 
|  | 72 | +            && (cfg!(target_os = "cygwin") || !parser.prefix_bytes().iter().any(|&x| x == b'/')) | 
|  | 73 | +        { | 
|  | 74 | +            // \\?\ | 
|  | 75 | +            if let Some(parser) = parser.strip_prefix(r"UNC\") { | 
|  | 76 | +                // \\?\UNC\server\share | 
|  | 77 | + | 
|  | 78 | +                let path = parser.finish(); | 
|  | 79 | +                let (server, path) = parse_next_component(path, true); | 
|  | 80 | +                let (share, _) = parse_next_component(path, true); | 
|  | 81 | + | 
|  | 82 | +                Some(VerbatimUNC(server, share)) | 
|  | 83 | +            } else { | 
|  | 84 | +                let path = parser.finish(); | 
|  | 85 | + | 
|  | 86 | +                // in verbatim paths only recognize an exact drive prefix | 
|  | 87 | +                if let Some(drive) = parse_drive_exact(path) { | 
|  | 88 | +                    // \\?\C: | 
|  | 89 | +                    Some(VerbatimDisk(drive)) | 
|  | 90 | +                } else { | 
|  | 91 | +                    // \\?\prefix | 
|  | 92 | +                    let (prefix, _) = parse_next_component(path, true); | 
|  | 93 | +                    Some(Verbatim(prefix)) | 
|  | 94 | +                } | 
|  | 95 | +            } | 
|  | 96 | +        } else if let Some(parser) = parser.strip_prefix(r".\") { | 
|  | 97 | +            // \\.\COM42 | 
|  | 98 | +            let path = parser.finish(); | 
|  | 99 | +            let (prefix, _) = parse_next_component(path, false); | 
|  | 100 | +            Some(DeviceNS(prefix)) | 
|  | 101 | +        } else { | 
|  | 102 | +            let path = parser.finish(); | 
|  | 103 | +            let (server, path) = parse_next_component(path, false); | 
|  | 104 | +            let (share, _) = parse_next_component(path, false); | 
|  | 105 | + | 
|  | 106 | +            if !server.is_empty() && !share.is_empty() { | 
|  | 107 | +                // \\server\share | 
|  | 108 | +                Some(UNC(server, share)) | 
|  | 109 | +            } else { | 
|  | 110 | +                // no valid prefix beginning with "\\" recognized | 
|  | 111 | +                None | 
|  | 112 | +            } | 
|  | 113 | +        } | 
|  | 114 | +    } else { | 
|  | 115 | +        // If it has a drive like `C:` then it's a disk. | 
|  | 116 | +        // Otherwise there is no prefix. | 
|  | 117 | +        parse_drive(path).map(Disk) | 
|  | 118 | +    } | 
|  | 119 | +} | 
|  | 120 | + | 
// Parses a drive prefix, e.g. "C:" and "C:\whatever".
//
// Returns the drive letter as an uppercase ASCII byte, or `None` if the path
// does not start with an ASCII letter followed by `:`.
fn parse_drive(path: &OsStr) -> Option<u8> {
    let [letter, b':', ..] = path.as_encoded_bytes() else {
        return None;
    };

    // In most DOS systems, it is not possible to have more than 26 drive letters.
    // See <https://en.wikipedia.org/wiki/Drive_letter_assignment#Common_assignments>.
    letter.is_ascii_alphabetic().then(|| letter.to_ascii_uppercase())
}
|  | 134 | + | 
|  | 135 | +// Parses a drive prefix exactly, e.g. "C:" | 
|  | 136 | +fn parse_drive_exact(path: &OsStr) -> Option<u8> { | 
|  | 137 | +    // only parse two bytes: the drive letter and the drive separator | 
|  | 138 | +    if path.as_encoded_bytes().get(2).map(|&x| is_sep_byte(x)).unwrap_or(true) { | 
|  | 139 | +        parse_drive(path) | 
|  | 140 | +    } else { | 
|  | 141 | +        None | 
|  | 142 | +    } | 
|  | 143 | +} | 
|  | 144 | + | 
|  | 145 | +// Parse the next path component. | 
|  | 146 | +// | 
|  | 147 | +// Returns the next component and the rest of the path excluding the component and separator. | 
|  | 148 | +// Does not recognize `/` as a separator character on Windows if `verbatim` is true. | 
|  | 149 | +fn parse_next_component(path: &OsStr, verbatim: bool) -> (&OsStr, &OsStr) { | 
|  | 150 | +    let separator = if verbatim { is_verbatim_sep } else { is_sep_byte }; | 
|  | 151 | + | 
|  | 152 | +    match path.as_encoded_bytes().iter().position(|&x| separator(x)) { | 
|  | 153 | +        Some(separator_start) => { | 
|  | 154 | +            let separator_end = separator_start + 1; | 
|  | 155 | + | 
|  | 156 | +            let component = &path.as_encoded_bytes()[..separator_start]; | 
|  | 157 | + | 
|  | 158 | +            // Panic safe | 
|  | 159 | +            // The max `separator_end` is `bytes.len()` and `bytes[bytes.len()..]` is a valid index. | 
|  | 160 | +            let path = &path.as_encoded_bytes()[separator_end..]; | 
|  | 161 | + | 
|  | 162 | +            // SAFETY: `path` is a valid wtf8 encoded slice and each of the separators ('/', '\') | 
|  | 163 | +            // is encoded in a single byte, therefore `bytes[separator_start]` and | 
|  | 164 | +            // `bytes[separator_end]` must be code point boundaries and thus | 
|  | 165 | +            // `bytes[..separator_start]` and `bytes[separator_end..]` are valid wtf8 slices. | 
|  | 166 | +            unsafe { | 
|  | 167 | +                ( | 
|  | 168 | +                    OsStr::from_encoded_bytes_unchecked(component), | 
|  | 169 | +                    OsStr::from_encoded_bytes_unchecked(path), | 
|  | 170 | +                ) | 
|  | 171 | +            } | 
|  | 172 | +        } | 
|  | 173 | +        None => (path, OsStr::new("")), | 
|  | 174 | +    } | 
|  | 175 | +} | 
0 commit comments