Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Change path normalization strategy to not resolve symlinks #9330

Merged
merged 2 commits into from
Jan 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions helix-core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -55,3 +55,4 @@ parking_lot = "0.12"
[dev-dependencies]
quickcheck = { version = "1", default-features = false }
indoc = "2.0.4"
tempfile = "3.7.0"
71 changes: 45 additions & 26 deletions helix-core/src/path.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,31 +30,10 @@ pub fn expand_tilde(path: &Path) -> PathBuf {
path.to_path_buf()
}

/// Normalize a path, removing things like `.` and `..`.
///
/// CAUTION: This does not resolve symlinks (unlike
/// [`std::fs::canonicalize`]). This may cause incorrect or surprising
/// behavior at times. This should be used carefully. Unfortunately,
/// [`std::fs::canonicalize`] can be hard to use correctly, since it can often
/// fail, or on Windows returns annoying device paths. This is a problem Cargo
/// needs to improve on.
/// Copied from cargo: <https://github.com/rust-lang/cargo/blob/070e459c2d8b79c5b2ac5218064e7603329c92ae/crates/cargo-util/src/paths.rs#L81>
/// Normalize a path without resolving symlinks.
// Strategy: start from the first component and move up. Cannonicalize previous path,
// join component, cannonicalize new path, strip prefix and join to the final result.
pub fn get_normalized_path(path: &Path) -> PathBuf {
// normalization strategy is to canonicalize first ancestor path that exists (i.e., canonicalize as much as possible),
// then run handrolled normalization on the non-existent remainder
let (base, path) = path
.ancestors()
.find_map(|base| {
let canonicalized_base = dunce::canonicalize(base).ok()?;
let remainder = path.strip_prefix(base).ok()?.into();
Some((canonicalized_base, remainder))
})
.unwrap_or_else(|| (PathBuf::new(), PathBuf::from(path)));

if path.as_os_str().is_empty() {
return base;
}

let mut components = path.components().peekable();
let mut ret = if let Some(c @ Component::Prefix(..)) = components.peek().cloned() {
components.next();
Expand All @@ -70,20 +49,60 @@ pub fn get_normalized_path(path: &Path) -> PathBuf {
ret.push(component.as_os_str());
}
Component::CurDir => {}
#[cfg(not(windows))]
Component::ParentDir => {
ret.pop();
}
#[cfg(windows)]
Component::ParentDir => {
if let Some(head) = ret.components().next_back() {
match head {
Component::Prefix(_) | Component::RootDir => {}
Component::CurDir => unreachable!(),
// If we left previous component as ".." it means we met a symlink before and we can't pop path.
Component::ParentDir => {
ret.push("..");
}
Component::Normal(_) => {
if ret.is_symlink() {
ret.push("..");
} else {
ret.pop();
}
}
}
}
}
#[cfg(not(windows))]
Component::Normal(c) => {
ret.push(c);
}
#[cfg(windows)]
Component::Normal(c) => 'normal: {
use std::fs::canonicalize;

let new_path = ret.join(c);
if new_path.is_symlink() {
ret = new_path;
break 'normal;
}
let (can_new, can_old) = (canonicalize(&new_path), canonicalize(&ret));
match (can_new, can_old) {
(Ok(can_new), Ok(can_old)) => {
let striped = can_new.strip_prefix(can_old);
ret.push(striped.unwrap_or_else(|_| c.as_ref()));
}
_ => ret.push(c),
}
}
}
}
base.join(ret)
dunce::simplified(&ret).to_path_buf()
}

/// Returns the canonical, absolute form of a path with all intermediate components normalized.
///
/// This function is used instead of `std::fs::canonicalize` because we don't want to verify
/// This function is used instead of [`std::fs::canonicalize`] because we don't want to verify
/// here if the path exists, just normalize it's components.
pub fn get_canonicalized_path(path: &Path) -> PathBuf {
let path = expand_tilde(path);
Expand Down
124 changes: 124 additions & 0 deletions helix-core/tests/path.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
#![cfg(windows)]

use std::{
env::set_current_dir,
error::Error,
path::{Component, Path, PathBuf},
};

use helix_core::path::get_normalized_path;
use tempfile::Builder;

// Paths on Windows are almost always case-insensitive.
// Normalization should return the original path.
// E.g. mkdir `CaSe`, normalize(`case`) = `CaSe`.
#[test]
fn test_case_folding_windows() -> Result<(), Box<dyn Error>> {
// tmp/root/case
let tmp_prefix = std::env::temp_dir();
set_current_dir(&tmp_prefix)?;

let root = Builder::new().prefix("root-").tempdir()?;
let case = Builder::new().prefix("CaSe-").tempdir_in(&root)?;

let root_without_prefix = root.path().strip_prefix(&tmp_prefix)?;

let lowercase_case = format!(
"case-{}",
case.path()
.file_name()
.unwrap()
.to_string_lossy()
.split_at(5)
.1
);
let test_path = root_without_prefix.join(lowercase_case);
assert_eq!(
get_normalized_path(&test_path),
case.path().strip_prefix(&tmp_prefix)?
);

Ok(())
}

#[test]
fn test_normalize_path() -> Result<(), Box<dyn Error>> {
/*
tmp/root/
├── link -> dir1/orig_file
├── dir1/
│ └── orig_file
└── dir2/
└── dir_link -> ../dir1/
*/

let tmp_prefix = std::env::temp_dir();
set_current_dir(&tmp_prefix)?;

// Create a tree structure as shown above
let root = Builder::new().prefix("root-").tempdir()?;
let dir1 = Builder::new().prefix("dir1-").tempdir_in(&root)?;
let orig_file = Builder::new().prefix("orig_file-").tempfile_in(&dir1)?;
let dir2 = Builder::new().prefix("dir2-").tempdir_in(&root)?;

// Create path and delete existing file
let dir_link = Builder::new()
.prefix("dir_link-")
.tempfile_in(&dir2)?
.path()
.to_owned();
let link = Builder::new()
.prefix("link-")
.tempfile_in(&root)?
.path()
.to_owned();

use std::os::windows;
windows::fs::symlink_dir(&dir1, &dir_link)?;
windows::fs::symlink_file(&orig_file, &link)?;

// root/link
let path = link.strip_prefix(&tmp_prefix)?;
assert_eq!(
get_normalized_path(path),
path,
"input {:?} and symlink last component shouldn't be resolved",
path
);

// root/dir2/dir_link/orig_file/../..
let path = dir_link
.strip_prefix(&tmp_prefix)
.unwrap()
.join(orig_file.path().file_name().unwrap())
.join(Component::ParentDir)
.join(Component::ParentDir);
let expected = dir_link
.strip_prefix(&tmp_prefix)
.unwrap()
.join(Component::ParentDir);
assert_eq!(
get_normalized_path(&path),
expected,
"input {:?} and \"..\" should not erase the simlink that goes ahead",
&path
);

// root/link/.././../dir2/../
let path = link
.strip_prefix(&tmp_prefix)
.unwrap()
.join(Component::ParentDir)
.join(Component::CurDir)
.join(Component::ParentDir)
.join(dir2.path().file_name().unwrap())
.join(Component::ParentDir);
let expected = link
.strip_prefix(&tmp_prefix)
.unwrap()
.join(Component::ParentDir)
.join(Component::ParentDir);
assert_eq!(get_normalized_path(&path), expected, "input {:?}", &path);

Ok(())
}
Loading