From e98a6a1b8cfad24834156efcf023996f81f2c465 Mon Sep 17 00:00:00 2001 From: Charlie Marsh Date: Thu, 31 Oct 2024 13:51:36 -0400 Subject: [PATCH 1/2] Sanitize filenames during zip extraction --- Cargo.lock | 11 +++++++++++ Cargo.toml | 1 + crates/uv-extract/Cargo.toml | 1 + crates/uv-extract/src/stream.rs | 28 ++++++++++++++++++++++++---- crates/uv/tests/it/pip_sync.rs | 29 +++++++++++++++++++++++++++++ 5 files changed, 66 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 03fdd3cd8074..8b4cfd56475e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3098,6 +3098,16 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "sanitize-filename" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2ed72fbaf78e6f2d41744923916966c4fbe3d7c74e3037a8ee482f1115572603" +dependencies = [ + "lazy_static", + "regex", +] + [[package]] name = "schannel" version = "0.1.26" @@ -4720,6 +4730,7 @@ dependencies = [ "rayon", "reqwest", "rustc-hash", + "sanitize-filename", "sha2", "thiserror", "tokio", diff --git a/Cargo.toml b/Cargo.toml index c5e7e483b33d..35981b31aebf 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -142,6 +142,7 @@ rust-netrc = { version = "0.1.2" } rustc-hash = { version = "2.0.0" } rustix = { version = "0.38.37", default-features = false, features = ["fs", "std"] } same-file = { version = "1.0.6" } +sanitize-filename = { version = "0.5.0" } schemars = { version = "0.8.21", features = ["url"] } seahash = { version = "4.1.0" } serde = { version = "1.0.210", features = ["derive"] } diff --git a/crates/uv-extract/Cargo.toml b/crates/uv-extract/Cargo.toml index 1ebc4edc69c3..e9f37c71ae8a 100644 --- a/crates/uv-extract/Cargo.toml +++ b/crates/uv-extract/Cargo.toml @@ -28,6 +28,7 @@ md-5 = { workspace = true } rayon = { workspace = true } reqwest = { workspace = true } rustc-hash = { workspace = true } +sanitize-filename = { workspace = true } sha2 = { workspace = true } thiserror = { workspace = true } tokio = { workspace = true } diff --git a/crates/uv-extract/src/stream.rs b/crates/uv-extract/src/stream.rs index 2ff9278d85f9..218b236a4582 100644 --- a/crates/uv-extract/src/stream.rs +++ b/crates/uv-extract/src/stream.rs @@ -1,13 +1,15 @@ -use std::path::Path; +use std::path::{Path, PathBuf}; use std::pin::Pin; -use crate::Error; use futures::StreamExt; use rustc_hash::FxHashSet; use tokio_util::compat::{FuturesAsyncReadCompatExt, TokioAsyncReadCompatExt}; use tracing::warn; + use uv_distribution_filename::SourceDistExtension; +use crate::Error; + const DEFAULT_BUF_SIZE: usize = 128 * 1024; /// Unpack a `.zip` archive into the target directory, without requiring `Seek`. @@ -19,6 +21,24 @@ pub async fn unzip( reader: R, target: impl AsRef, ) -> Result<(), Error> { + /// Sanitize a filename for use on Windows. + fn sanitize(filename: &str) -> PathBuf { + filename + .replace('\\', "/") + .split('/') + .map(|segment| { + sanitize_filename::sanitize_with_options( + segment, + sanitize_filename::Options { + windows: cfg!(windows), + truncate: false, + replacement: "", + }, + ) + }) + .collect() + } + let target = target.as_ref(); let mut reader = futures::io::BufReader::with_capacity(DEFAULT_BUF_SIZE, reader.compat()); let mut zip = async_zip::base::read::stream::ZipFileReader::new(&mut reader); @@ -28,7 +48,7 @@ pub async fn unzip( while let Some(mut entry) = zip.next_with_entry().await? { // Construct the (expected) path to the file on-disk. let path = entry.reader().entry().filename().as_str()?; - let path = target.join(path); + let path = target.join(sanitize(path)); let is_dir = entry.reader().entry().dir()?; // Either create the directory or write the file to disk. @@ -84,7 +104,7 @@ pub async fn unzip( if has_any_executable_bit != 0 { // Construct the (expected) path to the file on-disk. let path = entry.filename().as_str()?; - let path = target.join(path); + let path = target.join(sanitize(path)); let permissions = fs_err::tokio::metadata(&path).await?.permissions(); if permissions.mode() & 0o111 != 0o111 { diff --git a/crates/uv/tests/it/pip_sync.rs b/crates/uv/tests/it/pip_sync.rs index 3b0a78510031..9bb93c921a26 100644 --- a/crates/uv/tests/it/pip_sync.rs +++ b/crates/uv/tests/it/pip_sync.rs @@ -5607,3 +5607,32 @@ fn sync_seed() -> Result<()> { Ok(()) } + +/// Sanitize zip files during extraction. +#[test] +fn sanitize() -> Result<()> { + let context = TestContext::new("3.12"); + + // Install a zip file that includes a path that extends outside the parent. + let requirements_txt = context.temp_dir.child("requirements.txt"); + requirements_txt.write_str("payload-package @ https://github.com/astral-sh/sanitize-wheel-test/raw/bc59283d5b4b136a191792e32baa51b477fdf65e/payload_package-0.1.0-py3-none-any.whl")?; + + uv_snapshot!(context.pip_sync() + .arg("requirements.txt"), @r###" + success: true + exit_code: 0 + ----- stdout ----- + + ----- stderr ----- + Resolved 1 package in [TIME] + Prepared 1 package in [TIME] + Installed 1 package in [TIME] + + payload-package==0.1.0 (from https://github.com/astral-sh/sanitize-wheel-test/raw/bc59283d5b4b136a191792e32baa51b477fdf65e/payload_package-0.1.0-py3-none-any.whl) + "### + ); + + // There should be a `payload` file in `site-packages` (but _not_ outside of it). + assert!(context.site_packages().join("payload").exists()); + + Ok(()) +} From 6b8fccd63c45c1ff88d2e357f56e7880077dc2f8 Mon Sep 17 00:00:00 2001 From: Charlie Marsh Date: Thu, 31 Oct 2024 15:03:20 -0400 Subject: [PATCH 2/2] Enclosed --- Cargo.lock | 11 ------- Cargo.toml | 1 - crates/uv-extract/Cargo.toml | 1 - crates/uv-extract/src/stream.rs | 55 +++++++++++++++++++++------------ crates/uv-extract/src/sync.rs | 2 ++ crates/uv/tests/it/pip_sync.rs | 6 ++-- 6 files changed, 41 insertions(+), 35 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 8b4cfd56475e..03fdd3cd8074 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3098,16 +3098,6 @@ dependencies = [ "winapi-util", ] -[[package]] -name = "sanitize-filename" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2ed72fbaf78e6f2d41744923916966c4fbe3d7c74e3037a8ee482f1115572603" -dependencies = [ - "lazy_static", - "regex", -] - [[package]] name = "schannel" version = "0.1.26" @@ -4730,7 +4720,6 @@ dependencies = [ "rayon", "reqwest", "rustc-hash", - "sanitize-filename", "sha2", "thiserror", "tokio", diff --git a/Cargo.toml b/Cargo.toml index 35981b31aebf..c5e7e483b33d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -142,7 +142,6 @@ rust-netrc = { version = "0.1.2" } rustc-hash = { version = "2.0.0" } rustix = { version = "0.38.37", default-features = false, features = ["fs", "std"] } same-file = { version = "1.0.6" } -sanitize-filename = { version = "0.5.0" } schemars = { version = "0.8.21", features = ["url"] } seahash = { version = "4.1.0" } serde = { version = "1.0.210", features = ["derive"] } diff --git a/crates/uv-extract/Cargo.toml b/crates/uv-extract/Cargo.toml index e9f37c71ae8a..1ebc4edc69c3 100644 --- a/crates/uv-extract/Cargo.toml +++ b/crates/uv-extract/Cargo.toml @@ -28,7 +28,6 @@ md-5 = { workspace = true } rayon = { workspace = true } reqwest = { workspace = true } rustc-hash = { workspace = true } -sanitize-filename = { workspace = true } sha2 = { workspace = true } thiserror = { workspace = true } tokio = { workspace = true } diff --git a/crates/uv-extract/src/stream.rs b/crates/uv-extract/src/stream.rs index 218b236a4582..483be2f50588 100644 --- a/crates/uv-extract/src/stream.rs +++ b/crates/uv-extract/src/stream.rs @@ -1,4 +1,4 @@ -use std::path::{Path, PathBuf}; +use std::path::{Component, Path, PathBuf}; use std::pin::Pin; use futures::StreamExt; @@ -21,22 +21,24 @@ pub async fn unzip( reader: R, target: impl AsRef, ) -> Result<(), Error> { - /// Sanitize a filename for use on Windows. - fn sanitize(filename: &str) -> PathBuf { - filename - .replace('\\', "/") - .split('/') - .map(|segment| { - sanitize_filename::sanitize_with_options( - segment, - sanitize_filename::Options { - windows: cfg!(windows), - truncate: false, - replacement: "", - }, - ) - }) - .collect() + /// Ensure the file path is safe to use as a [`Path`]. + /// + /// See: + pub(crate) fn enclosed_name(file_name: &str) -> Option { + if file_name.contains('\0') { + return None; + } + let path = PathBuf::from(file_name); + let mut depth = 0usize; + for component in path.components() { + match component { + Component::Prefix(_) | Component::RootDir => return None, + Component::ParentDir => depth = depth.checked_sub(1)?, + Component::Normal(_) => depth += 1, + Component::CurDir => (), + } + } + Some(path) } let target = target.as_ref(); @@ -48,7 +50,17 @@ pub async fn unzip( while let Some(mut entry) = zip.next_with_entry().await? { // Construct the (expected) path to the file on-disk. let path = entry.reader().entry().filename().as_str()?; - let path = target.join(sanitize(path)); + + // Sanitize the file name to prevent directory traversal attacks. + let Some(path) = enclosed_name(path) else { + warn!("Skipping unsafe file name: {path}"); + + // Close current file prior to proceeding, as per: + // https://docs.rs/async_zip/0.0.16/async_zip/base/read/stream/ + zip = entry.skip().await?; + continue; + }; + let path = target.join(path); let is_dir = entry.reader().entry().dir()?; // Either create the directory or write the file to disk. @@ -75,7 +87,7 @@ pub async fn unzip( tokio::io::copy(&mut reader, &mut writer).await?; } - // Close current file to get access to the next one. See docs: + // Close current file prior to proceeding, as per: // https://docs.rs/async_zip/0.0.16/async_zip/base/read/stream/ zip = entry.skip().await?; } @@ -104,7 +116,10 @@ pub async fn unzip( if has_any_executable_bit != 0 { // Construct the (expected) path to the file on-disk. let path = entry.filename().as_str()?; - let path = target.join(sanitize(path)); + let Some(path) = enclosed_name(path) else { + continue; + }; + let path = target.join(path); let permissions = fs_err::tokio::metadata(&path).await?.permissions(); if permissions.mode() & 0o111 != 0o111 { diff --git a/crates/uv-extract/src/sync.rs b/crates/uv-extract/src/sync.rs index 77a7945a30b9..4c1ad1a8ac58 100644 --- a/crates/uv-extract/src/sync.rs +++ b/crates/uv-extract/src/sync.rs @@ -3,6 +3,7 @@ use std::sync::Mutex; use rayon::prelude::*; use rustc_hash::FxHashSet; +use tracing::warn; use zip::ZipArchive; use crate::vendor::{CloneableSeekableReader, HasLength}; @@ -25,6 +26,7 @@ pub fn unzip( // Determine the path of the file within the wheel. let Some(enclosed_name) = file.enclosed_name() else { + warn!("Skipping unsafe file name: {}", file.name()); return Ok(()); }; diff --git a/crates/uv/tests/it/pip_sync.rs b/crates/uv/tests/it/pip_sync.rs index 9bb93c921a26..920f2125121b 100644 --- a/crates/uv/tests/it/pip_sync.rs +++ b/crates/uv/tests/it/pip_sync.rs @@ -5631,8 +5631,10 @@ fn sanitize() -> Result<()> { "### ); - // There should be a `payload` file in `site-packages` (but _not_ outside of it). - assert!(context.site_packages().join("payload").exists()); + // There should be no `payload` file in the root. + if let Some(parent) = context.temp_dir.parent() { + assert!(!parent.join("payload").exists()); + } Ok(()) }