From f3264583ac059bd834064b80c11603447d4f2cda Mon Sep 17 00:00:00 2001 From: Charlie Marsh Date: Thu, 31 Oct 2024 15:12:51 -0400 Subject: [PATCH] Sanitize filenames during zip extraction (#8732) ## Summary Based on the example in `async-zip`: https://github.com/Majored/rs-async-zip/blob/527bda9d58c1ba1fa973a0faeb68dce91fa4ffe4/examples/file_extraction.rs#L33 Closes: https://github.com/astral-sh/uv/issues/8731. ## Test Plan Created https://github.com/astral-sh/sanitize-wheel-test. --- crates/uv-extract/src/stream.rs | 41 ++++++++++++++++++++++++++++++--- crates/uv-extract/src/sync.rs | 2 ++ crates/uv/tests/it/pip_sync.rs | 31 +++++++++++++++++++++++++ 3 files changed, 71 insertions(+), 3 deletions(-) diff --git a/crates/uv-extract/src/stream.rs b/crates/uv-extract/src/stream.rs index 2ff9278d85f9..483be2f50588 100644 --- a/crates/uv-extract/src/stream.rs +++ b/crates/uv-extract/src/stream.rs @@ -1,13 +1,15 @@ -use std::path::Path; +use std::path::{Component, Path, PathBuf}; use std::pin::Pin; -use crate::Error; use futures::StreamExt; use rustc_hash::FxHashSet; use tokio_util::compat::{FuturesAsyncReadCompatExt, TokioAsyncReadCompatExt}; use tracing::warn; + use uv_distribution_filename::SourceDistExtension; +use crate::Error; + const DEFAULT_BUF_SIZE: usize = 128 * 1024; /// Unpack a `.zip` archive into the target directory, without requiring `Seek`. @@ -19,6 +21,26 @@ pub async fn unzip( reader: R, target: impl AsRef, ) -> Result<(), Error> { + /// Ensure the file path is safe to use as a [`Path`]. + /// + /// See: + pub(crate) fn enclosed_name(file_name: &str) -> Option { + if file_name.contains('\0') { + return None; + } + let path = PathBuf::from(file_name); + let mut depth = 0usize; + for component in path.components() { + match component { + Component::Prefix(_) | Component::RootDir => return None, + Component::ParentDir => depth = depth.checked_sub(1)?, + Component::Normal(_) => depth += 1, + Component::CurDir => (), + } + } + Some(path) + } + let target = target.as_ref(); let mut reader = futures::io::BufReader::with_capacity(DEFAULT_BUF_SIZE, reader.compat()); let mut zip = async_zip::base::read::stream::ZipFileReader::new(&mut reader); @@ -28,6 +50,16 @@ pub async fn unzip( while let Some(mut entry) = zip.next_with_entry().await? { // Construct the (expected) path to the file on-disk. let path = entry.reader().entry().filename().as_str()?; + + // Sanitize the file name to prevent directory traversal attacks. + let Some(path) = enclosed_name(path) else { + warn!("Skipping unsafe file name: {path}"); + + // Close current file prior to proceeding, as per: + // https://docs.rs/async_zip/0.0.16/async_zip/base/read/stream/ + zip = entry.skip().await?; + continue; + }; let path = target.join(path); let is_dir = entry.reader().entry().dir()?; @@ -55,7 +87,7 @@ pub async fn unzip( tokio::io::copy(&mut reader, &mut writer).await?; } - // Close current file to get access to the next one. See docs: + // Close current file prior to proceeding, as per: // https://docs.rs/async_zip/0.0.16/async_zip/base/read/stream/ zip = entry.skip().await?; } @@ -84,6 +116,9 @@ pub async fn unzip( if has_any_executable_bit != 0 { // Construct the (expected) path to the file on-disk. let path = entry.filename().as_str()?; + let Some(path) = enclosed_name(path) else { + continue; + }; let path = target.join(path); let permissions = fs_err::tokio::metadata(&path).await?.permissions(); diff --git a/crates/uv-extract/src/sync.rs b/crates/uv-extract/src/sync.rs index 77a7945a30b9..4c1ad1a8ac58 100644 --- a/crates/uv-extract/src/sync.rs +++ b/crates/uv-extract/src/sync.rs @@ -3,6 +3,7 @@ use std::sync::Mutex; use rayon::prelude::*; use rustc_hash::FxHashSet; +use tracing::warn; use zip::ZipArchive; use crate::vendor::{CloneableSeekableReader, HasLength}; @@ -25,6 +26,7 @@ pub fn unzip( // Determine the path of the file within the wheel. let Some(enclosed_name) = file.enclosed_name() else { + warn!("Skipping unsafe file name: {}", file.name()); return Ok(()); }; diff --git a/crates/uv/tests/it/pip_sync.rs b/crates/uv/tests/it/pip_sync.rs index 3b0a78510031..920f2125121b 100644 --- a/crates/uv/tests/it/pip_sync.rs +++ b/crates/uv/tests/it/pip_sync.rs @@ -5607,3 +5607,34 @@ fn sync_seed() -> Result<()> { Ok(()) } + +/// Sanitize zip files during extraction. +#[test] +fn sanitize() -> Result<()> { + let context = TestContext::new("3.12"); + + // Install a zip file that includes a path that extends outside the parent. + let requirements_txt = context.temp_dir.child("requirements.txt"); + requirements_txt.write_str("payload-package @ https://github.com/astral-sh/sanitize-wheel-test/raw/bc59283d5b4b136a191792e32baa51b477fdf65e/payload_package-0.1.0-py3-none-any.whl")?; + + uv_snapshot!(context.pip_sync() + .arg("requirements.txt"), @r###" + success: true + exit_code: 0 + ----- stdout ----- + + ----- stderr ----- + Resolved 1 package in [TIME] + Prepared 1 package in [TIME] + Installed 1 package in [TIME] + + payload-package==0.1.0 (from https://github.com/astral-sh/sanitize-wheel-test/raw/bc59283d5b4b136a191792e32baa51b477fdf65e/payload_package-0.1.0-py3-none-any.whl) + "### + ); + + // There should be no `payload` file in the root. + if let Some(parent) = context.temp_dir.parent() { + assert!(!parent.join("payload").exists()); + } + + Ok(()) +}