Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions crates/uv-requirements-txt/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ fs-err = { workspace = true }
memchr = { workspace = true }
reqwest = { workspace = true, optional = true }
reqwest-middleware = { workspace = true, optional = true }
rustc-hash = { workspace = true }
thiserror = { workspace = true }
tracing = { workspace = true }
unscanny = { workspace = true }
Expand Down
208 changes: 191 additions & 17 deletions crates/uv-requirements-txt/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ use std::io;
use std::path::{Path, PathBuf};
use std::str::FromStr;

use rustc_hash::FxHashSet;
use tracing::instrument;
use unscanny::{Pattern, Scanner};
use url::Url;
Expand Down Expand Up @@ -169,6 +170,24 @@ impl RequirementsTxt {
requirements_txt: impl AsRef<Path>,
working_dir: impl AsRef<Path>,
client_builder: &BaseClientBuilder<'_>,
) -> Result<Self, RequirementsTxtFileError> {
let mut visited = VisitedFiles::Requirements {
requirements: &mut FxHashSet::default(),
constraints: &mut FxHashSet::default(),
};
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Consider a default impl?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unfortunately, `Default` can't be implemented for a type containing `&mut` references, so a default impl isn't possible here.

Self::parse_impl(requirements_txt, working_dir, client_builder, &mut visited).await
}

/// See module level documentation
#[instrument(
skip_all,
fields(requirements_txt = requirements_txt.as_ref().as_os_str().to_str())
)]
async fn parse_impl(
requirements_txt: impl AsRef<Path>,
working_dir: impl AsRef<Path>,
client_builder: &BaseClientBuilder<'_>,
visited: &mut VisitedFiles<'_>,
Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We have to use a nested `&mut` to convince rustc that the inner `&mut FxHashSet` references remain unique.

) -> Result<Self, RequirementsTxtFileError> {
let requirements_txt = requirements_txt.as_ref();
let working_dir = working_dir.as_ref();
Expand Down Expand Up @@ -220,6 +239,7 @@ impl RequirementsTxt {
requirements_dir,
client_builder,
requirements_txt,
visited,
)
.await
.map_err(|err| RequirementsTxtFileError {
Expand All @@ -236,12 +256,13 @@ impl RequirementsTxt {
/// the current working directory. However, relative paths to sub-files (e.g., `-r ../requirements.txt`)
/// are resolved against the directory of the containing `requirements.txt` file, to match
/// `pip`'s behavior.
pub async fn parse_inner(
async fn parse_inner(
content: &str,
working_dir: &Path,
requirements_dir: &Path,
client_builder: &BaseClientBuilder<'_>,
requirements_txt: &Path,
visited: &mut VisitedFiles<'_>,
) -> Result<Self, RequirementsTxtParserError> {
let mut s = Scanner::new(content);

Expand Down Expand Up @@ -276,14 +297,33 @@ impl RequirementsTxt {
} else {
requirements_dir.join(filename.as_ref())
};
let sub_requirements =
Box::pin(Self::parse(&sub_file, working_dir, client_builder))
.await
.map_err(|err| RequirementsTxtParserError::Subfile {
source: Box::new(err),
start,
end,
})?;
match visited {
VisitedFiles::Requirements { requirements, .. } => {
if !requirements.insert(sub_file.clone()) {
continue;
}
}
// Treat any nested requirements or constraints as constraints. This differs
// from `pip`, which seems to treat `-r` requirements in constraints files as
// _requirements_, but we don't want to support that.
VisitedFiles::Constraints { constraints } => {
if !constraints.insert(sub_file.clone()) {
continue;
}
}
}
let sub_requirements = Box::pin(Self::parse_impl(
&sub_file,
working_dir,
client_builder,
visited,
))
.await
.map_err(|err| RequirementsTxtParserError::Subfile {
source: Box::new(err),
start,
end,
})?;

// Disallow conflicting `--index-url` in nested `requirements` files.
if sub_requirements.index_url.is_some()
Expand Down Expand Up @@ -331,14 +371,35 @@ impl RequirementsTxt {
} else {
requirements_dir.join(filename.as_ref())
};
let sub_constraints =
Box::pin(Self::parse(&sub_file, working_dir, client_builder))
.await
.map_err(|err| RequirementsTxtParserError::Subfile {
source: Box::new(err),
start,
end,
})?;

// Switch to constraints mode, if we aren't in it already.
let mut visited = match visited {
VisitedFiles::Requirements { constraints, .. } => {
if !constraints.insert(sub_file.clone()) {
continue;
}
VisitedFiles::Constraints { constraints }
}
VisitedFiles::Constraints { constraints } => {
if !constraints.insert(sub_file.clone()) {
continue;
}
VisitedFiles::Constraints { constraints }
}
};

let sub_constraints = Box::pin(Self::parse_impl(
&sub_file,
working_dir,
client_builder,
&mut visited,
))
.await
.map_err(|err| RequirementsTxtParserError::Subfile {
source: Box::new(err),
start,
end,
})?;

// Treat any nested requirements or constraints as constraints. This differs
// from `pip`, which seems to treat `-r` requirements in constraints files as
Expand Down Expand Up @@ -1312,6 +1373,23 @@ impl RequirementsTxtParserError {
}
}

/// Avoid infinite recursion through recursive inclusions, while also being mindful of nested
/// requirements and constraint inclusions.
///
/// The sets are held as `&mut` references (rather than owned) so that recursing from
/// `Requirements` into `Constraints` mode can re-borrow the same `constraints` set: a file
/// reached via `-c` must never be parsed twice, regardless of which mode first visited it.
#[derive(Debug)]
enum VisitedFiles<'a> {
    /// The requirements are included as regular requirements, and can recursively include both
    /// requirements and constraints.
    Requirements {
        // Paths already included via `-r` while in requirements mode.
        requirements: &'a mut FxHashSet<PathBuf>,
        // Paths already included via `-c` (shared with constraints mode below).
        constraints: &'a mut FxHashSet<PathBuf>,
    },
    /// The requirements are included as constraints, all recursive inclusions are considered
    /// constraints.
    Constraints {
        // In constraints mode, even nested `-r` inclusions are tracked here, since they are
        // treated as constraints.
        constraints: &'a mut FxHashSet<PathBuf>,
    },
}

/// Calculates the column and line offset of a given cursor based on the
/// number of Unicode codepoints.
fn calculate_row_column(content: &str, position: usize) -> (usize, usize) {
Expand Down Expand Up @@ -1354,12 +1432,14 @@ fn calculate_row_column(content: &str, position: usize) -> (usize, usize) {

#[cfg(test)]
mod test {
use std::collections::BTreeSet;
use std::path::{Path, PathBuf};

use anyhow::Result;
use assert_fs::prelude::*;
use fs_err as fs;
use indoc::indoc;
use insta::assert_debug_snapshot;
use itertools::Itertools;
use tempfile::tempdir;
use test_case::test_case;
Expand Down Expand Up @@ -2792,4 +2872,98 @@ mod test {
// Assert line and columns are expected
assert_eq!(line_column, expected, "Issues with input: {input}");
}

/// Test different kinds of recursive inclusions with requirements and constraints
///
/// Exercises the `VisitedFiles` cycle detection: files that include themselves via `-r`
/// and/or `-c` must be parsed exactly once instead of recursing forever, and anything
/// reached through `-c` must surface as a constraint, even when pulled in via a nested `-r`.
#[tokio::test]
async fn recursive_circular_inclusion() -> Result<()> {
let temp_dir = assert_fs::TempDir::new()?;
// Included both as a requirement and as a constraint by the top-level file.
let both = temp_dir.child("both.txt");
both.write_str(indoc! {"
pkg-both
"})?;
// Shadows `both` above; included both ways AND includes itself both ways (cycle).
let both = temp_dir.child("both-recursive.txt");
both.write_str(indoc! {"
pkg-both-recursive
-r both-recursive.txt
-c both-recursive.txt
"})?;
// NOTE(review): despite the name, this file also includes itself — confirm that the
// non-"recursive" variant is intentionally self-referential too.
let requirements_only = temp_dir.child("requirements-only.txt");
requirements_only.write_str(indoc! {"
pkg-requirements-only
-r requirements-only.txt
"})?;
let requirements_only = temp_dir.child("requirements-only-recursive.txt");
requirements_only.write_str(indoc! {"
pkg-requirements-only-recursive
-r requirements-only-recursive.txt
"})?;
// Forms a two-file cycle with `constraints-only.txt` below, via `-c`.
let constraints_only = temp_dir.child("requirements-in-constraints.txt");
constraints_only.write_str(indoc! {"
pkg-requirements-in-constraints
# Some nested recursion for good measure
-c constraints-only.txt
"})?;
// Self-includes via `-c`, and pulls in a `-r` from within a constraints context —
// that `-r` must still be treated as a constraint.
let constraints_only = temp_dir.child("constraints-only.txt");
constraints_only.write_str(indoc! {"
pkg-constraints-only
-c constraints-only.txt
# Using `-r` inside `-c`
-r requirements-in-constraints.txt
"})?;
// `-r` self-inclusion inside a file that is only ever reached via `-c`.
let constraints_only = temp_dir.child("constraints-only-recursive.txt");
constraints_only.write_str(indoc! {"
pkg-constraints-only-recursive
-r constraints-only-recursive.txt
"})?;

// Top-level entry point tying all of the above together.
let requirements = temp_dir.child("requirements.txt");
requirements.write_str(indoc! {"
# Even if a package was already included as a constraint, it is also included as
# requirement
-c both.txt
-r both.txt
-c both-recursive.txt
-r both-recursive.txt

-r requirements-only.txt
-r requirements-only-recursive.txt
-c constraints-only.txt
-c constraints-only-recursive.txt
"})?;

let parsed = RequirementsTxt::parse(
&requirements,
temp_dir.path(),
&BaseClientBuilder::default(),
)
.await?;

// Collect into sorted sets so the snapshots are deterministic and deduplicated.
let requirements: BTreeSet<String> = parsed
.requirements
.iter()
.map(|entry| entry.requirement.to_string())
.collect();
let constraints: BTreeSet<String> =
parsed.constraints.iter().map(ToString::to_string).collect();

// `pkg-both*` appear in BOTH sets: inclusion as a constraint does not suppress
// inclusion as a requirement (see comment in requirements.txt above).
assert_debug_snapshot!(requirements, @r#"
{
"pkg-both",
"pkg-both-recursive",
"pkg-requirements-only",
"pkg-requirements-only-recursive",
}
"#);
// `pkg-requirements-in-constraints` lands here even though it was pulled in with
// `-r`, because it was reached from within a `-c` context.
assert_debug_snapshot!(constraints, @r#"
{
"pkg-both",
"pkg-both-recursive",
"pkg-constraints-only",
"pkg-constraints-only-recursive",
"pkg-requirements-in-constraints",
}
"#);

Ok(())
}
}
12 changes: 10 additions & 2 deletions crates/uv-requirements-txt/src/requirement.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use std::fmt::Display;
use std::path::Path;

use uv_normalize::PackageName;
Expand Down Expand Up @@ -124,9 +125,7 @@ impl RequirementsTxtRequirement {
}
}
}
}

impl RequirementsTxtRequirement {
/// Parse a requirement as seen in a `requirements.txt` file.
pub fn parse(
input: &str,
Expand Down Expand Up @@ -162,3 +161,12 @@ impl RequirementsTxtRequirement {
.map_err(Box::new)
}
}

impl Display for RequirementsTxtRequirement {
    /// Forward formatting to the inner requirement, whichever variant holds it.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Both variants wrap a type that is itself `Display`, so delegate directly.
        let inner: &dyn Display = match self {
            Self::Named(requirement) => requirement,
            Self::Unnamed(requirement) => requirement,
        };
        inner.fmt(f)
    }
}
Loading