diff --git a/Cargo.lock b/Cargo.lock index bce3ca73..9559ae28 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -383,8 +383,8 @@ dependencies = [ "jsonschema", "once_cell", "ordered-float", - "packageurl", "pretty_assertions", + "purl", "regex", "serde", "serde_json", @@ -1094,16 +1094,6 @@ dependencies = [ "num-traits", ] -[[package]] -name = "packageurl" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c53362339d1c48910f1b0c35e2ae96e2d32e442c7dc3ac5f622908ec87221f08" -dependencies = [ - "percent-encoding", - "thiserror", -] - [[package]] name = "parking_lot" version = "0.12.1" @@ -1256,9 +1246,9 @@ dependencies = [ [[package]] name = "purl" -version = "0.1.2" +version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d153044e55fb5c0a6f0f0f974c3335d15a842263ba4b208d2656120fe530a5ab" +checksum = "c14fe28c8495f7eaf77a6e6106966f95211c0a2404b9da50d248fc32af3a3f14" dependencies = [ "hex", "percent-encoding", diff --git a/cargo-cyclonedx/Cargo.toml b/cargo-cyclonedx/Cargo.toml index 64948c66..48abcbce 100644 --- a/cargo-cyclonedx/Cargo.toml +++ b/cargo-cyclonedx/Cargo.toml @@ -31,7 +31,7 @@ log = "0.4.20" once_cell = "1.18.0" pathdiff = { version = "0.2.1", features = ["camino"] } percent-encoding = "2.3.1" -purl = { version = "0.1.2", default-features = false, features = ["package-type"] } +purl = { version = "0.1.3", default-features = false, features = ["package-type"] } regex = "1.9.3" serde = { version = "1.0.193", features = ["derive"] } thiserror = "1.0.48" diff --git a/cargo-cyclonedx/src/lib.rs b/cargo-cyclonedx/src/lib.rs index 0c75b016..6b8ac7c5 100644 --- a/cargo-cyclonedx/src/lib.rs +++ b/cargo-cyclonedx/src/lib.rs @@ -21,6 +21,5 @@ pub mod format; pub mod generator; pub mod platform; pub mod purl; -pub mod urlencode; pub use crate::generator::*; diff --git a/cargo-cyclonedx/src/purl.rs b/cargo-cyclonedx/src/purl.rs index cb075bf4..10da4d92 100644 --- a/cargo-cyclonedx/src/purl.rs +++ b/cargo-cyclonedx/src/purl.rs @@ -5,8 +5,6 @@ use cyclonedx_bom::{external_models::uri::validate_purl, prelude::Purl as CdxPur use pathdiff::diff_utf8_paths; use purl::{PackageError, PackageType, PurlBuilder}; -use crate::urlencode::urlencode; - pub fn get_purl( package: &Package, root_package: &Package, @@ -25,7 +23,7 @@ pub fn get_purl( builder = builder.with_qualifier("vcs_url", source_to_vcs_url(source))? } Some(("registry", registry_url)) => { - builder = builder.with_qualifier("repository_url", urlencode(registry_url))? + builder = builder.with_qualifier("repository_url", registry_url)? } Some((source, _path)) => log::warn!("Unknown source kind {}", source), None => { @@ -49,9 +47,9 @@ pub fn get_purl( } } // url-encode the path to the package manifest to make it a valid URL - let manifest_url = format!("file://{}", urlencode(package_dir.as_str())); + let manifest_url = format!("file://{}", package_dir.as_str()); // url-encode the whole URL *again* because we are embedding this URL inside another URL (PURL) - builder = builder.with_qualifier("download_url", urlencode(&manifest_url))? + builder = builder.with_qualifier("download_url", &manifest_url)? } if let Some(subpath) = subpath { @@ -70,13 +68,13 @@ pub fn get_purl( /// Assumes that the source kind is `git`, panics if it isn't. fn source_to_vcs_url(source: &cargo_metadata::Source) -> String { assert!(source.repr.starts_with("git+")); - urlencode(&source.repr.replace('#', "@")) + source.repr.replace('#', "@") } /// Converts a relative path to PURL subpath fn to_purl_subpath(path: &Utf8Path) -> String { assert!(path.is_relative()); - let parts: Vec = path.components().map(|c| urlencode(c.as_str())).collect(); + let parts: Vec<&str> = path.components().map(|c| c.as_str()).collect(); parts.join("/") } diff --git a/cargo-cyclonedx/src/urlencode.rs b/cargo-cyclonedx/src/urlencode.rs deleted file mode 100644 index d0732c7e..00000000 --- a/cargo-cyclonedx/src/urlencode.rs +++ /dev/null @@ -1,124 +0,0 @@ -//! If you are reading this - buckle up, we are going on an adventure! -//! -//! So in the purl spec there is this innocuous example of two valid PURLs: -//! ```text -//! pkg:generic/openssl@1.1.10g?download_url=https://openssl.org/source/openssl-1.1.0g.tar.gz&checksum=sha256:de4d501267da -//! pkg:generic/bitwarderl?vcs_url=git%2Bhttps://git.fsfe.org/dxtr/bitwarderl%40cc55108da32 -//! ``` -//! from -//! -//! Note the `git%2Bhttps` part. The `%2B` is a percent-encoded `+` character, which is necessary, because otherwise -//! the `+` would be turned into a space when decoding and the original string would not be recoverable. -//! -//! Let's dive into the specs to see how to create such strings. -//! -//! ### WHATWG URL specification -//! -//! I have naively assumed that there is a single, well-defined percent encoding standard. -//! -//! In reality The URL spec has numerous different sets of characters that should or should not be URL-encoded. -//! -//! This part, `?foo=bar`, is called the "query" in the URL spec: -//! -//! -//! And this is what characters are supposed to be URL-encoded there: -//! -//! -//! Note the absence of the `+` character in this set! -//! It is apparently legal to put a + in there, but the generic URL parsers I tried, -//! as well as the official JS and Go PURL implementations convert it into a space! -//! -//! There are only two character sets that escape `+`: -//! 1. -//! to be used for "components", but the spec NEVER DEFINES WHAT A COMPONENT IS. -//! 2. to be used for form submission, -//! [which is apparently our case](https://github.com/CycloneDX/cyclonedx-rust-cargo/pull/523#discussion_r1378020167)? -//! -//! Both of which also escape `:`, so it's not possible to produce *both* of the valid URL examples with the same implementation - -//! at least using any of the standard character sets. -//! -//! The URL spec also includes this lovely bit: -//! -//! > This is used by HTML for registerProtocolHandler(), and could also be used by other standards -//! > to percent-encode data that can then be embedded in a URL’s path, query, or fragment; or in an opaque host. -//! > Using it with UTF-8 percent-encode gives identical results to JavaScript’s encodeURIComponent() -//! -//! Except it does NOT specify which of these two it refers to - component or form character set! -//! -//! ### RFC 3986 URI specification -//! -//! PURL specifies that it ALSO follows the rfc3986 spec - the URI spec: -//! -//! -//! PURL claims it adheres to both, which is curious because the specs are subtly incompabitle. -//! -//! (They are incompatible in the way they do percent encoding too, -//! see which is distinct from , -//! but that's a whole other rabbit hole and I'm not going down it right now.) -//! -//! So let's see what the URI spec escapes: -//! -//! Okay so `:` is super escaped and `+` is maybe escaped. -//! The official PURL examples that escape `+` but not `:` are impossible to obtain with that too! -//! -//! ### PURL specification -//! -//! PURL spec documents the process of creating a valid PURL! Maybe that will help? -//! -//! If you follow the "how to write purl" part from the spec, -//! -//! it specifies that you first join the special "checksums" value together with `,` signs and do all the other stuff to it, -//! and then you percent the result (doesn't specify using which of a gazillion possible character sets). -//! -//! So according to the spec, the `checksum=sha256:de4d501267da` part in the example should not be possible to obtain! -//! It should be `checksum=sha256%3Ade4d501267da` instead! -//! -//! And even if checksums were special somehow, there is no character set that would produce `git%2Bhttps://`` -//! because every single character set that escapes `+` also escapes `:`! -//! -//! The fact that the percent encoding is insufficiently defined in the PURL spec, -//! and that different PURL implementations disagree about what they escape, has been known since 2018 -//! but as of this writing has not been acted upon: -//! -//! ### Putting it all together -//! -//! To sum up: -//! -//! 1. There are many different percent encodings -//! 2. PURL spec does not specify which one it uses -//! 3. The official PURL examples CANNOT be produced with ANY one of those standard percent encodings -//! 4. The one that `purl` crate implements for qualifiers in accordance with the WHATWG URL spec (not the rfc3986 URI spec) -//! produces nonsensical results (does not escape `+` where it is clearly necessary) which breaks PURL parsers -//! (but the crate authors have seen this text and [will probably do something about that](https://github.com/phylum-dev/purl/issues/11)) -//! -//! So the specs failed us, and we have to rely on implementation behavior. We need to encode data so that as many decoders as possible will read it correctly. -//! -//! Percent decoders do not have a whitelist of characters they don't percent-decode, they just decode everything starting with a %. -//! That is how the spec defines percent decoding, too: -//! So when the PURL spec says "The value is the percent-decoded right side", the decoder should just decode everything starting with a %. -//! -//! So overdoing the encoding is not an issue; it's doing too little that's the problem. -//! Therefore, we pick the most agressive encoding out of the whole URL spec and use that: -//! -//! Which also happens to be compliant with the URL spec. At least we are following one of the three! -//! -//! This can be revisited if/when the PURL spec actually specifies which characters exactly have to be escaped in qualifiers. - -use percent_encoding::{self, utf8_percent_encode, AsciiSet, NON_ALPHANUMERIC}; - -/// https://url.spec.whatwg.org/#application-x-www-form-urlencoded-percent-encode-set -/// -/// We use the _character set_ but not the _encoding scheme_ of form-urlencoded. -/// The difference is that on top of the character set it also defines that ' ' (space) should be encoded as '+', not "%20". -/// However, official decoder implementations are split about 50/50 on whether they decode '+' as '+' or as a space: -/// -/// Therefore, we never emit the '+' sign to avoid issues with half of the extant implementations. -const FORM_URLENCODED: &AsciiSet = &NON_ALPHANUMERIC - .remove(b'*') - .remove(b'-') - .remove(b'.') - .remove(b'_'); - -pub fn urlencode(s: &str) -> String { - utf8_percent_encode(s, FORM_URLENCODED).to_string() -} diff --git a/cyclonedx-bom/Cargo.toml b/cyclonedx-bom/Cargo.toml index 26a51624..8f8c09fa 100644 --- a/cyclonedx-bom/Cargo.toml +++ b/cyclonedx-bom/Cargo.toml @@ -20,7 +20,7 @@ indexmap = "2.2.2" jsonschema = "0.17.1" once_cell = "1.18.0" ordered-float = { version = "4.2.0", default-features = false } -packageurl = "0.3.0" +purl = { version = "0.1.3", default-features = false } regex = "1.9.3" serde = { version = "1.0.193", features = ["derive"] } serde_json = "1.0.108" diff --git a/cyclonedx-bom/src/external_models/uri.rs b/cyclonedx-bom/src/external_models/uri.rs index 7febe171..90d66f3f 100644 --- a/cyclonedx-bom/src/external_models/uri.rs +++ b/cyclonedx-bom/src/external_models/uri.rs @@ -19,16 +19,16 @@ use std::{convert::TryFrom, str::FromStr}; use fluent_uri::Uri as Url; -use packageurl::PackageUrl; +use purl::{GenericPurl, GenericPurlBuilder}; use thiserror::Error; use crate::validation::ValidationError; pub fn validate_purl(purl: &Purl) -> Result<(), ValidationError> { - if PackageUrl::from_str(&purl.0).is_err() { - return Err("Purl does not conform to Package URL spec".into()); + match GenericPurl::::from_str(&purl.0) { + Ok(_) => Ok(()), + Err(e) => Err(format!("Purl does not conform to Package URL spec: {e}").into()), } - Ok(()) } #[derive(Debug, Clone, PartialEq, Eq, Hash)] @@ -36,8 +36,10 @@ pub struct Purl(pub(crate) String); impl Purl { pub fn new(package_type: &str, name: &str, version: &str) -> Result { - match packageurl::PackageUrl::new(package_type, name) { - Ok(mut purl) => Ok(Self(purl.with_version(version.trim()).to_string())), + let builder = GenericPurlBuilder::new(package_type.to_string(), name).with_version(version); + + match builder.build() { + Ok(purl) => Ok(Self(purl.to_string())), Err(e) => Err(UriError::InvalidPurl(e.to_string())), } } @@ -137,7 +139,7 @@ mod test { let validation_result = validate_purl(&Purl("invalid purl".to_string())); assert_eq!( validation_result, - Err("Purl does not conform to Package URL spec".into()), + Err("Purl does not conform to Package URL spec: URL scheme must be pkg".into()), ); } diff --git a/cyclonedx-bom/src/models/component.rs b/cyclonedx-bom/src/models/component.rs index 45c51e08..937dbf21 100644 --- a/cyclonedx-bom/src/models/component.rs +++ b/cyclonedx-bom/src/models/component.rs @@ -1008,7 +1008,7 @@ mod test { ), validation::field( "purl", - "Purl does not conform to Package URL spec" + "Purl does not conform to Package URL spec: URL scheme must be pkg" ), validation::r#struct( "swid",