Skip to content

Commit

Permalink
fix: Improve validation of iri & iri-reference
Browse files Browse the repository at this point in the history
Signed-off-by: Dmitry Dygalo <[email protected]>
  • Loading branch information
Stranger6667 committed Sep 29, 2024
1 parent dfa74db commit be14c1e
Show file tree
Hide file tree
Showing 15 changed files with 141 additions and 96 deletions.
4 changes: 3 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,12 @@ While backward compatibility is maintained for now, users are encouraged to upda
- Cross-draft validation from newer to older ones.
- Changing base URI in folder.
- Location-independent identifier in remote resource.
- Missing some format validation for Draft 2020-12.
- Incomplete `iri` & `iri-reference` validation.

### Performance

- Faster `uri-reference` validation.
- Faster validation for `uri`, `iri`, `uri-reference`, and `iri-reference` formats.

## [0.20.0] - 2024-09-18

Expand Down
4 changes: 3 additions & 1 deletion crates/jsonschema-py/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,12 @@
- Cross-draft validation from newer to older ones.
- Changing base URI in folder.
- Location-independent identifier in remote resource.
- Missing some format validation for Draft 2020-12.
- Incomplete `iri` & `iri-reference` validation.

### Performance

- Faster `uri-reference` validation.
- Faster validation for `uri`, `iri`, `uri-reference`, and `iri-reference` formats.

## [0.20.0] - 2024-09-18

Expand Down
2 changes: 1 addition & 1 deletion crates/jsonschema-referencing/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ license.workspace = true

[dependencies]
ahash.workspace = true
fluent-uri = { version = "0.2.0", features = ["serde"] }
fluent-uri = { version = "0.3.0", features = ["serde"] }
once_cell = "1.19.0"
percent-encoding = "2.3.1"
serde_json.workspace = true
Expand Down
6 changes: 4 additions & 2 deletions crates/jsonschema-referencing/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,5 +18,7 @@ pub use retriever::{DefaultRetriever, Retrieve};
pub(crate) use segments::Segments;
pub use specification::Draft;

pub type Uri = fluent_uri::UriRef<String>;
pub type UriRef<'a> = fluent_uri::UriRef<&'a str>;
pub type Uri<T> = fluent_uri::Uri<T>;
pub type Iri<T> = fluent_uri::Iri<T>;
pub type UriRef<T> = fluent_uri::UriRef<T>;
pub type IriRef<T> = fluent_uri::IriRef<T>;
45 changes: 24 additions & 21 deletions crates/jsonschema-referencing/src/registry.rs
Original file line number Diff line number Diff line change
@@ -1,15 +1,16 @@
use std::{collections::VecDeque, fmt::Debug, sync::Arc};

use ahash::{AHashMap, AHashSet};
use fluent_uri::UriRef;
use once_cell::sync::Lazy;
use serde_json::Value;

use crate::{
anchors::{AnchorKey, AnchorKeyRef},
meta, uri, Anchor, DefaultRetriever, Draft, Error, Resolver, Resource, Retrieve, Uri,
meta, uri, Anchor, DefaultRetriever, Draft, Error, Resolver, Resource, Retrieve,
};

type ResourceMap = AHashMap<Uri, Arc<Resource>>;
type ResourceMap = AHashMap<UriRef<String>, Arc<Resource>>;

pub static SPECIFICATIONS: Lazy<Registry> = Lazy::new(|| {
let pairs = meta::META_SCHEMAS.into_iter().map(|(uri, schema)| {
Expand Down Expand Up @@ -215,14 +216,21 @@ impl Registry {
}
/// Create a new [`Resolver`] for this registry with a known valid base URI.
#[must_use]
pub fn resolver(&self, base_uri: Uri) -> Resolver {
pub fn resolver(&self, base_uri: UriRef<String>) -> Resolver {
Resolver::new(self, base_uri)
}
#[must_use]
pub fn resolver_from_raw_parts(&self, base_uri: Uri, scopes: VecDeque<Uri>) -> Resolver {
pub fn resolver_from_raw_parts(
&self,
base_uri: UriRef<String>,
scopes: VecDeque<UriRef<String>>,
) -> Resolver {
Resolver::from_parts(self, base_uri, scopes)
}
pub(crate) fn get_or_retrieve<'r>(&'r self, uri: &Uri) -> Result<&'r Resource, Error> {
pub(crate) fn get_or_retrieve<'r>(
&'r self,
uri: &UriRef<String>,
) -> Result<&'r Resource, Error> {
if let Some(resource) = self.resources.get(uri) {
Ok(resource)
} else {
Expand All @@ -235,7 +243,11 @@ impl Registry {
))
}
}
pub(crate) fn anchor<'a>(&self, uri: &'a Uri, name: &'a str) -> Result<&Anchor, Error> {
pub(crate) fn anchor<'a>(
&self,
uri: &'a UriRef<String>,
name: &'a str,
) -> Result<&Anchor, Error> {
let key = AnchorKeyRef::new(uri, name);
if let Some(value) = self.anchors.get(key.borrow_dyn()) {
return Ok(value);
Expand Down Expand Up @@ -347,16 +359,16 @@ fn process_resources(
}

fn collect_external_references(
base: &Uri,
base: &UriRef<String>,
contents: &Value,
collected: &mut AHashSet<Uri>,
collected: &mut AHashSet<UriRef<String>>,
) -> Result<(), Error> {
if base.scheme().map(fluent_uri::component::Scheme::as_str) == Some("urn") {
return Ok(());
}
if let Some(reference) = contents.get("$ref").and_then(Value::as_str) {
let resolved = uri::resolve_against(&base.borrow(), reference)?;
let builder = Uri::builder();
let builder = UriRef::builder();
let base_uri = match (resolved.scheme(), resolved.authority()) {
(Some(scheme), Some(auth)) => {
builder.scheme(scheme).authority(auth).path(resolved.path())
Expand Down Expand Up @@ -701,20 +713,11 @@ mod tests {

let result = Registry::try_from_resources(input_resources.into_iter());
let error = result.expect_err("Should fail");
assert_eq!(
error.to_string(),
"Invalid URI: base URI without scheme or with fragment"
);
assert_eq!(error.to_string(), "Invalid URI: base URI/IRI with fragment");
let source_error = error.source().expect("Should have a source");
assert_eq!(
source_error.to_string(),
"base URI without scheme or with fragment"
);
assert_eq!(source_error.to_string(), "base URI/IRI with fragment");
let inner_source = source_error.source().expect("Should have a source");
assert_eq!(
inner_source.to_string(),
"base URI without scheme or with fragment"
);
assert_eq!(inner_source.to_string(), "base URI/IRI with fragment");
}

#[test]
Expand Down
24 changes: 22 additions & 2 deletions crates/jsonschema-referencing/src/uri.rs
Original file line number Diff line number Diff line change
@@ -1,15 +1,35 @@
use fluent_uri::{encoding::EString, UriRef};
use fluent_uri::{encoding::EString, Uri, UriRef};

use crate::Error;
pub use fluent_uri::encoding::encoder::Path;

/// Builds, at compile time, a byte-indexed lookup table that is `true` for
/// ASCII letters, ASCII digits, `+`, `-` and `.`, and `false` everywhere else.
///
/// The table has 259 entries to match the declared return type; every entry
/// that does not correspond to one of the listed bytes stays `false`.
const fn table() -> [bool; 259] {
    const ALLOWED: &[u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+-.";
    let mut flags = [false; 259];
    // Iterators are not usable in `const fn`, hence the explicit index loop.
    let mut idx = 0;
    while idx < ALLOWED.len() {
        flags[ALLOWED[idx] as usize] = true;
        idx += 1;
    }
    flags
}

const SCHEME: &[bool; 259] = &table();
/// Resolves the URI reference against the given base URI and returns the target URI.
///
/// # Errors
///
/// Returns an error if the base has no scheme or contains a fragment.
pub fn resolve_against(base: &UriRef<&str>, uri: &str) -> Result<UriRef<String>, Error> {
Ok(UriRef::parse(uri)?.resolve_against(base)?.normalize())
// Emulate `ParseError` to avoid panic on invalid input.
//
// See https://github.com/yescallop/fluent-uri-rs/issues/28
if base.as_str().is_empty() || !base.as_str().bytes().any(|x| !SCHEME[x as usize]) {
Uri::try_from("/".to_string()).map_err(|err| err.strip_input())?;
}
Ok(UriRef::parse(uri)?
.resolve_against(&(Uri::try_from(*base)?))?
.normalize()
.into())
}

/// Parses a URI reference from a string into a [`crate::Uri`].
Expand Down
12 changes: 7 additions & 5 deletions crates/jsonschema/src/compiler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,15 @@ use crate::{
use ahash::{AHashMap, AHashSet};
use once_cell::sync::Lazy;
use referencing::{
uri, Draft, Registry, Resolved, Resolver, Resource, ResourceRef, Retrieve, Uri, SPECIFICATIONS,
uri, Draft, Registry, Resolved, Resolver, Resource, ResourceRef, Retrieve, UriRef,
SPECIFICATIONS,
};
use serde_json::Value;
use std::{cell::RefCell, collections::VecDeque, rc::Rc, sync::Arc};

const DEFAULT_SCHEME: &str = "json-schema";
pub(crate) const DEFAULT_ROOT_URL: &str = "json-schema:///";
type BaseUri = UriRef<String>;

/// Container for information required to build a tree.
///
Expand All @@ -35,7 +37,7 @@ pub(crate) struct Context<'a> {
resolver: Rc<Resolver<'a>>,
pub(crate) path: JsonPointerNode<'a, 'a>,
pub(crate) draft: Draft,
seen: Rc<RefCell<AHashSet<Uri>>>,
seen: Rc<RefCell<AHashSet<UriRef<String>>>>,
}

impl<'a> Context<'a> {
Expand Down Expand Up @@ -100,7 +102,7 @@ impl<'a> Context<'a> {
self.resolver.lookup(reference)
}

pub(crate) fn scopes(&self) -> VecDeque<Uri> {
pub(crate) fn scopes(&self) -> VecDeque<UriRef<String>> {
VecDeque::from_iter(self.resolver.dynamic_scope().cloned())
}

Expand All @@ -116,7 +118,7 @@ impl<'a> Context<'a> {
self.path.push(chunk).into()
}

pub(crate) fn base_uri(&self) -> Option<Uri> {
pub(crate) fn base_uri(&self) -> Option<UriRef<String>> {
let base_uri = self.resolver.base_uri();
if base_uri.scheme().map(|s| s.as_str()) == Some(DEFAULT_SCHEME) {
None
Expand Down Expand Up @@ -202,7 +204,7 @@ impl<'a> Context<'a> {
&self,
reference: &str,
is_recursive: bool,
) -> Result<Option<(Uri, VecDeque<Uri>, Resource)>, ValidationError<'static>> {
) -> Result<Option<(BaseUri, VecDeque<BaseUri>, Resource)>, ValidationError<'static>> {
let resolved = if reference == "#" {
// Known & simple recursive reference
// It may also use some additional logic from the `$recursiveAnchor` keyword
Expand Down
6 changes: 3 additions & 3 deletions crates/jsonschema/src/keywords/additional_properties.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ use crate::{
properties::*,
validator::{PartialApplication, Validate},
};
use referencing::Uri;
use referencing::UriRef;
use serde_json::{Map, Value};

macro_rules! is_valid {
Expand Down Expand Up @@ -457,7 +457,7 @@ pub(crate) struct AdditionalPropertiesWithPatternsValidator {
/// "additionalProperties" as its path. However, we need to produce annotations which have the
/// patternProperties keyword as their path so we store the paths here.
pattern_keyword_path: JsonPointer,
pattern_keyword_absolute_location: Option<Uri>,
pattern_keyword_absolute_location: Option<UriRef<String>>,
}
impl AdditionalPropertiesWithPatternsValidator {
#[inline]
Expand Down Expand Up @@ -591,7 +591,7 @@ pub(crate) struct AdditionalPropertiesWithPatternsFalseValidator {
patterns: PatternedValidators,
schema_path: JsonPointer,
pattern_keyword_path: JsonPointer,
pattern_keyword_absolute_location: Option<Uri>,
pattern_keyword_absolute_location: Option<UriRef<String>>,
}
impl AdditionalPropertiesWithPatternsFalseValidator {
#[inline]
Expand Down
Loading

0 comments on commit be14c1e

Please sign in to comment.