Skip to content

Commit

Permalink
ParseMode: Convert from enum to configurable struct (#50)
Browse files Browse the repository at this point in the history
* ParseMode: Convert from enum to configurable struct

This allows users to only opt-in to some of the laxness, if necessary

* Cleanup/expand docs

* Cleanup docs in minimize

Co-authored-by: Jake Shadle <[email protected]>
  • Loading branch information
Turbo87 and Jake-Shadle authored Dec 21, 2021
1 parent 1fd188a commit d2425ce
Show file tree
Hide file tree
Showing 5 changed files with 77 additions and 38 deletions.
22 changes: 12 additions & 10 deletions src/expression/minimize.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use super::Expression;
use crate::{LicenseReq, Licensee};
use std::fmt;

Expand Down Expand Up @@ -36,21 +37,22 @@ impl std::error::Error for MinimizeError {
}
}

impl super::Expression {
impl Expression {
/// Given a set of [`Licensee`]s, attempts to find the minimum number that
/// satisfy this [`Expression`]. The list of licensees should be given in
/// priority order, eg, if you wish to accept the `Apache-2.0` license if
/// it is available and the `MIT` if not, putting `Apache-2.0` before `MIT`
/// will cause the ubiquitous `Apache-2.0 OR MIT` expression to minimize to
/// just `Apache-2.0` as only only 1 of the licenses is required, and the
/// `Apache-2.0` has priority.
/// satisfy this [`Expression`].
///
/// The list of licensees should be given in priority order, eg, if you wish
/// to accept the `Apache-2.0` license if it is available, and the `MIT` if
/// not, putting `Apache-2.0` before `MIT` will cause the ubiquitous
/// `Apache-2.0 OR MIT` expression to minimize to just `Apache-2.0` as only
/// 1 of the licenses is required, and `Apache-2.0` has priority.
///
/// # Errors
///
/// This method will fail if more than 64 unique licensees are satisfied by
/// this expression, but such a case is unlikely to say the least in a real
/// world scenario. The list of licensees must also actually satisfy this
/// expression, otherwise it can't be minimized.
/// this expression, but such a case is unlikely in a real world scenario.
/// The list of licensees must also actually satisfy this expression,
/// otherwise it can't be minimized.
///
/// # Example
///
Expand Down
6 changes: 3 additions & 3 deletions src/expression/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ impl Expression {
/// spdx::Expression::parse("MIT OR Apache-2.0 WITH LLVM-exception").unwrap();
/// ```
pub fn parse(original: &str) -> Result<Self, ParseError> {
Self::parse_mode(original, ParseMode::Strict)
Self::parse_mode(original, ParseMode::STRICT)
}

/// Parses an expression with the specified `ParseMode`. With
Expand All @@ -33,7 +33,7 @@ impl Expression {
/// ```
/// spdx::Expression::parse_mode(
/// "mit/Apache-2.0 WITH LLVM-exception",
/// spdx::ParseMode::Lax
/// spdx::ParseMode::LAX
/// ).unwrap();
/// ```
pub fn parse_mode(original: &str, mode: ParseMode) -> Result<Self, ParseError> {
Expand Down Expand Up @@ -130,7 +130,7 @@ impl Expression {
..
}) => {
// Handle GNU licenses differently, as they should *NOT* be used with the `+`
if mode == ParseMode::Strict && id.is_gnu() {
if !mode.allow_postfix_plus_on_gpl && id.is_gnu() {
return Err(ParseError {
original: original.to_owned(),
span: lt.span,
Expand Down
77 changes: 57 additions & 20 deletions src/lexer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,24 +3,59 @@ use crate::{
ExceptionId, LicenseId,
};

/// Available modes when parsing SPDX expressions
#[derive(Copy, Clone, PartialEq)]
pub enum ParseMode {
/// Strict SPDX parsing.
/// Parsing configuration for SPDX expression
#[derive(Default, Copy, Clone)]
pub struct ParseMode {
/// The `AND`, `OR`, and `WITH` operators are required to be uppercase in
/// the SPDX spec, but enabling this option allows them to be lowercased
pub allow_lower_case_operators: bool,
/// Allows the use of `/` as a synonym for the `OR` operator. This also
/// allows for not having whitespace between the `/` and the terms on either
/// side
pub allow_slash_as_or_operator: bool,
/// Allows some invalid/imprecise identifiers as synonyms for an actual
/// license identifier. See [`IMPRECISE_NAMES`](crate::identifiers::IMPRECISE_NAMES)
/// for a list of the current synonyms. Note that this list is not
/// comprehensive but can be expanded upon when invalid identifiers are
/// found in the wild.
pub allow_imprecise_license_names: bool,
/// The various GPL licenses diverge from every other license in the SPDX
/// license list by having an `-or-later` variant that used as a suffix on a
/// base license (eg. `GPL-3.0-or-later`) rather than the canonical `GPL-3.0+`.
/// This option just allows GPL licenses to be treated similarly to all of
/// the other SPDX licenses.
pub allow_postfix_plus_on_gpl: bool,
}

impl ParseMode {
/// Strict, specification compliant SPDX parsing.
///
/// 1. Only license identifiers in the SPDX license list, or
/// Document/LicenseRef, are allowed. The license identifiers are also
/// case-sensitive.
/// 1. `WITH`, `AND`, and `OR` are the only valid operators
Strict,
pub const STRICT: Self = Self {
allow_lower_case_operators: false,
allow_slash_as_or_operator: false,
allow_imprecise_license_names: false,
allow_postfix_plus_on_gpl: false,
};

/// Allow non-conforming syntax for crates-io compatibility
///
/// 1. Additional, invalid, identifiers are accepted and mapped to a correct
/// SPDX license identifier. See
/// [identifiers::IMPRECISE_NAMES](../identifiers/constant.IMPRECISE_NAMES.html)
/// for the list of additionally accepted identifiers and the license they
/// SPDX license identifier.
/// See [`IMPRECISE_NAMES`](crate::identifiers::IMPRECISE_NAMES) for the
/// list of additionally accepted identifiers and the license they
/// correspond to.
/// 1. `/` can by used as a synonym for `OR`, and doesn't need to be
/// separated by whitespace from the terms it combines
Lax,
pub const LAX: Self = Self {
allow_lower_case_operators: true,
allow_slash_as_or_operator: true,
allow_imprecise_license_names: true,
allow_postfix_plus_on_gpl: true,
};
}

/// A single token in an SPDX license expression
Expand Down Expand Up @@ -85,7 +120,7 @@ pub struct Lexer<'a> {
inner: &'a str,
original: &'a str,
offset: usize,
lax: bool,
mode: ParseMode,
}

impl<'a> Lexer<'a> {
Expand All @@ -96,7 +131,7 @@ impl<'a> Lexer<'a> {
inner: text,
original: text,
offset: 0,
lax: false,
mode: ParseMode::STRICT,
}
}

Expand All @@ -110,7 +145,7 @@ impl<'a> Lexer<'a> {
inner: text,
original: text,
offset: 0,
lax: mode == ParseMode::Lax,
mode,
}
}

Expand Down Expand Up @@ -197,7 +232,7 @@ impl<'a> Iterator for Lexer<'a> {
}
Some('(') => ok_token(Token::OpenParen),
Some(')') => ok_token(Token::CloseParen),
Some('/') if self.lax => Some(Ok((Token::Or, 1))),
Some('/') if self.mode.allow_slash_as_or_operator => Some(Ok((Token::Or, 1))),
Some(_) => match Lexer::find_text_token(self.inner) {
None => Some(Err(ParseError {
original: self.original.to_owned(),
Expand All @@ -211,9 +246,9 @@ impl<'a> Iterator for Lexer<'a> {
ok_token(Token::And)
} else if m == "OR" {
ok_token(Token::Or)
} else if self.lax && m == "and" {
} else if self.mode.allow_lower_case_operators && m == "and" {
ok_token(Token::And)
} else if self.lax && m == "or" {
} else if self.mode.allow_lower_case_operators && m == "or" {
ok_token(Token::Or)
} else if let Some(lic_id) = crate::license_id(m) {
ok_token(Token::Spdx(lic_id))
Expand All @@ -230,11 +265,13 @@ impl<'a> Iterator for Lexer<'a> {
doc_ref: None,
lic_ref,
})
} else if let Some((lic_id, token_len)) = if self.lax {
crate::imprecise_license_id(self.inner)
} else {
None
} {
} else if let Some((lic_id, token_len)) =
if self.mode.allow_imprecise_license_names {
crate::imprecise_license_id(self.inner)
} else {
None
}
{
Some(Ok((Token::Spdx(lic_id), token_len)))
} else {
Some(Err(ParseError {
Expand Down
6 changes: 3 additions & 3 deletions tests/check.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,13 @@ macro_rules! exact {

macro_rules! check {
($le:expr => [$($logical_expr:expr => $is_allowed:expr),+$(,)?]) => {
check_mode!(spdx::ParseMode::Strict, $le => [$($logical_expr => $is_allowed),+])
check_mode!(spdx::ParseMode::STRICT, $le => [$($logical_expr => $is_allowed),+])
};
}

macro_rules! check_lax {
($le:expr => [$($logical_expr:expr => $is_allowed:expr),+$(,)?]) => {
check_mode!(spdx::ParseMode::Lax, $le => [$($logical_expr => $is_allowed),+])
check_mode!(spdx::ParseMode::LAX, $le => [$($logical_expr => $is_allowed),+])
};
}

Expand Down Expand Up @@ -191,7 +191,7 @@ fn gpl_or_later_plus_strict() {

#[test]
fn gpl_or_later_plus_lax() {
spdx::Expression::parse_mode("GPL-2.0+", spdx::ParseMode::Lax).unwrap();
spdx::Expression::parse_mode("GPL-2.0+", spdx::ParseMode::LAX).unwrap();
}

#[test]
Expand Down
4 changes: 2 additions & 2 deletions tests/lexer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ fn fails_with_slash() {

#[test]
fn lax_takes_slash() {
let lexed: Vec<_> = Lexer::new_mode("MIT/Apache", spdx::ParseMode::Lax)
let lexed: Vec<_> = Lexer::new_mode("MIT/Apache", spdx::ParseMode::LAX)
.map(|r| r.map(|lt| lt.token).unwrap())
.collect();
assert_eq!(
Expand All @@ -131,7 +131,7 @@ fn lax_takes_slash() {

#[test]
fn fixes_license_names() {
let lexed: Vec<_> = Lexer::new_mode("gpl v2 / bsd 2-clause", spdx::ParseMode::Lax)
let lexed: Vec<_> = Lexer::new_mode("gpl v2 / bsd 2-clause", spdx::ParseMode::LAX)
.map(|r| r.map(|lt| lt.token).unwrap())
.collect();
assert_eq!(
Expand Down

0 comments on commit d2425ce

Please sign in to comment.