Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .coderabbit.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,6 @@ reviews:
path_filters:
# Exclude all Verify snapshot testing files
- "!**/*.verified.txt"
# Provide custom prompt instructions to CodeRabbit
instructions: |
Do not suggest adding XML documentation comments or summary tags to public methods, classes, or properties unless specifically requested.
9 changes: 2 additions & 7 deletions src/FileLicenseMatcher/Combine/LicenseMatcher.cs
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,9 @@

namespace FileLicenseMatcher.Combine
{
public class LicenseMatcher : IFileLicenseMatcher
public class LicenseMatcher(IReadOnlyList<IFileLicenseMatcher> matchers) : IFileLicenseMatcher
{
private readonly IReadOnlyCollection<IFileLicenseMatcher> _matchers;

public LicenseMatcher(IReadOnlyList<IFileLicenseMatcher> matchers)
{
_matchers = matchers;
}
private readonly IReadOnlyCollection<IFileLicenseMatcher> _matchers = matchers;

public string Match(string licenseText)
{
Expand Down
18 changes: 5 additions & 13 deletions src/FileLicenseMatcher/Compare/LicenseMatcher.cs
Original file line number Diff line number Diff line change
Expand Up @@ -8,27 +8,19 @@

namespace FileLicenseMatcher.Compare
{
public class LicenseMatcher : IFileLicenseMatcher
public class LicenseMatcher(IFileSystem fileSystem, IDictionary<string, string> fileLicenseMap)
: IFileLicenseMatcher
{
private readonly IFileSystem _fileSystem;
private readonly IDictionary<string, string> _fileLicenseMap;

public LicenseMatcher(IFileSystem fileSystem, IDictionary<string, string> fileLicenseMap)
{
_fileSystem = fileSystem;
_fileLicenseMap = fileLicenseMap;
}

public string Match(string licenseText)
{
string[] licenseContent = licenseText.Split(Array.Empty<char>(), StringSplitOptions.RemoveEmptyEntries);
foreach (KeyValuePair<string, string> kvp in _fileLicenseMap)
foreach (KeyValuePair<string, string> kvp in fileLicenseMap)
{
if (!_fileSystem.File.Exists(kvp.Key))
if (!fileSystem.File.Exists(kvp.Key))
{
continue;
}
IEnumerable<string> fileContent = _fileSystem.File.ReadAllText(kvp.Key).Split(Array.Empty<char>(), StringSplitOptions.RemoveEmptyEntries);
IEnumerable<string> fileContent = fileSystem.File.ReadAllText(kvp.Key).Split(Array.Empty<char>(), StringSplitOptions.RemoveEmptyEntries);
if (licenseContent.SequenceEqual(fileContent))
{
return kvp.Value;
Expand Down
16 changes: 8 additions & 8 deletions src/FileLicenseMatcher/SPDX/FastLicenseMatcher.cs
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,11 @@ public class FastLicenseMatcher : IFileLicenseMatcher
private const string START_COMMENT_CHAR_PATTERN = "(//|/\\*|\\*|#|' |REM |<!--|--|;|\\(\\*|\\{-)|\\.\\\\\"";

#pragma warning disable IDE1006
private static readonly Regex RULE_PATTERN = new Regex(START_RULE + "\\s*(beginOptional|endOptional|var)", RegexOptions.Compiled);
private static readonly Regex END_RULE_PATTERN = new Regex(END_RULE, RegexOptions.Compiled);
private static readonly Regex END_COMMENT_PATTERN = new Regex("(\\*/|-->|-}|\\*\\)|\\s\\*)\\s*$", RegexOptions.Compiled);
private static readonly Regex START_COMMENT_PATTERN = new Regex("^\\s*" + START_COMMENT_CHAR_PATTERN, RegexOptions.Compiled);
private static readonly Regex BEGIN_OPTIONAL_COMMENT_PATTERN = new Regex("^\\s*<<beginOptional>>\\s*" + START_COMMENT_CHAR_PATTERN, RegexOptions.Compiled);
private static readonly Regex RULE_PATTERN = new(START_RULE + "\\s*(beginOptional|endOptional|var)", RegexOptions.Compiled);
private static readonly Regex END_RULE_PATTERN = new(END_RULE, RegexOptions.Compiled);
private static readonly Regex END_COMMENT_PATTERN = new("(\\*/|-->|-}|\\*\\)|\\s\\*)\\s*$", RegexOptions.Compiled);
private static readonly Regex START_COMMENT_PATTERN = new("^\\s*" + START_COMMENT_CHAR_PATTERN, RegexOptions.Compiled);
private static readonly Regex BEGIN_OPTIONAL_COMMENT_PATTERN = new("^\\s*<<beginOptional>>\\s*" + START_COMMENT_CHAR_PATTERN, RegexOptions.Compiled);
#pragma warning restore IDE1006

private readonly IImmutableDictionary<string, ParseInstruction> _templateInstructions;
Expand Down Expand Up @@ -94,7 +94,7 @@ private static ParseInstruction ParseTemplate(ILicense license)
end = endMatch.Index + endMatch.Length;
string ruleString = licenseTemplate.Substring(ruleMatches.Current.Index + START_RULE.Length, end - END_RULE.Length - ruleMatches.Current.Index - START_RULE.Length);

LicenseTemplateRule rule = new LicenseTemplateRule(ruleString);
LicenseTemplateRule rule = new(ruleString);
if (rule.Type == LicenseTemplateRule.RuleType.VARIABLE)
{
instructionStack.Peek().addSubInstruction(new ParseInstruction(rule, null));
Expand All @@ -116,7 +116,7 @@ private static ParseInstruction ParseTemplate(ILicense license)
else
{
throw new LicenseTemplateRuleException(
"Unrecognized rule: " + rule.Type.ToString() + " after text '" + upToTheFind + "'");
"Unrecognized rule: " + rule.Type + " after text '" + upToTheFind + "'");
}
}
if (instructionStack.Count > 1)
Expand Down Expand Up @@ -147,7 +147,7 @@ private static string removeCommentChars(string s)
{
return "";
}
StringBuilder sb = new StringBuilder();
StringBuilder sb = new();
using var reader = new StringReader(s);
try
{
Expand Down
10 changes: 5 additions & 5 deletions src/FileLicenseMatcher/SPDX/JavaCore/LicenseTemplateRule.cs
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,13 @@ public class LicenseTemplateRule
public enum RuleType { VARIABLE, BEGIN_OPTIONAL, END_OPTIONAL }

public RuleType? Type { get; set; }
public string? Original { get; set; } = null;
public string? Original { get; set; }
public string? Name { get; set; }
public string? Example { get; set; } = null;
public string? Match { get; set; } = null;
public string? Example { get; set; }
public string? Match { get; set; }

#pragma warning disable IDE1006
static readonly Regex SPLIT_REGEX = new Regex("[^\\\\];", RegexOptions.Compiled);
static readonly Regex SPLIT_REGEX = new("[^\\\\];", RegexOptions.Compiled);
#pragma warning restore IDE1006
private const string EXAMPLE_KEYWORD = "example";
private const string NAME_KEYWORD = "name";
Expand Down Expand Up @@ -144,7 +144,7 @@ public void parseLicenseTemplateRule(string parseableLicenseTemplateRule)
string typeStr;
if (rulePartMatcher.Count > 0)
{
typeStr = parseableLicenseTemplateRule.Substring(start, rulePartMatcher[0].Index + 1 - start).Trim();
typeStr = parseableLicenseTemplateRule.Substring(start, rulePartMatcher[0].Index + 1).Trim();
start = rulePartMatcher[0].Index + rulePartMatcher[0].Length;
}
else
Expand Down
124 changes: 20 additions & 104 deletions src/FileLicenseMatcher/SPDX/JavaCore/LicenseTextHelper.cs
Original file line number Diff line number Diff line change
Expand Up @@ -24,25 +24,25 @@ public static class LicenseTextHelper
{

private const string TOKEN_SPLIT_REGEX = "(^|[^\\s.,?'();:\"/\\[\\]<>]{1,100})((\\s|\\.|,|\\?|'|\"|\\(|\\)|;|:|/|\\[|]|<|>|$){1,100})";
public static readonly Regex TOKEN_SPLIT_PATTERN = new Regex(TOKEN_SPLIT_REGEX, RegexOptions.Compiled);
public static readonly Regex TOKEN_SPLIT_PATTERN = new(TOKEN_SPLIT_REGEX, RegexOptions.Compiled);
#pragma warning disable IDE1006
private static readonly ImmutableHashSet<string> PUNCTUATION = [".", ",", "?", "\"", "'", "(", ")", ";", ":", "/", "[", "]", "<", ">"];
// most of these are comments for common programming languages (C style, Java, Ruby, Python)
private static readonly ImmutableHashSet<string> SKIPPABLE_TOKENS = ["//", "/*", "*/", "/**", "#", "##", "*", "**", "\"\"\"", "/", "=begin", "=end"];
static readonly Regex DASHES_REGEX = new Regex("[\\u2010\\u2011\\u2012\\u2013\\u2014\\u2015\\uFE58\\uFF0D\\-]{1,2}", RegexOptions.Compiled);
static readonly Regex SPACE_PATTERN = new Regex("[\\u202F\\u2007\\u2060\\u2009]", RegexOptions.Compiled);
static readonly Regex COMMA_PATTERN = new Regex("[\\uFF0C\\uFE10\\uFE50]");
static readonly Regex PER_CENT_PATTERN = new Regex("per cent", RegexOptions.IgnoreCase | RegexOptions.Compiled);
static readonly Regex COPYRIGHT_HOLDER_PATTERN = new Regex("copyright holder", RegexOptions.IgnoreCase | RegexOptions.Compiled);
static readonly Regex COPYRIGHT_HOLDERS_PATTERN = new Regex("copyright holders", RegexOptions.IgnoreCase | RegexOptions.Compiled);
static readonly Regex COPYRIGHT_OWNERS_PATTERN = new Regex("copyright owners", RegexOptions.IgnoreCase | RegexOptions.Compiled);
static readonly Regex COPYRIGHT_OWNER_PATTERN = new Regex("copyright owner", RegexOptions.IgnoreCase | RegexOptions.Compiled);
static readonly Regex PER_CENT_PATTERN_LF = new Regex("per\\s{0,100}\\n{1,10}\\s{0,100}cent", RegexOptions.IgnoreCase | RegexOptions.Compiled);
static readonly Regex COPYRIGHT_HOLDERS_PATTERN_LF = new Regex("copyright\\s{0,100}\\n{1,10}\\s{0,100}holders", RegexOptions.IgnoreCase | RegexOptions.Compiled);
static readonly Regex COPYRIGHT_HOLDER_PATTERN_LF = new Regex("copyright\\s{0,100}\\n{1,10}\\s{0,100}holder", RegexOptions.IgnoreCase | RegexOptions.Compiled);
static readonly Regex COPYRIGHT_OWNERS_PATTERN_LF = new Regex("copyright\\s{0,100}\\n{1,10}\\s{0,100}owners", RegexOptions.IgnoreCase | RegexOptions.Compiled);
static readonly Regex COPYRIGHT_OWNER_PATTERN_LF = new Regex("copyright\\s{0,100}\\n{1,10}\\s{0,100}owner", RegexOptions.IgnoreCase | RegexOptions.Compiled);
static readonly Regex COPYRIGHT_SYMBOL_PATTERN = new Regex("\\(c\\)", RegexOptions.IgnoreCase | RegexOptions.Compiled);
static readonly Regex DASHES_REGEX = new("[\\u2010\\u2011\\u2012\\u2013\\u2014\\u2015\\uFE58\\uFF0D\\-]{1,2}", RegexOptions.Compiled);
static readonly Regex SPACE_PATTERN = new("[\\u202F\\u2007\\u2060\\u2009]", RegexOptions.Compiled);
static readonly Regex COMMA_PATTERN = new("[\\uFF0C\\uFE10\\uFE50]");
static readonly Regex PER_CENT_PATTERN = new("per cent", RegexOptions.IgnoreCase | RegexOptions.Compiled);
static readonly Regex COPYRIGHT_HOLDER_PATTERN = new("copyright holder", RegexOptions.IgnoreCase | RegexOptions.Compiled);
static readonly Regex COPYRIGHT_HOLDERS_PATTERN = new("copyright holders", RegexOptions.IgnoreCase | RegexOptions.Compiled);
static readonly Regex COPYRIGHT_OWNERS_PATTERN = new("copyright owners", RegexOptions.IgnoreCase | RegexOptions.Compiled);
static readonly Regex COPYRIGHT_OWNER_PATTERN = new("copyright owner", RegexOptions.IgnoreCase | RegexOptions.Compiled);
static readonly Regex PER_CENT_PATTERN_LF = new("per\\s{0,100}\\n{1,10}\\s{0,100}cent", RegexOptions.IgnoreCase | RegexOptions.Compiled);
static readonly Regex COPYRIGHT_HOLDERS_PATTERN_LF = new("copyright\\s{0,100}\\n{1,10}\\s{0,100}holders", RegexOptions.IgnoreCase | RegexOptions.Compiled);
static readonly Regex COPYRIGHT_HOLDER_PATTERN_LF = new("copyright\\s{0,100}\\n{1,10}\\s{0,100}holder", RegexOptions.IgnoreCase | RegexOptions.Compiled);
static readonly Regex COPYRIGHT_OWNERS_PATTERN_LF = new("copyright\\s{0,100}\\n{1,10}\\s{0,100}owners", RegexOptions.IgnoreCase | RegexOptions.Compiled);
static readonly Regex COPYRIGHT_OWNER_PATTERN_LF = new("copyright\\s{0,100}\\n{1,10}\\s{0,100}owner", RegexOptions.IgnoreCase | RegexOptions.Compiled);
static readonly Regex COPYRIGHT_SYMBOL_PATTERN = new("\\(c\\)", RegexOptions.IgnoreCase | RegexOptions.Compiled);
#pragma warning restore IDE1006
public static readonly IImmutableDictionary<string, string> NORMALIZE_TOKENS = ImmutableDictionary.CreateRange(
[
Expand Down Expand Up @@ -96,90 +96,6 @@ public static class LicenseTextHelper
]
);

/**
 * Decides whether two license texts are considered a match per the SPDX
 * License matching guidelines documented at spdx.org (currently <a href="https://spdx.github.io/spdx-spec/v2.3/license-matching-guidelines-and-templates/">license matching guidelines</a>).
 * Both texts are tokenized and the two token streams are walked in lock-step;
 * tokens for which canSkip returns true may be passed over on either side.
 * There are 2 unimplemented features - bullets/numbering is not considered and
 * comments with no whitespace between text is not skipped.
 * @param licenseTextA text to compare
 * @param licenseTextB text to compare
 * @return true if the license text is equivalent
 */
public static bool isLicenseTextEquivalent(string licenseTextA, string licenseTextB)
{
    // A null text only matches a null or empty counterpart.
    if (licenseTextA is null)
    {
        return string.IsNullOrEmpty(licenseTextB);
    }
    if (licenseTextB is null)
    {
        return string.IsNullOrEmpty(licenseTextA);
    }

    // Identical strings need no tokenization at all.
    if (licenseTextA == licenseTextB)
    {
        return true;
    }

    // Tokenize both sides (multi-word equivalents are handled by the tokenizer).
    // The location maps are required by the tokenizer but are not read here.
    IDictionary<int, LineColumn> locationsA = new Dictionary<int, LineColumn>();
    IDictionary<int, LineColumn> locationsB = new Dictionary<int, LineColumn>();
    IReadOnlyList<string> tokensA = tokenizeLicenseText(licenseTextA, locationsA);
    IReadOnlyList<string> tokensB = tokenizeLicenseText(licenseTextB, locationsB);

    int indexA = 0;
    int indexB = 0;
    string? tokenA = getTokenAt(tokensA, indexA++);
    string? tokenB = getTokenAt(tokensB, indexB++);

    while (tokenA != null)
    {
        if (tokenB == null)
        {
            // B is exhausted: whatever remains in A must be skippable.
            while (canSkip(tokenA))
            {
                tokenA = getTokenAt(tokensA, indexA++);
            }
            if (tokenA != null)
            {
                return false; // A still has a non-skippable token, so not equal
            }
            continue; // tokenA is null here, which ends the outer loop
        }

        if (!tokensEquivalent(tokenA, tokenB))
        {
            // Mismatch: skip forward on B first, then on A, and compare again.
            while (canSkip(tokenB))
            {
                tokenB = getTokenAt(tokensB, indexB++);
            }
            while (canSkip(tokenA))
            {
                tokenA = getTokenAt(tokensA, indexA++);
            }
            if (!tokensEquivalent(tokenA, tokenB))
            {
                return false;
            }
        }

        // The current pair matches (directly or after skipping): advance both.
        tokenA = getTokenAt(tokensA, indexA++);
        tokenB = getTokenAt(tokensB, indexB++);
    }

    // A is exhausted: B may only have skippable tokens left.
    while (canSkip(tokenB))
    {
        tokenB = getTokenAt(tokensB, indexB++);
    }
    return tokenB == null;
}

/**
* Tokenizes the license text, normalizes quotes, lowercases and converts
* multi-words for better equiv. comparisons
Expand Down Expand Up @@ -231,7 +147,7 @@ public static IReadOnlyList<string> tokenizeLicenseText(string licenseText, IDic
{
// Don't fill in the lines, take a simpler approach
MatchCollection m = TOKEN_SPLIT_PATTERN.Matches(textToTokenize);
foreach (GroupCollection groups in m.Cast<Match>().Select(m => m.Groups))
foreach (GroupCollection groups in m.Cast<Match>().Select(match => match.Groups))
{
string word = groups[1].Value.Trim();
string separator = groups[2].Value.Trim();
Expand Down Expand Up @@ -357,9 +273,9 @@ public static string replaceMultWord(string s)
* @param s String to normalize
* @return String normalized for comparison
*/
private static readonly Regex s_singleQuotePattern = new Regex("[‘’‛‚`]", RegexOptions.Compiled);
private static readonly Regex s_doubleQuotePattern = new Regex("[“”‟„]", RegexOptions.Compiled);
private static readonly Regex s_dashPattern = new Regex("[—–]", RegexOptions.Compiled);
private static readonly Regex s_singleQuotePattern = new("[‘’‛‚`]", RegexOptions.Compiled);
private static readonly Regex s_doubleQuotePattern = new("[“”‟„]", RegexOptions.Compiled);
private static readonly Regex s_dashPattern = new("[—–]", RegexOptions.Compiled);
public static string normalizeText(string s)
{
// First normalize single quotes, then normalize two single quotes to a double quote, normalize double quotes
Expand All @@ -377,7 +293,7 @@ public static string normalizeText(string s)
* @param s Input string
* @return s without any line separators (---, ***, ===)
*/
private static readonly Regex s_removeLineSeparatorsRegex = new Regex("[-=*]{3,}\\s*$", RegexOptions.Compiled);
private static readonly Regex s_removeLineSeparatorsRegex = new("[-=*]{3,}\\s*$", RegexOptions.Compiled);
public static string removeLineSeparators(string s)
{
return s_removeLineSeparatorsRegex.Replace(s, ""); // Remove ----, ***, and ====
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ public static class SpdxLicenseTemplateHelper
{
private const string START_RULE = "<<";
private const string END_RULE = ">>";
public static readonly Regex RULE_PATTERN = new Regex(START_RULE + "\\s*(beginOptional|endOptional|var)", RegexOptions.Compiled);
public static readonly Regex END_RULE_PATTERN = new Regex(END_RULE, RegexOptions.Compiled);
public static readonly Regex RULE_PATTERN = new(START_RULE + "\\s*(beginOptional|endOptional|var)", RegexOptions.Compiled);
public static readonly Regex END_RULE_PATTERN = new(END_RULE, RegexOptions.Compiled);

/**
* Parses the license template calling the templateOutputHandler for any text
Expand All @@ -36,13 +36,13 @@ public static class SpdxLicenseTemplateHelper
*/
public static void parseTemplate(string licenseTemplate, ILicenseTemplateOutputHandler templateOutputHandler)
{
IEnumerator<Match> ruleMatcher = RULE_PATTERN.Matches(licenseTemplate).Cast<Match>().GetEnumerator();
using IEnumerator<Match> ruleMatcher = RULE_PATTERN.Matches(licenseTemplate).Cast<Match>().GetEnumerator();
int end = 0;
int optionalNestLevel = 0;
while (ruleMatcher.MoveNext())
{
// copy everything up to the start of the find
string upToTheFind = licenseTemplate.Substring(end, ruleMatcher.Current.Index - end);
string upToTheFind = licenseTemplate.Substring(end, ruleMatcher.Current!.Index - end);
if (!string.IsNullOrWhiteSpace(upToTheFind))
{
templateOutputHandler.text(upToTheFind);
Expand All @@ -55,7 +55,7 @@ public static void parseTemplate(string licenseTemplate, ILicenseTemplateOutputH
end = endMatch.Index + endMatch.Length;
string ruleString = licenseTemplate.Substring(ruleMatcher.Current.Index + START_RULE.Length, end - END_RULE.Length - ruleMatcher.Current.Index - START_RULE.Length);

LicenseTemplateRule rule = new LicenseTemplateRule(ruleString);
LicenseTemplateRule rule = new(ruleString);
if (rule.Type == LicenseTemplateRule.RuleType.VARIABLE)
{
templateOutputHandler.variableRule(rule);
Expand All @@ -78,7 +78,7 @@ public static void parseTemplate(string licenseTemplate, ILicenseTemplateOutputH
else
{
throw new LicenseTemplateRuleException(
"Unrecognized rule: " + rule.Type.ToString() + " after text '" + upToTheFind + "'");
"Unrecognized rule: " + rule.Type + " after text '" + upToTheFind + "'");
}
}
if (optionalNestLevel > 0)
Expand Down
Loading