Skip to content

Commit

Permalink
Merge pull request #636 from hippietrail/matcher-categories
Browse files Browse the repository at this point in the history
feat: split triggers in `matcher.rs` into categories with explanations
  • Loading branch information
elijah-potter authored Feb 10, 2025
2 parents 807d32e + 93f2736 commit 058306a
Showing 1 changed file with 125 additions and 62 deletions.
187 changes: 125 additions & 62 deletions harper-core/src/linting/matcher.rs
Original file line number Diff line number Diff line change
Expand Up @@ -101,90 +101,153 @@ impl Matcher {
pub fn new() -> Self {
// This match list needs to be automatically expanded instead of explicitly
// defined like it is now.
let mut triggers = pt! {
let mut triggers = Vec::new();

// stylistic improvements
triggers.extend(pt! {
"all", "of", "the" => "all the",
"and","also" => "and"
});

// phrase typos, each word passes spellcheck but one word is wrong
triggers.extend(pt! {
"an","in" => "and in",
"bee","there" => "been there",
"can","be","seem" => "can be seen",
"eight","grade" => "eighth grade",
"gong","to" => "going to",
"I","a","m" => "I am",
"It","cam" => "It can",
"kid","regards" => "kind regards",
"mu","house" => "my house",
"no","to" => "not to",
"No","to" => "not to",
"spacial","attention" => "special attention",
"wellbeing" => "well-being",
"hashtable" => "hash table",
"hashmap" => "hash map",
"the", "this" => "that this",
"The","re" => "There",
"There","fore" => "Therefore",
"though", "process" => "thought process",
"We","a","re" => "We are",
"you","r" => "your",
"you","re" => "you're"
});

// phrase capitalization
triggers.extend(pt! {
"black","sea" => "Black Sea",
"geiger","counter" => "Geiger counter",
"my","french" => "my French"
});

// hyphenate phrasal adjectives
triggers.extend(pt! {
"case", "sensitive" => "case-sensitive",
"ngram" => "n-gram",
"off","the","cuff" => "off-the-cuff",
"Tree", "sitter" => "Tree-sitter",
"wellbeing" => "well-being"
});

// expand abbreviations
triggers.extend(pt! {
"dep" => "dependency",
"deps" => "dependencies",
"off","the","cuff" => "off-the-cuff",
"an","in" => "and in",
"my","self" => "myself",
"eight","grade" => "eighth grade",
"and","also" => "and",
"todo" => "to-do",
"To-Do" => "To-do",
"performing","this" => "perform this",
"mins" => "minutes",
"min" => "minute",
"hr" => "hour",
"hrs" => "hours",
"min" => "minimum",
"secs" => "seconds",
"min" => "minute",
"mins" => "minutes",
"ms" => "milliseconds",
"sec" => "second",
"hrs" => "hours",
"hr" => "hour",
"w/o" => "without",
"w/" => "with",
"wordlist" => "word list",
"the","challenged" => "that challenged",
"secs" => "seconds",
"stdin" => "standard input",
"stdout" => "standard output",
"no","to" => "not to",
"No","to" => "not to",
"ngram" => "n-gram",
"grammer" => "grammar",
"There","fore" => "Therefore",
"fatal","outcome" => "death",
"geiger","counter" => "Geiger counter",
"world","war","2" => "World War II",
"World","war","ii" => "World War II",
"world","War","ii" => "World War II",
"World","War","Ii" => "World War II",
"World","War","iI" => "World War II",
"black","sea" => "Black Sea",
"I","a","m" => "I am",
"We","a","re" => "We are",
"The","re" => "There",
"my","french" => "my French",
"It","cam" => "It can",
"can","be","seem" => "can be seen",
"mu","house" => "my house",
"kid","regards" => "kind regards",
"w/" => "with",
"w/o" => "without"
});

// replace euphemisms
triggers.extend(pt! {
"fatal","outcome" => "death"
});

// spellos
triggers.extend(pt! {
"grammer" => "grammar"
});

// expand compound words
triggers.extend(pt! {
"hashmap" => "hash map",
"hashtable" => "hash table",
"wordlist" => "word list"
});

// prefixes written as separate words
triggers.extend(pt! {
"miss","understand" => "misunderstand",
"miss","use" => "misuse",
"miss","used" => "misused",
"bee","there" => "been there",
"want","be" => "won't be",
"my","self" => "myself"
});

// mixing up than/then in context
triggers.extend(pt! {
"more","then" => "more than",
"gong","to" => "going to",
"then","others" => "than others",
"Then","others" => "than others",
"then","before" => "than before",
"Then","before" => "than before",
"then","last","week" => "than last week",
"then","her" => "than her",
"then","hers" => "than hers",
"then","him" => "than him",
"then","his" => "than his",
"then","last","week" => "than last week",
"then","others" => "than others",
"Then","others" => "than others"
});

// not a perfect fit for any of the other categories
triggers.extend(pt! {
"performing","this" => "perform this",
"simply","grammatical" => "simple grammatical",
"you","r" => "your",
"you","re" => "you're",
"that","s" => "that's",
"That","s" => "That's",
"that","s" => "that is",
"That","s" => "that is",
"ms" => "milliseconds",
"case", "sensitive" => "case-sensitive",
"Tree", "sitter" => "Tree-sitter",
"all", "of", "the" => "all the",
"the","challenged" => "that challenged",
"to", "towards" => "towards",
"though", "process" => "thought process",
"the", "this" => "that this",
"To-Do" => "To-do",
"todo" => "to-do"
});

// wrong set phrases and collocations
triggers.extend(pt! {
"same", "than" => "same as",
"Same", "than" => "same as",
"Same", "than" => "same as"
});

// belonging to multiple of the other categories
triggers.extend(pt! {
"same", "then" => "same as",
"Same", "then" => "same as"
};
});

// suffixes written as separate words
triggers.extend(pt! {
"that","s" => "that is",
"That","s" => "that is",
"that","s" => "that's",
"That","s" => "That's"
});

// near homophones
triggers.extend(pt! {
"want","be" => "won't be"
});

// normalization
triggers.extend(pt! {
"world","war","2" => "World War II",
"world","War","ii" => "World War II",
"World","war","ii" => "World War II",
"World","War","iI" => "World War II",
"World","War","Ii" => "World War II"
});

triggers.push(Rule {
pattern: vec![pt!("L"), pt!(Period), pt!("L"), pt!(Period), pt!("M")],
Expand Down

0 comments on commit 058306a

Please sign in to comment.