From 8702334374a051dd44d3b97023c13439726022e5 Mon Sep 17 00:00:00 2001 From: hippietrail Date: Mon, 10 Feb 2025 13:23:30 +0800 Subject: [PATCH 1/2] feat: split triggers in `matcher.rs` into categories with explanations I know the logic here is up for replacement but I think this categorization will still help in various ways: It will help elucidate different classes and types of replacements. It will help us see which categories will be amenable to various kinds of automation. It will help curators adding more lints in the meantime. It also shows that some lints combine multiple kinds of problems while others don't fit well into any category. There is some fuzziness. I first sorted them alphabetically then grouped them, so the groups themselves are more in a kind of alphabetical order than any semantic order. --- harper-core/src/linting/matcher.rs | 188 +++++++++++++++++++---------- 1 file changed, 125 insertions(+), 63 deletions(-) diff --git a/harper-core/src/linting/matcher.rs b/harper-core/src/linting/matcher.rs index 71ffb825..f3735ea2 100644 --- a/harper-core/src/linting/matcher.rs +++ b/harper-core/src/linting/matcher.rs @@ -101,90 +101,152 @@ impl Matcher { pub fn new() -> Self { // This match list needs to be automatically expanded instead of explicitly // defined like it is now. - let mut triggers = pt! { + let mut triggers = Vec::new(); + + // stylistic improvements + triggers.extend(pt! { + "all", "of", "the" => "all the", + "and","also" => "and" + }); + + // phrase typos, each word passes spellcheck but one word is wrong + triggers.extend(pt! { + "an","in" => "and in", + "bee","there" => "been there", + "can","be","seem" => "can be seen", + "eight","grade" => "eighth grade", + "gong","to" => "going to", + "I","a","m" => "I am", + "It","cam" => "It can", + "kid","regards" => "kind regards", + "mu","house" => "my house", + "no","to" => "not to", + "No","to" => "not to", "spacial","attention" => "special attention", - "wellbeing" => "well-being", - "hashtable" => "hash table", - "hashmap" => "hash map", + "the", "this" => "that this", + "The","re" => "There", + "There","fore" => "Therefore", + "though", "process" => "thought process", + "We","a","re" => "We are", + "you","r" => "your", + "you","re" => "you're" + }); + + // phrase capitalization + triggers.extend(pt! { + "black","sea" => "Black Sea", + "geiger","counter" => "Geiger counter", + "my","french" => "my French" + }); + + // hyphenate phrasal adjectives + triggers.extend(pt! { + "case", "sensitive" => "case-sensitive", + "ngram" => "n-gram", + "off","the","cuff" => "off-the-cuff", + "Tree", "sitter" => "Tree-sitter", + "wellbeing" => "well-being" + }); + + // expand abbreviations + triggers.extend(pt! { "dep" => "dependency", "deps" => "dependencies", - "off","the","cuff" => "off-the-cuff", - "an","in" => "and in", - "my","self" => "myself", - "eight","grade" => "eighth grade", - "and","also" => "and", - "todo" => "to-do", - "To-Do" => "To-do", - "performing","this" => "perform this", - "mins" => "minutes", - "min" => "minute", + "hr" => "hour", + "hrs" => "hours", "min" => "minimum", - "secs" => "seconds", + "min" => "minute", + "mins" => "minutes", + "ms" => "milliseconds", "sec" => "second", - "hrs" => "hours", - "hr" => "hour", - "w/o" => "without", - "w/" => "with", - "wordlist" => "word list", - "the","challenged" => "that challenged", + "secs" => "seconds", "stdin" => "standard input", "stdout" => "standard output", - "no","to" => "not to", - "No","to" => "not to", - "ngram" => "n-gram", - "grammer" => "grammar", - "There","fore" => "Therefore", - "fatal","outcome" => "death", - "geiger","counter" => "Geiger counter", - "world","war","2" => "World War II", - "World","war","ii" => "World War II", - "world","War","ii" => "World War II", - "World","War","Ii" => "World War II", - "World","War","iI" => "World War II", - "black","sea" => "Black Sea", - "I","a","m" => "I am", - "We","a","re" => "We are", - "The","re" => "There", - "my","french" => "my French", - "It","cam" => "It can", - "can","be","seem" => "can be seen", - "mu","house" => "my house", - "kid","regards" => "kind regards", + "w/" => "with", + "w/o" => "without" + }); + + // replace euphemisms + triggers.extend(pt! { + "fatal","outcome" => "death" + }); + + // spellos + triggers.extend(pt! { + "grammer" => "grammar" + }); + + // expand compound words + triggers.extend(pt! { + "hashmap" => "hash map", + "hashtable" => "hash table", + "wordlist" => "word list" + }); + + // prefixes written as separate words + triggers.extend(pt! { "miss","understand" => "misunderstand", "miss","use" => "misuse", "miss","used" => "misused", - "bee","there" => "been there", - "want","be" => "won't be", + "my","self" => "myself" + }); + + // mixing up than/then in context + triggers.extend(pt! { "more","then" => "more than", - "gong","to" => "going to", - "then","others" => "than others", - "Then","others" => "than others", "then","before" => "than before", "Then","before" => "than before", - "then","last","week" => "than last week", "then","her" => "than her", "then","hers" => "than hers", "then","him" => "than him", "then","his" => "than his", + "then","last","week" => "than last week", + "then","others" => "than others", + "Then","others" => "than others" + }); + + // not a perfect fit for any of the other categories + triggers.extend(pt! { + "performing","this" => "perform this", "simply","grammatical" => "simple grammatical", - "you","r" => "your", - "you","re" => "you're", - "that","s" => "that's", - "That","s" => "That's", - "that","s" => "that is", - "That","s" => "that is", - "ms" => "milliseconds", - "case", "sensitive" => "case-sensitive", - "Tree", "sitter" => "Tree-sitter", - "all", "of", "the" => "all the", + "the","challenged" => "that challenged", "to", "towards" => "towards", - "though", "process" => "thought process", - "the", "this" => "that this", - "same", "than" => "same as", - "Same", "than" => "same as", + "To-Do" => "To-do", + "todo" => "to-do" + }); + + // wrong set phrases and collocations + triggers.extend(pt! { + "same", "than" => "same as", + "Same", "than" => "same as" }); + + // belonging to multiple of the other categories + triggers.extend(pt! { "same", "then" => "same as", "Same", "then" => "same as" - }; + }); + + // suffixes written as separate words + triggers.extend(pt! { + "that","s" => "that is", + "That","s" => "that is", + "that","s" => "that's", + "That","s" => "That's" + }); + + // near homophones + triggers.extend(pt! { + "want","be" => "won't be" + }); + + // normalization + triggers.extend(pt! { + "world","war","2" => "World War II", + "world","War","ii" => "World War II", + "World","war","ii" => "World War II", + "World","War","iI" => "World War II", + "World","War","Ii" => "World War II" + }); triggers.push(Rule { pattern: vec![pt!("L"), pt!(Period), pt!("L"), pt!(Period), pt!("M")], From 93f2736e4ddfbd08c0849133346a50f1a7be6fd4 Mon Sep 17 00:00:00 2001 From: hippietrail Date: Mon, 10 Feb 2025 13:31:34 +0800 Subject: [PATCH 2/2] chore: fmt --- harper-core/src/linting/matcher.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/harper-core/src/linting/matcher.rs b/harper-core/src/linting/matcher.rs index f3735ea2..dc83c521 100644 --- a/harper-core/src/linting/matcher.rs +++ b/harper-core/src/linting/matcher.rs @@ -217,8 +217,9 @@ impl Matcher { // wrong set phrases and collocations triggers.extend(pt! { - "same", "than" => "same as", - "Same", "than" => "same as" }); + "same", "than" => "same as", + "Same", "than" => "same as" + }); // belonging to multiple of the other categories triggers.extend(pt! {