From c882099a320b1972975a6704efc28937b9e21fe3 Mon Sep 17 00:00:00 2001
From: "Arend van Beelen jr."
Date: Mon, 15 Jul 2024 22:00:32 +0200
Subject: [PATCH] fix(grit): leaf node normalization

---
 crates/biome_grit_patterns/src/errors.rs      |  7 ++
 .../src/grit_node_patterns.rs                 | 23 +++--
 .../src/grit_target_language.rs               | 99 +++++++++++++++++++
 .../js_target_language.rs                     | 32 ++++++-
 .../src/pattern_compiler/snippet_compiler.rs  | 38 ++++++--
 .../tests/specs/ts/strings.grit               |  1 +
 .../tests/specs/ts/strings.snap               | 35 ++++++++
 .../tests/specs/ts/strings.ts                 |  4 +
 8 files changed, 226 insertions(+), 13 deletions(-)
 create mode 100644 crates/biome_grit_patterns/tests/specs/ts/strings.grit
 create mode 100644 crates/biome_grit_patterns/tests/specs/ts/strings.snap
 create mode 100644 crates/biome_grit_patterns/tests/specs/ts/strings.ts

diff --git a/crates/biome_grit_patterns/src/errors.rs b/crates/biome_grit_patterns/src/errors.rs
index 1b9d47485fef..d73d7a0c7d34 100644
--- a/crates/biome_grit_patterns/src/errors.rs
+++ b/crates/biome_grit_patterns/src/errors.rs
@@ -66,6 +66,10 @@ pub enum CompileError {
     /// A pattern is required to compile a Grit query.
     MissingPattern,
 
+    /// A node inside a code snippet failed to be normalized for its
+    /// equivalence class.
+    NormalizationError,
+
     /// Bracketed metavariables are only allowed on the right-hand side of
     /// rewrite.
     InvalidBracketedMetavariable,
@@ -135,6 +139,9 @@ impl Diagnostic for CompileError {
                 fmt.write_markup(markup! { "Literal value out of range: "{{value}} })
             }
             CompileError::MissingPattern => fmt.write_markup(markup! { "Missing pattern" }),
+            CompileError::NormalizationError => {
+                fmt.write_markup(markup! { "Could not normalize node in code snippet" })
+            }
             CompileError::InvalidBracketedMetavariable => {
                 fmt.write_markup(markup! { "Invalid bracketed metavariable" })
             }
diff --git a/crates/biome_grit_patterns/src/grit_node_patterns.rs b/crates/biome_grit_patterns/src/grit_node_patterns.rs
index 3daaa33d749a..4d2ef49271c9 100644
--- a/crates/biome_grit_patterns/src/grit_node_patterns.rs
+++ b/crates/biome_grit_patterns/src/grit_node_patterns.rs
@@ -1,6 +1,8 @@
 use crate::grit_context::{GritExecContext, GritQueryContext};
 use crate::grit_resolved_pattern::GritResolvedPattern;
+use crate::grit_target_language::LeafEquivalenceClass;
 use crate::grit_target_node::{GritTargetNode, GritTargetSyntaxKind};
+use crate::{CompileError, GritTargetLanguage};
 use anyhow::Result;
 use grit_pattern_matcher::binding::Binding;
 use grit_pattern_matcher::context::ExecContext;
@@ -119,15 +121,23 @@ impl GritNodePatternArg {
 #[derive(Clone, Debug)]
 pub struct GritLeafNodePattern {
     kind: GritTargetSyntaxKind,
+    equivalence_class: Option<LeafEquivalenceClass>,
     text: String,
 }
 
 impl GritLeafNodePattern {
-    pub fn new(kind: GritTargetSyntaxKind, text: impl Into<String>) -> Self {
-        Self {
+    pub fn new(
+        kind: GritTargetSyntaxKind,
+        text: impl Into<String>,
+        lang: &GritTargetLanguage,
+    ) -> Result<Self, CompileError> {
+        let text = text.into();
+        let equivalence_class = lang.get_equivalence_class(kind, &text)?;
+        Ok(Self {
             kind,
-            text: text.into(),
-        }
+            equivalence_class,
+            text,
+        })
     }
 }
 
@@ -148,8 +158,9 @@ impl Matcher<GritQueryContext> for GritLeafNodePattern {
         let Some(node) = binding.get_last_binding().and_then(Binding::singleton) else {
             return Ok(false);
         };
-        // TODO: Implement leaf node normalization.
-        if self.kind != node.kind() {
+        if let Some(class) = &self.equivalence_class {
+            Ok(class.are_equivalent(node.kind(), node.text()))
+        } else if self.kind != node.kind() {
             Ok(false)
         } else {
             Ok(node.text() == self.text)
diff --git a/crates/biome_grit_patterns/src/grit_target_language.rs b/crates/biome_grit_patterns/src/grit_target_language.rs
index ca0a6382532e..81f995890ca9 100644
--- a/crates/biome_grit_patterns/src/grit_target_language.rs
+++ b/crates/biome_grit_patterns/src/grit_target_language.rs
@@ -6,6 +6,7 @@ pub use js_target_language::JsTargetLanguage;
 use crate::grit_js_parser::GritJsParser;
 use crate::grit_target_node::{GritTargetNode, GritTargetSyntaxKind};
 use crate::grit_tree::GritTargetTree;
+use crate::CompileError;
 use biome_rowan::SyntaxKind;
 use grit_util::{AnalysisLogs, Ast, CodeRange, EffectRange, Language, Parser, SnippetTree};
 use std::borrow::Cow;
@@ -72,6 +73,16 @@ macro_rules! generate_target_language {
                     $(Self::$language(_) => $language::is_comment_kind(kind)),+
                 }
             }
+
+            pub fn get_equivalence_class(
+                &self,
+                kind: GritTargetSyntaxKind,
+                text: &str,
+            ) -> Result<Option<LeafEquivalenceClass>, CompileError> {
+                match self {
+                    $(Self::$language(lang) => lang.get_equivalence_class(kind, text)),+
+                }
+            }
         }
 
         impl Language for GritTargetLanguage {
@@ -254,6 +265,18 @@ trait GritTargetLanguageImpl {
     fn is_alternative_metavariable_kind(_kind: GritTargetSyntaxKind) -> bool {
         false
     }
+
+    /// Returns an optional "equivalence class" for the given syntax kind.
+    ///
+    /// Equivalence classes allow leaf nodes to be classified as being equal,
+    /// even when their text representations or syntax kinds differ.
+    fn get_equivalence_class(
+        &self,
+        _kind: GritTargetSyntaxKind,
+        _text: &str,
+    ) -> Result<Option<LeafEquivalenceClass>, CompileError> {
+        Ok(None)
+    }
 }
 
 pub trait GritTargetParser: Parser<Tree = GritTargetTree> {
@@ -265,3 +288,79 @@ pub trait GritTargetParser: Parser<Tree = GritTargetTree> {
         logs: &mut AnalysisLogs,
     ) -> Option<GritTargetTree>;
 }
+
+/// Leaf node kinds, each with its normalizer, together with the normalized
+/// text (`representative`) a node must produce to be considered equivalent.
+#[derive(Clone, Debug)]
+pub struct LeafEquivalenceClass {
+    representative: String,
+    class: Vec<LeafNormalizer>,
+}
+
+impl LeafEquivalenceClass {
+    pub fn are_equivalent(&self, kind: GritTargetSyntaxKind, text: &str) -> bool {
+        self.class
+            .iter()
+            .find(|eq| eq.kind == kind)
+            .is_some_and(|normalizer| {
+                normalizer
+                    .normalize(text)
+                    .is_some_and(|s| s == self.representative)
+            })
+    }
+
+    pub(crate) fn new(
+        representative: &str,
+        kind: GritTargetSyntaxKind,
+        members: &[LeafNormalizer],
+    ) -> Result<Option<Self>, CompileError> {
+        if let Some(normalizer) = members.iter().find(|norm| norm.kind == kind) {
+            let rep = normalizer
+                .normalize(representative)
+                .ok_or(CompileError::NormalizationError)?;
+            Ok(Some(Self {
+                representative: rep.to_owned(),
+                class: members.to_owned(),
+            }))
+        } else {
+            Ok(None)
+        }
+    }
+}
+
+/// Associates a syntax kind with the function used to normalize the text of
+/// leaf nodes of that kind.
+#[derive(Clone, Debug)]
+pub(crate) struct LeafNormalizer {
+    kind: GritTargetSyntaxKind,
+    normalizer: fn(&str) -> Option<&str>,
+}
+
+impl LeafNormalizer {
+    fn normalize<'a>(&self, s: &'a str) -> Option<&'a str> {
+        (self.normalizer)(s)
+    }
+
+    pub(crate) const fn new(
+        kind: GritTargetSyntaxKind,
+        normalizer: fn(&str) -> Option<&str>,
+    ) -> Self {
+        Self { kind, normalizer }
+    }
+
+    pub(crate) fn kind(&self) -> GritTargetSyntaxKind {
+        self.kind
+    }
+}
+
+/// Strips the first and last character (the quotes, regardless of type).
+///
+/// Returns `None` if `string` is shorter than two characters.
+fn normalize_quoted_string(string: &str) -> Option<&str> {
+    // Step by `char` rather than slicing at byte offsets, so that a multi-byte
+    // first or last character cannot cause a slicing panic.
+    let mut chars = string.chars();
+    chars.next()?;
+    chars.next_back()?;
+    Some(chars.as_str())
+}
diff --git a/crates/biome_grit_patterns/src/grit_target_language/js_target_language.rs b/crates/biome_grit_patterns/src/grit_target_language/js_target_language.rs
index 5ad0e142436f..4a420f0f0fb1 100644
--- a/crates/biome_grit_patterns/src/grit_target_language/js_target_language.rs
+++ b/crates/biome_grit_patterns/src/grit_target_language/js_target_language.rs
@@ -1,5 +1,7 @@
-use super::GritTargetLanguageImpl;
-use crate::grit_target_node::GritTargetSyntaxKind;
+use super::{
+    normalize_quoted_string, GritTargetLanguageImpl, LeafEquivalenceClass, LeafNormalizer,
+};
+use crate::{grit_target_node::GritTargetSyntaxKind, CompileError};
 use biome_js_syntax::{JsLanguage, JsSyntaxKind};
 use biome_rowan::{RawSyntaxKind, SyntaxKindSet};
 
@@ -8,6 +10,17 @@ const COMMENT_KINDS: SyntaxKindSet<JsLanguage> =
         SyntaxKindSet::from_raw(RawSyntaxKind(JsSyntaxKind::MULTILINE_COMMENT as u16)),
     );
 
+const EQUIVALENT_LEAF_NODES: &[&[LeafNormalizer]] = &[&[
+    LeafNormalizer::new(
+        GritTargetSyntaxKind::JsSyntaxKind(JsSyntaxKind::JS_STRING_LITERAL),
+        normalize_quoted_string,
+    ),
+    LeafNormalizer::new(
+        GritTargetSyntaxKind::JsSyntaxKind(JsSyntaxKind::JS_STRING_LITERAL_EXPRESSION),
+        normalize_quoted_string,
+    ),
+]];
+
 #[derive(Clone, Debug)]
 pub struct JsTargetLanguage;
 
@@ -120,4 +133,19 @@ impl GritTargetLanguageImpl for JsTargetLanguage {
                 || kind == JsSyntaxKind::TS_TEMPLATE_ELEMENT_LIST
         })
     }
+
+    fn get_equivalence_class(
+        &self,
+        kind: GritTargetSyntaxKind,
+        text: &str,
+    ) -> Result<Option<LeafEquivalenceClass>, CompileError> {
+        if let Some(class) = EQUIVALENT_LEAF_NODES
+            .iter()
+            .find(|v| v.iter().any(|normalizer| normalizer.kind() == kind))
+        {
+            LeafEquivalenceClass::new(text, kind, class)
+        } else {
+            Ok(None)
+        }
+    }
 }
diff --git a/crates/biome_grit_patterns/src/pattern_compiler/snippet_compiler.rs b/crates/biome_grit_patterns/src/pattern_compiler/snippet_compiler.rs
index e1436e601e5c..f7f1809afdaf 100644
--- a/crates/biome_grit_patterns/src/pattern_compiler/snippet_compiler.rs
+++ b/crates/biome_grit_patterns/src/pattern_compiler/snippet_compiler.rs
@@ -215,9 +215,8 @@ fn pattern_from_node(
 
     if !node.has_children() {
         let content = node.text();
-        let pattern = if let Some(regex_pattern) = context
-            .compilation
-            .lang
+        let lang = &context.compilation.lang;
+        let pattern = if let Some(regex_pattern) = lang
             .matches_replaced_metavariable(content)
             .then(|| implicit_metavariable_regex(node, context_range, range_map, context))
             .transpose()?
@@ -225,7 +224,7 @@ fn pattern_from_node(
         {
             Pattern::Regex(Box::new(regex_pattern))
         } else {
-            Pattern::AstLeafNode(GritLeafNodePattern::new(node.kind(), content))
+            Pattern::AstLeafNode(GritLeafNodePattern::new(node.kind(), content, lang)?)
         };
 
         return Ok(pattern);
@@ -502,6 +501,7 @@ mod tests {
         JsTargetLanguage,
     };
     use grit_util::Parser;
+    use regex::Regex;
 
     #[test]
     fn test_node_from_tree() {
@@ -591,7 +591,11 @@ mod tests {
         let pattern = pattern_from_node(&node, range, &range_map, &mut context, false)
             .expect("cannot compile pattern from node");
         let formatted = format!("{pattern:#?}");
-        insta::assert_snapshot!(&formatted, @r###"
+        let snapshot = Regex::new("normalizer: 0x[0-9a-f]{16}")
+            .unwrap()
+            .replace_all(&formatted, "normalizer: [address redacted]");
+
+        insta::assert_snapshot!(&snapshot, @r###"
         AstNode(
             GritNodePattern {
                 kind: JsSyntaxKind(
@@ -621,6 +625,7 @@ mod tests {
                                                                 kind: JsSyntaxKind(
                                                                     JS_REFERENCE_IDENTIFIER,
                                                                 ),
+                                                                equivalence_class: None,
                                                                 text: "console",
                                                             },
                                                         ),
@@ -636,6 +641,7 @@ mod tests {
                                                 kind: JsSyntaxKind(
                                                     DOT,
                                                 ),
+                                                equivalence_class: None,
                                                 text: ".",
                                             },
                                         ),
@@ -647,6 +653,7 @@ mod tests {
                                                 kind: JsSyntaxKind(
                                                     JS_NAME,
                                                 ),
+                                                equivalence_class: None,
                                                 text: "log",
                                             },
                                         ),
@@ -698,6 +705,7 @@ mod tests {
                                                 kind: JsSyntaxKind(
                                                     L_PAREN,
                                                 ),
+                                                equivalence_class: None,
                                                 text: "(",
                                             },
                                         ),
@@ -712,6 +720,25 @@ mod tests {
                                                             kind: JsSyntaxKind(
                                                                 JS_STRING_LITERAL_EXPRESSION,
                                                             ),
+                                                            equivalence_class: Some(
+                                                                LeafEquivalenceClass {
+                                                                    representative: "hello",
+                                                                    class: [
+                                                                        LeafNormalizer {
+                                                                            kind: JsSyntaxKind(
+                                                                                JS_STRING_LITERAL,
+                                                                            ),
+                                                                            normalizer: [address redacted],
+                                                                        },
+                                                                        LeafNormalizer {
+                                                                            kind: JsSyntaxKind(
+                                                                                JS_STRING_LITERAL_EXPRESSION,
+                                                                            ),
+                                                                            normalizer: [address redacted],
+                                                                        },
+                                                                    ],
+                                                                },
+                                                            ),
                                                             text: "'hello'",
                                                         },
                                                     ),
@@ -726,6 +753,7 @@ mod tests {
                                                 kind: JsSyntaxKind(
                                                     R_PAREN,
                                                 ),
+                                                equivalence_class: None,
                                                 text: ")",
                                             },
                                         ),
diff --git a/crates/biome_grit_patterns/tests/specs/ts/strings.grit b/crates/biome_grit_patterns/tests/specs/ts/strings.grit
new file mode 100644
index 000000000000..a152e880a48f
--- /dev/null
+++ b/crates/biome_grit_patterns/tests/specs/ts/strings.grit
@@ -0,0 +1 @@
+`"foo"`
diff --git a/crates/biome_grit_patterns/tests/specs/ts/strings.snap b/crates/biome_grit_patterns/tests/specs/ts/strings.snap
new file mode 100644
index 000000000000..0ca28cb90405
--- /dev/null
+++ b/crates/biome_grit_patterns/tests/specs/ts/strings.snap
@@ -0,0 +1,35 @@
+---
+source: crates/biome_grit_patterns/tests/spec_tests.rs
+expression: strings
+---
+SnapshotResult {
+    messages: [],
+    matched_ranges: [
+        Range {
+            start: Position {
+                line: 2,
+                column: 11,
+            },
+            end: Position {
+                line: 2,
+                column: 16,
+            },
+            start_byte: 11,
+            end_byte: 16,
+        },
+        Range {
+            start: Position {
+                line: 3,
+                column: 11,
+            },
+            end: Position {
+                line: 3,
+                column: 16,
+            },
+            start_byte: 28,
+            end_byte: 33,
+        },
+    ],
+    rewritten_files: [],
+    created_files: [],
+}
diff --git a/crates/biome_grit_patterns/tests/specs/ts/strings.ts b/crates/biome_grit_patterns/tests/specs/ts/strings.ts
new file mode 100644
index 000000000000..54ed4702ba52
--- /dev/null
+++ b/crates/biome_grit_patterns/tests/specs/ts/strings.ts
@@ -0,0 +1,4 @@
+
+const a = 'foo';
+const b = "foo";
+const c = `foo`;