From c1e94836e834e4102500d85ec51161ebd9605895 Mon Sep 17 00:00:00 2001 From: Victorien Elvinger Date: Thu, 11 Jul 2024 17:28:01 +0200 Subject: [PATCH 1/2] fix(deserialize): unescape JSON strings --- crates/biome_deserialize/src/impls.rs | 26 +++++++--- crates/biome_deserialize/src/json.rs | 23 +++++++-- .../invalidCustomRegexAnchor.js | 23 +++++++++ .../invalidCustomRegexAnchor.js.snap | 47 +++++++++++++++++++ .../invalidCustomRegexAnchor.options.json | 23 +++++++++ .../validCustomStyleDollarSuffix.js | 1 + .../validCustomStyleDollarSuffix.js.snap | 8 ++++ .../validCustomStyleDollarSuffix.options.json | 23 +++++++++ 8 files changed, 162 insertions(+), 12 deletions(-) create mode 100644 crates/biome_js_analyze/tests/specs/style/useNamingConvention/invalidCustomRegexAnchor.js create mode 100644 crates/biome_js_analyze/tests/specs/style/useNamingConvention/invalidCustomRegexAnchor.js.snap create mode 100644 crates/biome_js_analyze/tests/specs/style/useNamingConvention/invalidCustomRegexAnchor.options.json create mode 100644 crates/biome_js_analyze/tests/specs/style/useNamingConvention/validCustomStyleDollarSuffix.js create mode 100644 crates/biome_js_analyze/tests/specs/style/useNamingConvention/validCustomStyleDollarSuffix.js.snap create mode 100644 crates/biome_js_analyze/tests/specs/style/useNamingConvention/validCustomStyleDollarSuffix.options.json diff --git a/crates/biome_deserialize/src/impls.rs b/crates/biome_deserialize/src/impls.rs index 98d6b95e9f96..f215f0402763 100644 --- a/crates/biome_deserialize/src/impls.rs +++ b/crates/biome_deserialize/src/impls.rs @@ -16,27 +16,39 @@ use std::{ path::PathBuf, }; -/// Type that allows deserializing a string without heap-allocation. +/// Type that allows deserializing a string without heap-allocation when possible. 
+/// This is analogous to [std::borrow::Cow]: #[derive(Debug, Eq, PartialEq, Hash, Clone)] -pub struct Text(pub(crate) TokenText); +pub enum Text { + Borrowed(TokenText), + Owned(String), +} impl Text { pub fn text(&self) -> &str { - self.0.text() + match self { + Text::Borrowed(token_text) => token_text.text(), + Text::Owned(string) => string, + } + } +} +impl From<Text> for String { + fn from(value: Text) -> Self { + match value { + Text::Borrowed(token_text) => token_text.text().to_string(), + Text::Owned(string) => string, + } } } - impl PartialOrd for Text { fn partial_cmp(&self, other: &Self) -> Option { Some(self.cmp(other)) } } - impl Ord for Text { fn cmp(&self, other: &Self) -> std::cmp::Ordering { self.text().cmp(other.text()) } } - impl Deref for Text { type Target = str; fn deref(&self) -> &Self::Target { @@ -482,7 +494,7 @@ impl Deserializable for String { name: &str, diagnostics: &mut Vec, ) -> Option { - Text::deserialize(value, name, diagnostics).map(|value| value.text().to_string()) + Text::deserialize(value, name, diagnostics).map(|value| value.into()) } } diff --git a/crates/biome_deserialize/src/json.rs b/crates/biome_deserialize/src/json.rs index 4b087b0413f6..2d84827cce54 100644 --- a/crates/biome_deserialize/src/json.rs +++ b/crates/biome_deserialize/src/json.rs @@ -6,7 +6,7 @@ use crate::{ use biome_diagnostics::{DiagnosticExt, Error}; use biome_json_parser::{parse_json, JsonParserOptions}; use biome_json_syntax::{AnyJsonValue, JsonMemberName, JsonRoot, T}; -use biome_rowan::{AstNode, AstSeparatedList}; +use biome_rowan::{AstNode, AstSeparatedList, TokenText}; /// It attempts to parse and deserialize a source file in JSON. Diagnostics from the parse phase /// are consumed and joined with the diagnostics emitted during the deserialization. 
@@ -119,8 +119,8 @@ impl DeserializableValue for AnyJsonValue { visitor.visit_map(members, range, name, diagnostics) } AnyJsonValue::JsonStringValue(value) => { - let value = value.inner_string_text().ok()?; - visitor.visit_str(Text(value), range, name, diagnostics) + let value = unescape_json(value.inner_string_text().ok()?); + visitor.visit_str(value, range, name, diagnostics) } } } @@ -245,8 +245,8 @@ impl DeserializableValue for JsonMemberName { name: &str, diagnostics: &mut Vec, ) -> Option { - let value = self.inner_string_text().ok()?; - visitor.visit_str(Text(value), AstNode::range(self), name, diagnostics) + let value = unescape_json(self.inner_string_text().ok()?); + visitor.visit_str(value, AstNode::range(self), name, diagnostics) } fn visitable_type(&self) -> Option { @@ -254,6 +254,19 @@ impl DeserializableValue for JsonMemberName { } } +/// Rteurns an unescaped version of `s`. +/// If nothing is escaped, then `s` is returned without any allocation. +/// If at lest one character is escaped, then a string is allocated and hold the unescaped string. +fn unescape_json(s: TokenText) -> Text { + if s.text().bytes().any(|c| c == b'\\') { + // Searching and replacing at the same time should be more optimal. + // However, strings are expected to be small and escapees are expected to be rare. 
+ Text::Owned(s.text().replace(r"\\", r"\")) + } else { + Text::Borrowed(s) + } +} + #[cfg(test)] mod tests { use std::{ diff --git a/crates/biome_js_analyze/tests/specs/style/useNamingConvention/invalidCustomRegexAnchor.js b/crates/biome_js_analyze/tests/specs/style/useNamingConvention/invalidCustomRegexAnchor.js new file mode 100644 index 000000000000..350f5a00e509 --- /dev/null +++ b/crates/biome_js_analyze/tests/specs/style/useNamingConvention/invalidCustomRegexAnchor.js @@ -0,0 +1,23 @@ +{ + "$schema": "../../../../../../packages/@biomejs/biome/configuration_schema.json", + "linter": { + "rules": { + "style": { + "useNamingConvention": { + "level": "error", + "options": { + "conventions": [ + { + "selector": { + "kind": "const" + }, + "match": "(.*?)[$]?", + "formats": ["camelCase"] + } + ] + } + } + } + } + } +} diff --git a/crates/biome_js_analyze/tests/specs/style/useNamingConvention/invalidCustomRegexAnchor.js.snap b/crates/biome_js_analyze/tests/specs/style/useNamingConvention/invalidCustomRegexAnchor.js.snap new file mode 100644 index 000000000000..a445fdcd1ad1 --- /dev/null +++ b/crates/biome_js_analyze/tests/specs/style/useNamingConvention/invalidCustomRegexAnchor.js.snap @@ -0,0 +1,47 @@ +--- +source: crates/biome_js_analyze/tests/spec_tests.rs +expression: invalidCustomRegexAnchor.js +--- +# Input +```jsx +{ + "$schema": "../../../../../../packages/@biomejs/biome/configuration_schema.json", + "linter": { + "rules": { + "style": { + "useNamingConvention": { + "level": "error", + "options": { + "conventions": [ + { + "selector": { + "kind": "const" + }, + "match": "(.*?)[$]?", + "formats": ["camelCase"] + } + ] + } + } + } + } + } +} + +``` + +# Diagnostics +``` +invalidCustomRegexAnchor.options:14:18 deserialize ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ + + × Anchors `^` and `$` are not supported. They are implciitly present. 
+ + 12 │ "kind": "const" + 13 │ }, + > 14 │ "match": "(.*?)$", + │ ^^^^^^^^ + 15 │ "formats": ["camelCase"] + 16 │ } + + +``` diff --git a/crates/biome_js_analyze/tests/specs/style/useNamingConvention/invalidCustomRegexAnchor.options.json b/crates/biome_js_analyze/tests/specs/style/useNamingConvention/invalidCustomRegexAnchor.options.json new file mode 100644 index 000000000000..2c41db923ee6 --- /dev/null +++ b/crates/biome_js_analyze/tests/specs/style/useNamingConvention/invalidCustomRegexAnchor.options.json @@ -0,0 +1,23 @@ +{ + "$schema": "../../../../../../packages/@biomejs/biome/configuration_schema.json", + "linter": { + "rules": { + "style": { + "useNamingConvention": { + "level": "error", + "options": { + "conventions": [ + { + "selector": { + "kind": "const" + }, + "match": "(.*?)$", + "formats": ["camelCase"] + } + ] + } + } + } + } + } +} diff --git a/crates/biome_js_analyze/tests/specs/style/useNamingConvention/validCustomStyleDollarSuffix.js b/crates/biome_js_analyze/tests/specs/style/useNamingConvention/validCustomStyleDollarSuffix.js new file mode 100644 index 000000000000..ae00e20bb582 --- /dev/null +++ b/crates/biome_js_analyze/tests/specs/style/useNamingConvention/validCustomStyleDollarSuffix.js @@ -0,0 +1 @@ +const x$ = 0; \ No newline at end of file diff --git a/crates/biome_js_analyze/tests/specs/style/useNamingConvention/validCustomStyleDollarSuffix.js.snap b/crates/biome_js_analyze/tests/specs/style/useNamingConvention/validCustomStyleDollarSuffix.js.snap new file mode 100644 index 000000000000..13e582506c89 --- /dev/null +++ b/crates/biome_js_analyze/tests/specs/style/useNamingConvention/validCustomStyleDollarSuffix.js.snap @@ -0,0 +1,8 @@ +--- +source: crates/biome_js_analyze/tests/spec_tests.rs +expression: validCustomStyleDollarSuffix.js +--- +# Input +```jsx +const x$ = 0; +``` diff --git a/crates/biome_js_analyze/tests/specs/style/useNamingConvention/validCustomStyleDollarSuffix.options.json 
b/crates/biome_js_analyze/tests/specs/style/useNamingConvention/validCustomStyleDollarSuffix.options.json new file mode 100644 index 000000000000..e04ede40383e --- /dev/null +++ b/crates/biome_js_analyze/tests/specs/style/useNamingConvention/validCustomStyleDollarSuffix.options.json @@ -0,0 +1,23 @@ +{ + "$schema": "../../../../../../packages/@biomejs/biome/configuration_schema.json", + "linter": { + "rules": { + "style": { + "useNamingConvention": { + "level": "error", + "options": { + "conventions": [ + { + "selector": { + "kind": "const" + }, + "match": "(.*?)\\$", + "formats": ["camelCase"] + } + ] + } + } + } + } + } +} From a52e62e0c887075517d39610698320c0aa508bfb Mon Sep 17 00:00:00 2001 From: Victorien Elvinger Date: Thu, 11 Jul 2024 20:50:35 +0200 Subject: [PATCH 2/2] Apply suggestions from code review Co-authored-by: Emanuele Stoppa --- crates/biome_deserialize/src/json.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/biome_deserialize/src/json.rs b/crates/biome_deserialize/src/json.rs index 2d84827cce54..b40df64da979 100644 --- a/crates/biome_deserialize/src/json.rs +++ b/crates/biome_deserialize/src/json.rs @@ -254,9 +254,9 @@ impl DeserializableValue for JsonMemberName { } } -/// Rteurns an unescaped version of `s`. +/// Returns an unescaped version of `s`. /// If nothing is escaped, then `s` is returned without any allocation. -/// If at lest one character is escaped, then a string is allocated and hold the unescaped string. +/// If at least one character is escaped, then a string is allocated and holds the unescaped string. fn unescape_json(s: TokenText) -> Text { if s.text().bytes().any(|c| c == b'\\') { // Searching and replacing at the same time should be more optimal.