From 23050faba61cadbb81fd7d3d0e921f2516ac6018 Mon Sep 17 00:00:00 2001 From: leaysgur <6259812+leaysgur@users.noreply.github.com> Date: Tue, 24 Mar 2026 13:13:56 +0000 Subject: [PATCH] feat(oxfmt): Support markdown-in-js substitution (#20683) Part of #15180, and the last piece...! --- apps/oxfmt/Cargo.toml | 2 +- .../edge-cases/md-in-js/backtick-multibyte.js | 3 + .../md-in-js/nested-codeblock-in-list.js | 25 +++ apps/oxfmt/conformance/run.ts | 13 ++ .../conformance/snapshots/conformance.snap.md | 14 ++ apps/oxfmt/src-js/libs/apis.ts | 8 + apps/oxfmt/src/core/external_formatter.rs | 24 ++- .../src/prettier_compat/from_prettier_doc.rs | 145 ++++++++++++++++-- .../embedded_languages.test.ts.snap | 38 ++--- .../oxc_formatter/src/external_formatter.rs | 2 + .../src/print/template/embed/markdown.rs | 134 ++++++++++++++++ .../src/print/template/embed/mod.rs | 108 +++---------- 12 files changed, 384 insertions(+), 132 deletions(-) create mode 100644 apps/oxfmt/conformance/fixtures/edge-cases/md-in-js/backtick-multibyte.js create mode 100644 apps/oxfmt/conformance/fixtures/edge-cases/md-in-js/nested-codeblock-in-list.js create mode 100644 crates/oxc_formatter/src/print/template/embed/markdown.rs diff --git a/apps/oxfmt/Cargo.toml b/apps/oxfmt/Cargo.toml index 7e2e692eadf0d..865f8ead90e81 100644 --- a/apps/oxfmt/Cargo.toml +++ b/apps/oxfmt/Cargo.toml @@ -49,7 +49,7 @@ rayon = { workspace = true } rustc-hash = { workspace = true } schemars = { workspace = true } serde = { workspace = true } -serde_json = { workspace = true } +serde_json = { workspace = true, features = ["unbounded_depth"] } simdutf8 = { workspace = true } sort-package-json = { workspace = true } oxc-toml = { workspace = true } diff --git a/apps/oxfmt/conformance/fixtures/edge-cases/md-in-js/backtick-multibyte.js b/apps/oxfmt/conformance/fixtures/edge-cases/md-in-js/backtick-multibyte.js new file mode 100644 index 0000000000000..cc443794abcd7 --- /dev/null +++ 
b/apps/oxfmt/conformance/fixtures/edge-cases/md-in-js/backtick-multibyte.js @@ -0,0 +1,3 @@ +markdown` + Hello \`こんにちは\` world +` diff --git a/apps/oxfmt/conformance/fixtures/edge-cases/md-in-js/nested-codeblock-in-list.js b/apps/oxfmt/conformance/fixtures/edge-cases/md-in-js/nested-codeblock-in-list.js new file mode 100644 index 0000000000000..c3b214f8ef811 --- /dev/null +++ b/apps/oxfmt/conformance/fixtures/edge-cases/md-in-js/nested-codeblock-in-list.js @@ -0,0 +1,25 @@ +md` +- item1 + + \`\`\`js + console.log("hello"); + \`\`\` + +- item2 +` + +function f() { + return md` + - outer item + + \`\`\`js + const x = 1; + \`\`\` + + - another + - nested list + \`\`\`bash + npm install + \`\`\` + `; +} diff --git a/apps/oxfmt/conformance/run.ts b/apps/oxfmt/conformance/run.ts index d45e5bf3efa6d..969a8b5591a2f 100644 --- a/apps/oxfmt/conformance/run.ts +++ b/apps/oxfmt/conformance/run.ts @@ -112,6 +112,19 @@ const categories: Category[] = [ optionSets: [{ printWidth: 80 }, { printWidth: 100, htmlWhitespaceSensitivity: "ignore" }], notes: {}, }, + { + name: "md-in-js", + sources: [ + { + dir: join(FIXTURES_DIR, "prettier", "js/multiparser-markdown"), + ext: ".js", + excludes: ["format.test.js"], + }, + { dir: join(FIXTURES_DIR, "edge-cases", "md-in-js") }, + ], + optionSets: [{ printWidth: 80 }, { printWidth: 100, proseWrap: "always" }], + notes: {}, + }, { name: "xxx-in-js-comment", sources: [ diff --git a/apps/oxfmt/conformance/snapshots/conformance.snap.md b/apps/oxfmt/conformance/snapshots/conformance.snap.md index d0e4078e723bd..4bb072879302a 100644 --- a/apps/oxfmt/conformance/snapshots/conformance.snap.md +++ b/apps/oxfmt/conformance/snapshots/conformance.snap.md @@ -96,6 +96,20 @@ {"printWidth":100,"htmlWhitespaceSensitivity":"ignore"} ``` +## md-in-js + +### Option 1: 8/8 (100.00%) + +```json +{"printWidth":80} +``` + +### Option 2: 8/8 (100.00%) + +```json +{"printWidth":100,"proseWrap":"always"} +``` + ## xxx-in-js-comment ### Option 1: 5/5 (100.00%) diff 
--git a/apps/oxfmt/src-js/libs/apis.ts b/apps/oxfmt/src-js/libs/apis.ts index 7b212ebdc810a..baa483f941602 100644 --- a/apps/oxfmt/src-js/libs/apis.ts +++ b/apps/oxfmt/src-js/libs/apis.ts @@ -52,6 +52,8 @@ async function loadPrettier(): Promise { // - or flaky traversal of the `Doc` output // to extract the same information, since this hooks into the AST. formatOptionsHiddenDefaults.__onHtmlRoot = null; + // For md-in-js: Use `~` instead of `` ` `` for code fences + formatOptionsHiddenDefaults.__inJsTemplate = null; return prettierCache; } @@ -169,6 +171,12 @@ export async function formatEmbeddedDoc({ (metadata.htmlHasMultipleRootElements = (root.children?.length ?? 0) > 1); } + // md-in-js specific options: see the comment in `loadPrettier()` for rationale + if (options.parser === "markdown") { + // https://github.com/prettier/prettier/blob/90983f40dce5e20beea4e5618b5e0426a6a7f4f0/src/language-js/embed/markdown.js#L21 + options.__inJsTemplate = true; + } + // @ts-expect-error: Use internal API, but it's necessary and only way to get `Doc` const doc = await prettier.__debug.printToDoc(text, options); diff --git a/apps/oxfmt/src/core/external_formatter.rs b/apps/oxfmt/src/core/external_formatter.rs index 5cb000bfff2ae..e413611393cdd 100644 --- a/apps/oxfmt/src/core/external_formatter.rs +++ b/apps/oxfmt/src/core/external_formatter.rs @@ -5,8 +5,9 @@ use napi::{ bindgen_prelude::{FnArgs, Promise, block_on}, threadsafe_function::ThreadsafeFunction, }; +use serde::Deserialize; use serde_json::Value; -use tracing::debug_span; +use tracing::{debug, debug_span}; use oxc_formatter::{ EmbeddedDocFormatterCallback, EmbeddedFormatterCallback, ExternalCallbacks, FormatOptions, @@ -267,10 +268,8 @@ impl ExternalFormatter { code.truncate(trimmed_len); code }) - .map_err(|err| { - format!( - "Failed to format embedded code for parser '{parser_name}': {err}" - ) + .inspect_err(|err| { + debug!("Failed to format embedded code for parser '{parser_name}': {err}"); }) }, ) @@ -303,7 
+302,17 @@ impl ExternalFormatter { })?; let doc_jsons = doc_json_strs .into_iter() - .map(|s| serde_json::from_str(&s)) + .map(|s| { + // Prettier's Doc can produce deeply nested arrays. + // (e.g., md-in-js with `proseWrap: preserve`, + // which nests each word in `[[[prev, " "], word], " "]`) + // The default recursion limit of 128 is not enough for long paragraphs. + // This only affects this deserialization call; + // other `serde_json` usage in the codebase keeps the default limit. + let mut de = serde_json::Deserializer::from_str(&s); + de.disable_recursion_limit(); + serde_json::Value::deserialize(&mut de) + }) .collect::, _>>() .map_err(|e| format!("Failed to parse Doc JSON: {e}"))?; @@ -314,6 +323,9 @@ impl ExternalFormatter { group_id_builder, ) }) + .inspect_err(|err| { + debug!("Failed to format embedded doc for parser '{parser_name}': {err}"); + }) })) } else { None diff --git a/apps/oxfmt/src/prettier_compat/from_prettier_doc.rs b/apps/oxfmt/src/prettier_compat/from_prettier_doc.rs index d8f24884cce4b..c2f7cf6df67a1 100644 --- a/apps/oxfmt/src/prettier_compat/from_prettier_doc.rs +++ b/apps/oxfmt/src/prettier_compat/from_prettier_doc.rs @@ -56,9 +56,11 @@ pub fn to_format_elements_for_template<'a>( .map(|envelope| { let (mut ir, _) = convert(envelope, allocator, group_id_builder)?; postprocess( - &mut ir, allocator, + &mut ir, + allocator, // GraphQL uses `.cooked` values, so template chars need escaping - true, None, + TemplateEscape::Full, + None, ); Ok(ir) }) @@ -75,7 +77,7 @@ pub fn to_format_elements_for_template<'a>( &mut ir, allocator, // CSS uses `.raw` values, so no template char escaping needed - false, + TemplateEscape::None, Some(("@prettier-placeholder-", "-id")), ); Ok(EmbeddedDocResult::DocWithPlaceholders { @@ -96,7 +98,7 @@ pub fn to_format_elements_for_template<'a>( &mut ir, allocator, // HTML/Angular use `.cooked` values, so template chars need escaping - true, + TemplateEscape::Full, Some(("PRETTIER_HTML_PLACEHOLDER_", "_IN_JS")), 
); Ok(EmbeddedDocResult::DocWithPlaceholders { @@ -105,6 +107,21 @@ pub fn to_format_elements_for_template<'a>( html_has_multiple_root_elements, }) } + "tagged-markdown" => { + let (mut ir, _) = convert( + doc_jsons.into_iter().next().expect("Doc JSON for Markdown"), + allocator, + group_id_builder, + )?; + postprocess( + &mut ir, + allocator, + // Markdown uses `.raw` values with backtick unescaping on Rust side + TemplateEscape::RawBacktick, + None, + ); + Ok(EmbeddedDocResult::SingleDoc(ir)) + } _ => unreachable!("Unsupported embedded_doc language: {language}"), } } @@ -273,10 +290,10 @@ fn convert_align<'a>( } out.push(FormatElement::Tag(Tag::EndDedent(DedentMode::Level))); return Ok(()); - } else if i > 0 && i <= 255 { + } else if i > 0 { + debug_assert!(i <= 255, "align value {i} exceeds NonZeroU8 range"); #[expect(clippy::cast_possible_truncation, clippy::cast_sign_loss)] - let count = i as u8; - if let Some(nz) = NonZeroU8::new(count) { + if let Some(nz) = NonZeroU8::new(i as u8) { out.push(FormatElement::Tag(Tag::StartAlign(Align::new(nz)))); if let Some(contents) = obj.get("contents") { convert_doc(contents, out, ctx)?; @@ -296,6 +313,53 @@ fn convert_align<'a>( out.push(FormatElement::Tag(Tag::EndDedent(DedentMode::Root))); Ok(()) } + Value::String(s) => { + // String alignment (e.g., " " for markdown list continuation indent). + // Prettier uses the string length as the number of spaces to align by. 
+ if s.is_empty() { + // Empty string → no alignment, just render contents + if let Some(contents) = obj.get("contents") { + convert_doc(contents, out, ctx)?; + } + return Ok(()); + } + debug_assert!( + s.len() <= 255, + "align string length {} exceeds NonZeroU8 range", + s.len() + ); + #[expect(clippy::cast_possible_truncation)] + if let Some(nz) = NonZeroU8::new(s.len() as u8) { + out.push(FormatElement::Tag(Tag::StartAlign(Align::new(nz)))); + if let Some(contents) = obj.get("contents") { + convert_doc(contents, out, ctx)?; + } + out.push(FormatElement::Tag(Tag::EndAlign)); + return Ok(()); + } + Err(format!("Unsupported align value: {n}")) + } + Value::Object(obj_val) => { + // `align({type: "root"}, ...)` = Prettier's `markAsRoot()`. + // In Prettier, `markAsRoot` records the current indent position + // so that a later `dedentToRoot` can return to it. + // However, `oxc_formatter`'s `DedentMode::Root` always resets to absolute level 0 + // and has no way to store a custom root position. + // Skipping the root capture is safe because + // embedded language Docs are processed in their own context starting near level 0, + // so `dedentToRoot` to absolute 0 produces the same result. + // + // NOTE: `markAsRoot` is used in Prettier for other cases. + // e.g. JS comment printer, YAML block printer, and front-matter embed. + // But none of those go through this Doc→IR path. + if obj_val.get("type").and_then(Value::as_str) == Some("root") { + if let Some(contents) = obj.get("contents") { + convert_doc(contents, out, ctx)?; + } + return Ok(()); + } + Err(format!("Unsupported align value: {n}")) + } _ => Err(format!("Unsupported align value: {n}")), } } @@ -399,20 +463,28 @@ fn extract_group_id( // --- +#[derive(Clone, Copy)] +enum TemplateEscape { + /// No escaping + None, + /// Full escaping: `\` → `\\`, `` ` `` → `` \` ``, `${` → `\${`. + Full, + /// Raw backtick escaping: `(\\*)\`` → `$1$1\\\``. 
+ RawBacktick, +} + /// Post-process FormatElements in a single compaction pass: /// - strip trailing hardline (useless for embedded parts) /// - collapse double-hardlines `[Hard, ExpandParent, Hard, ExpandParent]` → `[Empty, ExpandParent]` /// - merge consecutive Text nodes (SCSS emits split strings like `"@"` + `"prettier-placeholder-0-id"`) -/// - escape template characters (`\`, `` ` ``, `${`) -/// - for css-in-js, this is not needed because values are already escaped via `.raw` -/// - for others, `.cooked` is used, so escaping is needed +/// - escape template characters (mode determined by [`TemplateEscape`]) /// - count placeholders matching `(prefix)(digits)(_digits)?(suffix)` pattern /// /// Returns the placeholder count (0 when `placeholder` is `None`). fn postprocess<'a>( ir: &mut Vec>, allocator: &'a Allocator, - escape_template_chars: bool, + escape: TemplateEscape, placeholder: Option<(&str, &str)>, ) -> usize { // Strip trailing hardline @@ -458,10 +530,10 @@ fn postprocess<'a>( } sb.into_str() }; - let text = if escape_template_chars { - escape_template_characters(text, allocator) - } else { - text + let text = match escape { + TemplateEscape::None => text, + TemplateEscape::Full => escape_template_characters(text, allocator), + TemplateEscape::RawBacktick => escape_backticks_raw_str(text, allocator), }; let width = TextWidth::from_text(text, IndentWidth::default()); ir[write] = FormatElement::Text { text, width }; @@ -526,7 +598,9 @@ fn escape_template_characters<'a>(s: &'a str, allocator: &'a Allocator) -> &'a s let bytes = s.as_bytes(); let len = bytes.len(); - // Fast path: scan for characters that need escaping. + // Fast path: scan for the first character that needs escaping. + // All characters of interest (`\`, `` ` ``, `$`, `{`) are single-byte ASCII, + // so byte-indexed access is safe and avoids multi-byte decode overhead. 
let first_escape = (0..len).find(|&i| { let ch = bytes[i]; ch == b'\\' || ch == b'`' || (ch == b'$' && i + 1 < len && bytes[i + 1] == b'{') @@ -536,7 +610,7 @@ fn escape_template_characters<'a>(s: &'a str, allocator: &'a Allocator) -> &'a s return s; }; - // Slow path: build escaped string in the arena. + // Slow path: build escaped string in the arena, reusing the clean prefix. let mut result = StringBuilder::with_capacity_in(len + 1, allocator); result.push_str(&s[..first]); @@ -557,3 +631,40 @@ fn escape_template_characters<'a>(s: &'a str, allocator: &'a Allocator) -> &'a s result.into_str() } + +/// Escape backticks in raw mode for markdown-in-JS template literals. +/// +/// Equivalent to Prettier's `escapeTemplateCharacters(doc, /* raw */ true)`: +/// +/// `str.replaceAll(/(\\*)`/g, "$1$1\\`")` +/// +/// For each backtick, doubles the preceding backslashes and adds `\` before the backtick: +/// - `` ` `` → `` \` `` +/// - `` \` `` → `` \\\` `` +/// - `` \\` `` → `` \\\\\` `` +fn escape_backticks_raw_str<'a>(s: &'a str, allocator: &'a Allocator) -> &'a str { + if !s.contains('`') { + return s; + } + let mut result = StringBuilder::with_capacity_in(s.len() + 1, allocator); + let mut bs_count: usize = 0; + for ch in s.chars() { + if ch == '\\' { + bs_count += 1; + result.push('\\'); + } else if ch == '`' { + // The backslash branch already emitted `bs_count` backslashes. + // Emit another `bs_count` to double them, then add `\``. 
+ for _ in 0..bs_count { + result.push('\\'); + } + result.push('\\'); + result.push('`'); + bs_count = 0; + } else { + bs_count = 0; + result.push(ch); + } + } + result.into_str() +} diff --git a/apps/oxfmt/test/cli/embedded_languages/__snapshots__/embedded_languages.test.ts.snap b/apps/oxfmt/test/cli/embedded_languages/__snapshots__/embedded_languages.test.ts.snap index ce273fb03f49d..410ef75cb2df9 100644 --- a/apps/oxfmt/test/cli/embedded_languages/__snapshots__/embedded_languages.test.ts.snap +++ b/apps/oxfmt/test/cli/embedded_languages/__snapshots__/embedded_languages.test.ts.snap @@ -73,8 +73,8 @@ const mixedQuery = gql\` const mixedTemplate = html\`

Title

\`; const mixedDocs = md\` - #Documentation - This is **important**. +#Documentation +This is **important**. \`; // Multi-line with blank lines - should preserve blank lines without trailing whitespace @@ -572,17 +572,18 @@ npm install package --- AFTER ---------- // Tagged template literals with md and markdown tags const documentation = md\` - #Heading - This is **bold**. - -Item 1 - -Item 2 +#Heading +This is **bold**. +-Item 1 +-Item 2 \`; const readme = markdown\` - ##Installation - \\\`\\\`\\\`bash - npm install package - \\\`\\\`\\\` +##Installation + +~~~bash +npm install package +~~~ \`; --------------------" @@ -845,17 +846,18 @@ npm install package --- AFTER ---------- // Tagged template literals with md and markdown tags const documentation = md\` - #Heading - This is **bold**. - -Item 1 - -Item 2 +#Heading +This is **bold**. +-Item 1 +-Item 2 \`; const readme = markdown\` - ##Installation - \\\`\\\`\\\`bash - npm install package - \\\`\\\`\\\` +##Installation + +~~~bash +npm install package +~~~ \`; -------------------- diff --git a/crates/oxc_formatter/src/external_formatter.rs b/crates/oxc_formatter/src/external_formatter.rs index 1f8948f818338..eda028f738bba 100644 --- a/crates/oxc_formatter/src/external_formatter.rs +++ b/crates/oxc_formatter/src/external_formatter.rs @@ -14,6 +14,7 @@ pub type EmbeddedFormatterCallback = /// The variant depends on the language being formatted: /// - GraphQL: multiple IRs (one per quasi text) /// - CSS/HTML: single IR with placeholder survival count +/// - Markdown: single IR only (no placeholders or metadata) pub enum EmbeddedDocResult<'a> { MultipleDocs(Vec>>), DocWithPlaceholders { @@ -24,6 +25,7 @@ pub enum EmbeddedDocResult<'a> { /// Used to decide whether to `indent` the template content, `None` for non-HTML languages. html_has_multiple_root_elements: Option, }, + SingleDoc(Vec>), } /// Callback function type for formatting embedded code via `Doc`. 
diff --git a/crates/oxc_formatter/src/print/template/embed/markdown.rs b/crates/oxc_formatter/src/print/template/embed/markdown.rs new file mode 100644 index 0000000000000..7c9cbae8cee1f --- /dev/null +++ b/crates/oxc_formatter/src/print/template/embed/markdown.rs @@ -0,0 +1,134 @@ +use oxc_allocator::{Allocator, StringBuilder}; +use oxc_ast::ast::*; + +use crate::{ + ast_nodes::AstNode, + format_args, + formatter::{Formatter, prelude::*}, + write, +}; + +/// Format a Markdown-in-JS tagged template literal via the Doc→IR path. +/// +/// Unescapes backticks in `.raw`, strips common indentation, formats as markdown, +/// then re-escapes backticks and applies indented or dedent-to-root layout. +pub(super) fn try_embed_markdown<'a>( + tagged: &AstNode<'a, TaggedTemplateExpression<'a>>, + f: &mut Formatter<'_, 'a>, +) -> bool { + let raw = tagged.quasi.quasis[0].value.raw.as_str(); + + if raw.trim().is_empty() { + write!(f, ["``"]); + return true; + } + + let allocator = f.context().allocator(); + + // Phase 1: Unescape backticks (= `raw.replaceAll(/((?:\\\\)*)\\`/g, ...)`) + // https://github.com/prettier/prettier/blob/90983f40dce5e20beea4e5618b5e0426a6a7f4f0/src/language-js/embed/markdown.js#L11-L14 + let text = unescape_backticks(raw, allocator); + + // Phase 2: Detect and strip common indentation + let indentation = get_indentation(text); + let has_indent = !indentation.is_empty(); + let text = if has_indent { strip_indentation(text, indentation, allocator) } else { text }; + + // Phase 3: Get Doc→IR from external formatter + let allocator = f.allocator(); + let group_id_builder = f.group_id_builder(); + let Some(Ok(crate::external_formatter::EmbeddedDocResult::SingleDoc(ir))) = f + .context() + .external_callbacks() + .format_embedded_doc(allocator, group_id_builder, "tagged-markdown", &[text]) + else { + return false; + }; + + // Phase 4: Re-escape backticks in the IR (`escapeTemplateCharacters(doc, true)`) + // This is already handled by oxfmt 
`prettier_compat/from_prettier_doc.rs` + + // Phase 5: Layout + // https://github.com/prettier/prettier/blob/90983f40dce5e20beea4e5618b5e0426a6a7f4f0/src/language-js/embed/markdown.js#L24-L29 + let content = format_once(|f| f.write_elements(ir)); + if has_indent { + write!(f, ["`", indent(&format_args!(soft_line_break(), content)), soft_line_break(), "`"]); + } else { + let literalline = format_with(|f| super::write_literalline(f, allocator)); + write!(f, ["`", literalline, dedent_to_root(&content), soft_line_break(), "`"]); + } + + true +} + +// --- + +/// Unescape backticks in raw template literal content. +/// Transforms `\`` → `` ` ``, `\\`` → `` \` `` +/// `raw.replaceAll(/((?:\\\\)*)\\`/g, (_, bs) => "\\".repeat(bs.length / 2) + "`")` +/// +fn unescape_backticks<'a>(raw: &'a str, allocator: &'a Allocator) -> &'a str { + if !raw.contains('`') { + return raw; + } + + let mut result = StringBuilder::with_capacity_in(raw.len(), allocator); + let mut chars = raw.chars().peekable(); + while let Some(c) = chars.next() { + if c == '\\' { + // Count consecutive backslashes + let mut bs_count = 1; + while chars.peek() == Some(&'\\') { + bs_count += 1; + chars.next(); + } + if chars.peek() == Some(&'`') { + // Halve the backslashes and unescape the backtick + for _ in 0..bs_count / 2 { + result.push('\\'); + } + result.push('`'); + chars.next(); + } else { + // Not followed by backtick, keep backslashes as-is + for _ in 0..bs_count { + result.push('\\'); + } + } + } else { + result.push(c); + } + } + result.into_str() +} + +/// Get indentation of the first non-empty line. +/// `str.match(/^([^\S\n]*)\S/m)?.[1] ?? ""` +/// +fn get_indentation(text: &str) -> &str { + for line in text.split('\n') { + let trimmed = line.trim_start_matches(|c: char| c.is_ascii_whitespace() && c != '\n'); + if !trimmed.is_empty() { + return &line[..line.len() - trimmed.len()]; + } + } + "" +} + +/// Strip common indentation from all lines. 
+/// `text.replaceAll(new RegExp(\`^${indentation}\`, "gm"), "")` +/// +fn strip_indentation<'a>(text: &'a str, indent: &str, allocator: &'a Allocator) -> &'a str { + let mut result = StringBuilder::with_capacity_in(text.len(), allocator); + for (i, line) in text.split('\n').enumerate() { + if i > 0 { + result.push('\n'); + } + if let Some(stripped) = line.strip_prefix(indent) { + result.push_str(stripped); + } else { + result.push_str(line); + } + } + result.into_str() +} diff --git a/crates/oxc_formatter/src/print/template/embed/mod.rs b/crates/oxc_formatter/src/print/template/embed/mod.rs index 1ec713ccd08c6..30efd9f68b19f 100644 --- a/crates/oxc_formatter/src/print/template/embed/mod.rs +++ b/crates/oxc_formatter/src/print/template/embed/mod.rs @@ -1,16 +1,15 @@ mod css; mod graphql; mod html; +mod markdown; -use oxc_allocator::{Allocator, StringBuilder}; +use oxc_allocator::Allocator; use oxc_ast::ast::*; -use oxc_syntax::line_terminator::LineTerminatorSplitter; use crate::{ IndentWidth, ast_nodes::{AstNode, AstNodes}, formatter::{FormatElement, Formatter, format_element::TextWidth, prelude::*}, - write, }; /// Try to format a tagged template with the embedded formatter if supported. 
@@ -23,7 +22,10 @@ pub(super) fn try_format_embedded_template<'a>( Some("css" | "styled") => css::format_css_doc(tagged.quasi(), f), Some("gql" | "graphql") => graphql::format_graphql_doc(tagged.quasi(), f), Some("html") => html::format_html_doc(tagged.quasi(), f, "tagged-html"), - Some("md" | "markdown") => try_embed_markdown(tagged, f), + // Markdown never supports `${}` (Prettier doesn't either) + Some("md" | "markdown") if tagged.quasi.is_no_substitution_template() => { + markdown::try_embed_markdown(tagged, f) + } _ => false, } } @@ -210,86 +212,6 @@ fn get_angular_component_language(node: &AstNode<'_, TemplateLiteral<'_>>) -> Op // --- -fn try_embed_markdown<'a>( - tagged: &AstNode<'a, TaggedTemplateExpression<'a>>, - f: &mut Formatter<'_, 'a>, -) -> bool { - // Markdown never supports expressions (Prettier doesn't either) - if !tagged.quasi.is_no_substitution_template() { - return false; - } - let template_content = tagged.quasi.quasis[0].value.raw.as_str(); - format_embedded_template(f, "tagged-markdown", template_content) -} - -// --- - -/// Format embedded language content inside a template literal using the string path. -/// -/// This is the shared formatting logic for no-substitution templates: -/// dedent → external formatter (Prettier) → reconstruct template structure. 
-fn format_embedded_template<'a>( - f: &mut Formatter<'_, 'a>, - language: &str, - template_content: &str, -) -> bool { - if template_content.trim().is_empty() { - write!(f, ["``"]); - return true; - } - - let template_content = dedent(template_content, f.context().allocator()); - - let Some(Ok(formatted)) = - f.context().external_callbacks().format_embedded(language, template_content) - else { - return false; - }; - - let format_content = format_with(|f: &mut Formatter<'_, 'a>| { - let content = f.context().allocator().alloc_str(&formatted); - for line in LineTerminatorSplitter::new(content) { - if line.is_empty() { - write!(f, [empty_line()]); - } else { - write!(f, [text(line), hard_line_break()]); - } - } - }); - - // NOTE: This path always returns the formatted string with each line indented, - // regardless of the length of the content, which may not be compatible with Prettier in some cases. - // If we use `Doc` like in the gql-in-js path, it would behave aligned with Prettier. - write!(f, ["`", block_indent(&format_content), "`"]); - true -} - -/// Strip the common leading indentation from all non-empty lines in `text`. -/// The `text` here is taken from `.raw`, so only `\n` is used as the line terminator. 
-fn dedent<'a>(text: &'a str, allocator: &'a Allocator) -> &'a str { - let min_indent = text - .split('\n') - .filter(|line| !line.trim_ascii_start().is_empty()) - .map(|line| line.bytes().take_while(u8::is_ascii_whitespace).count()) - .min() - .unwrap_or(0); - - if min_indent == 0 { - return text; - } - - let mut result = StringBuilder::with_capacity_in(text.len(), allocator); - for (i, line) in text.split('\n').enumerate() { - if i > 0 { - result.push('\n'); - } - let strip = line.bytes().take_while(u8::is_ascii_whitespace).count().min(min_indent); - result.push_str(&line[strip..]); - } - - result.into_str() -} - /// Split text on placeholder patterns, returning alternating parts: /// `[literal, index_str, literal, index_str, ...]` /// @@ -363,8 +285,7 @@ fn split_on_placeholders<'a>(text: &'a str, prefix: &str, suffix: &str) -> Vec<& /// Emit text with newlines converted to literal line breaks (`replaceEndOfLine()` equivalent). /// -/// Uses `Text("\n") + ExpandParent` (= `literalline()`) -/// instead of `hard_line_break()` to avoid adding indentation. +/// Uses [`write_literalline`] instead of `hard_line_break()` to avoid adding indentation. /// /// The external formatter has already computed proper indentation in the text content, /// so we must not add extra indent from the surrounding `block_indent`. @@ -378,10 +299,7 @@ fn write_text_with_line_breaks<'a>( // Splitting on `\n` is safe because `Doc` only contains normalized linebreaks. for line in text.split('\n') { if !first { - // Emit literalline: Text("\n") + ExpandParent - let newline = allocator.alloc_str("\n"); - f.write_element(FormatElement::Text { text: newline, width: TextWidth::multiline(0) }); - f.write_element(FormatElement::ExpandParent); + write_literalline(f, allocator); } first = false; if !line.is_empty() { @@ -391,3 +309,13 @@ fn write_text_with_line_breaks<'a>( } } } + +/// Emit Prettier's `literalline` equivalent, +/// which is a newline that preserves indentation from the source. 
+fn write_literalline<'a>(f: &mut Formatter<'_, 'a>, allocator: &'a Allocator) { + f.write_element(FormatElement::Text { + text: allocator.alloc_str("\n"), + width: TextWidth::multiline(0), + }); + f.write_element(FormatElement::ExpandParent); +}