diff --git a/Cargo.lock b/Cargo.lock index 1b4862769d710..09d154e763634 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -977,7 +977,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a867d7322eb69cf3a68a5426387a25b45cb3b9c5ee41023ee6cea92e2afadd82" dependencies = [ "camino", - "fancy-regex", + "fancy-regex 0.14.0", "libtest-mimic 0.8.1", "walkdir", ] @@ -1166,6 +1166,17 @@ dependencies = [ "regex-syntax", ] +[[package]] +name = "fancy-regex" +version = "0.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72cf461f865c862bb7dc573f643dd6a2b6842f7c30b07882b56bd148cc2761b8" +dependencies = [ + "bit-set", + "regex-automata", + "regex-syntax", +] + [[package]] name = "fastrand" version = "2.3.0" @@ -3270,6 +3281,7 @@ dependencies = [ name = "ruff_markdown" version = "0.0.0" dependencies = [ + "fancy-regex 0.17.0", "insta", "regex", "ruff_python_ast", diff --git a/crates/ruff_markdown/Cargo.toml b/crates/ruff_markdown/Cargo.toml index 19fd48c8d6454..1c9f9fac53f1b 100644 --- a/crates/ruff_markdown/Cargo.toml +++ b/crates/ruff_markdown/Cargo.toml @@ -17,6 +17,7 @@ ruff_workspace = { workspace = true } insta = { workspace = true } regex = { workspace = true } +fancy-regex = "0.17.0" [lints] workspace = true diff --git a/crates/ruff_markdown/src/lib.rs b/crates/ruff_markdown/src/lib.rs index 79d2b0a62ea95..621d2cbe7e1cc 100644 --- a/crates/ruff_markdown/src/lib.rs +++ b/crates/ruff_markdown/src/lib.rs @@ -1,6 +1,6 @@ use std::{path::Path, sync::LazyLock}; -use regex::Regex; +use fancy_regex::Regex; use ruff_python_ast::PySourceType; use ruff_python_formatter::format_module_source; use ruff_python_trivia::textwrap::{dedent, indent}; @@ -20,13 +20,13 @@ static MARKDOWN_CODE_BLOCK: LazyLock = LazyLock::new(|| { Regex::new( r"(?imsx) (? - ^(?\ *)```[^\S\r\n]* - (?(?:python|py|python3|py3|pyi)?) + ^(?\ *)(?```+|~~~+)[^\S\r\n]* + (?(?:python|py|python3|py3|pyi)?) (?:\ .*?)?\n ) (?.*?) (? - ^\ *```[^\S\r\n]*$ + ^\ *\k[^\S\r\n]*$ ) ", ) @@ -43,10 +43,14 @@ pub fn format_code_blocks( let mut last_match = 0; for capture in MARKDOWN_CODE_BLOCK.captures_iter(source) { - let (_, [before, code_indent, language, code, after]) = capture.extract(); - - let py_source_type = PySourceType::from_extension(language); - let unformatted_code = dedent(code); + let Ok(capture) = capture else { + continue; + }; + let language = capture.name("language").expect("no language"); + let code = capture.name("code").expect("no code"); + + let py_source_type = PySourceType::from_extension(language.as_str()); + let unformatted_code = dedent(code.as_str()); let options = settings.to_format_options(py_source_type, &unformatted_code, path); // Using `Printed::into_code` requires adding `ruff_formatter` as a direct dependency, and I suspect that Rust can optimize the closure away regardless. @@ -57,15 +61,14 @@ pub fn format_code_blocks( if let Ok(formatted_code) = formatted_code { if formatted_code.len() != unformatted_code.len() || formatted_code != *unformatted_code { - let m = capture.get_match(); - formatted.push_str(&source[last_match..m.start()]); + let code_indent = capture.name("indent").expect("no indent").as_str(); + + formatted.push_str(&source[last_match..code.start()]); let indented_code = indent(&formatted_code, code_indent); - // otherwise I need to deal with a result from write! - #[expect(clippy::format_push_string)] - formatted.push_str(&format!("{before}{indented_code}{after}")); + formatted.push_str(&indented_code); - last_match = m.end(); + last_match = code.end(); changed = true; } }