From b81d4820fb989df82a30d0a5464edeb1e237abc3 Mon Sep 17 00:00:00 2001 From: jfmcdowell <206422+jfmcdowell@users.noreply.github.com> Date: Mon, 29 Dec 2025 20:25:53 -0500 Subject: [PATCH 01/12] feat(markdown): implement CommonMark-compliant parser with spec test harness MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CommonMark 0.31.2 parser achieving 75.2% spec compliance (490/652 examples). Block elements: - ATX/setext headings, fenced/indented code blocks, HTML blocks - Block quotes with lazy continuation, thematic breaks - Ordered/unordered lists with nesting and tightness detection - Link reference definitions Inline elements: - Emphasis via delimiter stack algorithm (§6.4) - Links, images, autolinks, code spans - Entity references, backslash escapes, inline HTML - Hard/soft line breaks Testing: - CommonMark spec test harness with HTML comparison - Feature-gated to_html utility, 59 edge-case tests --- Cargo.lock | 71 +- .../src/generated/node_factory.rs | 440 +- .../src/generated/syntax_factory.rs | 433 +- crates/biome_markdown_parser/Cargo.toml | 15 + crates/biome_markdown_parser/src/lexer/mod.rs | 919 ++- .../biome_markdown_parser/src/lexer/tests.rs | 395 +- crates/biome_markdown_parser/src/lib.rs | 69 +- .../src/link_reference.rs | 86 + crates/biome_markdown_parser/src/parser.rs | 410 +- crates/biome_markdown_parser/src/syntax.rs | 1392 ++++- .../src/syntax/fenced_code_block.rs | 405 ++ .../src/syntax/header.rs | 244 + .../src/syntax/html_block.rs | 458 ++ .../src/syntax/inline.rs | 1640 ++++++ .../src/syntax/link_block.rs | 584 ++ .../biome_markdown_parser/src/syntax/list.rs | 1762 ++++++ .../src/syntax/parse_error.rs | 159 + .../biome_markdown_parser/src/syntax/quote.rs | 351 ++ .../src/syntax/thematic_break_block.rs | 28 +- crates/biome_markdown_parser/src/to_html.rs | 1543 +++++ .../biome_markdown_parser/src/token_source.rs | 136 +- .../tests/commonmark_spec.rs | 346 ++ .../tests/list_tightness.rs | 32 + 
.../error/multiline_label_reference.md | 2 + .../error/multiline_label_reference.md.snap | 101 + .../error/quote_nesting_too_deep.md | 1 + .../error/quote_nesting_too_deep.md.snap | 452 ++ .../md_test_suite/error/too_many_hashes.md | 1 + .../error/too_many_hashes.md.snap | 106 + .../md_test_suite/error/unclosed_code_span.md | 1 + .../error/unclosed_code_span.md.snap | 86 + .../md_test_suite/error/unclosed_image.md | 1 + .../error/unclosed_image.md.snap | 90 + .../md_test_suite/error/unclosed_link.md | 1 + .../md_test_suite/error/unclosed_link.md.snap | 88 + .../error/unclosed_reference_image_label.md | 1 + .../unclosed_reference_image_label.md.snap | 109 + .../error/unclosed_reference_link_label.md | 1 + .../unclosed_reference_link_label.md.snap | 104 + .../error/unterminated_code_fence.md | 4 + .../error/unterminated_code_fence.md.snap | 172 + .../ok/atx_heading_trailing_hash.md | 4 + .../ok/atx_heading_trailing_hash.md.snap | 169 + .../tests/md_test_suite/ok/autolinks.md | 7 + .../tests/md_test_suite/ok/autolinks.md.snap | 225 + .../tests/md_test_suite/ok/block_quote.md | 4 + .../md_test_suite/ok/block_quote.md.snap | 102 + .../md_test_suite/ok/block_quote_grouping.md | 7 + .../ok/block_quote_grouping.md.snap | 154 + .../tests/md_test_suite/ok/bullet_list.md | 9 + .../md_test_suite/ok/bullet_list.md.snap | 238 + .../tests/md_test_suite/ok/edge_cases.md | 30 + .../tests/md_test_suite/ok/edge_cases.md.snap | 702 +++ .../md_test_suite/ok/emphasis_complex.md | 13 + .../md_test_suite/ok/emphasis_complex.md.snap | 395 ++ .../md_test_suite/ok/emphasis_crossing.md | 1 + .../ok/emphasis_crossing.md.snap | 85 + .../md_test_suite/ok/emphasis_edge_cases.md | 17 + .../ok/emphasis_edge_cases.md.snap | 600 ++ .../md_test_suite/ok/emphasis_flanking.md | 11 + .../ok/emphasis_flanking.md.snap | 335 ++ .../md_test_suite/ok/emphasis_link_text.md | 1 + .../ok/emphasis_link_text.md.snap | 99 + .../md_test_suite/ok/entity_references.md | 11 + .../ok/entity_references.md.snap | 400 ++ 
.../md_test_suite/ok/fenced_code_advanced.md | 38 + .../ok/fenced_code_advanced.md.snap | 630 ++ .../md_test_suite/ok/fenced_code_block.md | 15 + .../ok/fenced_code_block.md.snap | 201 + .../ok/fenced_code_indentation.md | 3 + .../ok/fenced_code_indentation.md.snap | 77 + .../ok/fenced_code_info_backtick.md | 2 + .../ok/fenced_code_info_backtick.md.snap | 81 + .../tests/md_test_suite/ok/hard_line_break.md | 5 + .../md_test_suite/ok/hard_line_break.md.snap | 97 + .../tests/md_test_suite/ok/header.md | 17 + .../tests/md_test_suite/ok/header.md.snap | 541 ++ .../tests/md_test_suite/ok/html_block.md | 6 + .../tests/md_test_suite/ok/html_block.md.snap | 126 + .../md_test_suite/ok/indent_code_block.md | 8 + .../ok/indent_code_block.md.snap | 170 + .../ok/indented_code_blank_lines.md | 5 + .../ok/indented_code_blank_lines.md.snap | 88 + .../tests/md_test_suite/ok/inline_elements.md | 7 + .../md_test_suite/ok/inline_elements.md.snap | 295 + .../tests/md_test_suite/ok/inline_html.md | 11 + .../md_test_suite/ok/inline_html.md.snap | 469 ++ .../ok/inline_html_edge_cases.md | 63 + .../ok/inline_html_edge_cases.md.snap | 2834 +++++++++ .../md_test_suite/ok/inline_html_invalid.md | 21 + .../ok/inline_html_invalid.md.snap | 684 +++ .../ok/inline_link_destination_title.md | 11 + .../ok/inline_link_destination_title.md.snap | 467 ++ .../md_test_suite/ok/lazy_continuation.md | 31 + .../ok/lazy_continuation.md.snap | 541 ++ .../tests/md_test_suite/ok/link_definition.md | 9 + .../md_test_suite/ok/link_definition.md.snap | 305 + .../ok/link_definition_edge_cases.md | 32 + .../ok/link_definition_edge_cases.md.snap | 1058 ++++ .../ok/link_definition_invalid.md | 14 + .../ok/link_definition_invalid.md.snap | 331 ++ .../md_test_suite/ok/list_indentation.md | 60 + .../md_test_suite/ok/list_indentation.md.snap | 1184 ++++ .../md_test_suite/ok/list_interrupt_bullet.md | 2 + .../ok/list_interrupt_bullet.md.snap | 83 + .../ok/list_interrupt_empty_bullet.md | 3 + 
.../ok/list_interrupt_empty_bullet.md.snap | 76 + .../ok/list_interrupt_ordered.md | 2 + .../ok/list_interrupt_ordered.md.snap | 83 + .../ok/list_no_interrupt_empty_bullet.md | 3 + .../ok/list_no_interrupt_empty_bullet.md.snap | 72 + .../ok/list_no_interrupt_ordered_2.md | 2 + .../ok/list_no_interrupt_ordered_2.md.snap | 66 + .../tests/md_test_suite/ok/list_tightness.md | 58 + .../md_test_suite/ok/list_tightness.md.snap | 1150 ++++ .../md_test_suite/ok/multi_backtick_code.md | 7 + .../ok/multi_backtick_code.md.snap | 235 + .../tests/md_test_suite/ok/multiline_label.md | 2 + .../md_test_suite/ok/multiline_label.md.snap | 86 + .../tests/md_test_suite/ok/multiline_list.md | 12 + .../md_test_suite/ok/multiline_list.md.snap | 305 + .../tests/md_test_suite/ok/nested_quote.md | 5 + .../md_test_suite/ok/nested_quote.md.snap | 145 + .../tests/md_test_suite/ok/ordered_list.md | 6 + .../md_test_suite/ok/ordered_list.md.snap | 178 + .../tests/md_test_suite/ok/paragraph.md | 3 + .../tests/md_test_suite/ok/paragraph.md.snap | 69 + .../ok/paragraph_interruption.md | 16 + .../ok/paragraph_interruption.md.snap | 288 + .../ok/reference_link_not_implemented.md | 1 + .../ok/reference_link_not_implemented.md.snap | 86 + .../tests/md_test_suite/ok/reference_links.md | 33 + .../md_test_suite/ok/reference_links.md.snap | 949 +++ .../tests/md_test_suite/ok/setext_heading.md | 15 + .../md_test_suite/ok/setext_heading.md.snap | 201 + .../ok/thematic_break_block.md.snap | 70 +- .../tests/md_test_suite/ok/unclosed_bold.md | 1 + .../md_test_suite/ok/unclosed_bold.md.snap | 60 + .../md_test_suite/ok/unclosed_emphasis.md | 1 + .../ok/unclosed_emphasis.md.snap | 60 + crates/biome_markdown_parser/tests/spec.json | 5218 +++++++++++++++++ .../biome_markdown_parser/tests/spec_test.rs | 4 +- .../src/generated/kind.rs | 33 +- .../src/generated/macros.rs | 73 +- .../src/generated/nodes.rs | 2579 +++++--- .../src/generated/nodes_mut.rs | 294 +- crates/biome_markdown_syntax/src/lib.rs | 17 +- 
crates/biome_parser/src/lexer.rs | 67 +- xtask/codegen/markdown.ungram | 167 +- xtask/codegen/src/markdown_kinds_src.rs | 21 +- 150 files changed, 38765 insertions(+), 1537 deletions(-) create mode 100644 crates/biome_markdown_parser/src/link_reference.rs create mode 100644 crates/biome_markdown_parser/src/syntax/fenced_code_block.rs create mode 100644 crates/biome_markdown_parser/src/syntax/header.rs create mode 100644 crates/biome_markdown_parser/src/syntax/html_block.rs create mode 100644 crates/biome_markdown_parser/src/syntax/inline.rs create mode 100644 crates/biome_markdown_parser/src/syntax/link_block.rs create mode 100644 crates/biome_markdown_parser/src/syntax/list.rs create mode 100644 crates/biome_markdown_parser/src/syntax/parse_error.rs create mode 100644 crates/biome_markdown_parser/src/syntax/quote.rs create mode 100644 crates/biome_markdown_parser/src/to_html.rs create mode 100644 crates/biome_markdown_parser/tests/commonmark_spec.rs create mode 100644 crates/biome_markdown_parser/tests/list_tightness.rs create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/error/multiline_label_reference.md create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/error/multiline_label_reference.md.snap create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/error/quote_nesting_too_deep.md create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/error/quote_nesting_too_deep.md.snap create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/error/too_many_hashes.md create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/error/too_many_hashes.md.snap create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/error/unclosed_code_span.md create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/error/unclosed_code_span.md.snap create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/error/unclosed_image.md create mode 100644 
crates/biome_markdown_parser/tests/md_test_suite/error/unclosed_image.md.snap create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/error/unclosed_link.md create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/error/unclosed_link.md.snap create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/error/unclosed_reference_image_label.md create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/error/unclosed_reference_image_label.md.snap create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/error/unclosed_reference_link_label.md create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/error/unclosed_reference_link_label.md.snap create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/error/unterminated_code_fence.md create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/error/unterminated_code_fence.md.snap create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/ok/atx_heading_trailing_hash.md create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/ok/atx_heading_trailing_hash.md.snap create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/ok/autolinks.md create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/ok/autolinks.md.snap create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/ok/block_quote.md create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/ok/block_quote.md.snap create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/ok/block_quote_grouping.md create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/ok/block_quote_grouping.md.snap create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/ok/bullet_list.md create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/ok/bullet_list.md.snap create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/ok/edge_cases.md create mode 100644 
crates/biome_markdown_parser/tests/md_test_suite/ok/edge_cases.md.snap create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/ok/emphasis_complex.md create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/ok/emphasis_complex.md.snap create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/ok/emphasis_crossing.md create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/ok/emphasis_crossing.md.snap create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/ok/emphasis_edge_cases.md create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/ok/emphasis_edge_cases.md.snap create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/ok/emphasis_flanking.md create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/ok/emphasis_flanking.md.snap create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/ok/emphasis_link_text.md create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/ok/emphasis_link_text.md.snap create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/ok/entity_references.md create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/ok/entity_references.md.snap create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/ok/fenced_code_advanced.md create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/ok/fenced_code_advanced.md.snap create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/ok/fenced_code_block.md create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/ok/fenced_code_block.md.snap create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/ok/fenced_code_indentation.md create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/ok/fenced_code_indentation.md.snap create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/ok/fenced_code_info_backtick.md create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/ok/fenced_code_info_backtick.md.snap 
create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/ok/hard_line_break.md create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/ok/hard_line_break.md.snap create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/ok/header.md create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/ok/header.md.snap create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/ok/html_block.md create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/ok/html_block.md.snap create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/ok/indent_code_block.md create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/ok/indent_code_block.md.snap create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/ok/indented_code_blank_lines.md create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/ok/indented_code_blank_lines.md.snap create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/ok/inline_elements.md create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/ok/inline_elements.md.snap create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/ok/inline_html.md create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/ok/inline_html.md.snap create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/ok/inline_html_edge_cases.md create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/ok/inline_html_edge_cases.md.snap create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/ok/inline_html_invalid.md create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/ok/inline_html_invalid.md.snap create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/ok/inline_link_destination_title.md create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/ok/inline_link_destination_title.md.snap create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/ok/lazy_continuation.md create mode 100644 
crates/biome_markdown_parser/tests/md_test_suite/ok/lazy_continuation.md.snap create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/ok/link_definition.md create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/ok/link_definition.md.snap create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/ok/link_definition_edge_cases.md create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/ok/link_definition_edge_cases.md.snap create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/ok/link_definition_invalid.md create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/ok/link_definition_invalid.md.snap create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/ok/list_indentation.md create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/ok/list_indentation.md.snap create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/ok/list_interrupt_bullet.md create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/ok/list_interrupt_bullet.md.snap create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/ok/list_interrupt_empty_bullet.md create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/ok/list_interrupt_empty_bullet.md.snap create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/ok/list_interrupt_ordered.md create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/ok/list_interrupt_ordered.md.snap create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/ok/list_no_interrupt_empty_bullet.md create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/ok/list_no_interrupt_empty_bullet.md.snap create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/ok/list_no_interrupt_ordered_2.md create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/ok/list_no_interrupt_ordered_2.md.snap create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/ok/list_tightness.md create mode 100644 
crates/biome_markdown_parser/tests/md_test_suite/ok/list_tightness.md.snap create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/ok/multi_backtick_code.md create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/ok/multi_backtick_code.md.snap create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/ok/multiline_label.md create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/ok/multiline_label.md.snap create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/ok/multiline_list.md create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/ok/multiline_list.md.snap create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/ok/nested_quote.md create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/ok/nested_quote.md.snap create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/ok/ordered_list.md create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/ok/ordered_list.md.snap create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/ok/paragraph.md create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/ok/paragraph.md.snap create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/ok/paragraph_interruption.md create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/ok/paragraph_interruption.md.snap create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/ok/reference_link_not_implemented.md create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/ok/reference_link_not_implemented.md.snap create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/ok/reference_links.md create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/ok/reference_links.md.snap create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/ok/setext_heading.md create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/ok/setext_heading.md.snap create mode 100644 
crates/biome_markdown_parser/tests/md_test_suite/ok/unclosed_bold.md create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/ok/unclosed_bold.md.snap create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/ok/unclosed_emphasis.md create mode 100644 crates/biome_markdown_parser/tests/md_test_suite/ok/unclosed_emphasis.md.snap create mode 100644 crates/biome_markdown_parser/tests/spec.json diff --git a/Cargo.lock b/Cargo.lock index fe24d1eec6c8..07fee7477725 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1291,14 +1291,18 @@ dependencies = [ "biome_console", "biome_diagnostics", "biome_markdown_factory", + "biome_markdown_parser", "biome_markdown_syntax", "biome_parser", "biome_rowan", "biome_test_utils", "biome_unicode_table", + "htmlize", "insta", "quickcheck", "quickcheck_macros", + "serde", + "serde_json", "tests_macros", "tracing", "unicode-bom", @@ -1933,7 +1937,7 @@ dependencies = [ "hashbrown 0.16.1", "indexmap", "once_cell", - "phf", + "phf 0.13.1", "rustc-hash 2.1.1", "static_assertions", ] @@ -3157,6 +3161,19 @@ version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" +[[package]] +name = "htmlize" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d347c0de239be20ba0982e4822de3124404281e119ae3e11f5d7425a414e1935" +dependencies = [ + "memchr", + "pastey", + "phf 0.11.3", + "phf_codegen", + "serde_json", +] + [[package]] name = "http" version = "1.2.0" @@ -4026,6 +4043,12 @@ version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" +[[package]] +name = "pastey" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35fb2e5f958ec131621fdd531e9fc186ed768cbe395337403ae56c17a74c68ec" + [[package]] name = "path-absolutize" version = "3.1.1" @@ 
-4050,6 +4073,15 @@ version = "2.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" +[[package]] +name = "phf" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd6780a80ae0c52cc120a26a1a42c1ae51b247a253e4e06113d23d2c2edd078" +dependencies = [ + "phf_shared 0.11.3", +] + [[package]] name = "phf" version = "0.13.1" @@ -4057,7 +4089,27 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c1562dc717473dbaa4c1f85a36410e03c047b2e7df7f45ee938fbef64ae7fadf" dependencies = [ "phf_macros", - "phf_shared", + "phf_shared 0.13.1", +] + +[[package]] +name = "phf_codegen" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aef8048c789fa5e851558d709946d6d79a8ff88c0440c587967f8e94bfb1216a" +dependencies = [ + "phf_generator 0.11.3", + "phf_shared 0.11.3", +] + +[[package]] +name = "phf_generator" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c80231409c20246a13fddb31776fb942c38553c51e871f8cbd687a4cfb5843d" +dependencies = [ + "phf_shared 0.11.3", + "rand 0.8.5", ] [[package]] @@ -4067,7 +4119,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "135ace3a761e564ec88c03a77317a7c6b80bb7f7135ef2544dbe054243b89737" dependencies = [ "fastrand", - "phf_shared", + "phf_shared 0.13.1", ] [[package]] @@ -4076,13 +4128,22 @@ version = "0.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "812f032b54b1e759ccd5f8b6677695d5268c588701effba24601f6932f8269ef" dependencies = [ - "phf_generator", - "phf_shared", + "phf_generator 0.13.1", + "phf_shared 0.13.1", "proc-macro2", "quote", "syn 2.0.106", ] +[[package]] +name = "phf_shared" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"67eabc2ef2a60eb7faa00097bd1ffdb5bd28e62bf39990626a582201b7a754e5" +dependencies = [ + "siphasher", +] + [[package]] name = "phf_shared" version = "0.13.1" diff --git a/crates/biome_markdown_factory/src/generated/node_factory.rs b/crates/biome_markdown_factory/src/generated/node_factory.rs index a8255300311e..84da802e71b8 100644 --- a/crates/biome_markdown_factory/src/generated/node_factory.rs +++ b/crates/biome_markdown_factory/src/generated/node_factory.rs @@ -6,16 +6,25 @@ use biome_markdown_syntax::{ MarkdownSyntaxToken as SyntaxToken, *, }; use biome_rowan::AstNode; -pub fn md_bullet( - bullet_token: SyntaxToken, - space_token: SyntaxToken, - content: MdInlineItemList, -) -> MdBullet { +pub fn md_autolink( + l_angle_token: SyntaxToken, + value: MdInlineItemList, + r_angle_token: SyntaxToken, +) -> MdAutolink { + MdAutolink::unwrap_cast(SyntaxNode::new_detached( + MarkdownSyntaxKind::MD_AUTOLINK, + [ + Some(SyntaxElement::Token(l_angle_token)), + Some(SyntaxElement::Node(value.into_syntax())), + Some(SyntaxElement::Token(r_angle_token)), + ], + )) +} +pub fn md_bullet(bullet_token: SyntaxToken, content: MdBlockList) -> MdBullet { MdBullet::unwrap_cast(SyntaxNode::new_detached( MarkdownSyntaxKind::MD_BULLET, [ Some(SyntaxElement::Token(bullet_token)), - Some(SyntaxElement::Token(space_token)), Some(SyntaxElement::Node(content.into_syntax())), ], )) @@ -54,12 +63,16 @@ impl MdDocumentBuilder { )) } } +pub fn md_entity_reference(value_token: SyntaxToken) -> MdEntityReference { + MdEntityReference::unwrap_cast(SyntaxNode::new_detached( + MarkdownSyntaxKind::MD_ENTITY_REFERENCE, + [Some(SyntaxElement::Token(value_token))], + )) +} pub fn md_fenced_code_block( l_fence_token: SyntaxToken, code_list: MdCodeNameList, - l_hard_line: MdHardLine, - content: MdTextual, - r_hard_line: MdHardLine, + content: MdInlineItemList, r_fence_token: SyntaxToken, ) -> MdFencedCodeBlock { MdFencedCodeBlock::unwrap_cast(SyntaxNode::new_detached( @@ -67,9 +80,7 @@ pub fn 
md_fenced_code_block( [ Some(SyntaxElement::Token(l_fence_token)), Some(SyntaxElement::Node(code_list.into_syntax())), - Some(SyntaxElement::Node(l_hard_line.into_syntax())), Some(SyntaxElement::Node(content.into_syntax())), - Some(SyntaxElement::Node(r_hard_line.into_syntax())), Some(SyntaxElement::Token(r_fence_token)), ], )) @@ -115,10 +126,10 @@ impl MdHeaderBuilder { )) } } -pub fn md_html_block(md_textual: MdTextual) -> MdHtmlBlock { +pub fn md_html_block(content: MdInlineItemList) -> MdHtmlBlock { MdHtmlBlock::unwrap_cast(SyntaxNode::new_detached( MarkdownSyntaxKind::MD_HTML_BLOCK, - [Some(SyntaxElement::Node(md_textual.into_syntax()))], + [Some(SyntaxElement::Node(content.into_syntax()))], )) } pub fn md_indent(value_token: SyntaxToken) -> MdIndent { @@ -127,19 +138,10 @@ pub fn md_indent(value_token: SyntaxToken) -> MdIndent { [Some(SyntaxElement::Token(value_token))], )) } -pub fn md_indent_code_block(lines: MdIndentedCodeLineList) -> MdIndentCodeBlock { +pub fn md_indent_code_block(content: MdInlineItemList) -> MdIndentCodeBlock { MdIndentCodeBlock::unwrap_cast(SyntaxNode::new_detached( MarkdownSyntaxKind::MD_INDENT_CODE_BLOCK, - [Some(SyntaxElement::Node(lines.into_syntax()))], - )) -} -pub fn md_indented_code_line(indentation: MdIndent, content: MdTextual) -> MdIndentedCodeLine { - MdIndentedCodeLine::unwrap_cast(SyntaxNode::new_detached( - MarkdownSyntaxKind::MD_INDENTED_CODE_LINE, - [ - Some(SyntaxElement::Node(indentation.into_syntax())), - Some(SyntaxElement::Node(content.into_syntax())), - ], + [Some(SyntaxElement::Node(content.into_syntax()))], )) } pub fn md_inline_code( @@ -170,92 +172,64 @@ pub fn md_inline_emphasis( ], )) } +pub fn md_inline_html(value: MdInlineItemList) -> MdInlineHtml { + MdInlineHtml::unwrap_cast(SyntaxNode::new_detached( + MarkdownSyntaxKind::MD_INLINE_HTML, + [Some(SyntaxElement::Node(value.into_syntax()))], + )) +} pub fn md_inline_image( - l_brack_token: SyntaxToken, excl_token: SyntaxToken, - alt: MdInlineImageAlt, - 
source: MdInlineImageSource, + l_brack_token: SyntaxToken, + alt: MdInlineItemList, r_brack_token: SyntaxToken, + l_paren_token: SyntaxToken, + destination: MdInlineItemList, + r_paren_token: SyntaxToken, ) -> MdInlineImageBuilder { MdInlineImageBuilder { - l_brack_token, excl_token, + l_brack_token, alt, - source, r_brack_token, - link: None, + l_paren_token, + destination, + r_paren_token, + title: None, } } pub struct MdInlineImageBuilder { - l_brack_token: SyntaxToken, excl_token: SyntaxToken, - alt: MdInlineImageAlt, - source: MdInlineImageSource, + l_brack_token: SyntaxToken, + alt: MdInlineItemList, r_brack_token: SyntaxToken, - link: Option, + l_paren_token: SyntaxToken, + destination: MdInlineItemList, + r_paren_token: SyntaxToken, + title: Option, } impl MdInlineImageBuilder { - pub fn with_link(mut self, link: MdInlineImageLink) -> Self { - self.link = Some(link); + pub fn with_title(mut self, title: MdLinkTitle) -> Self { + self.title = Some(title); self } pub fn build(self) -> MdInlineImage { MdInlineImage::unwrap_cast(SyntaxNode::new_detached( MarkdownSyntaxKind::MD_INLINE_IMAGE, [ - Some(SyntaxElement::Token(self.l_brack_token)), Some(SyntaxElement::Token(self.excl_token)), + Some(SyntaxElement::Token(self.l_brack_token)), Some(SyntaxElement::Node(self.alt.into_syntax())), - Some(SyntaxElement::Node(self.source.into_syntax())), Some(SyntaxElement::Token(self.r_brack_token)), - self.link + Some(SyntaxElement::Token(self.l_paren_token)), + Some(SyntaxElement::Node(self.destination.into_syntax())), + self.title .map(|token| SyntaxElement::Node(token.into_syntax())), + Some(SyntaxElement::Token(self.r_paren_token)), ], )) } } -pub fn md_inline_image_alt( - l_brack_token: SyntaxToken, - content: MdInlineItemList, - r_brack_token: SyntaxToken, -) -> MdInlineImageAlt { - MdInlineImageAlt::unwrap_cast(SyntaxNode::new_detached( - MarkdownSyntaxKind::MD_INLINE_IMAGE_ALT, - [ - Some(SyntaxElement::Token(l_brack_token)), - 
Some(SyntaxElement::Node(content.into_syntax())), - Some(SyntaxElement::Token(r_brack_token)), - ], - )) -} -pub fn md_inline_image_link( - l_paren_token: SyntaxToken, - content: MdInlineItemList, - r_paren_token: SyntaxToken, -) -> MdInlineImageLink { - MdInlineImageLink::unwrap_cast(SyntaxNode::new_detached( - MarkdownSyntaxKind::MD_INLINE_IMAGE_LINK, - [ - Some(SyntaxElement::Token(l_paren_token)), - Some(SyntaxElement::Node(content.into_syntax())), - Some(SyntaxElement::Token(r_paren_token)), - ], - )) -} -pub fn md_inline_image_source( - l_paren_token: SyntaxToken, - content: MdInlineItemList, - r_paren_token: SyntaxToken, -) -> MdInlineImageSource { - MdInlineImageSource::unwrap_cast(SyntaxNode::new_detached( - MarkdownSyntaxKind::MD_INLINE_IMAGE_SOURCE, - [ - Some(SyntaxElement::Token(l_paren_token)), - Some(SyntaxElement::Node(content.into_syntax())), - Some(SyntaxElement::Token(r_paren_token)), - ], - )) -} pub fn md_inline_italic( l_fence_token: SyntaxToken, content: MdInlineItemList, @@ -275,20 +249,48 @@ pub fn md_inline_link( text: MdInlineItemList, r_brack_token: SyntaxToken, l_paren_token: SyntaxToken, - source: MdInlineItemList, + destination: MdInlineItemList, r_paren_token: SyntaxToken, -) -> MdInlineLink { - MdInlineLink::unwrap_cast(SyntaxNode::new_detached( - MarkdownSyntaxKind::MD_INLINE_LINK, - [ - Some(SyntaxElement::Token(l_brack_token)), - Some(SyntaxElement::Node(text.into_syntax())), - Some(SyntaxElement::Token(r_brack_token)), - Some(SyntaxElement::Token(l_paren_token)), - Some(SyntaxElement::Node(source.into_syntax())), - Some(SyntaxElement::Token(r_paren_token)), - ], - )) +) -> MdInlineLinkBuilder { + MdInlineLinkBuilder { + l_brack_token, + text, + r_brack_token, + l_paren_token, + destination, + r_paren_token, + title: None, + } +} +pub struct MdInlineLinkBuilder { + l_brack_token: SyntaxToken, + text: MdInlineItemList, + r_brack_token: SyntaxToken, + l_paren_token: SyntaxToken, + destination: MdInlineItemList, + r_paren_token: 
SyntaxToken, + title: Option, +} +impl MdInlineLinkBuilder { + pub fn with_title(mut self, title: MdLinkTitle) -> Self { + self.title = Some(title); + self + } + pub fn build(self) -> MdInlineLink { + MdInlineLink::unwrap_cast(SyntaxNode::new_detached( + MarkdownSyntaxKind::MD_INLINE_LINK, + [ + Some(SyntaxElement::Token(self.l_brack_token)), + Some(SyntaxElement::Node(self.text.into_syntax())), + Some(SyntaxElement::Token(self.r_brack_token)), + Some(SyntaxElement::Token(self.l_paren_token)), + Some(SyntaxElement::Node(self.destination.into_syntax())), + self.title + .map(|token| SyntaxElement::Node(token.into_syntax())), + Some(SyntaxElement::Token(self.r_paren_token)), + ], + )) + } } pub fn md_link_block(label: MdTextual, url: MdTextual) -> MdLinkBlockBuilder { MdLinkBlockBuilder { @@ -319,31 +321,212 @@ impl MdLinkBlockBuilder { )) } } -pub fn md_order_list_item(md_bullet_list: MdBulletList) -> MdOrderListItem { - MdOrderListItem::unwrap_cast(SyntaxNode::new_detached( - MarkdownSyntaxKind::MD_ORDER_LIST_ITEM, +pub fn md_link_destination(content: MdInlineItemList) -> MdLinkDestination { + MdLinkDestination::unwrap_cast(SyntaxNode::new_detached( + MarkdownSyntaxKind::MD_LINK_DESTINATION, + [Some(SyntaxElement::Node(content.into_syntax()))], + )) +} +pub fn md_link_label(content: MdInlineItemList) -> MdLinkLabel { + MdLinkLabel::unwrap_cast(SyntaxNode::new_detached( + MarkdownSyntaxKind::MD_LINK_LABEL, + [Some(SyntaxElement::Node(content.into_syntax()))], + )) +} +pub fn md_link_reference_definition( + l_brack_token: SyntaxToken, + label: MdLinkLabel, + r_brack_token: SyntaxToken, + colon_token: SyntaxToken, + destination: MdLinkDestination, +) -> MdLinkReferenceDefinitionBuilder { + MdLinkReferenceDefinitionBuilder { + l_brack_token, + label, + r_brack_token, + colon_token, + destination, + title: None, + } +} +pub struct MdLinkReferenceDefinitionBuilder { + l_brack_token: SyntaxToken, + label: MdLinkLabel, + r_brack_token: SyntaxToken, + colon_token: 
SyntaxToken, + destination: MdLinkDestination, + title: Option, +} +impl MdLinkReferenceDefinitionBuilder { + pub fn with_title(mut self, title: MdLinkTitle) -> Self { + self.title = Some(title); + self + } + pub fn build(self) -> MdLinkReferenceDefinition { + MdLinkReferenceDefinition::unwrap_cast(SyntaxNode::new_detached( + MarkdownSyntaxKind::MD_LINK_REFERENCE_DEFINITION, + [ + Some(SyntaxElement::Token(self.l_brack_token)), + Some(SyntaxElement::Node(self.label.into_syntax())), + Some(SyntaxElement::Token(self.r_brack_token)), + Some(SyntaxElement::Token(self.colon_token)), + Some(SyntaxElement::Node(self.destination.into_syntax())), + self.title + .map(|token| SyntaxElement::Node(token.into_syntax())), + ], + )) + } +} +pub fn md_link_title(content: MdInlineItemList) -> MdLinkTitle { + MdLinkTitle::unwrap_cast(SyntaxNode::new_detached( + MarkdownSyntaxKind::MD_LINK_TITLE, + [Some(SyntaxElement::Node(content.into_syntax()))], + )) +} +pub fn md_newline(value_token: SyntaxToken) -> MdNewline { + MdNewline::unwrap_cast(SyntaxNode::new_detached( + MarkdownSyntaxKind::MD_NEWLINE, + [Some(SyntaxElement::Token(value_token))], + )) +} +pub fn md_ordered_list_item(md_bullet_list: MdBulletList) -> MdOrderedListItem { + MdOrderedListItem::unwrap_cast(SyntaxNode::new_detached( + MarkdownSyntaxKind::MD_ORDERED_LIST_ITEM, [Some(SyntaxElement::Node(md_bullet_list.into_syntax()))], )) } -pub fn md_paragraph(list: MdInlineItemList, hard_line: MdHardLine) -> MdParagraph { - MdParagraph::unwrap_cast(SyntaxNode::new_detached( - MarkdownSyntaxKind::MD_PARAGRAPH, +pub fn md_paragraph(list: MdInlineItemList) -> MdParagraphBuilder { + MdParagraphBuilder { + list, + hard_line: None, + } +} +pub struct MdParagraphBuilder { + list: MdInlineItemList, + hard_line: Option, +} +impl MdParagraphBuilder { + pub fn with_hard_line(mut self, hard_line: MdHardLine) -> Self { + self.hard_line = Some(hard_line); + self + } + pub fn build(self) -> MdParagraph { + 
MdParagraph::unwrap_cast(SyntaxNode::new_detached( + MarkdownSyntaxKind::MD_PARAGRAPH, + [ + Some(SyntaxElement::Node(self.list.into_syntax())), + self.hard_line + .map(|token| SyntaxElement::Node(token.into_syntax())), + ], + )) + } +} +pub fn md_quote(marker_token: SyntaxToken, content: MdBlockList) -> MdQuote { + MdQuote::unwrap_cast(SyntaxNode::new_detached( + MarkdownSyntaxKind::MD_QUOTE, [ - Some(SyntaxElement::Node(list.into_syntax())), - Some(SyntaxElement::Node(hard_line.into_syntax())), + Some(SyntaxElement::Token(marker_token)), + Some(SyntaxElement::Node(content.into_syntax())), ], )) } -pub fn md_quote(any_md_block: AnyMdBlock) -> MdQuote { - MdQuote::unwrap_cast(SyntaxNode::new_detached( - MarkdownSyntaxKind::MD_QUOTE, - [Some(SyntaxElement::Node(any_md_block.into_syntax()))], +pub fn md_reference_image( + excl_token: SyntaxToken, + l_brack_token: SyntaxToken, + alt: MdInlineItemList, + r_brack_token: SyntaxToken, +) -> MdReferenceImageBuilder { + MdReferenceImageBuilder { + excl_token, + l_brack_token, + alt, + r_brack_token, + label: None, + } +} +pub struct MdReferenceImageBuilder { + excl_token: SyntaxToken, + l_brack_token: SyntaxToken, + alt: MdInlineItemList, + r_brack_token: SyntaxToken, + label: Option, +} +impl MdReferenceImageBuilder { + pub fn with_label(mut self, label: MdReferenceLinkLabel) -> Self { + self.label = Some(label); + self + } + pub fn build(self) -> MdReferenceImage { + MdReferenceImage::unwrap_cast(SyntaxNode::new_detached( + MarkdownSyntaxKind::MD_REFERENCE_IMAGE, + [ + Some(SyntaxElement::Token(self.excl_token)), + Some(SyntaxElement::Token(self.l_brack_token)), + Some(SyntaxElement::Node(self.alt.into_syntax())), + Some(SyntaxElement::Token(self.r_brack_token)), + self.label + .map(|token| SyntaxElement::Node(token.into_syntax())), + ], + )) + } +} +pub fn md_reference_link( + l_brack_token: SyntaxToken, + text: MdInlineItemList, + r_brack_token: SyntaxToken, +) -> MdReferenceLinkBuilder { + MdReferenceLinkBuilder { + 
l_brack_token, + text, + r_brack_token, + label: None, + } +} +pub struct MdReferenceLinkBuilder { + l_brack_token: SyntaxToken, + text: MdInlineItemList, + r_brack_token: SyntaxToken, + label: Option, +} +impl MdReferenceLinkBuilder { + pub fn with_label(mut self, label: MdReferenceLinkLabel) -> Self { + self.label = Some(label); + self + } + pub fn build(self) -> MdReferenceLink { + MdReferenceLink::unwrap_cast(SyntaxNode::new_detached( + MarkdownSyntaxKind::MD_REFERENCE_LINK, + [ + Some(SyntaxElement::Token(self.l_brack_token)), + Some(SyntaxElement::Node(self.text.into_syntax())), + Some(SyntaxElement::Token(self.r_brack_token)), + self.label + .map(|token| SyntaxElement::Node(token.into_syntax())), + ], + )) + } +} +pub fn md_reference_link_label( + l_brack_token: SyntaxToken, + label: MdInlineItemList, + r_brack_token: SyntaxToken, +) -> MdReferenceLinkLabel { + MdReferenceLinkLabel::unwrap_cast(SyntaxNode::new_detached( + MarkdownSyntaxKind::MD_REFERENCE_LINK_LABEL, + [ + Some(SyntaxElement::Token(l_brack_token)), + Some(SyntaxElement::Node(label.into_syntax())), + Some(SyntaxElement::Token(r_brack_token)), + ], )) } -pub fn md_setext_header(md_paragraph: MdParagraph) -> MdSetextHeader { +pub fn md_setext_header(content: MdInlineItemList, underline_token: SyntaxToken) -> MdSetextHeader { MdSetextHeader::unwrap_cast(SyntaxNode::new_detached( MarkdownSyntaxKind::MD_SETEXT_HEADER, - [Some(SyntaxElement::Node(md_paragraph.into_syntax()))], + [ + Some(SyntaxElement::Node(content.into_syntax())), + Some(SyntaxElement::Token(underline_token)), + ], )) } pub fn md_soft_break(value_token: SyntaxToken) -> MdSoftBreak { @@ -388,25 +571,16 @@ where .map(|item| Some(item.into_syntax().into())), )) } -pub fn md_code_name_list(items: I, separators: S) -> MdCodeNameList +pub fn md_code_name_list(items: I) -> MdCodeNameList where I: IntoIterator, I::IntoIter: ExactSizeIterator, - S: IntoIterator, - S::IntoIter: ExactSizeIterator, { - let mut items = items.into_iter(); - let 
mut separators = separators.into_iter(); - let length = items.len() + separators.len(); MdCodeNameList::unwrap_cast(SyntaxNode::new_detached( MarkdownSyntaxKind::MD_CODE_NAME_LIST, - (0..length).map(|index| { - if index % 2 == 0 { - Some(items.next()?.into_syntax().into()) - } else { - Some(separators.next()?.into()) - } - }), + items + .into_iter() + .map(|item| Some(item.into_syntax().into())), )) } pub fn md_hash_list(items: I) -> MdHashList @@ -421,18 +595,6 @@ where .map(|item| Some(item.into_syntax().into())), )) } -pub fn md_indented_code_line_list(items: I) -> MdIndentedCodeLineList -where - I: IntoIterator, - I::IntoIter: ExactSizeIterator, -{ - MdIndentedCodeLineList::unwrap_cast(SyntaxNode::new_detached( - MarkdownSyntaxKind::MD_INDENTED_CODE_LINE_LIST, - items - .into_iter() - .map(|item| Some(item.into_syntax().into())), - )) -} pub fn md_inline_item_list(items: I) -> MdInlineItemList where I: IntoIterator, @@ -445,18 +607,6 @@ where .map(|item| Some(item.into_syntax().into())), )) } -pub fn md_order_list(items: I) -> MdOrderList -where - I: IntoIterator, - I::IntoIter: ExactSizeIterator, -{ - MdOrderList::unwrap_cast(SyntaxNode::new_detached( - MarkdownSyntaxKind::MD_ORDER_LIST, - items - .into_iter() - .map(|item| Some(item.into_syntax().into())), - )) -} pub fn md_bogus(slots: I) -> MdBogus where I: IntoIterator>, diff --git a/crates/biome_markdown_factory/src/generated/syntax_factory.rs b/crates/biome_markdown_factory/src/generated/syntax_factory.rs index 8a36d836e34b..0eb1dbd122a6 100644 --- a/crates/biome_markdown_factory/src/generated/syntax_factory.rs +++ b/crates/biome_markdown_factory/src/generated/syntax_factory.rs @@ -15,26 +15,55 @@ impl SyntaxFactory for MarkdownSyntaxFactory { ) -> RawSyntaxNode { match kind { MD_BOGUS => RawSyntaxNode::new(kind, children.into_iter().map(Some)), - MD_BULLET => { + MD_AUTOLINK => { let mut elements = (&children).into_iter(); let mut slots: RawNodeSlots<3usize> = RawNodeSlots::default(); let mut 
current_element = elements.next(); if let Some(element) = ¤t_element - && matches!(element.kind(), T ! [-] | T ! [*]) + && element.kind() == T ! [<] { slots.mark_present(); current_element = elements.next(); } slots.next_slot(); if let Some(element) = ¤t_element - && element.kind() == MD_TEXTUAL_LITERAL + && MdInlineItemList::can_cast(element.kind()) { slots.mark_present(); current_element = elements.next(); } slots.next_slot(); if let Some(element) = ¤t_element - && MdInlineItemList::can_cast(element.kind()) + && element.kind() == T ! [>] + { + slots.mark_present(); + current_element = elements.next(); + } + slots.next_slot(); + if current_element.is_some() { + return RawSyntaxNode::new( + MD_AUTOLINK.to_bogus(), + children.into_iter().map(Some), + ); + } + slots.into_node(MD_AUTOLINK, children) + } + MD_BULLET => { + let mut elements = (&children).into_iter(); + let mut slots: RawNodeSlots<2usize> = RawNodeSlots::default(); + let mut current_element = elements.next(); + if let Some(element) = ¤t_element + && matches!( + element.kind(), + T ! [-] | T ! [*] | T ! 
[+] | MD_ORDERED_LIST_MARKER + ) + { + slots.mark_present(); + current_element = elements.next(); + } + slots.next_slot(); + if let Some(element) = ¤t_element + && MdBlockList::can_cast(element.kind()) { slots.mark_present(); current_element = elements.next(); @@ -100,47 +129,52 @@ impl SyntaxFactory for MarkdownSyntaxFactory { } slots.into_node(MD_DOCUMENT, children) } - MD_FENCED_CODE_BLOCK => { + MD_ENTITY_REFERENCE => { let mut elements = (&children).into_iter(); - let mut slots: RawNodeSlots<6usize> = RawNodeSlots::default(); + let mut slots: RawNodeSlots<1usize> = RawNodeSlots::default(); let mut current_element = elements.next(); if let Some(element) = ¤t_element - && element.kind() == T!["```"] + && element.kind() == MD_ENTITY_LITERAL { slots.mark_present(); current_element = elements.next(); } slots.next_slot(); - if let Some(element) = ¤t_element - && MdCodeNameList::can_cast(element.kind()) - { - slots.mark_present(); - current_element = elements.next(); + if current_element.is_some() { + return RawSyntaxNode::new( + MD_ENTITY_REFERENCE.to_bogus(), + children.into_iter().map(Some), + ); } - slots.next_slot(); + slots.into_node(MD_ENTITY_REFERENCE, children) + } + MD_FENCED_CODE_BLOCK => { + let mut elements = (&children).into_iter(); + let mut slots: RawNodeSlots<4usize> = RawNodeSlots::default(); + let mut current_element = elements.next(); if let Some(element) = ¤t_element - && MdHardLine::can_cast(element.kind()) + && matches!(element.kind(), T!["```"] | T ! 
[~~~]) { slots.mark_present(); current_element = elements.next(); } slots.next_slot(); if let Some(element) = ¤t_element - && MdTextual::can_cast(element.kind()) + && MdCodeNameList::can_cast(element.kind()) { slots.mark_present(); current_element = elements.next(); } slots.next_slot(); if let Some(element) = ¤t_element - && MdHardLine::can_cast(element.kind()) + && MdInlineItemList::can_cast(element.kind()) { slots.mark_present(); current_element = elements.next(); } slots.next_slot(); if let Some(element) = ¤t_element - && element.kind() == T!["```"] + && matches!(element.kind(), T!["```"] | T ! [~~~]) { slots.mark_present(); current_element = elements.next(); @@ -227,7 +261,7 @@ impl SyntaxFactory for MarkdownSyntaxFactory { let mut slots: RawNodeSlots<1usize> = RawNodeSlots::default(); let mut current_element = elements.next(); if let Some(element) = ¤t_element - && MdTextual::can_cast(element.kind()) + && MdInlineItemList::can_cast(element.kind()) { slots.mark_present(); current_element = elements.next(); @@ -265,7 +299,7 @@ impl SyntaxFactory for MarkdownSyntaxFactory { let mut slots: RawNodeSlots<1usize> = RawNodeSlots::default(); let mut current_element = elements.next(); if let Some(element) = ¤t_element - && MdIndentedCodeLineList::can_cast(element.kind()) + && MdInlineItemList::can_cast(element.kind()) { slots.mark_present(); current_element = elements.next(); @@ -279,32 +313,6 @@ impl SyntaxFactory for MarkdownSyntaxFactory { } slots.into_node(MD_INDENT_CODE_BLOCK, children) } - MD_INDENTED_CODE_LINE => { - let mut elements = (&children).into_iter(); - let mut slots: RawNodeSlots<2usize> = RawNodeSlots::default(); - let mut current_element = elements.next(); - if let Some(element) = ¤t_element - && MdIndent::can_cast(element.kind()) - { - slots.mark_present(); - current_element = elements.next(); - } - slots.next_slot(); - if let Some(element) = ¤t_element - && MdTextual::can_cast(element.kind()) - { - slots.mark_present(); - current_element = 
elements.next(); - } - slots.next_slot(); - if current_element.is_some() { - return RawSyntaxNode::new( - MD_INDENTED_CODE_LINE.to_bogus(), - children.into_iter().map(Some), - ); - } - slots.into_node(MD_INDENTED_CODE_LINE, children) - } MD_INLINE_CODE => { let mut elements = (&children).into_iter(); let mut slots: RawNodeSlots<3usize> = RawNodeSlots::default(); @@ -371,17 +379,29 @@ impl SyntaxFactory for MarkdownSyntaxFactory { } slots.into_node(MD_INLINE_EMPHASIS, children) } - MD_INLINE_IMAGE => { + MD_INLINE_HTML => { let mut elements = (&children).into_iter(); - let mut slots: RawNodeSlots<6usize> = RawNodeSlots::default(); + let mut slots: RawNodeSlots<1usize> = RawNodeSlots::default(); let mut current_element = elements.next(); if let Some(element) = ¤t_element - && element.kind() == T!['['] + && MdInlineItemList::can_cast(element.kind()) { slots.mark_present(); current_element = elements.next(); } slots.next_slot(); + if current_element.is_some() { + return RawSyntaxNode::new( + MD_INLINE_HTML.to_bogus(), + children.into_iter().map(Some), + ); + } + slots.into_node(MD_INLINE_HTML, children) + } + MD_INLINE_IMAGE => { + let mut elements = (&children).into_iter(); + let mut slots: RawNodeSlots<8usize> = RawNodeSlots::default(); + let mut current_element = elements.next(); if let Some(element) = ¤t_element && element.kind() == T![!] 
{ @@ -390,14 +410,14 @@ impl SyntaxFactory for MarkdownSyntaxFactory { } slots.next_slot(); if let Some(element) = ¤t_element - && MdInlineImageAlt::can_cast(element.kind()) + && element.kind() == T!['['] { slots.mark_present(); current_element = elements.next(); } slots.next_slot(); if let Some(element) = ¤t_element - && MdInlineImageSource::can_cast(element.kind()) + && MdInlineItemList::can_cast(element.kind()) { slots.mark_present(); current_element = elements.next(); @@ -411,7 +431,28 @@ impl SyntaxFactory for MarkdownSyntaxFactory { } slots.next_slot(); if let Some(element) = ¤t_element - && MdInlineImageLink::can_cast(element.kind()) + && element.kind() == T!['('] + { + slots.mark_present(); + current_element = elements.next(); + } + slots.next_slot(); + if let Some(element) = ¤t_element + && MdInlineItemList::can_cast(element.kind()) + { + slots.mark_present(); + current_element = elements.next(); + } + slots.next_slot(); + if let Some(element) = ¤t_element + && MdLinkTitle::can_cast(element.kind()) + { + slots.mark_present(); + current_element = elements.next(); + } + slots.next_slot(); + if let Some(element) = ¤t_element + && element.kind() == T![')'] { slots.mark_present(); current_element = elements.next(); @@ -425,12 +466,12 @@ impl SyntaxFactory for MarkdownSyntaxFactory { } slots.into_node(MD_INLINE_IMAGE, children) } - MD_INLINE_IMAGE_ALT => { + MD_INLINE_ITALIC => { let mut elements = (&children).into_iter(); let mut slots: RawNodeSlots<3usize> = RawNodeSlots::default(); let mut current_element = elements.next(); if let Some(element) = ¤t_element - && element.kind() == T!['['] + && matches!(element.kind(), T ! [*] | T!["_"]) { slots.mark_present(); current_element = elements.next(); @@ -444,7 +485,7 @@ impl SyntaxFactory for MarkdownSyntaxFactory { } slots.next_slot(); if let Some(element) = ¤t_element - && element.kind() == T![']'] + && matches!(element.kind(), T ! 
[*] | T!["_"]) { slots.mark_present(); current_element = elements.next(); @@ -452,16 +493,37 @@ impl SyntaxFactory for MarkdownSyntaxFactory { slots.next_slot(); if current_element.is_some() { return RawSyntaxNode::new( - MD_INLINE_IMAGE_ALT.to_bogus(), + MD_INLINE_ITALIC.to_bogus(), children.into_iter().map(Some), ); } - slots.into_node(MD_INLINE_IMAGE_ALT, children) + slots.into_node(MD_INLINE_ITALIC, children) } - MD_INLINE_IMAGE_LINK => { + MD_INLINE_LINK => { let mut elements = (&children).into_iter(); - let mut slots: RawNodeSlots<3usize> = RawNodeSlots::default(); + let mut slots: RawNodeSlots<7usize> = RawNodeSlots::default(); let mut current_element = elements.next(); + if let Some(element) = ¤t_element + && element.kind() == T!['['] + { + slots.mark_present(); + current_element = elements.next(); + } + slots.next_slot(); + if let Some(element) = ¤t_element + && MdInlineItemList::can_cast(element.kind()) + { + slots.mark_present(); + current_element = elements.next(); + } + slots.next_slot(); + if let Some(element) = ¤t_element + && element.kind() == T![']'] + { + slots.mark_present(); + current_element = elements.next(); + } + slots.next_slot(); if let Some(element) = ¤t_element && element.kind() == T!['('] { @@ -476,6 +538,13 @@ impl SyntaxFactory for MarkdownSyntaxFactory { current_element = elements.next(); } slots.next_slot(); + if let Some(element) = ¤t_element + && MdLinkTitle::can_cast(element.kind()) + { + slots.mark_present(); + current_element = elements.next(); + } + slots.next_slot(); if let Some(element) = ¤t_element && element.kind() == T![')'] { @@ -485,32 +554,32 @@ impl SyntaxFactory for MarkdownSyntaxFactory { slots.next_slot(); if current_element.is_some() { return RawSyntaxNode::new( - MD_INLINE_IMAGE_LINK.to_bogus(), + MD_INLINE_LINK.to_bogus(), children.into_iter().map(Some), ); } - slots.into_node(MD_INLINE_IMAGE_LINK, children) + slots.into_node(MD_INLINE_LINK, children) } - MD_INLINE_IMAGE_SOURCE => { + MD_LINK_BLOCK => { let mut 
elements = (&children).into_iter(); let mut slots: RawNodeSlots<3usize> = RawNodeSlots::default(); let mut current_element = elements.next(); if let Some(element) = ¤t_element - && element.kind() == T!['('] + && MdTextual::can_cast(element.kind()) { slots.mark_present(); current_element = elements.next(); } slots.next_slot(); if let Some(element) = ¤t_element - && MdInlineItemList::can_cast(element.kind()) + && MdTextual::can_cast(element.kind()) { slots.mark_present(); current_element = elements.next(); } slots.next_slot(); if let Some(element) = ¤t_element - && element.kind() == T![')'] + && MdTextual::can_cast(element.kind()) { slots.mark_present(); current_element = elements.next(); @@ -518,23 +587,16 @@ impl SyntaxFactory for MarkdownSyntaxFactory { slots.next_slot(); if current_element.is_some() { return RawSyntaxNode::new( - MD_INLINE_IMAGE_SOURCE.to_bogus(), + MD_LINK_BLOCK.to_bogus(), children.into_iter().map(Some), ); } - slots.into_node(MD_INLINE_IMAGE_SOURCE, children) + slots.into_node(MD_LINK_BLOCK, children) } - MD_INLINE_ITALIC => { + MD_LINK_DESTINATION => { let mut elements = (&children).into_iter(); - let mut slots: RawNodeSlots<3usize> = RawNodeSlots::default(); + let mut slots: RawNodeSlots<1usize> = RawNodeSlots::default(); let mut current_element = elements.next(); - if let Some(element) = ¤t_element - && matches!(element.kind(), T ! 
[*] | T!["_"]) - { - slots.mark_present(); - current_element = elements.next(); - } - slots.next_slot(); if let Some(element) = ¤t_element && MdInlineItemList::can_cast(element.kind()) { @@ -542,8 +604,20 @@ impl SyntaxFactory for MarkdownSyntaxFactory { current_element = elements.next(); } slots.next_slot(); + if current_element.is_some() { + return RawSyntaxNode::new( + MD_LINK_DESTINATION.to_bogus(), + children.into_iter().map(Some), + ); + } + slots.into_node(MD_LINK_DESTINATION, children) + } + MD_LINK_LABEL => { + let mut elements = (&children).into_iter(); + let mut slots: RawNodeSlots<1usize> = RawNodeSlots::default(); + let mut current_element = elements.next(); if let Some(element) = ¤t_element - && matches!(element.kind(), T ! [*] | T!["_"]) + && MdInlineItemList::can_cast(element.kind()) { slots.mark_present(); current_element = elements.next(); @@ -551,13 +625,13 @@ impl SyntaxFactory for MarkdownSyntaxFactory { slots.next_slot(); if current_element.is_some() { return RawSyntaxNode::new( - MD_INLINE_ITALIC.to_bogus(), + MD_LINK_LABEL.to_bogus(), children.into_iter().map(Some), ); } - slots.into_node(MD_INLINE_ITALIC, children) + slots.into_node(MD_LINK_LABEL, children) } - MD_INLINE_LINK => { + MD_LINK_REFERENCE_DEFINITION => { let mut elements = (&children).into_iter(); let mut slots: RawNodeSlots<6usize> = RawNodeSlots::default(); let mut current_element = elements.next(); @@ -569,7 +643,7 @@ impl SyntaxFactory for MarkdownSyntaxFactory { } slots.next_slot(); if let Some(element) = ¤t_element - && MdInlineItemList::can_cast(element.kind()) + && MdLinkLabel::can_cast(element.kind()) { slots.mark_present(); current_element = elements.next(); @@ -583,21 +657,21 @@ impl SyntaxFactory for MarkdownSyntaxFactory { } slots.next_slot(); if let Some(element) = ¤t_element - && element.kind() == T!['('] + && element.kind() == T ! 
[:] { slots.mark_present(); current_element = elements.next(); } slots.next_slot(); if let Some(element) = ¤t_element - && MdInlineItemList::can_cast(element.kind()) + && MdLinkDestination::can_cast(element.kind()) { slots.mark_present(); current_element = elements.next(); } slots.next_slot(); if let Some(element) = ¤t_element - && element.kind() == T![')'] + && MdLinkTitle::can_cast(element.kind()) { slots.mark_present(); current_element = elements.next(); @@ -605,32 +679,37 @@ impl SyntaxFactory for MarkdownSyntaxFactory { slots.next_slot(); if current_element.is_some() { return RawSyntaxNode::new( - MD_INLINE_LINK.to_bogus(), + MD_LINK_REFERENCE_DEFINITION.to_bogus(), children.into_iter().map(Some), ); } - slots.into_node(MD_INLINE_LINK, children) + slots.into_node(MD_LINK_REFERENCE_DEFINITION, children) } - MD_LINK_BLOCK => { + MD_LINK_TITLE => { let mut elements = (&children).into_iter(); - let mut slots: RawNodeSlots<3usize> = RawNodeSlots::default(); + let mut slots: RawNodeSlots<1usize> = RawNodeSlots::default(); let mut current_element = elements.next(); if let Some(element) = ¤t_element - && MdTextual::can_cast(element.kind()) + && MdInlineItemList::can_cast(element.kind()) { slots.mark_present(); current_element = elements.next(); } slots.next_slot(); - if let Some(element) = ¤t_element - && MdTextual::can_cast(element.kind()) - { - slots.mark_present(); - current_element = elements.next(); + if current_element.is_some() { + return RawSyntaxNode::new( + MD_LINK_TITLE.to_bogus(), + children.into_iter().map(Some), + ); } - slots.next_slot(); + slots.into_node(MD_LINK_TITLE, children) + } + MD_NEWLINE => { + let mut elements = (&children).into_iter(); + let mut slots: RawNodeSlots<1usize> = RawNodeSlots::default(); + let mut current_element = elements.next(); if let Some(element) = ¤t_element - && MdTextual::can_cast(element.kind()) + && element.kind() == NEWLINE { slots.mark_present(); current_element = elements.next(); @@ -638,13 +717,13 @@ impl 
SyntaxFactory for MarkdownSyntaxFactory { slots.next_slot(); if current_element.is_some() { return RawSyntaxNode::new( - MD_LINK_BLOCK.to_bogus(), + MD_NEWLINE.to_bogus(), children.into_iter().map(Some), ); } - slots.into_node(MD_LINK_BLOCK, children) + slots.into_node(MD_NEWLINE, children) } - MD_ORDER_LIST_ITEM => { + MD_ORDERED_LIST_ITEM => { let mut elements = (&children).into_iter(); let mut slots: RawNodeSlots<1usize> = RawNodeSlots::default(); let mut current_element = elements.next(); @@ -657,11 +736,11 @@ impl SyntaxFactory for MarkdownSyntaxFactory { slots.next_slot(); if current_element.is_some() { return RawSyntaxNode::new( - MD_ORDER_LIST_ITEM.to_bogus(), + MD_ORDERED_LIST_ITEM.to_bogus(), children.into_iter().map(Some), ); } - slots.into_node(MD_ORDER_LIST_ITEM, children) + slots.into_node(MD_ORDERED_LIST_ITEM, children) } MD_PARAGRAPH => { let mut elements = (&children).into_iter(); @@ -691,10 +770,17 @@ impl SyntaxFactory for MarkdownSyntaxFactory { } MD_QUOTE => { let mut elements = (&children).into_iter(); - let mut slots: RawNodeSlots<1usize> = RawNodeSlots::default(); + let mut slots: RawNodeSlots<2usize> = RawNodeSlots::default(); let mut current_element = elements.next(); if let Some(element) = ¤t_element - && AnyMdBlock::can_cast(element.kind()) + && element.kind() == T ! [>] + { + slots.mark_present(); + current_element = elements.next(); + } + slots.next_slot(); + if let Some(element) = ¤t_element + && MdBlockList::can_cast(element.kind()) { slots.mark_present(); current_element = elements.next(); @@ -705,12 +791,139 @@ impl SyntaxFactory for MarkdownSyntaxFactory { } slots.into_node(MD_QUOTE, children) } + MD_REFERENCE_IMAGE => { + let mut elements = (&children).into_iter(); + let mut slots: RawNodeSlots<5usize> = RawNodeSlots::default(); + let mut current_element = elements.next(); + if let Some(element) = ¤t_element + && element.kind() == T![!] 
+ { + slots.mark_present(); + current_element = elements.next(); + } + slots.next_slot(); + if let Some(element) = ¤t_element + && element.kind() == T!['['] + { + slots.mark_present(); + current_element = elements.next(); + } + slots.next_slot(); + if let Some(element) = ¤t_element + && MdInlineItemList::can_cast(element.kind()) + { + slots.mark_present(); + current_element = elements.next(); + } + slots.next_slot(); + if let Some(element) = ¤t_element + && element.kind() == T![']'] + { + slots.mark_present(); + current_element = elements.next(); + } + slots.next_slot(); + if let Some(element) = ¤t_element + && MdReferenceLinkLabel::can_cast(element.kind()) + { + slots.mark_present(); + current_element = elements.next(); + } + slots.next_slot(); + if current_element.is_some() { + return RawSyntaxNode::new( + MD_REFERENCE_IMAGE.to_bogus(), + children.into_iter().map(Some), + ); + } + slots.into_node(MD_REFERENCE_IMAGE, children) + } + MD_REFERENCE_LINK => { + let mut elements = (&children).into_iter(); + let mut slots: RawNodeSlots<4usize> = RawNodeSlots::default(); + let mut current_element = elements.next(); + if let Some(element) = ¤t_element + && element.kind() == T!['['] + { + slots.mark_present(); + current_element = elements.next(); + } + slots.next_slot(); + if let Some(element) = ¤t_element + && MdInlineItemList::can_cast(element.kind()) + { + slots.mark_present(); + current_element = elements.next(); + } + slots.next_slot(); + if let Some(element) = ¤t_element + && element.kind() == T![']'] + { + slots.mark_present(); + current_element = elements.next(); + } + slots.next_slot(); + if let Some(element) = ¤t_element + && MdReferenceLinkLabel::can_cast(element.kind()) + { + slots.mark_present(); + current_element = elements.next(); + } + slots.next_slot(); + if current_element.is_some() { + return RawSyntaxNode::new( + MD_REFERENCE_LINK.to_bogus(), + children.into_iter().map(Some), + ); + } + slots.into_node(MD_REFERENCE_LINK, children) + } + 
MD_REFERENCE_LINK_LABEL => { + let mut elements = (&children).into_iter(); + let mut slots: RawNodeSlots<3usize> = RawNodeSlots::default(); + let mut current_element = elements.next(); + if let Some(element) = ¤t_element + && element.kind() == T!['['] + { + slots.mark_present(); + current_element = elements.next(); + } + slots.next_slot(); + if let Some(element) = ¤t_element + && MdInlineItemList::can_cast(element.kind()) + { + slots.mark_present(); + current_element = elements.next(); + } + slots.next_slot(); + if let Some(element) = ¤t_element + && element.kind() == T![']'] + { + slots.mark_present(); + current_element = elements.next(); + } + slots.next_slot(); + if current_element.is_some() { + return RawSyntaxNode::new( + MD_REFERENCE_LINK_LABEL.to_bogus(), + children.into_iter().map(Some), + ); + } + slots.into_node(MD_REFERENCE_LINK_LABEL, children) + } MD_SETEXT_HEADER => { let mut elements = (&children).into_iter(); - let mut slots: RawNodeSlots<1usize> = RawNodeSlots::default(); + let mut slots: RawNodeSlots<2usize> = RawNodeSlots::default(); let mut current_element = elements.next(); if let Some(element) = ¤t_element - && MdParagraph::can_cast(element.kind()) + && MdInlineItemList::can_cast(element.kind()) + { + slots.mark_present(); + current_element = elements.next(); + } + slots.next_slot(); + if let Some(element) = ¤t_element + && element.kind() == MD_SETEXT_UNDERLINE_LITERAL { slots.mark_present(); current_element = elements.next(); @@ -783,21 +996,11 @@ impl SyntaxFactory for MarkdownSyntaxFactory { } MD_BLOCK_LIST => Self::make_node_list_syntax(kind, children, AnyMdBlock::can_cast), MD_BULLET_LIST => Self::make_node_list_syntax(kind, children, MdBullet::can_cast), - MD_CODE_NAME_LIST => Self::make_separated_list_syntax( - kind, - children, - MdTextual::can_cast, - T ! 
[,], - false, - ), + MD_CODE_NAME_LIST => Self::make_node_list_syntax(kind, children, MdTextual::can_cast), MD_HASH_LIST => Self::make_node_list_syntax(kind, children, MdHash::can_cast), - MD_INDENTED_CODE_LINE_LIST => { - Self::make_node_list_syntax(kind, children, MdIndentedCodeLine::can_cast) - } MD_INLINE_ITEM_LIST => { Self::make_node_list_syntax(kind, children, AnyMdInline::can_cast) } - MD_ORDER_LIST => Self::make_node_list_syntax(kind, children, AnyCodeBlock::can_cast), _ => unreachable!("Is {:?} a token?", kind), } } diff --git a/crates/biome_markdown_parser/Cargo.toml b/crates/biome_markdown_parser/Cargo.toml index c0554874ed43..c4936ea721f3 100644 --- a/crates/biome_markdown_parser/Cargo.toml +++ b/crates/biome_markdown_parser/Cargo.toml @@ -15,6 +15,11 @@ publish = false [package.metadata.workspaces] independent = true +[features] +# Enables test utilities (to_html module) for CommonMark spec compliance testing. +# Not included in production builds to avoid unnecessary dependencies and code. +test_utils = ["dep:htmlize"] + [dependencies] biome_console = { workspace = true } biome_diagnostics = { workspace = true } @@ -26,12 +31,22 @@ biome_unicode_table = { workspace = true } tracing = { workspace = true } unicode-bom = { workspace = true } +# Optional dependency for test_utils feature (HTML rendering for spec tests) +htmlize = { version = "1.0.6", features = ["unescape"], optional = true } + [dev-dependencies] biome_test_utils = { path = "../biome_test_utils" } insta = { workspace = true } quickcheck = { workspace = true } quickcheck_macros = { workspace = true } +serde = { workspace = true, features = ["derive"] } +serde_json = { workspace = true } tests_macros = { path = "../tests_macros" } +# Self-dependency to enable test_utils for integration tests. +# Integration tests are compiled as separate crates and can only access public API, +# so we need to enable the feature here to make to_html available for spec tests. 
+biome_markdown_parser = { path = ".", features = ["test_utils"] } + [lints] workspace = true diff --git a/crates/biome_markdown_parser/src/lexer/mod.rs b/crates/biome_markdown_parser/src/lexer/mod.rs index 650e770b9aec..070ee2b29479 100644 --- a/crates/biome_markdown_parser/src/lexer/mod.rs +++ b/crates/biome_markdown_parser/src/lexer/mod.rs @@ -1,4 +1,4 @@ -//! An extremely fast, lookup table based, JSON lexer which yields SyntaxKind tokens used by the rome-json parser. +//! An extremely fast, lookup table based, Markdown lexer which yields SyntaxKind tokens used by the biome-markdown parser. #[rustfmt::skip] mod tests; @@ -10,12 +10,35 @@ use biome_parser::lexer::{ LexContext, Lexer, LexerCheckpoint, LexerWithCheckpoint, ReLexer, TokenFlags, }; use biome_rowan::{SyntaxKind, TextSize}; -use biome_unicode_table::{Dispatch::*, lookup_byte}; - +use biome_unicode_table::Dispatch::{self, AMP, *}; +use biome_unicode_table::lookup_byte; + +/// Lexer context for different markdown parsing modes. +/// +/// Different contexts affect how the lexer tokenizes input: +/// - `Regular`: Normal markdown parsing with inline element detection +/// - `FencedCodeBlock`: Inside fenced code block, no markdown parsing +/// - `HtmlBlock`: Inside HTML block, minimal markdown parsing +/// - `LinkDefinition`: Inside link reference definition, whitespace separates tokens #[derive(Debug, Copy, Clone, Eq, PartialEq, Default)] pub enum MarkdownLexContext { + /// Normal markdown parsing with full inline element detection. #[default] Regular, + /// Inside a fenced code block - content is treated as raw text. + /// No markdown parsing occurs within fenced code blocks. + /// Reserved for context-aware lexing in fenced code blocks. + #[expect(dead_code)] + FencedCodeBlock, + /// Inside an HTML block - content is treated as raw HTML. + /// Minimal markdown parsing, primarily looking for block end conditions. + /// Reserved for context-aware lexing in HTML blocks. 
+ #[expect(dead_code)] + HtmlBlock, + /// Inside a link reference definition (after `]:`). + /// In this context, whitespace is significant and separates destination from title. + /// Text tokens stop at whitespace to allow proper parsing. + LinkDefinition, } impl LexContext for MarkdownLexContext { @@ -26,11 +49,14 @@ impl LexContext for MarkdownLexContext { } /// Context in which the [MarkdownLexContext]'s current should be re-lexed. +/// Used for re-lexing scenarios where context changes how tokens are parsed. #[derive(Debug, Copy, Clone, Eq, PartialEq)] pub enum MarkdownReLexContext { + /// Re-lex using regular markdown rules. #[expect(dead_code)] Regular, - // UnicodeRange, + /// Re-lex for link definition context where whitespace is significant. + LinkDefinition, } /// An extremely fast, lookup table based, lossless Markdown lexer @@ -60,6 +86,7 @@ pub(crate) struct MarkdownLexer<'src> { current_flags: TokenFlags, diagnostics: Vec, + force_ordered_list_marker: bool, } impl<'src> Lexer<'src> for MarkdownLexer<'src> { @@ -90,12 +117,12 @@ impl<'src> Lexer<'src> for MarkdownLexer<'src> { self.diagnostics.push(diagnostic); } - fn next_token(&mut self, _context: Self::LexContext) -> Self::Kind { + fn next_token(&mut self, context: Self::LexContext) -> Self::Kind { self.current_start = self.text_position(); self.current_flags = TokenFlags::empty(); let kind = match self.current_byte() { - Some(current) => self.consume_token(current), + Some(current) => self.consume_token(current, context), None => EOF, }; @@ -103,7 +130,15 @@ impl<'src> Lexer<'src> for MarkdownLexer<'src> { .set(TokenFlags::PRECEDING_LINE_BREAK, self.after_newline); self.current_kind = kind; - if !kind.is_trivia() { + // Reset after_newline for non-trivia tokens, except NEWLINE itself. + // NEWLINE sets after_newline=true in consume_newline() and we preserve that. + // This ensures the *next* token (after NEWLINE) has PRECEDING_LINE_BREAK set. 
+ if !kind.is_trivia() + && kind != NEWLINE + && !(kind == MD_TEXTUAL_LITERAL + && self.after_newline + && self.current_text_is_whitespace()) + { self.after_newline = false; } @@ -166,30 +201,303 @@ impl<'src> MarkdownLexer<'src> { pub fn from_str(source: &'src str) -> Self { Self { source, - after_newline: false, + // Start of document is treated as start of line for indentation purposes + after_newline: true, unicode_bom_length: 0, current_kind: TOMBSTONE, current_start: TextSize::from(0), current_flags: TokenFlags::empty(), position: 0, diagnostics: vec![], + force_ordered_list_marker: false, } } - pub(crate) fn consume_token(&mut self, current: u8) -> MarkdownSyntaxKind { + pub fn set_force_ordered_list_marker(&mut self, value: bool) { + self.force_ordered_list_marker = value; + } + + pub(crate) fn consume_token( + &mut self, + current: u8, + context: MarkdownLexContext, + ) -> MarkdownSyntaxKind { let dispatched = lookup_byte(current); match dispatched { - WHS => self.consume_newline_or_whitespace(), - MUL | MIN | IDT => self.consume_thematic_break_literal(), - _ => self.consume_textual(), + // Whitespace handling depends on context: + // - At start of line (after_newline): whitespace is significant for indentation + // detection (e.g., 4+ spaces = code block), so emit as separate tokens + // - In middle of line: whitespace is just text content, include in textual token + // - Exception: 2+ spaces before newline is a hard line break + // - In LinkDefinition context: whitespace is always significant (separates destination from title) + WHS => { + if current == b'\n' || current == b'\r' { + self.consume_newline() + } else if matches!(context, MarkdownLexContext::LinkDefinition) { + // In link definition context, whitespace separates tokens. + // We consume it as textual literal so it's not treated as trivia by the parser. 
+ self.consume_link_definition_whitespace() + } else if self.after_newline && matches!(current, b' ' | b'\t') { + // At line start, emit single whitespace tokens to allow + // indentation handling and quote marker spacing. + self.consume_single_whitespace_as_text() + } else if matches!(current, b' ' | b'\t') && self.is_after_block_quote_marker() { + // After a block quote marker, emit a single whitespace token + // so the parser can skip the optional space. + self.consume_single_whitespace_as_text() + } else if current == b' ' && self.is_potential_hard_line_break() { + // Handle hard line break (2+ spaces before newline) mid-line + self.consume_whitespace() + } else { + // Whitespace is part of text in Markdown. + self.consume_textual(context) + } + } + MUL | MIN | IDT => self.consume_thematic_break_or_emphasis(dispatched, context), + PLS => self.consume_byte(PLUS), + HAS => self.consume_hash(), + TPL => self.consume_backtick(), + TLD => self.consume_tilde(), + MOR => self.consume_byte(R_ANGLE), + LSS => self.consume_byte(L_ANGLE), + EXL => self.consume_byte(BANG), + BTO => self.consume_byte(L_BRACK), + BTC => self.consume_byte(R_BRACK), + PNO => self.consume_byte(L_PAREN), + PNC => self.consume_byte(R_PAREN), + COL => self.consume_byte(COLON), + AMP => self.consume_entity_or_textual(context), + BSL => self.consume_escape(), + // = at line start could be setext heading underline + EQL if self.after_newline => self.consume_setext_underline_or_textual(), + _ => { + // Check for ordered list markers: digits followed by . or ) at line start + if current.is_ascii_digit() + && (self.after_newline || self.force_ordered_list_marker) + { + self.consume_ordered_list_marker_or_textual() + } else { + self.consume_textual(context) + } + } + } + } + + /// Consume a backslash escape sequence. 
+ /// + /// Per CommonMark spec: + /// - Backslash before ASCII punctuation makes it literal + /// - Backslash before newline is a hard line break + /// + /// Escapable: `!"#$%&'()*+,-./:;<=>?@[\]^_\`{|}~` + fn consume_escape(&mut self) -> MarkdownSyntaxKind { + self.assert_at_char_boundary(); + + // Consume the backslash + self.advance(1); + + // Check for hard line break: backslash followed by newline + if matches!(self.current_byte(), Some(b'\n' | b'\r')) { + match self.current_byte() { + Some(b'\n') => { + self.advance(1); + } + Some(b'\r') => { + if self.peek_byte() == Some(b'\n') { + self.advance(2); + } else { + self.advance(1); + } + } + _ => {} + } + self.after_newline = true; + return MD_HARD_LINE_LITERAL; + } + + // Check if next character is escapable ASCII punctuation + if let Some(next) = self.current_byte() + && matches!( + next, + b'!' | b'"' + | b'#' + | b'$' + | b'%' + | b'&' + | b'\'' + | b'(' + | b')' + | b'*' + | b'+' + | b',' + | b'-' + | b'.' + | b'/' + | b':' + | b';' + | b'<' + | b'=' + | b'>' + | b'?' + | b'@' + | b'[' + | b'\\' + | b']' + | b'^' + | b'_' + | b'`' + | b'{' + | b'|' + | b'}' + | b'~' + ) + { + // Consume the escaped character too + self.advance(1); + } + + MD_TEXTUAL_LITERAL + } + + /// Try to consume an entity or numeric character reference per CommonMark §6.2. + /// + /// Valid patterns: + /// - Named entity: `&name;` where name is 2-31 alphanumeric chars starting with letter + /// - Decimal numeric: `&#digits;` where digits is 1-7 decimal digits + /// - Hexadecimal: `&#xhex;` or `&#Xhex;` where hex is 1-6 hex digits + /// + /// If not valid, falls back to consuming as textual. 
+ fn consume_entity_or_textual(&mut self, context: MarkdownLexContext) -> MarkdownSyntaxKind { + self.assert_at_char_boundary(); + debug_assert!(matches!(self.current_byte(), Some(b'&'))); + + // Try to match entity reference pattern + if let Some(entity_len) = self.match_entity_reference() { + self.advance(entity_len); + return MD_ENTITY_LITERAL; } + + // Not a valid entity - consume as textual + self.consume_textual(context) + } + + /// Check if text at current position matches a valid entity reference pattern. + /// Returns the length of the entity if valid, None otherwise. + /// + /// Patterns per CommonMark §6.2: + /// - Named: `&name;` where name is 2-31 alphanumeric chars starting with letter + /// - Decimal: `&#digits;` where digits is 1-7 decimal digits + /// - Hex: `&#xhex;` or `&#Xhex;` where hex is 1-6 hex digits + fn match_entity_reference(&self) -> Option { + // Must start with & + if self.byte_at(0) != Some(b'&') { + return None; + } + + let next = self.byte_at(1)?; + + if next == b'#' { + // Numeric character reference + self.match_numeric_entity() + } else if next.is_ascii_alphabetic() { + // Named entity reference + self.match_named_entity() + } else { + None + } + } + + /// Match a named entity reference: `&name;` + /// Name must be 2-31 alphanumeric chars starting with a letter. 
+ fn match_named_entity(&self) -> Option { + self.match_entity_with(1, 2, 31, |byte, index| { + if index == 0 { + byte.is_ascii_alphabetic() + } else { + byte.is_ascii_alphanumeric() + } + }) + } + + /// Match a numeric entity reference: `&#digits;` or `&#xhex;` / `&#Xhex;` + fn match_numeric_entity(&self) -> Option { + // Position 0 is '&', position 1 is '#' + let next = self.byte_at(2)?; + + if next == b'x' || next == b'X' { + // Hexadecimal: &#xhex; or &#Xhex; + self.match_hex_entity() + } else if next.is_ascii_digit() { + // Decimal: &#digits; + self.match_decimal_entity() + } else { + None + } + } + + /// Match a decimal numeric entity: `&#digits;` (1-7 decimal digits) + fn match_decimal_entity(&self) -> Option { + self.match_entity_with(2, 1, 7, |byte, _| byte.is_ascii_digit()) + } + + /// Match a hexadecimal numeric entity: `&#xhex;` or `&#Xhex;` (1-6 hex digits) + fn match_hex_entity(&self) -> Option { + self.match_entity_with(3, 1, 6, |byte, _| byte.is_ascii_hexdigit()) + } + + fn match_entity_with( + &self, + start: usize, + min_len: usize, + max_len: usize, + is_valid: F, + ) -> Option + where + F: Fn(u8, usize) -> bool, + { + let mut i = start; + let mut count = 0usize; + + while let Some(byte) = self.byte_at(i) { + if byte == b';' { + if (min_len..=max_len).contains(&count) { + return Some(i + 1); + } + return None; + } + + if is_valid(byte, count) { + count += 1; + if count > max_len { + return None; + } + i += 1; + } else { + return None; + } + } + + None } fn text_position(&self) -> TextSize { TextSize::try_from(self.position).expect("Input to be smaller than 4 GB") } - /// Bumps the current byte and creates a lexed token of the passed in kind + /// Returns true if the current token text is only spaces or tabs. 
+ fn current_text_is_whitespace(&self) -> bool { + let start = u32::from(self.current_start) as usize; + let end = self.position; + if start >= end || end > self.source.len() { + return false; + } + self.source.as_bytes()[start..end] + .iter() + .all(|b| *b == b' ' || *b == b'\t') + } + + /// Bumps the current byte and creates a lexed token of the passed in kind. + /// Reserved for single-byte token consumption patterns. #[expect(dead_code)] fn eat_byte(&mut self, tok: MarkdownSyntaxKind) -> MarkdownSyntaxKind { self.advance(1); @@ -210,19 +518,6 @@ impl<'src> MarkdownLexer<'src> { self.byte_at(1) } - /// Consume one newline or all whitespace until a non-whitespace or a newline is found. - /// - /// ## Safety - /// Must be called at a valid UT8 char boundary - fn consume_newline_or_whitespace(&mut self) -> MarkdownSyntaxKind { - match self.current_byte() { - Some(b'\n' | b'\r') => self.consume_newline(), - Some(b' ') => self.consume_whitespace(), - Some(b'\t') => self.consume_tab(), - _ => self.consume_textual(), - } - } - /// Consume just one newline/line break. /// /// ## Safety @@ -248,52 +543,385 @@ impl<'src> MarkdownLexer<'src> { } /// Consumes all whitespace until a non-whitespace or a newline is found. + /// If there are 2+ spaces followed by a newline, emits MD_HARD_LINE_LITERAL. 
/// /// ## Safety /// Must be called at a valid UT8 char boundary fn consume_whitespace(&mut self) -> MarkdownSyntaxKind { self.assert_at_char_boundary(); + + let mut space_count = 0; while let Some(b' ') = self.current_byte() { self.advance(1); + space_count += 1; + } + + // Check for hard line break: 2+ spaces followed by newline + if space_count >= 2 && matches!(self.current_byte(), Some(b'\n' | b'\r')) { + // Consume the newline as part of the hard line break + match self.current_byte() { + Some(b'\n') => { + self.advance(1); + } + Some(b'\r') => { + if self.peek_byte() == Some(b'\n') { + self.advance(2); + } else { + self.advance(1); + } + } + _ => {} + } + self.after_newline = true; + return MD_HARD_LINE_LITERAL; } WHITESPACE } - fn consume_tab(&mut self) -> MarkdownSyntaxKind { + /// Consumes whitespace in LinkDefinition context as textual literal. + /// This prevents it from being treated as trivia by the parser, which is critical + /// for correctly parsing link reference definitions where whitespace is a significant separator. + /// + /// ## Safety + /// Must be called at a valid UTF-8 char boundary + fn consume_link_definition_whitespace(&mut self) -> MarkdownSyntaxKind { self.assert_at_char_boundary(); - if matches!(self.current_byte(), Some(b'\t')) { - self.advance(1) + while let Some(byte) = self.current_byte() { + if byte == b' ' || byte == b'\t' { + self.advance(1); + } else { + break; + } } - TAB + + MD_TEXTUAL_LITERAL } - fn consume_thematic_break_literal(&mut self) -> MarkdownSyntaxKind { + /// Consume a single whitespace character at line start as text.
+ fn consume_single_whitespace_as_text(&mut self) -> MarkdownSyntaxKind { self.assert_at_char_boundary(); - let start_char = match self.current_byte() { - Some(b'-') => b'-', - Some(b'*') => b'*', - Some(b'_') => b'_', - _ => return self.consume_textual(), + if matches!(self.current_byte(), Some(b' ' | b'\t')) { + self.advance(1); + } + + MD_TEXTUAL_LITERAL + } + + /// Returns true if the current whitespace follows a block quote marker at line start. + fn is_after_block_quote_marker(&self) -> bool { + if self.position == 0 { + return false; + } + + let bytes = self.source.as_bytes(); + let prev_pos = self.position - 1; + if bytes.get(prev_pos) != Some(&b'>') { + return false; + } + + let before = &self.source[..prev_pos]; + let last_newline_pos = before.rfind(['\n', '\r']); + let line_start = match last_newline_pos { + Some(pos) => { + let before_bytes = before.as_bytes(); + if before_bytes.get(pos) == Some(&b'\r') + && before_bytes.get(pos + 1) == Some(&b'\n') + { + pos + 2 + } else { + pos + 1 + } + } + None => 0, + }; + + let prefix = &self.source[line_start..=prev_pos]; + + let mut chars = prefix.chars().peekable(); + let mut indent = 0usize; + + while let Some(&c) = chars.peek() { + if c == ' ' || c == '\t' { + indent += if c == '\t' { 4 } else { 1 }; + if indent > 3 { + return false; + } + chars.next(); + } else { + break; + } + } + + let mut saw_marker = false; + while let Some(c) = chars.next() { + if c == '>' { + saw_marker = true; + if let Some(&next) = chars.peek() + && (next == ' ' || next == '\t') + { + chars.next(); + } + } else { + return false; + } + } + + saw_marker + } + + /// Consumes thematic break literal, setext underline, or returns emphasis marker tokens. + /// Called when we see *, -, or _. 
+ /// + /// For `-` at line start: + /// - 1-2 dashes followed by newline: setext underline (H2) + /// - 3+ dashes followed by newline: thematic break (not setext, since the parser + /// will convert thematic breaks to setext headers when preceded by paragraph) + fn consume_thematic_break_or_emphasis( + &mut self, + dispatched: Dispatch, + context: MarkdownLexContext, + ) -> MarkdownSyntaxKind { + self.assert_at_char_boundary(); + + let start_char = match dispatched { + MUL => b'*', + MIN => b'-', + IDT => { + // IDT can match letters (A-Z, a-z) or underscore + // Only underscore should be treated as emphasis marker + match self.current_byte() { + Some(b'_') => b'_', + _ => return self.consume_textual(context), + } + } + _ => return self.consume_textual(context), }; + // Save position to restore if not a thematic break + let start_position = self.position; + + // For `-` at line start with 1-2 dashes, emit setext underline. + // 3+ dashes could be thematic break, so let that logic handle it. + // The parser will convert thematic breaks to setext when preceded by paragraph. 
+ if start_char == b'-' && self.after_newline { + let mut dash_count = 0; + // Consume only `-` characters (no spaces between) + while matches!(self.current_byte(), Some(b'-')) { + self.advance(1); + dash_count += 1; + } + // Allow trailing spaces/tabs + while matches!(self.current_byte(), Some(b' ' | b'\t')) { + self.advance(1); + } + // 1-2 dashes followed by newline/EOF is a setext underline + // 3+ dashes goes to thematic break logic below + if (1..=2).contains(&dash_count) + && matches!(self.current_byte(), Some(b'\n' | b'\r') | None) + { + return MD_SETEXT_UNDERLINE_LITERAL; + } + // Not a setext underline - restore and try thematic break + self.position = start_position; + } + let mut count = 0; loop { - self.consume_whitespace(); + // Count only the marker characters, skip whitespace if matches!(self.current_byte(), Some(ch) if ch == start_char) { self.advance(1); count += 1; + } else if matches!(self.current_byte(), Some(b' ' | b'\t')) { + self.advance(1); } else { break; } } - // until next newline or eof - if matches!(self.current_byte(), Some(b'\n' | b'\r') | None) && count >= 3 { + + // Check if this is a valid thematic break: 3+ of same char, only whitespace between, + // followed by newline or EOF, AND must be at line start (CommonMark requirement) + if self.after_newline + && count >= 3 + && matches!(self.current_byte(), Some(b'\n' | b'\r') | None) + { return MD_THEMATIC_BREAK_LITERAL; } - ERROR_TOKEN + + // Not a thematic break - restore position and consume as emphasis marker + self.position = start_position; + + // Check for double emphasis markers (**, __) + // Note: -- is not valid markdown emphasis, so we don't check for it + if start_char != b'-' && self.peek_byte() == Some(start_char) { + self.advance(2); + return match start_char { + b'*' => DOUBLE_STAR, + b'_' => DOUBLE_UNDERSCORE, + _ => unreachable!(), + }; + } + + // Single marker + self.advance(1); + match start_char { + b'*' => STAR, + b'_' => UNDERSCORE, + b'-' => MINUS, + _ => 
unreachable!(), + } + } + + /// Try to consume an ordered list marker (e.g., "1.", "2)", "10."). + /// Returns MD_ORDERED_LIST_MARKER if valid, otherwise falls back to textual. + /// Per CommonMark: 1-9 digits followed by `.` or `)` followed by whitespace. + fn consume_ordered_list_marker_or_textual(&mut self) -> MarkdownSyntaxKind { + self.assert_at_char_boundary(); + + let start_position = self.position; + let mut digit_count = 0; + + // Consume 1-9 digits + while let Some(byte) = self.current_byte() { + if byte.is_ascii_digit() { + self.advance(1); + digit_count += 1; + // CommonMark limits to 9 digits max + if digit_count > 9 { + // Too many digits, not a valid marker + self.position = start_position; + return self.consume_textual(MarkdownLexContext::Regular); + } + } else { + break; + } + } + + // Must have at least one digit + if digit_count == 0 { + self.position = start_position; + return self.consume_textual(MarkdownLexContext::Regular); + } + + // Must be followed by . or ) + let delimiter = self.current_byte(); + if !matches!(delimiter, Some(b'.' | b')')) { + self.position = start_position; + return self.consume_textual(MarkdownLexContext::Regular); + } + self.advance(1); + + // Must be followed by at least one space (or end of line for edge cases) + // Per CommonMark, a space is required but we also allow tab + if !matches!(self.current_byte(), Some(b' ' | b'\t') | None) + && !matches!(self.current_byte(), Some(b'\n' | b'\r')) + { + self.position = start_position; + return self.consume_textual(MarkdownLexContext::Regular); + } + + MD_ORDERED_LIST_MARKER + } + + /// Try to consume a setext heading underline (line of `=` characters). + /// Returns MD_SETEXT_UNDERLINE_LITERAL if valid, otherwise falls back to textual. + /// Per CommonMark: one or more `=` characters with optional spaces, nothing else on line. 
+ fn consume_setext_underline_or_textual(&mut self) -> MarkdownSyntaxKind { + self.assert_at_char_boundary(); + + let start_position = self.position; + let mut eq_count = 0; + + // Consume all `=` and spaces + loop { + match self.current_byte() { + Some(b'=') => { + self.advance(1); + eq_count += 1; + } + Some(b' ') => { + self.advance(1); + } + _ => break, + } + } + + // Must have at least one `=` and be followed by newline or EOF + if eq_count >= 1 && matches!(self.current_byte(), Some(b'\n' | b'\r') | None) { + return MD_SETEXT_UNDERLINE_LITERAL; + } + + // Not a valid setext underline - restore position and consume as textual + self.position = start_position; + self.consume_textual(MarkdownLexContext::Regular) + } + + /// Consume hash character(s). + /// + /// Emits HASH tokens for ATX headers and trailing header markers. + fn consume_hash(&mut self) -> MarkdownSyntaxKind { + self.assert_at_char_boundary(); + + // In all other cases, emit HASH + // - At line start for ATX headers (# Header) + // - After other hashes for multi-level headers (### Header) + // - For trailing hashes (# Header #) + self.advance(1); + HASH + } + + /// Consume backtick(s). + /// + /// At line start with 3+ backticks: emits TRIPLE_BACKTICK for fenced code blocks. + /// Otherwise: emits BACKTICK containing all consecutive backticks (for inline code spans). + /// + /// This allows multi-backtick code spans like `` `code` `` where the parser + /// can determine backtick count from token text length. 
+ fn consume_backtick(&mut self) -> MarkdownSyntaxKind { + self.assert_at_char_boundary(); + + // Count consecutive backticks + let mut count = 0; + while let Some(b'`') = self.byte_at(count) { + count += 1; + } + + // At line start with 3+ backticks: fenced code block + if self.after_newline && count >= 3 { + self.advance(count); + return TRIPLE_BACKTICK; + } + + // Otherwise: emit all consecutive backticks as a single BACKTICK token + // The parser can determine the count from token text length + self.advance(count); + BACKTICK + } + + /// Consume tilde(s) - either single for other uses or triple for fenced code blocks. + /// + /// At line start with 3+ tildes: emits TRIPLE_TILDE for fenced code blocks. + /// Otherwise: emits TILDE containing all consecutive tildes. + fn consume_tilde(&mut self) -> MarkdownSyntaxKind { + self.assert_at_char_boundary(); + + // Count consecutive tildes + let mut count = 0; + while let Some(b'~') = self.byte_at(count) { + count += 1; + } + + // At line start with 3+ tildes: fenced code block + if self.after_newline && count >= 3 { + self.advance(count); + return TRIPLE_TILDE; + } + + // Otherwise: emit all consecutive tildes as a single TILDE token + self.advance(count); + TILDE } /// Get the UTF8 char which starts at the current byte @@ -350,18 +978,210 @@ impl<'src> MarkdownLexer<'src> { self.position >= self.source.len() } + /// Consume consecutive textual characters until we hit a special markdown character. + /// This groups multiple characters into a single MD_TEXTUAL_LITERAL token for efficiency. + /// Spaces and tabs are included in the text token (treated as regular text content), + /// but newlines end the token since they have semantic meaning as block separators. + /// Also stops before trailing spaces that could form a hard line break (2+ spaces before newline). + /// In LinkDefinition context, stops at any whitespace to allow proper destination/title parsing. 
#[inline] - fn consume_textual(&mut self) -> MarkdownSyntaxKind { + fn consume_textual(&mut self, context: MarkdownLexContext) -> MarkdownSyntaxKind { self.assert_at_char_boundary(); + if self.force_ordered_list_marker + && let Some(byte) = self.current_byte() + && (byte == b' ' || byte == b'\t') + { + let mut idx = self.position; + while matches!(self.source.as_bytes().get(idx), Some(b' ' | b'\t')) { + idx += 1; + } + if self.is_ordered_list_marker_at(idx) { + self.advance(idx - self.position); + return MD_TEXTUAL_LITERAL; + } + } + + // Consume at least one character let char = self.current_char_unchecked(); self.advance(char.len_utf8()); + // Continue consuming characters until we hit a special markdown character + // or end of file. Special characters are those that could start inline elements + // or block structures: * - _ + # ` ~ > ! [ ] ( ) \ and newlines. + // Spaces and tabs are now included as regular text content (except in LinkDefinition context). + while let Some(byte) = self.current_byte() { + if matches!(context, MarkdownLexContext::LinkDefinition) + && (byte == b' ' || byte == b'\t') + { + break; + } + let dispatched = lookup_byte(byte); + match dispatched { + // Include spaces and tabs in text tokens, but check for hard line break pattern. + // In LinkDefinition context, stop at whitespace to separate destination from title. 
+ WHS => { + if matches!(context, MarkdownLexContext::LinkDefinition) { + // In link definition context, whitespace ends the text token + break; + } else if byte == b' ' { + if self.is_at_trailing_hash_closing_whitespace() { + break; + } + // Look ahead to check if this could be the start of a hard line break + // (2+ spaces followed by newline) + if self.is_potential_hard_line_break() { + break; + } + self.advance(1); + } else if byte == b'\t' { + if self.is_at_trailing_hash_closing_whitespace() { + break; + } + self.advance(1); + } else { + // Stop at newlines (\n, \r) + break; + } + } + // Stop at characters that could be markdown syntax + MUL // * + | MIN // - + | PLS // + + | HAS // # + | TPL // ` + | TLD // ~ + | LSS // < + | MOR // > + | EXL // ! + | BTO // [ + | BTC // ] + | PNO // ( + | PNC // ) + | BSL // \ + | AMP // & (entity references) + => break, + // IDT includes A-Z, a-z, and _ - only _ is special for markdown + IDT => { + if byte == b'_' { + break; + } + self.advance_char_unchecked(); + } + // All other characters are regular text + _ => { + self.advance_char_unchecked(); + } + } + } + MD_TEXTUAL_LITERAL } + fn is_ordered_list_marker_at(&self, idx: usize) -> bool { + if idx >= self.source.len() { + return false; + } + + let bytes = self.source.as_bytes(); + let mut pos = idx; + let mut digit_count = 0; + + while pos < bytes.len() && bytes[pos].is_ascii_digit() { + digit_count += 1; + if digit_count > 9 { + return false; + } + pos += 1; + } + + if digit_count == 0 { + return false; + } + + if pos >= bytes.len() || !(bytes[pos] == b'.' || bytes[pos] == b')') { + return false; + } + pos += 1; + + if pos >= bytes.len() { + return true; + } + + matches!(bytes[pos], b' ' | b'\t' | b'\n' | b'\r') + } + + /// Returns true if the current whitespace is the start of a closing ATX hash sequence. + /// + /// Pattern: spaces/tabs + one or more `#` + optional spaces/tabs + newline/EOF. 
+ fn is_at_trailing_hash_closing_whitespace(&self) -> bool { + let mut i = self.position; + let bytes = self.source.as_bytes(); + let len = bytes.len(); + + let mut saw_ws = false; + while i < len { + let b = bytes[i]; + if b == b' ' || b == b'\t' { + saw_ws = true; + i += 1; + } else { + break; + } + } + + if !saw_ws { + return false; + } + + if i >= len || bytes[i] != b'#' { + return false; + } + + while i < len && bytes[i] == b'#' { + i += 1; + } + + while i < len { + let b = bytes[i]; + if b == b' ' || b == b'\t' { + i += 1; + } else { + break; + } + } + + if i >= len { + return true; + } + + matches!(bytes[i], b'\n' | b'\r') + } + + /// Check if current position starts a potential hard line break pattern. + /// Returns true if there are 2+ spaces followed by a newline. + fn is_potential_hard_line_break(&self) -> bool { + // Must have at least one space at current position (already checked by caller) + let mut offset = 0; + let mut space_count = 0; + + // Count consecutive spaces + while let Some(b' ') = self.byte_at(offset) { + space_count += 1; + offset += 1; + } + + // Check if followed by newline + if space_count >= 2 + && let Some(next) = self.byte_at(offset) + { + return next == b'\n' || next == b'\r'; + } + + false + } + /// Bumps the current byte and creates a lexed token of the passed in kind - #[expect(dead_code)] fn consume_byte(&mut self, tok: MarkdownSyntaxKind) -> MarkdownSyntaxKind { self.advance(1); tok @@ -373,16 +1193,19 @@ impl<'src> ReLexer<'src> for MarkdownLexer<'src> { let old_position = self.position; self.position = u32::from(self.current_start) as usize; + let lex_context = match context { + MarkdownReLexContext::Regular => MarkdownLexContext::Regular, + MarkdownReLexContext::LinkDefinition => MarkdownLexContext::LinkDefinition, + }; + let re_lexed_kind = match self.current_byte() { - Some(current) => match context { - MarkdownReLexContext::Regular => self.consume_token(current), - // MarkdownReLexContext::UnicodeRange => 
self.consume_unicode_range_token(current), - }, + Some(current) => self.consume_token(current, lex_context), None => EOF, }; - if self.current() == re_lexed_kind { - // Didn't re-lex anything. Return existing token again + let new_position = self.position; + if self.current() == re_lexed_kind && new_position == old_position { + // Didn't re-lex anything. Return existing token again. self.position = old_position; } else { self.current_kind = re_lexed_kind; diff --git a/crates/biome_markdown_parser/src/lexer/tests.rs b/crates/biome_markdown_parser/src/lexer/tests.rs index 056ec8c5284b..835e6f077c16 100644 --- a/crates/biome_markdown_parser/src/lexer/tests.rs +++ b/crates/biome_markdown_parser/src/lexer/tests.rs @@ -109,7 +109,7 @@ fn empty() { #[test] fn textual() { assert_lex! { - "+", + "a", MD_TEXTUAL_LITERAL:1, } } @@ -128,7 +128,7 @@ fn new_line() { fn tab() { assert_lex! { "\t", - TAB:1, + MD_TEXTUAL_LITERAL:1, } } @@ -136,7 +136,7 @@ fn tab() { fn whitespace() { assert_lex! { " ", - WHITESPACE:1, + MD_TEXTUAL_LITERAL:1, } } @@ -162,3 +162,392 @@ _ _ _ _ _ "#, MD_THEMATIC_BREAK_LITERAL:11, } } + +#[test] +fn hash_token() { + // Single hash for ATX header + assert_lex! { + "#", + HASH:1, + } +} + +#[test] +fn multiple_hashes() { + // Multiple hashes for different header levels + assert_lex! { + "###", + HASH:1, + HASH:1, + HASH:1, + } +} + +#[test] +fn backtick_token() { + // Single backtick for inline code + assert_lex! { + "`", + BACKTICK:1, + } +} + +#[test] +fn triple_backtick() { + // Triple backtick for fenced code blocks + assert_lex! { + "```", + TRIPLE_BACKTICK:3, + } +} + +#[test] +fn tilde_token() { + // Single tilde + assert_lex! { + "~", + TILDE:1, + } +} + +#[test] +fn triple_tilde() { + // Triple tilde for fenced code blocks + assert_lex! { + "~~~", + TRIPLE_TILDE:3, + } +} + +#[test] +fn greater_than_token() { + // Greater than for block quotes + assert_lex! 
{ + ">", + R_ANGLE:1, + } +} + +#[test] +fn greater_than_with_text() { + // Block quote with content - the text after the marker is grouped into a single token + // The optional space right after '>' is lexed as its own one-character textual token, + // so " text" is split into " " and "text" + assert_lex! { + "> text", + R_ANGLE:1, + MD_TEXTUAL_LITERAL:1, // optional space after '>' + MD_TEXTUAL_LITERAL:4, // "text" + } +} + +#[test] +fn plus_token() { + // Plus for bullet list marker + assert_lex! { + "+", + PLUS:1, + } +} + +#[test] +fn star_token_single() { + // Single star followed by space (not a thematic break) + // The trailing space is now included in textual content + // since it's not at the start of a line + assert_lex! { + "* ", + STAR:1, + MD_TEXTUAL_LITERAL:1, // trailing space as text + } +} + +#[test] +fn brackets() { + // Brackets for links - text is grouped into single tokens + assert_lex! { + "[text](url)", + L_BRACK:1, + MD_TEXTUAL_LITERAL:4, // "text" grouped + R_BRACK:1, + L_PAREN:1, + MD_TEXTUAL_LITERAL:3, // "url" grouped + R_PAREN:1, + } +} + +#[test] +fn bang_token() { + // Exclamation for images + assert_lex! { + "!", + BANG:1, + } +} + +#[test] +fn image_syntax() { + // Image syntax - text is grouped into single tokens + assert_lex! { + "![alt](src)", + BANG:1, + L_BRACK:1, + MD_TEXTUAL_LITERAL:3, // "alt" grouped + R_BRACK:1, + L_PAREN:1, + MD_TEXTUAL_LITERAL:3, // "src" grouped + R_PAREN:1, + } +} + +#[test] +fn star_and_underscore_emphasis() { + // Single star for emphasis - text is grouped + assert_lex! { + "*text*", + STAR:1, + MD_TEXTUAL_LITERAL:4, // "text" grouped + STAR:1, + } +} + +#[test] +fn double_star_emphasis() { + // Double star for strong emphasis - text is grouped + assert_lex! { + "**bold**", + DOUBLE_STAR:2, + MD_TEXTUAL_LITERAL:4, // "bold" grouped + DOUBLE_STAR:2, + } +} + +#[test] +fn underscore_token() { + // Underscore token for emphasis - text is grouped + assert_lex!
{ + "_text_", + UNDERSCORE:1, + MD_TEXTUAL_LITERAL:4, // "text" grouped + UNDERSCORE:1, + } +} + +#[test] +fn double_underscore_emphasis() { + // Double underscore for strong emphasis - text is grouped + assert_lex! { + "__bold__", + DOUBLE_UNDERSCORE:2, + MD_TEXTUAL_LITERAL:4, // "bold" grouped + DOUBLE_UNDERSCORE:2, + } +} + +#[test] +fn minus_token_single() { + // Single minus followed by text (not a thematic break) + // Mid-line whitespace is now included in textual content, + // so " item" becomes a single token + assert_lex! { + "- item", + MINUS:1, + MD_TEXTUAL_LITERAL:5, // " item" grouped (space + text) + } +} + +#[test] +fn code_fence_with_language() { + // Code fence with language specifier - language name is grouped + assert_lex! { + "```rust", + TRIPLE_BACKTICK:3, + MD_TEXTUAL_LITERAL:4, // "rust" grouped + } +} + +#[test] +fn escape_sequences() { + // Backslash escapes punctuation characters + assert_lex! { + r#"\*\[\]"#, + MD_TEXTUAL_LITERAL:2, // \* + MD_TEXTUAL_LITERAL:2, // \[ + MD_TEXTUAL_LITERAL:2, // \] + } +} + +#[test] +fn escape_backslash() { + // Escaped backslash + assert_lex! { + r#"\\"#, + MD_TEXTUAL_LITERAL:2, // \\ + } +} + +#[test] +fn escape_non_punctuation() { + // Backslash before non-punctuation is just backslash + assert_lex! { + r#"\a"#, + MD_TEXTUAL_LITERAL:1, // \ + MD_TEXTUAL_LITERAL:1, // a + } +} + +#[test] +fn hard_line_break_trailing_spaces() { + // Two spaces followed by newline is a hard line break + assert_lex! { + "text \nmore", + MD_TEXTUAL_LITERAL:4, // "text" + MD_HARD_LINE_LITERAL:3, // " \n" + MD_TEXTUAL_LITERAL:4, // "more" + } +} + +#[test] +fn hard_line_break_many_trailing_spaces() { + // More than two spaces followed by newline is also a hard line break + assert_lex! 
{ + "text \nmore", + MD_TEXTUAL_LITERAL:4, // "text" + MD_HARD_LINE_LITERAL:5, // " \n" + MD_TEXTUAL_LITERAL:4, // "more" + } +} + +#[test] +fn hard_line_break_backslash_newline() { + // Backslash followed by newline is a hard line break + assert_lex! { + "text\\\nmore", + MD_TEXTUAL_LITERAL:4, // "text" + MD_HARD_LINE_LITERAL:2, // "\\\n" + MD_TEXTUAL_LITERAL:4, // "more" + } +} + +#[test] +fn not_hard_line_break_single_space() { + // Single space followed by newline is NOT a hard line break + assert_lex! { + "text \nmore", + MD_TEXTUAL_LITERAL:5, // "text " (space included in text) + NEWLINE:1, + MD_TEXTUAL_LITERAL:4, // "more" + } +} + +#[test] +fn ordered_list_marker_dot() { + // Ordered list marker with dot + assert_lex! { + "1. item", + MD_ORDERED_LIST_MARKER:2, // "1." + MD_TEXTUAL_LITERAL:5, // " item" (space + text) + } +} + +#[test] +fn ordered_list_marker_paren() { + // Ordered list marker with parenthesis + assert_lex! { + "1) item", + MD_ORDERED_LIST_MARKER:2, // "1)" + MD_TEXTUAL_LITERAL:5, // " item" + } +} + +#[test] +fn ordered_list_marker_multi_digit() { + // Multi-digit ordered list marker + assert_lex! { + "123. item", + MD_ORDERED_LIST_MARKER:4, // "123." + MD_TEXTUAL_LITERAL:5, // " item" + } +} + +#[test] +fn ordered_list_marker_not_at_line_start() { + // Digits not at line start should be textual + assert_lex! { + "text 1. more", + MD_TEXTUAL_LITERAL:12, // entire line is textual + } +} + +#[test] +fn ordered_list_marker_no_space() { + // "1.text" without space is not a list marker + assert_lex! { + "1.text", + MD_TEXTUAL_LITERAL:6, // "1.text" + } +} + +#[test] +fn setext_underline_equals() { + // Setext underline with equals at line start + assert_lex! { + "===", + MD_SETEXT_UNDERLINE_LITERAL:3, + } + + // Setext underline after newline + assert_lex! { + "text\n===", + MD_TEXTUAL_LITERAL:4, // "text" + NEWLINE:1, + MD_SETEXT_UNDERLINE_LITERAL:3, // "===" + } + + // Longer setext underline (9 equals) + assert_lex! 
{ + "Heading 1\n=========", + MD_TEXTUAL_LITERAL:9, // "Heading 1" + NEWLINE:1, + MD_SETEXT_UNDERLINE_LITERAL:9, // "=========" + } +} + +#[test] +fn setext_underline_dashes() { + // Single dash at line start is a setext underline (not enough for thematic break) + assert_lex! { + "-\n", + MD_SETEXT_UNDERLINE_LITERAL:1, + NEWLINE:1, + } + + // Two dashes at line start is a setext underline + assert_lex! { + "--\n", + MD_SETEXT_UNDERLINE_LITERAL:2, + NEWLINE:1, + } + + // Three+ dashes is a thematic break at lexer level + // (parser will convert to setext if preceded by paragraph) + assert_lex! { + "---\n", + MD_THEMATIC_BREAK_LITERAL:3, + NEWLINE:1, + } +} + +#[test] +fn link_reference_definition_tokens() { + // Test tokenizing a link reference definition + // Whitespace after colon is included in textual (not at line start) + assert_lex! { + "[label]: https://example.com", + L_BRACK:1, + MD_TEXTUAL_LITERAL:5, // "label" + R_BRACK:1, + COLON:1, + MD_TEXTUAL_LITERAL:20, // " https://example.com" (with leading space) + } +} diff --git a/crates/biome_markdown_parser/src/lib.rs b/crates/biome_markdown_parser/src/lib.rs index 27a66d8384e7..c4a825e017c0 100644 --- a/crates/biome_markdown_parser/src/lib.rs +++ b/crates/biome_markdown_parser/src/lib.rs @@ -8,30 +8,57 @@ use parser::MarkdownParser; use syntax::parse_document; mod lexer; +mod link_reference; mod parser; mod syntax; mod token_source; +// Test utilities for CommonMark spec compliance testing. +// Only compiled when the `test_utils` feature is enabled. +#[cfg(feature = "test_utils")] +mod to_html; + +pub use parser::MarkdownParseOptions; + +#[cfg(feature = "test_utils")] +pub use to_html::document_to_html; + pub(crate) type MarkdownLosslessTreeSink<'source> = LosslessTreeSink<'source, MarkdownLanguage, MarkdownSyntaxFactory>; +/// Parse markdown source code with default options. 
pub fn parse_markdown(source: &str) -> MarkdownParse { let mut cache = NodeCache::default(); - parse_markdown_with_cache(source, &mut cache) + parse_markdown_with_cache(source, &mut cache, MarkdownParseOptions::default()) } -pub fn parse_markdown_with_cache(source: &str, cache: &mut NodeCache) -> MarkdownParse { - let mut parser = MarkdownParser::new(source); +/// Parse markdown source code with custom options and a node cache. +pub fn parse_markdown_with_cache( + source: &str, + cache: &mut NodeCache, + options: MarkdownParseOptions, +) -> MarkdownParse { + let link_definitions = + link_reference::collect_link_reference_definitions(source, options.clone()); + let mut parser = MarkdownParser::new(source, options); + parser.set_link_reference_definitions(link_definitions); parse_document(&mut parser); - let (events, diagnostics, trivia) = parser.finish(); + let (events, diagnostics, trivia, list_tightness, list_item_indents, quote_indents) = + parser.finish(); let mut tree_sink = MarkdownLosslessTreeSink::with_cache(source, &trivia, cache); biome_parser::event::process(&mut tree_sink, events, diagnostics); let (green, diagnostics) = tree_sink.finish(); - MarkdownParse::new(green, diagnostics) + MarkdownParse::new( + green, + diagnostics, + list_tightness, + list_item_indents, + quote_indents, + ) } /// A utility struct for managing the result of a parser job @@ -39,11 +66,26 @@ pub fn parse_markdown_with_cache(source: &str, cache: &mut NodeCache) -> Markdow pub struct MarkdownParse { root: MarkdownSyntaxNode, diagnostics: Vec, + list_tightness: Vec, + list_item_indents: Vec, + quote_indents: Vec, } impl MarkdownParse { - pub fn new(root: MarkdownSyntaxNode, diagnostics: Vec) -> Self { - Self { root, diagnostics } + pub fn new( + root: MarkdownSyntaxNode, + diagnostics: Vec, + list_tightness: Vec, + list_item_indents: Vec, + quote_indents: Vec, + ) -> Self { + Self { + root, + diagnostics, + list_tightness, + list_item_indents, + quote_indents, + } } pub fn 
syntax(&self) -> MarkdownSyntaxNode { @@ -60,6 +102,19 @@ impl MarkdownParse { self.diagnostics } + /// Returns the recorded tight/loose information for list nodes. + pub fn list_tightness(&self) -> &[parser::ListTightness] { + &self.list_tightness + } + + pub fn list_item_indents(&self) -> &[parser::ListItemIndent] { + &self.list_item_indents + } + + pub fn quote_indents(&self) -> &[parser::QuoteIndent] { + &self.quote_indents + } + /// Returns [true] if the parser encountered some errors during the parsing. pub fn has_errors(&self) -> bool { self.diagnostics diff --git a/crates/biome_markdown_parser/src/link_reference.rs b/crates/biome_markdown_parser/src/link_reference.rs new file mode 100644 index 000000000000..7edfb0b9db9f --- /dev/null +++ b/crates/biome_markdown_parser/src/link_reference.rs @@ -0,0 +1,86 @@ +use std::collections::HashSet; + +use biome_markdown_syntax::{MdLinkLabel, MdLinkReferenceDefinition}; +use biome_rowan::{AstNode, Direction}; + +use crate::MarkdownLosslessTreeSink; +use crate::MarkdownParseOptions; +use crate::parser::MarkdownParser; +use crate::syntax::parse_document; + +pub(crate) fn normalize_reference_label(text: &str) -> String { + let mut out = String::new(); + let mut chars = text.chars().peekable(); + let mut saw_whitespace = false; + + while let Some(c) = chars.next() { + if c == '\\' { + if let Some(next) = chars.next() { + push_normalized_char(&mut out, next, &mut saw_whitespace); + } + continue; + } + + if c.is_whitespace() { + saw_whitespace = true; + continue; + } + + push_normalized_char(&mut out, c, &mut saw_whitespace); + } + + out +} + +fn push_normalized_char(out: &mut String, c: char, saw_whitespace: &mut bool) { + if *saw_whitespace && !out.is_empty() { + out.push(' '); + } + *saw_whitespace = false; + for lower in c.to_lowercase() { + out.push(lower); + } +} + +pub(crate) fn collect_link_reference_definitions( + source: &str, + options: MarkdownParseOptions, +) -> HashSet { + let mut parser = 
MarkdownParser::new(source, options); + parse_document(&mut parser); + let (events, diagnostics, trivia, _list_tightness, _list_item_indents, _quote_indents) = + parser.finish(); + + let mut tree_sink = MarkdownLosslessTreeSink::new(source, &trivia); + biome_parser::event::process(&mut tree_sink, events, diagnostics); + let (root, _) = tree_sink.finish(); + + let mut definitions = HashSet::new(); + + for node in root.descendants() { + if let Some(def) = MdLinkReferenceDefinition::cast(node) + && let Ok(label) = def.label() + { + let raw = collect_label_text(label); + let normalized = normalize_reference_label(&raw); + if !normalized.is_empty() { + definitions.insert(normalized); + } + } + } + + definitions +} + +fn collect_label_text(label: MdLinkLabel) -> String { + let mut text = String::new(); + for token in label + .content() + .syntax() + .descendants_with_tokens(Direction::Next) + .filter_map(|element| element.into_token()) + { + text.push_str(token.text()); + } + text +} diff --git a/crates/biome_markdown_parser/src/parser.rs b/crates/biome_markdown_parser/src/parser.rs index 9cf3922c6744..95933bc75402 100644 --- a/crates/biome_markdown_parser/src/parser.rs +++ b/crates/biome_markdown_parser/src/parser.rs @@ -4,22 +4,208 @@ use biome_parser::event::Event; use biome_parser::prelude::*; use biome_parser::token_source::Trivia; use biome_parser::{ParserContextCheckpoint, diagnostic::merge_diagnostics}; +use biome_rowan::{TextRange, TextSize}; +use std::collections::HashSet; +use crate::syntax::inline::EmphasisContext; use crate::token_source::{MarkdownTokenSource, MarkdownTokenSourceCheckpoint}; +/// Options for configuring the markdown parser. +// The struct has no fields yet; it is passed through the parser API so options can be added later. +#[derive(Default, Debug, Clone)] +pub struct MarkdownParseOptions { + // Reserved for future GFM options +} + +/// Internal parser state for tracking nesting and context.
+/// +/// # Depth Tracking +/// +/// These fields track nesting depth to prevent stack overflow from pathological +/// input (e.g., `>>>>...` with hundreds of levels). CommonMark doesn't specify +/// limits, but practical implementations need them. +/// +/// # Future Use +/// +/// - **Lazy continuation**: CommonMark §5.1 allows block quote content to continue +/// without `>` prefix on subsequent lines. Proper implementation requires tracking +/// the current quote depth to know when lazy continuation applies. +/// +/// - **List tight/loose determination**: CommonMark §5.3 distinguishes tight lists +/// (no blank lines between items) from loose lists. This affects HTML output and +/// requires tracking list context during parsing. +#[derive(Default, Debug)] +pub(crate) struct MarkdownParserState { + /// Block quote nesting depth for lazy continuation and depth limits. + /// See CommonMark §5.1 for block quote continuation rules. + pub(crate) block_quote_depth: usize, + /// List nesting depth for tight/loose determination and depth limits. + /// See CommonMark §5.3 for list tightness rules. + pub(crate) list_nesting_depth: usize, + /// Required indentation for list item content continuation. + /// Per CommonMark §5.2, continuation lines must be indented to at least + /// this column (marker width + space width). Zero means no indent required. + pub(crate) list_item_required_indent: usize, + /// Indentation column where the current list marker starts. + /// Used to detect sibling list items after blank lines. + pub(crate) list_item_marker_indent: usize, + /// Emphasis parsing context for the current inline item list. + pub(crate) emphasis_context: Option, + /// Normalized link reference definitions collected in a prepass. + pub(crate) link_reference_definitions: HashSet, + /// Recorded tight/loose list results keyed by list node range. + pub(crate) list_tightness: Vec, + /// Recorded list item indents keyed by bullet node range. 
+ pub(crate) list_item_indents: Vec, + /// Recorded quote marker indents keyed by quote node range. + pub(crate) quote_indents: Vec, + /// Virtual line start override for container prefixes (e.g., block quotes). + pub(crate) virtual_line_start: Option, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct ListTightness { + pub range: TextRange, + pub is_tight: bool, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct ListItemIndent { + pub range: TextRange, + pub indent: usize, + pub marker_indent: usize, + pub marker_width: usize, + pub spaces_after_marker: usize, +} + +type FinishResult = ( + Vec>, + Vec, + Vec, + Vec, + Vec, + Vec, +); + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct QuoteIndent { + pub range: TextRange, + pub indent: usize, +} + pub(crate) struct MarkdownParser<'source> { context: ParserContext, source: MarkdownTokenSource<'source>, + options: MarkdownParseOptions, + state: MarkdownParserState, } impl<'source> MarkdownParser<'source> { - pub fn new(source: &'source str) -> Self { + pub fn new(source: &'source str, options: MarkdownParseOptions) -> Self { Self { context: ParserContext::default(), source: MarkdownTokenSource::from_str(source), + options, + state: MarkdownParserState::default(), } } + /// Returns parser options. Reserved for GFM extensions. + #[expect(dead_code)] + pub(crate) fn options(&self) -> &MarkdownParseOptions { + &self.options + } + + /// Returns immutable state reference for nesting depth checks. + pub(crate) fn state(&self) -> &MarkdownParserState { + &self.state + } + + /// Returns mutable state reference for nesting depth updates. + pub(crate) fn state_mut(&mut self) -> &mut MarkdownParserState { + &mut self.state + } + + /// Returns the emphasis context for the current inline list, if any. + pub(crate) fn emphasis_context(&self) -> Option<&EmphasisContext> { + self.state.emphasis_context.as_ref() + } + + /// Replace the emphasis context, returning the previous value. 
+ pub(crate) fn set_emphasis_context( + &mut self, + context: Option, + ) -> Option { + std::mem::replace(&mut self.state.emphasis_context, context) + } + + /// Replace the set of normalized link reference definitions. + pub(crate) fn set_link_reference_definitions(&mut self, definitions: HashSet) { + self.state.link_reference_definitions = definitions; + } + + /// Returns true if a normalized label has a link reference definition. + pub(crate) fn has_link_reference_definition(&self, label: &str) -> bool { + self.state.link_reference_definitions.contains(label) + } + + /// Record tight/loose information for a parsed list node. + pub(crate) fn record_list_tightness(&mut self, range: TextRange, is_tight: bool) { + let range = self.trim_range(range); + self.state + .list_tightness + .push(ListTightness { range, is_tight }); + } + + pub(crate) fn record_list_item_indent( + &mut self, + range: TextRange, + indent: usize, + marker_indent: usize, + marker_width: usize, + spaces_after_marker: usize, + ) { + let range = self.trim_range(range); + self.state.list_item_indents.push(ListItemIndent { + range, + indent, + marker_indent, + marker_width, + spaces_after_marker, + }); + } + + pub(crate) fn record_quote_indent(&mut self, range: TextRange, indent: usize) { + let range = self.trim_range(range); + self.state.quote_indents.push(QuoteIndent { range, indent }); + } + + /// Re-lex the current token using LinkDefinition context. + /// This makes whitespace produce separate tokens for destination/title parsing. + pub(crate) fn re_lex_link_definition(&mut self) { + self.source + .re_lex(crate::lexer::MarkdownReLexContext::LinkDefinition); + } + + /// Force re-lex the current token in Regular context. + /// + /// Use this when switching from LinkDefinition context back to Regular context, + /// e.g., when entering title content where whitespace should not split tokens. 
+ pub(crate) fn force_relex_regular(&mut self) { + self.source + .force_relex_in_context(crate::lexer::MarkdownLexContext::Regular); + } + + pub(crate) fn set_force_ordered_list_marker(&mut self, value: bool) { + self.source.set_force_ordered_list_marker(value); + } + + /// Bump the current token using LinkDefinition context. + /// The next token will be lexed with whitespace as separate tokens. + pub(crate) fn bump_link_definition(&mut self) { + self.source.bump_link_definition(); + } + pub fn checkpoint(&self) -> MarkdownParserCheckpoint { MarkdownParserCheckpoint { context: self.context.checkpoint(), @@ -27,8 +213,170 @@ impl<'source> MarkdownParser<'source> { } } - pub fn before_whitespace_count(&self) -> usize { - self.source.before_whitespace_count() + /// Clear any buffered lookahead without changing the current position. + pub(crate) fn reset_lookahead(&mut self) { + let checkpoint = self.source.checkpoint(); + self.source.rewind(checkpoint); + } + + /// Returns leading indentation on the current line, including whitespace + /// inside the current token. + pub fn line_start_leading_indent(&self) -> usize { + if self.state.virtual_line_start == Some(self.cur_range().start()) { + let source = self.source.source_text(); + let start: usize = self.cur_range().start().into(); + return count_leading_indent(&source[start..]); + } + + self.source.line_start_leading_indent() + } + + /// Returns true if the parser is at the start of input (position 0). + /// This is used for detecting block-level constructs at the start of a document. + /// + /// Uses position-based check rather than trivia_len, so it works correctly + /// when NEWLINE becomes an explicit token (not trivia). + pub fn at_start_of_input(&self) -> bool { + self.source.at_start_of_input() + } + + /// Returns true if the parser is at the start of a line. 
+ /// + /// This is true when: + /// - At start of input (position 0) + /// - The current token has a preceding line break (lexer's after_newline flag) + /// + /// Used for detecting block-level constructs that must start at line beginning + /// (e.g., headers, list items, thematic breaks). + pub fn at_line_start(&self) -> bool { + self.at_start_of_input() + || self.has_preceding_line_break() + || self.source.at_line_start_with_whitespace() + || self.state.virtual_line_start == Some(self.cur_range().start()) + } + + pub(crate) fn set_virtual_line_start(&mut self) { + self.state.virtual_line_start = Some(self.cur_range().start()); + } + + pub(crate) fn trim_range(&self, range: TextRange) -> TextRange { + let start: usize = range.start().into(); + let end: usize = range.end().into(); + if start >= end { + return range; + } + + let source = self.source.source_text(); + let slice = &source[start..end]; + if slice + .trim_matches(|c: char| matches!(c, ' ' | '\t' | '\r')) + .is_empty() + { + return TextRange::new(range.start(), range.start()); + } + let leading = slice + .len() + .saturating_sub(slice.trim_start_matches([' ', '\t', '\r']).len()); + let trailing = slice + .len() + .saturating_sub(slice.trim_end_matches([' ', '\t', '\r']).len()); + let new_start = start + leading; + let new_end = end.saturating_sub(trailing); + + TextRange::new((new_start as u32).into(), (new_end as u32).into()) + } + + /// Skip an optional indentation token at line start if it is whitespace-only + /// and does not exceed `max_indent` columns. 
+ pub fn skip_line_indent(&mut self, max_indent: usize) -> bool { + if !self.at_line_start() { + return false; + } + + let mut consumed = 0usize; + let mut did_skip = false; + + while self.at(MarkdownSyntaxKind::MD_TEXTUAL_LITERAL) { + let text = self.cur_text(); + if text.is_empty() || !text.chars().all(|c| c == ' ' || c == '\t') { + break; + } + + let indent = text + .chars() + .map(|c| if c == '\t' { 4 } else { 1 }) + .sum::(); + + if consumed + indent > max_indent { + break; + } + + consumed += indent; + did_skip = true; + self.parse_as_skipped_trivia_tokens(|p| p.bump(MarkdownSyntaxKind::MD_TEXTUAL_LITERAL)); + } + + did_skip + } + + /// Returns true if inline content should stop parsing. + /// + /// Inline content ends at: + /// - EOF + /// - NEWLINE token (NEWLINE is an explicit token in Markdown) + /// - A preceding line break (lexer flag, for compatibility during transition) + /// + /// This provides a unified check for inline parsing loops. + pub fn at_inline_end(&self) -> bool { + self.at(MarkdownSyntaxKind::EOF) + || self.at(MarkdownSyntaxKind::NEWLINE) + || self.has_preceding_line_break() + } + + /// Returns true if the parser is at a blank line boundary. + /// + /// A blank line is a NEWLINE followed by optional whitespace, then another + /// NEWLINE or EOF. This is a token-based lookahead check. + /// + /// Used for: + /// - Paragraph boundaries + /// - Tight/loose list determination + /// - Block quote continuation + /// + /// # NEWLINE Consumption Policy + /// + /// When this returns true, the parser should NOT consume the NEWLINE. + /// Instead, the block-level parser should handle the paragraph boundary. + /// The NEWLINE at a blank line marks the end of the current block. 
+ pub fn at_blank_line(&self) -> bool { + if !self.at(MarkdownSyntaxKind::NEWLINE) { + return false; + } + + // Look at source after the current NEWLINE token + let source = self.source_after_current(); + let newline_len = self.cur_text().len(); + + // Get text after this NEWLINE + if source.len() <= newline_len { + // NEWLINE at end of input = blank line (paragraph ends) + return true; + } + + let after_newline = &source[newline_len..]; + + // Skip optional whitespace/tabs (these are still trivia) + let trimmed = after_newline.trim_start_matches([' ', '\t']); + + // Blank line if what remains is empty (EOF) or starts with another newline + // Handle all line ending variants: LF (\n), CRLF (\r\n), and CR (\r) + trimmed.is_empty() || trimmed.starts_with('\n') || trimmed.starts_with('\r') + } + + /// Returns the source text starting from the current token position. + /// This is useful for lookahead when detecting HTML blocks. + pub fn source_after_current(&self) -> &str { + self.source.source_after_current() } pub fn rewind(&mut self, checkpoint: MarkdownParserCheckpoint) { @@ -38,20 +386,60 @@ impl<'source> MarkdownParser<'source> { self.source.rewind(source); } - pub fn finish( - self, - ) -> ( - Vec>, - Vec, - Vec, - ) { + /// Execute a lookahead operation without consuming tokens. + /// + /// This saves a checkpoint, executes the provided closure, then rewinds + /// to the checkpoint. The closure's return value is passed through. + /// + /// Use this for speculative parsing where you need to examine tokens + /// ahead without committing to parsing them. + /// + /// # Example + /// + /// ```ignore + /// let is_valid = p.lookahead(|p| { + /// p.expect(L_BRACK); + /// // ... 
check pattern + /// true + /// }); + /// ``` + pub fn lookahead(&mut self, op: F) -> R + where + F: FnOnce(&mut Self) -> R, + { + let checkpoint = self.checkpoint(); + let result = op(self); + self.rewind(checkpoint); + result + } + + pub fn finish(self) -> FinishResult { let (trivia, lexer_diagnostics) = self.source.finish(); let (events, parse_diagnostics) = self.context.finish(); let diagnostics = merge_diagnostics(lexer_diagnostics, parse_diagnostics); - (events, diagnostics, trivia) + ( + events, + diagnostics, + trivia, + self.state.list_tightness, + self.state.list_item_indents, + self.state.quote_indents, + ) + } +} + +fn count_leading_indent(text: &str) -> usize { + let mut count = 0usize; + for c in text.chars() { + match c { + ' ' => count += 1, + '\t' => count += 4, + _ => break, + } } + count } impl<'source> Parser for MarkdownParser<'source> { diff --git a/crates/biome_markdown_parser/src/syntax.rs b/crates/biome_markdown_parser/src/syntax.rs index 3ca2c0b9e10f..41a797913c67 100644 --- a/crates/biome_markdown_parser/src/syntax.rs +++ b/crates/biome_markdown_parser/src/syntax.rs @@ -1,3 +1,35 @@ +//! Block and inline syntax parsing for Markdown. +//! +//! # CommonMark Specification References +//! +//! This module implements CommonMark 0.31.2 block structure: +//! +//! ## Leaf Blocks (§4) +//! - **§4.1 Thematic breaks**: `---`, `***`, `___` +//! - **§4.2 ATX headings**: `# Heading`, `## Heading`, etc. +//! - **§4.3 Setext headings**: Underlined with `===` or `---` +//! - **§4.4 Indented code blocks**: 4+ spaces of indentation +//! - **§4.5 Fenced code blocks**: ``` or ~~~ delimited +//! - **§4.6 HTML blocks**: Raw HTML content +//! - **§4.7 Link reference definitions**: `[label]: url "title"` +//! - **§4.8 Paragraphs**: Default block content +//! +//! ## Container Blocks (§5) +//! - **§5.1 Block quotes**: `>` prefixed content +//! - **§5.2 List items**: `-`, `*`, `+` or `1.` prefixed +//! - **§5.3 Lists**: Sequences of list items +//! +//! 
## Inline Content (§6) +//! See [`inline`] module for inline element parsing. + +pub mod fenced_code_block; +pub mod header; +pub mod html_block; +pub mod inline; +pub mod link_block; +pub mod list; +pub mod parse_error; +pub mod quote; pub mod thematic_break_block; use biome_markdown_syntax::{T, kind::MarkdownSyntaxKind::*}; @@ -5,13 +37,33 @@ use biome_parser::{ Parser, prelude::ParsedSyntax::{self, *}, }; +use biome_rowan::TextSize; +use fenced_code_block::{ + at_fenced_code_block, parse_fenced_code_block, parse_fenced_code_block_force, +}; +use header::{at_header, parse_header}; +use html_block::{at_html_block, at_html_block_interrupt, parse_html_block}; +use link_block::{at_link_block, parse_link_block}; +use list::{ + at_bullet_list_item, at_order_list_item, marker_followed_by_whitespace_or_eol, + parse_bullet_list_item, parse_order_list_item, textual_starts_with_ordered_marker, +}; +use quote::{ + at_quote, consume_quote_prefix, consume_quote_prefix_without_virtual, has_quote_prefix, + parse_quote, +}; use thematic_break_block::{at_thematic_break_block, parse_thematic_break_block}; use crate::MarkdownParser; +/// CommonMark requires 4 or more spaces for indented code blocks. 
+const INDENT_CODE_BLOCK_SPACES: usize = 4; + pub(crate) fn parse_document(p: &mut MarkdownParser) { let m = p.start(); let _ = parse_block_list(p); + // Bump the EOF token - required by the grammar + p.bump(T![EOF]); m.complete(p, MD_DOCUMENT); } @@ -25,8 +77,54 @@ pub(crate) fn parse_block_list(p: &mut MarkdownParser) -> ParsedSyntax { } pub(crate) fn parse_any_block(p: &mut MarkdownParser) { - if at_indent_code_block(p) { + let _ = parse_any_block_with_indent_code_policy(p, true); +} + +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +pub(crate) enum ParsedBlockKind { + Paragraph, + Other, +} + +pub(crate) fn parse_any_block_with_indent_code_policy( + p: &mut MarkdownParser, + allow_indent_code_block: bool, +) -> ParsedBlockKind { + let start = p.cur_range().start(); + // Handle standalone NEWLINE tokens as MdNewline nodes. + // This prevents inter-block NEWLINEs from becoming "newline-only paragraphs". + if p.at(NEWLINE) { + let m = p.start(); + p.bump(NEWLINE); + m.complete(p, MD_NEWLINE); + return ParsedBlockKind::Other; + } + if at_blank_line_start(p) { + while p.at(MD_TEXTUAL_LITERAL) { + let text = p.cur_text(); + if text == " " || text == "\t" { + p.parse_as_skipped_trivia_tokens(|p| p.bump(MD_TEXTUAL_LITERAL)); + } else { + break; + } + } + if p.at(NEWLINE) { + let m = p.start(); + p.bump(NEWLINE); + m.complete(p, MD_NEWLINE); + } + return ParsedBlockKind::Other; + } + + let kind = if allow_indent_code_block && at_indent_code_block(p) { parse_indent_code_block(p); + ParsedBlockKind::Other + } else if at_fenced_code_block(p) { + let _ = parse_fenced_code_block(p); + ParsedBlockKind::Other + } else if line_starts_with_fence(p) { + let _ = parse_fenced_code_block_force(p); + ParsedBlockKind::Other } else if at_thematic_break_block(p) { let break_block = try_parse(p, |p| { let break_block = parse_thematic_break_block(p); @@ -37,20 +135,1302 @@ pub(crate) fn parse_any_block(p: &mut MarkdownParser) { }); if break_block.is_err() { parse_paragraph(p); + 
ParsedBlockKind::Paragraph + } else { + ParsedBlockKind::Other + } + } else if at_header(p) { + // Check for too many hashes BEFORE try_parse (which would lose diagnostics on rewind) + let too_many = check_too_many_hashes(p); + let header_result = try_parse(p, |p| { + let header = parse_header(p); + if header.is_absent() { + return Err(()); + } + Ok(header) + }); + if header_result.is_err() { + // Emit diagnostic for too many hashes (outside try_parse to persist) + if let Some((range, count)) = too_many { + p.error(parse_error::too_many_hashes(p, range, count)); + } + // Not a valid header, parse as paragraph + parse_paragraph(p); + ParsedBlockKind::Paragraph + } else { + ParsedBlockKind::Other } + } else if at_quote(p) { + let _ = parse_quote(p); + ParsedBlockKind::Other + } else if at_bullet_list_item(p) { + let _ = parse_bullet_list_item(p); + ParsedBlockKind::Other + } else if at_order_list_item(p) || at_order_list_item_textual(p) { + let forced = if !at_order_list_item(p) && at_order_list_item_textual(p) { + p.set_force_ordered_list_marker(true); + p.force_relex_regular(); + true + } else { + false + }; + let parsed = parse_order_list_item(p); + if forced { + p.set_force_ordered_list_marker(false); + } + if parsed.is_absent() { + parse_paragraph(p); + ParsedBlockKind::Paragraph + } else { + ParsedBlockKind::Other + } + } else if at_html_block(p) { + // Parse as HTML block + let _ = parse_html_block(p); + ParsedBlockKind::Other + } else if at_link_block(p) { + // Try to parse as link reference definition + // Use try_parse to fall back to paragraph if not a valid definition + let link_result = try_parse(p, |p| { + let link = parse_link_block(p); + if link.is_absent() { + return Err(()); + } + Ok(link) + }); + if link_result.is_err() { + parse_paragraph(p); + ParsedBlockKind::Paragraph + } else { + ParsedBlockKind::Other + } + } else if at_block_interrupt(p) { + // We see a block interrupt but didn't match a concrete block above. 
+ // This can happen when list item indentation is still active. + if at_bullet_list_item_at_any_indent(p) { + let prev_required = p.state().list_item_required_indent; + let prev_virtual = p.state().virtual_line_start; + p.state_mut().list_item_required_indent = 0; + p.state_mut().virtual_line_start = Some(p.cur_range().start()); + let parsed = parse_bullet_list_item(p); + p.state_mut().list_item_required_indent = prev_required; + p.state_mut().virtual_line_start = prev_virtual; + if parsed.is_present() { + ParsedBlockKind::Other + } else { + parse_paragraph(p); + ParsedBlockKind::Paragraph + } + } else if at_order_list_item_at_any_indent(p) { + let prev_required = p.state().list_item_required_indent; + let prev_virtual = p.state().virtual_line_start; + p.state_mut().list_item_required_indent = 0; + p.state_mut().virtual_line_start = Some(p.cur_range().start()); + let parsed = parse_order_list_item(p); + p.state_mut().list_item_required_indent = prev_required; + p.state_mut().virtual_line_start = prev_virtual; + if parsed.is_present() { + ParsedBlockKind::Other + } else { + parse_paragraph(p); + ParsedBlockKind::Paragraph + } + } else { + parse_paragraph(p); + ParsedBlockKind::Paragraph + } + } else { + // Default fallback: parse as paragraph + parse_paragraph(p); + ParsedBlockKind::Paragraph + }; + + if start == p.cur_range().start() && std::env::var("CMARK_HANG_DEBUG").is_ok() { + eprintln!( + "parse_any_block stuck at {:?} {:?} => {:?}", + p.cur(), + p.cur_text(), + kind + ); + panic!("parse_any_block made no progress"); } + + kind +} + +fn with_virtual_line_start(p: &mut MarkdownParser, start: TextSize, op: F) -> R +where + F: FnOnce(&mut MarkdownParser) -> R, +{ + let prev_virtual = p.state().virtual_line_start; + p.state_mut().virtual_line_start = Some(start); + let result = op(p); + p.state_mut().virtual_line_start = prev_virtual; + result } +/// Check if we're at an indented code block (4+ spaces of indentation). 
+/// +/// Uses `line_start_leading_indent()` to correctly handle indentation when NEWLINE +/// tokens are explicit (not trivia). pub(crate) fn at_indent_code_block(p: &mut MarkdownParser) -> bool { - p.before_whitespace_count() > 4 + if !p.at_line_start() { + return false; + } + + if at_blank_line_start(p) { + return false; + } + + let indent = p.line_start_leading_indent(); + let required_indent = p.state().list_item_required_indent; + + // Inside a list item, we need 4 spaces BEYOND the list item's required indent. + // e.g., if list item requires 2 spaces, code block needs 2 + 4 = 6 spaces. + // Outside a list item (required_indent == 0), we need just 4 spaces. + let effective_indent = indent.saturating_sub(required_indent); + effective_indent >= INDENT_CODE_BLOCK_SPACES +} + +/// Parse an indented code block. +/// +/// Grammar: MdIndentCodeBlock = content: MdInlineItemList +/// +/// An indented code block consists of one or more lines with 4+ spaces +/// of indentation. The indentation is tracked in trivia. +/// +/// # NEWLINE Handling +/// +/// NEWLINE is an explicit token. We consume tokens until NEWLINE, +/// then check if the next line has proper indentation. +pub(crate) fn parse_indent_code_block(p: &mut MarkdownParser) { + if !at_indent_code_block(p) { + return; + } + + let m = p.start(); + let content = p.start(); + + // Parse content while we're at indented lines, including interior blank lines. 
+ loop { + if p.at(T![EOF]) { + break; + } + + // Always include NEWLINE tokens in code content + if p.at(NEWLINE) { + if newline_is_blank_line(p) && !has_following_indented_code_line(p) { + break; + } + let text_m = p.start(); + p.bump_remap(MD_TEXTUAL_LITERAL); + text_m.complete(p, MD_TEXTUAL); + p.set_virtual_line_start(); + p.set_virtual_line_start(); + continue; + } + + if p.at_line_start() { + if at_blank_line_start(p) { + if has_following_indented_code_line(p) { + consume_blank_line(p); + continue; + } + break; + } else if !at_indent_code_block(p) { + break; + } + if p.state().list_item_required_indent == 0 { + consume_indent_prefix(p, INDENT_CODE_BLOCK_SPACES); + } + } + + // Consume token as code content + let text_m = p.start(); + p.bump_remap(MD_TEXTUAL_LITERAL); + text_m.complete(p, MD_TEXTUAL); + } + + content.complete(p, MD_INLINE_ITEM_LIST); + m.complete(p, MD_INDENT_CODE_BLOCK); +} + +fn has_following_indented_code_line(p: &mut MarkdownParser) -> bool { + p.lookahead(|p| { + while p.at_line_start() && at_blank_line_start(p) { + while p.at(MD_TEXTUAL_LITERAL) { + let text = p.cur_text(); + if text == " " || text == "\t" { + p.bump(MD_TEXTUAL_LITERAL); + } else { + break; + } + } + + if p.at(NEWLINE) { + p.bump(NEWLINE); + } else { + break; + } + } + + at_indent_code_block(p) + }) +} + +fn newline_is_blank_line(p: &MarkdownParser) -> bool { + let start: usize = p.cur_range().start().into(); + if start == 0 { + return true; + } + + let source = p.source().source_text(); + let prev = source.as_bytes()[start - 1]; + prev == b'\n' || prev == b'\r' +} + +/// Consume exactly `indent` columns of leading whitespace at line start. 
+fn consume_indent_prefix(p: &mut MarkdownParser, indent: usize) { + if indent == 0 { + return; + } + + let mut consumed = 0usize; + while consumed < indent && p.at(MD_TEXTUAL_LITERAL) { + let text = p.cur_text(); + if text == " " { + consumed += 1; + } else if text == "\t" { + consumed += 4; + } else { + break; + } + + p.parse_as_skipped_trivia_tokens(|p| p.bump(MD_TEXTUAL_LITERAL)); + } +} + +/// Returns true when, at line start, only space/tab tokens precede the next NEWLINE or EOF (checked via lookahead; nothing is consumed). +fn at_blank_line_start(p: &mut MarkdownParser) -> bool { + if !p.at_line_start() { + return false; + } + + p.lookahead(|p| { + while p.at(MD_TEXTUAL_LITERAL) { + let text = p.cur_text(); + if text == " " || text == "\t" { + p.bump(MD_TEXTUAL_LITERAL); + } else { + break; + } + } + + p.at(NEWLINE) || p.at(T![EOF]) + }) +} + +fn consume_blank_line(p: &mut MarkdownParser) { + while p.at(MD_TEXTUAL_LITERAL) { + let text = p.cur_text(); + if text == " " || text == "\t" { + p.parse_as_skipped_trivia_tokens(|p| p.bump(MD_TEXTUAL_LITERAL)); + } else { + break; + } + } + + if p.at(NEWLINE) { + let text_m = p.start(); + p.bump_remap(MD_TEXTUAL_LITERAL); + text_m.complete(p, MD_TEXTUAL); + } +} + +/// Parse a paragraph block, or a setext heading if followed by an underline. +/// +/// A paragraph is a sequence of non-blank lines that cannot be interpreted as +/// other kinds of blocks. The paragraph ends at a blank line or EOF. +/// +/// If the paragraph is followed by a setext heading underline (=== or ---), +/// it becomes a setext heading instead. +/// +/// Grammar: MdParagraph = list: MdInlineItemList hard_line: MdHardLine?
+/// Grammar: MdSetextHeader = content: MdInlineItemList underline: 'md_setext_underline_literal' +pub(crate) fn parse_paragraph(p: &mut MarkdownParser) { + let m = p.start(); + + let inline_start: usize = p.cur_range().start().into(); + parse_inline_item_list(p); + let inline_end: usize = p.cur_range().start().into(); + + let has_inline_content = inline_has_non_whitespace(p, inline_start, inline_end); + let allow_setext = has_inline_content && allow_setext_heading(p); + + // Check if this paragraph is followed by a setext heading underline + // MD_SETEXT_UNDERLINE_LITERAL is for `=` underlines + // MD_THEMATIC_BREAK_LITERAL with only `-` is also a setext underline (H2) + if allow_setext && p.at(MD_SETEXT_UNDERLINE_LITERAL) { + // This is a setext heading (H1 with `=`) - consume the underline + p.bump(MD_SETEXT_UNDERLINE_LITERAL); + m.complete(p, MD_SETEXT_HEADER); + } else if allow_setext && p.at(MD_THEMATIC_BREAK_LITERAL) && is_dash_only_thematic_break(p) { + // This is a setext heading (H2 with `-`) - remap token and consume + p.bump_remap(MD_SETEXT_UNDERLINE_LITERAL); + m.complete(p, MD_SETEXT_HEADER); + } else { + m.complete(p, MD_PARAGRAPH); + } +} + +fn inline_has_non_whitespace(p: &MarkdownParser, start: usize, end: usize) -> bool { + if end <= start { + return false; + } + + let source = p.source().source_text(); + if end > source.len() { + return false; + } + + !source[start..end] + .trim_matches(|c: char| matches!(c, ' ' | '\t' | '\r' | '\n')) + .is_empty() +} + +/// Check if the current thematic break token contains only dashes. +/// This is used to detect H2 setext underlines. 
+fn is_dash_only_thematic_break(p: &MarkdownParser) -> bool { + let text = p.cur_text(); + !text.is_empty() && text.trim().chars().all(|c| c == '-') +} + +fn allow_setext_heading(p: &MarkdownParser) -> bool { + let required_indent = p.state().list_item_required_indent; + if required_indent > 0 { + let indent = p.line_start_leading_indent(); + if indent < required_indent { + return false; + } + } + + if p.state().list_item_required_indent > 0 && p.at(MD_SETEXT_UNDERLINE_LITERAL) { + let text = p.cur_text().trim_matches(|c| c == ' ' || c == '\t'); + if text == "-" { + return false; + } + } + + let depth = p.state().block_quote_depth; + if depth == 0 { + return true; + } + + line_has_quote_prefix(p, depth) +} + +fn line_has_quote_prefix(p: &MarkdownParser, depth: usize) -> bool { + if depth == 0 { + return false; + } + + let source = p.source().source_text(); + let start: usize = p.cur_range().start().into(); + let line_start = source[..start].rfind('\n').map_or(0, |idx| idx + 1); + + let mut idx = line_start; + let mut indent = 0usize; + while idx < start { + match source.as_bytes()[idx] { + b' ' => { + indent += 1; + idx += 1; + } + b'\t' => { + indent += 4; + idx += 1; + } + _ => break, + } + if indent > 3 { + return false; + } + } + + for _ in 0..depth { + if idx >= start || source.as_bytes()[idx] != b'>' { + return false; + } + idx += 1; + if idx < start { + let c = source.as_bytes()[idx]; + if c == b' ' || c == b'\t' { + idx += 1; + } + } + } + + true +} + +/// Parse the inline item list within a block. +/// +/// Grammar: MdInlineItemList = AnyMdInline* +/// +/// Inline content continues until we hit EOF, a blank line (paragraph boundary), +/// a setext heading underline, or a block-level construct that can interrupt +/// paragraphs per CommonMark. +/// +/// # NEWLINE Handling +/// +/// NEWLINE is an explicit token (not trivia). 
When we hit NEWLINE: +/// - If it's a blank line (NEWLINE + optional whitespace + NEWLINE/EOF) → stop +/// - Otherwise it's a soft line break → consume and continue to next line +pub(crate) fn parse_inline_item_list(p: &mut MarkdownParser) { + let m = p.start(); + let prev_emphasis_context = set_inline_emphasis_context(p); + let quote_depth = p.state().block_quote_depth; + if quote_depth > 0 && p.at_line_start() && has_quote_prefix(p, quote_depth) { + consume_quote_prefix(p, quote_depth); + } + let inline_start: usize = p.cur_range().start().into(); + let mut has_content = false; + + loop { + // EOF ends inline content + if p.at(T![EOF]) { + break; + } + + // NEWLINE handling: check for blank line (paragraph boundary) + if p.at(NEWLINE) { + if p.at_blank_line() { + // Blank line = paragraph boundary + // Consume this NEWLINE but stop (the second NEWLINE stays for block parser) + let text_m = p.start(); + p.bump_remap(MD_TEXTUAL_LITERAL); + text_m.complete(p, MD_TEXTUAL); + break; + } + + let quote_depth = p.state().block_quote_depth; + if quote_depth > 0 { + let is_quote_blank_line = p.lookahead(|p| { + p.bump(NEWLINE); + if !has_quote_prefix(p, quote_depth) { + return false; + } + consume_quote_prefix_without_virtual(p, quote_depth); + while p.at(MD_TEXTUAL_LITERAL) + && p.cur_text().chars().all(|c| c == ' ' || c == '\t') + { + p.bump(MD_TEXTUAL_LITERAL); + } + p.at(NEWLINE) || p.at(T![EOF]) + }); + if is_quote_blank_line { + let text_m = p.start(); + p.bump_remap(MD_TEXTUAL_LITERAL); + text_m.complete(p, MD_TEXTUAL); + break; + } + } + + // Not a blank line - this is a soft line break within paragraph + // Consume the NEWLINE as textual content (remap to MD_TEXTUAL_LITERAL) + let text_m = p.start(); + p.bump_remap(MD_TEXTUAL_LITERAL); + text_m.complete(p, MD_TEXTUAL); + + // If we're inside a block quote, only consume the quote prefix + // when it doesn't start a new block (e.g., a nested quote). 
+ if quote_depth > 0 && has_quote_prefix(p, quote_depth) { + enum QuoteBreakKind { + None, + SetextUnderline, + Other, + } + + let break_kind = p.lookahead(|p| { + consume_quote_prefix_without_virtual(p, quote_depth); + with_virtual_line_start(p, p.cur_range().start(), |p| { + if p.at(MD_SETEXT_UNDERLINE_LITERAL) + || (p.at(MD_THEMATIC_BREAK_LITERAL) && is_dash_only_thematic_break(p)) + { + QuoteBreakKind::SetextUnderline + } else if at_block_interrupt(p) || textual_looks_like_list_marker(p) { + QuoteBreakKind::Other + } else { + QuoteBreakKind::None + } + }) + }); + match break_kind { + QuoteBreakKind::SetextUnderline => { + // Consume the quote prefix so the setext underline is visible + // to the paragraph parser. + consume_quote_prefix(p, quote_depth); + break; + } + QuoteBreakKind::Other => { + break; + } + QuoteBreakKind::None => { + consume_quote_prefix(p, quote_depth); + } + } + } + if quote_depth > 0 && p.at(R_ANGLE) && !has_quote_prefix(p, quote_depth) { + consume_partial_quote_prefix(p, quote_depth); + } + + // After crossing a line, check for block-level constructs and setext underlines + // Check if we're at a setext heading underline + if has_content && p.at(MD_SETEXT_UNDERLINE_LITERAL) && allow_setext_heading(p) { + break; + } + + // Check if we're at a thematic break that could be a setext underline + if has_content && p.at(MD_THEMATIC_BREAK_LITERAL) && is_dash_only_thematic_break(p) { + break; + } + + // If we're inside a list item and the next line meets the required indent, + // check for block interrupts after skipping that indent. This allows + // nested list markers like "\t - baz" to break out of the paragraph. 
+ let required_indent = p.state().list_item_required_indent; + if required_indent > 0 { + let indent = p.line_start_leading_indent(); + if indent >= required_indent { + let interrupts = p.lookahead(|p| { + p.skip_line_indent(required_indent); + let prev_required = p.state().list_item_required_indent; + with_virtual_line_start(p, p.cur_range().start(), |p| { + p.state_mut().list_item_required_indent = 0; + let breaks = at_block_interrupt(p) || textual_looks_like_list_marker(p); + p.state_mut().list_item_required_indent = prev_required; + breaks + }) + }); + if interrupts { + break; + } + } + } + + // Check for block-level constructs that can interrupt paragraphs + if line_starts_with_fence(p) { + break; + } + if p.at(MD_TEXTUAL_LITERAL) { + let text = p.cur_text(); + if text.starts_with("```") || text.starts_with("~~~") { + break; + } + } + if at_block_interrupt(p) { + break; + } + + // Also check for list markers that appear as textual content. + // Inside inline content, '-' is lexed as MD_TEXTUAL_LITERAL, not MINUS, + // so at_block_interrupt won't detect them. Per CommonMark §5.1, list + // items can interrupt paragraphs (bullet lists always, ordered lists + // only if they start with 1). + if textual_looks_like_list_marker(p) { + break; + } + + // Per CommonMark §5.2, when inside a list item, check indentation. + // If sufficient indentation, skip it. If insufficient, this is + // "lazy continuation" - the content continues without meeting the + // indent requirement (at_block_interrupt already checked above). + if required_indent > 0 { + let indent = p.line_start_leading_indent(); + if indent >= required_indent { + // Sufficient indentation - skip it + p.skip_line_indent(required_indent); + } + // else: Lazy continuation - don't break, don't skip indent. + // The at_block_interrupt check above handles real interruptions. + // Content continues at its actual position. + } + + // For plain paragraphs, strip up to 4 leading spaces on continuation lines. 
+ if required_indent == 0 { + p.skip_line_indent(INDENT_CODE_BLOCK_SPACES); + } + + // Continue parsing on the new line + continue; + } + + // Check if we're at a setext heading underline (stop for paragraph to handle) + if has_content && p.at(MD_SETEXT_UNDERLINE_LITERAL) && allow_setext_heading(p) { + break; + } + + // Check if we're at a thematic break that could be a setext underline + // (dash-only thematic breaks following paragraph content are setext H2) + if has_content && p.at(MD_THEMATIC_BREAK_LITERAL) && is_dash_only_thematic_break(p) { + break; + } + + // Per CommonMark, certain block-level constructs can interrupt paragraphs + // without requiring a blank line. Check for these at line start. + if p.has_preceding_line_break() && at_block_interrupt(p) { + break; + } + + // Also check for list markers in textual content (see comment above) + if p.has_preceding_line_break() && textual_looks_like_list_marker(p) { + break; + } + + // Parse inline content (stops at NEWLINE via at_inline_end) + if parse_any_inline(p).is_absent() { + break; + } + let inline_end: usize = p.cur_range().start().into(); + has_content = inline_has_non_whitespace(p, inline_start, inline_end); + } + + m.complete(p, MD_INLINE_ITEM_LIST); + p.set_emphasis_context(prev_emphasis_context); +} + +/// Build an emphasis context for the current inline list and install it on the parser. +/// Returns the previous context so it can be restored. +fn set_inline_emphasis_context( + p: &mut MarkdownParser, +) -> Option { + let source_len = inline_list_source_len(p); + let source = p.source_after_current(); + let inline_source = if source_len <= source.len() { + &source[..source_len] + } else { + source + }; + let base_offset = u32::from(p.cur_range().start()) as usize; + let context = crate::syntax::inline::EmphasisContext::new(inline_source, base_offset); + p.set_emphasis_context(Some(context)) +} + +/// Compute the byte length of the inline list starting at the current token. 
+fn inline_list_source_len(p: &mut MarkdownParser) -> usize { + p.lookahead(|p| { + let mut len = 0usize; + + loop { + if p.at(T![EOF]) { + break; + } + + if p.at(NEWLINE) { + if p.at_blank_line() { + len += p.cur_text().len(); + p.bump(NEWLINE); + break; + } + + len += p.cur_text().len(); + p.bump(NEWLINE); + + let quote_depth = p.state().block_quote_depth; + if quote_depth > 0 && has_quote_prefix(p, quote_depth) { + let breaks_paragraph = p.lookahead(|p| { + consume_quote_prefix_without_virtual(p, quote_depth); + with_virtual_line_start(p, p.cur_range().start(), |p| { + p.at(MD_SETEXT_UNDERLINE_LITERAL) + || (p.at(MD_THEMATIC_BREAK_LITERAL) + && is_dash_only_thematic_break(p)) + || at_block_interrupt(p) + }) + }); + if breaks_paragraph { + break; + } + consume_quote_prefix_without_virtual(p, quote_depth); + } + + if p.at(MD_SETEXT_UNDERLINE_LITERAL) && allow_setext_heading(p) { + break; + } + + if p.at(MD_THEMATIC_BREAK_LITERAL) && is_dash_only_thematic_break(p) { + break; + } + + if quote_depth > 0 && p.at(R_ANGLE) && !has_quote_prefix(p, quote_depth) { + consume_partial_quote_prefix_lookahead(p, quote_depth, &mut len); + } + + if line_starts_with_fence(p) { + break; + } + + if at_block_interrupt(p) { + break; + } + + let required_indent = p.state().list_item_required_indent; + if required_indent > 0 { + let indent = p.line_start_leading_indent(); + if indent < required_indent { + break; + } + + let mut consumed = 0usize; + while consumed < required_indent && p.at(MD_TEXTUAL_LITERAL) { + let text = p.cur_text(); + if text.is_empty() || !text.chars().all(|c| c == ' ' || c == '\t') { + break; + } + + let indent = text + .chars() + .map(|c| if c == '\t' { 4 } else { 1 }) + .sum::(); + + if consumed + indent > required_indent { + break; + } + + consumed += indent; + len += text.len(); + p.bump(MD_TEXTUAL_LITERAL); + } + } + + continue; + } + + if p.at(MD_SETEXT_UNDERLINE_LITERAL) && allow_setext_heading(p) { + break; + } + + if p.at(MD_THEMATIC_BREAK_LITERAL) && 
is_dash_only_thematic_break(p) { + break; + } + + if p.has_preceding_line_break() && at_block_interrupt(p) { + break; + } + + len += p.cur_text().len(); + p.bump(p.cur()); + } + + len + }) +} + +fn line_starts_with_fence(p: &MarkdownParser) -> bool { + if !p.at_line_start() { + return false; + } + + let source = p.source_after_current(); + let mut indent = 0usize; + let mut offset = 0usize; + for (idx, ch) in source.char_indices() { + match ch { + ' ' => indent += 1, + '\t' => indent += 4 - (indent % 4), + _ => { + offset = idx; + break; + } + } + if indent > 3 { + return false; + } + } + + let rest = &source[offset..]; + rest.starts_with("```") || rest.starts_with("~~~") +} + +fn consume_partial_quote_prefix(p: &mut MarkdownParser, depth: usize) -> bool { + let mut consumed = 0usize; + while consumed < depth && p.at(R_ANGLE) { + p.parse_as_skipped_trivia_tokens(|p| p.bump(R_ANGLE)); + if p.at(MD_TEXTUAL_LITERAL) { + let text = p.cur_text(); + if text == " " || text == "\t" { + p.parse_as_skipped_trivia_tokens(|p| p.bump(MD_TEXTUAL_LITERAL)); + } + } + consumed += 1; + } + consumed > 0 +} + +fn consume_partial_quote_prefix_lookahead( + p: &mut MarkdownParser, + depth: usize, + len: &mut usize, +) -> bool { + let mut consumed = 0usize; + while consumed < depth && p.at(R_ANGLE) { + *len += p.cur_text().len(); + p.bump(R_ANGLE); + if p.at(MD_TEXTUAL_LITERAL) { + let text = p.cur_text(); + if text == " " || text == "\t" { + *len += text.len(); + p.bump(MD_TEXTUAL_LITERAL); + } + } + consumed += 1; + } + consumed > 0 +} + +/// Check if we're at a block-level construct that can interrupt a paragraph. 
+/// +/// Per CommonMark, these constructs can interrupt paragraphs: +/// - ATX headings (# followed by space or EOL) +/// - Fenced code blocks (``` or ~~~) +/// - Block quotes (>) +/// - Thematic breaks (---, ***, ___) +/// - List items (-, *, +, or ordered markers) - with restrictions +/// - HTML blocks (certain types) +/// +/// Note: Setext headings and indented code blocks do NOT interrupt paragraphs. +/// Also, lines with 4+ spaces of indentation cannot start these constructs +/// (they would be indented code blocks instead). +/// +/// ## List Interruption Rules (CommonMark §5.2) +/// +/// - Bullet lists can interrupt paragraphs if item has content OR marker is followed by blank line +/// - Ordered lists can interrupt paragraphs ONLY if starting with `1` AND not empty +pub(crate) fn at_block_interrupt(p: &mut MarkdownParser) -> bool { + // Per CommonMark, lines indented 4+ spaces cannot start block constructs + // that interrupt paragraphs - they would be indented code blocks. + // Tabs count as 4 spaces per CommonMark §2.2. + if p.line_start_leading_indent() >= 4 { + // Inside list items, allow list markers at the current indent to + // interrupt paragraphs (nested lists). 
+ if (p.state().list_nesting_depth > 0 || p.state().list_item_required_indent > 0) + && (at_bullet_list_item(p) || at_order_list_item(p)) + { + return true; + } + return false; + } + + // ATX heading: # at line start (must have space/tab after per CommonMark §4.2) + // Use checkpoint to look ahead and verify it's a valid heading + if at_header(p) { + return is_valid_atx_heading_start(p); + } + + // Fenced code block (``` or ~~~) - lexer already ensures line start + if at_fenced_code_block(p) { + return true; + } + + // Block quote (>) + if at_quote(p) { + return true; + } + + // Thematic break (---, ***, ___) - lexer already ensures line start + if at_thematic_break_block(p) { + return true; + } + + // Bullet list item (-, *, +) + // Per CommonMark §5.2: bullet lists can interrupt paragraphs if: + // - The item has content, OR + // - The item is empty but followed by a blank line + // When inside a list, we also need to check for list items at ANY indent + // (not just at the current context's indent) because a less-indented list + // marker would end the current list item and start a sibling/parent item. + if at_bullet_list_item(p) || at_bullet_list_item_at_any_indent(p) { + let in_list = p.state().list_nesting_depth > 0; + if in_list || can_bullet_interrupt_paragraph(p) { + return true; + } + } + + // Ordered list item (1., 2), etc.) + // Per CommonMark §5.2: ordered lists can interrupt TOP-LEVEL paragraphs only if: + // - Starting with 1 (not 2, 3, etc.), AND + // - The item has content (empty ordered items cannot interrupt) + // Inside a list context, any ordered marker can start a new sibling item. 
+ if at_order_list_item(p) || at_order_list_item_at_any_indent(p) { + let in_list = p.state().list_nesting_depth > 0; + if in_list || (is_ordered_list_starts_with_one(p) && !is_empty_list_item(p)) { + return true; + } + } + + // HTML block (type 7 does not interrupt paragraphs) + if at_html_block_interrupt(p) { + return true; + } + + false +} + +/// Check if the current token looks like a list marker when lexed as textual content. +/// +/// Inside inline content, list markers like `- ` are lexed as MD_TEXTUAL_LITERAL. +/// This function checks if a textual token at line start looks like it would be +/// a list marker in block context. +fn textual_looks_like_list_marker(p: &mut MarkdownParser) -> bool { + if !p.at(MD_TEXTUAL_LITERAL) { + return false; + } + + let text = p.cur_text(); + + // Bullet marker: single -, *, or + followed by space/tab or EOF + if text == "-" || text == "*" || text == "+" { + // Check if followed by space, tab, or at end of line + return p.lookahead(|p| { + p.bump(MD_TEXTUAL_LITERAL); + if p.at(T![EOF]) || p.at(NEWLINE) { + return true; + } + if p.at(MD_TEXTUAL_LITERAL) { + let next = p.cur_text(); + return next.starts_with(' ') || next.starts_with('\t'); + } + false + }); + } + + // Ordered marker: per CommonMark §5.1, only ordered lists starting with 1 + // can interrupt paragraphs. Check if text is "1." or "1)" pattern. + if let Some(rest) = text.strip_prefix('1') { + // "1." or "1)" followed by space (the space might be in next token) + if rest == "." || rest == ")" { + return p.lookahead(|p| { + p.bump(MD_TEXTUAL_LITERAL); + if p.at(T![EOF]) || p.at(NEWLINE) { + return true; + } + if p.at(MD_TEXTUAL_LITERAL) { + let next = p.cur_text(); + return next.starts_with(' ') || next.starts_with('\t'); + } + false + }); + } + // "1. " or "1) " all in one token + if rest.starts_with(". 
") + || rest.starts_with(".\t") + || rest.starts_with(") ") + || rest.starts_with(")\t") + { + return true; + } + } + + false +} + +/// Check if an ordered list marker starts with the number 1. +/// +/// Per CommonMark §5.2: "In order to solve of an ambiguity in the spec, +/// only ordered lists starting with 1 can interrupt paragraphs." +/// +/// This prevents accidental list creation from wrapped lines like: +/// "The number of windows is 14. The number of doors is 6." +fn is_ordered_list_starts_with_one(p: &mut MarkdownParser) -> bool { + if !p.at(MD_ORDERED_LIST_MARKER) { + return false; + } + + // The marker text includes digits + delimiter (e.g., "1.", "2)", "10.") + // We want exactly "1." or "1)" - not "10.", "11.", etc. + let text = p.cur_text(); + text == "1." || text == "1)" +} + +/// Check for a bullet list item at any valid top-level indent (0-3 spaces). +/// +/// This is used for detecting paragraph interruption when we're inside a nested +/// context (like a list item) but need to detect list markers at any level, +/// not just at the current context's indent level. 
+fn at_bullet_list_item_at_any_indent(p: &mut MarkdownParser) -> bool { + p.lookahead(|p| { + if !p.at_line_start() { + return false; + } + + // Top-level list items can have 0-3 spaces of leading indent + let indent = p.line_start_leading_indent(); + if indent > 3 { + return false; + } + + // Skip leading whitespace tokens + while p.at(MD_TEXTUAL_LITERAL) && p.cur_text().chars().all(|c| c == ' ' || c == '\t') { + p.bump(MD_TEXTUAL_LITERAL); + } + + // Check for -, *, or + marker + if p.at(MD_SETEXT_UNDERLINE_LITERAL) { + let trimmed = p.cur_text().trim_matches(|c| c == ' ' || c == '\t'); + if trimmed != "-" { + return false; + } + } else if !p.at(T![-]) && !p.at(T![*]) && !p.at(T![+]) { + return false; + } + + if p.at(MD_SETEXT_UNDERLINE_LITERAL) { + p.bump_remap(T![-]); + } else { + p.bump(p.cur()); + } + marker_followed_by_whitespace_or_eol(p) + }) +} + +/// Check for an ordered list item at any valid top-level indent (0-3 spaces). +/// +/// This is used for detecting paragraph interruption when we're inside a nested +/// context (like a list item) but need to detect list markers at any level. 
+fn at_order_list_item_at_any_indent(p: &mut MarkdownParser) -> bool { + p.lookahead(|p| { + if !p.at_line_start() { + return false; + } + + // Top-level list items can have 0-3 spaces of leading indent + let indent = p.line_start_leading_indent(); + if indent > 3 { + return false; + } + + // Skip leading whitespace tokens + while p.at(MD_TEXTUAL_LITERAL) && p.cur_text().chars().all(|c| c == ' ' || c == '\t') { + p.bump(MD_TEXTUAL_LITERAL); + } + + // Check for ordered list marker (lexer produces MD_ORDERED_LIST_MARKER) + p.at(MD_ORDERED_LIST_MARKER) + }) +} + +fn at_order_list_item_textual(p: &mut MarkdownParser) -> bool { + p.lookahead(|p| { + if !p.at_line_start() { + return false; + } + + let indent = p.line_start_leading_indent(); + let base_indent = if p.state().virtual_line_start == Some(p.cur_range().start()) + && p.state().list_item_required_indent > 0 + { + 0 + } else { + p.state().list_item_required_indent + }; + + if base_indent == 0 { + if indent > 3 { + return false; + } + } else if indent < base_indent || indent > base_indent + 3 { + return false; + } + + while p.at(MD_TEXTUAL_LITERAL) && p.cur_text().chars().all(|c| c == ' ' || c == '\t') { + p.bump(MD_TEXTUAL_LITERAL); + } + + p.at(MD_TEXTUAL_LITERAL) && textual_starts_with_ordered_marker(p.cur_text()) + }) +} + +/// Check if a bullet list item can interrupt a top-level paragraph. 
+/// +/// Per CommonMark §5.2: A bullet list can interrupt a paragraph if: +/// - The list item has content (at least one character after marker), OR +/// - The list item is empty but is followed by a blank line +/// +/// This allows patterns like: +/// ```markdown +/// Paragraph text +/// + +/// +/// Next paragraph (interrupted by empty bullet + blank line) +/// ``` +fn can_bullet_interrupt_paragraph(p: &mut MarkdownParser) -> bool { + let checkpoint = p.checkpoint(); + + // Bump the bullet marker (-, *, or +) + if p.at(T![-]) { + p.bump(T![-]); + } else if p.at(T![*]) { + p.bump(T![*]); + } else if p.at(T![+]) { + p.bump(T![+]); + } else { + p.rewind(checkpoint); + return false; + } + + if !marker_followed_by_whitespace_or_eol(p) { + p.rewind(checkpoint); + return false; + } + + // Check what follows the marker + let result = if p.at(T![EOF]) { + // Empty item at EOF - cannot interrupt (no blank line follows) + false + } else if p.at(NEWLINE) { + // Empty item - check if followed by blank line + p.at_blank_line() + } else if p.at(MD_TEXTUAL_LITERAL) && is_whitespace_only(p.cur_text()) { + p.bump(MD_TEXTUAL_LITERAL); + if p.at(NEWLINE) { + p.at_blank_line() + } else { + false + } + } else { + // Has content after marker - can interrupt + true + }; + + p.rewind(checkpoint); + result +} + +/// Check if the current list item is empty (no content after marker). +/// +/// Per CommonMark §5.2: "an empty list item cannot interrupt a paragraph." +/// +/// Uses lookahead to check if only whitespace/newline follows the marker. 
+fn is_empty_list_item(p: &mut MarkdownParser) -> bool { + let checkpoint = p.checkpoint(); + + // Bump the list marker + if p.at(MD_ORDERED_LIST_MARKER) { + p.bump(MD_ORDERED_LIST_MARKER); + } else if p.at(T![-]) { + p.bump(T![-]); + } else if p.at(T![*]) { + p.bump(T![*]); + } else if p.at(T![+]) { + p.bump(T![+]); + } else { + p.rewind(checkpoint); + return false; + } + + if !marker_followed_by_whitespace_or_eol(p) { + p.rewind(checkpoint); + return false; + } + + // Check what follows the marker + if p.at(MD_TEXTUAL_LITERAL) && is_whitespace_only(p.cur_text()) { + p.bump(MD_TEXTUAL_LITERAL); + } + + // Empty if: EOF or NEWLINE after optional whitespace + let is_empty = p.at(T![EOF]) || p.at(NEWLINE); + + p.rewind(checkpoint); + is_empty } -pub(crate) fn parse_indent_code_block(_p: &mut MarkdownParser) { - todo!() +fn is_whitespace_only(text: &str) -> bool { + !text.is_empty() && text.chars().all(|c| c == ' ' || c == '\t') } -pub(crate) fn parse_paragraph(_p: &mut MarkdownParser) { - todo!() +/// Check if the current position has too many hashes for an ATX heading (>6). +/// +/// Returns `Some((range, count))` if there are >6 hashes, `None` otherwise. +/// This is used to emit a diagnostic BEFORE `try_parse` which would lose it on rewind. +fn check_too_many_hashes(p: &mut MarkdownParser) -> Option<(biome_rowan::TextRange, usize)> { + p.lookahead(|p| { + p.skip_line_indent(3); + + if !p.at(T![#]) { + return None; + } + + let start = p.cur_range().start(); + let mut count = 0; + + while p.at(T![#]) { + p.bump(T![#]); + count += 1; + } + + let end = p.cur_range().start(); + + if count > 6 { + Some((biome_rowan::TextRange::new(start, end), count)) + } else { + None + } + }) +} + +/// Check if we're at a valid ATX heading start (1-6 `#` followed by space or EOL). +/// Uses lookahead to verify without consuming tokens. 
+fn is_valid_atx_heading_start(p: &mut MarkdownParser) -> bool { + p.lookahead(|p| { + p.skip_line_indent(3); + + let mut hash_count = 0; + + // Count consecutive hashes (must be 1-6) + while p.at(T![#]) && hash_count <= 6 { + p.bump(T![#]); + hash_count += 1; + } + + // Too many hashes - not a valid heading + if hash_count > 6 { + return false; + } + + // Check if followed by space, tab, or EOL/EOF per CommonMark §4.2 + // In Markdown, whitespace is significant and included in token text. + let text = p.cur_text(); + p.at(T![EOF]) + || p.has_preceding_line_break() + || text.starts_with(' ') + || text.starts_with('\t') + }) +} + +/// Parse any inline element. +/// +/// Dispatches to the appropriate inline parser based on the current token. +pub(crate) fn parse_any_inline(p: &mut MarkdownParser) -> ParsedSyntax { + inline::parse_any_inline(p) +} + +/// Parse a textual inline element. +/// +/// Grammar: MdTextual = value: 'md_textual_literal' +/// +/// For now, we treat any non-EOF token as textual content to ensure +/// the paragraph parser makes progress. In later prompts, we'll add +/// proper handling for inline elements like emphasis, links, etc. +pub(crate) fn parse_textual(p: &mut MarkdownParser) -> ParsedSyntax { + if p.at(T![EOF]) { + return Absent; + } + let m = p.start(); + // Remap any token to MD_TEXTUAL_LITERAL so the syntax factory accepts it. + // This is necessary because tokens like L_PAREN, R_PAREN, etc. are lexed + // as their specific token kinds, but MdTextual expects MD_TEXTUAL_LITERAL. + p.bump_remap(MD_TEXTUAL_LITERAL); + Present(m.complete(p, MD_TEXTUAL)) } /// Attempt to parse some input with the given parsing function. If parsing diff --git a/crates/biome_markdown_parser/src/syntax/fenced_code_block.rs b/crates/biome_markdown_parser/src/syntax/fenced_code_block.rs new file mode 100644 index 000000000000..3cac786d94eb --- /dev/null +++ b/crates/biome_markdown_parser/src/syntax/fenced_code_block.rs @@ -0,0 +1,405 @@ +//! 
Fenced code block parsing for Markdown (CommonMark §4.5). +//! +//! A fenced code block begins with a code fence: at least three consecutive +//! backtick (`) or tilde (~) characters. It ends with a closing fence of at +//! least as many characters of the same type, or at the end of the document. +//! +//! # Examples +//! +//! ````markdown +//! ```rust +//! fn main() {} +//! ``` +//! +//! ~~~python +//! def hello(): +//! pass +//! ~~~ +//! ```` +//! +//! # Info String +//! +//! The opening fence may be followed by an info string (language identifier) +//! that can be used for syntax highlighting. + +use crate::parser::MarkdownParser; +use biome_markdown_syntax::{T, kind::MarkdownSyntaxKind::*}; +use biome_parser::{ + Parser, + prelude::{ + ParsedSyntax::{self, *}, + TokenSource, + }, +}; + +use super::parse_error::unterminated_fenced_code; +use super::quote::{consume_quote_prefix, has_quote_prefix}; + +/// Check if we're at a fenced code block (``` or ~~~). +pub(crate) fn at_fenced_code_block(p: &mut MarkdownParser) -> bool { + p.lookahead(|p| { + if !p.at_start_of_input() && !is_line_start_within_indent(p, 3) { + return false; + } + p.skip_line_indent(3); + + let rest = p.source_after_current(); + let is_backtick_fence = rest.starts_with("```"); + let is_tilde_fence = rest.starts_with("~~~"); + if !is_backtick_fence && !is_tilde_fence { + return false; + } + if is_backtick_fence && info_string_has_backtick(p) { + return false; + } + true + }) +} + +/// Parse a fenced code block. 
+/// +/// Grammar: +/// MdFencedCodeBlock = +/// l_fence: ('```' | '~~~') +/// code_list: MdCodeNameList +/// content: MdInlineItemList +/// r_fence: ('```' | '~~~') +pub(crate) fn parse_fenced_code_block(p: &mut MarkdownParser) -> ParsedSyntax { + parse_fenced_code_block_impl(p, false) +} + +pub(crate) fn parse_fenced_code_block_force(p: &mut MarkdownParser) -> ParsedSyntax { + parse_fenced_code_block_impl(p, true) +} + +fn parse_fenced_code_block_impl(p: &mut MarkdownParser, force: bool) -> ParsedSyntax { + if !force && !at_fenced_code_block(p) { + return Absent; + } + + let m = p.start(); + + let mut fence_indent = p.line_start_leading_indent(); + if p.state().list_item_required_indent > 0 + && p.state().virtual_line_start == Some(p.cur_range().start()) + { + fence_indent += p.state().list_item_required_indent; + } + p.skip_line_indent(3); + + // Track which fence type we opened with (must close with same type per CommonMark) + let text = p.cur_text(); + let is_textual_tilde_fence = p.at(MD_TEXTUAL_LITERAL) && text.starts_with("~~~"); + let is_tilde_fence = + p.at(TRIPLE_TILDE) || (p.at(TILDE) && p.cur_text().len() >= 3) || is_textual_tilde_fence; + let fence_type = if is_tilde_fence { "~~~" } else { "```" }; + let fence_len = fence_prefix_len(p.cur_text(), if is_tilde_fence { '~' } else { '`' }); + + // Record opening fence range for diagnostic + let opening_range = p.cur_range(); + + // Opening fence (``` or ~~~) + if is_tilde_fence { + if p.at(TRIPLE_TILDE) { + p.bump(TRIPLE_TILDE); + } else { + p.bump_remap(TRIPLE_TILDE); + } + } else if p.at(T!["```"]) { + p.bump(T!["```"]); + } else { + p.bump_remap(T!["```"]); + } + + // Optional language info string (MdCodeNameList) + parse_code_name_list(p); + + // Content (everything until closing fence) + parse_code_content(p, is_tilde_fence, fence_len, fence_indent); + + // Closing fence - emit specific diagnostic if missing + // Must match the opening fence type + let has_closing = at_closing_fence(p, 
is_tilde_fence, fence_len); + + if has_closing { + if p.state().list_item_required_indent > 0 && p.at_line_start() { + p.skip_line_indent(p.state().list_item_required_indent); + } + p.skip_line_indent(3); + if is_tilde_fence { + if p.at(TRIPLE_TILDE) { + p.bump(TRIPLE_TILDE); + } else { + p.bump_remap(TRIPLE_TILDE); + } + } else if p.at(T!["```"]) { + p.bump(T!["```"]); + } else { + p.bump_remap(T!["```"]); + } + } else { + // Emit diagnostic for unterminated code block + p.error(unterminated_fenced_code(p, opening_range, fence_type)); + } + + Present(m.complete(p, MD_FENCED_CODE_BLOCK)) +} + +fn fence_prefix_len(text: &str, fence_char: char) -> usize { + text.chars().take_while(|c| *c == fence_char).count() +} + +/// Parse the code name list (language info string). +/// Grammar: MdCodeNameList = MdTextual* +/// +/// The language name is on the same line as the opening fence. +/// If the current token has a preceding line break or is NEWLINE, the code block has no language. +fn parse_code_name_list(p: &mut MarkdownParser) { + let m = p.start(); + + // If the current token is already on a new line, there's no language name + if p.at_inline_end() { + m.complete(p, MD_CODE_NAME_LIST); + return; + } + + // Parse language identifiers until we hit end of line + while !p.at_inline_end() { + // Parse each token as textual content + let text_m = p.start(); + p.bump_remap(MD_TEXTUAL_LITERAL); + text_m.complete(p, MD_TEXTUAL); + } + + m.complete(p, MD_CODE_NAME_LIST); +} + +/// Parse the code content until we find a closing fence. 
+/// Grammar: content: MdInlineItemList +fn parse_code_content( + p: &mut MarkdownParser, + is_tilde_fence: bool, + fence_len: usize, + fence_indent: usize, +) { + let m = p.start(); + let quote_depth = p.state().block_quote_depth; + + // Consume all tokens until we see the matching closing fence or EOF + while !p.at(T![EOF]) { + if quote_depth > 0 && (p.at_line_start() || p.has_preceding_line_break()) { + if !has_quote_prefix(p, quote_depth) { + break; + } + consume_quote_prefix(p, quote_depth); + } + + if at_closing_fence(p, is_tilde_fence, fence_len) { + break; + } + + if p.at_line_start() && fence_indent > 0 { + skip_fenced_content_indent(p, fence_indent); + if at_closing_fence_after_indent(p, is_tilde_fence, fence_len) { + break; + } + } + + // Consume the token as code content (including NEWLINE tokens) + let text_m = p.start(); + p.bump_remap(MD_TEXTUAL_LITERAL); + text_m.complete(p, MD_TEXTUAL); + } + + m.complete(p, MD_INLINE_ITEM_LIST); +} + +fn is_valid_closing_fence(p: &mut MarkdownParser, is_tilde_fence: bool, fence_len: usize) -> bool { + line_has_closing_fence(p, is_tilde_fence, fence_len) +} + +fn info_string_has_backtick(p: &mut MarkdownParser) -> bool { + p.lookahead(|p| { + if p.at(TRIPLE_TILDE) { + return false; + } + + if p.at(T!["```"]) { + p.bump(T!["```"]); + } else if p.at(BACKTICK) { + p.bump(BACKTICK); + } else { + return false; + } + + while !p.at_inline_end() { + if p.at(BACKTICK) { + return true; + } + p.bump(p.cur()); + } + + false + }) +} + +fn at_closing_fence(p: &mut MarkdownParser, is_tilde_fence: bool, fence_len: usize) -> bool { + p.lookahead(|p| is_valid_closing_fence(p, is_tilde_fence, fence_len)) +} + +fn at_closing_fence_after_indent( + p: &mut MarkdownParser, + is_tilde_fence: bool, + fence_len: usize, +) -> bool { + p.lookahead(|p| is_valid_closing_fence(p, is_tilde_fence, fence_len)) +} + +fn skip_fenced_content_indent(p: &mut MarkdownParser, indent: usize) { + let mut consumed = 0usize; + + while consumed < indent && 
p.at(MD_TEXTUAL_LITERAL) { + let text = p.cur_text(); + if text.is_empty() || !text.chars().all(|c| c == ' ' || c == '\t') { + break; + } + + let width = text + .chars() + .map(|c| if c == '\t' { 4 } else { 1 }) + .sum::(); + + if consumed + width > indent { + break; + } + + consumed += width; + p.parse_as_skipped_trivia_tokens(|p| p.bump(MD_TEXTUAL_LITERAL)); + } +} + +fn line_has_closing_fence(p: &MarkdownParser, is_tilde_fence: bool, fence_len: usize) -> bool { + let start: usize = p.cur_range().start().into(); + let source = p.source().text(); + if start > source.len() { + return false; + } + + let before = &source[..start]; + let last_newline_pos = before.rfind(['\n', '\r']); + let line_start = match last_newline_pos { + Some(pos) => { + let bytes = before.as_bytes(); + if bytes.get(pos) == Some(&b'\r') && bytes.get(pos + 1) == Some(&b'\n') { + pos + 2 + } else { + pos + 1 + } + } + None => 0, + }; + + let prefix = &source[line_start..start]; + if !prefix.chars().all(|c| c == ' ' || c == '\t') { + return false; + } + + let mut idx = line_start; + let mut column = 0usize; + let list_indent = p.state().list_item_required_indent; + + while column < list_indent { + match source.as_bytes().get(idx).copied() { + Some(b' ') => { + column += 1; + idx += 1; + } + Some(b'\t') => { + column += 4 - (column % 4); + idx += 1; + } + _ => return false, + } + } + + let mut extra = 0usize; + while extra < 3 { + match source.as_bytes().get(idx).copied() { + Some(b' ') => { + extra += 1; + idx += 1; + } + Some(b'\t') => { + extra += 4 - (extra % 4); + if extra > 3 { + break; + } + idx += 1; + } + _ => break, + } + } + + let fence_char = if is_tilde_fence { b'~' } else { b'`' }; + let mut fence_count = 0usize; + while source.as_bytes().get(idx + fence_count) == Some(&fence_char) { + fence_count += 1; + } + if fence_count < fence_len { + return false; + } + + let after_fence = &source[idx + fence_count..]; + let line_rest = after_fence + .split_terminator(['\n', '\r']) + .next() + 
.unwrap_or(""); + + line_rest.chars().all(|c| c == ' ' || c == '\t') +} + +fn is_line_start_within_indent(p: &MarkdownParser, max_indent: usize) -> bool { + if p.state().virtual_line_start == Some(p.cur_range().start()) { + return true; + } + + let start: usize = p.cur_range().start().into(); + let source = p.source().text(); + if start > source.len() { + return false; + } + + let virtual_start: usize = match p.state().virtual_line_start { + Some(virtual_start) => virtual_start.into(), + None => { + let before = &source[..start]; + let last_newline_pos = before.rfind(['\n', '\r']); + match last_newline_pos { + Some(pos) => { + let bytes = before.as_bytes(); + if bytes.get(pos) == Some(&b'\r') && bytes.get(pos + 1) == Some(&b'\n') { + pos + 2 + } else { + pos + 1 + } + } + None => 0, + } + } + }; + + let prefix = &source[virtual_start..start]; + if !prefix.chars().all(|c| c == ' ' || c == '\t') { + return false; + } + + let mut indent = prefix + .chars() + .fold(0usize, |count, c| count + if c == '\t' { 4 } else { 1 }); + + if p.state().virtual_line_start.is_none() && p.state().list_item_required_indent > 0 { + indent = indent.saturating_sub(p.state().list_item_required_indent); + } + + indent <= max_indent +} diff --git a/crates/biome_markdown_parser/src/syntax/header.rs b/crates/biome_markdown_parser/src/syntax/header.rs new file mode 100644 index 000000000000..db31ca172263 --- /dev/null +++ b/crates/biome_markdown_parser/src/syntax/header.rs @@ -0,0 +1,244 @@ +//! ATX and Setext heading parsing for Markdown (CommonMark §4.2-4.3). +//! +//! # ATX Headings (§4.2) +//! +//! An ATX heading consists of 1-6 `#` characters followed by a space and heading +//! text. The number of `#` characters determines the heading level. +//! +//! ```markdown +//! # Heading 1 +//! ## Heading 2 +//! ### Heading 3 +//! ``` +//! +//! # Setext Headings (§4.3) +//! +//! A setext heading consists of one or more lines of text followed by an +//! 
underline of `=` (level 1) or `-` (level 2) characters. +//! +//! ```markdown +//! Heading 1 +//! ========= +//! +//! Heading 2 +//! --------- +//! ``` + +use crate::parser::MarkdownParser; +use biome_markdown_syntax::{T, kind::MarkdownSyntaxKind::*}; +use biome_parser::{ + Parser, + prelude::ParsedSyntax::{self, *}, +}; + +use super::parse_any_inline; + +/// Maximum number of `#` characters allowed in an ATX heading (CommonMark §4.2). +const MAX_HEADER_HASHES: usize = 6; + +/// Check if we might be at an ATX header. +/// We only check if the current token is a HASH - full validation happens in parse_header. +pub(crate) fn at_header(p: &mut MarkdownParser) -> bool { + p.lookahead(|p| { + if !p.at_line_start() && !p.at_start_of_input() { + return false; + } + p.skip_line_indent(3); + p.at(T![#]) + }) +} + +/// Parse an ATX header. +/// +/// Grammar: MdHeader = before: MdHashList content: MdParagraph? after: MdHashList +/// +/// ATX headers start with 1-6 `#` characters followed by space or end of line. +/// More than 6 `#` characters is not a valid header. +/// +/// Trailing hashes are optional and must be at the end of the line. +pub(crate) fn parse_header(p: &mut MarkdownParser) -> ParsedSyntax { + if !at_header(p) { + return Absent; + } + + let m = p.start(); + + p.skip_line_indent(3); + + // Parse opening hashes (MdHashList containing MdHash nodes) + let hash_count = parse_hash_list(p); + + // Validate hash count (must be 1-6) + // Diagnostic for >6 hashes is emitted in parse_any_block before try_parse + if hash_count > MAX_HEADER_HASHES { + // Not a valid header - abandon and let it be parsed as paragraph + m.abandon(p); + return Absent; + } + + // Per CommonMark §4.2: opening hashes must be followed by space, tab, or end of line. + // `#foo` is NOT a valid header; `# foo`, `#\tfoo`, or `#\n` are valid. + // Check if the next token has preceding whitespace or we're at EOL/EOF. 
+ if !p.at_inline_end() { + // There's a token after hashes on the same line - check for whitespace + // In Markdown, whitespace is significant and included in token text, + // so we check if the token starts with a space or tab character. + let text = p.cur_text(); + let token_starts_with_whitespace = text.starts_with(' ') || text.starts_with('\t'); + if !token_starts_with_whitespace { + // No space/tab after hashes - not a valid header (e.g., "#foo") + m.abandon(p); + return Absent; + } + } + + // Parse content (optional paragraph) - content goes until end of line + // The header ends at a single newline (not blank line) + parse_header_content(p); + + // Parse trailing hashes (MdHashList) + // Trailing hashes are valid if they're at the end of the line + parse_trailing_hashes(p); + + Present(m.complete(p, MD_HEADER)) +} + +/// Parse a list of hash tokens as MdHashList containing MdHash nodes. +/// Returns the number of hashes parsed. +fn parse_hash_list(p: &mut MarkdownParser) -> usize { + let m = p.start(); + let mut count = 0; + + while p.at(T![#]) { + let hash_m = p.start(); + p.bump(T![#]); + hash_m.complete(p, MD_HASH); + count += 1; + } + + m.complete(p, MD_HASH_LIST); + count +} + +/// Parse header content - inline content for the header. +/// +/// This stops at end of line (NEWLINE or EOF) or when trailing hashes are detected. +/// Note: NEWLINE is an explicit token (not trivia), so we check `at_inline_end()`. +fn parse_header_content(p: &mut MarkdownParser) { + // Check if there's any content (not at EOF or NEWLINE) + if p.at_inline_end() { + return; + } + + // Parse content as a paragraph containing inline items + let m = p.start(); + let inline_m = p.start(); + + loop { + if p.at(MD_HARD_LINE_LITERAL) { + // Trailing spaces before newline in ATX headings should be ignored. 
+ p.parse_as_skipped_trivia_tokens(|p| p.bump(MD_HARD_LINE_LITERAL)); + break; + } + + // Check for end of line (EOF, NEWLINE, or preceding line break) + if p.at_inline_end() { + break; + } + + // Check if we're at trailing hashes (optional whitespace + hashes + end of line) + if at_trailing_hashes_start(p) { + // Stop content parsing - trailing hashes will be parsed separately + break; + } + + // Parse an inline element + if parse_any_inline(p).is_absent() { + break; + } + } + + inline_m.complete(p, MD_INLINE_ITEM_LIST); + m.complete(p, MD_PARAGRAPH); +} + +/// Check if the current position has a trailing hash sequence. +/// A trailing hash sequence is one or more `#` characters followed by end of line +/// (NEWLINE or EOF), and NOT preceded by a line break (which would +/// indicate a new block, not trailing hashes). +/// +/// Note: NEWLINE is an explicit token, so we check `at_inline_end()` after +/// consuming hashes to see if we've reached end of line. +fn is_trailing_hash_sequence(p: &mut MarkdownParser) -> bool { + if !p.at(T![#]) { + return false; + } + + let checkpoint = p.checkpoint(); + + while p.at(T![#]) { + p.bump(T![#]); + } + + while p.at(MD_TEXTUAL_LITERAL) { + let text = p.cur_text(); + if text.chars().all(|c| c == ' ' || c == '\t') { + p.bump(MD_TEXTUAL_LITERAL); + } else { + break; + } + } + + let at_end_of_line = p.at_inline_end(); + + p.rewind(checkpoint); + + at_end_of_line +} + +fn at_trailing_hashes_start(p: &mut MarkdownParser) -> bool { + p.lookahead(|p| { + let mut saw_ws = false; + + while p.at(MD_TEXTUAL_LITERAL) { + let text = p.cur_text(); + if text.chars().all(|c| c == ' ' || c == '\t') { + saw_ws = true; + p.bump(MD_TEXTUAL_LITERAL); + } else { + break; + } + } + + saw_ws && is_trailing_hash_sequence(p) + }) +} + +/// Parse trailing hashes for ATX headers. +/// +/// Per CommonMark spec, a closing sequence of `#` characters is optional. +/// It must be at the end of the line, preceded by optional whitespace. 
+fn parse_trailing_hashes(p: &mut MarkdownParser) { + let m = p.start(); + + if at_trailing_hashes_start(p) { + while p.at(MD_TEXTUAL_LITERAL) { + let text = p.cur_text(); + if text.chars().all(|c| c == ' ' || c == '\t') { + p.parse_as_skipped_trivia_tokens(|p| p.bump(MD_TEXTUAL_LITERAL)); + } else { + break; + } + } + + // Only parse hashes that are on the same line + // Stop if we hit end of line (NEWLINE, EOF, or preceding line break) + while p.at(T![#]) && !p.at_inline_end() { + let hash_m = p.start(); + p.bump(T![#]); + hash_m.complete(p, MD_HASH); + } + } + + m.complete(p, MD_HASH_LIST); +} diff --git a/crates/biome_markdown_parser/src/syntax/html_block.rs b/crates/biome_markdown_parser/src/syntax/html_block.rs new file mode 100644 index 000000000000..ce03cdbc0d04 --- /dev/null +++ b/crates/biome_markdown_parser/src/syntax/html_block.rs @@ -0,0 +1,458 @@ +//! HTML block parsing for Markdown (CommonMark §4.6). +//! +//! Per CommonMark §4.6, there are 7 types of HTML blocks: +//! +//! 1. `` +//! 3. `` +//! 4. `` +//! 5. `` +//! 6. Block-level HTML tags (div, p, table, etc.) - ends at blank line +//! 7. Other tags - ends at blank line +//! +//! HTML blocks start at the beginning of a line (possibly indented by up to 3 spaces). +//! +//! ## Recovery Limits +//! +//! To prevent unbounded consumption for malformed documents, we limit HTML block +//! parsing to a maximum number of tokens. This provides better error recovery +//! when termination markers are missing. + +use biome_markdown_syntax::MarkdownSyntaxKind::*; +use biome_parser::Parser; +use biome_parser::prelude::ParsedSyntax::{self, *}; + +use crate::MarkdownParser; +use crate::syntax::quote::{consume_quote_prefix_without_virtual, has_quote_prefix}; + +/// Maximum number of tokens to consume before giving up on finding an HTML block terminator. +/// This provides reasonable error recovery for unclosed HTML blocks while still +/// supporting large valid HTML blocks (e.g., embedded SVGs or complex tables). 
+const MAX_HTML_BLOCK_TOKENS: usize = 10_000; + +/// Type 1 tags that end with a specific closing tag (case-insensitive). +const TYPE1_TAGS: &[&str] = &["script", "pre", "style", "textarea"]; + +/// Type 6 tags that end at a blank line (case-insensitive). +/// These are block-level HTML elements. +const TYPE6_TAGS: &[&str] = &[ + "address", + "article", + "aside", + "base", + "basefont", + "blockquote", + "body", + "caption", + "center", + "col", + "colgroup", + "dd", + "details", + "dialog", + "dir", + "div", + "dl", + "dt", + "fieldset", + "figcaption", + "figure", + "footer", + "form", + "frame", + "frameset", + "h1", + "h2", + "h3", + "h4", + "h5", + "h6", + "head", + "header", + "hr", + "html", + "iframe", + "legend", + "li", + "link", + "main", + "menu", + "menuitem", + "nav", + "noframes", + "ol", + "optgroup", + "option", + "p", + "param", + "section", + "source", + "summary", + "table", + "tbody", + "td", + "template", + "tfoot", + "th", + "thead", + "title", + "tr", + "track", + "ul", +]; + +/// The type of HTML block, determining how it ends. +#[derive(Debug, Clone, Copy, PartialEq)] +enum HtmlBlockType { + /// Type 1: script, pre, style, textarea - ends at closing tag + Type1(&'static str), + /// Type 2: HTML comment - ends at `-->` + Type2, + /// Type 3: Processing instruction - ends at `?>` + Type3, + /// Type 4: Declaration - ends at `>` + Type4, + /// Type 5: CDATA - ends at `]]>` + Type5, + /// Type 6: Block-level tags - ends at blank line + Type6, + /// Type 7: Other tags - ends at blank line + Type7, +} + +/// Check if we're at the start of an HTML block. +/// +/// HTML blocks can start at the beginning of a line (up to 3 spaces indentation). +/// We detect by looking for `<` followed by appropriate patterns. 
+pub(crate) fn at_html_block(p: &mut MarkdownParser) -> bool { + p.lookahead(|p| { + if !p.at_line_start() && !p.at_start_of_input() { + return false; + } + // Must be at line start (with at most 3 spaces indentation) + if p.line_start_leading_indent() > 3 { + return false; + } + + p.skip_line_indent(3); + + // Must be at `<` + if !p.at(L_ANGLE) { + return false; + } + + // Look ahead to determine the type + detect_html_block_type(p).is_some() + }) +} + +/// HTML blocks of type 7 do not interrupt paragraphs per CommonMark. +pub(crate) fn at_html_block_interrupt(p: &mut MarkdownParser) -> bool { + p.lookahead(|p| { + if !at_html_block(p) { + return false; + } + !matches!(detect_html_block_type(p), Some(HtmlBlockType::Type7)) + }) +} + +/// Detect what type of HTML block this is. +fn detect_html_block_type(p: &MarkdownParser) -> Option { + let remaining = p.source_after_current(); + if !remaining.starts_with('<') { + return None; + } + + let line_end = remaining.find(['\n', '\r']).unwrap_or(remaining.len()); + let line = &remaining[..line_end]; + let after_angle = &line[1..]; + + // Type 2: HTML comment + if after_angle.starts_with("!--") { + return Some(HtmlBlockType::Type2); + } + + // Type 5: CDATA + if after_angle.starts_with("![CDATA[") { + return Some(HtmlBlockType::Type5); + } + + // Type 4: Declaration (') + || after_tag.starts_with("/>"); + + if !valid_suffix { + return None; + } + + // Type 1: script, pre, style, textarea (case-insensitive, opening tag only) + for &type1_tag in TYPE1_TAGS { + if !is_closing && tag_name.eq_ignore_ascii_case(type1_tag) { + return Some(HtmlBlockType::Type1(type1_tag)); + } + } + + // Type 6: Block-level tags (case-insensitive) + for &type6_tag in TYPE6_TAGS { + if tag_name.eq_ignore_ascii_case(type6_tag) { + return Some(HtmlBlockType::Type6); + } + } + + // Type 7: Other valid open/close tag + // Only treat as HTML block if the line contains only the tag. 
+ if line_has_only_tag(line) { + return Some(HtmlBlockType::Type7); + } + + None +} + +/// Parse an HTML block. +/// +/// Grammar: MdHtmlBlock = content: MdInlineItemList +/// +/// The entire HTML block content is stored as raw text within an inline item list. +pub(crate) fn parse_html_block(p: &mut MarkdownParser) -> ParsedSyntax { + p.skip_line_indent(3); + + let html_block_type = match detect_html_block_type(p) { + Some(t) => t, + None => return Absent, + }; + + let m = p.start(); + let content_m = p.start(); + + // Parse content based on block type + match html_block_type { + HtmlBlockType::Type1(tag) => parse_type1_block(p, tag), + HtmlBlockType::Type2 => parse_until_sequence(p, "-->"), + HtmlBlockType::Type3 => parse_until_sequence(p, "?>"), + HtmlBlockType::Type4 => parse_until_char(p, '>'), + HtmlBlockType::Type5 => parse_until_sequence(p, "]]>"), + HtmlBlockType::Type6 | HtmlBlockType::Type7 => parse_until_blank_line(p), + } + + content_m.complete(p, MD_INLINE_ITEM_LIST); + Present(m.complete(p, MD_HTML_BLOCK)) +} + +/// Parse Type 1 HTML block (script, pre, style, textarea) until closing tag. 
+fn parse_type1_block(p: &mut MarkdownParser, tag: &str) { + let closing = format!(" usize { + // Single line shouldn't have many tokens, but add limit for safety + const MAX_LINE_TOKENS: usize = 1000; + let mut token_count = 0; + + while !p.at(EOF) && token_count < MAX_LINE_TOKENS { + let text_m = p.start(); + let current = p.cur(); + + // Bump the current token + p.bump_remap(MD_TEXTUAL_LITERAL); + text_m.complete(p, MD_TEXTUAL); + token_count += 1; + + // Stop after consuming a newline or if this was end of file + if current == NEWLINE { + break; + } + } + + token_count +} + +fn current_line_contains_sequence(p: &MarkdownParser, sequence: &str) -> bool { + let remaining = p.source_after_current(); + let line_end = remaining.find(['\n', '\r']).unwrap_or(remaining.len()); + remaining[..line_end].contains(sequence) +} + +fn current_line_contains_sequence_case_insensitive(p: &MarkdownParser, sequence: &str) -> bool { + let remaining = p.source_after_current(); + let line_end = remaining.find(['\n', '\r']).unwrap_or(remaining.len()); + let line = &remaining[..line_end]; + if sequence.len() > line.len() { + return false; + } + line.as_bytes() + .windows(sequence.len()) + .any(|w| w.eq_ignore_ascii_case(sequence.as_bytes())) +} + +fn current_line_contains_char(p: &MarkdownParser, target: char) -> bool { + let remaining = p.source_after_current(); + let line_end = remaining.find(['\n', '\r']).unwrap_or(remaining.len()); + remaining[..line_end].contains(target) +} + +fn line_has_only_tag(line: &str) -> bool { + if let Some(pos) = line.find('>') { + line[pos + 1..].trim().is_empty() + } else { + false + } +} + +fn skip_container_prefixes(p: &mut MarkdownParser) { + let quote_depth = p.state().block_quote_depth; + if quote_depth > 0 && has_quote_prefix(p, quote_depth) { + consume_quote_prefix_without_virtual(p, quote_depth); + p.state_mut().virtual_line_start = Some(p.cur_range().start()); + } + + let required_indent = p.state().list_item_required_indent; + if 
required_indent > 0 { + p.skip_line_indent(required_indent); + p.state_mut().virtual_line_start = Some(p.cur_range().start()); + } +} + +fn at_container_boundary(p: &mut MarkdownParser) -> bool { + let quote_depth = p.state().block_quote_depth; + if quote_depth > 0 && p.at_line_start() && !has_quote_prefix(p, quote_depth) { + return true; + } + + let required_indent = p.state().list_item_required_indent; + if required_indent > 0 && p.at_line_start() { + let indent = p.line_start_leading_indent(); + if indent < required_indent { + return true; + } + } + + false +} diff --git a/crates/biome_markdown_parser/src/syntax/inline.rs b/crates/biome_markdown_parser/src/syntax/inline.rs new file mode 100644 index 000000000000..1c8f140ff959 --- /dev/null +++ b/crates/biome_markdown_parser/src/syntax/inline.rs @@ -0,0 +1,1640 @@ +//! Inline element parsing for Markdown. +//! +//! Handles inline code spans, emphasis (bold/italic), links, images, line breaks, and raw HTML. +//! +//! # CommonMark Specification References +//! +//! This module implements the following CommonMark 0.31.2 sections: +//! +//! - **§6.1 Code spans**: Backtick-delimited inline code (`code`) +//! - **§6.2 Emphasis and strong emphasis**: `*italic*`, `**bold**`, `_italic_`, `__bold__` +//! - **§6.3 Links**: `[text](url)` inline links +//! - **§6.4 Autolinks (URI)**: `` +//! - **§6.5 Autolinks (email)**: `` +//! - **§6.6 Hard line breaks**: Trailing spaces or backslash before newline +//! - **§6.7 Soft line breaks**: Single newline within paragraph +//! - **§6.8 Raw HTML**: ``, ``, ``, ``, ``, `` +//! +//! # Emphasis Algorithm (§6.4) +//! +//! This module implements the CommonMark delimiter stack algorithm for emphasis: +//! +//! 1. **First pass**: Collect delimiter runs from the inline content +//! 2. **Second pass**: Match openers and closers using the delimiter stack algorithm +//! 3. **Rule of 3**: If (opener_count + closer_count) % 3 == 0 and both can open/close, +//! 
skip the match unless both counts are divisible by 3 +//! +//! # Emphasis Flanking Rules (§6.2) +//! +//! A delimiter run is **left-flanking** if: +//! 1. Not followed by Unicode whitespace, AND +//! 2. Not followed by punctuation, OR preceded by whitespace/punctuation +//! +//! A delimiter run is **right-flanking** if: +//! 1. Not preceded by Unicode whitespace, AND +//! 2. Not preceded by punctuation, OR followed by whitespace/punctuation +//! +//! Underscore (`_`) has additional intraword restrictions (§6.2 rules 2, 5, 7, 8). + +use biome_markdown_syntax::MarkdownSyntaxKind; +use biome_markdown_syntax::T; +use biome_markdown_syntax::kind::MarkdownSyntaxKind::*; +use biome_parser::Parser; +use biome_parser::prelude::ParsedSyntax::{self, *}; + +use biome_rowan::{TextRange, TextSize}; + +use crate::MarkdownParser; +use crate::link_reference::normalize_reference_label; + +// ============================================================================ +// Delimiter Stack Types for Emphasis Parsing +// ============================================================================ + +/// Kind of emphasis delimiter (* or _) +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum DelimKind { + Star, + Underscore, +} + +/// A delimiter run collected during the first pass +#[derive(Debug, Clone)] +struct DelimRun { + /// The delimiter character kind + kind: DelimKind, + /// Number of delimiter characters in this run + count: usize, + /// Whether this can open emphasis (left-flanking) + can_open: bool, + /// Whether this can close emphasis (right-flanking) + can_close: bool, + /// Byte offset in the source where this run starts + start_offset: usize, +} + +/// A matched emphasis span (opener + closer) +#[derive(Debug, Clone)] +struct EmphasisMatch { + /// Byte offset where the opener delimiter starts + opener_start: usize, + /// Byte offset where the closer delimiter starts + closer_start: usize, + /// Whether this is strong (2 chars) or regular (1 char) emphasis + is_strong: 
bool, +} + +/// Check if a character is Unicode whitespace for flanking rules. +fn is_whitespace(c: char) -> bool { + c.is_whitespace() +} + +/// Check if a character is Unicode punctuation for flanking rules. +/// Per CommonMark spec, this includes ASCII punctuation and Unicode punctuation categories. +fn is_punctuation(c: char) -> bool { + // ASCII punctuation + Unicode punctuation categories + matches!( + c, + '!' | '"' + | '#' + | '$' + | '%' + | '&' + | '\'' + | '(' + | ')' + | '*' + | '+' + | ',' + | '-' + | '.' + | '/' + | ':' + | ';' + | '<' + | '=' + | '>' + | '?' + | '@' + | '[' + | '\\' + | ']' + | '^' + | '_' + | '`' + | '{' + | '|' + | '}' + | '~' + ) || c.is_ascii_punctuation() + || matches!(c, '\u{2000}'..='\u{206F}' | '\u{2E00}'..='\u{2E7F}') +} + +/// Check if an opening delimiter is left-flanking per CommonMark rules. +/// A left-flanking delimiter run is one that is: +/// - Not followed by Unicode whitespace, AND +/// - Either (a) not followed by punctuation, OR (b) preceded by whitespace/punctuation +fn is_left_flanking_delimiter(char_after: Option, char_before: Option) -> bool { + match char_after { + None => false, // At end of input, can't be left-flanking + Some(c) if is_whitespace(c) => false, // Followed by whitespace + Some(c) if is_punctuation(c) => { + // Followed by punctuation - only left-flanking if preceded by whitespace or punctuation + match char_before { + None => true, // Start of input counts as whitespace + Some(b) => is_whitespace(b) || is_punctuation(b), + } + } + Some(_) => true, // Not followed by whitespace or punctuation = left-flanking + } +} + +/// Check if a closing delimiter is right-flanking per CommonMark rules. 
+/// A right-flanking delimiter run is one that is: +/// - Not preceded by Unicode whitespace, AND +/// - Either (a) not preceded by punctuation, OR (b) followed by whitespace/punctuation +fn is_right_flanking_delimiter(char_before: Option, char_after: Option) -> bool { + match char_before { + None => false, // At start of input, can't be right-flanking + Some(c) if is_whitespace(c) => false, // Preceded by whitespace + Some(c) if is_punctuation(c) => { + // Preceded by punctuation - only right-flanking if followed by whitespace or punctuation + match char_after { + None => true, // End of input counts as whitespace + Some(a) => is_whitespace(a) || is_punctuation(a), + } + } + Some(_) => true, // Not preceded by whitespace or punctuation = right-flanking + } +} + +/// Check if underscore can open emphasis (stricter rules than asterisk). +/// Per CommonMark 6.2, underscore can open emphasis iff it is left-flanking AND either: +/// - Not part of a right-flanking delimiter run, OR +/// - Preceded by a punctuation character +fn can_underscore_open(char_before: Option, char_after: Option) -> bool { + // Must be left-flanking + if !is_left_flanking_delimiter(char_after, char_before) { + return false; + } + // If also right-flanking, must be preceded by punctuation + if is_right_flanking_delimiter(char_before, char_after) { + return matches!(char_before, Some(c) if is_punctuation(c)); + } + true +} + +/// Check if underscore can close emphasis (stricter rules than asterisk). 
+/// Per CommonMark 6.2, underscore can close emphasis iff it is right-flanking AND either: +/// - Not part of a left-flanking delimiter run, OR +/// - Followed by a punctuation character +fn can_underscore_close(char_before: Option, char_after: Option) -> bool { + // Must be right-flanking + if !is_right_flanking_delimiter(char_before, char_after) { + return false; + } + // If also left-flanking, must be followed by punctuation + if is_left_flanking_delimiter(char_after, char_before) { + return matches!(char_after, Some(c) if is_punctuation(c)); + } + true +} + +// ============================================================================ +// Delimiter Stack Algorithm Implementation +// ============================================================================ + +/// Collect all delimiter runs from source text. +/// +/// This is the first pass of the CommonMark emphasis algorithm. It scans +/// the source text and identifies all potential delimiter runs (sequences +/// of `*` or `_`), computing their flanking status. 
+fn collect_delimiter_runs(source: &str) -> Vec { + let mut runs = Vec::new(); + let bytes = source.as_bytes(); + let mut i = 0; + + while i < bytes.len() { + let b = bytes[i]; + + // Check for delimiter characters + if b == b'*' || b == b'_' { + let kind = if b == b'*' { + DelimKind::Star + } else { + DelimKind::Underscore + }; + let start_offset = i; + + // Count consecutive delimiter characters + let mut count = 1; + while i + count < bytes.len() && bytes[i + count] == b { + count += 1; + } + let end_offset = i + count; + + // Get character before delimiter run + let char_before = if start_offset > 0 { + // Get the char ending at start_offset + let before_slice = &source[..start_offset]; + before_slice.chars().next_back() + } else { + None + }; + + // Get character after delimiter run + let char_after = source[end_offset..].chars().next(); + + // Compute flanking status + let (can_open, can_close) = if kind == DelimKind::Underscore { + ( + can_underscore_open(char_before, char_after), + can_underscore_close(char_before, char_after), + ) + } else { + // Asterisk: can open if left-flanking, can close if right-flanking + ( + is_left_flanking_delimiter(char_after, char_before), + is_right_flanking_delimiter(char_before, char_after), + ) + }; + + runs.push(DelimRun { + kind, + count, + can_open, + can_close, + start_offset, + }); + + i = end_offset; + } else if b == b'`' { + // Skip code spans - they block emphasis + let backtick_count = { + let mut c = 1; + while i + c < bytes.len() && bytes[i + c] == b'`' { + c += 1; + } + c + }; + i += backtick_count; + + // Find closing backticks + while i < bytes.len() { + if bytes[i] == b'`' { + let close_count = { + let mut c = 1; + while i + c < bytes.len() && bytes[i + c] == b'`' { + c += 1; + } + c + }; + i += close_count; + if close_count == backtick_count { + break; + } + } else { + i += 1; + } + } + } else if b == b'<' { + // Skip potential HTML tags and autolinks + i += 1; + while i < bytes.len() && bytes[i] != b'>' && 
bytes[i] != b'\n' { + i += 1; + } + if i < bytes.len() && bytes[i] == b'>' { + i += 1; + } + } else if b == b'\\' && i + 1 < bytes.len() { + // Skip escaped characters + i += 2; + } else { + i += 1; + } + } + + runs +} + +/// Match delimiter runs using the CommonMark algorithm. +/// +/// This is the second pass. It processes closers from left to right, +/// searching backward for matching openers. Returns a list of matched +/// emphasis spans sorted by opener position. +fn match_delimiters(runs: &mut [DelimRun]) -> Vec { + let mut matches = Vec::new(); + let mut opener_stack: Vec = Vec::new(); + + for idx in 0..runs.len() { + if runs[idx].can_close && runs[idx].count > 0 { + loop { + let mut opener_stack_pos = None; + let prefer_strong = runs[idx].count >= 2; + + for pass in 0..2 { + for (pos, &opener_idx) in opener_stack.iter().enumerate().rev() { + let opener = &runs[opener_idx]; + let closer = &runs[idx]; + + if opener.kind != closer.kind || !opener.can_open || opener.count == 0 { + continue; + } + + if prefer_strong && pass == 0 && opener.count < 2 { + continue; + } + + // Rule of 3: if (opener_count + closer_count) % 3 == 0 and + // the closer can open or the opener can close, skip unless + // both counts are divisible by 3 + let opener_count = opener.count; + let closer_count = closer.count; + if ((opener.can_open && opener.can_close) + || (closer.can_open && closer.can_close)) + && (opener_count + closer_count).is_multiple_of(3) + && (!opener_count.is_multiple_of(3) || !closer_count.is_multiple_of(3)) + { + continue; + } + + opener_stack_pos = Some(pos); + break; + } + + if opener_stack_pos.is_some() { + break; + } + } + + let Some(pos) = opener_stack_pos else { break }; + let opener_idx = opener_stack[pos]; + let use_count = if runs[opener_idx].count >= 2 && runs[idx].count >= 2 { + 2 + } else { + 1 + }; + + let opener_start = runs[opener_idx].start_offset; + let closer_start = runs[idx].start_offset; + + matches.push(EmphasisMatch { + opener_start, + 
closer_start, + is_strong: use_count == 2, + }); + + runs[opener_idx].count -= use_count; + runs[opener_idx].start_offset += use_count; + runs[idx].count -= use_count; + runs[idx].start_offset += use_count; + + // Remove openers between the matched opener and this closer. + opener_stack.truncate(pos + 1); + if runs[opener_idx].count == 0 { + opener_stack.pop(); + } + + if use_count == 2 && runs[opener_idx].count > 0 && runs[idx].count > 0 { + // Avoid crossing matches from odd-length runs (e.g. ***foo***). + break; + } + + if runs[idx].count == 0 { + break; + } + } + } + + if runs[idx].can_open && runs[idx].count > 0 { + opener_stack.push(idx); + } + } + + // Sort matches by opener position for nested processing + matches.sort_by_key(|m| m.opener_start); + + matches +} + +/// Context for emphasis-aware inline parsing +#[derive(Debug)] +pub(crate) struct EmphasisContext { + /// Matched emphasis spans, sorted by opener_start + matches: Vec, + /// Base offset of the inline content in the source + base_offset: usize, +} + +impl EmphasisContext { + /// Create a new emphasis context by analyzing the source text + pub(crate) fn new(source: &str, base_offset: usize) -> Self { + let mut runs = collect_delimiter_runs(source); + let matches = match_delimiters(&mut runs); + Self { + matches, + base_offset, + } + } + + /// Check if there's an emphasis opener at the given offset + fn opener_at(&self, offset: usize) -> Option<&EmphasisMatch> { + let abs_offset = offset; + self.matches + .iter() + .find(|m| m.opener_start + self.base_offset == abs_offset) + } +} + +/// Parse a hard line break. 
+/// +/// Grammar: MdHardLine = value: 'md_hard_line_literal' +/// +/// A hard line break is created by either: +/// - Two or more trailing spaces followed by a newline +/// - A backslash followed by a newline +pub(crate) fn parse_hard_line(p: &mut MarkdownParser) -> ParsedSyntax { + if !p.at(MD_HARD_LINE_LITERAL) { + return Absent; + } + + let m = p.start(); + p.bump(MD_HARD_LINE_LITERAL); + Present(m.complete(p, MD_HARD_LINE)) +} + +/// Parse inline code span (`` `code` `` or ``` `` `code` `` ```). +/// +/// Grammar: MdInlineCode = l_tick: '`' content: MdInlineItemList r_tick: '`' +/// +/// Per CommonMark, code spans can use multiple backticks to allow literal +/// backticks inside: ``` `` `code` `` ``` wraps around code containing backticks. +/// The opening and closing backtick strings must be the same length. +pub(crate) fn parse_inline_code(p: &mut MarkdownParser) -> ParsedSyntax { + if !p.at(BACKTICK) { + return Absent; + } + + let m = p.start(); + + // Count opening backticks from token text + let opening_count = p.cur_text().len(); + let opening_range = p.cur_range(); + + // Opening backtick(s) + p.bump(BACKTICK); + + // Content - parse until we find a BACKTICK with matching count, or EOF + let content = p.start(); + let mut found_closing = false; + loop { + if p.at_inline_end() { + break; + } + + // Check for matching closing backticks + if p.at(BACKTICK) { + let closing_count = p.cur_text().len(); + if closing_count == opening_count { + // Found matching closing backticks + found_closing = true; + break; + } + // Not matching - consume as content + let text_m = p.start(); + p.bump_remap(MD_TEXTUAL_LITERAL); + text_m.complete(p, MD_TEXTUAL); + continue; + } + + // Regular content + let text_m = p.start(); + p.bump_remap(MD_TEXTUAL_LITERAL); + text_m.complete(p, MD_TEXTUAL); + } + content.complete(p, MD_INLINE_ITEM_LIST); + + // Closing backtick(s) - emit custom diagnostic if missing + if found_closing { + p.bump(BACKTICK); + } else { + 
p.error(super::parse_error::unclosed_code_span( + p, + opening_range, + opening_count, + )); + } + + Present(m.complete(p, MD_INLINE_CODE)) +} + +/// Parse emphasis using the delimiter stack matches. +fn parse_emphasis_from_context(p: &mut MarkdownParser, expect_strong: bool) -> ParsedSyntax { + let context = match p.emphasis_context() { + Some(context) => context, + None => return Absent, + }; + + let offset = u32::from(p.cur_range().start()) as usize; + let matched = match context.opener_at(offset) { + Some(matched) => matched, + None => return Absent, + }; + + if matched.is_strong != expect_strong { + return Absent; + } + + let (opener_kind, closer_kind, opener_text) = if expect_strong { + if p.at(DOUBLE_STAR) { + (DOUBLE_STAR, DOUBLE_STAR, "**") + } else if p.at(DOUBLE_UNDERSCORE) { + (DOUBLE_UNDERSCORE, DOUBLE_UNDERSCORE, "__") + } else { + return Absent; + } + } else if p.at(T![*]) { + (T![*], T![*], "*") + } else if p.at(UNDERSCORE) { + (UNDERSCORE, UNDERSCORE, "_") + } else { + return Absent; + }; + + let closer_offset = matched.closer_start + context.base_offset; + let m = p.start(); + let opening_range = p.cur_range(); + + p.bump(opener_kind); + + let content = p.start(); + loop { + if p.at_inline_end() { + break; + } + + let current_offset = u32::from(p.cur_range().start()) as usize; + if current_offset == closer_offset { + break; + } + + if parse_any_inline(p).is_absent() { + break; + } + } + content.complete(p, MD_INLINE_ITEM_LIST); + + if p.at(closer_kind) && u32::from(p.cur_range().start()) as usize == closer_offset { + p.bump(closer_kind); + } else { + p.error(super::parse_error::unclosed_emphasis( + p, + opening_range, + opener_text, + )); + } + + if expect_strong { + Present(m.complete(p, MD_INLINE_EMPHASIS)) + } else { + Present(m.complete(p, MD_INLINE_ITALIC)) + } +} + +/// Parse inline emphasis (bold: `**text**` or `__text__`). 
+pub(crate) fn parse_inline_emphasis(p: &mut MarkdownParser) -> ParsedSyntax {
+    parse_emphasis_from_context(p, true)
+}
+
+/// Parse inline italic (`*text*` or `_text_`).
+pub(crate) fn parse_inline_italic(p: &mut MarkdownParser) -> ParsedSyntax {
+    parse_emphasis_from_context(p, false)
+}
+
+/// Parse inline items into an `MD_INLINE_ITEM_LIST` until `stop` or the end of
+/// the inline content, treating `[` and `![` as plain text.
+///
+/// A fresh emphasis context covering only this list is installed while the
+/// list is parsed; the previous context is restored afterwards.
+fn parse_inline_item_list_until_no_links(p: &mut MarkdownParser, stop: MarkdownSyntaxKind) {
+    let m = p.start();
+    let prev_context = set_inline_emphasis_context_until(p, stop);
+
+    while !p.at(stop) && !p.at_inline_end() {
+        if parse_any_inline_no_links(p).is_absent() {
+            break;
+        }
+    }
+
+    m.complete(p, MD_INLINE_ITEM_LIST);
+    p.set_emphasis_context(prev_context);
+}
+
+/// Like `parse_any_inline`, but link and image openers are emitted as text.
+fn parse_any_inline_no_links(p: &mut MarkdownParser) -> ParsedSyntax {
+    if (p.at(BANG) && p.nth_at(1, L_BRACK)) || p.at(L_BRACK) {
+        return super::parse_textual(p);
+    }
+
+    parse_any_inline(p)
+}
+
+/// Install an emphasis context built from the source between the current
+/// position and the next `stop` token, returning the context it replaced.
+fn set_inline_emphasis_context_until(
+    p: &mut MarkdownParser,
+    stop: MarkdownSyntaxKind,
+) -> Option<EmphasisContext> {
+    let source_len = inline_list_source_len_until(p, stop);
+    let source = p.source_after_current();
+    // Fall back to the whole remaining source if the measured length cannot be
+    // used as a slice end (too long, or not on a character boundary).
+    let inline_source = source.get(..source_len).unwrap_or(source);
+    let base_offset = u32::from(p.cur_range().start()) as usize;
+    let context = EmphasisContext::new(inline_source, base_offset);
+    p.set_emphasis_context(Some(context))
+}
+
+/// Sum the text lengths of the tokens from the current position up to (not
+/// including) `stop`, EOF, or the end of the inline content.
+///
+/// Runs inside `p.lookahead`.
+fn inline_list_source_len_until(p: &mut MarkdownParser, stop: MarkdownSyntaxKind) -> usize {
+    p.lookahead(|p| {
+        let mut len = 0usize;
+
+        while !(p.at(T![EOF]) || p.at(stop) || p.at_inline_end()) {
+            len += p.cur_text().len();
+            p.bump(p.cur());
+        }
+
+        len
+    })
+}
+
+/// Parse link starting with `[` - dispatches to inline link or reference link.
+///
+/// After parsing `[text]`:
+/// - If followed by `(` → inline link `[text](url)`
+/// - If followed by `[` → reference link `[text][label]` or `[text][]`
+/// - Otherwise → shortcut reference `[text]`
+pub(crate) fn parse_link_or_reference(p: &mut MarkdownParser) -> ParsedSyntax {
+    parse_link_or_image(p, LinkParseKind::Link)
+}
+
+/// Parse reference link label `[label]` or `[]`.
+///
+/// Grammar: `MdReferenceLinkLabel = '[' label: MdInlineItemList ']'`
+///
+/// Every token between the brackets is remapped to textual content. The label
+/// may be empty (collapsed reference). If the closing `]` is not found before
+/// the inline content ends, the parser is rewound to where it started and
+/// `Absent` is returned, so no tokens are consumed.
+fn parse_reference_label(p: &mut MarkdownParser) -> ParsedSyntax {
+    if !p.at(L_BRACK) {
+        return Absent;
+    }
+
+    // Remember the position so a missing `]` can be undone.
+    let checkpoint = p.checkpoint();
+    let label_node = p.start();
+
+    p.bump(L_BRACK);
+
+    let items = p.start();
+    loop {
+        if p.at(R_BRACK) || p.at_inline_end() {
+            break;
+        }
+        let text = p.start();
+        p.bump_remap(MD_TEXTUAL_LITERAL);
+        text.complete(p, MD_TEXTUAL);
+    }
+    items.complete(p, MD_INLINE_ITEM_LIST);
+
+    if p.eat(R_BRACK) {
+        return Present(label_node.complete(p, MD_REFERENCE_LINK_LABEL));
+    }
+
+    // Missing closing bracket: give everything back.
+    label_node.abandon(p);
+    p.rewind(checkpoint);
+    Absent
+}
+
+/// Parse inline link (`[text](url)`).
+///
+/// Grammar: `MdInlineLink = '[' text: MdInlineItemList ']' '(' source: MdInlineItemList ')'`
+///
+/// Thin alias kept for backwards compatibility; it forwards to
+/// `parse_link_or_reference`, which is the preferred entry point.
+pub(crate) fn parse_inline_link(p: &mut MarkdownParser) -> ParsedSyntax {
+    parse_link_or_reference(p)
+}
+
+/// Parse image starting with `![` - dispatches to inline image or reference image.
+/// +/// After parsing `![alt]`: +/// - If followed by `(` → inline image `![alt](url)` +/// - If followed by `[` → reference image `![alt][label]` or `![alt][]` +/// - Otherwise → shortcut reference image `![alt]` +pub(crate) fn parse_image_or_reference(p: &mut MarkdownParser) -> ParsedSyntax { + parse_link_or_image(p, LinkParseKind::Image) +} + +#[derive(Copy, Clone)] +enum LinkParseKind { + Link, + Image, +} + +impl LinkParseKind { + fn starts_here(self, p: &mut MarkdownParser) -> bool { + match self { + Self::Link => p.at(L_BRACK), + Self::Image => p.at(BANG) && p.nth_at(1, L_BRACK), + } + } + + fn bump_opening(self, p: &mut MarkdownParser) { + if matches!(self, Self::Image) { + p.bump(BANG); + } + p.bump(L_BRACK); + } + + fn lookahead_reference(self, p: &mut MarkdownParser) -> Option { + match self { + Self::Link => lookahead_reference_link(p), + Self::Image => lookahead_reference_image(p), + } + } + + fn inline_kind(self) -> MarkdownSyntaxKind { + match self { + Self::Link => MD_INLINE_LINK, + Self::Image => MD_INLINE_IMAGE, + } + } + + fn reference_kind(self) -> MarkdownSyntaxKind { + match self { + Self::Link => MD_REFERENCE_LINK, + Self::Image => MD_REFERENCE_IMAGE, + } + } + + fn report_unclosed_text(self, p: &mut MarkdownParser, opening_range: TextRange) { + match self { + Self::Link => p.error(super::parse_error::unclosed_link( + p, + opening_range, + "expected `]` to close link text", + )), + Self::Image => p.error(super::parse_error::unclosed_image( + p, + opening_range, + "expected `]` to close alt text", + )), + } + } + + fn report_unclosed_destination(self, p: &mut MarkdownParser, opening_range: TextRange) { + match self { + Self::Link => p.error(super::parse_error::unclosed_link( + p, + opening_range, + "expected `)` to close URL", + )), + Self::Image => p.error(super::parse_error::unclosed_image( + p, + opening_range, + "expected `)` to close image URL", + )), + } + } +} + +fn parse_link_or_image(p: &mut MarkdownParser, kind: LinkParseKind) -> 
ParsedSyntax { + if !kind.starts_here(p) { + return Absent; + } + + let checkpoint = p.checkpoint(); + let m = p.start(); + let opening_range = p.cur_range(); + let reference = kind.lookahead_reference(p); + // Clear any cached lookahead tokens before switching lexing context. + p.reset_lookahead(); + + kind.bump_opening(p); + + // Link text / alt text + parse_inline_item_list_until_no_links(p, R_BRACK); + + // ] - if missing at inline end, emit diagnostic; otherwise rewind + if !p.eat(R_BRACK) { + if p.at_inline_end() { + // Unclosed link/image at end of inline content - emit diagnostic + kind.report_unclosed_text(p, opening_range); + // Return as reference link/image (shortcut) with missing closing bracket + return Present(m.complete(p, kind.reference_kind())); + } + // Not at inline end but missing ] - rewind and treat as text + m.abandon(p); + p.rewind(checkpoint); + return Absent; + } + + // Now decide based on what follows ] + if p.at(L_PAREN) { + // Inline link/image: [text](url) or ![alt](url) + // Bump past ( and lex the following tokens in LinkDefinition context + // so whitespace separates destination and title. 
+ p.expect_with_context(L_PAREN, crate::lexer::MarkdownLexContext::LinkDefinition); + + let destination = p.start(); + parse_inline_link_destination_tokens(p); + let has_title = inline_title_starts_after_whitespace_tokens(p); + while is_whitespace_token(p) { + bump_textual_link_def(p); + } + destination.complete(p, MD_INLINE_ITEM_LIST); + + if has_title { + let title_m = p.start(); + let list_m = p.start(); + parse_title_content(p, get_title_close_char(p)); + list_m.complete(p, MD_INLINE_ITEM_LIST); + title_m.complete(p, MD_LINK_TITLE); + } + + if !p.eat(R_PAREN) { + kind.report_unclosed_destination(p, opening_range); + } + + Present(m.complete(p, kind.inline_kind())) + } else if p.at(L_BRACK) { + // Reference link/image: [text][label] or [text][] + let label = parse_reference_label(p); + let reference = reference.filter(|reference| { + if label.is_absent() { + reference.is_shortcut + } else { + true + } + }); + + if let Some(reference) = reference + && !reference.is_defined(p) + { + m.abandon(p); + p.rewind(checkpoint); + return consume_textual_until_offset(p, reference.end_offset); + } + + Present(m.complete(p, kind.reference_kind())) + } else { + // Shortcut reference: [text] or ![alt] + // No label part - the text/alt IS the label for resolution + if let Some(reference) = reference + && reference.is_shortcut + && !reference.is_defined(p) + { + m.abandon(p); + p.rewind(checkpoint); + return consume_textual_until_offset(p, reference.end_offset); + } + Present(m.complete(p, kind.reference_kind())) + } +} + +struct ReferenceLinkLookahead { + end_offset: TextSize, + label_raw: String, + is_shortcut: bool, +} + +impl ReferenceLinkLookahead { + fn is_defined(&self, p: &MarkdownParser) -> bool { + let normalized = normalize_reference_label(&self.label_raw); + p.has_link_reference_definition(&normalized) + } +} + +fn lookahead_reference_link(p: &mut MarkdownParser) -> Option { + lookahead_reference_common(p, false) +} + +fn lookahead_reference_image(p: &mut 
MarkdownParser) -> Option { + lookahead_reference_common(p, true) +} + +fn lookahead_reference_common( + p: &mut MarkdownParser, + is_image: bool, +) -> Option { + p.lookahead(|p| { + if is_image { + if !p.at(BANG) || !p.nth_at(1, L_BRACK) { + return None; + } + p.bump(BANG); + } + + if !p.at(L_BRACK) { + return None; + } + + p.bump(L_BRACK); + + let link_text = collect_bracket_text(p)?; + let end_offset = p.cur_range().end(); + p.bump(R_BRACK); + + if p.at(L_PAREN) { + return None; + } + + if p.at(L_BRACK) { + p.bump(L_BRACK); + let label_text = collect_bracket_text(p); + if let Some(label_text) = label_text { + let label = if label_text.is_empty() { + link_text.clone() + } else { + label_text + }; + let end_offset = p.cur_range().end(); + p.bump(R_BRACK); + return Some(ReferenceLinkLookahead { + end_offset, + label_raw: label, + is_shortcut: false, + }); + } + } + + Some(ReferenceLinkLookahead { + end_offset, + label_raw: link_text, + is_shortcut: true, + }) + }) +} + +fn collect_bracket_text(p: &mut MarkdownParser) -> Option { + let mut text = String::new(); + loop { + if p.at(T![EOF]) || p.at_inline_end() { + return None; + } + + if p.at(R_BRACK) { + return Some(text); + } + + text.push_str(p.cur_text()); + p.bump(p.cur()); + } +} + +fn consume_textual_until_offset(p: &mut MarkdownParser, end_offset: TextSize) -> ParsedSyntax { + let mut last = Absent; + + while !p.at(T![EOF]) { + let end = p.cur_range().end(); + last = super::parse_textual(p); + if end >= end_offset { + break; + } + } + + last +} + +fn bump_textual_link_def(p: &mut MarkdownParser) { + use crate::lexer::MarkdownLexContext; + + let item = p.start(); + p.bump_remap_with_context(MD_TEXTUAL_LITERAL, MarkdownLexContext::LinkDefinition); + item.complete(p, MD_TEXTUAL); +} +fn is_whitespace_token(p: &MarkdownParser) -> bool { + let text = p.cur_text(); + !text.is_empty() && text.chars().all(|c| c == ' ' || c == '\t') +} + +fn inline_title_starts_after_whitespace_tokens(p: &mut MarkdownParser) -> bool 
{ + p.lookahead(|p| { + while is_whitespace_token(p) { + bump_textual_link_def(p); + } + get_title_close_char(p).is_some() + }) +} + +fn parse_inline_link_destination_tokens(p: &mut MarkdownParser) { + p.re_lex_link_definition(); + + if p.at(L_ANGLE) { + bump_textual_link_def(p); + while !p.at(EOF) && !p.at(NEWLINE) { + if p.at(R_ANGLE) { + bump_textual_link_def(p); + break; + } + if is_whitespace_token(p) { + break; + } + bump_textual_link_def(p); + } + return; + } + + let mut paren_depth: i32 = 0; + while !p.at(EOF) && !p.at(NEWLINE) { + if is_whitespace_token(p) { + break; + } + + if p.at(L_PAREN) { + paren_depth += 1; + } else if p.at(R_PAREN) { + if paren_depth == 0 { + break; + } + paren_depth -= 1; + } + + bump_textual_link_def(p); + } +} + +fn get_title_close_char(p: &MarkdownParser) -> Option { + let text = p.cur_text(); + if text.starts_with('"') { + Some('"') + } else if text.starts_with('\'') { + Some('\'') + } else if p.at(L_PAREN) { + Some(')') + } else { + None + } +} + +fn parse_title_content(p: &mut MarkdownParser, close_char: Option) { + let Some(close_char) = close_char else { + return; + }; + + let text = p.cur_text(); + let is_complete = text.len() >= 2 + && ((close_char == ')' && text.ends_with(')')) + || (close_char != ')' && text.ends_with(close_char))); + + bump_textual_link_def(p); + if is_complete { + return; + } + + loop { + if p.at(EOF) || p.at(NEWLINE) { + return; + } + + let text = p.cur_text(); + if text.ends_with(close_char) { + bump_textual_link_def(p); + return; + } + + bump_textual_link_def(p); + } +} + +/// Parse inline image (`![alt](url)`). +/// +/// Grammar: `MdInlineImage = '!' '[' alt: MdInlineItemList ']' '(' source: MdInlineItemList ')'` +/// +/// Note: This is kept for backwards compatibility but `parse_image_or_reference` +/// is the preferred entry point for image parsing. 
+pub(crate) fn parse_inline_image(p: &mut MarkdownParser) -> ParsedSyntax { + parse_image_or_reference(p) +} + +/// Check if text starting with `<` is valid inline HTML per CommonMark §6.8. +/// Returns the length of the HTML element if valid, None otherwise. +/// +/// Valid patterns: +/// - Open tags: ``, ``, `` +/// - Close tags: `` +/// - Comments: `` +/// - Processing instructions: `` +/// - Declarations: `` +/// - CDATA: `` +fn is_inline_html(text: &str) -> Option { + let bytes = text.as_bytes(); + if bytes.len() < 2 || bytes[0] != b'<' { + return None; + } + + // HTML comment: + if bytes.starts_with(b" + if let Some(pos) = text[4..].find("-->") { + let body = &text[4..4 + pos]; + // CommonMark: comment cannot start with '>' or '->', and must not contain "--" + if body.starts_with('>') || body.starts_with("->") || body.contains("--") { + return None; + } + return Some(4 + pos + 3); + } + return None; + } + + // Processing instruction: + if bytes.len() >= 2 && bytes[1] == b'?' { + // Find closing ?> + if let Some(pos) = text[2..].find("?>") { + return Some(2 + pos + 2); + } + return None; + } + + // CDATA section: + if bytes.starts_with(b" + if let Some(pos) = text[9..].find("]]>") { + return Some(9 + pos + 3); + } + return None; + } + + // Declaration: + // e.g., + if bytes.len() >= 3 && bytes[1] == b'!' 
&& bytes[2].is_ascii_alphabetic() { + // Find closing > + if let Some(pos) = text[2..].find('>') { + return Some(2 + pos + 1); + } + return None; + } + + // Close tag: + if bytes.len() >= 4 && bytes[1] == b'/' { + if !bytes[2].is_ascii_alphabetic() { + return None; + } + // Tag name: [A-Za-z][A-Za-z0-9-]* + let mut i = 3; + while i < bytes.len() && (bytes[i].is_ascii_alphanumeric() || bytes[i] == b'-') { + i += 1; + } + // Skip optional whitespace + while i < bytes.len() + && (bytes[i] == b' ' + || bytes[i] == b'\t' + || bytes[i] == b'\n' + || bytes[i] == b'\r' + || bytes[i] == b'\x0c') + { + i += 1; + } + // Must end with > + if i < bytes.len() && bytes[i] == b'>' { + return Some(i + 1); + } + return None; + } + + // Open tag: or + // Defensive bounds check - should be guaranteed by earlier len check but be explicit + if bytes.len() < 2 || !bytes[1].is_ascii_alphabetic() { + return None; + } + + // Tag name: [A-Za-z][A-Za-z0-9-]* + // Note: tag names cannot contain `.` (so is NOT a valid tag) + let mut i = 2; + while i < bytes.len() && (bytes[i].is_ascii_alphanumeric() || bytes[i] == b'-') { + i += 1; + } + + // After tag name, must have valid boundary: whitespace, >, or / + // This prevents from being treated as HTML + if i >= bytes.len() { + return None; + } + let boundary = bytes[i]; + if boundary != b' ' + && boundary != b'\t' + && boundary != b'\n' + && boundary != b'\r' + && boundary != b'\x0c' + && boundary != b'>' + && boundary != b'/' + { + return None; + } + + // Handle immediate close or self-close + if boundary == b'>' { + return Some(i + 1); + } + if boundary == b'/' { + if i + 1 < bytes.len() && bytes[i + 1] == b'>' { + return Some(i + 2); + } + return None; + } + + // Has attributes - validate per CommonMark §6.8 + + let skip_spaces = |i: &mut usize| -> Option { + let mut skipped = false; + while *i < bytes.len() { + match bytes[*i] { + b' ' | b'\t' | b'\n' | b'\r' | b'\x0c' => { + skipped = true; + *i += 1; + } + _ => break, + } + } + Some(skipped) 
+ }; + + let is_attr_name_start = |b: u8| b.is_ascii_alphabetic() || b == b'_' || b == b':'; + let is_attr_name_continue = + |b: u8| b.is_ascii_alphanumeric() || b == b'_' || b == b':' || b == b'.' || b == b'-'; + + loop { + let had_space = skip_spaces(&mut i)?; + if i >= bytes.len() { + return None; + } + + // End or self-close + if bytes[i] == b'>' { + return Some(i + 1); + } + if bytes[i] == b'/' { + if i + 1 < bytes.len() && bytes[i + 1] == b'>' { + return Some(i + 2); + } + return None; + } + + // Attributes must be separated by whitespace + if !had_space { + return None; + } + + // Parse attribute name + if !is_attr_name_start(bytes[i]) { + return None; + } + i += 1; + while i < bytes.len() && is_attr_name_continue(bytes[i]) { + i += 1; + } + + // Optional whitespace and value + skip_spaces(&mut i)?; + if i < bytes.len() && bytes[i] == b'=' { + i += 1; + skip_spaces(&mut i)?; + if i >= bytes.len() { + return None; + } + + match bytes[i] { + b'"' => { + i += 1; + while i < bytes.len() && bytes[i] != b'"' { + i += 1; + } + if i >= bytes.len() { + return None; + } + i += 1; + } + b'\'' => { + i += 1; + while i < bytes.len() && bytes[i] != b'\'' { + i += 1; + } + if i >= bytes.len() { + return None; + } + i += 1; + } + _ => { + let start = i; + while i < bytes.len() { + let b = bytes[i]; + if b <= b' ' + || b == b'"' + || b == b'\'' + || b == b'=' + || b == b'<' + || b == b'>' + || b == b'`' + { + break; + } + i += 1; + } + if i == start { + return None; + } + } + } + } + } +} + +/// Parse entity or numeric character reference per CommonMark §6.2. 
+/// +/// Grammar: MdEntityReference = value: 'md_entity_literal' +/// +/// Valid patterns: +/// - Named entity: `&name;` where name is 2-31 alphanumeric chars starting with letter +/// - Decimal numeric: `&#digits;` where digits is 1-7 decimal digits +/// - Hexadecimal: `&#xhex;` or `&#Xhex;` where hex is 1-6 hex digits +/// +/// The lexer has already validated and tokenized valid entity references as +/// MD_ENTITY_LITERAL tokens. Invalid patterns remain as textual. +pub(crate) fn parse_entity_reference(p: &mut MarkdownParser) -> ParsedSyntax { + if !p.at(MD_ENTITY_LITERAL) { + return Absent; + } + + let m = p.start(); + p.bump(MD_ENTITY_LITERAL); + Present(m.complete(p, MD_ENTITY_REFERENCE)) +} + +/// Parse raw inline HTML per CommonMark §6.8. +/// +/// Grammar: MdInlineHtml = value: MdInlineItemList +/// +/// Includes: open tags, close tags, comments, processing instructions, +/// declarations, and CDATA sections. +pub(crate) fn parse_inline_html(p: &mut MarkdownParser) -> ParsedSyntax { + if !p.at(L_ANGLE) { + return Absent; + } + + // Get the source text starting from current position + let source = p.source_after_current(); + + // Check if this is valid inline HTML + let html_len = match is_inline_html(source) { + Some(len) => len, + None => return Absent, + }; + + // Valid inline HTML - create the node + // Use checkpoint so we can rewind if token boundaries don't align + let checkpoint = p.checkpoint(); + let m = p.start(); + + // Create content as inline item list containing textual nodes + let content = p.start(); + + // Track remaining bytes to consume + let mut remaining = html_len; + + while remaining > 0 && !p.at(T![EOF]) { + let token_len = p.cur_text().len(); + + // If the current token is larger than remaining bytes, token boundaries + // don't align with our validated HTML - rewind and treat as text + if token_len > remaining { + m.abandon(p); + p.rewind(checkpoint); + return Absent; + } + + let text_m = p.start(); + 
p.bump_remap(MD_TEXTUAL_LITERAL); + text_m.complete(p, MD_TEXTUAL); + remaining -= token_len; + } + + content.complete(p, MD_INLINE_ITEM_LIST); + + Present(m.complete(p, MD_INLINE_HTML)) +} + +/// Check if the text after `<` looks like a URI autolink. +/// Per CommonMark §6.4: scheme must be 2-32 chars, start with letter, +/// followed by letters/digits/+/-/., then `:`. +fn is_uri_autolink(text: &str) -> bool { + let bytes = text.as_bytes(); + if bytes.is_empty() { + return false; + } + + // Must start with a letter + if !bytes[0].is_ascii_alphabetic() { + return false; + } + + // Find the colon + let mut colon_pos = None; + for (i, &b) in bytes.iter().enumerate().skip(1) { + if b == b':' { + colon_pos = Some(i); + break; + } + // Scheme chars: letters, digits, +, -, . + if !b.is_ascii_alphanumeric() && b != b'+' && b != b'-' && b != b'.' { + return false; + } + } + + // Scheme must be 2-32 chars and followed by colon + match colon_pos { + Some(pos) if (2..=32).contains(&pos) => { + // Must have content after the colon and no whitespace/< in URI + let rest = &text[pos + 1..]; + !rest.is_empty() + && !rest.contains('<') + && !rest.contains('>') + && !rest.chars().any(|c| c.is_whitespace()) + } + _ => false, + } +} + +/// Check if the text after `<` looks like an email autolink. +/// Per CommonMark §6.5: local@domain pattern with specific char restrictions. +fn is_email_autolink(text: &str) -> bool { + // Must contain exactly one @ not at start or end + let at_pos = match text.find('@') { + Some(pos) if pos > 0 && pos < text.len() - 1 => pos, + _ => return false, + }; + + // Check no second @ + if text[at_pos + 1..].contains('@') { + return false; + } + + // Local part: alphanumerics and .!#$%&'*+/=?^_`{|}~- + let local = &text[..at_pos]; + for c in local.chars() { + if !c.is_ascii_alphanumeric() + && !matches!( + c, + '.' | '!' + | '#' + | '$' + | '%' + | '&' + | '\'' + | '*' + | '+' + | '/' + | '=' + | '?' 
+ | '^' + | '_' + | '`' + | '{' + | '|' + | '}' + | '~' + | '-' + ) + { + return false; + } + } + + // Domain part: alphanumerics and hyphens, dots for subdomains + let domain = &text[at_pos + 1..]; + if domain.is_empty() || domain.starts_with('.') || domain.ends_with('.') { + return false; + } + + for c in domain.chars() { + if !c.is_ascii_alphanumeric() && c != '-' && c != '.' { + return false; + } + } + + true +} + +/// Parse an autolink (`` or ``). +/// +/// Grammar: MdAutolink = '<' value: MdInlineItemList '>' +/// +/// Per CommonMark §6.4 and §6.5, autolinks are URIs or email addresses +/// wrapped in angle brackets. +pub(crate) fn parse_autolink(p: &mut MarkdownParser) -> ParsedSyntax { + if !p.at(L_ANGLE) { + return Absent; + } + + // Look ahead to find the closing > and check if content is valid + let source = p.source_after_current(); + + // Skip the < and find > + let after_open = &source[1..]; + let close_pos = match after_open.find('>') { + Some(pos) => pos, + None => return Absent, // No closing > + }; + + // Check for newline before > (not allowed in autolinks) + let content = &after_open[..close_pos]; + if content.contains('\n') || content.contains('\r') { + return Absent; + } + + // Must be either URI or email autolink + if !is_uri_autolink(content) && !is_email_autolink(content) { + return Absent; + } + + // Valid autolink - parse it + let m = p.start(); + + // < + p.bump(L_ANGLE); + + // Content as inline item list containing textual nodes + let content = p.start(); + while !p.at(R_ANGLE) && !p.at_inline_end() { + let text_m = p.start(); + p.bump_remap(MD_TEXTUAL_LITERAL); + text_m.complete(p, MD_TEXTUAL); + } + content.complete(p, MD_INLINE_ITEM_LIST); + + // > + p.expect(R_ANGLE); + + Present(m.complete(p, MD_AUTOLINK)) +} + +/// Dispatch to the appropriate inline parser based on current token. 
+pub(crate) fn parse_any_inline(p: &mut MarkdownParser) -> ParsedSyntax { + if p.at(MD_HARD_LINE_LITERAL) { + parse_hard_line(p) + } else if p.at(BACKTICK) { + parse_inline_code(p) + } else if p.at(DOUBLE_STAR) || p.at(DOUBLE_UNDERSCORE) { + // Try emphasis, fall back to literal text if flanking rules fail + let result = parse_inline_emphasis(p); + if result.is_absent() { + super::parse_textual(p) + } else { + result + } + } else if p.at(T![*]) || p.at(UNDERSCORE) { + // Try italic, fall back to literal text if flanking rules fail + let result = parse_inline_italic(p); + if result.is_absent() { + super::parse_textual(p) + } else { + result + } + } else if p.at(BANG) && p.nth_at(1, L_BRACK) { + // Try image, fall back to literal text if parsing fails + let result = parse_inline_image(p); + if result.is_absent() { + super::parse_textual(p) + } else { + result + } + } else if p.at(L_BRACK) { + // Try link, fall back to literal text if parsing fails + let result = parse_inline_link(p); + if result.is_absent() { + super::parse_textual(p) + } else { + result + } + } else if p.at(L_ANGLE) { + // Try autolink first (takes priority per CommonMark) + let result = parse_autolink(p); + if result.is_present() { + return result; + } + // Then try inline HTML + let result = parse_inline_html(p); + if result.is_present() { + return result; + } + // Fall back to textual + super::parse_textual(p) + } else if p.at(MD_ENTITY_LITERAL) { + // Entity or numeric character reference (already validated by lexer) + parse_entity_reference(p) + } else { + super::parse_textual(p) + } +} diff --git a/crates/biome_markdown_parser/src/syntax/link_block.rs b/crates/biome_markdown_parser/src/syntax/link_block.rs new file mode 100644 index 000000000000..86d9f5735443 --- /dev/null +++ b/crates/biome_markdown_parser/src/syntax/link_block.rs @@ -0,0 +1,584 @@ +//! Link reference definition parsing for Markdown (CommonMark §4.7). +//! +//! 
A link reference definition is a block-level construct that defines a label +//! for later reference. The syntax is: +//! +//! ```markdown +//! [label]: url "optional title" +//! [label]: url 'optional title' +//! [label]: url (optional title) +//! [label]: +//! ``` +//! +//! These definitions are not rendered but provide targets for reference links. +//! +//! # CommonMark Spec §4.7 Requirements +//! +//! 1. Label is enclosed in `[` and `]`, followed by `:` +//! 2. Label may contain up to 999 characters (no unescaped `]`) +//! 3. Destination can be angle-bracketed `` or bare URL (no whitespace) +//! 4. Optional title can be quoted with `"`, `'`, or `()` +//! 5. Labels are case-insensitive and whitespace-normalized for matching + +use biome_markdown_syntax::MarkdownSyntaxKind::*; +use biome_parser::Parser; +use biome_parser::prelude::ParsedSyntax::{self, *}; + +use crate::MarkdownParser; + +/// Maximum label length per CommonMark spec (999 characters). +const MAX_LABEL_LENGTH: usize = 999; + +/// Check if we're at the start of a link reference definition. +/// +/// A link reference definition starts with `[` at the beginning of a line +/// (with up to 3 spaces of indentation allowed). +/// +/// We use token-based lookahead to verify the pattern: `[label]: destination` +/// where label doesn't contain unescaped `]` or `[`, and is followed by `:`. 
+pub(crate) fn at_link_block(p: &mut MarkdownParser) -> bool { + p.lookahead(|p| { + // Must be at line start (or start of input) + if !p.at_line_start() && !p.at_start_of_input() { + return false; + } + + // Check for up to 3 spaces of indentation (more means indented code block) + if p.line_start_leading_indent() > 3 { + return false; + } + + p.skip_line_indent(3); + + // Must start with `[` + if !p.at(L_BRACK) { + return false; + } + + // Use token-based lookahead to verify this is a valid link reference definition + is_valid_link_definition_lookahead(p) + }) +} + +/// Token-based lookahead to verify a link reference definition. +/// +/// This advances tokens to check: `[label]: destination [title]?` +/// Returns true if the pattern is valid, false otherwise. +/// Does NOT build nodes - just validates the structure. +fn is_valid_link_definition_lookahead(p: &mut MarkdownParser) -> bool { + // Expect [ + if !p.at(L_BRACK) { + return false; + } + p.bump_any(); + + // Parse label: consume tokens until ] or invalid state + let mut label_len = 0; + loop { + if p.at(EOF) { + return false; + } + if p.at(NEWLINE) && p.at_blank_line() { + return false; // Blank line ends link definition + } + if p.at(R_BRACK) { + break; + } + if p.at(L_BRACK) { + return false; // Unescaped [ inside label not allowed + } + + let text = p.cur_text(); + // Check for escape sequences + if text.starts_with('\\') && text.len() > 1 { + label_len += 1; // Count escaped char + } else { + label_len += text.chars().count(); + } + + if label_len > MAX_LABEL_LENGTH { + return false; + } + p.bump_any(); + } + + // Label must be non-empty + if label_len == 0 { + return false; + } + + // Expect ] + if !p.at(R_BRACK) { + return false; + } + p.bump_any(); + + // Expect : immediately (no whitespace allowed per CommonMark) + if !p.at(COLON) { + return false; + } + p.bump_any(); + + // Re-lex the current token in LinkDefinition context so whitespace is tokenized. 
+ p.re_lex_link_definition(); + + // Destination is required + if p.at(EOF) || p.at(NEWLINE) { + return false; + } + + // Skip destination + if !skip_destination_tokens(p) { + return false; + } + + // Skip optional whitespace after destination (lookahead only) + skip_whitespace_tokens(p); + + // Check what follows destination + if p.at(EOF) { + return true; // Valid: destination only, EOF + } + + if p.at(NEWLINE) { + // Check for title on next line + p.bump_link_definition(); + skip_whitespace_tokens(p); + + if at_title_start(p) { + return skip_title_tokens(p); + } + // No title on next line - destination-only is valid + return true; + } + + // Check for optional title on same line + if at_title_start(p) { + return skip_title_tokens(p); + } + + // Non-whitespace, non-title after destination = invalid trailing text + false +} + +/// Skip whitespace tokens (spaces/tabs) in lookahead. +fn skip_whitespace_tokens(p: &mut MarkdownParser) { + while !p.at(EOF) && !p.at(NEWLINE) { + let text = p.cur_text(); + if text.chars().all(|c| c == ' ' || c == '\t') && !text.is_empty() { + p.bump_link_definition(); + } else { + break; + } + } +} + +/// Check if at a title start token. +fn at_title_start(p: &MarkdownParser) -> bool { + let text = p.cur_text(); + text.starts_with('"') || text.starts_with('\'') || p.at(L_PAREN) +} + +/// Skip destination tokens in lookahead. Returns false if destination is invalid. 
+fn skip_destination_tokens(p: &mut MarkdownParser) -> bool { + if p.at(L_ANGLE) { + // Angle-bracketed destination + p.bump_link_definition(); + loop { + if p.at(EOF) || p.at(NEWLINE) { + return false; // Unterminated angle bracket + } + if p.at(R_ANGLE) { + p.bump_link_definition(); + // Consume separator whitespace into destination + skip_whitespace_tokens(p); + return true; + } + p.bump_link_definition(); + } + } else { + // Bare destination with balanced parentheses + let mut paren_depth = 0i32; + let mut has_content = false; + let mut saw_separator = false; + + while !p.at(EOF) && !p.at(NEWLINE) { + let text = p.cur_text(); + // Stop at whitespace + if text.chars().all(|c| c == ' ' || c == '\t') && !text.is_empty() { + if has_content { + saw_separator = true; + } + p.bump_link_definition(); + continue; + } + + if at_title_start(p) && has_content && saw_separator { + break; + } + + if p.at(L_PAREN) { + paren_depth += 1; + } else if p.at(R_PAREN) { + if paren_depth > 0 { + paren_depth -= 1; + } else { + break; // Unbalanced ) ends destination + } + } + + has_content = true; + saw_separator = false; + p.bump_link_definition(); + } + has_content + } +} + +/// Skip title tokens in lookahead. Returns true if valid (ends at EOL/EOF). 
+fn skip_title_tokens(p: &mut MarkdownParser) -> bool {
+    // Share the delimiter detection with the real title parser so lookahead
+    // and parsing cannot disagree about what starts a title.
+    let Some(close_char) = get_title_close_char(p) else {
+        return false;
+    };
+
+    // Check if first token is complete (e.g., `"title"`)
+    let first_text = p.cur_text();
+    if first_text.len() >= 2 && first_text.ends_with(close_char) {
+        p.bump_link_definition();
+        skip_whitespace_tokens(p);
+        return p.at(EOF) || p.at(NEWLINE);
+    }
+
+    p.bump_link_definition();
+
+    // Multi-token title: find closing delimiter
+    loop {
+        if p.at(EOF) {
+            return false; // Unterminated title
+        }
+
+        // Check for closing delimiter
+        let is_close = if close_char == ')' {
+            p.at(R_PAREN)
+        } else {
+            p.cur_text().ends_with(close_char)
+        };
+
+        if is_close {
+            p.bump_link_definition();
+            skip_whitespace_tokens(p);
+            // Only trailing whitespace may follow the title on its line.
+            return p.at(EOF) || p.at(NEWLINE);
+        }
+
+        // Titles can span lines, but blank line ends them
+        if p.at(NEWLINE) && p.at_blank_line() {
+            return false;
+        }
+
+        p.bump_link_definition();
+    }
+}
+
+/// Parse a link reference definition.
+///
+/// Grammar: `MdLinkReferenceDefinition = '[' label: MdLinkLabel ']' ':' destination: MdLinkDestination title: MdLinkTitle?`
+///
+/// Returns `Absent` if the current position is not a valid link reference definition.
+pub(crate) fn parse_link_block(p: &mut MarkdownParser) -> ParsedSyntax {
+    if !at_link_block(p) {
+        return Absent;
+    }
+
+    let m = p.start();
+
+    p.skip_line_indent(3);
+
+    // [ - opening bracket
+    p.expect(L_BRACK);
+
+    // Label - parse until ]
+    parse_link_label(p);
+
+    // ] - closing bracket
+    p.expect(R_BRACK);
+
+    // : - separator
+    p.expect(COLON);
+
+    // Re-lex the current token in LinkDefinition context so whitespace produces
+    // separate tokens, allowing proper destination/title parsing.
+    p.re_lex_link_definition();
+
+    // Destination (required) - in LinkDefinition context, whitespace is separate
+    parse_link_destination(p);
+
+    // Optional title - can be on same line or next line per CommonMark §4.7
+    if at_link_title(p) {
+        parse_link_title(p);
+    } else if p.at(NEWLINE) && title_on_next_line(p) {
+        // Title is on the next line per CommonMark §4.7
+        // We parse the newline and whitespace as part of the title
+        parse_link_title_after_newline(p);
+    }
+
+    Present(m.complete(p, MD_LINK_REFERENCE_DEFINITION))
+}
+
+/// Parse the label part of a link reference definition.
+///
+/// Grammar: MdLinkLabel = content: MdInlineItemList
+///
+/// The label is everything between `[` and `]`, excluding the brackets themselves.
+/// Scanning also stops at EOF and at a blank line.
+fn parse_link_label(p: &mut MarkdownParser) {
+    let m = p.start();
+    let list = p.start();
+
+    while !p.at(R_BRACK) && !p.at(EOF) {
+        if p.at(NEWLINE) && p.at_blank_line() {
+            break;
+        }
+        bump_textual(p);
+    }
+
+    list.complete(p, MD_INLINE_ITEM_LIST);
+    m.complete(p, MD_LINK_LABEL);
+}
+
+/// Parse the destination part of a link reference definition.
+///
+/// Grammar: MdLinkDestination = content: MdInlineItemList
+///
+/// Destination can be:
+/// - Angle-bracketed: `<url>`
+/// - Bare URL: `url-without-spaces` (balanced parentheses allowed per CommonMark)
+///
+/// Uses LinkDefinition lex context so whitespace produces separate tokens.
+fn parse_link_destination(p: &mut MarkdownParser) {
+    let m = p.start();
+    let list = p.start();
+
+    // Include optional whitespace before destination in the destination node.
+    while is_whitespace_token(p) {
+        bump_textual_link_def(p);
+    }
+
+    if p.at(L_ANGLE) {
+        // Angle-bracketed: consume < ... >
+        bump_textual_link_def(p);
+        while !p.at(R_ANGLE) && !p.at(EOF) && !p.at(NEWLINE) {
+            bump_textual_link_def(p);
+        }
+        // The closing `>` is absent when the line or file ended first.
+        if p.at(R_ANGLE) {
+            bump_textual_link_def(p);
+        }
+    } else {
+        // Bare URL with balanced parentheses
+        let mut paren_depth: i32 = 0;
+
+        while !p.at(EOF) && !p.at(NEWLINE) {
+            if is_whitespace_token(p) {
+                break; // Bare destination stops at first whitespace
+            }
+
+            if p.at(L_PAREN) {
+                paren_depth += 1;
+            } else if p.at(R_PAREN) {
+                if paren_depth > 0 {
+                    paren_depth -= 1;
+                } else {
+                    break; // Unbalanced ) ends bare destination
+                }
+            }
+
+            bump_textual_link_def(p);
+        }
+    }
+
+    list.complete(p, MD_INLINE_ITEM_LIST);
+    m.complete(p, MD_LINK_DESTINATION);
+}
+
+/// Consume the current token as MdTextual using LinkDefinition context.
+/// This ensures whitespace produces separate tokens for destination/title parsing.
+fn bump_textual_link_def(p: &mut MarkdownParser) {
+    use crate::lexer::MarkdownLexContext;
+
+    let item = p.start();
+    p.bump_remap_with_context(MD_TEXTUAL_LITERAL, MarkdownLexContext::LinkDefinition);
+    item.complete(p, MD_TEXTUAL);
+}
+
+/// Check if we're at the start of a link title.
+///
+/// Title starts with `"`, `'`, or `(` but may be preceded by whitespace.
+/// Uses lookahead to skip whitespace and check for title delimiter.
+fn at_link_title(p: &mut MarkdownParser) -> bool {
+    p.lookahead(|p| {
+        // Skip whitespace before title
+        while is_whitespace_token(p) {
+            p.bump_link_definition();
+        }
+        get_title_close_char(p).is_some()
+    })
+}
+
+/// Check if there's a title on the next line (when at NEWLINE).
+///
+/// Per CommonMark §4.7, title can appear on the line following destination.
+/// This looks ahead past the newline and whitespace to check for title starter.
+fn title_on_next_line(p: &MarkdownParser) -> bool {
+    if !p.at(NEWLINE) {
+        return false;
+    }
+
+    // The remaining source is sliced past the current (newline) token, so what
+    // is left is the text of the following line(s).
+    let newline_len = p.cur_text().len();
+    let Some(next_line) = p.source_after_current().get(newline_len..) else {
+        return false;
+    };
+
+    // Check for title starter after optional leading spaces/tabs
+    let first = next_line.trim_start_matches([' ', '\t']).chars().next();
+    matches!(first, Some('"' | '\'' | '('))
+}
+
+/// Parse a link title that appears on the next line after a newline.
+///
+/// Per CommonMark §4.7, titles can appear on the line following the destination.
+/// The newline and the title line's leading whitespace become part of the
+/// `MD_LINK_TITLE` node.
+fn parse_link_title_after_newline(p: &mut MarkdownParser) {
+    let title = p.start();
+    let items = p.start();
+
+    // Include the newline as textual content
+    bump_textual_link_def(p);
+
+    parse_title_after_filler(p);
+
+    items.complete(p, MD_INLINE_ITEM_LIST);
+    title.complete(p, MD_LINK_TITLE);
+}
+
+/// Parse the optional title part of a link reference definition.
+///
+/// Grammar: MdLinkTitle = content: MdInlineItemList
+fn parse_link_title(p: &mut MarkdownParser) {
+    let title = p.start();
+    let items = p.start();
+
+    parse_title_after_filler(p);
+
+    items.complete(p, MD_INLINE_ITEM_LIST);
+    title.complete(p, MD_LINK_TITLE);
+}
+
+/// Consume filler whitespace in front of a title, then the title itself.
+fn parse_title_after_filler(p: &mut MarkdownParser) {
+    // Include optional filler whitespace before title
+    while is_whitespace_token(p) {
+        bump_textual_link_def(p);
+    }
+
+    // Force re-lex in Regular context so title content doesn't split at whitespace
+    p.force_relex_regular();
+    let close_char = get_title_close_char(p);
+    parse_title_content(p, close_char);
+}
+
+/// Get the closing character for a title based on current token.
+/// Returns None if not at a title start.
+fn get_title_close_char(p: &MarkdownParser) -> Option<char> {
+    let text = p.cur_text();
+    if text.starts_with('"') {
+        Some('"')
+    } else if text.starts_with('\'') {
+        Some('\'')
+    } else if p.at(L_PAREN) {
+        Some(')')
+    } else {
+        None
+    }
+}
+
+/// Parse title content until closing delimiter.
+///
+/// Inside title quotes, we use Regular context so whitespace doesn't split tokens.
+/// Does nothing when `close_char` is `None` (not at a title start).
+fn parse_title_content(p: &mut MarkdownParser, close_char: Option<char>) {
+    let Some(close_char) = close_char else {
+        return;
+    };
+
+    // Check if first token is complete title (e.g., `"title"`): at least two
+    // bytes long and already ending with the closing delimiter.
+    let text = p.cur_text();
+    let is_complete = text.len() >= 2 && text.ends_with(close_char);
+
+    // Bump the opening quote/first token using LinkDefinition context (for whitespace handling before)
+    bump_textual_link_def(p);
+
+    if is_complete {
+        return;
+    }
+
+    // Multi-token title: consume until closing delimiter
+    // Use Regular context inside title so whitespace doesn't split tokens
+    loop {
+        if p.at(EOF) {
+            break;
+        }
+
+        // Check for closing delimiter
+        let is_close = if close_char == ')' {
+            p.at(R_PAREN)
+        } else {
+            p.cur_text().ends_with(close_char)
+        };
+        if is_close {
+            // Use Regular context for title content
+            bump_textual(p);
+            break;
+        }
+
+        // Stop at blank line
+        // NOTE(review): the lookahead in `skip_title_tokens` and the label
+        // parser guard this with `p.at(NEWLINE) &&`; confirm whether the two
+        // conditions are meant to be identical.
+        if p.at_blank_line() {
+            break;
+        }
+
+        // Use Regular context for title content so whitespace doesn't split
+        bump_textual(p);
+    }
+}
+
+/// Check if current token is whitespace (space or tab).
+///
+/// An empty token text is not considered whitespace.
+fn is_whitespace_token(p: &MarkdownParser) -> bool {
+    let text = p.cur_text();
+    !text.is_empty() && text.chars().all(|c| c == ' ' || c == '\t')
+}
+
+/// Consume the current token as an MdTextual node.
+///
+/// This is a helper to reduce boilerplate for the common pattern:
+/// `let item = p.start(); p.bump_remap(MD_TEXTUAL_LITERAL); item.complete(p, MD_TEXTUAL);`
+fn bump_textual(p: &mut MarkdownParser) {
+    let item = p.start();
+    p.bump_remap(MD_TEXTUAL_LITERAL);
+    item.complete(p, MD_TEXTUAL);
+}
diff --git a/crates/biome_markdown_parser/src/syntax/list.rs b/crates/biome_markdown_parser/src/syntax/list.rs
new file mode 100644
index 000000000000..ba23f4802cd9
--- /dev/null
+++ b/crates/biome_markdown_parser/src/syntax/list.rs
@@ -0,0 +1,1762 @@
+//! List parsing for Markdown (CommonMark §5.2-5.3).
+//!
+//! Supports bullet lists (`-`, `*`, `+`) and ordered lists (`1.`, `2.`, etc.).
+//! Also supports multi-line list items via continuation lines and nested lists.
+//!
+//! # CommonMark Specification References
+//!
+//! - **§5.2 List items**: A list item is a sequence of blocks that belong to a
+//!   single list marker. Items can span multiple lines with proper indentation.
+//! - **§5.3 Lists**: A list is a sequence of list items of the same type (bullet
+//!   or ordered) that are not separated by blank lines (tight) or are (loose).
+//!
+//! ## Bullet List Markers (§5.2)
+//! - `-` (hyphen-minus)
+//! - `*` (asterisk)
+//! - `+` (plus sign)
+//!
+//! ## Ordered List Markers (§5.2)
+//! - `1.` through `999999999.` (1-9 digits followed by `.`)
+//! - `1)` through `999999999)` (1-9 digits followed by `)`)
+//!
+//! ## Depth Limits
+//!
+//! To prevent stack overflow from pathological input (deeply nested lists),
+//! nesting depth is limited to 100 levels. Deeper nesting emits a diagnostic
+//! and treats additional list markers as content.
+//!
+//! ## Tight vs Loose Lists
+//!
+//! Tightness is tracked while parsing: a blank line between items or inside an
+//! item makes the list loose. The result is recorded for each list through
+//! `record_list_tightness` when the list node is completed.
+//!
+//! ## Current Limitations
+//!
+//! - **List interruption rules**: Some constructs can interrupt lists; not all
+//!   rules from CommonMark are implemented.
+
+use biome_markdown_syntax::T;
+use biome_markdown_syntax::kind::MarkdownSyntaxKind::{self, *};
+use biome_parser::parse_lists::ParseNodeList;
+use biome_parser::parse_recovery::{ParseRecoveryTokenSet, RecoveryResult};
+use biome_parser::prelude::ParsedSyntax::{self, *};
+use biome_parser::prelude::{CompletedMarker, Marker, ParseDiagnostic, TokenSet};
+use biome_parser::{Parser, token_set};
+
+use super::quote::{consume_quote_prefix, consume_quote_prefix_without_virtual, has_quote_prefix};
+use biome_rowan::TextRange;
+
+use super::fenced_code_block::parse_fenced_code_block;
+use super::parse_error::{MAX_NESTING_DEPTH, list_nesting_too_deep};
+use super::{ParsedBlockKind, at_block_interrupt, at_indent_code_block};
+use crate::MarkdownParser;
+use crate::syntax::parse_any_block_with_indent_code_policy;
+
+/// Tokens that start a new block (used for recovery).
+///
+/// Passed to `ParseRecoveryTokenSet` by the bullet and ordered list `recover`
+/// implementations so that error recovery stops at the next block start.
+const BLOCK_RECOVERY_SET: TokenSet<MarkdownSyntaxKind> = token_set![
+    T![-],
+    T![*],
+    T![+],
+    T![>],
+    T![#],
+    TRIPLE_BACKTICK,
+    TRIPLE_TILDE,
+    MD_ORDERED_LIST_MARKER
+];
+/// CommonMark requires 4 or more spaces for indented code blocks.
+///
+/// List items whose marker is followed by more than this many columns of
+/// whitespace fall back to a content offset of one column after the marker.
+const INDENT_CODE_BLOCK_SPACES: usize = 4;
+
+/// Check if we're at the start of a bullet list item (`-`, `*`, or `+`).
+///
+/// A bullet list marker at line start followed by content is a list item.
+/// We check that it's at line start and not a thematic break.
+pub(crate) fn at_bullet_list_item(p: &mut MarkdownParser) -> bool {
+    at_bullet_list_item_with_base_indent(p, list_marker_base_indent(p))
+}
+
+/// Indentation required by the enclosing list item (0 outside of any item).
+fn list_marker_base_indent(p: &MarkdownParser) -> usize {
+    p.state().list_item_required_indent
+}
+
+/// Check that the current line's indentation allows a list marker.
+///
+/// The marker may be indented at most 3 columns beyond `base_indent`. At a
+/// virtual line start the base indent is treated as 0.
+fn list_item_within_indent(p: &mut MarkdownParser, base_indent: usize) -> bool {
+    if !p.at_line_start() {
+        return false;
+    }
+
+    let indent = p.line_start_leading_indent();
+    let base_indent =
+        if p.state().virtual_line_start == Some(p.cur_range().start()) && base_indent > 0 {
+            0
+        } else {
+            base_indent
+        };
+
+    if base_indent == 0 {
+        indent <= 3
+    } else {
+        indent >= base_indent && indent <= base_indent + 3
+    }
+}
+
+/// Bump whitespace-only textual tokens as regular tokens (used in lookahead).
+fn skip_leading_whitespace_tokens(p: &mut MarkdownParser) {
+    while p.at(MD_TEXTUAL_LITERAL) && is_whitespace_only(p.cur_text()) {
+        p.bump(MD_TEXTUAL_LITERAL);
+    }
+}
+
+/// Consume whitespace-only textual tokens in front of a list marker as
+/// skipped trivia, so they do not become tree nodes.
+fn skip_list_marker_indent(p: &mut MarkdownParser) {
+    while p.at(MD_TEXTUAL_LITERAL) && is_whitespace_only(p.cur_text()) {
+        p.parse_as_skipped_trivia_tokens(|p| p.bump(MD_TEXTUAL_LITERAL));
+    }
+}
+
+/// True for non-empty text made up solely of spaces and tabs.
+fn is_whitespace_only(text: &str) -> bool {
+    !text.is_empty() && text.chars().all(|c| c == ' ' || c == '\t')
+}
+
+/// Lookahead check for a bullet list item relative to `base_indent`.
+///
+/// Accepts a `-`/`*`/`+` token, a textual token that is exactly one of those
+/// characters, or a single-dash setext underline token, followed by
+/// whitespace or the end of the line.
+fn at_bullet_list_item_with_base_indent(p: &mut MarkdownParser, base_indent: usize) -> bool {
+    p.lookahead(|p| {
+        if !list_item_within_indent(p, base_indent) {
+            return false;
+        }
+
+        skip_leading_whitespace_tokens(p);
+
+        // Check for -, *, or + at the start of a line
+        // Thematic breaks (--- or ***) are lexed as MD_THEMATIC_BREAK_LITERAL,
+        // so if we see MINUS, STAR, or PLUS, it's a single character marker.
+        // A single-dash setext underline token can also represent an empty list item.
+        if p.at(MD_SETEXT_UNDERLINE_LITERAL) {
+            if !is_single_dash_setext_marker(p.cur_text()) {
+                return false;
+            }
+        } else if p.at(MD_TEXTUAL_LITERAL) {
+            if !is_textual_bullet_marker(p.cur_text()) {
+                return false;
+            }
+        } else if !p.at(T![-]) && !p.at(T![*]) && !p.at(T![+]) {
+            return false;
+        }
+
+        // Consume the marker so the token after it can be inspected.
+        if p.at(MD_SETEXT_UNDERLINE_LITERAL) {
+            p.bump_remap(T![-]);
+        } else if p.at(MD_TEXTUAL_LITERAL) {
+            let text = p.cur_text();
+            if text == "-" {
+                p.bump_remap(T![-]);
+            } else if text == "*" {
+                p.bump_remap(T![*]);
+            } else if text == "+" {
+                p.bump_remap(T![+]);
+            } else {
+                return false;
+            }
+        } else {
+            p.bump(p.cur());
+        }
+        marker_followed_by_whitespace_or_eol(p)
+    })
+}
+
+/// True when the current token is a newline, EOF, or text starting with a
+/// space or tab — i.e. what may legally follow a list marker.
+pub(crate) fn marker_followed_by_whitespace_or_eol(p: &mut MarkdownParser) -> bool {
+    if p.at(NEWLINE) || p.at(T![EOF]) {
+        return true;
+    }
+
+    if p.at(MD_TEXTUAL_LITERAL) {
+        let text = p.cur_text();
+        return text.starts_with(' ') || text.starts_with('\t');
+    }
+
+    false
+}
+
+/// Tracks blank-line information for a list item.
+#[derive(Default)]
+struct ListItemBlankInfo {
+    /// True if a blank line occurred anywhere within the item content.
+    has_blank_line: bool,
+    /// True if the item ended with a blank line.
+    ends_with_blank_line: bool,
+}
+
+/// Skip blank lines that separate two items of the same list.
+///
+/// Newlines are only consumed (as skipped trivia) while
+/// `has_item_after_blank_lines` reports another item; each consumed newline
+/// marks the list as loose.
+fn skip_blank_lines_between_items(
+    p: &mut MarkdownParser,
+    has_item_after_blank_lines: fn(&mut MarkdownParser) -> bool,
+    is_tight: &mut bool,
+    last_item_ends_with_blank: &mut bool,
+) {
+    // Skip blank lines between list items.
+    // Per CommonMark §5.3, blank lines between items make the list loose
+    // but don't end the list.
+    while p.at(NEWLINE) {
+        // Only skip if there's another list item after the blank lines
+        if !has_item_after_blank_lines(p) {
+            break;
+        }
+        // Blank lines between items make the list loose
+        *is_tight = false;
+        *last_item_ends_with_blank = true;
+        // Skip the blank line as trivia (no tree node created)
+        p.parse_as_skipped_trivia_tokens(|p| p.bump(NEWLINE));
+    }
+}
+
+/// Fold the blank-line information of a freshly parsed item into the list's
+/// tightness state.
+fn update_list_tightness(
+    blank_info: ListItemBlankInfo,
+    is_tight: &mut bool,
+    last_item_ends_with_blank: &mut bool,
+) {
+    // Blank line between items makes the list loose
+    if *last_item_ends_with_blank {
+        *is_tight = false;
+    }
+
+    // Blank line inside an item makes the list loose
+    if blank_info.has_blank_line {
+        *is_tight = false;
+    }
+
+    *last_item_ends_with_blank = blank_info.ends_with_blank_line;
+}
+
+/// Shared `parse_element` logic for bullet and ordered lists.
+///
+/// Skips separating blank lines, remembers the list's marker (of type `M`) the
+/// first time one is seen, parses one item and updates the tightness state.
+fn parse_list_element_common<M, FMarker, FParse>(
+    p: &mut MarkdownParser,
+    marker_state: &mut Option<M>,
+    current_marker: FMarker,
+    parse_item: FParse,
+    has_item_after_blank_lines: fn(&mut MarkdownParser) -> bool,
+    is_tight: &mut bool,
+    last_item_ends_with_blank: &mut bool,
+) -> ParsedSyntax
+where
+    FMarker: Fn(&mut MarkdownParser) -> Option<M>,
+    FParse: Fn(&mut MarkdownParser) -> (ParsedSyntax, ListItemBlankInfo),
+{
+    skip_blank_lines_between_items(
+        p,
+        has_item_after_blank_lines,
+        is_tight,
+        last_item_ends_with_blank,
+    );
+
+    if marker_state.is_none() {
+        *marker_state = current_marker(p);
+    }
+
+    let (parsed, blank_info) = parse_item(p);
+    update_list_tightness(blank_info, is_tight, last_item_ends_with_blank);
+    parsed
+}
+
+/// Shared `is_at_list_end` logic for bullet and ordered lists.
+///
+/// The list ends when a required block-quote prefix is missing, when the next
+/// item uses a different marker than `marker_state`, or when neither an item
+/// nor blank lines followed by an item come next. `handle_newline` may decide
+/// the outcome when the parser sits directly on a `NEWLINE` token.
+fn is_at_list_end_common<M, FAt, FMarker, FNewline>(
+    p: &mut MarkdownParser,
+    marker_state: Option<M>,
+    at_list_item: FAt,
+    current_marker: FMarker,
+    has_item_after_blank_lines: fn(&mut MarkdownParser) -> bool,
+    handle_newline: FNewline,
+) -> bool
+where
+    M: Copy + PartialEq,
+    FAt: Fn(&mut MarkdownParser) -> bool,
+    FMarker: Fn(&mut MarkdownParser) -> Option<M>,
+    FNewline: Fn(&mut MarkdownParser, Option<M>) -> Option<bool>,
+{
+    // Inside a block quote, a line without the quote prefix ends the list.
+    let quote_depth = p.state().block_quote_depth;
+    let at_virtual_line_start = p.state().virtual_line_start == Some(p.cur_range().start());
+    if quote_depth > 0
+        && !at_virtual_line_start
+        && (p.at_line_start() || p.has_preceding_line_break())
+        && !has_quote_prefix(p, quote_depth)
+    {
+        return true;
+    }
+
+    // Check if we're directly at a list marker
+    if at_list_item(p) {
+        // A different marker starts a new list rather than continuing this one.
+        if let (Some(current), Some(next)) = (marker_state, current_marker(p))
+            && current != next
+        {
+            return true;
+        }
+        return false;
+    }
+
+    // If at a blank line, look ahead to see if there's another list item.
+    // Per CommonMark §5.3, blank lines between items make the list loose,
+    // but don't end the list.
+    if p.at_line_start() && at_blank_line_start(p) {
+        return !has_item_after_blank_lines(p);
+    }
+
+    // Also check if we're directly AT a NEWLINE token (blank line)
+    // This handles the case where we're at the newline itself, not after it
+    if p.at(NEWLINE) {
+        if let Some(result) = handle_newline(p, marker_state) {
+            return result;
+        }
+        return !has_item_after_blank_lines(p);
+    }
+
+    // Not at a marker and not at a blank line with continuation
+    true
+}
+
+/// Struct implementing `ParseNodeList` for bullet lists.
+struct BulletList {
+    /// A list is tight if there are no blank lines between items or inside items.
+    is_tight: bool,
+    /// Whether the last parsed item ended with a blank line.
+    last_item_ends_with_blank: bool,
+    /// The marker kind for this list (`-`, `*`, or `+`).
+    marker_kind: Option<MarkdownSyntaxKind>,
+}
+
+impl BulletList {
+    /// Create the state for a new list: tight, with no marker seen yet.
+    fn new() -> Self {
+        Self {
+            is_tight: true,
+            last_item_ends_with_blank: false,
+            marker_kind: None,
+        }
+    }
+}
+
+impl ParseNodeList for BulletList {
+    type Kind = MarkdownSyntaxKind;
+    type Parser<'source> = MarkdownParser<'source>;
+
+    const LIST_KIND: Self::Kind = MD_BULLET_LIST;
+
+    fn parse_element(&mut self, p: &mut Self::Parser<'_>) -> ParsedSyntax {
+        parse_list_element_common(
+            p,
+            &mut self.marker_kind,
+            current_bullet_marker,
+            parse_bullet,
+            has_bullet_item_after_blank_lines,
+            &mut self.is_tight,
+            &mut self.last_item_ends_with_blank,
+        )
+    }
+
+    fn is_at_list_end(&self, p: &mut Self::Parser<'_>) -> bool {
+        is_at_list_end_common(
+            p,
+            self.marker_kind,
+            at_bullet_list_item,
+            current_bullet_marker,
+            has_bullet_item_after_blank_lines,
+            |p, _marker_kind| {
+                // At a NEWLINE: the list continues if the very next line
+                // starts with a bullet marker.
+                let next_is_bullet = p.lookahead(|p| {
+                    p.bump(NEWLINE);
+                    skip_leading_whitespace_tokens(p);
+                    if p.at(T![-]) || p.at(T![*]) || p.at(T![+]) {
+                        p.bump(p.cur());
+                        return marker_followed_by_whitespace_or_eol(p);
+                    }
+                    false
+                });
+                if next_is_bullet {
+                    Some(false)
+                } else {
+                    Some(!has_bullet_item_after_blank_lines(p))
+                }
+            },
+        )
+    }
+
+    fn recover(
+        &mut self,
+        p: &mut Self::Parser<'_>,
+        parsed_element: ParsedSyntax,
+    ) -> RecoveryResult {
+        parsed_element.or_recover_with_token_set(
+            p,
+            &ParseRecoveryTokenSet::new(MD_BOGUS_BULLET, BLOCK_RECOVERY_SET)
+                .enable_recovery_on_line_break(),
+            expected_bullet,
+        )
+    }
+
+    fn finish_list(&mut self, p: &mut Self::Parser<'_>, m: Marker) -> CompletedMarker {
+        let completed = m.complete(p, Self::LIST_KIND);
+        // Record tightness against the finished list's range.
+        let range = completed.range(p);
+        p.record_list_tightness(range, self.is_tight);
+        completed
+    }
+}
+
+/// Lookahead: the bullet marker kind at the current line start, if any.
+///
+/// Returns `None` when not at a line start or when the token after leading
+/// whitespace is not a bullet marker.
+fn current_bullet_marker(p: &mut MarkdownParser) -> Option<MarkdownSyntaxKind> {
+    p.lookahead(|p| {
+        if !p.at_line_start() {
+            return None;
+        }
+
+        skip_leading_whitespace_tokens(p);
+
+        if p.at(MD_SETEXT_UNDERLINE_LITERAL) {
+            if is_single_dash_setext_marker(p.cur_text()) {
+                return Some(T![-]);
+            }
+            return None;
+        }
+
+        if p.at(MD_TEXTUAL_LITERAL) {
+            return match p.cur_text() {
+                "-" => Some(T![-]),
+                "*" => Some(T![*]),
+                "+" => Some(T![+]),
+                _ => None,
+            };
+        }
+
+        if p.at(T![-]) {
+            return Some(T![-]);
+        }
+        if p.at(T![*]) {
+            return Some(T![*]);
+        }
+        if p.at(T![+]) {
+            return Some(T![+]);
+        }
+
+        None
+    })
+}
+
+/// Error builder for bullet list recovery
+fn expected_bullet(p: &MarkdownParser, range: TextRange) -> ParseDiagnostic {
+    p.err_builder("Expected a list item", range)
+        .with_hint("List items start with `-`, `*`, or `+` at the beginning of a line")
+}
+
+/// Parse a bullet list item.
+///
+/// Grammar:
+/// MdBulletListItem = MdBulletList
+/// MdBulletList = MdBullet*
+/// MdBullet = bullet: ('-' | '*') content: MdBlockList
+///
+/// Parses consecutive bullet items into a single list.
+///
+/// Nesting is limited to `MAX_NESTING_DEPTH` to prevent stack overflow.
+pub(crate) fn parse_bullet_list_item(p: &mut MarkdownParser) -> ParsedSyntax {
+    if !at_bullet_list_item(p) {
+        return Absent;
+    }
+
+    // Check depth limit before parsing
+    if p.state().list_nesting_depth >= MAX_NESTING_DEPTH {
+        // Emit diagnostic and treat as content
+        let range = p.cur_range();
+        p.error(list_nesting_too_deep(p, range));
+        return Absent;
+    }
+
+    let item_m = p.start();
+
+    // Increment list depth
+    p.state_mut().list_nesting_depth += 1;
+
+    // Use ParseNodeList to parse the list with proper recovery
+    let mut list_helper = BulletList::new();
+    list_helper.parse_list(p);
+
+    // Decrement list depth
+    p.state_mut().list_nesting_depth -= 1;
+
+    Present(item_m.complete(p, MD_BULLET_LIST_ITEM))
+}
+
+/// Parse a single bullet (marker + content).
+///
+/// Returns `Present` if a bullet was successfully parsed, `Absent` otherwise.
+/// Also returns blank-line information for the list item.
+fn parse_bullet(p: &mut MarkdownParser) -> (ParsedSyntax, ListItemBlankInfo) {
+    // Must be at a bullet marker at line start
+    if !at_bullet_list_item(p) {
+        return (Absent, ListItemBlankInfo::default());
+    }
+
+    let m = p.start();
+
+    // At a virtual line start the marker counts as unindented.
+    let marker_indent = if p.state().virtual_line_start == Some(p.cur_range().start()) {
+        0
+    } else {
+        p.source().line_start_leading_indent()
+    };
+    skip_list_marker_indent(p);
+
+    // Bullet marker is 1 character (-, *, or +)
+    let marker_width = 1;
+    let marker_end = marker_indent + marker_width;
+
+    // Bump the bullet marker (-, *, or +). When the marker arrives as a setext
+    // underline or textual token, keep its text: whitespace following the
+    // marker is then looked for inside that token.
+    let marker_token_text = if p.at(MD_SETEXT_UNDERLINE_LITERAL)
+        && is_single_dash_setext_marker(p.cur_text())
+    {
+        let text = p.cur_text().to_string();
+        p.bump_remap(T![-]);
+        Some(text)
+    } else if p.at(MD_TEXTUAL_LITERAL) && is_textual_bullet_marker(p.cur_text()) {
+        let text = p.cur_text().to_string();
+        let kind = match text.as_str() {
+            "-" => T![-],
+            "*" => T![*],
+            _ => T![+],
+        };
+        p.bump_remap(kind);
+        Some(text)
+    } else {
+        let kind = if p.at(T![-]) {
+            T![-]
+        } else if p.at(T![*]) {
+            T![*]
+        } else {
+            T![+]
+        };
+        p.bump(kind);
+        None
+    };
+
+    // Count spaces after marker to determine required indentation.
+    // Per CommonMark §5.2, content aligns to first non-space after marker.
+    // NOTE(review): a textual marker token is exactly one character (see
+    // `is_textual_bullet_marker`), so the in-token count is always 0 on that
+    // path; confirm whether it should count from the following source like
+    // the plain-token path does.
+    let spaces_after_marker = match marker_token_text.as_deref() {
+        Some(text) => count_spaces_after_dash_in_token(text, marker_end),
+        None => count_spaces_after_marker(p.source_after_current(), marker_end),
+    };
+
+    // Set required indent for continuation lines
+    // Required indent = marker width + spaces after marker (minimum 1)
+    let saved_required_indent = p.state().list_item_required_indent;
+    let saved_marker_indent = p.state().list_item_marker_indent;
+    let required_indent = if spaces_after_marker > INDENT_CODE_BLOCK_SPACES {
+        marker_end + 1
+    } else {
+        marker_end + spaces_after_marker.max(1)
+    };
+    p.state_mut().list_item_required_indent = required_indent;
+    p.state_mut().list_item_marker_indent = marker_indent;
+
+    // Parse block content (MD_BLOCK_LIST)
+    let blank_info = parse_list_item_block_content(p, spaces_after_marker);
+
+    // Restore previous required indent
+    p.state_mut().list_item_required_indent = saved_required_indent;
+    p.state_mut().list_item_marker_indent = saved_marker_indent;
+
+    let completed = m.complete(p, MD_BULLET);
+    let range = completed.range(p);
+    let indent = marker_end + spaces_after_marker.max(1);
+    p.record_list_item_indent(
+        range,
+        indent,
+        marker_indent,
+        marker_width,
+        spaces_after_marker,
+    );
+    (Present(completed), blank_info)
+}
+
+/// Check if we're at the start of an ordered list item (e.g., "1.", "2)").
+///
+/// An ordered list marker is a sequence of 1-9 digits followed by `.` or `)`,
+/// at the start of a line.
+pub(crate) fn at_order_list_item(p: &mut MarkdownParser) -> bool {
+    at_order_list_item_with_base_indent(p, list_marker_base_indent(p))
+}
+
+/// Lookahead check for an ordered list item relative to `base_indent`.
+///
+/// Requires an `MD_ORDERED_LIST_MARKER` token within the allowed indentation,
+/// followed by whitespace or the end of the line.
+fn at_order_list_item_with_base_indent(p: &mut MarkdownParser, base_indent: usize) -> bool {
+    p.lookahead(|p| {
+        if !list_item_within_indent(p, base_indent) {
+            return false;
+        }
+
+        skip_leading_whitespace_tokens(p);
+
+        // Check for ordered list marker token at line start
+        if !p.at(MD_ORDERED_LIST_MARKER) {
+            return false;
+        }
+
+        p.bump(MD_ORDERED_LIST_MARKER);
+        marker_followed_by_whitespace_or_eol(p)
+    })
+}
+
+/// Struct implementing `ParseNodeList` for ordered lists.
+struct OrderedList {
+    /// A list is tight if there are no blank lines between items or inside items.
+    is_tight: bool,
+    /// Whether the last parsed item ended with a blank line.
+    last_item_ends_with_blank: bool,
+    /// The delimiter for this ordered list (`.` or `)`).
+    marker_delim: Option<char>,
+}
+
+impl OrderedList {
+    /// Create the state for a new list: tight, with no delimiter seen yet.
+    fn new() -> Self {
+        Self {
+            is_tight: true,
+            last_item_ends_with_blank: false,
+            marker_delim: None,
+        }
+    }
+}
+
+impl ParseNodeList for OrderedList {
+    type Kind = MarkdownSyntaxKind;
+    type Parser<'source> = MarkdownParser<'source>;
+
+    const LIST_KIND: Self::Kind = MD_BULLET_LIST; // Reuse bullet list node structure
+
+    fn parse_element(&mut self, p: &mut Self::Parser<'_>) -> ParsedSyntax {
+        parse_list_element_common(
+            p,
+            &mut self.marker_delim,
+            current_ordered_delim,
+            parse_ordered_bullet,
+            has_ordered_item_after_blank_lines,
+            &mut self.is_tight,
+            &mut self.last_item_ends_with_blank,
+        )
+    }
+
+    fn is_at_list_end(&self, p: &mut Self::Parser<'_>) -> bool {
+        is_at_list_end_common(
+            p,
+            self.marker_delim,
+            at_order_list_item,
+            current_ordered_delim,
+            has_ordered_item_after_blank_lines,
+            |p, marker_delim| {
+                // At a NEWLINE: look at the very next line for another
+                // ordered marker.
+                let next_is_ordered = p.lookahead(|p| {
+                    p.bump(NEWLINE);
+                    skip_leading_whitespace_tokens(p);
+                    if p.at(MD_ORDERED_LIST_MARKER) {
+                        p.bump(MD_ORDERED_LIST_MARKER);
+                        return marker_followed_by_whitespace_or_eol(p);
+                    }
+                    false
+                });
+                if next_is_ordered {
+                    // A different delimiter (`.` vs `)`) ends this list.
+                    if let (Some(current_delim), Some(next_delim)) =
+                        (marker_delim, current_ordered_delim(p))
+                        && current_delim != next_delim
+                    {
+                        return Some(true);
+                    }
+                    return Some(false);
+                }
+                Some(!has_ordered_item_after_blank_lines(p))
+            },
+        )
+    }
+
+    fn recover(
+        &mut self,
+        p: &mut Self::Parser<'_>,
+        parsed_element: ParsedSyntax,
+    ) -> RecoveryResult {
+        parsed_element.or_recover_with_token_set(
+            p,
+            &ParseRecoveryTokenSet::new(MD_BOGUS_BULLET, BLOCK_RECOVERY_SET)
+                .enable_recovery_on_line_break(),
+            expected_ordered_item,
+        )
+    }
+
+    fn finish_list(&mut self, p: &mut Self::Parser<'_>, m: Marker) -> CompletedMarker {
+        let completed = m.complete(p, Self::LIST_KIND);
+        // Record tightness against the finished list's range.
+        let range = completed.range(p);
+        p.record_list_tightness(range, self.is_tight);
+        completed
+    }
+}
+
+/// Lookahead: the delimiter (`.` or `)`) of the ordered marker at the current
+/// line start, if any.
+fn current_ordered_delim(p: &mut MarkdownParser) -> Option<char> {
+    p.lookahead(|p| {
+        if !p.at_line_start() {
+            return None;
+        }
+
+        skip_leading_whitespace_tokens(p);
+
+        if !p.at(MD_ORDERED_LIST_MARKER) {
+            return None;
+        }
+
+        // The delimiter is the last character of the marker token.
+        let text = p.cur_text();
+        text.chars().last().filter(|c| *c == '.' || *c == ')')
+    })
+}
+
+/// Error builder for ordered list recovery
+fn expected_ordered_item(p: &MarkdownParser, range: TextRange) -> ParseDiagnostic {
+    p.err_builder("Expected an ordered list item", range)
+        .with_hint("Ordered list items start with a number followed by `.` or `)` at the beginning of a line")
+}
+
+/// Parse an ordered list item.
+///
+/// Grammar:
+/// MdOrderListItem = MdBulletList (reusing bullet list structure)
+///
+/// Parses consecutive ordered items into a single list.
+///
+/// Nesting is limited to `MAX_NESTING_DEPTH` to prevent stack overflow.
+pub(crate) fn parse_order_list_item(p: &mut MarkdownParser) -> ParsedSyntax {
+    if !at_order_list_item(p) {
+        return Absent;
+    }
+
+    // Refuse to nest deeper than the limit: report it and let the caller
+    // treat the marker as content.
+    if p.state().list_nesting_depth >= MAX_NESTING_DEPTH {
+        let range = p.cur_range();
+        p.error(list_nesting_too_deep(p, range));
+        return Absent;
+    }
+
+    let wrapper = p.start();
+
+    // Track nesting depth around the list body; ParseNodeList drives item
+    // parsing and recovery.
+    p.state_mut().list_nesting_depth += 1;
+    OrderedList::new().parse_list(p);
+    p.state_mut().list_nesting_depth -= 1;
+
+    Present(wrapper.complete(p, MD_ORDERED_LIST_ITEM))
+}
+
+/// Parse a single ordered item (marker + content).
+///
+/// Returns `Absent` with default blank-line information when not at an
+/// ordered list item.
+fn parse_ordered_bullet(p: &mut MarkdownParser) -> (ParsedSyntax, ListItemBlankInfo) {
+    if !at_order_list_item(p) {
+        return (Absent, ListItemBlankInfo::default());
+    }
+
+    let m = p.start();
+
+    // At a virtual line start the marker counts as unindented.
+    let marker_indent = if p.state().virtual_line_start == Some(p.cur_range().start()) {
+        0
+    } else {
+        p.source().line_start_leading_indent()
+    };
+    skip_list_marker_indent(p);
+
+    // Get marker width from actual token text (e.g., "1." = 2, "10." = 3)
+    let marker_width = p.cur_text().len();
+    let marker_end = marker_indent + marker_width;
+
+    // Bump the ordered list marker
+    p.bump(MD_ORDERED_LIST_MARKER);
+
+    // Count spaces after marker to determine required indentation.
+    // Per CommonMark §5.2, content aligns to first non-space after marker.
+    let spaces_after_marker = count_spaces_after_marker(p.source_after_current(), marker_end);
+
+    // Set required indent for continuation lines
+    // Required indent = marker width + spaces after marker (minimum 1)
+    let saved_required_indent = p.state().list_item_required_indent;
+    let saved_marker_indent = p.state().list_item_marker_indent;
+    let required_indent = if spaces_after_marker > INDENT_CODE_BLOCK_SPACES {
+        marker_end + 1
+    } else {
+        marker_end + spaces_after_marker.max(1)
+    };
+    p.state_mut().list_item_required_indent = required_indent;
+    p.state_mut().list_item_marker_indent = marker_indent;
+
+    // Parse block content
+    let blank_info = parse_list_item_block_content(p, spaces_after_marker);
+
+    // Restore previous required indent
+    p.state_mut().list_item_required_indent = saved_required_indent;
+    p.state_mut().list_item_marker_indent = saved_marker_indent;
+
+    let completed = m.complete(p, MD_BULLET);
+    let range = completed.range(p);
+    let indent = marker_end + spaces_after_marker.max(1);
+    p.record_list_item_indent(
+        range,
+        indent,
+        marker_indent,
+        marker_width,
+        spaces_after_marker,
+    );
+    (Present(completed), blank_info)
+}
+
+/// Count the columns of leading space/tab characters at the start of a string.
+///
+/// Tabs advance to the next multiple of 4 columns, measured from `start_column`.
+/// Used to determine actual spaces after list marker.
+fn count_spaces_after_marker(s: &str, start_column: usize) -> usize {
+    let mut column = start_column;
+
+    for c in s.chars() {
+        match c {
+            ' ' => column += 1,
+            // A tab advances to the next multiple of 4 columns.
+            '\t' => column += 4 - (column % 4),
+            _ => break,
+        }
+    }
+
+    column.saturating_sub(start_column)
+}
+
+/// True when `text`, ignoring surrounding spaces and tabs, is exactly `-`.
+fn is_single_dash_setext_marker(text: &str) -> bool {
+    let trimmed = text.trim_matches(|c| c == ' ' || c == '\t');
+    trimmed == "-"
+}
+
+/// True when `text` is exactly one bullet marker character.
+fn is_textual_bullet_marker(text: &str) -> bool {
+    matches!(text, "-" | "*" | "+")
+}
+
+/// Check whether `text` (after leading spaces/tabs) starts with an ordered
+/// list marker: 1-9 ASCII digits, then `.` or `)`, then whitespace, a line
+/// break, or the end of the text.
+pub(crate) fn textual_starts_with_ordered_marker(text: &str) -> bool {
+    let trimmed = text.trim_start_matches([' ', '\t']);
+    let mut chars = trimmed.chars().peekable();
+    let mut digit_count = 0;
+
+    while let Some(c) = chars.peek().copied() {
+        if c.is_ascii_digit() {
+            digit_count += 1;
+            if digit_count > 9 {
+                return false;
+            }
+            chars.next();
+        } else {
+            break;
+        }
+    }
+
+    if digit_count == 0 {
+        return false;
+    }
+
+    match chars.next() {
+        Some('.' | ')') => {}
+        _ => return false,
+    }
+
+    matches!(chars.peek(), None | Some(' ' | '\t' | '\n' | '\r'))
+}
+
+/// Count the whitespace columns that follow the first `-` inside `text`.
+///
+/// Returns 0 when `text` contains no `-`.
+fn count_spaces_after_dash_in_token(text: &str, start_column: usize) -> usize {
+    text.split_once('-')
+        .map_or(0, |(_, rest)| count_spaces_after_marker(rest, start_column))
+}
+
+/// Indentation columns of the source starting at the current token.
+fn line_indent_from_current(p: &MarkdownParser) -> usize {
+    count_spaces_after_marker(p.source_after_current(), 0)
+}
+
+/// Byte offset of the start of the line containing `offset`.
+fn line_start_offset(bytes: &[u8], offset: usize) -> usize {
+    bytes[..offset]
+        .iter()
+        .rposition(|b| matches!(b, b'\n' | b'\r'))
+        .map_or(0, |index| index + 1)
+}
+
+/// Exclude a trailing `\r` (from a CRLF line ending) from the line's range.
+fn trim_carriage_return(bytes: &[u8], line_start: usize, line_end: usize) -> usize {
+    if line_end > line_start && bytes[line_end - 1] == b'\r' {
+        line_end - 1
+    } else {
+        line_end
+    }
+}
+
+/// Skip `depth` block-quote prefixes (`>` with up to 3 columns of leading
+/// whitespace and one optional following space/tab) starting at `start`.
+///
+/// Returns the offset after the last prefix, or `None` if a prefix is missing.
+fn skip_quote_prefixes(
+    bytes: &[u8],
+    start: usize,
+    line_end: usize,
+    depth: usize,
+) -> Option<usize> {
+    let mut i = start;
+    for _ in 0..depth {
+        let mut column = 0usize;
+        while i < line_end && column < 3 {
+            match bytes[i] {
+                b' ' => column += 1,
+                b'\t' => column += 4 - (column % 4),
+                _ => break,
+            }
+            i += 1;
+        }
+
+        if i >= line_end || bytes[i] != b'>' {
+            return None;
+        }
+        i += 1;
+
+        if i < line_end && (bytes[i] == b' ' || bytes[i] == b'\t') {
+            i += 1;
+        }
+    }
+    Some(i)
+}
+
+/// Measure the whitespace columns in `bytes[start..line_end]`.
+///
+/// Returns the column count and whether a non-whitespace byte stopped the scan.
+fn measure_indent(bytes: &[u8], start: usize, line_end: usize) -> (usize, bool) {
+    let mut indent = 0usize;
+    for &byte in &bytes[start..line_end] {
+        match byte {
+            b' ' => indent += 1,
+            b'\t' => indent += 4 - (indent % 4),
+            _ => return (indent, true),
+        }
+    }
+    (indent, false)
+}
+
+/// Indentation after the quote prefixes of the current line, provided the line
+/// holds nothing but `depth` quote prefixes and whitespace.
+///
+/// Returns `None` when `depth` is 0, when a quote prefix is missing, or when
+/// the line has content after the prefixes. A trailing `\r` is ignored, the
+/// same way `next_quote_content_indent` ignores it.
+fn quote_only_line_indent_at_current(p: &MarkdownParser, depth: usize) -> Option<usize> {
+    if depth == 0 {
+        return None;
+    }
+
+    let offset: usize = p.cur_range().start().into();
+    let source = p.source().source_text();
+    let bytes = source.as_bytes();
+    let start = line_start_offset(bytes, offset);
+    let line_end = source[start..]
+        .find('\n')
+        .map_or(source.len(), |found| start + found);
+    let line_end = trim_carriage_return(bytes, start, line_end);
+
+    let after_prefixes = skip_quote_prefixes(bytes, start, line_end, depth)?;
+    let (indent, has_content) = measure_indent(bytes, after_prefixes, line_end);
+    (!has_content).then_some(indent)
+}
+
+/// Indentation of the first content found after `depth` quote prefixes,
+/// scanning from the current line onwards.
+///
+/// Lines holding only quote prefixes and whitespace are skipped. Returns
+/// `None` when `depth` is 0, when a scanned line lacks a quote prefix, or when
+/// the source ends before any content is found.
+fn next_quote_content_indent(p: &MarkdownParser, depth: usize) -> Option<usize> {
+    if depth == 0 {
+        return None;
+    }
+
+    let offset: usize = p.cur_range().start().into();
+    let source = p.source().source_text();
+    let bytes = source.as_bytes();
+    let mut line_start = line_start_offset(bytes, offset);
+
+    loop {
+        let newline_index = source[line_start..]
+            .find('\n')
+            .map(|found| line_start + found);
+        let line_end =
+            trim_carriage_return(bytes, line_start, newline_index.unwrap_or(source.len()));
+
+        let after_prefixes = skip_quote_prefixes(bytes, line_start, line_end, depth)?;
+        let (indent, has_content) = measure_indent(bytes, after_prefixes, line_end);
+        if has_content {
+            return Some(indent);
+        }
+
+        // Quote-only line: continue with the next line, if there is one.
+        line_start = newline_index? + 1;
+    }
+}
+
+/// Parse block content for a list item.
+///
+/// Handles the sequence of blocks belonging to a list item.
+/// The first block usually starts on the same line as the marker.
+/// Subsequent lines must be indented to at least `required_indent` columns.
+///
+/// Per CommonMark §5.2, continuation lines must align with the first non-space
+/// character after the list marker.
+///
+/// Returns blank-line information for the list item content.
fn parse_list_item_block_content(
    p: &mut MarkdownParser,
    spaces_after_marker: usize,
) -> ListItemBlankInfo {
    let m = p.start();
    // `has_blank_line`: a blank line was seen inside the item's content.
    // `last_was_blank`: the most recently handled line was blank; reported to
    // the caller as `ends_with_blank_line`.
    let mut has_blank_line = false;
    let mut last_was_blank = false;
    let mut last_block_was_paragraph = false;
    // Both values are read from parser state; the caller sets them before
    // invoking this function.
    let required_indent = p.state().list_item_required_indent;
    let marker_indent = p.state().list_item_marker_indent;

    // Track whether we're on the first line (same line as marker)
    let mut first_line = true;

    loop {
        if p.at(T![EOF]) {
            break;
        }

        // Inside a block quote: a quote-only (blank) line followed by quoted
        // content that is indented less than `required_indent` ends the item.
        let quote_depth = p.state().block_quote_depth;
        if !first_line
            && quote_depth > 0
            && quote_only_line_indent_at_current(p, quote_depth).is_some()
            && let Some(next_indent) = next_quote_content_indent(p, quote_depth)
            && next_indent < required_indent
        {
            break;
        }
        let newline_has_quote_prefix = quote_depth > 0
            && p.at(NEWLINE)
            && (p.at_line_start() || p.has_preceding_line_break())
            && has_quote_prefix(p, quote_depth);

        // A NEWLINE token that `at_blank_line` does not report as blank and that
        // is not followed by a quote prefix: let `classify_blank_line` look ahead
        // and decide whether the item continues.
        if !first_line && p.at(NEWLINE) && !p.at_blank_line() && !newline_has_quote_prefix {
            let action = classify_blank_line(p, required_indent, marker_indent);
            match action {
                BlankLineAction::ContinueItem => {
                    consume_blank_line(p);
                    has_blank_line = true;
                    last_was_blank = true;
                    continue;
                }
                BlankLineAction::EndItemAfterBlank => {
                    consume_blank_line(p);
                    has_blank_line = true;
                    last_was_blank = true;
                    break;
                }
                BlankLineAction::EndItemBeforeBlank => {
                    break;
                }
            }
        }

        let line_has_quote_prefix = quote_depth > 0
            && (p.at_line_start() || p.has_preceding_line_break())
            && (has_quote_prefix(p, quote_depth)
                || quote_only_line_indent_at_current(p, quote_depth).is_some());

        // Blank-line test is done in a lookahead so the quote prefix is not
        // consumed yet.
        let blank_line_after_prefix = if line_has_quote_prefix {
            p.lookahead(|p| {
                consume_quote_prefix_without_virtual(p, quote_depth);
                at_blank_line_after_prefix(p)
            })
        } else {
            at_blank_line_after_prefix(p)
        };

        if (p.at_line_start() || line_has_quote_prefix) && blank_line_after_prefix {
            // Quote-only blank line: decide from the indentation of the next
            // quoted content line.
            if line_has_quote_prefix
                && quote_only_line_indent_at_current(p, quote_depth).is_some()
                && let Some(next_indent) = next_quote_content_indent(p, quote_depth)
            {
                if next_indent >= required_indent {
                    if line_has_quote_prefix {
                        consume_quote_prefix(p, quote_depth);
                    }
                    consume_blank_line(p);
                    // A blank right after the marker line is not recorded in
                    // `has_blank_line`.
                    if !first_line {
                        has_blank_line = true;
                    }
                    last_was_blank = true;
                    first_line = false;
                    continue;
                }
                if next_indent < required_indent {
                    break;
                }
            }
            // Remember whether this blank directly follows the marker line; such
            // a blank is not recorded in `has_blank_line` below.
            let marker_line_break = first_line;
            let action = if quote_depth > 0 {
                classify_blank_line_in_quote(p, required_indent, marker_indent, quote_depth)
            } else {
                classify_blank_line(p, required_indent, marker_indent)
            };
            match action {
                BlankLineAction::ContinueItem => {
                    if line_has_quote_prefix {
                        consume_quote_prefix(p, quote_depth);
                    }
                    consume_blank_line(p);
                    if !marker_line_break {
                        has_blank_line = true;
                    }
                    last_was_blank = true;
                    first_line = false;
                    continue;
                }
                BlankLineAction::EndItemAfterBlank => {
                    if line_has_quote_prefix {
                        consume_quote_prefix(p, quote_depth);
                    }
                    consume_blank_line(p);
                    if !marker_line_break {
                        has_blank_line = true;
                    }
                    last_was_blank = true;
                    break;
                }
                BlankLineAction::EndItemBeforeBlank => {
                    break;
                }
            }
        }

        if line_has_quote_prefix {
            consume_quote_prefix(p, quote_depth);
        }
        let line_started_with_quote_prefix = line_has_quote_prefix;

        // Snapshot before this iteration updates `last_was_blank`; used for the
        // indented-code-block policy further down.
        let prev_was_blank = last_was_blank;

        if first_line && p.at(NEWLINE) {
            let next_is_sibling = p.lookahead(|p| {
                p.bump(NEWLINE);
                if p.at_line_start() {
                    at_bullet_list_item_with_base_indent(p, marker_indent)
                        || at_order_list_item_with_base_indent(p, marker_indent)
                } else {
                    false
                }
            });

            // Marker-only line: consume the newline as trivia and continue.
            p.parse_as_skipped_trivia_tokens(|p| p.bump(NEWLINE));
            first_line = false;
            last_was_blank = false;

            if next_is_sibling {
                continue;
            }
        }

        // Content on the marker's own line: try, in order, a fenced code block,
        // an HTML block, and a nested list item before the generic block parse.
        if first_line {
            enum NestedListMarker {
                Bullet,
                Ordered,
            }

            let fenced_code_start = p.lookahead(|p| {
                while p.at(MD_TEXTUAL_LITERAL) && is_whitespace_only(p.cur_text()) {
                    p.bump(MD_TEXTUAL_LITERAL);
                }
                if p.at(TRIPLE_BACKTICK) || p.at(TRIPLE_TILDE) {
                    return true;
                }
                (p.at(BACKTICK) || p.at(TILDE)) && p.cur_text().len() >= 3
            });

            if fenced_code_start {
                while p.at(MD_TEXTUAL_LITERAL) && is_whitespace_only(p.cur_text()) {
                    p.parse_as_skipped_trivia_tokens(|p| p.bump(MD_TEXTUAL_LITERAL));
                }

                let parsed = super::with_virtual_line_start(p, p.cur_range().start(), |p| {
                    parse_fenced_code_block(p)
                });
                if parsed.is_present() {
                    last_block_was_paragraph = false;
                    last_was_blank = false;
                    first_line = false;
                    continue;
                }
            }

            let html_block_start = p.lookahead(|p| {
                super::with_virtual_line_start(p, p.cur_range().start(), |p| {
                    super::html_block::at_html_block(p)
                })
            });

            if html_block_start {
                let parsed = super::with_virtual_line_start(p, p.cur_range().start(), |p| {
                    super::html_block::parse_html_block(p)
                });
                if parsed.is_present() {
                    last_block_was_paragraph = false;
                    last_was_blank = false;
                    first_line = false;
                    continue;
                }
            }

            // Detect a list marker directly after this item's marker. The marker
            // can arrive as several token kinds, so each is probed in turn.
            let nested_marker = p.lookahead(|p| {
                while p.at(MD_TEXTUAL_LITERAL) && is_whitespace_only(p.cur_text()) {
                    p.bump(MD_TEXTUAL_LITERAL);
                }

                if p.at(MD_ORDERED_LIST_MARKER) {
                    p.bump(MD_ORDERED_LIST_MARKER);
                    return marker_followed_by_whitespace_or_eol(p)
                        .then_some(NestedListMarker::Ordered);
                }

                if p.at(MD_SETEXT_UNDERLINE_LITERAL) && is_single_dash_setext_marker(p.cur_text()) {
                    p.bump(MD_SETEXT_UNDERLINE_LITERAL);
                    return marker_followed_by_whitespace_or_eol(p)
                        .then_some(NestedListMarker::Bullet);
                }

                if p.at(T![-]) || p.at(T![*]) || p.at(T![+]) {
                    p.bump(p.cur());
                    return marker_followed_by_whitespace_or_eol(p)
                        .then_some(NestedListMarker::Bullet);
                }

                if p.at(MD_TEXTUAL_LITERAL) && is_textual_bullet_marker(p.cur_text()) {
                    p.bump(MD_TEXTUAL_LITERAL);
                    return marker_followed_by_whitespace_or_eol(p)
                        .then_some(NestedListMarker::Bullet);
                }

                if p.at(MD_TEXTUAL_LITERAL) && textual_starts_with_ordered_marker(p.cur_text()) {
                    p.bump(MD_TEXTUAL_LITERAL);
                    return Some(NestedListMarker::Ordered);
                }

                None
            });

            if let Some(nested_marker) = nested_marker {
                while p.at(MD_TEXTUAL_LITERAL) && is_whitespace_only(p.cur_text()) {
                    p.parse_as_skipped_trivia_tokens(|p| p.bump(MD_TEXTUAL_LITERAL));
                }

                // Parse the nested item with the current position as a virtual
                // line start and no required indent; both are restored below.
                let prev_virtual = p.state().virtual_line_start;
                let prev_required = p.state().list_item_required_indent;
                p.state_mut().virtual_line_start = Some(p.cur_range().start());
                p.state_mut().list_item_required_indent = 0;

                let parsed = match nested_marker {
                    NestedListMarker::Bullet => parse_bullet_list_item(p),
                    NestedListMarker::Ordered => {
                        // NOTE(review): presumably re-lexes the current token so it
                        // is produced as an ordered list marker — confirm against
                        // `force_relex_regular` / `set_force_ordered_list_marker`.
                        p.set_force_ordered_list_marker(true);
                        p.force_relex_regular();
                        let parsed = parse_order_list_item(p);
                        p.set_force_ordered_list_marker(false);
                        parsed
                    }
                };
                if parsed.is_absent() {
                    // Not a list item after all: fall back to the generic block parser.
                    let parsed_kind = parse_any_block_with_indent_code_policy(p, true);
                    last_block_was_paragraph = parsed_kind == ParsedBlockKind::Paragraph;
                } else {
                    last_block_was_paragraph = false;
                }
                first_line = false;

                p.state_mut().virtual_line_start = prev_virtual;
                p.state_mut().list_item_required_indent = prev_required;
                continue;
            }
        }

        // More than `INDENT_CODE_BLOCK_SPACES` columns after the marker: the
        // first line is parsed as an indented code block.
        if first_line && spaces_after_marker > INDENT_CODE_BLOCK_SPACES {
            parse_indent_code_block_in_list_first_line(p);
            last_block_was_paragraph = false;
            last_was_blank = false;
            first_line = false;
            continue;
        }
        // Blank line handling happens above, before consuming quote prefixes.

        // After the first line, check indentation for continuation
        // Skip this check on the first line (content on same line as marker)
        let mut restore_virtual_line_start = None;
        if !first_line && (p.at_line_start() || line_started_with_quote_prefix) {
            // Get indentation of current line
            let indent = line_indent_from_current(p);

            if indent < marker_indent {
                break;
            }

            if indent >= required_indent {
                // An indented code block is only allowed when the previous block
                // was not a paragraph, or when a blank line came in between.
                let allow_indent_code_block = !last_block_was_paragraph || prev_was_blank;
                let is_indent_code_block =
                    allow_indent_code_block && indent >= required_indent + INDENT_CODE_BLOCK_SPACES;
                if !is_indent_code_block {
                    // Sufficient indentation - skip it and continue
                    p.skip_line_indent(required_indent);
                    let prev_virtual = p.state().virtual_line_start;
                    p.state_mut().virtual_line_start = Some(p.cur_range().start());
                    restore_virtual_line_start = Some(prev_virtual);

                    if at_bullet_list_item(p) {
                        let _ = parse_bullet_list_item(p);
                        last_block_was_paragraph = false;
                        first_line = false;
                        p.state_mut().virtual_line_start = prev_virtual;
                        continue;
                    }
                    if at_order_list_item(p) {
                        let _ = parse_order_list_item(p);
                        last_block_was_paragraph = false;
                        first_line = false;
                        p.state_mut().virtual_line_start = prev_virtual;
                        continue;
                    }
                }
            } else {
                // Insufficient indentation - check for block interrupts

                // A new list marker at this indentation starts a sibling item
                if at_bullet_list_item_with_base_indent(p, marker_indent)
                    || at_order_list_item_with_base_indent(p, marker_indent)
                {
                    break;
                }

                // Check if this line starts a block-level construct that can
                // interrupt paragraphs (headers, quotes, thematic breaks, etc.)
                if at_block_interrupt(p) {
                    break;
                }

                // Otherwise, this is "lazy continuation" per CommonMark §5.2:
                // Content continues without meeting the indent requirement.
                // Don't skip indent, just continue parsing at actual position.
                // Lazy continuation only applies when the previous block was a
                // paragraph; anything else ends the item here.
                if !last_block_was_paragraph {
                    break;
                }
            }
        }

        let is_blank_line = p.at_blank_line();
        if is_blank_line {
            has_blank_line = true;
            last_was_blank = true;
        } else {
            last_was_blank = false;
        }

        // After parsing any block, we'll be on a new line (or EOF)
        first_line = false;

        // Parse the next block
        // parse_any_block_with_indent_code_policy handles paragraphs, code blocks, etc.
        // It consumes newlines as MdNewline if they are blank lines.
        let allow_indent_code_block = !last_block_was_paragraph || prev_was_blank;
        let parsed_kind = parse_any_block_with_indent_code_policy(p, allow_indent_code_block);
        last_block_was_paragraph = parsed_kind == ParsedBlockKind::Paragraph;
        // Undo the virtual line start set by the continuation-indent branch above.
        if let Some(prev_virtual) = restore_virtual_line_start {
            p.state_mut().virtual_line_start = prev_virtual;
        }
    }

    m.complete(p, MD_BLOCK_LIST);
    ListItemBlankInfo {
        has_blank_line,
        ends_with_blank_line: last_was_blank,
    }
}

/// Parse an indented code block that begins on the list marker's own line.
///
/// Called by `parse_list_item_block_content` when the marker is followed by
/// more than `INDENT_CODE_BLOCK_SPACES` columns of whitespace. Every consumed
/// token (including newlines) is remapped to `MD_TEXTUAL_LITERAL` and wrapped
/// in an `MD_TEXTUAL` node; the result is an `MD_INDENT_CODE_BLOCK` containing
/// an `MD_INLINE_ITEM_LIST`.
fn parse_indent_code_block_in_list_first_line(p: &mut MarkdownParser) {
    let m = p.start();
    let content = p.start();

    loop {
        if p.at(T![EOF]) {
            break;
        }

        if p.at(NEWLINE) {
            // A newline that directly follows another line break ends the block
            // unless a further indented code line follows.
            if list_newline_is_blank_line(p) && !list_has_following_indented_code_line(p) {
                break;
            }
            let text_m = p.start();
            p.bump_remap(MD_TEXTUAL_LITERAL);
            text_m.complete(p, MD_TEXTUAL);
            continue;
        }

        // At the start of a line that is not itself indented code: continue only
        // across blank lines that lead to more indented code.
        if p.at_line_start() && !at_indent_code_block(p) {
            if at_blank_line_start(p) {
                if list_has_following_indented_code_line(p) {
                    consume_blank_line(p);
                    continue;
                }
                break;
            }
            break;
        }

        let text_m = p.start();
        p.bump_remap(MD_TEXTUAL_LITERAL);
        text_m.complete(p, MD_TEXTUAL);
    }

    content.complete(p, MD_INLINE_ITEM_LIST);
    m.complete(p, MD_INDENT_CODE_BLOCK);
}

/// Look ahead (without consuming input) past any blank lines and report
/// whether the parser then sits at an indented code block line.
fn list_has_following_indented_code_line(p: &mut MarkdownParser) -> bool {
    p.lookahead(|p| {
        while p.at_line_start() && at_blank_line_start(p) {
            while p.at(MD_TEXTUAL_LITERAL) {
                let text = p.cur_text();
                if text == " " || text == "\t" {
                    p.bump(MD_TEXTUAL_LITERAL);
                } else {
                    break;
                }
            }

            if p.at(NEWLINE) {
                p.bump(NEWLINE);
            } else {
                break;
            }
        }

        at_indent_code_block(p)
    })
}

/// Returns `true` when the current token starts at offset 0 or immediately
/// after a `\n` or `\r` byte in the source, i.e. nothing precedes it on its
/// line.
fn list_newline_is_blank_line(p: &MarkdownParser) -> bool {
    let start: usize = p.cur_range().start().into();
    if start == 0 {
        return true;
    }

    let source = p.source().source_text();
    let prev = source.as_bytes()[start - 1];
    prev == b'\n' || prev == b'\r'
}

/// Decision for a blank line encountered while parsing list item content.
enum BlankLineAction {
    /// The blank line belongs to the item; consume it and keep parsing.
    ContinueItem,
    /// Consume the blank line, then end the item.
    EndItemAfterBlank,
    /// End the item without consuming the blank line.
    EndItemBeforeBlank,
}

/// Decide what to do with the blank line(s) at the current position.
///
/// Runs entirely inside a lookahead, so no input is consumed. Skips all
/// consecutive blank lines, then inspects the next non-blank line:
/// - end of input: `EndItemBeforeBlank`;
/// - indented at least `required_indent`: `ContinueItem`;
/// - indented at most `marker_indent + 3` and starting a list item relative to
///   `marker_indent`: `EndItemAfterBlank`;
/// - anything else: `EndItemBeforeBlank`.
fn classify_blank_line(
    p: &mut MarkdownParser,
    required_indent: usize,
    marker_indent: usize,
) -> BlankLineAction {
    p.lookahead(|p| {
        // Skip ALL consecutive blank lines (not just one).
        // Per CommonMark §5.3, multiple blank lines between items still
        // belong to the same list - they just make it "loose".
        loop {
            let line_is_blank = p.lookahead(|p| {
                while p.at(MD_TEXTUAL_LITERAL) {
                    let text = p.cur_text();
                    if text == " " || text == "\t" {
                        p.bump(MD_TEXTUAL_LITERAL);
                    } else {
                        break;
                    }
                }
                p.at(NEWLINE) || p.at(T![EOF])
            });

            if !line_is_blank {
                break;
            }

            while p.at(MD_TEXTUAL_LITERAL) {
                let text = p.cur_text();
                if text == " " || text == "\t" {
                    p.bump(MD_TEXTUAL_LITERAL);
                } else {
                    break;
                }
            }

            if p.at(NEWLINE) {
                p.bump(NEWLINE);
                continue;
            }

            break;
        }

        if p.at(T![EOF]) {
            return BlankLineAction::EndItemBeforeBlank;
        }

        // Otherwise, keep blank line as part of item only if indentation is sufficient.
        let indent = p.line_start_leading_indent();
        if indent >= required_indent {
            return BlankLineAction::ContinueItem;
        }

        // If next non-blank line starts a new list item, this is a blank line between items.
        if indent <= marker_indent + 3
            && (at_bullet_list_item_with_base_indent(p, marker_indent)
                || at_order_list_item_with_base_indent(p, marker_indent))
        {
            return BlankLineAction::EndItemAfterBlank;
        }

        BlankLineAction::EndItemBeforeBlank
    })
}

/// Variant of `classify_blank_line` for list items nested in a block quote of
/// depth `quote_depth`.
///
/// Runs inside a lookahead. Every line is expected to begin with the quote
/// prefix; a line without it yields `EndItemBeforeBlank`. While skipping blank
/// lines, any prefixed line whose indentation after the prefix is below
/// `required_indent` also yields `EndItemBeforeBlank`.
fn classify_blank_line_in_quote(
    p: &mut MarkdownParser,
    required_indent: usize,
    marker_indent: usize,
    quote_depth: usize,
) -> BlankLineAction {
    p.lookahead(|p| {
        loop {
            // Indentation after the quote prefix on this line, if the prefix exists.
            let blank_indent = p.lookahead(|p| {
                if !consume_quote_prefix_without_virtual(p, quote_depth) {
                    return None;
                }
                Some(line_indent_from_current(p))
            });

            if let Some(indent) = blank_indent
                && indent < required_indent
            {
                return BlankLineAction::EndItemBeforeBlank;
            }

            let line_is_blank = p.lookahead(|p| {
                if !consume_quote_prefix_without_virtual(p, quote_depth) {
                    return false;
                }
                while p.at(MD_TEXTUAL_LITERAL) && is_whitespace_only(p.cur_text()) {
                    p.bump(MD_TEXTUAL_LITERAL);
                }
                p.at(NEWLINE) || p.at(T![EOF])
            });

            if !line_is_blank {
                break;
            }

            if !consume_quote_prefix_without_virtual(p, quote_depth) {
                return BlankLineAction::EndItemBeforeBlank;
            }

            while p.at(MD_TEXTUAL_LITERAL) && is_whitespace_only(p.cur_text()) {
                p.bump(MD_TEXTUAL_LITERAL);
            }

            if p.at(NEWLINE) {
                p.bump(NEWLINE);
                continue;
            }

            return BlankLineAction::EndItemBeforeBlank;
        }

        if p.at(T![EOF]) {
            return BlankLineAction::EndItemBeforeBlank;
        }

        // `consume_quote_prefix` may change `virtual_line_start`; the previous
        // value is restored on every path below.
        let prev_virtual = p.state().virtual_line_start;
        let has_prefix = consume_quote_prefix(p, quote_depth);
        if !has_prefix {
            p.state_mut().virtual_line_start = prev_virtual;
            return BlankLineAction::EndItemBeforeBlank;
        }
        let indent = line_indent_from_current(p);
        p.state_mut().virtual_line_start = prev_virtual;
        if indent >= required_indent {
            return BlankLineAction::ContinueItem;
        }

        if indent <= marker_indent + 3 {
            // Probe each token kind a list marker may be lexed as.
            let is_list_marker = p.lookahead(|p| {
                skip_leading_whitespace_tokens(p);

                if p.at(MD_ORDERED_LIST_MARKER) {
                    p.bump(MD_ORDERED_LIST_MARKER);
                    return marker_followed_by_whitespace_or_eol(p);
                }

                if p.at(MD_SETEXT_UNDERLINE_LITERAL) {
                    if !is_single_dash_setext_marker(p.cur_text()) {
                        return false;
                    }
                    p.bump(MD_SETEXT_UNDERLINE_LITERAL);
                    return marker_followed_by_whitespace_or_eol(p);
                }

                if p.at(MD_TEXTUAL_LITERAL) && is_textual_bullet_marker(p.cur_text()) {
                    p.bump(MD_TEXTUAL_LITERAL);
                    return marker_followed_by_whitespace_or_eol(p);
                }

                if p.at(T![-]) || p.at(T![*]) || p.at(T![+]) {
                    p.bump(p.cur());
                    return marker_followed_by_whitespace_or_eol(p);
                }

                false
            });

            if is_list_marker {
                return BlankLineAction::EndItemAfterBlank;
            }
        }

        BlankLineAction::EndItemBeforeBlank
    })
}

/// Returns `true` when the parser is at a line start and the line is blank
/// according to `at_blank_line_after_prefix`.
fn at_blank_line_start(p: &mut MarkdownParser) -> bool {
    if !p.at_line_start() {
        return false;
    }

    at_blank_line_after_prefix(p)
}

/// Look ahead (without consuming input) and report whether the rest of the
/// current line is blank.
///
/// End of input counts as blank. A line is otherwise blank when, after
/// skipping single-space and single-tab text tokens, the parser reaches a
/// `NEWLINE` that the parser/source reports as ending a whitespace-only line.
fn at_blank_line_after_prefix(p: &mut MarkdownParser) -> bool {
    p.lookahead(|p| {
        if p.at(NEWLINE) {
            return p.at_blank_line();
        }
        if p.at(T![EOF]) {
            return true;
        }
        while p.at(MD_TEXTUAL_LITERAL) {
            let text = p.cur_text();
            if text == " " || text == "\t" {
                p.bump(MD_TEXTUAL_LITERAL);
            } else {
                break;
            }
        }

        if p.at(NEWLINE) {
            return p.source().at_line_start_with_whitespace();
        }

        p.at(T![EOF])
    })
}

/// Consume one blank line: leading space/tab tokens become skipped trivia and
/// the `NEWLINE` (if present) is emitted as an `MD_NEWLINE` node.
fn consume_blank_line(p: &mut MarkdownParser) {
    while p.at(MD_TEXTUAL_LITERAL) {
        let text = p.cur_text();
        if text == " " || text == "\t" {
            p.parse_as_skipped_trivia_tokens(|p| p.bump(MD_TEXTUAL_LITERAL));
        } else {
            break;
        }
    }

    if p.at(NEWLINE) {
        let m = p.start();
        p.bump(NEWLINE);
        m.complete(p, MD_NEWLINE);
    }
}

/// Check if there's a bullet list item after skipping blank lines.
///
/// Per CommonMark §5.3, blank lines between list items don't end the list,
/// they just make it "loose". This function peeks ahead across blank lines
/// to see if another bullet item follows.
+fn has_bullet_item_after_blank_lines(p: &mut MarkdownParser) -> bool { + has_list_item_after_blank_lines(p, |p| { + if p.at(T![-]) || p.at(T![*]) || p.at(T![+]) { + p.bump(p.cur()); + marker_followed_by_whitespace_or_eol(p) + } else { + false + } + }) +} + +/// Check if there's an ordered list item after skipping blank lines. +/// +/// Per CommonMark §5.3, blank lines between list items don't end the list, +/// they just make it "loose". This function peeks ahead across blank lines +/// to see if another ordered item follows. +fn has_ordered_item_after_blank_lines(p: &mut MarkdownParser) -> bool { + has_list_item_after_blank_lines(p, |p| p.at(MD_ORDERED_LIST_MARKER)) +} + +fn has_list_item_after_blank_lines(p: &mut MarkdownParser, has_marker: F) -> bool +where + F: Fn(&mut MarkdownParser) -> bool, +{ + p.lookahead(|p| { + // Skip all blank lines + loop { + // Skip whitespace on current line + while p.at(MD_TEXTUAL_LITERAL) { + let text = p.cur_text(); + if text == " " || text == "\t" { + p.bump(MD_TEXTUAL_LITERAL); + } else { + break; + } + } + + // If at NEWLINE, consume it and continue checking + if p.at(NEWLINE) { + p.bump(NEWLINE); + continue; + } + + // Reached non-blank content or EOF + break; + } + + // Check for marker directly (avoid nested lookahead issues) + // Skip leading indent (up to 3 spaces for list items) + let mut indent = 0; + while p.at(MD_TEXTUAL_LITERAL) { + let text = p.cur_text(); + if text == " " { + indent += 1; + p.bump(MD_TEXTUAL_LITERAL); + } else if text == "\t" { + indent += 4; + p.bump(MD_TEXTUAL_LITERAL); + } else { + break; + } + } + + // More than 3 spaces indent = indented code block, not a list item + if indent > 3 { + return false; + } + + has_marker(p) + }) +} diff --git a/crates/biome_markdown_parser/src/syntax/parse_error.rs b/crates/biome_markdown_parser/src/syntax/parse_error.rs new file mode 100644 index 000000000000..73a5abb5845b --- /dev/null +++ b/crates/biome_markdown_parser/src/syntax/parse_error.rs @@ -0,0 +1,159 
@@ +//! Markdown-specific parse error diagnostics. + +use crate::MarkdownParser; +use biome_parser::Parser; +use biome_parser::diagnostic::ParseDiagnostic; +use biome_rowan::TextRange; + +/// Maximum nesting depth for block quotes and lists. +pub(crate) const MAX_NESTING_DEPTH: usize = 100; + +/// Unclosed emphasis (bold/italic). +/// +/// ```markdown +/// *text +/// ^ expected closing * +/// ``` +pub(crate) fn unclosed_emphasis( + p: &MarkdownParser, + opening_range: TextRange, + marker: &str, +) -> ParseDiagnostic { + p.err_builder( + format!("Unclosed emphasis, expected closing `{marker}`."), + opening_range, + ) + .with_detail(opening_range, "emphasis started here") + .with_hint(format!( + "Add closing `{marker}` or remove the opening delimiter." + )) +} + +/// Unclosed inline code span. +/// +/// ```markdown +/// `code +/// ^ expected closing ` +/// ``` +pub(crate) fn unclosed_code_span( + p: &MarkdownParser, + opening_range: TextRange, + backtick_count: usize, +) -> ParseDiagnostic { + let backticks = "`".repeat(backtick_count); + p.err_builder( + format!("Unclosed code span, expected closing `{backticks}`."), + opening_range, + ) + .with_detail(opening_range, "code span started here") + .with_hint(format!("Add closing `{backticks}` to close the code span.")) +} + +/// Unclosed inline link. +/// +/// ```markdown +/// [text +/// ^ expected closing ] and (url) +/// ``` +pub(crate) fn unclosed_link( + p: &MarkdownParser, + opening_range: TextRange, + missing_part: &str, +) -> ParseDiagnostic { + p.err_builder(format!("Unclosed link, {missing_part}."), opening_range) + .with_detail(opening_range, "link started here") + .with_hint("Format: [link text](url)") +} + +/// Unclosed inline image. 
+/// +/// ```markdown +/// ![alt +/// ^ expected closing ] and (src) +/// ``` +pub(crate) fn unclosed_image( + p: &MarkdownParser, + opening_range: TextRange, + missing_part: &str, +) -> ParseDiagnostic { + p.err_builder(format!("Unclosed image, {missing_part}."), opening_range) + .with_detail(opening_range, "image started here") + .with_hint("Format: ![alt text](image-url)") +} + +/// ATX heading with too many hashes (>6). +/// +/// ```markdown +/// ####### heading +/// ^^^^^^^ too many hashes (max 6) +/// ``` +pub(crate) fn too_many_hashes( + p: &MarkdownParser, + range: TextRange, + count: usize, +) -> ParseDiagnostic { + p.err_builder( + format!("ATX heading has {count} hashes, but maximum is 6."), + range, + ) + .with_detail(range, "heading started here") + .with_hint("Use 1-6 `#` characters for headings. This will be parsed as a paragraph.") +} + +/// Unterminated fenced code block. +/// +/// ```markdown +/// ```rust +/// fn main() {} +/// +/// ^ expected closing ``` +/// ``` +pub(crate) fn unterminated_fenced_code( + p: &MarkdownParser, + opening_range: TextRange, + fence_type: &str, +) -> ParseDiagnostic { + let fence_name = if fence_type == "```" { + "triple backticks (```)" + } else { + "triple tildes (~~~)" + }; + p.err_builder( + format!("Unterminated fenced code block, expected closing {fence_name}."), + opening_range, + ) + .with_detail(opening_range, "code block started here") + .with_hint(format!( + "Add closing {fence_name} at the start of a new line." + )) +} + +/// Block quote nesting too deep. +/// +/// ```markdown +/// >>>>>>>>...>>>> (100+ levels) +/// ^^^^^^^^^^^^^^^^ nesting too deep +/// ``` +pub(crate) fn quote_nesting_too_deep(p: &MarkdownParser, range: TextRange) -> ParseDiagnostic { + p.err_builder( + format!("Block quote nesting exceeds maximum depth of {MAX_NESTING_DEPTH}."), + range, + ) + .with_detail(range, "nesting limit reached here") + .with_hint("Reduce nesting depth. 
Additional levels will be treated as content.") +} + +/// List nesting too deep. +/// +/// ```markdown +/// - - - - ... - (100+ levels) +/// ^^^^^^^^^^^^^^ nesting too deep +/// ``` +pub(crate) fn list_nesting_too_deep(p: &MarkdownParser, range: TextRange) -> ParseDiagnostic { + p.err_builder( + format!("List nesting exceeds maximum depth of {MAX_NESTING_DEPTH}."), + range, + ) + .with_detail(range, "nesting limit reached here") + .with_hint("Reduce nesting depth. Additional levels will be treated as content.") +} diff --git a/crates/biome_markdown_parser/src/syntax/quote.rs b/crates/biome_markdown_parser/src/syntax/quote.rs new file mode 100644 index 000000000000..416d61e0a98d --- /dev/null +++ b/crates/biome_markdown_parser/src/syntax/quote.rs @@ -0,0 +1,351 @@ +//! Block quote parsing for Markdown (CommonMark §5.1). +//! +//! A block quote begins with `>` at the start of a line and can contain +//! nested block elements. Multiple consecutive `>` lines form a single quote. +//! Nested quotes are created with `>>`, `>>>`, etc. +//! +//! # CommonMark §5.1 Block Quotes +//! +//! A block quote marker consists of 0-3 spaces of indentation, `>`, and an +//! optional space. The contents of the block quote are the result of parsing +//! the remainder of the line (after the `>` and optional space) as blocks. +//! +//! ## Depth Limits +//! +//! To prevent stack overflow from pathological input (e.g., hundreds of `>`), +//! nesting depth is limited to 100 levels. Deeper nesting emits a diagnostic +//! and treats additional `>` as content. +//! +//! ## Lazy Continuation (§5.1) +//! +//! A block quote can contain "lazy continuation lines" — paragraph content +//! that continues without requiring `>` on each line. For example: +//! +//! ```markdown +//! > This is a quote +//! that continues here without > +//! ``` +//! +//! Both lines belong to the same block quote. Lazy continuation stops at: +//! - A blank line +//! 
- A line that starts another block-level construct (header, code, list, etc.) + +use biome_markdown_syntax::T; +use biome_markdown_syntax::kind::MarkdownSyntaxKind::*; +use biome_parser::Parser; +use biome_parser::prelude::ParsedSyntax::{self, *}; + +use super::parse_error::{MAX_NESTING_DEPTH, quote_nesting_too_deep}; +use crate::MarkdownParser; + +/// Check if we're at the start of a block quote (`>`). +pub(crate) fn at_quote(p: &mut MarkdownParser) -> bool { + p.lookahead(|p| { + let at_virtual_line_start = p.state().virtual_line_start == Some(p.cur_range().start()); + if !p.at_line_start() && !p.at_start_of_input() && !at_virtual_line_start { + return false; + } + let mut indent = p.line_start_leading_indent(); + if at_virtual_line_start && indent > 0 { + // Treat virtual line start as column 0. + indent = 0; + } + if indent > 3 { + return false; + } + p.skip_line_indent(3); + p.at(T![>]) + }) +} + +/// Parse a block quote. +/// +/// Grammar: MdQuote = marker: '>' content: AnyMdBlock +/// +/// A block quote starts with `>` at line start and contains block content. +/// Multi-line quotes: consecutive `>` lines continue the same quote's content. +/// Nested quotes: `>>` creates a nested quote inside the outer quote. +/// +/// Nesting is limited to `MAX_NESTING_DEPTH` to prevent stack overflow. 
+pub(crate) fn parse_quote(p: &mut MarkdownParser) -> ParsedSyntax { + if !at_quote(p) { + return Absent; + } + + if p.state().block_quote_depth >= MAX_NESTING_DEPTH { + let range = p.cur_range(); + p.error(quote_nesting_too_deep(p, range)); + return Absent; + } + + let m = p.start(); + + p.skip_line_indent(3); + + // Increment quote depth + p.state_mut().block_quote_depth += 1; + + // Bump the `>` marker token + p.bump(T![>]); + + let has_indented_code = at_quote_indented_code_start(p); + let marker_space = skip_optional_marker_space(p, has_indented_code); + p.set_virtual_line_start(); + + parse_quote_block_list(p); + + // Decrement quote depth + p.state_mut().block_quote_depth -= 1; + + let completed = m.complete(p, MD_QUOTE); + let range = completed.range(p); + let indent = 1 + if marker_space { 1 } else { 0 }; + p.record_quote_indent(range, indent); + Present(completed) +} + +fn parse_quote_block_list(p: &mut MarkdownParser) { + let m = p.start(); + let mut first_line = true; + let depth = p.state().block_quote_depth; + let mut last_block_was_paragraph = false; + + loop { + if p.at(T![EOF]) { + break; + } + + let mut line_started_with_prefix = first_line; + if !first_line && !p.at(NEWLINE) && (p.at_line_start() || p.has_preceding_line_break()) { + if has_quote_prefix(p, depth) { + consume_quote_prefix(p, depth); + line_started_with_prefix = true; + } else { + break; + } + } + first_line = false; + + if p.at(NEWLINE) { + if !line_started_with_prefix && line_has_quote_prefix_at_current(p, depth) { + line_started_with_prefix = true; + } + if p.at_blank_line() && !line_started_with_prefix { + break; + } + if has_empty_line_before(p) && !line_started_with_prefix { + break; + } + if last_block_was_paragraph && !line_started_with_prefix { + break; + } + if !line_started_with_prefix { + let has_next_prefix = p.lookahead(|p| { + p.bump(NEWLINE); + has_quote_prefix(p, depth) + }); + if !has_next_prefix { + break; + } + } + let text_m = p.start(); + p.bump(NEWLINE); + 
text_m.complete(p, MD_NEWLINE); + continue; + } + + if at_quote_indented_code_start(p) { + parse_quote_indented_code_block(p, depth); + last_block_was_paragraph = false; + continue; + } + + let parsed_kind = super::parse_any_block_with_indent_code_policy(p, true); + last_block_was_paragraph = parsed_kind == super::ParsedBlockKind::Paragraph; + } + + m.complete(p, MD_BLOCK_LIST); +} + +fn line_has_quote_prefix_at_current(p: &MarkdownParser, depth: usize) -> bool { + if depth == 0 { + return false; + } + + let source = p.source().source_text(); + let start: usize = p.cur_range().start().into(); + let line_start = source[..start].rfind('\n').map_or(0, |idx| idx + 1); + + let mut idx = line_start; + let mut indent = 0usize; + while idx < start { + match source.as_bytes()[idx] { + b' ' => { + indent += 1; + idx += 1; + } + b'\t' => { + indent += 4; + idx += 1; + } + _ => break, + } + if indent > 3 { + return false; + } + } + + for _ in 0..depth { + if idx >= start || source.as_bytes()[idx] != b'>' { + return false; + } + idx += 1; + if idx < start { + let c = source.as_bytes()[idx]; + if c == b' ' || c == b'\t' { + idx += 1; + } + } + } + + true +} + +fn has_empty_line_before(p: &MarkdownParser) -> bool { + let start: usize = p.cur_range().start().into(); + if start == 0 { + return false; + } + let source = p.source().source_text().as_bytes(); + matches!(source.get(start - 1), Some(b'\n' | b'\r')) +} + +fn at_quote_indented_code_start(p: &MarkdownParser) -> bool { + let mut column = 0usize; + + for c in p.source_after_current().chars() { + match c { + ' ' => column += 1, + '\t' => column += 4 - (column % 4), + _ => break, + } + } + + column >= 4 +} + +fn parse_quote_indented_code_block(p: &mut MarkdownParser, depth: usize) { + let m = p.start(); + let content = p.start(); + + loop { + if p.at(T![EOF]) { + break; + } + + if p.at(NEWLINE) { + let text_m = p.start(); + p.bump_remap(MD_TEXTUAL_LITERAL); + text_m.complete(p, MD_TEXTUAL); + + if p.at(T![EOF]) { + break; + } + 
+ if !has_quote_prefix(p, depth) { + break; + } + consume_quote_prefix(p, depth); + + if p.at(NEWLINE) { + continue; + } + if !at_quote_indented_code_start(p) { + break; + } + continue; + } + + let text_m = p.start(); + p.bump_remap(MD_TEXTUAL_LITERAL); + text_m.complete(p, MD_TEXTUAL); + } + + content.complete(p, MD_INLINE_ITEM_LIST); + m.complete(p, MD_INDENT_CODE_BLOCK); +} + +fn skip_optional_marker_space(p: &mut MarkdownParser, preserve_tab: bool) -> bool { + if !p.at(MD_TEXTUAL_LITERAL) { + return false; + } + + let text = p.cur_text(); + if text == " " { + p.parse_as_skipped_trivia_tokens(|p| p.bump(MD_TEXTUAL_LITERAL)); + return true; + } + if text == "\t" { + if !preserve_tab { + p.parse_as_skipped_trivia_tokens(|p| p.bump(MD_TEXTUAL_LITERAL)); + } + return true; + } + false +} + +pub(crate) fn has_quote_prefix(p: &mut MarkdownParser, depth: usize) -> bool { + if depth == 0 { + return false; + } + + p.lookahead(|p| consume_quote_prefix_impl(p, depth, false)) +} + +pub(crate) fn consume_quote_prefix(p: &mut MarkdownParser, depth: usize) -> bool { + if depth == 0 || !has_quote_prefix(p, depth) { + return false; + } + + consume_quote_prefix_impl(p, depth, true) +} + +pub(crate) fn consume_quote_prefix_without_virtual(p: &mut MarkdownParser, depth: usize) -> bool { + if depth == 0 || !has_quote_prefix(p, depth) { + return false; + } + + consume_quote_prefix_impl(p, depth, false) +} + +fn consume_quote_prefix_impl( + p: &mut MarkdownParser, + depth: usize, + set_virtual_line_start: bool, +) -> bool { + if !p.at_line_start() && !p.at_start_of_input() && !p.has_preceding_line_break() { + return false; + } + + for _ in 0..depth { + let prev_virtual = p.state().virtual_line_start; + p.state_mut().virtual_line_start = Some(p.cur_range().start()); + p.skip_line_indent(3); + p.state_mut().virtual_line_start = prev_virtual; + if p.at(T![>]) { + p.parse_as_skipped_trivia_tokens(|p| p.bump(T![>])); + } else if p.at(MD_TEXTUAL_LITERAL) && p.cur_text() == ">" { + 
p.parse_as_skipped_trivia_tokens(|p| p.bump_remap(T![>])); + } else { + return false; + } + let has_indented_code = at_quote_indented_code_start(p); + skip_optional_marker_space(p, has_indented_code); + } + + if set_virtual_line_start { + p.set_virtual_line_start(); + } + + true +} diff --git a/crates/biome_markdown_parser/src/syntax/thematic_break_block.rs b/crates/biome_markdown_parser/src/syntax/thematic_break_block.rs index b6789b9fde4f..0e289cee66ff 100644 --- a/crates/biome_markdown_parser/src/syntax/thematic_break_block.rs +++ b/crates/biome_markdown_parser/src/syntax/thematic_break_block.rs @@ -1,3 +1,18 @@ +//! Thematic break parsing for Markdown (CommonMark §4.1). +//! +//! A thematic break (horizontal rule) is a line consisting of three or more +//! matching `-`, `_`, or `*` characters, optionally with spaces between them. +//! +//! # Examples +//! +//! ```markdown +//! --- +//! *** +//! ___ +//! - - - +//! * * * +//! ``` + use crate::parser::MarkdownParser; use biome_markdown_syntax::MarkdownSyntaxKind::*; use biome_parser::{ @@ -6,7 +21,16 @@ use biome_parser::{ }; pub(crate) fn at_thematic_break_block(p: &mut MarkdownParser) -> bool { - p.at(MD_THEMATIC_BREAK_LITERAL) + p.lookahead(|p| { + if !p.at_line_start() && !p.at_start_of_input() { + return false; + } + if p.line_start_leading_indent() > 3 { + return false; + } + p.skip_line_indent(3); + p.at(MD_THEMATIC_BREAK_LITERAL) + }) } pub(crate) fn parse_thematic_break_block(p: &mut MarkdownParser) -> ParsedSyntax { @@ -15,6 +39,8 @@ pub(crate) fn parse_thematic_break_block(p: &mut MarkdownParser) -> ParsedSyntax } let m = p.start(); + p.skip_line_indent(3); + p.expect(MD_THEMATIC_BREAK_LITERAL); Present(m.complete(p, MD_THEMATIC_BREAK_BLOCK)) diff --git a/crates/biome_markdown_parser/src/to_html.rs b/crates/biome_markdown_parser/src/to_html.rs new file mode 100644 index 000000000000..7b19b457eb68 --- /dev/null +++ b/crates/biome_markdown_parser/src/to_html.rs @@ -0,0 +1,1543 @@ +//! 
HTML renderer for the Biome Markdown CST. +//! +//! This module provides a CST-to-HTML renderer for validating CommonMark spec +//! compliance. It handles the architectural mismatch between Biome's lossless +//! CST (which preserves tabs) and CommonMark's HTML output requirements. +//! +//! ## Purpose +//! +//! This is a **test harness module** designed for validating the markdown parser +//! against the CommonMark specification. It is not intended for production HTML +//! rendering. For production use cases, consider: +//! +//! - Using a dedicated markdown-to-HTML library +//! - Implementing a streaming/zero-copy renderer +//! +//! ## Key Design Decisions +//! +//! 1. **Tab Expansion**: The CST preserves raw `\t` characters for losslessness. +//! This renderer expands tabs only for structural indentation purposes (per +//! CommonMark §2.2), preserving literal tabs in code block content. +//! +//! 2. **Entity Decoding**: Uses the [`htmlize`] crate for WHATWG-compliant +//! HTML5 entity decoding, supporting all 2000+ named entities. +//! +//! 3. **Code Block Content**: The CST may include structural newlines; we skip +//! the leading newline after a fence marker. +//! +//! 4. **Line Endings**: Handles both LF (`\n`) and CRLF (`\r\n`) line endings +//! via the [`split_lines`] helper. +//! +//! ## Performance Notes +//! +//! This implementation prioritizes correctness over performance. Each rendering +//! pass may allocate multiple intermediate strings. For production rendering, +//! consider a single-buffer approach using `fmt::Write` or direct string building. 
+ +use biome_markdown_syntax::{ + AnyCodeBlock, AnyContainerBlock, AnyLeafBlock, AnyMdBlock, AnyMdInline, MdAutolink, MdBullet, + MdBulletListItem, MdDocument, MdEntityReference, MdFencedCodeBlock, MdHeader, MdHtmlBlock, + MdIndentCodeBlock, MdInlineCode, MdInlineHtml, MdInlineImage, MdInlineLink, + MdLinkReferenceDefinition, MdLinkTitle, MdOrderedListItem, MdParagraph, MdQuote, + MdReferenceImage, MdReferenceLink, MdSetextHeader, MdTextual, +}; +use biome_rowan::{AstNode, AstNodeList, Direction, TextRange}; +use std::collections::HashMap; + +use crate::link_reference::normalize_reference_label; +use crate::parser::ListTightness; + +// ============================================================================ +// Line Handling Utilities +// ============================================================================ + +/// Split text into lines, handling both LF (\n) and CRLF (\r\n) line endings. +/// +/// Unlike `str::lines()`, this preserves the information needed for proper +/// line-by-line processing in code blocks. Returns an iterator over line content +/// (without the line ending). +fn split_lines(text: &str) -> impl Iterator { + text.split('\n') + .map(|line| line.strip_suffix('\r').unwrap_or(line)) +} + +fn map_lines(text: &str, mut f: F) -> String +where + F: FnMut(&str, &mut String), +{ + let mut result = String::new(); + for (i, line) in split_lines(text).enumerate() { + if i > 0 { + result.push('\n'); + } + f(line, &mut result); + } + result +} + +// ============================================================================ +// Tab Expansion +// ============================================================================ + +/// Expand tabs to spaces based on 4-space tab stops. +/// +/// CommonMark spec §2.2: "Tabs are expanded to spaces with a tab stop of 4 characters." +/// The column position determines how many spaces a tab expands to. 
+#[cfg(test)] +fn expand_tabs(text: &str) -> String { + let mut result = String::with_capacity(text.len()); + let mut column = 0; + + for c in text.chars() { + match c { + '\t' => { + // Expand to next 4-space tab stop + let spaces = 4 - (column % 4); + for _ in 0..spaces { + result.push(' '); + } + column += spaces; + } + '\n' => { + result.push('\n'); + column = 0; // Reset column at newline + } + _ => { + result.push(c); + column += 1; + } + } + } + + result +} + +/// Strip structural indentation from code block content while preserving literal tabs. +/// +/// CommonMark §2.2: Tabs are expanded for structural purposes (determining indentation) +/// but preserved literally in content. This function: +/// 1. Calculates column position treating tabs as 4-space stops +/// 2. Strips the first `strip_cols` columns of indentation +/// 3. Preserves literal tabs in the remaining content +fn strip_indent_preserve_tabs(text: &str, strip_cols: usize) -> String { + map_lines(text, |line, result| { + let mut col = 0; + let mut char_idx = 0; + + // Find where to start copying (after stripping strip_cols columns) + for (idx, c) in line.char_indices() { + if col >= strip_cols { + char_idx = idx; + break; + } + match c { + '\t' => { + let next_col = col + (4 - (col % 4)); + if next_col > strip_cols { + // Tab crosses the strip boundary - add remaining spaces + let spaces = next_col - strip_cols; + for _ in 0..spaces { + result.push(' '); + } + char_idx = idx + 1; + break; + } + col = next_col; + } + ' ' => col += 1, + _ => { + // Non-whitespace before strip_cols - keep from here + char_idx = idx; + break; + } + } + char_idx = idx + c.len_utf8(); + } + + // Append the rest of the line (preserving literal tabs) + if char_idx < line.len() { + result.push_str(&line[char_idx..]); + } + }) +} + +fn strip_quote_prefixes(text: &str, quote_indent: usize) -> String { + map_lines(text, |line, result| { + let mut remaining = line; + loop { + let mut idx = 0usize; + let mut col = 0usize; + + 
// Scan up to 3 spaces/tabs for the optional quote indent. + while idx < remaining.len() { + let c = remaining.as_bytes()[idx]; + match c { + b' ' => { + col += 1; + idx += 1; + } + b'\t' => { + col += 4 - (col % 4); + idx += 1; + } + _ => break, + } + + if col > 3 { + idx = 0; + break; + } + } + + if idx > 0 || col == 0 { + if idx < remaining.len() { + if remaining[idx..].starts_with(">") { + idx += 4; + if idx < remaining.len() + && matches!(remaining.as_bytes()[idx], b' ' | b'\t') + { + idx += 1; + } + remaining = &remaining[idx..]; + continue; + } + if remaining.as_bytes()[idx] == b'>' { + idx += 1; + if idx < remaining.len() + && matches!(remaining.as_bytes()[idx], b' ' | b'\t') + { + idx += 1; + } + remaining = &remaining[idx..]; + continue; + } + } + + if quote_indent > 1 && idx > 0 && remaining.len() > 1 { + remaining = &remaining[1..]; + continue; + } + } + + break; + } + + result.push_str(remaining); + }) +} + +// ============================================================================ +// Context and Main Entry Point +// ============================================================================ + +/// Context for HTML rendering, containing link reference definitions +/// and list tightness information. +pub struct HtmlRenderContext { + /// Link reference definitions: label -> (url, title) + link_definitions: HashMap)>, + /// List tightness by text range + list_tightness: HashMap, + /// List item indentation details by text range + list_item_indents: HashMap, + /// Quote marker indents by text range + quote_indents: HashMap, +} + +impl HtmlRenderContext { + /// Create a new rendering context from parsed document data. 
+ pub fn new( + document: &MdDocument, + list_tightness: &[ListTightness], + list_item_indents: &[crate::parser::ListItemIndent], + quote_indents: &[crate::parser::QuoteIndent], + ) -> Self { + let link_definitions = collect_link_definitions(document); + let list_tightness_map = list_tightness + .iter() + .map(|lt| (lt.range, lt.is_tight)) + .collect(); + let list_item_indent_map = list_item_indents + .iter() + .map(|item| (item.range, item.clone())) + .collect(); + let quote_indent_map = quote_indents + .iter() + .map(|item| (item.range, item.clone())) + .collect(); + + Self { + link_definitions, + list_tightness: list_tightness_map, + list_item_indents: list_item_indent_map, + quote_indents: quote_indent_map, + } + } + + /// Look up a link reference definition by normalized label. + pub fn get_link_definition(&self, label: &str) -> Option<&(String, Option)> { + let normalized = normalize_reference_label(label); + self.link_definitions.get(&normalized) + } + + /// Check if a list at the given range is tight. + pub fn is_list_tight(&self, range: TextRange) -> bool { + self.list_tightness.get(&range).copied().unwrap_or(false) + } + + pub fn list_item_indent(&self, range: TextRange) -> Option<&crate::parser::ListItemIndent> { + self.list_item_indents.get(&range) + } + + pub fn quote_indent(&self, range: TextRange) -> usize { + self.quote_indents.get(&range).map_or(0, |item| item.indent) + } +} + +/// Render a markdown document to HTML. 
+pub fn document_to_html( + document: &MdDocument, + list_tightness: &[ListTightness], + list_item_indents: &[crate::parser::ListItemIndent], + quote_indents: &[crate::parser::QuoteIndent], +) -> String { + let ctx = HtmlRenderContext::new(document, list_tightness, list_item_indents, quote_indents); + let mut html = String::new(); + + for block in document.value() { + render_block(&block, &ctx, &mut html, false, 0, 0); + } + + html +} + +// ============================================================================ +// Link Reference Collection +// ============================================================================ + +/// Collect link reference definitions from the document. +fn collect_link_definitions(document: &MdDocument) -> HashMap)> { + let mut definitions = HashMap::new(); + + for node in document.syntax().descendants() { + if let Some(def) = MdLinkReferenceDefinition::cast(node) + && let (Ok(label), Ok(dest)) = (def.label(), def.destination()) + { + let label_text = collect_inline_text(&label.content()); + let normalized = normalize_reference_label(&label_text); + if normalized.is_empty() { + continue; + } + + // Only keep first definition (per CommonMark spec) + if definitions.contains_key(&normalized) { + continue; + } + + let url = collect_inline_text(&dest.content()); + let url = process_link_destination(&url); + + let title = def.title().map(|t| { + let text = collect_inline_text(&t.content()); + process_link_title(&text) + }); + + definitions.insert(normalized, (url, title)); + } + } + + definitions +} + +// ============================================================================ +// Block Rendering +// ============================================================================ + +/// Render a block element to HTML. 
+fn render_block( + block: &AnyMdBlock, + ctx: &HtmlRenderContext, + out: &mut String, + in_tight_list: bool, + list_indent: usize, + quote_indent: usize, +) { + match block { + AnyMdBlock::AnyLeafBlock(leaf) => { + render_leaf_block(leaf, ctx, out, in_tight_list, list_indent, quote_indent); + } + AnyMdBlock::AnyContainerBlock(container) => { + render_container_block(container, ctx, out, list_indent, quote_indent); + } + } +} + +/// Render a leaf block to HTML. +fn render_leaf_block( + block: &AnyLeafBlock, + ctx: &HtmlRenderContext, + out: &mut String, + in_tight_list: bool, + list_indent: usize, + quote_indent: usize, +) { + match block { + AnyLeafBlock::MdParagraph(para) => { + render_paragraph(para, ctx, out, in_tight_list, quote_indent); + } + AnyLeafBlock::MdHeader(header) => { + render_atx_header(header, ctx, out); + } + AnyLeafBlock::MdSetextHeader(header) => { + render_setext_header(header, ctx, out); + } + AnyLeafBlock::AnyCodeBlock(code) => { + render_code_block(code, out, list_indent, quote_indent); + } + AnyLeafBlock::MdThematicBreakBlock(_) => { + out.push_str("
\n"); + } + AnyLeafBlock::MdHtmlBlock(html) => { + render_html_block(html, out, list_indent, quote_indent); + } + AnyLeafBlock::MdLinkReferenceDefinition(_) => { + // Link reference definitions don't produce output + } + AnyLeafBlock::MdLinkBlock(_) => { + // MdLinkBlock is an internal structure, skip it + } + AnyLeafBlock::MdNewline(_) => { + // Blank lines don't produce output + } + } +} + +/// Render a container block to HTML. +fn render_container_block( + block: &AnyContainerBlock, + ctx: &HtmlRenderContext, + out: &mut String, + list_indent: usize, + quote_indent: usize, +) { + match block { + AnyContainerBlock::MdQuote(quote) => { + render_blockquote(quote, ctx, out, list_indent, quote_indent); + } + AnyContainerBlock::MdBulletListItem(list) => { + render_bullet_list(list, ctx, out, quote_indent); + } + AnyContainerBlock::MdOrderedListItem(list) => { + render_ordered_list(list, ctx, out, quote_indent); + } + } +} + +/// Render a paragraph. +fn render_paragraph( + para: &MdParagraph, + ctx: &HtmlRenderContext, + out: &mut String, + in_tight_list: bool, + quote_indent: usize, +) { + let mut content = render_inline_list(¶.list(), ctx); + if quote_indent > 0 { + content = strip_quote_prefixes(&content, quote_indent); + } + // Trim both ends - leading whitespace can appear from parser including + // the space after list markers in the paragraph content + let content = strip_paragraph_indent(content.trim()); + + if in_tight_list { + // In tight lists, paragraphs are rendered without

tags + out.push_str(&content); + out.push('\n'); + } else { + out.push_str("

"); + out.push_str(&content); + out.push_str("

\n"); + } +} + +fn strip_paragraph_indent(content: &str) -> String { + map_lines(content, |line, out| { + let mut stripped = 0usize; + let mut at_line_start = true; + for ch in line.chars() { + if at_line_start { + if ch == ' ' && stripped < 4 { + stripped += 1; + continue; + } + at_line_start = false; + } + out.push(ch); + } + }) +} + +/// Render an ATX header (# style). +fn render_atx_header(header: &MdHeader, ctx: &HtmlRenderContext, out: &mut String) { + let level = header.before().len().clamp(1, 6); + + out.push_str("'); + + if let Some(content) = header.content() { + let text = render_inline_list(&content.list(), ctx); + out.push_str(text.trim()); + } + + out.push_str("\n"); +} + +/// Render a setext header (underline style). +fn render_setext_header(header: &MdSetextHeader, ctx: &HtmlRenderContext, out: &mut String) { + let level = if let Ok(underline) = header.underline_token() { + let text = underline.text(); + if text.trim_start().starts_with('=') { + 1 + } else { + 2 + } + } else { + 1 + }; + + out.push_str("'); + + let text = render_inline_list(&header.content(), ctx); + out.push_str(text.trim()); + + out.push_str("\n"); +} + +// ============================================================================ +// Code Block Rendering +// ============================================================================ + +/// Render a code block (fenced or indented). +fn render_code_block( + code: &AnyCodeBlock, + out: &mut String, + list_indent: usize, + quote_indent: usize, +) { + match code { + AnyCodeBlock::MdFencedCodeBlock(fenced) => { + render_fenced_code_block(fenced, out, list_indent, quote_indent); + } + AnyCodeBlock::MdIndentCodeBlock(indented) => { + render_indented_code_block(indented, out, list_indent, quote_indent); + } + } +} + +/// Render a fenced code block. +/// +/// Handles the architectural issue where the CST content may include the +/// newline immediately after the opening fence. We skip this leading newline. 
+/// Also strips the fence's indentation from each content line per CommonMark. +fn render_fenced_code_block( + code: &MdFencedCodeBlock, + out: &mut String, + list_indent: usize, + quote_indent: usize, +) { + out.push_str("
 indent += 1,
+                    '\t' => indent += 4 - (indent % 4),
+                    '\n' | '\r' => indent = 0, // Reset at newlines
+                    _ => {}
+                }
+            }
+        }
+        indent
+    });
+    let container_indent = list_indent + quote_indent;
+    let fence_indent = fence_leading_indent.saturating_sub(container_indent).min(3);
+    let content_indent = container_indent + fence_indent;
+
+    // Get info string (language) - process escapes
+    let info_string: String = code
+        .code_list()
+        .iter()
+        .filter_map(|item| {
+            item.syntax()
+                .descendants_with_tokens(Direction::Next)
+                .filter_map(|el| el.into_token())
+                .map(|tok| tok.text().to_string())
+                .next()
+        })
+        .collect::();
+    let info_string = info_string.trim();
+
+    // Extract just the language part (before first space) and process escapes
+    let language = info_string.split_whitespace().next().unwrap_or("");
+    let language = process_escapes(language);
+
+    if !language.is_empty() {
+        out.push_str(" class=\"language-");
+        out.push_str(&escape_html_attribute(&language));
+        out.push('"');
+    }
+
+    out.push('>');
+
+    // Get raw content and handle leading newline
+    let mut content = collect_raw_inline_text(&code.content());
+
+    // Skip leading newline if present (it's part of the fence structure, not content)
+    if content.starts_with('\n') {
+        content = content[1..].to_string();
+    }
+
+    // Strip container + fence indentation from content lines
+    if content_indent > 0 {
+        content = strip_indent_preserve_tabs(&content, content_indent);
+    }
+
+    // Escape HTML but preserve the content structure
+    out.push_str(&escape_html(&content));
+
+    out.push_str("
\n"); +} + +/// Render an indented code block. +/// +/// Indented code blocks require stripping 4 spaces of indentation after +/// tab expansion. +fn render_indented_code_block( + code: &MdIndentCodeBlock, + out: &mut String, + list_indent: usize, + quote_indent: usize, +) { + out.push_str("
");
+
+    let mut content = collect_raw_inline_text(&code.content());
+    // Drop a leading newline from list marker-only lines.
+    if content.starts_with('\n') {
+        content = content[1..].to_string();
+    }
+    // Strip 4 columns of structural indent but preserve literal tabs in content
+    let content = strip_indent_preserve_tabs(&content, 4 + list_indent + quote_indent);
+    out.push_str(&escape_html(&content));
+
+    out.push_str("
\n"); +} + +/// Render an HTML block. +fn render_html_block( + html: &MdHtmlBlock, + out: &mut String, + list_indent: usize, + quote_indent: usize, +) { + let mut content = collect_raw_inline_text(&html.content()); + if list_indent > 0 { + content = strip_indent_preserve_tabs(&content, list_indent); + } + if quote_indent > 0 { + content = strip_quote_prefixes(&content, quote_indent); + } + out.push_str(&content); + if !content.ends_with('\n') { + out.push('\n'); + } +} + +// ============================================================================ +// Container Block Rendering +// ============================================================================ + +/// Render a blockquote. +fn render_blockquote( + quote: &MdQuote, + ctx: &HtmlRenderContext, + out: &mut String, + list_indent: usize, + quote_indent: usize, +) { + out.push_str("
\n"); + + let content = quote.content(); + let marker_indent = ctx.quote_indent(quote.syntax().text_trimmed_range()); + for block in content.iter() { + render_block( + &block, + ctx, + out, + false, + list_indent, + quote_indent + marker_indent, + ); + } + + out.push_str("
\n"); +} + +/// Render a bullet (unordered) list. +fn render_bullet_list( + list: &MdBulletListItem, + ctx: &HtmlRenderContext, + out: &mut String, + quote_indent: usize, +) { + let range = list.syntax().text_trimmed_range(); + let mut is_tight = ctx.is_list_tight(range); + let has_blank_lines = list.md_bullet_list().iter().any(|bullet| { + bullet + .content() + .iter() + .any(|block| matches!(block, AnyMdBlock::AnyLeafBlock(AnyLeafBlock::MdNewline(_)))) + }); + if has_blank_lines { + is_tight = false; + } + + out.push_str("
    \n"); + + for bullet in list.md_bullet_list() { + render_list_item(&bullet, ctx, out, is_tight, quote_indent); + } + + out.push_str("
\n"); +} + +/// Render an ordered list. +fn render_ordered_list( + list: &MdOrderedListItem, + ctx: &HtmlRenderContext, + out: &mut String, + quote_indent: usize, +) { + let range = list.syntax().text_trimmed_range(); + let mut is_tight = ctx.is_list_tight(range); + let has_blank_lines = list.md_bullet_list().iter().any(|bullet| { + bullet + .content() + .iter() + .any(|block| matches!(block, AnyMdBlock::AnyLeafBlock(AnyLeafBlock::MdNewline(_)))) + }); + if has_blank_lines { + is_tight = false; + } + + // Get starting number from first item + let start = list + .md_bullet_list() + .first() + .and_then(|bullet| bullet.bullet().ok()) + .map_or(1, |marker| { + let text = marker.text(); + // Extract number from "1." or "1)" format + text.trim_start() + .chars() + .take_while(|c| c.is_ascii_digit()) + .collect::() + .parse::() + .unwrap_or(1) + }); + + if start == 1 { + out.push_str("
    \n"); + } else { + out.push_str("
      \n"); + } + + for bullet in list.md_bullet_list() { + render_list_item(&bullet, ctx, out, is_tight, quote_indent); + } + + out.push_str("
    \n"); +} + +/// Render a list item. +fn render_list_item( + bullet: &MdBullet, + ctx: &HtmlRenderContext, + out: &mut String, + is_tight: bool, + quote_indent: usize, +) { + out.push_str("
  1. "); + + let list_indent = ctx.list_item_indent(bullet.syntax().text_trimmed_range()); + let blocks: Vec<_> = bullet.content().iter().collect(); + let item_has_blank_line = blocks.iter().enumerate().any(|(index, block)| { + if !is_newline_block(block) { + return false; + } + + // Ignore the marker-line newline when content follows. + if index == 0 && blocks.iter().skip(1).any(|block| !is_newline_block(block)) { + return false; + } + + true + }); + let is_tight = is_tight && !item_has_blank_line; + + let (indent, first_line_code_indent) = match list_indent { + Some(entry) => { + let base = list_item_required_indent(entry); + let first_line_code = + (entry.spaces_after_marker > INDENT_CODE_BLOCK_SPACES).then_some(1); + (base, first_line_code) + } + None => (0, None), + }; + + if is_empty_content(&blocks) { + out.push_str("
  2. \n"); + return; + } + + if is_tight { + if blocks.len() == 1 && is_paragraph_block(&blocks[0]) { + // Tight list with single paragraph: no newline after
  3. + if let Some(block) = blocks.first() { + let block_indent = match (first_line_code_indent, block) { + ( + Some(code_indent), + AnyMdBlock::AnyLeafBlock(AnyLeafBlock::AnyCodeBlock( + AnyCodeBlock::MdIndentCodeBlock(_), + )), + ) => code_indent, + _ => indent, + }; + render_block(block, ctx, out, true, block_indent, quote_indent); + } + // Remove trailing newline for tight lists + if out.ends_with('\n') { + out.pop(); + } + } else if blocks.first().is_some_and(is_paragraph_block) { + // Tight list with multiple blocks: render paragraph inline with
  4. + if let Some(first) = blocks.first() { + let block_indent = match (first_line_code_indent, first) { + ( + Some(code_indent), + AnyMdBlock::AnyLeafBlock(AnyLeafBlock::AnyCodeBlock( + AnyCodeBlock::MdIndentCodeBlock(_), + )), + ) => code_indent, + _ => indent, + }; + render_block(first, ctx, out, true, block_indent, quote_indent); + } + for block in blocks.iter().skip(1) { + render_block(block, ctx, out, true, indent, quote_indent); + } + } else { + out.push('\n'); + for (idx, block) in blocks.iter().enumerate() { + let block_indent = if idx == 0 { + match (first_line_code_indent, block) { + ( + Some(code_indent), + AnyMdBlock::AnyLeafBlock(AnyLeafBlock::AnyCodeBlock( + AnyCodeBlock::MdIndentCodeBlock(_), + )), + ) => code_indent, + _ => indent, + } + } else { + indent + }; + render_block(block, ctx, out, true, block_indent, quote_indent); + } + } + } else { + // Loose list or multiple blocks + out.push('\n'); + for (idx, block) in blocks.iter().enumerate() { + let block_indent = if idx == 0 { + match (first_line_code_indent, block) { + ( + Some(code_indent), + AnyMdBlock::AnyLeafBlock(AnyLeafBlock::AnyCodeBlock( + AnyCodeBlock::MdIndentCodeBlock(_), + )), + ) => code_indent, + _ => indent, + } + } else { + indent + }; + render_block(block, ctx, out, false, block_indent, quote_indent); + } + } + + out.push_str("
  5. \n"); +} + +// ============================================================================ +// Inline Rendering +// ============================================================================ + +/// Render an inline item list to HTML string. +fn render_inline_list( + list: &biome_markdown_syntax::MdInlineItemList, + ctx: &HtmlRenderContext, +) -> String { + let mut result = String::new(); + let items: Vec<_> = list.iter().collect(); + let len = items.len(); + + for (i, item) in items.iter().enumerate() { + let is_last = i == len - 1; + + // Special handling for hard line breaks at end of block + if is_last && let AnyMdInline::MdHardLine(hard) = item { + // Per CommonMark: hard line break at end of block is ignored + // But if it was a backslash, output the backslash + if let Ok(token) = hard.value_token() + && token.text().starts_with('\\') + { + result.push('\\'); + } + // Otherwise (trailing spaces), output nothing + continue; + } + + render_inline(item, ctx, &mut result); + } + result +} + +/// Render an inline element. 
+fn render_inline(inline: &AnyMdInline, ctx: &HtmlRenderContext, out: &mut String) { + match inline { + AnyMdInline::MdTextual(text) => { + render_textual(text, out); + } + AnyMdInline::MdInlineEmphasis(em) => { + out.push_str(""); + out.push_str(&render_inline_list(&em.content(), ctx)); + out.push_str(""); + } + AnyMdInline::MdInlineItalic(italic) => { + out.push_str(""); + out.push_str(&render_inline_list(&italic.content(), ctx)); + out.push_str(""); + } + AnyMdInline::MdInlineCode(code) => { + render_inline_code(code, out); + } + AnyMdInline::MdInlineLink(link) => { + render_inline_link(link, ctx, out); + } + AnyMdInline::MdInlineImage(img) => { + render_inline_image(img, ctx, out); + } + AnyMdInline::MdReferenceLink(link) => { + render_reference_link(link, ctx, out); + } + AnyMdInline::MdReferenceImage(img) => { + render_reference_image(img, ctx, out); + } + AnyMdInline::MdAutolink(autolink) => { + render_autolink(autolink, out); + } + AnyMdInline::MdInlineHtml(html) => { + render_inline_html(html, out); + } + AnyMdInline::MdHtmlBlock(html) => { + // Inline HTML block (rare case) + let content = collect_raw_inline_text(&html.content()); + out.push_str(&content); + } + AnyMdInline::MdHardLine(_) => { + out.push_str("
    \n"); + } + AnyMdInline::MdSoftBreak(_) => { + out.push('\n'); + } + AnyMdInline::MdEntityReference(entity) => { + render_entity_reference(entity, out); + } + } +} + +/// Render textual content. +fn render_textual(text: &MdTextual, out: &mut String) { + if let Ok(token) = text.value_token() { + let raw = token.text(); + // Process backslash escapes and escape HTML + let processed = process_escapes(raw); + out.push_str(&escape_html(&processed)); + } +} + +/// Render inline code. +fn render_inline_code(code: &MdInlineCode, out: &mut String) { + out.push_str(""); + + let content = collect_raw_inline_text(&code.content()); + // Code spans: normalize line endings to spaces + let content = content.replace('\n', " "); + // Code spans: strip one leading/trailing space if content has both + // and the content isn't all spaces + let content = if content.starts_with(' ') + && content.ends_with(' ') + && content.len() > 2 + && content.chars().any(|c| c != ' ') + { + content[1..content.len() - 1].to_string() + } else { + content + }; + + out.push_str(&escape_html(&content)); + out.push_str(""); +} + +/// Render an inline link. +fn render_inline_link(link: &MdInlineLink, ctx: &HtmlRenderContext, out: &mut String) { + let text = render_inline_list(&link.text(), ctx); + let dest = collect_inline_text(&link.destination()); + let dest = process_link_destination(&dest); + + out.push_str("'); + out.push_str(&text); + out.push_str(""); +} + +/// Render an inline image. +fn render_inline_image(img: &MdInlineImage, ctx: &HtmlRenderContext, out: &mut String) { + let alt = render_inline_list(&img.alt(), ctx); + // Strip HTML tags from alt text + let alt = strip_html_tags(&alt); + + let dest = collect_inline_text(&img.destination()); + let dest = process_link_destination(&dest); + + out.push_str("\"");"); +} + +/// Render a reference link. 
+fn render_reference_link(link: &MdReferenceLink, ctx: &HtmlRenderContext, out: &mut String) { + let text = render_inline_list(&link.text(), ctx); + let text_raw = collect_inline_text(&link.text()); + + render_reference_common( + link.label(), + text_raw.clone(), + |label_node| collect_inline_text(&label_node.label()), + ctx, + out, + |url, title, out| { + out.push_str("'); + out.push_str(&text); + out.push_str(""); + }, + |label_display, out| { + // No definition found - output as literal text + out.push('['); + out.push_str(&text); + out.push(']'); + if let Some(label) = label_display { + out.push('['); + out.push_str(&escape_html(&label)); + out.push(']'); + } + }, + ); +} + +/// Render a reference image. +fn render_reference_image(img: &MdReferenceImage, ctx: &HtmlRenderContext, out: &mut String) { + let alt = render_inline_list(&img.alt(), ctx); + let alt = strip_html_tags(&alt); + let alt_raw = collect_inline_text(&img.alt()); + + render_reference_common( + img.label(), + alt_raw.clone(), + |label_node| collect_inline_text(&label_node.label()), + ctx, + out, + |url, title, out| { + out.push_str("\"");"); + }, + |label_display, out| { + // No definition found - output as literal text + out.push_str("!["); + out.push_str(&alt); + out.push(']'); + if let Some(label) = label_display { + out.push('['); + out.push_str(&escape_html(&label)); + out.push(']'); + } + }, + ); +} + +fn resolve_reference_label( + label_node: Option, + fallback: String, + label_text: F, +) -> (String, Option) +where + F: FnOnce(&L) -> String, +{ + if let Some(node) = label_node { + let text = label_text(&node); + (text.clone(), Some(text)) + } else { + (fallback, None) + } +} + +fn render_reference_common( + label_node: Option, + fallback: String, + label_text: FLabel, + ctx: &HtmlRenderContext, + out: &mut String, + on_found: FFound, + on_missing: FMissing, +) where + FLabel: FnOnce(&L) -> String, + FFound: FnOnce(&str, Option<&str>, &mut String), + FMissing: FnOnce(Option, &mut String), 
+{ + // Get label (if explicit) or use the fallback + let (label, label_display) = resolve_reference_label(label_node, fallback, label_text); + + if let Some((url, title)) = ctx.get_link_definition(&label) { + on_found(url, title.as_deref(), out); + } else { + on_missing(label_display, out); + } +} + +/// Render an autolink. +fn render_autolink(autolink: &MdAutolink, out: &mut String) { + let content = collect_raw_inline_text(&autolink.value()); + + // Check if it's an email autolink + let is_email = content.contains('@') && !content.contains(':'); + + let href = if is_email { + format!("mailto:{}", content) + } else { + content.clone() + }; + + out.push_str(""); + out.push_str(&escape_html(&content)); + out.push_str(""); +} + +/// Render inline HTML. +fn render_inline_html(html: &MdInlineHtml, out: &mut String) { + let content = collect_raw_inline_text(&html.value()); + out.push_str(&content); +} + +/// Render an entity reference. +fn render_entity_reference(entity: &MdEntityReference, out: &mut String) { + if let Ok(token) = entity.value_token() { + let text = token.text(); + // Decode known entities or pass through + if let Some(decoded) = decode_entity(text) { + out.push_str(&escape_html(&decoded)); + } else { + // Unknown entity - pass through as-is (escaped) + out.push_str(&escape_html(text)); + } + } +} + +/// Render a link title attribute. +fn render_link_title(title: &MdLinkTitle, out: &mut String) { + let text = collect_inline_text(&title.content()); + let text = process_link_title(&text); + + out.push_str(" title=\""); + out.push_str(&escape_html_attribute(&text)); + out.push('"'); +} + +// ============================================================================ +// Text Collection Helpers +// ============================================================================ + +/// Collect all text from an inline list (for processing). 
+fn collect_inline_text(list: &biome_markdown_syntax::MdInlineItemList) -> String { + let mut text = String::new(); + for token in list + .syntax() + .descendants_with_tokens(Direction::Next) + .filter_map(|element| element.into_token()) + { + text.push_str(token.text()); + } + text +} + +/// Collect raw inline text without processing escapes. +fn collect_raw_inline_text(list: &biome_markdown_syntax::MdInlineItemList) -> String { + let mut text = String::new(); + for item in list.iter() { + collect_raw_inline_item(&item, &mut text); + } + text +} + +/// Collect raw text from a single inline item. +fn collect_raw_inline_item(item: &AnyMdInline, out: &mut String) { + match item { + AnyMdInline::MdTextual(text) => { + if let Ok(token) = text.value_token() { + out.push_str(token.text()); + } + } + AnyMdInline::MdSoftBreak(_) => { + out.push('\n'); + } + AnyMdInline::MdHardLine(_) => { + out.push('\n'); + } + _ => { + // For other inline elements, collect their tokens + for token in item + .syntax() + .descendants_with_tokens(Direction::Next) + .filter_map(|element| element.into_token()) + { + out.push_str(token.text()); + } + } + } +} + +// ============================================================================ +// Text Processing +// ============================================================================ + +/// Process backslash escapes in text. +fn process_escapes(text: &str) -> String { + let mut result = String::new(); + let mut chars = text.chars().peekable(); + + while let Some(c) = chars.next() { + if c == '\\' + && let Some(&next) = chars.peek() + && is_escapable(next) + { + result.push(next); + chars.next(); + continue; + } + result.push(c); + } + + result +} + +/// Check if a character can be escaped in markdown. +fn is_escapable(c: char) -> bool { + matches!( + c, + '!' | '"' + | '#' + | '$' + | '%' + | '&' + | '\'' + | '(' + | ')' + | '*' + | '+' + | ',' + | '-' + | '.' + | '/' + | ':' + | ';' + | '<' + | '=' + | '>' + | '?' 
+ | '@' + | '[' + | '\\' + | ']' + | '^' + | '_' + | '`' + | '{' + | '|' + | '}' + | '~' + ) +} + +/// Process a link destination (remove angle brackets, decode escapes). +fn process_link_destination(dest: &str) -> String { + let dest = dest.trim(); + + // Remove angle brackets if present + let dest = if dest.starts_with('<') && dest.ends_with('>') { + &dest[1..dest.len() - 1] + } else { + dest + }; + + // Process escapes + process_escapes(dest) +} + +/// Process a link title (remove quotes, decode escapes). +fn process_link_title(title: &str) -> String { + let title = title.trim(); + + // Remove surrounding quotes + let title = if (title.starts_with('"') && title.ends_with('"')) + || (title.starts_with('\'') && title.ends_with('\'')) + || (title.starts_with('(') && title.ends_with(')')) + { + &title[1..title.len() - 1] + } else { + title + }; + + // Process escapes + process_escapes(title) +} + +// ============================================================================ +// HTML Escaping +// ============================================================================ + +/// Escape HTML special characters. +fn escape_html(text: &str) -> String { + let mut result = String::with_capacity(text.len()); + for c in text.chars() { + match c { + '&' => result.push_str("&"), + '<' => result.push_str("<"), + '>' => result.push_str(">"), + '"' => result.push_str("""), + _ => result.push(c), + } + } + result +} + +/// Escape HTML attribute values. +fn escape_html_attribute(text: &str) -> String { + escape_html(text) +} + +/// Strip HTML tags from text (for image alt text). 
+fn strip_html_tags(text: &str) -> String { + let mut result = String::new(); + let mut in_tag = false; + + for c in text.chars() { + if c == '<' { + in_tag = true; + } else if c == '>' { + in_tag = false; + } else if !in_tag { + result.push(c); + } + } + + result +} + +// ============================================================================ +// Entity Decoding +// ============================================================================ + +/// Decode HTML entities using the WHATWG standard. +/// +/// Handles numeric character references ({ and {) and all standard +/// HTML5 named entities via the `htmlize` crate. +fn decode_entity(entity: &str) -> Option { + use std::borrow::Cow; + + // Use htmlize for WHATWG-compliant entity decoding + let decoded = htmlize::unescape(entity); + + match decoded { + // If the entity was decoded (string changed), return the decoded value + Cow::Owned(s) if s != entity => Some(s), + // If unchanged and it looks like an entity, it's invalid + Cow::Borrowed(_) if entity.starts_with('&') && entity.ends_with(';') => None, + Cow::Owned(s) if entity.starts_with('&') && entity.ends_with(';') && s == entity => None, + // Otherwise return as-is + Cow::Owned(s) => Some(s), + Cow::Borrowed(s) => Some(s.to_string()), + } +} +fn is_paragraph_block(block: &AnyMdBlock) -> bool { + matches!( + block, + AnyMdBlock::AnyLeafBlock(AnyLeafBlock::MdParagraph(_)) + ) +} + +/// Check if a block is a newline (produces no output). +fn is_newline_block(block: &AnyMdBlock) -> bool { + matches!(block, AnyMdBlock::AnyLeafBlock(AnyLeafBlock::MdNewline(_))) +} + +/// Check if blocks are effectively empty (empty or only newlines). 
+fn is_empty_content(blocks: &[AnyMdBlock]) -> bool { + blocks.is_empty() || blocks.iter().all(is_newline_block) +} + +const INDENT_CODE_BLOCK_SPACES: usize = 4; + +fn list_item_required_indent(entry: &crate::parser::ListItemIndent) -> usize { + if entry.spaces_after_marker > INDENT_CODE_BLOCK_SPACES { + entry.marker_indent + entry.marker_width + 1 + } else { + entry.marker_indent + entry.marker_width + entry.spaces_after_marker.max(1) + } +} + +// ============================================================================ +// Tests +// ============================================================================ + +#[cfg(test)] +mod tests { + use super::*; + use crate::parse_markdown; + + #[test] + fn test_tab_expansion() { + assert_eq!(expand_tabs("\tfoo"), " foo"); + assert_eq!(expand_tabs("a\tb"), "a b"); + assert_eq!(expand_tabs("ab\tc"), "ab c"); + assert_eq!(expand_tabs("abc\td"), "abc d"); + assert_eq!(expand_tabs("abcd\te"), "abcd e"); + assert_eq!(expand_tabs("\t\tfoo"), " foo"); + } + + #[test] + fn test_tab_expansion_multiline() { + assert_eq!(expand_tabs("a\tb\nc\td"), "a b\nc d"); + } + + #[test] + fn test_simple_paragraph() { + let parsed = parse_markdown("Hello, world!\n"); + let html = document_to_html( + &parsed.tree(), + parsed.list_tightness(), + parsed.list_item_indents(), + parsed.quote_indents(), + ); + assert_eq!(html, "

    Hello, world!

    \n"); + } + + #[test] + fn test_atx_header() { + let parsed = parse_markdown("# Hello\n"); + let html = document_to_html( + &parsed.tree(), + parsed.list_tightness(), + parsed.list_item_indents(), + parsed.quote_indents(), + ); + assert_eq!(html, "

    Hello

    \n"); + } + + #[test] + fn test_emphasis() { + let parsed = parse_markdown("*italic* and **bold**\n"); + let html = document_to_html( + &parsed.tree(), + parsed.list_tightness(), + parsed.list_item_indents(), + parsed.quote_indents(), + ); + assert_eq!(html, "

    italic and bold

    \n"); + } + + #[test] + fn test_escape_html() { + assert_eq!(escape_html("a & b < c > d"), "a & b < c > d"); + } + + #[test] + fn test_decode_entity() { + assert_eq!(decode_entity("&"), Some("&".to_string())); + assert_eq!(decode_entity("A"), Some("A".to_string())); + assert_eq!(decode_entity("A"), Some("A".to_string())); + assert_eq!(decode_entity(" "), Some("\u{00A0}".to_string())); + // U+0000 should become replacement character + assert_eq!(decode_entity("�"), Some("\u{FFFD}".to_string())); + } +} diff --git a/crates/biome_markdown_parser/src/token_source.rs b/crates/biome_markdown_parser/src/token_source.rs index b33bd80f5956..451d1130f3e4 100644 --- a/crates/biome_markdown_parser/src/token_source.rs +++ b/crates/biome_markdown_parser/src/token_source.rs @@ -60,34 +60,122 @@ impl<'source> MarkdownTokenSource<'source> { } } - /// Returns the number of whitespace characters before the current token until the first new line. - /// tab will be counted as 4 spaces https://spec.commonmark.org/0.31.2/#tabs - /// whitespace will be counted as 1 space - pub fn before_whitespace_count(&self) -> usize { - let last_trivia: Vec<&Trivia> = self - .trivia_list - .iter() - .rev() - .take_while(|item| { - // get before whitespace and tab collect - matches!( - item.kind(), - TriviaPieceKind::Whitespace | TriviaPieceKind::Skipped - ) - }) - .collect(); - last_trivia.iter().fold(0, |count, b| match b.kind() { - TriviaPieceKind::Skipped => count + 4, - TriviaPieceKind::Whitespace => count + u32::from(b.len()) as usize, - _ => count, - }) - } - - #[expect(dead_code)] + // === Token-based helpers for Stage 1 refactor === + // These helpers work with explicit NEWLINE tokens rather than trivia inspection. + + /// Returns true if the current token is at the start of input (position 0). + /// + /// This is a position-based check that doesn't rely on trivia_len, + /// making it work correctly when NEWLINE is an explicit token. 
+ pub fn at_start_of_input(&self) -> bool { + self.current_range().start() == 0.into() + } + + /// Returns the source text starting from the current token position. + /// This is useful for lookahead when detecting HTML blocks. + pub fn source_after_current(&self) -> &str { + let range = self.lexer.current_range(); + let start: usize = range.start().into(); + let source = self.lexer.source(); + &source[start..] + } + + /// Returns the full source text. + pub fn source_text(&self) -> &str { + self.lexer.source() + } + + /// Count leading indentation on the current line, including whitespace inside the current token. + /// + /// This scans from the start of the current line to the first non-whitespace character. + /// Tab characters are counted as 4 spaces per CommonMark spec. + pub fn line_start_leading_indent(&self) -> usize { + let range = self.lexer.current_range(); + let start: usize = range.start().into(); + + let source = self.lexer.source(); + let before_token = &source[..start]; + + // Find the last newline before current token + let last_newline_pos = before_token.rfind(['\n', '\r']); + let line_start = match last_newline_pos { + Some(pos) => { + let bytes = before_token.as_bytes(); + if bytes.get(pos) == Some(&b'\r') && bytes.get(pos + 1) == Some(&b'\n') { + pos + 2 + } else { + pos + 1 + } + } + None => 0, + }; + + let line = &source[line_start..]; + let mut count = 0usize; + for c in line.chars() { + match c { + ' ' => count += 1, + '\t' => count += 4, + _ => break, + } + } + count + } + + /// Returns true if the current token starts on a line with only whitespace before it. + /// + /// This is a more robust line-start check when NEWLINE is an explicit token. 
+ pub fn at_line_start_with_whitespace(&self) -> bool { + let range = self.lexer.current_range(); + let start: usize = range.start().into(); + + let source = self.lexer.source(); + let before_token = &source[..start]; + + let last_newline_pos = before_token.rfind(['\n', '\r']); + let line_start = match last_newline_pos { + Some(pos) => { + let bytes = before_token.as_bytes(); + if bytes.get(pos) == Some(&b'\r') && bytes.get(pos + 1) == Some(&b'\n') { + pos + 2 + } else { + pos + 1 + } + } + None => 0, + }; + + source[line_start..start] + .chars() + .all(|c| c == ' ' || c == '\t') + } + + /// Re-lexes the current token in a different context. + /// Used for context-sensitive parsing like link definitions where whitespace + /// needs to produce separate tokens to distinguish destination from title. pub fn re_lex(&mut self, mode: MarkdownReLexContext) -> MarkdownSyntaxKind { self.lexer.re_lex(mode) } + /// Force re-lex the current token in a new lex context. + /// + /// Use this after lookahead operations (like `lookahead_reference_link`) when + /// switching to LinkDefinition context. This ensures that tokens cached during + /// lookahead in Regular context are discarded and re-lexed correctly. + pub fn force_relex_in_context(&mut self, context: MarkdownLexContext) -> MarkdownSyntaxKind { + self.lexer.force_relex_in_context(context) + } + + pub fn set_force_ordered_list_marker(&mut self, value: bool) { + self.lexer.lexer_mut().set_force_ordered_list_marker(value); + } + + /// Bump the current token using the LinkDefinition context. + /// In this context, whitespace produces separate tokens. + pub fn bump_link_definition(&mut self) { + self.bump_with_context(MarkdownLexContext::LinkDefinition); + } + /// Creates a checkpoint to which it can later return using [Self::rewind]. 
pub fn checkpoint(&self) -> MarkdownTokenSourceCheckpoint { MarkdownTokenSourceCheckpoint { diff --git a/crates/biome_markdown_parser/tests/commonmark_spec.rs b/crates/biome_markdown_parser/tests/commonmark_spec.rs new file mode 100644 index 000000000000..686bd6edcb7b --- /dev/null +++ b/crates/biome_markdown_parser/tests/commonmark_spec.rs @@ -0,0 +1,346 @@ +//! CommonMark specification compliance test harness. +//! +//! This test runs all 652 CommonMark spec examples against Biome's markdown parser +//! and reports the compliance percentage. +//! +//! Run with: `cargo test -p biome_markdown_parser --test commonmark_spec -- --nocapture` + +use biome_markdown_parser::{document_to_html, parse_markdown}; +use serde::Deserialize; + +/// Embedded CommonMark spec test cases. +const SPEC_JSON: &str = include_str!("spec.json"); + +/// A single test case from the CommonMark spec. +#[derive(Debug, Deserialize)] +struct SpecTest { + /// The markdown input + markdown: String, + /// The expected HTML output + html: String, + /// The example number in the spec + example: u32, + /// The section name + section: String, +} + +/// Information about a failed test. +#[derive(Debug)] +struct FailedTest { + example: u32, + section: String, + markdown: String, + expected: String, + actual: String, +} + +/// Normalize HTML for comparison. +/// +/// CommonMark spec tests are strict about HTML output, but there are some +/// acceptable variations in whitespace that we handle here. +/// +/// IMPORTANT: We preserve whitespace inside `
    ` blocks since trailing
    +/// spaces are significant in code blocks per CommonMark spec.
    +fn normalize_html(html: &str) -> String {
    +    let mut result = Vec::new();
    +    let mut in_pre = false;
    +
    +    for line in html.lines() {
    +        // Track 
     block state
    +        // Note: CommonMark output has 
     on same line, so check for 
     blocks
    +        if in_pre {
    +            result.push(line.to_string());
    +        } else {
    +            result.push(line.trim_end().to_string());
    +        }
    +
    +        // Check for 
    after processing the line + if line.contains("
    ") { + in_pre = false; + } + } + + result.join("\n").trim().to_string() + "\n" +} + +/// Show a unified diff between expected and actual HTML. +fn diff(expected: &str, actual: &str) -> String { + let mut result = String::new(); + let expected_lines: Vec<&str> = expected.lines().collect(); + let actual_lines: Vec<&str> = actual.lines().collect(); + + let max_lines = expected_lines.len().max(actual_lines.len()); + + for i in 0..max_lines { + let exp = expected_lines.get(i).unwrap_or(&""); + let act = actual_lines.get(i).unwrap_or(&""); + + if exp != act { + result.push_str(&format!("- {}\n", exp)); + result.push_str(&format!("+ {}\n", act)); + } else { + result.push_str(&format!(" {}\n", exp)); + } + } + + result +} + +#[test] +fn commonmark_spec_compliance() { + let tests: Vec = serde_json::from_str(SPEC_JSON).expect("Failed to parse spec.json"); + let total = tests.len(); + + let mut passed = 0; + let mut failed: Vec = Vec::new(); + let mut section_stats: std::collections::HashMap = + std::collections::HashMap::new(); + + let log_progress = std::env::var("CMARK_PROGRESS").is_ok(); + for (index, test) in tests.iter().enumerate() { + if log_progress { + println!( + "progress {}/{} example {} {}", + index + 1, + total, + test.example, + test.section + ); + } + let parsed = parse_markdown(&test.markdown); + let actual = document_to_html( + &parsed.tree(), + parsed.list_tightness(), + parsed.list_item_indents(), + parsed.quote_indents(), + ); + + let expected_normalized = normalize_html(&test.html); + let actual_normalized = normalize_html(&actual); + + let section_entry = section_stats.entry(test.section.clone()).or_insert((0, 0)); + section_entry.1 += 1; // total for section + + if expected_normalized == actual_normalized { + passed += 1; + section_entry.0 += 1; // passed for section + } else { + failed.push(FailedTest { + example: test.example, + section: test.section.clone(), + markdown: test.markdown.clone(), + expected: test.html.clone(), + actual, + }); + } + 
} + + // Print summary + println!("\n"); + println!("═══════════════════════════════════════════════════════════════════════════════"); + println!(" CommonMark Spec Compliance Report"); + println!("═══════════════════════════════════════════════════════════════════════════════"); + println!(); + println!( + "Overall: {}/{} ({:.1}%)", + passed, + total, + (passed as f64 / total as f64) * 100.0 + ); + println!(); + + // Print section breakdown + println!("Section Breakdown:"); + println!("─────────────────────────────────────────────────────────────────────────────────"); + + let mut sections: Vec<_> = section_stats.iter().collect(); + sections.sort_by_key(|(name, _)| *name); + + for (section, (section_passed, section_total)) in sections { + let pct = (*section_passed as f64 / *section_total as f64) * 100.0; + let status = if pct == 100.0 { + "✓" + } else if pct >= 80.0 { + "○" + } else { + "✗" + }; + println!( + " {} {:40} {:3}/{:3} ({:5.1}%)", + status, section, section_passed, section_total, pct + ); + } + println!(); + + // Print failures (limited to first 50) + if !failed.is_empty() { + println!("Failed Examples (showing first 50):"); + println!( + "─────────────────────────────────────────────────────────────────────────────────" + ); + + for (i, failure) in failed.iter().take(50).enumerate() { + println!(); + println!( + "{}. Example {} [{}]", + i + 1, + failure.example, + failure.section + ); + println!(" Input:"); + for line in failure.markdown.lines() { + println!(" │ {:?}", line); + } + println!(" Expected:"); + for line in failure.expected.lines() { + println!(" │ {}", line); + } + println!(" Actual:"); + for line in failure.actual.lines() { + println!(" │ {}", line); + } + println!(" Diff:"); + for line in diff(&failure.expected, &failure.actual).lines() { + println!(" │ {}", line); + } + } + + if failed.len() > 50 { + println!(); + println!("... 
and {} more failures", failed.len() - 50); + } + } + + println!(); + println!("═══════════════════════════════════════════════════════════════════════════════"); + + // For now, we don't fail the test - we're just measuring compliance + // Once we reach high compliance, we can enable this assertion + // assert!(passed == total, "Not all CommonMark spec tests pass"); + + // Report the overall result + let compliance_pct = (passed as f64 / total as f64) * 100.0; + if compliance_pct < 50.0 { + println!( + "WARNING: Compliance is below 50% ({:.1}%). Parser may need significant work.", + compliance_pct + ); + } +} + +/// Run a single example for debugging. +#[test] +#[ignore] +fn debug_single_example() { + let tests: Vec = serde_json::from_str(SPEC_JSON).expect("Failed to parse spec.json"); + + // Change this to debug a specific example + let example_num = 259; + + if let Some(test) = tests.iter().find(|t| t.example == example_num) { + println!("Example {}: {}", test.example, test.section); + println!("Markdown: {:?}", test.markdown); + println!(); + + let parsed = parse_markdown(&test.markdown); + + println!("AST:"); + println!("{:#?}", parsed.tree()); + println!(); + + println!("CST:"); + println!("{:#?}", parsed.syntax()); + println!(); + + if parsed.has_errors() { + println!("Parse errors:"); + for diag in parsed.diagnostics() { + println!(" - {:?}", diag); + } + println!(); + } + + println!("List tightness: {:?}", parsed.list_tightness()); + println!(); + + let actual = document_to_html( + &parsed.tree(), + parsed.list_tightness(), + parsed.list_item_indents(), + parsed.quote_indents(), + ); + + println!("Expected HTML:"); + println!("{}", test.html); + println!(); + + println!("Actual HTML:"); + println!("{}", actual); + println!(); + + let expected_normalized = normalize_html(&test.html); + let actual_normalized = normalize_html(&actual); + + if expected_normalized == actual_normalized { + println!("✓ PASS"); + } else { + println!("✗ FAIL"); + println!("Diff:"); 
+ println!("{}", diff(&test.html, &actual)); + } + } else { + println!("Example {} not found", example_num); + } +} + +/// Test specific sections for focused debugging. +#[test] +#[ignore] +fn debug_section() { + let tests: Vec = serde_json::from_str(SPEC_JSON).expect("Failed to parse spec.json"); + + // Change this to debug a specific section + let section = "List items"; + + let section_tests: Vec<_> = tests.iter().filter(|t| t.section == section).collect(); + + println!("Section: {} ({} tests)", section, section_tests.len()); + println!(); + + let mut passed = 0; + for test in §ion_tests { + let parsed = parse_markdown(&test.markdown); + let actual = document_to_html( + &parsed.tree(), + parsed.list_tightness(), + parsed.list_item_indents(), + parsed.quote_indents(), + ); + + let expected_normalized = normalize_html(&test.html); + let actual_normalized = normalize_html(&actual); + + if expected_normalized == actual_normalized { + passed += 1; + println!(" ✓ Example {}", test.example); + } else { + println!(" ✗ Example {}", test.example); + println!(" Input: {:?}", test.markdown); + println!(" Expected: {:?}", test.html); + println!(" Actual: {:?}", actual); + } + } + + println!(); + println!( + "Result: {}/{} ({:.1}%)", + passed, + section_tests.len(), + (passed as f64 / section_tests.len() as f64) * 100.0 + ); +} diff --git a/crates/biome_markdown_parser/tests/list_tightness.rs b/crates/biome_markdown_parser/tests/list_tightness.rs new file mode 100644 index 000000000000..3d1ee201e41c --- /dev/null +++ b/crates/biome_markdown_parser/tests/list_tightness.rs @@ -0,0 +1,32 @@ +use biome_markdown_parser::parse_markdown; + +#[test] +fn tracks_list_tightness() { + let source = "- a\n- b\n\npara\n\n- a\n\n b\n"; + let parse = parse_markdown(source); + let tightness = parse.list_tightness(); + + assert_eq!(tightness.len(), 2); + + let mut found_tight = false; + let mut found_loose = false; + + for entry in tightness { + let start: usize = entry.range.start().into(); + 
let end: usize = entry.range.end().into(); + let text = &source[start..end]; + + if text.contains("- a\n- b") { + assert!(entry.is_tight); + found_tight = true; + } + + if text.contains("- a\n\n b") { + assert!(!entry.is_tight); + found_loose = true; + } + } + + assert!(found_tight); + assert!(found_loose); +} diff --git a/crates/biome_markdown_parser/tests/md_test_suite/error/multiline_label_reference.md b/crates/biome_markdown_parser/tests/md_test_suite/error/multiline_label_reference.md new file mode 100644 index 000000000000..35341a3f3045 --- /dev/null +++ b/crates/biome_markdown_parser/tests/md_test_suite/error/multiline_label_reference.md @@ -0,0 +1,2 @@ +[foo +bar] diff --git a/crates/biome_markdown_parser/tests/md_test_suite/error/multiline_label_reference.md.snap b/crates/biome_markdown_parser/tests/md_test_suite/error/multiline_label_reference.md.snap new file mode 100644 index 000000000000..8c5585d74f9c --- /dev/null +++ b/crates/biome_markdown_parser/tests/md_test_suite/error/multiline_label_reference.md.snap @@ -0,0 +1,101 @@ +--- +source: crates/biome_markdown_parser/tests/spec_test.rs +expression: snapshot +--- +## Input + +``` +[foo +bar] + +``` + + +## AST + +``` +MdDocument { + bom_token: missing (optional), + value: MdBlockList [ + MdParagraph { + list: MdInlineItemList [ + MdReferenceLink { + l_brack_token: L_BRACK@0..1 "[" [] [], + text: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1..4 "foo" [] [], + }, + ], + r_brack_token: missing (required), + label: missing (optional), + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@4..5 "\n" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@5..8 "bar" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@8..9 "]" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@9..10 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + ], + eof_token: EOF@10..10 "" [] [], +} +``` + +## CST + +``` +0: MD_DOCUMENT@0..10 + 0: (empty) + 1: MD_BLOCK_LIST@0..10 + 0: 
MD_PARAGRAPH@0..10 + 0: MD_INLINE_ITEM_LIST@0..10 + 0: MD_REFERENCE_LINK@0..4 + 0: L_BRACK@0..1 "[" [] [] + 1: MD_INLINE_ITEM_LIST@1..4 + 0: MD_TEXTUAL@1..4 + 0: MD_TEXTUAL_LITERAL@1..4 "foo" [] [] + 2: (empty) + 3: (empty) + 1: MD_TEXTUAL@4..5 + 0: MD_TEXTUAL_LITERAL@4..5 "\n" [] [] + 2: MD_TEXTUAL@5..8 + 0: MD_TEXTUAL_LITERAL@5..8 "bar" [] [] + 3: MD_TEXTUAL@8..9 + 0: MD_TEXTUAL_LITERAL@8..9 "]" [] [] + 4: MD_TEXTUAL@9..10 + 0: MD_TEXTUAL_LITERAL@9..10 "\n" [] [] + 1: (empty) + 2: EOF@10..10 "" [] [] + +``` + +## Diagnostics + +``` +multiline_label_reference.md:1:1 parse ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ + + × Unclosed link, expected `]` to close link text. + + > 1 │ [foo + │ ^ + 2 │ bar] + 3 │ + + i link started here + + > 1 │ [foo + │ ^ + 2 │ bar] + 3 │ + + i Format: [link text](url) + +``` diff --git a/crates/biome_markdown_parser/tests/md_test_suite/error/quote_nesting_too_deep.md b/crates/biome_markdown_parser/tests/md_test_suite/error/quote_nesting_too_deep.md new file mode 100644 index 000000000000..a27418151d0b --- /dev/null +++ b/crates/biome_markdown_parser/tests/md_test_suite/error/quote_nesting_too_deep.md @@ -0,0 +1 @@ +>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> Too deep diff --git a/crates/biome_markdown_parser/tests/md_test_suite/error/quote_nesting_too_deep.md.snap b/crates/biome_markdown_parser/tests/md_test_suite/error/quote_nesting_too_deep.md.snap new file mode 100644 index 000000000000..1b95c0525e88 --- /dev/null +++ b/crates/biome_markdown_parser/tests/md_test_suite/error/quote_nesting_too_deep.md.snap @@ -0,0 +1,452 @@ +--- +source: crates/biome_markdown_parser/tests/spec_test.rs +expression: snapshot +--- +## Input + +``` +>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> Too deep + +``` + + +## AST + +``` +MdDocument { + bom_token: missing (optional), + value: MdBlockList [ + MdQuote { + marker_token: 
R_ANGLE@0..1 ">" [] [], + content: MdBlockList [ + MdQuote { + marker_token: R_ANGLE@1..2 ">" [] [], + content: MdBlockList [ + MdQuote { + marker_token: R_ANGLE@2..3 ">" [] [], + content: MdBlockList [ + MdQuote { + marker_token: R_ANGLE@3..4 ">" [] [], + content: MdBlockList [ + MdQuote { + marker_token: R_ANGLE@4..5 ">" [] [], + content: MdBlockList [ + MdQuote { + marker_token: R_ANGLE@5..6 ">" [] [], + content: MdBlockList [ + MdQuote { + marker_token: R_ANGLE@6..7 ">" [] [], + content: MdBlockList [ + MdQuote { + marker_token: R_ANGLE@7..8 ">" [] [], + content: MdBlockList [ + MdQuote { + marker_token: R_ANGLE@8..9 ">" [] [], + content: MdBlockList [ + MdQuote { + marker_token: R_ANGLE@9..10 ">" [] [], + content: MdBlockList [ + MdQuote { + marker_token: R_ANGLE@10..11 ">" [] [], + content: MdBlockList [ + MdQuote { + marker_token: R_ANGLE@11..12 ">" [] [], + content: MdBlockList [ + MdQuote { + marker_token: R_ANGLE@12..13 ">" [] [], + content: MdBlockList [ + MdQuote { + marker_token: R_ANGLE@13..14 ">" [] [], + content: MdBlockList [ + MdQuote { + marker_token: R_ANGLE@14..15 ">" [] [], + content: MdBlockList [ + MdQuote { + marker_token: R_ANGLE@15..16 ">" [] [], + content: MdBlockList [ + MdQuote, + ], + }, + ], + }, + ], + }, + ], + }, + ], + }, + ], + }, + ], + }, + ], + }, + ], + }, + ], + }, + ], + }, + ], + }, + ], + }, + ], + }, + ], + }, + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@100..110 "Too deep" [Skipped(">"), Skipped(" ")] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@110..111 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + ], + }, + ], + eof_token: EOF@111..111 "" [] [], +} +``` + +## CST + +``` +0: MD_DOCUMENT@0..111 + 0: (empty) + 1: MD_BLOCK_LIST@0..111 + 0: MD_QUOTE@0..111 + 0: R_ANGLE@0..1 ">" [] [] + 1: MD_BLOCK_LIST@1..111 + 0: MD_QUOTE@1..100 + 0: R_ANGLE@1..2 ">" [] [] + 1: MD_BLOCK_LIST@2..100 + 0: MD_QUOTE@2..100 + 0: R_ANGLE@2..3 ">" [] [] + 1: 
MD_BLOCK_LIST@3..100 + 0: MD_QUOTE@3..100 + 0: R_ANGLE@3..4 ">" [] [] + 1: MD_BLOCK_LIST@4..100 + 0: MD_QUOTE@4..100 + 0: R_ANGLE@4..5 ">" [] [] + 1: MD_BLOCK_LIST@5..100 + 0: MD_QUOTE@5..100 + 0: R_ANGLE@5..6 ">" [] [] + 1: MD_BLOCK_LIST@6..100 + 0: MD_QUOTE@6..100 + 0: R_ANGLE@6..7 ">" [] [] + 1: MD_BLOCK_LIST@7..100 + 0: MD_QUOTE@7..100 + 0: R_ANGLE@7..8 ">" [] [] + 1: MD_BLOCK_LIST@8..100 + 0: MD_QUOTE@8..100 + 0: R_ANGLE@8..9 ">" [] [] + 1: MD_BLOCK_LIST@9..100 + 0: MD_QUOTE@9..100 + 0: R_ANGLE@9..10 ">" [] [] + 1: MD_BLOCK_LIST@10..100 + 0: MD_QUOTE@10..100 + 0: R_ANGLE@10..11 ">" [] [] + 1: MD_BLOCK_LIST@11..100 + 0: MD_QUOTE@11..100 + 0: R_ANGLE@11..12 ">" [] [] + 1: MD_BLOCK_LIST@12..100 + 0: MD_QUOTE@12..100 + 0: R_ANGLE@12..13 ">" [] [] + 1: MD_BLOCK_LIST@13..100 + 0: MD_QUOTE@13..100 + 0: R_ANGLE@13..14 ">" [] [] + 1: MD_BLOCK_LIST@14..100 + 0: MD_QUOTE@14..100 + 0: R_ANGLE@14..15 ">" [] [] + 1: MD_BLOCK_LIST@15..100 + 0: MD_QUOTE@15..100 + 0: R_ANGLE@15..16 ">" [] [] + 1: MD_BLOCK_LIST@16..100 + 0: MD_QUOTE@16..100 + 0: R_ANGLE@16..17 ">" [] [] + 1: MD_BLOCK_LIST@17..100 + 0: MD_QUOTE@17..100 + 0: R_ANGLE@17..18 ">" [] [] + 1: MD_BLOCK_LIST@18..100 + 0: MD_QUOTE@18..100 + 0: R_ANGLE@18..19 ">" [] [] + 1: MD_BLOCK_LIST@19..100 + 0: MD_QUOTE@19..100 + 0: R_ANGLE@19..20 ">" [] [] + 1: MD_BLOCK_LIST@20..100 + 0: MD_QUOTE@20..100 + 0: R_ANGLE@20..21 ">" [] [] + 1: MD_BLOCK_LIST@21..100 + 0: MD_QUOTE@21..100 + 0: R_ANGLE@21..22 ">" [] [] + 1: MD_BLOCK_LIST@22..100 + 0: MD_QUOTE@22..100 + 0: R_ANGLE@22..23 ">" [] [] + 1: MD_BLOCK_LIST@23..100 + 0: MD_QUOTE@23..100 + 0: R_ANGLE@23..24 ">" [] [] + 1: MD_BLOCK_LIST@24..100 + 0: MD_QUOTE@24..100 + 0: R_ANGLE@24..25 ">" [] [] + 1: MD_BLOCK_LIST@25..100 + 0: MD_QUOTE@25..100 + 0: R_ANGLE@25..26 ">" [] [] + 1: MD_BLOCK_LIST@26..100 + 0: MD_QUOTE@26..100 + 0: R_ANGLE@26..27 ">" [] [] + 1: MD_BLOCK_LIST@27..100 + 0: MD_QUOTE@27..100 + 0: R_ANGLE@27..28 ">" [] [] + 1: MD_BLOCK_LIST@28..100 + 0: MD_QUOTE@28..100 + 0: 
R_ANGLE@28..29 ">" [] [] + 1: MD_BLOCK_LIST@29..100 + 0: MD_QUOTE@29..100 + 0: R_ANGLE@29..30 ">" [] [] + 1: MD_BLOCK_LIST@30..100 + 0: MD_QUOTE@30..100 + 0: R_ANGLE@30..31 ">" [] [] + 1: MD_BLOCK_LIST@31..100 + 0: MD_QUOTE@31..100 + 0: R_ANGLE@31..32 ">" [] [] + 1: MD_BLOCK_LIST@32..100 + 0: MD_QUOTE@32..100 + 0: R_ANGLE@32..33 ">" [] [] + 1: MD_BLOCK_LIST@33..100 + 0: MD_QUOTE@33..100 + 0: R_ANGLE@33..34 ">" [] [] + 1: MD_BLOCK_LIST@34..100 + 0: MD_QUOTE@34..100 + 0: R_ANGLE@34..35 ">" [] [] + 1: MD_BLOCK_LIST@35..100 + 0: MD_QUOTE@35..100 + 0: R_ANGLE@35..36 ">" [] [] + 1: MD_BLOCK_LIST@36..100 + 0: MD_QUOTE@36..100 + 0: R_ANGLE@36..37 ">" [] [] + 1: MD_BLOCK_LIST@37..100 + 0: MD_QUOTE@37..100 + 0: R_ANGLE@37..38 ">" [] [] + 1: MD_BLOCK_LIST@38..100 + 0: MD_QUOTE@38..100 + 0: R_ANGLE@38..39 ">" [] [] + 1: MD_BLOCK_LIST@39..100 + 0: MD_QUOTE@39..100 + 0: R_ANGLE@39..40 ">" [] [] + 1: MD_BLOCK_LIST@40..100 + 0: MD_QUOTE@40..100 + 0: R_ANGLE@40..41 ">" [] [] + 1: MD_BLOCK_LIST@41..100 + 0: MD_QUOTE@41..100 + 0: R_ANGLE@41..42 ">" [] [] + 1: MD_BLOCK_LIST@42..100 + 0: MD_QUOTE@42..100 + 0: R_ANGLE@42..43 ">" [] [] + 1: MD_BLOCK_LIST@43..100 + 0: MD_QUOTE@43..100 + 0: R_ANGLE@43..44 ">" [] [] + 1: MD_BLOCK_LIST@44..100 + 0: MD_QUOTE@44..100 + 0: R_ANGLE@44..45 ">" [] [] + 1: MD_BLOCK_LIST@45..100 + 0: MD_QUOTE@45..100 + 0: R_ANGLE@45..46 ">" [] [] + 1: MD_BLOCK_LIST@46..100 + 0: MD_QUOTE@46..100 + 0: R_ANGLE@46..47 ">" [] [] + 1: MD_BLOCK_LIST@47..100 + 0: MD_QUOTE@47..100 + 0: R_ANGLE@47..48 ">" [] [] + 1: MD_BLOCK_LIST@48..100 + 0: MD_QUOTE@48..100 + 0: R_ANGLE@48..49 ">" [] [] + 1: MD_BLOCK_LIST@49..100 + 0: MD_QUOTE@49..100 + 0: R_ANGLE@49..50 ">" [] [] + 1: MD_BLOCK_LIST@50..100 + 0: MD_QUOTE@50..100 + 0: R_ANGLE@50..51 ">" [] [] + 1: MD_BLOCK_LIST@51..100 + 0: MD_QUOTE@51..100 + 0: R_ANGLE@51..52 ">" [] [] + 1: MD_BLOCK_LIST@52..100 + 0: MD_QUOTE@52..100 + 0: R_ANGLE@52..53 ">" [] [] + 1: MD_BLOCK_LIST@53..100 + 0: MD_QUOTE@53..100 + 0: R_ANGLE@53..54 ">" [] [] 
+ 1: MD_BLOCK_LIST@54..100 + 0: MD_QUOTE@54..100 + 0: R_ANGLE@54..55 ">" [] [] + 1: MD_BLOCK_LIST@55..100 + 0: MD_QUOTE@55..100 + 0: R_ANGLE@55..56 ">" [] [] + 1: MD_BLOCK_LIST@56..100 + 0: MD_QUOTE@56..100 + 0: R_ANGLE@56..57 ">" [] [] + 1: MD_BLOCK_LIST@57..100 + 0: MD_QUOTE@57..100 + 0: R_ANGLE@57..58 ">" [] [] + 1: MD_BLOCK_LIST@58..100 + 0: MD_QUOTE@58..100 + 0: R_ANGLE@58..59 ">" [] [] + 1: MD_BLOCK_LIST@59..100 + 0: MD_QUOTE@59..100 + 0: R_ANGLE@59..60 ">" [] [] + 1: MD_BLOCK_LIST@60..100 + 0: MD_QUOTE@60..100 + 0: R_ANGLE@60..61 ">" [] [] + 1: MD_BLOCK_LIST@61..100 + 0: MD_QUOTE@61..100 + 0: R_ANGLE@61..62 ">" [] [] + 1: MD_BLOCK_LIST@62..100 + 0: MD_QUOTE@62..100 + 0: R_ANGLE@62..63 ">" [] [] + 1: MD_BLOCK_LIST@63..100 + 0: MD_QUOTE@63..100 + 0: R_ANGLE@63..64 ">" [] [] + 1: MD_BLOCK_LIST@64..100 + 0: MD_QUOTE@64..100 + 0: R_ANGLE@64..65 ">" [] [] + 1: MD_BLOCK_LIST@65..100 + 0: MD_QUOTE@65..100 + 0: R_ANGLE@65..66 ">" [] [] + 1: MD_BLOCK_LIST@66..100 + 0: MD_QUOTE@66..100 + 0: R_ANGLE@66..67 ">" [] [] + 1: MD_BLOCK_LIST@67..100 + 0: MD_QUOTE@67..100 + 0: R_ANGLE@67..68 ">" [] [] + 1: MD_BLOCK_LIST@68..100 + 0: MD_QUOTE@68..100 + 0: R_ANGLE@68..69 ">" [] [] + 1: MD_BLOCK_LIST@69..100 + 0: MD_QUOTE@69..100 + 0: R_ANGLE@69..70 ">" [] [] + 1: MD_BLOCK_LIST@70..100 + 0: MD_QUOTE@70..100 + 0: R_ANGLE@70..71 ">" [] [] + 1: MD_BLOCK_LIST@71..100 + 0: MD_QUOTE@71..100 + 0: R_ANGLE@71..72 ">" [] [] + 1: MD_BLOCK_LIST@72..100 + 0: MD_QUOTE@72..100 + 0: R_ANGLE@72..73 ">" [] [] + 1: MD_BLOCK_LIST@73..100 + 0: MD_QUOTE@73..100 + 0: R_ANGLE@73..74 ">" [] [] + 1: MD_BLOCK_LIST@74..100 + 0: MD_QUOTE@74..100 + 0: R_ANGLE@74..75 ">" [] [] + 1: MD_BLOCK_LIST@75..100 + 0: MD_QUOTE@75..100 + 0: R_ANGLE@75..76 ">" [] [] + 1: MD_BLOCK_LIST@76..100 + 0: MD_QUOTE@76..100 + 0: R_ANGLE@76..77 ">" [] [] + 1: MD_BLOCK_LIST@77..100 + 0: MD_QUOTE@77..100 + 0: R_ANGLE@77..78 ">" [] [] + 1: MD_BLOCK_LIST@78..100 + 0: MD_QUOTE@78..100 + 0: R_ANGLE@78..79 ">" [] [] + 1: 
MD_BLOCK_LIST@79..100 + 0: MD_QUOTE@79..100 + 0: R_ANGLE@79..80 ">" [] [] + 1: MD_BLOCK_LIST@80..100 + 0: MD_QUOTE@80..100 + 0: R_ANGLE@80..81 ">" [] [] + 1: MD_BLOCK_LIST@81..100 + 0: MD_QUOTE@81..100 + 0: R_ANGLE@81..82 ">" [] [] + 1: MD_BLOCK_LIST@82..100 + 0: MD_QUOTE@82..100 + 0: R_ANGLE@82..83 ">" [] [] + 1: MD_BLOCK_LIST@83..100 + 0: MD_QUOTE@83..100 + 0: R_ANGLE@83..84 ">" [] [] + 1: MD_BLOCK_LIST@84..100 + 0: MD_QUOTE@84..100 + 0: R_ANGLE@84..85 ">" [] [] + 1: MD_BLOCK_LIST@85..100 + 0: MD_QUOTE@85..100 + 0: R_ANGLE@85..86 ">" [] [] + 1: MD_BLOCK_LIST@86..100 + 0: MD_QUOTE@86..100 + 0: R_ANGLE@86..87 ">" [] [] + 1: MD_BLOCK_LIST@87..100 + 0: MD_QUOTE@87..100 + 0: R_ANGLE@87..88 ">" [] [] + 1: MD_BLOCK_LIST@88..100 + 0: MD_QUOTE@88..100 + 0: R_ANGLE@88..89 ">" [] [] + 1: MD_BLOCK_LIST@89..100 + 0: MD_QUOTE@89..100 + 0: R_ANGLE@89..90 ">" [] [] + 1: MD_BLOCK_LIST@90..100 + 0: MD_QUOTE@90..100 + 0: R_ANGLE@90..91 ">" [] [] + 1: MD_BLOCK_LIST@91..100 + 0: MD_QUOTE@91..100 + 0: R_ANGLE@91..92 ">" [] [] + 1: MD_BLOCK_LIST@92..100 + 0: MD_QUOTE@92..100 + 0: R_ANGLE@92..93 ">" [] [] + 1: MD_BLOCK_LIST@93..100 + 0: MD_QUOTE@93..100 + 0: R_ANGLE@93..94 ">" [] [] + 1: MD_BLOCK_LIST@94..100 + 0: MD_QUOTE@94..100 + 0: R_ANGLE@94..95 ">" [] [] + 1: MD_BLOCK_LIST@95..100 + 0: MD_QUOTE@95..100 + 0: R_ANGLE@95..96 ">" [] [] + 1: MD_BLOCK_LIST@96..100 + 0: MD_QUOTE@96..100 + 0: R_ANGLE@96..97 ">" [] [] + 1: MD_BLOCK_LIST@97..100 + 0: MD_QUOTE@97..100 + 0: R_ANGLE@97..98 ">" [] [] + 1: MD_BLOCK_LIST@98..100 + 0: MD_QUOTE@98..100 + 0: R_ANGLE@98..99 ">" [] [] + 1: MD_BLOCK_LIST@99..100 + 0: MD_QUOTE@99..100 + 0: R_ANGLE@99..100 ">" [] [] + 1: MD_BLOCK_LIST@100..100 + 1: MD_PARAGRAPH@100..111 + 0: MD_INLINE_ITEM_LIST@100..111 + 0: MD_TEXTUAL@100..110 + 0: MD_TEXTUAL_LITERAL@100..110 "Too deep" [Skipped(">"), Skipped(" ")] [] + 1: MD_TEXTUAL@110..111 + 0: MD_TEXTUAL_LITERAL@110..111 "\n" [] [] + 1: (empty) + 2: EOF@111..111 "" [] [] + +``` + +## Diagnostics + +``` 
+quote_nesting_too_deep.md:1:101 parse ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ + + × Block quote nesting exceeds maximum depth of 100. + + > 1 │ >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> Too deep + │ ^ + 2 │ + + i nesting limit reached here + + > 1 │ >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> Too deep + │ ^ + 2 │ + + i Reduce nesting depth. Additional levels will be treated as content. + +``` diff --git a/crates/biome_markdown_parser/tests/md_test_suite/error/too_many_hashes.md b/crates/biome_markdown_parser/tests/md_test_suite/error/too_many_hashes.md new file mode 100644 index 000000000000..358e3305eec3 --- /dev/null +++ b/crates/biome_markdown_parser/tests/md_test_suite/error/too_many_hashes.md @@ -0,0 +1 @@ +####### This has too many hashes diff --git a/crates/biome_markdown_parser/tests/md_test_suite/error/too_many_hashes.md.snap b/crates/biome_markdown_parser/tests/md_test_suite/error/too_many_hashes.md.snap new file mode 100644 index 000000000000..513faad7c059 --- /dev/null +++ b/crates/biome_markdown_parser/tests/md_test_suite/error/too_many_hashes.md.snap @@ -0,0 +1,106 @@ +--- +source: crates/biome_markdown_parser/tests/spec_test.rs +expression: snapshot +--- +## Input + +``` +####### This has too many hashes + +``` + + +## AST + +``` +MdDocument { + bom_token: missing (optional), + value: MdBlockList [ + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@0..1 "#" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1..2 "#" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@2..3 "#" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@3..4 "#" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@4..5 "#" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@5..6 "#" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@6..7 "#" [] [], + 
}, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@7..32 " This has too many hashes" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@32..33 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + ], + eof_token: EOF@33..33 "" [] [], +} +``` + +## CST + +``` +0: MD_DOCUMENT@0..33 + 0: (empty) + 1: MD_BLOCK_LIST@0..33 + 0: MD_PARAGRAPH@0..33 + 0: MD_INLINE_ITEM_LIST@0..33 + 0: MD_TEXTUAL@0..1 + 0: MD_TEXTUAL_LITERAL@0..1 "#" [] [] + 1: MD_TEXTUAL@1..2 + 0: MD_TEXTUAL_LITERAL@1..2 "#" [] [] + 2: MD_TEXTUAL@2..3 + 0: MD_TEXTUAL_LITERAL@2..3 "#" [] [] + 3: MD_TEXTUAL@3..4 + 0: MD_TEXTUAL_LITERAL@3..4 "#" [] [] + 4: MD_TEXTUAL@4..5 + 0: MD_TEXTUAL_LITERAL@4..5 "#" [] [] + 5: MD_TEXTUAL@5..6 + 0: MD_TEXTUAL_LITERAL@5..6 "#" [] [] + 6: MD_TEXTUAL@6..7 + 0: MD_TEXTUAL_LITERAL@6..7 "#" [] [] + 7: MD_TEXTUAL@7..32 + 0: MD_TEXTUAL_LITERAL@7..32 " This has too many hashes" [] [] + 8: MD_TEXTUAL@32..33 + 0: MD_TEXTUAL_LITERAL@32..33 "\n" [] [] + 1: (empty) + 2: EOF@33..33 "" [] [] + +``` + +## Diagnostics + +``` +too_many_hashes.md:1:1 parse ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ + + × ATX heading has 7 hashes, but maximum is 6. + + > 1 │ ####### This has too many hashes + │ ^^^^^^^ + 2 │ + + i heading started here + + > 1 │ ####### This has too many hashes + │ ^^^^^^^ + 2 │ + + i Use 1-6 `#` characters for headings. This will be parsed as a paragraph. 
+ +``` diff --git a/crates/biome_markdown_parser/tests/md_test_suite/error/unclosed_code_span.md b/crates/biome_markdown_parser/tests/md_test_suite/error/unclosed_code_span.md new file mode 100644 index 000000000000..ec06cc66f72d --- /dev/null +++ b/crates/biome_markdown_parser/tests/md_test_suite/error/unclosed_code_span.md @@ -0,0 +1 @@ +This has `unclosed code diff --git a/crates/biome_markdown_parser/tests/md_test_suite/error/unclosed_code_span.md.snap b/crates/biome_markdown_parser/tests/md_test_suite/error/unclosed_code_span.md.snap new file mode 100644 index 000000000000..f2d764c1ed96 --- /dev/null +++ b/crates/biome_markdown_parser/tests/md_test_suite/error/unclosed_code_span.md.snap @@ -0,0 +1,86 @@ +--- +source: crates/biome_markdown_parser/tests/spec_test.rs +expression: snapshot +--- +## Input + +``` +This has `unclosed code + +``` + + +## AST + +``` +MdDocument { + bom_token: missing (optional), + value: MdBlockList [ + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@0..9 "This has " [] [], + }, + MdInlineCode { + l_tick_token: BACKTICK@9..10 "`" [] [], + content: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@10..23 "unclosed code" [] [], + }, + ], + r_tick_token: missing (required), + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@23..24 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + ], + eof_token: EOF@24..24 "" [] [], +} +``` + +## CST + +``` +0: MD_DOCUMENT@0..24 + 0: (empty) + 1: MD_BLOCK_LIST@0..24 + 0: MD_PARAGRAPH@0..24 + 0: MD_INLINE_ITEM_LIST@0..24 + 0: MD_TEXTUAL@0..9 + 0: MD_TEXTUAL_LITERAL@0..9 "This has " [] [] + 1: MD_INLINE_CODE@9..23 + 0: BACKTICK@9..10 "`" [] [] + 1: MD_INLINE_ITEM_LIST@10..23 + 0: MD_TEXTUAL@10..23 + 0: MD_TEXTUAL_LITERAL@10..23 "unclosed code" [] [] + 2: (empty) + 2: MD_TEXTUAL@23..24 + 0: MD_TEXTUAL_LITERAL@23..24 "\n" [] [] + 1: (empty) + 2: EOF@24..24 "" [] [] + +``` + +## Diagnostics + +``` +unclosed_code_span.md:1:10 parse 
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ + + × Unclosed code span, expected closing ```. + + > 1 │ This has `unclosed code + │ ^ + 2 │ + + i code span started here + + > 1 │ This has `unclosed code + │ ^ + 2 │ + + i Add closing ``` to close the code span. + +``` diff --git a/crates/biome_markdown_parser/tests/md_test_suite/error/unclosed_image.md b/crates/biome_markdown_parser/tests/md_test_suite/error/unclosed_image.md new file mode 100644 index 000000000000..eb686c5bb932 --- /dev/null +++ b/crates/biome_markdown_parser/tests/md_test_suite/error/unclosed_image.md @@ -0,0 +1 @@ +This has ![unclosed image diff --git a/crates/biome_markdown_parser/tests/md_test_suite/error/unclosed_image.md.snap b/crates/biome_markdown_parser/tests/md_test_suite/error/unclosed_image.md.snap new file mode 100644 index 000000000000..44989423c775 --- /dev/null +++ b/crates/biome_markdown_parser/tests/md_test_suite/error/unclosed_image.md.snap @@ -0,0 +1,90 @@ +--- +source: crates/biome_markdown_parser/tests/spec_test.rs +expression: snapshot +--- +## Input + +``` +This has ![unclosed image + +``` + + +## AST + +``` +MdDocument { + bom_token: missing (optional), + value: MdBlockList [ + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@0..9 "This has " [] [], + }, + MdReferenceImage { + excl_token: BANG@9..10 "!" 
[] [], + l_brack_token: L_BRACK@10..11 "[" [] [], + alt: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@11..25 "unclosed image" [] [], + }, + ], + r_brack_token: missing (required), + label: missing (optional), + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@25..26 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + ], + eof_token: EOF@26..26 "" [] [], +} +``` + +## CST + +``` +0: MD_DOCUMENT@0..26 + 0: (empty) + 1: MD_BLOCK_LIST@0..26 + 0: MD_PARAGRAPH@0..26 + 0: MD_INLINE_ITEM_LIST@0..26 + 0: MD_TEXTUAL@0..9 + 0: MD_TEXTUAL_LITERAL@0..9 "This has " [] [] + 1: MD_REFERENCE_IMAGE@9..25 + 0: BANG@9..10 "!" [] [] + 1: L_BRACK@10..11 "[" [] [] + 2: MD_INLINE_ITEM_LIST@11..25 + 0: MD_TEXTUAL@11..25 + 0: MD_TEXTUAL_LITERAL@11..25 "unclosed image" [] [] + 3: (empty) + 4: (empty) + 2: MD_TEXTUAL@25..26 + 0: MD_TEXTUAL_LITERAL@25..26 "\n" [] [] + 1: (empty) + 2: EOF@26..26 "" [] [] + +``` + +## Diagnostics + +``` +unclosed_image.md:1:10 parse ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ + + × Unclosed image, expected `]` to close alt text. 
+ + > 1 │ This has ![unclosed image + │ ^ + 2 │ + + i image started here + + > 1 │ This has ![unclosed image + │ ^ + 2 │ + + i Format: ![alt text](image-url) + +``` diff --git a/crates/biome_markdown_parser/tests/md_test_suite/error/unclosed_link.md b/crates/biome_markdown_parser/tests/md_test_suite/error/unclosed_link.md new file mode 100644 index 000000000000..91bfd8656e0d --- /dev/null +++ b/crates/biome_markdown_parser/tests/md_test_suite/error/unclosed_link.md @@ -0,0 +1 @@ +This has [unclosed link diff --git a/crates/biome_markdown_parser/tests/md_test_suite/error/unclosed_link.md.snap b/crates/biome_markdown_parser/tests/md_test_suite/error/unclosed_link.md.snap new file mode 100644 index 000000000000..747cc26d8ea7 --- /dev/null +++ b/crates/biome_markdown_parser/tests/md_test_suite/error/unclosed_link.md.snap @@ -0,0 +1,88 @@ +--- +source: crates/biome_markdown_parser/tests/spec_test.rs +expression: snapshot +--- +## Input + +``` +This has [unclosed link + +``` + + +## AST + +``` +MdDocument { + bom_token: missing (optional), + value: MdBlockList [ + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@0..9 "This has " [] [], + }, + MdReferenceLink { + l_brack_token: L_BRACK@9..10 "[" [] [], + text: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@10..23 "unclosed link" [] [], + }, + ], + r_brack_token: missing (required), + label: missing (optional), + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@23..24 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + ], + eof_token: EOF@24..24 "" [] [], +} +``` + +## CST + +``` +0: MD_DOCUMENT@0..24 + 0: (empty) + 1: MD_BLOCK_LIST@0..24 + 0: MD_PARAGRAPH@0..24 + 0: MD_INLINE_ITEM_LIST@0..24 + 0: MD_TEXTUAL@0..9 + 0: MD_TEXTUAL_LITERAL@0..9 "This has " [] [] + 1: MD_REFERENCE_LINK@9..23 + 0: L_BRACK@9..10 "[" [] [] + 1: MD_INLINE_ITEM_LIST@10..23 + 0: MD_TEXTUAL@10..23 + 0: MD_TEXTUAL_LITERAL@10..23 "unclosed link" [] [] + 2: (empty) + 3: (empty) + 2: 
MD_TEXTUAL@23..24 + 0: MD_TEXTUAL_LITERAL@23..24 "\n" [] [] + 1: (empty) + 2: EOF@24..24 "" [] [] + +``` + +## Diagnostics + +``` +unclosed_link.md:1:10 parse ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ + + × Unclosed link, expected `]` to close link text. + + > 1 │ This has [unclosed link + │ ^ + 2 │ + + i link started here + + > 1 │ This has [unclosed link + │ ^ + 2 │ + + i Format: [link text](url) + +``` diff --git a/crates/biome_markdown_parser/tests/md_test_suite/error/unclosed_reference_image_label.md b/crates/biome_markdown_parser/tests/md_test_suite/error/unclosed_reference_image_label.md new file mode 100644 index 000000000000..28913e5cc8c0 --- /dev/null +++ b/crates/biome_markdown_parser/tests/md_test_suite/error/unclosed_reference_image_label.md @@ -0,0 +1 @@ +This has ![alt][unclosed label diff --git a/crates/biome_markdown_parser/tests/md_test_suite/error/unclosed_reference_image_label.md.snap b/crates/biome_markdown_parser/tests/md_test_suite/error/unclosed_reference_image_label.md.snap new file mode 100644 index 000000000000..16bbacb7e23b --- /dev/null +++ b/crates/biome_markdown_parser/tests/md_test_suite/error/unclosed_reference_image_label.md.snap @@ -0,0 +1,109 @@ +--- +source: crates/biome_markdown_parser/tests/spec_test.rs +assertion_line: 131 +expression: snapshot +--- +## Input + +``` +This has ![alt][unclosed label + +``` + + +## AST + +``` +MdDocument { + bom_token: missing (optional), + value: MdBlockList [ + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@0..9 "This has " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@9..10 "!" 
[] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@10..11 "[" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@11..14 "alt" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@14..15 "]" [] [], + }, + MdReferenceLink { + l_brack_token: L_BRACK@15..16 "[" [] [], + text: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@16..30 "unclosed label" [] [], + }, + ], + r_brack_token: missing (required), + label: missing (optional), + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@30..31 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + ], + eof_token: EOF@31..31 "" [] [], +} +``` + +## CST + +``` +0: MD_DOCUMENT@0..31 + 0: (empty) + 1: MD_BLOCK_LIST@0..31 + 0: MD_PARAGRAPH@0..31 + 0: MD_INLINE_ITEM_LIST@0..31 + 0: MD_TEXTUAL@0..9 + 0: MD_TEXTUAL_LITERAL@0..9 "This has " [] [] + 1: MD_TEXTUAL@9..10 + 0: MD_TEXTUAL_LITERAL@9..10 "!" [] [] + 2: MD_TEXTUAL@10..11 + 0: MD_TEXTUAL_LITERAL@10..11 "[" [] [] + 3: MD_TEXTUAL@11..14 + 0: MD_TEXTUAL_LITERAL@11..14 "alt" [] [] + 4: MD_TEXTUAL@14..15 + 0: MD_TEXTUAL_LITERAL@14..15 "]" [] [] + 5: MD_REFERENCE_LINK@15..30 + 0: L_BRACK@15..16 "[" [] [] + 1: MD_INLINE_ITEM_LIST@16..30 + 0: MD_TEXTUAL@16..30 + 0: MD_TEXTUAL_LITERAL@16..30 "unclosed label" [] [] + 2: (empty) + 3: (empty) + 6: MD_TEXTUAL@30..31 + 0: MD_TEXTUAL_LITERAL@30..31 "\n" [] [] + 1: (empty) + 2: EOF@31..31 "" [] [] + +``` + +## Diagnostics + +``` +unclosed_reference_image_label.md:1:16 parse ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ + + × Unclosed link, expected `]` to close link text. 
+ + > 1 │ This has ![alt][unclosed label + │ ^ + 2 │ + + i link started here + + > 1 │ This has ![alt][unclosed label + │ ^ + 2 │ + + i Format: [link text](url) + +``` diff --git a/crates/biome_markdown_parser/tests/md_test_suite/error/unclosed_reference_link_label.md b/crates/biome_markdown_parser/tests/md_test_suite/error/unclosed_reference_link_label.md new file mode 100644 index 000000000000..3aaa9c5bf1fd --- /dev/null +++ b/crates/biome_markdown_parser/tests/md_test_suite/error/unclosed_reference_link_label.md @@ -0,0 +1 @@ +This has [text][unclosed label diff --git a/crates/biome_markdown_parser/tests/md_test_suite/error/unclosed_reference_link_label.md.snap b/crates/biome_markdown_parser/tests/md_test_suite/error/unclosed_reference_link_label.md.snap new file mode 100644 index 000000000000..8beb02c90bfd --- /dev/null +++ b/crates/biome_markdown_parser/tests/md_test_suite/error/unclosed_reference_link_label.md.snap @@ -0,0 +1,104 @@ +--- +source: crates/biome_markdown_parser/tests/spec_test.rs +assertion_line: 131 +expression: snapshot +--- +## Input + +``` +This has [text][unclosed label + +``` + + +## AST + +``` +MdDocument { + bom_token: missing (optional), + value: MdBlockList [ + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@0..9 "This has " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@9..10 "[" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@10..14 "text" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@14..15 "]" [] [], + }, + MdReferenceLink { + l_brack_token: L_BRACK@15..16 "[" [] [], + text: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@16..30 "unclosed label" [] [], + }, + ], + r_brack_token: missing (required), + label: missing (optional), + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@30..31 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + ], + eof_token: EOF@31..31 "" [] [], +} +``` + +## CST + +``` +0: MD_DOCUMENT@0..31 + 0: 
(empty) + 1: MD_BLOCK_LIST@0..31 + 0: MD_PARAGRAPH@0..31 + 0: MD_INLINE_ITEM_LIST@0..31 + 0: MD_TEXTUAL@0..9 + 0: MD_TEXTUAL_LITERAL@0..9 "This has " [] [] + 1: MD_TEXTUAL@9..10 + 0: MD_TEXTUAL_LITERAL@9..10 "[" [] [] + 2: MD_TEXTUAL@10..14 + 0: MD_TEXTUAL_LITERAL@10..14 "text" [] [] + 3: MD_TEXTUAL@14..15 + 0: MD_TEXTUAL_LITERAL@14..15 "]" [] [] + 4: MD_REFERENCE_LINK@15..30 + 0: L_BRACK@15..16 "[" [] [] + 1: MD_INLINE_ITEM_LIST@16..30 + 0: MD_TEXTUAL@16..30 + 0: MD_TEXTUAL_LITERAL@16..30 "unclosed label" [] [] + 2: (empty) + 3: (empty) + 5: MD_TEXTUAL@30..31 + 0: MD_TEXTUAL_LITERAL@30..31 "\n" [] [] + 1: (empty) + 2: EOF@31..31 "" [] [] + +``` + +## Diagnostics + +``` +unclosed_reference_link_label.md:1:16 parse ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ + + × Unclosed link, expected `]` to close link text. + + > 1 │ This has [text][unclosed label + │ ^ + 2 │ + + i link started here + + > 1 │ This has [text][unclosed label + │ ^ + 2 │ + + i Format: [link text](url) + +``` diff --git a/crates/biome_markdown_parser/tests/md_test_suite/error/unterminated_code_fence.md b/crates/biome_markdown_parser/tests/md_test_suite/error/unterminated_code_fence.md new file mode 100644 index 000000000000..b8a7be054e98 --- /dev/null +++ b/crates/biome_markdown_parser/tests/md_test_suite/error/unterminated_code_fence.md @@ -0,0 +1,4 @@ +```rust +fn main() { + println!("Hello"); +} diff --git a/crates/biome_markdown_parser/tests/md_test_suite/error/unterminated_code_fence.md.snap b/crates/biome_markdown_parser/tests/md_test_suite/error/unterminated_code_fence.md.snap new file mode 100644 index 000000000000..30c1cce52de8 --- /dev/null +++ b/crates/biome_markdown_parser/tests/md_test_suite/error/unterminated_code_fence.md.snap @@ -0,0 +1,172 @@ +--- +source: crates/biome_markdown_parser/tests/spec_test.rs +assertion_line: 131 +expression: snapshot +--- +## Input + +``` +```rust +fn main() { + println!("Hello"); +} + +``` + + +## AST + +``` +MdDocument { + bom_token: 
missing (optional), + value: MdBlockList [ + MdFencedCodeBlock { + l_fence: TRIPLE_BACKTICK@0..3 "```" [] [], + code_list: MdCodeNameList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@3..7 "rust" [] [], + }, + ], + content: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@7..8 "\n" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@8..15 "fn main" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@15..16 "(" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@16..17 ")" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@17..19 " {" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@19..20 "\n" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@20..21 " " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@21..22 " " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@22..23 " " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@23..24 " " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@24..31 "println" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@31..32 "!" 
[] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@32..33 "(" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@33..40 "\"Hello\"" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@40..41 ")" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@41..42 ";" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@42..43 "\n" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@43..44 "}" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@44..45 "\n" [] [], + }, + ], + r_fence: missing (required), + }, + ], + eof_token: EOF@45..45 "" [] [], +} +``` + +## CST + +``` +0: MD_DOCUMENT@0..45 + 0: (empty) + 1: MD_BLOCK_LIST@0..45 + 0: MD_FENCED_CODE_BLOCK@0..45 + 0: TRIPLE_BACKTICK@0..3 "```" [] [] + 1: MD_CODE_NAME_LIST@3..7 + 0: MD_TEXTUAL@3..7 + 0: MD_TEXTUAL_LITERAL@3..7 "rust" [] [] + 2: MD_INLINE_ITEM_LIST@7..45 + 0: MD_TEXTUAL@7..8 + 0: MD_TEXTUAL_LITERAL@7..8 "\n" [] [] + 1: MD_TEXTUAL@8..15 + 0: MD_TEXTUAL_LITERAL@8..15 "fn main" [] [] + 2: MD_TEXTUAL@15..16 + 0: MD_TEXTUAL_LITERAL@15..16 "(" [] [] + 3: MD_TEXTUAL@16..17 + 0: MD_TEXTUAL_LITERAL@16..17 ")" [] [] + 4: MD_TEXTUAL@17..19 + 0: MD_TEXTUAL_LITERAL@17..19 " {" [] [] + 5: MD_TEXTUAL@19..20 + 0: MD_TEXTUAL_LITERAL@19..20 "\n" [] [] + 6: MD_TEXTUAL@20..21 + 0: MD_TEXTUAL_LITERAL@20..21 " " [] [] + 7: MD_TEXTUAL@21..22 + 0: MD_TEXTUAL_LITERAL@21..22 " " [] [] + 8: MD_TEXTUAL@22..23 + 0: MD_TEXTUAL_LITERAL@22..23 " " [] [] + 9: MD_TEXTUAL@23..24 + 0: MD_TEXTUAL_LITERAL@23..24 " " [] [] + 10: MD_TEXTUAL@24..31 + 0: MD_TEXTUAL_LITERAL@24..31 "println" [] [] + 11: MD_TEXTUAL@31..32 + 0: MD_TEXTUAL_LITERAL@31..32 "!" 
[] [] + 12: MD_TEXTUAL@32..33 + 0: MD_TEXTUAL_LITERAL@32..33 "(" [] [] + 13: MD_TEXTUAL@33..40 + 0: MD_TEXTUAL_LITERAL@33..40 "\"Hello\"" [] [] + 14: MD_TEXTUAL@40..41 + 0: MD_TEXTUAL_LITERAL@40..41 ")" [] [] + 15: MD_TEXTUAL@41..42 + 0: MD_TEXTUAL_LITERAL@41..42 ";" [] [] + 16: MD_TEXTUAL@42..43 + 0: MD_TEXTUAL_LITERAL@42..43 "\n" [] [] + 17: MD_TEXTUAL@43..44 + 0: MD_TEXTUAL_LITERAL@43..44 "}" [] [] + 18: MD_TEXTUAL@44..45 + 0: MD_TEXTUAL_LITERAL@44..45 "\n" [] [] + 3: (empty) + 2: EOF@45..45 "" [] [] + +``` + +## Diagnostics + +``` +unterminated_code_fence.md:1:1 parse ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ + + × Unterminated fenced code block, expected closing triple backticks (```). + + > 1 │ ```rust + │ ^^^ + 2 │ fn main() { + 3 │ println!("Hello"); + + i code block started here + + > 1 │ ```rust + │ ^^^ + 2 │ fn main() { + 3 │ println!("Hello"); + + i Add closing triple backticks (```) at the start of a new line. + +``` diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/atx_heading_trailing_hash.md b/crates/biome_markdown_parser/tests/md_test_suite/ok/atx_heading_trailing_hash.md new file mode 100644 index 000000000000..b9cbe781d34a --- /dev/null +++ b/crates/biome_markdown_parser/tests/md_test_suite/ok/atx_heading_trailing_hash.md @@ -0,0 +1,4 @@ +# foo# +# foo # +# foo # +#foo diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/atx_heading_trailing_hash.md.snap b/crates/biome_markdown_parser/tests/md_test_suite/ok/atx_heading_trailing_hash.md.snap new file mode 100644 index 000000000000..f73831653e03 --- /dev/null +++ b/crates/biome_markdown_parser/tests/md_test_suite/ok/atx_heading_trailing_hash.md.snap @@ -0,0 +1,169 @@ +--- +source: crates/biome_markdown_parser/tests/spec_test.rs +assertion_line: 131 +expression: snapshot +--- +## Input + +``` +# foo# +# foo # +# foo # +#foo + +``` + + +## AST + +``` +MdDocument { + bom_token: missing (optional), + value: MdBlockList [ + MdHeader { + before: MdHashList 
[ + MdHash { + hash_token: HASH@0..1 "#" [] [], + }, + ], + content: MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1..5 " foo" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@5..6 "#" [] [], + }, + ], + hard_line: missing (optional), + }, + after: MdHashList [], + }, + MdNewline { + value_token: NEWLINE@6..7 "\n" [] [], + }, + MdHeader { + before: MdHashList [ + MdHash { + hash_token: HASH@7..8 "#" [] [], + }, + ], + content: MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@8..12 " foo" [] [], + }, + ], + hard_line: missing (optional), + }, + after: MdHashList [ + MdHash { + hash_token: HASH@12..14 "#" [Skipped(" ")] [], + }, + ], + }, + MdNewline { + value_token: NEWLINE@14..15 "\n" [] [], + }, + MdHeader { + before: MdHashList [ + MdHash { + hash_token: HASH@15..16 "#" [] [], + }, + ], + content: MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@16..22 " foo" [] [], + }, + ], + hard_line: missing (optional), + }, + after: MdHashList [ + MdHash { + hash_token: HASH@22..26 "#" [Skipped(" "), Skipped(" "), Skipped(" ")] [], + }, + ], + }, + MdNewline { + value_token: NEWLINE@26..27 "\n" [] [], + }, + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@27..28 "#" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@28..31 "foo" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@31..32 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + ], + eof_token: EOF@32..32 "" [] [], +} +``` + +## CST + +``` +0: MD_DOCUMENT@0..32 + 0: (empty) + 1: MD_BLOCK_LIST@0..32 + 0: MD_HEADER@0..6 + 0: MD_HASH_LIST@0..1 + 0: MD_HASH@0..1 + 0: HASH@0..1 "#" [] [] + 1: MD_PARAGRAPH@1..6 + 0: MD_INLINE_ITEM_LIST@1..6 + 0: MD_TEXTUAL@1..5 + 0: MD_TEXTUAL_LITERAL@1..5 " foo" [] [] + 1: MD_TEXTUAL@5..6 + 0: MD_TEXTUAL_LITERAL@5..6 "#" [] [] + 1: (empty) + 2: MD_HASH_LIST@6..6 + 1: MD_NEWLINE@6..7 + 0: 
NEWLINE@6..7 "\n" [] [] + 2: MD_HEADER@7..14 + 0: MD_HASH_LIST@7..8 + 0: MD_HASH@7..8 + 0: HASH@7..8 "#" [] [] + 1: MD_PARAGRAPH@8..12 + 0: MD_INLINE_ITEM_LIST@8..12 + 0: MD_TEXTUAL@8..12 + 0: MD_TEXTUAL_LITERAL@8..12 " foo" [] [] + 1: (empty) + 2: MD_HASH_LIST@12..14 + 0: MD_HASH@12..14 + 0: HASH@12..14 "#" [Skipped(" ")] [] + 3: MD_NEWLINE@14..15 + 0: NEWLINE@14..15 "\n" [] [] + 4: MD_HEADER@15..26 + 0: MD_HASH_LIST@15..16 + 0: MD_HASH@15..16 + 0: HASH@15..16 "#" [] [] + 1: MD_PARAGRAPH@16..22 + 0: MD_INLINE_ITEM_LIST@16..22 + 0: MD_TEXTUAL@16..22 + 0: MD_TEXTUAL_LITERAL@16..22 " foo" [] [] + 1: (empty) + 2: MD_HASH_LIST@22..26 + 0: MD_HASH@22..26 + 0: HASH@22..26 "#" [Skipped(" "), Skipped(" "), Skipped(" ")] [] + 5: MD_NEWLINE@26..27 + 0: NEWLINE@26..27 "\n" [] [] + 6: MD_PARAGRAPH@27..32 + 0: MD_INLINE_ITEM_LIST@27..32 + 0: MD_TEXTUAL@27..28 + 0: MD_TEXTUAL_LITERAL@27..28 "#" [] [] + 1: MD_TEXTUAL@28..31 + 0: MD_TEXTUAL_LITERAL@28..31 "foo" [] [] + 2: MD_TEXTUAL@31..32 + 0: MD_TEXTUAL_LITERAL@31..32 "\n" [] [] + 1: (empty) + 2: EOF@32..32 "" [] [] + +``` diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/autolinks.md b/crates/biome_markdown_parser/tests/md_test_suite/ok/autolinks.md new file mode 100644 index 000000000000..e786ac124363 --- /dev/null +++ b/crates/biome_markdown_parser/tests/md_test_suite/ok/autolinks.md @@ -0,0 +1,7 @@ +URL autolink: + +Email autolink: + +Not an autolink (no scheme): + +Mixed text: Visit for more. diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/autolinks.md.snap b/crates/biome_markdown_parser/tests/md_test_suite/ok/autolinks.md.snap new file mode 100644 index 000000000000..3a1b4415d301 --- /dev/null +++ b/crates/biome_markdown_parser/tests/md_test_suite/ok/autolinks.md.snap @@ -0,0 +1,225 @@ +--- +source: crates/biome_markdown_parser/tests/spec_test.rs +expression: snapshot +--- +## Input + +``` +URL autolink: + +Email autolink: + +Not an autolink (no scheme): + +Mixed text: Visit for more. 
+ +``` + + +## AST + +``` +MdDocument { + bom_token: missing (optional), + value: MdBlockList [ + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@0..14 "URL autolink: " [] [], + }, + MdAutolink { + l_angle_token: L_ANGLE@14..15 "<" [] [], + value: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@15..34 "https://example.com" [] [], + }, + ], + r_angle_token: R_ANGLE@34..35 ">" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@35..36 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + MdNewline { + value_token: NEWLINE@36..37 "\n" [] [], + }, + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@37..53 "Email autolink: " [] [], + }, + MdAutolink { + l_angle_token: L_ANGLE@53..54 "<" [] [], + value: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@54..70 "user@example.com" [] [], + }, + ], + r_angle_token: R_ANGLE@70..71 ">" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@71..72 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + MdNewline { + value_token: NEWLINE@72..73 "\n" [] [], + }, + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@73..89 "Not an autolink " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@89..90 "(" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@90..99 "no scheme" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@99..100 ")" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@100..101 ":" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@101..102 " " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@102..103 "<" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@103..114 "example.com" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@114..115 ">" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@115..116 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, 
+ MdNewline { + value_token: NEWLINE@116..117 "\n" [] [], + }, + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@117..135 "Mixed text: Visit " [] [], + }, + MdAutolink { + l_angle_token: L_ANGLE@135..136 "<" [] [], + value: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@136..147 "http://rust" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@147..148 "-" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@148..156 "lang.org" [] [], + }, + ], + r_angle_token: R_ANGLE@156..157 ">" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@157..167 " for more." [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@167..168 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + ], + eof_token: EOF@168..168 "" [] [], +} +``` + +## CST + +``` +0: MD_DOCUMENT@0..168 + 0: (empty) + 1: MD_BLOCK_LIST@0..168 + 0: MD_PARAGRAPH@0..36 + 0: MD_INLINE_ITEM_LIST@0..36 + 0: MD_TEXTUAL@0..14 + 0: MD_TEXTUAL_LITERAL@0..14 "URL autolink: " [] [] + 1: MD_AUTOLINK@14..35 + 0: L_ANGLE@14..15 "<" [] [] + 1: MD_INLINE_ITEM_LIST@15..34 + 0: MD_TEXTUAL@15..34 + 0: MD_TEXTUAL_LITERAL@15..34 "https://example.com" [] [] + 2: R_ANGLE@34..35 ">" [] [] + 2: MD_TEXTUAL@35..36 + 0: MD_TEXTUAL_LITERAL@35..36 "\n" [] [] + 1: (empty) + 1: MD_NEWLINE@36..37 + 0: NEWLINE@36..37 "\n" [] [] + 2: MD_PARAGRAPH@37..72 + 0: MD_INLINE_ITEM_LIST@37..72 + 0: MD_TEXTUAL@37..53 + 0: MD_TEXTUAL_LITERAL@37..53 "Email autolink: " [] [] + 1: MD_AUTOLINK@53..71 + 0: L_ANGLE@53..54 "<" [] [] + 1: MD_INLINE_ITEM_LIST@54..70 + 0: MD_TEXTUAL@54..70 + 0: MD_TEXTUAL_LITERAL@54..70 "user@example.com" [] [] + 2: R_ANGLE@70..71 ">" [] [] + 2: MD_TEXTUAL@71..72 + 0: MD_TEXTUAL_LITERAL@71..72 "\n" [] [] + 1: (empty) + 3: MD_NEWLINE@72..73 + 0: NEWLINE@72..73 "\n" [] [] + 4: MD_PARAGRAPH@73..116 + 0: MD_INLINE_ITEM_LIST@73..116 + 0: MD_TEXTUAL@73..89 + 0: MD_TEXTUAL_LITERAL@73..89 "Not an autolink " [] [] + 1: MD_TEXTUAL@89..90 + 0: 
MD_TEXTUAL_LITERAL@89..90 "(" [] [] + 2: MD_TEXTUAL@90..99 + 0: MD_TEXTUAL_LITERAL@90..99 "no scheme" [] [] + 3: MD_TEXTUAL@99..100 + 0: MD_TEXTUAL_LITERAL@99..100 ")" [] [] + 4: MD_TEXTUAL@100..101 + 0: MD_TEXTUAL_LITERAL@100..101 ":" [] [] + 5: MD_TEXTUAL@101..102 + 0: MD_TEXTUAL_LITERAL@101..102 " " [] [] + 6: MD_TEXTUAL@102..103 + 0: MD_TEXTUAL_LITERAL@102..103 "<" [] [] + 7: MD_TEXTUAL@103..114 + 0: MD_TEXTUAL_LITERAL@103..114 "example.com" [] [] + 8: MD_TEXTUAL@114..115 + 0: MD_TEXTUAL_LITERAL@114..115 ">" [] [] + 9: MD_TEXTUAL@115..116 + 0: MD_TEXTUAL_LITERAL@115..116 "\n" [] [] + 1: (empty) + 5: MD_NEWLINE@116..117 + 0: NEWLINE@116..117 "\n" [] [] + 6: MD_PARAGRAPH@117..168 + 0: MD_INLINE_ITEM_LIST@117..168 + 0: MD_TEXTUAL@117..135 + 0: MD_TEXTUAL_LITERAL@117..135 "Mixed text: Visit " [] [] + 1: MD_AUTOLINK@135..157 + 0: L_ANGLE@135..136 "<" [] [] + 1: MD_INLINE_ITEM_LIST@136..156 + 0: MD_TEXTUAL@136..147 + 0: MD_TEXTUAL_LITERAL@136..147 "http://rust" [] [] + 1: MD_TEXTUAL@147..148 + 0: MD_TEXTUAL_LITERAL@147..148 "-" [] [] + 2: MD_TEXTUAL@148..156 + 0: MD_TEXTUAL_LITERAL@148..156 "lang.org" [] [] + 2: R_ANGLE@156..157 ">" [] [] + 2: MD_TEXTUAL@157..167 + 0: MD_TEXTUAL_LITERAL@157..167 " for more." 
[] [] + 3: MD_TEXTUAL@167..168 + 0: MD_TEXTUAL_LITERAL@167..168 "\n" [] [] + 1: (empty) + 2: EOF@168..168 "" [] [] + +``` diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/block_quote.md b/crates/biome_markdown_parser/tests/md_test_suite/ok/block_quote.md new file mode 100644 index 000000000000..041984494079 --- /dev/null +++ b/crates/biome_markdown_parser/tests/md_test_suite/ok/block_quote.md @@ -0,0 +1,4 @@ +> This is a quote +> It continues here + +> Another quote block diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/block_quote.md.snap b/crates/biome_markdown_parser/tests/md_test_suite/ok/block_quote.md.snap new file mode 100644 index 000000000000..7082f0f9d6e2 --- /dev/null +++ b/crates/biome_markdown_parser/tests/md_test_suite/ok/block_quote.md.snap @@ -0,0 +1,102 @@ +--- +source: crates/biome_markdown_parser/tests/spec_test.rs +expression: snapshot +--- +## Input + +``` +> This is a quote +> It continues here + +> Another quote block + +``` + + +## AST + +``` +MdDocument { + bom_token: missing (optional), + value: MdBlockList [ + MdQuote { + marker_token: R_ANGLE@0..1 ">" [] [], + content: MdBlockList [ + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1..17 "This is a quote" [Skipped(" ")] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@17..18 "\n" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@18..37 "It continues here" [Skipped(">"), Skipped(" ")] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@37..38 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + ], + }, + MdNewline { + value_token: NEWLINE@38..39 "\n" [] [], + }, + MdQuote { + marker_token: R_ANGLE@39..40 ">" [] [], + content: MdBlockList [ + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@40..60 "Another quote block" [Skipped(" ")] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@60..61 "\n" [] [], + }, + ], + hard_line: missing 
(optional), + }, + ], + }, + ], + eof_token: EOF@61..61 "" [] [], +} +``` + +## CST + +``` +0: MD_DOCUMENT@0..61 + 0: (empty) + 1: MD_BLOCK_LIST@0..61 + 0: MD_QUOTE@0..38 + 0: R_ANGLE@0..1 ">" [] [] + 1: MD_BLOCK_LIST@1..38 + 0: MD_PARAGRAPH@1..38 + 0: MD_INLINE_ITEM_LIST@1..38 + 0: MD_TEXTUAL@1..17 + 0: MD_TEXTUAL_LITERAL@1..17 "This is a quote" [Skipped(" ")] [] + 1: MD_TEXTUAL@17..18 + 0: MD_TEXTUAL_LITERAL@17..18 "\n" [] [] + 2: MD_TEXTUAL@18..37 + 0: MD_TEXTUAL_LITERAL@18..37 "It continues here" [Skipped(">"), Skipped(" ")] [] + 3: MD_TEXTUAL@37..38 + 0: MD_TEXTUAL_LITERAL@37..38 "\n" [] [] + 1: (empty) + 1: MD_NEWLINE@38..39 + 0: NEWLINE@38..39 "\n" [] [] + 2: MD_QUOTE@39..61 + 0: R_ANGLE@39..40 ">" [] [] + 1: MD_BLOCK_LIST@40..61 + 0: MD_PARAGRAPH@40..61 + 0: MD_INLINE_ITEM_LIST@40..61 + 0: MD_TEXTUAL@40..60 + 0: MD_TEXTUAL_LITERAL@40..60 "Another quote block" [Skipped(" ")] [] + 1: MD_TEXTUAL@60..61 + 0: MD_TEXTUAL_LITERAL@60..61 "\n" [] [] + 1: (empty) + 2: EOF@61..61 "" [] [] + +``` diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/block_quote_grouping.md b/crates/biome_markdown_parser/tests/md_test_suite/ok/block_quote_grouping.md new file mode 100644 index 000000000000..b6c5ded632cf --- /dev/null +++ b/crates/biome_markdown_parser/tests/md_test_suite/ok/block_quote_grouping.md @@ -0,0 +1,7 @@ +> a +> b + +> a +> b + +> > nested diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/block_quote_grouping.md.snap b/crates/biome_markdown_parser/tests/md_test_suite/ok/block_quote_grouping.md.snap new file mode 100644 index 000000000000..2fef5fa35676 --- /dev/null +++ b/crates/biome_markdown_parser/tests/md_test_suite/ok/block_quote_grouping.md.snap @@ -0,0 +1,154 @@ +--- +source: crates/biome_markdown_parser/tests/spec_test.rs +expression: snapshot +--- +## Input + +``` +> a +> b + +> a +> b + +> > nested + +``` + + +## AST + +``` +MdDocument { + bom_token: missing (optional), + value: MdBlockList [ + MdQuote { + marker_token: 
R_ANGLE@0..1 ">" [] [], + content: MdBlockList [ + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1..3 "a" [Skipped(" ")] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@3..4 "\n" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@4..7 "b" [Skipped(">"), Skipped(" ")] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@7..8 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + ], + }, + MdNewline { + value_token: NEWLINE@8..9 "\n" [] [], + }, + MdQuote { + marker_token: R_ANGLE@9..10 ">" [] [], + content: MdBlockList [ + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@10..13 " a" [Skipped(" ")] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@13..14 "\n" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@14..17 "b" [Skipped(">"), Skipped(" ")] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@17..18 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + ], + }, + MdNewline { + value_token: NEWLINE@18..19 "\n" [] [], + }, + MdQuote { + marker_token: R_ANGLE@19..20 ">" [] [], + content: MdBlockList [ + MdQuote { + marker_token: R_ANGLE@20..22 ">" [Skipped(" ")] [], + content: MdBlockList [ + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@22..29 "nested" [Skipped(" ")] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@29..30 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + ], + }, + ], + }, + ], + eof_token: EOF@30..30 "" [] [], +} +``` + +## CST + +``` +0: MD_DOCUMENT@0..30 + 0: (empty) + 1: MD_BLOCK_LIST@0..30 + 0: MD_QUOTE@0..8 + 0: R_ANGLE@0..1 ">" [] [] + 1: MD_BLOCK_LIST@1..8 + 0: MD_PARAGRAPH@1..8 + 0: MD_INLINE_ITEM_LIST@1..8 + 0: MD_TEXTUAL@1..3 + 0: MD_TEXTUAL_LITERAL@1..3 "a" [Skipped(" ")] [] + 1: MD_TEXTUAL@3..4 + 0: MD_TEXTUAL_LITERAL@3..4 "\n" [] [] + 2: MD_TEXTUAL@4..7 + 0: MD_TEXTUAL_LITERAL@4..7 "b" [Skipped(">"), Skipped(" ")] [] + 3: 
MD_TEXTUAL@7..8 + 0: MD_TEXTUAL_LITERAL@7..8 "\n" [] [] + 1: (empty) + 1: MD_NEWLINE@8..9 + 0: NEWLINE@8..9 "\n" [] [] + 2: MD_QUOTE@9..18 + 0: R_ANGLE@9..10 ">" [] [] + 1: MD_BLOCK_LIST@10..18 + 0: MD_PARAGRAPH@10..18 + 0: MD_INLINE_ITEM_LIST@10..18 + 0: MD_TEXTUAL@10..13 + 0: MD_TEXTUAL_LITERAL@10..13 " a" [Skipped(" ")] [] + 1: MD_TEXTUAL@13..14 + 0: MD_TEXTUAL_LITERAL@13..14 "\n" [] [] + 2: MD_TEXTUAL@14..17 + 0: MD_TEXTUAL_LITERAL@14..17 "b" [Skipped(">"), Skipped(" ")] [] + 3: MD_TEXTUAL@17..18 + 0: MD_TEXTUAL_LITERAL@17..18 "\n" [] [] + 1: (empty) + 3: MD_NEWLINE@18..19 + 0: NEWLINE@18..19 "\n" [] [] + 4: MD_QUOTE@19..30 + 0: R_ANGLE@19..20 ">" [] [] + 1: MD_BLOCK_LIST@20..30 + 0: MD_QUOTE@20..30 + 0: R_ANGLE@20..22 ">" [Skipped(" ")] [] + 1: MD_BLOCK_LIST@22..30 + 0: MD_PARAGRAPH@22..30 + 0: MD_INLINE_ITEM_LIST@22..30 + 0: MD_TEXTUAL@22..29 + 0: MD_TEXTUAL_LITERAL@22..29 "nested" [Skipped(" ")] [] + 1: MD_TEXTUAL@29..30 + 0: MD_TEXTUAL_LITERAL@29..30 "\n" [] [] + 1: (empty) + 2: EOF@30..30 "" [] [] + +``` diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/bullet_list.md b/crates/biome_markdown_parser/tests/md_test_suite/ok/bullet_list.md new file mode 100644 index 000000000000..429b37d1d126 --- /dev/null +++ b/crates/biome_markdown_parser/tests/md_test_suite/ok/bullet_list.md @@ -0,0 +1,9 @@ +- Item one +- Item two +- Item three + +* Alternative marker +* Another item + ++ Plus marker ++ Another plus item diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/bullet_list.md.snap b/crates/biome_markdown_parser/tests/md_test_suite/ok/bullet_list.md.snap new file mode 100644 index 000000000000..ad8b4d5076aa --- /dev/null +++ b/crates/biome_markdown_parser/tests/md_test_suite/ok/bullet_list.md.snap @@ -0,0 +1,238 @@ +--- +source: crates/biome_markdown_parser/tests/spec_test.rs +expression: snapshot +--- +## Input + +``` +- Item one +- Item two +- Item three + +* Alternative marker +* Another item + ++ Plus marker ++ Another plus item + 
+``` + + +## AST + +``` +MdDocument { + bom_token: missing (optional), + value: MdBlockList [ + MdBulletListItem { + md_bullet_list: MdBulletList [ + MdBullet { + bullet: MINUS@0..1 "-" [] [], + content: MdBlockList [ + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1..10 " Item one" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@10..11 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + ], + }, + MdBullet { + bullet: MINUS@11..12 "-" [] [], + content: MdBlockList [ + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@12..21 " Item two" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@21..22 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + ], + }, + MdBullet { + bullet: MINUS@22..23 "-" [] [], + content: MdBlockList [ + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@23..34 " Item three" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@34..35 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + MdNewline { + value_token: NEWLINE@35..36 "\n" [] [], + }, + ], + }, + MdBullet { + bullet: STAR@36..37 "*" [] [], + content: MdBlockList [ + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@37..56 " Alternative marker" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@56..57 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + ], + }, + MdBullet { + bullet: STAR@57..58 "*" [] [], + content: MdBlockList [ + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@58..71 " Another item" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@71..72 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + MdNewline { + value_token: NEWLINE@72..73 "\n" [] [], + }, + ], + }, + MdBullet { + bullet: PLUS@73..74 "+" [] [], + content: MdBlockList [ + MdParagraph { + list: MdInlineItemList [ + 
MdTextual { + value_token: MD_TEXTUAL_LITERAL@74..86 " Plus marker" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@86..87 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + ], + }, + MdBullet { + bullet: PLUS@87..88 "+" [] [], + content: MdBlockList [ + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@88..106 " Another plus item" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@106..107 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + ], + }, + ], + }, + ], + eof_token: EOF@107..107 "" [] [], +} +``` + +## CST + +``` +0: MD_DOCUMENT@0..107 + 0: (empty) + 1: MD_BLOCK_LIST@0..107 + 0: MD_BULLET_LIST_ITEM@0..107 + 0: MD_BULLET_LIST@0..107 + 0: MD_BULLET@0..11 + 0: MINUS@0..1 "-" [] [] + 1: MD_BLOCK_LIST@1..11 + 0: MD_PARAGRAPH@1..11 + 0: MD_INLINE_ITEM_LIST@1..11 + 0: MD_TEXTUAL@1..10 + 0: MD_TEXTUAL_LITERAL@1..10 " Item one" [] [] + 1: MD_TEXTUAL@10..11 + 0: MD_TEXTUAL_LITERAL@10..11 "\n" [] [] + 1: (empty) + 1: MD_BULLET@11..22 + 0: MINUS@11..12 "-" [] [] + 1: MD_BLOCK_LIST@12..22 + 0: MD_PARAGRAPH@12..22 + 0: MD_INLINE_ITEM_LIST@12..22 + 0: MD_TEXTUAL@12..21 + 0: MD_TEXTUAL_LITERAL@12..21 " Item two" [] [] + 1: MD_TEXTUAL@21..22 + 0: MD_TEXTUAL_LITERAL@21..22 "\n" [] [] + 1: (empty) + 2: MD_BULLET@22..36 + 0: MINUS@22..23 "-" [] [] + 1: MD_BLOCK_LIST@23..36 + 0: MD_PARAGRAPH@23..35 + 0: MD_INLINE_ITEM_LIST@23..35 + 0: MD_TEXTUAL@23..34 + 0: MD_TEXTUAL_LITERAL@23..34 " Item three" [] [] + 1: MD_TEXTUAL@34..35 + 0: MD_TEXTUAL_LITERAL@34..35 "\n" [] [] + 1: (empty) + 1: MD_NEWLINE@35..36 + 0: NEWLINE@35..36 "\n" [] [] + 3: MD_BULLET@36..57 + 0: STAR@36..37 "*" [] [] + 1: MD_BLOCK_LIST@37..57 + 0: MD_PARAGRAPH@37..57 + 0: MD_INLINE_ITEM_LIST@37..57 + 0: MD_TEXTUAL@37..56 + 0: MD_TEXTUAL_LITERAL@37..56 " Alternative marker" [] [] + 1: MD_TEXTUAL@56..57 + 0: MD_TEXTUAL_LITERAL@56..57 "\n" [] [] + 1: (empty) + 4: MD_BULLET@57..73 + 0: STAR@57..58 "*" [] [] + 1: MD_BLOCK_LIST@58..73 + 
0: MD_PARAGRAPH@58..72 + 0: MD_INLINE_ITEM_LIST@58..72 + 0: MD_TEXTUAL@58..71 + 0: MD_TEXTUAL_LITERAL@58..71 " Another item" [] [] + 1: MD_TEXTUAL@71..72 + 0: MD_TEXTUAL_LITERAL@71..72 "\n" [] [] + 1: (empty) + 1: MD_NEWLINE@72..73 + 0: NEWLINE@72..73 "\n" [] [] + 5: MD_BULLET@73..87 + 0: PLUS@73..74 "+" [] [] + 1: MD_BLOCK_LIST@74..87 + 0: MD_PARAGRAPH@74..87 + 0: MD_INLINE_ITEM_LIST@74..87 + 0: MD_TEXTUAL@74..86 + 0: MD_TEXTUAL_LITERAL@74..86 " Plus marker" [] [] + 1: MD_TEXTUAL@86..87 + 0: MD_TEXTUAL_LITERAL@86..87 "\n" [] [] + 1: (empty) + 6: MD_BULLET@87..107 + 0: PLUS@87..88 "+" [] [] + 1: MD_BLOCK_LIST@88..107 + 0: MD_PARAGRAPH@88..107 + 0: MD_INLINE_ITEM_LIST@88..107 + 0: MD_TEXTUAL@88..106 + 0: MD_TEXTUAL_LITERAL@88..106 " Another plus item" [] [] + 1: MD_TEXTUAL@106..107 + 0: MD_TEXTUAL_LITERAL@106..107 "\n" [] [] + 1: (empty) + 2: EOF@107..107 "" [] [] + +``` diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/edge_cases.md b/crates/biome_markdown_parser/tests/md_test_suite/ok/edge_cases.md new file mode 100644 index 000000000000..70e6f15306a0 --- /dev/null +++ b/crates/biome_markdown_parser/tests/md_test_suite/ok/edge_cases.md @@ -0,0 +1,30 @@ +#NoSpaceHeading + +##AlsoNoSpace + +foo --- bar is not a thematic break + +inline ~~~ tilde is not a fence + + *code with* inline markers + + more code **here** too + +# Tab after hash is valid heading + +## Multiple hashes with tab + +Paragraph here + # not heading due to 4+ spaces + - not list due to 4+ spaces + > not quote due to 4+ spaces +continues as paragraph + +Another para + # this IS a heading (only 3 spaces) + +Para with indented fence + ``` + not a code fence + ``` +still the same paragraph diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/edge_cases.md.snap b/crates/biome_markdown_parser/tests/md_test_suite/ok/edge_cases.md.snap new file mode 100644 index 000000000000..216d30698c94 --- /dev/null +++ b/crates/biome_markdown_parser/tests/md_test_suite/ok/edge_cases.md.snap 
@@ -0,0 +1,702 @@ +--- +source: crates/biome_markdown_parser/tests/spec_test.rs +assertion_line: 131 +expression: snapshot +--- +## Input + +``` +#NoSpaceHeading + +##AlsoNoSpace + +foo --- bar is not a thematic break + +inline ~~~ tilde is not a fence + + *code with* inline markers + + more code **here** too + +# Tab after hash is valid heading + +## Multiple hashes with tab + +Paragraph here + # not heading due to 4+ spaces + - not list due to 4+ spaces + > not quote due to 4+ spaces +continues as paragraph + +Another para + # this IS a heading (only 3 spaces) + +Para with indented fence + ``` + not a code fence + ``` +still the same paragraph + +``` + + +## AST + +``` +MdDocument { + bom_token: missing (optional), + value: MdBlockList [ + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@0..1 "#" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1..15 "NoSpaceHeading" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@15..16 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + MdNewline { + value_token: NEWLINE@16..17 "\n" [] [], + }, + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@17..18 "#" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@18..19 "#" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@19..30 "AlsoNoSpace" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@30..31 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + MdNewline { + value_token: NEWLINE@31..32 "\n" [] [], + }, + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@32..36 "foo " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@36..37 "-" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@37..38 "-" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@38..39 "-" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@39..67 " bar is not a thematic break" [] [], + }, + 
MdTextual { + value_token: MD_TEXTUAL_LITERAL@67..68 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + MdNewline { + value_token: NEWLINE@68..69 "\n" [] [], + }, + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@69..76 "inline " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@76..79 "~~~" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@79..100 " tilde is not a fence" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@100..101 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + MdNewline { + value_token: NEWLINE@101..102 "\n" [] [], + }, + MdIndentCodeBlock { + content: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@102..107 "*" [Skipped(" "), Skipped(" "), Skipped(" "), Skipped(" ")] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@107..116 "code with" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@116..117 "*" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@117..132 " inline markers" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@132..133 "\n" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@133..134 "\n" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@134..148 "more code " [Skipped(" "), Skipped(" "), Skipped(" "), Skipped(" ")] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@148..150 "**" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@150..154 "here" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@154..156 "**" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@156..160 " too" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@160..161 "\n" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@161..162 "\n" [] [], + }, + ], + }, + MdHeader { + before: MdHashList [ + MdHash { + hash_token: HASH@162..163 "#" [] [], + }, + ], + content: MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: 
MD_TEXTUAL_LITERAL@163..195 "\tTab after hash is valid heading" [] [], + }, + ], + hard_line: missing (optional), + }, + after: MdHashList [], + }, + MdNewline { + value_token: NEWLINE@195..196 "\n" [] [], + }, + MdNewline { + value_token: NEWLINE@196..197 "\n" [] [], + }, + MdHeader { + before: MdHashList [ + MdHash { + hash_token: HASH@197..198 "#" [] [], + }, + MdHash { + hash_token: HASH@198..199 "#" [] [], + }, + ], + content: MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@199..224 "\tMultiple hashes with tab" [] [], + }, + ], + hard_line: missing (optional), + }, + after: MdHashList [], + }, + MdNewline { + value_token: NEWLINE@224..225 "\n" [] [], + }, + MdNewline { + value_token: NEWLINE@225..226 "\n" [] [], + }, + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@226..240 "Paragraph here" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@240..241 "\n" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@241..242 " " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@242..243 " " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@243..244 " " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@244..245 " " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@245..246 "#" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@246..267 " not heading due to 4" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@267..268 "+" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@268..275 " spaces" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@275..276 "\n" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@276..277 " " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@277..278 " " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@278..279 " " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@279..280 " " [] [], + }, + MdTextual { + value_token: 
MD_TEXTUAL_LITERAL@280..281 "-" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@281..299 " not list due to 4" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@299..300 "+" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@300..307 " spaces" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@307..308 "\n" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@308..309 " " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@309..310 " " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@310..311 " " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@311..312 " " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@312..313 ">" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@313..332 " not quote due to 4" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@332..333 "+" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@333..340 " spaces" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@340..341 "\n" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@341..363 "continues as paragraph" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@363..364 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + MdNewline { + value_token: NEWLINE@364..365 "\n" [] [], + }, + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@365..377 "Another para" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@377..378 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + MdHeader { + before: MdHashList [ + MdHash { + hash_token: HASH@378..382 "#" [Skipped(" "), Skipped(" "), Skipped(" ")] [], + }, + ], + content: MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@382..401 " this IS a heading " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@401..402 "(" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@402..415 "only 3 spaces" [] [], + }, + 
MdTextual { + value_token: MD_TEXTUAL_LITERAL@415..416 ")" [] [], + }, + ], + hard_line: missing (optional), + }, + after: MdHashList [], + }, + MdNewline { + value_token: NEWLINE@416..417 "\n" [] [], + }, + MdNewline { + value_token: NEWLINE@417..418 "\n" [] [], + }, + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@418..442 "Para with indented fence" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@442..443 "\n" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@443..444 " " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@444..445 " " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@445..446 " " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@446..447 " " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@447..450 "```" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@450..451 "\n" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@451..452 " " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@452..453 " " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@453..454 " " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@454..455 " " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@455..471 "not a code fence" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@471..472 "\n" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@472..473 " " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@473..474 " " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@474..475 " " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@475..476 " " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@476..479 "```" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@479..480 "\n" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@480..504 "still the same paragraph" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@504..505 "\n" [] [], + }, + ], + 
hard_line: missing (optional), + }, + ], + eof_token: EOF@505..505 "" [] [], +} +``` + +## CST + +``` +0: MD_DOCUMENT@0..505 + 0: (empty) + 1: MD_BLOCK_LIST@0..505 + 0: MD_PARAGRAPH@0..16 + 0: MD_INLINE_ITEM_LIST@0..16 + 0: MD_TEXTUAL@0..1 + 0: MD_TEXTUAL_LITERAL@0..1 "#" [] [] + 1: MD_TEXTUAL@1..15 + 0: MD_TEXTUAL_LITERAL@1..15 "NoSpaceHeading" [] [] + 2: MD_TEXTUAL@15..16 + 0: MD_TEXTUAL_LITERAL@15..16 "\n" [] [] + 1: (empty) + 1: MD_NEWLINE@16..17 + 0: NEWLINE@16..17 "\n" [] [] + 2: MD_PARAGRAPH@17..31 + 0: MD_INLINE_ITEM_LIST@17..31 + 0: MD_TEXTUAL@17..18 + 0: MD_TEXTUAL_LITERAL@17..18 "#" [] [] + 1: MD_TEXTUAL@18..19 + 0: MD_TEXTUAL_LITERAL@18..19 "#" [] [] + 2: MD_TEXTUAL@19..30 + 0: MD_TEXTUAL_LITERAL@19..30 "AlsoNoSpace" [] [] + 3: MD_TEXTUAL@30..31 + 0: MD_TEXTUAL_LITERAL@30..31 "\n" [] [] + 1: (empty) + 3: MD_NEWLINE@31..32 + 0: NEWLINE@31..32 "\n" [] [] + 4: MD_PARAGRAPH@32..68 + 0: MD_INLINE_ITEM_LIST@32..68 + 0: MD_TEXTUAL@32..36 + 0: MD_TEXTUAL_LITERAL@32..36 "foo " [] [] + 1: MD_TEXTUAL@36..37 + 0: MD_TEXTUAL_LITERAL@36..37 "-" [] [] + 2: MD_TEXTUAL@37..38 + 0: MD_TEXTUAL_LITERAL@37..38 "-" [] [] + 3: MD_TEXTUAL@38..39 + 0: MD_TEXTUAL_LITERAL@38..39 "-" [] [] + 4: MD_TEXTUAL@39..67 + 0: MD_TEXTUAL_LITERAL@39..67 " bar is not a thematic break" [] [] + 5: MD_TEXTUAL@67..68 + 0: MD_TEXTUAL_LITERAL@67..68 "\n" [] [] + 1: (empty) + 5: MD_NEWLINE@68..69 + 0: NEWLINE@68..69 "\n" [] [] + 6: MD_PARAGRAPH@69..101 + 0: MD_INLINE_ITEM_LIST@69..101 + 0: MD_TEXTUAL@69..76 + 0: MD_TEXTUAL_LITERAL@69..76 "inline " [] [] + 1: MD_TEXTUAL@76..79 + 0: MD_TEXTUAL_LITERAL@76..79 "~~~" [] [] + 2: MD_TEXTUAL@79..100 + 0: MD_TEXTUAL_LITERAL@79..100 " tilde is not a fence" [] [] + 3: MD_TEXTUAL@100..101 + 0: MD_TEXTUAL_LITERAL@100..101 "\n" [] [] + 1: (empty) + 7: MD_NEWLINE@101..102 + 0: NEWLINE@101..102 "\n" [] [] + 8: MD_INDENT_CODE_BLOCK@102..162 + 0: MD_INLINE_ITEM_LIST@102..162 + 0: MD_TEXTUAL@102..107 + 0: MD_TEXTUAL_LITERAL@102..107 "*" [Skipped(" "), Skipped(" "), 
Skipped(" "), Skipped(" ")] [] + 1: MD_TEXTUAL@107..116 + 0: MD_TEXTUAL_LITERAL@107..116 "code with" [] [] + 2: MD_TEXTUAL@116..117 + 0: MD_TEXTUAL_LITERAL@116..117 "*" [] [] + 3: MD_TEXTUAL@117..132 + 0: MD_TEXTUAL_LITERAL@117..132 " inline markers" [] [] + 4: MD_TEXTUAL@132..133 + 0: MD_TEXTUAL_LITERAL@132..133 "\n" [] [] + 5: MD_TEXTUAL@133..134 + 0: MD_TEXTUAL_LITERAL@133..134 "\n" [] [] + 6: MD_TEXTUAL@134..148 + 0: MD_TEXTUAL_LITERAL@134..148 "more code " [Skipped(" "), Skipped(" "), Skipped(" "), Skipped(" ")] [] + 7: MD_TEXTUAL@148..150 + 0: MD_TEXTUAL_LITERAL@148..150 "**" [] [] + 8: MD_TEXTUAL@150..154 + 0: MD_TEXTUAL_LITERAL@150..154 "here" [] [] + 9: MD_TEXTUAL@154..156 + 0: MD_TEXTUAL_LITERAL@154..156 "**" [] [] + 10: MD_TEXTUAL@156..160 + 0: MD_TEXTUAL_LITERAL@156..160 " too" [] [] + 11: MD_TEXTUAL@160..161 + 0: MD_TEXTUAL_LITERAL@160..161 "\n" [] [] + 12: MD_TEXTUAL@161..162 + 0: MD_TEXTUAL_LITERAL@161..162 "\n" [] [] + 9: MD_HEADER@162..195 + 0: MD_HASH_LIST@162..163 + 0: MD_HASH@162..163 + 0: HASH@162..163 "#" [] [] + 1: MD_PARAGRAPH@163..195 + 0: MD_INLINE_ITEM_LIST@163..195 + 0: MD_TEXTUAL@163..195 + 0: MD_TEXTUAL_LITERAL@163..195 "\tTab after hash is valid heading" [] [] + 1: (empty) + 2: MD_HASH_LIST@195..195 + 10: MD_NEWLINE@195..196 + 0: NEWLINE@195..196 "\n" [] [] + 11: MD_NEWLINE@196..197 + 0: NEWLINE@196..197 "\n" [] [] + 12: MD_HEADER@197..224 + 0: MD_HASH_LIST@197..199 + 0: MD_HASH@197..198 + 0: HASH@197..198 "#" [] [] + 1: MD_HASH@198..199 + 0: HASH@198..199 "#" [] [] + 1: MD_PARAGRAPH@199..224 + 0: MD_INLINE_ITEM_LIST@199..224 + 0: MD_TEXTUAL@199..224 + 0: MD_TEXTUAL_LITERAL@199..224 "\tMultiple hashes with tab" [] [] + 1: (empty) + 2: MD_HASH_LIST@224..224 + 13: MD_NEWLINE@224..225 + 0: NEWLINE@224..225 "\n" [] [] + 14: MD_NEWLINE@225..226 + 0: NEWLINE@225..226 "\n" [] [] + 15: MD_PARAGRAPH@226..364 + 0: MD_INLINE_ITEM_LIST@226..364 + 0: MD_TEXTUAL@226..240 + 0: MD_TEXTUAL_LITERAL@226..240 "Paragraph here" [] [] + 1: 
MD_TEXTUAL@240..241 + 0: MD_TEXTUAL_LITERAL@240..241 "\n" [] [] + 2: MD_TEXTUAL@241..242 + 0: MD_TEXTUAL_LITERAL@241..242 " " [] [] + 3: MD_TEXTUAL@242..243 + 0: MD_TEXTUAL_LITERAL@242..243 " " [] [] + 4: MD_TEXTUAL@243..244 + 0: MD_TEXTUAL_LITERAL@243..244 " " [] [] + 5: MD_TEXTUAL@244..245 + 0: MD_TEXTUAL_LITERAL@244..245 " " [] [] + 6: MD_TEXTUAL@245..246 + 0: MD_TEXTUAL_LITERAL@245..246 "#" [] [] + 7: MD_TEXTUAL@246..267 + 0: MD_TEXTUAL_LITERAL@246..267 " not heading due to 4" [] [] + 8: MD_TEXTUAL@267..268 + 0: MD_TEXTUAL_LITERAL@267..268 "+" [] [] + 9: MD_TEXTUAL@268..275 + 0: MD_TEXTUAL_LITERAL@268..275 " spaces" [] [] + 10: MD_TEXTUAL@275..276 + 0: MD_TEXTUAL_LITERAL@275..276 "\n" [] [] + 11: MD_TEXTUAL@276..277 + 0: MD_TEXTUAL_LITERAL@276..277 " " [] [] + 12: MD_TEXTUAL@277..278 + 0: MD_TEXTUAL_LITERAL@277..278 " " [] [] + 13: MD_TEXTUAL@278..279 + 0: MD_TEXTUAL_LITERAL@278..279 " " [] [] + 14: MD_TEXTUAL@279..280 + 0: MD_TEXTUAL_LITERAL@279..280 " " [] [] + 15: MD_TEXTUAL@280..281 + 0: MD_TEXTUAL_LITERAL@280..281 "-" [] [] + 16: MD_TEXTUAL@281..299 + 0: MD_TEXTUAL_LITERAL@281..299 " not list due to 4" [] [] + 17: MD_TEXTUAL@299..300 + 0: MD_TEXTUAL_LITERAL@299..300 "+" [] [] + 18: MD_TEXTUAL@300..307 + 0: MD_TEXTUAL_LITERAL@300..307 " spaces" [] [] + 19: MD_TEXTUAL@307..308 + 0: MD_TEXTUAL_LITERAL@307..308 "\n" [] [] + 20: MD_TEXTUAL@308..309 + 0: MD_TEXTUAL_LITERAL@308..309 " " [] [] + 21: MD_TEXTUAL@309..310 + 0: MD_TEXTUAL_LITERAL@309..310 " " [] [] + 22: MD_TEXTUAL@310..311 + 0: MD_TEXTUAL_LITERAL@310..311 " " [] [] + 23: MD_TEXTUAL@311..312 + 0: MD_TEXTUAL_LITERAL@311..312 " " [] [] + 24: MD_TEXTUAL@312..313 + 0: MD_TEXTUAL_LITERAL@312..313 ">" [] [] + 25: MD_TEXTUAL@313..332 + 0: MD_TEXTUAL_LITERAL@313..332 " not quote due to 4" [] [] + 26: MD_TEXTUAL@332..333 + 0: MD_TEXTUAL_LITERAL@332..333 "+" [] [] + 27: MD_TEXTUAL@333..340 + 0: MD_TEXTUAL_LITERAL@333..340 " spaces" [] [] + 28: MD_TEXTUAL@340..341 + 0: MD_TEXTUAL_LITERAL@340..341 "\n" [] [] + 
29: MD_TEXTUAL@341..363 + 0: MD_TEXTUAL_LITERAL@341..363 "continues as paragraph" [] [] + 30: MD_TEXTUAL@363..364 + 0: MD_TEXTUAL_LITERAL@363..364 "\n" [] [] + 1: (empty) + 16: MD_NEWLINE@364..365 + 0: NEWLINE@364..365 "\n" [] [] + 17: MD_PARAGRAPH@365..378 + 0: MD_INLINE_ITEM_LIST@365..378 + 0: MD_TEXTUAL@365..377 + 0: MD_TEXTUAL_LITERAL@365..377 "Another para" [] [] + 1: MD_TEXTUAL@377..378 + 0: MD_TEXTUAL_LITERAL@377..378 "\n" [] [] + 1: (empty) + 18: MD_HEADER@378..416 + 0: MD_HASH_LIST@378..382 + 0: MD_HASH@378..382 + 0: HASH@378..382 "#" [Skipped(" "), Skipped(" "), Skipped(" ")] [] + 1: MD_PARAGRAPH@382..416 + 0: MD_INLINE_ITEM_LIST@382..416 + 0: MD_TEXTUAL@382..401 + 0: MD_TEXTUAL_LITERAL@382..401 " this IS a heading " [] [] + 1: MD_TEXTUAL@401..402 + 0: MD_TEXTUAL_LITERAL@401..402 "(" [] [] + 2: MD_TEXTUAL@402..415 + 0: MD_TEXTUAL_LITERAL@402..415 "only 3 spaces" [] [] + 3: MD_TEXTUAL@415..416 + 0: MD_TEXTUAL_LITERAL@415..416 ")" [] [] + 1: (empty) + 2: MD_HASH_LIST@416..416 + 19: MD_NEWLINE@416..417 + 0: NEWLINE@416..417 "\n" [] [] + 20: MD_NEWLINE@417..418 + 0: NEWLINE@417..418 "\n" [] [] + 21: MD_PARAGRAPH@418..505 + 0: MD_INLINE_ITEM_LIST@418..505 + 0: MD_TEXTUAL@418..442 + 0: MD_TEXTUAL_LITERAL@418..442 "Para with indented fence" [] [] + 1: MD_TEXTUAL@442..443 + 0: MD_TEXTUAL_LITERAL@442..443 "\n" [] [] + 2: MD_TEXTUAL@443..444 + 0: MD_TEXTUAL_LITERAL@443..444 " " [] [] + 3: MD_TEXTUAL@444..445 + 0: MD_TEXTUAL_LITERAL@444..445 " " [] [] + 4: MD_TEXTUAL@445..446 + 0: MD_TEXTUAL_LITERAL@445..446 " " [] [] + 5: MD_TEXTUAL@446..447 + 0: MD_TEXTUAL_LITERAL@446..447 " " [] [] + 6: MD_TEXTUAL@447..450 + 0: MD_TEXTUAL_LITERAL@447..450 "```" [] [] + 7: MD_TEXTUAL@450..451 + 0: MD_TEXTUAL_LITERAL@450..451 "\n" [] [] + 8: MD_TEXTUAL@451..452 + 0: MD_TEXTUAL_LITERAL@451..452 " " [] [] + 9: MD_TEXTUAL@452..453 + 0: MD_TEXTUAL_LITERAL@452..453 " " [] [] + 10: MD_TEXTUAL@453..454 + 0: MD_TEXTUAL_LITERAL@453..454 " " [] [] + 11: MD_TEXTUAL@454..455 + 0: 
MD_TEXTUAL_LITERAL@454..455 " " [] [] + 12: MD_TEXTUAL@455..471 + 0: MD_TEXTUAL_LITERAL@455..471 "not a code fence" [] [] + 13: MD_TEXTUAL@471..472 + 0: MD_TEXTUAL_LITERAL@471..472 "\n" [] [] + 14: MD_TEXTUAL@472..473 + 0: MD_TEXTUAL_LITERAL@472..473 " " [] [] + 15: MD_TEXTUAL@473..474 + 0: MD_TEXTUAL_LITERAL@473..474 " " [] [] + 16: MD_TEXTUAL@474..475 + 0: MD_TEXTUAL_LITERAL@474..475 " " [] [] + 17: MD_TEXTUAL@475..476 + 0: MD_TEXTUAL_LITERAL@475..476 " " [] [] + 18: MD_TEXTUAL@476..479 + 0: MD_TEXTUAL_LITERAL@476..479 "```" [] [] + 19: MD_TEXTUAL@479..480 + 0: MD_TEXTUAL_LITERAL@479..480 "\n" [] [] + 20: MD_TEXTUAL@480..504 + 0: MD_TEXTUAL_LITERAL@480..504 "still the same paragraph" [] [] + 21: MD_TEXTUAL@504..505 + 0: MD_TEXTUAL_LITERAL@504..505 "\n" [] [] + 1: (empty) + 2: EOF@505..505 "" [] [] + +``` diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/emphasis_complex.md b/crates/biome_markdown_parser/tests/md_test_suite/ok/emphasis_complex.md new file mode 100644 index 000000000000..b813c0dad3f0 --- /dev/null +++ b/crates/biome_markdown_parser/tests/md_test_suite/ok/emphasis_complex.md @@ -0,0 +1,13 @@ +Nested: **bold *and italic* text** + +Overlapping: *foo**bar**baz* + +Rule of 3: ***bold italic*** + +Multiple runs: *a **b** c* + +Not emphasis: * not * emphasis * + +Intraword: foo*bar*baz + +Intraword underscore: foo_bar_baz diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/emphasis_complex.md.snap b/crates/biome_markdown_parser/tests/md_test_suite/ok/emphasis_complex.md.snap new file mode 100644 index 000000000000..10e49ec3d451 --- /dev/null +++ b/crates/biome_markdown_parser/tests/md_test_suite/ok/emphasis_complex.md.snap @@ -0,0 +1,395 @@ +--- +source: crates/biome_markdown_parser/tests/spec_test.rs +expression: snapshot +--- +## Input + +``` +Nested: **bold *and italic* text** + +Overlapping: *foo**bar**baz* + +Rule of 3: ***bold italic*** + +Multiple runs: *a **b** c* + +Not emphasis: * not * emphasis * + +Intraword: 
foo*bar*baz + +Intraword underscore: foo_bar_baz + +``` + + +## AST + +``` +MdDocument { + bom_token: missing (optional), + value: MdBlockList [ + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@0..8 "Nested: " [] [], + }, + MdInlineEmphasis { + l_fence: DOUBLE_STAR@8..10 "**" [] [], + content: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@10..15 "bold " [] [], + }, + MdInlineItalic { + l_fence: STAR@15..16 "*" [] [], + content: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@16..26 "and italic" [] [], + }, + ], + r_fence: STAR@26..27 "*" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@27..32 " text" [] [], + }, + ], + r_fence: DOUBLE_STAR@32..34 "**" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@34..35 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + MdNewline { + value_token: NEWLINE@35..36 "\n" [] [], + }, + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@36..49 "Overlapping: " [] [], + }, + MdInlineItalic { + l_fence: STAR@49..50 "*" [] [], + content: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@50..53 "foo" [] [], + }, + MdInlineEmphasis { + l_fence: DOUBLE_STAR@53..55 "**" [] [], + content: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@55..58 "bar" [] [], + }, + ], + r_fence: DOUBLE_STAR@58..60 "**" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@60..63 "baz" [] [], + }, + ], + r_fence: STAR@63..64 "*" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@64..65 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + MdNewline { + value_token: NEWLINE@65..66 "\n" [] [], + }, + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@66..77 "Rule of 3: " [] [], + }, + MdInlineEmphasis { + l_fence: DOUBLE_STAR@77..79 "**" [] [], + content: MdInlineItemList [ + MdTextual { + value_token: 
MD_TEXTUAL_LITERAL@79..80 "*" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@80..91 "bold italic" [] [], + }, + ], + r_fence: DOUBLE_STAR@91..93 "**" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@93..94 "*" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@94..95 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + MdNewline { + value_token: NEWLINE@95..96 "\n" [] [], + }, + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@96..111 "Multiple runs: " [] [], + }, + MdInlineItalic { + l_fence: STAR@111..112 "*" [] [], + content: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@112..114 "a " [] [], + }, + MdInlineEmphasis { + l_fence: DOUBLE_STAR@114..116 "**" [] [], + content: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@116..117 "b" [] [], + }, + ], + r_fence: DOUBLE_STAR@117..119 "**" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@119..121 " c" [] [], + }, + ], + r_fence: STAR@121..122 "*" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@122..123 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + MdNewline { + value_token: NEWLINE@123..124 "\n" [] [], + }, + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@124..138 "Not emphasis: " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@138..139 "*" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@139..144 " not " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@144..145 "*" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@145..155 " emphasis " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@155..156 "*" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@156..157 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + MdNewline { + value_token: NEWLINE@157..158 "\n" [] [], + }, + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: 
MD_TEXTUAL_LITERAL@158..172 "Intraword: foo" [] [], + }, + MdInlineItalic { + l_fence: STAR@172..173 "*" [] [], + content: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@173..176 "bar" [] [], + }, + ], + r_fence: STAR@176..177 "*" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@177..180 "baz" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@180..181 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + MdNewline { + value_token: NEWLINE@181..182 "\n" [] [], + }, + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@182..207 "Intraword underscore: foo" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@207..208 "_" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@208..211 "bar" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@211..212 "_" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@212..215 "baz" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@215..216 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + ], + eof_token: EOF@216..216 "" [] [], +} +``` + +## CST + +``` +0: MD_DOCUMENT@0..216 + 0: (empty) + 1: MD_BLOCK_LIST@0..216 + 0: MD_PARAGRAPH@0..35 + 0: MD_INLINE_ITEM_LIST@0..35 + 0: MD_TEXTUAL@0..8 + 0: MD_TEXTUAL_LITERAL@0..8 "Nested: " [] [] + 1: MD_INLINE_EMPHASIS@8..34 + 0: DOUBLE_STAR@8..10 "**" [] [] + 1: MD_INLINE_ITEM_LIST@10..32 + 0: MD_TEXTUAL@10..15 + 0: MD_TEXTUAL_LITERAL@10..15 "bold " [] [] + 1: MD_INLINE_ITALIC@15..27 + 0: STAR@15..16 "*" [] [] + 1: MD_INLINE_ITEM_LIST@16..26 + 0: MD_TEXTUAL@16..26 + 0: MD_TEXTUAL_LITERAL@16..26 "and italic" [] [] + 2: STAR@26..27 "*" [] [] + 2: MD_TEXTUAL@27..32 + 0: MD_TEXTUAL_LITERAL@27..32 " text" [] [] + 2: DOUBLE_STAR@32..34 "**" [] [] + 2: MD_TEXTUAL@34..35 + 0: MD_TEXTUAL_LITERAL@34..35 "\n" [] [] + 1: (empty) + 1: MD_NEWLINE@35..36 + 0: NEWLINE@35..36 "\n" [] [] + 2: MD_PARAGRAPH@36..65 + 0: MD_INLINE_ITEM_LIST@36..65 + 0: MD_TEXTUAL@36..49 + 0: 
MD_TEXTUAL_LITERAL@36..49 "Overlapping: " [] [] + 1: MD_INLINE_ITALIC@49..64 + 0: STAR@49..50 "*" [] [] + 1: MD_INLINE_ITEM_LIST@50..63 + 0: MD_TEXTUAL@50..53 + 0: MD_TEXTUAL_LITERAL@50..53 "foo" [] [] + 1: MD_INLINE_EMPHASIS@53..60 + 0: DOUBLE_STAR@53..55 "**" [] [] + 1: MD_INLINE_ITEM_LIST@55..58 + 0: MD_TEXTUAL@55..58 + 0: MD_TEXTUAL_LITERAL@55..58 "bar" [] [] + 2: DOUBLE_STAR@58..60 "**" [] [] + 2: MD_TEXTUAL@60..63 + 0: MD_TEXTUAL_LITERAL@60..63 "baz" [] [] + 2: STAR@63..64 "*" [] [] + 2: MD_TEXTUAL@64..65 + 0: MD_TEXTUAL_LITERAL@64..65 "\n" [] [] + 1: (empty) + 3: MD_NEWLINE@65..66 + 0: NEWLINE@65..66 "\n" [] [] + 4: MD_PARAGRAPH@66..95 + 0: MD_INLINE_ITEM_LIST@66..95 + 0: MD_TEXTUAL@66..77 + 0: MD_TEXTUAL_LITERAL@66..77 "Rule of 3: " [] [] + 1: MD_INLINE_EMPHASIS@77..93 + 0: DOUBLE_STAR@77..79 "**" [] [] + 1: MD_INLINE_ITEM_LIST@79..91 + 0: MD_TEXTUAL@79..80 + 0: MD_TEXTUAL_LITERAL@79..80 "*" [] [] + 1: MD_TEXTUAL@80..91 + 0: MD_TEXTUAL_LITERAL@80..91 "bold italic" [] [] + 2: DOUBLE_STAR@91..93 "**" [] [] + 2: MD_TEXTUAL@93..94 + 0: MD_TEXTUAL_LITERAL@93..94 "*" [] [] + 3: MD_TEXTUAL@94..95 + 0: MD_TEXTUAL_LITERAL@94..95 "\n" [] [] + 1: (empty) + 5: MD_NEWLINE@95..96 + 0: NEWLINE@95..96 "\n" [] [] + 6: MD_PARAGRAPH@96..123 + 0: MD_INLINE_ITEM_LIST@96..123 + 0: MD_TEXTUAL@96..111 + 0: MD_TEXTUAL_LITERAL@96..111 "Multiple runs: " [] [] + 1: MD_INLINE_ITALIC@111..122 + 0: STAR@111..112 "*" [] [] + 1: MD_INLINE_ITEM_LIST@112..121 + 0: MD_TEXTUAL@112..114 + 0: MD_TEXTUAL_LITERAL@112..114 "a " [] [] + 1: MD_INLINE_EMPHASIS@114..119 + 0: DOUBLE_STAR@114..116 "**" [] [] + 1: MD_INLINE_ITEM_LIST@116..117 + 0: MD_TEXTUAL@116..117 + 0: MD_TEXTUAL_LITERAL@116..117 "b" [] [] + 2: DOUBLE_STAR@117..119 "**" [] [] + 2: MD_TEXTUAL@119..121 + 0: MD_TEXTUAL_LITERAL@119..121 " c" [] [] + 2: STAR@121..122 "*" [] [] + 2: MD_TEXTUAL@122..123 + 0: MD_TEXTUAL_LITERAL@122..123 "\n" [] [] + 1: (empty) + 7: MD_NEWLINE@123..124 + 0: NEWLINE@123..124 "\n" [] [] + 8: MD_PARAGRAPH@124..157 
+ 0: MD_INLINE_ITEM_LIST@124..157 + 0: MD_TEXTUAL@124..138 + 0: MD_TEXTUAL_LITERAL@124..138 "Not emphasis: " [] [] + 1: MD_TEXTUAL@138..139 + 0: MD_TEXTUAL_LITERAL@138..139 "*" [] [] + 2: MD_TEXTUAL@139..144 + 0: MD_TEXTUAL_LITERAL@139..144 " not " [] [] + 3: MD_TEXTUAL@144..145 + 0: MD_TEXTUAL_LITERAL@144..145 "*" [] [] + 4: MD_TEXTUAL@145..155 + 0: MD_TEXTUAL_LITERAL@145..155 " emphasis " [] [] + 5: MD_TEXTUAL@155..156 + 0: MD_TEXTUAL_LITERAL@155..156 "*" [] [] + 6: MD_TEXTUAL@156..157 + 0: MD_TEXTUAL_LITERAL@156..157 "\n" [] [] + 1: (empty) + 9: MD_NEWLINE@157..158 + 0: NEWLINE@157..158 "\n" [] [] + 10: MD_PARAGRAPH@158..181 + 0: MD_INLINE_ITEM_LIST@158..181 + 0: MD_TEXTUAL@158..172 + 0: MD_TEXTUAL_LITERAL@158..172 "Intraword: foo" [] [] + 1: MD_INLINE_ITALIC@172..177 + 0: STAR@172..173 "*" [] [] + 1: MD_INLINE_ITEM_LIST@173..176 + 0: MD_TEXTUAL@173..176 + 0: MD_TEXTUAL_LITERAL@173..176 "bar" [] [] + 2: STAR@176..177 "*" [] [] + 2: MD_TEXTUAL@177..180 + 0: MD_TEXTUAL_LITERAL@177..180 "baz" [] [] + 3: MD_TEXTUAL@180..181 + 0: MD_TEXTUAL_LITERAL@180..181 "\n" [] [] + 1: (empty) + 11: MD_NEWLINE@181..182 + 0: NEWLINE@181..182 "\n" [] [] + 12: MD_PARAGRAPH@182..216 + 0: MD_INLINE_ITEM_LIST@182..216 + 0: MD_TEXTUAL@182..207 + 0: MD_TEXTUAL_LITERAL@182..207 "Intraword underscore: foo" [] [] + 1: MD_TEXTUAL@207..208 + 0: MD_TEXTUAL_LITERAL@207..208 "_" [] [] + 2: MD_TEXTUAL@208..211 + 0: MD_TEXTUAL_LITERAL@208..211 "bar" [] [] + 3: MD_TEXTUAL@211..212 + 0: MD_TEXTUAL_LITERAL@211..212 "_" [] [] + 4: MD_TEXTUAL@212..215 + 0: MD_TEXTUAL_LITERAL@212..215 "baz" [] [] + 5: MD_TEXTUAL@215..216 + 0: MD_TEXTUAL_LITERAL@215..216 "\n" [] [] + 1: (empty) + 2: EOF@216..216 "" [] [] + +``` diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/emphasis_crossing.md b/crates/biome_markdown_parser/tests/md_test_suite/ok/emphasis_crossing.md new file mode 100644 index 000000000000..3cfca8723433 --- /dev/null +++ 
b/crates/biome_markdown_parser/tests/md_test_suite/ok/emphasis_crossing.md @@ -0,0 +1 @@ +Crossing: **a *b** c* diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/emphasis_crossing.md.snap b/crates/biome_markdown_parser/tests/md_test_suite/ok/emphasis_crossing.md.snap new file mode 100644 index 000000000000..bdbaf86e823d --- /dev/null +++ b/crates/biome_markdown_parser/tests/md_test_suite/ok/emphasis_crossing.md.snap @@ -0,0 +1,85 @@ +--- +source: crates/biome_markdown_parser/tests/spec_test.rs +expression: snapshot +--- +## Input + +``` +Crossing: **a *b** c* + +``` + + +## AST + +``` +MdDocument { + bom_token: missing (optional), + value: MdBlockList [ + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@0..10 "Crossing: " [] [], + }, + MdInlineEmphasis { + l_fence: DOUBLE_STAR@10..12 "**" [] [], + content: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@12..14 "a " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@14..15 "*" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@15..16 "b" [] [], + }, + ], + r_fence: DOUBLE_STAR@16..18 "**" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@18..20 " c" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@20..21 "*" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@21..22 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + ], + eof_token: EOF@22..22 "" [] [], +} +``` + +## CST + +``` +0: MD_DOCUMENT@0..22 + 0: (empty) + 1: MD_BLOCK_LIST@0..22 + 0: MD_PARAGRAPH@0..22 + 0: MD_INLINE_ITEM_LIST@0..22 + 0: MD_TEXTUAL@0..10 + 0: MD_TEXTUAL_LITERAL@0..10 "Crossing: " [] [] + 1: MD_INLINE_EMPHASIS@10..18 + 0: DOUBLE_STAR@10..12 "**" [] [] + 1: MD_INLINE_ITEM_LIST@12..16 + 0: MD_TEXTUAL@12..14 + 0: MD_TEXTUAL_LITERAL@12..14 "a " [] [] + 1: MD_TEXTUAL@14..15 + 0: MD_TEXTUAL_LITERAL@14..15 "*" [] [] + 2: MD_TEXTUAL@15..16 + 0: MD_TEXTUAL_LITERAL@15..16 "b" [] [] + 2: DOUBLE_STAR@16..18 "**" [] [] + 2: 
MD_TEXTUAL@18..20 + 0: MD_TEXTUAL_LITERAL@18..20 " c" [] [] + 3: MD_TEXTUAL@20..21 + 0: MD_TEXTUAL_LITERAL@20..21 "*" [] [] + 4: MD_TEXTUAL@21..22 + 0: MD_TEXTUAL_LITERAL@21..22 "\n" [] [] + 1: (empty) + 2: EOF@22..22 "" [] [] + +``` diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/emphasis_edge_cases.md b/crates/biome_markdown_parser/tests/md_test_suite/ok/emphasis_edge_cases.md new file mode 100644 index 000000000000..b22601f3ad7c --- /dev/null +++ b/crates/biome_markdown_parser/tests/md_test_suite/ok/emphasis_edge_cases.md @@ -0,0 +1,17 @@ +Code spans block emphasis: `*not emphasis*` + +Escaped asterisks: \*not emphasis\* + +HTML with asterisks: *between tags* + +HTML attribute: attr + +Autolink: + +Link text: [*emphasis*](https://example.com) + +Reference link text: [*emphasis*][label] + +Image alt: ![*emphasis*][label] + +[label]: https://example.com diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/emphasis_edge_cases.md.snap b/crates/biome_markdown_parser/tests/md_test_suite/ok/emphasis_edge_cases.md.snap new file mode 100644 index 000000000000..b663aaa39063 --- /dev/null +++ b/crates/biome_markdown_parser/tests/md_test_suite/ok/emphasis_edge_cases.md.snap @@ -0,0 +1,600 @@ +--- +source: crates/biome_markdown_parser/tests/spec_test.rs +expression: snapshot +--- +## Input + +``` +Code spans block emphasis: `*not emphasis*` + +Escaped asterisks: \*not emphasis\* + +HTML with asterisks: *between tags* + +HTML attribute: attr + +Autolink: + +Link text: [*emphasis*](https://example.com) + +Reference link text: [*emphasis*][label] + +Image alt: ![*emphasis*][label] + +[label]: https://example.com + +``` + + +## AST + +``` +MdDocument { + bom_token: missing (optional), + value: MdBlockList [ + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@0..27 "Code spans block emphasis: " [] [], + }, + MdInlineCode { + l_tick_token: BACKTICK@27..28 "`" [] [], + content: MdInlineItemList [ + MdTextual { + 
value_token: MD_TEXTUAL_LITERAL@28..29 "*" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@29..41 "not emphasis" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@41..42 "*" [] [], + }, + ], + r_tick_token: BACKTICK@42..43 "`" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@43..44 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + MdNewline { + value_token: NEWLINE@44..45 "\n" [] [], + }, + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@45..64 "Escaped asterisks: " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@64..66 "\\*" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@66..78 "not emphasis" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@78..80 "\\*" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@80..81 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + MdNewline { + value_token: NEWLINE@81..82 "\n" [] [], + }, + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@82..103 "HTML with asterisks: " [] [], + }, + MdInlineHtml { + value: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@103..104 "<" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@104..108 "span" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@108..109 ">" [] [], + }, + ], + }, + MdInlineItalic { + l_fence: STAR@109..110 "*" [] [], + content: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@110..122 "between tags" [] [], + }, + ], + r_fence: STAR@122..123 "*" [] [], + }, + MdInlineHtml { + value: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@123..124 "<" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@124..129 "/span" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@129..130 ">" [] [], + }, + ], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@130..131 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + 
MdNewline { + value_token: NEWLINE@131..132 "\n" [] [], + }, + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@132..148 "HTML attribute: " [] [], + }, + MdInlineHtml { + value: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@148..149 "<" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@149..161 "span title=\"" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@161..162 "*" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@162..174 "not emphasis" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@174..175 "*" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@175..176 "\"" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@176..177 ">" [] [], + }, + ], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@177..181 "attr" [] [], + }, + MdInlineHtml { + value: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@181..182 "<" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@182..187 "/span" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@187..188 ">" [] [], + }, + ], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@188..189 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + MdNewline { + value_token: NEWLINE@189..190 "\n" [] [], + }, + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@190..200 "Autolink: " [] [], + }, + MdAutolink { + l_angle_token: L_ANGLE@200..201 "<" [] [], + value: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@201..221 "https://example.com/" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@221..222 "*" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@222..225 "not" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@225..226 "-" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@226..234 "emphasis" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@234..235 "*" [] [], + }, + ], 
+ r_angle_token: R_ANGLE@235..236 ">" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@236..237 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + MdNewline { + value_token: NEWLINE@237..238 "\n" [] [], + }, + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@238..249 "Link text: " [] [], + }, + MdInlineLink { + l_brack_token: L_BRACK@249..250 "[" [] [], + text: MdInlineItemList [ + MdInlineItalic { + l_fence: STAR@250..251 "*" [] [], + content: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@251..259 "emphasis" [] [], + }, + ], + r_fence: STAR@259..260 "*" [] [], + }, + ], + r_brack_token: R_BRACK@260..261 "]" [] [], + l_paren_token: L_PAREN@261..262 "(" [] [], + destination: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@262..281 "https://example.com" [] [], + }, + ], + title: missing (optional), + r_paren_token: R_PAREN@281..282 ")" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@282..283 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + MdNewline { + value_token: NEWLINE@283..284 "\n" [] [], + }, + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@284..305 "Reference link text: " [] [], + }, + MdReferenceLink { + l_brack_token: L_BRACK@305..306 "[" [] [], + text: MdInlineItemList [ + MdInlineItalic { + l_fence: STAR@306..307 "*" [] [], + content: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@307..315 "emphasis" [] [], + }, + ], + r_fence: STAR@315..316 "*" [] [], + }, + ], + r_brack_token: R_BRACK@316..317 "]" [] [], + label: MdReferenceLinkLabel { + l_brack_token: L_BRACK@317..318 "[" [] [], + label: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@318..323 "label" [] [], + }, + ], + r_brack_token: R_BRACK@323..324 "]" [] [], + }, + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@324..325 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + MdNewline 
{ + value_token: NEWLINE@325..326 "\n" [] [], + }, + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@326..337 "Image alt: " [] [], + }, + MdReferenceImage { + excl_token: BANG@337..338 "!" [] [], + l_brack_token: L_BRACK@338..339 "[" [] [], + alt: MdInlineItemList [ + MdInlineItalic { + l_fence: STAR@339..340 "*" [] [], + content: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@340..348 "emphasis" [] [], + }, + ], + r_fence: STAR@348..349 "*" [] [], + }, + ], + r_brack_token: R_BRACK@349..350 "]" [] [], + label: MdReferenceLinkLabel { + l_brack_token: L_BRACK@350..351 "[" [] [], + label: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@351..356 "label" [] [], + }, + ], + r_brack_token: R_BRACK@356..357 "]" [] [], + }, + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@357..358 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + MdNewline { + value_token: NEWLINE@358..359 "\n" [] [], + }, + MdLinkReferenceDefinition { + l_brack_token: L_BRACK@359..360 "[" [] [], + label: MdLinkLabel { + content: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@360..365 "label" [] [], + }, + ], + }, + r_brack_token: R_BRACK@365..366 "]" [] [], + colon_token: COLON@366..367 ":" [] [], + destination: MdLinkDestination { + content: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@367..368 " " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@368..387 "https://example.com" [] [], + }, + ], + }, + title: missing (optional), + }, + MdNewline { + value_token: NEWLINE@387..388 "\n" [] [], + }, + ], + eof_token: EOF@388..388 "" [] [], +} +``` + +## CST + +``` +0: MD_DOCUMENT@0..388 + 0: (empty) + 1: MD_BLOCK_LIST@0..388 + 0: MD_PARAGRAPH@0..44 + 0: MD_INLINE_ITEM_LIST@0..44 + 0: MD_TEXTUAL@0..27 + 0: MD_TEXTUAL_LITERAL@0..27 "Code spans block emphasis: " [] [] + 1: MD_INLINE_CODE@27..43 + 0: BACKTICK@27..28 "`" [] [] + 1: MD_INLINE_ITEM_LIST@28..42 + 0: 
MD_TEXTUAL@28..29 + 0: MD_TEXTUAL_LITERAL@28..29 "*" [] [] + 1: MD_TEXTUAL@29..41 + 0: MD_TEXTUAL_LITERAL@29..41 "not emphasis" [] [] + 2: MD_TEXTUAL@41..42 + 0: MD_TEXTUAL_LITERAL@41..42 "*" [] [] + 2: BACKTICK@42..43 "`" [] [] + 2: MD_TEXTUAL@43..44 + 0: MD_TEXTUAL_LITERAL@43..44 "\n" [] [] + 1: (empty) + 1: MD_NEWLINE@44..45 + 0: NEWLINE@44..45 "\n" [] [] + 2: MD_PARAGRAPH@45..81 + 0: MD_INLINE_ITEM_LIST@45..81 + 0: MD_TEXTUAL@45..64 + 0: MD_TEXTUAL_LITERAL@45..64 "Escaped asterisks: " [] [] + 1: MD_TEXTUAL@64..66 + 0: MD_TEXTUAL_LITERAL@64..66 "\\*" [] [] + 2: MD_TEXTUAL@66..78 + 0: MD_TEXTUAL_LITERAL@66..78 "not emphasis" [] [] + 3: MD_TEXTUAL@78..80 + 0: MD_TEXTUAL_LITERAL@78..80 "\\*" [] [] + 4: MD_TEXTUAL@80..81 + 0: MD_TEXTUAL_LITERAL@80..81 "\n" [] [] + 1: (empty) + 3: MD_NEWLINE@81..82 + 0: NEWLINE@81..82 "\n" [] [] + 4: MD_PARAGRAPH@82..131 + 0: MD_INLINE_ITEM_LIST@82..131 + 0: MD_TEXTUAL@82..103 + 0: MD_TEXTUAL_LITERAL@82..103 "HTML with asterisks: " [] [] + 1: MD_INLINE_HTML@103..109 + 0: MD_INLINE_ITEM_LIST@103..109 + 0: MD_TEXTUAL@103..104 + 0: MD_TEXTUAL_LITERAL@103..104 "<" [] [] + 1: MD_TEXTUAL@104..108 + 0: MD_TEXTUAL_LITERAL@104..108 "span" [] [] + 2: MD_TEXTUAL@108..109 + 0: MD_TEXTUAL_LITERAL@108..109 ">" [] [] + 2: MD_INLINE_ITALIC@109..123 + 0: STAR@109..110 "*" [] [] + 1: MD_INLINE_ITEM_LIST@110..122 + 0: MD_TEXTUAL@110..122 + 0: MD_TEXTUAL_LITERAL@110..122 "between tags" [] [] + 2: STAR@122..123 "*" [] [] + 3: MD_INLINE_HTML@123..130 + 0: MD_INLINE_ITEM_LIST@123..130 + 0: MD_TEXTUAL@123..124 + 0: MD_TEXTUAL_LITERAL@123..124 "<" [] [] + 1: MD_TEXTUAL@124..129 + 0: MD_TEXTUAL_LITERAL@124..129 "/span" [] [] + 2: MD_TEXTUAL@129..130 + 0: MD_TEXTUAL_LITERAL@129..130 ">" [] [] + 4: MD_TEXTUAL@130..131 + 0: MD_TEXTUAL_LITERAL@130..131 "\n" [] [] + 1: (empty) + 5: MD_NEWLINE@131..132 + 0: NEWLINE@131..132 "\n" [] [] + 6: MD_PARAGRAPH@132..189 + 0: MD_INLINE_ITEM_LIST@132..189 + 0: MD_TEXTUAL@132..148 + 0: MD_TEXTUAL_LITERAL@132..148 "HTML 
attribute: " [] [] + 1: MD_INLINE_HTML@148..177 + 0: MD_INLINE_ITEM_LIST@148..177 + 0: MD_TEXTUAL@148..149 + 0: MD_TEXTUAL_LITERAL@148..149 "<" [] [] + 1: MD_TEXTUAL@149..161 + 0: MD_TEXTUAL_LITERAL@149..161 "span title=\"" [] [] + 2: MD_TEXTUAL@161..162 + 0: MD_TEXTUAL_LITERAL@161..162 "*" [] [] + 3: MD_TEXTUAL@162..174 + 0: MD_TEXTUAL_LITERAL@162..174 "not emphasis" [] [] + 4: MD_TEXTUAL@174..175 + 0: MD_TEXTUAL_LITERAL@174..175 "*" [] [] + 5: MD_TEXTUAL@175..176 + 0: MD_TEXTUAL_LITERAL@175..176 "\"" [] [] + 6: MD_TEXTUAL@176..177 + 0: MD_TEXTUAL_LITERAL@176..177 ">" [] [] + 2: MD_TEXTUAL@177..181 + 0: MD_TEXTUAL_LITERAL@177..181 "attr" [] [] + 3: MD_INLINE_HTML@181..188 + 0: MD_INLINE_ITEM_LIST@181..188 + 0: MD_TEXTUAL@181..182 + 0: MD_TEXTUAL_LITERAL@181..182 "<" [] [] + 1: MD_TEXTUAL@182..187 + 0: MD_TEXTUAL_LITERAL@182..187 "/span" [] [] + 2: MD_TEXTUAL@187..188 + 0: MD_TEXTUAL_LITERAL@187..188 ">" [] [] + 4: MD_TEXTUAL@188..189 + 0: MD_TEXTUAL_LITERAL@188..189 "\n" [] [] + 1: (empty) + 7: MD_NEWLINE@189..190 + 0: NEWLINE@189..190 "\n" [] [] + 8: MD_PARAGRAPH@190..237 + 0: MD_INLINE_ITEM_LIST@190..237 + 0: MD_TEXTUAL@190..200 + 0: MD_TEXTUAL_LITERAL@190..200 "Autolink: " [] [] + 1: MD_AUTOLINK@200..236 + 0: L_ANGLE@200..201 "<" [] [] + 1: MD_INLINE_ITEM_LIST@201..235 + 0: MD_TEXTUAL@201..221 + 0: MD_TEXTUAL_LITERAL@201..221 "https://example.com/" [] [] + 1: MD_TEXTUAL@221..222 + 0: MD_TEXTUAL_LITERAL@221..222 "*" [] [] + 2: MD_TEXTUAL@222..225 + 0: MD_TEXTUAL_LITERAL@222..225 "not" [] [] + 3: MD_TEXTUAL@225..226 + 0: MD_TEXTUAL_LITERAL@225..226 "-" [] [] + 4: MD_TEXTUAL@226..234 + 0: MD_TEXTUAL_LITERAL@226..234 "emphasis" [] [] + 5: MD_TEXTUAL@234..235 + 0: MD_TEXTUAL_LITERAL@234..235 "*" [] [] + 2: R_ANGLE@235..236 ">" [] [] + 2: MD_TEXTUAL@236..237 + 0: MD_TEXTUAL_LITERAL@236..237 "\n" [] [] + 1: (empty) + 9: MD_NEWLINE@237..238 + 0: NEWLINE@237..238 "\n" [] [] + 10: MD_PARAGRAPH@238..283 + 0: MD_INLINE_ITEM_LIST@238..283 + 0: MD_TEXTUAL@238..249 + 0: 
MD_TEXTUAL_LITERAL@238..249 "Link text: " [] [] + 1: MD_INLINE_LINK@249..282 + 0: L_BRACK@249..250 "[" [] [] + 1: MD_INLINE_ITEM_LIST@250..260 + 0: MD_INLINE_ITALIC@250..260 + 0: STAR@250..251 "*" [] [] + 1: MD_INLINE_ITEM_LIST@251..259 + 0: MD_TEXTUAL@251..259 + 0: MD_TEXTUAL_LITERAL@251..259 "emphasis" [] [] + 2: STAR@259..260 "*" [] [] + 2: R_BRACK@260..261 "]" [] [] + 3: L_PAREN@261..262 "(" [] [] + 4: MD_INLINE_ITEM_LIST@262..281 + 0: MD_TEXTUAL@262..281 + 0: MD_TEXTUAL_LITERAL@262..281 "https://example.com" [] [] + 5: (empty) + 6: R_PAREN@281..282 ")" [] [] + 2: MD_TEXTUAL@282..283 + 0: MD_TEXTUAL_LITERAL@282..283 "\n" [] [] + 1: (empty) + 11: MD_NEWLINE@283..284 + 0: NEWLINE@283..284 "\n" [] [] + 12: MD_PARAGRAPH@284..325 + 0: MD_INLINE_ITEM_LIST@284..325 + 0: MD_TEXTUAL@284..305 + 0: MD_TEXTUAL_LITERAL@284..305 "Reference link text: " [] [] + 1: MD_REFERENCE_LINK@305..324 + 0: L_BRACK@305..306 "[" [] [] + 1: MD_INLINE_ITEM_LIST@306..316 + 0: MD_INLINE_ITALIC@306..316 + 0: STAR@306..307 "*" [] [] + 1: MD_INLINE_ITEM_LIST@307..315 + 0: MD_TEXTUAL@307..315 + 0: MD_TEXTUAL_LITERAL@307..315 "emphasis" [] [] + 2: STAR@315..316 "*" [] [] + 2: R_BRACK@316..317 "]" [] [] + 3: MD_REFERENCE_LINK_LABEL@317..324 + 0: L_BRACK@317..318 "[" [] [] + 1: MD_INLINE_ITEM_LIST@318..323 + 0: MD_TEXTUAL@318..323 + 0: MD_TEXTUAL_LITERAL@318..323 "label" [] [] + 2: R_BRACK@323..324 "]" [] [] + 2: MD_TEXTUAL@324..325 + 0: MD_TEXTUAL_LITERAL@324..325 "\n" [] [] + 1: (empty) + 13: MD_NEWLINE@325..326 + 0: NEWLINE@325..326 "\n" [] [] + 14: MD_PARAGRAPH@326..358 + 0: MD_INLINE_ITEM_LIST@326..358 + 0: MD_TEXTUAL@326..337 + 0: MD_TEXTUAL_LITERAL@326..337 "Image alt: " [] [] + 1: MD_REFERENCE_IMAGE@337..357 + 0: BANG@337..338 "!" 
[] [] + 1: L_BRACK@338..339 "[" [] [] + 2: MD_INLINE_ITEM_LIST@339..349 + 0: MD_INLINE_ITALIC@339..349 + 0: STAR@339..340 "*" [] [] + 1: MD_INLINE_ITEM_LIST@340..348 + 0: MD_TEXTUAL@340..348 + 0: MD_TEXTUAL_LITERAL@340..348 "emphasis" [] [] + 2: STAR@348..349 "*" [] [] + 3: R_BRACK@349..350 "]" [] [] + 4: MD_REFERENCE_LINK_LABEL@350..357 + 0: L_BRACK@350..351 "[" [] [] + 1: MD_INLINE_ITEM_LIST@351..356 + 0: MD_TEXTUAL@351..356 + 0: MD_TEXTUAL_LITERAL@351..356 "label" [] [] + 2: R_BRACK@356..357 "]" [] [] + 2: MD_TEXTUAL@357..358 + 0: MD_TEXTUAL_LITERAL@357..358 "\n" [] [] + 1: (empty) + 15: MD_NEWLINE@358..359 + 0: NEWLINE@358..359 "\n" [] [] + 16: MD_LINK_REFERENCE_DEFINITION@359..387 + 0: L_BRACK@359..360 "[" [] [] + 1: MD_LINK_LABEL@360..365 + 0: MD_INLINE_ITEM_LIST@360..365 + 0: MD_TEXTUAL@360..365 + 0: MD_TEXTUAL_LITERAL@360..365 "label" [] [] + 2: R_BRACK@365..366 "]" [] [] + 3: COLON@366..367 ":" [] [] + 4: MD_LINK_DESTINATION@367..387 + 0: MD_INLINE_ITEM_LIST@367..387 + 0: MD_TEXTUAL@367..368 + 0: MD_TEXTUAL_LITERAL@367..368 " " [] [] + 1: MD_TEXTUAL@368..387 + 0: MD_TEXTUAL_LITERAL@368..387 "https://example.com" [] [] + 5: (empty) + 17: MD_NEWLINE@387..388 + 0: NEWLINE@387..388 "\n" [] [] + 2: EOF@388..388 "" [] [] + +``` diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/emphasis_flanking.md b/crates/biome_markdown_parser/tests/md_test_suite/ok/emphasis_flanking.md new file mode 100644 index 000000000000..7932e8867517 --- /dev/null +++ b/crates/biome_markdown_parser/tests/md_test_suite/ok/emphasis_flanking.md @@ -0,0 +1,11 @@ +Valid emphasis: *italic* and **bold** + +Valid underscore: _italic_ and __bold__ + +Asterisk after space: foo * bar* should not be emphasis + +Underscore intraword: foo_bar_baz (not emphasis) + +Valid underscore at word boundary: _word_ + +Mixed: foo *bar* baz diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/emphasis_flanking.md.snap 
b/crates/biome_markdown_parser/tests/md_test_suite/ok/emphasis_flanking.md.snap new file mode 100644 index 000000000000..a4b423b8a131 --- /dev/null +++ b/crates/biome_markdown_parser/tests/md_test_suite/ok/emphasis_flanking.md.snap @@ -0,0 +1,335 @@ +--- +source: crates/biome_markdown_parser/tests/spec_test.rs +expression: snapshot +--- +## Input + +``` +Valid emphasis: *italic* and **bold** + +Valid underscore: _italic_ and __bold__ + +Asterisk after space: foo * bar* should not be emphasis + +Underscore intraword: foo_bar_baz (not emphasis) + +Valid underscore at word boundary: _word_ + +Mixed: foo *bar* baz + +``` + + +## AST + +``` +MdDocument { + bom_token: missing (optional), + value: MdBlockList [ + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@0..16 "Valid emphasis: " [] [], + }, + MdInlineItalic { + l_fence: STAR@16..17 "*" [] [], + content: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@17..23 "italic" [] [], + }, + ], + r_fence: STAR@23..24 "*" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@24..29 " and " [] [], + }, + MdInlineEmphasis { + l_fence: DOUBLE_STAR@29..31 "**" [] [], + content: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@31..35 "bold" [] [], + }, + ], + r_fence: DOUBLE_STAR@35..37 "**" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@37..38 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + MdNewline { + value_token: NEWLINE@38..39 "\n" [] [], + }, + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@39..57 "Valid underscore: " [] [], + }, + MdInlineItalic { + l_fence: UNDERSCORE@57..58 "_" [] [], + content: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@58..64 "italic" [] [], + }, + ], + r_fence: UNDERSCORE@64..65 "_" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@65..70 " and " [] [], + }, + MdInlineEmphasis { + l_fence: DOUBLE_UNDERSCORE@70..72 "__" 
[] [], + content: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@72..76 "bold" [] [], + }, + ], + r_fence: DOUBLE_UNDERSCORE@76..78 "__" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@78..79 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + MdNewline { + value_token: NEWLINE@79..80 "\n" [] [], + }, + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@80..106 "Asterisk after space: foo " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@106..107 "*" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@107..111 " bar" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@111..112 "*" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@112..135 " should not be emphasis" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@135..136 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + MdNewline { + value_token: NEWLINE@136..137 "\n" [] [], + }, + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@137..162 "Underscore intraword: foo" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@162..163 "_" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@163..166 "bar" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@166..167 "_" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@167..171 "baz " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@171..172 "(" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@172..184 "not emphasis" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@184..185 ")" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@185..186 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + MdNewline { + value_token: NEWLINE@186..187 "\n" [] [], + }, + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@187..222 "Valid underscore at word boundary: " [] [], + }, + MdInlineItalic { + 
l_fence: UNDERSCORE@222..223 "_" [] [], + content: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@223..227 "word" [] [], + }, + ], + r_fence: UNDERSCORE@227..228 "_" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@228..229 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + MdNewline { + value_token: NEWLINE@229..230 "\n" [] [], + }, + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@230..241 "Mixed: foo " [] [], + }, + MdInlineItalic { + l_fence: STAR@241..242 "*" [] [], + content: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@242..245 "bar" [] [], + }, + ], + r_fence: STAR@245..246 "*" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@246..250 " baz" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@250..251 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + ], + eof_token: EOF@251..251 "" [] [], +} +``` + +## CST + +``` +0: MD_DOCUMENT@0..251 + 0: (empty) + 1: MD_BLOCK_LIST@0..251 + 0: MD_PARAGRAPH@0..38 + 0: MD_INLINE_ITEM_LIST@0..38 + 0: MD_TEXTUAL@0..16 + 0: MD_TEXTUAL_LITERAL@0..16 "Valid emphasis: " [] [] + 1: MD_INLINE_ITALIC@16..24 + 0: STAR@16..17 "*" [] [] + 1: MD_INLINE_ITEM_LIST@17..23 + 0: MD_TEXTUAL@17..23 + 0: MD_TEXTUAL_LITERAL@17..23 "italic" [] [] + 2: STAR@23..24 "*" [] [] + 2: MD_TEXTUAL@24..29 + 0: MD_TEXTUAL_LITERAL@24..29 " and " [] [] + 3: MD_INLINE_EMPHASIS@29..37 + 0: DOUBLE_STAR@29..31 "**" [] [] + 1: MD_INLINE_ITEM_LIST@31..35 + 0: MD_TEXTUAL@31..35 + 0: MD_TEXTUAL_LITERAL@31..35 "bold" [] [] + 2: DOUBLE_STAR@35..37 "**" [] [] + 4: MD_TEXTUAL@37..38 + 0: MD_TEXTUAL_LITERAL@37..38 "\n" [] [] + 1: (empty) + 1: MD_NEWLINE@38..39 + 0: NEWLINE@38..39 "\n" [] [] + 2: MD_PARAGRAPH@39..79 + 0: MD_INLINE_ITEM_LIST@39..79 + 0: MD_TEXTUAL@39..57 + 0: MD_TEXTUAL_LITERAL@39..57 "Valid underscore: " [] [] + 1: MD_INLINE_ITALIC@57..65 + 0: UNDERSCORE@57..58 "_" [] [] + 1: MD_INLINE_ITEM_LIST@58..64 + 0: 
MD_TEXTUAL@58..64 + 0: MD_TEXTUAL_LITERAL@58..64 "italic" [] [] + 2: UNDERSCORE@64..65 "_" [] [] + 2: MD_TEXTUAL@65..70 + 0: MD_TEXTUAL_LITERAL@65..70 " and " [] [] + 3: MD_INLINE_EMPHASIS@70..78 + 0: DOUBLE_UNDERSCORE@70..72 "__" [] [] + 1: MD_INLINE_ITEM_LIST@72..76 + 0: MD_TEXTUAL@72..76 + 0: MD_TEXTUAL_LITERAL@72..76 "bold" [] [] + 2: DOUBLE_UNDERSCORE@76..78 "__" [] [] + 4: MD_TEXTUAL@78..79 + 0: MD_TEXTUAL_LITERAL@78..79 "\n" [] [] + 1: (empty) + 3: MD_NEWLINE@79..80 + 0: NEWLINE@79..80 "\n" [] [] + 4: MD_PARAGRAPH@80..136 + 0: MD_INLINE_ITEM_LIST@80..136 + 0: MD_TEXTUAL@80..106 + 0: MD_TEXTUAL_LITERAL@80..106 "Asterisk after space: foo " [] [] + 1: MD_TEXTUAL@106..107 + 0: MD_TEXTUAL_LITERAL@106..107 "*" [] [] + 2: MD_TEXTUAL@107..111 + 0: MD_TEXTUAL_LITERAL@107..111 " bar" [] [] + 3: MD_TEXTUAL@111..112 + 0: MD_TEXTUAL_LITERAL@111..112 "*" [] [] + 4: MD_TEXTUAL@112..135 + 0: MD_TEXTUAL_LITERAL@112..135 " should not be emphasis" [] [] + 5: MD_TEXTUAL@135..136 + 0: MD_TEXTUAL_LITERAL@135..136 "\n" [] [] + 1: (empty) + 5: MD_NEWLINE@136..137 + 0: NEWLINE@136..137 "\n" [] [] + 6: MD_PARAGRAPH@137..186 + 0: MD_INLINE_ITEM_LIST@137..186 + 0: MD_TEXTUAL@137..162 + 0: MD_TEXTUAL_LITERAL@137..162 "Underscore intraword: foo" [] [] + 1: MD_TEXTUAL@162..163 + 0: MD_TEXTUAL_LITERAL@162..163 "_" [] [] + 2: MD_TEXTUAL@163..166 + 0: MD_TEXTUAL_LITERAL@163..166 "bar" [] [] + 3: MD_TEXTUAL@166..167 + 0: MD_TEXTUAL_LITERAL@166..167 "_" [] [] + 4: MD_TEXTUAL@167..171 + 0: MD_TEXTUAL_LITERAL@167..171 "baz " [] [] + 5: MD_TEXTUAL@171..172 + 0: MD_TEXTUAL_LITERAL@171..172 "(" [] [] + 6: MD_TEXTUAL@172..184 + 0: MD_TEXTUAL_LITERAL@172..184 "not emphasis" [] [] + 7: MD_TEXTUAL@184..185 + 0: MD_TEXTUAL_LITERAL@184..185 ")" [] [] + 8: MD_TEXTUAL@185..186 + 0: MD_TEXTUAL_LITERAL@185..186 "\n" [] [] + 1: (empty) + 7: MD_NEWLINE@186..187 + 0: NEWLINE@186..187 "\n" [] [] + 8: MD_PARAGRAPH@187..229 + 0: MD_INLINE_ITEM_LIST@187..229 + 0: MD_TEXTUAL@187..222 + 0: MD_TEXTUAL_LITERAL@187..222 
"Valid underscore at word boundary: " [] [] + 1: MD_INLINE_ITALIC@222..228 + 0: UNDERSCORE@222..223 "_" [] [] + 1: MD_INLINE_ITEM_LIST@223..227 + 0: MD_TEXTUAL@223..227 + 0: MD_TEXTUAL_LITERAL@223..227 "word" [] [] + 2: UNDERSCORE@227..228 "_" [] [] + 2: MD_TEXTUAL@228..229 + 0: MD_TEXTUAL_LITERAL@228..229 "\n" [] [] + 1: (empty) + 9: MD_NEWLINE@229..230 + 0: NEWLINE@229..230 "\n" [] [] + 10: MD_PARAGRAPH@230..251 + 0: MD_INLINE_ITEM_LIST@230..251 + 0: MD_TEXTUAL@230..241 + 0: MD_TEXTUAL_LITERAL@230..241 "Mixed: foo " [] [] + 1: MD_INLINE_ITALIC@241..246 + 0: STAR@241..242 "*" [] [] + 1: MD_INLINE_ITEM_LIST@242..245 + 0: MD_TEXTUAL@242..245 + 0: MD_TEXTUAL_LITERAL@242..245 "bar" [] [] + 2: STAR@245..246 "*" [] [] + 2: MD_TEXTUAL@246..250 + 0: MD_TEXTUAL_LITERAL@246..250 " baz" [] [] + 3: MD_TEXTUAL@250..251 + 0: MD_TEXTUAL_LITERAL@250..251 "\n" [] [] + 1: (empty) + 2: EOF@251..251 "" [] [] + +``` diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/emphasis_link_text.md b/crates/biome_markdown_parser/tests/md_test_suite/ok/emphasis_link_text.md new file mode 100644 index 000000000000..38ee5393aedf --- /dev/null +++ b/crates/biome_markdown_parser/tests/md_test_suite/ok/emphasis_link_text.md @@ -0,0 +1 @@ +Link with emphasis: [a *b* c](u) diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/emphasis_link_text.md.snap b/crates/biome_markdown_parser/tests/md_test_suite/ok/emphasis_link_text.md.snap new file mode 100644 index 000000000000..1223bb339adc --- /dev/null +++ b/crates/biome_markdown_parser/tests/md_test_suite/ok/emphasis_link_text.md.snap @@ -0,0 +1,99 @@ +--- +source: crates/biome_markdown_parser/tests/spec_test.rs +expression: snapshot +--- +## Input + +``` +Link with emphasis: [a *b* c](u) + +``` + + +## AST + +``` +MdDocument { + bom_token: missing (optional), + value: MdBlockList [ + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@0..20 "Link with emphasis: " [] [], + }, + MdInlineLink { + 
l_brack_token: L_BRACK@20..21 "[" [] [], + text: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@21..23 "a " [] [], + }, + MdInlineItalic { + l_fence: STAR@23..24 "*" [] [], + content: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@24..25 "b" [] [], + }, + ], + r_fence: STAR@25..26 "*" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@26..28 " c" [] [], + }, + ], + r_brack_token: R_BRACK@28..29 "]" [] [], + l_paren_token: L_PAREN@29..30 "(" [] [], + destination: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@30..31 "u" [] [], + }, + ], + title: missing (optional), + r_paren_token: R_PAREN@31..32 ")" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@32..33 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + ], + eof_token: EOF@33..33 "" [] [], +} +``` + +## CST + +``` +0: MD_DOCUMENT@0..33 + 0: (empty) + 1: MD_BLOCK_LIST@0..33 + 0: MD_PARAGRAPH@0..33 + 0: MD_INLINE_ITEM_LIST@0..33 + 0: MD_TEXTUAL@0..20 + 0: MD_TEXTUAL_LITERAL@0..20 "Link with emphasis: " [] [] + 1: MD_INLINE_LINK@20..32 + 0: L_BRACK@20..21 "[" [] [] + 1: MD_INLINE_ITEM_LIST@21..28 + 0: MD_TEXTUAL@21..23 + 0: MD_TEXTUAL_LITERAL@21..23 "a " [] [] + 1: MD_INLINE_ITALIC@23..26 + 0: STAR@23..24 "*" [] [] + 1: MD_INLINE_ITEM_LIST@24..25 + 0: MD_TEXTUAL@24..25 + 0: MD_TEXTUAL_LITERAL@24..25 "b" [] [] + 2: STAR@25..26 "*" [] [] + 2: MD_TEXTUAL@26..28 + 0: MD_TEXTUAL_LITERAL@26..28 " c" [] [] + 2: R_BRACK@28..29 "]" [] [] + 3: L_PAREN@29..30 "(" [] [] + 4: MD_INLINE_ITEM_LIST@30..31 + 0: MD_TEXTUAL@30..31 + 0: MD_TEXTUAL_LITERAL@30..31 "u" [] [] + 5: (empty) + 6: R_PAREN@31..32 ")" [] [] + 2: MD_TEXTUAL@32..33 + 0: MD_TEXTUAL_LITERAL@32..33 "\n" [] [] + 1: (empty) + 2: EOF@33..33 "" [] [] + +``` diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/entity_references.md b/crates/biome_markdown_parser/tests/md_test_suite/ok/entity_references.md new file mode 100644 index 000000000000..86ae24156e52 --- /dev/null 
+++ b/crates/biome_markdown_parser/tests/md_test_suite/ok/entity_references.md @@ -0,0 +1,11 @@ +Named entities: & ©   + +Decimal entities: { A + +Hex entities:  / + +Mixed: Use <div> for HTML. + +Invalid (remain as text): ¬anentity_with_underscore; � &#xGGGG; &; &# & foo; + +Edge cases: &a; (too short) &abcdefghijklmnopqrstuvwxyz01234567890; (too long) diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/entity_references.md.snap b/crates/biome_markdown_parser/tests/md_test_suite/ok/entity_references.md.snap new file mode 100644 index 000000000000..8ed58e7e2785 --- /dev/null +++ b/crates/biome_markdown_parser/tests/md_test_suite/ok/entity_references.md.snap @@ -0,0 +1,400 @@ +--- +source: crates/biome_markdown_parser/tests/spec_test.rs +expression: snapshot +--- +## Input + +``` +Named entities: & ©   + +Decimal entities: { A + +Hex entities:  / + +Mixed: Use <div> for HTML. + +Invalid (remain as text): ¬anentity_with_underscore; � &#xGGGG; &; &# & foo; + +Edge cases: &a; (too short) &abcdefghijklmnopqrstuvwxyz01234567890; (too long) + +``` + + +## AST + +``` +MdDocument { + bom_token: missing (optional), + value: MdBlockList [ + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@0..16 "Named entities: " [] [], + }, + MdEntityReference { + value_token: MD_ENTITY_LITERAL@16..21 "&" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@21..22 " " [] [], + }, + MdEntityReference { + value_token: MD_ENTITY_LITERAL@22..28 "©" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@28..29 " " [] [], + }, + MdEntityReference { + value_token: MD_ENTITY_LITERAL@29..35 " " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@35..36 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + MdNewline { + value_token: NEWLINE@36..37 "\n" [] [], + }, + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@37..55 "Decimal entities: " [] [], + }, + MdEntityReference { + 
value_token: MD_ENTITY_LITERAL@55..61 "{" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@61..62 " " [] [], + }, + MdEntityReference { + value_token: MD_ENTITY_LITERAL@62..67 "A" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@67..68 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + MdNewline { + value_token: NEWLINE@68..69 "\n" [] [], + }, + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@69..83 "Hex entities: " [] [], + }, + MdEntityReference { + value_token: MD_ENTITY_LITERAL@83..89 "" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@89..90 " " [] [], + }, + MdEntityReference { + value_token: MD_ENTITY_LITERAL@90..96 "/" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@96..97 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + MdNewline { + value_token: NEWLINE@97..98 "\n" [] [], + }, + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@98..109 "Mixed: Use " [] [], + }, + MdEntityReference { + value_token: MD_ENTITY_LITERAL@109..113 "<" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@113..116 "div" [] [], + }, + MdEntityReference { + value_token: MD_ENTITY_LITERAL@116..120 ">" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@120..130 " for HTML." 
[] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@130..131 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + MdNewline { + value_token: NEWLINE@131..132 "\n" [] [], + }, + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@132..140 "Invalid " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@140..141 "(" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@141..155 "remain as text" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@155..156 ")" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@156..157 ":" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@157..158 " " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@158..170 "¬anentity" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@170..171 "_" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@171..175 "with" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@175..176 "_" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@176..188 "underscore; " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@188..189 "&" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@189..190 "#" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@190..200 "12345678; " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@200..201 "&" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@201..202 "#" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@202..209 "xGGGG; " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@209..212 "&; " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@212..213 "&" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@213..214 "#" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@214..215 " " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@215..221 "& foo;" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@221..222 "\n" [] [], + }, + ], + hard_line: 
missing (optional), + }, + MdNewline { + value_token: NEWLINE@222..223 "\n" [] [], + }, + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@223..235 "Edge cases: " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@235..239 "&a; " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@239..240 "(" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@240..249 "too short" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@249..250 ")" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@250..251 " " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@251..291 "&abcdefghijklmnopqrstuvwxyz01234567890; " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@291..292 "(" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@292..300 "too long" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@300..301 ")" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@301..302 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + ], + eof_token: EOF@302..302 "" [] [], +} +``` + +## CST + +``` +0: MD_DOCUMENT@0..302 + 0: (empty) + 1: MD_BLOCK_LIST@0..302 + 0: MD_PARAGRAPH@0..36 + 0: MD_INLINE_ITEM_LIST@0..36 + 0: MD_TEXTUAL@0..16 + 0: MD_TEXTUAL_LITERAL@0..16 "Named entities: " [] [] + 1: MD_ENTITY_REFERENCE@16..21 + 0: MD_ENTITY_LITERAL@16..21 "&" [] [] + 2: MD_TEXTUAL@21..22 + 0: MD_TEXTUAL_LITERAL@21..22 " " [] [] + 3: MD_ENTITY_REFERENCE@22..28 + 0: MD_ENTITY_LITERAL@22..28 "©" [] [] + 4: MD_TEXTUAL@28..29 + 0: MD_TEXTUAL_LITERAL@28..29 " " [] [] + 5: MD_ENTITY_REFERENCE@29..35 + 0: MD_ENTITY_LITERAL@29..35 " " [] [] + 6: MD_TEXTUAL@35..36 + 0: MD_TEXTUAL_LITERAL@35..36 "\n" [] [] + 1: (empty) + 1: MD_NEWLINE@36..37 + 0: NEWLINE@36..37 "\n" [] [] + 2: MD_PARAGRAPH@37..68 + 0: MD_INLINE_ITEM_LIST@37..68 + 0: MD_TEXTUAL@37..55 + 0: MD_TEXTUAL_LITERAL@37..55 "Decimal entities: " [] [] + 1: MD_ENTITY_REFERENCE@55..61 + 0: MD_ENTITY_LITERAL@55..61 "{" [] [] + 2: 
MD_TEXTUAL@61..62 + 0: MD_TEXTUAL_LITERAL@61..62 " " [] [] + 3: MD_ENTITY_REFERENCE@62..67 + 0: MD_ENTITY_LITERAL@62..67 "A" [] [] + 4: MD_TEXTUAL@67..68 + 0: MD_TEXTUAL_LITERAL@67..68 "\n" [] [] + 1: (empty) + 3: MD_NEWLINE@68..69 + 0: NEWLINE@68..69 "\n" [] [] + 4: MD_PARAGRAPH@69..97 + 0: MD_INLINE_ITEM_LIST@69..97 + 0: MD_TEXTUAL@69..83 + 0: MD_TEXTUAL_LITERAL@69..83 "Hex entities: " [] [] + 1: MD_ENTITY_REFERENCE@83..89 + 0: MD_ENTITY_LITERAL@83..89 "" [] [] + 2: MD_TEXTUAL@89..90 + 0: MD_TEXTUAL_LITERAL@89..90 " " [] [] + 3: MD_ENTITY_REFERENCE@90..96 + 0: MD_ENTITY_LITERAL@90..96 "/" [] [] + 4: MD_TEXTUAL@96..97 + 0: MD_TEXTUAL_LITERAL@96..97 "\n" [] [] + 1: (empty) + 5: MD_NEWLINE@97..98 + 0: NEWLINE@97..98 "\n" [] [] + 6: MD_PARAGRAPH@98..131 + 0: MD_INLINE_ITEM_LIST@98..131 + 0: MD_TEXTUAL@98..109 + 0: MD_TEXTUAL_LITERAL@98..109 "Mixed: Use " [] [] + 1: MD_ENTITY_REFERENCE@109..113 + 0: MD_ENTITY_LITERAL@109..113 "<" [] [] + 2: MD_TEXTUAL@113..116 + 0: MD_TEXTUAL_LITERAL@113..116 "div" [] [] + 3: MD_ENTITY_REFERENCE@116..120 + 0: MD_ENTITY_LITERAL@116..120 ">" [] [] + 4: MD_TEXTUAL@120..130 + 0: MD_TEXTUAL_LITERAL@120..130 " for HTML." 
[] [] + 5: MD_TEXTUAL@130..131 + 0: MD_TEXTUAL_LITERAL@130..131 "\n" [] [] + 1: (empty) + 7: MD_NEWLINE@131..132 + 0: NEWLINE@131..132 "\n" [] [] + 8: MD_PARAGRAPH@132..222 + 0: MD_INLINE_ITEM_LIST@132..222 + 0: MD_TEXTUAL@132..140 + 0: MD_TEXTUAL_LITERAL@132..140 "Invalid " [] [] + 1: MD_TEXTUAL@140..141 + 0: MD_TEXTUAL_LITERAL@140..141 "(" [] [] + 2: MD_TEXTUAL@141..155 + 0: MD_TEXTUAL_LITERAL@141..155 "remain as text" [] [] + 3: MD_TEXTUAL@155..156 + 0: MD_TEXTUAL_LITERAL@155..156 ")" [] [] + 4: MD_TEXTUAL@156..157 + 0: MD_TEXTUAL_LITERAL@156..157 ":" [] [] + 5: MD_TEXTUAL@157..158 + 0: MD_TEXTUAL_LITERAL@157..158 " " [] [] + 6: MD_TEXTUAL@158..170 + 0: MD_TEXTUAL_LITERAL@158..170 "¬anentity" [] [] + 7: MD_TEXTUAL@170..171 + 0: MD_TEXTUAL_LITERAL@170..171 "_" [] [] + 8: MD_TEXTUAL@171..175 + 0: MD_TEXTUAL_LITERAL@171..175 "with" [] [] + 9: MD_TEXTUAL@175..176 + 0: MD_TEXTUAL_LITERAL@175..176 "_" [] [] + 10: MD_TEXTUAL@176..188 + 0: MD_TEXTUAL_LITERAL@176..188 "underscore; " [] [] + 11: MD_TEXTUAL@188..189 + 0: MD_TEXTUAL_LITERAL@188..189 "&" [] [] + 12: MD_TEXTUAL@189..190 + 0: MD_TEXTUAL_LITERAL@189..190 "#" [] [] + 13: MD_TEXTUAL@190..200 + 0: MD_TEXTUAL_LITERAL@190..200 "12345678; " [] [] + 14: MD_TEXTUAL@200..201 + 0: MD_TEXTUAL_LITERAL@200..201 "&" [] [] + 15: MD_TEXTUAL@201..202 + 0: MD_TEXTUAL_LITERAL@201..202 "#" [] [] + 16: MD_TEXTUAL@202..209 + 0: MD_TEXTUAL_LITERAL@202..209 "xGGGG; " [] [] + 17: MD_TEXTUAL@209..212 + 0: MD_TEXTUAL_LITERAL@209..212 "&; " [] [] + 18: MD_TEXTUAL@212..213 + 0: MD_TEXTUAL_LITERAL@212..213 "&" [] [] + 19: MD_TEXTUAL@213..214 + 0: MD_TEXTUAL_LITERAL@213..214 "#" [] [] + 20: MD_TEXTUAL@214..215 + 0: MD_TEXTUAL_LITERAL@214..215 " " [] [] + 21: MD_TEXTUAL@215..221 + 0: MD_TEXTUAL_LITERAL@215..221 "& foo;" [] [] + 22: MD_TEXTUAL@221..222 + 0: MD_TEXTUAL_LITERAL@221..222 "\n" [] [] + 1: (empty) + 9: MD_NEWLINE@222..223 + 0: NEWLINE@222..223 "\n" [] [] + 10: MD_PARAGRAPH@223..302 + 0: MD_INLINE_ITEM_LIST@223..302 + 0: 
MD_TEXTUAL@223..235 + 0: MD_TEXTUAL_LITERAL@223..235 "Edge cases: " [] [] + 1: MD_TEXTUAL@235..239 + 0: MD_TEXTUAL_LITERAL@235..239 "&a; " [] [] + 2: MD_TEXTUAL@239..240 + 0: MD_TEXTUAL_LITERAL@239..240 "(" [] [] + 3: MD_TEXTUAL@240..249 + 0: MD_TEXTUAL_LITERAL@240..249 "too short" [] [] + 4: MD_TEXTUAL@249..250 + 0: MD_TEXTUAL_LITERAL@249..250 ")" [] [] + 5: MD_TEXTUAL@250..251 + 0: MD_TEXTUAL_LITERAL@250..251 " " [] [] + 6: MD_TEXTUAL@251..291 + 0: MD_TEXTUAL_LITERAL@251..291 "&abcdefghijklmnopqrstuvwxyz01234567890; " [] [] + 7: MD_TEXTUAL@291..292 + 0: MD_TEXTUAL_LITERAL@291..292 "(" [] [] + 8: MD_TEXTUAL@292..300 + 0: MD_TEXTUAL_LITERAL@292..300 "too long" [] [] + 9: MD_TEXTUAL@300..301 + 0: MD_TEXTUAL_LITERAL@300..301 ")" [] [] + 10: MD_TEXTUAL@301..302 + 0: MD_TEXTUAL_LITERAL@301..302 "\n" [] [] + 1: (empty) + 2: EOF@302..302 "" [] [] + +``` diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/fenced_code_advanced.md b/crates/biome_markdown_parser/tests/md_test_suite/ok/fenced_code_advanced.md new file mode 100644 index 000000000000..29029ab58d49 --- /dev/null +++ b/crates/biome_markdown_parser/tests/md_test_suite/ok/fenced_code_advanced.md @@ -0,0 +1,38 @@ +Basic (3 backticks): +``` +code +``` + +Longer fence (5 backticks): +````` +code with ``` inside +````` + +Tildes: +~~~ +code +~~~ + +Mixed (should not close): +``` +code +~~~ +still code +``` + +Indented closing (valid): +``` +code + ``` + +Short closing (invalid - treated as content): +```` +code +``` +still code +```` + +Indented opening (stripped from content): + ``` + code line + ``` diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/fenced_code_advanced.md.snap b/crates/biome_markdown_parser/tests/md_test_suite/ok/fenced_code_advanced.md.snap new file mode 100644 index 000000000000..a1ea76038749 --- /dev/null +++ b/crates/biome_markdown_parser/tests/md_test_suite/ok/fenced_code_advanced.md.snap @@ -0,0 +1,630 @@ +--- +source: crates/biome_markdown_parser/tests/spec_test.rs 
+assertion_line: 131 +expression: snapshot +--- +## Input + +``` +Basic (3 backticks): +``` +code +``` + +Longer fence (5 backticks): +````` +code with ``` inside +````` + +Tildes: +~~~ +code +~~~ + +Mixed (should not close): +``` +code +~~~ +still code +``` + +Indented closing (valid): +``` +code + ``` + +Short closing (invalid - treated as content): +```` +code +``` +still code +```` + +Indented opening (stripped from content): + ``` + code line + ``` + +``` + + +## AST + +``` +MdDocument { + bom_token: missing (optional), + value: MdBlockList [ + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@0..6 "Basic " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@6..7 "(" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@7..18 "3 backticks" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@18..19 ")" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@19..20 ":" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@20..21 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + MdFencedCodeBlock { + l_fence: TRIPLE_BACKTICK@21..24 "```" [] [], + code_list: MdCodeNameList [], + content: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@24..25 "\n" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@25..29 "code" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@29..30 "\n" [] [], + }, + ], + r_fence: TRIPLE_BACKTICK@30..33 "```" [] [], + }, + MdNewline { + value_token: NEWLINE@33..34 "\n" [] [], + }, + MdNewline { + value_token: NEWLINE@34..35 "\n" [] [], + }, + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@35..48 "Longer fence " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@48..49 "(" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@49..60 "5 backticks" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@60..61 ")" [] [], + }, + MdTextual { + value_token: 
MD_TEXTUAL_LITERAL@61..62 ":" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@62..63 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + MdFencedCodeBlock { + l_fence: TRIPLE_BACKTICK@63..68 "`````" [] [], + code_list: MdCodeNameList [], + content: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@68..69 "\n" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@69..79 "code with " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@79..82 "```" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@82..89 " inside" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@89..90 "\n" [] [], + }, + ], + r_fence: TRIPLE_BACKTICK@90..95 "`````" [] [], + }, + MdNewline { + value_token: NEWLINE@95..96 "\n" [] [], + }, + MdNewline { + value_token: NEWLINE@96..97 "\n" [] [], + }, + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@97..104 "Tildes:" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@104..105 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + MdFencedCodeBlock { + l_fence: TRIPLE_TILDE@105..108 "~~~" [] [], + code_list: MdCodeNameList [], + content: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@108..109 "\n" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@109..113 "code" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@113..114 "\n" [] [], + }, + ], + r_fence: TRIPLE_TILDE@114..117 "~~~" [] [], + }, + MdNewline { + value_token: NEWLINE@117..118 "\n" [] [], + }, + MdNewline { + value_token: NEWLINE@118..119 "\n" [] [], + }, + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@119..125 "Mixed " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@125..126 "(" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@126..142 "should not close" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@142..143 ")" [] [], + }, + MdTextual { + 
value_token: MD_TEXTUAL_LITERAL@143..144 ":" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@144..145 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + MdFencedCodeBlock { + l_fence: TRIPLE_BACKTICK@145..148 "```" [] [], + code_list: MdCodeNameList [], + content: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@148..149 "\n" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@149..153 "code" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@153..154 "\n" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@154..157 "~~~" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@157..158 "\n" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@158..168 "still code" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@168..169 "\n" [] [], + }, + ], + r_fence: TRIPLE_BACKTICK@169..172 "```" [] [], + }, + MdNewline { + value_token: NEWLINE@172..173 "\n" [] [], + }, + MdNewline { + value_token: NEWLINE@173..174 "\n" [] [], + }, + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@174..191 "Indented closing " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@191..192 "(" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@192..197 "valid" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@197..198 ")" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@198..199 ":" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@199..200 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + MdFencedCodeBlock { + l_fence: TRIPLE_BACKTICK@200..203 "```" [] [], + code_list: MdCodeNameList [], + content: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@203..204 "\n" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@204..208 "code" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@208..209 "\n" [] [], + }, + ], + r_fence: TRIPLE_BACKTICK@209..215 "```" [Skipped(" "), Skipped(" "), 
Skipped(" ")] [], + }, + MdNewline { + value_token: NEWLINE@215..216 "\n" [] [], + }, + MdNewline { + value_token: NEWLINE@216..217 "\n" [] [], + }, + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@217..231 "Short closing " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@231..232 "(" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@232..240 "invalid " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@240..241 "-" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@241..260 " treated as content" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@260..261 ")" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@261..262 ":" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@262..263 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + MdFencedCodeBlock { + l_fence: TRIPLE_BACKTICK@263..267 "````" [] [], + code_list: MdCodeNameList [], + content: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@267..268 "\n" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@268..272 "code" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@272..273 "\n" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@273..276 "```" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@276..277 "\n" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@277..287 "still code" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@287..288 "\n" [] [], + }, + ], + r_fence: TRIPLE_BACKTICK@288..292 "````" [] [], + }, + MdNewline { + value_token: NEWLINE@292..293 "\n" [] [], + }, + MdNewline { + value_token: NEWLINE@293..294 "\n" [] [], + }, + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@294..311 "Indented opening " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@311..312 "(" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@312..333 "stripped from content" [] [], 
+ }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@333..334 ")" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@334..335 ":" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@335..336 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + MdFencedCodeBlock { + l_fence: TRIPLE_BACKTICK@336..341 "```" [Skipped(" "), Skipped(" ")] [], + code_list: MdCodeNameList [], + content: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@341..342 "\n" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@342..353 "code line" [Skipped(" "), Skipped(" ")] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@353..354 "\n" [] [], + }, + ], + r_fence: TRIPLE_BACKTICK@354..359 "```" [Skipped(" "), Skipped(" ")] [], + }, + MdNewline { + value_token: NEWLINE@359..360 "\n" [] [], + }, + ], + eof_token: EOF@360..360 "" [] [], +} +``` + +## CST + +``` +0: MD_DOCUMENT@0..360 + 0: (empty) + 1: MD_BLOCK_LIST@0..360 + 0: MD_PARAGRAPH@0..21 + 0: MD_INLINE_ITEM_LIST@0..21 + 0: MD_TEXTUAL@0..6 + 0: MD_TEXTUAL_LITERAL@0..6 "Basic " [] [] + 1: MD_TEXTUAL@6..7 + 0: MD_TEXTUAL_LITERAL@6..7 "(" [] [] + 2: MD_TEXTUAL@7..18 + 0: MD_TEXTUAL_LITERAL@7..18 "3 backticks" [] [] + 3: MD_TEXTUAL@18..19 + 0: MD_TEXTUAL_LITERAL@18..19 ")" [] [] + 4: MD_TEXTUAL@19..20 + 0: MD_TEXTUAL_LITERAL@19..20 ":" [] [] + 5: MD_TEXTUAL@20..21 + 0: MD_TEXTUAL_LITERAL@20..21 "\n" [] [] + 1: (empty) + 1: MD_FENCED_CODE_BLOCK@21..33 + 0: TRIPLE_BACKTICK@21..24 "```" [] [] + 1: MD_CODE_NAME_LIST@24..24 + 2: MD_INLINE_ITEM_LIST@24..30 + 0: MD_TEXTUAL@24..25 + 0: MD_TEXTUAL_LITERAL@24..25 "\n" [] [] + 1: MD_TEXTUAL@25..29 + 0: MD_TEXTUAL_LITERAL@25..29 "code" [] [] + 2: MD_TEXTUAL@29..30 + 0: MD_TEXTUAL_LITERAL@29..30 "\n" [] [] + 3: TRIPLE_BACKTICK@30..33 "```" [] [] + 2: MD_NEWLINE@33..34 + 0: NEWLINE@33..34 "\n" [] [] + 3: MD_NEWLINE@34..35 + 0: NEWLINE@34..35 "\n" [] [] + 4: MD_PARAGRAPH@35..63 + 0: MD_INLINE_ITEM_LIST@35..63 + 0: MD_TEXTUAL@35..48 + 0: 
MD_TEXTUAL_LITERAL@35..48 "Longer fence " [] [] + 1: MD_TEXTUAL@48..49 + 0: MD_TEXTUAL_LITERAL@48..49 "(" [] [] + 2: MD_TEXTUAL@49..60 + 0: MD_TEXTUAL_LITERAL@49..60 "5 backticks" [] [] + 3: MD_TEXTUAL@60..61 + 0: MD_TEXTUAL_LITERAL@60..61 ")" [] [] + 4: MD_TEXTUAL@61..62 + 0: MD_TEXTUAL_LITERAL@61..62 ":" [] [] + 5: MD_TEXTUAL@62..63 + 0: MD_TEXTUAL_LITERAL@62..63 "\n" [] [] + 1: (empty) + 5: MD_FENCED_CODE_BLOCK@63..95 + 0: TRIPLE_BACKTICK@63..68 "`````" [] [] + 1: MD_CODE_NAME_LIST@68..68 + 2: MD_INLINE_ITEM_LIST@68..90 + 0: MD_TEXTUAL@68..69 + 0: MD_TEXTUAL_LITERAL@68..69 "\n" [] [] + 1: MD_TEXTUAL@69..79 + 0: MD_TEXTUAL_LITERAL@69..79 "code with " [] [] + 2: MD_TEXTUAL@79..82 + 0: MD_TEXTUAL_LITERAL@79..82 "```" [] [] + 3: MD_TEXTUAL@82..89 + 0: MD_TEXTUAL_LITERAL@82..89 " inside" [] [] + 4: MD_TEXTUAL@89..90 + 0: MD_TEXTUAL_LITERAL@89..90 "\n" [] [] + 3: TRIPLE_BACKTICK@90..95 "`````" [] [] + 6: MD_NEWLINE@95..96 + 0: NEWLINE@95..96 "\n" [] [] + 7: MD_NEWLINE@96..97 + 0: NEWLINE@96..97 "\n" [] [] + 8: MD_PARAGRAPH@97..105 + 0: MD_INLINE_ITEM_LIST@97..105 + 0: MD_TEXTUAL@97..104 + 0: MD_TEXTUAL_LITERAL@97..104 "Tildes:" [] [] + 1: MD_TEXTUAL@104..105 + 0: MD_TEXTUAL_LITERAL@104..105 "\n" [] [] + 1: (empty) + 9: MD_FENCED_CODE_BLOCK@105..117 + 0: TRIPLE_TILDE@105..108 "~~~" [] [] + 1: MD_CODE_NAME_LIST@108..108 + 2: MD_INLINE_ITEM_LIST@108..114 + 0: MD_TEXTUAL@108..109 + 0: MD_TEXTUAL_LITERAL@108..109 "\n" [] [] + 1: MD_TEXTUAL@109..113 + 0: MD_TEXTUAL_LITERAL@109..113 "code" [] [] + 2: MD_TEXTUAL@113..114 + 0: MD_TEXTUAL_LITERAL@113..114 "\n" [] [] + 3: TRIPLE_TILDE@114..117 "~~~" [] [] + 10: MD_NEWLINE@117..118 + 0: NEWLINE@117..118 "\n" [] [] + 11: MD_NEWLINE@118..119 + 0: NEWLINE@118..119 "\n" [] [] + 12: MD_PARAGRAPH@119..145 + 0: MD_INLINE_ITEM_LIST@119..145 + 0: MD_TEXTUAL@119..125 + 0: MD_TEXTUAL_LITERAL@119..125 "Mixed " [] [] + 1: MD_TEXTUAL@125..126 + 0: MD_TEXTUAL_LITERAL@125..126 "(" [] [] + 2: MD_TEXTUAL@126..142 + 0: MD_TEXTUAL_LITERAL@126..142 
"should not close" [] [] + 3: MD_TEXTUAL@142..143 + 0: MD_TEXTUAL_LITERAL@142..143 ")" [] [] + 4: MD_TEXTUAL@143..144 + 0: MD_TEXTUAL_LITERAL@143..144 ":" [] [] + 5: MD_TEXTUAL@144..145 + 0: MD_TEXTUAL_LITERAL@144..145 "\n" [] [] + 1: (empty) + 13: MD_FENCED_CODE_BLOCK@145..172 + 0: TRIPLE_BACKTICK@145..148 "```" [] [] + 1: MD_CODE_NAME_LIST@148..148 + 2: MD_INLINE_ITEM_LIST@148..169 + 0: MD_TEXTUAL@148..149 + 0: MD_TEXTUAL_LITERAL@148..149 "\n" [] [] + 1: MD_TEXTUAL@149..153 + 0: MD_TEXTUAL_LITERAL@149..153 "code" [] [] + 2: MD_TEXTUAL@153..154 + 0: MD_TEXTUAL_LITERAL@153..154 "\n" [] [] + 3: MD_TEXTUAL@154..157 + 0: MD_TEXTUAL_LITERAL@154..157 "~~~" [] [] + 4: MD_TEXTUAL@157..158 + 0: MD_TEXTUAL_LITERAL@157..158 "\n" [] [] + 5: MD_TEXTUAL@158..168 + 0: MD_TEXTUAL_LITERAL@158..168 "still code" [] [] + 6: MD_TEXTUAL@168..169 + 0: MD_TEXTUAL_LITERAL@168..169 "\n" [] [] + 3: TRIPLE_BACKTICK@169..172 "```" [] [] + 14: MD_NEWLINE@172..173 + 0: NEWLINE@172..173 "\n" [] [] + 15: MD_NEWLINE@173..174 + 0: NEWLINE@173..174 "\n" [] [] + 16: MD_PARAGRAPH@174..200 + 0: MD_INLINE_ITEM_LIST@174..200 + 0: MD_TEXTUAL@174..191 + 0: MD_TEXTUAL_LITERAL@174..191 "Indented closing " [] [] + 1: MD_TEXTUAL@191..192 + 0: MD_TEXTUAL_LITERAL@191..192 "(" [] [] + 2: MD_TEXTUAL@192..197 + 0: MD_TEXTUAL_LITERAL@192..197 "valid" [] [] + 3: MD_TEXTUAL@197..198 + 0: MD_TEXTUAL_LITERAL@197..198 ")" [] [] + 4: MD_TEXTUAL@198..199 + 0: MD_TEXTUAL_LITERAL@198..199 ":" [] [] + 5: MD_TEXTUAL@199..200 + 0: MD_TEXTUAL_LITERAL@199..200 "\n" [] [] + 1: (empty) + 17: MD_FENCED_CODE_BLOCK@200..215 + 0: TRIPLE_BACKTICK@200..203 "```" [] [] + 1: MD_CODE_NAME_LIST@203..203 + 2: MD_INLINE_ITEM_LIST@203..209 + 0: MD_TEXTUAL@203..204 + 0: MD_TEXTUAL_LITERAL@203..204 "\n" [] [] + 1: MD_TEXTUAL@204..208 + 0: MD_TEXTUAL_LITERAL@204..208 "code" [] [] + 2: MD_TEXTUAL@208..209 + 0: MD_TEXTUAL_LITERAL@208..209 "\n" [] [] + 3: TRIPLE_BACKTICK@209..215 "```" [Skipped(" "), Skipped(" "), Skipped(" ")] [] + 18: 
MD_NEWLINE@215..216 + 0: NEWLINE@215..216 "\n" [] [] + 19: MD_NEWLINE@216..217 + 0: NEWLINE@216..217 "\n" [] [] + 20: MD_PARAGRAPH@217..263 + 0: MD_INLINE_ITEM_LIST@217..263 + 0: MD_TEXTUAL@217..231 + 0: MD_TEXTUAL_LITERAL@217..231 "Short closing " [] [] + 1: MD_TEXTUAL@231..232 + 0: MD_TEXTUAL_LITERAL@231..232 "(" [] [] + 2: MD_TEXTUAL@232..240 + 0: MD_TEXTUAL_LITERAL@232..240 "invalid " [] [] + 3: MD_TEXTUAL@240..241 + 0: MD_TEXTUAL_LITERAL@240..241 "-" [] [] + 4: MD_TEXTUAL@241..260 + 0: MD_TEXTUAL_LITERAL@241..260 " treated as content" [] [] + 5: MD_TEXTUAL@260..261 + 0: MD_TEXTUAL_LITERAL@260..261 ")" [] [] + 6: MD_TEXTUAL@261..262 + 0: MD_TEXTUAL_LITERAL@261..262 ":" [] [] + 7: MD_TEXTUAL@262..263 + 0: MD_TEXTUAL_LITERAL@262..263 "\n" [] [] + 1: (empty) + 21: MD_FENCED_CODE_BLOCK@263..292 + 0: TRIPLE_BACKTICK@263..267 "````" [] [] + 1: MD_CODE_NAME_LIST@267..267 + 2: MD_INLINE_ITEM_LIST@267..288 + 0: MD_TEXTUAL@267..268 + 0: MD_TEXTUAL_LITERAL@267..268 "\n" [] [] + 1: MD_TEXTUAL@268..272 + 0: MD_TEXTUAL_LITERAL@268..272 "code" [] [] + 2: MD_TEXTUAL@272..273 + 0: MD_TEXTUAL_LITERAL@272..273 "\n" [] [] + 3: MD_TEXTUAL@273..276 + 0: MD_TEXTUAL_LITERAL@273..276 "```" [] [] + 4: MD_TEXTUAL@276..277 + 0: MD_TEXTUAL_LITERAL@276..277 "\n" [] [] + 5: MD_TEXTUAL@277..287 + 0: MD_TEXTUAL_LITERAL@277..287 "still code" [] [] + 6: MD_TEXTUAL@287..288 + 0: MD_TEXTUAL_LITERAL@287..288 "\n" [] [] + 3: TRIPLE_BACKTICK@288..292 "````" [] [] + 22: MD_NEWLINE@292..293 + 0: NEWLINE@292..293 "\n" [] [] + 23: MD_NEWLINE@293..294 + 0: NEWLINE@293..294 "\n" [] [] + 24: MD_PARAGRAPH@294..336 + 0: MD_INLINE_ITEM_LIST@294..336 + 0: MD_TEXTUAL@294..311 + 0: MD_TEXTUAL_LITERAL@294..311 "Indented opening " [] [] + 1: MD_TEXTUAL@311..312 + 0: MD_TEXTUAL_LITERAL@311..312 "(" [] [] + 2: MD_TEXTUAL@312..333 + 0: MD_TEXTUAL_LITERAL@312..333 "stripped from content" [] [] + 3: MD_TEXTUAL@333..334 + 0: MD_TEXTUAL_LITERAL@333..334 ")" [] [] + 4: MD_TEXTUAL@334..335 + 0: MD_TEXTUAL_LITERAL@334..335 
":" [] [] + 5: MD_TEXTUAL@335..336 + 0: MD_TEXTUAL_LITERAL@335..336 "\n" [] [] + 1: (empty) + 25: MD_FENCED_CODE_BLOCK@336..359 + 0: TRIPLE_BACKTICK@336..341 "```" [Skipped(" "), Skipped(" ")] [] + 1: MD_CODE_NAME_LIST@341..341 + 2: MD_INLINE_ITEM_LIST@341..354 + 0: MD_TEXTUAL@341..342 + 0: MD_TEXTUAL_LITERAL@341..342 "\n" [] [] + 1: MD_TEXTUAL@342..353 + 0: MD_TEXTUAL_LITERAL@342..353 "code line" [Skipped(" "), Skipped(" ")] [] + 2: MD_TEXTUAL@353..354 + 0: MD_TEXTUAL_LITERAL@353..354 "\n" [] [] + 3: TRIPLE_BACKTICK@354..359 "```" [Skipped(" "), Skipped(" ")] [] + 26: MD_NEWLINE@359..360 + 0: NEWLINE@359..360 "\n" [] [] + 2: EOF@360..360 "" [] [] + +``` diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/fenced_code_block.md b/crates/biome_markdown_parser/tests/md_test_suite/ok/fenced_code_block.md new file mode 100644 index 000000000000..bf110cc260d0 --- /dev/null +++ b/crates/biome_markdown_parser/tests/md_test_suite/ok/fenced_code_block.md @@ -0,0 +1,15 @@ +```javascript +const x = 1; +``` + +``` +No language +``` + +~~~python +x = 1 +~~~ + +~~~ +Tilde no language +~~~ diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/fenced_code_block.md.snap b/crates/biome_markdown_parser/tests/md_test_suite/ok/fenced_code_block.md.snap new file mode 100644 index 000000000000..b302dabf8f41 --- /dev/null +++ b/crates/biome_markdown_parser/tests/md_test_suite/ok/fenced_code_block.md.snap @@ -0,0 +1,201 @@ +--- +source: crates/biome_markdown_parser/tests/spec_test.rs +expression: snapshot +--- +## Input + +``` +```javascript +const x = 1; +``` + +``` +No language +``` + +~~~python +x = 1 +~~~ + +~~~ +Tilde no language +~~~ + +``` + + +## AST + +``` +MdDocument { + bom_token: missing (optional), + value: MdBlockList [ + MdFencedCodeBlock { + l_fence: TRIPLE_BACKTICK@0..3 "```" [] [], + code_list: MdCodeNameList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@3..13 "javascript" [] [], + }, + ], + content: MdInlineItemList [ + MdTextual { + 
value_token: MD_TEXTUAL_LITERAL@13..14 "\n" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@14..26 "const x = 1;" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@26..27 "\n" [] [], + }, + ], + r_fence: TRIPLE_BACKTICK@27..30 "```" [] [], + }, + MdNewline { + value_token: NEWLINE@30..31 "\n" [] [], + }, + MdNewline { + value_token: NEWLINE@31..32 "\n" [] [], + }, + MdFencedCodeBlock { + l_fence: TRIPLE_BACKTICK@32..35 "```" [] [], + code_list: MdCodeNameList [], + content: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@35..36 "\n" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@36..47 "No language" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@47..48 "\n" [] [], + }, + ], + r_fence: TRIPLE_BACKTICK@48..51 "```" [] [], + }, + MdNewline { + value_token: NEWLINE@51..52 "\n" [] [], + }, + MdNewline { + value_token: NEWLINE@52..53 "\n" [] [], + }, + MdFencedCodeBlock { + l_fence: TRIPLE_TILDE@53..56 "~~~" [] [], + code_list: MdCodeNameList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@56..62 "python" [] [], + }, + ], + content: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@62..63 "\n" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@63..68 "x = 1" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@68..69 "\n" [] [], + }, + ], + r_fence: TRIPLE_TILDE@69..72 "~~~" [] [], + }, + MdNewline { + value_token: NEWLINE@72..73 "\n" [] [], + }, + MdNewline { + value_token: NEWLINE@73..74 "\n" [] [], + }, + MdFencedCodeBlock { + l_fence: TRIPLE_TILDE@74..77 "~~~" [] [], + code_list: MdCodeNameList [], + content: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@77..78 "\n" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@78..95 "Tilde no language" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@95..96 "\n" [] [], + }, + ], + r_fence: TRIPLE_TILDE@96..99 "~~~" [] [], + }, + MdNewline { + value_token: NEWLINE@99..100 "\n" [] [], + 
}, + ], + eof_token: EOF@100..100 "" [] [], +} +``` + +## CST + +``` +0: MD_DOCUMENT@0..100 + 0: (empty) + 1: MD_BLOCK_LIST@0..100 + 0: MD_FENCED_CODE_BLOCK@0..30 + 0: TRIPLE_BACKTICK@0..3 "```" [] [] + 1: MD_CODE_NAME_LIST@3..13 + 0: MD_TEXTUAL@3..13 + 0: MD_TEXTUAL_LITERAL@3..13 "javascript" [] [] + 2: MD_INLINE_ITEM_LIST@13..27 + 0: MD_TEXTUAL@13..14 + 0: MD_TEXTUAL_LITERAL@13..14 "\n" [] [] + 1: MD_TEXTUAL@14..26 + 0: MD_TEXTUAL_LITERAL@14..26 "const x = 1;" [] [] + 2: MD_TEXTUAL@26..27 + 0: MD_TEXTUAL_LITERAL@26..27 "\n" [] [] + 3: TRIPLE_BACKTICK@27..30 "```" [] [] + 1: MD_NEWLINE@30..31 + 0: NEWLINE@30..31 "\n" [] [] + 2: MD_NEWLINE@31..32 + 0: NEWLINE@31..32 "\n" [] [] + 3: MD_FENCED_CODE_BLOCK@32..51 + 0: TRIPLE_BACKTICK@32..35 "```" [] [] + 1: MD_CODE_NAME_LIST@35..35 + 2: MD_INLINE_ITEM_LIST@35..48 + 0: MD_TEXTUAL@35..36 + 0: MD_TEXTUAL_LITERAL@35..36 "\n" [] [] + 1: MD_TEXTUAL@36..47 + 0: MD_TEXTUAL_LITERAL@36..47 "No language" [] [] + 2: MD_TEXTUAL@47..48 + 0: MD_TEXTUAL_LITERAL@47..48 "\n" [] [] + 3: TRIPLE_BACKTICK@48..51 "```" [] [] + 4: MD_NEWLINE@51..52 + 0: NEWLINE@51..52 "\n" [] [] + 5: MD_NEWLINE@52..53 + 0: NEWLINE@52..53 "\n" [] [] + 6: MD_FENCED_CODE_BLOCK@53..72 + 0: TRIPLE_TILDE@53..56 "~~~" [] [] + 1: MD_CODE_NAME_LIST@56..62 + 0: MD_TEXTUAL@56..62 + 0: MD_TEXTUAL_LITERAL@56..62 "python" [] [] + 2: MD_INLINE_ITEM_LIST@62..69 + 0: MD_TEXTUAL@62..63 + 0: MD_TEXTUAL_LITERAL@62..63 "\n" [] [] + 1: MD_TEXTUAL@63..68 + 0: MD_TEXTUAL_LITERAL@63..68 "x = 1" [] [] + 2: MD_TEXTUAL@68..69 + 0: MD_TEXTUAL_LITERAL@68..69 "\n" [] [] + 3: TRIPLE_TILDE@69..72 "~~~" [] [] + 7: MD_NEWLINE@72..73 + 0: NEWLINE@72..73 "\n" [] [] + 8: MD_NEWLINE@73..74 + 0: NEWLINE@73..74 "\n" [] [] + 9: MD_FENCED_CODE_BLOCK@74..99 + 0: TRIPLE_TILDE@74..77 "~~~" [] [] + 1: MD_CODE_NAME_LIST@77..77 + 2: MD_INLINE_ITEM_LIST@77..96 + 0: MD_TEXTUAL@77..78 + 0: MD_TEXTUAL_LITERAL@77..78 "\n" [] [] + 1: MD_TEXTUAL@78..95 + 0: MD_TEXTUAL_LITERAL@78..95 "Tilde no language" [] [] + 2: 
MD_TEXTUAL@95..96 + 0: MD_TEXTUAL_LITERAL@95..96 "\n" [] [] + 3: TRIPLE_TILDE@96..99 "~~~" [] [] + 10: MD_NEWLINE@99..100 + 0: NEWLINE@99..100 "\n" [] [] + 2: EOF@100..100 "" [] [] + +``` diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/fenced_code_indentation.md b/crates/biome_markdown_parser/tests/md_test_suite/ok/fenced_code_indentation.md new file mode 100644 index 000000000000..7557babe62d0 --- /dev/null +++ b/crates/biome_markdown_parser/tests/md_test_suite/ok/fenced_code_indentation.md @@ -0,0 +1,3 @@ +``` + indented +``` diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/fenced_code_indentation.md.snap b/crates/biome_markdown_parser/tests/md_test_suite/ok/fenced_code_indentation.md.snap new file mode 100644 index 000000000000..9329d2001438 --- /dev/null +++ b/crates/biome_markdown_parser/tests/md_test_suite/ok/fenced_code_indentation.md.snap @@ -0,0 +1,77 @@ +--- +source: crates/biome_markdown_parser/tests/spec_test.rs +assertion_line: 131 +expression: snapshot +--- +## Input + +``` +``` + indented +``` + +``` + + +## AST + +``` +MdDocument { + bom_token: missing (optional), + value: MdBlockList [ + MdFencedCodeBlock { + l_fence: TRIPLE_BACKTICK@0..3 "```" [] [], + code_list: MdCodeNameList [], + content: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@3..4 "\n" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@4..5 " " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@5..6 " " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@6..14 "indented" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@14..15 "\n" [] [], + }, + ], + r_fence: TRIPLE_BACKTICK@15..18 "```" [] [], + }, + MdNewline { + value_token: NEWLINE@18..19 "\n" [] [], + }, + ], + eof_token: EOF@19..19 "" [] [], +} +``` + +## CST + +``` +0: MD_DOCUMENT@0..19 + 0: (empty) + 1: MD_BLOCK_LIST@0..19 + 0: MD_FENCED_CODE_BLOCK@0..18 + 0: TRIPLE_BACKTICK@0..3 "```" [] [] + 1: MD_CODE_NAME_LIST@3..3 + 2: 
MD_INLINE_ITEM_LIST@3..15 + 0: MD_TEXTUAL@3..4 + 0: MD_TEXTUAL_LITERAL@3..4 "\n" [] [] + 1: MD_TEXTUAL@4..5 + 0: MD_TEXTUAL_LITERAL@4..5 " " [] [] + 2: MD_TEXTUAL@5..6 + 0: MD_TEXTUAL_LITERAL@5..6 " " [] [] + 3: MD_TEXTUAL@6..14 + 0: MD_TEXTUAL_LITERAL@6..14 "indented" [] [] + 4: MD_TEXTUAL@14..15 + 0: MD_TEXTUAL_LITERAL@14..15 "\n" [] [] + 3: TRIPLE_BACKTICK@15..18 "```" [] [] + 1: MD_NEWLINE@18..19 + 0: NEWLINE@18..19 "\n" [] [] + 2: EOF@19..19 "" [] [] + +``` diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/fenced_code_info_backtick.md b/crates/biome_markdown_parser/tests/md_test_suite/ok/fenced_code_info_backtick.md new file mode 100644 index 000000000000..44fd1b333ee5 --- /dev/null +++ b/crates/biome_markdown_parser/tests/md_test_suite/ok/fenced_code_info_backtick.md @@ -0,0 +1,2 @@ +Backtick in info string should not open fence: +``` lang`uage` diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/fenced_code_info_backtick.md.snap b/crates/biome_markdown_parser/tests/md_test_suite/ok/fenced_code_info_backtick.md.snap new file mode 100644 index 000000000000..6ac84d6fb081 --- /dev/null +++ b/crates/biome_markdown_parser/tests/md_test_suite/ok/fenced_code_info_backtick.md.snap @@ -0,0 +1,81 @@ +--- +source: crates/biome_markdown_parser/tests/spec_test.rs +expression: snapshot +--- +## Input + +``` +Backtick in info string should not open fence: +``` lang`uage` + +``` + + +## AST + +``` +MdDocument { + bom_token: missing (optional), + value: MdBlockList [ + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@0..46 "Backtick in info string should not open fence:" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@46..47 "\n" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@47..50 "```" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@50..55 " lang" [] [], + }, + MdInlineCode { + l_tick_token: BACKTICK@55..56 "`" [] [], + content: MdInlineItemList [ + MdTextual { + 
value_token: MD_TEXTUAL_LITERAL@56..60 "uage" [] [], + }, + ], + r_tick_token: BACKTICK@60..61 "`" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@61..62 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + ], + eof_token: EOF@62..62 "" [] [], +} +``` + +## CST + +``` +0: MD_DOCUMENT@0..62 + 0: (empty) + 1: MD_BLOCK_LIST@0..62 + 0: MD_PARAGRAPH@0..62 + 0: MD_INLINE_ITEM_LIST@0..62 + 0: MD_TEXTUAL@0..46 + 0: MD_TEXTUAL_LITERAL@0..46 "Backtick in info string should not open fence:" [] [] + 1: MD_TEXTUAL@46..47 + 0: MD_TEXTUAL_LITERAL@46..47 "\n" [] [] + 2: MD_TEXTUAL@47..50 + 0: MD_TEXTUAL_LITERAL@47..50 "```" [] [] + 3: MD_TEXTUAL@50..55 + 0: MD_TEXTUAL_LITERAL@50..55 " lang" [] [] + 4: MD_INLINE_CODE@55..61 + 0: BACKTICK@55..56 "`" [] [] + 1: MD_INLINE_ITEM_LIST@56..60 + 0: MD_TEXTUAL@56..60 + 0: MD_TEXTUAL_LITERAL@56..60 "uage" [] [] + 2: BACKTICK@60..61 "`" [] [] + 5: MD_TEXTUAL@61..62 + 0: MD_TEXTUAL_LITERAL@61..62 "\n" [] [] + 1: (empty) + 2: EOF@62..62 "" [] [] + +``` diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/hard_line_break.md b/crates/biome_markdown_parser/tests/md_test_suite/ok/hard_line_break.md new file mode 100644 index 000000000000..a86cbf987a97 --- /dev/null +++ b/crates/biome_markdown_parser/tests/md_test_suite/ok/hard_line_break.md @@ -0,0 +1,5 @@ +Line one +Line two + +Backslash\ +line break diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/hard_line_break.md.snap b/crates/biome_markdown_parser/tests/md_test_suite/ok/hard_line_break.md.snap new file mode 100644 index 000000000000..481335d7dd45 --- /dev/null +++ b/crates/biome_markdown_parser/tests/md_test_suite/ok/hard_line_break.md.snap @@ -0,0 +1,97 @@ +--- +source: crates/biome_markdown_parser/tests/spec_test.rs +expression: snapshot +--- +## Input + +``` +Line one +Line two + +Backslash\ +line break + +``` + + +## AST + +``` +MdDocument { + bom_token: missing (optional), + value: MdBlockList [ + MdParagraph { + list: MdInlineItemList [ + 
MdTextual { + value_token: MD_TEXTUAL_LITERAL@0..8 "Line one" [] [], + }, + MdHardLine { + value_token: MD_HARD_LINE_LITERAL@8..11 " \n" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@11..19 "Line two" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@19..20 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + MdNewline { + value_token: NEWLINE@20..21 "\n" [] [], + }, + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@21..30 "Backslash" [] [], + }, + MdHardLine { + value_token: MD_HARD_LINE_LITERAL@30..32 "\\\n" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@32..42 "line break" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@42..43 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + ], + eof_token: EOF@43..43 "" [] [], +} +``` + +## CST + +``` +0: MD_DOCUMENT@0..43 + 0: (empty) + 1: MD_BLOCK_LIST@0..43 + 0: MD_PARAGRAPH@0..20 + 0: MD_INLINE_ITEM_LIST@0..20 + 0: MD_TEXTUAL@0..8 + 0: MD_TEXTUAL_LITERAL@0..8 "Line one" [] [] + 1: MD_HARD_LINE@8..11 + 0: MD_HARD_LINE_LITERAL@8..11 " \n" [] [] + 2: MD_TEXTUAL@11..19 + 0: MD_TEXTUAL_LITERAL@11..19 "Line two" [] [] + 3: MD_TEXTUAL@19..20 + 0: MD_TEXTUAL_LITERAL@19..20 "\n" [] [] + 1: (empty) + 1: MD_NEWLINE@20..21 + 0: NEWLINE@20..21 "\n" [] [] + 2: MD_PARAGRAPH@21..43 + 0: MD_INLINE_ITEM_LIST@21..43 + 0: MD_TEXTUAL@21..30 + 0: MD_TEXTUAL_LITERAL@21..30 "Backslash" [] [] + 1: MD_HARD_LINE@30..32 + 0: MD_HARD_LINE_LITERAL@30..32 "\\\n" [] [] + 2: MD_TEXTUAL@32..42 + 0: MD_TEXTUAL_LITERAL@32..42 "line break" [] [] + 3: MD_TEXTUAL@42..43 + 0: MD_TEXTUAL_LITERAL@42..43 "\n" [] [] + 1: (empty) + 2: EOF@43..43 "" [] [] + +``` diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/header.md b/crates/biome_markdown_parser/tests/md_test_suite/ok/header.md new file mode 100644 index 000000000000..bbb7f21371b0 --- /dev/null +++ b/crates/biome_markdown_parser/tests/md_test_suite/ok/header.md @@ -0,0 +1,17 @@ +# Heading 
1 + +## Heading 2 + +### Heading 3 + +#### Heading 4 + +##### Heading 5 + +###### Heading 6 + +# Trailing hash # + +## Multiple trailing ## + +### Mixed # content ## with ### trailing #### diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/header.md.snap b/crates/biome_markdown_parser/tests/md_test_suite/ok/header.md.snap new file mode 100644 index 000000000000..03b5a2a8d685 --- /dev/null +++ b/crates/biome_markdown_parser/tests/md_test_suite/ok/header.md.snap @@ -0,0 +1,541 @@ +--- +source: crates/biome_markdown_parser/tests/spec_test.rs +assertion_line: 131 +expression: snapshot +--- +## Input + +``` +# Heading 1 + +## Heading 2 + +### Heading 3 + +#### Heading 4 + +##### Heading 5 + +###### Heading 6 + +# Trailing hash # + +## Multiple trailing ## + +### Mixed # content ## with ### trailing #### + +``` + + +## AST + +``` +MdDocument { + bom_token: missing (optional), + value: MdBlockList [ + MdHeader { + before: MdHashList [ + MdHash { + hash_token: HASH@0..1 "#" [] [], + }, + ], + content: MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1..11 " Heading 1" [] [], + }, + ], + hard_line: missing (optional), + }, + after: MdHashList [], + }, + MdNewline { + value_token: NEWLINE@11..12 "\n" [] [], + }, + MdNewline { + value_token: NEWLINE@12..13 "\n" [] [], + }, + MdHeader { + before: MdHashList [ + MdHash { + hash_token: HASH@13..14 "#" [] [], + }, + MdHash { + hash_token: HASH@14..15 "#" [] [], + }, + ], + content: MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@15..25 " Heading 2" [] [], + }, + ], + hard_line: missing (optional), + }, + after: MdHashList [], + }, + MdNewline { + value_token: NEWLINE@25..26 "\n" [] [], + }, + MdNewline { + value_token: NEWLINE@26..27 "\n" [] [], + }, + MdHeader { + before: MdHashList [ + MdHash { + hash_token: HASH@27..28 "#" [] [], + }, + MdHash { + hash_token: HASH@28..29 "#" [] [], + }, + MdHash { + hash_token: HASH@29..30 "#" [] [], 
+ }, + ], + content: MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@30..40 " Heading 3" [] [], + }, + ], + hard_line: missing (optional), + }, + after: MdHashList [], + }, + MdNewline { + value_token: NEWLINE@40..41 "\n" [] [], + }, + MdNewline { + value_token: NEWLINE@41..42 "\n" [] [], + }, + MdHeader { + before: MdHashList [ + MdHash { + hash_token: HASH@42..43 "#" [] [], + }, + MdHash { + hash_token: HASH@43..44 "#" [] [], + }, + MdHash { + hash_token: HASH@44..45 "#" [] [], + }, + MdHash { + hash_token: HASH@45..46 "#" [] [], + }, + ], + content: MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@46..56 " Heading 4" [] [], + }, + ], + hard_line: missing (optional), + }, + after: MdHashList [], + }, + MdNewline { + value_token: NEWLINE@56..57 "\n" [] [], + }, + MdNewline { + value_token: NEWLINE@57..58 "\n" [] [], + }, + MdHeader { + before: MdHashList [ + MdHash { + hash_token: HASH@58..59 "#" [] [], + }, + MdHash { + hash_token: HASH@59..60 "#" [] [], + }, + MdHash { + hash_token: HASH@60..61 "#" [] [], + }, + MdHash { + hash_token: HASH@61..62 "#" [] [], + }, + MdHash { + hash_token: HASH@62..63 "#" [] [], + }, + ], + content: MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@63..73 " Heading 5" [] [], + }, + ], + hard_line: missing (optional), + }, + after: MdHashList [], + }, + MdNewline { + value_token: NEWLINE@73..74 "\n" [] [], + }, + MdNewline { + value_token: NEWLINE@74..75 "\n" [] [], + }, + MdHeader { + before: MdHashList [ + MdHash { + hash_token: HASH@75..76 "#" [] [], + }, + MdHash { + hash_token: HASH@76..77 "#" [] [], + }, + MdHash { + hash_token: HASH@77..78 "#" [] [], + }, + MdHash { + hash_token: HASH@78..79 "#" [] [], + }, + MdHash { + hash_token: HASH@79..80 "#" [] [], + }, + MdHash { + hash_token: HASH@80..81 "#" [] [], + }, + ], + content: MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: 
MD_TEXTUAL_LITERAL@81..91 " Heading 6" [] [], + }, + ], + hard_line: missing (optional), + }, + after: MdHashList [], + }, + MdNewline { + value_token: NEWLINE@91..92 "\n" [] [], + }, + MdNewline { + value_token: NEWLINE@92..93 "\n" [] [], + }, + MdHeader { + before: MdHashList [ + MdHash { + hash_token: HASH@93..94 "#" [] [], + }, + ], + content: MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@94..108 " Trailing hash" [] [], + }, + ], + hard_line: missing (optional), + }, + after: MdHashList [ + MdHash { + hash_token: HASH@108..110 "#" [Skipped(" ")] [], + }, + ], + }, + MdNewline { + value_token: NEWLINE@110..111 "\n" [] [], + }, + MdNewline { + value_token: NEWLINE@111..112 "\n" [] [], + }, + MdHeader { + before: MdHashList [ + MdHash { + hash_token: HASH@112..113 "#" [] [], + }, + MdHash { + hash_token: HASH@113..114 "#" [] [], + }, + ], + content: MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@114..132 " Multiple trailing" [] [], + }, + ], + hard_line: missing (optional), + }, + after: MdHashList [ + MdHash { + hash_token: HASH@132..134 "#" [Skipped(" ")] [], + }, + MdHash { + hash_token: HASH@134..135 "#" [] [], + }, + ], + }, + MdNewline { + value_token: NEWLINE@135..136 "\n" [] [], + }, + MdNewline { + value_token: NEWLINE@136..137 "\n" [] [], + }, + MdHeader { + before: MdHashList [ + MdHash { + hash_token: HASH@137..138 "#" [] [], + }, + MdHash { + hash_token: HASH@138..139 "#" [] [], + }, + MdHash { + hash_token: HASH@139..140 "#" [] [], + }, + ], + content: MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@140..147 " Mixed " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@147..148 "#" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@148..157 " content " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@157..158 "#" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@158..159 "#" [] [], + }, + 
MdTextual { + value_token: MD_TEXTUAL_LITERAL@159..165 " with " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@165..166 "#" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@166..167 "#" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@167..168 "#" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@168..177 " trailing" [] [], + }, + ], + hard_line: missing (optional), + }, + after: MdHashList [ + MdHash { + hash_token: HASH@177..179 "#" [Skipped(" ")] [], + }, + MdHash { + hash_token: HASH@179..180 "#" [] [], + }, + MdHash { + hash_token: HASH@180..181 "#" [] [], + }, + MdHash { + hash_token: HASH@181..182 "#" [] [], + }, + ], + }, + MdNewline { + value_token: NEWLINE@182..183 "\n" [] [], + }, + ], + eof_token: EOF@183..183 "" [] [], +} +``` + +## CST + +``` +0: MD_DOCUMENT@0..183 + 0: (empty) + 1: MD_BLOCK_LIST@0..183 + 0: MD_HEADER@0..11 + 0: MD_HASH_LIST@0..1 + 0: MD_HASH@0..1 + 0: HASH@0..1 "#" [] [] + 1: MD_PARAGRAPH@1..11 + 0: MD_INLINE_ITEM_LIST@1..11 + 0: MD_TEXTUAL@1..11 + 0: MD_TEXTUAL_LITERAL@1..11 " Heading 1" [] [] + 1: (empty) + 2: MD_HASH_LIST@11..11 + 1: MD_NEWLINE@11..12 + 0: NEWLINE@11..12 "\n" [] [] + 2: MD_NEWLINE@12..13 + 0: NEWLINE@12..13 "\n" [] [] + 3: MD_HEADER@13..25 + 0: MD_HASH_LIST@13..15 + 0: MD_HASH@13..14 + 0: HASH@13..14 "#" [] [] + 1: MD_HASH@14..15 + 0: HASH@14..15 "#" [] [] + 1: MD_PARAGRAPH@15..25 + 0: MD_INLINE_ITEM_LIST@15..25 + 0: MD_TEXTUAL@15..25 + 0: MD_TEXTUAL_LITERAL@15..25 " Heading 2" [] [] + 1: (empty) + 2: MD_HASH_LIST@25..25 + 4: MD_NEWLINE@25..26 + 0: NEWLINE@25..26 "\n" [] [] + 5: MD_NEWLINE@26..27 + 0: NEWLINE@26..27 "\n" [] [] + 6: MD_HEADER@27..40 + 0: MD_HASH_LIST@27..30 + 0: MD_HASH@27..28 + 0: HASH@27..28 "#" [] [] + 1: MD_HASH@28..29 + 0: HASH@28..29 "#" [] [] + 2: MD_HASH@29..30 + 0: HASH@29..30 "#" [] [] + 1: MD_PARAGRAPH@30..40 + 0: MD_INLINE_ITEM_LIST@30..40 + 0: MD_TEXTUAL@30..40 + 0: MD_TEXTUAL_LITERAL@30..40 " Heading 3" [] [] + 1: (empty) + 2: 
MD_HASH_LIST@40..40 + 7: MD_NEWLINE@40..41 + 0: NEWLINE@40..41 "\n" [] [] + 8: MD_NEWLINE@41..42 + 0: NEWLINE@41..42 "\n" [] [] + 9: MD_HEADER@42..56 + 0: MD_HASH_LIST@42..46 + 0: MD_HASH@42..43 + 0: HASH@42..43 "#" [] [] + 1: MD_HASH@43..44 + 0: HASH@43..44 "#" [] [] + 2: MD_HASH@44..45 + 0: HASH@44..45 "#" [] [] + 3: MD_HASH@45..46 + 0: HASH@45..46 "#" [] [] + 1: MD_PARAGRAPH@46..56 + 0: MD_INLINE_ITEM_LIST@46..56 + 0: MD_TEXTUAL@46..56 + 0: MD_TEXTUAL_LITERAL@46..56 " Heading 4" [] [] + 1: (empty) + 2: MD_HASH_LIST@56..56 + 10: MD_NEWLINE@56..57 + 0: NEWLINE@56..57 "\n" [] [] + 11: MD_NEWLINE@57..58 + 0: NEWLINE@57..58 "\n" [] [] + 12: MD_HEADER@58..73 + 0: MD_HASH_LIST@58..63 + 0: MD_HASH@58..59 + 0: HASH@58..59 "#" [] [] + 1: MD_HASH@59..60 + 0: HASH@59..60 "#" [] [] + 2: MD_HASH@60..61 + 0: HASH@60..61 "#" [] [] + 3: MD_HASH@61..62 + 0: HASH@61..62 "#" [] [] + 4: MD_HASH@62..63 + 0: HASH@62..63 "#" [] [] + 1: MD_PARAGRAPH@63..73 + 0: MD_INLINE_ITEM_LIST@63..73 + 0: MD_TEXTUAL@63..73 + 0: MD_TEXTUAL_LITERAL@63..73 " Heading 5" [] [] + 1: (empty) + 2: MD_HASH_LIST@73..73 + 13: MD_NEWLINE@73..74 + 0: NEWLINE@73..74 "\n" [] [] + 14: MD_NEWLINE@74..75 + 0: NEWLINE@74..75 "\n" [] [] + 15: MD_HEADER@75..91 + 0: MD_HASH_LIST@75..81 + 0: MD_HASH@75..76 + 0: HASH@75..76 "#" [] [] + 1: MD_HASH@76..77 + 0: HASH@76..77 "#" [] [] + 2: MD_HASH@77..78 + 0: HASH@77..78 "#" [] [] + 3: MD_HASH@78..79 + 0: HASH@78..79 "#" [] [] + 4: MD_HASH@79..80 + 0: HASH@79..80 "#" [] [] + 5: MD_HASH@80..81 + 0: HASH@80..81 "#" [] [] + 1: MD_PARAGRAPH@81..91 + 0: MD_INLINE_ITEM_LIST@81..91 + 0: MD_TEXTUAL@81..91 + 0: MD_TEXTUAL_LITERAL@81..91 " Heading 6" [] [] + 1: (empty) + 2: MD_HASH_LIST@91..91 + 16: MD_NEWLINE@91..92 + 0: NEWLINE@91..92 "\n" [] [] + 17: MD_NEWLINE@92..93 + 0: NEWLINE@92..93 "\n" [] [] + 18: MD_HEADER@93..110 + 0: MD_HASH_LIST@93..94 + 0: MD_HASH@93..94 + 0: HASH@93..94 "#" [] [] + 1: MD_PARAGRAPH@94..108 + 0: MD_INLINE_ITEM_LIST@94..108 + 0: MD_TEXTUAL@94..108 + 0: 
MD_TEXTUAL_LITERAL@94..108 " Trailing hash" [] [] + 1: (empty) + 2: MD_HASH_LIST@108..110 + 0: MD_HASH@108..110 + 0: HASH@108..110 "#" [Skipped(" ")] [] + 19: MD_NEWLINE@110..111 + 0: NEWLINE@110..111 "\n" [] [] + 20: MD_NEWLINE@111..112 + 0: NEWLINE@111..112 "\n" [] [] + 21: MD_HEADER@112..135 + 0: MD_HASH_LIST@112..114 + 0: MD_HASH@112..113 + 0: HASH@112..113 "#" [] [] + 1: MD_HASH@113..114 + 0: HASH@113..114 "#" [] [] + 1: MD_PARAGRAPH@114..132 + 0: MD_INLINE_ITEM_LIST@114..132 + 0: MD_TEXTUAL@114..132 + 0: MD_TEXTUAL_LITERAL@114..132 " Multiple trailing" [] [] + 1: (empty) + 2: MD_HASH_LIST@132..135 + 0: MD_HASH@132..134 + 0: HASH@132..134 "#" [Skipped(" ")] [] + 1: MD_HASH@134..135 + 0: HASH@134..135 "#" [] [] + 22: MD_NEWLINE@135..136 + 0: NEWLINE@135..136 "\n" [] [] + 23: MD_NEWLINE@136..137 + 0: NEWLINE@136..137 "\n" [] [] + 24: MD_HEADER@137..182 + 0: MD_HASH_LIST@137..140 + 0: MD_HASH@137..138 + 0: HASH@137..138 "#" [] [] + 1: MD_HASH@138..139 + 0: HASH@138..139 "#" [] [] + 2: MD_HASH@139..140 + 0: HASH@139..140 "#" [] [] + 1: MD_PARAGRAPH@140..177 + 0: MD_INLINE_ITEM_LIST@140..177 + 0: MD_TEXTUAL@140..147 + 0: MD_TEXTUAL_LITERAL@140..147 " Mixed " [] [] + 1: MD_TEXTUAL@147..148 + 0: MD_TEXTUAL_LITERAL@147..148 "#" [] [] + 2: MD_TEXTUAL@148..157 + 0: MD_TEXTUAL_LITERAL@148..157 " content " [] [] + 3: MD_TEXTUAL@157..158 + 0: MD_TEXTUAL_LITERAL@157..158 "#" [] [] + 4: MD_TEXTUAL@158..159 + 0: MD_TEXTUAL_LITERAL@158..159 "#" [] [] + 5: MD_TEXTUAL@159..165 + 0: MD_TEXTUAL_LITERAL@159..165 " with " [] [] + 6: MD_TEXTUAL@165..166 + 0: MD_TEXTUAL_LITERAL@165..166 "#" [] [] + 7: MD_TEXTUAL@166..167 + 0: MD_TEXTUAL_LITERAL@166..167 "#" [] [] + 8: MD_TEXTUAL@167..168 + 0: MD_TEXTUAL_LITERAL@167..168 "#" [] [] + 9: MD_TEXTUAL@168..177 + 0: MD_TEXTUAL_LITERAL@168..177 " trailing" [] [] + 1: (empty) + 2: MD_HASH_LIST@177..182 + 0: MD_HASH@177..179 + 0: HASH@177..179 "#" [Skipped(" ")] [] + 1: MD_HASH@179..180 + 0: HASH@179..180 "#" [] [] + 2: MD_HASH@180..181 + 0: 
HASH@180..181 "#" [] [] + 3: MD_HASH@181..182 + 0: HASH@181..182 "#" [] [] + 25: MD_NEWLINE@182..183 + 0: NEWLINE@182..183 "\n" [] [] + 2: EOF@183..183 "" [] [] + +``` diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/html_block.md b/crates/biome_markdown_parser/tests/md_test_suite/ok/html_block.md new file mode 100644 index 000000000000..8c96bb22befc --- /dev/null +++ b/crates/biome_markdown_parser/tests/md_test_suite/ok/html_block.md @@ -0,0 +1,6 @@ +
    +This is an HTML block. +It continues until blank line. +
    + +Next paragraph after blank line. diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/html_block.md.snap b/crates/biome_markdown_parser/tests/md_test_suite/ok/html_block.md.snap new file mode 100644 index 000000000000..036ee6008be8 --- /dev/null +++ b/crates/biome_markdown_parser/tests/md_test_suite/ok/html_block.md.snap @@ -0,0 +1,126 @@ +--- +source: crates/biome_markdown_parser/tests/spec_test.rs +expression: snapshot +--- +## Input + +``` +
    +This is an HTML block. +It continues until blank line. +
    + +Next paragraph after blank line. + +``` + + +## AST + +``` +MdDocument { + bom_token: missing (optional), + value: MdBlockList [ + MdHtmlBlock { + content: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@0..1 "<" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1..4 "div" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@4..5 ">" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@5..6 "\n" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@6..28 "This is an HTML block." [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@28..29 "\n" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@29..59 "It continues until blank line." [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@59..60 "\n" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@60..61 "<" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@61..65 "/div" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@65..66 ">" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@66..67 "\n" [] [], + }, + ], + }, + MdNewline { + value_token: NEWLINE@67..68 "\n" [] [], + }, + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@68..100 "Next paragraph after blank line." [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@100..101 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + ], + eof_token: EOF@101..101 "" [] [], +} +``` + +## CST + +``` +0: MD_DOCUMENT@0..101 + 0: (empty) + 1: MD_BLOCK_LIST@0..101 + 0: MD_HTML_BLOCK@0..67 + 0: MD_INLINE_ITEM_LIST@0..67 + 0: MD_TEXTUAL@0..1 + 0: MD_TEXTUAL_LITERAL@0..1 "<" [] [] + 1: MD_TEXTUAL@1..4 + 0: MD_TEXTUAL_LITERAL@1..4 "div" [] [] + 2: MD_TEXTUAL@4..5 + 0: MD_TEXTUAL_LITERAL@4..5 ">" [] [] + 3: MD_TEXTUAL@5..6 + 0: MD_TEXTUAL_LITERAL@5..6 "\n" [] [] + 4: MD_TEXTUAL@6..28 + 0: MD_TEXTUAL_LITERAL@6..28 "This is an HTML block." 
[] [] + 5: MD_TEXTUAL@28..29 + 0: MD_TEXTUAL_LITERAL@28..29 "\n" [] [] + 6: MD_TEXTUAL@29..59 + 0: MD_TEXTUAL_LITERAL@29..59 "It continues until blank line." [] [] + 7: MD_TEXTUAL@59..60 + 0: MD_TEXTUAL_LITERAL@59..60 "\n" [] [] + 8: MD_TEXTUAL@60..61 + 0: MD_TEXTUAL_LITERAL@60..61 "<" [] [] + 9: MD_TEXTUAL@61..65 + 0: MD_TEXTUAL_LITERAL@61..65 "/div" [] [] + 10: MD_TEXTUAL@65..66 + 0: MD_TEXTUAL_LITERAL@65..66 ">" [] [] + 11: MD_TEXTUAL@66..67 + 0: MD_TEXTUAL_LITERAL@66..67 "\n" [] [] + 1: MD_NEWLINE@67..68 + 0: NEWLINE@67..68 "\n" [] [] + 2: MD_PARAGRAPH@68..101 + 0: MD_INLINE_ITEM_LIST@68..101 + 0: MD_TEXTUAL@68..100 + 0: MD_TEXTUAL_LITERAL@68..100 "Next paragraph after blank line." [] [] + 1: MD_TEXTUAL@100..101 + 0: MD_TEXTUAL_LITERAL@100..101 "\n" [] [] + 1: (empty) + 2: EOF@101..101 "" [] [] + +``` diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/indent_code_block.md b/crates/biome_markdown_parser/tests/md_test_suite/ok/indent_code_block.md new file mode 100644 index 000000000000..9dcdb50c8250 --- /dev/null +++ b/crates/biome_markdown_parser/tests/md_test_suite/ok/indent_code_block.md @@ -0,0 +1,8 @@ + function hello() { + console.log("indented"); + } + +Regular paragraph here. + + More code + continues here diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/indent_code_block.md.snap b/crates/biome_markdown_parser/tests/md_test_suite/ok/indent_code_block.md.snap new file mode 100644 index 000000000000..dbbb87aae9f0 --- /dev/null +++ b/crates/biome_markdown_parser/tests/md_test_suite/ok/indent_code_block.md.snap @@ -0,0 +1,170 @@ +--- +source: crates/biome_markdown_parser/tests/spec_test.rs +assertion_line: 131 +expression: snapshot +--- +## Input + +``` + function hello() { + console.log("indented"); + } + +Regular paragraph here. 
+ + More code + continues here + +``` + + +## AST + +``` +MdDocument { + bom_token: missing (optional), + value: MdBlockList [ + MdIndentCodeBlock { + content: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@0..18 "function hello" [Skipped(" "), Skipped(" "), Skipped(" "), Skipped(" ")] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@18..19 "(" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@19..20 ")" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@20..22 " {" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@22..23 "\n" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@23..28 " " [Skipped(" "), Skipped(" "), Skipped(" "), Skipped(" ")] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@28..42 "console.log" [Skipped(" "), Skipped(" "), Skipped(" ")] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@42..43 "(" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@43..53 "\"indented\"" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@53..54 ")" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@54..55 ";" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@55..56 "\n" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@56..61 "}" [Skipped(" "), Skipped(" "), Skipped(" "), Skipped(" ")] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@61..62 "\n" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@62..63 "\n" [] [], + }, + ], + }, + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@63..86 "Regular paragraph here." 
[] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@86..87 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + MdNewline { + value_token: NEWLINE@87..88 "\n" [] [], + }, + MdIndentCodeBlock { + content: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@88..101 "More code" [Skipped(" "), Skipped(" "), Skipped(" "), Skipped(" ")] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@101..102 "\n" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@102..120 "continues here" [Skipped(" "), Skipped(" "), Skipped(" "), Skipped(" ")] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@120..121 "\n" [] [], + }, + ], + }, + ], + eof_token: EOF@121..121 "" [] [], +} +``` + +## CST + +``` +0: MD_DOCUMENT@0..121 + 0: (empty) + 1: MD_BLOCK_LIST@0..121 + 0: MD_INDENT_CODE_BLOCK@0..63 + 0: MD_INLINE_ITEM_LIST@0..63 + 0: MD_TEXTUAL@0..18 + 0: MD_TEXTUAL_LITERAL@0..18 "function hello" [Skipped(" "), Skipped(" "), Skipped(" "), Skipped(" ")] [] + 1: MD_TEXTUAL@18..19 + 0: MD_TEXTUAL_LITERAL@18..19 "(" [] [] + 2: MD_TEXTUAL@19..20 + 0: MD_TEXTUAL_LITERAL@19..20 ")" [] [] + 3: MD_TEXTUAL@20..22 + 0: MD_TEXTUAL_LITERAL@20..22 " {" [] [] + 4: MD_TEXTUAL@22..23 + 0: MD_TEXTUAL_LITERAL@22..23 "\n" [] [] + 5: MD_TEXTUAL@23..28 + 0: MD_TEXTUAL_LITERAL@23..28 " " [Skipped(" "), Skipped(" "), Skipped(" "), Skipped(" ")] [] + 6: MD_TEXTUAL@28..42 + 0: MD_TEXTUAL_LITERAL@28..42 "console.log" [Skipped(" "), Skipped(" "), Skipped(" ")] [] + 7: MD_TEXTUAL@42..43 + 0: MD_TEXTUAL_LITERAL@42..43 "(" [] [] + 8: MD_TEXTUAL@43..53 + 0: MD_TEXTUAL_LITERAL@43..53 "\"indented\"" [] [] + 9: MD_TEXTUAL@53..54 + 0: MD_TEXTUAL_LITERAL@53..54 ")" [] [] + 10: MD_TEXTUAL@54..55 + 0: MD_TEXTUAL_LITERAL@54..55 ";" [] [] + 11: MD_TEXTUAL@55..56 + 0: MD_TEXTUAL_LITERAL@55..56 "\n" [] [] + 12: MD_TEXTUAL@56..61 + 0: MD_TEXTUAL_LITERAL@56..61 "}" [Skipped(" "), Skipped(" "), Skipped(" "), Skipped(" ")] [] + 13: MD_TEXTUAL@61..62 + 0: MD_TEXTUAL_LITERAL@61..62 "\n" [] 
[] + 14: MD_TEXTUAL@62..63 + 0: MD_TEXTUAL_LITERAL@62..63 "\n" [] [] + 1: MD_PARAGRAPH@63..87 + 0: MD_INLINE_ITEM_LIST@63..87 + 0: MD_TEXTUAL@63..86 + 0: MD_TEXTUAL_LITERAL@63..86 "Regular paragraph here." [] [] + 1: MD_TEXTUAL@86..87 + 0: MD_TEXTUAL_LITERAL@86..87 "\n" [] [] + 1: (empty) + 2: MD_NEWLINE@87..88 + 0: NEWLINE@87..88 "\n" [] [] + 3: MD_INDENT_CODE_BLOCK@88..121 + 0: MD_INLINE_ITEM_LIST@88..121 + 0: MD_TEXTUAL@88..101 + 0: MD_TEXTUAL_LITERAL@88..101 "More code" [Skipped(" "), Skipped(" "), Skipped(" "), Skipped(" ")] [] + 1: MD_TEXTUAL@101..102 + 0: MD_TEXTUAL_LITERAL@101..102 "\n" [] [] + 2: MD_TEXTUAL@102..120 + 0: MD_TEXTUAL_LITERAL@102..120 "continues here" [Skipped(" "), Skipped(" "), Skipped(" "), Skipped(" ")] [] + 3: MD_TEXTUAL@120..121 + 0: MD_TEXTUAL_LITERAL@120..121 "\n" [] [] + 2: EOF@121..121 "" [] [] + +``` diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/indented_code_blank_lines.md b/crates/biome_markdown_parser/tests/md_test_suite/ok/indented_code_blank_lines.md new file mode 100644 index 000000000000..8c556a8b92b5 --- /dev/null +++ b/crates/biome_markdown_parser/tests/md_test_suite/ok/indented_code_blank_lines.md @@ -0,0 +1,5 @@ + a + + b + + x diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/indented_code_blank_lines.md.snap b/crates/biome_markdown_parser/tests/md_test_suite/ok/indented_code_blank_lines.md.snap new file mode 100644 index 000000000000..2f998fe57b31 --- /dev/null +++ b/crates/biome_markdown_parser/tests/md_test_suite/ok/indented_code_blank_lines.md.snap @@ -0,0 +1,88 @@ +--- +source: crates/biome_markdown_parser/tests/spec_test.rs +assertion_line: 131 +expression: snapshot +--- +## Input + +``` + a + + b + + x + +``` + + +## AST + +``` +MdDocument { + bom_token: missing (optional), + value: MdBlockList [ + MdIndentCodeBlock { + content: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@0..5 "a" [Skipped(" "), Skipped(" "), Skipped(" "), Skipped(" ")] [], + }, + MdTextual 
{ + value_token: MD_TEXTUAL_LITERAL@5..6 "\n" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@6..7 "\n" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@7..12 "b" [Skipped(" "), Skipped(" "), Skipped(" "), Skipped(" ")] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@12..13 "\n" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@13..14 "\n" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@14..19 " " [Skipped(" "), Skipped(" "), Skipped(" "), Skipped(" ")] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@19..23 "x" [Skipped(" "), Skipped(" "), Skipped(" ")] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@23..24 "\n" [] [], + }, + ], + }, + ], + eof_token: EOF@24..24 "" [] [], +} +``` + +## CST + +``` +0: MD_DOCUMENT@0..24 + 0: (empty) + 1: MD_BLOCK_LIST@0..24 + 0: MD_INDENT_CODE_BLOCK@0..24 + 0: MD_INLINE_ITEM_LIST@0..24 + 0: MD_TEXTUAL@0..5 + 0: MD_TEXTUAL_LITERAL@0..5 "a" [Skipped(" "), Skipped(" "), Skipped(" "), Skipped(" ")] [] + 1: MD_TEXTUAL@5..6 + 0: MD_TEXTUAL_LITERAL@5..6 "\n" [] [] + 2: MD_TEXTUAL@6..7 + 0: MD_TEXTUAL_LITERAL@6..7 "\n" [] [] + 3: MD_TEXTUAL@7..12 + 0: MD_TEXTUAL_LITERAL@7..12 "b" [Skipped(" "), Skipped(" "), Skipped(" "), Skipped(" ")] [] + 4: MD_TEXTUAL@12..13 + 0: MD_TEXTUAL_LITERAL@12..13 "\n" [] [] + 5: MD_TEXTUAL@13..14 + 0: MD_TEXTUAL_LITERAL@13..14 "\n" [] [] + 6: MD_TEXTUAL@14..19 + 0: MD_TEXTUAL_LITERAL@14..19 " " [Skipped(" "), Skipped(" "), Skipped(" "), Skipped(" ")] [] + 7: MD_TEXTUAL@19..23 + 0: MD_TEXTUAL_LITERAL@19..23 "x" [Skipped(" "), Skipped(" "), Skipped(" ")] [] + 8: MD_TEXTUAL@23..24 + 0: MD_TEXTUAL_LITERAL@23..24 "\n" [] [] + 2: EOF@24..24 "" [] [] + +``` diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/inline_elements.md b/crates/biome_markdown_parser/tests/md_test_suite/ok/inline_elements.md new file mode 100644 index 000000000000..c0f5c4983ac7 --- /dev/null +++ 
b/crates/biome_markdown_parser/tests/md_test_suite/ok/inline_elements.md @@ -0,0 +1,7 @@ +This has `inline code` in it. + +This is *italic* and this is **bold**. + +This is _also italic_ and __also bold__. + +Here is a [link](https://example.com) and an ![image](image.png). diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/inline_elements.md.snap b/crates/biome_markdown_parser/tests/md_test_suite/ok/inline_elements.md.snap new file mode 100644 index 000000000000..c17c1c3dd644 --- /dev/null +++ b/crates/biome_markdown_parser/tests/md_test_suite/ok/inline_elements.md.snap @@ -0,0 +1,295 @@ +--- +source: crates/biome_markdown_parser/tests/spec_test.rs +expression: snapshot +--- +## Input + +``` +This has `inline code` in it. + +This is *italic* and this is **bold**. + +This is _also italic_ and __also bold__. + +Here is a [link](https://example.com) and an ![image](image.png). + +``` + + +## AST + +``` +MdDocument { + bom_token: missing (optional), + value: MdBlockList [ + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@0..9 "This has " [] [], + }, + MdInlineCode { + l_tick_token: BACKTICK@9..10 "`" [] [], + content: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@10..21 "inline code" [] [], + }, + ], + r_tick_token: BACKTICK@21..22 "`" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@22..29 " in it." 
[] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@29..30 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + MdNewline { + value_token: NEWLINE@30..31 "\n" [] [], + }, + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@31..39 "This is " [] [], + }, + MdInlineItalic { + l_fence: STAR@39..40 "*" [] [], + content: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@40..46 "italic" [] [], + }, + ], + r_fence: STAR@46..47 "*" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@47..60 " and this is " [] [], + }, + MdInlineEmphasis { + l_fence: DOUBLE_STAR@60..62 "**" [] [], + content: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@62..66 "bold" [] [], + }, + ], + r_fence: DOUBLE_STAR@66..68 "**" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@68..69 "." [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@69..70 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + MdNewline { + value_token: NEWLINE@70..71 "\n" [] [], + }, + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@71..79 "This is " [] [], + }, + MdInlineItalic { + l_fence: UNDERSCORE@79..80 "_" [] [], + content: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@80..91 "also italic" [] [], + }, + ], + r_fence: UNDERSCORE@91..92 "_" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@92..97 " and " [] [], + }, + MdInlineEmphasis { + l_fence: DOUBLE_UNDERSCORE@97..99 "__" [] [], + content: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@99..108 "also bold" [] [], + }, + ], + r_fence: DOUBLE_UNDERSCORE@108..110 "__" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@110..111 "." 
[] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@111..112 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + MdNewline { + value_token: NEWLINE@112..113 "\n" [] [], + }, + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@113..123 "Here is a " [] [], + }, + MdInlineLink { + l_brack_token: L_BRACK@123..124 "[" [] [], + text: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@124..128 "link" [] [], + }, + ], + r_brack_token: R_BRACK@128..129 "]" [] [], + l_paren_token: L_PAREN@129..130 "(" [] [], + destination: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@130..149 "https://example.com" [] [], + }, + ], + title: missing (optional), + r_paren_token: R_PAREN@149..150 ")" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@150..158 " and an " [] [], + }, + MdInlineImage { + excl_token: BANG@158..159 "!" [] [], + l_brack_token: L_BRACK@159..160 "[" [] [], + alt: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@160..165 "image" [] [], + }, + ], + r_brack_token: R_BRACK@165..166 "]" [] [], + l_paren_token: L_PAREN@166..167 "(" [] [], + destination: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@167..176 "image.png" [] [], + }, + ], + title: missing (optional), + r_paren_token: R_PAREN@176..177 ")" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@177..178 "." 
[] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@178..179 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + ], + eof_token: EOF@179..179 "" [] [], +} +``` + +## CST + +``` +0: MD_DOCUMENT@0..179 + 0: (empty) + 1: MD_BLOCK_LIST@0..179 + 0: MD_PARAGRAPH@0..30 + 0: MD_INLINE_ITEM_LIST@0..30 + 0: MD_TEXTUAL@0..9 + 0: MD_TEXTUAL_LITERAL@0..9 "This has " [] [] + 1: MD_INLINE_CODE@9..22 + 0: BACKTICK@9..10 "`" [] [] + 1: MD_INLINE_ITEM_LIST@10..21 + 0: MD_TEXTUAL@10..21 + 0: MD_TEXTUAL_LITERAL@10..21 "inline code" [] [] + 2: BACKTICK@21..22 "`" [] [] + 2: MD_TEXTUAL@22..29 + 0: MD_TEXTUAL_LITERAL@22..29 " in it." [] [] + 3: MD_TEXTUAL@29..30 + 0: MD_TEXTUAL_LITERAL@29..30 "\n" [] [] + 1: (empty) + 1: MD_NEWLINE@30..31 + 0: NEWLINE@30..31 "\n" [] [] + 2: MD_PARAGRAPH@31..70 + 0: MD_INLINE_ITEM_LIST@31..70 + 0: MD_TEXTUAL@31..39 + 0: MD_TEXTUAL_LITERAL@31..39 "This is " [] [] + 1: MD_INLINE_ITALIC@39..47 + 0: STAR@39..40 "*" [] [] + 1: MD_INLINE_ITEM_LIST@40..46 + 0: MD_TEXTUAL@40..46 + 0: MD_TEXTUAL_LITERAL@40..46 "italic" [] [] + 2: STAR@46..47 "*" [] [] + 2: MD_TEXTUAL@47..60 + 0: MD_TEXTUAL_LITERAL@47..60 " and this is " [] [] + 3: MD_INLINE_EMPHASIS@60..68 + 0: DOUBLE_STAR@60..62 "**" [] [] + 1: MD_INLINE_ITEM_LIST@62..66 + 0: MD_TEXTUAL@62..66 + 0: MD_TEXTUAL_LITERAL@62..66 "bold" [] [] + 2: DOUBLE_STAR@66..68 "**" [] [] + 4: MD_TEXTUAL@68..69 + 0: MD_TEXTUAL_LITERAL@68..69 "." 
[] [] + 5: MD_TEXTUAL@69..70 + 0: MD_TEXTUAL_LITERAL@69..70 "\n" [] [] + 1: (empty) + 3: MD_NEWLINE@70..71 + 0: NEWLINE@70..71 "\n" [] [] + 4: MD_PARAGRAPH@71..112 + 0: MD_INLINE_ITEM_LIST@71..112 + 0: MD_TEXTUAL@71..79 + 0: MD_TEXTUAL_LITERAL@71..79 "This is " [] [] + 1: MD_INLINE_ITALIC@79..92 + 0: UNDERSCORE@79..80 "_" [] [] + 1: MD_INLINE_ITEM_LIST@80..91 + 0: MD_TEXTUAL@80..91 + 0: MD_TEXTUAL_LITERAL@80..91 "also italic" [] [] + 2: UNDERSCORE@91..92 "_" [] [] + 2: MD_TEXTUAL@92..97 + 0: MD_TEXTUAL_LITERAL@92..97 " and " [] [] + 3: MD_INLINE_EMPHASIS@97..110 + 0: DOUBLE_UNDERSCORE@97..99 "__" [] [] + 1: MD_INLINE_ITEM_LIST@99..108 + 0: MD_TEXTUAL@99..108 + 0: MD_TEXTUAL_LITERAL@99..108 "also bold" [] [] + 2: DOUBLE_UNDERSCORE@108..110 "__" [] [] + 4: MD_TEXTUAL@110..111 + 0: MD_TEXTUAL_LITERAL@110..111 "." [] [] + 5: MD_TEXTUAL@111..112 + 0: MD_TEXTUAL_LITERAL@111..112 "\n" [] [] + 1: (empty) + 5: MD_NEWLINE@112..113 + 0: NEWLINE@112..113 "\n" [] [] + 6: MD_PARAGRAPH@113..179 + 0: MD_INLINE_ITEM_LIST@113..179 + 0: MD_TEXTUAL@113..123 + 0: MD_TEXTUAL_LITERAL@113..123 "Here is a " [] [] + 1: MD_INLINE_LINK@123..150 + 0: L_BRACK@123..124 "[" [] [] + 1: MD_INLINE_ITEM_LIST@124..128 + 0: MD_TEXTUAL@124..128 + 0: MD_TEXTUAL_LITERAL@124..128 "link" [] [] + 2: R_BRACK@128..129 "]" [] [] + 3: L_PAREN@129..130 "(" [] [] + 4: MD_INLINE_ITEM_LIST@130..149 + 0: MD_TEXTUAL@130..149 + 0: MD_TEXTUAL_LITERAL@130..149 "https://example.com" [] [] + 5: (empty) + 6: R_PAREN@149..150 ")" [] [] + 2: MD_TEXTUAL@150..158 + 0: MD_TEXTUAL_LITERAL@150..158 " and an " [] [] + 3: MD_INLINE_IMAGE@158..177 + 0: BANG@158..159 "!" 
[] [] + 1: L_BRACK@159..160 "[" [] [] + 2: MD_INLINE_ITEM_LIST@160..165 + 0: MD_TEXTUAL@160..165 + 0: MD_TEXTUAL_LITERAL@160..165 "image" [] [] + 3: R_BRACK@165..166 "]" [] [] + 4: L_PAREN@166..167 "(" [] [] + 5: MD_INLINE_ITEM_LIST@167..176 + 0: MD_TEXTUAL@167..176 + 0: MD_TEXTUAL_LITERAL@167..176 "image.png" [] [] + 6: (empty) + 7: R_PAREN@176..177 ")" [] [] + 4: MD_TEXTUAL@177..178 + 0: MD_TEXTUAL_LITERAL@177..178 "." [] [] + 5: MD_TEXTUAL@178..179 + 0: MD_TEXTUAL_LITERAL@178..179 "\n" [] [] + 1: (empty) + 2: EOF@179..179 "" [] [] + +``` diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/inline_html.md b/crates/biome_markdown_parser/tests/md_test_suite/ok/inline_html.md new file mode 100644 index 000000000000..78c99f0396ef --- /dev/null +++ b/crates/biome_markdown_parser/tests/md_test_suite/ok/inline_html.md @@ -0,0 +1,11 @@ +This has inline HTML in it. + +Link with anchor text. + +Self-closing:
    and + +Comment: inline. + +PI: here. + +CDATA: here. diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/inline_html.md.snap b/crates/biome_markdown_parser/tests/md_test_suite/ok/inline_html.md.snap new file mode 100644 index 000000000000..e45beb9fac9a --- /dev/null +++ b/crates/biome_markdown_parser/tests/md_test_suite/ok/inline_html.md.snap @@ -0,0 +1,469 @@ +--- +source: crates/biome_markdown_parser/tests/spec_test.rs +expression: snapshot +--- +## Input + +``` +This has inline HTML in it. + +Link with anchor text. + +Self-closing:
    and + +Comment: inline. + +PI: here. + +CDATA: here. + +``` + + +## AST + +``` +MdDocument { + bom_token: missing (optional), + value: MdBlockList [ + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@0..9 "This has " [] [], + }, + MdInlineHtml { + value: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@9..10 "<" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@10..14 "span" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@14..15 ">" [] [], + }, + ], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@15..26 "inline HTML" [] [], + }, + MdInlineHtml { + value: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@26..27 "<" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@27..32 "/span" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@32..33 ">" [] [], + }, + ], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@33..40 " in it." [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@40..41 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + MdNewline { + value_token: NEWLINE@41..42 "\n" [] [], + }, + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@42..52 "Link with " [] [], + }, + MdInlineHtml { + value: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@52..53 "<" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@53..65 "a href=\"url\"" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@65..66 ">" [] [], + }, + ], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@66..72 "anchor" [] [], + }, + MdInlineHtml { + value: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@72..73 "<" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@73..75 "/a" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@75..76 ">" [] [], + }, + ], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@76..82 " text." 
[] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@82..83 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + MdNewline { + value_token: NEWLINE@83..84 "\n" [] [], + }, + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@84..88 "Self" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@88..89 "-" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@89..98 "closing: " [] [], + }, + MdInlineHtml { + value: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@98..99 "<" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@99..102 "br/" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@102..103 ">" [] [], + }, + ], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@103..108 " and " [] [], + }, + MdInlineHtml { + value: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@108..109 "<" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@109..121 "img src=\"x\"/" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@121..122 ">" [] [], + }, + ], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@122..123 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + MdNewline { + value_token: NEWLINE@123..124 "\n" [] [], + }, + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@124..133 "Comment: " [] [], + }, + MdInlineHtml { + value: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@133..134 "<" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@134..135 "!" 
[] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@135..136 "-" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@136..137 "-" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@137..146 " comment " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@146..147 "-" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@147..148 "-" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@148..149 ">" [] [], + }, + ], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@149..157 " inline." [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@157..158 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + MdNewline { + value_token: NEWLINE@158..159 "\n" [] [], + }, + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@159..163 "PI: " [] [], + }, + MdInlineHtml { + value: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@163..164 "<" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@164..183 "?xml version=\"1.0\"?" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@183..184 ">" [] [], + }, + ], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@184..190 " here." [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@190..191 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + MdNewline { + value_token: NEWLINE@191..192 "\n" [] [], + }, + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@192..199 "CDATA: " [] [], + }, + MdInlineHtml { + value: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@199..200 "<" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@200..201 "!" 
[] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@201..202 "[" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@202..207 "CDATA" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@207..208 "[" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@208..212 "text" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@212..213 "]" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@213..214 "]" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@214..215 ">" [] [], + }, + ], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@215..221 " here." [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@221..222 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + ], + eof_token: EOF@222..222 "" [] [], +} +``` + +## CST + +``` +0: MD_DOCUMENT@0..222 + 0: (empty) + 1: MD_BLOCK_LIST@0..222 + 0: MD_PARAGRAPH@0..41 + 0: MD_INLINE_ITEM_LIST@0..41 + 0: MD_TEXTUAL@0..9 + 0: MD_TEXTUAL_LITERAL@0..9 "This has " [] [] + 1: MD_INLINE_HTML@9..15 + 0: MD_INLINE_ITEM_LIST@9..15 + 0: MD_TEXTUAL@9..10 + 0: MD_TEXTUAL_LITERAL@9..10 "<" [] [] + 1: MD_TEXTUAL@10..14 + 0: MD_TEXTUAL_LITERAL@10..14 "span" [] [] + 2: MD_TEXTUAL@14..15 + 0: MD_TEXTUAL_LITERAL@14..15 ">" [] [] + 2: MD_TEXTUAL@15..26 + 0: MD_TEXTUAL_LITERAL@15..26 "inline HTML" [] [] + 3: MD_INLINE_HTML@26..33 + 0: MD_INLINE_ITEM_LIST@26..33 + 0: MD_TEXTUAL@26..27 + 0: MD_TEXTUAL_LITERAL@26..27 "<" [] [] + 1: MD_TEXTUAL@27..32 + 0: MD_TEXTUAL_LITERAL@27..32 "/span" [] [] + 2: MD_TEXTUAL@32..33 + 0: MD_TEXTUAL_LITERAL@32..33 ">" [] [] + 4: MD_TEXTUAL@33..40 + 0: MD_TEXTUAL_LITERAL@33..40 " in it." 
[] [] + 5: MD_TEXTUAL@40..41 + 0: MD_TEXTUAL_LITERAL@40..41 "\n" [] [] + 1: (empty) + 1: MD_NEWLINE@41..42 + 0: NEWLINE@41..42 "\n" [] [] + 2: MD_PARAGRAPH@42..83 + 0: MD_INLINE_ITEM_LIST@42..83 + 0: MD_TEXTUAL@42..52 + 0: MD_TEXTUAL_LITERAL@42..52 "Link with " [] [] + 1: MD_INLINE_HTML@52..66 + 0: MD_INLINE_ITEM_LIST@52..66 + 0: MD_TEXTUAL@52..53 + 0: MD_TEXTUAL_LITERAL@52..53 "<" [] [] + 1: MD_TEXTUAL@53..65 + 0: MD_TEXTUAL_LITERAL@53..65 "a href=\"url\"" [] [] + 2: MD_TEXTUAL@65..66 + 0: MD_TEXTUAL_LITERAL@65..66 ">" [] [] + 2: MD_TEXTUAL@66..72 + 0: MD_TEXTUAL_LITERAL@66..72 "anchor" [] [] + 3: MD_INLINE_HTML@72..76 + 0: MD_INLINE_ITEM_LIST@72..76 + 0: MD_TEXTUAL@72..73 + 0: MD_TEXTUAL_LITERAL@72..73 "<" [] [] + 1: MD_TEXTUAL@73..75 + 0: MD_TEXTUAL_LITERAL@73..75 "/a" [] [] + 2: MD_TEXTUAL@75..76 + 0: MD_TEXTUAL_LITERAL@75..76 ">" [] [] + 4: MD_TEXTUAL@76..82 + 0: MD_TEXTUAL_LITERAL@76..82 " text." [] [] + 5: MD_TEXTUAL@82..83 + 0: MD_TEXTUAL_LITERAL@82..83 "\n" [] [] + 1: (empty) + 3: MD_NEWLINE@83..84 + 0: NEWLINE@83..84 "\n" [] [] + 4: MD_PARAGRAPH@84..123 + 0: MD_INLINE_ITEM_LIST@84..123 + 0: MD_TEXTUAL@84..88 + 0: MD_TEXTUAL_LITERAL@84..88 "Self" [] [] + 1: MD_TEXTUAL@88..89 + 0: MD_TEXTUAL_LITERAL@88..89 "-" [] [] + 2: MD_TEXTUAL@89..98 + 0: MD_TEXTUAL_LITERAL@89..98 "closing: " [] [] + 3: MD_INLINE_HTML@98..103 + 0: MD_INLINE_ITEM_LIST@98..103 + 0: MD_TEXTUAL@98..99 + 0: MD_TEXTUAL_LITERAL@98..99 "<" [] [] + 1: MD_TEXTUAL@99..102 + 0: MD_TEXTUAL_LITERAL@99..102 "br/" [] [] + 2: MD_TEXTUAL@102..103 + 0: MD_TEXTUAL_LITERAL@102..103 ">" [] [] + 4: MD_TEXTUAL@103..108 + 0: MD_TEXTUAL_LITERAL@103..108 " and " [] [] + 5: MD_INLINE_HTML@108..122 + 0: MD_INLINE_ITEM_LIST@108..122 + 0: MD_TEXTUAL@108..109 + 0: MD_TEXTUAL_LITERAL@108..109 "<" [] [] + 1: MD_TEXTUAL@109..121 + 0: MD_TEXTUAL_LITERAL@109..121 "img src=\"x\"/" [] [] + 2: MD_TEXTUAL@121..122 + 0: MD_TEXTUAL_LITERAL@121..122 ">" [] [] + 6: MD_TEXTUAL@122..123 + 0: MD_TEXTUAL_LITERAL@122..123 "\n" [] [] + 
1: (empty) + 5: MD_NEWLINE@123..124 + 0: NEWLINE@123..124 "\n" [] [] + 6: MD_PARAGRAPH@124..158 + 0: MD_INLINE_ITEM_LIST@124..158 + 0: MD_TEXTUAL@124..133 + 0: MD_TEXTUAL_LITERAL@124..133 "Comment: " [] [] + 1: MD_INLINE_HTML@133..149 + 0: MD_INLINE_ITEM_LIST@133..149 + 0: MD_TEXTUAL@133..134 + 0: MD_TEXTUAL_LITERAL@133..134 "<" [] [] + 1: MD_TEXTUAL@134..135 + 0: MD_TEXTUAL_LITERAL@134..135 "!" [] [] + 2: MD_TEXTUAL@135..136 + 0: MD_TEXTUAL_LITERAL@135..136 "-" [] [] + 3: MD_TEXTUAL@136..137 + 0: MD_TEXTUAL_LITERAL@136..137 "-" [] [] + 4: MD_TEXTUAL@137..146 + 0: MD_TEXTUAL_LITERAL@137..146 " comment " [] [] + 5: MD_TEXTUAL@146..147 + 0: MD_TEXTUAL_LITERAL@146..147 "-" [] [] + 6: MD_TEXTUAL@147..148 + 0: MD_TEXTUAL_LITERAL@147..148 "-" [] [] + 7: MD_TEXTUAL@148..149 + 0: MD_TEXTUAL_LITERAL@148..149 ">" [] [] + 2: MD_TEXTUAL@149..157 + 0: MD_TEXTUAL_LITERAL@149..157 " inline." [] [] + 3: MD_TEXTUAL@157..158 + 0: MD_TEXTUAL_LITERAL@157..158 "\n" [] [] + 1: (empty) + 7: MD_NEWLINE@158..159 + 0: NEWLINE@158..159 "\n" [] [] + 8: MD_PARAGRAPH@159..191 + 0: MD_INLINE_ITEM_LIST@159..191 + 0: MD_TEXTUAL@159..163 + 0: MD_TEXTUAL_LITERAL@159..163 "PI: " [] [] + 1: MD_INLINE_HTML@163..184 + 0: MD_INLINE_ITEM_LIST@163..184 + 0: MD_TEXTUAL@163..164 + 0: MD_TEXTUAL_LITERAL@163..164 "<" [] [] + 1: MD_TEXTUAL@164..183 + 0: MD_TEXTUAL_LITERAL@164..183 "?xml version=\"1.0\"?" [] [] + 2: MD_TEXTUAL@183..184 + 0: MD_TEXTUAL_LITERAL@183..184 ">" [] [] + 2: MD_TEXTUAL@184..190 + 0: MD_TEXTUAL_LITERAL@184..190 " here." [] [] + 3: MD_TEXTUAL@190..191 + 0: MD_TEXTUAL_LITERAL@190..191 "\n" [] [] + 1: (empty) + 9: MD_NEWLINE@191..192 + 0: NEWLINE@191..192 "\n" [] [] + 10: MD_PARAGRAPH@192..222 + 0: MD_INLINE_ITEM_LIST@192..222 + 0: MD_TEXTUAL@192..199 + 0: MD_TEXTUAL_LITERAL@192..199 "CDATA: " [] [] + 1: MD_INLINE_HTML@199..215 + 0: MD_INLINE_ITEM_LIST@199..215 + 0: MD_TEXTUAL@199..200 + 0: MD_TEXTUAL_LITERAL@199..200 "<" [] [] + 1: MD_TEXTUAL@200..201 + 0: MD_TEXTUAL_LITERAL@200..201 "!" 
[] [] + 2: MD_TEXTUAL@201..202 + 0: MD_TEXTUAL_LITERAL@201..202 "[" [] [] + 3: MD_TEXTUAL@202..207 + 0: MD_TEXTUAL_LITERAL@202..207 "CDATA" [] [] + 4: MD_TEXTUAL@207..208 + 0: MD_TEXTUAL_LITERAL@207..208 "[" [] [] + 5: MD_TEXTUAL@208..212 + 0: MD_TEXTUAL_LITERAL@208..212 "text" [] [] + 6: MD_TEXTUAL@212..213 + 0: MD_TEXTUAL_LITERAL@212..213 "]" [] [] + 7: MD_TEXTUAL@213..214 + 0: MD_TEXTUAL_LITERAL@213..214 "]" [] [] + 8: MD_TEXTUAL@214..215 + 0: MD_TEXTUAL_LITERAL@214..215 ">" [] [] + 2: MD_TEXTUAL@215..221 + 0: MD_TEXTUAL_LITERAL@215..221 " here." [] [] + 3: MD_TEXTUAL@221..222 + 0: MD_TEXTUAL_LITERAL@221..222 "\n" [] [] + 1: (empty) + 2: EOF@222..222 "" [] [] + +``` diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/inline_html_edge_cases.md b/crates/biome_markdown_parser/tests/md_test_suite/ok/inline_html_edge_cases.md new file mode 100644 index 000000000000..2ece6b29a323 --- /dev/null +++ b/crates/biome_markdown_parser/tests/md_test_suite/ok/inline_html_edge_cases.md @@ -0,0 +1,63 @@ +# Inline HTML Edge Cases + +## Basic Open Tags +Simple tag here. +With attrs
    content
    end. + +## Self-Closing Tags +Line break:
    here. +With space:
    there. +Input: field. + +## Closing Tags +Open bold text. +Nested double tags. + +## Comments +Simple inline. +Empty comment. +With dashes here. +Leading dash allowed. + +## Processing Instructions +XML: present. +PHP: code. + +## CDATA Sections +Data: here. +Special: &"]]> chars. + +## Declarations +Standard: declaration. +Lowercase: declaration. +Extended: test. + +## Attributes with Quotes +Single:
    text
    end. +Double:
    text
    end. +Both:
    text
    end. + +## Attributes with Special Chars +Spaces:
    text
    end. +Multiple:
    text
    end. +Unquoted:
    text
    end. +Underscore/colon:
    text
    end. +Boolean:
    text
    end. + +## Newline Cases (should parse as inline HTML) +Allowed:
    ok
    tag. +Allowed:
    ok
    tag. + +## Priority - Autolinks Should Win +URL: link. +Email: address. + +## Tag Names with Hyphens +Custom: content element. +Multiple: test tag. + +## Empty Tags +Empty open:
    tags. +Self close:
    break. diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/inline_html_edge_cases.md.snap b/crates/biome_markdown_parser/tests/md_test_suite/ok/inline_html_edge_cases.md.snap new file mode 100644 index 000000000000..3ccde7873506 --- /dev/null +++ b/crates/biome_markdown_parser/tests/md_test_suite/ok/inline_html_edge_cases.md.snap @@ -0,0 +1,2834 @@ +--- +source: crates/biome_markdown_parser/tests/spec_test.rs +assertion_line: 131 +expression: snapshot +--- +## Input + +``` +# Inline HTML Edge Cases + +## Basic Open Tags +Simple tag here. +With attrs
    content
    end. + +## Self-Closing Tags +Line break:
    here. +With space:
    there. +Input: field. + +## Closing Tags +Open bold text. +Nested double tags. + +## Comments +Simple inline. +Empty comment. +With dashes here. +Leading dash allowed. + +## Processing Instructions +XML: present. +PHP: code. + +## CDATA Sections +Data: here. +Special: &"]]> chars. + +## Declarations +Standard: declaration. +Lowercase: declaration. +Extended: test. + +## Attributes with Quotes +Single:
    text
    end. +Double:
    text
    end. +Both:
    text
    end. + +## Attributes with Special Chars +Spaces:
    text
    end. +Multiple:
    text
    end. +Unquoted:
    text
    end. +Underscore/colon:
    text
    end. +Boolean:
    text
    end. + +## Newline Cases (should parse as inline HTML) +Allowed:
    ok
    tag. +Allowed:
    ok
    tag. + +## Priority - Autolinks Should Win +URL: link. +Email: address. + +## Tag Names with Hyphens +Custom: content element. +Multiple: test tag. + +## Empty Tags +Empty open:
    tags. +Self close:
    break. + +``` + + +## AST + +``` +MdDocument { + bom_token: missing (optional), + value: MdBlockList [ + MdHeader { + before: MdHashList [ + MdHash { + hash_token: HASH@0..1 "#" [] [], + }, + ], + content: MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1..24 " Inline HTML Edge Cases" [] [], + }, + ], + hard_line: missing (optional), + }, + after: MdHashList [], + }, + MdNewline { + value_token: NEWLINE@24..25 "\n" [] [], + }, + MdNewline { + value_token: NEWLINE@25..26 "\n" [] [], + }, + MdHeader { + before: MdHashList [ + MdHash { + hash_token: HASH@26..27 "#" [] [], + }, + MdHash { + hash_token: HASH@27..28 "#" [] [], + }, + ], + content: MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@28..44 " Basic Open Tags" [] [], + }, + ], + hard_line: missing (optional), + }, + after: MdHashList [], + }, + MdNewline { + value_token: NEWLINE@44..45 "\n" [] [], + }, + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@45..52 "Simple " [] [], + }, + MdInlineHtml { + value: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@52..53 "<" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@53..57 "span" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@57..58 ">" [] [], + }, + ], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@58..61 "tag" [] [], + }, + MdInlineHtml { + value: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@61..62 "<" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@62..67 "/span" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@67..68 ">" [] [], + }, + ], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@68..74 " here." 
[] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@74..75 "\n" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@75..86 "With attrs " [] [], + }, + MdInlineHtml { + value: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@86..87 "<" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@87..103 "div class=\"test\"" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@103..104 ">" [] [], + }, + ], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@104..111 "content" [] [], + }, + MdInlineHtml { + value: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@111..112 "<" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@112..116 "/div" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@116..117 ">" [] [], + }, + ], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@117..122 " end." [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@122..123 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + MdNewline { + value_token: NEWLINE@123..124 "\n" [] [], + }, + MdHeader { + before: MdHashList [ + MdHash { + hash_token: HASH@124..125 "#" [] [], + }, + MdHash { + hash_token: HASH@125..126 "#" [] [], + }, + ], + content: MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@126..131 " Self" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@131..132 "-" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@132..144 "Closing Tags" [] [], + }, + ], + hard_line: missing (optional), + }, + after: MdHashList [], + }, + MdNewline { + value_token: NEWLINE@144..145 "\n" [] [], + }, + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@145..157 "Line break: " [] [], + }, + MdInlineHtml { + value: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@157..158 "<" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@158..161 "br/" [] [], + }, + MdTextual { + value_token: 
MD_TEXTUAL_LITERAL@161..162 ">" [] [], + }, + ], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@162..168 " here." [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@168..169 "\n" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@169..181 "With space: " [] [], + }, + MdInlineHtml { + value: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@181..182 "<" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@182..186 "br /" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@186..187 ">" [] [], + }, + ], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@187..194 " there." [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@194..195 "\n" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@195..202 "Input: " [] [], + }, + MdInlineHtml { + value: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@202..203 "<" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@203..222 "input type=\"text\" /" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@222..223 ">" [] [], + }, + ], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@223..230 " field." 
[] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@230..231 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + MdNewline { + value_token: NEWLINE@231..232 "\n" [] [], + }, + MdHeader { + before: MdHashList [ + MdHash { + hash_token: HASH@232..233 "#" [] [], + }, + MdHash { + hash_token: HASH@233..234 "#" [] [], + }, + ], + content: MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@234..247 " Closing Tags" [] [], + }, + ], + hard_line: missing (optional), + }, + after: MdHashList [], + }, + MdNewline { + value_token: NEWLINE@247..248 "\n" [] [], + }, + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@248..253 "Open " [] [], + }, + MdInlineHtml { + value: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@253..254 "<" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@254..255 "b" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@255..256 ">" [] [], + }, + ], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@256..260 "bold" [] [], + }, + MdInlineHtml { + value: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@260..261 "<" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@261..263 "/b" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@263..264 ">" [] [], + }, + ], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@264..270 " text." 
[] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@270..271 "\n" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@271..278 "Nested " [] [], + }, + MdInlineHtml { + value: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@278..279 "<" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@279..283 "span" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@283..284 ">" [] [], + }, + ], + }, + MdInlineHtml { + value: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@284..285 "<" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@285..291 "strong" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@291..292 ">" [] [], + }, + ], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@292..298 "double" [] [], + }, + MdInlineHtml { + value: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@298..299 "<" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@299..306 "/strong" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@306..307 ">" [] [], + }, + ], + }, + MdInlineHtml { + value: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@307..308 "<" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@308..313 "/span" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@313..314 ">" [] [], + }, + ], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@314..320 " tags." 
[] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@320..321 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + MdNewline { + value_token: NEWLINE@321..322 "\n" [] [], + }, + MdHeader { + before: MdHashList [ + MdHash { + hash_token: HASH@322..323 "#" [] [], + }, + MdHash { + hash_token: HASH@323..324 "#" [] [], + }, + ], + content: MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@324..333 " Comments" [] [], + }, + ], + hard_line: missing (optional), + }, + after: MdHashList [], + }, + MdNewline { + value_token: NEWLINE@333..334 "\n" [] [], + }, + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@334..341 "Simple " [] [], + }, + MdInlineHtml { + value: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@341..342 "<" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@342..343 "!" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@343..344 "-" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@344..345 "-" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@345..354 " comment " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@354..355 "-" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@355..356 "-" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@356..357 ">" [] [], + }, + ], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@357..365 " inline." [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@365..366 "\n" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@366..372 "Empty " [] [], + }, + MdInlineHtml { + value: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@372..373 "<" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@373..374 "!" 
[] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@374..375 "-" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@375..376 "-" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@376..377 " " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@377..378 "-" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@378..379 "-" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@379..380 ">" [] [], + }, + ], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@380..389 " comment." [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@389..390 "\n" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@390..402 "With dashes " [] [], + }, + MdInlineHtml { + value: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@402..403 "<" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@403..404 "!" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@404..405 "-" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@405..406 "-" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@406..409 "foo" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@409..410 "-" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@410..413 "bar" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@413..414 "-" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@414..415 "-" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@415..416 ">" [] [], + }, + ], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@416..422 " here." [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@422..423 "\n" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@423..436 "Leading dash " [] [], + }, + MdInlineHtml { + value: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@436..437 "<" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@437..438 "!" 
[] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@438..439 "-" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@439..440 "-" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@440..441 "-" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@441..442 " " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@442..443 "-" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@443..444 "-" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@444..445 ">" [] [], + }, + ], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@445..454 " allowed." [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@454..455 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + MdNewline { + value_token: NEWLINE@455..456 "\n" [] [], + }, + MdHeader { + before: MdHashList [ + MdHash { + hash_token: HASH@456..457 "#" [] [], + }, + MdHash { + hash_token: HASH@457..458 "#" [] [], + }, + ], + content: MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@458..482 " Processing Instructions" [] [], + }, + ], + hard_line: missing (optional), + }, + after: MdHashList [], + }, + MdNewline { + value_token: NEWLINE@482..483 "\n" [] [], + }, + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@483..488 "XML: " [] [], + }, + MdInlineHtml { + value: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@488..489 "<" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@489..508 "?xml version=\"1.0\"?" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@508..509 ">" [] [], + }, + ], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@509..518 " present." 
[] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@518..519 "\n" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@519..524 "PHP: " [] [], + }, + MdInlineHtml { + value: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@524..525 "<" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@525..544 "?php echo \"test\"; ?" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@544..545 ">" [] [], + }, + ], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@545..551 " code." [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@551..552 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + MdNewline { + value_token: NEWLINE@552..553 "\n" [] [], + }, + MdHeader { + before: MdHashList [ + MdHash { + hash_token: HASH@553..554 "#" [] [], + }, + MdHash { + hash_token: HASH@554..555 "#" [] [], + }, + ], + content: MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@555..570 " CDATA Sections" [] [], + }, + ], + hard_line: missing (optional), + }, + after: MdHashList [], + }, + MdNewline { + value_token: NEWLINE@570..571 "\n" [] [], + }, + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@571..577 "Data: " [] [], + }, + MdInlineHtml { + value: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@577..578 "<" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@578..579 "!" 
[] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@579..580 "[" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@580..585 "CDATA" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@585..586 "[" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@586..595 "some text" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@595..596 "]" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@596..597 "]" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@597..598 ">" [] [], + }, + ], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@598..604 " here." [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@604..605 "\n" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@605..614 "Special: " [] [], + }, + MdInlineHtml { + value: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@614..615 "<" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@615..616 "!" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@616..617 "[" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@617..622 "CDATA" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@622..623 "[" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@623..624 "<" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@624..625 ">" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@625..627 "&\"" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@627..628 "]" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@628..629 "]" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@629..630 ">" [] [], + }, + ], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@630..637 " chars." 
[] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@637..638 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + MdNewline { + value_token: NEWLINE@638..639 "\n" [] [], + }, + MdHeader { + before: MdHashList [ + MdHash { + hash_token: HASH@639..640 "#" [] [], + }, + MdHash { + hash_token: HASH@640..641 "#" [] [], + }, + ], + content: MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@641..654 " Declarations" [] [], + }, + ], + hard_line: missing (optional), + }, + after: MdHashList [], + }, + MdNewline { + value_token: NEWLINE@654..655 "\n" [] [], + }, + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@655..665 "Standard: " [] [], + }, + MdInlineHtml { + value: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@665..666 "<" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@666..667 "!" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@667..679 "DOCTYPE html" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@679..680 ">" [] [], + }, + ], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@680..693 " declaration." [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@693..694 "\n" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@694..705 "Lowercase: " [] [], + }, + MdInlineHtml { + value: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@705..706 "<" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@706..707 "!" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@707..719 "doctype html" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@719..720 ">" [] [], + }, + ], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@720..733 " declaration." 
[] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@733..734 "\n" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@734..744 "Extended: " [] [], + }, + MdInlineHtml { + value: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@744..745 "<" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@745..746 "!" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@746..767 "DOCTYPE HTML PUBLIC \"" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@767..768 "-" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@768..793 "//W3C//DTD HTML 4.01//EN\"" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@793..794 ">" [] [], + }, + ], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@794..800 " test." [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@800..801 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + MdNewline { + value_token: NEWLINE@801..802 "\n" [] [], + }, + MdHeader { + before: MdHashList [ + MdHash { + hash_token: HASH@802..803 "#" [] [], + }, + MdHash { + hash_token: HASH@803..804 "#" [] [], + }, + ], + content: MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@804..827 " Attributes with Quotes" [] [], + }, + ], + hard_line: missing (optional), + }, + after: MdHashList [], + }, + MdNewline { + value_token: NEWLINE@827..828 "\n" [] [], + }, + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@828..836 "Single: " [] [], + }, + MdInlineHtml { + value: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@836..837 "<" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@837..855 "div class='quoted'" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@855..856 ">" [] [], + }, + ], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@856..860 "text" [] [], + }, + MdInlineHtml { + value: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@860..861 "<" [] 
[], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@861..865 "/div" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@865..866 ">" [] [], + }, + ], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@866..871 " end." [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@871..872 "\n" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@872..880 "Double: " [] [], + }, + MdInlineHtml { + value: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@880..881 "<" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@881..899 "div class=\"quoted\"" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@899..900 ">" [] [], + }, + ], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@900..904 "text" [] [], + }, + MdInlineHtml { + value: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@904..905 "<" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@905..909 "/div" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@909..910 ">" [] [], + }, + ], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@910..915 " end." [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@915..916 "\n" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@916..922 "Both: " [] [], + }, + MdInlineHtml { + value: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@922..923 "<" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@923..951 "div class=\"outer\" id='inner'" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@951..952 ">" [] [], + }, + ], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@952..956 "text" [] [], + }, + MdInlineHtml { + value: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@956..957 "<" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@957..961 "/div" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@961..962 ">" [] [], + }, + ], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@962..967 " end." 
[] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@967..968 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + MdNewline { + value_token: NEWLINE@968..969 "\n" [] [], + }, + MdHeader { + before: MdHashList [ + MdHash { + hash_token: HASH@969..970 "#" [] [], + }, + MdHash { + hash_token: HASH@970..971 "#" [] [], + }, + ], + content: MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@971..1001 " Attributes with Special Chars" [] [], + }, + ], + hard_line: missing (optional), + }, + after: MdHashList [], + }, + MdNewline { + value_token: NEWLINE@1001..1002 "\n" [] [], + }, + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1002..1010 "Spaces: " [] [], + }, + MdInlineHtml { + value: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1010..1011 "<" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1011..1019 "div data" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1019..1020 "-" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1020..1039 "value=\"with spaces\"" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1039..1040 ">" [] [], + }, + ], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1040..1044 "text" [] [], + }, + MdInlineHtml { + value: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1044..1045 "<" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1045..1049 "/div" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1049..1050 ">" [] [], + }, + ], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1050..1055 " end." 
[] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1055..1056 "\n" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1056..1066 "Multiple: " [] [], + }, + MdInlineHtml { + value: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1066..1067 "<" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1067..1092 "div class=\"a\" id=\"b\" data" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1092..1093 "-" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1093..1098 "x=\"c\"" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1098..1099 ">" [] [], + }, + ], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1099..1103 "text" [] [], + }, + MdInlineHtml { + value: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1103..1104 "<" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1104..1108 "/div" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1108..1109 ">" [] [], + }, + ], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1109..1114 " end." 
[] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1114..1115 "\n" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1115..1125 "Unquoted: " [] [], + }, + MdInlineHtml { + value: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1125..1126 "<" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1126..1134 "div data" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1134..1135 "-" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1135..1145 "x=foo data" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1145..1146 "-" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1146..1151 "y=bar" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1151..1152 "-" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1152..1155 "baz" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1155..1156 ">" [] [], + }, + ], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1156..1160 "text" [] [], + }, + MdInlineHtml { + value: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1160..1161 "<" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1161..1165 "/div" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1165..1166 ">" [] [], + }, + ], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1166..1171 " end." 
[] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1171..1172 "\n" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1172..1190 "Underscore/colon: " [] [], + }, + MdInlineHtml { + value: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1190..1191 "<" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1191..1195 "div " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1195..1196 "_" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1196..1205 "x=1 x:y=2" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1205..1206 ">" [] [], + }, + ], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1206..1210 "text" [] [], + }, + MdInlineHtml { + value: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1210..1211 "<" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1211..1215 "/div" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1215..1216 ">" [] [], + }, + ], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1216..1221 " end." [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1221..1222 "\n" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1222..1231 "Boolean: " [] [], + }, + MdInlineHtml { + value: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1231..1232 "<" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1232..1244 "div disabled" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1244..1245 ">" [] [], + }, + ], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1245..1249 "text" [] [], + }, + MdInlineHtml { + value: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1249..1250 "<" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1250..1254 "/div" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1254..1255 ">" [] [], + }, + ], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1255..1260 " end." 
[] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1260..1261 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + MdNewline { + value_token: NEWLINE@1261..1262 "\n" [] [], + }, + MdHeader { + before: MdHashList [ + MdHash { + hash_token: HASH@1262..1263 "#" [] [], + }, + MdHash { + hash_token: HASH@1263..1264 "#" [] [], + }, + ], + content: MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1264..1279 " Newline Cases " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1279..1280 "(" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1280..1307 "should parse as inline HTML" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1307..1308 ")" [] [], + }, + ], + hard_line: missing (optional), + }, + after: MdHashList [], + }, + MdNewline { + value_token: NEWLINE@1308..1309 "\n" [] [], + }, + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1309..1318 "Allowed: " [] [], + }, + MdInlineHtml { + value: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1318..1319 "<" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1319..1322 "div" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1322..1323 "\n" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1323..1335 "class=\"test\"" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1335..1336 ">" [] [], + }, + ], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1336..1338 "ok" [] [], + }, + MdInlineHtml { + value: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1338..1339 "<" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1339..1343 "/div" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1343..1344 ">" [] [], + }, + ], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1344..1349 " tag." 
[] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1349..1350 "\n" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1350..1359 "Allowed: " [] [], + }, + MdInlineHtml { + value: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1359..1360 "<" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1360..1373 "div class=\"a\"" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1373..1374 "\n" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1374..1375 ">" [] [], + }, + ], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1375..1377 "ok" [] [], + }, + MdInlineHtml { + value: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1377..1378 "<" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1378..1382 "/div" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1382..1383 ">" [] [], + }, + ], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1383..1388 " tag." [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1388..1389 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + MdNewline { + value_token: NEWLINE@1389..1390 "\n" [] [], + }, + MdHeader { + before: MdHashList [ + MdHash { + hash_token: HASH@1390..1391 "#" [] [], + }, + MdHash { + hash_token: HASH@1391..1392 "#" [] [], + }, + ], + content: MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1392..1402 " Priority " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1402..1403 "-" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1403..1424 " Autolinks Should Win" [] [], + }, + ], + hard_line: missing (optional), + }, + after: MdHashList [], + }, + MdNewline { + value_token: NEWLINE@1424..1425 "\n" [] [], + }, + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1425..1430 "URL: " [] [], + }, + MdAutolink { + l_angle_token: L_ANGLE@1430..1431 "<" [] [], + value: MdInlineItemList [ + MdTextual { + 
value_token: MD_TEXTUAL_LITERAL@1431..1450 "https://example.com" [] [], + }, + ], + r_angle_token: R_ANGLE@1450..1451 ">" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1451..1457 " link." [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1457..1458 "\n" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1458..1465 "Email: " [] [], + }, + MdAutolink { + l_angle_token: L_ANGLE@1465..1466 "<" [] [], + value: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1466..1482 "user@example.com" [] [], + }, + ], + r_angle_token: R_ANGLE@1482..1483 ">" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1483..1492 " address." [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1492..1493 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + MdNewline { + value_token: NEWLINE@1493..1494 "\n" [] [], + }, + MdHeader { + before: MdHashList [ + MdHash { + hash_token: HASH@1494..1495 "#" [] [], + }, + MdHash { + hash_token: HASH@1495..1496 "#" [] [], + }, + ], + content: MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1496..1519 " Tag Names with Hyphens" [] [], + }, + ], + hard_line: missing (optional), + }, + after: MdHashList [], + }, + MdNewline { + value_token: NEWLINE@1519..1520 "\n" [] [], + }, + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1520..1528 "Custom: " [] [], + }, + MdInlineHtml { + value: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1528..1529 "<" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1529..1531 "my" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1531..1532 "-" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1532..1541 "component" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1541..1542 ">" [] [], + }, + ], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1542..1549 "content" [] [], + }, + MdInlineHtml { + value: 
MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1549..1550 "<" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1550..1553 "/my" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1553..1554 "-" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1554..1563 "component" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1563..1564 ">" [] [], + }, + ], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1564..1573 " element." [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1573..1574 "\n" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1574..1584 "Multiple: " [] [], + }, + MdInlineHtml { + value: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1584..1585 "<" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1585..1587 "my" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1587..1588 "-" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1588..1594 "custom" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1594..1595 "-" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1595..1602 "element" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1602..1603 ">" [] [], + }, + ], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1603..1607 "test" [] [], + }, + MdInlineHtml { + value: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1607..1608 "<" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1608..1611 "/my" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1611..1612 "-" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1612..1618 "custom" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1618..1619 "-" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1619..1626 "element" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1626..1627 ">" [] [], + }, + ], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1627..1632 " tag." 
[] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1632..1633 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + MdNewline { + value_token: NEWLINE@1633..1634 "\n" [] [], + }, + MdHeader { + before: MdHashList [ + MdHash { + hash_token: HASH@1634..1635 "#" [] [], + }, + MdHash { + hash_token: HASH@1635..1636 "#" [] [], + }, + ], + content: MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1636..1647 " Empty Tags" [] [], + }, + ], + hard_line: missing (optional), + }, + after: MdHashList [], + }, + MdNewline { + value_token: NEWLINE@1647..1648 "\n" [] [], + }, + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1648..1660 "Empty open: " [] [], + }, + MdInlineHtml { + value: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1660..1661 "<" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1661..1664 "div" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1664..1665 ">" [] [], + }, + ], + }, + MdInlineHtml { + value: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1665..1666 "<" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1666..1670 "/div" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1670..1671 ">" [] [], + }, + ], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1671..1677 " tags." [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1677..1678 "\n" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1678..1690 "Self close: " [] [], + }, + MdInlineHtml { + value: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1690..1691 "<" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1691..1694 "br/" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1694..1695 ">" [] [], + }, + ], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1695..1702 " break." 
[] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1702..1703 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + ], + eof_token: EOF@1703..1703 "" [] [], +} +``` + +## CST + +``` +0: MD_DOCUMENT@0..1703 + 0: (empty) + 1: MD_BLOCK_LIST@0..1703 + 0: MD_HEADER@0..24 + 0: MD_HASH_LIST@0..1 + 0: MD_HASH@0..1 + 0: HASH@0..1 "#" [] [] + 1: MD_PARAGRAPH@1..24 + 0: MD_INLINE_ITEM_LIST@1..24 + 0: MD_TEXTUAL@1..24 + 0: MD_TEXTUAL_LITERAL@1..24 " Inline HTML Edge Cases" [] [] + 1: (empty) + 2: MD_HASH_LIST@24..24 + 1: MD_NEWLINE@24..25 + 0: NEWLINE@24..25 "\n" [] [] + 2: MD_NEWLINE@25..26 + 0: NEWLINE@25..26 "\n" [] [] + 3: MD_HEADER@26..44 + 0: MD_HASH_LIST@26..28 + 0: MD_HASH@26..27 + 0: HASH@26..27 "#" [] [] + 1: MD_HASH@27..28 + 0: HASH@27..28 "#" [] [] + 1: MD_PARAGRAPH@28..44 + 0: MD_INLINE_ITEM_LIST@28..44 + 0: MD_TEXTUAL@28..44 + 0: MD_TEXTUAL_LITERAL@28..44 " Basic Open Tags" [] [] + 1: (empty) + 2: MD_HASH_LIST@44..44 + 4: MD_NEWLINE@44..45 + 0: NEWLINE@44..45 "\n" [] [] + 5: MD_PARAGRAPH@45..123 + 0: MD_INLINE_ITEM_LIST@45..123 + 0: MD_TEXTUAL@45..52 + 0: MD_TEXTUAL_LITERAL@45..52 "Simple " [] [] + 1: MD_INLINE_HTML@52..58 + 0: MD_INLINE_ITEM_LIST@52..58 + 0: MD_TEXTUAL@52..53 + 0: MD_TEXTUAL_LITERAL@52..53 "<" [] [] + 1: MD_TEXTUAL@53..57 + 0: MD_TEXTUAL_LITERAL@53..57 "span" [] [] + 2: MD_TEXTUAL@57..58 + 0: MD_TEXTUAL_LITERAL@57..58 ">" [] [] + 2: MD_TEXTUAL@58..61 + 0: MD_TEXTUAL_LITERAL@58..61 "tag" [] [] + 3: MD_INLINE_HTML@61..68 + 0: MD_INLINE_ITEM_LIST@61..68 + 0: MD_TEXTUAL@61..62 + 0: MD_TEXTUAL_LITERAL@61..62 "<" [] [] + 1: MD_TEXTUAL@62..67 + 0: MD_TEXTUAL_LITERAL@62..67 "/span" [] [] + 2: MD_TEXTUAL@67..68 + 0: MD_TEXTUAL_LITERAL@67..68 ">" [] [] + 4: MD_TEXTUAL@68..74 + 0: MD_TEXTUAL_LITERAL@68..74 " here." 
[] [] + 5: MD_TEXTUAL@74..75 + 0: MD_TEXTUAL_LITERAL@74..75 "\n" [] [] + 6: MD_TEXTUAL@75..86 + 0: MD_TEXTUAL_LITERAL@75..86 "With attrs " [] [] + 7: MD_INLINE_HTML@86..104 + 0: MD_INLINE_ITEM_LIST@86..104 + 0: MD_TEXTUAL@86..87 + 0: MD_TEXTUAL_LITERAL@86..87 "<" [] [] + 1: MD_TEXTUAL@87..103 + 0: MD_TEXTUAL_LITERAL@87..103 "div class=\"test\"" [] [] + 2: MD_TEXTUAL@103..104 + 0: MD_TEXTUAL_LITERAL@103..104 ">" [] [] + 8: MD_TEXTUAL@104..111 + 0: MD_TEXTUAL_LITERAL@104..111 "content" [] [] + 9: MD_INLINE_HTML@111..117 + 0: MD_INLINE_ITEM_LIST@111..117 + 0: MD_TEXTUAL@111..112 + 0: MD_TEXTUAL_LITERAL@111..112 "<" [] [] + 1: MD_TEXTUAL@112..116 + 0: MD_TEXTUAL_LITERAL@112..116 "/div" [] [] + 2: MD_TEXTUAL@116..117 + 0: MD_TEXTUAL_LITERAL@116..117 ">" [] [] + 10: MD_TEXTUAL@117..122 + 0: MD_TEXTUAL_LITERAL@117..122 " end." [] [] + 11: MD_TEXTUAL@122..123 + 0: MD_TEXTUAL_LITERAL@122..123 "\n" [] [] + 1: (empty) + 6: MD_NEWLINE@123..124 + 0: NEWLINE@123..124 "\n" [] [] + 7: MD_HEADER@124..144 + 0: MD_HASH_LIST@124..126 + 0: MD_HASH@124..125 + 0: HASH@124..125 "#" [] [] + 1: MD_HASH@125..126 + 0: HASH@125..126 "#" [] [] + 1: MD_PARAGRAPH@126..144 + 0: MD_INLINE_ITEM_LIST@126..144 + 0: MD_TEXTUAL@126..131 + 0: MD_TEXTUAL_LITERAL@126..131 " Self" [] [] + 1: MD_TEXTUAL@131..132 + 0: MD_TEXTUAL_LITERAL@131..132 "-" [] [] + 2: MD_TEXTUAL@132..144 + 0: MD_TEXTUAL_LITERAL@132..144 "Closing Tags" [] [] + 1: (empty) + 2: MD_HASH_LIST@144..144 + 8: MD_NEWLINE@144..145 + 0: NEWLINE@144..145 "\n" [] [] + 9: MD_PARAGRAPH@145..231 + 0: MD_INLINE_ITEM_LIST@145..231 + 0: MD_TEXTUAL@145..157 + 0: MD_TEXTUAL_LITERAL@145..157 "Line break: " [] [] + 1: MD_INLINE_HTML@157..162 + 0: MD_INLINE_ITEM_LIST@157..162 + 0: MD_TEXTUAL@157..158 + 0: MD_TEXTUAL_LITERAL@157..158 "<" [] [] + 1: MD_TEXTUAL@158..161 + 0: MD_TEXTUAL_LITERAL@158..161 "br/" [] [] + 2: MD_TEXTUAL@161..162 + 0: MD_TEXTUAL_LITERAL@161..162 ">" [] [] + 2: MD_TEXTUAL@162..168 + 0: MD_TEXTUAL_LITERAL@162..168 " here." 
[] [] + 3: MD_TEXTUAL@168..169 + 0: MD_TEXTUAL_LITERAL@168..169 "\n" [] [] + 4: MD_TEXTUAL@169..181 + 0: MD_TEXTUAL_LITERAL@169..181 "With space: " [] [] + 5: MD_INLINE_HTML@181..187 + 0: MD_INLINE_ITEM_LIST@181..187 + 0: MD_TEXTUAL@181..182 + 0: MD_TEXTUAL_LITERAL@181..182 "<" [] [] + 1: MD_TEXTUAL@182..186 + 0: MD_TEXTUAL_LITERAL@182..186 "br /" [] [] + 2: MD_TEXTUAL@186..187 + 0: MD_TEXTUAL_LITERAL@186..187 ">" [] [] + 6: MD_TEXTUAL@187..194 + 0: MD_TEXTUAL_LITERAL@187..194 " there." [] [] + 7: MD_TEXTUAL@194..195 + 0: MD_TEXTUAL_LITERAL@194..195 "\n" [] [] + 8: MD_TEXTUAL@195..202 + 0: MD_TEXTUAL_LITERAL@195..202 "Input: " [] [] + 9: MD_INLINE_HTML@202..223 + 0: MD_INLINE_ITEM_LIST@202..223 + 0: MD_TEXTUAL@202..203 + 0: MD_TEXTUAL_LITERAL@202..203 "<" [] [] + 1: MD_TEXTUAL@203..222 + 0: MD_TEXTUAL_LITERAL@203..222 "input type=\"text\" /" [] [] + 2: MD_TEXTUAL@222..223 + 0: MD_TEXTUAL_LITERAL@222..223 ">" [] [] + 10: MD_TEXTUAL@223..230 + 0: MD_TEXTUAL_LITERAL@223..230 " field." [] [] + 11: MD_TEXTUAL@230..231 + 0: MD_TEXTUAL_LITERAL@230..231 "\n" [] [] + 1: (empty) + 10: MD_NEWLINE@231..232 + 0: NEWLINE@231..232 "\n" [] [] + 11: MD_HEADER@232..247 + 0: MD_HASH_LIST@232..234 + 0: MD_HASH@232..233 + 0: HASH@232..233 "#" [] [] + 1: MD_HASH@233..234 + 0: HASH@233..234 "#" [] [] + 1: MD_PARAGRAPH@234..247 + 0: MD_INLINE_ITEM_LIST@234..247 + 0: MD_TEXTUAL@234..247 + 0: MD_TEXTUAL_LITERAL@234..247 " Closing Tags" [] [] + 1: (empty) + 2: MD_HASH_LIST@247..247 + 12: MD_NEWLINE@247..248 + 0: NEWLINE@247..248 "\n" [] [] + 13: MD_PARAGRAPH@248..321 + 0: MD_INLINE_ITEM_LIST@248..321 + 0: MD_TEXTUAL@248..253 + 0: MD_TEXTUAL_LITERAL@248..253 "Open " [] [] + 1: MD_INLINE_HTML@253..256 + 0: MD_INLINE_ITEM_LIST@253..256 + 0: MD_TEXTUAL@253..254 + 0: MD_TEXTUAL_LITERAL@253..254 "<" [] [] + 1: MD_TEXTUAL@254..255 + 0: MD_TEXTUAL_LITERAL@254..255 "b" [] [] + 2: MD_TEXTUAL@255..256 + 0: MD_TEXTUAL_LITERAL@255..256 ">" [] [] + 2: MD_TEXTUAL@256..260 + 0: MD_TEXTUAL_LITERAL@256..260 
"bold" [] [] + 3: MD_INLINE_HTML@260..264 + 0: MD_INLINE_ITEM_LIST@260..264 + 0: MD_TEXTUAL@260..261 + 0: MD_TEXTUAL_LITERAL@260..261 "<" [] [] + 1: MD_TEXTUAL@261..263 + 0: MD_TEXTUAL_LITERAL@261..263 "/b" [] [] + 2: MD_TEXTUAL@263..264 + 0: MD_TEXTUAL_LITERAL@263..264 ">" [] [] + 4: MD_TEXTUAL@264..270 + 0: MD_TEXTUAL_LITERAL@264..270 " text." [] [] + 5: MD_TEXTUAL@270..271 + 0: MD_TEXTUAL_LITERAL@270..271 "\n" [] [] + 6: MD_TEXTUAL@271..278 + 0: MD_TEXTUAL_LITERAL@271..278 "Nested " [] [] + 7: MD_INLINE_HTML@278..284 + 0: MD_INLINE_ITEM_LIST@278..284 + 0: MD_TEXTUAL@278..279 + 0: MD_TEXTUAL_LITERAL@278..279 "<" [] [] + 1: MD_TEXTUAL@279..283 + 0: MD_TEXTUAL_LITERAL@279..283 "span" [] [] + 2: MD_TEXTUAL@283..284 + 0: MD_TEXTUAL_LITERAL@283..284 ">" [] [] + 8: MD_INLINE_HTML@284..292 + 0: MD_INLINE_ITEM_LIST@284..292 + 0: MD_TEXTUAL@284..285 + 0: MD_TEXTUAL_LITERAL@284..285 "<" [] [] + 1: MD_TEXTUAL@285..291 + 0: MD_TEXTUAL_LITERAL@285..291 "strong" [] [] + 2: MD_TEXTUAL@291..292 + 0: MD_TEXTUAL_LITERAL@291..292 ">" [] [] + 9: MD_TEXTUAL@292..298 + 0: MD_TEXTUAL_LITERAL@292..298 "double" [] [] + 10: MD_INLINE_HTML@298..307 + 0: MD_INLINE_ITEM_LIST@298..307 + 0: MD_TEXTUAL@298..299 + 0: MD_TEXTUAL_LITERAL@298..299 "<" [] [] + 1: MD_TEXTUAL@299..306 + 0: MD_TEXTUAL_LITERAL@299..306 "/strong" [] [] + 2: MD_TEXTUAL@306..307 + 0: MD_TEXTUAL_LITERAL@306..307 ">" [] [] + 11: MD_INLINE_HTML@307..314 + 0: MD_INLINE_ITEM_LIST@307..314 + 0: MD_TEXTUAL@307..308 + 0: MD_TEXTUAL_LITERAL@307..308 "<" [] [] + 1: MD_TEXTUAL@308..313 + 0: MD_TEXTUAL_LITERAL@308..313 "/span" [] [] + 2: MD_TEXTUAL@313..314 + 0: MD_TEXTUAL_LITERAL@313..314 ">" [] [] + 12: MD_TEXTUAL@314..320 + 0: MD_TEXTUAL_LITERAL@314..320 " tags." 
[] [] + 13: MD_TEXTUAL@320..321 + 0: MD_TEXTUAL_LITERAL@320..321 "\n" [] [] + 1: (empty) + 14: MD_NEWLINE@321..322 + 0: NEWLINE@321..322 "\n" [] [] + 15: MD_HEADER@322..333 + 0: MD_HASH_LIST@322..324 + 0: MD_HASH@322..323 + 0: HASH@322..323 "#" [] [] + 1: MD_HASH@323..324 + 0: HASH@323..324 "#" [] [] + 1: MD_PARAGRAPH@324..333 + 0: MD_INLINE_ITEM_LIST@324..333 + 0: MD_TEXTUAL@324..333 + 0: MD_TEXTUAL_LITERAL@324..333 " Comments" [] [] + 1: (empty) + 2: MD_HASH_LIST@333..333 + 16: MD_NEWLINE@333..334 + 0: NEWLINE@333..334 "\n" [] [] + 17: MD_PARAGRAPH@334..455 + 0: MD_INLINE_ITEM_LIST@334..455 + 0: MD_TEXTUAL@334..341 + 0: MD_TEXTUAL_LITERAL@334..341 "Simple " [] [] + 1: MD_INLINE_HTML@341..357 + 0: MD_INLINE_ITEM_LIST@341..357 + 0: MD_TEXTUAL@341..342 + 0: MD_TEXTUAL_LITERAL@341..342 "<" [] [] + 1: MD_TEXTUAL@342..343 + 0: MD_TEXTUAL_LITERAL@342..343 "!" [] [] + 2: MD_TEXTUAL@343..344 + 0: MD_TEXTUAL_LITERAL@343..344 "-" [] [] + 3: MD_TEXTUAL@344..345 + 0: MD_TEXTUAL_LITERAL@344..345 "-" [] [] + 4: MD_TEXTUAL@345..354 + 0: MD_TEXTUAL_LITERAL@345..354 " comment " [] [] + 5: MD_TEXTUAL@354..355 + 0: MD_TEXTUAL_LITERAL@354..355 "-" [] [] + 6: MD_TEXTUAL@355..356 + 0: MD_TEXTUAL_LITERAL@355..356 "-" [] [] + 7: MD_TEXTUAL@356..357 + 0: MD_TEXTUAL_LITERAL@356..357 ">" [] [] + 2: MD_TEXTUAL@357..365 + 0: MD_TEXTUAL_LITERAL@357..365 " inline." [] [] + 3: MD_TEXTUAL@365..366 + 0: MD_TEXTUAL_LITERAL@365..366 "\n" [] [] + 4: MD_TEXTUAL@366..372 + 0: MD_TEXTUAL_LITERAL@366..372 "Empty " [] [] + 5: MD_INLINE_HTML@372..380 + 0: MD_INLINE_ITEM_LIST@372..380 + 0: MD_TEXTUAL@372..373 + 0: MD_TEXTUAL_LITERAL@372..373 "<" [] [] + 1: MD_TEXTUAL@373..374 + 0: MD_TEXTUAL_LITERAL@373..374 "!" 
[] [] + 2: MD_TEXTUAL@374..375 + 0: MD_TEXTUAL_LITERAL@374..375 "-" [] [] + 3: MD_TEXTUAL@375..376 + 0: MD_TEXTUAL_LITERAL@375..376 "-" [] [] + 4: MD_TEXTUAL@376..377 + 0: MD_TEXTUAL_LITERAL@376..377 " " [] [] + 5: MD_TEXTUAL@377..378 + 0: MD_TEXTUAL_LITERAL@377..378 "-" [] [] + 6: MD_TEXTUAL@378..379 + 0: MD_TEXTUAL_LITERAL@378..379 "-" [] [] + 7: MD_TEXTUAL@379..380 + 0: MD_TEXTUAL_LITERAL@379..380 ">" [] [] + 6: MD_TEXTUAL@380..389 + 0: MD_TEXTUAL_LITERAL@380..389 " comment." [] [] + 7: MD_TEXTUAL@389..390 + 0: MD_TEXTUAL_LITERAL@389..390 "\n" [] [] + 8: MD_TEXTUAL@390..402 + 0: MD_TEXTUAL_LITERAL@390..402 "With dashes " [] [] + 9: MD_INLINE_HTML@402..416 + 0: MD_INLINE_ITEM_LIST@402..416 + 0: MD_TEXTUAL@402..403 + 0: MD_TEXTUAL_LITERAL@402..403 "<" [] [] + 1: MD_TEXTUAL@403..404 + 0: MD_TEXTUAL_LITERAL@403..404 "!" [] [] + 2: MD_TEXTUAL@404..405 + 0: MD_TEXTUAL_LITERAL@404..405 "-" [] [] + 3: MD_TEXTUAL@405..406 + 0: MD_TEXTUAL_LITERAL@405..406 "-" [] [] + 4: MD_TEXTUAL@406..409 + 0: MD_TEXTUAL_LITERAL@406..409 "foo" [] [] + 5: MD_TEXTUAL@409..410 + 0: MD_TEXTUAL_LITERAL@409..410 "-" [] [] + 6: MD_TEXTUAL@410..413 + 0: MD_TEXTUAL_LITERAL@410..413 "bar" [] [] + 7: MD_TEXTUAL@413..414 + 0: MD_TEXTUAL_LITERAL@413..414 "-" [] [] + 8: MD_TEXTUAL@414..415 + 0: MD_TEXTUAL_LITERAL@414..415 "-" [] [] + 9: MD_TEXTUAL@415..416 + 0: MD_TEXTUAL_LITERAL@415..416 ">" [] [] + 10: MD_TEXTUAL@416..422 + 0: MD_TEXTUAL_LITERAL@416..422 " here." [] [] + 11: MD_TEXTUAL@422..423 + 0: MD_TEXTUAL_LITERAL@422..423 "\n" [] [] + 12: MD_TEXTUAL@423..436 + 0: MD_TEXTUAL_LITERAL@423..436 "Leading dash " [] [] + 13: MD_INLINE_HTML@436..445 + 0: MD_INLINE_ITEM_LIST@436..445 + 0: MD_TEXTUAL@436..437 + 0: MD_TEXTUAL_LITERAL@436..437 "<" [] [] + 1: MD_TEXTUAL@437..438 + 0: MD_TEXTUAL_LITERAL@437..438 "!" 
[] [] + 2: MD_TEXTUAL@438..439 + 0: MD_TEXTUAL_LITERAL@438..439 "-" [] [] + 3: MD_TEXTUAL@439..440 + 0: MD_TEXTUAL_LITERAL@439..440 "-" [] [] + 4: MD_TEXTUAL@440..441 + 0: MD_TEXTUAL_LITERAL@440..441 "-" [] [] + 5: MD_TEXTUAL@441..442 + 0: MD_TEXTUAL_LITERAL@441..442 " " [] [] + 6: MD_TEXTUAL@442..443 + 0: MD_TEXTUAL_LITERAL@442..443 "-" [] [] + 7: MD_TEXTUAL@443..444 + 0: MD_TEXTUAL_LITERAL@443..444 "-" [] [] + 8: MD_TEXTUAL@444..445 + 0: MD_TEXTUAL_LITERAL@444..445 ">" [] [] + 14: MD_TEXTUAL@445..454 + 0: MD_TEXTUAL_LITERAL@445..454 " allowed." [] [] + 15: MD_TEXTUAL@454..455 + 0: MD_TEXTUAL_LITERAL@454..455 "\n" [] [] + 1: (empty) + 18: MD_NEWLINE@455..456 + 0: NEWLINE@455..456 "\n" [] [] + 19: MD_HEADER@456..482 + 0: MD_HASH_LIST@456..458 + 0: MD_HASH@456..457 + 0: HASH@456..457 "#" [] [] + 1: MD_HASH@457..458 + 0: HASH@457..458 "#" [] [] + 1: MD_PARAGRAPH@458..482 + 0: MD_INLINE_ITEM_LIST@458..482 + 0: MD_TEXTUAL@458..482 + 0: MD_TEXTUAL_LITERAL@458..482 " Processing Instructions" [] [] + 1: (empty) + 2: MD_HASH_LIST@482..482 + 20: MD_NEWLINE@482..483 + 0: NEWLINE@482..483 "\n" [] [] + 21: MD_PARAGRAPH@483..552 + 0: MD_INLINE_ITEM_LIST@483..552 + 0: MD_TEXTUAL@483..488 + 0: MD_TEXTUAL_LITERAL@483..488 "XML: " [] [] + 1: MD_INLINE_HTML@488..509 + 0: MD_INLINE_ITEM_LIST@488..509 + 0: MD_TEXTUAL@488..489 + 0: MD_TEXTUAL_LITERAL@488..489 "<" [] [] + 1: MD_TEXTUAL@489..508 + 0: MD_TEXTUAL_LITERAL@489..508 "?xml version=\"1.0\"?" [] [] + 2: MD_TEXTUAL@508..509 + 0: MD_TEXTUAL_LITERAL@508..509 ">" [] [] + 2: MD_TEXTUAL@509..518 + 0: MD_TEXTUAL_LITERAL@509..518 " present." [] [] + 3: MD_TEXTUAL@518..519 + 0: MD_TEXTUAL_LITERAL@518..519 "\n" [] [] + 4: MD_TEXTUAL@519..524 + 0: MD_TEXTUAL_LITERAL@519..524 "PHP: " [] [] + 5: MD_INLINE_HTML@524..545 + 0: MD_INLINE_ITEM_LIST@524..545 + 0: MD_TEXTUAL@524..525 + 0: MD_TEXTUAL_LITERAL@524..525 "<" [] [] + 1: MD_TEXTUAL@525..544 + 0: MD_TEXTUAL_LITERAL@525..544 "?php echo \"test\"; ?" 
[] [] + 2: MD_TEXTUAL@544..545 + 0: MD_TEXTUAL_LITERAL@544..545 ">" [] [] + 6: MD_TEXTUAL@545..551 + 0: MD_TEXTUAL_LITERAL@545..551 " code." [] [] + 7: MD_TEXTUAL@551..552 + 0: MD_TEXTUAL_LITERAL@551..552 "\n" [] [] + 1: (empty) + 22: MD_NEWLINE@552..553 + 0: NEWLINE@552..553 "\n" [] [] + 23: MD_HEADER@553..570 + 0: MD_HASH_LIST@553..555 + 0: MD_HASH@553..554 + 0: HASH@553..554 "#" [] [] + 1: MD_HASH@554..555 + 0: HASH@554..555 "#" [] [] + 1: MD_PARAGRAPH@555..570 + 0: MD_INLINE_ITEM_LIST@555..570 + 0: MD_TEXTUAL@555..570 + 0: MD_TEXTUAL_LITERAL@555..570 " CDATA Sections" [] [] + 1: (empty) + 2: MD_HASH_LIST@570..570 + 24: MD_NEWLINE@570..571 + 0: NEWLINE@570..571 "\n" [] [] + 25: MD_PARAGRAPH@571..638 + 0: MD_INLINE_ITEM_LIST@571..638 + 0: MD_TEXTUAL@571..577 + 0: MD_TEXTUAL_LITERAL@571..577 "Data: " [] [] + 1: MD_INLINE_HTML@577..598 + 0: MD_INLINE_ITEM_LIST@577..598 + 0: MD_TEXTUAL@577..578 + 0: MD_TEXTUAL_LITERAL@577..578 "<" [] [] + 1: MD_TEXTUAL@578..579 + 0: MD_TEXTUAL_LITERAL@578..579 "!" [] [] + 2: MD_TEXTUAL@579..580 + 0: MD_TEXTUAL_LITERAL@579..580 "[" [] [] + 3: MD_TEXTUAL@580..585 + 0: MD_TEXTUAL_LITERAL@580..585 "CDATA" [] [] + 4: MD_TEXTUAL@585..586 + 0: MD_TEXTUAL_LITERAL@585..586 "[" [] [] + 5: MD_TEXTUAL@586..595 + 0: MD_TEXTUAL_LITERAL@586..595 "some text" [] [] + 6: MD_TEXTUAL@595..596 + 0: MD_TEXTUAL_LITERAL@595..596 "]" [] [] + 7: MD_TEXTUAL@596..597 + 0: MD_TEXTUAL_LITERAL@596..597 "]" [] [] + 8: MD_TEXTUAL@597..598 + 0: MD_TEXTUAL_LITERAL@597..598 ">" [] [] + 2: MD_TEXTUAL@598..604 + 0: MD_TEXTUAL_LITERAL@598..604 " here." [] [] + 3: MD_TEXTUAL@604..605 + 0: MD_TEXTUAL_LITERAL@604..605 "\n" [] [] + 4: MD_TEXTUAL@605..614 + 0: MD_TEXTUAL_LITERAL@605..614 "Special: " [] [] + 5: MD_INLINE_HTML@614..630 + 0: MD_INLINE_ITEM_LIST@614..630 + 0: MD_TEXTUAL@614..615 + 0: MD_TEXTUAL_LITERAL@614..615 "<" [] [] + 1: MD_TEXTUAL@615..616 + 0: MD_TEXTUAL_LITERAL@615..616 "!" 
[] [] + 2: MD_TEXTUAL@616..617 + 0: MD_TEXTUAL_LITERAL@616..617 "[" [] [] + 3: MD_TEXTUAL@617..622 + 0: MD_TEXTUAL_LITERAL@617..622 "CDATA" [] [] + 4: MD_TEXTUAL@622..623 + 0: MD_TEXTUAL_LITERAL@622..623 "[" [] [] + 5: MD_TEXTUAL@623..624 + 0: MD_TEXTUAL_LITERAL@623..624 "<" [] [] + 6: MD_TEXTUAL@624..625 + 0: MD_TEXTUAL_LITERAL@624..625 ">" [] [] + 7: MD_TEXTUAL@625..627 + 0: MD_TEXTUAL_LITERAL@625..627 "&\"" [] [] + 8: MD_TEXTUAL@627..628 + 0: MD_TEXTUAL_LITERAL@627..628 "]" [] [] + 9: MD_TEXTUAL@628..629 + 0: MD_TEXTUAL_LITERAL@628..629 "]" [] [] + 10: MD_TEXTUAL@629..630 + 0: MD_TEXTUAL_LITERAL@629..630 ">" [] [] + 6: MD_TEXTUAL@630..637 + 0: MD_TEXTUAL_LITERAL@630..637 " chars." [] [] + 7: MD_TEXTUAL@637..638 + 0: MD_TEXTUAL_LITERAL@637..638 "\n" [] [] + 1: (empty) + 26: MD_NEWLINE@638..639 + 0: NEWLINE@638..639 "\n" [] [] + 27: MD_HEADER@639..654 + 0: MD_HASH_LIST@639..641 + 0: MD_HASH@639..640 + 0: HASH@639..640 "#" [] [] + 1: MD_HASH@640..641 + 0: HASH@640..641 "#" [] [] + 1: MD_PARAGRAPH@641..654 + 0: MD_INLINE_ITEM_LIST@641..654 + 0: MD_TEXTUAL@641..654 + 0: MD_TEXTUAL_LITERAL@641..654 " Declarations" [] [] + 1: (empty) + 2: MD_HASH_LIST@654..654 + 28: MD_NEWLINE@654..655 + 0: NEWLINE@654..655 "\n" [] [] + 29: MD_PARAGRAPH@655..801 + 0: MD_INLINE_ITEM_LIST@655..801 + 0: MD_TEXTUAL@655..665 + 0: MD_TEXTUAL_LITERAL@655..665 "Standard: " [] [] + 1: MD_INLINE_HTML@665..680 + 0: MD_INLINE_ITEM_LIST@665..680 + 0: MD_TEXTUAL@665..666 + 0: MD_TEXTUAL_LITERAL@665..666 "<" [] [] + 1: MD_TEXTUAL@666..667 + 0: MD_TEXTUAL_LITERAL@666..667 "!" [] [] + 2: MD_TEXTUAL@667..679 + 0: MD_TEXTUAL_LITERAL@667..679 "DOCTYPE html" [] [] + 3: MD_TEXTUAL@679..680 + 0: MD_TEXTUAL_LITERAL@679..680 ">" [] [] + 2: MD_TEXTUAL@680..693 + 0: MD_TEXTUAL_LITERAL@680..693 " declaration." 
[] [] + 3: MD_TEXTUAL@693..694 + 0: MD_TEXTUAL_LITERAL@693..694 "\n" [] [] + 4: MD_TEXTUAL@694..705 + 0: MD_TEXTUAL_LITERAL@694..705 "Lowercase: " [] [] + 5: MD_INLINE_HTML@705..720 + 0: MD_INLINE_ITEM_LIST@705..720 + 0: MD_TEXTUAL@705..706 + 0: MD_TEXTUAL_LITERAL@705..706 "<" [] [] + 1: MD_TEXTUAL@706..707 + 0: MD_TEXTUAL_LITERAL@706..707 "!" [] [] + 2: MD_TEXTUAL@707..719 + 0: MD_TEXTUAL_LITERAL@707..719 "doctype html" [] [] + 3: MD_TEXTUAL@719..720 + 0: MD_TEXTUAL_LITERAL@719..720 ">" [] [] + 6: MD_TEXTUAL@720..733 + 0: MD_TEXTUAL_LITERAL@720..733 " declaration." [] [] + 7: MD_TEXTUAL@733..734 + 0: MD_TEXTUAL_LITERAL@733..734 "\n" [] [] + 8: MD_TEXTUAL@734..744 + 0: MD_TEXTUAL_LITERAL@734..744 "Extended: " [] [] + 9: MD_INLINE_HTML@744..794 + 0: MD_INLINE_ITEM_LIST@744..794 + 0: MD_TEXTUAL@744..745 + 0: MD_TEXTUAL_LITERAL@744..745 "<" [] [] + 1: MD_TEXTUAL@745..746 + 0: MD_TEXTUAL_LITERAL@745..746 "!" [] [] + 2: MD_TEXTUAL@746..767 + 0: MD_TEXTUAL_LITERAL@746..767 "DOCTYPE HTML PUBLIC \"" [] [] + 3: MD_TEXTUAL@767..768 + 0: MD_TEXTUAL_LITERAL@767..768 "-" [] [] + 4: MD_TEXTUAL@768..793 + 0: MD_TEXTUAL_LITERAL@768..793 "//W3C//DTD HTML 4.01//EN\"" [] [] + 5: MD_TEXTUAL@793..794 + 0: MD_TEXTUAL_LITERAL@793..794 ">" [] [] + 10: MD_TEXTUAL@794..800 + 0: MD_TEXTUAL_LITERAL@794..800 " test." 
[] [] + 11: MD_TEXTUAL@800..801 + 0: MD_TEXTUAL_LITERAL@800..801 "\n" [] [] + 1: (empty) + 30: MD_NEWLINE@801..802 + 0: NEWLINE@801..802 "\n" [] [] + 31: MD_HEADER@802..827 + 0: MD_HASH_LIST@802..804 + 0: MD_HASH@802..803 + 0: HASH@802..803 "#" [] [] + 1: MD_HASH@803..804 + 0: HASH@803..804 "#" [] [] + 1: MD_PARAGRAPH@804..827 + 0: MD_INLINE_ITEM_LIST@804..827 + 0: MD_TEXTUAL@804..827 + 0: MD_TEXTUAL_LITERAL@804..827 " Attributes with Quotes" [] [] + 1: (empty) + 2: MD_HASH_LIST@827..827 + 32: MD_NEWLINE@827..828 + 0: NEWLINE@827..828 "\n" [] [] + 33: MD_PARAGRAPH@828..968 + 0: MD_INLINE_ITEM_LIST@828..968 + 0: MD_TEXTUAL@828..836 + 0: MD_TEXTUAL_LITERAL@828..836 "Single: " [] [] + 1: MD_INLINE_HTML@836..856 + 0: MD_INLINE_ITEM_LIST@836..856 + 0: MD_TEXTUAL@836..837 + 0: MD_TEXTUAL_LITERAL@836..837 "<" [] [] + 1: MD_TEXTUAL@837..855 + 0: MD_TEXTUAL_LITERAL@837..855 "div class='quoted'" [] [] + 2: MD_TEXTUAL@855..856 + 0: MD_TEXTUAL_LITERAL@855..856 ">" [] [] + 2: MD_TEXTUAL@856..860 + 0: MD_TEXTUAL_LITERAL@856..860 "text" [] [] + 3: MD_INLINE_HTML@860..866 + 0: MD_INLINE_ITEM_LIST@860..866 + 0: MD_TEXTUAL@860..861 + 0: MD_TEXTUAL_LITERAL@860..861 "<" [] [] + 1: MD_TEXTUAL@861..865 + 0: MD_TEXTUAL_LITERAL@861..865 "/div" [] [] + 2: MD_TEXTUAL@865..866 + 0: MD_TEXTUAL_LITERAL@865..866 ">" [] [] + 4: MD_TEXTUAL@866..871 + 0: MD_TEXTUAL_LITERAL@866..871 " end." 
[] [] + 5: MD_TEXTUAL@871..872 + 0: MD_TEXTUAL_LITERAL@871..872 "\n" [] [] + 6: MD_TEXTUAL@872..880 + 0: MD_TEXTUAL_LITERAL@872..880 "Double: " [] [] + 7: MD_INLINE_HTML@880..900 + 0: MD_INLINE_ITEM_LIST@880..900 + 0: MD_TEXTUAL@880..881 + 0: MD_TEXTUAL_LITERAL@880..881 "<" [] [] + 1: MD_TEXTUAL@881..899 + 0: MD_TEXTUAL_LITERAL@881..899 "div class=\"quoted\"" [] [] + 2: MD_TEXTUAL@899..900 + 0: MD_TEXTUAL_LITERAL@899..900 ">" [] [] + 8: MD_TEXTUAL@900..904 + 0: MD_TEXTUAL_LITERAL@900..904 "text" [] [] + 9: MD_INLINE_HTML@904..910 + 0: MD_INLINE_ITEM_LIST@904..910 + 0: MD_TEXTUAL@904..905 + 0: MD_TEXTUAL_LITERAL@904..905 "<" [] [] + 1: MD_TEXTUAL@905..909 + 0: MD_TEXTUAL_LITERAL@905..909 "/div" [] [] + 2: MD_TEXTUAL@909..910 + 0: MD_TEXTUAL_LITERAL@909..910 ">" [] [] + 10: MD_TEXTUAL@910..915 + 0: MD_TEXTUAL_LITERAL@910..915 " end." [] [] + 11: MD_TEXTUAL@915..916 + 0: MD_TEXTUAL_LITERAL@915..916 "\n" [] [] + 12: MD_TEXTUAL@916..922 + 0: MD_TEXTUAL_LITERAL@916..922 "Both: " [] [] + 13: MD_INLINE_HTML@922..952 + 0: MD_INLINE_ITEM_LIST@922..952 + 0: MD_TEXTUAL@922..923 + 0: MD_TEXTUAL_LITERAL@922..923 "<" [] [] + 1: MD_TEXTUAL@923..951 + 0: MD_TEXTUAL_LITERAL@923..951 "div class=\"outer\" id='inner'" [] [] + 2: MD_TEXTUAL@951..952 + 0: MD_TEXTUAL_LITERAL@951..952 ">" [] [] + 14: MD_TEXTUAL@952..956 + 0: MD_TEXTUAL_LITERAL@952..956 "text" [] [] + 15: MD_INLINE_HTML@956..962 + 0: MD_INLINE_ITEM_LIST@956..962 + 0: MD_TEXTUAL@956..957 + 0: MD_TEXTUAL_LITERAL@956..957 "<" [] [] + 1: MD_TEXTUAL@957..961 + 0: MD_TEXTUAL_LITERAL@957..961 "/div" [] [] + 2: MD_TEXTUAL@961..962 + 0: MD_TEXTUAL_LITERAL@961..962 ">" [] [] + 16: MD_TEXTUAL@962..967 + 0: MD_TEXTUAL_LITERAL@962..967 " end." 
[] [] + 17: MD_TEXTUAL@967..968 + 0: MD_TEXTUAL_LITERAL@967..968 "\n" [] [] + 1: (empty) + 34: MD_NEWLINE@968..969 + 0: NEWLINE@968..969 "\n" [] [] + 35: MD_HEADER@969..1001 + 0: MD_HASH_LIST@969..971 + 0: MD_HASH@969..970 + 0: HASH@969..970 "#" [] [] + 1: MD_HASH@970..971 + 0: HASH@970..971 "#" [] [] + 1: MD_PARAGRAPH@971..1001 + 0: MD_INLINE_ITEM_LIST@971..1001 + 0: MD_TEXTUAL@971..1001 + 0: MD_TEXTUAL_LITERAL@971..1001 " Attributes with Special Chars" [] [] + 1: (empty) + 2: MD_HASH_LIST@1001..1001 + 36: MD_NEWLINE@1001..1002 + 0: NEWLINE@1001..1002 "\n" [] [] + 37: MD_PARAGRAPH@1002..1261 + 0: MD_INLINE_ITEM_LIST@1002..1261 + 0: MD_TEXTUAL@1002..1010 + 0: MD_TEXTUAL_LITERAL@1002..1010 "Spaces: " [] [] + 1: MD_INLINE_HTML@1010..1040 + 0: MD_INLINE_ITEM_LIST@1010..1040 + 0: MD_TEXTUAL@1010..1011 + 0: MD_TEXTUAL_LITERAL@1010..1011 "<" [] [] + 1: MD_TEXTUAL@1011..1019 + 0: MD_TEXTUAL_LITERAL@1011..1019 "div data" [] [] + 2: MD_TEXTUAL@1019..1020 + 0: MD_TEXTUAL_LITERAL@1019..1020 "-" [] [] + 3: MD_TEXTUAL@1020..1039 + 0: MD_TEXTUAL_LITERAL@1020..1039 "value=\"with spaces\"" [] [] + 4: MD_TEXTUAL@1039..1040 + 0: MD_TEXTUAL_LITERAL@1039..1040 ">" [] [] + 2: MD_TEXTUAL@1040..1044 + 0: MD_TEXTUAL_LITERAL@1040..1044 "text" [] [] + 3: MD_INLINE_HTML@1044..1050 + 0: MD_INLINE_ITEM_LIST@1044..1050 + 0: MD_TEXTUAL@1044..1045 + 0: MD_TEXTUAL_LITERAL@1044..1045 "<" [] [] + 1: MD_TEXTUAL@1045..1049 + 0: MD_TEXTUAL_LITERAL@1045..1049 "/div" [] [] + 2: MD_TEXTUAL@1049..1050 + 0: MD_TEXTUAL_LITERAL@1049..1050 ">" [] [] + 4: MD_TEXTUAL@1050..1055 + 0: MD_TEXTUAL_LITERAL@1050..1055 " end." 
[] [] + 5: MD_TEXTUAL@1055..1056 + 0: MD_TEXTUAL_LITERAL@1055..1056 "\n" [] [] + 6: MD_TEXTUAL@1056..1066 + 0: MD_TEXTUAL_LITERAL@1056..1066 "Multiple: " [] [] + 7: MD_INLINE_HTML@1066..1099 + 0: MD_INLINE_ITEM_LIST@1066..1099 + 0: MD_TEXTUAL@1066..1067 + 0: MD_TEXTUAL_LITERAL@1066..1067 "<" [] [] + 1: MD_TEXTUAL@1067..1092 + 0: MD_TEXTUAL_LITERAL@1067..1092 "div class=\"a\" id=\"b\" data" [] [] + 2: MD_TEXTUAL@1092..1093 + 0: MD_TEXTUAL_LITERAL@1092..1093 "-" [] [] + 3: MD_TEXTUAL@1093..1098 + 0: MD_TEXTUAL_LITERAL@1093..1098 "x=\"c\"" [] [] + 4: MD_TEXTUAL@1098..1099 + 0: MD_TEXTUAL_LITERAL@1098..1099 ">" [] [] + 8: MD_TEXTUAL@1099..1103 + 0: MD_TEXTUAL_LITERAL@1099..1103 "text" [] [] + 9: MD_INLINE_HTML@1103..1109 + 0: MD_INLINE_ITEM_LIST@1103..1109 + 0: MD_TEXTUAL@1103..1104 + 0: MD_TEXTUAL_LITERAL@1103..1104 "<" [] [] + 1: MD_TEXTUAL@1104..1108 + 0: MD_TEXTUAL_LITERAL@1104..1108 "/div" [] [] + 2: MD_TEXTUAL@1108..1109 + 0: MD_TEXTUAL_LITERAL@1108..1109 ">" [] [] + 10: MD_TEXTUAL@1109..1114 + 0: MD_TEXTUAL_LITERAL@1109..1114 " end." 
[] [] + 11: MD_TEXTUAL@1114..1115 + 0: MD_TEXTUAL_LITERAL@1114..1115 "\n" [] [] + 12: MD_TEXTUAL@1115..1125 + 0: MD_TEXTUAL_LITERAL@1115..1125 "Unquoted: " [] [] + 13: MD_INLINE_HTML@1125..1156 + 0: MD_INLINE_ITEM_LIST@1125..1156 + 0: MD_TEXTUAL@1125..1126 + 0: MD_TEXTUAL_LITERAL@1125..1126 "<" [] [] + 1: MD_TEXTUAL@1126..1134 + 0: MD_TEXTUAL_LITERAL@1126..1134 "div data" [] [] + 2: MD_TEXTUAL@1134..1135 + 0: MD_TEXTUAL_LITERAL@1134..1135 "-" [] [] + 3: MD_TEXTUAL@1135..1145 + 0: MD_TEXTUAL_LITERAL@1135..1145 "x=foo data" [] [] + 4: MD_TEXTUAL@1145..1146 + 0: MD_TEXTUAL_LITERAL@1145..1146 "-" [] [] + 5: MD_TEXTUAL@1146..1151 + 0: MD_TEXTUAL_LITERAL@1146..1151 "y=bar" [] [] + 6: MD_TEXTUAL@1151..1152 + 0: MD_TEXTUAL_LITERAL@1151..1152 "-" [] [] + 7: MD_TEXTUAL@1152..1155 + 0: MD_TEXTUAL_LITERAL@1152..1155 "baz" [] [] + 8: MD_TEXTUAL@1155..1156 + 0: MD_TEXTUAL_LITERAL@1155..1156 ">" [] [] + 14: MD_TEXTUAL@1156..1160 + 0: MD_TEXTUAL_LITERAL@1156..1160 "text" [] [] + 15: MD_INLINE_HTML@1160..1166 + 0: MD_INLINE_ITEM_LIST@1160..1166 + 0: MD_TEXTUAL@1160..1161 + 0: MD_TEXTUAL_LITERAL@1160..1161 "<" [] [] + 1: MD_TEXTUAL@1161..1165 + 0: MD_TEXTUAL_LITERAL@1161..1165 "/div" [] [] + 2: MD_TEXTUAL@1165..1166 + 0: MD_TEXTUAL_LITERAL@1165..1166 ">" [] [] + 16: MD_TEXTUAL@1166..1171 + 0: MD_TEXTUAL_LITERAL@1166..1171 " end." 
[] [] + 17: MD_TEXTUAL@1171..1172 + 0: MD_TEXTUAL_LITERAL@1171..1172 "\n" [] [] + 18: MD_TEXTUAL@1172..1190 + 0: MD_TEXTUAL_LITERAL@1172..1190 "Underscore/colon: " [] [] + 19: MD_INLINE_HTML@1190..1206 + 0: MD_INLINE_ITEM_LIST@1190..1206 + 0: MD_TEXTUAL@1190..1191 + 0: MD_TEXTUAL_LITERAL@1190..1191 "<" [] [] + 1: MD_TEXTUAL@1191..1195 + 0: MD_TEXTUAL_LITERAL@1191..1195 "div " [] [] + 2: MD_TEXTUAL@1195..1196 + 0: MD_TEXTUAL_LITERAL@1195..1196 "_" [] [] + 3: MD_TEXTUAL@1196..1205 + 0: MD_TEXTUAL_LITERAL@1196..1205 "x=1 x:y=2" [] [] + 4: MD_TEXTUAL@1205..1206 + 0: MD_TEXTUAL_LITERAL@1205..1206 ">" [] [] + 20: MD_TEXTUAL@1206..1210 + 0: MD_TEXTUAL_LITERAL@1206..1210 "text" [] [] + 21: MD_INLINE_HTML@1210..1216 + 0: MD_INLINE_ITEM_LIST@1210..1216 + 0: MD_TEXTUAL@1210..1211 + 0: MD_TEXTUAL_LITERAL@1210..1211 "<" [] [] + 1: MD_TEXTUAL@1211..1215 + 0: MD_TEXTUAL_LITERAL@1211..1215 "/div" [] [] + 2: MD_TEXTUAL@1215..1216 + 0: MD_TEXTUAL_LITERAL@1215..1216 ">" [] [] + 22: MD_TEXTUAL@1216..1221 + 0: MD_TEXTUAL_LITERAL@1216..1221 " end." [] [] + 23: MD_TEXTUAL@1221..1222 + 0: MD_TEXTUAL_LITERAL@1221..1222 "\n" [] [] + 24: MD_TEXTUAL@1222..1231 + 0: MD_TEXTUAL_LITERAL@1222..1231 "Boolean: " [] [] + 25: MD_INLINE_HTML@1231..1245 + 0: MD_INLINE_ITEM_LIST@1231..1245 + 0: MD_TEXTUAL@1231..1232 + 0: MD_TEXTUAL_LITERAL@1231..1232 "<" [] [] + 1: MD_TEXTUAL@1232..1244 + 0: MD_TEXTUAL_LITERAL@1232..1244 "div disabled" [] [] + 2: MD_TEXTUAL@1244..1245 + 0: MD_TEXTUAL_LITERAL@1244..1245 ">" [] [] + 26: MD_TEXTUAL@1245..1249 + 0: MD_TEXTUAL_LITERAL@1245..1249 "text" [] [] + 27: MD_INLINE_HTML@1249..1255 + 0: MD_INLINE_ITEM_LIST@1249..1255 + 0: MD_TEXTUAL@1249..1250 + 0: MD_TEXTUAL_LITERAL@1249..1250 "<" [] [] + 1: MD_TEXTUAL@1250..1254 + 0: MD_TEXTUAL_LITERAL@1250..1254 "/div" [] [] + 2: MD_TEXTUAL@1254..1255 + 0: MD_TEXTUAL_LITERAL@1254..1255 ">" [] [] + 28: MD_TEXTUAL@1255..1260 + 0: MD_TEXTUAL_LITERAL@1255..1260 " end." 
[] [] + 29: MD_TEXTUAL@1260..1261 + 0: MD_TEXTUAL_LITERAL@1260..1261 "\n" [] [] + 1: (empty) + 38: MD_NEWLINE@1261..1262 + 0: NEWLINE@1261..1262 "\n" [] [] + 39: MD_HEADER@1262..1308 + 0: MD_HASH_LIST@1262..1264 + 0: MD_HASH@1262..1263 + 0: HASH@1262..1263 "#" [] [] + 1: MD_HASH@1263..1264 + 0: HASH@1263..1264 "#" [] [] + 1: MD_PARAGRAPH@1264..1308 + 0: MD_INLINE_ITEM_LIST@1264..1308 + 0: MD_TEXTUAL@1264..1279 + 0: MD_TEXTUAL_LITERAL@1264..1279 " Newline Cases " [] [] + 1: MD_TEXTUAL@1279..1280 + 0: MD_TEXTUAL_LITERAL@1279..1280 "(" [] [] + 2: MD_TEXTUAL@1280..1307 + 0: MD_TEXTUAL_LITERAL@1280..1307 "should parse as inline HTML" [] [] + 3: MD_TEXTUAL@1307..1308 + 0: MD_TEXTUAL_LITERAL@1307..1308 ")" [] [] + 1: (empty) + 2: MD_HASH_LIST@1308..1308 + 40: MD_NEWLINE@1308..1309 + 0: NEWLINE@1308..1309 "\n" [] [] + 41: MD_PARAGRAPH@1309..1389 + 0: MD_INLINE_ITEM_LIST@1309..1389 + 0: MD_TEXTUAL@1309..1318 + 0: MD_TEXTUAL_LITERAL@1309..1318 "Allowed: " [] [] + 1: MD_INLINE_HTML@1318..1336 + 0: MD_INLINE_ITEM_LIST@1318..1336 + 0: MD_TEXTUAL@1318..1319 + 0: MD_TEXTUAL_LITERAL@1318..1319 "<" [] [] + 1: MD_TEXTUAL@1319..1322 + 0: MD_TEXTUAL_LITERAL@1319..1322 "div" [] [] + 2: MD_TEXTUAL@1322..1323 + 0: MD_TEXTUAL_LITERAL@1322..1323 "\n" [] [] + 3: MD_TEXTUAL@1323..1335 + 0: MD_TEXTUAL_LITERAL@1323..1335 "class=\"test\"" [] [] + 4: MD_TEXTUAL@1335..1336 + 0: MD_TEXTUAL_LITERAL@1335..1336 ">" [] [] + 2: MD_TEXTUAL@1336..1338 + 0: MD_TEXTUAL_LITERAL@1336..1338 "ok" [] [] + 3: MD_INLINE_HTML@1338..1344 + 0: MD_INLINE_ITEM_LIST@1338..1344 + 0: MD_TEXTUAL@1338..1339 + 0: MD_TEXTUAL_LITERAL@1338..1339 "<" [] [] + 1: MD_TEXTUAL@1339..1343 + 0: MD_TEXTUAL_LITERAL@1339..1343 "/div" [] [] + 2: MD_TEXTUAL@1343..1344 + 0: MD_TEXTUAL_LITERAL@1343..1344 ">" [] [] + 4: MD_TEXTUAL@1344..1349 + 0: MD_TEXTUAL_LITERAL@1344..1349 " tag." 
[] [] + 5: MD_TEXTUAL@1349..1350 + 0: MD_TEXTUAL_LITERAL@1349..1350 "\n" [] [] + 6: MD_TEXTUAL@1350..1359 + 0: MD_TEXTUAL_LITERAL@1350..1359 "Allowed: " [] [] + 7: MD_INLINE_HTML@1359..1375 + 0: MD_INLINE_ITEM_LIST@1359..1375 + 0: MD_TEXTUAL@1359..1360 + 0: MD_TEXTUAL_LITERAL@1359..1360 "<" [] [] + 1: MD_TEXTUAL@1360..1373 + 0: MD_TEXTUAL_LITERAL@1360..1373 "div class=\"a\"" [] [] + 2: MD_TEXTUAL@1373..1374 + 0: MD_TEXTUAL_LITERAL@1373..1374 "\n" [] [] + 3: MD_TEXTUAL@1374..1375 + 0: MD_TEXTUAL_LITERAL@1374..1375 ">" [] [] + 8: MD_TEXTUAL@1375..1377 + 0: MD_TEXTUAL_LITERAL@1375..1377 "ok" [] [] + 9: MD_INLINE_HTML@1377..1383 + 0: MD_INLINE_ITEM_LIST@1377..1383 + 0: MD_TEXTUAL@1377..1378 + 0: MD_TEXTUAL_LITERAL@1377..1378 "<" [] [] + 1: MD_TEXTUAL@1378..1382 + 0: MD_TEXTUAL_LITERAL@1378..1382 "/div" [] [] + 2: MD_TEXTUAL@1382..1383 + 0: MD_TEXTUAL_LITERAL@1382..1383 ">" [] [] + 10: MD_TEXTUAL@1383..1388 + 0: MD_TEXTUAL_LITERAL@1383..1388 " tag." [] [] + 11: MD_TEXTUAL@1388..1389 + 0: MD_TEXTUAL_LITERAL@1388..1389 "\n" [] [] + 1: (empty) + 42: MD_NEWLINE@1389..1390 + 0: NEWLINE@1389..1390 "\n" [] [] + 43: MD_HEADER@1390..1424 + 0: MD_HASH_LIST@1390..1392 + 0: MD_HASH@1390..1391 + 0: HASH@1390..1391 "#" [] [] + 1: MD_HASH@1391..1392 + 0: HASH@1391..1392 "#" [] [] + 1: MD_PARAGRAPH@1392..1424 + 0: MD_INLINE_ITEM_LIST@1392..1424 + 0: MD_TEXTUAL@1392..1402 + 0: MD_TEXTUAL_LITERAL@1392..1402 " Priority " [] [] + 1: MD_TEXTUAL@1402..1403 + 0: MD_TEXTUAL_LITERAL@1402..1403 "-" [] [] + 2: MD_TEXTUAL@1403..1424 + 0: MD_TEXTUAL_LITERAL@1403..1424 " Autolinks Should Win" [] [] + 1: (empty) + 2: MD_HASH_LIST@1424..1424 + 44: MD_NEWLINE@1424..1425 + 0: NEWLINE@1424..1425 "\n" [] [] + 45: MD_PARAGRAPH@1425..1493 + 0: MD_INLINE_ITEM_LIST@1425..1493 + 0: MD_TEXTUAL@1425..1430 + 0: MD_TEXTUAL_LITERAL@1425..1430 "URL: " [] [] + 1: MD_AUTOLINK@1430..1451 + 0: L_ANGLE@1430..1431 "<" [] [] + 1: MD_INLINE_ITEM_LIST@1431..1450 + 0: MD_TEXTUAL@1431..1450 + 0: MD_TEXTUAL_LITERAL@1431..1450 
"https://example.com" [] [] + 2: R_ANGLE@1450..1451 ">" [] [] + 2: MD_TEXTUAL@1451..1457 + 0: MD_TEXTUAL_LITERAL@1451..1457 " link." [] [] + 3: MD_TEXTUAL@1457..1458 + 0: MD_TEXTUAL_LITERAL@1457..1458 "\n" [] [] + 4: MD_TEXTUAL@1458..1465 + 0: MD_TEXTUAL_LITERAL@1458..1465 "Email: " [] [] + 5: MD_AUTOLINK@1465..1483 + 0: L_ANGLE@1465..1466 "<" [] [] + 1: MD_INLINE_ITEM_LIST@1466..1482 + 0: MD_TEXTUAL@1466..1482 + 0: MD_TEXTUAL_LITERAL@1466..1482 "user@example.com" [] [] + 2: R_ANGLE@1482..1483 ">" [] [] + 6: MD_TEXTUAL@1483..1492 + 0: MD_TEXTUAL_LITERAL@1483..1492 " address." [] [] + 7: MD_TEXTUAL@1492..1493 + 0: MD_TEXTUAL_LITERAL@1492..1493 "\n" [] [] + 1: (empty) + 46: MD_NEWLINE@1493..1494 + 0: NEWLINE@1493..1494 "\n" [] [] + 47: MD_HEADER@1494..1519 + 0: MD_HASH_LIST@1494..1496 + 0: MD_HASH@1494..1495 + 0: HASH@1494..1495 "#" [] [] + 1: MD_HASH@1495..1496 + 0: HASH@1495..1496 "#" [] [] + 1: MD_PARAGRAPH@1496..1519 + 0: MD_INLINE_ITEM_LIST@1496..1519 + 0: MD_TEXTUAL@1496..1519 + 0: MD_TEXTUAL_LITERAL@1496..1519 " Tag Names with Hyphens" [] [] + 1: (empty) + 2: MD_HASH_LIST@1519..1519 + 48: MD_NEWLINE@1519..1520 + 0: NEWLINE@1519..1520 "\n" [] [] + 49: MD_PARAGRAPH@1520..1633 + 0: MD_INLINE_ITEM_LIST@1520..1633 + 0: MD_TEXTUAL@1520..1528 + 0: MD_TEXTUAL_LITERAL@1520..1528 "Custom: " [] [] + 1: MD_INLINE_HTML@1528..1542 + 0: MD_INLINE_ITEM_LIST@1528..1542 + 0: MD_TEXTUAL@1528..1529 + 0: MD_TEXTUAL_LITERAL@1528..1529 "<" [] [] + 1: MD_TEXTUAL@1529..1531 + 0: MD_TEXTUAL_LITERAL@1529..1531 "my" [] [] + 2: MD_TEXTUAL@1531..1532 + 0: MD_TEXTUAL_LITERAL@1531..1532 "-" [] [] + 3: MD_TEXTUAL@1532..1541 + 0: MD_TEXTUAL_LITERAL@1532..1541 "component" [] [] + 4: MD_TEXTUAL@1541..1542 + 0: MD_TEXTUAL_LITERAL@1541..1542 ">" [] [] + 2: MD_TEXTUAL@1542..1549 + 0: MD_TEXTUAL_LITERAL@1542..1549 "content" [] [] + 3: MD_INLINE_HTML@1549..1564 + 0: MD_INLINE_ITEM_LIST@1549..1564 + 0: MD_TEXTUAL@1549..1550 + 0: MD_TEXTUAL_LITERAL@1549..1550 "<" [] [] + 1: MD_TEXTUAL@1550..1553 + 0: 
MD_TEXTUAL_LITERAL@1550..1553 "/my" [] [] + 2: MD_TEXTUAL@1553..1554 + 0: MD_TEXTUAL_LITERAL@1553..1554 "-" [] [] + 3: MD_TEXTUAL@1554..1563 + 0: MD_TEXTUAL_LITERAL@1554..1563 "component" [] [] + 4: MD_TEXTUAL@1563..1564 + 0: MD_TEXTUAL_LITERAL@1563..1564 ">" [] [] + 4: MD_TEXTUAL@1564..1573 + 0: MD_TEXTUAL_LITERAL@1564..1573 " element." [] [] + 5: MD_TEXTUAL@1573..1574 + 0: MD_TEXTUAL_LITERAL@1573..1574 "\n" [] [] + 6: MD_TEXTUAL@1574..1584 + 0: MD_TEXTUAL_LITERAL@1574..1584 "Multiple: " [] [] + 7: MD_INLINE_HTML@1584..1603 + 0: MD_INLINE_ITEM_LIST@1584..1603 + 0: MD_TEXTUAL@1584..1585 + 0: MD_TEXTUAL_LITERAL@1584..1585 "<" [] [] + 1: MD_TEXTUAL@1585..1587 + 0: MD_TEXTUAL_LITERAL@1585..1587 "my" [] [] + 2: MD_TEXTUAL@1587..1588 + 0: MD_TEXTUAL_LITERAL@1587..1588 "-" [] [] + 3: MD_TEXTUAL@1588..1594 + 0: MD_TEXTUAL_LITERAL@1588..1594 "custom" [] [] + 4: MD_TEXTUAL@1594..1595 + 0: MD_TEXTUAL_LITERAL@1594..1595 "-" [] [] + 5: MD_TEXTUAL@1595..1602 + 0: MD_TEXTUAL_LITERAL@1595..1602 "element" [] [] + 6: MD_TEXTUAL@1602..1603 + 0: MD_TEXTUAL_LITERAL@1602..1603 ">" [] [] + 8: MD_TEXTUAL@1603..1607 + 0: MD_TEXTUAL_LITERAL@1603..1607 "test" [] [] + 9: MD_INLINE_HTML@1607..1627 + 0: MD_INLINE_ITEM_LIST@1607..1627 + 0: MD_TEXTUAL@1607..1608 + 0: MD_TEXTUAL_LITERAL@1607..1608 "<" [] [] + 1: MD_TEXTUAL@1608..1611 + 0: MD_TEXTUAL_LITERAL@1608..1611 "/my" [] [] + 2: MD_TEXTUAL@1611..1612 + 0: MD_TEXTUAL_LITERAL@1611..1612 "-" [] [] + 3: MD_TEXTUAL@1612..1618 + 0: MD_TEXTUAL_LITERAL@1612..1618 "custom" [] [] + 4: MD_TEXTUAL@1618..1619 + 0: MD_TEXTUAL_LITERAL@1618..1619 "-" [] [] + 5: MD_TEXTUAL@1619..1626 + 0: MD_TEXTUAL_LITERAL@1619..1626 "element" [] [] + 6: MD_TEXTUAL@1626..1627 + 0: MD_TEXTUAL_LITERAL@1626..1627 ">" [] [] + 10: MD_TEXTUAL@1627..1632 + 0: MD_TEXTUAL_LITERAL@1627..1632 " tag." 
[] [] + 11: MD_TEXTUAL@1632..1633 + 0: MD_TEXTUAL_LITERAL@1632..1633 "\n" [] [] + 1: (empty) + 50: MD_NEWLINE@1633..1634 + 0: NEWLINE@1633..1634 "\n" [] [] + 51: MD_HEADER@1634..1647 + 0: MD_HASH_LIST@1634..1636 + 0: MD_HASH@1634..1635 + 0: HASH@1634..1635 "#" [] [] + 1: MD_HASH@1635..1636 + 0: HASH@1635..1636 "#" [] [] + 1: MD_PARAGRAPH@1636..1647 + 0: MD_INLINE_ITEM_LIST@1636..1647 + 0: MD_TEXTUAL@1636..1647 + 0: MD_TEXTUAL_LITERAL@1636..1647 " Empty Tags" [] [] + 1: (empty) + 2: MD_HASH_LIST@1647..1647 + 52: MD_NEWLINE@1647..1648 + 0: NEWLINE@1647..1648 "\n" [] [] + 53: MD_PARAGRAPH@1648..1703 + 0: MD_INLINE_ITEM_LIST@1648..1703 + 0: MD_TEXTUAL@1648..1660 + 0: MD_TEXTUAL_LITERAL@1648..1660 "Empty open: " [] [] + 1: MD_INLINE_HTML@1660..1665 + 0: MD_INLINE_ITEM_LIST@1660..1665 + 0: MD_TEXTUAL@1660..1661 + 0: MD_TEXTUAL_LITERAL@1660..1661 "<" [] [] + 1: MD_TEXTUAL@1661..1664 + 0: MD_TEXTUAL_LITERAL@1661..1664 "div" [] [] + 2: MD_TEXTUAL@1664..1665 + 0: MD_TEXTUAL_LITERAL@1664..1665 ">" [] [] + 2: MD_INLINE_HTML@1665..1671 + 0: MD_INLINE_ITEM_LIST@1665..1671 + 0: MD_TEXTUAL@1665..1666 + 0: MD_TEXTUAL_LITERAL@1665..1666 "<" [] [] + 1: MD_TEXTUAL@1666..1670 + 0: MD_TEXTUAL_LITERAL@1666..1670 "/div" [] [] + 2: MD_TEXTUAL@1670..1671 + 0: MD_TEXTUAL_LITERAL@1670..1671 ">" [] [] + 3: MD_TEXTUAL@1671..1677 + 0: MD_TEXTUAL_LITERAL@1671..1677 " tags." [] [] + 4: MD_TEXTUAL@1677..1678 + 0: MD_TEXTUAL_LITERAL@1677..1678 "\n" [] [] + 5: MD_TEXTUAL@1678..1690 + 0: MD_TEXTUAL_LITERAL@1678..1690 "Self close: " [] [] + 6: MD_INLINE_HTML@1690..1695 + 0: MD_INLINE_ITEM_LIST@1690..1695 + 0: MD_TEXTUAL@1690..1691 + 0: MD_TEXTUAL_LITERAL@1690..1691 "<" [] [] + 1: MD_TEXTUAL@1691..1694 + 0: MD_TEXTUAL_LITERAL@1691..1694 "br/" [] [] + 2: MD_TEXTUAL@1694..1695 + 0: MD_TEXTUAL_LITERAL@1694..1695 ">" [] [] + 7: MD_TEXTUAL@1695..1702 + 0: MD_TEXTUAL_LITERAL@1695..1702 " break." 
[] [] + 8: MD_TEXTUAL@1702..1703 + 0: MD_TEXTUAL_LITERAL@1702..1703 "\n" [] [] + 1: (empty) + 2: EOF@1703..1703 "" [] [] + +``` diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/inline_html_invalid.md b/crates/biome_markdown_parser/tests/md_test_suite/ok/inline_html_invalid.md new file mode 100644 index 000000000000..e75a898cc0f0 --- /dev/null +++ b/crates/biome_markdown_parser/tests/md_test_suite/ok/inline_html_invalid.md @@ -0,0 +1,21 @@ +# Invalid Inline HTML Cases + +These should all be parsed as text, NOT as inline HTML. + +## Period in Tag Name +The URL should remain text. +Domain should remain text. + +## Unclosed Tags +Open bracket < followed by text. +Partial tag
    should be text. +Missing value
    should be text. +Backtick in unquoted
    should be text. +Invalid name
    should be text. + +## Invalid Comments +Invalid start should be text. +Double dash should be text maybe. +Starts with arrow should be text. diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/inline_html_invalid.md.snap b/crates/biome_markdown_parser/tests/md_test_suite/ok/inline_html_invalid.md.snap new file mode 100644 index 000000000000..e9c58d4bc941 --- /dev/null +++ b/crates/biome_markdown_parser/tests/md_test_suite/ok/inline_html_invalid.md.snap @@ -0,0 +1,684 @@ +--- +source: crates/biome_markdown_parser/tests/spec_test.rs +assertion_line: 131 +expression: snapshot +--- +## Input + +``` +# Invalid Inline HTML Cases + +These should all be parsed as text, NOT as inline HTML. + +## Period in Tag Name +The URL should remain text. +Domain should remain text. + +## Unclosed Tags +Open bracket < followed by text. +Partial tag
    should be text. +Missing value
    should be text. +Backtick in unquoted
    should be text. +Invalid name
    should be text. + +## Invalid Comments +Invalid start should be text. +Double dash should be text maybe. +Starts with arrow should be text. + +``` + + +## AST + +``` +MdDocument { + bom_token: missing (optional), + value: MdBlockList [ + MdHeader { + before: MdHashList [ + MdHash { + hash_token: HASH@0..1 "#" [] [], + }, + ], + content: MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1..27 " Invalid Inline HTML Cases" [] [], + }, + ], + hard_line: missing (optional), + }, + after: MdHashList [], + }, + MdNewline { + value_token: NEWLINE@27..28 "\n" [] [], + }, + MdNewline { + value_token: NEWLINE@28..29 "\n" [] [], + }, + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@29..84 "These should all be parsed as text, NOT as inline HTML." [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@84..85 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + MdNewline { + value_token: NEWLINE@85..86 "\n" [] [], + }, + MdHeader { + before: MdHashList [ + MdHash { + hash_token: HASH@86..87 "#" [] [], + }, + MdHash { + hash_token: HASH@87..88 "#" [] [], + }, + ], + content: MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@88..107 " Period in Tag Name" [] [], + }, + ], + hard_line: missing (optional), + }, + after: MdHashList [], + }, + MdNewline { + value_token: NEWLINE@107..108 "\n" [] [], + }, + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@108..116 "The URL " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@116..117 "<" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@117..128 "example.com" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@128..129 ">" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@129..149 " should remain text." 
[] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@149..150 "\n" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@150..157 "Domain " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@157..158 "<" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@158..174 "test.example.com" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@174..175 ">" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@175..195 " should remain text." [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@195..196 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + MdNewline { + value_token: NEWLINE@196..197 "\n" [] [], + }, + MdHeader { + before: MdHashList [ + MdHash { + hash_token: HASH@197..198 "#" [] [], + }, + MdHash { + hash_token: HASH@198..199 "#" [] [], + }, + ], + content: MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@199..213 " Unclosed Tags" [] [], + }, + ], + hard_line: missing (optional), + }, + after: MdHashList [], + }, + MdNewline { + value_token: NEWLINE@213..214 "\n" [] [], + }, + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@214..227 "Open bracket " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@227..228 "<" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@228..246 " followed by text." [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@246..247 "\n" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@247..259 "Partial tag " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@259..260 "<" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@260..279 "div should be text." 
[] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@279..280 "\n" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@280..292 "Missing end " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@292..293 "<" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@293..325 "div class=\"test\" should be text." [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@325..326 "\n" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@326..339 "Missing name " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@339..340 "<" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@340..348 "div =foo" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@348..349 ">" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@349..365 " should be text." [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@365..366 "\n" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@366..380 "Missing value " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@380..381 "<" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@381..389 "div data" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@389..390 "-" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@390..392 "x=" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@392..393 ">" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@393..409 " should be text." 
[] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@409..410 "\n" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@410..431 "Backtick in unquoted " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@431..432 "<" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@432..440 "div data" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@440..441 "-" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@441..443 "x=" [] [], + }, + MdInlineCode { + l_tick_token: BACKTICK@443..444 "`" [] [], + content: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@444..445 "a" [] [], + }, + ], + r_tick_token: BACKTICK@445..446 "`" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@446..447 ">" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@447..463 " should be text." [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@463..464 "\n" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@464..477 "Invalid name " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@477..478 "<" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@478..488 "div 1a=foo" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@488..489 ">" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@489..505 " should be text." 
[] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@505..506 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + MdNewline { + value_token: NEWLINE@506..507 "\n" [] [], + }, + MdHeader { + before: MdHashList [ + MdHash { + hash_token: HASH@507..508 "#" [] [], + }, + MdHash { + hash_token: HASH@508..509 "#" [] [], + }, + ], + content: MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@509..526 " Invalid Comments" [] [], + }, + ], + hard_line: missing (optional), + }, + after: MdHashList [], + }, + MdNewline { + value_token: NEWLINE@526..527 "\n" [] [], + }, + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@527..541 "Invalid start " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@541..542 "<" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@542..543 "!" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@543..544 "-" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@544..545 "-" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@545..546 ">" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@546..562 " should be text." [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@562..563 "\n" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@563..575 "Double dash " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@575..576 "<" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@576..577 "!" 
[] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@577..578 "-" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@578..579 "-" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@579..584 " foo " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@584..585 "-" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@585..586 "-" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@586..591 " bar " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@591..592 "-" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@592..593 "-" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@593..594 ">" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@594..616 " should be text maybe." [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@616..617 "\n" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@617..635 "Starts with arrow " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@635..636 "<" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@636..637 "!" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@637..638 "-" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@638..639 "-" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@639..640 "-" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@640..641 ">" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@641..657 " should be text." 
[] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@657..658 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + ], + eof_token: EOF@658..658 "" [] [], +} +``` + +## CST + +``` +0: MD_DOCUMENT@0..658 + 0: (empty) + 1: MD_BLOCK_LIST@0..658 + 0: MD_HEADER@0..27 + 0: MD_HASH_LIST@0..1 + 0: MD_HASH@0..1 + 0: HASH@0..1 "#" [] [] + 1: MD_PARAGRAPH@1..27 + 0: MD_INLINE_ITEM_LIST@1..27 + 0: MD_TEXTUAL@1..27 + 0: MD_TEXTUAL_LITERAL@1..27 " Invalid Inline HTML Cases" [] [] + 1: (empty) + 2: MD_HASH_LIST@27..27 + 1: MD_NEWLINE@27..28 + 0: NEWLINE@27..28 "\n" [] [] + 2: MD_NEWLINE@28..29 + 0: NEWLINE@28..29 "\n" [] [] + 3: MD_PARAGRAPH@29..85 + 0: MD_INLINE_ITEM_LIST@29..85 + 0: MD_TEXTUAL@29..84 + 0: MD_TEXTUAL_LITERAL@29..84 "These should all be parsed as text, NOT as inline HTML." [] [] + 1: MD_TEXTUAL@84..85 + 0: MD_TEXTUAL_LITERAL@84..85 "\n" [] [] + 1: (empty) + 4: MD_NEWLINE@85..86 + 0: NEWLINE@85..86 "\n" [] [] + 5: MD_HEADER@86..107 + 0: MD_HASH_LIST@86..88 + 0: MD_HASH@86..87 + 0: HASH@86..87 "#" [] [] + 1: MD_HASH@87..88 + 0: HASH@87..88 "#" [] [] + 1: MD_PARAGRAPH@88..107 + 0: MD_INLINE_ITEM_LIST@88..107 + 0: MD_TEXTUAL@88..107 + 0: MD_TEXTUAL_LITERAL@88..107 " Period in Tag Name" [] [] + 1: (empty) + 2: MD_HASH_LIST@107..107 + 6: MD_NEWLINE@107..108 + 0: NEWLINE@107..108 "\n" [] [] + 7: MD_PARAGRAPH@108..196 + 0: MD_INLINE_ITEM_LIST@108..196 + 0: MD_TEXTUAL@108..116 + 0: MD_TEXTUAL_LITERAL@108..116 "The URL " [] [] + 1: MD_TEXTUAL@116..117 + 0: MD_TEXTUAL_LITERAL@116..117 "<" [] [] + 2: MD_TEXTUAL@117..128 + 0: MD_TEXTUAL_LITERAL@117..128 "example.com" [] [] + 3: MD_TEXTUAL@128..129 + 0: MD_TEXTUAL_LITERAL@128..129 ">" [] [] + 4: MD_TEXTUAL@129..149 + 0: MD_TEXTUAL_LITERAL@129..149 " should remain text." 
[] [] + 5: MD_TEXTUAL@149..150 + 0: MD_TEXTUAL_LITERAL@149..150 "\n" [] [] + 6: MD_TEXTUAL@150..157 + 0: MD_TEXTUAL_LITERAL@150..157 "Domain " [] [] + 7: MD_TEXTUAL@157..158 + 0: MD_TEXTUAL_LITERAL@157..158 "<" [] [] + 8: MD_TEXTUAL@158..174 + 0: MD_TEXTUAL_LITERAL@158..174 "test.example.com" [] [] + 9: MD_TEXTUAL@174..175 + 0: MD_TEXTUAL_LITERAL@174..175 ">" [] [] + 10: MD_TEXTUAL@175..195 + 0: MD_TEXTUAL_LITERAL@175..195 " should remain text." [] [] + 11: MD_TEXTUAL@195..196 + 0: MD_TEXTUAL_LITERAL@195..196 "\n" [] [] + 1: (empty) + 8: MD_NEWLINE@196..197 + 0: NEWLINE@196..197 "\n" [] [] + 9: MD_HEADER@197..213 + 0: MD_HASH_LIST@197..199 + 0: MD_HASH@197..198 + 0: HASH@197..198 "#" [] [] + 1: MD_HASH@198..199 + 0: HASH@198..199 "#" [] [] + 1: MD_PARAGRAPH@199..213 + 0: MD_INLINE_ITEM_LIST@199..213 + 0: MD_TEXTUAL@199..213 + 0: MD_TEXTUAL_LITERAL@199..213 " Unclosed Tags" [] [] + 1: (empty) + 2: MD_HASH_LIST@213..213 + 10: MD_NEWLINE@213..214 + 0: NEWLINE@213..214 "\n" [] [] + 11: MD_PARAGRAPH@214..506 + 0: MD_INLINE_ITEM_LIST@214..506 + 0: MD_TEXTUAL@214..227 + 0: MD_TEXTUAL_LITERAL@214..227 "Open bracket " [] [] + 1: MD_TEXTUAL@227..228 + 0: MD_TEXTUAL_LITERAL@227..228 "<" [] [] + 2: MD_TEXTUAL@228..246 + 0: MD_TEXTUAL_LITERAL@228..246 " followed by text." [] [] + 3: MD_TEXTUAL@246..247 + 0: MD_TEXTUAL_LITERAL@246..247 "\n" [] [] + 4: MD_TEXTUAL@247..259 + 0: MD_TEXTUAL_LITERAL@247..259 "Partial tag " [] [] + 5: MD_TEXTUAL@259..260 + 0: MD_TEXTUAL_LITERAL@259..260 "<" [] [] + 6: MD_TEXTUAL@260..279 + 0: MD_TEXTUAL_LITERAL@260..279 "div should be text." [] [] + 7: MD_TEXTUAL@279..280 + 0: MD_TEXTUAL_LITERAL@279..280 "\n" [] [] + 8: MD_TEXTUAL@280..292 + 0: MD_TEXTUAL_LITERAL@280..292 "Missing end " [] [] + 9: MD_TEXTUAL@292..293 + 0: MD_TEXTUAL_LITERAL@292..293 "<" [] [] + 10: MD_TEXTUAL@293..325 + 0: MD_TEXTUAL_LITERAL@293..325 "div class=\"test\" should be text." 
[] [] + 11: MD_TEXTUAL@325..326 + 0: MD_TEXTUAL_LITERAL@325..326 "\n" [] [] + 12: MD_TEXTUAL@326..339 + 0: MD_TEXTUAL_LITERAL@326..339 "Missing name " [] [] + 13: MD_TEXTUAL@339..340 + 0: MD_TEXTUAL_LITERAL@339..340 "<" [] [] + 14: MD_TEXTUAL@340..348 + 0: MD_TEXTUAL_LITERAL@340..348 "div =foo" [] [] + 15: MD_TEXTUAL@348..349 + 0: MD_TEXTUAL_LITERAL@348..349 ">" [] [] + 16: MD_TEXTUAL@349..365 + 0: MD_TEXTUAL_LITERAL@349..365 " should be text." [] [] + 17: MD_TEXTUAL@365..366 + 0: MD_TEXTUAL_LITERAL@365..366 "\n" [] [] + 18: MD_TEXTUAL@366..380 + 0: MD_TEXTUAL_LITERAL@366..380 "Missing value " [] [] + 19: MD_TEXTUAL@380..381 + 0: MD_TEXTUAL_LITERAL@380..381 "<" [] [] + 20: MD_TEXTUAL@381..389 + 0: MD_TEXTUAL_LITERAL@381..389 "div data" [] [] + 21: MD_TEXTUAL@389..390 + 0: MD_TEXTUAL_LITERAL@389..390 "-" [] [] + 22: MD_TEXTUAL@390..392 + 0: MD_TEXTUAL_LITERAL@390..392 "x=" [] [] + 23: MD_TEXTUAL@392..393 + 0: MD_TEXTUAL_LITERAL@392..393 ">" [] [] + 24: MD_TEXTUAL@393..409 + 0: MD_TEXTUAL_LITERAL@393..409 " should be text." [] [] + 25: MD_TEXTUAL@409..410 + 0: MD_TEXTUAL_LITERAL@409..410 "\n" [] [] + 26: MD_TEXTUAL@410..431 + 0: MD_TEXTUAL_LITERAL@410..431 "Backtick in unquoted " [] [] + 27: MD_TEXTUAL@431..432 + 0: MD_TEXTUAL_LITERAL@431..432 "<" [] [] + 28: MD_TEXTUAL@432..440 + 0: MD_TEXTUAL_LITERAL@432..440 "div data" [] [] + 29: MD_TEXTUAL@440..441 + 0: MD_TEXTUAL_LITERAL@440..441 "-" [] [] + 30: MD_TEXTUAL@441..443 + 0: MD_TEXTUAL_LITERAL@441..443 "x=" [] [] + 31: MD_INLINE_CODE@443..446 + 0: BACKTICK@443..444 "`" [] [] + 1: MD_INLINE_ITEM_LIST@444..445 + 0: MD_TEXTUAL@444..445 + 0: MD_TEXTUAL_LITERAL@444..445 "a" [] [] + 2: BACKTICK@445..446 "`" [] [] + 32: MD_TEXTUAL@446..447 + 0: MD_TEXTUAL_LITERAL@446..447 ">" [] [] + 33: MD_TEXTUAL@447..463 + 0: MD_TEXTUAL_LITERAL@447..463 " should be text." 
[] [] + 34: MD_TEXTUAL@463..464 + 0: MD_TEXTUAL_LITERAL@463..464 "\n" [] [] + 35: MD_TEXTUAL@464..477 + 0: MD_TEXTUAL_LITERAL@464..477 "Invalid name " [] [] + 36: MD_TEXTUAL@477..478 + 0: MD_TEXTUAL_LITERAL@477..478 "<" [] [] + 37: MD_TEXTUAL@478..488 + 0: MD_TEXTUAL_LITERAL@478..488 "div 1a=foo" [] [] + 38: MD_TEXTUAL@488..489 + 0: MD_TEXTUAL_LITERAL@488..489 ">" [] [] + 39: MD_TEXTUAL@489..505 + 0: MD_TEXTUAL_LITERAL@489..505 " should be text." [] [] + 40: MD_TEXTUAL@505..506 + 0: MD_TEXTUAL_LITERAL@505..506 "\n" [] [] + 1: (empty) + 12: MD_NEWLINE@506..507 + 0: NEWLINE@506..507 "\n" [] [] + 13: MD_HEADER@507..526 + 0: MD_HASH_LIST@507..509 + 0: MD_HASH@507..508 + 0: HASH@507..508 "#" [] [] + 1: MD_HASH@508..509 + 0: HASH@508..509 "#" [] [] + 1: MD_PARAGRAPH@509..526 + 0: MD_INLINE_ITEM_LIST@509..526 + 0: MD_TEXTUAL@509..526 + 0: MD_TEXTUAL_LITERAL@509..526 " Invalid Comments" [] [] + 1: (empty) + 2: MD_HASH_LIST@526..526 + 14: MD_NEWLINE@526..527 + 0: NEWLINE@526..527 "\n" [] [] + 15: MD_PARAGRAPH@527..658 + 0: MD_INLINE_ITEM_LIST@527..658 + 0: MD_TEXTUAL@527..541 + 0: MD_TEXTUAL_LITERAL@527..541 "Invalid start " [] [] + 1: MD_TEXTUAL@541..542 + 0: MD_TEXTUAL_LITERAL@541..542 "<" [] [] + 2: MD_TEXTUAL@542..543 + 0: MD_TEXTUAL_LITERAL@542..543 "!" [] [] + 3: MD_TEXTUAL@543..544 + 0: MD_TEXTUAL_LITERAL@543..544 "-" [] [] + 4: MD_TEXTUAL@544..545 + 0: MD_TEXTUAL_LITERAL@544..545 "-" [] [] + 5: MD_TEXTUAL@545..546 + 0: MD_TEXTUAL_LITERAL@545..546 ">" [] [] + 6: MD_TEXTUAL@546..562 + 0: MD_TEXTUAL_LITERAL@546..562 " should be text." [] [] + 7: MD_TEXTUAL@562..563 + 0: MD_TEXTUAL_LITERAL@562..563 "\n" [] [] + 8: MD_TEXTUAL@563..575 + 0: MD_TEXTUAL_LITERAL@563..575 "Double dash " [] [] + 9: MD_TEXTUAL@575..576 + 0: MD_TEXTUAL_LITERAL@575..576 "<" [] [] + 10: MD_TEXTUAL@576..577 + 0: MD_TEXTUAL_LITERAL@576..577 "!" 
[] [] + 11: MD_TEXTUAL@577..578 + 0: MD_TEXTUAL_LITERAL@577..578 "-" [] [] + 12: MD_TEXTUAL@578..579 + 0: MD_TEXTUAL_LITERAL@578..579 "-" [] [] + 13: MD_TEXTUAL@579..584 + 0: MD_TEXTUAL_LITERAL@579..584 " foo " [] [] + 14: MD_TEXTUAL@584..585 + 0: MD_TEXTUAL_LITERAL@584..585 "-" [] [] + 15: MD_TEXTUAL@585..586 + 0: MD_TEXTUAL_LITERAL@585..586 "-" [] [] + 16: MD_TEXTUAL@586..591 + 0: MD_TEXTUAL_LITERAL@586..591 " bar " [] [] + 17: MD_TEXTUAL@591..592 + 0: MD_TEXTUAL_LITERAL@591..592 "-" [] [] + 18: MD_TEXTUAL@592..593 + 0: MD_TEXTUAL_LITERAL@592..593 "-" [] [] + 19: MD_TEXTUAL@593..594 + 0: MD_TEXTUAL_LITERAL@593..594 ">" [] [] + 20: MD_TEXTUAL@594..616 + 0: MD_TEXTUAL_LITERAL@594..616 " should be text maybe." [] [] + 21: MD_TEXTUAL@616..617 + 0: MD_TEXTUAL_LITERAL@616..617 "\n" [] [] + 22: MD_TEXTUAL@617..635 + 0: MD_TEXTUAL_LITERAL@617..635 "Starts with arrow " [] [] + 23: MD_TEXTUAL@635..636 + 0: MD_TEXTUAL_LITERAL@635..636 "<" [] [] + 24: MD_TEXTUAL@636..637 + 0: MD_TEXTUAL_LITERAL@636..637 "!" [] [] + 25: MD_TEXTUAL@637..638 + 0: MD_TEXTUAL_LITERAL@637..638 "-" [] [] + 26: MD_TEXTUAL@638..639 + 0: MD_TEXTUAL_LITERAL@638..639 "-" [] [] + 27: MD_TEXTUAL@639..640 + 0: MD_TEXTUAL_LITERAL@639..640 "-" [] [] + 28: MD_TEXTUAL@640..641 + 0: MD_TEXTUAL_LITERAL@640..641 ">" [] [] + 29: MD_TEXTUAL@641..657 + 0: MD_TEXTUAL_LITERAL@641..657 " should be text." 
[] [] + 30: MD_TEXTUAL@657..658 + 0: MD_TEXTUAL_LITERAL@657..658 "\n" [] [] + 1: (empty) + 2: EOF@658..658 "" [] [] + +``` diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/inline_link_destination_title.md b/crates/biome_markdown_parser/tests/md_test_suite/ok/inline_link_destination_title.md new file mode 100644 index 000000000000..0f4293b8ad7c --- /dev/null +++ b/crates/biome_markdown_parser/tests/md_test_suite/ok/inline_link_destination_title.md @@ -0,0 +1,11 @@ +Paren destination: [a](b(c)d) + +Angle destination: [a]() + +Title double: [a](u "t") + +Title single: [a](u 't') + +Title paren: [a](u (t)) + +Image title: ![alt](u "t") diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/inline_link_destination_title.md.snap b/crates/biome_markdown_parser/tests/md_test_suite/ok/inline_link_destination_title.md.snap new file mode 100644 index 000000000000..b6f00415b123 --- /dev/null +++ b/crates/biome_markdown_parser/tests/md_test_suite/ok/inline_link_destination_title.md.snap @@ -0,0 +1,467 @@ +--- +source: crates/biome_markdown_parser/tests/spec_test.rs +expression: snapshot +--- +## Input + +``` +Paren destination: [a](b(c)d) + +Angle destination: [a]() + +Title double: [a](u "t") + +Title single: [a](u 't') + +Title paren: [a](u (t)) + +Image title: ![alt](u "t") + +``` + + +## AST + +``` +MdDocument { + bom_token: missing (optional), + value: MdBlockList [ + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@0..19 "Paren destination: " [] [], + }, + MdInlineLink { + l_brack_token: L_BRACK@19..20 "[" [] [], + text: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@20..21 "a" [] [], + }, + ], + r_brack_token: R_BRACK@21..22 "]" [] [], + l_paren_token: L_PAREN@22..23 "(" [] [], + destination: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@23..24 "b" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@24..25 "(" [] [], + }, + MdTextual { + value_token: 
MD_TEXTUAL_LITERAL@25..26 "c" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@26..27 ")" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@27..28 "d" [] [], + }, + ], + title: missing (optional), + r_paren_token: R_PAREN@28..29 ")" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@29..30 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + MdNewline { + value_token: NEWLINE@30..31 "\n" [] [], + }, + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@31..50 "Angle destination: " [] [], + }, + MdInlineLink { + l_brack_token: L_BRACK@50..51 "[" [] [], + text: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@51..52 "a" [] [], + }, + ], + r_brack_token: R_BRACK@52..53 "]" [] [], + l_paren_token: L_PAREN@53..54 "(" [] [], + destination: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@54..55 "<" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@55..56 "b" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@56..57 "(" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@57..58 "c" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@58..59 ")" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@59..60 "d" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@60..61 ">" [] [], + }, + ], + title: missing (optional), + r_paren_token: R_PAREN@61..62 ")" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@62..63 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + MdNewline { + value_token: NEWLINE@63..64 "\n" [] [], + }, + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@64..78 "Title double: " [] [], + }, + MdInlineLink { + l_brack_token: L_BRACK@78..79 "[" [] [], + text: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@79..80 "a" [] [], + }, + ], + r_brack_token: R_BRACK@80..81 "]" [] [], + l_paren_token: L_PAREN@81..82 "(" [] [], + 
destination: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@82..83 "u" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@83..84 " " [] [], + }, + ], + title: MdLinkTitle { + content: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@84..87 "\"t\"" [] [], + }, + ], + }, + r_paren_token: R_PAREN@87..88 ")" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@88..89 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + MdNewline { + value_token: NEWLINE@89..90 "\n" [] [], + }, + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@90..104 "Title single: " [] [], + }, + MdInlineLink { + l_brack_token: L_BRACK@104..105 "[" [] [], + text: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@105..106 "a" [] [], + }, + ], + r_brack_token: R_BRACK@106..107 "]" [] [], + l_paren_token: L_PAREN@107..108 "(" [] [], + destination: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@108..109 "u" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@109..110 " " [] [], + }, + ], + title: MdLinkTitle { + content: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@110..113 "'t'" [] [], + }, + ], + }, + r_paren_token: R_PAREN@113..114 ")" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@114..115 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + MdNewline { + value_token: NEWLINE@115..116 "\n" [] [], + }, + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@116..129 "Title paren: " [] [], + }, + MdInlineLink { + l_brack_token: L_BRACK@129..130 "[" [] [], + text: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@130..131 "a" [] [], + }, + ], + r_brack_token: R_BRACK@131..132 "]" [] [], + l_paren_token: L_PAREN@132..133 "(" [] [], + destination: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@133..134 "u" [] [], + }, + MdTextual { + value_token: 
MD_TEXTUAL_LITERAL@134..135 " " [] [], + }, + ], + title: MdLinkTitle { + content: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@135..136 "(" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@136..137 "t" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@137..138 ")" [] [], + }, + ], + }, + r_paren_token: R_PAREN@138..139 ")" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@139..140 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + MdNewline { + value_token: NEWLINE@140..141 "\n" [] [], + }, + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@141..154 "Image title: " [] [], + }, + MdInlineImage { + excl_token: BANG@154..155 "!" [] [], + l_brack_token: L_BRACK@155..156 "[" [] [], + alt: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@156..159 "alt" [] [], + }, + ], + r_brack_token: R_BRACK@159..160 "]" [] [], + l_paren_token: L_PAREN@160..161 "(" [] [], + destination: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@161..162 "u" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@162..163 " " [] [], + }, + ], + title: MdLinkTitle { + content: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@163..166 "\"t\"" [] [], + }, + ], + }, + r_paren_token: R_PAREN@166..167 ")" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@167..168 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + ], + eof_token: EOF@168..168 "" [] [], +} +``` + +## CST + +``` +0: MD_DOCUMENT@0..168 + 0: (empty) + 1: MD_BLOCK_LIST@0..168 + 0: MD_PARAGRAPH@0..30 + 0: MD_INLINE_ITEM_LIST@0..30 + 0: MD_TEXTUAL@0..19 + 0: MD_TEXTUAL_LITERAL@0..19 "Paren destination: " [] [] + 1: MD_INLINE_LINK@19..29 + 0: L_BRACK@19..20 "[" [] [] + 1: MD_INLINE_ITEM_LIST@20..21 + 0: MD_TEXTUAL@20..21 + 0: MD_TEXTUAL_LITERAL@20..21 "a" [] [] + 2: R_BRACK@21..22 "]" [] [] + 3: L_PAREN@22..23 "(" [] [] + 4: MD_INLINE_ITEM_LIST@23..28 + 0: 
MD_TEXTUAL@23..24 + 0: MD_TEXTUAL_LITERAL@23..24 "b" [] [] + 1: MD_TEXTUAL@24..25 + 0: MD_TEXTUAL_LITERAL@24..25 "(" [] [] + 2: MD_TEXTUAL@25..26 + 0: MD_TEXTUAL_LITERAL@25..26 "c" [] [] + 3: MD_TEXTUAL@26..27 + 0: MD_TEXTUAL_LITERAL@26..27 ")" [] [] + 4: MD_TEXTUAL@27..28 + 0: MD_TEXTUAL_LITERAL@27..28 "d" [] [] + 5: (empty) + 6: R_PAREN@28..29 ")" [] [] + 2: MD_TEXTUAL@29..30 + 0: MD_TEXTUAL_LITERAL@29..30 "\n" [] [] + 1: (empty) + 1: MD_NEWLINE@30..31 + 0: NEWLINE@30..31 "\n" [] [] + 2: MD_PARAGRAPH@31..63 + 0: MD_INLINE_ITEM_LIST@31..63 + 0: MD_TEXTUAL@31..50 + 0: MD_TEXTUAL_LITERAL@31..50 "Angle destination: " [] [] + 1: MD_INLINE_LINK@50..62 + 0: L_BRACK@50..51 "[" [] [] + 1: MD_INLINE_ITEM_LIST@51..52 + 0: MD_TEXTUAL@51..52 + 0: MD_TEXTUAL_LITERAL@51..52 "a" [] [] + 2: R_BRACK@52..53 "]" [] [] + 3: L_PAREN@53..54 "(" [] [] + 4: MD_INLINE_ITEM_LIST@54..61 + 0: MD_TEXTUAL@54..55 + 0: MD_TEXTUAL_LITERAL@54..55 "<" [] [] + 1: MD_TEXTUAL@55..56 + 0: MD_TEXTUAL_LITERAL@55..56 "b" [] [] + 2: MD_TEXTUAL@56..57 + 0: MD_TEXTUAL_LITERAL@56..57 "(" [] [] + 3: MD_TEXTUAL@57..58 + 0: MD_TEXTUAL_LITERAL@57..58 "c" [] [] + 4: MD_TEXTUAL@58..59 + 0: MD_TEXTUAL_LITERAL@58..59 ")" [] [] + 5: MD_TEXTUAL@59..60 + 0: MD_TEXTUAL_LITERAL@59..60 "d" [] [] + 6: MD_TEXTUAL@60..61 + 0: MD_TEXTUAL_LITERAL@60..61 ">" [] [] + 5: (empty) + 6: R_PAREN@61..62 ")" [] [] + 2: MD_TEXTUAL@62..63 + 0: MD_TEXTUAL_LITERAL@62..63 "\n" [] [] + 1: (empty) + 3: MD_NEWLINE@63..64 + 0: NEWLINE@63..64 "\n" [] [] + 4: MD_PARAGRAPH@64..89 + 0: MD_INLINE_ITEM_LIST@64..89 + 0: MD_TEXTUAL@64..78 + 0: MD_TEXTUAL_LITERAL@64..78 "Title double: " [] [] + 1: MD_INLINE_LINK@78..88 + 0: L_BRACK@78..79 "[" [] [] + 1: MD_INLINE_ITEM_LIST@79..80 + 0: MD_TEXTUAL@79..80 + 0: MD_TEXTUAL_LITERAL@79..80 "a" [] [] + 2: R_BRACK@80..81 "]" [] [] + 3: L_PAREN@81..82 "(" [] [] + 4: MD_INLINE_ITEM_LIST@82..84 + 0: MD_TEXTUAL@82..83 + 0: MD_TEXTUAL_LITERAL@82..83 "u" [] [] + 1: MD_TEXTUAL@83..84 + 0: MD_TEXTUAL_LITERAL@83..84 " " 
[] [] + 5: MD_LINK_TITLE@84..87 + 0: MD_INLINE_ITEM_LIST@84..87 + 0: MD_TEXTUAL@84..87 + 0: MD_TEXTUAL_LITERAL@84..87 "\"t\"" [] [] + 6: R_PAREN@87..88 ")" [] [] + 2: MD_TEXTUAL@88..89 + 0: MD_TEXTUAL_LITERAL@88..89 "\n" [] [] + 1: (empty) + 5: MD_NEWLINE@89..90 + 0: NEWLINE@89..90 "\n" [] [] + 6: MD_PARAGRAPH@90..115 + 0: MD_INLINE_ITEM_LIST@90..115 + 0: MD_TEXTUAL@90..104 + 0: MD_TEXTUAL_LITERAL@90..104 "Title single: " [] [] + 1: MD_INLINE_LINK@104..114 + 0: L_BRACK@104..105 "[" [] [] + 1: MD_INLINE_ITEM_LIST@105..106 + 0: MD_TEXTUAL@105..106 + 0: MD_TEXTUAL_LITERAL@105..106 "a" [] [] + 2: R_BRACK@106..107 "]" [] [] + 3: L_PAREN@107..108 "(" [] [] + 4: MD_INLINE_ITEM_LIST@108..110 + 0: MD_TEXTUAL@108..109 + 0: MD_TEXTUAL_LITERAL@108..109 "u" [] [] + 1: MD_TEXTUAL@109..110 + 0: MD_TEXTUAL_LITERAL@109..110 " " [] [] + 5: MD_LINK_TITLE@110..113 + 0: MD_INLINE_ITEM_LIST@110..113 + 0: MD_TEXTUAL@110..113 + 0: MD_TEXTUAL_LITERAL@110..113 "'t'" [] [] + 6: R_PAREN@113..114 ")" [] [] + 2: MD_TEXTUAL@114..115 + 0: MD_TEXTUAL_LITERAL@114..115 "\n" [] [] + 1: (empty) + 7: MD_NEWLINE@115..116 + 0: NEWLINE@115..116 "\n" [] [] + 8: MD_PARAGRAPH@116..140 + 0: MD_INLINE_ITEM_LIST@116..140 + 0: MD_TEXTUAL@116..129 + 0: MD_TEXTUAL_LITERAL@116..129 "Title paren: " [] [] + 1: MD_INLINE_LINK@129..139 + 0: L_BRACK@129..130 "[" [] [] + 1: MD_INLINE_ITEM_LIST@130..131 + 0: MD_TEXTUAL@130..131 + 0: MD_TEXTUAL_LITERAL@130..131 "a" [] [] + 2: R_BRACK@131..132 "]" [] [] + 3: L_PAREN@132..133 "(" [] [] + 4: MD_INLINE_ITEM_LIST@133..135 + 0: MD_TEXTUAL@133..134 + 0: MD_TEXTUAL_LITERAL@133..134 "u" [] [] + 1: MD_TEXTUAL@134..135 + 0: MD_TEXTUAL_LITERAL@134..135 " " [] [] + 5: MD_LINK_TITLE@135..138 + 0: MD_INLINE_ITEM_LIST@135..138 + 0: MD_TEXTUAL@135..136 + 0: MD_TEXTUAL_LITERAL@135..136 "(" [] [] + 1: MD_TEXTUAL@136..137 + 0: MD_TEXTUAL_LITERAL@136..137 "t" [] [] + 2: MD_TEXTUAL@137..138 + 0: MD_TEXTUAL_LITERAL@137..138 ")" [] [] + 6: R_PAREN@138..139 ")" [] [] + 2: MD_TEXTUAL@139..140 + 0: 
MD_TEXTUAL_LITERAL@139..140 "\n" [] [] + 1: (empty) + 9: MD_NEWLINE@140..141 + 0: NEWLINE@140..141 "\n" [] [] + 10: MD_PARAGRAPH@141..168 + 0: MD_INLINE_ITEM_LIST@141..168 + 0: MD_TEXTUAL@141..154 + 0: MD_TEXTUAL_LITERAL@141..154 "Image title: " [] [] + 1: MD_INLINE_IMAGE@154..167 + 0: BANG@154..155 "!" [] [] + 1: L_BRACK@155..156 "[" [] [] + 2: MD_INLINE_ITEM_LIST@156..159 + 0: MD_TEXTUAL@156..159 + 0: MD_TEXTUAL_LITERAL@156..159 "alt" [] [] + 3: R_BRACK@159..160 "]" [] [] + 4: L_PAREN@160..161 "(" [] [] + 5: MD_INLINE_ITEM_LIST@161..163 + 0: MD_TEXTUAL@161..162 + 0: MD_TEXTUAL_LITERAL@161..162 "u" [] [] + 1: MD_TEXTUAL@162..163 + 0: MD_TEXTUAL_LITERAL@162..163 " " [] [] + 6: MD_LINK_TITLE@163..166 + 0: MD_INLINE_ITEM_LIST@163..166 + 0: MD_TEXTUAL@163..166 + 0: MD_TEXTUAL_LITERAL@163..166 "\"t\"" [] [] + 7: R_PAREN@166..167 ")" [] [] + 2: MD_TEXTUAL@167..168 + 0: MD_TEXTUAL_LITERAL@167..168 "\n" [] [] + 1: (empty) + 2: EOF@168..168 "" [] [] + +``` diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/lazy_continuation.md b/crates/biome_markdown_parser/tests/md_test_suite/ok/lazy_continuation.md new file mode 100644 index 000000000000..33298e8d5795 --- /dev/null +++ b/crates/biome_markdown_parser/tests/md_test_suite/ok/lazy_continuation.md @@ -0,0 +1,31 @@ +> This is a quote +that continues lazily + +> Another quote +with lazy continuation +> and explicit continuation + +> Multi-line quote +with more lazy content +and even more content + +> Quote interrupted by heading +# This heading ends the lazy continuation + +> Quote interrupted by setext heading +Setext heading +--- + +> Quote interrupted by list +- This list item ends lazy continuation + +> Quote interrupted by fenced code +``` +code block +``` + +> Quote interrupted by thematic break +--- + +> Quote interrupted by indented code + This is an indented code block diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/lazy_continuation.md.snap 
b/crates/biome_markdown_parser/tests/md_test_suite/ok/lazy_continuation.md.snap new file mode 100644 index 000000000000..60c8b8c17601 --- /dev/null +++ b/crates/biome_markdown_parser/tests/md_test_suite/ok/lazy_continuation.md.snap @@ -0,0 +1,541 @@ +--- +source: crates/biome_markdown_parser/tests/spec_test.rs +expression: snapshot +--- +## Input + +``` +> This is a quote +that continues lazily + +> Another quote +with lazy continuation +> and explicit continuation + +> Multi-line quote +with more lazy content +and even more content + +> Quote interrupted by heading +# This heading ends the lazy continuation + +> Quote interrupted by setext heading +Setext heading +--- + +> Quote interrupted by list +- This list item ends lazy continuation + +> Quote interrupted by fenced code +``` +code block +``` + +> Quote interrupted by thematic break +--- + +> Quote interrupted by indented code + This is an indented code block + +``` + + +## AST + +``` +MdDocument { + bom_token: missing (optional), + value: MdBlockList [ + MdQuote { + marker_token: R_ANGLE@0..1 ">" [] [], + content: MdBlockList [ + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1..17 "This is a quote" [Skipped(" ")] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@17..18 "\n" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@18..39 "that continues lazily" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@39..40 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + ], + }, + MdNewline { + value_token: NEWLINE@40..41 "\n" [] [], + }, + MdQuote { + marker_token: R_ANGLE@41..42 ">" [] [], + content: MdBlockList [ + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@42..56 "Another quote" [Skipped(" ")] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@56..57 "\n" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@57..79 "with lazy continuation" [] [], + }, + MdTextual { + value_token: 
MD_TEXTUAL_LITERAL@79..80 "\n" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@80..107 "and explicit continuation" [Skipped(">"), Skipped(" ")] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@107..108 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + ], + }, + MdNewline { + value_token: NEWLINE@108..109 "\n" [] [], + }, + MdQuote { + marker_token: R_ANGLE@109..110 ">" [] [], + content: MdBlockList [ + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@110..116 "Multi" [Skipped(" ")] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@116..117 "-" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@117..127 "line quote" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@127..128 "\n" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@128..150 "with more lazy content" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@150..151 "\n" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@151..172 "and even more content" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@172..173 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + ], + }, + MdNewline { + value_token: NEWLINE@173..174 "\n" [] [], + }, + MdQuote { + marker_token: R_ANGLE@174..175 ">" [] [], + content: MdBlockList [ + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@175..204 "Quote interrupted by heading" [Skipped(" ")] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@204..205 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + ], + }, + MdHeader { + before: MdHashList [ + MdHash { + hash_token: HASH@205..206 "#" [] [], + }, + ], + content: MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@206..246 " This heading ends the lazy continuation" [] [], + }, + ], + hard_line: missing (optional), + }, + after: MdHashList [], + }, + MdNewline { + value_token: NEWLINE@246..247 "\n" [] 
[], + }, + MdNewline { + value_token: NEWLINE@247..248 "\n" [] [], + }, + MdQuote { + marker_token: R_ANGLE@248..249 ">" [] [], + content: MdBlockList [ + MdSetextHeader { + content: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@249..285 "Quote interrupted by setext heading" [Skipped(" ")] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@285..286 "\n" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@286..300 "Setext heading" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@300..301 "\n" [] [], + }, + ], + underline_token: MD_SETEXT_UNDERLINE_LITERAL@301..304 "---" [] [], + }, + ], + }, + MdNewline { + value_token: NEWLINE@304..305 "\n" [] [], + }, + MdNewline { + value_token: NEWLINE@305..306 "\n" [] [], + }, + MdQuote { + marker_token: R_ANGLE@306..307 ">" [] [], + content: MdBlockList [ + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@307..333 "Quote interrupted by list" [Skipped(" ")] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@333..334 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + ], + }, + MdBulletListItem { + md_bullet_list: MdBulletList [ + MdBullet { + bullet: MINUS@334..335 "-" [] [], + content: MdBlockList [ + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@335..373 " This list item ends lazy continuation" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@373..374 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + ], + }, + ], + }, + MdNewline { + value_token: NEWLINE@374..375 "\n" [] [], + }, + MdQuote { + marker_token: R_ANGLE@375..376 ">" [] [], + content: MdBlockList [ + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@376..409 "Quote interrupted by fenced code" [Skipped(" ")] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@409..410 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + ], + }, + MdFencedCodeBlock { 
+ l_fence: TRIPLE_BACKTICK@410..413 "```" [] [], + code_list: MdCodeNameList [], + content: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@413..414 "\n" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@414..424 "code block" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@424..425 "\n" [] [], + }, + ], + r_fence: TRIPLE_BACKTICK@425..428 "```" [] [], + }, + MdNewline { + value_token: NEWLINE@428..429 "\n" [] [], + }, + MdNewline { + value_token: NEWLINE@429..430 "\n" [] [], + }, + MdQuote { + marker_token: R_ANGLE@430..431 ">" [] [], + content: MdBlockList [ + MdSetextHeader { + content: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@431..467 "Quote interrupted by thematic break" [Skipped(" ")] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@467..468 "\n" [] [], + }, + ], + underline_token: MD_SETEXT_UNDERLINE_LITERAL@468..471 "---" [] [], + }, + ], + }, + MdNewline { + value_token: NEWLINE@471..472 "\n" [] [], + }, + MdNewline { + value_token: NEWLINE@472..473 "\n" [] [], + }, + MdQuote { + marker_token: R_ANGLE@473..474 ">" [] [], + content: MdBlockList [ + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@474..509 "Quote interrupted by indented code" [Skipped(" ")] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@509..510 "\n" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@510..511 " " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@511..512 " " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@512..513 " " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@513..514 " " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@514..544 "This is an indented code block" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@544..545 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + ], + }, + ], + eof_token: EOF@545..545 "" [] [], +} +``` + +## CST + +``` +0: MD_DOCUMENT@0..545 + 0: 
(empty) + 1: MD_BLOCK_LIST@0..545 + 0: MD_QUOTE@0..40 + 0: R_ANGLE@0..1 ">" [] [] + 1: MD_BLOCK_LIST@1..40 + 0: MD_PARAGRAPH@1..40 + 0: MD_INLINE_ITEM_LIST@1..40 + 0: MD_TEXTUAL@1..17 + 0: MD_TEXTUAL_LITERAL@1..17 "This is a quote" [Skipped(" ")] [] + 1: MD_TEXTUAL@17..18 + 0: MD_TEXTUAL_LITERAL@17..18 "\n" [] [] + 2: MD_TEXTUAL@18..39 + 0: MD_TEXTUAL_LITERAL@18..39 "that continues lazily" [] [] + 3: MD_TEXTUAL@39..40 + 0: MD_TEXTUAL_LITERAL@39..40 "\n" [] [] + 1: (empty) + 1: MD_NEWLINE@40..41 + 0: NEWLINE@40..41 "\n" [] [] + 2: MD_QUOTE@41..108 + 0: R_ANGLE@41..42 ">" [] [] + 1: MD_BLOCK_LIST@42..108 + 0: MD_PARAGRAPH@42..108 + 0: MD_INLINE_ITEM_LIST@42..108 + 0: MD_TEXTUAL@42..56 + 0: MD_TEXTUAL_LITERAL@42..56 "Another quote" [Skipped(" ")] [] + 1: MD_TEXTUAL@56..57 + 0: MD_TEXTUAL_LITERAL@56..57 "\n" [] [] + 2: MD_TEXTUAL@57..79 + 0: MD_TEXTUAL_LITERAL@57..79 "with lazy continuation" [] [] + 3: MD_TEXTUAL@79..80 + 0: MD_TEXTUAL_LITERAL@79..80 "\n" [] [] + 4: MD_TEXTUAL@80..107 + 0: MD_TEXTUAL_LITERAL@80..107 "and explicit continuation" [Skipped(">"), Skipped(" ")] [] + 5: MD_TEXTUAL@107..108 + 0: MD_TEXTUAL_LITERAL@107..108 "\n" [] [] + 1: (empty) + 3: MD_NEWLINE@108..109 + 0: NEWLINE@108..109 "\n" [] [] + 4: MD_QUOTE@109..173 + 0: R_ANGLE@109..110 ">" [] [] + 1: MD_BLOCK_LIST@110..173 + 0: MD_PARAGRAPH@110..173 + 0: MD_INLINE_ITEM_LIST@110..173 + 0: MD_TEXTUAL@110..116 + 0: MD_TEXTUAL_LITERAL@110..116 "Multi" [Skipped(" ")] [] + 1: MD_TEXTUAL@116..117 + 0: MD_TEXTUAL_LITERAL@116..117 "-" [] [] + 2: MD_TEXTUAL@117..127 + 0: MD_TEXTUAL_LITERAL@117..127 "line quote" [] [] + 3: MD_TEXTUAL@127..128 + 0: MD_TEXTUAL_LITERAL@127..128 "\n" [] [] + 4: MD_TEXTUAL@128..150 + 0: MD_TEXTUAL_LITERAL@128..150 "with more lazy content" [] [] + 5: MD_TEXTUAL@150..151 + 0: MD_TEXTUAL_LITERAL@150..151 "\n" [] [] + 6: MD_TEXTUAL@151..172 + 0: MD_TEXTUAL_LITERAL@151..172 "and even more content" [] [] + 7: MD_TEXTUAL@172..173 + 0: MD_TEXTUAL_LITERAL@172..173 "\n" [] [] + 1: (empty) + 
5: MD_NEWLINE@173..174 + 0: NEWLINE@173..174 "\n" [] [] + 6: MD_QUOTE@174..205 + 0: R_ANGLE@174..175 ">" [] [] + 1: MD_BLOCK_LIST@175..205 + 0: MD_PARAGRAPH@175..205 + 0: MD_INLINE_ITEM_LIST@175..205 + 0: MD_TEXTUAL@175..204 + 0: MD_TEXTUAL_LITERAL@175..204 "Quote interrupted by heading" [Skipped(" ")] [] + 1: MD_TEXTUAL@204..205 + 0: MD_TEXTUAL_LITERAL@204..205 "\n" [] [] + 1: (empty) + 7: MD_HEADER@205..246 + 0: MD_HASH_LIST@205..206 + 0: MD_HASH@205..206 + 0: HASH@205..206 "#" [] [] + 1: MD_PARAGRAPH@206..246 + 0: MD_INLINE_ITEM_LIST@206..246 + 0: MD_TEXTUAL@206..246 + 0: MD_TEXTUAL_LITERAL@206..246 " This heading ends the lazy continuation" [] [] + 1: (empty) + 2: MD_HASH_LIST@246..246 + 8: MD_NEWLINE@246..247 + 0: NEWLINE@246..247 "\n" [] [] + 9: MD_NEWLINE@247..248 + 0: NEWLINE@247..248 "\n" [] [] + 10: MD_QUOTE@248..304 + 0: R_ANGLE@248..249 ">" [] [] + 1: MD_BLOCK_LIST@249..304 + 0: MD_SETEXT_HEADER@249..304 + 0: MD_INLINE_ITEM_LIST@249..301 + 0: MD_TEXTUAL@249..285 + 0: MD_TEXTUAL_LITERAL@249..285 "Quote interrupted by setext heading" [Skipped(" ")] [] + 1: MD_TEXTUAL@285..286 + 0: MD_TEXTUAL_LITERAL@285..286 "\n" [] [] + 2: MD_TEXTUAL@286..300 + 0: MD_TEXTUAL_LITERAL@286..300 "Setext heading" [] [] + 3: MD_TEXTUAL@300..301 + 0: MD_TEXTUAL_LITERAL@300..301 "\n" [] [] + 1: MD_SETEXT_UNDERLINE_LITERAL@301..304 "---" [] [] + 11: MD_NEWLINE@304..305 + 0: NEWLINE@304..305 "\n" [] [] + 12: MD_NEWLINE@305..306 + 0: NEWLINE@305..306 "\n" [] [] + 13: MD_QUOTE@306..334 + 0: R_ANGLE@306..307 ">" [] [] + 1: MD_BLOCK_LIST@307..334 + 0: MD_PARAGRAPH@307..334 + 0: MD_INLINE_ITEM_LIST@307..334 + 0: MD_TEXTUAL@307..333 + 0: MD_TEXTUAL_LITERAL@307..333 "Quote interrupted by list" [Skipped(" ")] [] + 1: MD_TEXTUAL@333..334 + 0: MD_TEXTUAL_LITERAL@333..334 "\n" [] [] + 1: (empty) + 14: MD_BULLET_LIST_ITEM@334..374 + 0: MD_BULLET_LIST@334..374 + 0: MD_BULLET@334..374 + 0: MINUS@334..335 "-" [] [] + 1: MD_BLOCK_LIST@335..374 + 0: MD_PARAGRAPH@335..374 + 0: 
MD_INLINE_ITEM_LIST@335..374 + 0: MD_TEXTUAL@335..373 + 0: MD_TEXTUAL_LITERAL@335..373 " This list item ends lazy continuation" [] [] + 1: MD_TEXTUAL@373..374 + 0: MD_TEXTUAL_LITERAL@373..374 "\n" [] [] + 1: (empty) + 15: MD_NEWLINE@374..375 + 0: NEWLINE@374..375 "\n" [] [] + 16: MD_QUOTE@375..410 + 0: R_ANGLE@375..376 ">" [] [] + 1: MD_BLOCK_LIST@376..410 + 0: MD_PARAGRAPH@376..410 + 0: MD_INLINE_ITEM_LIST@376..410 + 0: MD_TEXTUAL@376..409 + 0: MD_TEXTUAL_LITERAL@376..409 "Quote interrupted by fenced code" [Skipped(" ")] [] + 1: MD_TEXTUAL@409..410 + 0: MD_TEXTUAL_LITERAL@409..410 "\n" [] [] + 1: (empty) + 17: MD_FENCED_CODE_BLOCK@410..428 + 0: TRIPLE_BACKTICK@410..413 "```" [] [] + 1: MD_CODE_NAME_LIST@413..413 + 2: MD_INLINE_ITEM_LIST@413..425 + 0: MD_TEXTUAL@413..414 + 0: MD_TEXTUAL_LITERAL@413..414 "\n" [] [] + 1: MD_TEXTUAL@414..424 + 0: MD_TEXTUAL_LITERAL@414..424 "code block" [] [] + 2: MD_TEXTUAL@424..425 + 0: MD_TEXTUAL_LITERAL@424..425 "\n" [] [] + 3: TRIPLE_BACKTICK@425..428 "```" [] [] + 18: MD_NEWLINE@428..429 + 0: NEWLINE@428..429 "\n" [] [] + 19: MD_NEWLINE@429..430 + 0: NEWLINE@429..430 "\n" [] [] + 20: MD_QUOTE@430..471 + 0: R_ANGLE@430..431 ">" [] [] + 1: MD_BLOCK_LIST@431..471 + 0: MD_SETEXT_HEADER@431..471 + 0: MD_INLINE_ITEM_LIST@431..468 + 0: MD_TEXTUAL@431..467 + 0: MD_TEXTUAL_LITERAL@431..467 "Quote interrupted by thematic break" [Skipped(" ")] [] + 1: MD_TEXTUAL@467..468 + 0: MD_TEXTUAL_LITERAL@467..468 "\n" [] [] + 1: MD_SETEXT_UNDERLINE_LITERAL@468..471 "---" [] [] + 21: MD_NEWLINE@471..472 + 0: NEWLINE@471..472 "\n" [] [] + 22: MD_NEWLINE@472..473 + 0: NEWLINE@472..473 "\n" [] [] + 23: MD_QUOTE@473..545 + 0: R_ANGLE@473..474 ">" [] [] + 1: MD_BLOCK_LIST@474..545 + 0: MD_PARAGRAPH@474..545 + 0: MD_INLINE_ITEM_LIST@474..545 + 0: MD_TEXTUAL@474..509 + 0: MD_TEXTUAL_LITERAL@474..509 "Quote interrupted by indented code" [Skipped(" ")] [] + 1: MD_TEXTUAL@509..510 + 0: MD_TEXTUAL_LITERAL@509..510 "\n" [] [] + 2: MD_TEXTUAL@510..511 + 0: 
MD_TEXTUAL_LITERAL@510..511 " " [] [] + 3: MD_TEXTUAL@511..512 + 0: MD_TEXTUAL_LITERAL@511..512 " " [] [] + 4: MD_TEXTUAL@512..513 + 0: MD_TEXTUAL_LITERAL@512..513 " " [] [] + 5: MD_TEXTUAL@513..514 + 0: MD_TEXTUAL_LITERAL@513..514 " " [] [] + 6: MD_TEXTUAL@514..544 + 0: MD_TEXTUAL_LITERAL@514..544 "This is an indented code block" [] [] + 7: MD_TEXTUAL@544..545 + 0: MD_TEXTUAL_LITERAL@544..545 "\n" [] [] + 1: (empty) + 2: EOF@545..545 "" [] [] + +``` diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/link_definition.md b/crates/biome_markdown_parser/tests/md_test_suite/ok/link_definition.md new file mode 100644 index 000000000000..49f729678fdb --- /dev/null +++ b/crates/biome_markdown_parser/tests/md_test_suite/ok/link_definition.md @@ -0,0 +1,9 @@ +[example]: https://example.com + +[foo]: /url + + [one-space]: /url + + [two-spaces]: /url + + [three-spaces]: /url diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/link_definition.md.snap b/crates/biome_markdown_parser/tests/md_test_suite/ok/link_definition.md.snap new file mode 100644 index 000000000000..12a28b6df7ad --- /dev/null +++ b/crates/biome_markdown_parser/tests/md_test_suite/ok/link_definition.md.snap @@ -0,0 +1,305 @@ +--- +source: crates/biome_markdown_parser/tests/spec_test.rs +expression: snapshot +--- +## Input + +``` +[example]: https://example.com + +[foo]: /url + + [one-space]: /url + + [two-spaces]: /url + + [three-spaces]: /url + +``` + + +## AST + +``` +MdDocument { + bom_token: missing (optional), + value: MdBlockList [ + MdLinkReferenceDefinition { + l_brack_token: L_BRACK@0..1 "[" [] [], + label: MdLinkLabel { + content: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1..8 "example" [] [], + }, + ], + }, + r_brack_token: R_BRACK@8..9 "]" [] [], + colon_token: COLON@9..10 ":" [] [], + destination: MdLinkDestination { + content: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@10..11 " " [] [], + }, + MdTextual { + value_token: 
MD_TEXTUAL_LITERAL@11..30 "https://example.com" [] [], + }, + ], + }, + title: missing (optional), + }, + MdNewline { + value_token: NEWLINE@30..31 "\n" [] [], + }, + MdNewline { + value_token: NEWLINE@31..32 "\n" [] [], + }, + MdLinkReferenceDefinition { + l_brack_token: L_BRACK@32..33 "[" [] [], + label: MdLinkLabel { + content: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@33..36 "foo" [] [], + }, + ], + }, + r_brack_token: R_BRACK@36..37 "]" [] [], + colon_token: COLON@37..38 ":" [] [], + destination: MdLinkDestination { + content: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@38..39 " " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@39..43 "/url" [] [], + }, + ], + }, + title: missing (optional), + }, + MdNewline { + value_token: NEWLINE@43..44 "\n" [] [], + }, + MdNewline { + value_token: NEWLINE@44..45 "\n" [] [], + }, + MdLinkReferenceDefinition { + l_brack_token: L_BRACK@45..47 "[" [Skipped(" ")] [], + label: MdLinkLabel { + content: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@47..50 "one" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@50..51 "-" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@51..56 "space" [] [], + }, + ], + }, + r_brack_token: R_BRACK@56..57 "]" [] [], + colon_token: COLON@57..58 ":" [] [], + destination: MdLinkDestination { + content: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@58..59 " " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@59..63 "/url" [] [], + }, + ], + }, + title: missing (optional), + }, + MdNewline { + value_token: NEWLINE@63..64 "\n" [] [], + }, + MdNewline { + value_token: NEWLINE@64..65 "\n" [] [], + }, + MdLinkReferenceDefinition { + l_brack_token: L_BRACK@65..68 "[" [Skipped(" "), Skipped(" ")] [], + label: MdLinkLabel { + content: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@68..71 "two" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@71..72 
"-" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@72..78 "spaces" [] [], + }, + ], + }, + r_brack_token: R_BRACK@78..79 "]" [] [], + colon_token: COLON@79..80 ":" [] [], + destination: MdLinkDestination { + content: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@80..81 " " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@81..85 "/url" [] [], + }, + ], + }, + title: missing (optional), + }, + MdNewline { + value_token: NEWLINE@85..86 "\n" [] [], + }, + MdNewline { + value_token: NEWLINE@86..87 "\n" [] [], + }, + MdLinkReferenceDefinition { + l_brack_token: L_BRACK@87..91 "[" [Skipped(" "), Skipped(" "), Skipped(" ")] [], + label: MdLinkLabel { + content: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@91..96 "three" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@96..97 "-" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@97..103 "spaces" [] [], + }, + ], + }, + r_brack_token: R_BRACK@103..104 "]" [] [], + colon_token: COLON@104..105 ":" [] [], + destination: MdLinkDestination { + content: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@105..106 " " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@106..110 "/url" [] [], + }, + ], + }, + title: missing (optional), + }, + MdNewline { + value_token: NEWLINE@110..111 "\n" [] [], + }, + ], + eof_token: EOF@111..111 "" [] [], +} +``` + +## CST + +``` +0: MD_DOCUMENT@0..111 + 0: (empty) + 1: MD_BLOCK_LIST@0..111 + 0: MD_LINK_REFERENCE_DEFINITION@0..30 + 0: L_BRACK@0..1 "[" [] [] + 1: MD_LINK_LABEL@1..8 + 0: MD_INLINE_ITEM_LIST@1..8 + 0: MD_TEXTUAL@1..8 + 0: MD_TEXTUAL_LITERAL@1..8 "example" [] [] + 2: R_BRACK@8..9 "]" [] [] + 3: COLON@9..10 ":" [] [] + 4: MD_LINK_DESTINATION@10..30 + 0: MD_INLINE_ITEM_LIST@10..30 + 0: MD_TEXTUAL@10..11 + 0: MD_TEXTUAL_LITERAL@10..11 " " [] [] + 1: MD_TEXTUAL@11..30 + 0: MD_TEXTUAL_LITERAL@11..30 "https://example.com" [] [] + 5: (empty) + 1: MD_NEWLINE@30..31 + 0: 
NEWLINE@30..31 "\n" [] [] + 2: MD_NEWLINE@31..32 + 0: NEWLINE@31..32 "\n" [] [] + 3: MD_LINK_REFERENCE_DEFINITION@32..43 + 0: L_BRACK@32..33 "[" [] [] + 1: MD_LINK_LABEL@33..36 + 0: MD_INLINE_ITEM_LIST@33..36 + 0: MD_TEXTUAL@33..36 + 0: MD_TEXTUAL_LITERAL@33..36 "foo" [] [] + 2: R_BRACK@36..37 "]" [] [] + 3: COLON@37..38 ":" [] [] + 4: MD_LINK_DESTINATION@38..43 + 0: MD_INLINE_ITEM_LIST@38..43 + 0: MD_TEXTUAL@38..39 + 0: MD_TEXTUAL_LITERAL@38..39 " " [] [] + 1: MD_TEXTUAL@39..43 + 0: MD_TEXTUAL_LITERAL@39..43 "/url" [] [] + 5: (empty) + 4: MD_NEWLINE@43..44 + 0: NEWLINE@43..44 "\n" [] [] + 5: MD_NEWLINE@44..45 + 0: NEWLINE@44..45 "\n" [] [] + 6: MD_LINK_REFERENCE_DEFINITION@45..63 + 0: L_BRACK@45..47 "[" [Skipped(" ")] [] + 1: MD_LINK_LABEL@47..56 + 0: MD_INLINE_ITEM_LIST@47..56 + 0: MD_TEXTUAL@47..50 + 0: MD_TEXTUAL_LITERAL@47..50 "one" [] [] + 1: MD_TEXTUAL@50..51 + 0: MD_TEXTUAL_LITERAL@50..51 "-" [] [] + 2: MD_TEXTUAL@51..56 + 0: MD_TEXTUAL_LITERAL@51..56 "space" [] [] + 2: R_BRACK@56..57 "]" [] [] + 3: COLON@57..58 ":" [] [] + 4: MD_LINK_DESTINATION@58..63 + 0: MD_INLINE_ITEM_LIST@58..63 + 0: MD_TEXTUAL@58..59 + 0: MD_TEXTUAL_LITERAL@58..59 " " [] [] + 1: MD_TEXTUAL@59..63 + 0: MD_TEXTUAL_LITERAL@59..63 "/url" [] [] + 5: (empty) + 7: MD_NEWLINE@63..64 + 0: NEWLINE@63..64 "\n" [] [] + 8: MD_NEWLINE@64..65 + 0: NEWLINE@64..65 "\n" [] [] + 9: MD_LINK_REFERENCE_DEFINITION@65..85 + 0: L_BRACK@65..68 "[" [Skipped(" "), Skipped(" ")] [] + 1: MD_LINK_LABEL@68..78 + 0: MD_INLINE_ITEM_LIST@68..78 + 0: MD_TEXTUAL@68..71 + 0: MD_TEXTUAL_LITERAL@68..71 "two" [] [] + 1: MD_TEXTUAL@71..72 + 0: MD_TEXTUAL_LITERAL@71..72 "-" [] [] + 2: MD_TEXTUAL@72..78 + 0: MD_TEXTUAL_LITERAL@72..78 "spaces" [] [] + 2: R_BRACK@78..79 "]" [] [] + 3: COLON@79..80 ":" [] [] + 4: MD_LINK_DESTINATION@80..85 + 0: MD_INLINE_ITEM_LIST@80..85 + 0: MD_TEXTUAL@80..81 + 0: MD_TEXTUAL_LITERAL@80..81 " " [] [] + 1: MD_TEXTUAL@81..85 + 0: MD_TEXTUAL_LITERAL@81..85 "/url" [] [] + 5: (empty) + 10: 
MD_NEWLINE@85..86 + 0: NEWLINE@85..86 "\n" [] [] + 11: MD_NEWLINE@86..87 + 0: NEWLINE@86..87 "\n" [] [] + 12: MD_LINK_REFERENCE_DEFINITION@87..110 + 0: L_BRACK@87..91 "[" [Skipped(" "), Skipped(" "), Skipped(" ")] [] + 1: MD_LINK_LABEL@91..103 + 0: MD_INLINE_ITEM_LIST@91..103 + 0: MD_TEXTUAL@91..96 + 0: MD_TEXTUAL_LITERAL@91..96 "three" [] [] + 1: MD_TEXTUAL@96..97 + 0: MD_TEXTUAL_LITERAL@96..97 "-" [] [] + 2: MD_TEXTUAL@97..103 + 0: MD_TEXTUAL_LITERAL@97..103 "spaces" [] [] + 2: R_BRACK@103..104 "]" [] [] + 3: COLON@104..105 ":" [] [] + 4: MD_LINK_DESTINATION@105..110 + 0: MD_INLINE_ITEM_LIST@105..110 + 0: MD_TEXTUAL@105..106 + 0: MD_TEXTUAL_LITERAL@105..106 " " [] [] + 1: MD_TEXTUAL@106..110 + 0: MD_TEXTUAL_LITERAL@106..110 "/url" [] [] + 5: (empty) + 13: MD_NEWLINE@110..111 + 0: NEWLINE@110..111 "\n" [] [] + 2: EOF@111..111 "" [] [] + +``` diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/link_definition_edge_cases.md b/crates/biome_markdown_parser/tests/md_test_suite/ok/link_definition_edge_cases.md new file mode 100644 index 000000000000..b8c72626c468 --- /dev/null +++ b/crates/biome_markdown_parser/tests/md_test_suite/ok/link_definition_edge_cases.md @@ -0,0 +1,32 @@ +Some text with trailing spaces + + [after-trailing]: /url + +Another paragraph + + [normal-indent]: /url + +[lambda]: /url + +[nihongo]: /url "Japanese label" + +[title-next-line]: /url + "title on next line" + +[single-quote-next]: /url + 'single quoted' + +[paren-next]: /url + (parenthesized) + +[balanced-parens]: http://example.com/path(with)parens + +[nested-parens]: http://example.com/a(b(c)d)e + +[escaped\]bracket]: /url + +[trailing-spaces]: /url + +[invalid-trailing]: /url invalid + +[angle-trailing]: invalid diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/link_definition_edge_cases.md.snap b/crates/biome_markdown_parser/tests/md_test_suite/ok/link_definition_edge_cases.md.snap new file mode 100644 index 000000000000..2737efcebe70 --- /dev/null +++ 
b/crates/biome_markdown_parser/tests/md_test_suite/ok/link_definition_edge_cases.md.snap @@ -0,0 +1,1058 @@ +--- +source: crates/biome_markdown_parser/tests/spec_test.rs +expression: snapshot +--- +## Input + +``` +Some text with trailing spaces + + [after-trailing]: /url + +Another paragraph + + [normal-indent]: /url + +[lambda]: /url + +[nihongo]: /url "Japanese label" + +[title-next-line]: /url + "title on next line" + +[single-quote-next]: /url + 'single quoted' + +[paren-next]: /url + (parenthesized) + +[balanced-parens]: http://example.com/path(with)parens + +[nested-parens]: http://example.com/a(b(c)d)e + +[escaped\]bracket]: /url + +[trailing-spaces]: /url + +[invalid-trailing]: /url invalid + +[angle-trailing]: invalid + +``` + + +## AST + +``` +MdDocument { + bom_token: missing (optional), + value: MdBlockList [ + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@0..30 "Some text with trailing spaces" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@30..31 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + MdNewline { + value_token: NEWLINE@31..32 "\n" [] [], + }, + MdLinkReferenceDefinition { + l_brack_token: L_BRACK@32..35 "[" [Skipped(" "), Skipped(" ")] [], + label: MdLinkLabel { + content: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@35..40 "after" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@40..41 "-" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@41..49 "trailing" [] [], + }, + ], + }, + r_brack_token: R_BRACK@49..50 "]" [] [], + colon_token: COLON@50..51 ":" [] [], + destination: MdLinkDestination { + content: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@51..52 " " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@52..56 "/url" [] [], + }, + ], + }, + title: missing (optional), + }, + MdNewline { + value_token: NEWLINE@56..57 "\n" [] [], + }, + MdNewline { + value_token: NEWLINE@57..58 "\n" [] [], + }, + 
MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@58..75 "Another paragraph" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@75..76 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + MdNewline { + value_token: NEWLINE@76..77 "\n" [] [], + }, + MdLinkReferenceDefinition { + l_brack_token: L_BRACK@77..80 "[" [Skipped(" "), Skipped(" ")] [], + label: MdLinkLabel { + content: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@80..86 "normal" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@86..87 "-" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@87..93 "indent" [] [], + }, + ], + }, + r_brack_token: R_BRACK@93..94 "]" [] [], + colon_token: COLON@94..95 ":" [] [], + destination: MdLinkDestination { + content: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@95..96 " " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@96..100 "/url" [] [], + }, + ], + }, + title: missing (optional), + }, + MdNewline { + value_token: NEWLINE@100..101 "\n" [] [], + }, + MdNewline { + value_token: NEWLINE@101..102 "\n" [] [], + }, + MdLinkReferenceDefinition { + l_brack_token: L_BRACK@102..103 "[" [] [], + label: MdLinkLabel { + content: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@103..109 "lambda" [] [], + }, + ], + }, + r_brack_token: R_BRACK@109..110 "]" [] [], + colon_token: COLON@110..111 ":" [] [], + destination: MdLinkDestination { + content: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@111..112 " " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@112..116 "/url" [] [], + }, + ], + }, + title: missing (optional), + }, + MdNewline { + value_token: NEWLINE@116..117 "\n" [] [], + }, + MdNewline { + value_token: NEWLINE@117..118 "\n" [] [], + }, + MdLinkReferenceDefinition { + l_brack_token: L_BRACK@118..119 "[" [] [], + label: MdLinkLabel { + content: MdInlineItemList [ + MdTextual { + value_token: 
MD_TEXTUAL_LITERAL@119..126 "nihongo" [] [], + }, + ], + }, + r_brack_token: R_BRACK@126..127 "]" [] [], + colon_token: COLON@127..128 ":" [] [], + destination: MdLinkDestination { + content: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@128..129 " " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@129..133 "/url" [] [], + }, + ], + }, + title: MdLinkTitle { + content: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@133..134 " " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@134..150 "\"Japanese label\"" [] [], + }, + ], + }, + }, + MdNewline { + value_token: NEWLINE@150..151 "\n" [] [], + }, + MdNewline { + value_token: NEWLINE@151..152 "\n" [] [], + }, + MdLinkReferenceDefinition { + l_brack_token: L_BRACK@152..153 "[" [] [], + label: MdLinkLabel { + content: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@153..158 "title" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@158..159 "-" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@159..163 "next" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@163..164 "-" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@164..168 "line" [] [], + }, + ], + }, + r_brack_token: R_BRACK@168..169 "]" [] [], + colon_token: COLON@169..170 ":" [] [], + destination: MdLinkDestination { + content: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@170..171 " " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@171..175 "/url" [] [], + }, + ], + }, + title: MdLinkTitle { + content: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@175..176 "\n" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@176..178 " " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@178..198 "\"title on next line\"" [] [], + }, + ], + }, + }, + MdNewline { + value_token: NEWLINE@198..199 "\n" [] [], + }, + MdNewline { + value_token: NEWLINE@199..200 "\n" [] [], + }, + 
MdLinkReferenceDefinition { + l_brack_token: L_BRACK@200..201 "[" [] [], + label: MdLinkLabel { + content: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@201..207 "single" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@207..208 "-" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@208..213 "quote" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@213..214 "-" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@214..218 "next" [] [], + }, + ], + }, + r_brack_token: R_BRACK@218..219 "]" [] [], + colon_token: COLON@219..220 ":" [] [], + destination: MdLinkDestination { + content: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@220..221 " " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@221..225 "/url" [] [], + }, + ], + }, + title: MdLinkTitle { + content: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@225..226 "\n" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@226..228 " " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@228..243 "'single quoted'" [] [], + }, + ], + }, + }, + MdNewline { + value_token: NEWLINE@243..244 "\n" [] [], + }, + MdNewline { + value_token: NEWLINE@244..245 "\n" [] [], + }, + MdLinkReferenceDefinition { + l_brack_token: L_BRACK@245..246 "[" [] [], + label: MdLinkLabel { + content: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@246..251 "paren" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@251..252 "-" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@252..256 "next" [] [], + }, + ], + }, + r_brack_token: R_BRACK@256..257 "]" [] [], + colon_token: COLON@257..258 ":" [] [], + destination: MdLinkDestination { + content: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@258..259 " " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@259..263 "/url" [] [], + }, + ], + }, + title: MdLinkTitle { + content: MdInlineItemList [ + MdTextual { + 
value_token: MD_TEXTUAL_LITERAL@263..264 "\n" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@264..266 " " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@266..267 "(" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@267..280 "parenthesized" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@280..281 ")" [] [], + }, + ], + }, + }, + MdNewline { + value_token: NEWLINE@281..282 "\n" [] [], + }, + MdNewline { + value_token: NEWLINE@282..283 "\n" [] [], + }, + MdLinkReferenceDefinition { + l_brack_token: L_BRACK@283..284 "[" [] [], + label: MdLinkLabel { + content: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@284..292 "balanced" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@292..293 "-" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@293..299 "parens" [] [], + }, + ], + }, + r_brack_token: R_BRACK@299..300 "]" [] [], + colon_token: COLON@300..301 ":" [] [], + destination: MdLinkDestination { + content: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@301..302 " " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@302..325 "http://example.com/path" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@325..326 "(" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@326..330 "with" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@330..331 ")" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@331..337 "parens" [] [], + }, + ], + }, + title: missing (optional), + }, + MdNewline { + value_token: NEWLINE@337..338 "\n" [] [], + }, + MdNewline { + value_token: NEWLINE@338..339 "\n" [] [], + }, + MdLinkReferenceDefinition { + l_brack_token: L_BRACK@339..340 "[" [] [], + label: MdLinkLabel { + content: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@340..346 "nested" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@346..347 "-" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@347..353 
"parens" [] [], + }, + ], + }, + r_brack_token: R_BRACK@353..354 "]" [] [], + colon_token: COLON@354..355 ":" [] [], + destination: MdLinkDestination { + content: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@355..356 " " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@356..376 "http://example.com/a" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@376..377 "(" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@377..378 "b" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@378..379 "(" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@379..380 "c" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@380..381 ")" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@381..382 "d" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@382..383 ")" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@383..384 "e" [] [], + }, + ], + }, + title: missing (optional), + }, + MdNewline { + value_token: NEWLINE@384..385 "\n" [] [], + }, + MdNewline { + value_token: NEWLINE@385..386 "\n" [] [], + }, + MdLinkReferenceDefinition { + l_brack_token: L_BRACK@386..387 "[" [] [], + label: MdLinkLabel { + content: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@387..394 "escaped" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@394..396 "\\]" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@396..403 "bracket" [] [], + }, + ], + }, + r_brack_token: R_BRACK@403..404 "]" [] [], + colon_token: COLON@404..405 ":" [] [], + destination: MdLinkDestination { + content: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@405..406 " " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@406..410 "/url" [] [], + }, + ], + }, + title: missing (optional), + }, + MdNewline { + value_token: NEWLINE@410..411 "\n" [] [], + }, + MdNewline { + value_token: NEWLINE@411..412 "\n" [] [], + }, + MdLinkReferenceDefinition { + l_brack_token: 
L_BRACK@412..413 "[" [] [], + label: MdLinkLabel { + content: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@413..421 "trailing" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@421..422 "-" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@422..428 "spaces" [] [], + }, + ], + }, + r_brack_token: R_BRACK@428..429 "]" [] [], + colon_token: COLON@429..430 ":" [] [], + destination: MdLinkDestination { + content: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@430..431 " " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@431..435 "/url" [] [], + }, + ], + }, + title: missing (optional), + }, + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@435..438 " " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@438..439 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + MdNewline { + value_token: NEWLINE@439..440 "\n" [] [], + }, + MdLinkReferenceDefinition { + l_brack_token: L_BRACK@440..441 "[" [] [], + label: MdLinkLabel { + content: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@441..448 "invalid" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@448..449 "-" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@449..457 "trailing" [] [], + }, + ], + }, + r_brack_token: R_BRACK@457..458 "]" [] [], + colon_token: COLON@458..459 ":" [] [], + destination: MdLinkDestination { + content: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@459..460 " " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@460..464 "/url" [] [], + }, + ], + }, + title: missing (optional), + }, + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@464..465 " " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@465..472 "invalid" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@472..473 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + MdNewline 
{ + value_token: NEWLINE@473..474 "\n" [] [], + }, + MdLinkReferenceDefinition { + l_brack_token: L_BRACK@474..475 "[" [] [], + label: MdLinkLabel { + content: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@475..480 "angle" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@480..481 "-" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@481..489 "trailing" [] [], + }, + ], + }, + r_brack_token: R_BRACK@489..490 "]" [] [], + colon_token: COLON@490..491 ":" [] [], + destination: MdLinkDestination { + content: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@491..492 " " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@492..493 "<" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@493..497 "/url" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@497..498 ">" [] [], + }, + ], + }, + title: missing (optional), + }, + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@498..499 " " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@499..506 "invalid" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@506..507 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + ], + eof_token: EOF@507..507 "" [] [], +} +``` + +## CST + +``` +0: MD_DOCUMENT@0..507 + 0: (empty) + 1: MD_BLOCK_LIST@0..507 + 0: MD_PARAGRAPH@0..31 + 0: MD_INLINE_ITEM_LIST@0..31 + 0: MD_TEXTUAL@0..30 + 0: MD_TEXTUAL_LITERAL@0..30 "Some text with trailing spaces" [] [] + 1: MD_TEXTUAL@30..31 + 0: MD_TEXTUAL_LITERAL@30..31 "\n" [] [] + 1: (empty) + 1: MD_NEWLINE@31..32 + 0: NEWLINE@31..32 "\n" [] [] + 2: MD_LINK_REFERENCE_DEFINITION@32..56 + 0: L_BRACK@32..35 "[" [Skipped(" "), Skipped(" ")] [] + 1: MD_LINK_LABEL@35..49 + 0: MD_INLINE_ITEM_LIST@35..49 + 0: MD_TEXTUAL@35..40 + 0: MD_TEXTUAL_LITERAL@35..40 "after" [] [] + 1: MD_TEXTUAL@40..41 + 0: MD_TEXTUAL_LITERAL@40..41 "-" [] [] + 2: MD_TEXTUAL@41..49 + 0: MD_TEXTUAL_LITERAL@41..49 "trailing" [] [] + 2: 
R_BRACK@49..50 "]" [] [] + 3: COLON@50..51 ":" [] [] + 4: MD_LINK_DESTINATION@51..56 + 0: MD_INLINE_ITEM_LIST@51..56 + 0: MD_TEXTUAL@51..52 + 0: MD_TEXTUAL_LITERAL@51..52 " " [] [] + 1: MD_TEXTUAL@52..56 + 0: MD_TEXTUAL_LITERAL@52..56 "/url" [] [] + 5: (empty) + 3: MD_NEWLINE@56..57 + 0: NEWLINE@56..57 "\n" [] [] + 4: MD_NEWLINE@57..58 + 0: NEWLINE@57..58 "\n" [] [] + 5: MD_PARAGRAPH@58..76 + 0: MD_INLINE_ITEM_LIST@58..76 + 0: MD_TEXTUAL@58..75 + 0: MD_TEXTUAL_LITERAL@58..75 "Another paragraph" [] [] + 1: MD_TEXTUAL@75..76 + 0: MD_TEXTUAL_LITERAL@75..76 "\n" [] [] + 1: (empty) + 6: MD_NEWLINE@76..77 + 0: NEWLINE@76..77 "\n" [] [] + 7: MD_LINK_REFERENCE_DEFINITION@77..100 + 0: L_BRACK@77..80 "[" [Skipped(" "), Skipped(" ")] [] + 1: MD_LINK_LABEL@80..93 + 0: MD_INLINE_ITEM_LIST@80..93 + 0: MD_TEXTUAL@80..86 + 0: MD_TEXTUAL_LITERAL@80..86 "normal" [] [] + 1: MD_TEXTUAL@86..87 + 0: MD_TEXTUAL_LITERAL@86..87 "-" [] [] + 2: MD_TEXTUAL@87..93 + 0: MD_TEXTUAL_LITERAL@87..93 "indent" [] [] + 2: R_BRACK@93..94 "]" [] [] + 3: COLON@94..95 ":" [] [] + 4: MD_LINK_DESTINATION@95..100 + 0: MD_INLINE_ITEM_LIST@95..100 + 0: MD_TEXTUAL@95..96 + 0: MD_TEXTUAL_LITERAL@95..96 " " [] [] + 1: MD_TEXTUAL@96..100 + 0: MD_TEXTUAL_LITERAL@96..100 "/url" [] [] + 5: (empty) + 8: MD_NEWLINE@100..101 + 0: NEWLINE@100..101 "\n" [] [] + 9: MD_NEWLINE@101..102 + 0: NEWLINE@101..102 "\n" [] [] + 10: MD_LINK_REFERENCE_DEFINITION@102..116 + 0: L_BRACK@102..103 "[" [] [] + 1: MD_LINK_LABEL@103..109 + 0: MD_INLINE_ITEM_LIST@103..109 + 0: MD_TEXTUAL@103..109 + 0: MD_TEXTUAL_LITERAL@103..109 "lambda" [] [] + 2: R_BRACK@109..110 "]" [] [] + 3: COLON@110..111 ":" [] [] + 4: MD_LINK_DESTINATION@111..116 + 0: MD_INLINE_ITEM_LIST@111..116 + 0: MD_TEXTUAL@111..112 + 0: MD_TEXTUAL_LITERAL@111..112 " " [] [] + 1: MD_TEXTUAL@112..116 + 0: MD_TEXTUAL_LITERAL@112..116 "/url" [] [] + 5: (empty) + 11: MD_NEWLINE@116..117 + 0: NEWLINE@116..117 "\n" [] [] + 12: MD_NEWLINE@117..118 + 0: NEWLINE@117..118 "\n" [] [] + 13: 
MD_LINK_REFERENCE_DEFINITION@118..150 + 0: L_BRACK@118..119 "[" [] [] + 1: MD_LINK_LABEL@119..126 + 0: MD_INLINE_ITEM_LIST@119..126 + 0: MD_TEXTUAL@119..126 + 0: MD_TEXTUAL_LITERAL@119..126 "nihongo" [] [] + 2: R_BRACK@126..127 "]" [] [] + 3: COLON@127..128 ":" [] [] + 4: MD_LINK_DESTINATION@128..133 + 0: MD_INLINE_ITEM_LIST@128..133 + 0: MD_TEXTUAL@128..129 + 0: MD_TEXTUAL_LITERAL@128..129 " " [] [] + 1: MD_TEXTUAL@129..133 + 0: MD_TEXTUAL_LITERAL@129..133 "/url" [] [] + 5: MD_LINK_TITLE@133..150 + 0: MD_INLINE_ITEM_LIST@133..150 + 0: MD_TEXTUAL@133..134 + 0: MD_TEXTUAL_LITERAL@133..134 " " [] [] + 1: MD_TEXTUAL@134..150 + 0: MD_TEXTUAL_LITERAL@134..150 "\"Japanese label\"" [] [] + 14: MD_NEWLINE@150..151 + 0: NEWLINE@150..151 "\n" [] [] + 15: MD_NEWLINE@151..152 + 0: NEWLINE@151..152 "\n" [] [] + 16: MD_LINK_REFERENCE_DEFINITION@152..198 + 0: L_BRACK@152..153 "[" [] [] + 1: MD_LINK_LABEL@153..168 + 0: MD_INLINE_ITEM_LIST@153..168 + 0: MD_TEXTUAL@153..158 + 0: MD_TEXTUAL_LITERAL@153..158 "title" [] [] + 1: MD_TEXTUAL@158..159 + 0: MD_TEXTUAL_LITERAL@158..159 "-" [] [] + 2: MD_TEXTUAL@159..163 + 0: MD_TEXTUAL_LITERAL@159..163 "next" [] [] + 3: MD_TEXTUAL@163..164 + 0: MD_TEXTUAL_LITERAL@163..164 "-" [] [] + 4: MD_TEXTUAL@164..168 + 0: MD_TEXTUAL_LITERAL@164..168 "line" [] [] + 2: R_BRACK@168..169 "]" [] [] + 3: COLON@169..170 ":" [] [] + 4: MD_LINK_DESTINATION@170..175 + 0: MD_INLINE_ITEM_LIST@170..175 + 0: MD_TEXTUAL@170..171 + 0: MD_TEXTUAL_LITERAL@170..171 " " [] [] + 1: MD_TEXTUAL@171..175 + 0: MD_TEXTUAL_LITERAL@171..175 "/url" [] [] + 5: MD_LINK_TITLE@175..198 + 0: MD_INLINE_ITEM_LIST@175..198 + 0: MD_TEXTUAL@175..176 + 0: MD_TEXTUAL_LITERAL@175..176 "\n" [] [] + 1: MD_TEXTUAL@176..178 + 0: MD_TEXTUAL_LITERAL@176..178 " " [] [] + 2: MD_TEXTUAL@178..198 + 0: MD_TEXTUAL_LITERAL@178..198 "\"title on next line\"" [] [] + 17: MD_NEWLINE@198..199 + 0: NEWLINE@198..199 "\n" [] [] + 18: MD_NEWLINE@199..200 + 0: NEWLINE@199..200 "\n" [] [] + 19: 
MD_LINK_REFERENCE_DEFINITION@200..243 + 0: L_BRACK@200..201 "[" [] [] + 1: MD_LINK_LABEL@201..218 + 0: MD_INLINE_ITEM_LIST@201..218 + 0: MD_TEXTUAL@201..207 + 0: MD_TEXTUAL_LITERAL@201..207 "single" [] [] + 1: MD_TEXTUAL@207..208 + 0: MD_TEXTUAL_LITERAL@207..208 "-" [] [] + 2: MD_TEXTUAL@208..213 + 0: MD_TEXTUAL_LITERAL@208..213 "quote" [] [] + 3: MD_TEXTUAL@213..214 + 0: MD_TEXTUAL_LITERAL@213..214 "-" [] [] + 4: MD_TEXTUAL@214..218 + 0: MD_TEXTUAL_LITERAL@214..218 "next" [] [] + 2: R_BRACK@218..219 "]" [] [] + 3: COLON@219..220 ":" [] [] + 4: MD_LINK_DESTINATION@220..225 + 0: MD_INLINE_ITEM_LIST@220..225 + 0: MD_TEXTUAL@220..221 + 0: MD_TEXTUAL_LITERAL@220..221 " " [] [] + 1: MD_TEXTUAL@221..225 + 0: MD_TEXTUAL_LITERAL@221..225 "/url" [] [] + 5: MD_LINK_TITLE@225..243 + 0: MD_INLINE_ITEM_LIST@225..243 + 0: MD_TEXTUAL@225..226 + 0: MD_TEXTUAL_LITERAL@225..226 "\n" [] [] + 1: MD_TEXTUAL@226..228 + 0: MD_TEXTUAL_LITERAL@226..228 " " [] [] + 2: MD_TEXTUAL@228..243 + 0: MD_TEXTUAL_LITERAL@228..243 "'single quoted'" [] [] + 20: MD_NEWLINE@243..244 + 0: NEWLINE@243..244 "\n" [] [] + 21: MD_NEWLINE@244..245 + 0: NEWLINE@244..245 "\n" [] [] + 22: MD_LINK_REFERENCE_DEFINITION@245..281 + 0: L_BRACK@245..246 "[" [] [] + 1: MD_LINK_LABEL@246..256 + 0: MD_INLINE_ITEM_LIST@246..256 + 0: MD_TEXTUAL@246..251 + 0: MD_TEXTUAL_LITERAL@246..251 "paren" [] [] + 1: MD_TEXTUAL@251..252 + 0: MD_TEXTUAL_LITERAL@251..252 "-" [] [] + 2: MD_TEXTUAL@252..256 + 0: MD_TEXTUAL_LITERAL@252..256 "next" [] [] + 2: R_BRACK@256..257 "]" [] [] + 3: COLON@257..258 ":" [] [] + 4: MD_LINK_DESTINATION@258..263 + 0: MD_INLINE_ITEM_LIST@258..263 + 0: MD_TEXTUAL@258..259 + 0: MD_TEXTUAL_LITERAL@258..259 " " [] [] + 1: MD_TEXTUAL@259..263 + 0: MD_TEXTUAL_LITERAL@259..263 "/url" [] [] + 5: MD_LINK_TITLE@263..281 + 0: MD_INLINE_ITEM_LIST@263..281 + 0: MD_TEXTUAL@263..264 + 0: MD_TEXTUAL_LITERAL@263..264 "\n" [] [] + 1: MD_TEXTUAL@264..266 + 0: MD_TEXTUAL_LITERAL@264..266 " " [] [] + 2: MD_TEXTUAL@266..267 + 0: 
MD_TEXTUAL_LITERAL@266..267 "(" [] [] + 3: MD_TEXTUAL@267..280 + 0: MD_TEXTUAL_LITERAL@267..280 "parenthesized" [] [] + 4: MD_TEXTUAL@280..281 + 0: MD_TEXTUAL_LITERAL@280..281 ")" [] [] + 23: MD_NEWLINE@281..282 + 0: NEWLINE@281..282 "\n" [] [] + 24: MD_NEWLINE@282..283 + 0: NEWLINE@282..283 "\n" [] [] + 25: MD_LINK_REFERENCE_DEFINITION@283..337 + 0: L_BRACK@283..284 "[" [] [] + 1: MD_LINK_LABEL@284..299 + 0: MD_INLINE_ITEM_LIST@284..299 + 0: MD_TEXTUAL@284..292 + 0: MD_TEXTUAL_LITERAL@284..292 "balanced" [] [] + 1: MD_TEXTUAL@292..293 + 0: MD_TEXTUAL_LITERAL@292..293 "-" [] [] + 2: MD_TEXTUAL@293..299 + 0: MD_TEXTUAL_LITERAL@293..299 "parens" [] [] + 2: R_BRACK@299..300 "]" [] [] + 3: COLON@300..301 ":" [] [] + 4: MD_LINK_DESTINATION@301..337 + 0: MD_INLINE_ITEM_LIST@301..337 + 0: MD_TEXTUAL@301..302 + 0: MD_TEXTUAL_LITERAL@301..302 " " [] [] + 1: MD_TEXTUAL@302..325 + 0: MD_TEXTUAL_LITERAL@302..325 "http://example.com/path" [] [] + 2: MD_TEXTUAL@325..326 + 0: MD_TEXTUAL_LITERAL@325..326 "(" [] [] + 3: MD_TEXTUAL@326..330 + 0: MD_TEXTUAL_LITERAL@326..330 "with" [] [] + 4: MD_TEXTUAL@330..331 + 0: MD_TEXTUAL_LITERAL@330..331 ")" [] [] + 5: MD_TEXTUAL@331..337 + 0: MD_TEXTUAL_LITERAL@331..337 "parens" [] [] + 5: (empty) + 26: MD_NEWLINE@337..338 + 0: NEWLINE@337..338 "\n" [] [] + 27: MD_NEWLINE@338..339 + 0: NEWLINE@338..339 "\n" [] [] + 28: MD_LINK_REFERENCE_DEFINITION@339..384 + 0: L_BRACK@339..340 "[" [] [] + 1: MD_LINK_LABEL@340..353 + 0: MD_INLINE_ITEM_LIST@340..353 + 0: MD_TEXTUAL@340..346 + 0: MD_TEXTUAL_LITERAL@340..346 "nested" [] [] + 1: MD_TEXTUAL@346..347 + 0: MD_TEXTUAL_LITERAL@346..347 "-" [] [] + 2: MD_TEXTUAL@347..353 + 0: MD_TEXTUAL_LITERAL@347..353 "parens" [] [] + 2: R_BRACK@353..354 "]" [] [] + 3: COLON@354..355 ":" [] [] + 4: MD_LINK_DESTINATION@355..384 + 0: MD_INLINE_ITEM_LIST@355..384 + 0: MD_TEXTUAL@355..356 + 0: MD_TEXTUAL_LITERAL@355..356 " " [] [] + 1: MD_TEXTUAL@356..376 + 0: MD_TEXTUAL_LITERAL@356..376 "http://example.com/a" [] [] + 2: 
MD_TEXTUAL@376..377 + 0: MD_TEXTUAL_LITERAL@376..377 "(" [] [] + 3: MD_TEXTUAL@377..378 + 0: MD_TEXTUAL_LITERAL@377..378 "b" [] [] + 4: MD_TEXTUAL@378..379 + 0: MD_TEXTUAL_LITERAL@378..379 "(" [] [] + 5: MD_TEXTUAL@379..380 + 0: MD_TEXTUAL_LITERAL@379..380 "c" [] [] + 6: MD_TEXTUAL@380..381 + 0: MD_TEXTUAL_LITERAL@380..381 ")" [] [] + 7: MD_TEXTUAL@381..382 + 0: MD_TEXTUAL_LITERAL@381..382 "d" [] [] + 8: MD_TEXTUAL@382..383 + 0: MD_TEXTUAL_LITERAL@382..383 ")" [] [] + 9: MD_TEXTUAL@383..384 + 0: MD_TEXTUAL_LITERAL@383..384 "e" [] [] + 5: (empty) + 29: MD_NEWLINE@384..385 + 0: NEWLINE@384..385 "\n" [] [] + 30: MD_NEWLINE@385..386 + 0: NEWLINE@385..386 "\n" [] [] + 31: MD_LINK_REFERENCE_DEFINITION@386..410 + 0: L_BRACK@386..387 "[" [] [] + 1: MD_LINK_LABEL@387..403 + 0: MD_INLINE_ITEM_LIST@387..403 + 0: MD_TEXTUAL@387..394 + 0: MD_TEXTUAL_LITERAL@387..394 "escaped" [] [] + 1: MD_TEXTUAL@394..396 + 0: MD_TEXTUAL_LITERAL@394..396 "\\]" [] [] + 2: MD_TEXTUAL@396..403 + 0: MD_TEXTUAL_LITERAL@396..403 "bracket" [] [] + 2: R_BRACK@403..404 "]" [] [] + 3: COLON@404..405 ":" [] [] + 4: MD_LINK_DESTINATION@405..410 + 0: MD_INLINE_ITEM_LIST@405..410 + 0: MD_TEXTUAL@405..406 + 0: MD_TEXTUAL_LITERAL@405..406 " " [] [] + 1: MD_TEXTUAL@406..410 + 0: MD_TEXTUAL_LITERAL@406..410 "/url" [] [] + 5: (empty) + 32: MD_NEWLINE@410..411 + 0: NEWLINE@410..411 "\n" [] [] + 33: MD_NEWLINE@411..412 + 0: NEWLINE@411..412 "\n" [] [] + 34: MD_LINK_REFERENCE_DEFINITION@412..435 + 0: L_BRACK@412..413 "[" [] [] + 1: MD_LINK_LABEL@413..428 + 0: MD_INLINE_ITEM_LIST@413..428 + 0: MD_TEXTUAL@413..421 + 0: MD_TEXTUAL_LITERAL@413..421 "trailing" [] [] + 1: MD_TEXTUAL@421..422 + 0: MD_TEXTUAL_LITERAL@421..422 "-" [] [] + 2: MD_TEXTUAL@422..428 + 0: MD_TEXTUAL_LITERAL@422..428 "spaces" [] [] + 2: R_BRACK@428..429 "]" [] [] + 3: COLON@429..430 ":" [] [] + 4: MD_LINK_DESTINATION@430..435 + 0: MD_INLINE_ITEM_LIST@430..435 + 0: MD_TEXTUAL@430..431 + 0: MD_TEXTUAL_LITERAL@430..431 " " [] [] + 1: 
MD_TEXTUAL@431..435 + 0: MD_TEXTUAL_LITERAL@431..435 "/url" [] [] + 5: (empty) + 35: MD_PARAGRAPH@435..439 + 0: MD_INLINE_ITEM_LIST@435..439 + 0: MD_TEXTUAL@435..438 + 0: MD_TEXTUAL_LITERAL@435..438 " " [] [] + 1: MD_TEXTUAL@438..439 + 0: MD_TEXTUAL_LITERAL@438..439 "\n" [] [] + 1: (empty) + 36: MD_NEWLINE@439..440 + 0: NEWLINE@439..440 "\n" [] [] + 37: MD_LINK_REFERENCE_DEFINITION@440..464 + 0: L_BRACK@440..441 "[" [] [] + 1: MD_LINK_LABEL@441..457 + 0: MD_INLINE_ITEM_LIST@441..457 + 0: MD_TEXTUAL@441..448 + 0: MD_TEXTUAL_LITERAL@441..448 "invalid" [] [] + 1: MD_TEXTUAL@448..449 + 0: MD_TEXTUAL_LITERAL@448..449 "-" [] [] + 2: MD_TEXTUAL@449..457 + 0: MD_TEXTUAL_LITERAL@449..457 "trailing" [] [] + 2: R_BRACK@457..458 "]" [] [] + 3: COLON@458..459 ":" [] [] + 4: MD_LINK_DESTINATION@459..464 + 0: MD_INLINE_ITEM_LIST@459..464 + 0: MD_TEXTUAL@459..460 + 0: MD_TEXTUAL_LITERAL@459..460 " " [] [] + 1: MD_TEXTUAL@460..464 + 0: MD_TEXTUAL_LITERAL@460..464 "/url" [] [] + 5: (empty) + 38: MD_PARAGRAPH@464..473 + 0: MD_INLINE_ITEM_LIST@464..473 + 0: MD_TEXTUAL@464..465 + 0: MD_TEXTUAL_LITERAL@464..465 " " [] [] + 1: MD_TEXTUAL@465..472 + 0: MD_TEXTUAL_LITERAL@465..472 "invalid" [] [] + 2: MD_TEXTUAL@472..473 + 0: MD_TEXTUAL_LITERAL@472..473 "\n" [] [] + 1: (empty) + 39: MD_NEWLINE@473..474 + 0: NEWLINE@473..474 "\n" [] [] + 40: MD_LINK_REFERENCE_DEFINITION@474..498 + 0: L_BRACK@474..475 "[" [] [] + 1: MD_LINK_LABEL@475..489 + 0: MD_INLINE_ITEM_LIST@475..489 + 0: MD_TEXTUAL@475..480 + 0: MD_TEXTUAL_LITERAL@475..480 "angle" [] [] + 1: MD_TEXTUAL@480..481 + 0: MD_TEXTUAL_LITERAL@480..481 "-" [] [] + 2: MD_TEXTUAL@481..489 + 0: MD_TEXTUAL_LITERAL@481..489 "trailing" [] [] + 2: R_BRACK@489..490 "]" [] [] + 3: COLON@490..491 ":" [] [] + 4: MD_LINK_DESTINATION@491..498 + 0: MD_INLINE_ITEM_LIST@491..498 + 0: MD_TEXTUAL@491..492 + 0: MD_TEXTUAL_LITERAL@491..492 " " [] [] + 1: MD_TEXTUAL@492..493 + 0: MD_TEXTUAL_LITERAL@492..493 "<" [] [] + 2: MD_TEXTUAL@493..497 + 0: 
MD_TEXTUAL_LITERAL@493..497 "/url" [] [] + 3: MD_TEXTUAL@497..498 + 0: MD_TEXTUAL_LITERAL@497..498 ">" [] [] + 5: (empty) + 41: MD_PARAGRAPH@498..507 + 0: MD_INLINE_ITEM_LIST@498..507 + 0: MD_TEXTUAL@498..499 + 0: MD_TEXTUAL_LITERAL@498..499 " " [] [] + 1: MD_TEXTUAL@499..506 + 0: MD_TEXTUAL_LITERAL@499..506 "invalid" [] [] + 2: MD_TEXTUAL@506..507 + 0: MD_TEXTUAL_LITERAL@506..507 "\n" [] [] + 1: (empty) + 2: EOF@507..507 "" [] [] + +``` diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/link_definition_invalid.md b/crates/biome_markdown_parser/tests/md_test_suite/ok/link_definition_invalid.md new file mode 100644 index 000000000000..e62c67e57955 --- /dev/null +++ b/crates/biome_markdown_parser/tests/md_test_suite/ok/link_definition_invalid.md @@ -0,0 +1,14 @@ +These should NOT be parsed as link reference definitions. +They should fall back to paragraph parsing. + +Unterminated angle bracket destination: +[unterminated-angle]: Outer quote +>> Nested quote +>>> Deeply nested + +> Back to outer diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/nested_quote.md.snap b/crates/biome_markdown_parser/tests/md_test_suite/ok/nested_quote.md.snap new file mode 100644 index 000000000000..fa00fa37ed12 --- /dev/null +++ b/crates/biome_markdown_parser/tests/md_test_suite/ok/nested_quote.md.snap @@ -0,0 +1,145 @@ +--- +source: crates/biome_markdown_parser/tests/spec_test.rs +expression: snapshot +--- +## Input + +``` +> Outer quote +>> Nested quote +>>> Deeply nested + +> Back to outer + +``` + + +## AST + +``` +MdDocument { + bom_token: missing (optional), + value: MdBlockList [ + MdQuote { + marker_token: R_ANGLE@0..1 ">" [] [], + content: MdBlockList [ + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1..13 "Outer quote" [Skipped(" ")] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@13..14 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + MdQuote { + marker_token: R_ANGLE@14..16 ">" 
[Skipped(">")] [], + content: MdBlockList [ + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@16..29 "Nested quote" [Skipped(" ")] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@29..30 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + MdQuote { + marker_token: R_ANGLE@30..33 ">" [Skipped(">"), Skipped(">")] [], + content: MdBlockList [ + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@33..47 "Deeply nested" [Skipped(" ")] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@47..48 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + ], + }, + ], + }, + ], + }, + MdNewline { + value_token: NEWLINE@48..49 "\n" [] [], + }, + MdQuote { + marker_token: R_ANGLE@49..50 ">" [] [], + content: MdBlockList [ + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@50..64 "Back to outer" [Skipped(" ")] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@64..65 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + ], + }, + ], + eof_token: EOF@65..65 "" [] [], +} +``` + +## CST + +``` +0: MD_DOCUMENT@0..65 + 0: (empty) + 1: MD_BLOCK_LIST@0..65 + 0: MD_QUOTE@0..48 + 0: R_ANGLE@0..1 ">" [] [] + 1: MD_BLOCK_LIST@1..48 + 0: MD_PARAGRAPH@1..14 + 0: MD_INLINE_ITEM_LIST@1..14 + 0: MD_TEXTUAL@1..13 + 0: MD_TEXTUAL_LITERAL@1..13 "Outer quote" [Skipped(" ")] [] + 1: MD_TEXTUAL@13..14 + 0: MD_TEXTUAL_LITERAL@13..14 "\n" [] [] + 1: (empty) + 1: MD_QUOTE@14..48 + 0: R_ANGLE@14..16 ">" [Skipped(">")] [] + 1: MD_BLOCK_LIST@16..48 + 0: MD_PARAGRAPH@16..30 + 0: MD_INLINE_ITEM_LIST@16..30 + 0: MD_TEXTUAL@16..29 + 0: MD_TEXTUAL_LITERAL@16..29 "Nested quote" [Skipped(" ")] [] + 1: MD_TEXTUAL@29..30 + 0: MD_TEXTUAL_LITERAL@29..30 "\n" [] [] + 1: (empty) + 1: MD_QUOTE@30..48 + 0: R_ANGLE@30..33 ">" [Skipped(">"), Skipped(">")] [] + 1: MD_BLOCK_LIST@33..48 + 0: MD_PARAGRAPH@33..48 + 0: MD_INLINE_ITEM_LIST@33..48 + 0: MD_TEXTUAL@33..47 + 0: 
MD_TEXTUAL_LITERAL@33..47 "Deeply nested" [Skipped(" ")] [] + 1: MD_TEXTUAL@47..48 + 0: MD_TEXTUAL_LITERAL@47..48 "\n" [] [] + 1: (empty) + 1: MD_NEWLINE@48..49 + 0: NEWLINE@48..49 "\n" [] [] + 2: MD_QUOTE@49..65 + 0: R_ANGLE@49..50 ">" [] [] + 1: MD_BLOCK_LIST@50..65 + 0: MD_PARAGRAPH@50..65 + 0: MD_INLINE_ITEM_LIST@50..65 + 0: MD_TEXTUAL@50..64 + 0: MD_TEXTUAL_LITERAL@50..64 "Back to outer" [Skipped(" ")] [] + 1: MD_TEXTUAL@64..65 + 0: MD_TEXTUAL_LITERAL@64..65 "\n" [] [] + 1: (empty) + 2: EOF@65..65 "" [] [] + +``` diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/ordered_list.md b/crates/biome_markdown_parser/tests/md_test_suite/ok/ordered_list.md new file mode 100644 index 000000000000..b250e4984978 --- /dev/null +++ b/crates/biome_markdown_parser/tests/md_test_suite/ok/ordered_list.md @@ -0,0 +1,6 @@ +1. First item +2. Second item +3. Third item + +1) Using parenthesis +2) Another item diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/ordered_list.md.snap b/crates/biome_markdown_parser/tests/md_test_suite/ok/ordered_list.md.snap new file mode 100644 index 000000000000..3a583f0f0ce7 --- /dev/null +++ b/crates/biome_markdown_parser/tests/md_test_suite/ok/ordered_list.md.snap @@ -0,0 +1,178 @@ +--- +source: crates/biome_markdown_parser/tests/spec_test.rs +expression: snapshot +--- +## Input + +``` +1. First item +2. Second item +3. Third item + +1) Using parenthesis +2) Another item + +``` + + +## AST + +``` +MdDocument { + bom_token: missing (optional), + value: MdBlockList [ + MdOrderedListItem { + md_bullet_list: MdBulletList [ + MdBullet { + bullet: MD_ORDERED_LIST_MARKER@0..2 "1." [] [], + content: MdBlockList [ + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@2..13 " First item" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@13..14 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + ], + }, + MdBullet { + bullet: MD_ORDERED_LIST_MARKER@14..16 "2." 
[] [], + content: MdBlockList [ + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@16..28 " Second item" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@28..29 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + ], + }, + MdBullet { + bullet: MD_ORDERED_LIST_MARKER@29..31 "3." [] [], + content: MdBlockList [ + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@31..42 " Third item" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@42..43 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + MdNewline { + value_token: NEWLINE@43..44 "\n" [] [], + }, + ], + }, + MdBullet { + bullet: MD_ORDERED_LIST_MARKER@44..46 "1)" [] [], + content: MdBlockList [ + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@46..64 " Using parenthesis" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@64..65 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + ], + }, + MdBullet { + bullet: MD_ORDERED_LIST_MARKER@65..67 "2)" [] [], + content: MdBlockList [ + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@67..80 " Another item" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@80..81 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + ], + }, + ], + }, + ], + eof_token: EOF@81..81 "" [] [], +} +``` + +## CST + +``` +0: MD_DOCUMENT@0..81 + 0: (empty) + 1: MD_BLOCK_LIST@0..81 + 0: MD_ORDERED_LIST_ITEM@0..81 + 0: MD_BULLET_LIST@0..81 + 0: MD_BULLET@0..14 + 0: MD_ORDERED_LIST_MARKER@0..2 "1." [] [] + 1: MD_BLOCK_LIST@2..14 + 0: MD_PARAGRAPH@2..14 + 0: MD_INLINE_ITEM_LIST@2..14 + 0: MD_TEXTUAL@2..13 + 0: MD_TEXTUAL_LITERAL@2..13 " First item" [] [] + 1: MD_TEXTUAL@13..14 + 0: MD_TEXTUAL_LITERAL@13..14 "\n" [] [] + 1: (empty) + 1: MD_BULLET@14..29 + 0: MD_ORDERED_LIST_MARKER@14..16 "2." 
[] [] + 1: MD_BLOCK_LIST@16..29 + 0: MD_PARAGRAPH@16..29 + 0: MD_INLINE_ITEM_LIST@16..29 + 0: MD_TEXTUAL@16..28 + 0: MD_TEXTUAL_LITERAL@16..28 " Second item" [] [] + 1: MD_TEXTUAL@28..29 + 0: MD_TEXTUAL_LITERAL@28..29 "\n" [] [] + 1: (empty) + 2: MD_BULLET@29..44 + 0: MD_ORDERED_LIST_MARKER@29..31 "3." [] [] + 1: MD_BLOCK_LIST@31..44 + 0: MD_PARAGRAPH@31..43 + 0: MD_INLINE_ITEM_LIST@31..43 + 0: MD_TEXTUAL@31..42 + 0: MD_TEXTUAL_LITERAL@31..42 " Third item" [] [] + 1: MD_TEXTUAL@42..43 + 0: MD_TEXTUAL_LITERAL@42..43 "\n" [] [] + 1: (empty) + 1: MD_NEWLINE@43..44 + 0: NEWLINE@43..44 "\n" [] [] + 3: MD_BULLET@44..65 + 0: MD_ORDERED_LIST_MARKER@44..46 "1)" [] [] + 1: MD_BLOCK_LIST@46..65 + 0: MD_PARAGRAPH@46..65 + 0: MD_INLINE_ITEM_LIST@46..65 + 0: MD_TEXTUAL@46..64 + 0: MD_TEXTUAL_LITERAL@46..64 " Using parenthesis" [] [] + 1: MD_TEXTUAL@64..65 + 0: MD_TEXTUAL_LITERAL@64..65 "\n" [] [] + 1: (empty) + 4: MD_BULLET@65..81 + 0: MD_ORDERED_LIST_MARKER@65..67 "2)" [] [] + 1: MD_BLOCK_LIST@67..81 + 0: MD_PARAGRAPH@67..81 + 0: MD_INLINE_ITEM_LIST@67..81 + 0: MD_TEXTUAL@67..80 + 0: MD_TEXTUAL_LITERAL@67..80 " Another item" [] [] + 1: MD_TEXTUAL@80..81 + 0: MD_TEXTUAL_LITERAL@80..81 "\n" [] [] + 1: (empty) + 2: EOF@81..81 "" [] [] + +``` diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/paragraph.md b/crates/biome_markdown_parser/tests/md_test_suite/ok/paragraph.md new file mode 100644 index 000000000000..b3f891111b93 --- /dev/null +++ b/crates/biome_markdown_parser/tests/md_test_suite/ok/paragraph.md @@ -0,0 +1,3 @@ +This is a simple paragraph. + +This is another paragraph. 
\ No newline at end of file diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/paragraph.md.snap b/crates/biome_markdown_parser/tests/md_test_suite/ok/paragraph.md.snap new file mode 100644 index 000000000000..570c506c539d --- /dev/null +++ b/crates/biome_markdown_parser/tests/md_test_suite/ok/paragraph.md.snap @@ -0,0 +1,69 @@ +--- +source: crates/biome_markdown_parser/tests/spec_test.rs +expression: snapshot +--- +## Input + +``` +This is a simple paragraph. + +This is another paragraph. +``` + + +## AST + +``` +MdDocument { + bom_token: missing (optional), + value: MdBlockList [ + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@0..27 "This is a simple paragraph." [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@27..28 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + MdNewline { + value_token: NEWLINE@28..29 "\n" [] [], + }, + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@29..55 "This is another paragraph." [] [], + }, + ], + hard_line: missing (optional), + }, + ], + eof_token: EOF@55..55 "" [] [], +} +``` + +## CST + +``` +0: MD_DOCUMENT@0..55 + 0: (empty) + 1: MD_BLOCK_LIST@0..55 + 0: MD_PARAGRAPH@0..28 + 0: MD_INLINE_ITEM_LIST@0..28 + 0: MD_TEXTUAL@0..27 + 0: MD_TEXTUAL_LITERAL@0..27 "This is a simple paragraph." [] [] + 1: MD_TEXTUAL@27..28 + 0: MD_TEXTUAL_LITERAL@27..28 "\n" [] [] + 1: (empty) + 1: MD_NEWLINE@28..29 + 0: NEWLINE@28..29 "\n" [] [] + 2: MD_PARAGRAPH@29..55 + 0: MD_INLINE_ITEM_LIST@29..55 + 0: MD_TEXTUAL@29..55 + 0: MD_TEXTUAL_LITERAL@29..55 "This is another paragraph." 
[] [] + 1: (empty) + 2: EOF@55..55 "" [] [] + +``` diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/paragraph_interruption.md b/crates/biome_markdown_parser/tests/md_test_suite/ok/paragraph_interruption.md new file mode 100644 index 000000000000..48c087618be2 --- /dev/null +++ b/crates/biome_markdown_parser/tests/md_test_suite/ok/paragraph_interruption.md @@ -0,0 +1,16 @@ +Paragraph text +# Heading interrupts + +More text here +- List interrupts + +Another para +> Quote interrupts + +Some text +``` +Fence interrupts +``` + +Final text +*** diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/paragraph_interruption.md.snap b/crates/biome_markdown_parser/tests/md_test_suite/ok/paragraph_interruption.md.snap new file mode 100644 index 000000000000..138fa7e3bf5a --- /dev/null +++ b/crates/biome_markdown_parser/tests/md_test_suite/ok/paragraph_interruption.md.snap @@ -0,0 +1,288 @@ +--- +source: crates/biome_markdown_parser/tests/spec_test.rs +expression: snapshot +--- +## Input + +``` +Paragraph text +# Heading interrupts + +More text here +- List interrupts + +Another para +> Quote interrupts + +Some text +``` +Fence interrupts +``` + +Final text +*** + +``` + + +## AST + +``` +MdDocument { + bom_token: missing (optional), + value: MdBlockList [ + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@0..14 "Paragraph text" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@14..15 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + MdHeader { + before: MdHashList [ + MdHash { + hash_token: HASH@15..16 "#" [] [], + }, + ], + content: MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@16..35 " Heading interrupts" [] [], + }, + ], + hard_line: missing (optional), + }, + after: MdHashList [], + }, + MdNewline { + value_token: NEWLINE@35..36 "\n" [] [], + }, + MdNewline { + value_token: NEWLINE@36..37 "\n" [] [], + }, + MdParagraph { + list: 
MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@37..51 "More text here" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@51..52 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + MdBulletListItem { + md_bullet_list: MdBulletList [ + MdBullet { + bullet: MINUS@52..53 "-" [] [], + content: MdBlockList [ + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@53..69 " List interrupts" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@69..70 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + ], + }, + ], + }, + MdNewline { + value_token: NEWLINE@70..71 "\n" [] [], + }, + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@71..83 "Another para" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@83..84 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + MdQuote { + marker_token: R_ANGLE@84..85 ">" [] [], + content: MdBlockList [ + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@85..102 "Quote interrupts" [Skipped(" ")] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@102..103 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + ], + }, + MdNewline { + value_token: NEWLINE@103..104 "\n" [] [], + }, + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@104..113 "Some text" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@113..114 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + MdFencedCodeBlock { + l_fence: TRIPLE_BACKTICK@114..117 "```" [] [], + code_list: MdCodeNameList [], + content: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@117..118 "\n" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@118..134 "Fence interrupts" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@134..135 "\n" [] [], + }, + ], + r_fence: TRIPLE_BACKTICK@135..138 "```" [] [], + }, + 
MdNewline { + value_token: NEWLINE@138..139 "\n" [] [], + }, + MdNewline { + value_token: NEWLINE@139..140 "\n" [] [], + }, + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@140..150 "Final text" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@150..151 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + MdThematicBreakBlock { + value_token: MD_THEMATIC_BREAK_LITERAL@151..154 "***" [] [], + }, + MdNewline { + value_token: NEWLINE@154..155 "\n" [] [], + }, + ], + eof_token: EOF@155..155 "" [] [], +} +``` + +## CST + +``` +0: MD_DOCUMENT@0..155 + 0: (empty) + 1: MD_BLOCK_LIST@0..155 + 0: MD_PARAGRAPH@0..15 + 0: MD_INLINE_ITEM_LIST@0..15 + 0: MD_TEXTUAL@0..14 + 0: MD_TEXTUAL_LITERAL@0..14 "Paragraph text" [] [] + 1: MD_TEXTUAL@14..15 + 0: MD_TEXTUAL_LITERAL@14..15 "\n" [] [] + 1: (empty) + 1: MD_HEADER@15..35 + 0: MD_HASH_LIST@15..16 + 0: MD_HASH@15..16 + 0: HASH@15..16 "#" [] [] + 1: MD_PARAGRAPH@16..35 + 0: MD_INLINE_ITEM_LIST@16..35 + 0: MD_TEXTUAL@16..35 + 0: MD_TEXTUAL_LITERAL@16..35 " Heading interrupts" [] [] + 1: (empty) + 2: MD_HASH_LIST@35..35 + 2: MD_NEWLINE@35..36 + 0: NEWLINE@35..36 "\n" [] [] + 3: MD_NEWLINE@36..37 + 0: NEWLINE@36..37 "\n" [] [] + 4: MD_PARAGRAPH@37..52 + 0: MD_INLINE_ITEM_LIST@37..52 + 0: MD_TEXTUAL@37..51 + 0: MD_TEXTUAL_LITERAL@37..51 "More text here" [] [] + 1: MD_TEXTUAL@51..52 + 0: MD_TEXTUAL_LITERAL@51..52 "\n" [] [] + 1: (empty) + 5: MD_BULLET_LIST_ITEM@52..70 + 0: MD_BULLET_LIST@52..70 + 0: MD_BULLET@52..70 + 0: MINUS@52..53 "-" [] [] + 1: MD_BLOCK_LIST@53..70 + 0: MD_PARAGRAPH@53..70 + 0: MD_INLINE_ITEM_LIST@53..70 + 0: MD_TEXTUAL@53..69 + 0: MD_TEXTUAL_LITERAL@53..69 " List interrupts" [] [] + 1: MD_TEXTUAL@69..70 + 0: MD_TEXTUAL_LITERAL@69..70 "\n" [] [] + 1: (empty) + 6: MD_NEWLINE@70..71 + 0: NEWLINE@70..71 "\n" [] [] + 7: MD_PARAGRAPH@71..84 + 0: MD_INLINE_ITEM_LIST@71..84 + 0: MD_TEXTUAL@71..83 + 0: MD_TEXTUAL_LITERAL@71..83 "Another para" [] [] + 1: 
MD_TEXTUAL@83..84 + 0: MD_TEXTUAL_LITERAL@83..84 "\n" [] [] + 1: (empty) + 8: MD_QUOTE@84..103 + 0: R_ANGLE@84..85 ">" [] [] + 1: MD_BLOCK_LIST@85..103 + 0: MD_PARAGRAPH@85..103 + 0: MD_INLINE_ITEM_LIST@85..103 + 0: MD_TEXTUAL@85..102 + 0: MD_TEXTUAL_LITERAL@85..102 "Quote interrupts" [Skipped(" ")] [] + 1: MD_TEXTUAL@102..103 + 0: MD_TEXTUAL_LITERAL@102..103 "\n" [] [] + 1: (empty) + 9: MD_NEWLINE@103..104 + 0: NEWLINE@103..104 "\n" [] [] + 10: MD_PARAGRAPH@104..114 + 0: MD_INLINE_ITEM_LIST@104..114 + 0: MD_TEXTUAL@104..113 + 0: MD_TEXTUAL_LITERAL@104..113 "Some text" [] [] + 1: MD_TEXTUAL@113..114 + 0: MD_TEXTUAL_LITERAL@113..114 "\n" [] [] + 1: (empty) + 11: MD_FENCED_CODE_BLOCK@114..138 + 0: TRIPLE_BACKTICK@114..117 "```" [] [] + 1: MD_CODE_NAME_LIST@117..117 + 2: MD_INLINE_ITEM_LIST@117..135 + 0: MD_TEXTUAL@117..118 + 0: MD_TEXTUAL_LITERAL@117..118 "\n" [] [] + 1: MD_TEXTUAL@118..134 + 0: MD_TEXTUAL_LITERAL@118..134 "Fence interrupts" [] [] + 2: MD_TEXTUAL@134..135 + 0: MD_TEXTUAL_LITERAL@134..135 "\n" [] [] + 3: TRIPLE_BACKTICK@135..138 "```" [] [] + 12: MD_NEWLINE@138..139 + 0: NEWLINE@138..139 "\n" [] [] + 13: MD_NEWLINE@139..140 + 0: NEWLINE@139..140 "\n" [] [] + 14: MD_PARAGRAPH@140..151 + 0: MD_INLINE_ITEM_LIST@140..151 + 0: MD_TEXTUAL@140..150 + 0: MD_TEXTUAL_LITERAL@140..150 "Final text" [] [] + 1: MD_TEXTUAL@150..151 + 0: MD_TEXTUAL_LITERAL@150..151 "\n" [] [] + 1: (empty) + 15: MD_THEMATIC_BREAK_BLOCK@151..154 + 0: MD_THEMATIC_BREAK_LITERAL@151..154 "***" [] [] + 16: MD_NEWLINE@154..155 + 0: NEWLINE@154..155 "\n" [] [] + 2: EOF@155..155 "" [] [] + +``` diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/reference_link_not_implemented.md b/crates/biome_markdown_parser/tests/md_test_suite/ok/reference_link_not_implemented.md new file mode 100644 index 000000000000..0c7df8482598 --- /dev/null +++ b/crates/biome_markdown_parser/tests/md_test_suite/ok/reference_link_not_implemented.md @@ -0,0 +1 @@ +Reference links without definitions should 
be parsed as text: [text][label]. diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/reference_link_not_implemented.md.snap b/crates/biome_markdown_parser/tests/md_test_suite/ok/reference_link_not_implemented.md.snap new file mode 100644 index 000000000000..aa814e23c11c --- /dev/null +++ b/crates/biome_markdown_parser/tests/md_test_suite/ok/reference_link_not_implemented.md.snap @@ -0,0 +1,86 @@ +--- +source: crates/biome_markdown_parser/tests/spec_test.rs +assertion_line: 131 +expression: snapshot +--- +## Input + +``` +Reference links without definitions should be parsed as text: [text][label]. + +``` + + +## AST + +``` +MdDocument { + bom_token: missing (optional), + value: MdBlockList [ + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@0..62 "Reference links without definitions should be parsed as text: " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@62..63 "[" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@63..67 "text" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@67..68 "]" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@68..69 "[" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@69..74 "label" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@74..75 "]" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@75..76 "." 
[] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@76..77 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + ], + eof_token: EOF@77..77 "" [] [], +} +``` + +## CST + +``` +0: MD_DOCUMENT@0..77 + 0: (empty) + 1: MD_BLOCK_LIST@0..77 + 0: MD_PARAGRAPH@0..77 + 0: MD_INLINE_ITEM_LIST@0..77 + 0: MD_TEXTUAL@0..62 + 0: MD_TEXTUAL_LITERAL@0..62 "Reference links without definitions should be parsed as text: " [] [] + 1: MD_TEXTUAL@62..63 + 0: MD_TEXTUAL_LITERAL@62..63 "[" [] [] + 2: MD_TEXTUAL@63..67 + 0: MD_TEXTUAL_LITERAL@63..67 "text" [] [] + 3: MD_TEXTUAL@67..68 + 0: MD_TEXTUAL_LITERAL@67..68 "]" [] [] + 4: MD_TEXTUAL@68..69 + 0: MD_TEXTUAL_LITERAL@68..69 "[" [] [] + 5: MD_TEXTUAL@69..74 + 0: MD_TEXTUAL_LITERAL@69..74 "label" [] [] + 6: MD_TEXTUAL@74..75 + 0: MD_TEXTUAL_LITERAL@74..75 "]" [] [] + 7: MD_TEXTUAL@75..76 + 0: MD_TEXTUAL_LITERAL@75..76 "." [] [] + 8: MD_TEXTUAL@76..77 + 0: MD_TEXTUAL_LITERAL@76..77 "\n" [] [] + 1: (empty) + 2: EOF@77..77 "" [] [] + +``` diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/reference_links.md b/crates/biome_markdown_parser/tests/md_test_suite/ok/reference_links.md new file mode 100644 index 000000000000..50ee14bae96d --- /dev/null +++ b/crates/biome_markdown_parser/tests/md_test_suite/ok/reference_links.md @@ -0,0 +1,33 @@ +[example]: https://example.com "Example Title" + +Full reference: [click here][example] + +Collapsed reference: [example][] + +Shortcut reference: [example] + +[foo]: https://foo.com + +Image full: ![alt text][foo] + +Image collapsed: ![foo][] + +Image shortcut: ![foo] + +Multiple words in text: [click here for more info][example] + +Empty label (collapsed): [test][] + +[test]: https://test.com + +Shortcut that looks like text: [undefined] + +Mixed with inline: [inline](https://inline.com) and [ref][example] + +Nested in paragraph: This is a paragraph with [a reference][foo] in the middle. 
+ +[Case Label]: https://case.example + +Case-insensitive: [case label] + +Whitespace normalized: [case label] diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/reference_links.md.snap b/crates/biome_markdown_parser/tests/md_test_suite/ok/reference_links.md.snap new file mode 100644 index 000000000000..8895f1a6a9ee --- /dev/null +++ b/crates/biome_markdown_parser/tests/md_test_suite/ok/reference_links.md.snap @@ -0,0 +1,949 @@ +--- +source: crates/biome_markdown_parser/tests/spec_test.rs +expression: snapshot +--- +## Input + +``` +[example]: https://example.com "Example Title" + +Full reference: [click here][example] + +Collapsed reference: [example][] + +Shortcut reference: [example] + +[foo]: https://foo.com + +Image full: ![alt text][foo] + +Image collapsed: ![foo][] + +Image shortcut: ![foo] + +Multiple words in text: [click here for more info][example] + +Empty label (collapsed): [test][] + +[test]: https://test.com + +Shortcut that looks like text: [undefined] + +Mixed with inline: [inline](https://inline.com) and [ref][example] + +Nested in paragraph: This is a paragraph with [a reference][foo] in the middle. 
+ +[Case Label]: https://case.example + +Case-insensitive: [case label] + +Whitespace normalized: [case label] + +``` + + +## AST + +``` +MdDocument { + bom_token: missing (optional), + value: MdBlockList [ + MdLinkReferenceDefinition { + l_brack_token: L_BRACK@0..1 "[" [] [], + label: MdLinkLabel { + content: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@1..8 "example" [] [], + }, + ], + }, + r_brack_token: R_BRACK@8..9 "]" [] [], + colon_token: COLON@9..10 ":" [] [], + destination: MdLinkDestination { + content: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@10..11 " " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@11..30 "https://example.com" [] [], + }, + ], + }, + title: MdLinkTitle { + content: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@30..31 " " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@31..46 "\"Example Title\"" [] [], + }, + ], + }, + }, + MdNewline { + value_token: NEWLINE@46..47 "\n" [] [], + }, + MdNewline { + value_token: NEWLINE@47..48 "\n" [] [], + }, + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@48..64 "Full reference: " [] [], + }, + MdReferenceLink { + l_brack_token: L_BRACK@64..65 "[" [] [], + text: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@65..75 "click here" [] [], + }, + ], + r_brack_token: R_BRACK@75..76 "]" [] [], + label: MdReferenceLinkLabel { + l_brack_token: L_BRACK@76..77 "[" [] [], + label: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@77..84 "example" [] [], + }, + ], + r_brack_token: R_BRACK@84..85 "]" [] [], + }, + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@85..86 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + MdNewline { + value_token: NEWLINE@86..87 "\n" [] [], + }, + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@87..108 "Collapsed reference: " [] [], + }, + MdReferenceLink { + 
l_brack_token: L_BRACK@108..109 "[" [] [], + text: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@109..116 "example" [] [], + }, + ], + r_brack_token: R_BRACK@116..117 "]" [] [], + label: MdReferenceLinkLabel { + l_brack_token: L_BRACK@117..118 "[" [] [], + label: MdInlineItemList [], + r_brack_token: R_BRACK@118..119 "]" [] [], + }, + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@119..120 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + MdNewline { + value_token: NEWLINE@120..121 "\n" [] [], + }, + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@121..141 "Shortcut reference: " [] [], + }, + MdReferenceLink { + l_brack_token: L_BRACK@141..142 "[" [] [], + text: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@142..149 "example" [] [], + }, + ], + r_brack_token: R_BRACK@149..150 "]" [] [], + label: missing (optional), + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@150..151 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + MdNewline { + value_token: NEWLINE@151..152 "\n" [] [], + }, + MdLinkReferenceDefinition { + l_brack_token: L_BRACK@152..153 "[" [] [], + label: MdLinkLabel { + content: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@153..156 "foo" [] [], + }, + ], + }, + r_brack_token: R_BRACK@156..157 "]" [] [], + colon_token: COLON@157..158 ":" [] [], + destination: MdLinkDestination { + content: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@158..159 " " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@159..174 "https://foo.com" [] [], + }, + ], + }, + title: missing (optional), + }, + MdNewline { + value_token: NEWLINE@174..175 "\n" [] [], + }, + MdNewline { + value_token: NEWLINE@175..176 "\n" [] [], + }, + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@176..188 "Image full: " [] [], + }, + MdReferenceImage { + excl_token: BANG@188..189 "!" 
[] [], + l_brack_token: L_BRACK@189..190 "[" [] [], + alt: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@190..198 "alt text" [] [], + }, + ], + r_brack_token: R_BRACK@198..199 "]" [] [], + label: MdReferenceLinkLabel { + l_brack_token: L_BRACK@199..200 "[" [] [], + label: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@200..203 "foo" [] [], + }, + ], + r_brack_token: R_BRACK@203..204 "]" [] [], + }, + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@204..205 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + MdNewline { + value_token: NEWLINE@205..206 "\n" [] [], + }, + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@206..223 "Image collapsed: " [] [], + }, + MdReferenceImage { + excl_token: BANG@223..224 "!" [] [], + l_brack_token: L_BRACK@224..225 "[" [] [], + alt: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@225..228 "foo" [] [], + }, + ], + r_brack_token: R_BRACK@228..229 "]" [] [], + label: MdReferenceLinkLabel { + l_brack_token: L_BRACK@229..230 "[" [] [], + label: MdInlineItemList [], + r_brack_token: R_BRACK@230..231 "]" [] [], + }, + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@231..232 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + MdNewline { + value_token: NEWLINE@232..233 "\n" [] [], + }, + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@233..249 "Image shortcut: " [] [], + }, + MdReferenceImage { + excl_token: BANG@249..250 "!" 
[] [], + l_brack_token: L_BRACK@250..251 "[" [] [], + alt: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@251..254 "foo" [] [], + }, + ], + r_brack_token: R_BRACK@254..255 "]" [] [], + label: missing (optional), + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@255..256 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + MdNewline { + value_token: NEWLINE@256..257 "\n" [] [], + }, + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@257..281 "Multiple words in text: " [] [], + }, + MdReferenceLink { + l_brack_token: L_BRACK@281..282 "[" [] [], + text: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@282..306 "click here for more info" [] [], + }, + ], + r_brack_token: R_BRACK@306..307 "]" [] [], + label: MdReferenceLinkLabel { + l_brack_token: L_BRACK@307..308 "[" [] [], + label: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@308..315 "example" [] [], + }, + ], + r_brack_token: R_BRACK@315..316 "]" [] [], + }, + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@316..317 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + MdNewline { + value_token: NEWLINE@317..318 "\n" [] [], + }, + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@318..330 "Empty label " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@330..331 "(" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@331..340 "collapsed" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@340..341 ")" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@341..342 ":" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@342..343 " " [] [], + }, + MdReferenceLink { + l_brack_token: L_BRACK@343..344 "[" [] [], + text: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@344..348 "test" [] [], + }, + ], + r_brack_token: R_BRACK@348..349 "]" [] [], + label: MdReferenceLinkLabel { + l_brack_token: 
L_BRACK@349..350 "[" [] [], + label: MdInlineItemList [], + r_brack_token: R_BRACK@350..351 "]" [] [], + }, + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@351..352 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + MdNewline { + value_token: NEWLINE@352..353 "\n" [] [], + }, + MdLinkReferenceDefinition { + l_brack_token: L_BRACK@353..354 "[" [] [], + label: MdLinkLabel { + content: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@354..358 "test" [] [], + }, + ], + }, + r_brack_token: R_BRACK@358..359 "]" [] [], + colon_token: COLON@359..360 ":" [] [], + destination: MdLinkDestination { + content: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@360..361 " " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@361..377 "https://test.com" [] [], + }, + ], + }, + title: missing (optional), + }, + MdNewline { + value_token: NEWLINE@377..378 "\n" [] [], + }, + MdNewline { + value_token: NEWLINE@378..379 "\n" [] [], + }, + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@379..410 "Shortcut that looks like text: " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@410..411 "[" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@411..420 "undefined" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@420..421 "]" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@421..422 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + MdNewline { + value_token: NEWLINE@422..423 "\n" [] [], + }, + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@423..442 "Mixed with inline: " [] [], + }, + MdInlineLink { + l_brack_token: L_BRACK@442..443 "[" [] [], + text: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@443..449 "inline" [] [], + }, + ], + r_brack_token: R_BRACK@449..450 "]" [] [], + l_paren_token: L_PAREN@450..451 "(" [] [], + destination: MdInlineItemList [ + MdTextual { + 
value_token: MD_TEXTUAL_LITERAL@451..469 "https://inline.com" [] [], + }, + ], + title: missing (optional), + r_paren_token: R_PAREN@469..470 ")" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@470..475 " and " [] [], + }, + MdReferenceLink { + l_brack_token: L_BRACK@475..476 "[" [] [], + text: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@476..479 "ref" [] [], + }, + ], + r_brack_token: R_BRACK@479..480 "]" [] [], + label: MdReferenceLinkLabel { + l_brack_token: L_BRACK@480..481 "[" [] [], + label: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@481..488 "example" [] [], + }, + ], + r_brack_token: R_BRACK@488..489 "]" [] [], + }, + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@489..490 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + MdNewline { + value_token: NEWLINE@490..491 "\n" [] [], + }, + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@491..537 "Nested in paragraph: This is a paragraph with " [] [], + }, + MdReferenceLink { + l_brack_token: L_BRACK@537..538 "[" [] [], + text: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@538..549 "a reference" [] [], + }, + ], + r_brack_token: R_BRACK@549..550 "]" [] [], + label: MdReferenceLinkLabel { + l_brack_token: L_BRACK@550..551 "[" [] [], + label: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@551..554 "foo" [] [], + }, + ], + r_brack_token: R_BRACK@554..555 "]" [] [], + }, + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@555..570 " in the middle." 
[] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@570..571 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + MdNewline { + value_token: NEWLINE@571..572 "\n" [] [], + }, + MdLinkReferenceDefinition { + l_brack_token: L_BRACK@572..573 "[" [] [], + label: MdLinkLabel { + content: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@573..583 "Case Label" [] [], + }, + ], + }, + r_brack_token: R_BRACK@583..584 "]" [] [], + colon_token: COLON@584..585 ":" [] [], + destination: MdLinkDestination { + content: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@585..586 " " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@586..606 "https://case.example" [] [], + }, + ], + }, + title: missing (optional), + }, + MdNewline { + value_token: NEWLINE@606..607 "\n" [] [], + }, + MdNewline { + value_token: NEWLINE@607..608 "\n" [] [], + }, + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@608..612 "Case" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@612..613 "-" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@613..626 "insensitive: " [] [], + }, + MdReferenceLink { + l_brack_token: L_BRACK@626..627 "[" [] [], + text: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@627..637 "case label" [] [], + }, + ], + r_brack_token: R_BRACK@637..638 "]" [] [], + label: missing (optional), + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@638..639 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + MdNewline { + value_token: NEWLINE@639..640 "\n" [] [], + }, + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@640..663 "Whitespace normalized: " [] [], + }, + MdReferenceLink { + l_brack_token: L_BRACK@663..664 "[" [] [], + text: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@664..676 "case label" [] [], + }, + ], + r_brack_token: R_BRACK@676..677 "]" [] [], + label: missing 
(optional), + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@677..678 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + ], + eof_token: EOF@678..678 "" [] [], +} +``` + +## CST + +``` +0: MD_DOCUMENT@0..678 + 0: (empty) + 1: MD_BLOCK_LIST@0..678 + 0: MD_LINK_REFERENCE_DEFINITION@0..46 + 0: L_BRACK@0..1 "[" [] [] + 1: MD_LINK_LABEL@1..8 + 0: MD_INLINE_ITEM_LIST@1..8 + 0: MD_TEXTUAL@1..8 + 0: MD_TEXTUAL_LITERAL@1..8 "example" [] [] + 2: R_BRACK@8..9 "]" [] [] + 3: COLON@9..10 ":" [] [] + 4: MD_LINK_DESTINATION@10..30 + 0: MD_INLINE_ITEM_LIST@10..30 + 0: MD_TEXTUAL@10..11 + 0: MD_TEXTUAL_LITERAL@10..11 " " [] [] + 1: MD_TEXTUAL@11..30 + 0: MD_TEXTUAL_LITERAL@11..30 "https://example.com" [] [] + 5: MD_LINK_TITLE@30..46 + 0: MD_INLINE_ITEM_LIST@30..46 + 0: MD_TEXTUAL@30..31 + 0: MD_TEXTUAL_LITERAL@30..31 " " [] [] + 1: MD_TEXTUAL@31..46 + 0: MD_TEXTUAL_LITERAL@31..46 "\"Example Title\"" [] [] + 1: MD_NEWLINE@46..47 + 0: NEWLINE@46..47 "\n" [] [] + 2: MD_NEWLINE@47..48 + 0: NEWLINE@47..48 "\n" [] [] + 3: MD_PARAGRAPH@48..86 + 0: MD_INLINE_ITEM_LIST@48..86 + 0: MD_TEXTUAL@48..64 + 0: MD_TEXTUAL_LITERAL@48..64 "Full reference: " [] [] + 1: MD_REFERENCE_LINK@64..85 + 0: L_BRACK@64..65 "[" [] [] + 1: MD_INLINE_ITEM_LIST@65..75 + 0: MD_TEXTUAL@65..75 + 0: MD_TEXTUAL_LITERAL@65..75 "click here" [] [] + 2: R_BRACK@75..76 "]" [] [] + 3: MD_REFERENCE_LINK_LABEL@76..85 + 0: L_BRACK@76..77 "[" [] [] + 1: MD_INLINE_ITEM_LIST@77..84 + 0: MD_TEXTUAL@77..84 + 0: MD_TEXTUAL_LITERAL@77..84 "example" [] [] + 2: R_BRACK@84..85 "]" [] [] + 2: MD_TEXTUAL@85..86 + 0: MD_TEXTUAL_LITERAL@85..86 "\n" [] [] + 1: (empty) + 4: MD_NEWLINE@86..87 + 0: NEWLINE@86..87 "\n" [] [] + 5: MD_PARAGRAPH@87..120 + 0: MD_INLINE_ITEM_LIST@87..120 + 0: MD_TEXTUAL@87..108 + 0: MD_TEXTUAL_LITERAL@87..108 "Collapsed reference: " [] [] + 1: MD_REFERENCE_LINK@108..119 + 0: L_BRACK@108..109 "[" [] [] + 1: MD_INLINE_ITEM_LIST@109..116 + 0: MD_TEXTUAL@109..116 + 0: MD_TEXTUAL_LITERAL@109..116 "example" [] 
[] + 2: R_BRACK@116..117 "]" [] [] + 3: MD_REFERENCE_LINK_LABEL@117..119 + 0: L_BRACK@117..118 "[" [] [] + 1: MD_INLINE_ITEM_LIST@118..118 + 2: R_BRACK@118..119 "]" [] [] + 2: MD_TEXTUAL@119..120 + 0: MD_TEXTUAL_LITERAL@119..120 "\n" [] [] + 1: (empty) + 6: MD_NEWLINE@120..121 + 0: NEWLINE@120..121 "\n" [] [] + 7: MD_PARAGRAPH@121..151 + 0: MD_INLINE_ITEM_LIST@121..151 + 0: MD_TEXTUAL@121..141 + 0: MD_TEXTUAL_LITERAL@121..141 "Shortcut reference: " [] [] + 1: MD_REFERENCE_LINK@141..150 + 0: L_BRACK@141..142 "[" [] [] + 1: MD_INLINE_ITEM_LIST@142..149 + 0: MD_TEXTUAL@142..149 + 0: MD_TEXTUAL_LITERAL@142..149 "example" [] [] + 2: R_BRACK@149..150 "]" [] [] + 3: (empty) + 2: MD_TEXTUAL@150..151 + 0: MD_TEXTUAL_LITERAL@150..151 "\n" [] [] + 1: (empty) + 8: MD_NEWLINE@151..152 + 0: NEWLINE@151..152 "\n" [] [] + 9: MD_LINK_REFERENCE_DEFINITION@152..174 + 0: L_BRACK@152..153 "[" [] [] + 1: MD_LINK_LABEL@153..156 + 0: MD_INLINE_ITEM_LIST@153..156 + 0: MD_TEXTUAL@153..156 + 0: MD_TEXTUAL_LITERAL@153..156 "foo" [] [] + 2: R_BRACK@156..157 "]" [] [] + 3: COLON@157..158 ":" [] [] + 4: MD_LINK_DESTINATION@158..174 + 0: MD_INLINE_ITEM_LIST@158..174 + 0: MD_TEXTUAL@158..159 + 0: MD_TEXTUAL_LITERAL@158..159 " " [] [] + 1: MD_TEXTUAL@159..174 + 0: MD_TEXTUAL_LITERAL@159..174 "https://foo.com" [] [] + 5: (empty) + 10: MD_NEWLINE@174..175 + 0: NEWLINE@174..175 "\n" [] [] + 11: MD_NEWLINE@175..176 + 0: NEWLINE@175..176 "\n" [] [] + 12: MD_PARAGRAPH@176..205 + 0: MD_INLINE_ITEM_LIST@176..205 + 0: MD_TEXTUAL@176..188 + 0: MD_TEXTUAL_LITERAL@176..188 "Image full: " [] [] + 1: MD_REFERENCE_IMAGE@188..204 + 0: BANG@188..189 "!" 
[] [] + 1: L_BRACK@189..190 "[" [] [] + 2: MD_INLINE_ITEM_LIST@190..198 + 0: MD_TEXTUAL@190..198 + 0: MD_TEXTUAL_LITERAL@190..198 "alt text" [] [] + 3: R_BRACK@198..199 "]" [] [] + 4: MD_REFERENCE_LINK_LABEL@199..204 + 0: L_BRACK@199..200 "[" [] [] + 1: MD_INLINE_ITEM_LIST@200..203 + 0: MD_TEXTUAL@200..203 + 0: MD_TEXTUAL_LITERAL@200..203 "foo" [] [] + 2: R_BRACK@203..204 "]" [] [] + 2: MD_TEXTUAL@204..205 + 0: MD_TEXTUAL_LITERAL@204..205 "\n" [] [] + 1: (empty) + 13: MD_NEWLINE@205..206 + 0: NEWLINE@205..206 "\n" [] [] + 14: MD_PARAGRAPH@206..232 + 0: MD_INLINE_ITEM_LIST@206..232 + 0: MD_TEXTUAL@206..223 + 0: MD_TEXTUAL_LITERAL@206..223 "Image collapsed: " [] [] + 1: MD_REFERENCE_IMAGE@223..231 + 0: BANG@223..224 "!" [] [] + 1: L_BRACK@224..225 "[" [] [] + 2: MD_INLINE_ITEM_LIST@225..228 + 0: MD_TEXTUAL@225..228 + 0: MD_TEXTUAL_LITERAL@225..228 "foo" [] [] + 3: R_BRACK@228..229 "]" [] [] + 4: MD_REFERENCE_LINK_LABEL@229..231 + 0: L_BRACK@229..230 "[" [] [] + 1: MD_INLINE_ITEM_LIST@230..230 + 2: R_BRACK@230..231 "]" [] [] + 2: MD_TEXTUAL@231..232 + 0: MD_TEXTUAL_LITERAL@231..232 "\n" [] [] + 1: (empty) + 15: MD_NEWLINE@232..233 + 0: NEWLINE@232..233 "\n" [] [] + 16: MD_PARAGRAPH@233..256 + 0: MD_INLINE_ITEM_LIST@233..256 + 0: MD_TEXTUAL@233..249 + 0: MD_TEXTUAL_LITERAL@233..249 "Image shortcut: " [] [] + 1: MD_REFERENCE_IMAGE@249..255 + 0: BANG@249..250 "!" 
[] [] + 1: L_BRACK@250..251 "[" [] [] + 2: MD_INLINE_ITEM_LIST@251..254 + 0: MD_TEXTUAL@251..254 + 0: MD_TEXTUAL_LITERAL@251..254 "foo" [] [] + 3: R_BRACK@254..255 "]" [] [] + 4: (empty) + 2: MD_TEXTUAL@255..256 + 0: MD_TEXTUAL_LITERAL@255..256 "\n" [] [] + 1: (empty) + 17: MD_NEWLINE@256..257 + 0: NEWLINE@256..257 "\n" [] [] + 18: MD_PARAGRAPH@257..317 + 0: MD_INLINE_ITEM_LIST@257..317 + 0: MD_TEXTUAL@257..281 + 0: MD_TEXTUAL_LITERAL@257..281 "Multiple words in text: " [] [] + 1: MD_REFERENCE_LINK@281..316 + 0: L_BRACK@281..282 "[" [] [] + 1: MD_INLINE_ITEM_LIST@282..306 + 0: MD_TEXTUAL@282..306 + 0: MD_TEXTUAL_LITERAL@282..306 "click here for more info" [] [] + 2: R_BRACK@306..307 "]" [] [] + 3: MD_REFERENCE_LINK_LABEL@307..316 + 0: L_BRACK@307..308 "[" [] [] + 1: MD_INLINE_ITEM_LIST@308..315 + 0: MD_TEXTUAL@308..315 + 0: MD_TEXTUAL_LITERAL@308..315 "example" [] [] + 2: R_BRACK@315..316 "]" [] [] + 2: MD_TEXTUAL@316..317 + 0: MD_TEXTUAL_LITERAL@316..317 "\n" [] [] + 1: (empty) + 19: MD_NEWLINE@317..318 + 0: NEWLINE@317..318 "\n" [] [] + 20: MD_PARAGRAPH@318..352 + 0: MD_INLINE_ITEM_LIST@318..352 + 0: MD_TEXTUAL@318..330 + 0: MD_TEXTUAL_LITERAL@318..330 "Empty label " [] [] + 1: MD_TEXTUAL@330..331 + 0: MD_TEXTUAL_LITERAL@330..331 "(" [] [] + 2: MD_TEXTUAL@331..340 + 0: MD_TEXTUAL_LITERAL@331..340 "collapsed" [] [] + 3: MD_TEXTUAL@340..341 + 0: MD_TEXTUAL_LITERAL@340..341 ")" [] [] + 4: MD_TEXTUAL@341..342 + 0: MD_TEXTUAL_LITERAL@341..342 ":" [] [] + 5: MD_TEXTUAL@342..343 + 0: MD_TEXTUAL_LITERAL@342..343 " " [] [] + 6: MD_REFERENCE_LINK@343..351 + 0: L_BRACK@343..344 "[" [] [] + 1: MD_INLINE_ITEM_LIST@344..348 + 0: MD_TEXTUAL@344..348 + 0: MD_TEXTUAL_LITERAL@344..348 "test" [] [] + 2: R_BRACK@348..349 "]" [] [] + 3: MD_REFERENCE_LINK_LABEL@349..351 + 0: L_BRACK@349..350 "[" [] [] + 1: MD_INLINE_ITEM_LIST@350..350 + 2: R_BRACK@350..351 "]" [] [] + 7: MD_TEXTUAL@351..352 + 0: MD_TEXTUAL_LITERAL@351..352 "\n" [] [] + 1: (empty) + 21: MD_NEWLINE@352..353 + 0: 
NEWLINE@352..353 "\n" [] [] + 22: MD_LINK_REFERENCE_DEFINITION@353..377 + 0: L_BRACK@353..354 "[" [] [] + 1: MD_LINK_LABEL@354..358 + 0: MD_INLINE_ITEM_LIST@354..358 + 0: MD_TEXTUAL@354..358 + 0: MD_TEXTUAL_LITERAL@354..358 "test" [] [] + 2: R_BRACK@358..359 "]" [] [] + 3: COLON@359..360 ":" [] [] + 4: MD_LINK_DESTINATION@360..377 + 0: MD_INLINE_ITEM_LIST@360..377 + 0: MD_TEXTUAL@360..361 + 0: MD_TEXTUAL_LITERAL@360..361 " " [] [] + 1: MD_TEXTUAL@361..377 + 0: MD_TEXTUAL_LITERAL@361..377 "https://test.com" [] [] + 5: (empty) + 23: MD_NEWLINE@377..378 + 0: NEWLINE@377..378 "\n" [] [] + 24: MD_NEWLINE@378..379 + 0: NEWLINE@378..379 "\n" [] [] + 25: MD_PARAGRAPH@379..422 + 0: MD_INLINE_ITEM_LIST@379..422 + 0: MD_TEXTUAL@379..410 + 0: MD_TEXTUAL_LITERAL@379..410 "Shortcut that looks like text: " [] [] + 1: MD_TEXTUAL@410..411 + 0: MD_TEXTUAL_LITERAL@410..411 "[" [] [] + 2: MD_TEXTUAL@411..420 + 0: MD_TEXTUAL_LITERAL@411..420 "undefined" [] [] + 3: MD_TEXTUAL@420..421 + 0: MD_TEXTUAL_LITERAL@420..421 "]" [] [] + 4: MD_TEXTUAL@421..422 + 0: MD_TEXTUAL_LITERAL@421..422 "\n" [] [] + 1: (empty) + 26: MD_NEWLINE@422..423 + 0: NEWLINE@422..423 "\n" [] [] + 27: MD_PARAGRAPH@423..490 + 0: MD_INLINE_ITEM_LIST@423..490 + 0: MD_TEXTUAL@423..442 + 0: MD_TEXTUAL_LITERAL@423..442 "Mixed with inline: " [] [] + 1: MD_INLINE_LINK@442..470 + 0: L_BRACK@442..443 "[" [] [] + 1: MD_INLINE_ITEM_LIST@443..449 + 0: MD_TEXTUAL@443..449 + 0: MD_TEXTUAL_LITERAL@443..449 "inline" [] [] + 2: R_BRACK@449..450 "]" [] [] + 3: L_PAREN@450..451 "(" [] [] + 4: MD_INLINE_ITEM_LIST@451..469 + 0: MD_TEXTUAL@451..469 + 0: MD_TEXTUAL_LITERAL@451..469 "https://inline.com" [] [] + 5: (empty) + 6: R_PAREN@469..470 ")" [] [] + 2: MD_TEXTUAL@470..475 + 0: MD_TEXTUAL_LITERAL@470..475 " and " [] [] + 3: MD_REFERENCE_LINK@475..489 + 0: L_BRACK@475..476 "[" [] [] + 1: MD_INLINE_ITEM_LIST@476..479 + 0: MD_TEXTUAL@476..479 + 0: MD_TEXTUAL_LITERAL@476..479 "ref" [] [] + 2: R_BRACK@479..480 "]" [] [] + 3: 
MD_REFERENCE_LINK_LABEL@480..489 + 0: L_BRACK@480..481 "[" [] [] + 1: MD_INLINE_ITEM_LIST@481..488 + 0: MD_TEXTUAL@481..488 + 0: MD_TEXTUAL_LITERAL@481..488 "example" [] [] + 2: R_BRACK@488..489 "]" [] [] + 4: MD_TEXTUAL@489..490 + 0: MD_TEXTUAL_LITERAL@489..490 "\n" [] [] + 1: (empty) + 28: MD_NEWLINE@490..491 + 0: NEWLINE@490..491 "\n" [] [] + 29: MD_PARAGRAPH@491..571 + 0: MD_INLINE_ITEM_LIST@491..571 + 0: MD_TEXTUAL@491..537 + 0: MD_TEXTUAL_LITERAL@491..537 "Nested in paragraph: This is a paragraph with " [] [] + 1: MD_REFERENCE_LINK@537..555 + 0: L_BRACK@537..538 "[" [] [] + 1: MD_INLINE_ITEM_LIST@538..549 + 0: MD_TEXTUAL@538..549 + 0: MD_TEXTUAL_LITERAL@538..549 "a reference" [] [] + 2: R_BRACK@549..550 "]" [] [] + 3: MD_REFERENCE_LINK_LABEL@550..555 + 0: L_BRACK@550..551 "[" [] [] + 1: MD_INLINE_ITEM_LIST@551..554 + 0: MD_TEXTUAL@551..554 + 0: MD_TEXTUAL_LITERAL@551..554 "foo" [] [] + 2: R_BRACK@554..555 "]" [] [] + 2: MD_TEXTUAL@555..570 + 0: MD_TEXTUAL_LITERAL@555..570 " in the middle." 
[] [] + 3: MD_TEXTUAL@570..571 + 0: MD_TEXTUAL_LITERAL@570..571 "\n" [] [] + 1: (empty) + 30: MD_NEWLINE@571..572 + 0: NEWLINE@571..572 "\n" [] [] + 31: MD_LINK_REFERENCE_DEFINITION@572..606 + 0: L_BRACK@572..573 "[" [] [] + 1: MD_LINK_LABEL@573..583 + 0: MD_INLINE_ITEM_LIST@573..583 + 0: MD_TEXTUAL@573..583 + 0: MD_TEXTUAL_LITERAL@573..583 "Case Label" [] [] + 2: R_BRACK@583..584 "]" [] [] + 3: COLON@584..585 ":" [] [] + 4: MD_LINK_DESTINATION@585..606 + 0: MD_INLINE_ITEM_LIST@585..606 + 0: MD_TEXTUAL@585..586 + 0: MD_TEXTUAL_LITERAL@585..586 " " [] [] + 1: MD_TEXTUAL@586..606 + 0: MD_TEXTUAL_LITERAL@586..606 "https://case.example" [] [] + 5: (empty) + 32: MD_NEWLINE@606..607 + 0: NEWLINE@606..607 "\n" [] [] + 33: MD_NEWLINE@607..608 + 0: NEWLINE@607..608 "\n" [] [] + 34: MD_PARAGRAPH@608..639 + 0: MD_INLINE_ITEM_LIST@608..639 + 0: MD_TEXTUAL@608..612 + 0: MD_TEXTUAL_LITERAL@608..612 "Case" [] [] + 1: MD_TEXTUAL@612..613 + 0: MD_TEXTUAL_LITERAL@612..613 "-" [] [] + 2: MD_TEXTUAL@613..626 + 0: MD_TEXTUAL_LITERAL@613..626 "insensitive: " [] [] + 3: MD_REFERENCE_LINK@626..638 + 0: L_BRACK@626..627 "[" [] [] + 1: MD_INLINE_ITEM_LIST@627..637 + 0: MD_TEXTUAL@627..637 + 0: MD_TEXTUAL_LITERAL@627..637 "case label" [] [] + 2: R_BRACK@637..638 "]" [] [] + 3: (empty) + 4: MD_TEXTUAL@638..639 + 0: MD_TEXTUAL_LITERAL@638..639 "\n" [] [] + 1: (empty) + 35: MD_NEWLINE@639..640 + 0: NEWLINE@639..640 "\n" [] [] + 36: MD_PARAGRAPH@640..678 + 0: MD_INLINE_ITEM_LIST@640..678 + 0: MD_TEXTUAL@640..663 + 0: MD_TEXTUAL_LITERAL@640..663 "Whitespace normalized: " [] [] + 1: MD_REFERENCE_LINK@663..677 + 0: L_BRACK@663..664 "[" [] [] + 1: MD_INLINE_ITEM_LIST@664..676 + 0: MD_TEXTUAL@664..676 + 0: MD_TEXTUAL_LITERAL@664..676 "case label" [] [] + 2: R_BRACK@676..677 "]" [] [] + 3: (empty) + 2: MD_TEXTUAL@677..678 + 0: MD_TEXTUAL_LITERAL@677..678 "\n" [] [] + 1: (empty) + 2: EOF@678..678 "" [] [] + +``` diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/setext_heading.md 
b/crates/biome_markdown_parser/tests/md_test_suite/ok/setext_heading.md new file mode 100644 index 000000000000..db3f7fcd82ed --- /dev/null +++ b/crates/biome_markdown_parser/tests/md_test_suite/ok/setext_heading.md @@ -0,0 +1,15 @@ +Heading 1 +========= + +Heading 2 +--------- + +Another H1 +=== + +Another H2 +--- + +Multi-line content +that spans lines +================ diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/setext_heading.md.snap b/crates/biome_markdown_parser/tests/md_test_suite/ok/setext_heading.md.snap new file mode 100644 index 000000000000..7a62443dd97f --- /dev/null +++ b/crates/biome_markdown_parser/tests/md_test_suite/ok/setext_heading.md.snap @@ -0,0 +1,201 @@ +--- +source: crates/biome_markdown_parser/tests/spec_test.rs +expression: snapshot +--- +## Input + +``` +Heading 1 +========= + +Heading 2 +--------- + +Another H1 +=== + +Another H2 +--- + +Multi-line content +that spans lines +================ + +``` + + +## AST + +``` +MdDocument { + bom_token: missing (optional), + value: MdBlockList [ + MdSetextHeader { + content: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@0..9 "Heading 1" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@9..10 "\n" [] [], + }, + ], + underline_token: MD_SETEXT_UNDERLINE_LITERAL@10..19 "=========" [] [], + }, + MdNewline { + value_token: NEWLINE@19..20 "\n" [] [], + }, + MdNewline { + value_token: NEWLINE@20..21 "\n" [] [], + }, + MdSetextHeader { + content: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@21..30 "Heading 2" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@30..31 "\n" [] [], + }, + ], + underline_token: MD_SETEXT_UNDERLINE_LITERAL@31..40 "---------" [] [], + }, + MdNewline { + value_token: NEWLINE@40..41 "\n" [] [], + }, + MdNewline { + value_token: NEWLINE@41..42 "\n" [] [], + }, + MdSetextHeader { + content: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@42..52 "Another H1" [] [], + }, + MdTextual 
{ + value_token: MD_TEXTUAL_LITERAL@52..53 "\n" [] [], + }, + ], + underline_token: MD_SETEXT_UNDERLINE_LITERAL@53..56 "===" [] [], + }, + MdNewline { + value_token: NEWLINE@56..57 "\n" [] [], + }, + MdNewline { + value_token: NEWLINE@57..58 "\n" [] [], + }, + MdSetextHeader { + content: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@58..68 "Another H2" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@68..69 "\n" [] [], + }, + ], + underline_token: MD_SETEXT_UNDERLINE_LITERAL@69..72 "---" [] [], + }, + MdNewline { + value_token: NEWLINE@72..73 "\n" [] [], + }, + MdNewline { + value_token: NEWLINE@73..74 "\n" [] [], + }, + MdSetextHeader { + content: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@74..79 "Multi" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@79..80 "-" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@80..92 "line content" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@92..93 "\n" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@93..109 "that spans lines" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@109..110 "\n" [] [], + }, + ], + underline_token: MD_SETEXT_UNDERLINE_LITERAL@110..126 "================" [] [], + }, + MdNewline { + value_token: NEWLINE@126..127 "\n" [] [], + }, + ], + eof_token: EOF@127..127 "" [] [], +} +``` + +## CST + +``` +0: MD_DOCUMENT@0..127 + 0: (empty) + 1: MD_BLOCK_LIST@0..127 + 0: MD_SETEXT_HEADER@0..19 + 0: MD_INLINE_ITEM_LIST@0..10 + 0: MD_TEXTUAL@0..9 + 0: MD_TEXTUAL_LITERAL@0..9 "Heading 1" [] [] + 1: MD_TEXTUAL@9..10 + 0: MD_TEXTUAL_LITERAL@9..10 "\n" [] [] + 1: MD_SETEXT_UNDERLINE_LITERAL@10..19 "=========" [] [] + 1: MD_NEWLINE@19..20 + 0: NEWLINE@19..20 "\n" [] [] + 2: MD_NEWLINE@20..21 + 0: NEWLINE@20..21 "\n" [] [] + 3: MD_SETEXT_HEADER@21..40 + 0: MD_INLINE_ITEM_LIST@21..31 + 0: MD_TEXTUAL@21..30 + 0: MD_TEXTUAL_LITERAL@21..30 "Heading 2" [] [] + 1: MD_TEXTUAL@30..31 + 0: MD_TEXTUAL_LITERAL@30..31 "\n" 
[] [] + 1: MD_SETEXT_UNDERLINE_LITERAL@31..40 "---------" [] [] + 4: MD_NEWLINE@40..41 + 0: NEWLINE@40..41 "\n" [] [] + 5: MD_NEWLINE@41..42 + 0: NEWLINE@41..42 "\n" [] [] + 6: MD_SETEXT_HEADER@42..56 + 0: MD_INLINE_ITEM_LIST@42..53 + 0: MD_TEXTUAL@42..52 + 0: MD_TEXTUAL_LITERAL@42..52 "Another H1" [] [] + 1: MD_TEXTUAL@52..53 + 0: MD_TEXTUAL_LITERAL@52..53 "\n" [] [] + 1: MD_SETEXT_UNDERLINE_LITERAL@53..56 "===" [] [] + 7: MD_NEWLINE@56..57 + 0: NEWLINE@56..57 "\n" [] [] + 8: MD_NEWLINE@57..58 + 0: NEWLINE@57..58 "\n" [] [] + 9: MD_SETEXT_HEADER@58..72 + 0: MD_INLINE_ITEM_LIST@58..69 + 0: MD_TEXTUAL@58..68 + 0: MD_TEXTUAL_LITERAL@58..68 "Another H2" [] [] + 1: MD_TEXTUAL@68..69 + 0: MD_TEXTUAL_LITERAL@68..69 "\n" [] [] + 1: MD_SETEXT_UNDERLINE_LITERAL@69..72 "---" [] [] + 10: MD_NEWLINE@72..73 + 0: NEWLINE@72..73 "\n" [] [] + 11: MD_NEWLINE@73..74 + 0: NEWLINE@73..74 "\n" [] [] + 12: MD_SETEXT_HEADER@74..126 + 0: MD_INLINE_ITEM_LIST@74..110 + 0: MD_TEXTUAL@74..79 + 0: MD_TEXTUAL_LITERAL@74..79 "Multi" [] [] + 1: MD_TEXTUAL@79..80 + 0: MD_TEXTUAL_LITERAL@79..80 "-" [] [] + 2: MD_TEXTUAL@80..92 + 0: MD_TEXTUAL_LITERAL@80..92 "line content" [] [] + 3: MD_TEXTUAL@92..93 + 0: MD_TEXTUAL_LITERAL@92..93 "\n" [] [] + 4: MD_TEXTUAL@93..109 + 0: MD_TEXTUAL_LITERAL@93..109 "that spans lines" [] [] + 5: MD_TEXTUAL@109..110 + 0: MD_TEXTUAL_LITERAL@109..110 "\n" [] [] + 1: MD_SETEXT_UNDERLINE_LITERAL@110..126 "================" [] [] + 13: MD_NEWLINE@126..127 + 0: NEWLINE@126..127 "\n" [] [] + 2: EOF@127..127 "" [] [] + +``` diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/thematic_break_block.md.snap b/crates/biome_markdown_parser/tests/md_test_suite/ok/thematic_break_block.md.snap index ed839fc78381..0ca0a4dd7e81 100644 --- a/crates/biome_markdown_parser/tests/md_test_suite/ok/thematic_break_block.md.snap +++ b/crates/biome_markdown_parser/tests/md_test_suite/ok/thematic_break_block.md.snap @@ -1,5 +1,6 @@ --- source: 
crates/biome_markdown_parser/tests/spec_test.rs +assertion_line: 131 expression: snapshot --- ## Input @@ -23,22 +24,43 @@ MdDocument { bom_token: missing (optional), value: MdBlockList [ MdThematicBreakBlock { - value_token: MD_THEMATIC_BREAK_LITERAL@0..6 "***" [Whitespace(" ")] [], + value_token: MD_THEMATIC_BREAK_LITERAL@0..6 "***" [Skipped(" "), Skipped(" "), Skipped(" ")] [], + }, + MdNewline { + value_token: NEWLINE@6..7 "\n" [] [], }, MdThematicBreakBlock { - value_token: MD_THEMATIC_BREAK_LITERAL@6..11 "***" [Newline("\n"), Whitespace(" ")] [], + value_token: MD_THEMATIC_BREAK_LITERAL@7..11 "***" [Skipped(" ")] [], + }, + MdNewline { + value_token: NEWLINE@11..12 "\n" [] [], }, MdThematicBreakBlock { - value_token: MD_THEMATIC_BREAK_LITERAL@11..18 "- - -" [Newline("\n"), Whitespace(" ")] [], + value_token: MD_THEMATIC_BREAK_LITERAL@12..18 "- - -" [Skipped(" ")] [], + }, + MdNewline { + value_token: NEWLINE@18..19 "\n" [] [], }, MdThematicBreakBlock { - value_token: MD_THEMATIC_BREAK_LITERAL@18..22 "___" [Newline("\n")] [], + value_token: MD_THEMATIC_BREAK_LITERAL@19..22 "___" [] [], + }, + MdNewline { + value_token: NEWLINE@22..23 "\n" [] [], + }, + MdNewline { + value_token: NEWLINE@23..24 "\n" [] [], }, MdThematicBreakBlock { - value_token: MD_THEMATIC_BREAK_LITERAL@22..30 "_ _ _" [Newline("\n"), Newline("\n"), Whitespace(" ")] [], + value_token: MD_THEMATIC_BREAK_LITERAL@24..30 "_ _ _" [Skipped(" ")] [], + }, + MdNewline { + value_token: NEWLINE@30..31 "\n" [] [], + }, + MdNewline { + value_token: NEWLINE@31..32 "\n" [] [], }, MdThematicBreakBlock { - value_token: MD_THEMATIC_BREAK_LITERAL@30..37 "* * *" [Newline("\n"), Newline("\n")] [], + value_token: MD_THEMATIC_BREAK_LITERAL@32..37 "* * *" [] [], }, ], eof_token: EOF@37..37 "" [] [], @@ -52,17 +74,31 @@ MdDocument { 0: (empty) 1: MD_BLOCK_LIST@0..37 0: MD_THEMATIC_BREAK_BLOCK@0..6 - 0: MD_THEMATIC_BREAK_LITERAL@0..6 "***" [Whitespace(" ")] [] - 1: MD_THEMATIC_BREAK_BLOCK@6..11 - 0: 
MD_THEMATIC_BREAK_LITERAL@6..11 "***" [Newline("\n"), Whitespace(" ")] [] - 2: MD_THEMATIC_BREAK_BLOCK@11..18 - 0: MD_THEMATIC_BREAK_LITERAL@11..18 "- - -" [Newline("\n"), Whitespace(" ")] [] - 3: MD_THEMATIC_BREAK_BLOCK@18..22 - 0: MD_THEMATIC_BREAK_LITERAL@18..22 "___" [Newline("\n")] [] - 4: MD_THEMATIC_BREAK_BLOCK@22..30 - 0: MD_THEMATIC_BREAK_LITERAL@22..30 "_ _ _" [Newline("\n"), Newline("\n"), Whitespace(" ")] [] - 5: MD_THEMATIC_BREAK_BLOCK@30..37 - 0: MD_THEMATIC_BREAK_LITERAL@30..37 "* * *" [Newline("\n"), Newline("\n")] [] + 0: MD_THEMATIC_BREAK_LITERAL@0..6 "***" [Skipped(" "), Skipped(" "), Skipped(" ")] [] + 1: MD_NEWLINE@6..7 + 0: NEWLINE@6..7 "\n" [] [] + 2: MD_THEMATIC_BREAK_BLOCK@7..11 + 0: MD_THEMATIC_BREAK_LITERAL@7..11 "***" [Skipped(" ")] [] + 3: MD_NEWLINE@11..12 + 0: NEWLINE@11..12 "\n" [] [] + 4: MD_THEMATIC_BREAK_BLOCK@12..18 + 0: MD_THEMATIC_BREAK_LITERAL@12..18 "- - -" [Skipped(" ")] [] + 5: MD_NEWLINE@18..19 + 0: NEWLINE@18..19 "\n" [] [] + 6: MD_THEMATIC_BREAK_BLOCK@19..22 + 0: MD_THEMATIC_BREAK_LITERAL@19..22 "___" [] [] + 7: MD_NEWLINE@22..23 + 0: NEWLINE@22..23 "\n" [] [] + 8: MD_NEWLINE@23..24 + 0: NEWLINE@23..24 "\n" [] [] + 9: MD_THEMATIC_BREAK_BLOCK@24..30 + 0: MD_THEMATIC_BREAK_LITERAL@24..30 "_ _ _" [Skipped(" ")] [] + 10: MD_NEWLINE@30..31 + 0: NEWLINE@30..31 "\n" [] [] + 11: MD_NEWLINE@31..32 + 0: NEWLINE@31..32 "\n" [] [] + 12: MD_THEMATIC_BREAK_BLOCK@32..37 + 0: MD_THEMATIC_BREAK_LITERAL@32..37 "* * *" [] [] 2: EOF@37..37 "" [] [] ``` diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/unclosed_bold.md b/crates/biome_markdown_parser/tests/md_test_suite/ok/unclosed_bold.md new file mode 100644 index 000000000000..89ec5e79be43 --- /dev/null +++ b/crates/biome_markdown_parser/tests/md_test_suite/ok/unclosed_bold.md @@ -0,0 +1 @@ +This has **unclosed bold diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/unclosed_bold.md.snap 
b/crates/biome_markdown_parser/tests/md_test_suite/ok/unclosed_bold.md.snap new file mode 100644 index 000000000000..1183219b2b94 --- /dev/null +++ b/crates/biome_markdown_parser/tests/md_test_suite/ok/unclosed_bold.md.snap @@ -0,0 +1,60 @@ +--- +source: crates/biome_markdown_parser/tests/spec_test.rs +expression: snapshot +--- +## Input + +``` +This has **unclosed bold + +``` + + +## AST + +``` +MdDocument { + bom_token: missing (optional), + value: MdBlockList [ + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@0..9 "This has " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@9..11 "**" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@11..24 "unclosed bold" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@24..25 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + ], + eof_token: EOF@25..25 "" [] [], +} +``` + +## CST + +``` +0: MD_DOCUMENT@0..25 + 0: (empty) + 1: MD_BLOCK_LIST@0..25 + 0: MD_PARAGRAPH@0..25 + 0: MD_INLINE_ITEM_LIST@0..25 + 0: MD_TEXTUAL@0..9 + 0: MD_TEXTUAL_LITERAL@0..9 "This has " [] [] + 1: MD_TEXTUAL@9..11 + 0: MD_TEXTUAL_LITERAL@9..11 "**" [] [] + 2: MD_TEXTUAL@11..24 + 0: MD_TEXTUAL_LITERAL@11..24 "unclosed bold" [] [] + 3: MD_TEXTUAL@24..25 + 0: MD_TEXTUAL_LITERAL@24..25 "\n" [] [] + 1: (empty) + 2: EOF@25..25 "" [] [] + +``` diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/unclosed_emphasis.md b/crates/biome_markdown_parser/tests/md_test_suite/ok/unclosed_emphasis.md new file mode 100644 index 000000000000..91297cb3bd0d --- /dev/null +++ b/crates/biome_markdown_parser/tests/md_test_suite/ok/unclosed_emphasis.md @@ -0,0 +1 @@ +This has *unclosed emphasis diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/unclosed_emphasis.md.snap b/crates/biome_markdown_parser/tests/md_test_suite/ok/unclosed_emphasis.md.snap new file mode 100644 index 000000000000..501c8b080d67 --- /dev/null +++ 
b/crates/biome_markdown_parser/tests/md_test_suite/ok/unclosed_emphasis.md.snap @@ -0,0 +1,60 @@ +--- +source: crates/biome_markdown_parser/tests/spec_test.rs +expression: snapshot +--- +## Input + +``` +This has *unclosed emphasis + +``` + + +## AST + +``` +MdDocument { + bom_token: missing (optional), + value: MdBlockList [ + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@0..9 "This has " [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@9..10 "*" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@10..27 "unclosed emphasis" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@27..28 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + ], + eof_token: EOF@28..28 "" [] [], +} +``` + +## CST + +``` +0: MD_DOCUMENT@0..28 + 0: (empty) + 1: MD_BLOCK_LIST@0..28 + 0: MD_PARAGRAPH@0..28 + 0: MD_INLINE_ITEM_LIST@0..28 + 0: MD_TEXTUAL@0..9 + 0: MD_TEXTUAL_LITERAL@0..9 "This has " [] [] + 1: MD_TEXTUAL@9..10 + 0: MD_TEXTUAL_LITERAL@9..10 "*" [] [] + 2: MD_TEXTUAL@10..27 + 0: MD_TEXTUAL_LITERAL@10..27 "unclosed emphasis" [] [] + 3: MD_TEXTUAL@27..28 + 0: MD_TEXTUAL_LITERAL@27..28 "\n" [] [] + 1: (empty) + 2: EOF@28..28 "" [] [] + +``` diff --git a/crates/biome_markdown_parser/tests/spec.json b/crates/biome_markdown_parser/tests/spec.json new file mode 100644 index 000000000000..1f89e66f2ada --- /dev/null +++ b/crates/biome_markdown_parser/tests/spec.json @@ -0,0 +1,5218 @@ +[ + { + "markdown": "\tfoo\tbaz\t\tbim\n", + "html": "
    foo\tbaz\t\tbim\n
    \n", + "example": 1, + "start_line": 355, + "end_line": 360, + "section": "Tabs" + }, + { + "markdown": " \tfoo\tbaz\t\tbim\n", + "html": "
    foo\tbaz\t\tbim\n
    \n", + "example": 2, + "start_line": 362, + "end_line": 367, + "section": "Tabs" + }, + { + "markdown": " a\ta\n ὐ\ta\n", + "html": "
    a\ta\nὐ\ta\n
    \n", + "example": 3, + "start_line": 369, + "end_line": 376, + "section": "Tabs" + }, + { + "markdown": " - foo\n\n\tbar\n", + "html": "
      \n
    • \n

      foo

      \n

      bar

      \n
    • \n
    \n", + "example": 4, + "start_line": 382, + "end_line": 393, + "section": "Tabs" + }, + { + "markdown": "- foo\n\n\t\tbar\n", + "html": "
      \n
    • \n

      foo

      \n
        bar\n
      \n
    • \n
    \n", + "example": 5, + "start_line": 395, + "end_line": 407, + "section": "Tabs" + }, + { + "markdown": ">\t\tfoo\n", + "html": "
    \n
      foo\n
    \n
    \n", + "example": 6, + "start_line": 418, + "end_line": 425, + "section": "Tabs" + }, + { + "markdown": "-\t\tfoo\n", + "html": "
      \n
    • \n
        foo\n
      \n
    • \n
    \n", + "example": 7, + "start_line": 427, + "end_line": 436, + "section": "Tabs" + }, + { + "markdown": " foo\n\tbar\n", + "html": "
    foo\nbar\n
    \n", + "example": 8, + "start_line": 439, + "end_line": 446, + "section": "Tabs" + }, + { + "markdown": " - foo\n - bar\n\t - baz\n", + "html": "
      \n
    • foo\n
        \n
      • bar\n
          \n
        • baz
        • \n
        \n
      • \n
      \n
    • \n
    \n", + "example": 9, + "start_line": 448, + "end_line": 464, + "section": "Tabs" + }, + { + "markdown": "#\tFoo\n", + "html": "

    Foo

    \n", + "example": 10, + "start_line": 466, + "end_line": 470, + "section": "Tabs" + }, + { + "markdown": "*\t*\t*\t\n", + "html": "
    \n", + "example": 11, + "start_line": 472, + "end_line": 476, + "section": "Tabs" + }, + { + "markdown": "\\!\\\"\\#\\$\\%\\&\\'\\(\\)\\*\\+\\,\\-\\.\\/\\:\\;\\<\\=\\>\\?\\@\\[\\\\\\]\\^\\_\\`\\{\\|\\}\\~\n", + "html": "

    !"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~

    \n", + "example": 12, + "start_line": 489, + "end_line": 493, + "section": "Backslash escapes" + }, + { + "markdown": "\\\t\\A\\a\\ \\3\\φ\\«\n", + "html": "

    \\\t\\A\\a\\ \\3\\φ\\«

    \n", + "example": 13, + "start_line": 499, + "end_line": 503, + "section": "Backslash escapes" + }, + { + "markdown": "\\*not emphasized*\n\\
    not a tag\n\\[not a link](/foo)\n\\`not code`\n1\\. not a list\n\\* not a list\n\\# not a heading\n\\[foo]: /url \"not a reference\"\n\\ö not a character entity\n", + "html": "

    *not emphasized*\n<br/> not a tag\n[not a link](/foo)\n`not code`\n1. not a list\n* not a list\n# not a heading\n[foo]: /url "not a reference"\n&ouml; not a character entity

    \n", + "example": 14, + "start_line": 509, + "end_line": 529, + "section": "Backslash escapes" + }, + { + "markdown": "\\\\*emphasis*\n", + "html": "

    \\emphasis

    \n", + "example": 15, + "start_line": 534, + "end_line": 538, + "section": "Backslash escapes" + }, + { + "markdown": "foo\\\nbar\n", + "html": "

    foo
    \nbar

    \n", + "example": 16, + "start_line": 543, + "end_line": 549, + "section": "Backslash escapes" + }, + { + "markdown": "`` \\[\\` ``\n", + "html": "

    \\[\\`

    \n", + "example": 17, + "start_line": 555, + "end_line": 559, + "section": "Backslash escapes" + }, + { + "markdown": " \\[\\]\n", + "html": "
    \\[\\]\n
    \n", + "example": 18, + "start_line": 562, + "end_line": 567, + "section": "Backslash escapes" + }, + { + "markdown": "~~~\n\\[\\]\n~~~\n", + "html": "
    \\[\\]\n
    \n", + "example": 19, + "start_line": 570, + "end_line": 577, + "section": "Backslash escapes" + }, + { + "markdown": "\n", + "html": "

    https://example.com?find=\\*

    \n", + "example": 20, + "start_line": 580, + "end_line": 584, + "section": "Backslash escapes" + }, + { + "markdown": "\n", + "html": "\n", + "example": 21, + "start_line": 587, + "end_line": 591, + "section": "Backslash escapes" + }, + { + "markdown": "[foo](/bar\\* \"ti\\*tle\")\n", + "html": "

    foo

    \n", + "example": 22, + "start_line": 597, + "end_line": 601, + "section": "Backslash escapes" + }, + { + "markdown": "[foo]\n\n[foo]: /bar\\* \"ti\\*tle\"\n", + "html": "

    foo

    \n", + "example": 23, + "start_line": 604, + "end_line": 610, + "section": "Backslash escapes" + }, + { + "markdown": "``` foo\\+bar\nfoo\n```\n", + "html": "
    foo\n
    \n", + "example": 24, + "start_line": 613, + "end_line": 620, + "section": "Backslash escapes" + }, + { + "markdown": "  & © Æ Ď\n¾ ℋ ⅆ\n∲ ≧̸\n", + "html": "

      & © Æ Ď\n¾ ℋ ⅆ\n∲ ≧̸

    \n", + "example": 25, + "start_line": 649, + "end_line": 657, + "section": "Entity and numeric character references" + }, + { + "markdown": "# Ӓ Ϡ �\n", + "html": "

    # Ӓ Ϡ �

    \n", + "example": 26, + "start_line": 668, + "end_line": 672, + "section": "Entity and numeric character references" + }, + { + "markdown": "" ആ ಫ\n", + "html": "

    " ആ ಫ

    \n", + "example": 27, + "start_line": 681, + "end_line": 685, + "section": "Entity and numeric character references" + }, + { + "markdown": "  &x; &#; &#x;\n�\n&#abcdef0;\n&ThisIsNotDefined; &hi?;\n", + "html": "

    &nbsp &x; &#; &#x;\n&#87654321;\n&#abcdef0;\n&ThisIsNotDefined; &hi?;

    \n", + "example": 28, + "start_line": 690, + "end_line": 700, + "section": "Entity and numeric character references" + }, + { + "markdown": "©\n", + "html": "

    &copy

    \n", + "example": 29, + "start_line": 707, + "end_line": 711, + "section": "Entity and numeric character references" + }, + { + "markdown": "&MadeUpEntity;\n", + "html": "

    &MadeUpEntity;

    \n", + "example": 30, + "start_line": 717, + "end_line": 721, + "section": "Entity and numeric character references" + }, + { + "markdown": "\n", + "html": "\n", + "example": 31, + "start_line": 728, + "end_line": 732, + "section": "Entity and numeric character references" + }, + { + "markdown": "[foo](/föö \"föö\")\n", + "html": "

    foo

    \n", + "example": 32, + "start_line": 735, + "end_line": 739, + "section": "Entity and numeric character references" + }, + { + "markdown": "[foo]\n\n[foo]: /föö \"föö\"\n", + "html": "

    foo

    \n", + "example": 33, + "start_line": 742, + "end_line": 748, + "section": "Entity and numeric character references" + }, + { + "markdown": "``` föö\nfoo\n```\n", + "html": "
    foo\n
    \n", + "example": 34, + "start_line": 751, + "end_line": 758, + "section": "Entity and numeric character references" + }, + { + "markdown": "`föö`\n", + "html": "

    f&ouml;&ouml;

    \n", + "example": 35, + "start_line": 764, + "end_line": 768, + "section": "Entity and numeric character references" + }, + { + "markdown": " föfö\n", + "html": "
    f&ouml;f&ouml;\n
    \n", + "example": 36, + "start_line": 771, + "end_line": 776, + "section": "Entity and numeric character references" + }, + { + "markdown": "*foo*\n*foo*\n", + "html": "

    *foo*\nfoo

    \n", + "example": 37, + "start_line": 783, + "end_line": 789, + "section": "Entity and numeric character references" + }, + { + "markdown": "* foo\n\n* foo\n", + "html": "

    * foo

    \n
      \n
    • foo
    • \n
    \n", + "example": 38, + "start_line": 791, + "end_line": 800, + "section": "Entity and numeric character references" + }, + { + "markdown": "foo bar\n", + "html": "

    foo\n\nbar

    \n", + "example": 39, + "start_line": 802, + "end_line": 808, + "section": "Entity and numeric character references" + }, + { + "markdown": " foo\n", + "html": "

    \tfoo

    \n", + "example": 40, + "start_line": 810, + "end_line": 814, + "section": "Entity and numeric character references" + }, + { + "markdown": "[a](url "tit")\n", + "html": "

    [a](url "tit")

    \n", + "example": 41, + "start_line": 817, + "end_line": 821, + "section": "Entity and numeric character references" + }, + { + "markdown": "- `one\n- two`\n", + "html": "
      \n
    • `one
    • \n
    • two`
    • \n
    \n", + "example": 42, + "start_line": 840, + "end_line": 848, + "section": "Precedence" + }, + { + "markdown": "***\n---\n___\n", + "html": "
    \n
    \n
    \n", + "example": 43, + "start_line": 879, + "end_line": 887, + "section": "Thematic breaks" + }, + { + "markdown": "+++\n", + "html": "

    +++

    \n", + "example": 44, + "start_line": 892, + "end_line": 896, + "section": "Thematic breaks" + }, + { + "markdown": "===\n", + "html": "

    ===

    \n", + "example": 45, + "start_line": 899, + "end_line": 903, + "section": "Thematic breaks" + }, + { + "markdown": "--\n**\n__\n", + "html": "

    --\n**\n__

    \n", + "example": 46, + "start_line": 908, + "end_line": 916, + "section": "Thematic breaks" + }, + { + "markdown": " ***\n ***\n ***\n", + "html": "
    \n
    \n
    \n", + "example": 47, + "start_line": 921, + "end_line": 929, + "section": "Thematic breaks" + }, + { + "markdown": " ***\n", + "html": "
    ***\n
    \n", + "example": 48, + "start_line": 934, + "end_line": 939, + "section": "Thematic breaks" + }, + { + "markdown": "Foo\n ***\n", + "html": "

    Foo\n***

    \n", + "example": 49, + "start_line": 942, + "end_line": 948, + "section": "Thematic breaks" + }, + { + "markdown": "_____________________________________\n", + "html": "
    \n", + "example": 50, + "start_line": 953, + "end_line": 957, + "section": "Thematic breaks" + }, + { + "markdown": " - - -\n", + "html": "
    \n", + "example": 51, + "start_line": 962, + "end_line": 966, + "section": "Thematic breaks" + }, + { + "markdown": " ** * ** * ** * **\n", + "html": "
    \n", + "example": 52, + "start_line": 969, + "end_line": 973, + "section": "Thematic breaks" + }, + { + "markdown": "- - - -\n", + "html": "
    \n", + "example": 53, + "start_line": 976, + "end_line": 980, + "section": "Thematic breaks" + }, + { + "markdown": "- - - - \n", + "html": "
    \n", + "example": 54, + "start_line": 985, + "end_line": 989, + "section": "Thematic breaks" + }, + { + "markdown": "_ _ _ _ a\n\na------\n\n---a---\n", + "html": "

    _ _ _ _ a

    \n

    a------

    \n

    ---a---

    \n", + "example": 55, + "start_line": 994, + "end_line": 1004, + "section": "Thematic breaks" + }, + { + "markdown": " *-*\n", + "html": "

    -

    \n", + "example": 56, + "start_line": 1010, + "end_line": 1014, + "section": "Thematic breaks" + }, + { + "markdown": "- foo\n***\n- bar\n", + "html": "
      \n
    • foo
    • \n
    \n
    \n
      \n
    • bar
    • \n
    \n", + "example": 57, + "start_line": 1019, + "end_line": 1031, + "section": "Thematic breaks" + }, + { + "markdown": "Foo\n***\nbar\n", + "html": "

    Foo

    \n
    \n

    bar

    \n", + "example": 58, + "start_line": 1036, + "end_line": 1044, + "section": "Thematic breaks" + }, + { + "markdown": "Foo\n---\nbar\n", + "html": "

    Foo

    \n

    bar

    \n", + "example": 59, + "start_line": 1053, + "end_line": 1060, + "section": "Thematic breaks" + }, + { + "markdown": "* Foo\n* * *\n* Bar\n", + "html": "
      \n
    • Foo
    • \n
    \n
    \n
      \n
    • Bar
    • \n
    \n", + "example": 60, + "start_line": 1066, + "end_line": 1078, + "section": "Thematic breaks" + }, + { + "markdown": "- Foo\n- * * *\n", + "html": "
      \n
    • Foo
    • \n
    • \n
      \n
    • \n
    \n", + "example": 61, + "start_line": 1083, + "end_line": 1093, + "section": "Thematic breaks" + }, + { + "markdown": "# foo\n## foo\n### foo\n#### foo\n##### foo\n###### foo\n", + "html": "

    foo

    \n

    foo

    \n

    foo

    \n

    foo

    \n
    foo
    \n
    foo
    \n", + "example": 62, + "start_line": 1112, + "end_line": 1126, + "section": "ATX headings" + }, + { + "markdown": "####### foo\n", + "html": "

    ####### foo

    \n", + "example": 63, + "start_line": 1131, + "end_line": 1135, + "section": "ATX headings" + }, + { + "markdown": "#5 bolt\n\n#hashtag\n", + "html": "

    #5 bolt

    \n

    #hashtag

    \n", + "example": 64, + "start_line": 1146, + "end_line": 1153, + "section": "ATX headings" + }, + { + "markdown": "\\## foo\n", + "html": "

    ## foo

    \n", + "example": 65, + "start_line": 1158, + "end_line": 1162, + "section": "ATX headings" + }, + { + "markdown": "# foo *bar* \\*baz\\*\n", + "html": "

    foo bar *baz*

    \n", + "example": 66, + "start_line": 1167, + "end_line": 1171, + "section": "ATX headings" + }, + { + "markdown": "# foo \n", + "html": "

    foo

    \n", + "example": 67, + "start_line": 1176, + "end_line": 1180, + "section": "ATX headings" + }, + { + "markdown": " ### foo\n ## foo\n # foo\n", + "html": "

    foo

    \n

    foo

    \n

    foo

    \n", + "example": 68, + "start_line": 1185, + "end_line": 1193, + "section": "ATX headings" + }, + { + "markdown": " # foo\n", + "html": "
    # foo\n
    \n", + "example": 69, + "start_line": 1198, + "end_line": 1203, + "section": "ATX headings" + }, + { + "markdown": "foo\n # bar\n", + "html": "

    foo\n# bar

    \n", + "example": 70, + "start_line": 1206, + "end_line": 1212, + "section": "ATX headings" + }, + { + "markdown": "## foo ##\n ### bar ###\n", + "html": "

    foo

    \n

    bar

    \n", + "example": 71, + "start_line": 1217, + "end_line": 1223, + "section": "ATX headings" + }, + { + "markdown": "# foo ##################################\n##### foo ##\n", + "html": "

    foo

    \n
    foo
    \n", + "example": 72, + "start_line": 1228, + "end_line": 1234, + "section": "ATX headings" + }, + { + "markdown": "### foo ### \n", + "html": "

    foo

    \n", + "example": 73, + "start_line": 1239, + "end_line": 1243, + "section": "ATX headings" + }, + { + "markdown": "### foo ### b\n", + "html": "

    foo ### b

    \n", + "example": 74, + "start_line": 1250, + "end_line": 1254, + "section": "ATX headings" + }, + { + "markdown": "# foo#\n", + "html": "

    foo#

    \n", + "example": 75, + "start_line": 1259, + "end_line": 1263, + "section": "ATX headings" + }, + { + "markdown": "### foo \\###\n## foo #\\##\n# foo \\#\n", + "html": "

    foo ###

    \n

    foo ###

    \n

    foo #

    \n", + "example": 76, + "start_line": 1269, + "end_line": 1277, + "section": "ATX headings" + }, + { + "markdown": "****\n## foo\n****\n", + "html": "
    \n

    foo

    \n
    \n", + "example": 77, + "start_line": 1283, + "end_line": 1291, + "section": "ATX headings" + }, + { + "markdown": "Foo bar\n# baz\nBar foo\n", + "html": "

    Foo bar

    \n

    baz

    \n

    Bar foo

    \n", + "example": 78, + "start_line": 1294, + "end_line": 1302, + "section": "ATX headings" + }, + { + "markdown": "## \n#\n### ###\n", + "html": "

    \n

    \n

    \n", + "example": 79, + "start_line": 1307, + "end_line": 1315, + "section": "ATX headings" + }, + { + "markdown": "Foo *bar*\n=========\n\nFoo *bar*\n---------\n", + "html": "

    Foo bar

    \n

    Foo bar

    \n", + "example": 80, + "start_line": 1347, + "end_line": 1356, + "section": "Setext headings" + }, + { + "markdown": "Foo *bar\nbaz*\n====\n", + "html": "

    Foo bar\nbaz

    \n", + "example": 81, + "start_line": 1361, + "end_line": 1368, + "section": "Setext headings" + }, + { + "markdown": " Foo *bar\nbaz*\t\n====\n", + "html": "

    Foo bar\nbaz

    \n", + "example": 82, + "start_line": 1375, + "end_line": 1382, + "section": "Setext headings" + }, + { + "markdown": "Foo\n-------------------------\n\nFoo\n=\n", + "html": "

    Foo

    \n

    Foo

    \n", + "example": 83, + "start_line": 1387, + "end_line": 1396, + "section": "Setext headings" + }, + { + "markdown": " Foo\n---\n\n Foo\n-----\n\n Foo\n ===\n", + "html": "

    Foo

    \n

    Foo

    \n

    Foo

    \n", + "example": 84, + "start_line": 1402, + "end_line": 1415, + "section": "Setext headings" + }, + { + "markdown": " Foo\n ---\n\n Foo\n---\n", + "html": "
    Foo\n---\n\nFoo\n
    \n
    \n", + "example": 85, + "start_line": 1420, + "end_line": 1433, + "section": "Setext headings" + }, + { + "markdown": "Foo\n ---- \n", + "html": "

    Foo

    \n", + "example": 86, + "start_line": 1439, + "end_line": 1444, + "section": "Setext headings" + }, + { + "markdown": "Foo\n ---\n", + "html": "

    Foo\n---

    \n", + "example": 87, + "start_line": 1449, + "end_line": 1455, + "section": "Setext headings" + }, + { + "markdown": "Foo\n= =\n\nFoo\n--- -\n", + "html": "

    Foo\n= =

    \n

    Foo

    \n
    \n", + "example": 88, + "start_line": 1460, + "end_line": 1471, + "section": "Setext headings" + }, + { + "markdown": "Foo \n-----\n", + "html": "

    Foo

    \n", + "example": 89, + "start_line": 1476, + "end_line": 1481, + "section": "Setext headings" + }, + { + "markdown": "Foo\\\n----\n", + "html": "

    Foo\\

    \n", + "example": 90, + "start_line": 1486, + "end_line": 1491, + "section": "Setext headings" + }, + { + "markdown": "`Foo\n----\n`\n\n\n", + "html": "

    `Foo

    \n

    `

    \n

    <a title="a lot

    \n

    of dashes"/>

    \n", + "example": 91, + "start_line": 1497, + "end_line": 1510, + "section": "Setext headings" + }, + { + "markdown": "> Foo\n---\n", + "html": "
    \n

    Foo

    \n
    \n
    \n", + "example": 92, + "start_line": 1516, + "end_line": 1524, + "section": "Setext headings" + }, + { + "markdown": "> foo\nbar\n===\n", + "html": "
    \n

    foo\nbar\n===

    \n
    \n", + "example": 93, + "start_line": 1527, + "end_line": 1537, + "section": "Setext headings" + }, + { + "markdown": "- Foo\n---\n", + "html": "
      \n
    • Foo
    • \n
    \n
    \n", + "example": 94, + "start_line": 1540, + "end_line": 1548, + "section": "Setext headings" + }, + { + "markdown": "Foo\nBar\n---\n", + "html": "

    Foo\nBar

    \n", + "example": 95, + "start_line": 1555, + "end_line": 1562, + "section": "Setext headings" + }, + { + "markdown": "---\nFoo\n---\nBar\n---\nBaz\n", + "html": "
    \n

    Foo

    \n

    Bar

    \n

    Baz

    \n", + "example": 96, + "start_line": 1568, + "end_line": 1580, + "section": "Setext headings" + }, + { + "markdown": "\n====\n", + "html": "

    ====

    \n", + "example": 97, + "start_line": 1585, + "end_line": 1590, + "section": "Setext headings" + }, + { + "markdown": "---\n---\n", + "html": "
    \n
    \n", + "example": 98, + "start_line": 1597, + "end_line": 1603, + "section": "Setext headings" + }, + { + "markdown": "- foo\n-----\n", + "html": "
      \n
    • foo
    • \n
    \n
    \n", + "example": 99, + "start_line": 1606, + "end_line": 1614, + "section": "Setext headings" + }, + { + "markdown": " foo\n---\n", + "html": "
    foo\n
    \n
    \n", + "example": 100, + "start_line": 1617, + "end_line": 1624, + "section": "Setext headings" + }, + { + "markdown": "> foo\n-----\n", + "html": "
    \n

    foo

    \n
    \n
    \n", + "example": 101, + "start_line": 1627, + "end_line": 1635, + "section": "Setext headings" + }, + { + "markdown": "\\> foo\n------\n", + "html": "

    > foo

    \n", + "example": 102, + "start_line": 1641, + "end_line": 1646, + "section": "Setext headings" + }, + { + "markdown": "Foo\n\nbar\n---\nbaz\n", + "html": "

    Foo

    \n

    bar

    \n

    baz

    \n", + "example": 103, + "start_line": 1672, + "end_line": 1682, + "section": "Setext headings" + }, + { + "markdown": "Foo\nbar\n\n---\n\nbaz\n", + "html": "

    Foo\nbar

    \n
    \n

    baz

    \n", + "example": 104, + "start_line": 1688, + "end_line": 1700, + "section": "Setext headings" + }, + { + "markdown": "Foo\nbar\n* * *\nbaz\n", + "html": "

    Foo\nbar

    \n
    \n

    baz

    \n", + "example": 105, + "start_line": 1706, + "end_line": 1716, + "section": "Setext headings" + }, + { + "markdown": "Foo\nbar\n\\---\nbaz\n", + "html": "

    Foo\nbar\n---\nbaz

    \n", + "example": 106, + "start_line": 1721, + "end_line": 1731, + "section": "Setext headings" + }, + { + "markdown": " a simple\n indented code block\n", + "html": "
    a simple\n  indented code block\n
    \n", + "example": 107, + "start_line": 1749, + "end_line": 1756, + "section": "Indented code blocks" + }, + { + "markdown": " - foo\n\n bar\n", + "html": "
      \n
    • \n

      foo

      \n

      bar

      \n
    • \n
    \n", + "example": 108, + "start_line": 1763, + "end_line": 1774, + "section": "Indented code blocks" + }, + { + "markdown": "1. foo\n\n - bar\n", + "html": "
      \n
    1. \n

      foo

      \n
        \n
      • bar
      • \n
      \n
    2. \n
    \n", + "example": 109, + "start_line": 1777, + "end_line": 1790, + "section": "Indented code blocks" + }, + { + "markdown": "
    \n *hi*\n\n - one\n", + "html": "
    <a/>\n*hi*\n\n- one\n
    \n", + "example": 110, + "start_line": 1797, + "end_line": 1808, + "section": "Indented code blocks" + }, + { + "markdown": " chunk1\n\n chunk2\n \n \n \n chunk3\n", + "html": "
    chunk1\n\nchunk2\n\n\n\nchunk3\n
    \n", + "example": 111, + "start_line": 1813, + "end_line": 1830, + "section": "Indented code blocks" + }, + { + "markdown": " chunk1\n \n chunk2\n", + "html": "
    chunk1\n  \n  chunk2\n
    \n", + "example": 112, + "start_line": 1836, + "end_line": 1845, + "section": "Indented code blocks" + }, + { + "markdown": "Foo\n bar\n\n", + "html": "

    Foo\nbar

    \n", + "example": 113, + "start_line": 1851, + "end_line": 1858, + "section": "Indented code blocks" + }, + { + "markdown": " foo\nbar\n", + "html": "
    foo\n
    \n

    bar

    \n", + "example": 114, + "start_line": 1865, + "end_line": 1872, + "section": "Indented code blocks" + }, + { + "markdown": "# Heading\n foo\nHeading\n------\n foo\n----\n", + "html": "

    Heading

    \n
    foo\n
    \n

    Heading

    \n
    foo\n
    \n
    \n", + "example": 115, + "start_line": 1878, + "end_line": 1893, + "section": "Indented code blocks" + }, + { + "markdown": " foo\n bar\n", + "html": "
        foo\nbar\n
    \n", + "example": 116, + "start_line": 1898, + "end_line": 1905, + "section": "Indented code blocks" + }, + { + "markdown": "\n \n foo\n \n\n", + "html": "
    foo\n
    \n", + "example": 117, + "start_line": 1911, + "end_line": 1920, + "section": "Indented code blocks" + }, + { + "markdown": " foo \n", + "html": "
    foo  \n
    \n", + "example": 118, + "start_line": 1925, + "end_line": 1930, + "section": "Indented code blocks" + }, + { + "markdown": "```\n<\n >\n```\n", + "html": "
    <\n >\n
    \n", + "example": 119, + "start_line": 1980, + "end_line": 1989, + "section": "Fenced code blocks" + }, + { + "markdown": "~~~\n<\n >\n~~~\n", + "html": "
    <\n >\n
    \n", + "example": 120, + "start_line": 1994, + "end_line": 2003, + "section": "Fenced code blocks" + }, + { + "markdown": "``\nfoo\n``\n", + "html": "

    foo

    \n", + "example": 121, + "start_line": 2007, + "end_line": 2013, + "section": "Fenced code blocks" + }, + { + "markdown": "```\naaa\n~~~\n```\n", + "html": "
    aaa\n~~~\n
    \n", + "example": 122, + "start_line": 2018, + "end_line": 2027, + "section": "Fenced code blocks" + }, + { + "markdown": "~~~\naaa\n```\n~~~\n", + "html": "
    aaa\n```\n
    \n", + "example": 123, + "start_line": 2030, + "end_line": 2039, + "section": "Fenced code blocks" + }, + { + "markdown": "````\naaa\n```\n``````\n", + "html": "
    aaa\n```\n
    \n", + "example": 124, + "start_line": 2044, + "end_line": 2053, + "section": "Fenced code blocks" + }, + { + "markdown": "~~~~\naaa\n~~~\n~~~~\n", + "html": "
    aaa\n~~~\n
    \n", + "example": 125, + "start_line": 2056, + "end_line": 2065, + "section": "Fenced code blocks" + }, + { + "markdown": "```\n", + "html": "
    \n", + "example": 126, + "start_line": 2071, + "end_line": 2075, + "section": "Fenced code blocks" + }, + { + "markdown": "`````\n\n```\naaa\n", + "html": "
    \n```\naaa\n
    \n", + "example": 127, + "start_line": 2078, + "end_line": 2088, + "section": "Fenced code blocks" + }, + { + "markdown": "> ```\n> aaa\n\nbbb\n", + "html": "
    \n
    aaa\n
    \n
    \n

    bbb

    \n", + "example": 128, + "start_line": 2091, + "end_line": 2102, + "section": "Fenced code blocks" + }, + { + "markdown": "```\n\n \n```\n", + "html": "
    \n  \n
    \n", + "example": 129, + "start_line": 2107, + "end_line": 2116, + "section": "Fenced code blocks" + }, + { + "markdown": "```\n```\n", + "html": "
    \n", + "example": 130, + "start_line": 2121, + "end_line": 2126, + "section": "Fenced code blocks" + }, + { + "markdown": " ```\n aaa\naaa\n```\n", + "html": "
    aaa\naaa\n
    \n", + "example": 131, + "start_line": 2133, + "end_line": 2142, + "section": "Fenced code blocks" + }, + { + "markdown": " ```\naaa\n aaa\naaa\n ```\n", + "html": "
    aaa\naaa\naaa\n
    \n", + "example": 132, + "start_line": 2145, + "end_line": 2156, + "section": "Fenced code blocks" + }, + { + "markdown": " ```\n aaa\n aaa\n aaa\n ```\n", + "html": "
    aaa\n aaa\naaa\n
    \n", + "example": 133, + "start_line": 2159, + "end_line": 2170, + "section": "Fenced code blocks" + }, + { + "markdown": " ```\n aaa\n ```\n", + "html": "
    ```\naaa\n```\n
    \n", + "example": 134, + "start_line": 2175, + "end_line": 2184, + "section": "Fenced code blocks" + }, + { + "markdown": "```\naaa\n ```\n", + "html": "
    aaa\n
    \n", + "example": 135, + "start_line": 2190, + "end_line": 2197, + "section": "Fenced code blocks" + }, + { + "markdown": " ```\naaa\n ```\n", + "html": "
    aaa\n
    \n", + "example": 136, + "start_line": 2200, + "end_line": 2207, + "section": "Fenced code blocks" + }, + { + "markdown": "```\naaa\n ```\n", + "html": "
    aaa\n    ```\n
    \n", + "example": 137, + "start_line": 2212, + "end_line": 2220, + "section": "Fenced code blocks" + }, + { + "markdown": "``` ```\naaa\n", + "html": "

    \naaa

    \n", + "example": 138, + "start_line": 2226, + "end_line": 2232, + "section": "Fenced code blocks" + }, + { + "markdown": "~~~~~~\naaa\n~~~ ~~\n", + "html": "
    aaa\n~~~ ~~\n
    \n", + "example": 139, + "start_line": 2235, + "end_line": 2243, + "section": "Fenced code blocks" + }, + { + "markdown": "foo\n```\nbar\n```\nbaz\n", + "html": "

    foo

    \n
    bar\n
    \n

    baz

    \n", + "example": 140, + "start_line": 2249, + "end_line": 2260, + "section": "Fenced code blocks" + }, + { + "markdown": "foo\n---\n~~~\nbar\n~~~\n# baz\n", + "html": "

    foo

    \n
    bar\n
    \n

    baz

    \n", + "example": 141, + "start_line": 2266, + "end_line": 2278, + "section": "Fenced code blocks" + }, + { + "markdown": "```ruby\ndef foo(x)\n return 3\nend\n```\n", + "html": "
    def foo(x)\n  return 3\nend\n
    \n", + "example": 142, + "start_line": 2288, + "end_line": 2299, + "section": "Fenced code blocks" + }, + { + "markdown": "~~~~ ruby startline=3 $%@#$\ndef foo(x)\n return 3\nend\n~~~~~~~\n", + "html": "
    def foo(x)\n  return 3\nend\n
    \n", + "example": 143, + "start_line": 2302, + "end_line": 2313, + "section": "Fenced code blocks" + }, + { + "markdown": "````;\n````\n", + "html": "
    \n", + "example": 144, + "start_line": 2316, + "end_line": 2321, + "section": "Fenced code blocks" + }, + { + "markdown": "``` aa ```\nfoo\n", + "html": "

    aa\nfoo

    \n", + "example": 145, + "start_line": 2326, + "end_line": 2332, + "section": "Fenced code blocks" + }, + { + "markdown": "~~~ aa ``` ~~~\nfoo\n~~~\n", + "html": "
    foo\n
    \n", + "example": 146, + "start_line": 2337, + "end_line": 2344, + "section": "Fenced code blocks" + }, + { + "markdown": "```\n``` aaa\n```\n", + "html": "
    ``` aaa\n
    \n", + "example": 147, + "start_line": 2349, + "end_line": 2356, + "section": "Fenced code blocks" + }, + { + "markdown": "
    \n
    \n**Hello**,\n\n_world_.\n
    \n
    \n", + "html": "
    \n
    \n**Hello**,\n

    world.\n

    \n
    \n", + "example": 148, + "start_line": 2428, + "end_line": 2443, + "section": "HTML blocks" + }, + { + "markdown": "\n \n \n \n
    \n hi\n
    \n\nokay.\n", + "html": "\n \n \n \n
    \n hi\n
    \n

    okay.

    \n", + "example": 149, + "start_line": 2457, + "end_line": 2476, + "section": "HTML blocks" + }, + { + "markdown": "
    \n*foo*\n", + "example": 151, + "start_line": 2492, + "end_line": 2498, + "section": "HTML blocks" + }, + { + "markdown": "
    \n\n*Markdown*\n\n
    \n", + "html": "
    \n

    Markdown

    \n
    \n", + "example": 152, + "start_line": 2503, + "end_line": 2513, + "section": "HTML blocks" + }, + { + "markdown": "
    \n
    \n", + "html": "
    \n
    \n", + "example": 153, + "start_line": 2519, + "end_line": 2527, + "section": "HTML blocks" + }, + { + "markdown": "
    \n
    \n", + "html": "
    \n
    \n", + "example": 154, + "start_line": 2530, + "end_line": 2538, + "section": "HTML blocks" + }, + { + "markdown": "
    \n*foo*\n\n*bar*\n", + "html": "
    \n*foo*\n

    bar

    \n", + "example": 155, + "start_line": 2542, + "end_line": 2551, + "section": "HTML blocks" + }, + { + "markdown": "
    \n", + "html": "\n", + "example": 159, + "start_line": 2591, + "end_line": 2595, + "section": "HTML blocks" + }, + { + "markdown": "
    \nfoo\n
    \n", + "html": "
    \nfoo\n
    \n", + "example": 160, + "start_line": 2598, + "end_line": 2606, + "section": "HTML blocks" + }, + { + "markdown": "
    \n``` c\nint x = 33;\n```\n", + "html": "
    \n``` c\nint x = 33;\n```\n", + "example": 161, + "start_line": 2615, + "end_line": 2625, + "section": "HTML blocks" + }, + { + "markdown": "\n*bar*\n\n", + "html": "\n*bar*\n\n", + "example": 162, + "start_line": 2632, + "end_line": 2640, + "section": "HTML blocks" + }, + { + "markdown": "\n*bar*\n\n", + "html": "\n*bar*\n\n", + "example": 163, + "start_line": 2645, + "end_line": 2653, + "section": "HTML blocks" + }, + { + "markdown": "\n*bar*\n\n", + "html": "\n*bar*\n\n", + "example": 164, + "start_line": 2656, + "end_line": 2664, + "section": "HTML blocks" + }, + { + "markdown": "\n*bar*\n", + "html": "\n*bar*\n", + "example": 165, + "start_line": 2667, + "end_line": 2673, + "section": "HTML blocks" + }, + { + "markdown": "\n*foo*\n\n", + "html": "\n*foo*\n\n", + "example": 166, + "start_line": 2682, + "end_line": 2690, + "section": "HTML blocks" + }, + { + "markdown": "\n\n*foo*\n\n\n", + "html": "\n

    foo

    \n
    \n", + "example": 167, + "start_line": 2697, + "end_line": 2707, + "section": "HTML blocks" + }, + { + "markdown": "*foo*\n", + "html": "

    foo

    \n", + "example": 168, + "start_line": 2715, + "end_line": 2719, + "section": "HTML blocks" + }, + { + "markdown": "
    \nimport Text.HTML.TagSoup\n\nmain :: IO ()\nmain = print $ parseTags tags\n
    \nokay\n", + "html": "
    \nimport Text.HTML.TagSoup\n\nmain :: IO ()\nmain = print $ parseTags tags\n
    \n

    okay

    \n", + "example": 169, + "start_line": 2731, + "end_line": 2747, + "section": "HTML blocks" + }, + { + "markdown": "\nokay\n", + "html": "\n

    okay

    \n", + "example": 170, + "start_line": 2752, + "end_line": 2766, + "section": "HTML blocks" + }, + { + "markdown": "\n", + "html": "\n", + "example": 171, + "start_line": 2771, + "end_line": 2787, + "section": "HTML blocks" + }, + { + "markdown": "\nh1 {color:red;}\n\np {color:blue;}\n\nokay\n", + "html": "\nh1 {color:red;}\n\np {color:blue;}\n\n

    okay

    \n", + "example": 172, + "start_line": 2791, + "end_line": 2807, + "section": "HTML blocks" + }, + { + "markdown": "\n\nfoo\n", + "html": "\n\nfoo\n", + "example": 173, + "start_line": 2814, + "end_line": 2824, + "section": "HTML blocks" + }, + { + "markdown": ">
    \n> foo\n\nbar\n", + "html": "
    \n
    \nfoo\n
    \n

    bar

    \n", + "example": 174, + "start_line": 2827, + "end_line": 2838, + "section": "HTML blocks" + }, + { + "markdown": "-
    \n- foo\n", + "html": "
      \n
    • \n
      \n
    • \n
    • foo
    • \n
    \n", + "example": 175, + "start_line": 2841, + "end_line": 2851, + "section": "HTML blocks" + }, + { + "markdown": "\n*foo*\n", + "html": "\n

    foo

    \n", + "example": 176, + "start_line": 2856, + "end_line": 2862, + "section": "HTML blocks" + }, + { + "markdown": "*bar*\n*baz*\n", + "html": "*bar*\n

    baz

    \n", + "example": 177, + "start_line": 2865, + "end_line": 2871, + "section": "HTML blocks" + }, + { + "markdown": "1. *bar*\n", + "html": "1. *bar*\n", + "example": 178, + "start_line": 2877, + "end_line": 2885, + "section": "HTML blocks" + }, + { + "markdown": "\nokay\n", + "html": "\n

    okay

    \n", + "example": 179, + "start_line": 2890, + "end_line": 2902, + "section": "HTML blocks" + }, + { + "markdown": "';\n\n?>\nokay\n", + "html": "';\n\n?>\n

    okay

    \n", + "example": 180, + "start_line": 2908, + "end_line": 2922, + "section": "HTML blocks" + }, + { + "markdown": "\n", + "html": "\n", + "example": 181, + "start_line": 2927, + "end_line": 2931, + "section": "HTML blocks" + }, + { + "markdown": "\nokay\n", + "html": "\n

    okay

    \n", + "example": 182, + "start_line": 2936, + "end_line": 2964, + "section": "HTML blocks" + }, + { + "markdown": " \n\n \n", + "html": " \n
    <!-- foo -->\n
    \n", + "example": 183, + "start_line": 2970, + "end_line": 2978, + "section": "HTML blocks" + }, + { + "markdown": "
    \n\n
    \n", + "html": "
    \n
    <div>\n
    \n", + "example": 184, + "start_line": 2981, + "end_line": 2989, + "section": "HTML blocks" + }, + { + "markdown": "Foo\n
    \nbar\n
    \n", + "html": "

    Foo

    \n
    \nbar\n
    \n", + "example": 185, + "start_line": 2995, + "end_line": 3005, + "section": "HTML blocks" + }, + { + "markdown": "
    \nbar\n
    \n*foo*\n", + "html": "
    \nbar\n
    \n*foo*\n", + "example": 186, + "start_line": 3012, + "end_line": 3022, + "section": "HTML blocks" + }, + { + "markdown": "Foo\n\nbaz\n", + "html": "

    Foo\n\nbaz

    \n", + "example": 187, + "start_line": 3027, + "end_line": 3035, + "section": "HTML blocks" + }, + { + "markdown": "
    \n\n*Emphasized* text.\n\n
    \n", + "html": "
    \n

    Emphasized text.

    \n
    \n", + "example": 188, + "start_line": 3068, + "end_line": 3078, + "section": "HTML blocks" + }, + { + "markdown": "
    \n*Emphasized* text.\n
    \n", + "html": "
    \n*Emphasized* text.\n
    \n", + "example": 189, + "start_line": 3081, + "end_line": 3089, + "section": "HTML blocks" + }, + { + "markdown": "\n\n\n\n\n\n\n\n
    \nHi\n
    \n", + "html": "\n\n\n\n
    \nHi\n
    \n", + "example": 190, + "start_line": 3103, + "end_line": 3123, + "section": "HTML blocks" + }, + { + "markdown": "\n\n \n\n \n\n \n\n
    \n Hi\n
    \n", + "html": "\n \n
    <td>\n  Hi\n</td>\n
    \n \n
    \n", + "example": 191, + "start_line": 3130, + "end_line": 3151, + "section": "HTML blocks" + }, + { + "markdown": "[foo]: /url \"title\"\n\n[foo]\n", + "html": "

    foo

    \n", + "example": 192, + "start_line": 3179, + "end_line": 3185, + "section": "Link reference definitions" + }, + { + "markdown": " [foo]: \n /url \n 'the title' \n\n[foo]\n", + "html": "

    foo

    \n", + "example": 193, + "start_line": 3188, + "end_line": 3196, + "section": "Link reference definitions" + }, + { + "markdown": "[Foo*bar\\]]:my_(url) 'title (with parens)'\n\n[Foo*bar\\]]\n", + "html": "

    Foo*bar]

    \n", + "example": 194, + "start_line": 3199, + "end_line": 3205, + "section": "Link reference definitions" + }, + { + "markdown": "[Foo bar]:\n\n'title'\n\n[Foo bar]\n", + "html": "

    Foo bar

    \n", + "example": 195, + "start_line": 3208, + "end_line": 3216, + "section": "Link reference definitions" + }, + { + "markdown": "[foo]: /url '\ntitle\nline1\nline2\n'\n\n[foo]\n", + "html": "

    foo

    \n", + "example": 196, + "start_line": 3221, + "end_line": 3235, + "section": "Link reference definitions" + }, + { + "markdown": "[foo]: /url 'title\n\nwith blank line'\n\n[foo]\n", + "html": "

    [foo]: /url 'title

    \n

    with blank line'

    \n

    [foo]

    \n", + "example": 197, + "start_line": 3240, + "end_line": 3250, + "section": "Link reference definitions" + }, + { + "markdown": "[foo]:\n/url\n\n[foo]\n", + "html": "

    foo

    \n", + "example": 198, + "start_line": 3255, + "end_line": 3262, + "section": "Link reference definitions" + }, + { + "markdown": "[foo]:\n\n[foo]\n", + "html": "

    [foo]:

    \n

    [foo]

    \n", + "example": 199, + "start_line": 3267, + "end_line": 3274, + "section": "Link reference definitions" + }, + { + "markdown": "[foo]: <>\n\n[foo]\n", + "html": "

    foo

    \n", + "example": 200, + "start_line": 3279, + "end_line": 3285, + "section": "Link reference definitions" + }, + { + "markdown": "[foo]: (baz)\n\n[foo]\n", + "html": "

    [foo]: (baz)

    \n

    [foo]

    \n", + "example": 201, + "start_line": 3290, + "end_line": 3297, + "section": "Link reference definitions" + }, + { + "markdown": "[foo]: /url\\bar\\*baz \"foo\\\"bar\\baz\"\n\n[foo]\n", + "html": "

    foo

    \n", + "example": 202, + "start_line": 3303, + "end_line": 3309, + "section": "Link reference definitions" + }, + { + "markdown": "[foo]\n\n[foo]: url\n", + "html": "

    foo

    \n", + "example": 203, + "start_line": 3314, + "end_line": 3320, + "section": "Link reference definitions" + }, + { + "markdown": "[foo]\n\n[foo]: first\n[foo]: second\n", + "html": "

    foo

    \n", + "example": 204, + "start_line": 3326, + "end_line": 3333, + "section": "Link reference definitions" + }, + { + "markdown": "[FOO]: /url\n\n[Foo]\n", + "html": "

    Foo

    \n", + "example": 205, + "start_line": 3339, + "end_line": 3345, + "section": "Link reference definitions" + }, + { + "markdown": "[ΑΓΩ]: /φου\n\n[αγω]\n", + "html": "

    αγω

    \n", + "example": 206, + "start_line": 3348, + "end_line": 3354, + "section": "Link reference definitions" + }, + { + "markdown": "[foo]: /url\n", + "html": "", + "example": 207, + "start_line": 3363, + "end_line": 3366, + "section": "Link reference definitions" + }, + { + "markdown": "[\nfoo\n]: /url\nbar\n", + "html": "

    bar

    \n", + "example": 208, + "start_line": 3371, + "end_line": 3378, + "section": "Link reference definitions" + }, + { + "markdown": "[foo]: /url \"title\" ok\n", + "html": "

    [foo]: /url "title" ok

    \n", + "example": 209, + "start_line": 3384, + "end_line": 3388, + "section": "Link reference definitions" + }, + { + "markdown": "[foo]: /url\n\"title\" ok\n", + "html": "

    "title" ok

    \n", + "example": 210, + "start_line": 3393, + "end_line": 3398, + "section": "Link reference definitions" + }, + { + "markdown": " [foo]: /url \"title\"\n\n[foo]\n", + "html": "
    [foo]: /url "title"\n
    \n

    [foo]

    \n", + "example": 211, + "start_line": 3404, + "end_line": 3412, + "section": "Link reference definitions" + }, + { + "markdown": "```\n[foo]: /url\n```\n\n[foo]\n", + "html": "
    [foo]: /url\n
    \n

    [foo]

    \n", + "example": 212, + "start_line": 3418, + "end_line": 3428, + "section": "Link reference definitions" + }, + { + "markdown": "Foo\n[bar]: /baz\n\n[bar]\n", + "html": "

    Foo\n[bar]: /baz

    \n

    [bar]

    \n", + "example": 213, + "start_line": 3433, + "end_line": 3442, + "section": "Link reference definitions" + }, + { + "markdown": "# [Foo]\n[foo]: /url\n> bar\n", + "html": "

    Foo

    \n
    \n

    bar

    \n
    \n", + "example": 214, + "start_line": 3448, + "end_line": 3457, + "section": "Link reference definitions" + }, + { + "markdown": "[foo]: /url\nbar\n===\n[foo]\n", + "html": "

    bar

    \n

    foo

    \n", + "example": 215, + "start_line": 3459, + "end_line": 3467, + "section": "Link reference definitions" + }, + { + "markdown": "[foo]: /url\n===\n[foo]\n", + "html": "

    ===\nfoo

    \n", + "example": 216, + "start_line": 3469, + "end_line": 3476, + "section": "Link reference definitions" + }, + { + "markdown": "[foo]: /foo-url \"foo\"\n[bar]: /bar-url\n \"bar\"\n[baz]: /baz-url\n\n[foo],\n[bar],\n[baz]\n", + "html": "

    foo,\nbar,\nbaz

    \n", + "example": 217, + "start_line": 3482, + "end_line": 3495, + "section": "Link reference definitions" + }, + { + "markdown": "[foo]\n\n> [foo]: /url\n", + "html": "

    foo

    \n
    \n
    \n", + "example": 218, + "start_line": 3503, + "end_line": 3511, + "section": "Link reference definitions" + }, + { + "markdown": "aaa\n\nbbb\n", + "html": "

    aaa

    \n

    bbb

    \n", + "example": 219, + "start_line": 3525, + "end_line": 3532, + "section": "Paragraphs" + }, + { + "markdown": "aaa\nbbb\n\nccc\nddd\n", + "html": "

    aaa\nbbb

    \n

    ccc\nddd

    \n", + "example": 220, + "start_line": 3537, + "end_line": 3548, + "section": "Paragraphs" + }, + { + "markdown": "aaa\n\n\nbbb\n", + "html": "

    aaa

    \n

    bbb

    \n", + "example": 221, + "start_line": 3553, + "end_line": 3561, + "section": "Paragraphs" + }, + { + "markdown": " aaa\n bbb\n", + "html": "

    aaa\nbbb

    \n", + "example": 222, + "start_line": 3566, + "end_line": 3572, + "section": "Paragraphs" + }, + { + "markdown": "aaa\n bbb\n ccc\n", + "html": "

    aaa\nbbb\nccc

    \n", + "example": 223, + "start_line": 3578, + "end_line": 3586, + "section": "Paragraphs" + }, + { + "markdown": " aaa\nbbb\n", + "html": "

    aaa\nbbb

    \n", + "example": 224, + "start_line": 3592, + "end_line": 3598, + "section": "Paragraphs" + }, + { + "markdown": " aaa\nbbb\n", + "html": "
    aaa\n
    \n

    bbb

    \n", + "example": 225, + "start_line": 3601, + "end_line": 3608, + "section": "Paragraphs" + }, + { + "markdown": "aaa \nbbb \n", + "html": "

    aaa
    \nbbb

    \n", + "example": 226, + "start_line": 3615, + "end_line": 3621, + "section": "Paragraphs" + }, + { + "markdown": " \n\naaa\n \n\n# aaa\n\n \n", + "html": "

    aaa

    \n

    aaa

    \n", + "example": 227, + "start_line": 3632, + "end_line": 3644, + "section": "Blank lines" + }, + { + "markdown": "> # Foo\n> bar\n> baz\n", + "html": "
    \n

    Foo

    \n

    bar\nbaz

    \n
    \n", + "example": 228, + "start_line": 3700, + "end_line": 3710, + "section": "Block quotes" + }, + { + "markdown": "># Foo\n>bar\n> baz\n", + "html": "
    \n

    Foo

    \n

    bar\nbaz

    \n
    \n", + "example": 229, + "start_line": 3715, + "end_line": 3725, + "section": "Block quotes" + }, + { + "markdown": " > # Foo\n > bar\n > baz\n", + "html": "
    \n

    Foo

    \n

    bar\nbaz

    \n
    \n", + "example": 230, + "start_line": 3730, + "end_line": 3740, + "section": "Block quotes" + }, + { + "markdown": " > # Foo\n > bar\n > baz\n", + "html": "
    > # Foo\n> bar\n> baz\n
    \n", + "example": 231, + "start_line": 3745, + "end_line": 3754, + "section": "Block quotes" + }, + { + "markdown": "> # Foo\n> bar\nbaz\n", + "html": "
    \n

    Foo

    \n

    bar\nbaz

    \n
    \n", + "example": 232, + "start_line": 3760, + "end_line": 3770, + "section": "Block quotes" + }, + { + "markdown": "> bar\nbaz\n> foo\n", + "html": "
    \n

    bar\nbaz\nfoo

    \n
    \n", + "example": 233, + "start_line": 3776, + "end_line": 3786, + "section": "Block quotes" + }, + { + "markdown": "> foo\n---\n", + "html": "
    \n

    foo

    \n
    \n
    \n", + "example": 234, + "start_line": 3800, + "end_line": 3808, + "section": "Block quotes" + }, + { + "markdown": "> - foo\n- bar\n", + "html": "
    \n
      \n
    • foo
    • \n
    \n
    \n
      \n
    • bar
    • \n
    \n", + "example": 235, + "start_line": 3820, + "end_line": 3832, + "section": "Block quotes" + }, + { + "markdown": "> foo\n bar\n", + "html": "
    \n
    foo\n
    \n
    \n
    bar\n
    \n", + "example": 236, + "start_line": 3838, + "end_line": 3848, + "section": "Block quotes" + }, + { + "markdown": "> ```\nfoo\n```\n", + "html": "
    \n
    \n
    \n

    foo

    \n
    \n", + "example": 237, + "start_line": 3851, + "end_line": 3861, + "section": "Block quotes" + }, + { + "markdown": "> foo\n - bar\n", + "html": "
    \n

    foo\n- bar

    \n
    \n", + "example": 238, + "start_line": 3867, + "end_line": 3875, + "section": "Block quotes" + }, + { + "markdown": ">\n", + "html": "
    \n
    \n", + "example": 239, + "start_line": 3891, + "end_line": 3896, + "section": "Block quotes" + }, + { + "markdown": ">\n> \n> \n", + "html": "
    \n
    \n", + "example": 240, + "start_line": 3899, + "end_line": 3906, + "section": "Block quotes" + }, + { + "markdown": ">\n> foo\n> \n", + "html": "
    \n

    foo

    \n
    \n", + "example": 241, + "start_line": 3911, + "end_line": 3919, + "section": "Block quotes" + }, + { + "markdown": "> foo\n\n> bar\n", + "html": "
    \n

    foo

    \n
    \n
    \n

    bar

    \n
    \n", + "example": 242, + "start_line": 3924, + "end_line": 3935, + "section": "Block quotes" + }, + { + "markdown": "> foo\n> bar\n", + "html": "
    \n

    foo\nbar

    \n
    \n", + "example": 243, + "start_line": 3946, + "end_line": 3954, + "section": "Block quotes" + }, + { + "markdown": "> foo\n>\n> bar\n", + "html": "
    \n

    foo

    \n

    bar

    \n
    \n", + "example": 244, + "start_line": 3959, + "end_line": 3968, + "section": "Block quotes" + }, + { + "markdown": "foo\n> bar\n", + "html": "

    foo

    \n
    \n

    bar

    \n
    \n", + "example": 245, + "start_line": 3973, + "end_line": 3981, + "section": "Block quotes" + }, + { + "markdown": "> aaa\n***\n> bbb\n", + "html": "
    \n

    aaa

    \n
    \n
    \n
    \n

    bbb

    \n
    \n", + "example": 246, + "start_line": 3987, + "end_line": 3999, + "section": "Block quotes" + }, + { + "markdown": "> bar\nbaz\n", + "html": "
    \n

    bar\nbaz

    \n
    \n", + "example": 247, + "start_line": 4005, + "end_line": 4013, + "section": "Block quotes" + }, + { + "markdown": "> bar\n\nbaz\n", + "html": "
    \n

    bar

    \n
    \n

    baz

    \n", + "example": 248, + "start_line": 4016, + "end_line": 4025, + "section": "Block quotes" + }, + { + "markdown": "> bar\n>\nbaz\n", + "html": "
    \n

    bar

    \n
    \n

    baz

    \n", + "example": 249, + "start_line": 4028, + "end_line": 4037, + "section": "Block quotes" + }, + { + "markdown": "> > > foo\nbar\n", + "html": "
    \n
    \n
    \n

    foo\nbar

    \n
    \n
    \n
    \n", + "example": 250, + "start_line": 4044, + "end_line": 4056, + "section": "Block quotes" + }, + { + "markdown": ">>> foo\n> bar\n>>baz\n", + "html": "
    \n
    \n
    \n

    foo\nbar\nbaz

    \n
    \n
    \n
    \n", + "example": 251, + "start_line": 4059, + "end_line": 4073, + "section": "Block quotes" + }, + { + "markdown": "> code\n\n> not code\n", + "html": "
    \n
    code\n
    \n
    \n
    \n

    not code

    \n
    \n", + "example": 252, + "start_line": 4081, + "end_line": 4093, + "section": "Block quotes" + }, + { + "markdown": "A paragraph\nwith two lines.\n\n indented code\n\n> A block quote.\n", + "html": "

    A paragraph\nwith two lines.

    \n
    indented code\n
    \n
    \n

    A block quote.

    \n
    \n", + "example": 253, + "start_line": 4135, + "end_line": 4150, + "section": "List items" + }, + { + "markdown": "1. A paragraph\n with two lines.\n\n indented code\n\n > A block quote.\n", + "html": "
      \n
    1. \n

      A paragraph\nwith two lines.

      \n
      indented code\n
      \n
      \n

      A block quote.

      \n
      \n
    2. \n
    \n", + "example": 254, + "start_line": 4157, + "end_line": 4176, + "section": "List items" + }, + { + "markdown": "- one\n\n two\n", + "html": "
      \n
    • one
    • \n
    \n

    two

    \n", + "example": 255, + "start_line": 4190, + "end_line": 4199, + "section": "List items" + }, + { + "markdown": "- one\n\n two\n", + "html": "
      \n
    • \n

      one

      \n

      two

      \n
    • \n
    \n", + "example": 256, + "start_line": 4202, + "end_line": 4213, + "section": "List items" + }, + { + "markdown": " - one\n\n two\n", + "html": "
      \n
    • one
    • \n
    \n
     two\n
    \n", + "example": 257, + "start_line": 4216, + "end_line": 4226, + "section": "List items" + }, + { + "markdown": " - one\n\n two\n", + "html": "
      \n
    • \n

      one

      \n

      two

      \n
    • \n
    \n", + "example": 258, + "start_line": 4229, + "end_line": 4240, + "section": "List items" + }, + { + "markdown": " > > 1. one\n>>\n>> two\n", + "html": "
    \n
    \n
      \n
    1. \n

      one

      \n

      two

      \n
    2. \n
    \n
    \n
    \n", + "example": 259, + "start_line": 4251, + "end_line": 4266, + "section": "List items" + }, + { + "markdown": ">>- one\n>>\n > > two\n", + "html": "
    \n
    \n
      \n
    • one
    • \n
    \n

    two

    \n
    \n
    \n", + "example": 260, + "start_line": 4278, + "end_line": 4291, + "section": "List items" + }, + { + "markdown": "-one\n\n2.two\n", + "html": "

    -one

    \n

    2.two

    \n", + "example": 261, + "start_line": 4297, + "end_line": 4304, + "section": "List items" + }, + { + "markdown": "- foo\n\n\n bar\n", + "html": "
      \n
    • \n

      foo

      \n

      bar

      \n
    • \n
    \n", + "example": 262, + "start_line": 4310, + "end_line": 4322, + "section": "List items" + }, + { + "markdown": "1. foo\n\n ```\n bar\n ```\n\n baz\n\n > bam\n", + "html": "
      \n
    1. \n

      foo

      \n
      bar\n
      \n

      baz

      \n
      \n

      bam

      \n
      \n
    2. \n
    \n", + "example": 263, + "start_line": 4327, + "end_line": 4349, + "section": "List items" + }, + { + "markdown": "- Foo\n\n bar\n\n\n baz\n", + "html": "
      \n
    • \n

      Foo

      \n
      bar\n\n\nbaz\n
      \n
    • \n
    \n", + "example": 264, + "start_line": 4355, + "end_line": 4373, + "section": "List items" + }, + { + "markdown": "123456789. ok\n", + "html": "
      \n
    1. ok
    2. \n
    \n", + "example": 265, + "start_line": 4377, + "end_line": 4383, + "section": "List items" + }, + { + "markdown": "1234567890. not ok\n", + "html": "

    1234567890. not ok

    \n", + "example": 266, + "start_line": 4386, + "end_line": 4390, + "section": "List items" + }, + { + "markdown": "0. ok\n", + "html": "
      \n
    1. ok
    2. \n
    \n", + "example": 267, + "start_line": 4395, + "end_line": 4401, + "section": "List items" + }, + { + "markdown": "003. ok\n", + "html": "
      \n
    1. ok
    2. \n
    \n", + "example": 268, + "start_line": 4404, + "end_line": 4410, + "section": "List items" + }, + { + "markdown": "-1. not ok\n", + "html": "

    -1. not ok

    \n", + "example": 269, + "start_line": 4415, + "end_line": 4419, + "section": "List items" + }, + { + "markdown": "- foo\n\n bar\n", + "html": "
      \n
    • \n

      foo

      \n
      bar\n
      \n
    • \n
    \n", + "example": 270, + "start_line": 4438, + "end_line": 4450, + "section": "List items" + }, + { + "markdown": " 10. foo\n\n bar\n", + "html": "
      \n
    1. \n

      foo

      \n
      bar\n
      \n
    2. \n
    \n", + "example": 271, + "start_line": 4455, + "end_line": 4467, + "section": "List items" + }, + { + "markdown": " indented code\n\nparagraph\n\n more code\n", + "html": "
    indented code\n
    \n

    paragraph

    \n
    more code\n
    \n", + "example": 272, + "start_line": 4474, + "end_line": 4486, + "section": "List items" + }, + { + "markdown": "1. indented code\n\n paragraph\n\n more code\n", + "html": "
      \n
    1. \n
      indented code\n
      \n

      paragraph

      \n
      more code\n
      \n
    2. \n
    \n", + "example": 273, + "start_line": 4489, + "end_line": 4505, + "section": "List items" + }, + { + "markdown": "1. indented code\n\n paragraph\n\n more code\n", + "html": "
      \n
    1. \n
       indented code\n
      \n

      paragraph

      \n
      more code\n
      \n
    2. \n
    \n", + "example": 274, + "start_line": 4511, + "end_line": 4527, + "section": "List items" + }, + { + "markdown": " foo\n\nbar\n", + "html": "

    foo

    \n

    bar

    \n", + "example": 275, + "start_line": 4538, + "end_line": 4545, + "section": "List items" + }, + { + "markdown": "- foo\n\n bar\n", + "html": "
      \n
    • foo
    • \n
    \n

    bar

    \n", + "example": 276, + "start_line": 4548, + "end_line": 4557, + "section": "List items" + }, + { + "markdown": "- foo\n\n bar\n", + "html": "
      \n
    • \n

      foo

      \n

      bar

      \n
    • \n
    \n", + "example": 277, + "start_line": 4565, + "end_line": 4576, + "section": "List items" + }, + { + "markdown": "-\n foo\n-\n ```\n bar\n ```\n-\n baz\n", + "html": "
      \n
    • foo
    • \n
    • \n
      bar\n
      \n
    • \n
    • \n
      baz\n
      \n
    • \n
    \n", + "example": 278, + "start_line": 4592, + "end_line": 4613, + "section": "List items" + }, + { + "markdown": "- \n foo\n", + "html": "
      \n
    • foo
    • \n
    \n", + "example": 279, + "start_line": 4618, + "end_line": 4625, + "section": "List items" + }, + { + "markdown": "-\n\n foo\n", + "html": "
      \n
    • \n
    \n

    foo

    \n", + "example": 280, + "start_line": 4632, + "end_line": 4641, + "section": "List items" + }, + { + "markdown": "- foo\n-\n- bar\n", + "html": "
      \n
    • foo
    • \n
    • \n
    • bar
    • \n
    \n", + "example": 281, + "start_line": 4646, + "end_line": 4656, + "section": "List items" + }, + { + "markdown": "- foo\n- \n- bar\n", + "html": "
      \n
    • foo
    • \n
    • \n
    • bar
    • \n
    \n", + "example": 282, + "start_line": 4661, + "end_line": 4671, + "section": "List items" + }, + { + "markdown": "1. foo\n2.\n3. bar\n", + "html": "
      \n
    1. foo
    2. \n
    3. \n
    4. bar
    5. \n
    \n", + "example": 283, + "start_line": 4676, + "end_line": 4686, + "section": "List items" + }, + { + "markdown": "*\n", + "html": "
      \n
    • \n
    \n", + "example": 284, + "start_line": 4691, + "end_line": 4697, + "section": "List items" + }, + { + "markdown": "foo\n*\n\nfoo\n1.\n", + "html": "

    foo\n*

    \n

    foo\n1.

    \n", + "example": 285, + "start_line": 4701, + "end_line": 4712, + "section": "List items" + }, + { + "markdown": " 1. A paragraph\n with two lines.\n\n indented code\n\n > A block quote.\n", + "html": "
      \n
    1. \n

      A paragraph\nwith two lines.

      \n
      indented code\n
      \n
      \n

      A block quote.

      \n
      \n
    2. \n
    \n", + "example": 286, + "start_line": 4723, + "end_line": 4742, + "section": "List items" + }, + { + "markdown": " 1. A paragraph\n with two lines.\n\n indented code\n\n > A block quote.\n", + "html": "
      \n
    1. \n

      A paragraph\nwith two lines.

      \n
      indented code\n
      \n
      \n

      A block quote.

      \n
      \n
    2. \n
    \n", + "example": 287, + "start_line": 4747, + "end_line": 4766, + "section": "List items" + }, + { + "markdown": " 1. A paragraph\n with two lines.\n\n indented code\n\n > A block quote.\n", + "html": "
      \n
    1. \n

      A paragraph\nwith two lines.

      \n
      indented code\n
      \n
      \n

      A block quote.

      \n
      \n
    2. \n
    \n", + "example": 288, + "start_line": 4771, + "end_line": 4790, + "section": "List items" + }, + { + "markdown": " 1. A paragraph\n with two lines.\n\n indented code\n\n > A block quote.\n", + "html": "
    1.  A paragraph\n    with two lines.\n\n        indented code\n\n    > A block quote.\n
    \n", + "example": 289, + "start_line": 4795, + "end_line": 4810, + "section": "List items" + }, + { + "markdown": " 1. A paragraph\nwith two lines.\n\n indented code\n\n > A block quote.\n", + "html": "
      \n
    1. \n

      A paragraph\nwith two lines.

      \n
      indented code\n
      \n
      \n

      A block quote.

      \n
      \n
    2. \n
    \n", + "example": 290, + "start_line": 4825, + "end_line": 4844, + "section": "List items" + }, + { + "markdown": " 1. A paragraph\n with two lines.\n", + "html": "
      \n
    1. A paragraph\nwith two lines.
    2. \n
    \n", + "example": 291, + "start_line": 4849, + "end_line": 4857, + "section": "List items" + }, + { + "markdown": "> 1. > Blockquote\ncontinued here.\n", + "html": "
    \n
      \n
    1. \n
      \n

      Blockquote\ncontinued here.

      \n
      \n
    2. \n
    \n
    \n", + "example": 292, + "start_line": 4862, + "end_line": 4876, + "section": "List items" + }, + { + "markdown": "> 1. > Blockquote\n> continued here.\n", + "html": "
    \n
      \n
    1. \n
      \n

      Blockquote\ncontinued here.

      \n
      \n
    2. \n
    \n
    \n", + "example": 293, + "start_line": 4879, + "end_line": 4893, + "section": "List items" + }, + { + "markdown": "- foo\n - bar\n - baz\n - boo\n", + "html": "
      \n
    • foo\n
        \n
      • bar\n
          \n
        • baz\n
            \n
          • boo
          • \n
          \n
        • \n
        \n
      • \n
      \n
    • \n
    \n", + "example": 294, + "start_line": 4907, + "end_line": 4928, + "section": "List items" + }, + { + "markdown": "- foo\n - bar\n - baz\n - boo\n", + "html": "
      \n
    • foo
    • \n
    • bar
    • \n
    • baz
    • \n
    • boo
    • \n
    \n", + "example": 295, + "start_line": 4933, + "end_line": 4945, + "section": "List items" + }, + { + "markdown": "10) foo\n - bar\n", + "html": "
      \n
    1. foo\n
        \n
      • bar
      • \n
      \n
    2. \n
    \n", + "example": 296, + "start_line": 4950, + "end_line": 4961, + "section": "List items" + }, + { + "markdown": "10) foo\n - bar\n", + "html": "
      \n
    1. foo
    2. \n
    \n
      \n
    • bar
    • \n
    \n", + "example": 297, + "start_line": 4966, + "end_line": 4976, + "section": "List items" + }, + { + "markdown": "- - foo\n", + "html": "
      \n
    • \n
        \n
      • foo
      • \n
      \n
    • \n
    \n", + "example": 298, + "start_line": 4981, + "end_line": 4991, + "section": "List items" + }, + { + "markdown": "1. - 2. foo\n", + "html": "
      \n
    1. \n
        \n
      • \n
          \n
        1. foo
        2. \n
        \n
      • \n
      \n
    2. \n
    \n", + "example": 299, + "start_line": 4994, + "end_line": 5008, + "section": "List items" + }, + { + "markdown": "- # Foo\n- Bar\n ---\n baz\n", + "html": "
      \n
    • \n

      Foo

      \n
    • \n
    • \n

      Bar

      \nbaz
    • \n
    \n", + "example": 300, + "start_line": 5013, + "end_line": 5027, + "section": "List items" + }, + { + "markdown": "- foo\n- bar\n+ baz\n", + "html": "
      \n
    • foo
    • \n
    • bar
    • \n
    \n
      \n
    • baz
    • \n
    \n", + "example": 301, + "start_line": 5249, + "end_line": 5261, + "section": "Lists" + }, + { + "markdown": "1. foo\n2. bar\n3) baz\n", + "html": "
      \n
    1. foo
    2. \n
    3. bar
    4. \n
    \n
      \n
    1. baz
    2. \n
    \n", + "example": 302, + "start_line": 5264, + "end_line": 5276, + "section": "Lists" + }, + { + "markdown": "Foo\n- bar\n- baz\n", + "html": "

    Foo

    \n
      \n
    • bar
    • \n
    • baz
    • \n
    \n", + "example": 303, + "start_line": 5283, + "end_line": 5293, + "section": "Lists" + }, + { + "markdown": "The number of windows in my house is\n14. The number of doors is 6.\n", + "html": "

    The number of windows in my house is\n14. The number of doors is 6.

    \n", + "example": 304, + "start_line": 5360, + "end_line": 5366, + "section": "Lists" + }, + { + "markdown": "The number of windows in my house is\n1. The number of doors is 6.\n", + "html": "

    The number of windows in my house is

    \n
      \n
    1. The number of doors is 6.
    2. \n
    \n", + "example": 305, + "start_line": 5370, + "end_line": 5378, + "section": "Lists" + }, + { + "markdown": "- foo\n\n- bar\n\n\n- baz\n", + "html": "
      \n
    • \n

      foo

      \n
    • \n
    • \n

      bar

      \n
    • \n
    • \n

      baz

      \n
    • \n
    \n", + "example": 306, + "start_line": 5384, + "end_line": 5403, + "section": "Lists" + }, + { + "markdown": "- foo\n - bar\n - baz\n\n\n bim\n", + "html": "
      \n
    • foo\n
        \n
      • bar\n
          \n
        • \n

          baz

          \n

          bim

          \n
        • \n
        \n
      • \n
      \n
    • \n
    \n", + "example": 307, + "start_line": 5405, + "end_line": 5427, + "section": "Lists" + }, + { + "markdown": "- foo\n- bar\n\n\n\n- baz\n- bim\n", + "html": "
      \n
    • foo
    • \n
    • bar
    • \n
    \n\n
      \n
    • baz
    • \n
    • bim
    • \n
    \n", + "example": 308, + "start_line": 5435, + "end_line": 5453, + "section": "Lists" + }, + { + "markdown": "- foo\n\n notcode\n\n- foo\n\n\n\n code\n", + "html": "
      \n
    • \n

      foo

      \n

      notcode

      \n
    • \n
    • \n

      foo

      \n
    • \n
    \n\n
    code\n
    \n", + "example": 309, + "start_line": 5456, + "end_line": 5479, + "section": "Lists" + }, + { + "markdown": "- a\n - b\n - c\n - d\n - e\n - f\n- g\n", + "html": "
      \n
    • a
    • \n
    • b
    • \n
    • c
    • \n
    • d
    • \n
    • e
    • \n
    • f
    • \n
    • g
    • \n
    \n", + "example": 310, + "start_line": 5487, + "end_line": 5505, + "section": "Lists" + }, + { + "markdown": "1. a\n\n 2. b\n\n 3. c\n", + "html": "
      \n
    1. \n

      a

      \n
    2. \n
    3. \n

      b

      \n
    4. \n
    5. \n

      c

      \n
    6. \n
    \n", + "example": 311, + "start_line": 5508, + "end_line": 5526, + "section": "Lists" + }, + { + "markdown": "- a\n - b\n - c\n - d\n - e\n", + "html": "
      \n
    • a
    • \n
    • b
    • \n
    • c
    • \n
    • d\n- e
    • \n
    \n", + "example": 312, + "start_line": 5532, + "end_line": 5546, + "section": "Lists" + }, + { + "markdown": "1. a\n\n 2. b\n\n 3. c\n", + "html": "
      \n
    1. \n

      a

      \n
    2. \n
    3. \n

      b

      \n
    4. \n
    \n
    3. c\n
    \n", + "example": 313, + "start_line": 5552, + "end_line": 5569, + "section": "Lists" + }, + { + "markdown": "- a\n- b\n\n- c\n", + "html": "
      \n
    • \n

      a

      \n
    • \n
    • \n

      b

      \n
    • \n
    • \n

      c

      \n
    • \n
    \n", + "example": 314, + "start_line": 5575, + "end_line": 5592, + "section": "Lists" + }, + { + "markdown": "* a\n*\n\n* c\n", + "html": "
      \n
    • \n

      a

      \n
    • \n
    • \n
    • \n

      c

      \n
    • \n
    \n", + "example": 315, + "start_line": 5597, + "end_line": 5612, + "section": "Lists" + }, + { + "markdown": "- a\n- b\n\n c\n- d\n", + "html": "
      \n
    • \n

      a

      \n
    • \n
    • \n

      b

      \n

      c

      \n
    • \n
    • \n

      d

      \n
    • \n
    \n", + "example": 316, + "start_line": 5619, + "end_line": 5638, + "section": "Lists" + }, + { + "markdown": "- a\n- b\n\n [ref]: /url\n- d\n", + "html": "
      \n
    • \n

      a

      \n
    • \n
    • \n

      b

      \n
    • \n
    • \n

      d

      \n
    • \n
    \n", + "example": 317, + "start_line": 5641, + "end_line": 5659, + "section": "Lists" + }, + { + "markdown": "- a\n- ```\n b\n\n\n ```\n- c\n", + "html": "
      \n
    • a
    • \n
    • \n
      b\n\n\n
      \n
    • \n
    • c
    • \n
    \n", + "example": 318, + "start_line": 5664, + "end_line": 5683, + "section": "Lists" + }, + { + "markdown": "- a\n - b\n\n c\n- d\n", + "html": "
      \n
    • a\n
        \n
      • \n

        b

        \n

        c

        \n
      • \n
      \n
    • \n
    • d
    • \n
    \n", + "example": 319, + "start_line": 5690, + "end_line": 5708, + "section": "Lists" + }, + { + "markdown": "* a\n > b\n >\n* c\n", + "html": "
      \n
    • a\n
      \n

      b

      \n
      \n
    • \n
    • c
    • \n
    \n", + "example": 320, + "start_line": 5714, + "end_line": 5728, + "section": "Lists" + }, + { + "markdown": "- a\n > b\n ```\n c\n ```\n- d\n", + "html": "
      \n
    • a\n
      \n

      b

      \n
      \n
      c\n
      \n
    • \n
    • d
    • \n
    \n", + "example": 321, + "start_line": 5734, + "end_line": 5752, + "section": "Lists" + }, + { + "markdown": "- a\n", + "html": "
      \n
    • a
    • \n
    \n", + "example": 322, + "start_line": 5757, + "end_line": 5763, + "section": "Lists" + }, + { + "markdown": "- a\n - b\n", + "html": "
      \n
    • a\n
        \n
      • b
      • \n
      \n
    • \n
    \n", + "example": 323, + "start_line": 5766, + "end_line": 5777, + "section": "Lists" + }, + { + "markdown": "1. ```\n foo\n ```\n\n bar\n", + "html": "
      \n
    1. \n
      foo\n
      \n

      bar

      \n
    2. \n
    \n", + "example": 324, + "start_line": 5783, + "end_line": 5797, + "section": "Lists" + }, + { + "markdown": "* foo\n * bar\n\n baz\n", + "html": "
      \n
    • \n

      foo

      \n
        \n
      • bar
      • \n
      \n

      baz

      \n
    • \n
    \n", + "example": 325, + "start_line": 5802, + "end_line": 5817, + "section": "Lists" + }, + { + "markdown": "- a\n - b\n - c\n\n- d\n - e\n - f\n", + "html": "
      \n
    • \n

      a

      \n
        \n
      • b
      • \n
      • c
      • \n
      \n
    • \n
    • \n

      d

      \n
        \n
      • e
      • \n
      • f
      • \n
      \n
    • \n
    \n", + "example": 326, + "start_line": 5820, + "end_line": 5845, + "section": "Lists" + }, + { + "markdown": "`hi`lo`\n", + "html": "

    hilo`

    \n", + "example": 327, + "start_line": 5854, + "end_line": 5858, + "section": "Inlines" + }, + { + "markdown": "`foo`\n", + "html": "

    foo

    \n", + "example": 328, + "start_line": 5886, + "end_line": 5890, + "section": "Code spans" + }, + { + "markdown": "`` foo ` bar ``\n", + "html": "

    foo ` bar

    \n", + "example": 329, + "start_line": 5897, + "end_line": 5901, + "section": "Code spans" + }, + { + "markdown": "` `` `\n", + "html": "

    ``

    \n", + "example": 330, + "start_line": 5907, + "end_line": 5911, + "section": "Code spans" + }, + { + "markdown": "` `` `\n", + "html": "

    ``

    \n", + "example": 331, + "start_line": 5915, + "end_line": 5919, + "section": "Code spans" + }, + { + "markdown": "` a`\n", + "html": "

    a

    \n", + "example": 332, + "start_line": 5924, + "end_line": 5928, + "section": "Code spans" + }, + { + "markdown": "` b `\n", + "html": "

     b 

    \n", + "example": 333, + "start_line": 5933, + "end_line": 5937, + "section": "Code spans" + }, + { + "markdown": "` `\n` `\n", + "html": "

     \n

    \n", + "example": 334, + "start_line": 5941, + "end_line": 5947, + "section": "Code spans" + }, + { + "markdown": "``\nfoo\nbar \nbaz\n``\n", + "html": "

    foo bar baz

    \n", + "example": 335, + "start_line": 5952, + "end_line": 5960, + "section": "Code spans" + }, + { + "markdown": "``\nfoo \n``\n", + "html": "

    foo

    \n", + "example": 336, + "start_line": 5962, + "end_line": 5968, + "section": "Code spans" + }, + { + "markdown": "`foo bar \nbaz`\n", + "html": "

    foo bar baz

    \n", + "example": 337, + "start_line": 5973, + "end_line": 5978, + "section": "Code spans" + }, + { + "markdown": "`foo\\`bar`\n", + "html": "

    foo\\bar`

    \n", + "example": 338, + "start_line": 5990, + "end_line": 5994, + "section": "Code spans" + }, + { + "markdown": "``foo`bar``\n", + "html": "

    foo`bar

    \n", + "example": 339, + "start_line": 6001, + "end_line": 6005, + "section": "Code spans" + }, + { + "markdown": "` foo `` bar `\n", + "html": "

    foo `` bar

    \n", + "example": 340, + "start_line": 6007, + "end_line": 6011, + "section": "Code spans" + }, + { + "markdown": "*foo`*`\n", + "html": "

    *foo*

    \n", + "example": 341, + "start_line": 6019, + "end_line": 6023, + "section": "Code spans" + }, + { + "markdown": "[not a `link](/foo`)\n", + "html": "

    [not a link](/foo)

    \n", + "example": 342, + "start_line": 6028, + "end_line": 6032, + "section": "Code spans" + }, + { + "markdown": "``\n", + "html": "

    <a href="">`

    \n", + "example": 343, + "start_line": 6038, + "end_line": 6042, + "section": "Code spans" + }, + { + "markdown": "
    `\n", + "html": "

    `

    \n", + "example": 344, + "start_line": 6047, + "end_line": 6051, + "section": "Code spans" + }, + { + "markdown": "``\n", + "html": "

    <https://foo.bar.baz>`

    \n", + "example": 345, + "start_line": 6056, + "end_line": 6060, + "section": "Code spans" + }, + { + "markdown": "`\n", + "html": "

    https://foo.bar.`baz`

    \n", + "example": 346, + "start_line": 6065, + "end_line": 6069, + "section": "Code spans" + }, + { + "markdown": "```foo``\n", + "html": "

    ```foo``

    \n", + "example": 347, + "start_line": 6075, + "end_line": 6079, + "section": "Code spans" + }, + { + "markdown": "`foo\n", + "html": "

    `foo

    \n", + "example": 348, + "start_line": 6082, + "end_line": 6086, + "section": "Code spans" + }, + { + "markdown": "`foo``bar``\n", + "html": "

    `foobar

    \n", + "example": 349, + "start_line": 6091, + "end_line": 6095, + "section": "Code spans" + }, + { + "markdown": "*foo bar*\n", + "html": "

    foo bar

    \n", + "example": 350, + "start_line": 6308, + "end_line": 6312, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "a * foo bar*\n", + "html": "

    a * foo bar*

    \n", + "example": 351, + "start_line": 6318, + "end_line": 6322, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "a*\"foo\"*\n", + "html": "

    a*"foo"*

    \n", + "example": 352, + "start_line": 6329, + "end_line": 6333, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "* a *\n", + "html": "

    * a *

    \n", + "example": 353, + "start_line": 6338, + "end_line": 6342, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "*$*alpha.\n\n*£*bravo.\n\n*€*charlie.\n", + "html": "

    *$*alpha.

    \n

    *£*bravo.

    \n

    *€*charlie.

    \n", + "example": 354, + "start_line": 6347, + "end_line": 6357, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "foo*bar*\n", + "html": "

    foobar

    \n", + "example": 355, + "start_line": 6362, + "end_line": 6366, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "5*6*78\n", + "html": "

    5678

    \n", + "example": 356, + "start_line": 6369, + "end_line": 6373, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "_foo bar_\n", + "html": "

    foo bar

    \n", + "example": 357, + "start_line": 6378, + "end_line": 6382, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "_ foo bar_\n", + "html": "

    _ foo bar_

    \n", + "example": 358, + "start_line": 6388, + "end_line": 6392, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "a_\"foo\"_\n", + "html": "

    a_"foo"_

    \n", + "example": 359, + "start_line": 6398, + "end_line": 6402, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "foo_bar_\n", + "html": "

    foo_bar_

    \n", + "example": 360, + "start_line": 6407, + "end_line": 6411, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "5_6_78\n", + "html": "

    5_6_78

    \n", + "example": 361, + "start_line": 6414, + "end_line": 6418, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "пристаням_стремятся_\n", + "html": "

    пристаням_стремятся_

    \n", + "example": 362, + "start_line": 6421, + "end_line": 6425, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "aa_\"bb\"_cc\n", + "html": "

    aa_"bb"_cc

    \n", + "example": 363, + "start_line": 6431, + "end_line": 6435, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "foo-_(bar)_\n", + "html": "

    foo-(bar)

    \n", + "example": 364, + "start_line": 6442, + "end_line": 6446, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "_foo*\n", + "html": "

    _foo*

    \n", + "example": 365, + "start_line": 6454, + "end_line": 6458, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "*foo bar *\n", + "html": "

    *foo bar *

    \n", + "example": 366, + "start_line": 6464, + "end_line": 6468, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "*foo bar\n*\n", + "html": "

    *foo bar\n*

    \n", + "example": 367, + "start_line": 6473, + "end_line": 6479, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "*(*foo)\n", + "html": "

    *(*foo)

    \n", + "example": 368, + "start_line": 6486, + "end_line": 6490, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "*(*foo*)*\n", + "html": "

    (foo)

    \n", + "example": 369, + "start_line": 6496, + "end_line": 6500, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "*foo*bar\n", + "html": "

    foobar

    \n", + "example": 370, + "start_line": 6505, + "end_line": 6509, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "_foo bar _\n", + "html": "

    _foo bar _

    \n", + "example": 371, + "start_line": 6518, + "end_line": 6522, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "_(_foo)\n", + "html": "

    _(_foo)

    \n", + "example": 372, + "start_line": 6528, + "end_line": 6532, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "_(_foo_)_\n", + "html": "

    (foo)

    \n", + "example": 373, + "start_line": 6537, + "end_line": 6541, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "_foo_bar\n", + "html": "

    _foo_bar

    \n", + "example": 374, + "start_line": 6546, + "end_line": 6550, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "_пристаням_стремятся\n", + "html": "

    _пристаням_стремятся

    \n", + "example": 375, + "start_line": 6553, + "end_line": 6557, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "_foo_bar_baz_\n", + "html": "

    foo_bar_baz

    \n", + "example": 376, + "start_line": 6560, + "end_line": 6564, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "_(bar)_.\n", + "html": "

    (bar).

    \n", + "example": 377, + "start_line": 6571, + "end_line": 6575, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "**foo bar**\n", + "html": "

    foo bar

    \n", + "example": 378, + "start_line": 6580, + "end_line": 6584, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "** foo bar**\n", + "html": "

    ** foo bar**

    \n", + "example": 379, + "start_line": 6590, + "end_line": 6594, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "a**\"foo\"**\n", + "html": "

    a**"foo"**

    \n", + "example": 380, + "start_line": 6601, + "end_line": 6605, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "foo**bar**\n", + "html": "

    foobar

    \n", + "example": 381, + "start_line": 6610, + "end_line": 6614, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "__foo bar__\n", + "html": "

    foo bar

    \n", + "example": 382, + "start_line": 6619, + "end_line": 6623, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "__ foo bar__\n", + "html": "

    __ foo bar__

    \n", + "example": 383, + "start_line": 6629, + "end_line": 6633, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "__\nfoo bar__\n", + "html": "

    __\nfoo bar__

    \n", + "example": 384, + "start_line": 6637, + "end_line": 6643, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "a__\"foo\"__\n", + "html": "

    a__"foo"__

    \n", + "example": 385, + "start_line": 6649, + "end_line": 6653, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "foo__bar__\n", + "html": "

    foo__bar__

    \n", + "example": 386, + "start_line": 6658, + "end_line": 6662, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "5__6__78\n", + "html": "

    5__6__78

    \n", + "example": 387, + "start_line": 6665, + "end_line": 6669, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "пристаням__стремятся__\n", + "html": "

    пристаням__стремятся__

    \n", + "example": 388, + "start_line": 6672, + "end_line": 6676, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "__foo, __bar__, baz__\n", + "html": "

    foo, bar, baz

    \n", + "example": 389, + "start_line": 6679, + "end_line": 6683, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "foo-__(bar)__\n", + "html": "

    foo-(bar)

    \n", + "example": 390, + "start_line": 6690, + "end_line": 6694, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "**foo bar **\n", + "html": "

    **foo bar **

    \n", + "example": 391, + "start_line": 6703, + "end_line": 6707, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "**(**foo)\n", + "html": "

    **(**foo)

    \n", + "example": 392, + "start_line": 6716, + "end_line": 6720, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "*(**foo**)*\n", + "html": "

    (foo)

    \n", + "example": 393, + "start_line": 6726, + "end_line": 6730, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "**Gomphocarpus (*Gomphocarpus physocarpus*, syn.\n*Asclepias physocarpa*)**\n", + "html": "

    Gomphocarpus (Gomphocarpus physocarpus, syn.\nAsclepias physocarpa)

    \n", + "example": 394, + "start_line": 6733, + "end_line": 6739, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "**foo \"*bar*\" foo**\n", + "html": "

    foo "bar" foo

    \n", + "example": 395, + "start_line": 6742, + "end_line": 6746, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "**foo**bar\n", + "html": "

    foobar

    \n", + "example": 396, + "start_line": 6751, + "end_line": 6755, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "__foo bar __\n", + "html": "

    __foo bar __

    \n", + "example": 397, + "start_line": 6763, + "end_line": 6767, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "__(__foo)\n", + "html": "

    __(__foo)

    \n", + "example": 398, + "start_line": 6773, + "end_line": 6777, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "_(__foo__)_\n", + "html": "

    (foo)

    \n", + "example": 399, + "start_line": 6783, + "end_line": 6787, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "__foo__bar\n", + "html": "

    __foo__bar

    \n", + "example": 400, + "start_line": 6792, + "end_line": 6796, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "__пристаням__стремятся\n", + "html": "

    __пристаням__стремятся

    \n", + "example": 401, + "start_line": 6799, + "end_line": 6803, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "__foo__bar__baz__\n", + "html": "

    foo__bar__baz

    \n", + "example": 402, + "start_line": 6806, + "end_line": 6810, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "__(bar)__.\n", + "html": "

    (bar).

    \n", + "example": 403, + "start_line": 6817, + "end_line": 6821, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "*foo [bar](/url)*\n", + "html": "

    foo bar

    \n", + "example": 404, + "start_line": 6829, + "end_line": 6833, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "*foo\nbar*\n", + "html": "

    foo\nbar

    \n", + "example": 405, + "start_line": 6836, + "end_line": 6842, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "_foo __bar__ baz_\n", + "html": "

    foo bar baz

    \n", + "example": 406, + "start_line": 6848, + "end_line": 6852, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "_foo _bar_ baz_\n", + "html": "

    foo bar baz

    \n", + "example": 407, + "start_line": 6855, + "end_line": 6859, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "__foo_ bar_\n", + "html": "

    foo bar

    \n", + "example": 408, + "start_line": 6862, + "end_line": 6866, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "*foo *bar**\n", + "html": "

    foo bar

    \n", + "example": 409, + "start_line": 6869, + "end_line": 6873, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "*foo **bar** baz*\n", + "html": "

    foo bar baz

    \n", + "example": 410, + "start_line": 6876, + "end_line": 6880, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "*foo**bar**baz*\n", + "html": "

    foobarbaz

    \n", + "example": 411, + "start_line": 6882, + "end_line": 6886, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "*foo**bar*\n", + "html": "

    foo**bar

    \n", + "example": 412, + "start_line": 6906, + "end_line": 6910, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "***foo** bar*\n", + "html": "

    foo bar

    \n", + "example": 413, + "start_line": 6919, + "end_line": 6923, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "*foo **bar***\n", + "html": "

    foo bar

    \n", + "example": 414, + "start_line": 6926, + "end_line": 6930, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "*foo**bar***\n", + "html": "

    foobar

    \n", + "example": 415, + "start_line": 6933, + "end_line": 6937, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "foo***bar***baz\n", + "html": "

    foobarbaz

    \n", + "example": 416, + "start_line": 6944, + "end_line": 6948, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "foo******bar*********baz\n", + "html": "

    foobar***baz

    \n", + "example": 417, + "start_line": 6950, + "end_line": 6954, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "*foo **bar *baz* bim** bop*\n", + "html": "

    foo bar baz bim bop

    \n", + "example": 418, + "start_line": 6959, + "end_line": 6963, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "*foo [*bar*](/url)*\n", + "html": "

    foo bar

    \n", + "example": 419, + "start_line": 6966, + "end_line": 6970, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "** is not an empty emphasis\n", + "html": "

    ** is not an empty emphasis

    \n", + "example": 420, + "start_line": 6975, + "end_line": 6979, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "**** is not an empty strong emphasis\n", + "html": "

    **** is not an empty strong emphasis

    \n", + "example": 421, + "start_line": 6982, + "end_line": 6986, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "**foo [bar](/url)**\n", + "html": "

    foo bar

    \n", + "example": 422, + "start_line": 6995, + "end_line": 6999, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "**foo\nbar**\n", + "html": "

    foo\nbar

    \n", + "example": 423, + "start_line": 7002, + "end_line": 7008, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "__foo _bar_ baz__\n", + "html": "

    foo bar baz

    \n", + "example": 424, + "start_line": 7014, + "end_line": 7018, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "__foo __bar__ baz__\n", + "html": "

    foo bar baz

    \n", + "example": 425, + "start_line": 7021, + "end_line": 7025, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "____foo__ bar__\n", + "html": "

    foo bar

    \n", + "example": 426, + "start_line": 7028, + "end_line": 7032, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "**foo **bar****\n", + "html": "

    foo bar

    \n", + "example": 427, + "start_line": 7035, + "end_line": 7039, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "**foo *bar* baz**\n", + "html": "

    foo bar baz

    \n", + "example": 428, + "start_line": 7042, + "end_line": 7046, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "**foo*bar*baz**\n", + "html": "

    foobarbaz

    \n", + "example": 429, + "start_line": 7049, + "end_line": 7053, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "***foo* bar**\n", + "html": "

    foo bar

    \n", + "example": 430, + "start_line": 7056, + "end_line": 7060, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "**foo *bar***\n", + "html": "

    foo bar

    \n", + "example": 431, + "start_line": 7063, + "end_line": 7067, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "**foo *bar **baz**\nbim* bop**\n", + "html": "

    foo bar baz\nbim bop

    \n", + "example": 432, + "start_line": 7072, + "end_line": 7078, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "**foo [*bar*](/url)**\n", + "html": "

    foo bar

    \n", + "example": 433, + "start_line": 7081, + "end_line": 7085, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "__ is not an empty emphasis\n", + "html": "

    __ is not an empty emphasis

    \n", + "example": 434, + "start_line": 7090, + "end_line": 7094, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "____ is not an empty strong emphasis\n", + "html": "

    ____ is not an empty strong emphasis

    \n", + "example": 435, + "start_line": 7097, + "end_line": 7101, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "foo ***\n", + "html": "

    foo ***

    \n", + "example": 436, + "start_line": 7107, + "end_line": 7111, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "foo *\\**\n", + "html": "

    foo *

    \n", + "example": 437, + "start_line": 7114, + "end_line": 7118, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "foo *_*\n", + "html": "

    foo _

    \n", + "example": 438, + "start_line": 7121, + "end_line": 7125, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "foo *****\n", + "html": "

    foo *****

    \n", + "example": 439, + "start_line": 7128, + "end_line": 7132, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "foo **\\***\n", + "html": "

    foo *

    \n", + "example": 440, + "start_line": 7135, + "end_line": 7139, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "foo **_**\n", + "html": "

    foo _

    \n", + "example": 441, + "start_line": 7142, + "end_line": 7146, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "**foo*\n", + "html": "

    *foo

    \n", + "example": 442, + "start_line": 7153, + "end_line": 7157, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "*foo**\n", + "html": "

    foo*

    \n", + "example": 443, + "start_line": 7160, + "end_line": 7164, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "***foo**\n", + "html": "

    *foo

    \n", + "example": 444, + "start_line": 7167, + "end_line": 7171, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "****foo*\n", + "html": "

    ***foo

    \n", + "example": 445, + "start_line": 7174, + "end_line": 7178, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "**foo***\n", + "html": "

    foo*

    \n", + "example": 446, + "start_line": 7181, + "end_line": 7185, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "*foo****\n", + "html": "

    foo***

    \n", + "example": 447, + "start_line": 7188, + "end_line": 7192, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "foo ___\n", + "html": "

    foo ___

    \n", + "example": 448, + "start_line": 7198, + "end_line": 7202, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "foo _\\__\n", + "html": "

    foo _

    \n", + "example": 449, + "start_line": 7205, + "end_line": 7209, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "foo _*_\n", + "html": "

    foo *

    \n", + "example": 450, + "start_line": 7212, + "end_line": 7216, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "foo _____\n", + "html": "

    foo _____

    \n", + "example": 451, + "start_line": 7219, + "end_line": 7223, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "foo __\\___\n", + "html": "

    foo _

    \n", + "example": 452, + "start_line": 7226, + "end_line": 7230, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "foo __*__\n", + "html": "

    foo *

    \n", + "example": 453, + "start_line": 7233, + "end_line": 7237, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "__foo_\n", + "html": "

    _foo

    \n", + "example": 454, + "start_line": 7240, + "end_line": 7244, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "_foo__\n", + "html": "

    foo_

    \n", + "example": 455, + "start_line": 7251, + "end_line": 7255, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "___foo__\n", + "html": "

    _foo

    \n", + "example": 456, + "start_line": 7258, + "end_line": 7262, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "____foo_\n", + "html": "

    ___foo

    \n", + "example": 457, + "start_line": 7265, + "end_line": 7269, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "__foo___\n", + "html": "

    foo_

    \n", + "example": 458, + "start_line": 7272, + "end_line": 7276, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "_foo____\n", + "html": "

    foo___

    \n", + "example": 459, + "start_line": 7279, + "end_line": 7283, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "**foo**\n", + "html": "

    foo

    \n", + "example": 460, + "start_line": 7289, + "end_line": 7293, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "*_foo_*\n", + "html": "

    foo

    \n", + "example": 461, + "start_line": 7296, + "end_line": 7300, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "__foo__\n", + "html": "

    foo

    \n", + "example": 462, + "start_line": 7303, + "end_line": 7307, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "_*foo*_\n", + "html": "

    foo

    \n", + "example": 463, + "start_line": 7310, + "end_line": 7314, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "****foo****\n", + "html": "

    foo

    \n", + "example": 464, + "start_line": 7320, + "end_line": 7324, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "____foo____\n", + "html": "

    foo

    \n", + "example": 465, + "start_line": 7327, + "end_line": 7331, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "******foo******\n", + "html": "

    foo

    \n", + "example": 466, + "start_line": 7338, + "end_line": 7342, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "***foo***\n", + "html": "

    foo

    \n", + "example": 467, + "start_line": 7347, + "end_line": 7351, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "_____foo_____\n", + "html": "

    foo

    \n", + "example": 468, + "start_line": 7354, + "end_line": 7358, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "*foo _bar* baz_\n", + "html": "

    foo _bar baz_

    \n", + "example": 469, + "start_line": 7363, + "end_line": 7367, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "*foo __bar *baz bim__ bam*\n", + "html": "

    foo bar *baz bim bam

    \n", + "example": 470, + "start_line": 7370, + "end_line": 7374, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "**foo **bar baz**\n", + "html": "

    **foo bar baz

    \n", + "example": 471, + "start_line": 7379, + "end_line": 7383, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "*foo *bar baz*\n", + "html": "

    *foo bar baz

    \n", + "example": 472, + "start_line": 7386, + "end_line": 7390, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "*[bar*](/url)\n", + "html": "

    *bar*

    \n", + "example": 473, + "start_line": 7395, + "end_line": 7399, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "_foo [bar_](/url)\n", + "html": "

    _foo bar_

    \n", + "example": 474, + "start_line": 7402, + "end_line": 7406, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "*\n", + "html": "

    *

    \n", + "example": 475, + "start_line": 7409, + "end_line": 7413, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "**\n", + "html": "

    **

    \n", + "example": 476, + "start_line": 7416, + "end_line": 7420, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "__\n", + "html": "

    __

    \n", + "example": 477, + "start_line": 7423, + "end_line": 7427, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "*a `*`*\n", + "html": "

    a *

    \n", + "example": 478, + "start_line": 7430, + "end_line": 7434, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "_a `_`_\n", + "html": "

    a _

    \n", + "example": 479, + "start_line": 7437, + "end_line": 7441, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "**a\n", + "html": "

    **ahttps://foo.bar/?q=**

    \n", + "example": 480, + "start_line": 7444, + "end_line": 7448, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "__a\n", + "html": "

    __ahttps://foo.bar/?q=__

    \n", + "example": 481, + "start_line": 7451, + "end_line": 7455, + "section": "Emphasis and strong emphasis" + }, + { + "markdown": "[link](/uri \"title\")\n", + "html": "

    link

    \n", + "example": 482, + "start_line": 7539, + "end_line": 7543, + "section": "Links" + }, + { + "markdown": "[link](/uri)\n", + "html": "

    link

    \n", + "example": 483, + "start_line": 7549, + "end_line": 7553, + "section": "Links" + }, + { + "markdown": "[](./target.md)\n", + "html": "

    \n", + "example": 484, + "start_line": 7555, + "end_line": 7559, + "section": "Links" + }, + { + "markdown": "[link]()\n", + "html": "

    link

    \n", + "example": 485, + "start_line": 7562, + "end_line": 7566, + "section": "Links" + }, + { + "markdown": "[link](<>)\n", + "html": "

    link

    \n", + "example": 486, + "start_line": 7569, + "end_line": 7573, + "section": "Links" + }, + { + "markdown": "[]()\n", + "html": "

    \n", + "example": 487, + "start_line": 7576, + "end_line": 7580, + "section": "Links" + }, + { + "markdown": "[link](/my uri)\n", + "html": "

    [link](/my uri)

    \n", + "example": 488, + "start_line": 7585, + "end_line": 7589, + "section": "Links" + }, + { + "markdown": "[link](
    )\n", + "html": "

    link

    \n", + "example": 489, + "start_line": 7591, + "end_line": 7595, + "section": "Links" + }, + { + "markdown": "[link](foo\nbar)\n", + "html": "

    [link](foo\nbar)

    \n", + "example": 490, + "start_line": 7600, + "end_line": 7606, + "section": "Links" + }, + { + "markdown": "[link]()\n", + "html": "

    [link]()

    \n", + "example": 491, + "start_line": 7608, + "end_line": 7614, + "section": "Links" + }, + { + "markdown": "[a]()\n", + "html": "

    a

    \n", + "example": 492, + "start_line": 7619, + "end_line": 7623, + "section": "Links" + }, + { + "markdown": "[link]()\n", + "html": "

    [link](<foo>)

    \n", + "example": 493, + "start_line": 7627, + "end_line": 7631, + "section": "Links" + }, + { + "markdown": "[a](\n[a](c)\n", + "html": "

    [a](<b)c\n[a](<b)c>\n[a](c)

    \n", + "example": 494, + "start_line": 7636, + "end_line": 7644, + "section": "Links" + }, + { + "markdown": "[link](\\(foo\\))\n", + "html": "

    link

    \n", + "example": 495, + "start_line": 7648, + "end_line": 7652, + "section": "Links" + }, + { + "markdown": "[link](foo(and(bar)))\n", + "html": "

    link

    \n", + "example": 496, + "start_line": 7657, + "end_line": 7661, + "section": "Links" + }, + { + "markdown": "[link](foo(and(bar))\n", + "html": "

    [link](foo(and(bar))

    \n", + "example": 497, + "start_line": 7666, + "end_line": 7670, + "section": "Links" + }, + { + "markdown": "[link](foo\\(and\\(bar\\))\n", + "html": "

    link

    \n", + "example": 498, + "start_line": 7673, + "end_line": 7677, + "section": "Links" + }, + { + "markdown": "[link]()\n", + "html": "

    link

    \n", + "example": 499, + "start_line": 7680, + "end_line": 7684, + "section": "Links" + }, + { + "markdown": "[link](foo\\)\\:)\n", + "html": "

    link

    \n", + "example": 500, + "start_line": 7690, + "end_line": 7694, + "section": "Links" + }, + { + "markdown": "[link](#fragment)\n\n[link](https://example.com#fragment)\n\n[link](https://example.com?foo=3#frag)\n", + "html": "

    link

    \n

    link

    \n

    link

    \n", + "example": 501, + "start_line": 7699, + "end_line": 7709, + "section": "Links" + }, + { + "markdown": "[link](foo\\bar)\n", + "html": "

    link

    \n", + "example": 502, + "start_line": 7715, + "end_line": 7719, + "section": "Links" + }, + { + "markdown": "[link](foo%20bä)\n", + "html": "

    link

    \n", + "example": 503, + "start_line": 7731, + "end_line": 7735, + "section": "Links" + }, + { + "markdown": "[link](\"title\")\n", + "html": "

    link

    \n", + "example": 504, + "start_line": 7742, + "end_line": 7746, + "section": "Links" + }, + { + "markdown": "[link](/url \"title\")\n[link](/url 'title')\n[link](/url (title))\n", + "html": "

    link\nlink\nlink

    \n", + "example": 505, + "start_line": 7751, + "end_line": 7759, + "section": "Links" + }, + { + "markdown": "[link](/url \"title \\\""\")\n", + "html": "

    link

    \n", + "example": 506, + "start_line": 7765, + "end_line": 7769, + "section": "Links" + }, + { + "markdown": "[link](/url \"title\")\n", + "html": "

    link

    \n", + "example": 507, + "start_line": 7776, + "end_line": 7780, + "section": "Links" + }, + { + "markdown": "[link](/url \"title \"and\" title\")\n", + "html": "

    [link](/url "title "and" title")

    \n", + "example": 508, + "start_line": 7785, + "end_line": 7789, + "section": "Links" + }, + { + "markdown": "[link](/url 'title \"and\" title')\n", + "html": "

    link

    \n", + "example": 509, + "start_line": 7794, + "end_line": 7798, + "section": "Links" + }, + { + "markdown": "[link]( /uri\n \"title\" )\n", + "html": "

    link

    \n", + "example": 510, + "start_line": 7819, + "end_line": 7824, + "section": "Links" + }, + { + "markdown": "[link] (/uri)\n", + "html": "

    [link] (/uri)

    \n", + "example": 511, + "start_line": 7830, + "end_line": 7834, + "section": "Links" + }, + { + "markdown": "[link [foo [bar]]](/uri)\n", + "html": "

    link [foo [bar]]

    \n", + "example": 512, + "start_line": 7840, + "end_line": 7844, + "section": "Links" + }, + { + "markdown": "[link] bar](/uri)\n", + "html": "

    [link] bar](/uri)

    \n", + "example": 513, + "start_line": 7847, + "end_line": 7851, + "section": "Links" + }, + { + "markdown": "[link [bar](/uri)\n", + "html": "

    [link bar

    \n", + "example": 514, + "start_line": 7854, + "end_line": 7858, + "section": "Links" + }, + { + "markdown": "[link \\[bar](/uri)\n", + "html": "

    link [bar

    \n", + "example": 515, + "start_line": 7861, + "end_line": 7865, + "section": "Links" + }, + { + "markdown": "[link *foo **bar** `#`*](/uri)\n", + "html": "

    link foo bar #

    \n", + "example": 516, + "start_line": 7870, + "end_line": 7874, + "section": "Links" + }, + { + "markdown": "[![moon](moon.jpg)](/uri)\n", + "html": "

    \"moon\"

    \n", + "example": 517, + "start_line": 7877, + "end_line": 7881, + "section": "Links" + }, + { + "markdown": "[foo [bar](/uri)](/uri)\n", + "html": "

    [foo bar](/uri)

    \n", + "example": 518, + "start_line": 7886, + "end_line": 7890, + "section": "Links" + }, + { + "markdown": "[foo *[bar [baz](/uri)](/uri)*](/uri)\n", + "html": "

    [foo [bar baz](/uri)](/uri)

    \n", + "example": 519, + "start_line": 7893, + "end_line": 7897, + "section": "Links" + }, + { + "markdown": "![[[foo](uri1)](uri2)](uri3)\n", + "html": "

    \"[foo](uri2)\"

    \n", + "example": 520, + "start_line": 7900, + "end_line": 7904, + "section": "Links" + }, + { + "markdown": "*[foo*](/uri)\n", + "html": "

    *foo*

    \n", + "example": 521, + "start_line": 7910, + "end_line": 7914, + "section": "Links" + }, + { + "markdown": "[foo *bar](baz*)\n", + "html": "

    foo *bar

    \n", + "example": 522, + "start_line": 7917, + "end_line": 7921, + "section": "Links" + }, + { + "markdown": "*foo [bar* baz]\n", + "html": "

    foo [bar baz]

    \n", + "example": 523, + "start_line": 7927, + "end_line": 7931, + "section": "Links" + }, + { + "markdown": "[foo \n", + "html": "

    [foo

    \n", + "example": 524, + "start_line": 7937, + "end_line": 7941, + "section": "Links" + }, + { + "markdown": "[foo`](/uri)`\n", + "html": "

    [foo](/uri)

    \n", + "example": 525, + "start_line": 7944, + "end_line": 7948, + "section": "Links" + }, + { + "markdown": "[foo\n", + "html": "

    [foohttps://example.com/?search=](uri)

    \n", + "example": 526, + "start_line": 7951, + "end_line": 7955, + "section": "Links" + }, + { + "markdown": "[foo][bar]\n\n[bar]: /url \"title\"\n", + "html": "

    foo

    \n", + "example": 527, + "start_line": 7989, + "end_line": 7995, + "section": "Links" + }, + { + "markdown": "[link [foo [bar]]][ref]\n\n[ref]: /uri\n", + "html": "

    link [foo [bar]]

    \n", + "example": 528, + "start_line": 8004, + "end_line": 8010, + "section": "Links" + }, + { + "markdown": "[link \\[bar][ref]\n\n[ref]: /uri\n", + "html": "

    link [bar

    \n", + "example": 529, + "start_line": 8013, + "end_line": 8019, + "section": "Links" + }, + { + "markdown": "[link *foo **bar** `#`*][ref]\n\n[ref]: /uri\n", + "html": "

    link foo bar #

    \n", + "example": 530, + "start_line": 8024, + "end_line": 8030, + "section": "Links" + }, + { + "markdown": "[![moon](moon.jpg)][ref]\n\n[ref]: /uri\n", + "html": "

    \"moon\"

    \n", + "example": 531, + "start_line": 8033, + "end_line": 8039, + "section": "Links" + }, + { + "markdown": "[foo [bar](/uri)][ref]\n\n[ref]: /uri\n", + "html": "

    [foo bar]ref

    \n", + "example": 532, + "start_line": 8044, + "end_line": 8050, + "section": "Links" + }, + { + "markdown": "[foo *bar [baz][ref]*][ref]\n\n[ref]: /uri\n", + "html": "

    [foo bar baz]ref

    \n", + "example": 533, + "start_line": 8053, + "end_line": 8059, + "section": "Links" + }, + { + "markdown": "*[foo*][ref]\n\n[ref]: /uri\n", + "html": "

    *foo*

    \n", + "example": 534, + "start_line": 8068, + "end_line": 8074, + "section": "Links" + }, + { + "markdown": "[foo *bar][ref]*\n\n[ref]: /uri\n", + "html": "

    foo *bar*

    \n", + "example": 535, + "start_line": 8077, + "end_line": 8083, + "section": "Links" + }, + { + "markdown": "[foo \n\n[ref]: /uri\n", + "html": "

    [foo

    \n", + "example": 536, + "start_line": 8089, + "end_line": 8095, + "section": "Links" + }, + { + "markdown": "[foo`][ref]`\n\n[ref]: /uri\n", + "html": "

    [foo][ref]

    \n", + "example": 537, + "start_line": 8098, + "end_line": 8104, + "section": "Links" + }, + { + "markdown": "[foo\n\n[ref]: /uri\n", + "html": "

    [foohttps://example.com/?search=][ref]

    \n", + "example": 538, + "start_line": 8107, + "end_line": 8113, + "section": "Links" + }, + { + "markdown": "[foo][BaR]\n\n[bar]: /url \"title\"\n", + "html": "

    foo

    \n", + "example": 539, + "start_line": 8118, + "end_line": 8124, + "section": "Links" + }, + { + "markdown": "[ẞ]\n\n[SS]: /url\n", + "html": "

    \n", + "example": 540, + "start_line": 8129, + "end_line": 8135, + "section": "Links" + }, + { + "markdown": "[Foo\n bar]: /url\n\n[Baz][Foo bar]\n", + "html": "

    Baz

    \n", + "example": 541, + "start_line": 8141, + "end_line": 8148, + "section": "Links" + }, + { + "markdown": "[foo] [bar]\n\n[bar]: /url \"title\"\n", + "html": "

    [foo] bar

    \n", + "example": 542, + "start_line": 8154, + "end_line": 8160, + "section": "Links" + }, + { + "markdown": "[foo]\n[bar]\n\n[bar]: /url \"title\"\n", + "html": "

    [foo]\nbar

    \n", + "example": 543, + "start_line": 8163, + "end_line": 8171, + "section": "Links" + }, + { + "markdown": "[foo]: /url1\n\n[foo]: /url2\n\n[bar][foo]\n", + "html": "

    bar

    \n", + "example": 544, + "start_line": 8204, + "end_line": 8212, + "section": "Links" + }, + { + "markdown": "[bar][foo\\!]\n\n[foo!]: /url\n", + "html": "

    [bar][foo!]

    \n", + "example": 545, + "start_line": 8219, + "end_line": 8225, + "section": "Links" + }, + { + "markdown": "[foo][ref[]\n\n[ref[]: /uri\n", + "html": "

    [foo][ref[]

    \n

    [ref[]: /uri

    \n", + "example": 546, + "start_line": 8231, + "end_line": 8238, + "section": "Links" + }, + { + "markdown": "[foo][ref[bar]]\n\n[ref[bar]]: /uri\n", + "html": "

    [foo][ref[bar]]

    \n

    [ref[bar]]: /uri

    \n", + "example": 547, + "start_line": 8241, + "end_line": 8248, + "section": "Links" + }, + { + "markdown": "[[[foo]]]\n\n[[[foo]]]: /url\n", + "html": "

    [[[foo]]]

    \n

    [[[foo]]]: /url

    \n", + "example": 548, + "start_line": 8251, + "end_line": 8258, + "section": "Links" + }, + { + "markdown": "[foo][ref\\[]\n\n[ref\\[]: /uri\n", + "html": "

    foo

    \n", + "example": 549, + "start_line": 8261, + "end_line": 8267, + "section": "Links" + }, + { + "markdown": "[bar\\\\]: /uri\n\n[bar\\\\]\n", + "html": "

    bar\\

    \n", + "example": 550, + "start_line": 8272, + "end_line": 8278, + "section": "Links" + }, + { + "markdown": "[]\n\n[]: /uri\n", + "html": "

    []

    \n

    []: /uri

    \n", + "example": 551, + "start_line": 8284, + "end_line": 8291, + "section": "Links" + }, + { + "markdown": "[\n ]\n\n[\n ]: /uri\n", + "html": "

    [\n]

    \n

    [\n]: /uri

    \n", + "example": 552, + "start_line": 8294, + "end_line": 8305, + "section": "Links" + }, + { + "markdown": "[foo][]\n\n[foo]: /url \"title\"\n", + "html": "

    foo

    \n", + "example": 553, + "start_line": 8317, + "end_line": 8323, + "section": "Links" + }, + { + "markdown": "[*foo* bar][]\n\n[*foo* bar]: /url \"title\"\n", + "html": "

    foo bar

    \n", + "example": 554, + "start_line": 8326, + "end_line": 8332, + "section": "Links" + }, + { + "markdown": "[Foo][]\n\n[foo]: /url \"title\"\n", + "html": "

    Foo

    \n", + "example": 555, + "start_line": 8337, + "end_line": 8343, + "section": "Links" + }, + { + "markdown": "[foo] \n[]\n\n[foo]: /url \"title\"\n", + "html": "

    foo\n[]

    \n", + "example": 556, + "start_line": 8350, + "end_line": 8358, + "section": "Links" + }, + { + "markdown": "[foo]\n\n[foo]: /url \"title\"\n", + "html": "

    foo

    \n", + "example": 557, + "start_line": 8370, + "end_line": 8376, + "section": "Links" + }, + { + "markdown": "[*foo* bar]\n\n[*foo* bar]: /url \"title\"\n", + "html": "

    foo bar

    \n", + "example": 558, + "start_line": 8379, + "end_line": 8385, + "section": "Links" + }, + { + "markdown": "[[*foo* bar]]\n\n[*foo* bar]: /url \"title\"\n", + "html": "

    [foo bar]

    \n", + "example": 559, + "start_line": 8388, + "end_line": 8394, + "section": "Links" + }, + { + "markdown": "[[bar [foo]\n\n[foo]: /url\n", + "html": "

    [[bar foo

    \n", + "example": 560, + "start_line": 8397, + "end_line": 8403, + "section": "Links" + }, + { + "markdown": "[Foo]\n\n[foo]: /url \"title\"\n", + "html": "

    Foo

    \n", + "example": 561, + "start_line": 8408, + "end_line": 8414, + "section": "Links" + }, + { + "markdown": "[foo] bar\n\n[foo]: /url\n", + "html": "

    foo bar

    \n", + "example": 562, + "start_line": 8419, + "end_line": 8425, + "section": "Links" + }, + { + "markdown": "\\[foo]\n\n[foo]: /url \"title\"\n", + "html": "

    [foo]

    \n", + "example": 563, + "start_line": 8431, + "end_line": 8437, + "section": "Links" + }, + { + "markdown": "[foo*]: /url\n\n*[foo*]\n", + "html": "

    *foo*

    \n", + "example": 564, + "start_line": 8443, + "end_line": 8449, + "section": "Links" + }, + { + "markdown": "[foo][bar]\n\n[foo]: /url1\n[bar]: /url2\n", + "html": "

    foo

    \n", + "example": 565, + "start_line": 8455, + "end_line": 8462, + "section": "Links" + }, + { + "markdown": "[foo][]\n\n[foo]: /url1\n", + "html": "

    foo

    \n", + "example": 566, + "start_line": 8464, + "end_line": 8470, + "section": "Links" + }, + { + "markdown": "[foo]()\n\n[foo]: /url1\n", + "html": "

    foo

    \n", + "example": 567, + "start_line": 8474, + "end_line": 8480, + "section": "Links" + }, + { + "markdown": "[foo](not a link)\n\n[foo]: /url1\n", + "html": "

    foo(not a link)

    \n", + "example": 568, + "start_line": 8482, + "end_line": 8488, + "section": "Links" + }, + { + "markdown": "[foo][bar][baz]\n\n[baz]: /url\n", + "html": "

    [foo]bar

    \n", + "example": 569, + "start_line": 8493, + "end_line": 8499, + "section": "Links" + }, + { + "markdown": "[foo][bar][baz]\n\n[baz]: /url1\n[bar]: /url2\n", + "html": "

    foobaz

    \n", + "example": 570, + "start_line": 8505, + "end_line": 8512, + "section": "Links" + }, + { + "markdown": "[foo][bar][baz]\n\n[baz]: /url1\n[foo]: /url2\n", + "html": "

    [foo]bar

    \n", + "example": 571, + "start_line": 8518, + "end_line": 8525, + "section": "Links" + }, + { + "markdown": "![foo](/url \"title\")\n", + "html": "

    \"foo\"

    \n", + "example": 572, + "start_line": 8541, + "end_line": 8545, + "section": "Images" + }, + { + "markdown": "![foo *bar*]\n\n[foo *bar*]: train.jpg \"train & tracks\"\n", + "html": "

    \"foo

    \n", + "example": 573, + "start_line": 8548, + "end_line": 8554, + "section": "Images" + }, + { + "markdown": "![foo ![bar](/url)](/url2)\n", + "html": "

    \"foo

    \n", + "example": 574, + "start_line": 8557, + "end_line": 8561, + "section": "Images" + }, + { + "markdown": "![foo [bar](/url)](/url2)\n", + "html": "

    \"foo

    \n", + "example": 575, + "start_line": 8564, + "end_line": 8568, + "section": "Images" + }, + { + "markdown": "![foo *bar*][]\n\n[foo *bar*]: train.jpg \"train & tracks\"\n", + "html": "

    \"foo

    \n", + "example": 576, + "start_line": 8578, + "end_line": 8584, + "section": "Images" + }, + { + "markdown": "![foo *bar*][foobar]\n\n[FOOBAR]: train.jpg \"train & tracks\"\n", + "html": "

    \"foo

    \n", + "example": 577, + "start_line": 8587, + "end_line": 8593, + "section": "Images" + }, + { + "markdown": "![foo](train.jpg)\n", + "html": "

    \"foo\"

    \n", + "example": 578, + "start_line": 8596, + "end_line": 8600, + "section": "Images" + }, + { + "markdown": "My ![foo bar](/path/to/train.jpg \"title\" )\n", + "html": "

    My \"foo

    \n", + "example": 579, + "start_line": 8603, + "end_line": 8607, + "section": "Images" + }, + { + "markdown": "![foo]()\n", + "html": "

    \"foo\"

    \n", + "example": 580, + "start_line": 8610, + "end_line": 8614, + "section": "Images" + }, + { + "markdown": "![](/url)\n", + "html": "

    \"\"

    \n", + "example": 581, + "start_line": 8617, + "end_line": 8621, + "section": "Images" + }, + { + "markdown": "![foo][bar]\n\n[bar]: /url\n", + "html": "

    \"foo\"

    \n", + "example": 582, + "start_line": 8626, + "end_line": 8632, + "section": "Images" + }, + { + "markdown": "![foo][bar]\n\n[BAR]: /url\n", + "html": "

    \"foo\"

    \n", + "example": 583, + "start_line": 8635, + "end_line": 8641, + "section": "Images" + }, + { + "markdown": "![foo][]\n\n[foo]: /url \"title\"\n", + "html": "

    \"foo\"

    \n", + "example": 584, + "start_line": 8646, + "end_line": 8652, + "section": "Images" + }, + { + "markdown": "![*foo* bar][]\n\n[*foo* bar]: /url \"title\"\n", + "html": "

    \"foo

    \n", + "example": 585, + "start_line": 8655, + "end_line": 8661, + "section": "Images" + }, + { + "markdown": "![Foo][]\n\n[foo]: /url \"title\"\n", + "html": "

    \"Foo\"

    \n", + "example": 586, + "start_line": 8666, + "end_line": 8672, + "section": "Images" + }, + { + "markdown": "![foo] \n[]\n\n[foo]: /url \"title\"\n", + "html": "

    \"foo\"\n[]

    \n", + "example": 587, + "start_line": 8678, + "end_line": 8686, + "section": "Images" + }, + { + "markdown": "![foo]\n\n[foo]: /url \"title\"\n", + "html": "

    \"foo\"

    \n", + "example": 588, + "start_line": 8691, + "end_line": 8697, + "section": "Images" + }, + { + "markdown": "![*foo* bar]\n\n[*foo* bar]: /url \"title\"\n", + "html": "

    \"foo

    \n", + "example": 589, + "start_line": 8700, + "end_line": 8706, + "section": "Images" + }, + { + "markdown": "![[foo]]\n\n[[foo]]: /url \"title\"\n", + "html": "

    ![[foo]]

    \n

    [[foo]]: /url "title"

    \n", + "example": 590, + "start_line": 8711, + "end_line": 8718, + "section": "Images" + }, + { + "markdown": "![Foo]\n\n[foo]: /url \"title\"\n", + "html": "

    \"Foo\"

    \n", + "example": 591, + "start_line": 8723, + "end_line": 8729, + "section": "Images" + }, + { + "markdown": "!\\[foo]\n\n[foo]: /url \"title\"\n", + "html": "

    ![foo]

    \n", + "example": 592, + "start_line": 8735, + "end_line": 8741, + "section": "Images" + }, + { + "markdown": "\\![foo]\n\n[foo]: /url \"title\"\n", + "html": "

    !foo

    \n", + "example": 593, + "start_line": 8747, + "end_line": 8753, + "section": "Images" + }, + { + "markdown": "\n", + "html": "

    http://foo.bar.baz

    \n", + "example": 594, + "start_line": 8780, + "end_line": 8784, + "section": "Autolinks" + }, + { + "markdown": "\n", + "html": "

    https://foo.bar.baz/test?q=hello&id=22&boolean

    \n", + "example": 595, + "start_line": 8787, + "end_line": 8791, + "section": "Autolinks" + }, + { + "markdown": "\n", + "html": "

    irc://foo.bar:2233/baz

    \n", + "example": 596, + "start_line": 8794, + "end_line": 8798, + "section": "Autolinks" + }, + { + "markdown": "\n", + "html": "

    MAILTO:FOO@BAR.BAZ

    \n", + "example": 597, + "start_line": 8803, + "end_line": 8807, + "section": "Autolinks" + }, + { + "markdown": "\n", + "html": "

    a+b+c:d

    \n", + "example": 598, + "start_line": 8815, + "end_line": 8819, + "section": "Autolinks" + }, + { + "markdown": "\n", + "html": "

    made-up-scheme://foo,bar

    \n", + "example": 599, + "start_line": 8822, + "end_line": 8826, + "section": "Autolinks" + }, + { + "markdown": "\n", + "html": "

    https://../

    \n", + "example": 600, + "start_line": 8829, + "end_line": 8833, + "section": "Autolinks" + }, + { + "markdown": "\n", + "html": "

    localhost:5001/foo

    \n", + "example": 601, + "start_line": 8836, + "end_line": 8840, + "section": "Autolinks" + }, + { + "markdown": "\n", + "html": "

    <https://foo.bar/baz bim>

    \n", + "example": 602, + "start_line": 8845, + "end_line": 8849, + "section": "Autolinks" + }, + { + "markdown": "\n", + "html": "

    https://example.com/\\[\\

    \n", + "example": 603, + "start_line": 8854, + "end_line": 8858, + "section": "Autolinks" + }, + { + "markdown": "\n", + "html": "

    foo@bar.example.com

    \n", + "example": 604, + "start_line": 8876, + "end_line": 8880, + "section": "Autolinks" + }, + { + "markdown": "\n", + "html": "

    foo+special@Bar.baz-bar0.com

    \n", + "example": 605, + "start_line": 8883, + "end_line": 8887, + "section": "Autolinks" + }, + { + "markdown": "\n", + "html": "

    <foo+@bar.example.com>

    \n", + "example": 606, + "start_line": 8892, + "end_line": 8896, + "section": "Autolinks" + }, + { + "markdown": "<>\n", + "html": "

    <>

    \n", + "example": 607, + "start_line": 8901, + "end_line": 8905, + "section": "Autolinks" + }, + { + "markdown": "< https://foo.bar >\n", + "html": "

    < https://foo.bar >

    \n", + "example": 608, + "start_line": 8908, + "end_line": 8912, + "section": "Autolinks" + }, + { + "markdown": "\n", + "html": "

    <m:abc>

    \n", + "example": 609, + "start_line": 8915, + "end_line": 8919, + "section": "Autolinks" + }, + { + "markdown": "\n", + "html": "

    <foo.bar.baz>

    \n", + "example": 610, + "start_line": 8922, + "end_line": 8926, + "section": "Autolinks" + }, + { + "markdown": "https://example.com\n", + "html": "

    https://example.com

    \n", + "example": 611, + "start_line": 8929, + "end_line": 8933, + "section": "Autolinks" + }, + { + "markdown": "foo@bar.example.com\n", + "html": "

    foo@bar.example.com

    \n", + "example": 612, + "start_line": 8936, + "end_line": 8940, + "section": "Autolinks" + }, + { + "markdown": "\n", + "html": "

    \n", + "example": 613, + "start_line": 9016, + "end_line": 9020, + "section": "Raw HTML" + }, + { + "markdown": "\n", + "html": "

    \n", + "example": 614, + "start_line": 9025, + "end_line": 9029, + "section": "Raw HTML" + }, + { + "markdown": "\n", + "html": "

    \n", + "example": 615, + "start_line": 9034, + "end_line": 9040, + "section": "Raw HTML" + }, + { + "markdown": "\n", + "html": "

    \n", + "example": 616, + "start_line": 9045, + "end_line": 9051, + "section": "Raw HTML" + }, + { + "markdown": "Foo \n", + "html": "

    Foo

    \n", + "example": 617, + "start_line": 9056, + "end_line": 9060, + "section": "Raw HTML" + }, + { + "markdown": "<33> <__>\n", + "html": "

    <33> <__>

    \n", + "example": 618, + "start_line": 9065, + "end_line": 9069, + "section": "Raw HTML" + }, + { + "markdown": "
    \n", + "html": "

    <a h*#ref="hi">

    \n", + "example": 619, + "start_line": 9074, + "end_line": 9078, + "section": "Raw HTML" + }, + { + "markdown": "
    \n", + "html": "

    <a href="hi'> <a href=hi'>

    \n", + "example": 620, + "start_line": 9083, + "end_line": 9087, + "section": "Raw HTML" + }, + { + "markdown": "< a><\nfoo>\n\n", + "html": "

    < a><\nfoo><bar/ >\n<foo bar=baz\nbim!bop />

    \n", + "example": 621, + "start_line": 9092, + "end_line": 9102, + "section": "Raw HTML" + }, + { + "markdown": "
    \n", + "html": "

    <a href='bar'title=title>

    \n", + "example": 622, + "start_line": 9107, + "end_line": 9111, + "section": "Raw HTML" + }, + { + "markdown": "
    \n", + "html": "

    \n", + "example": 623, + "start_line": 9116, + "end_line": 9120, + "section": "Raw HTML" + }, + { + "markdown": "\n", + "html": "

    </a href="foo">

    \n", + "example": 624, + "start_line": 9125, + "end_line": 9129, + "section": "Raw HTML" + }, + { + "markdown": "foo \n", + "html": "

    foo

    \n", + "example": 625, + "start_line": 9134, + "end_line": 9140, + "section": "Raw HTML" + }, + { + "markdown": "foo foo -->\n\nfoo foo -->\n", + "html": "

    foo foo -->

    \n

    foo foo -->

    \n", + "example": 626, + "start_line": 9142, + "end_line": 9149, + "section": "Raw HTML" + }, + { + "markdown": "foo \n", + "html": "

    foo

    \n", + "example": 627, + "start_line": 9154, + "end_line": 9158, + "section": "Raw HTML" + }, + { + "markdown": "foo \n", + "html": "

    foo

    \n", + "example": 628, + "start_line": 9163, + "end_line": 9167, + "section": "Raw HTML" + }, + { + "markdown": "foo &<]]>\n", + "html": "

    foo &<]]>

    \n", + "example": 629, + "start_line": 9172, + "end_line": 9176, + "section": "Raw HTML" + }, + { + "markdown": "foo \n", + "html": "

    foo

    \n", + "example": 630, + "start_line": 9182, + "end_line": 9186, + "section": "Raw HTML" + }, + { + "markdown": "foo \n", + "html": "

    foo

    \n", + "example": 631, + "start_line": 9191, + "end_line": 9195, + "section": "Raw HTML" + }, + { + "markdown": "\n", + "html": "

    <a href=""">

    \n", + "example": 632, + "start_line": 9198, + "end_line": 9202, + "section": "Raw HTML" + }, + { + "markdown": "foo \nbaz\n", + "html": "

    foo
    \nbaz

    \n", + "example": 633, + "start_line": 9212, + "end_line": 9218, + "section": "Hard line breaks" + }, + { + "markdown": "foo\\\nbaz\n", + "html": "

    foo
    \nbaz

    \n", + "example": 634, + "start_line": 9224, + "end_line": 9230, + "section": "Hard line breaks" + }, + { + "markdown": "foo \nbaz\n", + "html": "

    foo
    \nbaz

    \n", + "example": 635, + "start_line": 9235, + "end_line": 9241, + "section": "Hard line breaks" + }, + { + "markdown": "foo \n bar\n", + "html": "

    foo
    \nbar

    \n", + "example": 636, + "start_line": 9246, + "end_line": 9252, + "section": "Hard line breaks" + }, + { + "markdown": "foo\\\n bar\n", + "html": "

    foo
    \nbar

    \n", + "example": 637, + "start_line": 9255, + "end_line": 9261, + "section": "Hard line breaks" + }, + { + "markdown": "*foo \nbar*\n", + "html": "

    foo
    \nbar

    \n", + "example": 638, + "start_line": 9267, + "end_line": 9273, + "section": "Hard line breaks" + }, + { + "markdown": "*foo\\\nbar*\n", + "html": "

    foo
    \nbar

    \n", + "example": 639, + "start_line": 9276, + "end_line": 9282, + "section": "Hard line breaks" + }, + { + "markdown": "`code \nspan`\n", + "html": "

    code span

    \n", + "example": 640, + "start_line": 9287, + "end_line": 9292, + "section": "Hard line breaks" + }, + { + "markdown": "`code\\\nspan`\n", + "html": "

    code\\ span

    \n", + "example": 641, + "start_line": 9295, + "end_line": 9300, + "section": "Hard line breaks" + }, + { + "markdown": "
    \n", + "html": "

    \n", + "example": 642, + "start_line": 9305, + "end_line": 9311, + "section": "Hard line breaks" + }, + { + "markdown": "\n", + "html": "

    \n", + "example": 643, + "start_line": 9314, + "end_line": 9320, + "section": "Hard line breaks" + }, + { + "markdown": "foo\\\n", + "html": "

    foo\\

    \n", + "example": 644, + "start_line": 9327, + "end_line": 9331, + "section": "Hard line breaks" + }, + { + "markdown": "foo \n", + "html": "

    foo

    \n", + "example": 645, + "start_line": 9334, + "end_line": 9338, + "section": "Hard line breaks" + }, + { + "markdown": "### foo\\\n", + "html": "

    foo\\

    \n", + "example": 646, + "start_line": 9341, + "end_line": 9345, + "section": "Hard line breaks" + }, + { + "markdown": "### foo \n", + "html": "

    foo

    \n", + "example": 647, + "start_line": 9348, + "end_line": 9352, + "section": "Hard line breaks" + }, + { + "markdown": "foo\nbaz\n", + "html": "

    foo\nbaz

    \n", + "example": 648, + "start_line": 9363, + "end_line": 9369, + "section": "Soft line breaks" + }, + { + "markdown": "foo \n baz\n", + "html": "

    foo\nbaz

    \n", + "example": 649, + "start_line": 9375, + "end_line": 9381, + "section": "Soft line breaks" + }, + { + "markdown": "hello $.;'there\n", + "html": "

    hello $.;'there

    \n", + "example": 650, + "start_line": 9395, + "end_line": 9399, + "section": "Textual content" + }, + { + "markdown": "Foo χρῆν\n", + "html": "

    Foo χρῆν

    \n", + "example": 651, + "start_line": 9402, + "end_line": 9406, + "section": "Textual content" + }, + { + "markdown": "Multiple spaces\n", + "html": "

    Multiple spaces

    \n", + "example": 652, + "start_line": 9411, + "end_line": 9415, + "section": "Textual content" + } +] \ No newline at end of file diff --git a/crates/biome_markdown_parser/tests/spec_test.rs b/crates/biome_markdown_parser/tests/spec_test.rs index dee024b5e7e8..195cd528609f 100644 --- a/crates/biome_markdown_parser/tests/spec_test.rs +++ b/crates/biome_markdown_parser/tests/spec_test.rs @@ -132,11 +132,9 @@ pub fn run(test_case: &str, _snapshot_name: &str, test_directory: &str, outcome_ }); } -#[ignore] #[test] pub fn quick_test() { - let code = r#" -your test code + let code = r#"**bold *and italic* text** "#; let root = parse_markdown(code); diff --git a/crates/biome_markdown_syntax/src/generated/kind.rs b/crates/biome_markdown_syntax/src/generated/kind.rs index a43d2ee90ad1..67fe64647641 100644 --- a/crates/biome_markdown_syntax/src/generated/kind.rs +++ b/crates/biome_markdown_syntax/src/generated/kind.rs @@ -23,15 +23,18 @@ pub enum MarkdownSyntaxKind { BANG, MINUS, STAR, + PLUS, DOUBLE_STAR, BACKTICK, TRIPLE_BACKTICK, TILDE, + TRIPLE_TILDE, WHITESPACE3, UNDERSCORE, DOUBLE_UNDERSCORE, HASH, COMMA, + COLON, NULL_KW, MD_HARD_LINE_LITERAL, MD_SOFT_BREAK_LITERAL, @@ -39,13 +42,17 @@ pub enum MarkdownSyntaxKind { MD_STRING_LITERAL, MD_INDENT_CHUNK_LITERAL, MD_THEMATIC_BREAK_LITERAL, + MD_SETEXT_UNDERLINE_LITERAL, + MD_ORDERED_LIST_MARKER, MD_ERROR_LITERAL, + MD_ENTITY_LITERAL, ERROR_TOKEN, NEWLINE, WHITESPACE, TAB, BOGUS, MD_BOGUS, + MD_BOGUS_BULLET, MD_DOCUMENT, MD_BLOCK_LIST, MD_HASH_LIST, @@ -56,11 +63,14 @@ pub enum MarkdownSyntaxKind { MD_CODE_NAME_LIST, MD_HTML_BLOCK, MD_LINK_BLOCK, + MD_LINK_REFERENCE_DEFINITION, + MD_LINK_LABEL, + MD_LINK_DESTINATION, + MD_LINK_TITLE, MD_QUOTE, - MD_ORDER_LIST_ITEM, + MD_ORDERED_LIST_ITEM, MD_BULLET_LIST_ITEM, MD_BULLET_LIST, - MD_ORDER_LIST, MD_PARAGRAPH, MD_INLINE_ITEM_LIST, MD_INLINE_EMPHASIS, @@ -69,6 +79,12 @@ pub enum MarkdownSyntaxKind { MD_BULLET, MD_INLINE_LINK, MD_INLINE_IMAGE, + MD_REFERENCE_LINK, + 
MD_REFERENCE_IMAGE, + MD_REFERENCE_LINK_LABEL, + MD_AUTOLINK, + MD_INLINE_HTML, + MD_ENTITY_REFERENCE, MD_INLINE_IMAGE_ALT, MD_INDENTED_CODE_LINE, MD_INLINE_IMAGE_LINK, @@ -81,6 +97,7 @@ pub enum MarkdownSyntaxKind { MD_STRING, MD_INDENT, MD_THEMATIC_BREAK_BLOCK, + MD_NEWLINE, #[doc(hidden)] __LAST, } @@ -100,15 +117,18 @@ impl MarkdownSyntaxKind { | BANG | MINUS | STAR + | PLUS | DOUBLE_STAR | BACKTICK | TRIPLE_BACKTICK | TILDE + | TRIPLE_TILDE | WHITESPACE3 | UNDERSCORE | DOUBLE_UNDERSCORE | HASH | COMMA + | COLON ) } pub const fn is_literal(self) -> bool { @@ -120,7 +140,10 @@ impl MarkdownSyntaxKind { | MD_STRING_LITERAL | MD_INDENT_CHUNK_LITERAL | MD_THEMATIC_BREAK_LITERAL + | MD_SETEXT_UNDERLINE_LITERAL + | MD_ORDERED_LIST_MARKER | MD_ERROR_LITERAL + | MD_ENTITY_LITERAL ) } pub const fn is_list(self) -> bool { @@ -130,7 +153,6 @@ impl MarkdownSyntaxKind { | MD_HASH_LIST | MD_CODE_NAME_LIST | MD_BULLET_LIST - | MD_ORDER_LIST | MD_INLINE_ITEM_LIST | MD_INDENTED_CODE_LINE_LIST ) @@ -155,15 +177,18 @@ impl MarkdownSyntaxKind { BANG => "!", MINUS => "-", STAR => "*", + PLUS => "+", DOUBLE_STAR => "**", BACKTICK => "`", TRIPLE_BACKTICK => "```", TILDE => "~", + TRIPLE_TILDE => "~~~", WHITESPACE3 => " ", UNDERSCORE => "_", DOUBLE_UNDERSCORE => "__", HASH => "#", COMMA => ",", + COLON => ":", NULL_KW => "null", EOF => "EOF", _ => return None, @@ -173,4 +198,4 @@ impl MarkdownSyntaxKind { } #[doc = r" Utility macro for creating a SyntaxKind through simple macro syntax"] #[macro_export] -macro_rules ! T { [<] => { $ crate :: MarkdownSyntaxKind :: L_ANGLE } ; [>] => { $ crate :: MarkdownSyntaxKind :: R_ANGLE } ; ['('] => { $ crate :: MarkdownSyntaxKind :: L_PAREN } ; [')'] => { $ crate :: MarkdownSyntaxKind :: R_PAREN } ; ['['] => { $ crate :: MarkdownSyntaxKind :: L_BRACK } ; [']'] => { $ crate :: MarkdownSyntaxKind :: R_BRACK } ; [/] => { $ crate :: MarkdownSyntaxKind :: SLASH } ; [=] => { $ crate :: MarkdownSyntaxKind :: EQ } ; [!] 
=> { $ crate :: MarkdownSyntaxKind :: BANG } ; [-] => { $ crate :: MarkdownSyntaxKind :: MINUS } ; [*] => { $ crate :: MarkdownSyntaxKind :: STAR } ; [**] => { $ crate :: MarkdownSyntaxKind :: DOUBLE_STAR } ; ['`'] => { $ crate :: MarkdownSyntaxKind :: BACKTICK } ; ["```"] => { $ crate :: MarkdownSyntaxKind :: TRIPLE_BACKTICK } ; [~] => { $ crate :: MarkdownSyntaxKind :: TILDE } ; [" "] => { $ crate :: MarkdownSyntaxKind :: WHITESPACE3 } ; ["_"] => { $ crate :: MarkdownSyntaxKind :: UNDERSCORE } ; ["__"] => { $ crate :: MarkdownSyntaxKind :: DOUBLE_UNDERSCORE } ; [#] => { $ crate :: MarkdownSyntaxKind :: HASH } ; [,] => { $ crate :: MarkdownSyntaxKind :: COMMA } ; [null] => { $ crate :: MarkdownSyntaxKind :: NULL_KW } ; [ident] => { $ crate :: MarkdownSyntaxKind :: IDENT } ; [EOF] => { $ crate :: MarkdownSyntaxKind :: EOF } ; [UNICODE_BOM] => { $ crate :: MarkdownSyntaxKind :: UNICODE_BOM } ; [#] => { $ crate :: MarkdownSyntaxKind :: HASH } ; } +macro_rules ! T { [<] => { $ crate :: MarkdownSyntaxKind :: L_ANGLE } ; [>] => { $ crate :: MarkdownSyntaxKind :: R_ANGLE } ; ['('] => { $ crate :: MarkdownSyntaxKind :: L_PAREN } ; [')'] => { $ crate :: MarkdownSyntaxKind :: R_PAREN } ; ['['] => { $ crate :: MarkdownSyntaxKind :: L_BRACK } ; [']'] => { $ crate :: MarkdownSyntaxKind :: R_BRACK } ; [/] => { $ crate :: MarkdownSyntaxKind :: SLASH } ; [=] => { $ crate :: MarkdownSyntaxKind :: EQ } ; [!] 
=> { $ crate :: MarkdownSyntaxKind :: BANG } ; [-] => { $ crate :: MarkdownSyntaxKind :: MINUS } ; [*] => { $ crate :: MarkdownSyntaxKind :: STAR } ; [+] => { $ crate :: MarkdownSyntaxKind :: PLUS } ; [**] => { $ crate :: MarkdownSyntaxKind :: DOUBLE_STAR } ; ['`'] => { $ crate :: MarkdownSyntaxKind :: BACKTICK } ; ["```"] => { $ crate :: MarkdownSyntaxKind :: TRIPLE_BACKTICK } ; [~] => { $ crate :: MarkdownSyntaxKind :: TILDE } ; [~~~] => { $ crate :: MarkdownSyntaxKind :: TRIPLE_TILDE } ; [" "] => { $ crate :: MarkdownSyntaxKind :: WHITESPACE3 } ; ["_"] => { $ crate :: MarkdownSyntaxKind :: UNDERSCORE } ; ["__"] => { $ crate :: MarkdownSyntaxKind :: DOUBLE_UNDERSCORE } ; [#] => { $ crate :: MarkdownSyntaxKind :: HASH } ; [,] => { $ crate :: MarkdownSyntaxKind :: COMMA } ; [:] => { $ crate :: MarkdownSyntaxKind :: COLON } ; [null] => { $ crate :: MarkdownSyntaxKind :: NULL_KW } ; [ident] => { $ crate :: MarkdownSyntaxKind :: IDENT } ; [EOF] => { $ crate :: MarkdownSyntaxKind :: EOF } ; [UNICODE_BOM] => { $ crate :: MarkdownSyntaxKind :: UNICODE_BOM } ; [#] => { $ crate :: MarkdownSyntaxKind :: HASH } ; } diff --git a/crates/biome_markdown_syntax/src/generated/macros.rs b/crates/biome_markdown_syntax/src/generated/macros.rs index 834bc5073bde..e10554278277 100644 --- a/crates/biome_markdown_syntax/src/generated/macros.rs +++ b/crates/biome_markdown_syntax/src/generated/macros.rs @@ -16,6 +16,10 @@ macro_rules! map_syntax_node { ($ node : expr , $ pattern : pat => $ body : expr) => { match $node { node => match $crate::MarkdownSyntaxNode::kind(&node) { + $crate::MarkdownSyntaxKind::MD_AUTOLINK => { + let $pattern = unsafe { $crate::MdAutolink::new_unchecked(node) }; + $body + } $crate::MarkdownSyntaxKind::MD_BULLET => { let $pattern = unsafe { $crate::MdBullet::new_unchecked(node) }; $body @@ -28,6 +32,10 @@ macro_rules! 
map_syntax_node { let $pattern = unsafe { $crate::MdDocument::new_unchecked(node) }; $body } + $crate::MarkdownSyntaxKind::MD_ENTITY_REFERENCE => { + let $pattern = unsafe { $crate::MdEntityReference::new_unchecked(node) }; + $body + } $crate::MarkdownSyntaxKind::MD_FENCED_CODE_BLOCK => { let $pattern = unsafe { $crate::MdFencedCodeBlock::new_unchecked(node) }; $body @@ -56,10 +64,6 @@ macro_rules! map_syntax_node { let $pattern = unsafe { $crate::MdIndentCodeBlock::new_unchecked(node) }; $body } - $crate::MarkdownSyntaxKind::MD_INDENTED_CODE_LINE => { - let $pattern = unsafe { $crate::MdIndentedCodeLine::new_unchecked(node) }; - $body - } $crate::MarkdownSyntaxKind::MD_INLINE_CODE => { let $pattern = unsafe { $crate::MdInlineCode::new_unchecked(node) }; $body @@ -68,20 +72,12 @@ macro_rules! map_syntax_node { let $pattern = unsafe { $crate::MdInlineEmphasis::new_unchecked(node) }; $body } - $crate::MarkdownSyntaxKind::MD_INLINE_IMAGE => { - let $pattern = unsafe { $crate::MdInlineImage::new_unchecked(node) }; + $crate::MarkdownSyntaxKind::MD_INLINE_HTML => { + let $pattern = unsafe { $crate::MdInlineHtml::new_unchecked(node) }; $body } - $crate::MarkdownSyntaxKind::MD_INLINE_IMAGE_ALT => { - let $pattern = unsafe { $crate::MdInlineImageAlt::new_unchecked(node) }; - $body - } - $crate::MarkdownSyntaxKind::MD_INLINE_IMAGE_LINK => { - let $pattern = unsafe { $crate::MdInlineImageLink::new_unchecked(node) }; - $body - } - $crate::MarkdownSyntaxKind::MD_INLINE_IMAGE_SOURCE => { - let $pattern = unsafe { $crate::MdInlineImageSource::new_unchecked(node) }; + $crate::MarkdownSyntaxKind::MD_INLINE_IMAGE => { + let $pattern = unsafe { $crate::MdInlineImage::new_unchecked(node) }; $body } $crate::MarkdownSyntaxKind::MD_INLINE_ITALIC => { @@ -96,8 +92,29 @@ macro_rules! 
map_syntax_node { let $pattern = unsafe { $crate::MdLinkBlock::new_unchecked(node) }; $body } - $crate::MarkdownSyntaxKind::MD_ORDER_LIST_ITEM => { - let $pattern = unsafe { $crate::MdOrderListItem::new_unchecked(node) }; + $crate::MarkdownSyntaxKind::MD_LINK_DESTINATION => { + let $pattern = unsafe { $crate::MdLinkDestination::new_unchecked(node) }; + $body + } + $crate::MarkdownSyntaxKind::MD_LINK_LABEL => { + let $pattern = unsafe { $crate::MdLinkLabel::new_unchecked(node) }; + $body + } + $crate::MarkdownSyntaxKind::MD_LINK_REFERENCE_DEFINITION => { + let $pattern = + unsafe { $crate::MdLinkReferenceDefinition::new_unchecked(node) }; + $body + } + $crate::MarkdownSyntaxKind::MD_LINK_TITLE => { + let $pattern = unsafe { $crate::MdLinkTitle::new_unchecked(node) }; + $body + } + $crate::MarkdownSyntaxKind::MD_NEWLINE => { + let $pattern = unsafe { $crate::MdNewline::new_unchecked(node) }; + $body + } + $crate::MarkdownSyntaxKind::MD_ORDERED_LIST_ITEM => { + let $pattern = unsafe { $crate::MdOrderedListItem::new_unchecked(node) }; $body } $crate::MarkdownSyntaxKind::MD_PARAGRAPH => { @@ -108,6 +125,18 @@ macro_rules! map_syntax_node { let $pattern = unsafe { $crate::MdQuote::new_unchecked(node) }; $body } + $crate::MarkdownSyntaxKind::MD_REFERENCE_IMAGE => { + let $pattern = unsafe { $crate::MdReferenceImage::new_unchecked(node) }; + $body + } + $crate::MarkdownSyntaxKind::MD_REFERENCE_LINK => { + let $pattern = unsafe { $crate::MdReferenceLink::new_unchecked(node) }; + $body + } + $crate::MarkdownSyntaxKind::MD_REFERENCE_LINK_LABEL => { + let $pattern = unsafe { $crate::MdReferenceLinkLabel::new_unchecked(node) }; + $body + } $crate::MarkdownSyntaxKind::MD_SETEXT_HEADER => { let $pattern = unsafe { $crate::MdSetextHeader::new_unchecked(node) }; $body @@ -144,18 +173,10 @@ macro_rules! 
map_syntax_node { let $pattern = unsafe { $crate::MdHashList::new_unchecked(node) }; $body } - $crate::MarkdownSyntaxKind::MD_INDENTED_CODE_LINE_LIST => { - let $pattern = unsafe { $crate::MdIndentedCodeLineList::new_unchecked(node) }; - $body - } $crate::MarkdownSyntaxKind::MD_INLINE_ITEM_LIST => { let $pattern = unsafe { $crate::MdInlineItemList::new_unchecked(node) }; $body } - $crate::MarkdownSyntaxKind::MD_ORDER_LIST => { - let $pattern = unsafe { $crate::MdOrderList::new_unchecked(node) }; - $body - } _ => unreachable!(), }, } diff --git a/crates/biome_markdown_syntax/src/generated/nodes.rs b/crates/biome_markdown_syntax/src/generated/nodes.rs index b3c37bc737d4..c9e9db0fd573 100644 --- a/crates/biome_markdown_syntax/src/generated/nodes.rs +++ b/crates/biome_markdown_syntax/src/generated/nodes.rs @@ -21,6 +21,51 @@ use std::fmt::{Debug, Formatter}; #[doc = r" the slots are not statically known."] pub(crate) const SLOT_MAP_EMPTY_VALUE: u8 = u8::MAX; #[derive(Clone, PartialEq, Eq, Hash)] +pub struct MdAutolink { + pub(crate) syntax: SyntaxNode, +} +impl MdAutolink { + #[doc = r" Create an AstNode from a SyntaxNode without checking its kind"] + #[doc = r""] + #[doc = r" # Safety"] + #[doc = r" This function must be guarded with a call to [AstNode::can_cast]"] + #[doc = r" or a match on [SyntaxNode::kind]"] + #[inline] + pub const unsafe fn new_unchecked(syntax: SyntaxNode) -> Self { + Self { syntax } + } + pub fn as_fields(&self) -> MdAutolinkFields { + MdAutolinkFields { + l_angle_token: self.l_angle_token(), + value: self.value(), + r_angle_token: self.r_angle_token(), + } + } + pub fn l_angle_token(&self) -> SyntaxResult { + support::required_token(&self.syntax, 0usize) + } + pub fn value(&self) -> MdInlineItemList { + support::list(&self.syntax, 1usize) + } + pub fn r_angle_token(&self) -> SyntaxResult { + support::required_token(&self.syntax, 2usize) + } +} +impl Serialize for MdAutolink { + fn serialize(&self, serializer: S) -> Result + where + S: 
Serializer, + { + self.as_fields().serialize(serializer) + } +} +#[derive(Serialize)] +pub struct MdAutolinkFields { + pub l_angle_token: SyntaxResult, + pub value: MdInlineItemList, + pub r_angle_token: SyntaxResult, +} +#[derive(Clone, PartialEq, Eq, Hash)] pub struct MdBullet { pub(crate) syntax: SyntaxNode, } @@ -37,18 +82,14 @@ impl MdBullet { pub fn as_fields(&self) -> MdBulletFields { MdBulletFields { bullet: self.bullet(), - space_token: self.space_token(), content: self.content(), } } pub fn bullet(&self) -> SyntaxResult { support::required_token(&self.syntax, 0usize) } - pub fn space_token(&self) -> SyntaxResult { - support::required_token(&self.syntax, 1usize) - } - pub fn content(&self) -> MdInlineItemList { - support::list(&self.syntax, 2usize) + pub fn content(&self) -> MdBlockList { + support::list(&self.syntax, 1usize) } } impl Serialize for MdBullet { @@ -62,8 +103,7 @@ impl Serialize for MdBullet { #[derive(Serialize)] pub struct MdBulletFields { pub bullet: SyntaxResult, - pub space_token: SyntaxResult, - pub content: MdInlineItemList, + pub content: MdBlockList, } #[derive(Clone, PartialEq, Eq, Hash)] pub struct MdBulletListItem { @@ -146,6 +186,41 @@ pub struct MdDocumentFields { pub eof_token: SyntaxResult, } #[derive(Clone, PartialEq, Eq, Hash)] +pub struct MdEntityReference { + pub(crate) syntax: SyntaxNode, +} +impl MdEntityReference { + #[doc = r" Create an AstNode from a SyntaxNode without checking its kind"] + #[doc = r""] + #[doc = r" # Safety"] + #[doc = r" This function must be guarded with a call to [AstNode::can_cast]"] + #[doc = r" or a match on [SyntaxNode::kind]"] + #[inline] + pub const unsafe fn new_unchecked(syntax: SyntaxNode) -> Self { + Self { syntax } + } + pub fn as_fields(&self) -> MdEntityReferenceFields { + MdEntityReferenceFields { + value_token: self.value_token(), + } + } + pub fn value_token(&self) -> SyntaxResult { + support::required_token(&self.syntax, 0usize) + } +} +impl Serialize for MdEntityReference { + fn 
serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + self.as_fields().serialize(serializer) + } +} +#[derive(Serialize)] +pub struct MdEntityReferenceFields { + pub value_token: SyntaxResult, +} +#[derive(Clone, PartialEq, Eq, Hash)] pub struct MdFencedCodeBlock { pub(crate) syntax: SyntaxNode, } @@ -161,31 +236,23 @@ impl MdFencedCodeBlock { } pub fn as_fields(&self) -> MdFencedCodeBlockFields { MdFencedCodeBlockFields { - l_fence_token: self.l_fence_token(), + l_fence: self.l_fence(), code_list: self.code_list(), - l_hard_line: self.l_hard_line(), content: self.content(), - r_hard_line: self.r_hard_line(), - r_fence_token: self.r_fence_token(), + r_fence: self.r_fence(), } } - pub fn l_fence_token(&self) -> SyntaxResult { + pub fn l_fence(&self) -> SyntaxResult { support::required_token(&self.syntax, 0usize) } pub fn code_list(&self) -> MdCodeNameList { support::list(&self.syntax, 1usize) } - pub fn l_hard_line(&self) -> SyntaxResult { - support::required_node(&self.syntax, 2usize) - } - pub fn content(&self) -> SyntaxResult { - support::required_node(&self.syntax, 3usize) - } - pub fn r_hard_line(&self) -> SyntaxResult { - support::required_node(&self.syntax, 4usize) + pub fn content(&self) -> MdInlineItemList { + support::list(&self.syntax, 2usize) } - pub fn r_fence_token(&self) -> SyntaxResult { - support::required_token(&self.syntax, 5usize) + pub fn r_fence(&self) -> SyntaxResult { + support::required_token(&self.syntax, 3usize) } } impl Serialize for MdFencedCodeBlock { @@ -198,12 +265,10 @@ impl Serialize for MdFencedCodeBlock { } #[derive(Serialize)] pub struct MdFencedCodeBlockFields { - pub l_fence_token: SyntaxResult, + pub l_fence: SyntaxResult, pub code_list: MdCodeNameList, - pub l_hard_line: SyntaxResult, - pub content: SyntaxResult, - pub r_hard_line: SyntaxResult, - pub r_fence_token: SyntaxResult, + pub content: MdInlineItemList, + pub r_fence: SyntaxResult, } #[derive(Clone, PartialEq, Eq, Hash)] pub struct MdHardLine { @@ 
-336,11 +401,11 @@ impl MdHtmlBlock { } pub fn as_fields(&self) -> MdHtmlBlockFields { MdHtmlBlockFields { - md_textual: self.md_textual(), + content: self.content(), } } - pub fn md_textual(&self) -> SyntaxResult { - support::required_node(&self.syntax, 0usize) + pub fn content(&self) -> MdInlineItemList { + support::list(&self.syntax, 0usize) } } impl Serialize for MdHtmlBlock { @@ -353,7 +418,7 @@ impl Serialize for MdHtmlBlock { } #[derive(Serialize)] pub struct MdHtmlBlockFields { - pub md_textual: SyntaxResult, + pub content: MdInlineItemList, } #[derive(Clone, PartialEq, Eq, Hash)] pub struct MdIndent { @@ -406,10 +471,10 @@ impl MdIndentCodeBlock { } pub fn as_fields(&self) -> MdIndentCodeBlockFields { MdIndentCodeBlockFields { - lines: self.lines(), + content: self.content(), } } - pub fn lines(&self) -> MdIndentedCodeLineList { + pub fn content(&self) -> MdInlineItemList { support::list(&self.syntax, 0usize) } } @@ -423,47 +488,7 @@ impl Serialize for MdIndentCodeBlock { } #[derive(Serialize)] pub struct MdIndentCodeBlockFields { - pub lines: MdIndentedCodeLineList, -} -#[derive(Clone, PartialEq, Eq, Hash)] -pub struct MdIndentedCodeLine { - pub(crate) syntax: SyntaxNode, -} -impl MdIndentedCodeLine { - #[doc = r" Create an AstNode from a SyntaxNode without checking its kind"] - #[doc = r""] - #[doc = r" # Safety"] - #[doc = r" This function must be guarded with a call to [AstNode::can_cast]"] - #[doc = r" or a match on [SyntaxNode::kind]"] - #[inline] - pub const unsafe fn new_unchecked(syntax: SyntaxNode) -> Self { - Self { syntax } - } - pub fn as_fields(&self) -> MdIndentedCodeLineFields { - MdIndentedCodeLineFields { - indentation: self.indentation(), - content: self.content(), - } - } - pub fn indentation(&self) -> SyntaxResult { - support::required_node(&self.syntax, 0usize) - } - pub fn content(&self) -> SyntaxResult { - support::required_node(&self.syntax, 1usize) - } -} -impl Serialize for MdIndentedCodeLine { - fn serialize(&self, serializer: 
S) -> Result - where - S: Serializer, - { - self.as_fields().serialize(serializer) - } -} -#[derive(Serialize)] -pub struct MdIndentedCodeLineFields { - pub indentation: SyntaxResult, - pub content: SyntaxResult, + pub content: MdInlineItemList, } #[derive(Clone, PartialEq, Eq, Hash)] pub struct MdInlineCode { @@ -556,10 +581,10 @@ pub struct MdInlineEmphasisFields { pub r_fence: SyntaxResult, } #[derive(Clone, PartialEq, Eq, Hash)] -pub struct MdInlineImage { +pub struct MdInlineHtml { pub(crate) syntax: SyntaxNode, } -impl MdInlineImage { +impl MdInlineHtml { #[doc = r" Create an AstNode from a SyntaxNode without checking its kind"] #[doc = r""] #[doc = r" # Safety"] @@ -569,36 +594,16 @@ impl MdInlineImage { pub const unsafe fn new_unchecked(syntax: SyntaxNode) -> Self { Self { syntax } } - pub fn as_fields(&self) -> MdInlineImageFields { - MdInlineImageFields { - l_brack_token: self.l_brack_token(), - excl_token: self.excl_token(), - alt: self.alt(), - source: self.source(), - r_brack_token: self.r_brack_token(), - link: self.link(), + pub fn as_fields(&self) -> MdInlineHtmlFields { + MdInlineHtmlFields { + value: self.value(), } } - pub fn l_brack_token(&self) -> SyntaxResult { - support::required_token(&self.syntax, 0usize) - } - pub fn excl_token(&self) -> SyntaxResult { - support::required_token(&self.syntax, 1usize) - } - pub fn alt(&self) -> SyntaxResult { - support::required_node(&self.syntax, 2usize) - } - pub fn source(&self) -> SyntaxResult { - support::required_node(&self.syntax, 3usize) - } - pub fn r_brack_token(&self) -> SyntaxResult { - support::required_token(&self.syntax, 4usize) - } - pub fn link(&self) -> Option { - support::node(&self.syntax, 5usize) + pub fn value(&self) -> MdInlineItemList { + support::list(&self.syntax, 0usize) } } -impl Serialize for MdInlineImage { +impl Serialize for MdInlineHtml { fn serialize(&self, serializer: S) -> Result where S: Serializer, @@ -607,19 +612,14 @@ impl Serialize for MdInlineImage { } } 
#[derive(Serialize)] -pub struct MdInlineImageFields { - pub l_brack_token: SyntaxResult, - pub excl_token: SyntaxResult, - pub alt: SyntaxResult, - pub source: SyntaxResult, - pub r_brack_token: SyntaxResult, - pub link: Option, +pub struct MdInlineHtmlFields { + pub value: MdInlineItemList, } #[derive(Clone, PartialEq, Eq, Hash)] -pub struct MdInlineImageAlt { +pub struct MdInlineImage { pub(crate) syntax: SyntaxNode, } -impl MdInlineImageAlt { +impl MdInlineImage { #[doc = r" Create an AstNode from a SyntaxNode without checking its kind"] #[doc = r""] #[doc = r" # Safety"] @@ -629,114 +629,44 @@ impl MdInlineImageAlt { pub const unsafe fn new_unchecked(syntax: SyntaxNode) -> Self { Self { syntax } } - pub fn as_fields(&self) -> MdInlineImageAltFields { - MdInlineImageAltFields { + pub fn as_fields(&self) -> MdInlineImageFields { + MdInlineImageFields { + excl_token: self.excl_token(), l_brack_token: self.l_brack_token(), - content: self.content(), + alt: self.alt(), r_brack_token: self.r_brack_token(), - } - } - pub fn l_brack_token(&self) -> SyntaxResult { - support::required_token(&self.syntax, 0usize) - } - pub fn content(&self) -> MdInlineItemList { - support::list(&self.syntax, 1usize) - } - pub fn r_brack_token(&self) -> SyntaxResult { - support::required_token(&self.syntax, 2usize) - } -} -impl Serialize for MdInlineImageAlt { - fn serialize(&self, serializer: S) -> Result - where - S: Serializer, - { - self.as_fields().serialize(serializer) - } -} -#[derive(Serialize)] -pub struct MdInlineImageAltFields { - pub l_brack_token: SyntaxResult, - pub content: MdInlineItemList, - pub r_brack_token: SyntaxResult, -} -#[derive(Clone, PartialEq, Eq, Hash)] -pub struct MdInlineImageLink { - pub(crate) syntax: SyntaxNode, -} -impl MdInlineImageLink { - #[doc = r" Create an AstNode from a SyntaxNode without checking its kind"] - #[doc = r""] - #[doc = r" # Safety"] - #[doc = r" This function must be guarded with a call to [AstNode::can_cast]"] - #[doc = r" or a 
match on [SyntaxNode::kind]"] - #[inline] - pub const unsafe fn new_unchecked(syntax: SyntaxNode) -> Self { - Self { syntax } - } - pub fn as_fields(&self) -> MdInlineImageLinkFields { - MdInlineImageLinkFields { l_paren_token: self.l_paren_token(), - content: self.content(), + destination: self.destination(), + title: self.title(), r_paren_token: self.r_paren_token(), } } - pub fn l_paren_token(&self) -> SyntaxResult { + pub fn excl_token(&self) -> SyntaxResult { support::required_token(&self.syntax, 0usize) } - pub fn content(&self) -> MdInlineItemList { - support::list(&self.syntax, 1usize) - } - pub fn r_paren_token(&self) -> SyntaxResult { - support::required_token(&self.syntax, 2usize) - } -} -impl Serialize for MdInlineImageLink { - fn serialize(&self, serializer: S) -> Result - where - S: Serializer, - { - self.as_fields().serialize(serializer) + pub fn l_brack_token(&self) -> SyntaxResult { + support::required_token(&self.syntax, 1usize) } -} -#[derive(Serialize)] -pub struct MdInlineImageLinkFields { - pub l_paren_token: SyntaxResult, - pub content: MdInlineItemList, - pub r_paren_token: SyntaxResult, -} -#[derive(Clone, PartialEq, Eq, Hash)] -pub struct MdInlineImageSource { - pub(crate) syntax: SyntaxNode, -} -impl MdInlineImageSource { - #[doc = r" Create an AstNode from a SyntaxNode without checking its kind"] - #[doc = r""] - #[doc = r" # Safety"] - #[doc = r" This function must be guarded with a call to [AstNode::can_cast]"] - #[doc = r" or a match on [SyntaxNode::kind]"] - #[inline] - pub const unsafe fn new_unchecked(syntax: SyntaxNode) -> Self { - Self { syntax } + pub fn alt(&self) -> MdInlineItemList { + support::list(&self.syntax, 2usize) } - pub fn as_fields(&self) -> MdInlineImageSourceFields { - MdInlineImageSourceFields { - l_paren_token: self.l_paren_token(), - content: self.content(), - r_paren_token: self.r_paren_token(), - } + pub fn r_brack_token(&self) -> SyntaxResult { + support::required_token(&self.syntax, 3usize) } pub fn 
l_paren_token(&self) -> SyntaxResult { - support::required_token(&self.syntax, 0usize) + support::required_token(&self.syntax, 4usize) } - pub fn content(&self) -> MdInlineItemList { - support::list(&self.syntax, 1usize) + pub fn destination(&self) -> MdInlineItemList { + support::list(&self.syntax, 5usize) + } + pub fn title(&self) -> Option { + support::node(&self.syntax, 6usize) } pub fn r_paren_token(&self) -> SyntaxResult { - support::required_token(&self.syntax, 2usize) + support::required_token(&self.syntax, 7usize) } } -impl Serialize for MdInlineImageSource { +impl Serialize for MdInlineImage { fn serialize(&self, serializer: S) -> Result where S: Serializer, @@ -745,9 +675,14 @@ impl Serialize for MdInlineImageSource { } } #[derive(Serialize)] -pub struct MdInlineImageSourceFields { +pub struct MdInlineImageFields { + pub excl_token: SyntaxResult, + pub l_brack_token: SyntaxResult, + pub alt: MdInlineItemList, + pub r_brack_token: SyntaxResult, pub l_paren_token: SyntaxResult, - pub content: MdInlineItemList, + pub destination: MdInlineItemList, + pub title: Option, pub r_paren_token: SyntaxResult, } #[derive(Clone, PartialEq, Eq, Hash)] @@ -815,7 +750,8 @@ impl MdInlineLink { text: self.text(), r_brack_token: self.r_brack_token(), l_paren_token: self.l_paren_token(), - source: self.source(), + destination: self.destination(), + title: self.title(), r_paren_token: self.r_paren_token(), } } @@ -831,11 +767,14 @@ impl MdInlineLink { pub fn l_paren_token(&self) -> SyntaxResult { support::required_token(&self.syntax, 3usize) } - pub fn source(&self) -> MdInlineItemList { + pub fn destination(&self) -> MdInlineItemList { support::list(&self.syntax, 4usize) } + pub fn title(&self) -> Option { + support::node(&self.syntax, 5usize) + } pub fn r_paren_token(&self) -> SyntaxResult { - support::required_token(&self.syntax, 5usize) + support::required_token(&self.syntax, 6usize) } } impl Serialize for MdInlineLink { @@ -852,7 +791,8 @@ pub struct MdInlineLinkFields { 
pub text: MdInlineItemList, pub r_brack_token: SyntaxResult, pub l_paren_token: SyntaxResult, - pub source: MdInlineItemList, + pub destination: MdInlineItemList, + pub title: Option, pub r_paren_token: SyntaxResult, } #[derive(Clone, PartialEq, Eq, Hash)] @@ -901,10 +841,10 @@ pub struct MdLinkBlockFields { pub title: Option, } #[derive(Clone, PartialEq, Eq, Hash)] -pub struct MdOrderListItem { +pub struct MdLinkDestination { pub(crate) syntax: SyntaxNode, } -impl MdOrderListItem { +impl MdLinkDestination { #[doc = r" Create an AstNode from a SyntaxNode without checking its kind"] #[doc = r""] #[doc = r" # Safety"] @@ -914,16 +854,16 @@ impl MdOrderListItem { pub const unsafe fn new_unchecked(syntax: SyntaxNode) -> Self { Self { syntax } } - pub fn as_fields(&self) -> MdOrderListItemFields { - MdOrderListItemFields { - md_bullet_list: self.md_bullet_list(), + pub fn as_fields(&self) -> MdLinkDestinationFields { + MdLinkDestinationFields { + content: self.content(), } } - pub fn md_bullet_list(&self) -> MdBulletList { + pub fn content(&self) -> MdInlineItemList { support::list(&self.syntax, 0usize) } } -impl Serialize for MdOrderListItem { +impl Serialize for MdLinkDestination { fn serialize(&self, serializer: S) -> Result where S: Serializer, @@ -932,14 +872,14 @@ impl Serialize for MdOrderListItem { } } #[derive(Serialize)] -pub struct MdOrderListItemFields { - pub md_bullet_list: MdBulletList, +pub struct MdLinkDestinationFields { + pub content: MdInlineItemList, } #[derive(Clone, PartialEq, Eq, Hash)] -pub struct MdParagraph { +pub struct MdLinkLabel { pub(crate) syntax: SyntaxNode, } -impl MdParagraph { +impl MdLinkLabel { #[doc = r" Create an AstNode from a SyntaxNode without checking its kind"] #[doc = r""] #[doc = r" # Safety"] @@ -949,20 +889,16 @@ impl MdParagraph { pub const unsafe fn new_unchecked(syntax: SyntaxNode) -> Self { Self { syntax } } - pub fn as_fields(&self) -> MdParagraphFields { - MdParagraphFields { - list: self.list(), - hard_line: 
self.hard_line(), + pub fn as_fields(&self) -> MdLinkLabelFields { + MdLinkLabelFields { + content: self.content(), } } - pub fn list(&self) -> MdInlineItemList { + pub fn content(&self) -> MdInlineItemList { support::list(&self.syntax, 0usize) } - pub fn hard_line(&self) -> SyntaxResult { - support::required_node(&self.syntax, 1usize) - } } -impl Serialize for MdParagraph { +impl Serialize for MdLinkLabel { fn serialize(&self, serializer: S) -> Result where S: Serializer, @@ -971,15 +907,14 @@ impl Serialize for MdParagraph { } } #[derive(Serialize)] -pub struct MdParagraphFields { - pub list: MdInlineItemList, - pub hard_line: SyntaxResult, +pub struct MdLinkLabelFields { + pub content: MdInlineItemList, } #[derive(Clone, PartialEq, Eq, Hash)] -pub struct MdQuote { +pub struct MdLinkReferenceDefinition { pub(crate) syntax: SyntaxNode, } -impl MdQuote { +impl MdLinkReferenceDefinition { #[doc = r" Create an AstNode from a SyntaxNode without checking its kind"] #[doc = r""] #[doc = r" # Safety"] @@ -989,16 +924,36 @@ impl MdQuote { pub const unsafe fn new_unchecked(syntax: SyntaxNode) -> Self { Self { syntax } } - pub fn as_fields(&self) -> MdQuoteFields { - MdQuoteFields { - any_md_block: self.any_md_block(), + pub fn as_fields(&self) -> MdLinkReferenceDefinitionFields { + MdLinkReferenceDefinitionFields { + l_brack_token: self.l_brack_token(), + label: self.label(), + r_brack_token: self.r_brack_token(), + colon_token: self.colon_token(), + destination: self.destination(), + title: self.title(), } } - pub fn any_md_block(&self) -> SyntaxResult { - support::required_node(&self.syntax, 0usize) + pub fn l_brack_token(&self) -> SyntaxResult { + support::required_token(&self.syntax, 0usize) + } + pub fn label(&self) -> SyntaxResult { + support::required_node(&self.syntax, 1usize) + } + pub fn r_brack_token(&self) -> SyntaxResult { + support::required_token(&self.syntax, 2usize) + } + pub fn colon_token(&self) -> SyntaxResult { + support::required_token(&self.syntax, 
3usize) + } + pub fn destination(&self) -> SyntaxResult { + support::required_node(&self.syntax, 4usize) + } + pub fn title(&self) -> Option { + support::node(&self.syntax, 5usize) } } -impl Serialize for MdQuote { +impl Serialize for MdLinkReferenceDefinition { fn serialize(&self, serializer: S) -> Result where S: Serializer, @@ -1007,14 +962,19 @@ impl Serialize for MdQuote { } } #[derive(Serialize)] -pub struct MdQuoteFields { - pub any_md_block: SyntaxResult, +pub struct MdLinkReferenceDefinitionFields { + pub l_brack_token: SyntaxResult, + pub label: SyntaxResult, + pub r_brack_token: SyntaxResult, + pub colon_token: SyntaxResult, + pub destination: SyntaxResult, + pub title: Option, } #[derive(Clone, PartialEq, Eq, Hash)] -pub struct MdSetextHeader { +pub struct MdLinkTitle { pub(crate) syntax: SyntaxNode, } -impl MdSetextHeader { +impl MdLinkTitle { #[doc = r" Create an AstNode from a SyntaxNode without checking its kind"] #[doc = r""] #[doc = r" # Safety"] @@ -1024,16 +984,16 @@ impl MdSetextHeader { pub const unsafe fn new_unchecked(syntax: SyntaxNode) -> Self { Self { syntax } } - pub fn as_fields(&self) -> MdSetextHeaderFields { - MdSetextHeaderFields { - md_paragraph: self.md_paragraph(), + pub fn as_fields(&self) -> MdLinkTitleFields { + MdLinkTitleFields { + content: self.content(), } } - pub fn md_paragraph(&self) -> SyntaxResult { - support::required_node(&self.syntax, 0usize) + pub fn content(&self) -> MdInlineItemList { + support::list(&self.syntax, 0usize) } } -impl Serialize for MdSetextHeader { +impl Serialize for MdLinkTitle { fn serialize(&self, serializer: S) -> Result where S: Serializer, @@ -1042,14 +1002,14 @@ impl Serialize for MdSetextHeader { } } #[derive(Serialize)] -pub struct MdSetextHeaderFields { - pub md_paragraph: SyntaxResult, +pub struct MdLinkTitleFields { + pub content: MdInlineItemList, } #[derive(Clone, PartialEq, Eq, Hash)] -pub struct MdSoftBreak { +pub struct MdNewline { pub(crate) syntax: SyntaxNode, } -impl 
MdSoftBreak { +impl MdNewline { #[doc = r" Create an AstNode from a SyntaxNode without checking its kind"] #[doc = r""] #[doc = r" # Safety"] @@ -1059,8 +1019,8 @@ impl MdSoftBreak { pub const unsafe fn new_unchecked(syntax: SyntaxNode) -> Self { Self { syntax } } - pub fn as_fields(&self) -> MdSoftBreakFields { - MdSoftBreakFields { + pub fn as_fields(&self) -> MdNewlineFields { + MdNewlineFields { value_token: self.value_token(), } } @@ -1068,7 +1028,7 @@ impl MdSoftBreak { support::required_token(&self.syntax, 0usize) } } -impl Serialize for MdSoftBreak { +impl Serialize for MdNewline { fn serialize(&self, serializer: S) -> Result where S: Serializer, @@ -1077,14 +1037,14 @@ impl Serialize for MdSoftBreak { } } #[derive(Serialize)] -pub struct MdSoftBreakFields { +pub struct MdNewlineFields { pub value_token: SyntaxResult, } #[derive(Clone, PartialEq, Eq, Hash)] -pub struct MdTextual { +pub struct MdOrderedListItem { pub(crate) syntax: SyntaxNode, } -impl MdTextual { +impl MdOrderedListItem { #[doc = r" Create an AstNode from a SyntaxNode without checking its kind"] #[doc = r""] #[doc = r" # Safety"] @@ -1094,16 +1054,16 @@ impl MdTextual { pub const unsafe fn new_unchecked(syntax: SyntaxNode) -> Self { Self { syntax } } - pub fn as_fields(&self) -> MdTextualFields { - MdTextualFields { - value_token: self.value_token(), + pub fn as_fields(&self) -> MdOrderedListItemFields { + MdOrderedListItemFields { + md_bullet_list: self.md_bullet_list(), } } - pub fn value_token(&self) -> SyntaxResult { - support::required_token(&self.syntax, 0usize) + pub fn md_bullet_list(&self) -> MdBulletList { + support::list(&self.syntax, 0usize) } } -impl Serialize for MdTextual { +impl Serialize for MdOrderedListItem { fn serialize(&self, serializer: S) -> Result where S: Serializer, @@ -1112,14 +1072,14 @@ impl Serialize for MdTextual { } } #[derive(Serialize)] -pub struct MdTextualFields { - pub value_token: SyntaxResult, +pub struct MdOrderedListItemFields { + pub md_bullet_list: 
MdBulletList, } #[derive(Clone, PartialEq, Eq, Hash)] -pub struct MdThematicBreakBlock { +pub struct MdParagraph { pub(crate) syntax: SyntaxNode, } -impl MdThematicBreakBlock { +impl MdParagraph { #[doc = r" Create an AstNode from a SyntaxNode without checking its kind"] #[doc = r""] #[doc = r" # Safety"] @@ -1129,16 +1089,20 @@ impl MdThematicBreakBlock { pub const unsafe fn new_unchecked(syntax: SyntaxNode) -> Self { Self { syntax } } - pub fn as_fields(&self) -> MdThematicBreakBlockFields { - MdThematicBreakBlockFields { - value_token: self.value_token(), + pub fn as_fields(&self) -> MdParagraphFields { + MdParagraphFields { + list: self.list(), + hard_line: self.hard_line(), } } - pub fn value_token(&self) -> SyntaxResult { - support::required_token(&self.syntax, 0usize) + pub fn list(&self) -> MdInlineItemList { + support::list(&self.syntax, 0usize) + } + pub fn hard_line(&self) -> Option { + support::node(&self.syntax, 1usize) } } -impl Serialize for MdThematicBreakBlock { +impl Serialize for MdParagraph { fn serialize(&self, serializer: S) -> Result where S: Serializer, @@ -1147,201 +1111,938 @@ impl Serialize for MdThematicBreakBlock { } } #[derive(Serialize)] -pub struct MdThematicBreakBlockFields { - pub value_token: SyntaxResult, +pub struct MdParagraphFields { + pub list: MdInlineItemList, + pub hard_line: Option, } -#[derive(Clone, PartialEq, Eq, Hash, Serialize)] -pub enum AnyCodeBlock { - MdFencedCodeBlock(MdFencedCodeBlock), - MdIndentCodeBlock(MdIndentCodeBlock), +#[derive(Clone, PartialEq, Eq, Hash)] +pub struct MdQuote { + pub(crate) syntax: SyntaxNode, } -impl AnyCodeBlock { - pub fn as_md_fenced_code_block(&self) -> Option<&MdFencedCodeBlock> { - match &self { - Self::MdFencedCodeBlock(item) => Some(item), - _ => None, - } +impl MdQuote { + #[doc = r" Create an AstNode from a SyntaxNode without checking its kind"] + #[doc = r""] + #[doc = r" # Safety"] + #[doc = r" This function must be guarded with a call to [AstNode::can_cast]"] + #[doc = r" 
or a match on [SyntaxNode::kind]"] + #[inline] + pub const unsafe fn new_unchecked(syntax: SyntaxNode) -> Self { + Self { syntax } } - pub fn as_md_indent_code_block(&self) -> Option<&MdIndentCodeBlock> { - match &self { - Self::MdIndentCodeBlock(item) => Some(item), - _ => None, + pub fn as_fields(&self) -> MdQuoteFields { + MdQuoteFields { + marker_token: self.marker_token(), + content: self.content(), } } -} -#[derive(Clone, PartialEq, Eq, Hash, Serialize)] -pub enum AnyContainerBlock { - MdBulletListItem(MdBulletListItem), - MdOrderListItem(MdOrderListItem), - MdQuote(MdQuote), -} -impl AnyContainerBlock { - pub fn as_md_bullet_list_item(&self) -> Option<&MdBulletListItem> { - match &self { - Self::MdBulletListItem(item) => Some(item), - _ => None, - } + pub fn marker_token(&self) -> SyntaxResult { + support::required_token(&self.syntax, 0usize) } - pub fn as_md_order_list_item(&self) -> Option<&MdOrderListItem> { - match &self { - Self::MdOrderListItem(item) => Some(item), - _ => None, - } + pub fn content(&self) -> MdBlockList { + support::list(&self.syntax, 1usize) } - pub fn as_md_quote(&self) -> Option<&MdQuote> { - match &self { - Self::MdQuote(item) => Some(item), - _ => None, - } +} +impl Serialize for MdQuote { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + self.as_fields().serialize(serializer) } } -#[derive(Clone, PartialEq, Eq, Hash, Serialize)] -pub enum AnyLeafBlock { - AnyCodeBlock(AnyCodeBlock), - MdHeader(MdHeader), - MdHtmlBlock(MdHtmlBlock), - MdLinkBlock(MdLinkBlock), - MdParagraph(MdParagraph), - MdSetextHeader(MdSetextHeader), - MdThematicBreakBlock(MdThematicBreakBlock), +#[derive(Serialize)] +pub struct MdQuoteFields { + pub marker_token: SyntaxResult, + pub content: MdBlockList, } -impl AnyLeafBlock { - pub fn as_any_code_block(&self) -> Option<&AnyCodeBlock> { - match &self { - Self::AnyCodeBlock(item) => Some(item), - _ => None, - } +#[derive(Clone, PartialEq, Eq, Hash)] +pub struct MdReferenceImage { + 
pub(crate) syntax: SyntaxNode, +} +impl MdReferenceImage { + #[doc = r" Create an AstNode from a SyntaxNode without checking its kind"] + #[doc = r""] + #[doc = r" # Safety"] + #[doc = r" This function must be guarded with a call to [AstNode::can_cast]"] + #[doc = r" or a match on [SyntaxNode::kind]"] + #[inline] + pub const unsafe fn new_unchecked(syntax: SyntaxNode) -> Self { + Self { syntax } } - pub fn as_md_header(&self) -> Option<&MdHeader> { - match &self { - Self::MdHeader(item) => Some(item), - _ => None, + pub fn as_fields(&self) -> MdReferenceImageFields { + MdReferenceImageFields { + excl_token: self.excl_token(), + l_brack_token: self.l_brack_token(), + alt: self.alt(), + r_brack_token: self.r_brack_token(), + label: self.label(), } } - pub fn as_md_html_block(&self) -> Option<&MdHtmlBlock> { - match &self { - Self::MdHtmlBlock(item) => Some(item), - _ => None, - } + pub fn excl_token(&self) -> SyntaxResult { + support::required_token(&self.syntax, 0usize) } - pub fn as_md_link_block(&self) -> Option<&MdLinkBlock> { - match &self { - Self::MdLinkBlock(item) => Some(item), - _ => None, - } + pub fn l_brack_token(&self) -> SyntaxResult { + support::required_token(&self.syntax, 1usize) } - pub fn as_md_paragraph(&self) -> Option<&MdParagraph> { - match &self { - Self::MdParagraph(item) => Some(item), - _ => None, - } + pub fn alt(&self) -> MdInlineItemList { + support::list(&self.syntax, 2usize) } - pub fn as_md_setext_header(&self) -> Option<&MdSetextHeader> { - match &self { - Self::MdSetextHeader(item) => Some(item), - _ => None, - } + pub fn r_brack_token(&self) -> SyntaxResult { + support::required_token(&self.syntax, 3usize) } - pub fn as_md_thematic_break_block(&self) -> Option<&MdThematicBreakBlock> { - match &self { - Self::MdThematicBreakBlock(item) => Some(item), - _ => None, - } + pub fn label(&self) -> Option { + support::node(&self.syntax, 4usize) } } -#[derive(Clone, PartialEq, Eq, Hash, Serialize)] -pub enum AnyMdBlock { - 
AnyContainerBlock(AnyContainerBlock), - AnyLeafBlock(AnyLeafBlock), +impl Serialize for MdReferenceImage { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + self.as_fields().serialize(serializer) + } } -impl AnyMdBlock { - pub fn as_any_container_block(&self) -> Option<&AnyContainerBlock> { - match &self { - Self::AnyContainerBlock(item) => Some(item), - _ => None, - } +#[derive(Serialize)] +pub struct MdReferenceImageFields { + pub excl_token: SyntaxResult, + pub l_brack_token: SyntaxResult, + pub alt: MdInlineItemList, + pub r_brack_token: SyntaxResult, + pub label: Option, +} +#[derive(Clone, PartialEq, Eq, Hash)] +pub struct MdReferenceLink { + pub(crate) syntax: SyntaxNode, +} +impl MdReferenceLink { + #[doc = r" Create an AstNode from a SyntaxNode without checking its kind"] + #[doc = r""] + #[doc = r" # Safety"] + #[doc = r" This function must be guarded with a call to [AstNode::can_cast]"] + #[doc = r" or a match on [SyntaxNode::kind]"] + #[inline] + pub const unsafe fn new_unchecked(syntax: SyntaxNode) -> Self { + Self { syntax } } - pub fn as_any_leaf_block(&self) -> Option<&AnyLeafBlock> { - match &self { - Self::AnyLeafBlock(item) => Some(item), - _ => None, + pub fn as_fields(&self) -> MdReferenceLinkFields { + MdReferenceLinkFields { + l_brack_token: self.l_brack_token(), + text: self.text(), + r_brack_token: self.r_brack_token(), + label: self.label(), } } -} + pub fn l_brack_token(&self) -> SyntaxResult { + support::required_token(&self.syntax, 0usize) + } + pub fn text(&self) -> MdInlineItemList { + support::list(&self.syntax, 1usize) + } + pub fn r_brack_token(&self) -> SyntaxResult { + support::required_token(&self.syntax, 2usize) + } + pub fn label(&self) -> Option { + support::node(&self.syntax, 3usize) + } +} +impl Serialize for MdReferenceLink { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + self.as_fields().serialize(serializer) + } +} +#[derive(Serialize)] +pub struct 
MdReferenceLinkFields { + pub l_brack_token: SyntaxResult, + pub text: MdInlineItemList, + pub r_brack_token: SyntaxResult, + pub label: Option, +} +#[derive(Clone, PartialEq, Eq, Hash)] +pub struct MdReferenceLinkLabel { + pub(crate) syntax: SyntaxNode, +} +impl MdReferenceLinkLabel { + #[doc = r" Create an AstNode from a SyntaxNode without checking its kind"] + #[doc = r""] + #[doc = r" # Safety"] + #[doc = r" This function must be guarded with a call to [AstNode::can_cast]"] + #[doc = r" or a match on [SyntaxNode::kind]"] + #[inline] + pub const unsafe fn new_unchecked(syntax: SyntaxNode) -> Self { + Self { syntax } + } + pub fn as_fields(&self) -> MdReferenceLinkLabelFields { + MdReferenceLinkLabelFields { + l_brack_token: self.l_brack_token(), + label: self.label(), + r_brack_token: self.r_brack_token(), + } + } + pub fn l_brack_token(&self) -> SyntaxResult { + support::required_token(&self.syntax, 0usize) + } + pub fn label(&self) -> MdInlineItemList { + support::list(&self.syntax, 1usize) + } + pub fn r_brack_token(&self) -> SyntaxResult { + support::required_token(&self.syntax, 2usize) + } +} +impl Serialize for MdReferenceLinkLabel { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + self.as_fields().serialize(serializer) + } +} +#[derive(Serialize)] +pub struct MdReferenceLinkLabelFields { + pub l_brack_token: SyntaxResult, + pub label: MdInlineItemList, + pub r_brack_token: SyntaxResult, +} +#[derive(Clone, PartialEq, Eq, Hash)] +pub struct MdSetextHeader { + pub(crate) syntax: SyntaxNode, +} +impl MdSetextHeader { + #[doc = r" Create an AstNode from a SyntaxNode without checking its kind"] + #[doc = r""] + #[doc = r" # Safety"] + #[doc = r" This function must be guarded with a call to [AstNode::can_cast]"] + #[doc = r" or a match on [SyntaxNode::kind]"] + #[inline] + pub const unsafe fn new_unchecked(syntax: SyntaxNode) -> Self { + Self { syntax } + } + pub fn as_fields(&self) -> MdSetextHeaderFields { + MdSetextHeaderFields { 
+ content: self.content(), + underline_token: self.underline_token(), + } + } + pub fn content(&self) -> MdInlineItemList { + support::list(&self.syntax, 0usize) + } + pub fn underline_token(&self) -> SyntaxResult { + support::required_token(&self.syntax, 1usize) + } +} +impl Serialize for MdSetextHeader { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + self.as_fields().serialize(serializer) + } +} +#[derive(Serialize)] +pub struct MdSetextHeaderFields { + pub content: MdInlineItemList, + pub underline_token: SyntaxResult, +} +#[derive(Clone, PartialEq, Eq, Hash)] +pub struct MdSoftBreak { + pub(crate) syntax: SyntaxNode, +} +impl MdSoftBreak { + #[doc = r" Create an AstNode from a SyntaxNode without checking its kind"] + #[doc = r""] + #[doc = r" # Safety"] + #[doc = r" This function must be guarded with a call to [AstNode::can_cast]"] + #[doc = r" or a match on [SyntaxNode::kind]"] + #[inline] + pub const unsafe fn new_unchecked(syntax: SyntaxNode) -> Self { + Self { syntax } + } + pub fn as_fields(&self) -> MdSoftBreakFields { + MdSoftBreakFields { + value_token: self.value_token(), + } + } + pub fn value_token(&self) -> SyntaxResult { + support::required_token(&self.syntax, 0usize) + } +} +impl Serialize for MdSoftBreak { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + self.as_fields().serialize(serializer) + } +} +#[derive(Serialize)] +pub struct MdSoftBreakFields { + pub value_token: SyntaxResult, +} +#[derive(Clone, PartialEq, Eq, Hash)] +pub struct MdTextual { + pub(crate) syntax: SyntaxNode, +} +impl MdTextual { + #[doc = r" Create an AstNode from a SyntaxNode without checking its kind"] + #[doc = r""] + #[doc = r" # Safety"] + #[doc = r" This function must be guarded with a call to [AstNode::can_cast]"] + #[doc = r" or a match on [SyntaxNode::kind]"] + #[inline] + pub const unsafe fn new_unchecked(syntax: SyntaxNode) -> Self { + Self { syntax } + } + pub fn as_fields(&self) -> MdTextualFields { + 
MdTextualFields { + value_token: self.value_token(), + } + } + pub fn value_token(&self) -> SyntaxResult { + support::required_token(&self.syntax, 0usize) + } +} +impl Serialize for MdTextual { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + self.as_fields().serialize(serializer) + } +} +#[derive(Serialize)] +pub struct MdTextualFields { + pub value_token: SyntaxResult, +} +#[derive(Clone, PartialEq, Eq, Hash)] +pub struct MdThematicBreakBlock { + pub(crate) syntax: SyntaxNode, +} +impl MdThematicBreakBlock { + #[doc = r" Create an AstNode from a SyntaxNode without checking its kind"] + #[doc = r""] + #[doc = r" # Safety"] + #[doc = r" This function must be guarded with a call to [AstNode::can_cast]"] + #[doc = r" or a match on [SyntaxNode::kind]"] + #[inline] + pub const unsafe fn new_unchecked(syntax: SyntaxNode) -> Self { + Self { syntax } + } + pub fn as_fields(&self) -> MdThematicBreakBlockFields { + MdThematicBreakBlockFields { + value_token: self.value_token(), + } + } + pub fn value_token(&self) -> SyntaxResult { + support::required_token(&self.syntax, 0usize) + } +} +impl Serialize for MdThematicBreakBlock { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + self.as_fields().serialize(serializer) + } +} +#[derive(Serialize)] +pub struct MdThematicBreakBlockFields { + pub value_token: SyntaxResult, +} #[derive(Clone, PartialEq, Eq, Hash, Serialize)] -pub enum AnyMdInline { - MdHardLine(MdHardLine), - MdHtmlBlock(MdHtmlBlock), - MdInlineCode(MdInlineCode), - MdInlineEmphasis(MdInlineEmphasis), - MdInlineImage(MdInlineImage), - MdInlineItalic(MdInlineItalic), - MdInlineLink(MdInlineLink), - MdSoftBreak(MdSoftBreak), - MdTextual(MdTextual), +pub enum AnyCodeBlock { + MdFencedCodeBlock(MdFencedCodeBlock), + MdIndentCodeBlock(MdIndentCodeBlock), } -impl AnyMdInline { - pub fn as_md_hard_line(&self) -> Option<&MdHardLine> { +impl AnyCodeBlock { + pub fn as_md_fenced_code_block(&self) -> 
Option<&MdFencedCodeBlock> { match &self { - Self::MdHardLine(item) => Some(item), + Self::MdFencedCodeBlock(item) => Some(item), _ => None, } } - pub fn as_md_html_block(&self) -> Option<&MdHtmlBlock> { + pub fn as_md_indent_code_block(&self) -> Option<&MdIndentCodeBlock> { match &self { - Self::MdHtmlBlock(item) => Some(item), + Self::MdIndentCodeBlock(item) => Some(item), + _ => None, + } + } +} +#[derive(Clone, PartialEq, Eq, Hash, Serialize)] +pub enum AnyContainerBlock { + MdBulletListItem(MdBulletListItem), + MdOrderedListItem(MdOrderedListItem), + MdQuote(MdQuote), +} +impl AnyContainerBlock { + pub fn as_md_bullet_list_item(&self) -> Option<&MdBulletListItem> { + match &self { + Self::MdBulletListItem(item) => Some(item), + _ => None, + } + } + pub fn as_md_ordered_list_item(&self) -> Option<&MdOrderedListItem> { + match &self { + Self::MdOrderedListItem(item) => Some(item), + _ => None, + } + } + pub fn as_md_quote(&self) -> Option<&MdQuote> { + match &self { + Self::MdQuote(item) => Some(item), _ => None, } } - pub fn as_md_inline_code(&self) -> Option<&MdInlineCode> { - match &self { - Self::MdInlineCode(item) => Some(item), - _ => None, - } +} +#[derive(Clone, PartialEq, Eq, Hash, Serialize)] +pub enum AnyLeafBlock { + AnyCodeBlock(AnyCodeBlock), + MdHeader(MdHeader), + MdHtmlBlock(MdHtmlBlock), + MdLinkBlock(MdLinkBlock), + MdLinkReferenceDefinition(MdLinkReferenceDefinition), + MdNewline(MdNewline), + MdParagraph(MdParagraph), + MdSetextHeader(MdSetextHeader), + MdThematicBreakBlock(MdThematicBreakBlock), +} +impl AnyLeafBlock { + pub fn as_any_code_block(&self) -> Option<&AnyCodeBlock> { + match &self { + Self::AnyCodeBlock(item) => Some(item), + _ => None, + } + } + pub fn as_md_header(&self) -> Option<&MdHeader> { + match &self { + Self::MdHeader(item) => Some(item), + _ => None, + } + } + pub fn as_md_html_block(&self) -> Option<&MdHtmlBlock> { + match &self { + Self::MdHtmlBlock(item) => Some(item), + _ => None, + } + } + pub fn 
as_md_link_block(&self) -> Option<&MdLinkBlock> { + match &self { + Self::MdLinkBlock(item) => Some(item), + _ => None, + } + } + pub fn as_md_link_reference_definition(&self) -> Option<&MdLinkReferenceDefinition> { + match &self { + Self::MdLinkReferenceDefinition(item) => Some(item), + _ => None, + } + } + pub fn as_md_newline(&self) -> Option<&MdNewline> { + match &self { + Self::MdNewline(item) => Some(item), + _ => None, + } + } + pub fn as_md_paragraph(&self) -> Option<&MdParagraph> { + match &self { + Self::MdParagraph(item) => Some(item), + _ => None, + } + } + pub fn as_md_setext_header(&self) -> Option<&MdSetextHeader> { + match &self { + Self::MdSetextHeader(item) => Some(item), + _ => None, + } + } + pub fn as_md_thematic_break_block(&self) -> Option<&MdThematicBreakBlock> { + match &self { + Self::MdThematicBreakBlock(item) => Some(item), + _ => None, + } + } +} +#[derive(Clone, PartialEq, Eq, Hash, Serialize)] +pub enum AnyMdBlock { + AnyContainerBlock(AnyContainerBlock), + AnyLeafBlock(AnyLeafBlock), +} +impl AnyMdBlock { + pub fn as_any_container_block(&self) -> Option<&AnyContainerBlock> { + match &self { + Self::AnyContainerBlock(item) => Some(item), + _ => None, + } + } + pub fn as_any_leaf_block(&self) -> Option<&AnyLeafBlock> { + match &self { + Self::AnyLeafBlock(item) => Some(item), + _ => None, + } + } +} +#[derive(Clone, PartialEq, Eq, Hash, Serialize)] +pub enum AnyMdInline { + MdAutolink(MdAutolink), + MdEntityReference(MdEntityReference), + MdHardLine(MdHardLine), + MdHtmlBlock(MdHtmlBlock), + MdInlineCode(MdInlineCode), + MdInlineEmphasis(MdInlineEmphasis), + MdInlineHtml(MdInlineHtml), + MdInlineImage(MdInlineImage), + MdInlineItalic(MdInlineItalic), + MdInlineLink(MdInlineLink), + MdReferenceImage(MdReferenceImage), + MdReferenceLink(MdReferenceLink), + MdSoftBreak(MdSoftBreak), + MdTextual(MdTextual), +} +impl AnyMdInline { + pub fn as_md_autolink(&self) -> Option<&MdAutolink> { + match &self { + Self::MdAutolink(item) => Some(item), 
+ _ => None, + } + } + pub fn as_md_entity_reference(&self) -> Option<&MdEntityReference> { + match &self { + Self::MdEntityReference(item) => Some(item), + _ => None, + } + } + pub fn as_md_hard_line(&self) -> Option<&MdHardLine> { + match &self { + Self::MdHardLine(item) => Some(item), + _ => None, + } + } + pub fn as_md_html_block(&self) -> Option<&MdHtmlBlock> { + match &self { + Self::MdHtmlBlock(item) => Some(item), + _ => None, + } + } + pub fn as_md_inline_code(&self) -> Option<&MdInlineCode> { + match &self { + Self::MdInlineCode(item) => Some(item), + _ => None, + } + } + pub fn as_md_inline_emphasis(&self) -> Option<&MdInlineEmphasis> { + match &self { + Self::MdInlineEmphasis(item) => Some(item), + _ => None, + } + } + pub fn as_md_inline_html(&self) -> Option<&MdInlineHtml> { + match &self { + Self::MdInlineHtml(item) => Some(item), + _ => None, + } + } + pub fn as_md_inline_image(&self) -> Option<&MdInlineImage> { + match &self { + Self::MdInlineImage(item) => Some(item), + _ => None, + } + } + pub fn as_md_inline_italic(&self) -> Option<&MdInlineItalic> { + match &self { + Self::MdInlineItalic(item) => Some(item), + _ => None, + } + } + pub fn as_md_inline_link(&self) -> Option<&MdInlineLink> { + match &self { + Self::MdInlineLink(item) => Some(item), + _ => None, + } + } + pub fn as_md_reference_image(&self) -> Option<&MdReferenceImage> { + match &self { + Self::MdReferenceImage(item) => Some(item), + _ => None, + } + } + pub fn as_md_reference_link(&self) -> Option<&MdReferenceLink> { + match &self { + Self::MdReferenceLink(item) => Some(item), + _ => None, + } + } + pub fn as_md_soft_break(&self) -> Option<&MdSoftBreak> { + match &self { + Self::MdSoftBreak(item) => Some(item), + _ => None, + } + } + pub fn as_md_textual(&self) -> Option<&MdTextual> { + match &self { + Self::MdTextual(item) => Some(item), + _ => None, + } + } +} +impl AstNode for MdAutolink { + type Language = Language; + const KIND_SET: SyntaxKindSet = + 
SyntaxKindSet::from_raw(RawSyntaxKind(MD_AUTOLINK as u16)); + fn can_cast(kind: SyntaxKind) -> bool { + kind == MD_AUTOLINK + } + fn cast(syntax: SyntaxNode) -> Option { + if Self::can_cast(syntax.kind()) { + Some(Self { syntax }) + } else { + None + } + } + fn syntax(&self) -> &SyntaxNode { + &self.syntax + } + fn into_syntax(self) -> SyntaxNode { + self.syntax + } +} +impl std::fmt::Debug for MdAutolink { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + thread_local! { static DEPTH : std :: cell :: Cell < u8 > = const { std :: cell :: Cell :: new (0) } }; + let current_depth = DEPTH.get(); + let result = if current_depth < 16 { + DEPTH.set(current_depth + 1); + f.debug_struct("MdAutolink") + .field( + "l_angle_token", + &support::DebugSyntaxResult(self.l_angle_token()), + ) + .field("value", &self.value()) + .field( + "r_angle_token", + &support::DebugSyntaxResult(self.r_angle_token()), + ) + .finish() + } else { + f.debug_struct("MdAutolink").finish() + }; + DEPTH.set(current_depth); + result + } +} +impl From for SyntaxNode { + fn from(n: MdAutolink) -> Self { + n.syntax + } +} +impl From for SyntaxElement { + fn from(n: MdAutolink) -> Self { + n.syntax.into() + } +} +impl AstNode for MdBullet { + type Language = Language; + const KIND_SET: SyntaxKindSet = + SyntaxKindSet::from_raw(RawSyntaxKind(MD_BULLET as u16)); + fn can_cast(kind: SyntaxKind) -> bool { + kind == MD_BULLET + } + fn cast(syntax: SyntaxNode) -> Option { + if Self::can_cast(syntax.kind()) { + Some(Self { syntax }) + } else { + None + } + } + fn syntax(&self) -> &SyntaxNode { + &self.syntax + } + fn into_syntax(self) -> SyntaxNode { + self.syntax + } +} +impl std::fmt::Debug for MdBullet { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + thread_local! 
{ static DEPTH : std :: cell :: Cell < u8 > = const { std :: cell :: Cell :: new (0) } }; + let current_depth = DEPTH.get(); + let result = if current_depth < 16 { + DEPTH.set(current_depth + 1); + f.debug_struct("MdBullet") + .field("bullet", &support::DebugSyntaxResult(self.bullet())) + .field("content", &self.content()) + .finish() + } else { + f.debug_struct("MdBullet").finish() + }; + DEPTH.set(current_depth); + result + } +} +impl From for SyntaxNode { + fn from(n: MdBullet) -> Self { + n.syntax + } +} +impl From for SyntaxElement { + fn from(n: MdBullet) -> Self { + n.syntax.into() + } +} +impl AstNode for MdBulletListItem { + type Language = Language; + const KIND_SET: SyntaxKindSet = + SyntaxKindSet::from_raw(RawSyntaxKind(MD_BULLET_LIST_ITEM as u16)); + fn can_cast(kind: SyntaxKind) -> bool { + kind == MD_BULLET_LIST_ITEM + } + fn cast(syntax: SyntaxNode) -> Option { + if Self::can_cast(syntax.kind()) { + Some(Self { syntax }) + } else { + None + } + } + fn syntax(&self) -> &SyntaxNode { + &self.syntax + } + fn into_syntax(self) -> SyntaxNode { + self.syntax + } +} +impl std::fmt::Debug for MdBulletListItem { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + thread_local! 
{ static DEPTH : std :: cell :: Cell < u8 > = const { std :: cell :: Cell :: new (0) } }; + let current_depth = DEPTH.get(); + let result = if current_depth < 16 { + DEPTH.set(current_depth + 1); + f.debug_struct("MdBulletListItem") + .field("md_bullet_list", &self.md_bullet_list()) + .finish() + } else { + f.debug_struct("MdBulletListItem").finish() + }; + DEPTH.set(current_depth); + result + } +} +impl From for SyntaxNode { + fn from(n: MdBulletListItem) -> Self { + n.syntax + } +} +impl From for SyntaxElement { + fn from(n: MdBulletListItem) -> Self { + n.syntax.into() + } +} +impl AstNode for MdDocument { + type Language = Language; + const KIND_SET: SyntaxKindSet = + SyntaxKindSet::from_raw(RawSyntaxKind(MD_DOCUMENT as u16)); + fn can_cast(kind: SyntaxKind) -> bool { + kind == MD_DOCUMENT + } + fn cast(syntax: SyntaxNode) -> Option { + if Self::can_cast(syntax.kind()) { + Some(Self { syntax }) + } else { + None + } + } + fn syntax(&self) -> &SyntaxNode { + &self.syntax + } + fn into_syntax(self) -> SyntaxNode { + self.syntax + } +} +impl std::fmt::Debug for MdDocument { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + thread_local! 
{ static DEPTH : std :: cell :: Cell < u8 > = const { std :: cell :: Cell :: new (0) } }; + let current_depth = DEPTH.get(); + let result = if current_depth < 16 { + DEPTH.set(current_depth + 1); + f.debug_struct("MdDocument") + .field( + "bom_token", + &support::DebugOptionalElement(self.bom_token()), + ) + .field("value", &self.value()) + .field("eof_token", &support::DebugSyntaxResult(self.eof_token())) + .finish() + } else { + f.debug_struct("MdDocument").finish() + }; + DEPTH.set(current_depth); + result + } +} +impl From for SyntaxNode { + fn from(n: MdDocument) -> Self { + n.syntax + } +} +impl From for SyntaxElement { + fn from(n: MdDocument) -> Self { + n.syntax.into() + } +} +impl AstNode for MdEntityReference { + type Language = Language; + const KIND_SET: SyntaxKindSet = + SyntaxKindSet::from_raw(RawSyntaxKind(MD_ENTITY_REFERENCE as u16)); + fn can_cast(kind: SyntaxKind) -> bool { + kind == MD_ENTITY_REFERENCE + } + fn cast(syntax: SyntaxNode) -> Option { + if Self::can_cast(syntax.kind()) { + Some(Self { syntax }) + } else { + None + } + } + fn syntax(&self) -> &SyntaxNode { + &self.syntax + } + fn into_syntax(self) -> SyntaxNode { + self.syntax + } +} +impl std::fmt::Debug for MdEntityReference { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + thread_local! 
{ static DEPTH : std :: cell :: Cell < u8 > = const { std :: cell :: Cell :: new (0) } }; + let current_depth = DEPTH.get(); + let result = if current_depth < 16 { + DEPTH.set(current_depth + 1); + f.debug_struct("MdEntityReference") + .field( + "value_token", + &support::DebugSyntaxResult(self.value_token()), + ) + .finish() + } else { + f.debug_struct("MdEntityReference").finish() + }; + DEPTH.set(current_depth); + result + } +} +impl From for SyntaxNode { + fn from(n: MdEntityReference) -> Self { + n.syntax + } +} +impl From for SyntaxElement { + fn from(n: MdEntityReference) -> Self { + n.syntax.into() + } +} +impl AstNode for MdFencedCodeBlock { + type Language = Language; + const KIND_SET: SyntaxKindSet = + SyntaxKindSet::from_raw(RawSyntaxKind(MD_FENCED_CODE_BLOCK as u16)); + fn can_cast(kind: SyntaxKind) -> bool { + kind == MD_FENCED_CODE_BLOCK + } + fn cast(syntax: SyntaxNode) -> Option { + if Self::can_cast(syntax.kind()) { + Some(Self { syntax }) + } else { + None + } + } + fn syntax(&self) -> &SyntaxNode { + &self.syntax + } + fn into_syntax(self) -> SyntaxNode { + self.syntax + } +} +impl std::fmt::Debug for MdFencedCodeBlock { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + thread_local! 
{ static DEPTH : std :: cell :: Cell < u8 > = const { std :: cell :: Cell :: new (0) } }; + let current_depth = DEPTH.get(); + let result = if current_depth < 16 { + DEPTH.set(current_depth + 1); + f.debug_struct("MdFencedCodeBlock") + .field("l_fence", &support::DebugSyntaxResult(self.l_fence())) + .field("code_list", &self.code_list()) + .field("content", &self.content()) + .field("r_fence", &support::DebugSyntaxResult(self.r_fence())) + .finish() + } else { + f.debug_struct("MdFencedCodeBlock").finish() + }; + DEPTH.set(current_depth); + result + } +} +impl From for SyntaxNode { + fn from(n: MdFencedCodeBlock) -> Self { + n.syntax + } +} +impl From for SyntaxElement { + fn from(n: MdFencedCodeBlock) -> Self { + n.syntax.into() } - pub fn as_md_inline_emphasis(&self) -> Option<&MdInlineEmphasis> { - match &self { - Self::MdInlineEmphasis(item) => Some(item), - _ => None, - } +} +impl AstNode for MdHardLine { + type Language = Language; + const KIND_SET: SyntaxKindSet = + SyntaxKindSet::from_raw(RawSyntaxKind(MD_HARD_LINE as u16)); + fn can_cast(kind: SyntaxKind) -> bool { + kind == MD_HARD_LINE } - pub fn as_md_inline_image(&self) -> Option<&MdInlineImage> { - match &self { - Self::MdInlineImage(item) => Some(item), - _ => None, + fn cast(syntax: SyntaxNode) -> Option { + if Self::can_cast(syntax.kind()) { + Some(Self { syntax }) + } else { + None } } - pub fn as_md_inline_italic(&self) -> Option<&MdInlineItalic> { - match &self { - Self::MdInlineItalic(item) => Some(item), - _ => None, - } + fn syntax(&self) -> &SyntaxNode { + &self.syntax } - pub fn as_md_inline_link(&self) -> Option<&MdInlineLink> { - match &self { - Self::MdInlineLink(item) => Some(item), - _ => None, - } + fn into_syntax(self) -> SyntaxNode { + self.syntax } - pub fn as_md_soft_break(&self) -> Option<&MdSoftBreak> { - match &self { - Self::MdSoftBreak(item) => Some(item), - _ => None, - } +} +impl std::fmt::Debug for MdHardLine { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> 
std::fmt::Result { + thread_local! { static DEPTH : std :: cell :: Cell < u8 > = const { std :: cell :: Cell :: new (0) } }; + let current_depth = DEPTH.get(); + let result = if current_depth < 16 { + DEPTH.set(current_depth + 1); + f.debug_struct("MdHardLine") + .field( + "value_token", + &support::DebugSyntaxResult(self.value_token()), + ) + .finish() + } else { + f.debug_struct("MdHardLine").finish() + }; + DEPTH.set(current_depth); + result } - pub fn as_md_textual(&self) -> Option<&MdTextual> { - match &self { - Self::MdTextual(item) => Some(item), - _ => None, - } +} +impl From for SyntaxNode { + fn from(n: MdHardLine) -> Self { + n.syntax } } -impl AstNode for MdBullet { +impl From for SyntaxElement { + fn from(n: MdHardLine) -> Self { + n.syntax.into() + } +} +impl AstNode for MdHash { type Language = Language; const KIND_SET: SyntaxKindSet = - SyntaxKindSet::from_raw(RawSyntaxKind(MD_BULLET as u16)); + SyntaxKindSet::from_raw(RawSyntaxKind(MD_HASH as u16)); fn can_cast(kind: SyntaxKind) -> bool { - kind == MD_BULLET + kind == MD_HASH } fn cast(syntax: SyntaxNode) -> Option { if Self::can_cast(syntax.kind()) { @@ -1357,43 +2058,38 @@ impl AstNode for MdBullet { self.syntax } } -impl std::fmt::Debug for MdBullet { +impl std::fmt::Debug for MdHash { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { thread_local! 
{ static DEPTH : std :: cell :: Cell < u8 > = const { std :: cell :: Cell :: new (0) } }; let current_depth = DEPTH.get(); let result = if current_depth < 16 { DEPTH.set(current_depth + 1); - f.debug_struct("MdBullet") - .field("bullet", &support::DebugSyntaxResult(self.bullet())) - .field( - "space_token", - &support::DebugSyntaxResult(self.space_token()), - ) - .field("content", &self.content()) + f.debug_struct("MdHash") + .field("hash_token", &support::DebugSyntaxResult(self.hash_token())) .finish() } else { - f.debug_struct("MdBullet").finish() + f.debug_struct("MdHash").finish() }; DEPTH.set(current_depth); result } } -impl From for SyntaxNode { - fn from(n: MdBullet) -> Self { +impl From for SyntaxNode { + fn from(n: MdHash) -> Self { n.syntax } } -impl From for SyntaxElement { - fn from(n: MdBullet) -> Self { +impl From for SyntaxElement { + fn from(n: MdHash) -> Self { n.syntax.into() } } -impl AstNode for MdBulletListItem { +impl AstNode for MdHeader { type Language = Language; const KIND_SET: SyntaxKindSet = - SyntaxKindSet::from_raw(RawSyntaxKind(MD_BULLET_LIST_ITEM as u16)); + SyntaxKindSet::from_raw(RawSyntaxKind(MD_HEADER as u16)); fn can_cast(kind: SyntaxKind) -> bool { - kind == MD_BULLET_LIST_ITEM + kind == MD_HEADER } fn cast(syntax: SyntaxNode) -> Option { if Self::can_cast(syntax.kind()) { @@ -1409,38 +2105,40 @@ impl AstNode for MdBulletListItem { self.syntax } } -impl std::fmt::Debug for MdBulletListItem { +impl std::fmt::Debug for MdHeader { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { thread_local! 
{ static DEPTH : std :: cell :: Cell < u8 > = const { std :: cell :: Cell :: new (0) } }; let current_depth = DEPTH.get(); let result = if current_depth < 16 { DEPTH.set(current_depth + 1); - f.debug_struct("MdBulletListItem") - .field("md_bullet_list", &self.md_bullet_list()) + f.debug_struct("MdHeader") + .field("before", &self.before()) + .field("content", &support::DebugOptionalElement(self.content())) + .field("after", &self.after()) .finish() } else { - f.debug_struct("MdBulletListItem").finish() + f.debug_struct("MdHeader").finish() }; DEPTH.set(current_depth); result } } -impl From for SyntaxNode { - fn from(n: MdBulletListItem) -> Self { +impl From for SyntaxNode { + fn from(n: MdHeader) -> Self { n.syntax } } -impl From for SyntaxElement { - fn from(n: MdBulletListItem) -> Self { +impl From for SyntaxElement { + fn from(n: MdHeader) -> Self { n.syntax.into() } } -impl AstNode for MdDocument { +impl AstNode for MdHtmlBlock { type Language = Language; const KIND_SET: SyntaxKindSet = - SyntaxKindSet::from_raw(RawSyntaxKind(MD_DOCUMENT as u16)); + SyntaxKindSet::from_raw(RawSyntaxKind(MD_HTML_BLOCK as u16)); fn can_cast(kind: SyntaxKind) -> bool { - kind == MD_DOCUMENT + kind == MD_HTML_BLOCK } fn cast(syntax: SyntaxNode) -> Option { if Self::can_cast(syntax.kind()) { @@ -1456,43 +2154,38 @@ impl AstNode for MdDocument { self.syntax } } -impl std::fmt::Debug for MdDocument { +impl std::fmt::Debug for MdHtmlBlock { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { thread_local! 
{ static DEPTH : std :: cell :: Cell < u8 > = const { std :: cell :: Cell :: new (0) } }; let current_depth = DEPTH.get(); let result = if current_depth < 16 { DEPTH.set(current_depth + 1); - f.debug_struct("MdDocument") - .field( - "bom_token", - &support::DebugOptionalElement(self.bom_token()), - ) - .field("value", &self.value()) - .field("eof_token", &support::DebugSyntaxResult(self.eof_token())) + f.debug_struct("MdHtmlBlock") + .field("content", &self.content()) .finish() } else { - f.debug_struct("MdDocument").finish() + f.debug_struct("MdHtmlBlock").finish() }; DEPTH.set(current_depth); result } } -impl From for SyntaxNode { - fn from(n: MdDocument) -> Self { +impl From for SyntaxNode { + fn from(n: MdHtmlBlock) -> Self { n.syntax } } -impl From for SyntaxElement { - fn from(n: MdDocument) -> Self { +impl From for SyntaxElement { + fn from(n: MdHtmlBlock) -> Self { n.syntax.into() } } -impl AstNode for MdFencedCodeBlock { +impl AstNode for MdIndent { type Language = Language; const KIND_SET: SyntaxKindSet = - SyntaxKindSet::from_raw(RawSyntaxKind(MD_FENCED_CODE_BLOCK as u16)); + SyntaxKindSet::from_raw(RawSyntaxKind(MD_INDENT as u16)); fn can_cast(kind: SyntaxKind) -> bool { - kind == MD_FENCED_CODE_BLOCK + kind == MD_INDENT } fn cast(syntax: SyntaxNode) -> Option { if Self::can_cast(syntax.kind()) { @@ -1508,55 +2201,41 @@ impl AstNode for MdFencedCodeBlock { self.syntax } } -impl std::fmt::Debug for MdFencedCodeBlock { +impl std::fmt::Debug for MdIndent { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { thread_local! 
{ static DEPTH : std :: cell :: Cell < u8 > = const { std :: cell :: Cell :: new (0) } }; let current_depth = DEPTH.get(); let result = if current_depth < 16 { DEPTH.set(current_depth + 1); - f.debug_struct("MdFencedCodeBlock") - .field( - "l_fence_token", - &support::DebugSyntaxResult(self.l_fence_token()), - ) - .field("code_list", &self.code_list()) - .field( - "l_hard_line", - &support::DebugSyntaxResult(self.l_hard_line()), - ) - .field("content", &support::DebugSyntaxResult(self.content())) - .field( - "r_hard_line", - &support::DebugSyntaxResult(self.r_hard_line()), - ) + f.debug_struct("MdIndent") .field( - "r_fence_token", - &support::DebugSyntaxResult(self.r_fence_token()), + "value_token", + &support::DebugSyntaxResult(self.value_token()), ) .finish() } else { - f.debug_struct("MdFencedCodeBlock").finish() + f.debug_struct("MdIndent").finish() }; DEPTH.set(current_depth); result } } -impl From for SyntaxNode { - fn from(n: MdFencedCodeBlock) -> Self { +impl From for SyntaxNode { + fn from(n: MdIndent) -> Self { n.syntax } } -impl From for SyntaxElement { - fn from(n: MdFencedCodeBlock) -> Self { +impl From for SyntaxElement { + fn from(n: MdIndent) -> Self { n.syntax.into() } } -impl AstNode for MdHardLine { +impl AstNode for MdIndentCodeBlock { type Language = Language; const KIND_SET: SyntaxKindSet = - SyntaxKindSet::from_raw(RawSyntaxKind(MD_HARD_LINE as u16)); + SyntaxKindSet::from_raw(RawSyntaxKind(MD_INDENT_CODE_BLOCK as u16)); fn can_cast(kind: SyntaxKind) -> bool { - kind == MD_HARD_LINE + kind == MD_INDENT_CODE_BLOCK } fn cast(syntax: SyntaxNode) -> Option { if Self::can_cast(syntax.kind()) { @@ -1572,41 +2251,38 @@ impl AstNode for MdHardLine { self.syntax } } -impl std::fmt::Debug for MdHardLine { +impl std::fmt::Debug for MdIndentCodeBlock { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { thread_local! 
{ static DEPTH : std :: cell :: Cell < u8 > = const { std :: cell :: Cell :: new (0) } }; let current_depth = DEPTH.get(); let result = if current_depth < 16 { DEPTH.set(current_depth + 1); - f.debug_struct("MdHardLine") - .field( - "value_token", - &support::DebugSyntaxResult(self.value_token()), - ) + f.debug_struct("MdIndentCodeBlock") + .field("content", &self.content()) .finish() } else { - f.debug_struct("MdHardLine").finish() + f.debug_struct("MdIndentCodeBlock").finish() }; DEPTH.set(current_depth); result } } -impl From for SyntaxNode { - fn from(n: MdHardLine) -> Self { +impl From for SyntaxNode { + fn from(n: MdIndentCodeBlock) -> Self { n.syntax } } -impl From for SyntaxElement { - fn from(n: MdHardLine) -> Self { +impl From for SyntaxElement { + fn from(n: MdIndentCodeBlock) -> Self { n.syntax.into() } } -impl AstNode for MdHash { +impl AstNode for MdInlineCode { type Language = Language; const KIND_SET: SyntaxKindSet = - SyntaxKindSet::from_raw(RawSyntaxKind(MD_HASH as u16)); + SyntaxKindSet::from_raw(RawSyntaxKind(MD_INLINE_CODE as u16)); fn can_cast(kind: SyntaxKind) -> bool { - kind == MD_HASH + kind == MD_INLINE_CODE } fn cast(syntax: SyntaxNode) -> Option { if Self::can_cast(syntax.kind()) { @@ -1622,38 +2298,46 @@ impl AstNode for MdHash { self.syntax } } -impl std::fmt::Debug for MdHash { +impl std::fmt::Debug for MdInlineCode { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { thread_local! 
{ static DEPTH : std :: cell :: Cell < u8 > = const { std :: cell :: Cell :: new (0) } }; let current_depth = DEPTH.get(); let result = if current_depth < 16 { DEPTH.set(current_depth + 1); - f.debug_struct("MdHash") - .field("hash_token", &support::DebugSyntaxResult(self.hash_token())) + f.debug_struct("MdInlineCode") + .field( + "l_tick_token", + &support::DebugSyntaxResult(self.l_tick_token()), + ) + .field("content", &self.content()) + .field( + "r_tick_token", + &support::DebugSyntaxResult(self.r_tick_token()), + ) .finish() } else { - f.debug_struct("MdHash").finish() + f.debug_struct("MdInlineCode").finish() }; DEPTH.set(current_depth); result } } -impl From for SyntaxNode { - fn from(n: MdHash) -> Self { +impl From for SyntaxNode { + fn from(n: MdInlineCode) -> Self { n.syntax } } -impl From for SyntaxElement { - fn from(n: MdHash) -> Self { +impl From for SyntaxElement { + fn from(n: MdInlineCode) -> Self { n.syntax.into() } } -impl AstNode for MdHeader { +impl AstNode for MdInlineEmphasis { type Language = Language; const KIND_SET: SyntaxKindSet = - SyntaxKindSet::from_raw(RawSyntaxKind(MD_HEADER as u16)); + SyntaxKindSet::from_raw(RawSyntaxKind(MD_INLINE_EMPHASIS as u16)); fn can_cast(kind: SyntaxKind) -> bool { - kind == MD_HEADER + kind == MD_INLINE_EMPHASIS } fn cast(syntax: SyntaxNode) -> Option { if Self::can_cast(syntax.kind()) { @@ -1669,40 +2353,40 @@ impl AstNode for MdHeader { self.syntax } } -impl std::fmt::Debug for MdHeader { +impl std::fmt::Debug for MdInlineEmphasis { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { thread_local! 
{ static DEPTH : std :: cell :: Cell < u8 > = const { std :: cell :: Cell :: new (0) } }; let current_depth = DEPTH.get(); let result = if current_depth < 16 { DEPTH.set(current_depth + 1); - f.debug_struct("MdHeader") - .field("before", &self.before()) - .field("content", &support::DebugOptionalElement(self.content())) - .field("after", &self.after()) + f.debug_struct("MdInlineEmphasis") + .field("l_fence", &support::DebugSyntaxResult(self.l_fence())) + .field("content", &self.content()) + .field("r_fence", &support::DebugSyntaxResult(self.r_fence())) .finish() } else { - f.debug_struct("MdHeader").finish() + f.debug_struct("MdInlineEmphasis").finish() }; DEPTH.set(current_depth); result } } -impl From for SyntaxNode { - fn from(n: MdHeader) -> Self { +impl From for SyntaxNode { + fn from(n: MdInlineEmphasis) -> Self { n.syntax } } -impl From for SyntaxElement { - fn from(n: MdHeader) -> Self { +impl From for SyntaxElement { + fn from(n: MdInlineEmphasis) -> Self { n.syntax.into() } } -impl AstNode for MdHtmlBlock { +impl AstNode for MdInlineHtml { type Language = Language; const KIND_SET: SyntaxKindSet = - SyntaxKindSet::from_raw(RawSyntaxKind(MD_HTML_BLOCK as u16)); + SyntaxKindSet::from_raw(RawSyntaxKind(MD_INLINE_HTML as u16)); fn can_cast(kind: SyntaxKind) -> bool { - kind == MD_HTML_BLOCK + kind == MD_INLINE_HTML } fn cast(syntax: SyntaxNode) -> Option { if Self::can_cast(syntax.kind()) { @@ -1718,38 +2402,38 @@ impl AstNode for MdHtmlBlock { self.syntax } } -impl std::fmt::Debug for MdHtmlBlock { +impl std::fmt::Debug for MdInlineHtml { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { thread_local! 
{ static DEPTH : std :: cell :: Cell < u8 > = const { std :: cell :: Cell :: new (0) } }; let current_depth = DEPTH.get(); let result = if current_depth < 16 { DEPTH.set(current_depth + 1); - f.debug_struct("MdHtmlBlock") - .field("md_textual", &support::DebugSyntaxResult(self.md_textual())) + f.debug_struct("MdInlineHtml") + .field("value", &self.value()) .finish() } else { - f.debug_struct("MdHtmlBlock").finish() + f.debug_struct("MdInlineHtml").finish() }; DEPTH.set(current_depth); result } } -impl From for SyntaxNode { - fn from(n: MdHtmlBlock) -> Self { +impl From for SyntaxNode { + fn from(n: MdInlineHtml) -> Self { n.syntax } } -impl From for SyntaxElement { - fn from(n: MdHtmlBlock) -> Self { +impl From for SyntaxElement { + fn from(n: MdInlineHtml) -> Self { n.syntax.into() } } -impl AstNode for MdIndent { +impl AstNode for MdInlineImage { type Language = Language; const KIND_SET: SyntaxKindSet = - SyntaxKindSet::from_raw(RawSyntaxKind(MD_INDENT as u16)); + SyntaxKindSet::from_raw(RawSyntaxKind(MD_INLINE_IMAGE as u16)); fn can_cast(kind: SyntaxKind) -> bool { - kind == MD_INDENT + kind == MD_INLINE_IMAGE } fn cast(syntax: SyntaxNode) -> Option { if Self::can_cast(syntax.kind()) { @@ -1765,41 +2449,57 @@ impl AstNode for MdIndent { self.syntax } } -impl std::fmt::Debug for MdIndent { +impl std::fmt::Debug for MdInlineImage { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { thread_local! 
{ static DEPTH : std :: cell :: Cell < u8 > = const { std :: cell :: Cell :: new (0) } }; let current_depth = DEPTH.get(); let result = if current_depth < 16 { DEPTH.set(current_depth + 1); - f.debug_struct("MdIndent") + f.debug_struct("MdInlineImage") + .field("excl_token", &support::DebugSyntaxResult(self.excl_token())) .field( - "value_token", - &support::DebugSyntaxResult(self.value_token()), + "l_brack_token", + &support::DebugSyntaxResult(self.l_brack_token()), + ) + .field("alt", &self.alt()) + .field( + "r_brack_token", + &support::DebugSyntaxResult(self.r_brack_token()), + ) + .field( + "l_paren_token", + &support::DebugSyntaxResult(self.l_paren_token()), + ) + .field("destination", &self.destination()) + .field("title", &support::DebugOptionalElement(self.title())) + .field( + "r_paren_token", + &support::DebugSyntaxResult(self.r_paren_token()), ) .finish() } else { - f.debug_struct("MdIndent").finish() + f.debug_struct("MdInlineImage").finish() }; DEPTH.set(current_depth); result } } -impl From for SyntaxNode { - fn from(n: MdIndent) -> Self { +impl From for SyntaxNode { + fn from(n: MdInlineImage) -> Self { n.syntax } } -impl From for SyntaxElement { - fn from(n: MdIndent) -> Self { +impl From for SyntaxElement { + fn from(n: MdInlineImage) -> Self { n.syntax.into() } } -impl AstNode for MdIndentCodeBlock { +impl AstNode for MdInlineItalic { type Language = Language; const KIND_SET: SyntaxKindSet = - SyntaxKindSet::from_raw(RawSyntaxKind(MD_INDENT_CODE_BLOCK as u16)); + SyntaxKindSet::from_raw(RawSyntaxKind(MD_INLINE_ITALIC as u16)); fn can_cast(kind: SyntaxKind) -> bool { - kind == MD_INDENT_CODE_BLOCK + kind == MD_INLINE_ITALIC } fn cast(syntax: SyntaxNode) -> Option { if Self::can_cast(syntax.kind()) { @@ -1815,38 +2515,40 @@ impl AstNode for MdIndentCodeBlock { self.syntax } } -impl std::fmt::Debug for MdIndentCodeBlock { +impl std::fmt::Debug for MdInlineItalic { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { thread_local! 
{ static DEPTH : std :: cell :: Cell < u8 > = const { std :: cell :: Cell :: new (0) } }; let current_depth = DEPTH.get(); let result = if current_depth < 16 { DEPTH.set(current_depth + 1); - f.debug_struct("MdIndentCodeBlock") - .field("lines", &self.lines()) + f.debug_struct("MdInlineItalic") + .field("l_fence", &support::DebugSyntaxResult(self.l_fence())) + .field("content", &self.content()) + .field("r_fence", &support::DebugSyntaxResult(self.r_fence())) .finish() } else { - f.debug_struct("MdIndentCodeBlock").finish() + f.debug_struct("MdInlineItalic").finish() }; DEPTH.set(current_depth); result } } -impl From for SyntaxNode { - fn from(n: MdIndentCodeBlock) -> Self { +impl From for SyntaxNode { + fn from(n: MdInlineItalic) -> Self { n.syntax } } -impl From for SyntaxElement { - fn from(n: MdIndentCodeBlock) -> Self { +impl From for SyntaxElement { + fn from(n: MdInlineItalic) -> Self { n.syntax.into() } } -impl AstNode for MdIndentedCodeLine { +impl AstNode for MdInlineLink { type Language = Language; const KIND_SET: SyntaxKindSet = - SyntaxKindSet::from_raw(RawSyntaxKind(MD_INDENTED_CODE_LINE as u16)); + SyntaxKindSet::from_raw(RawSyntaxKind(MD_INLINE_LINK as u16)); fn can_cast(kind: SyntaxKind) -> bool { - kind == MD_INDENTED_CODE_LINE + kind == MD_INLINE_LINK } fn cast(syntax: SyntaxNode) -> Option { if Self::can_cast(syntax.kind()) { @@ -1862,42 +2564,56 @@ impl AstNode for MdIndentedCodeLine { self.syntax } } -impl std::fmt::Debug for MdIndentedCodeLine { +impl std::fmt::Debug for MdInlineLink { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { thread_local! 
{ static DEPTH : std :: cell :: Cell < u8 > = const { std :: cell :: Cell :: new (0) } }; let current_depth = DEPTH.get(); let result = if current_depth < 16 { DEPTH.set(current_depth + 1); - f.debug_struct("MdIndentedCodeLine") + f.debug_struct("MdInlineLink") + .field( + "l_brack_token", + &support::DebugSyntaxResult(self.l_brack_token()), + ) + .field("text", &self.text()) + .field( + "r_brack_token", + &support::DebugSyntaxResult(self.r_brack_token()), + ) + .field( + "l_paren_token", + &support::DebugSyntaxResult(self.l_paren_token()), + ) + .field("destination", &self.destination()) + .field("title", &support::DebugOptionalElement(self.title())) .field( - "indentation", - &support::DebugSyntaxResult(self.indentation()), + "r_paren_token", + &support::DebugSyntaxResult(self.r_paren_token()), ) - .field("content", &support::DebugSyntaxResult(self.content())) .finish() } else { - f.debug_struct("MdIndentedCodeLine").finish() + f.debug_struct("MdInlineLink").finish() }; DEPTH.set(current_depth); result } } -impl From for SyntaxNode { - fn from(n: MdIndentedCodeLine) -> Self { +impl From for SyntaxNode { + fn from(n: MdInlineLink) -> Self { n.syntax } } -impl From for SyntaxElement { - fn from(n: MdIndentedCodeLine) -> Self { +impl From for SyntaxElement { + fn from(n: MdInlineLink) -> Self { n.syntax.into() } } -impl AstNode for MdInlineCode { +impl AstNode for MdLinkBlock { type Language = Language; const KIND_SET: SyntaxKindSet = - SyntaxKindSet::from_raw(RawSyntaxKind(MD_INLINE_CODE as u16)); + SyntaxKindSet::from_raw(RawSyntaxKind(MD_LINK_BLOCK as u16)); fn can_cast(kind: SyntaxKind) -> bool { - kind == MD_INLINE_CODE + kind == MD_LINK_BLOCK } fn cast(syntax: SyntaxNode) -> Option { if Self::can_cast(syntax.kind()) { @@ -1913,46 +2629,40 @@ impl AstNode for MdInlineCode { self.syntax } } -impl std::fmt::Debug for MdInlineCode { +impl std::fmt::Debug for MdLinkBlock { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { thread_local! 
{ static DEPTH : std :: cell :: Cell < u8 > = const { std :: cell :: Cell :: new (0) } }; let current_depth = DEPTH.get(); let result = if current_depth < 16 { DEPTH.set(current_depth + 1); - f.debug_struct("MdInlineCode") - .field( - "l_tick_token", - &support::DebugSyntaxResult(self.l_tick_token()), - ) - .field("content", &self.content()) - .field( - "r_tick_token", - &support::DebugSyntaxResult(self.r_tick_token()), - ) + f.debug_struct("MdLinkBlock") + .field("label", &support::DebugSyntaxResult(self.label())) + .field("url", &support::DebugSyntaxResult(self.url())) + .field("title", &support::DebugOptionalElement(self.title())) .finish() } else { - f.debug_struct("MdInlineCode").finish() + f.debug_struct("MdLinkBlock").finish() }; DEPTH.set(current_depth); result } } -impl From for SyntaxNode { - fn from(n: MdInlineCode) -> Self { +impl From for SyntaxNode { + fn from(n: MdLinkBlock) -> Self { n.syntax } } -impl From for SyntaxElement { - fn from(n: MdInlineCode) -> Self { +impl From for SyntaxElement { + fn from(n: MdLinkBlock) -> Self { n.syntax.into() } } -impl AstNode for MdInlineEmphasis { +impl AstNode for MdLinkDestination { type Language = Language; const KIND_SET: SyntaxKindSet = - SyntaxKindSet::from_raw(RawSyntaxKind(MD_INLINE_EMPHASIS as u16)); + SyntaxKindSet::from_raw(RawSyntaxKind(MD_LINK_DESTINATION as u16)); fn can_cast(kind: SyntaxKind) -> bool { - kind == MD_INLINE_EMPHASIS + kind == MD_LINK_DESTINATION } fn cast(syntax: SyntaxNode) -> Option { if Self::can_cast(syntax.kind()) { @@ -1968,40 +2678,38 @@ impl AstNode for MdInlineEmphasis { self.syntax } } -impl std::fmt::Debug for MdInlineEmphasis { +impl std::fmt::Debug for MdLinkDestination { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { thread_local! 
{ static DEPTH : std :: cell :: Cell < u8 > = const { std :: cell :: Cell :: new (0) } }; let current_depth = DEPTH.get(); let result = if current_depth < 16 { DEPTH.set(current_depth + 1); - f.debug_struct("MdInlineEmphasis") - .field("l_fence", &support::DebugSyntaxResult(self.l_fence())) + f.debug_struct("MdLinkDestination") .field("content", &self.content()) - .field("r_fence", &support::DebugSyntaxResult(self.r_fence())) .finish() } else { - f.debug_struct("MdInlineEmphasis").finish() + f.debug_struct("MdLinkDestination").finish() }; DEPTH.set(current_depth); result } } -impl From for SyntaxNode { - fn from(n: MdInlineEmphasis) -> Self { +impl From for SyntaxNode { + fn from(n: MdLinkDestination) -> Self { n.syntax } } -impl From for SyntaxElement { - fn from(n: MdInlineEmphasis) -> Self { +impl From for SyntaxElement { + fn from(n: MdLinkDestination) -> Self { n.syntax.into() } } -impl AstNode for MdInlineImage { +impl AstNode for MdLinkLabel { type Language = Language; const KIND_SET: SyntaxKindSet = - SyntaxKindSet::from_raw(RawSyntaxKind(MD_INLINE_IMAGE as u16)); + SyntaxKindSet::from_raw(RawSyntaxKind(MD_LINK_LABEL as u16)); fn can_cast(kind: SyntaxKind) -> bool { - kind == MD_INLINE_IMAGE + kind == MD_LINK_LABEL } fn cast(syntax: SyntaxNode) -> Option { if Self::can_cast(syntax.kind()) { @@ -2017,49 +2725,38 @@ impl AstNode for MdInlineImage { self.syntax } } -impl std::fmt::Debug for MdInlineImage { +impl std::fmt::Debug for MdLinkLabel { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { thread_local! 
{ static DEPTH : std :: cell :: Cell < u8 > = const { std :: cell :: Cell :: new (0) } }; let current_depth = DEPTH.get(); let result = if current_depth < 16 { DEPTH.set(current_depth + 1); - f.debug_struct("MdInlineImage") - .field( - "l_brack_token", - &support::DebugSyntaxResult(self.l_brack_token()), - ) - .field("excl_token", &support::DebugSyntaxResult(self.excl_token())) - .field("alt", &support::DebugSyntaxResult(self.alt())) - .field("source", &support::DebugSyntaxResult(self.source())) - .field( - "r_brack_token", - &support::DebugSyntaxResult(self.r_brack_token()), - ) - .field("link", &support::DebugOptionalElement(self.link())) + f.debug_struct("MdLinkLabel") + .field("content", &self.content()) .finish() } else { - f.debug_struct("MdInlineImage").finish() + f.debug_struct("MdLinkLabel").finish() }; DEPTH.set(current_depth); result } } -impl From for SyntaxNode { - fn from(n: MdInlineImage) -> Self { +impl From for SyntaxNode { + fn from(n: MdLinkLabel) -> Self { n.syntax } } -impl From for SyntaxElement { - fn from(n: MdInlineImage) -> Self { +impl From for SyntaxElement { + fn from(n: MdLinkLabel) -> Self { n.syntax.into() } } -impl AstNode for MdInlineImageAlt { +impl AstNode for MdLinkReferenceDefinition { type Language = Language; const KIND_SET: SyntaxKindSet = - SyntaxKindSet::from_raw(RawSyntaxKind(MD_INLINE_IMAGE_ALT as u16)); + SyntaxKindSet::from_raw(RawSyntaxKind(MD_LINK_REFERENCE_DEFINITION as u16)); fn can_cast(kind: SyntaxKind) -> bool { - kind == MD_INLINE_IMAGE_ALT + kind == MD_LINK_REFERENCE_DEFINITION } fn cast(syntax: SyntaxNode) -> Option { if Self::can_cast(syntax.kind()) { @@ -2075,46 +2772,55 @@ impl AstNode for MdInlineImageAlt { self.syntax } } -impl std::fmt::Debug for MdInlineImageAlt { +impl std::fmt::Debug for MdLinkReferenceDefinition { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { thread_local! 
{ static DEPTH : std :: cell :: Cell < u8 > = const { std :: cell :: Cell :: new (0) } }; let current_depth = DEPTH.get(); let result = if current_depth < 16 { DEPTH.set(current_depth + 1); - f.debug_struct("MdInlineImageAlt") + f.debug_struct("MdLinkReferenceDefinition") .field( "l_brack_token", &support::DebugSyntaxResult(self.l_brack_token()), ) - .field("content", &self.content()) + .field("label", &support::DebugSyntaxResult(self.label())) .field( "r_brack_token", &support::DebugSyntaxResult(self.r_brack_token()), ) + .field( + "colon_token", + &support::DebugSyntaxResult(self.colon_token()), + ) + .field( + "destination", + &support::DebugSyntaxResult(self.destination()), + ) + .field("title", &support::DebugOptionalElement(self.title())) .finish() } else { - f.debug_struct("MdInlineImageAlt").finish() + f.debug_struct("MdLinkReferenceDefinition").finish() }; DEPTH.set(current_depth); result } } -impl From for SyntaxNode { - fn from(n: MdInlineImageAlt) -> Self { +impl From for SyntaxNode { + fn from(n: MdLinkReferenceDefinition) -> Self { n.syntax } } -impl From for SyntaxElement { - fn from(n: MdInlineImageAlt) -> Self { +impl From for SyntaxElement { + fn from(n: MdLinkReferenceDefinition) -> Self { n.syntax.into() } } -impl AstNode for MdInlineImageLink { +impl AstNode for MdLinkTitle { type Language = Language; const KIND_SET: SyntaxKindSet = - SyntaxKindSet::from_raw(RawSyntaxKind(MD_INLINE_IMAGE_LINK as u16)); + SyntaxKindSet::from_raw(RawSyntaxKind(MD_LINK_TITLE as u16)); fn can_cast(kind: SyntaxKind) -> bool { - kind == MD_INLINE_IMAGE_LINK + kind == MD_LINK_TITLE } fn cast(syntax: SyntaxNode) -> Option { if Self::can_cast(syntax.kind()) { @@ -2130,46 +2836,38 @@ impl AstNode for MdInlineImageLink { self.syntax } } -impl std::fmt::Debug for MdInlineImageLink { +impl std::fmt::Debug for MdLinkTitle { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { thread_local! 
{ static DEPTH : std :: cell :: Cell < u8 > = const { std :: cell :: Cell :: new (0) } }; let current_depth = DEPTH.get(); let result = if current_depth < 16 { DEPTH.set(current_depth + 1); - f.debug_struct("MdInlineImageLink") - .field( - "l_paren_token", - &support::DebugSyntaxResult(self.l_paren_token()), - ) + f.debug_struct("MdLinkTitle") .field("content", &self.content()) - .field( - "r_paren_token", - &support::DebugSyntaxResult(self.r_paren_token()), - ) .finish() } else { - f.debug_struct("MdInlineImageLink").finish() + f.debug_struct("MdLinkTitle").finish() }; DEPTH.set(current_depth); result } } -impl From for SyntaxNode { - fn from(n: MdInlineImageLink) -> Self { +impl From for SyntaxNode { + fn from(n: MdLinkTitle) -> Self { n.syntax } } -impl From for SyntaxElement { - fn from(n: MdInlineImageLink) -> Self { +impl From for SyntaxElement { + fn from(n: MdLinkTitle) -> Self { n.syntax.into() } } -impl AstNode for MdInlineImageSource { +impl AstNode for MdNewline { type Language = Language; const KIND_SET: SyntaxKindSet = - SyntaxKindSet::from_raw(RawSyntaxKind(MD_INLINE_IMAGE_SOURCE as u16)); + SyntaxKindSet::from_raw(RawSyntaxKind(MD_NEWLINE as u16)); fn can_cast(kind: SyntaxKind) -> bool { - kind == MD_INLINE_IMAGE_SOURCE + kind == MD_NEWLINE } fn cast(syntax: SyntaxNode) -> Option { if Self::can_cast(syntax.kind()) { @@ -2185,46 +2883,41 @@ impl AstNode for MdInlineImageSource { self.syntax } } -impl std::fmt::Debug for MdInlineImageSource { +impl std::fmt::Debug for MdNewline { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { thread_local! 
{ static DEPTH : std :: cell :: Cell < u8 > = const { std :: cell :: Cell :: new (0) } }; let current_depth = DEPTH.get(); let result = if current_depth < 16 { DEPTH.set(current_depth + 1); - f.debug_struct("MdInlineImageSource") - .field( - "l_paren_token", - &support::DebugSyntaxResult(self.l_paren_token()), - ) - .field("content", &self.content()) + f.debug_struct("MdNewline") .field( - "r_paren_token", - &support::DebugSyntaxResult(self.r_paren_token()), + "value_token", + &support::DebugSyntaxResult(self.value_token()), ) .finish() } else { - f.debug_struct("MdInlineImageSource").finish() + f.debug_struct("MdNewline").finish() }; DEPTH.set(current_depth); result } } -impl From for SyntaxNode { - fn from(n: MdInlineImageSource) -> Self { +impl From for SyntaxNode { + fn from(n: MdNewline) -> Self { n.syntax } } -impl From for SyntaxElement { - fn from(n: MdInlineImageSource) -> Self { +impl From for SyntaxElement { + fn from(n: MdNewline) -> Self { n.syntax.into() } } -impl AstNode for MdInlineItalic { +impl AstNode for MdOrderedListItem { type Language = Language; const KIND_SET: SyntaxKindSet = - SyntaxKindSet::from_raw(RawSyntaxKind(MD_INLINE_ITALIC as u16)); + SyntaxKindSet::from_raw(RawSyntaxKind(MD_ORDERED_LIST_ITEM as u16)); fn can_cast(kind: SyntaxKind) -> bool { - kind == MD_INLINE_ITALIC + kind == MD_ORDERED_LIST_ITEM } fn cast(syntax: SyntaxNode) -> Option { if Self::can_cast(syntax.kind()) { @@ -2240,40 +2933,38 @@ impl AstNode for MdInlineItalic { self.syntax } } -impl std::fmt::Debug for MdInlineItalic { +impl std::fmt::Debug for MdOrderedListItem { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { thread_local! 
{ static DEPTH : std :: cell :: Cell < u8 > = const { std :: cell :: Cell :: new (0) } }; let current_depth = DEPTH.get(); let result = if current_depth < 16 { DEPTH.set(current_depth + 1); - f.debug_struct("MdInlineItalic") - .field("l_fence", &support::DebugSyntaxResult(self.l_fence())) - .field("content", &self.content()) - .field("r_fence", &support::DebugSyntaxResult(self.r_fence())) + f.debug_struct("MdOrderedListItem") + .field("md_bullet_list", &self.md_bullet_list()) .finish() } else { - f.debug_struct("MdInlineItalic").finish() + f.debug_struct("MdOrderedListItem").finish() }; DEPTH.set(current_depth); result } } -impl From for SyntaxNode { - fn from(n: MdInlineItalic) -> Self { +impl From for SyntaxNode { + fn from(n: MdOrderedListItem) -> Self { n.syntax } } -impl From for SyntaxElement { - fn from(n: MdInlineItalic) -> Self { +impl From for SyntaxElement { + fn from(n: MdOrderedListItem) -> Self { n.syntax.into() } } -impl AstNode for MdInlineLink { +impl AstNode for MdParagraph { type Language = Language; const KIND_SET: SyntaxKindSet = - SyntaxKindSet::from_raw(RawSyntaxKind(MD_INLINE_LINK as u16)); + SyntaxKindSet::from_raw(RawSyntaxKind(MD_PARAGRAPH as u16)); fn can_cast(kind: SyntaxKind) -> bool { - kind == MD_INLINE_LINK + kind == MD_PARAGRAPH } fn cast(syntax: SyntaxNode) -> Option { if Self::can_cast(syntax.kind()) { @@ -2289,55 +2980,42 @@ impl AstNode for MdInlineLink { self.syntax } } -impl std::fmt::Debug for MdInlineLink { +impl std::fmt::Debug for MdParagraph { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { thread_local! 
{ static DEPTH : std :: cell :: Cell < u8 > = const { std :: cell :: Cell :: new (0) } }; let current_depth = DEPTH.get(); let result = if current_depth < 16 { DEPTH.set(current_depth + 1); - f.debug_struct("MdInlineLink") - .field( - "l_brack_token", - &support::DebugSyntaxResult(self.l_brack_token()), - ) - .field("text", &self.text()) - .field( - "r_brack_token", - &support::DebugSyntaxResult(self.r_brack_token()), - ) - .field( - "l_paren_token", - &support::DebugSyntaxResult(self.l_paren_token()), - ) - .field("source", &self.source()) + f.debug_struct("MdParagraph") + .field("list", &self.list()) .field( - "r_paren_token", - &support::DebugSyntaxResult(self.r_paren_token()), + "hard_line", + &support::DebugOptionalElement(self.hard_line()), ) .finish() } else { - f.debug_struct("MdInlineLink").finish() + f.debug_struct("MdParagraph").finish() }; DEPTH.set(current_depth); result } } -impl From for SyntaxNode { - fn from(n: MdInlineLink) -> Self { +impl From for SyntaxNode { + fn from(n: MdParagraph) -> Self { n.syntax } } -impl From for SyntaxElement { - fn from(n: MdInlineLink) -> Self { +impl From for SyntaxElement { + fn from(n: MdParagraph) -> Self { n.syntax.into() } } -impl AstNode for MdLinkBlock { +impl AstNode for MdQuote { type Language = Language; const KIND_SET: SyntaxKindSet = - SyntaxKindSet::from_raw(RawSyntaxKind(MD_LINK_BLOCK as u16)); + SyntaxKindSet::from_raw(RawSyntaxKind(MD_QUOTE as u16)); fn can_cast(kind: SyntaxKind) -> bool { - kind == MD_LINK_BLOCK + kind == MD_QUOTE } fn cast(syntax: SyntaxNode) -> Option { if Self::can_cast(syntax.kind()) { @@ -2353,40 +3031,42 @@ impl AstNode for MdLinkBlock { self.syntax } } -impl std::fmt::Debug for MdLinkBlock { +impl std::fmt::Debug for MdQuote { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { thread_local! 
{ static DEPTH : std :: cell :: Cell < u8 > = const { std :: cell :: Cell :: new (0) } }; let current_depth = DEPTH.get(); let result = if current_depth < 16 { DEPTH.set(current_depth + 1); - f.debug_struct("MdLinkBlock") - .field("label", &support::DebugSyntaxResult(self.label())) - .field("url", &support::DebugSyntaxResult(self.url())) - .field("title", &support::DebugOptionalElement(self.title())) + f.debug_struct("MdQuote") + .field( + "marker_token", + &support::DebugSyntaxResult(self.marker_token()), + ) + .field("content", &self.content()) .finish() } else { - f.debug_struct("MdLinkBlock").finish() + f.debug_struct("MdQuote").finish() }; DEPTH.set(current_depth); result } } -impl From for SyntaxNode { - fn from(n: MdLinkBlock) -> Self { +impl From for SyntaxNode { + fn from(n: MdQuote) -> Self { n.syntax } } -impl From for SyntaxElement { - fn from(n: MdLinkBlock) -> Self { +impl From for SyntaxElement { + fn from(n: MdQuote) -> Self { n.syntax.into() } } -impl AstNode for MdOrderListItem { +impl AstNode for MdReferenceImage { type Language = Language; const KIND_SET: SyntaxKindSet = - SyntaxKindSet::from_raw(RawSyntaxKind(MD_ORDER_LIST_ITEM as u16)); + SyntaxKindSet::from_raw(RawSyntaxKind(MD_REFERENCE_IMAGE as u16)); fn can_cast(kind: SyntaxKind) -> bool { - kind == MD_ORDER_LIST_ITEM + kind == MD_REFERENCE_IMAGE } fn cast(syntax: SyntaxNode) -> Option { if Self::can_cast(syntax.kind()) { @@ -2402,38 +3082,48 @@ impl AstNode for MdOrderListItem { self.syntax } } -impl std::fmt::Debug for MdOrderListItem { +impl std::fmt::Debug for MdReferenceImage { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { thread_local! 
{ static DEPTH : std :: cell :: Cell < u8 > = const { std :: cell :: Cell :: new (0) } }; let current_depth = DEPTH.get(); let result = if current_depth < 16 { DEPTH.set(current_depth + 1); - f.debug_struct("MdOrderListItem") - .field("md_bullet_list", &self.md_bullet_list()) + f.debug_struct("MdReferenceImage") + .field("excl_token", &support::DebugSyntaxResult(self.excl_token())) + .field( + "l_brack_token", + &support::DebugSyntaxResult(self.l_brack_token()), + ) + .field("alt", &self.alt()) + .field( + "r_brack_token", + &support::DebugSyntaxResult(self.r_brack_token()), + ) + .field("label", &support::DebugOptionalElement(self.label())) .finish() } else { - f.debug_struct("MdOrderListItem").finish() + f.debug_struct("MdReferenceImage").finish() }; DEPTH.set(current_depth); result } } -impl From for SyntaxNode { - fn from(n: MdOrderListItem) -> Self { +impl From for SyntaxNode { + fn from(n: MdReferenceImage) -> Self { n.syntax } } -impl From for SyntaxElement { - fn from(n: MdOrderListItem) -> Self { +impl From for SyntaxElement { + fn from(n: MdReferenceImage) -> Self { n.syntax.into() } } -impl AstNode for MdParagraph { +impl AstNode for MdReferenceLink { type Language = Language; const KIND_SET: SyntaxKindSet = - SyntaxKindSet::from_raw(RawSyntaxKind(MD_PARAGRAPH as u16)); + SyntaxKindSet::from_raw(RawSyntaxKind(MD_REFERENCE_LINK as u16)); fn can_cast(kind: SyntaxKind) -> bool { - kind == MD_PARAGRAPH + kind == MD_REFERENCE_LINK } fn cast(syntax: SyntaxNode) -> Option { if Self::can_cast(syntax.kind()) { @@ -2449,39 +3139,47 @@ impl AstNode for MdParagraph { self.syntax } } -impl std::fmt::Debug for MdParagraph { +impl std::fmt::Debug for MdReferenceLink { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { thread_local! 
{ static DEPTH : std :: cell :: Cell < u8 > = const { std :: cell :: Cell :: new (0) } }; let current_depth = DEPTH.get(); let result = if current_depth < 16 { DEPTH.set(current_depth + 1); - f.debug_struct("MdParagraph") - .field("list", &self.list()) - .field("hard_line", &support::DebugSyntaxResult(self.hard_line())) + f.debug_struct("MdReferenceLink") + .field( + "l_brack_token", + &support::DebugSyntaxResult(self.l_brack_token()), + ) + .field("text", &self.text()) + .field( + "r_brack_token", + &support::DebugSyntaxResult(self.r_brack_token()), + ) + .field("label", &support::DebugOptionalElement(self.label())) .finish() } else { - f.debug_struct("MdParagraph").finish() + f.debug_struct("MdReferenceLink").finish() }; DEPTH.set(current_depth); result } } -impl From for SyntaxNode { - fn from(n: MdParagraph) -> Self { +impl From for SyntaxNode { + fn from(n: MdReferenceLink) -> Self { n.syntax } } -impl From for SyntaxElement { - fn from(n: MdParagraph) -> Self { +impl From for SyntaxElement { + fn from(n: MdReferenceLink) -> Self { n.syntax.into() } } -impl AstNode for MdQuote { +impl AstNode for MdReferenceLinkLabel { type Language = Language; const KIND_SET: SyntaxKindSet = - SyntaxKindSet::from_raw(RawSyntaxKind(MD_QUOTE as u16)); + SyntaxKindSet::from_raw(RawSyntaxKind(MD_REFERENCE_LINK_LABEL as u16)); fn can_cast(kind: SyntaxKind) -> bool { - kind == MD_QUOTE + kind == MD_REFERENCE_LINK_LABEL } fn cast(syntax: SyntaxNode) -> Option { if Self::can_cast(syntax.kind()) { @@ -2497,32 +3195,37 @@ impl AstNode for MdQuote { self.syntax } } -impl std::fmt::Debug for MdQuote { +impl std::fmt::Debug for MdReferenceLinkLabel { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { thread_local! 
{ static DEPTH : std :: cell :: Cell < u8 > = const { std :: cell :: Cell :: new (0) } }; let current_depth = DEPTH.get(); let result = if current_depth < 16 { DEPTH.set(current_depth + 1); - f.debug_struct("MdQuote") + f.debug_struct("MdReferenceLinkLabel") + .field( + "l_brack_token", + &support::DebugSyntaxResult(self.l_brack_token()), + ) + .field("label", &self.label()) .field( - "any_md_block", - &support::DebugSyntaxResult(self.any_md_block()), + "r_brack_token", + &support::DebugSyntaxResult(self.r_brack_token()), ) .finish() } else { - f.debug_struct("MdQuote").finish() + f.debug_struct("MdReferenceLinkLabel").finish() }; DEPTH.set(current_depth); result } } -impl From for SyntaxNode { - fn from(n: MdQuote) -> Self { +impl From for SyntaxNode { + fn from(n: MdReferenceLinkLabel) -> Self { n.syntax } } -impl From for SyntaxElement { - fn from(n: MdQuote) -> Self { +impl From for SyntaxElement { + fn from(n: MdReferenceLinkLabel) -> Self { n.syntax.into() } } @@ -2554,9 +3257,10 @@ impl std::fmt::Debug for MdSetextHeader { let result = if current_depth < 16 { DEPTH.set(current_depth + 1); f.debug_struct("MdSetextHeader") + .field("content", &self.content()) .field( - "md_paragraph", - &support::DebugSyntaxResult(self.md_paragraph()), + "underline_token", + &support::DebugSyntaxResult(self.underline_token()), ) .finish() } else { @@ -2791,9 +3495,9 @@ impl From for AnyContainerBlock { Self::MdBulletListItem(node) } } -impl From for AnyContainerBlock { - fn from(node: MdOrderListItem) -> Self { - Self::MdOrderListItem(node) +impl From for AnyContainerBlock { + fn from(node: MdOrderedListItem) -> Self { + Self::MdOrderedListItem(node) } } impl From for AnyContainerBlock { @@ -2804,15 +3508,15 @@ impl From for AnyContainerBlock { impl AstNode for AnyContainerBlock { type Language = Language; const KIND_SET: SyntaxKindSet = MdBulletListItem::KIND_SET - .union(MdOrderListItem::KIND_SET) + .union(MdOrderedListItem::KIND_SET) .union(MdQuote::KIND_SET); fn 
can_cast(kind: SyntaxKind) -> bool { - matches!(kind, MD_BULLET_LIST_ITEM | MD_ORDER_LIST_ITEM | MD_QUOTE) + matches!(kind, MD_BULLET_LIST_ITEM | MD_ORDERED_LIST_ITEM | MD_QUOTE) } fn cast(syntax: SyntaxNode) -> Option { let res = match syntax.kind() { MD_BULLET_LIST_ITEM => Self::MdBulletListItem(MdBulletListItem { syntax }), - MD_ORDER_LIST_ITEM => Self::MdOrderListItem(MdOrderListItem { syntax }), + MD_ORDERED_LIST_ITEM => Self::MdOrderedListItem(MdOrderedListItem { syntax }), MD_QUOTE => Self::MdQuote(MdQuote { syntax }), _ => return None, }; @@ -2821,14 +3525,14 @@ impl AstNode for AnyContainerBlock { fn syntax(&self) -> &SyntaxNode { match self { Self::MdBulletListItem(it) => it.syntax(), - Self::MdOrderListItem(it) => it.syntax(), + Self::MdOrderedListItem(it) => it.syntax(), Self::MdQuote(it) => it.syntax(), } } fn into_syntax(self) -> SyntaxNode { match self { Self::MdBulletListItem(it) => it.into_syntax(), - Self::MdOrderListItem(it) => it.into_syntax(), + Self::MdOrderedListItem(it) => it.into_syntax(), Self::MdQuote(it) => it.into_syntax(), } } @@ -2837,7 +3541,7 @@ impl std::fmt::Debug for AnyContainerBlock { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { Self::MdBulletListItem(it) => std::fmt::Debug::fmt(it, f), - Self::MdOrderListItem(it) => std::fmt::Debug::fmt(it, f), + Self::MdOrderedListItem(it) => std::fmt::Debug::fmt(it, f), Self::MdQuote(it) => std::fmt::Debug::fmt(it, f), } } @@ -2846,7 +3550,7 @@ impl From for SyntaxNode { fn from(n: AnyContainerBlock) -> Self { match n { AnyContainerBlock::MdBulletListItem(it) => it.into_syntax(), - AnyContainerBlock::MdOrderListItem(it) => it.into_syntax(), + AnyContainerBlock::MdOrderedListItem(it) => it.into_syntax(), AnyContainerBlock::MdQuote(it) => it.into_syntax(), } } @@ -2872,6 +3576,16 @@ impl From for AnyLeafBlock { Self::MdLinkBlock(node) } } +impl From for AnyLeafBlock { + fn from(node: MdLinkReferenceDefinition) -> Self { + 
Self::MdLinkReferenceDefinition(node) + } +} +impl From for AnyLeafBlock { + fn from(node: MdNewline) -> Self { + Self::MdNewline(node) + } +} impl From for AnyLeafBlock { fn from(node: MdParagraph) -> Self { Self::MdParagraph(node) @@ -2893,6 +3607,8 @@ impl AstNode for AnyLeafBlock { .union(MdHeader::KIND_SET) .union(MdHtmlBlock::KIND_SET) .union(MdLinkBlock::KIND_SET) + .union(MdLinkReferenceDefinition::KIND_SET) + .union(MdNewline::KIND_SET) .union(MdParagraph::KIND_SET) .union(MdSetextHeader::KIND_SET) .union(MdThematicBreakBlock::KIND_SET); @@ -2901,6 +3617,8 @@ impl AstNode for AnyLeafBlock { MD_HEADER | MD_HTML_BLOCK | MD_LINK_BLOCK + | MD_LINK_REFERENCE_DEFINITION + | MD_NEWLINE | MD_PARAGRAPH | MD_SETEXT_HEADER | MD_THEMATIC_BREAK_BLOCK => true, @@ -2913,6 +3631,10 @@ impl AstNode for AnyLeafBlock { MD_HEADER => Self::MdHeader(MdHeader { syntax }), MD_HTML_BLOCK => Self::MdHtmlBlock(MdHtmlBlock { syntax }), MD_LINK_BLOCK => Self::MdLinkBlock(MdLinkBlock { syntax }), + MD_LINK_REFERENCE_DEFINITION => { + Self::MdLinkReferenceDefinition(MdLinkReferenceDefinition { syntax }) + } + MD_NEWLINE => Self::MdNewline(MdNewline { syntax }), MD_PARAGRAPH => Self::MdParagraph(MdParagraph { syntax }), MD_SETEXT_HEADER => Self::MdSetextHeader(MdSetextHeader { syntax }), MD_THEMATIC_BREAK_BLOCK => Self::MdThematicBreakBlock(MdThematicBreakBlock { syntax }), @@ -2930,6 +3652,8 @@ impl AstNode for AnyLeafBlock { Self::MdHeader(it) => it.syntax(), Self::MdHtmlBlock(it) => it.syntax(), Self::MdLinkBlock(it) => it.syntax(), + Self::MdLinkReferenceDefinition(it) => it.syntax(), + Self::MdNewline(it) => it.syntax(), Self::MdParagraph(it) => it.syntax(), Self::MdSetextHeader(it) => it.syntax(), Self::MdThematicBreakBlock(it) => it.syntax(), @@ -2941,6 +3665,8 @@ impl AstNode for AnyLeafBlock { Self::MdHeader(it) => it.into_syntax(), Self::MdHtmlBlock(it) => it.into_syntax(), Self::MdLinkBlock(it) => it.into_syntax(), + Self::MdLinkReferenceDefinition(it) => it.into_syntax(), + 
Self::MdNewline(it) => it.into_syntax(), Self::MdParagraph(it) => it.into_syntax(), Self::MdSetextHeader(it) => it.into_syntax(), Self::MdThematicBreakBlock(it) => it.into_syntax(), @@ -2955,6 +3681,8 @@ impl std::fmt::Debug for AnyLeafBlock { Self::MdHeader(it) => std::fmt::Debug::fmt(it, f), Self::MdHtmlBlock(it) => std::fmt::Debug::fmt(it, f), Self::MdLinkBlock(it) => std::fmt::Debug::fmt(it, f), + Self::MdLinkReferenceDefinition(it) => std::fmt::Debug::fmt(it, f), + Self::MdNewline(it) => std::fmt::Debug::fmt(it, f), Self::MdParagraph(it) => std::fmt::Debug::fmt(it, f), Self::MdSetextHeader(it) => std::fmt::Debug::fmt(it, f), Self::MdThematicBreakBlock(it) => std::fmt::Debug::fmt(it, f), @@ -2968,6 +3696,8 @@ impl From for SyntaxNode { AnyLeafBlock::MdHeader(it) => it.into_syntax(), AnyLeafBlock::MdHtmlBlock(it) => it.into_syntax(), AnyLeafBlock::MdLinkBlock(it) => it.into_syntax(), + AnyLeafBlock::MdLinkReferenceDefinition(it) => it.into_syntax(), + AnyLeafBlock::MdNewline(it) => it.into_syntax(), AnyLeafBlock::MdParagraph(it) => it.into_syntax(), AnyLeafBlock::MdSetextHeader(it) => it.into_syntax(), AnyLeafBlock::MdThematicBreakBlock(it) => it.into_syntax(), @@ -3038,6 +3768,16 @@ impl From for SyntaxElement { node.into() } } +impl From for AnyMdInline { + fn from(node: MdAutolink) -> Self { + Self::MdAutolink(node) + } +} +impl From for AnyMdInline { + fn from(node: MdEntityReference) -> Self { + Self::MdEntityReference(node) + } +} impl From for AnyMdInline { fn from(node: MdHardLine) -> Self { Self::MdHardLine(node) @@ -3058,6 +3798,11 @@ impl From for AnyMdInline { Self::MdInlineEmphasis(node) } } +impl From for AnyMdInline { + fn from(node: MdInlineHtml) -> Self { + Self::MdInlineHtml(node) + } +} impl From for AnyMdInline { fn from(node: MdInlineImage) -> Self { Self::MdInlineImage(node) @@ -3073,6 +3818,16 @@ impl From for AnyMdInline { Self::MdInlineLink(node) } } +impl From for AnyMdInline { + fn from(node: MdReferenceImage) -> Self { + 
Self::MdReferenceImage(node) + } +} +impl From for AnyMdInline { + fn from(node: MdReferenceLink) -> Self { + Self::MdReferenceLink(node) + } +} impl From for AnyMdInline { fn from(node: MdSoftBreak) -> Self { Self::MdSoftBreak(node) @@ -3085,38 +3840,53 @@ impl From for AnyMdInline { } impl AstNode for AnyMdInline { type Language = Language; - const KIND_SET: SyntaxKindSet = MdHardLine::KIND_SET + const KIND_SET: SyntaxKindSet = MdAutolink::KIND_SET + .union(MdEntityReference::KIND_SET) + .union(MdHardLine::KIND_SET) .union(MdHtmlBlock::KIND_SET) .union(MdInlineCode::KIND_SET) .union(MdInlineEmphasis::KIND_SET) + .union(MdInlineHtml::KIND_SET) .union(MdInlineImage::KIND_SET) .union(MdInlineItalic::KIND_SET) .union(MdInlineLink::KIND_SET) + .union(MdReferenceImage::KIND_SET) + .union(MdReferenceLink::KIND_SET) .union(MdSoftBreak::KIND_SET) .union(MdTextual::KIND_SET); fn can_cast(kind: SyntaxKind) -> bool { matches!( kind, - MD_HARD_LINE + MD_AUTOLINK + | MD_ENTITY_REFERENCE + | MD_HARD_LINE | MD_HTML_BLOCK | MD_INLINE_CODE | MD_INLINE_EMPHASIS + | MD_INLINE_HTML | MD_INLINE_IMAGE | MD_INLINE_ITALIC | MD_INLINE_LINK + | MD_REFERENCE_IMAGE + | MD_REFERENCE_LINK | MD_SOFT_BREAK | MD_TEXTUAL ) } fn cast(syntax: SyntaxNode) -> Option { let res = match syntax.kind() { + MD_AUTOLINK => Self::MdAutolink(MdAutolink { syntax }), + MD_ENTITY_REFERENCE => Self::MdEntityReference(MdEntityReference { syntax }), MD_HARD_LINE => Self::MdHardLine(MdHardLine { syntax }), MD_HTML_BLOCK => Self::MdHtmlBlock(MdHtmlBlock { syntax }), MD_INLINE_CODE => Self::MdInlineCode(MdInlineCode { syntax }), MD_INLINE_EMPHASIS => Self::MdInlineEmphasis(MdInlineEmphasis { syntax }), + MD_INLINE_HTML => Self::MdInlineHtml(MdInlineHtml { syntax }), MD_INLINE_IMAGE => Self::MdInlineImage(MdInlineImage { syntax }), MD_INLINE_ITALIC => Self::MdInlineItalic(MdInlineItalic { syntax }), MD_INLINE_LINK => Self::MdInlineLink(MdInlineLink { syntax }), + MD_REFERENCE_IMAGE => 
Self::MdReferenceImage(MdReferenceImage { syntax }), + MD_REFERENCE_LINK => Self::MdReferenceLink(MdReferenceLink { syntax }), MD_SOFT_BREAK => Self::MdSoftBreak(MdSoftBreak { syntax }), MD_TEXTUAL => Self::MdTextual(MdTextual { syntax }), _ => return None, @@ -3125,26 +3895,36 @@ impl AstNode for AnyMdInline { } fn syntax(&self) -> &SyntaxNode { match self { + Self::MdAutolink(it) => it.syntax(), + Self::MdEntityReference(it) => it.syntax(), Self::MdHardLine(it) => it.syntax(), Self::MdHtmlBlock(it) => it.syntax(), Self::MdInlineCode(it) => it.syntax(), Self::MdInlineEmphasis(it) => it.syntax(), + Self::MdInlineHtml(it) => it.syntax(), Self::MdInlineImage(it) => it.syntax(), Self::MdInlineItalic(it) => it.syntax(), Self::MdInlineLink(it) => it.syntax(), + Self::MdReferenceImage(it) => it.syntax(), + Self::MdReferenceLink(it) => it.syntax(), Self::MdSoftBreak(it) => it.syntax(), Self::MdTextual(it) => it.syntax(), } } fn into_syntax(self) -> SyntaxNode { match self { + Self::MdAutolink(it) => it.into_syntax(), + Self::MdEntityReference(it) => it.into_syntax(), Self::MdHardLine(it) => it.into_syntax(), Self::MdHtmlBlock(it) => it.into_syntax(), Self::MdInlineCode(it) => it.into_syntax(), Self::MdInlineEmphasis(it) => it.into_syntax(), + Self::MdInlineHtml(it) => it.into_syntax(), Self::MdInlineImage(it) => it.into_syntax(), Self::MdInlineItalic(it) => it.into_syntax(), Self::MdInlineLink(it) => it.into_syntax(), + Self::MdReferenceImage(it) => it.into_syntax(), + Self::MdReferenceLink(it) => it.into_syntax(), Self::MdSoftBreak(it) => it.into_syntax(), Self::MdTextual(it) => it.into_syntax(), } @@ -3153,13 +3933,18 @@ impl AstNode for AnyMdInline { impl std::fmt::Debug for AnyMdInline { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { + Self::MdAutolink(it) => std::fmt::Debug::fmt(it, f), + Self::MdEntityReference(it) => std::fmt::Debug::fmt(it, f), Self::MdHardLine(it) => std::fmt::Debug::fmt(it, f), Self::MdHtmlBlock(it) => 
std::fmt::Debug::fmt(it, f), Self::MdInlineCode(it) => std::fmt::Debug::fmt(it, f), Self::MdInlineEmphasis(it) => std::fmt::Debug::fmt(it, f), + Self::MdInlineHtml(it) => std::fmt::Debug::fmt(it, f), Self::MdInlineImage(it) => std::fmt::Debug::fmt(it, f), Self::MdInlineItalic(it) => std::fmt::Debug::fmt(it, f), Self::MdInlineLink(it) => std::fmt::Debug::fmt(it, f), + Self::MdReferenceImage(it) => std::fmt::Debug::fmt(it, f), + Self::MdReferenceLink(it) => std::fmt::Debug::fmt(it, f), Self::MdSoftBreak(it) => std::fmt::Debug::fmt(it, f), Self::MdTextual(it) => std::fmt::Debug::fmt(it, f), } @@ -3168,13 +3953,18 @@ impl std::fmt::Debug for AnyMdInline { impl From for SyntaxNode { fn from(n: AnyMdInline) -> Self { match n { + AnyMdInline::MdAutolink(it) => it.into_syntax(), + AnyMdInline::MdEntityReference(it) => it.into_syntax(), AnyMdInline::MdHardLine(it) => it.into_syntax(), AnyMdInline::MdHtmlBlock(it) => it.into_syntax(), AnyMdInline::MdInlineCode(it) => it.into_syntax(), AnyMdInline::MdInlineEmphasis(it) => it.into_syntax(), + AnyMdInline::MdInlineHtml(it) => it.into_syntax(), AnyMdInline::MdInlineImage(it) => it.into_syntax(), AnyMdInline::MdInlineItalic(it) => it.into_syntax(), AnyMdInline::MdInlineLink(it) => it.into_syntax(), + AnyMdInline::MdReferenceImage(it) => it.into_syntax(), + AnyMdInline::MdReferenceLink(it) => it.into_syntax(), AnyMdInline::MdSoftBreak(it) => it.into_syntax(), AnyMdInline::MdTextual(it) => it.into_syntax(), } @@ -3211,6 +4001,11 @@ impl std::fmt::Display for AnyMdInline { std::fmt::Display::fmt(self.syntax(), f) } } +impl std::fmt::Display for MdAutolink { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + std::fmt::Display::fmt(self.syntax(), f) + } +} impl std::fmt::Display for MdBullet { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { std::fmt::Display::fmt(self.syntax(), f) @@ -3226,6 +4021,11 @@ impl std::fmt::Display for MdDocument { std::fmt::Display::fmt(self.syntax(), f) } } +impl 
std::fmt::Display for MdEntityReference { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + std::fmt::Display::fmt(self.syntax(), f) + } +} impl std::fmt::Display for MdFencedCodeBlock { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { std::fmt::Display::fmt(self.syntax(), f) @@ -3261,17 +4061,17 @@ impl std::fmt::Display for MdIndentCodeBlock { std::fmt::Display::fmt(self.syntax(), f) } } -impl std::fmt::Display for MdIndentedCodeLine { +impl std::fmt::Display for MdInlineCode { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { std::fmt::Display::fmt(self.syntax(), f) } } -impl std::fmt::Display for MdInlineCode { +impl std::fmt::Display for MdInlineEmphasis { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { std::fmt::Display::fmt(self.syntax(), f) } } -impl std::fmt::Display for MdInlineEmphasis { +impl std::fmt::Display for MdInlineHtml { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { std::fmt::Display::fmt(self.syntax(), f) } @@ -3281,37 +4081,47 @@ impl std::fmt::Display for MdInlineImage { std::fmt::Display::fmt(self.syntax(), f) } } -impl std::fmt::Display for MdInlineImageAlt { +impl std::fmt::Display for MdInlineItalic { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + std::fmt::Display::fmt(self.syntax(), f) + } +} +impl std::fmt::Display for MdInlineLink { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { std::fmt::Display::fmt(self.syntax(), f) } } -impl std::fmt::Display for MdInlineImageLink { +impl std::fmt::Display for MdLinkBlock { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { std::fmt::Display::fmt(self.syntax(), f) } } -impl std::fmt::Display for MdInlineImageSource { +impl std::fmt::Display for MdLinkDestination { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { std::fmt::Display::fmt(self.syntax(), f) } } -impl std::fmt::Display for MdInlineItalic { +impl 
std::fmt::Display for MdLinkLabel { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { std::fmt::Display::fmt(self.syntax(), f) } } -impl std::fmt::Display for MdInlineLink { +impl std::fmt::Display for MdLinkReferenceDefinition { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { std::fmt::Display::fmt(self.syntax(), f) } } -impl std::fmt::Display for MdLinkBlock { +impl std::fmt::Display for MdLinkTitle { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + std::fmt::Display::fmt(self.syntax(), f) + } +} +impl std::fmt::Display for MdNewline { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { std::fmt::Display::fmt(self.syntax(), f) } } -impl std::fmt::Display for MdOrderListItem { +impl std::fmt::Display for MdOrderedListItem { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { std::fmt::Display::fmt(self.syntax(), f) } @@ -3326,6 +4136,21 @@ impl std::fmt::Display for MdQuote { std::fmt::Display::fmt(self.syntax(), f) } } +impl std::fmt::Display for MdReferenceImage { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + std::fmt::Display::fmt(self.syntax(), f) + } +} +impl std::fmt::Display for MdReferenceLink { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + std::fmt::Display::fmt(self.syntax(), f) + } +} +impl std::fmt::Display for MdReferenceLinkLabel { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + std::fmt::Display::fmt(self.syntax(), f) + } +} impl std::fmt::Display for MdSetextHeader { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { std::fmt::Display::fmt(self.syntax(), f) @@ -3619,7 +4444,7 @@ impl Serialize for MdCodeNameList { seq.end() } } -impl AstSeparatedList for MdCodeNameList { +impl AstNodeList for MdCodeNameList { type Language = Language; type Node = MdTextual; fn syntax_list(&self) -> &SyntaxList { @@ -3632,19 +4457,19 @@ impl AstSeparatedList for 
MdCodeNameList { impl Debug for MdCodeNameList { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { f.write_str("MdCodeNameList ")?; - f.debug_list().entries(self.elements()).finish() + f.debug_list().entries(self.iter()).finish() } } -impl IntoIterator for MdCodeNameList { - type Item = SyntaxResult; - type IntoIter = AstSeparatedListNodesIterator; +impl IntoIterator for &MdCodeNameList { + type Item = MdTextual; + type IntoIter = AstNodeListIterator; fn into_iter(self) -> Self::IntoIter { self.iter() } } -impl IntoIterator for &MdCodeNameList { - type Item = SyntaxResult; - type IntoIter = AstSeparatedListNodesIterator; +impl IntoIterator for MdCodeNameList { + type Item = MdTextual; + type IntoIter = AstNodeListIterator; fn into_iter(self) -> Self::IntoIter { self.iter() } @@ -3732,88 +4557,6 @@ impl IntoIterator for MdHashList { } } #[derive(Clone, Eq, PartialEq, Hash)] -pub struct MdIndentedCodeLineList { - syntax_list: SyntaxList, -} -impl MdIndentedCodeLineList { - #[doc = r" Create an AstNode from a SyntaxNode without checking its kind"] - #[doc = r""] - #[doc = r" # Safety"] - #[doc = r" This function must be guarded with a call to [AstNode::can_cast]"] - #[doc = r" or a match on [SyntaxNode::kind]"] - #[inline] - pub unsafe fn new_unchecked(syntax: SyntaxNode) -> Self { - Self { - syntax_list: syntax.into_list(), - } - } -} -impl AstNode for MdIndentedCodeLineList { - type Language = Language; - const KIND_SET: SyntaxKindSet = - SyntaxKindSet::from_raw(RawSyntaxKind(MD_INDENTED_CODE_LINE_LIST as u16)); - fn can_cast(kind: SyntaxKind) -> bool { - kind == MD_INDENTED_CODE_LINE_LIST - } - fn cast(syntax: SyntaxNode) -> Option { - if Self::can_cast(syntax.kind()) { - Some(Self { - syntax_list: syntax.into_list(), - }) - } else { - None - } - } - fn syntax(&self) -> &SyntaxNode { - self.syntax_list.node() - } - fn into_syntax(self) -> SyntaxNode { - self.syntax_list.into_node() - } -} -impl Serialize for MdIndentedCodeLineList { - fn serialize(&self, 
serializer: S) -> Result - where - S: Serializer, - { - let mut seq = serializer.serialize_seq(Some(self.len()))?; - for e in self.iter() { - seq.serialize_element(&e)?; - } - seq.end() - } -} -impl AstNodeList for MdIndentedCodeLineList { - type Language = Language; - type Node = MdIndentedCodeLine; - fn syntax_list(&self) -> &SyntaxList { - &self.syntax_list - } - fn into_syntax_list(self) -> SyntaxList { - self.syntax_list - } -} -impl Debug for MdIndentedCodeLineList { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - f.write_str("MdIndentedCodeLineList ")?; - f.debug_list().entries(self.iter()).finish() - } -} -impl IntoIterator for &MdIndentedCodeLineList { - type Item = MdIndentedCodeLine; - type IntoIter = AstNodeListIterator; - fn into_iter(self) -> Self::IntoIter { - self.iter() - } -} -impl IntoIterator for MdIndentedCodeLineList { - type Item = MdIndentedCodeLine; - type IntoIter = AstNodeListIterator; - fn into_iter(self) -> Self::IntoIter { - self.iter() - } -} -#[derive(Clone, Eq, PartialEq, Hash)] pub struct MdInlineItemList { syntax_list: SyntaxList, } @@ -3895,88 +4638,6 @@ impl IntoIterator for MdInlineItemList { self.iter() } } -#[derive(Clone, Eq, PartialEq, Hash)] -pub struct MdOrderList { - syntax_list: SyntaxList, -} -impl MdOrderList { - #[doc = r" Create an AstNode from a SyntaxNode without checking its kind"] - #[doc = r""] - #[doc = r" # Safety"] - #[doc = r" This function must be guarded with a call to [AstNode::can_cast]"] - #[doc = r" or a match on [SyntaxNode::kind]"] - #[inline] - pub unsafe fn new_unchecked(syntax: SyntaxNode) -> Self { - Self { - syntax_list: syntax.into_list(), - } - } -} -impl AstNode for MdOrderList { - type Language = Language; - const KIND_SET: SyntaxKindSet = - SyntaxKindSet::from_raw(RawSyntaxKind(MD_ORDER_LIST as u16)); - fn can_cast(kind: SyntaxKind) -> bool { - kind == MD_ORDER_LIST - } - fn cast(syntax: SyntaxNode) -> Option { - if Self::can_cast(syntax.kind()) { - Some(Self { - 
syntax_list: syntax.into_list(), - }) - } else { - None - } - } - fn syntax(&self) -> &SyntaxNode { - self.syntax_list.node() - } - fn into_syntax(self) -> SyntaxNode { - self.syntax_list.into_node() - } -} -impl Serialize for MdOrderList { - fn serialize(&self, serializer: S) -> Result - where - S: Serializer, - { - let mut seq = serializer.serialize_seq(Some(self.len()))?; - for e in self.iter() { - seq.serialize_element(&e)?; - } - seq.end() - } -} -impl AstNodeList for MdOrderList { - type Language = Language; - type Node = AnyCodeBlock; - fn syntax_list(&self) -> &SyntaxList { - &self.syntax_list - } - fn into_syntax_list(self) -> SyntaxList { - self.syntax_list - } -} -impl Debug for MdOrderList { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - f.write_str("MdOrderList ")?; - f.debug_list().entries(self.iter()).finish() - } -} -impl IntoIterator for &MdOrderList { - type Item = AnyCodeBlock; - type IntoIter = AstNodeListIterator; - fn into_iter(self) -> Self::IntoIter { - self.iter() - } -} -impl IntoIterator for MdOrderList { - type Item = AnyCodeBlock; - type IntoIter = AstNodeListIterator; - fn into_iter(self) -> Self::IntoIter { - self.iter() - } -} #[derive(Clone)] pub struct DebugSyntaxElementChildren(pub SyntaxElementChildren); impl Debug for DebugSyntaxElementChildren { diff --git a/crates/biome_markdown_syntax/src/generated/nodes_mut.rs b/crates/biome_markdown_syntax/src/generated/nodes_mut.rs index 74939ef00364..d44555c5be76 100644 --- a/crates/biome_markdown_syntax/src/generated/nodes_mut.rs +++ b/crates/biome_markdown_syntax/src/generated/nodes_mut.rs @@ -3,23 +3,37 @@ use crate::{MarkdownSyntaxToken as SyntaxToken, generated::nodes::*}; use biome_rowan::AstNode; use std::iter::once; -impl MdBullet { - pub fn with_bullet_token(self, element: SyntaxToken) -> Self { +impl MdAutolink { + pub fn with_l_angle_token(self, element: SyntaxToken) -> Self { Self::unwrap_cast( self.syntax .splice_slots(0usize..=0usize, 
once(Some(element.into()))), ) } - pub fn with_space_token(self, element: SyntaxToken) -> Self { + pub fn with_value(self, element: MdInlineItemList) -> Self { Self::unwrap_cast( self.syntax - .splice_slots(1usize..=1usize, once(Some(element.into()))), + .splice_slots(1usize..=1usize, once(Some(element.into_syntax().into()))), ) } - pub fn with_content(self, element: MdInlineItemList) -> Self { + pub fn with_r_angle_token(self, element: SyntaxToken) -> Self { Self::unwrap_cast( self.syntax - .splice_slots(2usize..=2usize, once(Some(element.into_syntax().into()))), + .splice_slots(2usize..=2usize, once(Some(element.into()))), + ) + } +} +impl MdBullet { + pub fn with_bullet_token(self, element: SyntaxToken) -> Self { + Self::unwrap_cast( + self.syntax + .splice_slots(0usize..=0usize, once(Some(element.into()))), + ) + } + pub fn with_content(self, element: MdBlockList) -> Self { + Self::unwrap_cast( + self.syntax + .splice_slots(1usize..=1usize, once(Some(element.into_syntax().into()))), ) } } @@ -51,6 +65,14 @@ impl MdDocument { ) } } +impl MdEntityReference { + pub fn with_value_token(self, element: SyntaxToken) -> Self { + Self::unwrap_cast( + self.syntax + .splice_slots(0usize..=0usize, once(Some(element.into()))), + ) + } +} impl MdFencedCodeBlock { pub fn with_l_fence_token(self, element: SyntaxToken) -> Self { Self::unwrap_cast( @@ -64,28 +86,16 @@ impl MdFencedCodeBlock { .splice_slots(1usize..=1usize, once(Some(element.into_syntax().into()))), ) } - pub fn with_l_hard_line(self, element: MdHardLine) -> Self { + pub fn with_content(self, element: MdInlineItemList) -> Self { Self::unwrap_cast( self.syntax .splice_slots(2usize..=2usize, once(Some(element.into_syntax().into()))), ) } - pub fn with_content(self, element: MdTextual) -> Self { - Self::unwrap_cast( - self.syntax - .splice_slots(3usize..=3usize, once(Some(element.into_syntax().into()))), - ) - } - pub fn with_r_hard_line(self, element: MdHardLine) -> Self { - Self::unwrap_cast( - self.syntax - 
.splice_slots(4usize..=4usize, once(Some(element.into_syntax().into()))), - ) - } pub fn with_r_fence_token(self, element: SyntaxToken) -> Self { Self::unwrap_cast( self.syntax - .splice_slots(5usize..=5usize, once(Some(element.into()))), + .splice_slots(3usize..=3usize, once(Some(element.into()))), ) } } @@ -126,7 +136,7 @@ impl MdHeader { } } impl MdHtmlBlock { - pub fn with_md_textual(self, element: MdTextual) -> Self { + pub fn with_content(self, element: MdInlineItemList) -> Self { Self::unwrap_cast( self.syntax .splice_slots(0usize..=0usize, once(Some(element.into_syntax().into()))), @@ -142,26 +152,12 @@ impl MdIndent { } } impl MdIndentCodeBlock { - pub fn with_lines(self, element: MdIndentedCodeLineList) -> Self { - Self::unwrap_cast( - self.syntax - .splice_slots(0usize..=0usize, once(Some(element.into_syntax().into()))), - ) - } -} -impl MdIndentedCodeLine { - pub fn with_indentation(self, element: MdIndent) -> Self { + pub fn with_content(self, element: MdInlineItemList) -> Self { Self::unwrap_cast( self.syntax .splice_slots(0usize..=0usize, once(Some(element.into_syntax().into()))), ) } - pub fn with_content(self, element: MdTextual) -> Self { - Self::unwrap_cast( - self.syntax - .splice_slots(1usize..=1usize, once(Some(element.into_syntax().into()))), - ) - } } impl MdInlineCode { pub fn with_l_tick_token(self, element: SyntaxToken) -> Self { @@ -203,46 +199,66 @@ impl MdInlineEmphasis { ) } } +impl MdInlineHtml { + pub fn with_value(self, element: MdInlineItemList) -> Self { + Self::unwrap_cast( + self.syntax + .splice_slots(0usize..=0usize, once(Some(element.into_syntax().into()))), + ) + } +} impl MdInlineImage { - pub fn with_l_brack_token(self, element: SyntaxToken) -> Self { + pub fn with_excl_token(self, element: SyntaxToken) -> Self { Self::unwrap_cast( self.syntax .splice_slots(0usize..=0usize, once(Some(element.into()))), ) } - pub fn with_excl_token(self, element: SyntaxToken) -> Self { + pub fn with_l_brack_token(self, element: 
SyntaxToken) -> Self { Self::unwrap_cast( self.syntax .splice_slots(1usize..=1usize, once(Some(element.into()))), ) } - pub fn with_alt(self, element: MdInlineImageAlt) -> Self { + pub fn with_alt(self, element: MdInlineItemList) -> Self { Self::unwrap_cast( self.syntax .splice_slots(2usize..=2usize, once(Some(element.into_syntax().into()))), ) } - pub fn with_source(self, element: MdInlineImageSource) -> Self { + pub fn with_r_brack_token(self, element: SyntaxToken) -> Self { Self::unwrap_cast( self.syntax - .splice_slots(3usize..=3usize, once(Some(element.into_syntax().into()))), + .splice_slots(3usize..=3usize, once(Some(element.into()))), ) } - pub fn with_r_brack_token(self, element: SyntaxToken) -> Self { + pub fn with_l_paren_token(self, element: SyntaxToken) -> Self { Self::unwrap_cast( self.syntax .splice_slots(4usize..=4usize, once(Some(element.into()))), ) } - pub fn with_link(self, element: Option) -> Self { + pub fn with_destination(self, element: MdInlineItemList) -> Self { + Self::unwrap_cast( + self.syntax + .splice_slots(5usize..=5usize, once(Some(element.into_syntax().into()))), + ) + } + pub fn with_title(self, element: Option) -> Self { Self::unwrap_cast(self.syntax.splice_slots( - 5usize..=5usize, + 6usize..=6usize, once(element.map(|element| element.into_syntax().into())), )) } + pub fn with_r_paren_token(self, element: SyntaxToken) -> Self { + Self::unwrap_cast( + self.syntax + .splice_slots(7usize..=7usize, once(Some(element.into()))), + ) + } } -impl MdInlineImageAlt { - pub fn with_l_brack_token(self, element: SyntaxToken) -> Self { +impl MdInlineItalic { + pub fn with_l_fence_token(self, element: SyntaxToken) -> Self { Self::unwrap_cast( self.syntax .splice_slots(0usize..=0usize, once(Some(element.into()))), @@ -254,81 +270,101 @@ impl MdInlineImageAlt { .splice_slots(1usize..=1usize, once(Some(element.into_syntax().into()))), ) } - pub fn with_r_brack_token(self, element: SyntaxToken) -> Self { + pub fn with_r_fence_token(self, element: 
SyntaxToken) -> Self { Self::unwrap_cast( self.syntax .splice_slots(2usize..=2usize, once(Some(element.into()))), ) } } -impl MdInlineImageLink { - pub fn with_l_paren_token(self, element: SyntaxToken) -> Self { +impl MdInlineLink { + pub fn with_l_brack_token(self, element: SyntaxToken) -> Self { Self::unwrap_cast( self.syntax .splice_slots(0usize..=0usize, once(Some(element.into()))), ) } - pub fn with_content(self, element: MdInlineItemList) -> Self { + pub fn with_text(self, element: MdInlineItemList) -> Self { Self::unwrap_cast( self.syntax .splice_slots(1usize..=1usize, once(Some(element.into_syntax().into()))), ) } - pub fn with_r_paren_token(self, element: SyntaxToken) -> Self { + pub fn with_r_brack_token(self, element: SyntaxToken) -> Self { Self::unwrap_cast( self.syntax .splice_slots(2usize..=2usize, once(Some(element.into()))), ) } -} -impl MdInlineImageSource { pub fn with_l_paren_token(self, element: SyntaxToken) -> Self { Self::unwrap_cast( self.syntax - .splice_slots(0usize..=0usize, once(Some(element.into()))), + .splice_slots(3usize..=3usize, once(Some(element.into()))), ) } - pub fn with_content(self, element: MdInlineItemList) -> Self { + pub fn with_destination(self, element: MdInlineItemList) -> Self { Self::unwrap_cast( self.syntax - .splice_slots(1usize..=1usize, once(Some(element.into_syntax().into()))), + .splice_slots(4usize..=4usize, once(Some(element.into_syntax().into()))), ) } + pub fn with_title(self, element: Option) -> Self { + Self::unwrap_cast(self.syntax.splice_slots( + 5usize..=5usize, + once(element.map(|element| element.into_syntax().into())), + )) + } pub fn with_r_paren_token(self, element: SyntaxToken) -> Self { Self::unwrap_cast( self.syntax - .splice_slots(2usize..=2usize, once(Some(element.into()))), + .splice_slots(6usize..=6usize, once(Some(element.into()))), ) } } -impl MdInlineItalic { - pub fn with_l_fence_token(self, element: SyntaxToken) -> Self { +impl MdLinkBlock { + pub fn with_label(self, element: MdTextual) 
-> Self { Self::unwrap_cast( self.syntax - .splice_slots(0usize..=0usize, once(Some(element.into()))), + .splice_slots(0usize..=0usize, once(Some(element.into_syntax().into()))), ) } - pub fn with_content(self, element: MdInlineItemList) -> Self { + pub fn with_url(self, element: MdTextual) -> Self { Self::unwrap_cast( self.syntax .splice_slots(1usize..=1usize, once(Some(element.into_syntax().into()))), ) } - pub fn with_r_fence_token(self, element: SyntaxToken) -> Self { + pub fn with_title(self, element: Option) -> Self { + Self::unwrap_cast(self.syntax.splice_slots( + 2usize..=2usize, + once(element.map(|element| element.into_syntax().into())), + )) + } +} +impl MdLinkDestination { + pub fn with_content(self, element: MdInlineItemList) -> Self { Self::unwrap_cast( self.syntax - .splice_slots(2usize..=2usize, once(Some(element.into()))), + .splice_slots(0usize..=0usize, once(Some(element.into_syntax().into()))), ) } } -impl MdInlineLink { +impl MdLinkLabel { + pub fn with_content(self, element: MdInlineItemList) -> Self { + Self::unwrap_cast( + self.syntax + .splice_slots(0usize..=0usize, once(Some(element.into_syntax().into()))), + ) + } +} +impl MdLinkReferenceDefinition { pub fn with_l_brack_token(self, element: SyntaxToken) -> Self { Self::unwrap_cast( self.syntax .splice_slots(0usize..=0usize, once(Some(element.into()))), ) } - pub fn with_text(self, element: MdInlineItemList) -> Self { + pub fn with_label(self, element: MdLinkLabel) -> Self { Self::unwrap_cast( self.syntax .splice_slots(1usize..=1usize, once(Some(element.into_syntax().into()))), @@ -340,82 +376,168 @@ impl MdInlineLink { .splice_slots(2usize..=2usize, once(Some(element.into()))), ) } - pub fn with_l_paren_token(self, element: SyntaxToken) -> Self { + pub fn with_colon_token(self, element: SyntaxToken) -> Self { Self::unwrap_cast( self.syntax .splice_slots(3usize..=3usize, once(Some(element.into()))), ) } - pub fn with_source(self, element: MdInlineItemList) -> Self { + pub fn 
with_destination(self, element: MdLinkDestination) -> Self { Self::unwrap_cast( self.syntax .splice_slots(4usize..=4usize, once(Some(element.into_syntax().into()))), ) } - pub fn with_r_paren_token(self, element: SyntaxToken) -> Self { + pub fn with_title(self, element: Option) -> Self { + Self::unwrap_cast(self.syntax.splice_slots( + 5usize..=5usize, + once(element.map(|element| element.into_syntax().into())), + )) + } +} +impl MdLinkTitle { + pub fn with_content(self, element: MdInlineItemList) -> Self { Self::unwrap_cast( self.syntax - .splice_slots(5usize..=5usize, once(Some(element.into()))), + .splice_slots(0usize..=0usize, once(Some(element.into_syntax().into()))), ) } } -impl MdLinkBlock { - pub fn with_label(self, element: MdTextual) -> Self { +impl MdNewline { + pub fn with_value_token(self, element: SyntaxToken) -> Self { + Self::unwrap_cast( + self.syntax + .splice_slots(0usize..=0usize, once(Some(element.into()))), + ) + } +} +impl MdOrderedListItem { + pub fn with_md_bullet_list(self, element: MdBulletList) -> Self { Self::unwrap_cast( self.syntax .splice_slots(0usize..=0usize, once(Some(element.into_syntax().into()))), ) } - pub fn with_url(self, element: MdTextual) -> Self { +} +impl MdParagraph { + pub fn with_list(self, element: MdInlineItemList) -> Self { Self::unwrap_cast( self.syntax - .splice_slots(1usize..=1usize, once(Some(element.into_syntax().into()))), + .splice_slots(0usize..=0usize, once(Some(element.into_syntax().into()))), ) } - pub fn with_title(self, element: Option) -> Self { + pub fn with_hard_line(self, element: Option) -> Self { Self::unwrap_cast(self.syntax.splice_slots( - 2usize..=2usize, + 1usize..=1usize, once(element.map(|element| element.into_syntax().into())), )) } } -impl MdOrderListItem { - pub fn with_md_bullet_list(self, element: MdBulletList) -> Self { +impl MdQuote { + pub fn with_marker_token(self, element: SyntaxToken) -> Self { Self::unwrap_cast( self.syntax - .splice_slots(0usize..=0usize, 
once(Some(element.into_syntax().into()))), + .splice_slots(0usize..=0usize, once(Some(element.into()))), + ) + } + pub fn with_content(self, element: MdBlockList) -> Self { + Self::unwrap_cast( + self.syntax + .splice_slots(1usize..=1usize, once(Some(element.into_syntax().into()))), ) } } -impl MdParagraph { - pub fn with_list(self, element: MdInlineItemList) -> Self { +impl MdReferenceImage { + pub fn with_excl_token(self, element: SyntaxToken) -> Self { Self::unwrap_cast( self.syntax - .splice_slots(0usize..=0usize, once(Some(element.into_syntax().into()))), + .splice_slots(0usize..=0usize, once(Some(element.into()))), ) } - pub fn with_hard_line(self, element: MdHardLine) -> Self { + pub fn with_l_brack_token(self, element: SyntaxToken) -> Self { + Self::unwrap_cast( + self.syntax + .splice_slots(1usize..=1usize, once(Some(element.into()))), + ) + } + pub fn with_alt(self, element: MdInlineItemList) -> Self { + Self::unwrap_cast( + self.syntax + .splice_slots(2usize..=2usize, once(Some(element.into_syntax().into()))), + ) + } + pub fn with_r_brack_token(self, element: SyntaxToken) -> Self { + Self::unwrap_cast( + self.syntax + .splice_slots(3usize..=3usize, once(Some(element.into()))), + ) + } + pub fn with_label(self, element: Option) -> Self { + Self::unwrap_cast(self.syntax.splice_slots( + 4usize..=4usize, + once(element.map(|element| element.into_syntax().into())), + )) + } +} +impl MdReferenceLink { + pub fn with_l_brack_token(self, element: SyntaxToken) -> Self { + Self::unwrap_cast( + self.syntax + .splice_slots(0usize..=0usize, once(Some(element.into()))), + ) + } + pub fn with_text(self, element: MdInlineItemList) -> Self { Self::unwrap_cast( self.syntax .splice_slots(1usize..=1usize, once(Some(element.into_syntax().into()))), ) } + pub fn with_r_brack_token(self, element: SyntaxToken) -> Self { + Self::unwrap_cast( + self.syntax + .splice_slots(2usize..=2usize, once(Some(element.into()))), + ) + } + pub fn with_label(self, element: Option) -> Self { + 
Self::unwrap_cast(self.syntax.splice_slots( + 3usize..=3usize, + once(element.map(|element| element.into_syntax().into())), + )) + } } -impl MdQuote { - pub fn with_any_md_block(self, element: AnyMdBlock) -> Self { +impl MdReferenceLinkLabel { + pub fn with_l_brack_token(self, element: SyntaxToken) -> Self { Self::unwrap_cast( self.syntax - .splice_slots(0usize..=0usize, once(Some(element.into_syntax().into()))), + .splice_slots(0usize..=0usize, once(Some(element.into()))), + ) + } + pub fn with_label(self, element: MdInlineItemList) -> Self { + Self::unwrap_cast( + self.syntax + .splice_slots(1usize..=1usize, once(Some(element.into_syntax().into()))), + ) + } + pub fn with_r_brack_token(self, element: SyntaxToken) -> Self { + Self::unwrap_cast( + self.syntax + .splice_slots(2usize..=2usize, once(Some(element.into()))), ) } } impl MdSetextHeader { - pub fn with_md_paragraph(self, element: MdParagraph) -> Self { + pub fn with_content(self, element: MdInlineItemList) -> Self { Self::unwrap_cast( self.syntax .splice_slots(0usize..=0usize, once(Some(element.into_syntax().into()))), ) } + pub fn with_underline_token(self, element: SyntaxToken) -> Self { + Self::unwrap_cast( + self.syntax + .splice_slots(1usize..=1usize, once(Some(element.into()))), + ) + } } impl MdSoftBreak { pub fn with_value_token(self, element: SyntaxToken) -> Self { diff --git a/crates/biome_markdown_syntax/src/lib.rs b/crates/biome_markdown_syntax/src/lib.rs index de2f97a4c428..35a36c728abd 100644 --- a/crates/biome_markdown_syntax/src/lib.rs +++ b/crates/biome_markdown_syntax/src/lib.rs @@ -45,7 +45,9 @@ impl SyntaxKind for MarkdownSyntaxKind { } fn is_trivia(self) -> bool { - matches!(self, Self::NEWLINE | Self::WHITESPACE | Self::TAB) + // Markdown is markup: whitespace is syntactic, and NEWLINE is explicit. + // We intentionally avoid trivia for whitespace so it becomes part of text. 
+ false } fn to_string(&self) -> Option<&'static str> { @@ -56,16 +58,7 @@ impl SyntaxKind for MarkdownSyntaxKind { impl TryFrom for TriviaPieceKind { type Error = (); - fn try_from(value: MarkdownSyntaxKind) -> Result { - if value.is_trivia() { - match value { - MarkdownSyntaxKind::NEWLINE => Ok(Self::Newline), - MarkdownSyntaxKind::WHITESPACE => Ok(Self::Whitespace), - MarkdownSyntaxKind::TAB => Ok(Self::Skipped), - _ => unreachable!("Not Trivia"), - } - } else { - Err(()) - } + fn try_from(_value: MarkdownSyntaxKind) -> Result { + Err(()) } } diff --git a/crates/biome_parser/src/lexer.rs b/crates/biome_parser/src/lexer.rs index 23b4a1b36326..0e4ed39f6368 100644 --- a/crates/biome_parser/src/lexer.rs +++ b/crates/biome_parser/src/lexer.rs @@ -499,6 +499,10 @@ impl<'l, Lex: Lexer<'l>> BufferedLexer { } } + pub fn lexer_mut(&mut self) -> &mut Lex { + &mut self.inner + } + /// Returns the kind of the next token and any associated diagnostic. /// /// [See `Lexer.next_token`](Lexer::next_token) @@ -587,8 +591,10 @@ impl<'l, Lex: Lexer<'l>> BufferedLexer { fn reset_lookahead(&mut self) { if let Some(current) = self.current.take() { self.inner.rewind(current); - self.lookahead.clear(); + } else if let Some(first) = self.lookahead.get_checkpoint(0).cloned() { + self.inner.rewind(first); } + self.lookahead.clear(); } /// Returns an iterator over the tokens following the current token to perform lookahead. 
@@ -609,27 +615,60 @@ where { /// Re-lex the current token in the given context pub fn re_lex(&mut self, context: Lex::ReLexContext) -> Lex::Kind { - let current_kind = self.current(); - let current_checkpoint = self.inner.checkpoint(); - if let Some(current) = self.current.take() { self.inner.rewind(current); + } else if let Some(first) = self.lookahead.get_checkpoint(0).cloned() { + self.inner.rewind(first); } let new_kind = self.inner.re_lex(context); - - if new_kind != current_kind { - // The token has changed, clear the lookahead - self.lookahead.clear(); - } else if !self.lookahead.is_empty() { - // It's still the same kind. So let's move the lexer back to the position it was before re-lexing - // and keep the lookahead as is. - self.current = Some(self.inner.checkpoint()); - self.inner.rewind(current_checkpoint); - } + self.current = Some(self.inner.checkpoint()); + self.lookahead.clear(); new_kind } + + /// Force re-lex the current token in a new lex context, clearing all lookahead. + /// + /// Use this after lookahead operations when you need to switch lexing context + /// and ensure cached tokens from the previous context don't leak through. + /// + /// This method: + /// 1. Rewinds to the current token's START position + /// 2. Clears all lookahead cache + /// 3. Re-lexes the current token fresh in the new context + pub fn force_relex_in_context(&mut self, context: Lex::LexContext) -> Lex::Kind { + let checkpoint = if let Some(current) = self.current.clone() { + current + } else if let Some(first) = self.lookahead.get_checkpoint(0).cloned() { + first + } else { + self.inner.checkpoint() + }; + + // Rewind to the START of the current token (not the end). + // Use neutral values for kind/flags since they're immediately + // overwritten by next_token and shouldn't leak old context state. 
+ let rewind_checkpoint = LexerCheckpoint { + position: checkpoint.current_start, + current_start: checkpoint.current_start, + current_kind: Lex::Kind::EOF, + current_flags: TokenFlags::empty(), + after_line_break: checkpoint.after_line_break, + unicode_bom_length: checkpoint.unicode_bom_length, + diagnostics_pos: checkpoint.diagnostics_pos, + }; + + self.inner.rewind(rewind_checkpoint); + self.current = None; + self.lookahead.clear(); + + // Lex the token fresh in the new context + let kind = self.inner.next_token(context); + self.current = Some(self.inner.checkpoint()); + + kind + } } impl<'l, Lex> BufferedLexer diff --git a/xtask/codegen/markdown.ungram b/xtask/codegen/markdown.ungram index b514b5c793a6..7adbbe29f005 100644 --- a/xtask/codegen/markdown.ungram +++ b/xtask/codegen/markdown.ungram @@ -1,6 +1,6 @@ // Markdown Un-Grammar. // -// This grammar specifies the structure of Rust's concrete syntax tree. +// This grammar specifies the structure of Markdown's concrete syntax tree. // It does not specify parsing rules (ambiguities, precedence, etc are out of scope). // Tokens are processed -- contextual keywords are recognised, compound operators glued. // @@ -57,13 +57,15 @@ AnyLeafBlock = | MdSetextHeader | AnyCodeBlock | MdHtmlBlock + | MdLinkReferenceDefinition | MdLinkBlock | MdParagraph + | MdNewline AnyContainerBlock = MdQuote | MdBulletListItem - | MdOrderListItem + | MdOrderedListItem @@ -84,7 +86,11 @@ MdHash = '#' // === // bar // --- -MdSetextHeader = MdParagraph +// The underline indicates heading level: '=' for H1, '-' for H2 +// The underline is stored as a setext underline literal token +MdSetextHeader = + content: MdInlineItemList + underline: 'md_setext_underline_literal' // indented code blocks & fenced code blocks AnyCodeBlock = @@ -94,58 +100,83 @@ AnyCodeBlock = // code // ^^^^^^^^ // The space before "code" is intentional. +// Indentation is tracked in trivia, so we use MdInlineItemList for content. 
MdIndentCodeBlock = - lines: MdIndentedCodeLineList - -MdIndentedCodeLineList = MdIndentedCodeLine* - -MdIndentedCodeLine = - indentation: MdIndent - content: MdTextual + content: MdInlineItemList // ```shell // // ``` +// or +// ~~~shell +// +// ~~~ MdFencedCodeBlock = - l_fence: '```' + l_fence: ('```' | '~~~') code_list: MdCodeNameList - l_hard_line: MdHardLine - content: MdTextual - r_hard_line: MdHardLine - r_fence: '```' + content: MdInlineItemList + r_fence: ('```' | '~~~') -MdCodeNameList = (MdTextual (',' MdTextual)*) +MdCodeNameList = MdTextual* -// html block -MdHtmlBlock = MdTextual +// html block - content is stored as raw text (multiple textual tokens) +MdHtmlBlock = content: MdInlineItemList +// Link reference definition per CommonMark §4.7 +// [label]: destination "title" or [label]: destination 'title' or [label]: destination (title) +// Labels are case-insensitive and whitespace-normalized +MdLinkReferenceDefinition = + '[' + label: MdLinkLabel + ']' + ':' + destination: MdLinkDestination + title: MdLinkTitle? + +// Label for link reference definitions (CommonMark §4.7) +// Up to 999 chars, no unescaped brackets, whitespace-normalized +// Labels can contain multiple tokens (e.g., "foo-bar" is: foo, -, bar) +MdLinkLabel = content: MdInlineItemList + +// Destination URL for link reference definitions +// Can be angle-bracketed or bare url (no whitespace) +// Destinations can contain multiple tokens +MdLinkDestination = content: MdInlineItemList + +// Optional title for link reference definitions +// Quoted with ", ', or () +// Titles can contain multiple tokens +MdLinkTitle = content: MdInlineItemList + +// Legacy MdLinkBlock retained for backwards compatibility MdLinkBlock = label: MdTextual url: MdTextual title: MdTextual? -MdQuote = AnyMdBlock +MdQuote = + marker: '>' + content: AnyMdBlock MdBulletListItem = MdBulletList -MdOrderListItem = MdBulletList +MdOrderedListItem = MdBulletList MdBulletList = MdBullet* // - Hey! // ^^^^^^ +// 1. Hey! 
+// ^^^^^^^ MdBullet = - bullet: ('-' | '*') - space: 'md_textual_literal' - content: MdInlineItemList - -MdOrderList = AnyCodeBlock* + bullet: ('-' | '*' | '+' | 'md_ordered_list_marker') + content: MdBlockList // Any block paragraph // // Another block paragraph MdParagraph = list: MdInlineItemList - hard_line: MdHardLine + hard_line: MdHardLine? MdInlineItemList = AnyMdInline* @@ -156,6 +187,11 @@ AnyMdInline = | MdInlineItalic | MdInlineLink | MdInlineImage + | MdReferenceLink + | MdReferenceImage + | MdAutolink + | MdInlineHtml + | MdEntityReference | MdHtmlBlock | MdHardLine | MdSoftBreak @@ -191,39 +227,68 @@ MdInlineLink = text: MdInlineItemList ']' '(' - source: MdInlineItemList + destination: MdInlineItemList + title: MdLinkTitle? ')' -// [![alt](image)](link) -// ^^^^^^^^^^^^^^^^^^^^^ +// ![alt](image) +// ^^^^^^^^^^^^^^ MdInlineImage = + '!' + '[' + alt: MdInlineItemList + ']' + '(' + destination: MdInlineItemList + title: MdLinkTitle? + ')' + +// Reference-style link per CommonMark §6.5 +// Full reference: [text][label] +// Collapsed reference: [text][] +// Shortcut reference: [text] +MdReferenceLink = '[' + text: MdInlineItemList + ']' + label: MdReferenceLinkLabel? + +// Reference-style image per CommonMark §6.6 +// Full reference: ![alt][label] +// Collapsed reference: ![alt][] +// Shortcut reference: ![alt] +MdReferenceImage = '!' - alt: MdInlineImageAlt - source: MdInlineImageSource + '[' + alt: MdInlineItemList ']' - link: MdInlineImageLink? + label: MdReferenceLinkLabel? 
-// [![alt](image)](link) -// ^^^^^ -MdInlineImageAlt = +// Label part of a reference link/image +// Either [label] (full) or [] (collapsed) +// Absent for shortcut references +MdReferenceLinkLabel = '[' - content: MdInlineItemList + label: MdInlineItemList ']' -// [![alt](image)](link) -// ^^^^^^^ -MdInlineImageSource = - '(' - content: MdInlineItemList - ')' +// or +// Autolinks per CommonMark §6.4 (URI) and §6.5 (email) +MdAutolink = + '<' + value: MdInlineItemList + '>' -// [![alt](image)](link) -// ^^^^^^ -MdInlineImageLink = - '(' - content: MdInlineItemList - ')' +// Raw inline HTML per CommonMark §6.8 +// Includes: open tags, closing tags, comments, processing instructions, +// declarations, and CDATA sections +MdInlineHtml = value: MdInlineItemList + +// Entity and numeric character references per CommonMark §6.2 +// Named entities: &name; (2-31 alphanumeric chars starting with letter) +// Decimal numeric: &#digits; (1-7 decimal digits) +// Hexadecimal: &#xhex; or &#Xhex; (1-6 hex digits) +MdEntityReference = value: 'md_entity_literal' // *** // --- @@ -231,10 +296,14 @@ MdInlineImageLink = // https://spec.commonmark.org/0.31.2/#container-blocks-and-leaf-blocks MdThematicBreakBlock = value: 'md_thematic_break_literal' +// Explicit newline node for inter-block newlines. +// This preserves NEWLINEs in the CST without creating "newline-only paragraphs". +// Used when a NEWLINE appears between blocks and isn't part of inline content. 
+MdNewline = value: 'NEWLINE' + MdHardLine = value: 'md_hard_line_literal' MdSoftBreak = value: 'md_soft_break_literal' MdTextual = value: 'md_textual_literal' MdIndent = value: 'md_indent_chunk_literal' - diff --git a/xtask/codegen/src/markdown_kinds_src.rs b/xtask/codegen/src/markdown_kinds_src.rs index 1f99a289e34f..13154a184249 100644 --- a/xtask/codegen/src/markdown_kinds_src.rs +++ b/xtask/codegen/src/markdown_kinds_src.rs @@ -13,15 +13,18 @@ pub const MARKDOWN_KINDS_SRC: KindsSrc = KindsSrc { ("!", "BANG"), ("-", "MINUS"), ("*", "STAR"), + ("+", "PLUS"), ("**", "DOUBLE_STAR"), ("`", "BACKTICK"), ("```", "TRIPLE_BACKTICK"), ("~", "TILDE"), + ("~~~", "TRIPLE_TILDE"), (" ", "WHITESPACE3"), ("_", "UNDERSCORE"), ("__", "DOUBLE_UNDERSCORE"), ("#", "HASH"), (",", "COMMA"), + (":", "COLON"), ], keywords: &["null"], literals: &[ @@ -31,13 +34,17 @@ pub const MARKDOWN_KINDS_SRC: KindsSrc = KindsSrc { "MD_STRING_LITERAL", "MD_INDENT_CHUNK_LITERAL", "MD_THEMATIC_BREAK_LITERAL", + "MD_SETEXT_UNDERLINE_LITERAL", + "MD_ORDERED_LIST_MARKER", "MD_ERROR_LITERAL", + "MD_ENTITY_LITERAL", ], tokens: &["ERROR_TOKEN", "NEWLINE", "WHITESPACE", "TAB"], nodes: &[ // Bogus nodes "BOGUS", "MD_BOGUS", + "MD_BOGUS_BULLET", // node "MD_DOCUMENT", "MD_BLOCK_LIST", @@ -49,11 +56,14 @@ pub const MARKDOWN_KINDS_SRC: KindsSrc = KindsSrc { "MD_CODE_NAME_LIST", "MD_HTML_BLOCK", "MD_LINK_BLOCK", + "MD_LINK_REFERENCE_DEFINITION", + "MD_LINK_LABEL", + "MD_LINK_DESTINATION", + "MD_LINK_TITLE", "MD_QUOTE", - "MD_ORDER_LIST_ITEM", + "MD_ORDERED_LIST_ITEM", "MD_BULLET_LIST_ITEM", "MD_BULLET_LIST", - "MD_ORDER_LIST", "MD_PARAGRAPH", "MD_INLINE_ITEM_LIST", "MD_INLINE_EMPHASIS", @@ -62,6 +72,12 @@ pub const MARKDOWN_KINDS_SRC: KindsSrc = KindsSrc { "MD_BULLET", "MD_INLINE_LINK", "MD_INLINE_IMAGE", + "MD_REFERENCE_LINK", + "MD_REFERENCE_IMAGE", + "MD_REFERENCE_LINK_LABEL", + "MD_AUTOLINK", + "MD_INLINE_HTML", + "MD_ENTITY_REFERENCE", "MD_INLINE_IMAGE_ALT", "MD_INDENTED_CODE_LINE", "MD_INLINE_IMAGE_LINK", 
@@ -74,5 +90,6 @@ pub const MARKDOWN_KINDS_SRC: KindsSrc = KindsSrc { "MD_STRING", "MD_INDENT", "MD_THEMATIC_BREAK_BLOCK", + "MD_NEWLINE", ], }; From 4be0efe9f71b12282d4cc432d8986cba048ce428 Mon Sep 17 00:00:00 2001 From: "autofix-ci[bot]" <114827586+autofix-ci[bot]@users.noreply.github.com> Date: Tue, 30 Dec 2025 03:23:28 +0000 Subject: [PATCH 02/12] [autofix.ci] apply automated fixes --- .../src/generated/node_factory.rs | 2 +- .../src/generated/syntax_factory.rs | 2 +- crates/biome_markdown_parser/Cargo.toml | 30 +++++++++---------- .../src/generated/nodes.rs | 8 ++--- .../src/generated/nodes_mut.rs | 2 +- 5 files changed, 21 insertions(+), 23 deletions(-) diff --git a/crates/biome_markdown_factory/src/generated/node_factory.rs b/crates/biome_markdown_factory/src/generated/node_factory.rs index 84da802e71b8..e517fe04833b 100644 --- a/crates/biome_markdown_factory/src/generated/node_factory.rs +++ b/crates/biome_markdown_factory/src/generated/node_factory.rs @@ -421,7 +421,7 @@ impl MdParagraphBuilder { )) } } -pub fn md_quote(marker_token: SyntaxToken, content: MdBlockList) -> MdQuote { +pub fn md_quote(marker_token: SyntaxToken, content: AnyMdBlock) -> MdQuote { MdQuote::unwrap_cast(SyntaxNode::new_detached( MarkdownSyntaxKind::MD_QUOTE, [ diff --git a/crates/biome_markdown_factory/src/generated/syntax_factory.rs b/crates/biome_markdown_factory/src/generated/syntax_factory.rs index 0eb1dbd122a6..e0385453e25e 100644 --- a/crates/biome_markdown_factory/src/generated/syntax_factory.rs +++ b/crates/biome_markdown_factory/src/generated/syntax_factory.rs @@ -780,7 +780,7 @@ impl SyntaxFactory for MarkdownSyntaxFactory { } slots.next_slot(); if let Some(element) = &current_element - && MdBlockList::can_cast(element.kind()) + && AnyMdBlock::can_cast(element.kind()) { slots.mark_present(); current_element = elements.next(); diff --git a/crates/biome_markdown_parser/Cargo.toml b/crates/biome_markdown_parser/Cargo.toml index c4936ea721f3..561f1054cd32 100644 --- 
a/crates/biome_markdown_parser/Cargo.toml +++ b/crates/biome_markdown_parser/Cargo.toml @@ -15,11 +15,6 @@ publish = false [package.metadata.workspaces] independent = true -[features] -# Enables test utilities (to_html module) for CommonMark spec compliance testing. -# Not included in production builds to avoid unnecessary dependencies and code. -test_utils = ["dep:htmlize"] - [dependencies] biome_console = { workspace = true } biome_diagnostics = { workspace = true } @@ -28,25 +23,28 @@ biome_markdown_syntax = { workspace = true } biome_parser = { workspace = true } biome_rowan = { workspace = true } biome_unicode_table = { workspace = true } +# Optional dependency for test_utils feature (HTML rendering for spec tests) +htmlize = { version = "1.0.6", features = ["unescape"], optional = true } tracing = { workspace = true } unicode-bom = { workspace = true } -# Optional dependency for test_utils feature (HTML rendering for spec tests) -htmlize = { version = "1.0.6", features = ["unescape"], optional = true } - [dev-dependencies] -biome_test_utils = { path = "../biome_test_utils" } -insta = { workspace = true } -quickcheck = { workspace = true } -quickcheck_macros = { workspace = true } -serde = { workspace = true, features = ["derive"] } -serde_json = { workspace = true } -tests_macros = { path = "../tests_macros" } - # Self-dependency to enable test_utils for integration tests. # Integration tests are compiled as separate crates and can only access public API, # so we need to enable the feature here to make to_html available for spec tests. 
biome_markdown_parser = { path = ".", features = ["test_utils"] } +biome_test_utils = { path = "../biome_test_utils" } +insta = { workspace = true } +quickcheck = { workspace = true } +quickcheck_macros = { workspace = true } +serde = { workspace = true, features = ["derive"] } +serde_json = { workspace = true } +tests_macros = { path = "../tests_macros" } + +[features] +# Enables test utilities (to_html module) for CommonMark spec compliance testing. +# Not included in production builds to avoid unnecessary dependencies and code. +test_utils = ["dep:htmlize"] [lints] workspace = true diff --git a/crates/biome_markdown_syntax/src/generated/nodes.rs b/crates/biome_markdown_syntax/src/generated/nodes.rs index c9e9db0fd573..6fab92d37e27 100644 --- a/crates/biome_markdown_syntax/src/generated/nodes.rs +++ b/crates/biome_markdown_syntax/src/generated/nodes.rs @@ -1138,8 +1138,8 @@ impl MdQuote { pub fn marker_token(&self) -> SyntaxResult<SyntaxToken> { support::required_token(&self.syntax, 0usize) } - pub fn content(&self) -> MdBlockList { - support::list(&self.syntax, 1usize) + pub fn content(&self) -> SyntaxResult<AnyMdBlock> { + support::required_node(&self.syntax, 1usize) } } impl Serialize for MdQuote { @@ -1153,7 +1153,7 @@ impl Serialize for MdQuote { #[derive(Serialize)] pub struct MdQuoteFields { pub marker_token: SyntaxResult<SyntaxToken>, - pub content: MdBlockList, + pub content: SyntaxResult<AnyMdBlock>, } #[derive(Clone, PartialEq, Eq, Hash)] pub struct MdReferenceImage { @@ -3042,7 +3042,7 @@ impl std::fmt::Debug for MdQuote { "marker_token", &support::DebugSyntaxResult(self.marker_token()), ) - .field("content", &self.content()) + .field("content", &support::DebugSyntaxResult(self.content())) .finish() } else { f.debug_struct("MdQuote").finish() diff --git a/crates/biome_markdown_syntax/src/generated/nodes_mut.rs b/crates/biome_markdown_syntax/src/generated/nodes_mut.rs index d44555c5be76..77bbaaddb219 100644 --- a/crates/biome_markdown_syntax/src/generated/nodes_mut.rs +++ 
b/crates/biome_markdown_syntax/src/generated/nodes_mut.rs @@ -440,7 +440,7 @@ impl MdQuote { .splice_slots(0usize..=0usize, once(Some(element.into()))), ) } - pub fn with_content(self, element: MdBlockList) -> Self { + pub fn with_content(self, element: AnyMdBlock) -> Self { Self::unwrap_cast( self.syntax .splice_slots(1usize..=1usize, once(Some(element.into_syntax().into()))), From 2f3f36aa353b89684ac82ed1c802c5f646524589 Mon Sep 17 00:00:00 2001 From: jfmcdowell <206422+jfmcdowell@users.noreply.github.com> Date: Wed, 31 Dec 2025 19:03:52 -0500 Subject: [PATCH 03/12] fix(markdown_parser): harden parser safety and configurability Add unconditional no-progress recovery in block parsing and replace unsafe unreachable_unchecked() with safe unreachable!(). Introduce an upfront oversized-input guard in the lexer and make max nesting depth configurable via parse options. Update list/quote handling and related snapshots to match the new recovery behavior and configurable limits. --- crates/biome_markdown_parser/src/lexer/mod.rs | 36 +- crates/biome_markdown_parser/src/parser.rs | 18 +- crates/biome_markdown_parser/src/syntax.rs | 54 +-- .../src/syntax/fenced_code_block.rs | 2 +- .../biome_markdown_parser/src/syntax/list.rs | 52 ++- .../src/syntax/parse_error.rs | 27 +- .../biome_markdown_parser/src/syntax/quote.rs | 28 +- .../error/quote_nesting_too_deep.md.snap | 418 +++++++++--------- .../md_test_suite/ok/bullet_list.md.snap | 25 +- .../tests/md_test_suite/ok/edge_cases.md.snap | 234 +++------- .../ok/indent_code_block.md.snap | 20 +- .../ok/lazy_continuation.md.snap | 91 ++-- .../md_test_suite/ok/list_indentation.md.snap | 29 +- .../md_test_suite/ok/list_tightness.md.snap | 113 ++--- .../md_test_suite/ok/multiline_list.md.snap | 57 +-- .../md_test_suite/ok/ordered_list.md.snap | 15 +- 16 files changed, 611 insertions(+), 608 deletions(-) diff --git a/crates/biome_markdown_parser/src/lexer/mod.rs b/crates/biome_markdown_parser/src/lexer/mod.rs index 
070ee2b29479..6ff6464dfdcd 100644 --- a/crates/biome_markdown_parser/src/lexer/mod.rs +++ b/crates/biome_markdown_parser/src/lexer/mod.rs @@ -9,7 +9,7 @@ use biome_parser::diagnostic::ParseDiagnostic; use biome_parser::lexer::{ LexContext, Lexer, LexerCheckpoint, LexerWithCheckpoint, ReLexer, TokenFlags, }; -use biome_rowan::{SyntaxKind, TextSize}; +use biome_rowan::{SyntaxKind, TextRange, TextSize}; use biome_unicode_table::Dispatch::{self, AMP, *}; use biome_unicode_table::lookup_byte; @@ -87,8 +87,11 @@ pub(crate) struct MarkdownLexer<'src> { diagnostics: Vec, force_ordered_list_marker: bool, + input_too_large: bool, } +const MAX_MARKDOWN_SOURCE_SIZE: usize = u32::MAX as usize; + impl<'src> Lexer<'src> for MarkdownLexer<'src> { const NEWLINE: Self::Kind = NEWLINE; @@ -118,6 +121,13 @@ impl<'src> Lexer<'src> for MarkdownLexer<'src> { } fn next_token(&mut self, context: Self::LexContext) -> Self::Kind { + if self.input_too_large { + self.current_start = TextSize::from(0); + self.current_flags = TokenFlags::empty(); + self.current_kind = EOF; + return EOF; + } + self.current_start = self.text_position(); self.current_flags = TokenFlags::empty(); @@ -199,6 +209,18 @@ impl<'src> Lexer<'src> for MarkdownLexer<'src> { impl<'src> MarkdownLexer<'src> { /// Make a new lexer from a str, this is safe because strs are valid utf8 pub fn from_str(source: &'src str) -> Self { + let input_too_large = source.len() > MAX_MARKDOWN_SOURCE_SIZE; + let mut diagnostics = vec![]; + if input_too_large { + diagnostics.push( + ParseDiagnostic::new( + "Markdown input exceeds the 4GB limit.", + TextRange::empty(TextSize::from(0)), + ) + .with_hint("Split the input into smaller files."), + ); + } + Self { source, // Start of document is treated as start of line for indentation purposes @@ -208,8 +230,9 @@ impl<'src> MarkdownLexer<'src> { current_start: TextSize::from(0), current_flags: TokenFlags::empty(), position: 0, - diagnostics: vec![], + diagnostics, force_ordered_list_marker: false, + 
input_too_large, } } @@ -940,10 +963,7 @@ impl<'src> MarkdownLexer<'src> { if let Some(chr) = string.chars().next() { chr } else { - // Safety: we always call this when we are at a valid char, so this branch is completely unreachable - unsafe { - core::hint::unreachable_unchecked(); - } + unreachable!("lexer expected a valid UTF-8 character at current position"); } } @@ -1190,6 +1210,10 @@ impl<'src> MarkdownLexer<'src> { impl<'src> ReLexer<'src> for MarkdownLexer<'src> { fn re_lex(&mut self, context: Self::ReLexContext) -> Self::Kind { + if self.input_too_large { + return EOF; + } + let old_position = self.position; self.position = u32::from(self.current_start) as usize; diff --git a/crates/biome_markdown_parser/src/parser.rs b/crates/biome_markdown_parser/src/parser.rs index 95933bc75402..2ac966faa0c5 100644 --- a/crates/biome_markdown_parser/src/parser.rs +++ b/crates/biome_markdown_parser/src/parser.rs @@ -11,12 +11,23 @@ use crate::syntax::inline::EmphasisContext; use crate::token_source::{MarkdownTokenSource, MarkdownTokenSourceCheckpoint}; /// Options for configuring the markdown parser. -// ... (omitted for brevity, but I'll include enough context) -#[derive(Default, Debug, Clone)] +#[derive(Debug, Clone)] pub struct MarkdownParseOptions { + /// Maximum nesting depth for block quotes and lists. + /// + /// This limits recursion on pathological input to avoid stack overflow. + pub max_nesting_depth: usize, // Reserved for future GFM options } +impl Default for MarkdownParseOptions { + fn default() -> Self { + Self { + max_nesting_depth: crate::syntax::parse_error::DEFAULT_MAX_NESTING_DEPTH, + } + } +} + /// Internal parser state for tracking nesting and context. /// /// # Depth Tracking @@ -61,6 +72,8 @@ pub(crate) struct MarkdownParserState { pub(crate) quote_indents: Vec, /// Virtual line start override for container prefixes (e.g., block quotes). 
pub(crate) virtual_line_start: Option, + /// Flag to unwind quote parsing when nesting exceeds the maximum depth. + pub(crate) quote_depth_exceeded: bool, } #[derive(Debug, Clone, PartialEq, Eq)] @@ -111,7 +124,6 @@ impl<'source> MarkdownParser<'source> { } /// Returns parser options. Reserved for GFM extensions. - #[expect(dead_code)] pub(crate) fn options(&self) -> &MarkdownParseOptions { &self.options } diff --git a/crates/biome_markdown_parser/src/syntax.rs b/crates/biome_markdown_parser/src/syntax.rs index 41a797913c67..1f218b71cd92 100644 --- a/crates/biome_markdown_parser/src/syntax.rs +++ b/crates/biome_markdown_parser/src/syntax.rs @@ -39,7 +39,8 @@ use biome_parser::{ }; use biome_rowan::TextSize; use fenced_code_block::{ - at_fenced_code_block, parse_fenced_code_block, parse_fenced_code_block_force, + at_fenced_code_block, info_string_has_backtick, parse_fenced_code_block, + parse_fenced_code_block_force, }; use header::{at_header, parse_header}; use html_block::{at_html_block, at_html_block_interrupt, parse_html_block}; @@ -245,14 +246,21 @@ pub(crate) fn parse_any_block_with_indent_code_policy( ParsedBlockKind::Paragraph }; - if start == p.cur_range().start() && std::env::var("CMARK_HANG_DEBUG").is_ok() { - eprintln!( - "parse_any_block stuck at {:?} {:?} => {:?}", - p.cur(), - p.cur_text(), - kind - ); - panic!("parse_any_block made no progress"); + if start == p.cur_range().start() { + let range = p.cur_range(); + if std::env::var("CMARK_HANG_DEBUG").is_ok() { + eprintln!( + "parse_any_block made no progress at {:?} {:?} => {:?}", + p.cur(), + p.cur_text(), + kind + ); + } + p.error(parse_error::parse_any_block_no_progress(p, range)); + if !p.at(T![EOF]) { + p.bump_any(); + } + return ParsedBlockKind::Other; } kind @@ -925,30 +933,22 @@ fn inline_list_source_len(p: &mut MarkdownParser) -> usize { }) } -fn line_starts_with_fence(p: &MarkdownParser) -> bool { +fn line_starts_with_fence(p: &mut MarkdownParser) -> bool { if !p.at_line_start() { return 
false; } - let source = p.source_after_current(); - let mut indent = 0usize; - let mut offset = 0usize; - for (idx, ch) in source.char_indices() { - match ch { - ' ' => indent += 1, - '\t' => indent += 4 - (indent % 4), - _ => { - offset = idx; - break; - } - } - if indent > 3 { + p.lookahead(|p| { + if p.line_start_leading_indent() > 3 { return false; } - } - - let rest = &source[offset..]; - rest.starts_with("```") || rest.starts_with("~~~") + p.skip_line_indent(3); + let rest = p.source_after_current(); + if rest.starts_with("```") { + return !info_string_has_backtick(p); + } + rest.starts_with("~~~") + }) } fn consume_partial_quote_prefix(p: &mut MarkdownParser, depth: usize) -> bool { diff --git a/crates/biome_markdown_parser/src/syntax/fenced_code_block.rs b/crates/biome_markdown_parser/src/syntax/fenced_code_block.rs index 3cac786d94eb..06a40cf15371 100644 --- a/crates/biome_markdown_parser/src/syntax/fenced_code_block.rs +++ b/crates/biome_markdown_parser/src/syntax/fenced_code_block.rs @@ -218,7 +218,7 @@ fn is_valid_closing_fence(p: &mut MarkdownParser, is_tilde_fence: bool, fence_le line_has_closing_fence(p, is_tilde_fence, fence_len) } -fn info_string_has_backtick(p: &mut MarkdownParser) -> bool { +pub(crate) fn info_string_has_backtick(p: &mut MarkdownParser) -> bool { p.lookahead(|p| { if p.at(TRIPLE_TILDE) { return false; diff --git a/crates/biome_markdown_parser/src/syntax/list.rs b/crates/biome_markdown_parser/src/syntax/list.rs index ba23f4802cd9..38c213e4e506 100644 --- a/crates/biome_markdown_parser/src/syntax/list.rs +++ b/crates/biome_markdown_parser/src/syntax/list.rs @@ -22,8 +22,9 @@ //! ## Depth Limits //! //! To prevent stack overflow from pathological input (deeply nested lists), -//! nesting depth is limited to 100 levels. Deeper nesting emits a diagnostic -//! and treats additional list markers as content. +//! nesting depth is limited by `MarkdownParseOptions::max_nesting_depth` +//! (default: 100). 
Deeper nesting emits a diagnostic and treats additional +//! list markers as content. //! //! ## Current Limitations //! @@ -43,7 +44,7 @@ use super::quote::{consume_quote_prefix, consume_quote_prefix_without_virtual, h use biome_rowan::TextRange; use super::fenced_code_block::parse_fenced_code_block; -use super::parse_error::{MAX_NESTING_DEPTH, list_nesting_too_deep}; +use super::parse_error::list_nesting_too_deep; use super::{ParsedBlockKind, at_block_interrupt, at_indent_code_block}; use crate::MarkdownParser; use crate::syntax::parse_any_block_with_indent_code_policy; @@ -437,17 +438,39 @@ fn expected_bullet(p: &MarkdownParser, range: TextRange) -> ParseDiagnostic { /// /// Parses consecutive bullet items into a single list. /// -/// Nesting is limited to `MAX_NESTING_DEPTH` to prevent stack overflow. +/// Nesting is limited to `MarkdownParseOptions::max_nesting_depth` to prevent stack overflow. pub(crate) fn parse_bullet_list_item(p: &mut MarkdownParser) -> ParsedSyntax { if !at_bullet_list_item(p) { return Absent; } // Check depth limit before parsing - if p.state().list_nesting_depth >= MAX_NESTING_DEPTH { + let max_nesting_depth = p.options().max_nesting_depth; + if p.state().list_nesting_depth >= max_nesting_depth { // Emit diagnostic and treat as content let range = p.cur_range(); - p.error(list_nesting_too_deep(p, range)); + p.error(list_nesting_too_deep(p, range, max_nesting_depth)); + skip_list_marker_indent(p); + if p.at(MD_SETEXT_UNDERLINE_LITERAL) { + p.parse_as_skipped_trivia_tokens(|p| p.bump_remap(T![-])); + } else if p.at(MD_TEXTUAL_LITERAL) { + let text = p.cur_text(); + if text == "-" { + p.parse_as_skipped_trivia_tokens(|p| p.bump_remap(T![-])); + } else if text == "*" { + p.parse_as_skipped_trivia_tokens(|p| p.bump_remap(T![*])); + } else if text == "+" { + p.parse_as_skipped_trivia_tokens(|p| p.bump_remap(T![+])); + } + } else if p.at(T![-]) || p.at(T![*]) || p.at(T![+]) { + p.parse_as_skipped_trivia_tokens(|p| p.bump(p.cur())); + } + if 
p.at(MD_TEXTUAL_LITERAL) { + let text = p.cur_text(); + if text.starts_with(' ') || text.starts_with('\t') { + p.parse_as_skipped_trivia_tokens(|p| p.bump(MD_TEXTUAL_LITERAL)); + } + } return Absent; } @@ -696,17 +719,28 @@ fn expected_ordered_item(p: &MarkdownParser, range: TextRange) -> ParseDiagnosti /// /// Parses consecutive ordered items into a single list. /// -/// Nesting is limited to `MAX_NESTING_DEPTH` to prevent stack overflow. +/// Nesting is limited to `MarkdownParseOptions::max_nesting_depth` to prevent stack overflow. pub(crate) fn parse_order_list_item(p: &mut MarkdownParser) -> ParsedSyntax { if !at_order_list_item(p) { return Absent; } // Check depth limit before parsing - if p.state().list_nesting_depth >= MAX_NESTING_DEPTH { + let max_nesting_depth = p.options().max_nesting_depth; + if p.state().list_nesting_depth >= max_nesting_depth { // Emit diagnostic and treat as content let range = p.cur_range(); - p.error(list_nesting_too_deep(p, range)); + p.error(list_nesting_too_deep(p, range, max_nesting_depth)); + skip_list_marker_indent(p); + if p.at(MD_ORDERED_LIST_MARKER) { + p.parse_as_skipped_trivia_tokens(|p| p.bump(MD_ORDERED_LIST_MARKER)); + } + if p.at(MD_TEXTUAL_LITERAL) { + let text = p.cur_text(); + if text.starts_with(' ') || text.starts_with('\t') { + p.parse_as_skipped_trivia_tokens(|p| p.bump(MD_TEXTUAL_LITERAL)); + } + } return Absent; } diff --git a/crates/biome_markdown_parser/src/syntax/parse_error.rs b/crates/biome_markdown_parser/src/syntax/parse_error.rs index 73a5abb5845b..5e848e4d936a 100644 --- a/crates/biome_markdown_parser/src/syntax/parse_error.rs +++ b/crates/biome_markdown_parser/src/syntax/parse_error.rs @@ -5,8 +5,8 @@ use biome_parser::Parser; use biome_parser::diagnostic::ParseDiagnostic; use biome_rowan::TextRange; -/// Maximum nesting depth for block quotes and lists. -pub(crate) const MAX_NESTING_DEPTH: usize = 100; +/// Default maximum nesting depth for block quotes and lists. 
+pub(crate) const DEFAULT_MAX_NESTING_DEPTH: usize = 100; /// Unclosed emphasis (bold/italic). /// @@ -134,9 +134,13 @@ pub(crate) fn unterminated_fenced_code( /// >>>>>>>>...>>>> (100+ levels) /// ^^^^^^^^^^^^^^^^ nesting too deep /// ``` -pub(crate) fn quote_nesting_too_deep(p: &MarkdownParser, range: TextRange) -> ParseDiagnostic { +pub(crate) fn quote_nesting_too_deep( + p: &MarkdownParser, + range: TextRange, + max_nesting_depth: usize, +) -> ParseDiagnostic { p.err_builder( - format!("Block quote nesting exceeds maximum depth of {MAX_NESTING_DEPTH}."), + format!("Block quote nesting exceeds maximum depth of {max_nesting_depth}."), range, ) .with_detail(range, "nesting limit reached here") @@ -149,11 +153,22 @@ pub(crate) fn quote_nesting_too_deep(p: &MarkdownParser, range: TextRange) -> Pa /// - - - - ... - (100+ levels) /// ^^^^^^^^^^^^^^ nesting too deep /// ``` -pub(crate) fn list_nesting_too_deep(p: &MarkdownParser, range: TextRange) -> ParseDiagnostic { +pub(crate) fn list_nesting_too_deep( + p: &MarkdownParser, + range: TextRange, + max_nesting_depth: usize, +) -> ParseDiagnostic { p.err_builder( - format!("List nesting exceeds maximum depth of {MAX_NESTING_DEPTH}."), + format!("List nesting exceeds maximum depth of {max_nesting_depth}."), range, ) .with_detail(range, "nesting limit reached here") .with_hint("Reduce nesting depth. Additional levels will be treated as content.") } + +/// Parser made no progress while parsing a block. 
+pub(crate) fn parse_any_block_no_progress(p: &MarkdownParser, range: TextRange) -> ParseDiagnostic { + p.err_builder("Parser made no progress while parsing a block.", range) + .with_detail(range, "stuck token skipped") + .with_hint("This is likely a parser bug; the token was skipped to recover.") +} diff --git a/crates/biome_markdown_parser/src/syntax/quote.rs b/crates/biome_markdown_parser/src/syntax/quote.rs index 416d61e0a98d..32791e26f2f7 100644 --- a/crates/biome_markdown_parser/src/syntax/quote.rs +++ b/crates/biome_markdown_parser/src/syntax/quote.rs @@ -13,8 +13,9 @@ //! ## Depth Limits //! //! To prevent stack overflow from pathological input (e.g., hundreds of `>`), -//! nesting depth is limited to 100 levels. Deeper nesting emits a diagnostic -//! and treats additional `>` as content. +//! nesting depth is limited by `MarkdownParseOptions::max_nesting_depth` +//! (default: 100). Deeper nesting emits a diagnostic and treats additional +//! `>` as content. //! //! ## Lazy Continuation (§5.1) //! @@ -35,7 +36,7 @@ use biome_markdown_syntax::kind::MarkdownSyntaxKind::*; use biome_parser::Parser; use biome_parser::prelude::ParsedSyntax::{self, *}; -use super::parse_error::{MAX_NESTING_DEPTH, quote_nesting_too_deep}; +use super::parse_error::quote_nesting_too_deep; use crate::MarkdownParser; /// Check if we're at the start of a block quote (`>`). @@ -66,15 +67,25 @@ pub(crate) fn at_quote(p: &mut MarkdownParser) -> bool { /// Multi-line quotes: consecutive `>` lines continue the same quote's content. /// Nested quotes: `>>` creates a nested quote inside the outer quote. /// -/// Nesting is limited to `MAX_NESTING_DEPTH` to prevent stack overflow. +/// Nesting is limited to `MarkdownParseOptions::max_nesting_depth` to prevent stack overflow. 
pub(crate) fn parse_quote(p: &mut MarkdownParser) -> ParsedSyntax { if !at_quote(p) { return Absent; } - if p.state().block_quote_depth >= MAX_NESTING_DEPTH { + let max_nesting_depth = p.options().max_nesting_depth; + if p.state().block_quote_depth >= max_nesting_depth { let range = p.cur_range(); - p.error(quote_nesting_too_deep(p, range)); + p.error(quote_nesting_too_deep(p, range, max_nesting_depth)); + p.state_mut().quote_depth_exceeded = true; + p.skip_line_indent(3); + if p.at(T![>]) { + p.parse_as_skipped_trivia_tokens(|p| p.bump(T![>])); + } else if p.at(MD_TEXTUAL_LITERAL) && p.cur_text() == ">" { + p.parse_as_skipped_trivia_tokens(|p| p.bump_remap(T![>])); + } + let has_indented_code = at_quote_indented_code_start(p); + skip_optional_marker_space(p, has_indented_code); return Absent; } @@ -111,6 +122,11 @@ fn parse_quote_block_list(p: &mut MarkdownParser) { let mut last_block_was_paragraph = false; loop { + if p.state().quote_depth_exceeded { + p.state_mut().quote_depth_exceeded = false; + break; + } + if p.at(T![EOF]) { break; } diff --git a/crates/biome_markdown_parser/tests/md_test_suite/error/quote_nesting_too_deep.md.snap b/crates/biome_markdown_parser/tests/md_test_suite/error/quote_nesting_too_deep.md.snap index 1b95c0525e88..299145c63f15 100644 --- a/crates/biome_markdown_parser/tests/md_test_suite/error/quote_nesting_too_deep.md.snap +++ b/crates/biome_markdown_parser/tests/md_test_suite/error/quote_nesting_too_deep.md.snap @@ -1,5 +1,6 @@ --- source: crates/biome_markdown_parser/tests/spec_test.rs +assertion_line: 131 expression: snapshot --- ## Input @@ -95,17 +96,6 @@ MdDocument { }, ], }, - MdParagraph { - list: MdInlineItemList [ - MdTextual { - value_token: MD_TEXTUAL_LITERAL@100..110 "Too deep" [Skipped(">"), Skipped(" ")] [], - }, - MdTextual { - value_token: MD_TEXTUAL_LITERAL@110..111 "\n" [] [], - }, - ], - hard_line: missing (optional), - }, ], }, ], @@ -122,310 +112,310 @@ MdDocument { 0: MD_QUOTE@0..111 0: R_ANGLE@0..1 ">" [] [] 1: 
MD_BLOCK_LIST@1..111 - 0: MD_QUOTE@1..100 + 0: MD_QUOTE@1..111 0: R_ANGLE@1..2 ">" [] [] - 1: MD_BLOCK_LIST@2..100 - 0: MD_QUOTE@2..100 + 1: MD_BLOCK_LIST@2..111 + 0: MD_QUOTE@2..111 0: R_ANGLE@2..3 ">" [] [] - 1: MD_BLOCK_LIST@3..100 - 0: MD_QUOTE@3..100 + 1: MD_BLOCK_LIST@3..111 + 0: MD_QUOTE@3..111 0: R_ANGLE@3..4 ">" [] [] - 1: MD_BLOCK_LIST@4..100 - 0: MD_QUOTE@4..100 + 1: MD_BLOCK_LIST@4..111 + 0: MD_QUOTE@4..111 0: R_ANGLE@4..5 ">" [] [] - 1: MD_BLOCK_LIST@5..100 - 0: MD_QUOTE@5..100 + 1: MD_BLOCK_LIST@5..111 + 0: MD_QUOTE@5..111 0: R_ANGLE@5..6 ">" [] [] - 1: MD_BLOCK_LIST@6..100 - 0: MD_QUOTE@6..100 + 1: MD_BLOCK_LIST@6..111 + 0: MD_QUOTE@6..111 0: R_ANGLE@6..7 ">" [] [] - 1: MD_BLOCK_LIST@7..100 - 0: MD_QUOTE@7..100 + 1: MD_BLOCK_LIST@7..111 + 0: MD_QUOTE@7..111 0: R_ANGLE@7..8 ">" [] [] - 1: MD_BLOCK_LIST@8..100 - 0: MD_QUOTE@8..100 + 1: MD_BLOCK_LIST@8..111 + 0: MD_QUOTE@8..111 0: R_ANGLE@8..9 ">" [] [] - 1: MD_BLOCK_LIST@9..100 - 0: MD_QUOTE@9..100 + 1: MD_BLOCK_LIST@9..111 + 0: MD_QUOTE@9..111 0: R_ANGLE@9..10 ">" [] [] - 1: MD_BLOCK_LIST@10..100 - 0: MD_QUOTE@10..100 + 1: MD_BLOCK_LIST@10..111 + 0: MD_QUOTE@10..111 0: R_ANGLE@10..11 ">" [] [] - 1: MD_BLOCK_LIST@11..100 - 0: MD_QUOTE@11..100 + 1: MD_BLOCK_LIST@11..111 + 0: MD_QUOTE@11..111 0: R_ANGLE@11..12 ">" [] [] - 1: MD_BLOCK_LIST@12..100 - 0: MD_QUOTE@12..100 + 1: MD_BLOCK_LIST@12..111 + 0: MD_QUOTE@12..111 0: R_ANGLE@12..13 ">" [] [] - 1: MD_BLOCK_LIST@13..100 - 0: MD_QUOTE@13..100 + 1: MD_BLOCK_LIST@13..111 + 0: MD_QUOTE@13..111 0: R_ANGLE@13..14 ">" [] [] - 1: MD_BLOCK_LIST@14..100 - 0: MD_QUOTE@14..100 + 1: MD_BLOCK_LIST@14..111 + 0: MD_QUOTE@14..111 0: R_ANGLE@14..15 ">" [] [] - 1: MD_BLOCK_LIST@15..100 - 0: MD_QUOTE@15..100 + 1: MD_BLOCK_LIST@15..111 + 0: MD_QUOTE@15..111 0: R_ANGLE@15..16 ">" [] [] - 1: MD_BLOCK_LIST@16..100 - 0: MD_QUOTE@16..100 + 1: MD_BLOCK_LIST@16..111 + 0: MD_QUOTE@16..111 0: R_ANGLE@16..17 ">" [] [] - 1: MD_BLOCK_LIST@17..100 - 0: MD_QUOTE@17..100 + 1: 
MD_BLOCK_LIST@17..111 + 0: MD_QUOTE@17..111 0: R_ANGLE@17..18 ">" [] [] - 1: MD_BLOCK_LIST@18..100 - 0: MD_QUOTE@18..100 + 1: MD_BLOCK_LIST@18..111 + 0: MD_QUOTE@18..111 0: R_ANGLE@18..19 ">" [] [] - 1: MD_BLOCK_LIST@19..100 - 0: MD_QUOTE@19..100 + 1: MD_BLOCK_LIST@19..111 + 0: MD_QUOTE@19..111 0: R_ANGLE@19..20 ">" [] [] - 1: MD_BLOCK_LIST@20..100 - 0: MD_QUOTE@20..100 + 1: MD_BLOCK_LIST@20..111 + 0: MD_QUOTE@20..111 0: R_ANGLE@20..21 ">" [] [] - 1: MD_BLOCK_LIST@21..100 - 0: MD_QUOTE@21..100 + 1: MD_BLOCK_LIST@21..111 + 0: MD_QUOTE@21..111 0: R_ANGLE@21..22 ">" [] [] - 1: MD_BLOCK_LIST@22..100 - 0: MD_QUOTE@22..100 + 1: MD_BLOCK_LIST@22..111 + 0: MD_QUOTE@22..111 0: R_ANGLE@22..23 ">" [] [] - 1: MD_BLOCK_LIST@23..100 - 0: MD_QUOTE@23..100 + 1: MD_BLOCK_LIST@23..111 + 0: MD_QUOTE@23..111 0: R_ANGLE@23..24 ">" [] [] - 1: MD_BLOCK_LIST@24..100 - 0: MD_QUOTE@24..100 + 1: MD_BLOCK_LIST@24..111 + 0: MD_QUOTE@24..111 0: R_ANGLE@24..25 ">" [] [] - 1: MD_BLOCK_LIST@25..100 - 0: MD_QUOTE@25..100 + 1: MD_BLOCK_LIST@25..111 + 0: MD_QUOTE@25..111 0: R_ANGLE@25..26 ">" [] [] - 1: MD_BLOCK_LIST@26..100 - 0: MD_QUOTE@26..100 + 1: MD_BLOCK_LIST@26..111 + 0: MD_QUOTE@26..111 0: R_ANGLE@26..27 ">" [] [] - 1: MD_BLOCK_LIST@27..100 - 0: MD_QUOTE@27..100 + 1: MD_BLOCK_LIST@27..111 + 0: MD_QUOTE@27..111 0: R_ANGLE@27..28 ">" [] [] - 1: MD_BLOCK_LIST@28..100 - 0: MD_QUOTE@28..100 + 1: MD_BLOCK_LIST@28..111 + 0: MD_QUOTE@28..111 0: R_ANGLE@28..29 ">" [] [] - 1: MD_BLOCK_LIST@29..100 - 0: MD_QUOTE@29..100 + 1: MD_BLOCK_LIST@29..111 + 0: MD_QUOTE@29..111 0: R_ANGLE@29..30 ">" [] [] - 1: MD_BLOCK_LIST@30..100 - 0: MD_QUOTE@30..100 + 1: MD_BLOCK_LIST@30..111 + 0: MD_QUOTE@30..111 0: R_ANGLE@30..31 ">" [] [] - 1: MD_BLOCK_LIST@31..100 - 0: MD_QUOTE@31..100 + 1: MD_BLOCK_LIST@31..111 + 0: MD_QUOTE@31..111 0: R_ANGLE@31..32 ">" [] [] - 1: MD_BLOCK_LIST@32..100 - 0: MD_QUOTE@32..100 + 1: MD_BLOCK_LIST@32..111 + 0: MD_QUOTE@32..111 0: R_ANGLE@32..33 ">" [] [] - 1: MD_BLOCK_LIST@33..100 - 0: 
MD_QUOTE@33..100 + 1: MD_BLOCK_LIST@33..111 + 0: MD_QUOTE@33..111 0: R_ANGLE@33..34 ">" [] [] - 1: MD_BLOCK_LIST@34..100 - 0: MD_QUOTE@34..100 + 1: MD_BLOCK_LIST@34..111 + 0: MD_QUOTE@34..111 0: R_ANGLE@34..35 ">" [] [] - 1: MD_BLOCK_LIST@35..100 - 0: MD_QUOTE@35..100 + 1: MD_BLOCK_LIST@35..111 + 0: MD_QUOTE@35..111 0: R_ANGLE@35..36 ">" [] [] - 1: MD_BLOCK_LIST@36..100 - 0: MD_QUOTE@36..100 + 1: MD_BLOCK_LIST@36..111 + 0: MD_QUOTE@36..111 0: R_ANGLE@36..37 ">" [] [] - 1: MD_BLOCK_LIST@37..100 - 0: MD_QUOTE@37..100 + 1: MD_BLOCK_LIST@37..111 + 0: MD_QUOTE@37..111 0: R_ANGLE@37..38 ">" [] [] - 1: MD_BLOCK_LIST@38..100 - 0: MD_QUOTE@38..100 + 1: MD_BLOCK_LIST@38..111 + 0: MD_QUOTE@38..111 0: R_ANGLE@38..39 ">" [] [] - 1: MD_BLOCK_LIST@39..100 - 0: MD_QUOTE@39..100 + 1: MD_BLOCK_LIST@39..111 + 0: MD_QUOTE@39..111 0: R_ANGLE@39..40 ">" [] [] - 1: MD_BLOCK_LIST@40..100 - 0: MD_QUOTE@40..100 + 1: MD_BLOCK_LIST@40..111 + 0: MD_QUOTE@40..111 0: R_ANGLE@40..41 ">" [] [] - 1: MD_BLOCK_LIST@41..100 - 0: MD_QUOTE@41..100 + 1: MD_BLOCK_LIST@41..111 + 0: MD_QUOTE@41..111 0: R_ANGLE@41..42 ">" [] [] - 1: MD_BLOCK_LIST@42..100 - 0: MD_QUOTE@42..100 + 1: MD_BLOCK_LIST@42..111 + 0: MD_QUOTE@42..111 0: R_ANGLE@42..43 ">" [] [] - 1: MD_BLOCK_LIST@43..100 - 0: MD_QUOTE@43..100 + 1: MD_BLOCK_LIST@43..111 + 0: MD_QUOTE@43..111 0: R_ANGLE@43..44 ">" [] [] - 1: MD_BLOCK_LIST@44..100 - 0: MD_QUOTE@44..100 + 1: MD_BLOCK_LIST@44..111 + 0: MD_QUOTE@44..111 0: R_ANGLE@44..45 ">" [] [] - 1: MD_BLOCK_LIST@45..100 - 0: MD_QUOTE@45..100 + 1: MD_BLOCK_LIST@45..111 + 0: MD_QUOTE@45..111 0: R_ANGLE@45..46 ">" [] [] - 1: MD_BLOCK_LIST@46..100 - 0: MD_QUOTE@46..100 + 1: MD_BLOCK_LIST@46..111 + 0: MD_QUOTE@46..111 0: R_ANGLE@46..47 ">" [] [] - 1: MD_BLOCK_LIST@47..100 - 0: MD_QUOTE@47..100 + 1: MD_BLOCK_LIST@47..111 + 0: MD_QUOTE@47..111 0: R_ANGLE@47..48 ">" [] [] - 1: MD_BLOCK_LIST@48..100 - 0: MD_QUOTE@48..100 + 1: MD_BLOCK_LIST@48..111 + 0: MD_QUOTE@48..111 0: R_ANGLE@48..49 ">" [] [] - 1: 
MD_BLOCK_LIST@49..100 - 0: MD_QUOTE@49..100 + 1: MD_BLOCK_LIST@49..111 + 0: MD_QUOTE@49..111 0: R_ANGLE@49..50 ">" [] [] - 1: MD_BLOCK_LIST@50..100 - 0: MD_QUOTE@50..100 + 1: MD_BLOCK_LIST@50..111 + 0: MD_QUOTE@50..111 0: R_ANGLE@50..51 ">" [] [] - 1: MD_BLOCK_LIST@51..100 - 0: MD_QUOTE@51..100 + 1: MD_BLOCK_LIST@51..111 + 0: MD_QUOTE@51..111 0: R_ANGLE@51..52 ">" [] [] - 1: MD_BLOCK_LIST@52..100 - 0: MD_QUOTE@52..100 + 1: MD_BLOCK_LIST@52..111 + 0: MD_QUOTE@52..111 0: R_ANGLE@52..53 ">" [] [] - 1: MD_BLOCK_LIST@53..100 - 0: MD_QUOTE@53..100 + 1: MD_BLOCK_LIST@53..111 + 0: MD_QUOTE@53..111 0: R_ANGLE@53..54 ">" [] [] - 1: MD_BLOCK_LIST@54..100 - 0: MD_QUOTE@54..100 + 1: MD_BLOCK_LIST@54..111 + 0: MD_QUOTE@54..111 0: R_ANGLE@54..55 ">" [] [] - 1: MD_BLOCK_LIST@55..100 - 0: MD_QUOTE@55..100 + 1: MD_BLOCK_LIST@55..111 + 0: MD_QUOTE@55..111 0: R_ANGLE@55..56 ">" [] [] - 1: MD_BLOCK_LIST@56..100 - 0: MD_QUOTE@56..100 + 1: MD_BLOCK_LIST@56..111 + 0: MD_QUOTE@56..111 0: R_ANGLE@56..57 ">" [] [] - 1: MD_BLOCK_LIST@57..100 - 0: MD_QUOTE@57..100 + 1: MD_BLOCK_LIST@57..111 + 0: MD_QUOTE@57..111 0: R_ANGLE@57..58 ">" [] [] - 1: MD_BLOCK_LIST@58..100 - 0: MD_QUOTE@58..100 + 1: MD_BLOCK_LIST@58..111 + 0: MD_QUOTE@58..111 0: R_ANGLE@58..59 ">" [] [] - 1: MD_BLOCK_LIST@59..100 - 0: MD_QUOTE@59..100 + 1: MD_BLOCK_LIST@59..111 + 0: MD_QUOTE@59..111 0: R_ANGLE@59..60 ">" [] [] - 1: MD_BLOCK_LIST@60..100 - 0: MD_QUOTE@60..100 + 1: MD_BLOCK_LIST@60..111 + 0: MD_QUOTE@60..111 0: R_ANGLE@60..61 ">" [] [] - 1: MD_BLOCK_LIST@61..100 - 0: MD_QUOTE@61..100 + 1: MD_BLOCK_LIST@61..111 + 0: MD_QUOTE@61..111 0: R_ANGLE@61..62 ">" [] [] - 1: MD_BLOCK_LIST@62..100 - 0: MD_QUOTE@62..100 + 1: MD_BLOCK_LIST@62..111 + 0: MD_QUOTE@62..111 0: R_ANGLE@62..63 ">" [] [] - 1: MD_BLOCK_LIST@63..100 - 0: MD_QUOTE@63..100 + 1: MD_BLOCK_LIST@63..111 + 0: MD_QUOTE@63..111 0: R_ANGLE@63..64 ">" [] [] - 1: MD_BLOCK_LIST@64..100 - 0: MD_QUOTE@64..100 + 1: MD_BLOCK_LIST@64..111 + 0: MD_QUOTE@64..111 0: 
R_ANGLE@64..65 ">" [] [] - 1: MD_BLOCK_LIST@65..100 - 0: MD_QUOTE@65..100 + 1: MD_BLOCK_LIST@65..111 + 0: MD_QUOTE@65..111 0: R_ANGLE@65..66 ">" [] [] - 1: MD_BLOCK_LIST@66..100 - 0: MD_QUOTE@66..100 + 1: MD_BLOCK_LIST@66..111 + 0: MD_QUOTE@66..111 0: R_ANGLE@66..67 ">" [] [] - 1: MD_BLOCK_LIST@67..100 - 0: MD_QUOTE@67..100 + 1: MD_BLOCK_LIST@67..111 + 0: MD_QUOTE@67..111 0: R_ANGLE@67..68 ">" [] [] - 1: MD_BLOCK_LIST@68..100 - 0: MD_QUOTE@68..100 + 1: MD_BLOCK_LIST@68..111 + 0: MD_QUOTE@68..111 0: R_ANGLE@68..69 ">" [] [] - 1: MD_BLOCK_LIST@69..100 - 0: MD_QUOTE@69..100 + 1: MD_BLOCK_LIST@69..111 + 0: MD_QUOTE@69..111 0: R_ANGLE@69..70 ">" [] [] - 1: MD_BLOCK_LIST@70..100 - 0: MD_QUOTE@70..100 + 1: MD_BLOCK_LIST@70..111 + 0: MD_QUOTE@70..111 0: R_ANGLE@70..71 ">" [] [] - 1: MD_BLOCK_LIST@71..100 - 0: MD_QUOTE@71..100 + 1: MD_BLOCK_LIST@71..111 + 0: MD_QUOTE@71..111 0: R_ANGLE@71..72 ">" [] [] - 1: MD_BLOCK_LIST@72..100 - 0: MD_QUOTE@72..100 + 1: MD_BLOCK_LIST@72..111 + 0: MD_QUOTE@72..111 0: R_ANGLE@72..73 ">" [] [] - 1: MD_BLOCK_LIST@73..100 - 0: MD_QUOTE@73..100 + 1: MD_BLOCK_LIST@73..111 + 0: MD_QUOTE@73..111 0: R_ANGLE@73..74 ">" [] [] - 1: MD_BLOCK_LIST@74..100 - 0: MD_QUOTE@74..100 + 1: MD_BLOCK_LIST@74..111 + 0: MD_QUOTE@74..111 0: R_ANGLE@74..75 ">" [] [] - 1: MD_BLOCK_LIST@75..100 - 0: MD_QUOTE@75..100 + 1: MD_BLOCK_LIST@75..111 + 0: MD_QUOTE@75..111 0: R_ANGLE@75..76 ">" [] [] - 1: MD_BLOCK_LIST@76..100 - 0: MD_QUOTE@76..100 + 1: MD_BLOCK_LIST@76..111 + 0: MD_QUOTE@76..111 0: R_ANGLE@76..77 ">" [] [] - 1: MD_BLOCK_LIST@77..100 - 0: MD_QUOTE@77..100 + 1: MD_BLOCK_LIST@77..111 + 0: MD_QUOTE@77..111 0: R_ANGLE@77..78 ">" [] [] - 1: MD_BLOCK_LIST@78..100 - 0: MD_QUOTE@78..100 + 1: MD_BLOCK_LIST@78..111 + 0: MD_QUOTE@78..111 0: R_ANGLE@78..79 ">" [] [] - 1: MD_BLOCK_LIST@79..100 - 0: MD_QUOTE@79..100 + 1: MD_BLOCK_LIST@79..111 + 0: MD_QUOTE@79..111 0: R_ANGLE@79..80 ">" [] [] - 1: MD_BLOCK_LIST@80..100 - 0: MD_QUOTE@80..100 + 1: MD_BLOCK_LIST@80..111 + 0: 
MD_QUOTE@80..111 0: R_ANGLE@80..81 ">" [] [] - 1: MD_BLOCK_LIST@81..100 - 0: MD_QUOTE@81..100 + 1: MD_BLOCK_LIST@81..111 + 0: MD_QUOTE@81..111 0: R_ANGLE@81..82 ">" [] [] - 1: MD_BLOCK_LIST@82..100 - 0: MD_QUOTE@82..100 + 1: MD_BLOCK_LIST@82..111 + 0: MD_QUOTE@82..111 0: R_ANGLE@82..83 ">" [] [] - 1: MD_BLOCK_LIST@83..100 - 0: MD_QUOTE@83..100 + 1: MD_BLOCK_LIST@83..111 + 0: MD_QUOTE@83..111 0: R_ANGLE@83..84 ">" [] [] - 1: MD_BLOCK_LIST@84..100 - 0: MD_QUOTE@84..100 + 1: MD_BLOCK_LIST@84..111 + 0: MD_QUOTE@84..111 0: R_ANGLE@84..85 ">" [] [] - 1: MD_BLOCK_LIST@85..100 - 0: MD_QUOTE@85..100 + 1: MD_BLOCK_LIST@85..111 + 0: MD_QUOTE@85..111 0: R_ANGLE@85..86 ">" [] [] - 1: MD_BLOCK_LIST@86..100 - 0: MD_QUOTE@86..100 + 1: MD_BLOCK_LIST@86..111 + 0: MD_QUOTE@86..111 0: R_ANGLE@86..87 ">" [] [] - 1: MD_BLOCK_LIST@87..100 - 0: MD_QUOTE@87..100 + 1: MD_BLOCK_LIST@87..111 + 0: MD_QUOTE@87..111 0: R_ANGLE@87..88 ">" [] [] - 1: MD_BLOCK_LIST@88..100 - 0: MD_QUOTE@88..100 + 1: MD_BLOCK_LIST@88..111 + 0: MD_QUOTE@88..111 0: R_ANGLE@88..89 ">" [] [] - 1: MD_BLOCK_LIST@89..100 - 0: MD_QUOTE@89..100 + 1: MD_BLOCK_LIST@89..111 + 0: MD_QUOTE@89..111 0: R_ANGLE@89..90 ">" [] [] - 1: MD_BLOCK_LIST@90..100 - 0: MD_QUOTE@90..100 + 1: MD_BLOCK_LIST@90..111 + 0: MD_QUOTE@90..111 0: R_ANGLE@90..91 ">" [] [] - 1: MD_BLOCK_LIST@91..100 - 0: MD_QUOTE@91..100 + 1: MD_BLOCK_LIST@91..111 + 0: MD_QUOTE@91..111 0: R_ANGLE@91..92 ">" [] [] - 1: MD_BLOCK_LIST@92..100 - 0: MD_QUOTE@92..100 + 1: MD_BLOCK_LIST@92..111 + 0: MD_QUOTE@92..111 0: R_ANGLE@92..93 ">" [] [] - 1: MD_BLOCK_LIST@93..100 - 0: MD_QUOTE@93..100 + 1: MD_BLOCK_LIST@93..111 + 0: MD_QUOTE@93..111 0: R_ANGLE@93..94 ">" [] [] - 1: MD_BLOCK_LIST@94..100 - 0: MD_QUOTE@94..100 + 1: MD_BLOCK_LIST@94..111 + 0: MD_QUOTE@94..111 0: R_ANGLE@94..95 ">" [] [] - 1: MD_BLOCK_LIST@95..100 - 0: MD_QUOTE@95..100 + 1: MD_BLOCK_LIST@95..111 + 0: MD_QUOTE@95..111 0: R_ANGLE@95..96 ">" [] [] - 1: MD_BLOCK_LIST@96..100 - 0: MD_QUOTE@96..100 + 1: 
MD_BLOCK_LIST@96..111 + 0: MD_QUOTE@96..111 0: R_ANGLE@96..97 ">" [] [] - 1: MD_BLOCK_LIST@97..100 - 0: MD_QUOTE@97..100 + 1: MD_BLOCK_LIST@97..111 + 0: MD_QUOTE@97..111 0: R_ANGLE@97..98 ">" [] [] - 1: MD_BLOCK_LIST@98..100 - 0: MD_QUOTE@98..100 + 1: MD_BLOCK_LIST@98..111 + 0: MD_QUOTE@98..111 0: R_ANGLE@98..99 ">" [] [] - 1: MD_BLOCK_LIST@99..100 + 1: MD_BLOCK_LIST@99..111 0: MD_QUOTE@99..100 0: R_ANGLE@99..100 ">" [] [] 1: MD_BLOCK_LIST@100..100 - 1: MD_PARAGRAPH@100..111 - 0: MD_INLINE_ITEM_LIST@100..111 - 0: MD_TEXTUAL@100..110 - 0: MD_TEXTUAL_LITERAL@100..110 "Too deep" [Skipped(">"), Skipped(" ")] [] - 1: MD_TEXTUAL@110..111 - 0: MD_TEXTUAL_LITERAL@110..111 "\n" [] [] - 1: (empty) + 1: MD_PARAGRAPH@100..111 + 0: MD_INLINE_ITEM_LIST@100..111 + 0: MD_TEXTUAL@100..110 + 0: MD_TEXTUAL_LITERAL@100..110 "Too deep" [Skipped(">"), Skipped(" ")] [] + 1: MD_TEXTUAL@110..111 + 0: MD_TEXTUAL_LITERAL@110..111 "\n" [] [] + 1: (empty) 2: EOF@111..111 "" [] [] ``` diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/bullet_list.md.snap b/crates/biome_markdown_parser/tests/md_test_suite/ok/bullet_list.md.snap index ad8b4d5076aa..a9d32ac03dd2 100644 --- a/crates/biome_markdown_parser/tests/md_test_suite/ok/bullet_list.md.snap +++ b/crates/biome_markdown_parser/tests/md_test_suite/ok/bullet_list.md.snap @@ -1,5 +1,6 @@ --- source: crates/biome_markdown_parser/tests/spec_test.rs +assertion_line: 131 expression: snapshot --- ## Input @@ -77,6 +78,10 @@ MdDocument { }, ], }, + ], + }, + MdBulletListItem { + md_bullet_list: MdBulletList [ MdBullet { bullet: STAR@36..37 "*" [] [], content: MdBlockList [ @@ -112,6 +117,10 @@ MdDocument { }, ], }, + ], + }, + MdBulletListItem { + md_bullet_list: MdBulletList [ MdBullet { bullet: PLUS@73..74 "+" [] [], content: MdBlockList [ @@ -157,8 +166,8 @@ MdDocument { 0: MD_DOCUMENT@0..107 0: (empty) 1: MD_BLOCK_LIST@0..107 - 0: MD_BULLET_LIST_ITEM@0..107 - 0: MD_BULLET_LIST@0..107 + 0: MD_BULLET_LIST_ITEM@0..36 + 0: 
MD_BULLET_LIST@0..36 0: MD_BULLET@0..11 0: MINUS@0..1 "-" [] [] 1: MD_BLOCK_LIST@1..11 @@ -191,7 +200,9 @@ MdDocument { 1: (empty) 1: MD_NEWLINE@35..36 0: NEWLINE@35..36 "\n" [] [] - 3: MD_BULLET@36..57 + 1: MD_BULLET_LIST_ITEM@36..73 + 0: MD_BULLET_LIST@36..73 + 0: MD_BULLET@36..57 0: STAR@36..37 "*" [] [] 1: MD_BLOCK_LIST@37..57 0: MD_PARAGRAPH@37..57 @@ -201,7 +212,7 @@ MdDocument { 1: MD_TEXTUAL@56..57 0: MD_TEXTUAL_LITERAL@56..57 "\n" [] [] 1: (empty) - 4: MD_BULLET@57..73 + 1: MD_BULLET@57..73 0: STAR@57..58 "*" [] [] 1: MD_BLOCK_LIST@58..73 0: MD_PARAGRAPH@58..72 @@ -213,7 +224,9 @@ MdDocument { 1: (empty) 1: MD_NEWLINE@72..73 0: NEWLINE@72..73 "\n" [] [] - 5: MD_BULLET@73..87 + 2: MD_BULLET_LIST_ITEM@73..107 + 0: MD_BULLET_LIST@73..107 + 0: MD_BULLET@73..87 0: PLUS@73..74 "+" [] [] 1: MD_BLOCK_LIST@74..87 0: MD_PARAGRAPH@74..87 @@ -223,7 +236,7 @@ MdDocument { 1: MD_TEXTUAL@86..87 0: MD_TEXTUAL_LITERAL@86..87 "\n" [] [] 1: (empty) - 6: MD_BULLET@87..107 + 1: MD_BULLET@87..107 0: PLUS@87..88 "+" [] [] 1: MD_BLOCK_LIST@88..107 0: MD_PARAGRAPH@88..107 diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/edge_cases.md.snap b/crates/biome_markdown_parser/tests/md_test_suite/ok/edge_cases.md.snap index 216d30698c94..14af37a7897b 100644 --- a/crates/biome_markdown_parser/tests/md_test_suite/ok/edge_cases.md.snap +++ b/crates/biome_markdown_parser/tests/md_test_suite/ok/edge_cases.md.snap @@ -167,11 +167,11 @@ MdDocument { MdTextual { value_token: MD_TEXTUAL_LITERAL@160..161 "\n" [] [], }, - MdTextual { - value_token: MD_TEXTUAL_LITERAL@161..162 "\n" [] [], - }, ], }, + MdNewline { + value_token: NEWLINE@161..162 "\n" [] [], + }, MdHeader { before: MdHashList [ MdHash { @@ -228,19 +228,7 @@ MdDocument { value_token: MD_TEXTUAL_LITERAL@240..241 "\n" [] [], }, MdTextual { - value_token: MD_TEXTUAL_LITERAL@241..242 " " [] [], - }, - MdTextual { - value_token: MD_TEXTUAL_LITERAL@242..243 " " [] [], - }, - MdTextual { - value_token: 
MD_TEXTUAL_LITERAL@243..244 " " [] [], - }, - MdTextual { - value_token: MD_TEXTUAL_LITERAL@244..245 " " [] [], - }, - MdTextual { - value_token: MD_TEXTUAL_LITERAL@245..246 "#" [] [], + value_token: MD_TEXTUAL_LITERAL@241..246 "#" [Skipped(" "), Skipped(" "), Skipped(" "), Skipped(" ")] [], }, MdTextual { value_token: MD_TEXTUAL_LITERAL@246..267 " not heading due to 4" [] [], @@ -255,19 +243,7 @@ MdDocument { value_token: MD_TEXTUAL_LITERAL@275..276 "\n" [] [], }, MdTextual { - value_token: MD_TEXTUAL_LITERAL@276..277 " " [] [], - }, - MdTextual { - value_token: MD_TEXTUAL_LITERAL@277..278 " " [] [], - }, - MdTextual { - value_token: MD_TEXTUAL_LITERAL@278..279 " " [] [], - }, - MdTextual { - value_token: MD_TEXTUAL_LITERAL@279..280 " " [] [], - }, - MdTextual { - value_token: MD_TEXTUAL_LITERAL@280..281 "-" [] [], + value_token: MD_TEXTUAL_LITERAL@276..281 "-" [Skipped(" "), Skipped(" "), Skipped(" "), Skipped(" ")] [], }, MdTextual { value_token: MD_TEXTUAL_LITERAL@281..299 " not list due to 4" [] [], @@ -282,19 +258,7 @@ MdDocument { value_token: MD_TEXTUAL_LITERAL@307..308 "\n" [] [], }, MdTextual { - value_token: MD_TEXTUAL_LITERAL@308..309 " " [] [], - }, - MdTextual { - value_token: MD_TEXTUAL_LITERAL@309..310 " " [] [], - }, - MdTextual { - value_token: MD_TEXTUAL_LITERAL@310..311 " " [] [], - }, - MdTextual { - value_token: MD_TEXTUAL_LITERAL@311..312 " " [] [], - }, - MdTextual { - value_token: MD_TEXTUAL_LITERAL@312..313 ">" [] [], + value_token: MD_TEXTUAL_LITERAL@308..313 ">" [Skipped(" "), Skipped(" "), Skipped(" "), Skipped(" ")] [], }, MdTextual { value_token: MD_TEXTUAL_LITERAL@313..332 " not quote due to 4" [] [], @@ -371,55 +335,19 @@ MdDocument { value_token: MD_TEXTUAL_LITERAL@442..443 "\n" [] [], }, MdTextual { - value_token: MD_TEXTUAL_LITERAL@443..444 " " [] [], - }, - MdTextual { - value_token: MD_TEXTUAL_LITERAL@444..445 " " [] [], - }, - MdTextual { - value_token: MD_TEXTUAL_LITERAL@445..446 " " [] [], - }, - MdTextual { - value_token: 
MD_TEXTUAL_LITERAL@446..447 " " [] [], - }, - MdTextual { - value_token: MD_TEXTUAL_LITERAL@447..450 "```" [] [], + value_token: MD_TEXTUAL_LITERAL@443..450 "```" [Skipped(" "), Skipped(" "), Skipped(" "), Skipped(" ")] [], }, MdTextual { value_token: MD_TEXTUAL_LITERAL@450..451 "\n" [] [], }, MdTextual { - value_token: MD_TEXTUAL_LITERAL@451..452 " " [] [], - }, - MdTextual { - value_token: MD_TEXTUAL_LITERAL@452..453 " " [] [], - }, - MdTextual { - value_token: MD_TEXTUAL_LITERAL@453..454 " " [] [], - }, - MdTextual { - value_token: MD_TEXTUAL_LITERAL@454..455 " " [] [], - }, - MdTextual { - value_token: MD_TEXTUAL_LITERAL@455..471 "not a code fence" [] [], + value_token: MD_TEXTUAL_LITERAL@451..471 "not a code fence" [Skipped(" "), Skipped(" "), Skipped(" "), Skipped(" ")] [], }, MdTextual { value_token: MD_TEXTUAL_LITERAL@471..472 "\n" [] [], }, MdTextual { - value_token: MD_TEXTUAL_LITERAL@472..473 " " [] [], - }, - MdTextual { - value_token: MD_TEXTUAL_LITERAL@473..474 " " [] [], - }, - MdTextual { - value_token: MD_TEXTUAL_LITERAL@474..475 " " [] [], - }, - MdTextual { - value_token: MD_TEXTUAL_LITERAL@475..476 " " [] [], - }, - MdTextual { - value_token: MD_TEXTUAL_LITERAL@476..479 "```" [] [], + value_token: MD_TEXTUAL_LITERAL@472..479 "```" [Skipped(" "), Skipped(" "), Skipped(" "), Skipped(" ")] [], }, MdTextual { value_token: MD_TEXTUAL_LITERAL@479..480 "\n" [] [], @@ -498,8 +426,8 @@ MdDocument { 1: (empty) 7: MD_NEWLINE@101..102 0: NEWLINE@101..102 "\n" [] [] - 8: MD_INDENT_CODE_BLOCK@102..162 - 0: MD_INLINE_ITEM_LIST@102..162 + 8: MD_INDENT_CODE_BLOCK@102..161 + 0: MD_INLINE_ITEM_LIST@102..161 0: MD_TEXTUAL@102..107 0: MD_TEXTUAL_LITERAL@102..107 "*" [Skipped(" "), Skipped(" "), Skipped(" "), Skipped(" ")] [] 1: MD_TEXTUAL@107..116 @@ -524,9 +452,9 @@ MdDocument { 0: MD_TEXTUAL_LITERAL@156..160 " too" [] [] 11: MD_TEXTUAL@160..161 0: MD_TEXTUAL_LITERAL@160..161 "\n" [] [] - 12: MD_TEXTUAL@161..162 - 0: MD_TEXTUAL_LITERAL@161..162 "\n" [] [] - 9: 
MD_HEADER@162..195 + 9: MD_NEWLINE@161..162 + 0: NEWLINE@161..162 "\n" [] [] + 10: MD_HEADER@162..195 0: MD_HASH_LIST@162..163 0: MD_HASH@162..163 0: HASH@162..163 "#" [] [] @@ -536,11 +464,11 @@ MdDocument { 0: MD_TEXTUAL_LITERAL@163..195 "\tTab after hash is valid heading" [] [] 1: (empty) 2: MD_HASH_LIST@195..195 - 10: MD_NEWLINE@195..196 + 11: MD_NEWLINE@195..196 0: NEWLINE@195..196 "\n" [] [] - 11: MD_NEWLINE@196..197 + 12: MD_NEWLINE@196..197 0: NEWLINE@196..197 "\n" [] [] - 12: MD_HEADER@197..224 + 13: MD_HEADER@197..224 0: MD_HASH_LIST@197..199 0: MD_HASH@197..198 0: HASH@197..198 "#" [] [] @@ -552,85 +480,61 @@ MdDocument { 0: MD_TEXTUAL_LITERAL@199..224 "\tMultiple hashes with tab" [] [] 1: (empty) 2: MD_HASH_LIST@224..224 - 13: MD_NEWLINE@224..225 + 14: MD_NEWLINE@224..225 0: NEWLINE@224..225 "\n" [] [] - 14: MD_NEWLINE@225..226 + 15: MD_NEWLINE@225..226 0: NEWLINE@225..226 "\n" [] [] - 15: MD_PARAGRAPH@226..364 + 16: MD_PARAGRAPH@226..364 0: MD_INLINE_ITEM_LIST@226..364 0: MD_TEXTUAL@226..240 0: MD_TEXTUAL_LITERAL@226..240 "Paragraph here" [] [] 1: MD_TEXTUAL@240..241 0: MD_TEXTUAL_LITERAL@240..241 "\n" [] [] - 2: MD_TEXTUAL@241..242 - 0: MD_TEXTUAL_LITERAL@241..242 " " [] [] - 3: MD_TEXTUAL@242..243 - 0: MD_TEXTUAL_LITERAL@242..243 " " [] [] - 4: MD_TEXTUAL@243..244 - 0: MD_TEXTUAL_LITERAL@243..244 " " [] [] - 5: MD_TEXTUAL@244..245 - 0: MD_TEXTUAL_LITERAL@244..245 " " [] [] - 6: MD_TEXTUAL@245..246 - 0: MD_TEXTUAL_LITERAL@245..246 "#" [] [] - 7: MD_TEXTUAL@246..267 + 2: MD_TEXTUAL@241..246 + 0: MD_TEXTUAL_LITERAL@241..246 "#" [Skipped(" "), Skipped(" "), Skipped(" "), Skipped(" ")] [] + 3: MD_TEXTUAL@246..267 0: MD_TEXTUAL_LITERAL@246..267 " not heading due to 4" [] [] - 8: MD_TEXTUAL@267..268 + 4: MD_TEXTUAL@267..268 0: MD_TEXTUAL_LITERAL@267..268 "+" [] [] - 9: MD_TEXTUAL@268..275 + 5: MD_TEXTUAL@268..275 0: MD_TEXTUAL_LITERAL@268..275 " spaces" [] [] - 10: MD_TEXTUAL@275..276 + 6: MD_TEXTUAL@275..276 0: MD_TEXTUAL_LITERAL@275..276 "\n" [] [] - 11: 
MD_TEXTUAL@276..277 - 0: MD_TEXTUAL_LITERAL@276..277 " " [] [] - 12: MD_TEXTUAL@277..278 - 0: MD_TEXTUAL_LITERAL@277..278 " " [] [] - 13: MD_TEXTUAL@278..279 - 0: MD_TEXTUAL_LITERAL@278..279 " " [] [] - 14: MD_TEXTUAL@279..280 - 0: MD_TEXTUAL_LITERAL@279..280 " " [] [] - 15: MD_TEXTUAL@280..281 - 0: MD_TEXTUAL_LITERAL@280..281 "-" [] [] - 16: MD_TEXTUAL@281..299 + 7: MD_TEXTUAL@276..281 + 0: MD_TEXTUAL_LITERAL@276..281 "-" [Skipped(" "), Skipped(" "), Skipped(" "), Skipped(" ")] [] + 8: MD_TEXTUAL@281..299 0: MD_TEXTUAL_LITERAL@281..299 " not list due to 4" [] [] - 17: MD_TEXTUAL@299..300 + 9: MD_TEXTUAL@299..300 0: MD_TEXTUAL_LITERAL@299..300 "+" [] [] - 18: MD_TEXTUAL@300..307 + 10: MD_TEXTUAL@300..307 0: MD_TEXTUAL_LITERAL@300..307 " spaces" [] [] - 19: MD_TEXTUAL@307..308 + 11: MD_TEXTUAL@307..308 0: MD_TEXTUAL_LITERAL@307..308 "\n" [] [] - 20: MD_TEXTUAL@308..309 - 0: MD_TEXTUAL_LITERAL@308..309 " " [] [] - 21: MD_TEXTUAL@309..310 - 0: MD_TEXTUAL_LITERAL@309..310 " " [] [] - 22: MD_TEXTUAL@310..311 - 0: MD_TEXTUAL_LITERAL@310..311 " " [] [] - 23: MD_TEXTUAL@311..312 - 0: MD_TEXTUAL_LITERAL@311..312 " " [] [] - 24: MD_TEXTUAL@312..313 - 0: MD_TEXTUAL_LITERAL@312..313 ">" [] [] - 25: MD_TEXTUAL@313..332 + 12: MD_TEXTUAL@308..313 + 0: MD_TEXTUAL_LITERAL@308..313 ">" [Skipped(" "), Skipped(" "), Skipped(" "), Skipped(" ")] [] + 13: MD_TEXTUAL@313..332 0: MD_TEXTUAL_LITERAL@313..332 " not quote due to 4" [] [] - 26: MD_TEXTUAL@332..333 + 14: MD_TEXTUAL@332..333 0: MD_TEXTUAL_LITERAL@332..333 "+" [] [] - 27: MD_TEXTUAL@333..340 + 15: MD_TEXTUAL@333..340 0: MD_TEXTUAL_LITERAL@333..340 " spaces" [] [] - 28: MD_TEXTUAL@340..341 + 16: MD_TEXTUAL@340..341 0: MD_TEXTUAL_LITERAL@340..341 "\n" [] [] - 29: MD_TEXTUAL@341..363 + 17: MD_TEXTUAL@341..363 0: MD_TEXTUAL_LITERAL@341..363 "continues as paragraph" [] [] - 30: MD_TEXTUAL@363..364 + 18: MD_TEXTUAL@363..364 0: MD_TEXTUAL_LITERAL@363..364 "\n" [] [] 1: (empty) - 16: MD_NEWLINE@364..365 + 17: MD_NEWLINE@364..365 0: 
NEWLINE@364..365 "\n" [] [] - 17: MD_PARAGRAPH@365..378 + 18: MD_PARAGRAPH@365..378 0: MD_INLINE_ITEM_LIST@365..378 0: MD_TEXTUAL@365..377 0: MD_TEXTUAL_LITERAL@365..377 "Another para" [] [] 1: MD_TEXTUAL@377..378 0: MD_TEXTUAL_LITERAL@377..378 "\n" [] [] 1: (empty) - 18: MD_HEADER@378..416 + 19: MD_HEADER@378..416 0: MD_HASH_LIST@378..382 0: MD_HASH@378..382 0: HASH@378..382 "#" [Skipped(" "), Skipped(" "), Skipped(" ")] [] @@ -646,55 +550,31 @@ MdDocument { 0: MD_TEXTUAL_LITERAL@415..416 ")" [] [] 1: (empty) 2: MD_HASH_LIST@416..416 - 19: MD_NEWLINE@416..417 + 20: MD_NEWLINE@416..417 0: NEWLINE@416..417 "\n" [] [] - 20: MD_NEWLINE@417..418 + 21: MD_NEWLINE@417..418 0: NEWLINE@417..418 "\n" [] [] - 21: MD_PARAGRAPH@418..505 + 22: MD_PARAGRAPH@418..505 0: MD_INLINE_ITEM_LIST@418..505 0: MD_TEXTUAL@418..442 0: MD_TEXTUAL_LITERAL@418..442 "Para with indented fence" [] [] 1: MD_TEXTUAL@442..443 0: MD_TEXTUAL_LITERAL@442..443 "\n" [] [] - 2: MD_TEXTUAL@443..444 - 0: MD_TEXTUAL_LITERAL@443..444 " " [] [] - 3: MD_TEXTUAL@444..445 - 0: MD_TEXTUAL_LITERAL@444..445 " " [] [] - 4: MD_TEXTUAL@445..446 - 0: MD_TEXTUAL_LITERAL@445..446 " " [] [] - 5: MD_TEXTUAL@446..447 - 0: MD_TEXTUAL_LITERAL@446..447 " " [] [] - 6: MD_TEXTUAL@447..450 - 0: MD_TEXTUAL_LITERAL@447..450 "```" [] [] - 7: MD_TEXTUAL@450..451 + 2: MD_TEXTUAL@443..450 + 0: MD_TEXTUAL_LITERAL@443..450 "```" [Skipped(" "), Skipped(" "), Skipped(" "), Skipped(" ")] [] + 3: MD_TEXTUAL@450..451 0: MD_TEXTUAL_LITERAL@450..451 "\n" [] [] - 8: MD_TEXTUAL@451..452 - 0: MD_TEXTUAL_LITERAL@451..452 " " [] [] - 9: MD_TEXTUAL@452..453 - 0: MD_TEXTUAL_LITERAL@452..453 " " [] [] - 10: MD_TEXTUAL@453..454 - 0: MD_TEXTUAL_LITERAL@453..454 " " [] [] - 11: MD_TEXTUAL@454..455 - 0: MD_TEXTUAL_LITERAL@454..455 " " [] [] - 12: MD_TEXTUAL@455..471 - 0: MD_TEXTUAL_LITERAL@455..471 "not a code fence" [] [] - 13: MD_TEXTUAL@471..472 + 4: MD_TEXTUAL@451..471 + 0: MD_TEXTUAL_LITERAL@451..471 "not a code fence" [Skipped(" "), Skipped(" "), 
Skipped(" "), Skipped(" ")] [] + 5: MD_TEXTUAL@471..472 0: MD_TEXTUAL_LITERAL@471..472 "\n" [] [] - 14: MD_TEXTUAL@472..473 - 0: MD_TEXTUAL_LITERAL@472..473 " " [] [] - 15: MD_TEXTUAL@473..474 - 0: MD_TEXTUAL_LITERAL@473..474 " " [] [] - 16: MD_TEXTUAL@474..475 - 0: MD_TEXTUAL_LITERAL@474..475 " " [] [] - 17: MD_TEXTUAL@475..476 - 0: MD_TEXTUAL_LITERAL@475..476 " " [] [] - 18: MD_TEXTUAL@476..479 - 0: MD_TEXTUAL_LITERAL@476..479 "```" [] [] - 19: MD_TEXTUAL@479..480 + 6: MD_TEXTUAL@472..479 + 0: MD_TEXTUAL_LITERAL@472..479 "```" [Skipped(" "), Skipped(" "), Skipped(" "), Skipped(" ")] [] + 7: MD_TEXTUAL@479..480 0: MD_TEXTUAL_LITERAL@479..480 "\n" [] [] - 20: MD_TEXTUAL@480..504 + 8: MD_TEXTUAL@480..504 0: MD_TEXTUAL_LITERAL@480..504 "still the same paragraph" [] [] - 21: MD_TEXTUAL@504..505 + 9: MD_TEXTUAL@504..505 0: MD_TEXTUAL_LITERAL@504..505 "\n" [] [] 1: (empty) 2: EOF@505..505 "" [] [] diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/indent_code_block.md.snap b/crates/biome_markdown_parser/tests/md_test_suite/ok/indent_code_block.md.snap index dbbb87aae9f0..c1206d1a9732 100644 --- a/crates/biome_markdown_parser/tests/md_test_suite/ok/indent_code_block.md.snap +++ b/crates/biome_markdown_parser/tests/md_test_suite/ok/indent_code_block.md.snap @@ -68,11 +68,11 @@ MdDocument { MdTextual { value_token: MD_TEXTUAL_LITERAL@61..62 "\n" [] [], }, - MdTextual { - value_token: MD_TEXTUAL_LITERAL@62..63 "\n" [] [], - }, ], }, + MdNewline { + value_token: NEWLINE@62..63 "\n" [] [], + }, MdParagraph { list: MdInlineItemList [ MdTextual { @@ -114,8 +114,8 @@ MdDocument { 0: MD_DOCUMENT@0..121 0: (empty) 1: MD_BLOCK_LIST@0..121 - 0: MD_INDENT_CODE_BLOCK@0..63 - 0: MD_INLINE_ITEM_LIST@0..63 + 0: MD_INDENT_CODE_BLOCK@0..62 + 0: MD_INLINE_ITEM_LIST@0..62 0: MD_TEXTUAL@0..18 0: MD_TEXTUAL_LITERAL@0..18 "function hello" [Skipped(" "), Skipped(" "), Skipped(" "), Skipped(" ")] [] 1: MD_TEXTUAL@18..19 @@ -144,18 +144,18 @@ MdDocument { 0: MD_TEXTUAL_LITERAL@56..61 
"}" [Skipped(" "), Skipped(" "), Skipped(" "), Skipped(" ")] [] 13: MD_TEXTUAL@61..62 0: MD_TEXTUAL_LITERAL@61..62 "\n" [] [] - 14: MD_TEXTUAL@62..63 - 0: MD_TEXTUAL_LITERAL@62..63 "\n" [] [] - 1: MD_PARAGRAPH@63..87 + 1: MD_NEWLINE@62..63 + 0: NEWLINE@62..63 "\n" [] [] + 2: MD_PARAGRAPH@63..87 0: MD_INLINE_ITEM_LIST@63..87 0: MD_TEXTUAL@63..86 0: MD_TEXTUAL_LITERAL@63..86 "Regular paragraph here." [] [] 1: MD_TEXTUAL@86..87 0: MD_TEXTUAL_LITERAL@86..87 "\n" [] [] 1: (empty) - 2: MD_NEWLINE@87..88 + 3: MD_NEWLINE@87..88 0: NEWLINE@87..88 "\n" [] [] - 3: MD_INDENT_CODE_BLOCK@88..121 + 4: MD_INDENT_CODE_BLOCK@88..121 0: MD_INLINE_ITEM_LIST@88..121 0: MD_TEXTUAL@88..101 0: MD_TEXTUAL_LITERAL@88..101 "More code" [Skipped(" "), Skipped(" "), Skipped(" "), Skipped(" ")] [] diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/lazy_continuation.md.snap b/crates/biome_markdown_parser/tests/md_test_suite/ok/lazy_continuation.md.snap index 60c8b8c17601..bd255c764113 100644 --- a/crates/biome_markdown_parser/tests/md_test_suite/ok/lazy_continuation.md.snap +++ b/crates/biome_markdown_parser/tests/md_test_suite/ok/lazy_continuation.md.snap @@ -1,5 +1,6 @@ --- source: crates/biome_markdown_parser/tests/spec_test.rs +assertion_line: 131 expression: snapshot --- ## Input @@ -180,8 +181,8 @@ MdDocument { MdQuote { marker_token: R_ANGLE@248..249 ">" [] [], content: MdBlockList [ - MdSetextHeader { - content: MdInlineItemList [ + MdParagraph { + list: MdInlineItemList [ MdTextual { value_token: MD_TEXTUAL_LITERAL@249..285 "Quote interrupted by setext heading" [Skipped(" ")] [], }, @@ -195,10 +196,13 @@ MdDocument { value_token: MD_TEXTUAL_LITERAL@300..301 "\n" [] [], }, ], - underline_token: MD_SETEXT_UNDERLINE_LITERAL@301..304 "---" [] [], + hard_line: missing (optional), }, ], }, + MdThematicBreakBlock { + value_token: MD_THEMATIC_BREAK_LITERAL@301..304 "---" [] [], + }, MdNewline { value_token: NEWLINE@304..305 "\n" [] [], }, @@ -285,8 +289,8 @@ MdDocument { MdQuote { 
marker_token: R_ANGLE@430..431 ">" [] [], content: MdBlockList [ - MdSetextHeader { - content: MdInlineItemList [ + MdParagraph { + list: MdInlineItemList [ MdTextual { value_token: MD_TEXTUAL_LITERAL@431..467 "Quote interrupted by thematic break" [Skipped(" ")] [], }, @@ -294,10 +298,13 @@ MdDocument { value_token: MD_TEXTUAL_LITERAL@467..468 "\n" [] [], }, ], - underline_token: MD_SETEXT_UNDERLINE_LITERAL@468..471 "---" [] [], + hard_line: missing (optional), }, ], }, + MdThematicBreakBlock { + value_token: MD_THEMATIC_BREAK_LITERAL@468..471 "---" [] [], + }, MdNewline { value_token: NEWLINE@471..472 "\n" [] [], }, @@ -316,19 +323,7 @@ MdDocument { value_token: MD_TEXTUAL_LITERAL@509..510 "\n" [] [], }, MdTextual { - value_token: MD_TEXTUAL_LITERAL@510..511 " " [] [], - }, - MdTextual { - value_token: MD_TEXTUAL_LITERAL@511..512 " " [] [], - }, - MdTextual { - value_token: MD_TEXTUAL_LITERAL@512..513 " " [] [], - }, - MdTextual { - value_token: MD_TEXTUAL_LITERAL@513..514 " " [] [], - }, - MdTextual { - value_token: MD_TEXTUAL_LITERAL@514..544 "This is an indented code block" [] [], + value_token: MD_TEXTUAL_LITERAL@510..544 "This is an indented code block" [Skipped(" "), Skipped(" "), Skipped(" "), Skipped(" ")] [], }, MdTextual { value_token: MD_TEXTUAL_LITERAL@544..545 "\n" [] [], @@ -433,10 +428,10 @@ MdDocument { 0: NEWLINE@246..247 "\n" [] [] 9: MD_NEWLINE@247..248 0: NEWLINE@247..248 "\n" [] [] - 10: MD_QUOTE@248..304 + 10: MD_QUOTE@248..301 0: R_ANGLE@248..249 ">" [] [] - 1: MD_BLOCK_LIST@249..304 - 0: MD_SETEXT_HEADER@249..304 + 1: MD_BLOCK_LIST@249..301 + 0: MD_PARAGRAPH@249..301 0: MD_INLINE_ITEM_LIST@249..301 0: MD_TEXTUAL@249..285 0: MD_TEXTUAL_LITERAL@249..285 "Quote interrupted by setext heading" [Skipped(" ")] [] @@ -446,12 +441,14 @@ MdDocument { 0: MD_TEXTUAL_LITERAL@286..300 "Setext heading" [] [] 3: MD_TEXTUAL@300..301 0: MD_TEXTUAL_LITERAL@300..301 "\n" [] [] - 1: MD_SETEXT_UNDERLINE_LITERAL@301..304 "---" [] [] - 11: MD_NEWLINE@304..305 + 1: 
(empty) + 11: MD_THEMATIC_BREAK_BLOCK@301..304 + 0: MD_THEMATIC_BREAK_LITERAL@301..304 "---" [] [] + 12: MD_NEWLINE@304..305 0: NEWLINE@304..305 "\n" [] [] - 12: MD_NEWLINE@305..306 + 13: MD_NEWLINE@305..306 0: NEWLINE@305..306 "\n" [] [] - 13: MD_QUOTE@306..334 + 14: MD_QUOTE@306..334 0: R_ANGLE@306..307 ">" [] [] 1: MD_BLOCK_LIST@307..334 0: MD_PARAGRAPH@307..334 @@ -461,7 +458,7 @@ MdDocument { 1: MD_TEXTUAL@333..334 0: MD_TEXTUAL_LITERAL@333..334 "\n" [] [] 1: (empty) - 14: MD_BULLET_LIST_ITEM@334..374 + 15: MD_BULLET_LIST_ITEM@334..374 0: MD_BULLET_LIST@334..374 0: MD_BULLET@334..374 0: MINUS@334..335 "-" [] [] @@ -473,9 +470,9 @@ MdDocument { 1: MD_TEXTUAL@373..374 0: MD_TEXTUAL_LITERAL@373..374 "\n" [] [] 1: (empty) - 15: MD_NEWLINE@374..375 + 16: MD_NEWLINE@374..375 0: NEWLINE@374..375 "\n" [] [] - 16: MD_QUOTE@375..410 + 17: MD_QUOTE@375..410 0: R_ANGLE@375..376 ">" [] [] 1: MD_BLOCK_LIST@376..410 0: MD_PARAGRAPH@376..410 @@ -485,7 +482,7 @@ MdDocument { 1: MD_TEXTUAL@409..410 0: MD_TEXTUAL_LITERAL@409..410 "\n" [] [] 1: (empty) - 17: MD_FENCED_CODE_BLOCK@410..428 + 18: MD_FENCED_CODE_BLOCK@410..428 0: TRIPLE_BACKTICK@410..413 "```" [] [] 1: MD_CODE_NAME_LIST@413..413 2: MD_INLINE_ITEM_LIST@413..425 @@ -496,25 +493,27 @@ MdDocument { 2: MD_TEXTUAL@424..425 0: MD_TEXTUAL_LITERAL@424..425 "\n" [] [] 3: TRIPLE_BACKTICK@425..428 "```" [] [] - 18: MD_NEWLINE@428..429 + 19: MD_NEWLINE@428..429 0: NEWLINE@428..429 "\n" [] [] - 19: MD_NEWLINE@429..430 + 20: MD_NEWLINE@429..430 0: NEWLINE@429..430 "\n" [] [] - 20: MD_QUOTE@430..471 + 21: MD_QUOTE@430..468 0: R_ANGLE@430..431 ">" [] [] - 1: MD_BLOCK_LIST@431..471 - 0: MD_SETEXT_HEADER@431..471 + 1: MD_BLOCK_LIST@431..468 + 0: MD_PARAGRAPH@431..468 0: MD_INLINE_ITEM_LIST@431..468 0: MD_TEXTUAL@431..467 0: MD_TEXTUAL_LITERAL@431..467 "Quote interrupted by thematic break" [Skipped(" ")] [] 1: MD_TEXTUAL@467..468 0: MD_TEXTUAL_LITERAL@467..468 "\n" [] [] - 1: MD_SETEXT_UNDERLINE_LITERAL@468..471 "---" [] [] - 21: 
MD_NEWLINE@471..472 + 1: (empty) + 22: MD_THEMATIC_BREAK_BLOCK@468..471 + 0: MD_THEMATIC_BREAK_LITERAL@468..471 "---" [] [] + 23: MD_NEWLINE@471..472 0: NEWLINE@471..472 "\n" [] [] - 22: MD_NEWLINE@472..473 + 24: MD_NEWLINE@472..473 0: NEWLINE@472..473 "\n" [] [] - 23: MD_QUOTE@473..545 + 25: MD_QUOTE@473..545 0: R_ANGLE@473..474 ">" [] [] 1: MD_BLOCK_LIST@474..545 0: MD_PARAGRAPH@474..545 @@ -523,17 +522,9 @@ MdDocument { 0: MD_TEXTUAL_LITERAL@474..509 "Quote interrupted by indented code" [Skipped(" ")] [] 1: MD_TEXTUAL@509..510 0: MD_TEXTUAL_LITERAL@509..510 "\n" [] [] - 2: MD_TEXTUAL@510..511 - 0: MD_TEXTUAL_LITERAL@510..511 " " [] [] - 3: MD_TEXTUAL@511..512 - 0: MD_TEXTUAL_LITERAL@511..512 " " [] [] - 4: MD_TEXTUAL@512..513 - 0: MD_TEXTUAL_LITERAL@512..513 " " [] [] - 5: MD_TEXTUAL@513..514 - 0: MD_TEXTUAL_LITERAL@513..514 " " [] [] - 6: MD_TEXTUAL@514..544 - 0: MD_TEXTUAL_LITERAL@514..544 "This is an indented code block" [] [] - 7: MD_TEXTUAL@544..545 + 2: MD_TEXTUAL@510..544 + 0: MD_TEXTUAL_LITERAL@510..544 "This is an indented code block" [Skipped(" "), Skipped(" "), Skipped(" "), Skipped(" ")] [] + 3: MD_TEXTUAL@544..545 0: MD_TEXTUAL_LITERAL@544..545 "\n" [] [] 1: (empty) 2: EOF@545..545 "" [] [] diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/list_indentation.md.snap b/crates/biome_markdown_parser/tests/md_test_suite/ok/list_indentation.md.snap index 452b06da4ad5..58ebed20087d 100644 --- a/crates/biome_markdown_parser/tests/md_test_suite/ok/list_indentation.md.snap +++ b/crates/biome_markdown_parser/tests/md_test_suite/ok/list_indentation.md.snap @@ -1,5 +1,6 @@ --- source: crates/biome_markdown_parser/tests/spec_test.rs +assertion_line: 131 expression: snapshot --- ## Input @@ -644,19 +645,19 @@ MdDocument { value_token: MD_TEXTUAL_LITERAL@721..722 "\n" [] [], }, MdTextual { - value_token: MD_TEXTUAL_LITERAL@722..741 "inner continued" [Skipped(" "), Skipped(" "), Skipped(" "), Skipped(" ")] [], + value_token: MD_TEXTUAL_LITERAL@722..725 
" " [Skipped(" "), Skipped(" ")] [], }, MdTextual { - value_token: MD_TEXTUAL_LITERAL@741..742 "\n" [] [], + value_token: MD_TEXTUAL_LITERAL@725..726 " " [] [], }, MdTextual { - value_token: MD_TEXTUAL_LITERAL@742..743 " " [] [], + value_token: MD_TEXTUAL_LITERAL@726..741 "inner continued" [] [], }, MdTextual { - value_token: MD_TEXTUAL_LITERAL@743..744 " " [] [], + value_token: MD_TEXTUAL_LITERAL@741..742 "\n" [] [], }, MdTextual { - value_token: MD_TEXTUAL_LITERAL@744..759 "outer continued" [] [], + value_token: MD_TEXTUAL_LITERAL@742..759 "outer continued" [Skipped(" "), Skipped(" ")] [], }, MdTextual { value_token: MD_TEXTUAL_LITERAL@759..760 "\n" [] [], @@ -1121,16 +1122,16 @@ MdDocument { 0: MD_TEXTUAL_LITERAL@715..721 " inner" [] [] 1: MD_TEXTUAL@721..722 0: MD_TEXTUAL_LITERAL@721..722 "\n" [] [] - 2: MD_TEXTUAL@722..741 - 0: MD_TEXTUAL_LITERAL@722..741 "inner continued" [Skipped(" "), Skipped(" "), Skipped(" "), Skipped(" ")] [] - 3: MD_TEXTUAL@741..742 + 2: MD_TEXTUAL@722..725 + 0: MD_TEXTUAL_LITERAL@722..725 " " [Skipped(" "), Skipped(" ")] [] + 3: MD_TEXTUAL@725..726 + 0: MD_TEXTUAL_LITERAL@725..726 " " [] [] + 4: MD_TEXTUAL@726..741 + 0: MD_TEXTUAL_LITERAL@726..741 "inner continued" [] [] + 5: MD_TEXTUAL@741..742 0: MD_TEXTUAL_LITERAL@741..742 "\n" [] [] - 4: MD_TEXTUAL@742..743 - 0: MD_TEXTUAL_LITERAL@742..743 " " [] [] - 5: MD_TEXTUAL@743..744 - 0: MD_TEXTUAL_LITERAL@743..744 " " [] [] - 6: MD_TEXTUAL@744..759 - 0: MD_TEXTUAL_LITERAL@744..759 "outer continued" [] [] + 6: MD_TEXTUAL@742..759 + 0: MD_TEXTUAL_LITERAL@742..759 "outer continued" [Skipped(" "), Skipped(" ")] [] 7: MD_TEXTUAL@759..760 0: MD_TEXTUAL_LITERAL@759..760 "\n" [] [] 1: (empty) diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/list_tightness.md.snap b/crates/biome_markdown_parser/tests/md_test_suite/ok/list_tightness.md.snap index a4f40a10ce09..1ead17492a50 100644 --- a/crates/biome_markdown_parser/tests/md_test_suite/ok/list_tightness.md.snap +++ 
b/crates/biome_markdown_parser/tests/md_test_suite/ok/list_tightness.md.snap @@ -1,5 +1,6 @@ --- source: crates/biome_markdown_parser/tests/spec_test.rs +assertion_line: 131 expression: snapshot --- ## Input @@ -593,21 +594,25 @@ MdDocument { MdNewline { value_token: NEWLINE@559..560 "\n" [] [], }, - ], - }, - MdBullet { - bullet: MINUS@560..563 "-" [Skipped(" "), Skipped(" ")] [], - content: MdBlockList [ - MdParagraph { - list: MdInlineItemList [ - MdTextual { - value_token: MD_TEXTUAL_LITERAL@563..571 " Inner 2" [] [], - }, - MdTextual { - value_token: MD_TEXTUAL_LITERAL@571..572 "\n" [] [], + MdBulletListItem { + md_bullet_list: MdBulletList [ + MdBullet { + bullet: MINUS@560..563 "-" [Skipped(" "), Skipped(" ")] [], + content: MdBlockList [ + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@563..571 " Inner 2" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@571..572 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + ], }, ], - hard_line: missing (optional), }, ], }, @@ -701,21 +706,25 @@ MdDocument { ], hard_line: missing (optional), }, - ], - }, - MdBullet { - bullet: MINUS@643..646 "-" [Skipped(" "), Skipped(" ")] [], - content: MdBlockList [ - MdParagraph { - list: MdInlineItemList [ - MdTextual { - value_token: MD_TEXTUAL_LITERAL@646..654 " Inner B" [] [], - }, - MdTextual { - value_token: MD_TEXTUAL_LITERAL@654..655 "\n" [] [], + MdBulletListItem { + md_bullet_list: MdBulletList [ + MdBullet { + bullet: MINUS@643..646 "-" [Skipped(" "), Skipped(" ")] [], + content: MdBlockList [ + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@646..654 " Inner B" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@654..655 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + ], }, ], - hard_line: missing (optional), }, ], }, @@ -1056,9 +1065,9 @@ MdDocument { 1: (empty) 1: MD_BULLET_LIST_ITEM@547..572 0: MD_BULLET_LIST@547..572 - 0: MD_BULLET@547..560 
+ 0: MD_BULLET@547..572 0: MINUS@547..550 "-" [Skipped(" "), Skipped(" ")] [] - 1: MD_BLOCK_LIST@550..560 + 1: MD_BLOCK_LIST@550..572 0: MD_PARAGRAPH@550..559 0: MD_INLINE_ITEM_LIST@550..559 0: MD_TEXTUAL@550..558 @@ -1068,16 +1077,18 @@ MdDocument { 1: (empty) 1: MD_NEWLINE@559..560 0: NEWLINE@559..560 "\n" [] [] - 1: MD_BULLET@560..572 - 0: MINUS@560..563 "-" [Skipped(" "), Skipped(" ")] [] - 1: MD_BLOCK_LIST@563..572 - 0: MD_PARAGRAPH@563..572 - 0: MD_INLINE_ITEM_LIST@563..572 - 0: MD_TEXTUAL@563..571 - 0: MD_TEXTUAL_LITERAL@563..571 " Inner 2" [] [] - 1: MD_TEXTUAL@571..572 - 0: MD_TEXTUAL_LITERAL@571..572 "\n" [] [] - 1: (empty) + 2: MD_BULLET_LIST_ITEM@560..572 + 0: MD_BULLET_LIST@560..572 + 0: MD_BULLET@560..572 + 0: MINUS@560..563 "-" [Skipped(" "), Skipped(" ")] [] + 1: MD_BLOCK_LIST@563..572 + 0: MD_PARAGRAPH@563..572 + 0: MD_INLINE_ITEM_LIST@563..572 + 0: MD_TEXTUAL@563..571 + 0: MD_TEXTUAL_LITERAL@563..571 " Inner 2" [] [] + 1: MD_TEXTUAL@571..572 + 0: MD_TEXTUAL_LITERAL@571..572 "\n" [] [] + 1: (empty) 1: MD_BULLET@572..582 0: MINUS@572..573 "-" [] [] 1: MD_BLOCK_LIST@573..582 @@ -1125,9 +1136,9 @@ MdDocument { 1: (empty) 1: MD_BULLET_LIST_ITEM@631..655 0: MD_BULLET_LIST@631..655 - 0: MD_BULLET@631..643 + 0: MD_BULLET@631..655 0: MINUS@631..634 "-" [Skipped(" "), Skipped(" ")] [] - 1: MD_BLOCK_LIST@634..643 + 1: MD_BLOCK_LIST@634..655 0: MD_PARAGRAPH@634..643 0: MD_INLINE_ITEM_LIST@634..643 0: MD_TEXTUAL@634..642 @@ -1135,16 +1146,18 @@ MdDocument { 1: MD_TEXTUAL@642..643 0: MD_TEXTUAL_LITERAL@642..643 "\n" [] [] 1: (empty) - 1: MD_BULLET@643..655 - 0: MINUS@643..646 "-" [Skipped(" "), Skipped(" ")] [] - 1: MD_BLOCK_LIST@646..655 - 0: MD_PARAGRAPH@646..655 - 0: MD_INLINE_ITEM_LIST@646..655 - 0: MD_TEXTUAL@646..654 - 0: MD_TEXTUAL_LITERAL@646..654 " Inner B" [] [] - 1: MD_TEXTUAL@654..655 - 0: MD_TEXTUAL_LITERAL@654..655 "\n" [] [] - 1: (empty) + 1: MD_BULLET_LIST_ITEM@643..655 + 0: MD_BULLET_LIST@643..655 + 0: MD_BULLET@643..655 + 0: MINUS@643..646 "-" 
[Skipped(" "), Skipped(" ")] [] + 1: MD_BLOCK_LIST@646..655 + 0: MD_PARAGRAPH@646..655 + 0: MD_INLINE_ITEM_LIST@646..655 + 0: MD_TEXTUAL@646..654 + 0: MD_TEXTUAL_LITERAL@646..654 " Inner B" [] [] + 1: MD_TEXTUAL@654..655 + 0: MD_TEXTUAL_LITERAL@654..655 "\n" [] [] + 1: (empty) 2: EOF@655..655 "" [] [] ``` diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/multiline_list.md.snap b/crates/biome_markdown_parser/tests/md_test_suite/ok/multiline_list.md.snap index 3aad16c400a2..87d6d6b68c0b 100644 --- a/crates/biome_markdown_parser/tests/md_test_suite/ok/multiline_list.md.snap +++ b/crates/biome_markdown_parser/tests/md_test_suite/ok/multiline_list.md.snap @@ -1,5 +1,6 @@ --- source: crates/biome_markdown_parser/tests/spec_test.rs +assertion_line: 131 expression: snapshot --- ## Input @@ -149,21 +150,25 @@ MdDocument { ], hard_line: missing (optional), }, - ], - }, - MdBullet { - bullet: MINUS@130..133 "-" [Skipped(" "), Skipped(" ")] [], - content: MdBlockList [ - MdParagraph { - list: MdInlineItemList [ - MdTextual { - value_token: MD_TEXTUAL_LITERAL@133..147 " Another child" [] [], - }, - MdTextual { - value_token: MD_TEXTUAL_LITERAL@147..148 "\n" [] [], + MdBulletListItem { + md_bullet_list: MdBulletList [ + MdBullet { + bullet: MINUS@130..133 "-" [Skipped(" "), Skipped(" ")] [], + content: MdBlockList [ + MdParagraph { + list: MdInlineItemList [ + MdTextual { + value_token: MD_TEXTUAL_LITERAL@133..147 " Another child" [] [], + }, + MdTextual { + value_token: MD_TEXTUAL_LITERAL@147..148 "\n" [] [], + }, + ], + hard_line: missing (optional), + }, + ], }, ], - hard_line: missing (optional), }, ], }, @@ -270,9 +275,9 @@ MdDocument { 1: (empty) 1: MD_BULLET_LIST_ITEM@115..148 0: MD_BULLET_LIST@115..148 - 0: MD_BULLET@115..130 + 0: MD_BULLET@115..148 0: MINUS@115..118 "-" [Skipped(" "), Skipped(" ")] [] - 1: MD_BLOCK_LIST@118..130 + 1: MD_BLOCK_LIST@118..148 0: MD_PARAGRAPH@118..130 0: MD_INLINE_ITEM_LIST@118..130 0: MD_TEXTUAL@118..129 @@ -280,16 +285,18 
@@ MdDocument { 1: MD_TEXTUAL@129..130 0: MD_TEXTUAL_LITERAL@129..130 "\n" [] [] 1: (empty) - 1: MD_BULLET@130..148 - 0: MINUS@130..133 "-" [Skipped(" "), Skipped(" ")] [] - 1: MD_BLOCK_LIST@133..148 - 0: MD_PARAGRAPH@133..148 - 0: MD_INLINE_ITEM_LIST@133..148 - 0: MD_TEXTUAL@133..147 - 0: MD_TEXTUAL_LITERAL@133..147 " Another child" [] [] - 1: MD_TEXTUAL@147..148 - 0: MD_TEXTUAL_LITERAL@147..148 "\n" [] [] - 1: (empty) + 1: MD_BULLET_LIST_ITEM@130..148 + 0: MD_BULLET_LIST@130..148 + 0: MD_BULLET@130..148 + 0: MINUS@130..133 "-" [Skipped(" "), Skipped(" ")] [] + 1: MD_BLOCK_LIST@133..148 + 0: MD_PARAGRAPH@133..148 + 0: MD_INLINE_ITEM_LIST@133..148 + 0: MD_TEXTUAL@133..147 + 0: MD_TEXTUAL_LITERAL@133..147 " Another child" [] [] + 1: MD_TEXTUAL@147..148 + 0: MD_TEXTUAL_LITERAL@147..148 "\n" [] [] + 1: (empty) 1: MD_BULLET@148..165 0: MINUS@148..149 "-" [] [] 1: MD_BLOCK_LIST@149..165 diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/ordered_list.md.snap b/crates/biome_markdown_parser/tests/md_test_suite/ok/ordered_list.md.snap index 3a583f0f0ce7..34c15d65a4f4 100644 --- a/crates/biome_markdown_parser/tests/md_test_suite/ok/ordered_list.md.snap +++ b/crates/biome_markdown_parser/tests/md_test_suite/ok/ordered_list.md.snap @@ -1,5 +1,6 @@ --- source: crates/biome_markdown_parser/tests/spec_test.rs +assertion_line: 131 expression: snapshot --- ## Input @@ -74,6 +75,10 @@ MdDocument { }, ], }, + ], + }, + MdOrderedListItem { + md_bullet_list: MdBulletList [ MdBullet { bullet: MD_ORDERED_LIST_MARKER@44..46 "1)" [] [], content: MdBlockList [ @@ -119,8 +124,8 @@ MdDocument { 0: MD_DOCUMENT@0..81 0: (empty) 1: MD_BLOCK_LIST@0..81 - 0: MD_ORDERED_LIST_ITEM@0..81 - 0: MD_BULLET_LIST@0..81 + 0: MD_ORDERED_LIST_ITEM@0..44 + 0: MD_BULLET_LIST@0..44 0: MD_BULLET@0..14 0: MD_ORDERED_LIST_MARKER@0..2 "1." 
[] [] 1: MD_BLOCK_LIST@2..14 @@ -153,7 +158,9 @@ MdDocument { 1: (empty) 1: MD_NEWLINE@43..44 0: NEWLINE@43..44 "\n" [] [] - 3: MD_BULLET@44..65 + 1: MD_ORDERED_LIST_ITEM@44..81 + 0: MD_BULLET_LIST@44..81 + 0: MD_BULLET@44..65 0: MD_ORDERED_LIST_MARKER@44..46 "1)" [] [] 1: MD_BLOCK_LIST@46..65 0: MD_PARAGRAPH@46..65 @@ -163,7 +170,7 @@ MdDocument { 1: MD_TEXTUAL@64..65 0: MD_TEXTUAL_LITERAL@64..65 "\n" [] [] 1: (empty) - 4: MD_BULLET@65..81 + 1: MD_BULLET@65..81 0: MD_ORDERED_LIST_MARKER@65..67 "2)" [] [] 1: MD_BLOCK_LIST@67..81 0: MD_PARAGRAPH@67..81 From c99fa5d3dc72195ce8cc545c9667043b424f9405 Mon Sep 17 00:00:00 2001 From: jfmcdowell <206422+jfmcdowell@users.noreply.github.com> Date: Mon, 19 Jan 2026 15:02:08 -0500 Subject: [PATCH 04/12] refactor(markdown_parser): make fenced code block detection self-documenting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add explicit `detect_fence()` helper with `MIN_FENCE_LENGTH` constant that clearly documents the CommonMark §4.5 requirement: "A code fence is a sequence of at least three consecutive backtick or tilde characters." The previous `starts_with("```")` approach worked correctly for 3+ backticks but wasn't self-documenting. The new implementation explicitly counts consecutive fence characters and validates against the minimum length. Also improve the spec test harness to gracefully handle bogus nodes instead of panicking. 
--- .../src/syntax/fenced_code_block.rs | 47 +++++++++++++------ .../tests/commonmark_spec.rs | 21 ++++++++- 2 files changed, 53 insertions(+), 15 deletions(-) diff --git a/crates/biome_markdown_parser/src/syntax/fenced_code_block.rs b/crates/biome_markdown_parser/src/syntax/fenced_code_block.rs index 06a40cf15371..a4c3e4fa556c 100644 --- a/crates/biome_markdown_parser/src/syntax/fenced_code_block.rs +++ b/crates/biome_markdown_parser/src/syntax/fenced_code_block.rs @@ -35,6 +35,33 @@ use biome_parser::{ use super::parse_error::unterminated_fenced_code; use super::quote::{consume_quote_prefix, has_quote_prefix}; +/// Minimum number of fence characters required per CommonMark §4.5. +const MIN_FENCE_LENGTH: usize = 3; + +/// Detect a code fence at the start of a string. +/// +/// Per CommonMark §4.5: "A code fence is a sequence of at least three +/// consecutive backtick characters (`) or tildes (~)." +/// +/// Returns `Some((fence_char, length))` if a valid fence is found, +/// where `length` is the actual number of fence characters (3 or more). +/// Returns `None` if no valid fence is present. +fn detect_fence(s: &str) -> Option<(char, usize)> { + let first_char = s.chars().next()?; + + if first_char != '`' && first_char != '~' { + return None; + } + + let len = s.chars().take_while(|&c| c == first_char).count(); + + if len >= MIN_FENCE_LENGTH { + Some((first_char, len)) + } else { + None + } +} + /// Check if we're at a fenced code block (``` or ~~~). 
pub(crate) fn at_fenced_code_block(p: &mut MarkdownParser) -> bool { p.lookahead(|p| { @@ -44,12 +71,10 @@ pub(crate) fn at_fenced_code_block(p: &mut MarkdownParser) -> bool { p.skip_line_indent(3); let rest = p.source_after_current(); - let is_backtick_fence = rest.starts_with("```"); - let is_tilde_fence = rest.starts_with("~~~"); - if !is_backtick_fence && !is_tilde_fence { + let Some((fence_char, _)) = detect_fence(rest) else { return false; - } - if is_backtick_fence && info_string_has_backtick(p) { + }; + if fence_char == '`' && info_string_has_backtick(p) { return false; } true @@ -87,13 +112,11 @@ fn parse_fenced_code_block_impl(p: &mut MarkdownParser, force: bool) -> ParsedSy } p.skip_line_indent(3); - // Track which fence type we opened with (must close with same type per CommonMark) + // Detect fence type and length (must close with same type and >= length per CommonMark §4.5) let text = p.cur_text(); - let is_textual_tilde_fence = p.at(MD_TEXTUAL_LITERAL) && text.starts_with("~~~"); - let is_tilde_fence = - p.at(TRIPLE_TILDE) || (p.at(TILDE) && p.cur_text().len() >= 3) || is_textual_tilde_fence; + let (fence_char, fence_len) = detect_fence(text).unwrap_or(('`', MIN_FENCE_LENGTH)); + let is_tilde_fence = fence_char == '~'; let fence_type = if is_tilde_fence { "~~~" } else { "```" }; - let fence_len = fence_prefix_len(p.cur_text(), if is_tilde_fence { '~' } else { '`' }); // Record opening fence range for diagnostic let opening_range = p.cur_range(); @@ -145,10 +168,6 @@ fn parse_fenced_code_block_impl(p: &mut MarkdownParser, force: bool) -> ParsedSy Present(m.complete(p, MD_FENCED_CODE_BLOCK)) } -fn fence_prefix_len(text: &str, fence_char: char) -> usize { - text.chars().take_while(|c| *c == fence_char).count() -} - /// Parse the code name list (language info string). 
/// Grammar: MdCodeNameList = MdTextual* /// diff --git a/crates/biome_markdown_parser/tests/commonmark_spec.rs b/crates/biome_markdown_parser/tests/commonmark_spec.rs index 686bd6edcb7b..20aba18217c8 100644 --- a/crates/biome_markdown_parser/tests/commonmark_spec.rs +++ b/crates/biome_markdown_parser/tests/commonmark_spec.rs @@ -6,6 +6,8 @@ //! Run with: `cargo test -p biome_markdown_parser --test commonmark_spec -- --nocapture` use biome_markdown_parser::{document_to_html, parse_markdown}; +use biome_markdown_syntax::MdDocument; +use biome_rowan::AstNode; use serde::Deserialize; /// Embedded CommonMark spec test cases. @@ -102,6 +104,7 @@ fn commonmark_spec_compliance() { std::collections::HashMap::new(); let log_progress = std::env::var("CMARK_PROGRESS").is_ok(); + let mut bogus_count = 0; for (index, test) in tests.iter().enumerate() { if log_progress { println!( @@ -113,8 +116,24 @@ fn commonmark_spec_compliance() { ); } let parsed = parse_markdown(&test.markdown); + + // Handle bogus nodes gracefully - count as failure instead of panicking + let Some(document) = MdDocument::cast(parsed.syntax()) else { + bogus_count += 1; + let section_entry = section_stats.entry(test.section.clone()).or_insert((0, 0)); + section_entry.1 += 1; + failed.push(FailedTest { + example: test.example, + section: test.section.clone(), + markdown: test.markdown.clone(), + expected: test.html.clone(), + actual: format!("", parsed.syntax().kind()), + }); + continue; + }; + let actual = document_to_html( - &parsed.tree(), + &document, parsed.list_tightness(), parsed.list_item_indents(), parsed.quote_indents(), From f17c66b9095d289f0bc85baf1de75a5bbead0e8b Mon Sep 17 00:00:00 2001 From: jfmcdowell <206422+jfmcdowell@users.noreply.github.com> Date: Mon, 19 Jan 2026 17:05:30 -0500 Subject: [PATCH 05/12] refactor(markdown_parser): reduce header node allocations and fix MdQuote grammar Address maintainer feedback about header parsing allocating too many nodes. 
Previously, `###### Heading 6` would create 7 nodes (6 MdHash + 1 MdHashList). Now it creates only 2 nodes (1 MdHash + 1 MdHashList). Changes: - Lexer: emit all consecutive `#` characters as a single HASH token - Parser: determine heading level from token text length instead of counting nodes - Grammar: fix MdQuote to use `content: MdBlockList` (matches MdBullet pattern) - Update helper functions to work with consolidated tokens The grammar fix restores CommonMark compliance from 69.3% back to 75.2%, with Block quotes returning to 100% (25/25) and HTML blocks at 100% (44/44). --- .../src/generated/node_factory.rs | 2 +- .../src/generated/syntax_factory.rs | 2 +- crates/biome_markdown_parser/src/lexer/mod.rs | 19 +- .../biome_markdown_parser/src/lexer/tests.rs | 31 ++- crates/biome_markdown_parser/src/syntax.rs | 31 ++- .../src/syntax/fenced_code_block.rs | 218 +++++++++--------- .../src/syntax/header.rs | 65 ++++-- crates/biome_markdown_parser/src/to_html.rs | 12 +- .../biome_markdown_parser/src/token_source.rs | 50 ++-- .../tests/commonmark_spec.rs | 20 +- .../error/too_many_hashes.md.snap | 40 +--- .../tests/md_test_suite/ok/edge_cases.md.snap | 27 +-- .../tests/md_test_suite/ok/header.md.snap | 196 +++------------- .../ok/inline_html_edge_cases.md.snap | 144 ++++-------- .../ok/inline_html_invalid.md.snap | 34 +-- .../src/generated/nodes.rs | 8 +- .../src/generated/nodes_mut.rs | 2 +- xtask/codegen/markdown.ungram | 2 +- 18 files changed, 354 insertions(+), 549 deletions(-) diff --git a/crates/biome_markdown_factory/src/generated/node_factory.rs b/crates/biome_markdown_factory/src/generated/node_factory.rs index e517fe04833b..84da802e71b8 100644 --- a/crates/biome_markdown_factory/src/generated/node_factory.rs +++ b/crates/biome_markdown_factory/src/generated/node_factory.rs @@ -421,7 +421,7 @@ impl MdParagraphBuilder { )) } } -pub fn md_quote(marker_token: SyntaxToken, content: AnyMdBlock) -> MdQuote { +pub fn md_quote(marker_token: SyntaxToken, content: 
MdBlockList) -> MdQuote { MdQuote::unwrap_cast(SyntaxNode::new_detached( MarkdownSyntaxKind::MD_QUOTE, [ diff --git a/crates/biome_markdown_factory/src/generated/syntax_factory.rs b/crates/biome_markdown_factory/src/generated/syntax_factory.rs index e0385453e25e..0eb1dbd122a6 100644 --- a/crates/biome_markdown_factory/src/generated/syntax_factory.rs +++ b/crates/biome_markdown_factory/src/generated/syntax_factory.rs @@ -780,7 +780,7 @@ impl SyntaxFactory for MarkdownSyntaxFactory { } slots.next_slot(); if let Some(element) = ¤t_element - && AnyMdBlock::can_cast(element.kind()) + && MdBlockList::can_cast(element.kind()) { slots.mark_present(); current_element = elements.next(); diff --git a/crates/biome_markdown_parser/src/lexer/mod.rs b/crates/biome_markdown_parser/src/lexer/mod.rs index 6ff6464dfdcd..c0672d5786b4 100644 --- a/crates/biome_markdown_parser/src/lexer/mod.rs +++ b/crates/biome_markdown_parser/src/lexer/mod.rs @@ -883,15 +883,22 @@ impl<'src> MarkdownLexer<'src> { /// Consume hash character(s). /// - /// Emits HASH tokens for ATX headers and trailing header markers. + /// Emits a single HASH token containing all consecutive `#` characters. + /// The parser can determine the heading level by checking the token's length. + /// + /// Per CommonMark §4.2: ATX headings use 1-6 `#` characters. 
fn consume_hash(&mut self) -> MarkdownSyntaxKind { self.assert_at_char_boundary(); - // In all other cases, emit HASH - // - At line start for ATX headers (# Header) - // - After other hashes for multi-level headers (### Header) - // - For trailing hashes (# Header #) - self.advance(1); + // Count consecutive hash characters + let mut count = 0; + while let Some(b'#') = self.byte_at(count) { + count += 1; + } + + // Emit all consecutive hashes as a single HASH token + // The parser determines heading level from token length + self.advance(count); HASH } diff --git a/crates/biome_markdown_parser/src/lexer/tests.rs b/crates/biome_markdown_parser/src/lexer/tests.rs index 835e6f077c16..328107b31ddb 100644 --- a/crates/biome_markdown_parser/src/lexer/tests.rs +++ b/crates/biome_markdown_parser/src/lexer/tests.rs @@ -174,12 +174,10 @@ fn hash_token() { #[test] fn multiple_hashes() { - // Multiple hashes for different header levels + // Multiple hashes emitted as a single token - parser determines level from length assert_lex! { "###", - HASH:1, - HASH:1, - HASH:1, + HASH:3, } } @@ -551,3 +549,28 @@ fn link_reference_definition_tokens() { MD_TEXTUAL_LITERAL:20, // " https://example.com" (with leading space) } } + +#[test] +fn block_quote_with_header() { + // Block quote with header inside + assert_lex! { + "> # Foo\n", + R_ANGLE:1, + MD_TEXTUAL_LITERAL:1, // " " + HASH:1, // "#" + MD_TEXTUAL_LITERAL:4, // " Foo" + NEWLINE:1, + } +} + +#[test] +fn block_quote_simple() { + // Simple block quote without header + assert_lex! 
{ + "> This is a quote\n", + R_ANGLE:1, + MD_TEXTUAL_LITERAL:1, // " " + MD_TEXTUAL_LITERAL:15, // "This is a quote" + NEWLINE:1, + } +} diff --git a/crates/biome_markdown_parser/src/syntax.rs b/crates/biome_markdown_parser/src/syntax.rs index 1f218b71cd92..0b03f09602b0 100644 --- a/crates/biome_markdown_parser/src/syntax.rs +++ b/crates/biome_markdown_parser/src/syntax.rs @@ -1360,18 +1360,13 @@ fn check_too_many_hashes(p: &mut MarkdownParser) -> Option<(biome_rowan::TextRan return None; } - let start = p.cur_range().start(); - let mut count = 0; - - while p.at(T![#]) { - p.bump(T![#]); - count += 1; - } - - let end = p.cur_range().start(); + // The lexer emits all consecutive `#` as a single HASH token. + // Get the count from token text length. + let range = p.cur_range(); + let count = p.cur_text().len(); if count > 6 { - Some((biome_rowan::TextRange::new(start, end), count)) + Some((range, count)) } else { None } @@ -1384,19 +1379,21 @@ fn is_valid_atx_heading_start(p: &mut MarkdownParser) -> bool { p.lookahead(|p| { p.skip_line_indent(3); - let mut hash_count = 0; - - // Count consecutive hashes (must be 1-6) - while p.at(T![#]) && hash_count <= 6 { - p.bump(T![#]); - hash_count += 1; + // The lexer emits all consecutive `#` as a single HASH token. + // Count hash characters from the token's text length. + if !p.at(T![#]) { + return false; } - // Too many hashes - not a valid heading + let hash_count = p.cur_text().len(); + + // Too many hashes - not a valid heading (must be 1-6) if hash_count > 6 { return false; } + p.bump(T![#]); + // Check if followed by space, tab, or EOL/EOF per CommonMark §4.2 // In Markdown, whitespace is significant and included in token text. 
let text = p.cur_text(); diff --git a/crates/biome_markdown_parser/src/syntax/fenced_code_block.rs b/crates/biome_markdown_parser/src/syntax/fenced_code_block.rs index a4c3e4fa556c..ccfd1430b225 100644 --- a/crates/biome_markdown_parser/src/syntax/fenced_code_block.rs +++ b/crates/biome_markdown_parser/src/syntax/fenced_code_block.rs @@ -38,6 +38,95 @@ use super::quote::{consume_quote_prefix, has_quote_prefix}; /// Minimum number of fence characters required per CommonMark §4.5. const MIN_FENCE_LENGTH: usize = 3; +/// Consume indent whitespace (spaces/tabs) from source bytes. +/// +/// Tracks column width where tabs expand to next multiple of 4. +/// Returns `Some(new_idx)` on success, or `None` if required indent wasn't met. +fn consume_indent(source: &[u8], mut idx: usize, limit: usize, required: bool) -> Option { + let mut column = 0usize; + while column < limit { + match source.get(idx).copied() { + Some(b' ') => { + column += 1; + idx += 1; + } + Some(b'\t') => { + let tab_width = 4 - (column % 4); + // For optional indent, don't exceed limit with tab + if !required && column + tab_width > limit { + break; + } + column += tab_width; + idx += 1; + } + _ => { + if required { + return None; + } + break; + } + } + } + Some(idx) +} + +/// Get parser source context with bounds checking. +/// +/// Returns `Some((start_position, source_text))` if the current position is valid, +/// or `None` if the position is out of bounds. +fn get_source_context<'a>(p: &'a MarkdownParser) -> Option<(usize, &'a str)> { + let start: usize = p.cur_range().start().into(); + let source = p.source().text(); + if start > source.len() { + return None; + } + Some((start, source)) +} + +/// Check if the prefix from line_start to start is all whitespace. +/// +/// Returns `true` if the prefix contains only spaces/tabs. 
+fn is_whitespace_prefix(source: &str, start: usize, line_start: usize) -> bool { + source[line_start..start] + .chars() + .all(|c| c == ' ' || c == '\t') +} + +/// Bump a fence token (``` or ~~~), remapping if necessary. +fn bump_fence(p: &mut MarkdownParser, is_tilde_fence: bool) { + if is_tilde_fence { + if p.at(TRIPLE_TILDE) { + p.bump(TRIPLE_TILDE); + } else { + p.bump_remap(TRIPLE_TILDE); + } + } else if p.at(T!["```"]) { + p.bump(T!["```"]); + } else { + p.bump_remap(T!["```"]); + } +} + +/// Find the start position of the current line in the source text. +/// +/// Given a slice of text before the current position, finds the byte offset +/// where the current line begins (after the last newline, handling CRLF). +fn find_line_start(before: &str) -> usize { + let last_newline_pos = before.rfind(['\n', '\r']); + match last_newline_pos { + Some(pos) => { + let bytes = before.as_bytes(); + // Handle CRLF: if we found \r and next char is \n, skip both + if bytes.get(pos) == Some(&b'\r') && bytes.get(pos + 1) == Some(&b'\n') { + pos + 2 + } else { + pos + 1 + } + } + None => 0, + } +} + /// Detect a code fence at the start of a string. 
/// /// Per CommonMark §4.5: "A code fence is a sequence of at least three @@ -122,17 +211,7 @@ fn parse_fenced_code_block_impl(p: &mut MarkdownParser, force: bool) -> ParsedSy let opening_range = p.cur_range(); // Opening fence (``` or ~~~) - if is_tilde_fence { - if p.at(TRIPLE_TILDE) { - p.bump(TRIPLE_TILDE); - } else { - p.bump_remap(TRIPLE_TILDE); - } - } else if p.at(T!["```"]) { - p.bump(T!["```"]); - } else { - p.bump_remap(T!["```"]); - } + bump_fence(p, is_tilde_fence); // Optional language info string (MdCodeNameList) parse_code_name_list(p); @@ -149,17 +228,7 @@ fn parse_fenced_code_block_impl(p: &mut MarkdownParser, force: bool) -> ParsedSy p.skip_line_indent(p.state().list_item_required_indent); } p.skip_line_indent(3); - if is_tilde_fence { - if p.at(TRIPLE_TILDE) { - p.bump(TRIPLE_TILDE); - } else { - p.bump_remap(TRIPLE_TILDE); - } - } else if p.at(T!["```"]) { - p.bump(T!["```"]); - } else { - p.bump_remap(T!["```"]); - } + bump_fence(p, is_tilde_fence); } else { // Emit diagnostic for unterminated code block p.error(unterminated_fenced_code(p, opening_range, fence_type)); @@ -219,7 +288,7 @@ fn parse_code_content( if p.at_line_start() && fence_indent > 0 { skip_fenced_content_indent(p, fence_indent); - if at_closing_fence_after_indent(p, is_tilde_fence, fence_len) { + if at_closing_fence(p, is_tilde_fence, fence_len) { break; } } @@ -233,10 +302,6 @@ fn parse_code_content( m.complete(p, MD_INLINE_ITEM_LIST); } -fn is_valid_closing_fence(p: &mut MarkdownParser, is_tilde_fence: bool, fence_len: usize) -> bool { - line_has_closing_fence(p, is_tilde_fence, fence_len) -} - pub(crate) fn info_string_has_backtick(p: &mut MarkdownParser) -> bool { p.lookahead(|p| { if p.at(TRIPLE_TILDE) { @@ -263,15 +328,7 @@ pub(crate) fn info_string_has_backtick(p: &mut MarkdownParser) -> bool { } fn at_closing_fence(p: &mut MarkdownParser, is_tilde_fence: bool, fence_len: usize) -> bool { - p.lookahead(|p| is_valid_closing_fence(p, is_tilde_fence, fence_len)) -} - -fn 
at_closing_fence_after_indent( - p: &mut MarkdownParser, - is_tilde_fence: bool, - fence_len: usize, -) -> bool { - p.lookahead(|p| is_valid_closing_fence(p, is_tilde_fence, fence_len)) + p.lookahead(|p| line_has_closing_fence(p, is_tilde_fence, fence_len)) } fn skip_fenced_content_indent(p: &mut MarkdownParser, indent: usize) { @@ -298,66 +355,26 @@ fn skip_fenced_content_indent(p: &mut MarkdownParser, indent: usize) { } fn line_has_closing_fence(p: &MarkdownParser, is_tilde_fence: bool, fence_len: usize) -> bool { - let start: usize = p.cur_range().start().into(); - let source = p.source().text(); - if start > source.len() { + let Some((start, source)) = get_source_context(p) else { return false; - } - - let before = &source[..start]; - let last_newline_pos = before.rfind(['\n', '\r']); - let line_start = match last_newline_pos { - Some(pos) => { - let bytes = before.as_bytes(); - if bytes.get(pos) == Some(&b'\r') && bytes.get(pos + 1) == Some(&b'\n') { - pos + 2 - } else { - pos + 1 - } - } - None => 0, }; - let prefix = &source[line_start..start]; - if !prefix.chars().all(|c| c == ' ' || c == '\t') { + let line_start = find_line_start(&source[..start]); + + if !is_whitespace_prefix(source, start, line_start) { return false; } - let mut idx = line_start; - let mut column = 0usize; let list_indent = p.state().list_item_required_indent; - while column < list_indent { - match source.as_bytes().get(idx).copied() { - Some(b' ') => { - column += 1; - idx += 1; - } - Some(b'\t') => { - column += 4 - (column % 4); - idx += 1; - } - _ => return false, - } - } + // Skip required list indent (must have enough whitespace) + let Some(idx) = consume_indent(source.as_bytes(), line_start, list_indent, true) else { + return false; + }; - let mut extra = 0usize; - while extra < 3 { - match source.as_bytes().get(idx).copied() { - Some(b' ') => { - extra += 1; - idx += 1; - } - Some(b'\t') => { - extra += 4 - (extra % 4); - if extra > 3 { - break; - } - idx += 1; - } - _ => break, 
- } - } + // Skip optional extra indent (up to 3 spaces per CommonMark) + // This always succeeds since required=false + let idx = consume_indent(source.as_bytes(), idx, 3, false).unwrap(); let fence_char = if is_tilde_fence { b'~' } else { b'`' }; let mut fence_count = 0usize; @@ -382,37 +399,20 @@ fn is_line_start_within_indent(p: &MarkdownParser, max_indent: usize) -> bool { return true; } - let start: usize = p.cur_range().start().into(); - let source = p.source().text(); - if start > source.len() { + let Some((start, source)) = get_source_context(p) else { return false; - } + }; let virtual_start: usize = match p.state().virtual_line_start { Some(virtual_start) => virtual_start.into(), - None => { - let before = &source[..start]; - let last_newline_pos = before.rfind(['\n', '\r']); - match last_newline_pos { - Some(pos) => { - let bytes = before.as_bytes(); - if bytes.get(pos) == Some(&b'\r') && bytes.get(pos + 1) == Some(&b'\n') { - pos + 2 - } else { - pos + 1 - } - } - None => 0, - } - } + None => find_line_start(&source[..start]), }; - let prefix = &source[virtual_start..start]; - if !prefix.chars().all(|c| c == ' ' || c == '\t') { + if !is_whitespace_prefix(source, start, virtual_start) { return false; } - let mut indent = prefix + let mut indent = source[virtual_start..start] .chars() .fold(0usize, |count, c| count + if c == '\t' { 4 } else { 1 }); diff --git a/crates/biome_markdown_parser/src/syntax/header.rs b/crates/biome_markdown_parser/src/syntax/header.rs index db31ca172263..1dfefc0ed1fd 100644 --- a/crates/biome_markdown_parser/src/syntax/header.rs +++ b/crates/biome_markdown_parser/src/syntax/header.rs @@ -61,21 +61,28 @@ pub(crate) fn parse_header(p: &mut MarkdownParser) -> ParsedSyntax { return Absent; } - let m = p.start(); - - p.skip_line_indent(3); - - // Parse opening hashes (MdHashList containing MdHash nodes) - let hash_count = parse_hash_list(p); + // Check hash count BEFORE starting to parse (via lookahead). 
+ // The lexer emits all consecutive `#` chars as a single HASH token, + // so we need to verify the token length doesn't exceed 6 before consuming it. + let hash_count = p.lookahead(|p| { + p.skip_line_indent(3); + if p.at(T![#]) { p.cur_text().len() } else { 0 } + }); // Validate hash count (must be 1-6) // Diagnostic for >6 hashes is emitted in parse_any_block before try_parse if hash_count > MAX_HEADER_HASHES { - // Not a valid header - abandon and let it be parsed as paragraph - m.abandon(p); + // Not a valid header - let it be parsed as paragraph return Absent; } + let m = p.start(); + + p.skip_line_indent(3); + + // Parse opening hashes (MdHashList containing MdHash nodes) + parse_hash_list(p); + // Per CommonMark §4.2: opening hashes must be followed by space, tab, or end of line. // `#foo` is NOT a valid header; `# foo`, `#\tfoo`, or `#\n` are valid. // Check if the next token has preceding whitespace or we're at EOL/EOF. @@ -103,19 +110,26 @@ pub(crate) fn parse_header(p: &mut MarkdownParser) -> ParsedSyntax { Present(m.complete(p, MD_HEADER)) } -/// Parse a list of hash tokens as MdHashList containing MdHash nodes. -/// Returns the number of hashes parsed. +/// Parse the opening hash sequence for an ATX header. +/// +/// The lexer emits all consecutive `#` characters as a single HASH token. +/// We determine the heading level from the token's text length. +/// +/// Creates MdHashList containing a single MdHash node that wraps the token. +/// +/// Returns the number of hashes (heading level). 
fn parse_hash_list(p: &mut MarkdownParser) -> usize { let m = p.start(); - let mut count = 0; - - while p.at(T![#]) { + let count = if p.at(T![#]) { + let len = p.cur_text().len(); + // Wrap the HASH token in an MdHash node to match grammar let hash_m = p.start(); p.bump(T![#]); hash_m.complete(p, MD_HASH); - count += 1; - } - + len + } else { + 0 + }; m.complete(p, MD_HASH_LIST); count } @@ -167,8 +181,8 @@ fn parse_header_content(p: &mut MarkdownParser) { /// (NEWLINE or EOF), and NOT preceded by a line break (which would /// indicate a new block, not trailing hashes). /// -/// Note: NEWLINE is an explicit token, so we check `at_inline_end()` after -/// consuming hashes to see if we've reached end of line. +/// Note: The lexer emits all consecutive `#` characters as a single HASH token, +/// so we just need to consume that one token and check for end of line. fn is_trailing_hash_sequence(p: &mut MarkdownParser) -> bool { if !p.at(T![#]) { return false; @@ -176,10 +190,10 @@ fn is_trailing_hash_sequence(p: &mut MarkdownParser) -> bool { let checkpoint = p.checkpoint(); - while p.at(T![#]) { - p.bump(T![#]); - } + // Consume the single HASH token (contains all consecutive hashes) + p.bump(T![#]); + // Skip any trailing whitespace after hashes while p.at(MD_TEXTUAL_LITERAL) { let text = p.cur_text(); if text.chars().all(|c| c == ' ' || c == '\t') { @@ -218,10 +232,14 @@ fn at_trailing_hashes_start(p: &mut MarkdownParser) -> bool { /// /// Per CommonMark spec, a closing sequence of `#` characters is optional. /// It must be at the end of the line, preceded by optional whitespace. +/// +/// The lexer emits all consecutive `#` characters as a single HASH token. +/// We wrap it in an MdHash node to match the grammar. 
fn parse_trailing_hashes(p: &mut MarkdownParser) { let m = p.start(); if at_trailing_hashes_start(p) { + // Skip whitespace before trailing hashes while p.at(MD_TEXTUAL_LITERAL) { let text = p.cur_text(); if text.chars().all(|c| c == ' ' || c == '\t') { @@ -231,9 +249,8 @@ fn parse_trailing_hashes(p: &mut MarkdownParser) { } } - // Only parse hashes that are on the same line - // Stop if we hit end of line (NEWLINE, EOF, or preceding line break) - while p.at(T![#]) && !p.at_inline_end() { + // Consume the trailing hash token and wrap in MdHash node + if p.at(T![#]) && !p.at_inline_end() { let hash_m = p.start(); p.bump(T![#]); hash_m.complete(p, MD_HASH); diff --git a/crates/biome_markdown_parser/src/to_html.rs b/crates/biome_markdown_parser/src/to_html.rs index 7b19b457eb68..35e1b04b89b0 100644 --- a/crates/biome_markdown_parser/src/to_html.rs +++ b/crates/biome_markdown_parser/src/to_html.rs @@ -478,7 +478,17 @@ fn strip_paragraph_indent(content: &str) -> String { /// Render an ATX header (# style). fn render_atx_header(header: &MdHeader, ctx: &HtmlRenderContext, out: &mut String) { - let level = header.before().len().clamp(1, 6); + // Count total hash characters in the before list. + // The lexer emits all consecutive `#` chars as a single HASH token, + // so we sum the text lengths of all hash tokens. + // Use text_trimmed() to exclude any leading trivia (skipped indentation spaces). 
+ let level = header + .before() + .iter() + .filter_map(|h| h.hash_token().ok()) + .map(|tok| tok.text_trimmed().len()) + .sum::() + .clamp(1, 6); out.push_str(" usize { + let last_newline_pos = before.rfind(['\n', '\r']); + match last_newline_pos { + Some(pos) => { + let bytes = before.as_bytes(); + // Handle CRLF: if we found \r and next char is \n, skip both + if bytes.get(pos) == Some(&b'\r') && bytes.get(pos + 1) == Some(&b'\n') { + pos + 2 + } else { + pos + 1 + } + } + None => 0, + } +} + pub(crate) struct MarkdownTokenSource<'source> { lexer: BufferedLexer>, @@ -94,21 +114,7 @@ impl<'source> MarkdownTokenSource<'source> { let start: usize = range.start().into(); let source = self.lexer.source(); - let before_token = &source[..start]; - - // Find the last newline before current token - let last_newline_pos = before_token.rfind(['\n', '\r']); - let line_start = match last_newline_pos { - Some(pos) => { - let bytes = before_token.as_bytes(); - if bytes.get(pos) == Some(&b'\r') && bytes.get(pos + 1) == Some(&b'\n') { - pos + 2 - } else { - pos + 1 - } - } - None => 0, - }; + let line_start = find_line_start(&source[..start]); let line = &source[line_start..]; let mut count = 0usize; @@ -131,19 +137,7 @@ impl<'source> MarkdownTokenSource<'source> { let source = self.lexer.source(); let before_token = &source[..start]; - - let last_newline_pos = before_token.rfind(['\n', '\r']); - let line_start = match last_newline_pos { - Some(pos) => { - let bytes = before_token.as_bytes(); - if bytes.get(pos) == Some(&b'\r') && bytes.get(pos + 1) == Some(&b'\n') { - pos + 2 - } else { - pos + 1 - } - } - None => 0, - }; + let line_start = find_line_start(before_token); source[line_start..start] .chars() diff --git a/crates/biome_markdown_parser/tests/commonmark_spec.rs b/crates/biome_markdown_parser/tests/commonmark_spec.rs index 20aba18217c8..df715286cf8b 100644 --- a/crates/biome_markdown_parser/tests/commonmark_spec.rs +++ 
b/crates/biome_markdown_parser/tests/commonmark_spec.rs @@ -104,7 +104,6 @@ fn commonmark_spec_compliance() { std::collections::HashMap::new(); let log_progress = std::env::var("CMARK_PROGRESS").is_ok(); - let mut bogus_count = 0; for (index, test) in tests.iter().enumerate() { if log_progress { println!( @@ -119,7 +118,6 @@ fn commonmark_spec_compliance() { // Handle bogus nodes gracefully - count as failure instead of panicking let Some(document) = MdDocument::cast(parsed.syntax()) else { - bogus_count += 1; let section_entry = section_stats.entry(test.section.clone()).or_insert((0, 0)); section_entry.1 += 1; failed.push(FailedTest { @@ -259,7 +257,7 @@ fn debug_single_example() { let tests: Vec = serde_json::from_str(SPEC_JSON).expect("Failed to parse spec.json"); // Change this to debug a specific example - let example_num = 259; + let example_num = 228; if let Some(test) = tests.iter().find(|t| t.example == example_num) { println!("Example {}: {}", test.example, test.section); @@ -268,12 +266,20 @@ fn debug_single_example() { let parsed = parse_markdown(&test.markdown); - println!("AST:"); - println!("{:#?}", parsed.tree()); + println!("CST (raw syntax):"); + println!("{:#?}", parsed.syntax()); println!(); - println!("CST:"); - println!("{:#?}", parsed.syntax()); + println!("AST:"); + if parsed.syntax().kind() == biome_markdown_syntax::MarkdownSyntaxKind::MD_DOCUMENT { + println!("{:#?}", parsed.tree()); + } else { + println!( + "Cannot cast to MdDocument - root is {:?}", + parsed.syntax().kind() + ); + } + println!(); println!(); if parsed.has_errors() { diff --git a/crates/biome_markdown_parser/tests/md_test_suite/error/too_many_hashes.md.snap b/crates/biome_markdown_parser/tests/md_test_suite/error/too_many_hashes.md.snap index 513faad7c059..ce13388fe54f 100644 --- a/crates/biome_markdown_parser/tests/md_test_suite/error/too_many_hashes.md.snap +++ b/crates/biome_markdown_parser/tests/md_test_suite/error/too_many_hashes.md.snap @@ -19,25 +19,7 @@ MdDocument 
{ MdParagraph { list: MdInlineItemList [ MdTextual { - value_token: MD_TEXTUAL_LITERAL@0..1 "#" [] [], - }, - MdTextual { - value_token: MD_TEXTUAL_LITERAL@1..2 "#" [] [], - }, - MdTextual { - value_token: MD_TEXTUAL_LITERAL@2..3 "#" [] [], - }, - MdTextual { - value_token: MD_TEXTUAL_LITERAL@3..4 "#" [] [], - }, - MdTextual { - value_token: MD_TEXTUAL_LITERAL@4..5 "#" [] [], - }, - MdTextual { - value_token: MD_TEXTUAL_LITERAL@5..6 "#" [] [], - }, - MdTextual { - value_token: MD_TEXTUAL_LITERAL@6..7 "#" [] [], + value_token: MD_TEXTUAL_LITERAL@0..7 "#######" [] [], }, MdTextual { value_token: MD_TEXTUAL_LITERAL@7..32 " This has too many hashes" [] [], @@ -61,23 +43,11 @@ MdDocument { 1: MD_BLOCK_LIST@0..33 0: MD_PARAGRAPH@0..33 0: MD_INLINE_ITEM_LIST@0..33 - 0: MD_TEXTUAL@0..1 - 0: MD_TEXTUAL_LITERAL@0..1 "#" [] [] - 1: MD_TEXTUAL@1..2 - 0: MD_TEXTUAL_LITERAL@1..2 "#" [] [] - 2: MD_TEXTUAL@2..3 - 0: MD_TEXTUAL_LITERAL@2..3 "#" [] [] - 3: MD_TEXTUAL@3..4 - 0: MD_TEXTUAL_LITERAL@3..4 "#" [] [] - 4: MD_TEXTUAL@4..5 - 0: MD_TEXTUAL_LITERAL@4..5 "#" [] [] - 5: MD_TEXTUAL@5..6 - 0: MD_TEXTUAL_LITERAL@5..6 "#" [] [] - 6: MD_TEXTUAL@6..7 - 0: MD_TEXTUAL_LITERAL@6..7 "#" [] [] - 7: MD_TEXTUAL@7..32 + 0: MD_TEXTUAL@0..7 + 0: MD_TEXTUAL_LITERAL@0..7 "#######" [] [] + 1: MD_TEXTUAL@7..32 0: MD_TEXTUAL_LITERAL@7..32 " This has too many hashes" [] [] - 8: MD_TEXTUAL@32..33 + 2: MD_TEXTUAL@32..33 0: MD_TEXTUAL_LITERAL@32..33 "\n" [] [] 1: (empty) 2: EOF@33..33 "" [] [] diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/edge_cases.md.snap b/crates/biome_markdown_parser/tests/md_test_suite/ok/edge_cases.md.snap index 14af37a7897b..f48ed0d022d2 100644 --- a/crates/biome_markdown_parser/tests/md_test_suite/ok/edge_cases.md.snap +++ b/crates/biome_markdown_parser/tests/md_test_suite/ok/edge_cases.md.snap @@ -1,6 +1,5 @@ --- source: crates/biome_markdown_parser/tests/spec_test.rs -assertion_line: 131 expression: snapshot --- ## Input @@ -66,10 +65,7 @@ MdDocument { 
MdParagraph { list: MdInlineItemList [ MdTextual { - value_token: MD_TEXTUAL_LITERAL@17..18 "#" [] [], - }, - MdTextual { - value_token: MD_TEXTUAL_LITERAL@18..19 "#" [] [], + value_token: MD_TEXTUAL_LITERAL@17..19 "##" [] [], }, MdTextual { value_token: MD_TEXTUAL_LITERAL@19..30 "AlsoNoSpace" [] [], @@ -197,10 +193,7 @@ MdDocument { MdHeader { before: MdHashList [ MdHash { - hash_token: HASH@197..198 "#" [] [], - }, - MdHash { - hash_token: HASH@198..199 "#" [] [], + hash_token: HASH@197..199 "##" [] [], }, ], content: MdParagraph { @@ -385,13 +378,11 @@ MdDocument { 0: NEWLINE@16..17 "\n" [] [] 2: MD_PARAGRAPH@17..31 0: MD_INLINE_ITEM_LIST@17..31 - 0: MD_TEXTUAL@17..18 - 0: MD_TEXTUAL_LITERAL@17..18 "#" [] [] - 1: MD_TEXTUAL@18..19 - 0: MD_TEXTUAL_LITERAL@18..19 "#" [] [] - 2: MD_TEXTUAL@19..30 + 0: MD_TEXTUAL@17..19 + 0: MD_TEXTUAL_LITERAL@17..19 "##" [] [] + 1: MD_TEXTUAL@19..30 0: MD_TEXTUAL_LITERAL@19..30 "AlsoNoSpace" [] [] - 3: MD_TEXTUAL@30..31 + 2: MD_TEXTUAL@30..31 0: MD_TEXTUAL_LITERAL@30..31 "\n" [] [] 1: (empty) 3: MD_NEWLINE@31..32 @@ -470,10 +461,8 @@ MdDocument { 0: NEWLINE@196..197 "\n" [] [] 13: MD_HEADER@197..224 0: MD_HASH_LIST@197..199 - 0: MD_HASH@197..198 - 0: HASH@197..198 "#" [] [] - 1: MD_HASH@198..199 - 0: HASH@198..199 "#" [] [] + 0: MD_HASH@197..199 + 0: HASH@197..199 "##" [] [] 1: MD_PARAGRAPH@199..224 0: MD_INLINE_ITEM_LIST@199..224 0: MD_TEXTUAL@199..224 diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/header.md.snap b/crates/biome_markdown_parser/tests/md_test_suite/ok/header.md.snap index 03b5a2a8d685..902ced97c7bf 100644 --- a/crates/biome_markdown_parser/tests/md_test_suite/ok/header.md.snap +++ b/crates/biome_markdown_parser/tests/md_test_suite/ok/header.md.snap @@ -1,6 +1,5 @@ --- source: crates/biome_markdown_parser/tests/spec_test.rs -assertion_line: 131 expression: snapshot --- ## Input @@ -58,10 +57,7 @@ MdDocument { MdHeader { before: MdHashList [ MdHash { - hash_token: HASH@13..14 "#" [] [], - }, - MdHash 
{ - hash_token: HASH@14..15 "#" [] [], + hash_token: HASH@13..15 "##" [] [], }, ], content: MdParagraph { @@ -83,13 +79,7 @@ MdDocument { MdHeader { before: MdHashList [ MdHash { - hash_token: HASH@27..28 "#" [] [], - }, - MdHash { - hash_token: HASH@28..29 "#" [] [], - }, - MdHash { - hash_token: HASH@29..30 "#" [] [], + hash_token: HASH@27..30 "###" [] [], }, ], content: MdParagraph { @@ -111,16 +101,7 @@ MdDocument { MdHeader { before: MdHashList [ MdHash { - hash_token: HASH@42..43 "#" [] [], - }, - MdHash { - hash_token: HASH@43..44 "#" [] [], - }, - MdHash { - hash_token: HASH@44..45 "#" [] [], - }, - MdHash { - hash_token: HASH@45..46 "#" [] [], + hash_token: HASH@42..46 "####" [] [], }, ], content: MdParagraph { @@ -142,19 +123,7 @@ MdDocument { MdHeader { before: MdHashList [ MdHash { - hash_token: HASH@58..59 "#" [] [], - }, - MdHash { - hash_token: HASH@59..60 "#" [] [], - }, - MdHash { - hash_token: HASH@60..61 "#" [] [], - }, - MdHash { - hash_token: HASH@61..62 "#" [] [], - }, - MdHash { - hash_token: HASH@62..63 "#" [] [], + hash_token: HASH@58..63 "#####" [] [], }, ], content: MdParagraph { @@ -176,22 +145,7 @@ MdDocument { MdHeader { before: MdHashList [ MdHash { - hash_token: HASH@75..76 "#" [] [], - }, - MdHash { - hash_token: HASH@76..77 "#" [] [], - }, - MdHash { - hash_token: HASH@77..78 "#" [] [], - }, - MdHash { - hash_token: HASH@78..79 "#" [] [], - }, - MdHash { - hash_token: HASH@79..80 "#" [] [], - }, - MdHash { - hash_token: HASH@80..81 "#" [] [], + hash_token: HASH@75..81 "######" [] [], }, ], content: MdParagraph { @@ -239,10 +193,7 @@ MdDocument { MdHeader { before: MdHashList [ MdHash { - hash_token: HASH@112..113 "#" [] [], - }, - MdHash { - hash_token: HASH@113..114 "#" [] [], + hash_token: HASH@112..114 "##" [] [], }, ], content: MdParagraph { @@ -255,10 +206,7 @@ MdDocument { }, after: MdHashList [ MdHash { - hash_token: HASH@132..134 "#" [Skipped(" ")] [], - }, - MdHash { - hash_token: HASH@134..135 "#" [] [], + hash_token: 
HASH@132..135 "##" [Skipped(" ")] [], }, ], }, @@ -271,13 +219,7 @@ MdDocument { MdHeader { before: MdHashList [ MdHash { - hash_token: HASH@137..138 "#" [] [], - }, - MdHash { - hash_token: HASH@138..139 "#" [] [], - }, - MdHash { - hash_token: HASH@139..140 "#" [] [], + hash_token: HASH@137..140 "###" [] [], }, ], content: MdParagraph { @@ -292,22 +234,13 @@ MdDocument { value_token: MD_TEXTUAL_LITERAL@148..157 " content " [] [], }, MdTextual { - value_token: MD_TEXTUAL_LITERAL@157..158 "#" [] [], - }, - MdTextual { - value_token: MD_TEXTUAL_LITERAL@158..159 "#" [] [], + value_token: MD_TEXTUAL_LITERAL@157..159 "##" [] [], }, MdTextual { value_token: MD_TEXTUAL_LITERAL@159..165 " with " [] [], }, MdTextual { - value_token: MD_TEXTUAL_LITERAL@165..166 "#" [] [], - }, - MdTextual { - value_token: MD_TEXTUAL_LITERAL@166..167 "#" [] [], - }, - MdTextual { - value_token: MD_TEXTUAL_LITERAL@167..168 "#" [] [], + value_token: MD_TEXTUAL_LITERAL@165..168 "###" [] [], }, MdTextual { value_token: MD_TEXTUAL_LITERAL@168..177 " trailing" [] [], @@ -317,16 +250,7 @@ MdDocument { }, after: MdHashList [ MdHash { - hash_token: HASH@177..179 "#" [Skipped(" ")] [], - }, - MdHash { - hash_token: HASH@179..180 "#" [] [], - }, - MdHash { - hash_token: HASH@180..181 "#" [] [], - }, - MdHash { - hash_token: HASH@181..182 "#" [] [], + hash_token: HASH@177..182 "####" [Skipped(" ")] [], }, ], }, @@ -360,10 +284,8 @@ MdDocument { 0: NEWLINE@12..13 "\n" [] [] 3: MD_HEADER@13..25 0: MD_HASH_LIST@13..15 - 0: MD_HASH@13..14 - 0: HASH@13..14 "#" [] [] - 1: MD_HASH@14..15 - 0: HASH@14..15 "#" [] [] + 0: MD_HASH@13..15 + 0: HASH@13..15 "##" [] [] 1: MD_PARAGRAPH@15..25 0: MD_INLINE_ITEM_LIST@15..25 0: MD_TEXTUAL@15..25 @@ -376,12 +298,8 @@ MdDocument { 0: NEWLINE@26..27 "\n" [] [] 6: MD_HEADER@27..40 0: MD_HASH_LIST@27..30 - 0: MD_HASH@27..28 - 0: HASH@27..28 "#" [] [] - 1: MD_HASH@28..29 - 0: HASH@28..29 "#" [] [] - 2: MD_HASH@29..30 - 0: HASH@29..30 "#" [] [] + 0: MD_HASH@27..30 + 0: 
HASH@27..30 "###" [] [] 1: MD_PARAGRAPH@30..40 0: MD_INLINE_ITEM_LIST@30..40 0: MD_TEXTUAL@30..40 @@ -394,14 +312,8 @@ MdDocument { 0: NEWLINE@41..42 "\n" [] [] 9: MD_HEADER@42..56 0: MD_HASH_LIST@42..46 - 0: MD_HASH@42..43 - 0: HASH@42..43 "#" [] [] - 1: MD_HASH@43..44 - 0: HASH@43..44 "#" [] [] - 2: MD_HASH@44..45 - 0: HASH@44..45 "#" [] [] - 3: MD_HASH@45..46 - 0: HASH@45..46 "#" [] [] + 0: MD_HASH@42..46 + 0: HASH@42..46 "####" [] [] 1: MD_PARAGRAPH@46..56 0: MD_INLINE_ITEM_LIST@46..56 0: MD_TEXTUAL@46..56 @@ -414,16 +326,8 @@ MdDocument { 0: NEWLINE@57..58 "\n" [] [] 12: MD_HEADER@58..73 0: MD_HASH_LIST@58..63 - 0: MD_HASH@58..59 - 0: HASH@58..59 "#" [] [] - 1: MD_HASH@59..60 - 0: HASH@59..60 "#" [] [] - 2: MD_HASH@60..61 - 0: HASH@60..61 "#" [] [] - 3: MD_HASH@61..62 - 0: HASH@61..62 "#" [] [] - 4: MD_HASH@62..63 - 0: HASH@62..63 "#" [] [] + 0: MD_HASH@58..63 + 0: HASH@58..63 "#####" [] [] 1: MD_PARAGRAPH@63..73 0: MD_INLINE_ITEM_LIST@63..73 0: MD_TEXTUAL@63..73 @@ -436,18 +340,8 @@ MdDocument { 0: NEWLINE@74..75 "\n" [] [] 15: MD_HEADER@75..91 0: MD_HASH_LIST@75..81 - 0: MD_HASH@75..76 - 0: HASH@75..76 "#" [] [] - 1: MD_HASH@76..77 - 0: HASH@76..77 "#" [] [] - 2: MD_HASH@77..78 - 0: HASH@77..78 "#" [] [] - 3: MD_HASH@78..79 - 0: HASH@78..79 "#" [] [] - 4: MD_HASH@79..80 - 0: HASH@79..80 "#" [] [] - 5: MD_HASH@80..81 - 0: HASH@80..81 "#" [] [] + 0: MD_HASH@75..81 + 0: HASH@75..81 "######" [] [] 1: MD_PARAGRAPH@81..91 0: MD_INLINE_ITEM_LIST@81..91 0: MD_TEXTUAL@81..91 @@ -476,32 +370,24 @@ MdDocument { 0: NEWLINE@111..112 "\n" [] [] 21: MD_HEADER@112..135 0: MD_HASH_LIST@112..114 - 0: MD_HASH@112..113 - 0: HASH@112..113 "#" [] [] - 1: MD_HASH@113..114 - 0: HASH@113..114 "#" [] [] + 0: MD_HASH@112..114 + 0: HASH@112..114 "##" [] [] 1: MD_PARAGRAPH@114..132 0: MD_INLINE_ITEM_LIST@114..132 0: MD_TEXTUAL@114..132 0: MD_TEXTUAL_LITERAL@114..132 " Multiple trailing" [] [] 1: (empty) 2: MD_HASH_LIST@132..135 - 0: MD_HASH@132..134 - 0: HASH@132..134 "#" [Skipped(" ")] 
[] - 1: MD_HASH@134..135 - 0: HASH@134..135 "#" [] [] + 0: MD_HASH@132..135 + 0: HASH@132..135 "##" [Skipped(" ")] [] 22: MD_NEWLINE@135..136 0: NEWLINE@135..136 "\n" [] [] 23: MD_NEWLINE@136..137 0: NEWLINE@136..137 "\n" [] [] 24: MD_HEADER@137..182 0: MD_HASH_LIST@137..140 - 0: MD_HASH@137..138 - 0: HASH@137..138 "#" [] [] - 1: MD_HASH@138..139 - 0: HASH@138..139 "#" [] [] - 2: MD_HASH@139..140 - 0: HASH@139..140 "#" [] [] + 0: MD_HASH@137..140 + 0: HASH@137..140 "###" [] [] 1: MD_PARAGRAPH@140..177 0: MD_INLINE_ITEM_LIST@140..177 0: MD_TEXTUAL@140..147 @@ -510,30 +396,18 @@ MdDocument { 0: MD_TEXTUAL_LITERAL@147..148 "#" [] [] 2: MD_TEXTUAL@148..157 0: MD_TEXTUAL_LITERAL@148..157 " content " [] [] - 3: MD_TEXTUAL@157..158 - 0: MD_TEXTUAL_LITERAL@157..158 "#" [] [] - 4: MD_TEXTUAL@158..159 - 0: MD_TEXTUAL_LITERAL@158..159 "#" [] [] - 5: MD_TEXTUAL@159..165 + 3: MD_TEXTUAL@157..159 + 0: MD_TEXTUAL_LITERAL@157..159 "##" [] [] + 4: MD_TEXTUAL@159..165 0: MD_TEXTUAL_LITERAL@159..165 " with " [] [] - 6: MD_TEXTUAL@165..166 - 0: MD_TEXTUAL_LITERAL@165..166 "#" [] [] - 7: MD_TEXTUAL@166..167 - 0: MD_TEXTUAL_LITERAL@166..167 "#" [] [] - 8: MD_TEXTUAL@167..168 - 0: MD_TEXTUAL_LITERAL@167..168 "#" [] [] - 9: MD_TEXTUAL@168..177 + 5: MD_TEXTUAL@165..168 + 0: MD_TEXTUAL_LITERAL@165..168 "###" [] [] + 6: MD_TEXTUAL@168..177 0: MD_TEXTUAL_LITERAL@168..177 " trailing" [] [] 1: (empty) 2: MD_HASH_LIST@177..182 - 0: MD_HASH@177..179 - 0: HASH@177..179 "#" [Skipped(" ")] [] - 1: MD_HASH@179..180 - 0: HASH@179..180 "#" [] [] - 2: MD_HASH@180..181 - 0: HASH@180..181 "#" [] [] - 3: MD_HASH@181..182 - 0: HASH@181..182 "#" [] [] + 0: MD_HASH@177..182 + 0: HASH@177..182 "####" [Skipped(" ")] [] 25: MD_NEWLINE@182..183 0: NEWLINE@182..183 "\n" [] [] 2: EOF@183..183 "" [] [] diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/inline_html_edge_cases.md.snap b/crates/biome_markdown_parser/tests/md_test_suite/ok/inline_html_edge_cases.md.snap index 3ccde7873506..d9c339687d5b 
100644 --- a/crates/biome_markdown_parser/tests/md_test_suite/ok/inline_html_edge_cases.md.snap +++ b/crates/biome_markdown_parser/tests/md_test_suite/ok/inline_html_edge_cases.md.snap @@ -1,6 +1,5 @@ --- source: crates/biome_markdown_parser/tests/spec_test.rs -assertion_line: 131 expression: snapshot --- ## Input @@ -104,10 +103,7 @@ MdDocument { MdHeader { before: MdHashList [ MdHash { - hash_token: HASH@26..27 "#" [] [], - }, - MdHash { - hash_token: HASH@27..28 "#" [] [], + hash_token: HASH@26..28 "##" [] [], }, ], content: MdParagraph { @@ -210,10 +206,7 @@ MdDocument { MdHeader { before: MdHashList [ MdHash { - hash_token: HASH@124..125 "#" [] [], - }, - MdHash { - hash_token: HASH@125..126 "#" [] [], + hash_token: HASH@124..126 "##" [] [], }, ], content: MdParagraph { @@ -312,10 +305,7 @@ MdDocument { MdHeader { before: MdHashList [ MdHash { - hash_token: HASH@232..233 "#" [] [], - }, - MdHash { - hash_token: HASH@233..234 "#" [] [], + hash_token: HASH@232..234 "##" [] [], }, ], content: MdParagraph { @@ -444,10 +434,7 @@ MdDocument { MdHeader { before: MdHashList [ MdHash { - hash_token: HASH@322..323 "#" [] [], - }, - MdHash { - hash_token: HASH@323..324 "#" [] [], + hash_token: HASH@322..324 "##" [] [], }, ], content: MdParagraph { @@ -631,10 +618,7 @@ MdDocument { MdHeader { before: MdHashList [ MdHash { - hash_token: HASH@456..457 "#" [] [], - }, - MdHash { - hash_token: HASH@457..458 "#" [] [], + hash_token: HASH@456..458 "##" [] [], }, ], content: MdParagraph { @@ -705,10 +689,7 @@ MdDocument { MdHeader { before: MdHashList [ MdHash { - hash_token: HASH@553..554 "#" [] [], - }, - MdHash { - hash_token: HASH@554..555 "#" [] [], + hash_token: HASH@553..555 "##" [] [], }, ], content: MdParagraph { @@ -821,10 +802,7 @@ MdDocument { MdHeader { before: MdHashList [ MdHash { - hash_token: HASH@639..640 "#" [] [], - }, - MdHash { - hash_token: HASH@640..641 "#" [] [], + hash_token: HASH@639..641 "##" [] [], }, ], content: MdParagraph { @@ -932,10 +910,7 @@ 
MdDocument { MdHeader { before: MdHashList [ MdHash { - hash_token: HASH@802..803 "#" [] [], - }, - MdHash { - hash_token: HASH@803..804 "#" [] [], + hash_token: HASH@802..804 "##" [] [], }, ], content: MdParagraph { @@ -1076,10 +1051,7 @@ MdDocument { MdHeader { before: MdHashList [ MdHash { - hash_token: HASH@969..970 "#" [] [], - }, - MdHash { - hash_token: HASH@970..971 "#" [] [], + hash_token: HASH@969..971 "##" [] [], }, ], content: MdParagraph { @@ -1332,10 +1304,7 @@ MdDocument { MdHeader { before: MdHashList [ MdHash { - hash_token: HASH@1262..1263 "#" [] [], - }, - MdHash { - hash_token: HASH@1263..1264 "#" [] [], + hash_token: HASH@1262..1264 "##" [] [], }, ], content: MdParagraph { @@ -1456,10 +1425,7 @@ MdDocument { MdHeader { before: MdHashList [ MdHash { - hash_token: HASH@1390..1391 "#" [] [], - }, - MdHash { - hash_token: HASH@1391..1392 "#" [] [], + hash_token: HASH@1390..1392 "##" [] [], }, ], content: MdParagraph { @@ -1528,10 +1494,7 @@ MdDocument { MdHeader { before: MdHashList [ MdHash { - hash_token: HASH@1494..1495 "#" [] [], - }, - MdHash { - hash_token: HASH@1495..1496 "#" [] [], + hash_token: HASH@1494..1496 "##" [] [], }, ], content: MdParagraph { @@ -1670,10 +1633,7 @@ MdDocument { MdHeader { before: MdHashList [ MdHash { - hash_token: HASH@1634..1635 "#" [] [], - }, - MdHash { - hash_token: HASH@1635..1636 "#" [] [], + hash_token: HASH@1634..1636 "##" [] [], }, ], content: MdParagraph { @@ -1778,10 +1738,8 @@ MdDocument { 0: NEWLINE@25..26 "\n" [] [] 3: MD_HEADER@26..44 0: MD_HASH_LIST@26..28 - 0: MD_HASH@26..27 - 0: HASH@26..27 "#" [] [] - 1: MD_HASH@27..28 - 0: HASH@27..28 "#" [] [] + 0: MD_HASH@26..28 + 0: HASH@26..28 "##" [] [] 1: MD_PARAGRAPH@28..44 0: MD_INLINE_ITEM_LIST@28..44 0: MD_TEXTUAL@28..44 @@ -1845,10 +1803,8 @@ MdDocument { 0: NEWLINE@123..124 "\n" [] [] 7: MD_HEADER@124..144 0: MD_HASH_LIST@124..126 - 0: MD_HASH@124..125 - 0: HASH@124..125 "#" [] [] - 1: MD_HASH@125..126 - 0: HASH@125..126 "#" [] [] + 0: 
MD_HASH@124..126 + 0: HASH@124..126 "##" [] [] 1: MD_PARAGRAPH@126..144 0: MD_INLINE_ITEM_LIST@126..144 0: MD_TEXTUAL@126..131 @@ -1910,10 +1866,8 @@ MdDocument { 0: NEWLINE@231..232 "\n" [] [] 11: MD_HEADER@232..247 0: MD_HASH_LIST@232..234 - 0: MD_HASH@232..233 - 0: HASH@232..233 "#" [] [] - 1: MD_HASH@233..234 - 0: HASH@233..234 "#" [] [] + 0: MD_HASH@232..234 + 0: HASH@232..234 "##" [] [] 1: MD_PARAGRAPH@234..247 0: MD_INLINE_ITEM_LIST@234..247 0: MD_TEXTUAL@234..247 @@ -1993,10 +1947,8 @@ MdDocument { 0: NEWLINE@321..322 "\n" [] [] 15: MD_HEADER@322..333 0: MD_HASH_LIST@322..324 - 0: MD_HASH@322..323 - 0: HASH@322..323 "#" [] [] - 1: MD_HASH@323..324 - 0: HASH@323..324 "#" [] [] + 0: MD_HASH@322..324 + 0: HASH@322..324 "##" [] [] 1: MD_PARAGRAPH@324..333 0: MD_INLINE_ITEM_LIST@324..333 0: MD_TEXTUAL@324..333 @@ -2114,10 +2066,8 @@ MdDocument { 0: NEWLINE@455..456 "\n" [] [] 19: MD_HEADER@456..482 0: MD_HASH_LIST@456..458 - 0: MD_HASH@456..457 - 0: HASH@456..457 "#" [] [] - 1: MD_HASH@457..458 - 0: HASH@457..458 "#" [] [] + 0: MD_HASH@456..458 + 0: HASH@456..458 "##" [] [] 1: MD_PARAGRAPH@458..482 0: MD_INLINE_ITEM_LIST@458..482 0: MD_TEXTUAL@458..482 @@ -2161,10 +2111,8 @@ MdDocument { 0: NEWLINE@552..553 "\n" [] [] 23: MD_HEADER@553..570 0: MD_HASH_LIST@553..555 - 0: MD_HASH@553..554 - 0: HASH@553..554 "#" [] [] - 1: MD_HASH@554..555 - 0: HASH@554..555 "#" [] [] + 0: MD_HASH@553..555 + 0: HASH@553..555 "##" [] [] 1: MD_PARAGRAPH@555..570 0: MD_INLINE_ITEM_LIST@555..570 0: MD_TEXTUAL@555..570 @@ -2236,10 +2184,8 @@ MdDocument { 0: NEWLINE@638..639 "\n" [] [] 27: MD_HEADER@639..654 0: MD_HASH_LIST@639..641 - 0: MD_HASH@639..640 - 0: HASH@639..640 "#" [] [] - 1: MD_HASH@640..641 - 0: HASH@640..641 "#" [] [] + 0: MD_HASH@639..641 + 0: HASH@639..641 "##" [] [] 1: MD_PARAGRAPH@641..654 0: MD_INLINE_ITEM_LIST@641..654 0: MD_TEXTUAL@641..654 @@ -2307,10 +2253,8 @@ MdDocument { 0: NEWLINE@801..802 "\n" [] [] 31: MD_HEADER@802..827 0: MD_HASH_LIST@802..804 - 0: 
MD_HASH@802..803 - 0: HASH@802..803 "#" [] [] - 1: MD_HASH@803..804 - 0: HASH@803..804 "#" [] [] + 0: MD_HASH@802..804 + 0: HASH@802..804 "##" [] [] 1: MD_PARAGRAPH@804..827 0: MD_INLINE_ITEM_LIST@804..827 0: MD_TEXTUAL@804..827 @@ -2398,10 +2342,8 @@ MdDocument { 0: NEWLINE@968..969 "\n" [] [] 35: MD_HEADER@969..1001 0: MD_HASH_LIST@969..971 - 0: MD_HASH@969..970 - 0: HASH@969..970 "#" [] [] - 1: MD_HASH@970..971 - 0: HASH@970..971 "#" [] [] + 0: MD_HASH@969..971 + 0: HASH@969..971 "##" [] [] 1: MD_PARAGRAPH@971..1001 0: MD_INLINE_ITEM_LIST@971..1001 0: MD_TEXTUAL@971..1001 @@ -2561,10 +2503,8 @@ MdDocument { 0: NEWLINE@1261..1262 "\n" [] [] 39: MD_HEADER@1262..1308 0: MD_HASH_LIST@1262..1264 - 0: MD_HASH@1262..1263 - 0: HASH@1262..1263 "#" [] [] - 1: MD_HASH@1263..1264 - 0: HASH@1263..1264 "#" [] [] + 0: MD_HASH@1262..1264 + 0: HASH@1262..1264 "##" [] [] 1: MD_PARAGRAPH@1264..1308 0: MD_INLINE_ITEM_LIST@1264..1308 0: MD_TEXTUAL@1264..1279 @@ -2640,10 +2580,8 @@ MdDocument { 0: NEWLINE@1389..1390 "\n" [] [] 43: MD_HEADER@1390..1424 0: MD_HASH_LIST@1390..1392 - 0: MD_HASH@1390..1391 - 0: HASH@1390..1391 "#" [] [] - 1: MD_HASH@1391..1392 - 0: HASH@1391..1392 "#" [] [] + 0: MD_HASH@1390..1392 + 0: HASH@1390..1392 "##" [] [] 1: MD_PARAGRAPH@1392..1424 0: MD_INLINE_ITEM_LIST@1392..1424 0: MD_TEXTUAL@1392..1402 @@ -2687,10 +2625,8 @@ MdDocument { 0: NEWLINE@1493..1494 "\n" [] [] 47: MD_HEADER@1494..1519 0: MD_HASH_LIST@1494..1496 - 0: MD_HASH@1494..1495 - 0: HASH@1494..1495 "#" [] [] - 1: MD_HASH@1495..1496 - 0: HASH@1495..1496 "#" [] [] + 0: MD_HASH@1494..1496 + 0: HASH@1494..1496 "##" [] [] 1: MD_PARAGRAPH@1496..1519 0: MD_INLINE_ITEM_LIST@1496..1519 0: MD_TEXTUAL@1496..1519 @@ -2778,10 +2714,8 @@ MdDocument { 0: NEWLINE@1633..1634 "\n" [] [] 51: MD_HEADER@1634..1647 0: MD_HASH_LIST@1634..1636 - 0: MD_HASH@1634..1635 - 0: HASH@1634..1635 "#" [] [] - 1: MD_HASH@1635..1636 - 0: HASH@1635..1636 "#" [] [] + 0: MD_HASH@1634..1636 + 0: HASH@1634..1636 "##" [] [] 1: 
MD_PARAGRAPH@1636..1647 0: MD_INLINE_ITEM_LIST@1636..1647 0: MD_TEXTUAL@1636..1647 diff --git a/crates/biome_markdown_parser/tests/md_test_suite/ok/inline_html_invalid.md.snap b/crates/biome_markdown_parser/tests/md_test_suite/ok/inline_html_invalid.md.snap index e9c58d4bc941..1c1d0b04cd8e 100644 --- a/crates/biome_markdown_parser/tests/md_test_suite/ok/inline_html_invalid.md.snap +++ b/crates/biome_markdown_parser/tests/md_test_suite/ok/inline_html_invalid.md.snap @@ -1,6 +1,5 @@ --- source: crates/biome_markdown_parser/tests/spec_test.rs -assertion_line: 131 expression: snapshot --- ## Input @@ -76,10 +75,7 @@ MdDocument { MdHeader { before: MdHashList [ MdHash { - hash_token: HASH@86..87 "#" [] [], - }, - MdHash { - hash_token: HASH@87..88 "#" [] [], + hash_token: HASH@86..88 "##" [] [], }, ], content: MdParagraph { @@ -142,10 +138,7 @@ MdDocument { MdHeader { before: MdHashList [ MdHash { - hash_token: HASH@197..198 "#" [] [], - }, - MdHash { - hash_token: HASH@198..199 "#" [] [], + hash_token: HASH@197..199 "##" [] [], }, ], content: MdParagraph { @@ -301,10 +294,7 @@ MdDocument { MdHeader { before: MdHashList [ MdHash { - hash_token: HASH@507..508 "#" [] [], - }, - MdHash { - hash_token: HASH@508..509 "#" [] [], + hash_token: HASH@507..509 "##" [] [], }, ], content: MdParagraph { @@ -454,10 +444,8 @@ MdDocument { 0: NEWLINE@85..86 "\n" [] [] 5: MD_HEADER@86..107 0: MD_HASH_LIST@86..88 - 0: MD_HASH@86..87 - 0: HASH@86..87 "#" [] [] - 1: MD_HASH@87..88 - 0: HASH@87..88 "#" [] [] + 0: MD_HASH@86..88 + 0: HASH@86..88 "##" [] [] 1: MD_PARAGRAPH@88..107 0: MD_INLINE_ITEM_LIST@88..107 0: MD_TEXTUAL@88..107 @@ -497,10 +485,8 @@ MdDocument { 0: NEWLINE@196..197 "\n" [] [] 9: MD_HEADER@197..213 0: MD_HASH_LIST@197..199 - 0: MD_HASH@197..198 - 0: HASH@197..198 "#" [] [] - 1: MD_HASH@198..199 - 0: HASH@198..199 "#" [] [] + 0: MD_HASH@197..199 + 0: HASH@197..199 "##" [] [] 1: MD_PARAGRAPH@199..213 0: MD_INLINE_ITEM_LIST@199..213 0: MD_TEXTUAL@199..213 @@ -602,10 +588,8 @@ 
MdDocument { 0: NEWLINE@506..507 "\n" [] [] 13: MD_HEADER@507..526 0: MD_HASH_LIST@507..509 - 0: MD_HASH@507..508 - 0: HASH@507..508 "#" [] [] - 1: MD_HASH@508..509 - 0: HASH@508..509 "#" [] [] + 0: MD_HASH@507..509 + 0: HASH@507..509 "##" [] [] 1: MD_PARAGRAPH@509..526 0: MD_INLINE_ITEM_LIST@509..526 0: MD_TEXTUAL@509..526 diff --git a/crates/biome_markdown_syntax/src/generated/nodes.rs b/crates/biome_markdown_syntax/src/generated/nodes.rs index 6fab92d37e27..c9e9db0fd573 100644 --- a/crates/biome_markdown_syntax/src/generated/nodes.rs +++ b/crates/biome_markdown_syntax/src/generated/nodes.rs @@ -1138,8 +1138,8 @@ impl MdQuote { pub fn marker_token(&self) -> SyntaxResult { support::required_token(&self.syntax, 0usize) } - pub fn content(&self) -> SyntaxResult { - support::required_node(&self.syntax, 1usize) + pub fn content(&self) -> MdBlockList { + support::list(&self.syntax, 1usize) } } impl Serialize for MdQuote { @@ -1153,7 +1153,7 @@ impl Serialize for MdQuote { #[derive(Serialize)] pub struct MdQuoteFields { pub marker_token: SyntaxResult, - pub content: SyntaxResult, + pub content: MdBlockList, } #[derive(Clone, PartialEq, Eq, Hash)] pub struct MdReferenceImage { @@ -3042,7 +3042,7 @@ impl std::fmt::Debug for MdQuote { "marker_token", &support::DebugSyntaxResult(self.marker_token()), ) - .field("content", &support::DebugSyntaxResult(self.content())) + .field("content", &self.content()) .finish() } else { f.debug_struct("MdQuote").finish() diff --git a/crates/biome_markdown_syntax/src/generated/nodes_mut.rs b/crates/biome_markdown_syntax/src/generated/nodes_mut.rs index 77bbaaddb219..d44555c5be76 100644 --- a/crates/biome_markdown_syntax/src/generated/nodes_mut.rs +++ b/crates/biome_markdown_syntax/src/generated/nodes_mut.rs @@ -440,7 +440,7 @@ impl MdQuote { .splice_slots(0usize..=0usize, once(Some(element.into()))), ) } - pub fn with_content(self, element: AnyMdBlock) -> Self { + pub fn with_content(self, element: MdBlockList) -> Self { Self::unwrap_cast( 
self.syntax .splice_slots(1usize..=1usize, once(Some(element.into_syntax().into()))), diff --git a/xtask/codegen/markdown.ungram b/xtask/codegen/markdown.ungram index 7adbbe29f005..fc74eb66cbe2 100644 --- a/xtask/codegen/markdown.ungram +++ b/xtask/codegen/markdown.ungram @@ -156,7 +156,7 @@ MdLinkBlock = label: MdTextual MdQuote = marker: '>' - content: AnyMdBlock + content: MdBlockList MdBulletListItem = MdBulletList MdOrderedListItem = MdBulletList From 3abfdacaae630a417b50fb4e3ff0fcb74df68c43 Mon Sep 17 00:00:00 2001 From: jfmcdowell <206422+jfmcdowell@users.noreply.github.com> Date: Mon, 19 Jan 2026 18:20:21 -0500 Subject: [PATCH 06/12] refactor(markdown_parser): simplify HTML block parsing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Capture HTML content as raw text rather than implementing all 7 CommonMark §4.6 termination rules. This keeps the parser simple; full HTML parsing can be added via workspace snippets integration in the future. Trade-off: All HTML blocks terminate at blank lines, so not 100% CommonMark §4.6 compliant (types 1-5 have specific terminators like -->, ?>, etc.). --- .../src/syntax/html_block.rs | 406 ++++-------------- 1 file changed, 95 insertions(+), 311 deletions(-) diff --git a/crates/biome_markdown_parser/src/syntax/html_block.rs b/crates/biome_markdown_parser/src/syntax/html_block.rs index ce03cdbc0d04..7865eb63651b 100644 --- a/crates/biome_markdown_parser/src/syntax/html_block.rs +++ b/crates/biome_markdown_parser/src/syntax/html_block.rs @@ -1,22 +1,12 @@ -//! HTML block parsing for Markdown (CommonMark §4.6). +//! HTML block parsing for Markdown. //! -//! Per CommonMark §4.6, there are 7 types of HTML blocks: +//! HTML content is captured as raw text rather than fully parsed. This keeps +//! the markdown parser simple; full HTML parsing can be added via workspace +//! snippets integration in the future (similar to how `