Issue 818 (#1128)

* #1120 * #1122 * #1123 * #1124 * #1125 * #1126 * #1127
jackdewinter · Jul 5, 2024 · 0d55e32 · 0d55e32
1 parent b3d0404
commit 0d55e32
Show file tree

Hide file tree

Showing 11 changed files with 893 additions and 18 deletions.
diff --git a/newdocs/src/changelog.md b/newdocs/src/changelog.md
@@ -8,7 +8,13 @@
 
 ### Fixed
 
-- None
+- [Issue 1120](https://github.com/jackdewinter/pymarkdown/issues/1120)
+- [Issue 1122](https://github.com/jackdewinter/pymarkdown/issues/1122)
+- [Issue 1123](https://github.com/jackdewinter/pymarkdown/issues/1123)
+- [Issue 1124](https://github.com/jackdewinter/pymarkdown/issues/1124)
+- [Issue 1125](https://github.com/jackdewinter/pymarkdown/issues/1125)
+- [Issue 1126](https://github.com/jackdewinter/pymarkdown/issues/1126)
+- [Issue 1127](https://github.com/jackdewinter/pymarkdown/issues/1127)
 
 ### Changed
 

diff --git a/pymarkdown/block_quotes/block_quote_count_helper.py b/pymarkdown/block_quotes/block_quote_count_helper.py
@@ -218,6 +218,10 @@ def __should_continue_processing(
                 ) = BlockQuoteCountHelper.__is_special_double_block_case(
                     parser_state, adjusted_line, start_index, current_count, stack_count
                 )
+                if not continue_processing and current_count < stack_count:
+                    continue_proc, stack_token_index = BlockQuoteCountHelper.__xx_part_one(parser_state, start_index, current_count, stack_count)
+                    if continue_proc:
+                        current_count, start_index, last_block_quote_index = BlockQuoteCountHelper.__xx_part_two(parser_state, stack_token_index, start_index, current_count, stack_count, last_block_quote_index)
             else:
                 continue_processing = True
         return (
@@ -229,6 +233,51 @@ def __should_continue_processing(
             current_count,
         )
 
+    @staticmethod
+    def __xx_part_one(
+        parser_state: ParserState,
+        start_index: int,
+        current_count: int,
+        stack_count: int,
+    ) -> Tuple[bool, int]:
+        """
+        Locate the stack entry that follows the last block quote accounted for
+        by the `>` characters found before `start_index` on the original line.
+
+        :param parser_state: Parser state supplying the token stack and the
+            original line being parsed.
+        :param start_index: Index into the original line; only the text before
+            this index is scanned for `>` characters.
+        :param current_count: Number of block quotes counted so far for this line.
+        :param stack_count: Unused here; kept so the call site's argument list
+            stays unchanged.
+        :return: `(False, -1)` if the top of the stack is a fenced code block or
+            the line holds more `>` characters than `current_count`.  Otherwise
+            a tuple of (True if the stack entry after the matched block quote is
+            NOT a block quote, index of that stack entry).
+        """
+        if parser_state.token_stack[-1].is_fenced_code_block:
+            return False, -1
+        block_quote_character_count = ParserHelper.count_characters_in_text(
+            parser_state.original_line_to_parse[:start_index], ">"
+        )
+        if block_quote_character_count > current_count:
+            return False, -1
+
+        # Walk the stack until we have passed as many block quotes as there
+        # are `>` characters in the consumed prefix.
+        count_block_quotes = 0
+        matched_stack_index = -1
+        for stack_token_index, stack_token in enumerate(parser_state.token_stack):
+            if stack_token.is_block_quote:
+                count_block_quotes += 1
+                if count_block_quotes == block_quote_character_count:
+                    matched_stack_index = stack_token_index
+                    break
+
+        # A loop index can never equal the length of the sequence it iterates,
+        # so track the match explicitly to make this invariant check effective.
+        assert matched_stack_index != -1, "should have completed before this"
+        next_stack_index = matched_stack_index + 1
+        assert next_stack_index < len(
+            parser_state.token_stack
+        ), "matched block quote must not be the last entry on the stack"
+        return (
+            not parser_state.token_stack[next_stack_index].is_block_quote,
+            next_stack_index,
+        )
+
+    @staticmethod
+    def __xx_part_two(
+        parser_state: ParserState,
+        stack_index: int,
+        start_index: int,
+        current_count: int,
+        stack_count: int,
+        last_block_quote_index: int,
+    ) -> Tuple[int, int, int]:
+        """
+        Check whether the original line has a `>` at the indent level of the
+        innermost list in the run of lists starting at `stack_index`, and if so
+        count it as one more block quote.
+
+        :param parser_state: Parser state supplying the token stack and the
+            original line being parsed.
+        :param stack_index: Index of a list entry on the token stack.
+        :param start_index: Index into the original line where the unconsumed
+            text starts.
+        :param current_count: Number of block quotes counted so far for this line.
+        :param stack_count: Number of block quotes on the stack; asserted to be
+            exactly one more than `current_count` once the prefix check passes.
+        :param last_block_quote_index: Current value, returned unchanged when no
+            extra block quote is found.
+        :return: Tuple of (current count, start index, last block quote index).
+            When the extra `>` is found, the count is incremented and both
+            indices point just past the `>` and one optional following space.
+        """
+        # At this point, we have a "prefix", which may be partial, that has the
+        # current_count of > characters, and ends with a list. If we are here,
+        # we know that previous lines have had at least one more > character and
+        # counted block quote.
+        token_stack = parser_state.token_stack
+        original_line = parser_state.original_line_to_parse
+        assert token_stack[stack_index].is_list, "If not bq, must be a list."
+
+        # Skip over the run of consecutive lists; the bound keeps the scan from
+        # indexing past the end when a list is the last entry on the stack.
+        while stack_index < len(token_stack) and token_stack[stack_index].is_list:
+            stack_index += 1
+        embedded_list_stack_token = token_stack[stack_index - 1]
+        indent_level = embedded_list_stack_token.indent_level
+
+        # Anything other than whitespace between the consumed prefix and the
+        # list's indent means this is not the special case.
+        if original_line[start_index:indent_level].strip():
+            return current_count, start_index, last_block_quote_index
+        assert current_count + 1 == stack_count
+
+        # A line that ends before the list's indent cannot have a `>` there;
+        # treat it like any other non-`>` character instead of raising.
+        if indent_level >= len(original_line) or original_line[indent_level] != ">":
+            return current_count, start_index, last_block_quote_index
+
+        last_block_quote_index = indent_level + 1
+        if (
+            last_block_quote_index < len(original_line)
+            and original_line[last_block_quote_index] == " "
+        ):
+            # Consume the single optional space that follows the `>`.
+            last_block_quote_index += 1
+
+        return current_count + 1, last_block_quote_index, last_block_quote_index
+
     # pylint: enable=too-many-arguments
     @staticmethod
     def __is_special_double_block_case(
@@ -294,6 +343,8 @@ def __increase_stack(
         extracted_whitespace: str,
     ) -> Tuple[int, BlockQuoteData]:
         POGGER.debug("container_level_tokens>>$", container_level_tokens)
+        POGGER.debug("current_count>>$", block_quote_data.current_count)
+        POGGER.debug("stack_count>>$", block_quote_data.stack_count)
         stack_count = block_quote_data.stack_count
         while block_quote_data.current_count > stack_count:
             POGGER.debug(
@@ -560,6 +611,8 @@ def ensure_stack_at_level(
             parser_state, block_quote_data
         )
 
+        POGGER.debug("current_count>>$", block_quote_data.current_count)
+        POGGER.debug("stack_count>>$", block_quote_data.stack_count)
         POGGER.debug(
             "stack_increase_needed>>$, stack_decrease_needed=$",
             stack_increase_needed,

diff --git a/pymarkdown/block_quotes/block_quote_processor.py b/pymarkdown/block_quotes/block_quote_processor.py
@@ -12,6 +12,7 @@
 )
 from pymarkdown.container_blocks.container_grab_bag import ContainerGrabBag
 from pymarkdown.general.constants import Constants
+from pymarkdown.general.parser_helper import ParserHelper
 from pymarkdown.general.parser_logger import ParserLogger
 from pymarkdown.general.parser_state import ParserState
 from pymarkdown.general.position_marker import PositionMarker
@@ -149,6 +150,7 @@ def __handle_block_quote_block_really_start(
         ), "If starting here, we need a block quote count."
         POGGER.debug("handle_block_quote_block>>block-start")
         POGGER.debug("original_line:>:$:<", grab_bag.original_line)
+        POGGER.debug("container_start_bq_count:>:$:<", grab_bag.container_start_bq_count)
         (
             adjusted_text_to_parse,
             adjusted_index_number,
@@ -554,17 +556,15 @@ def __handle_block_quote_section(
             parser_state.token_stack[-1].is_fenced_code_block,
             parser_state.token_stack[-1].is_html_block,
         )
+        POGGER.debug("block_quote_data>>:curr=$:stack=$:", block_quote_data.current_count, block_quote_data.stack_count)
         POGGER.debug("start_index>>:$:", start_index)
+        POGGER.debug("line_to_parse>>:$:", line_to_parse)
+        POGGER.debug("last_block_quote_index>>:$:", last_block_quote_index)
+        POGGER.debug(">>avoid_block_starts>>$", avoid_block_starts)
 
         POGGER.debug("token_stack--$", parser_state.token_stack)
         POGGER.debug(">>container_start_bq_count>>$", container_start_bq_count)
-        POGGER.debug(
-            ">>block_quote_data.current_count>>$", block_quote_data.current_count
-        )
-        POGGER.debug(">>block_quote_data.stack_count>>$", block_quote_data.stack_count)
-        POGGER.debug(">>start_index>>$", start_index)
         POGGER.debug(">>original_start_index>>$", position_marker.index_number)
-        POGGER.debug(">>avoid_block_starts>>$", avoid_block_starts)
 
         if last_block_quote_index != -1:
             POGGER.debug("start_index>>:$:", start_index)
@@ -601,6 +601,83 @@ def __handle_block_quote_section(
 
     # pylint: enable=too-many-arguments
 
+    @staticmethod
+    def __handle_existing_block_quote_fenced_special(
+        parser_state: ParserState, start_index: int, block_quote_data: BlockQuoteData
+    ) -> Tuple[bool, int]:
+        """
+        Locate the stack entry that follows the last block quote accounted for
+        by the `>` characters found before `start_index` on the original line.
+
+        :param parser_state: Parser state supplying the token stack and the
+            original line being parsed.
+        :param start_index: Index into the original line; only the text before
+            this index is scanned for `>` characters.
+        :param block_quote_data: Current/stack block quote counts; the number of
+            `>` characters found must not exceed its `current_count`.
+        :return: Tuple of (True if the stack entry after the matched block quote
+            is itself a block quote, index of that stack entry).  The caller
+            keeps handling the line as fenced code block content only when the
+            first element is True.
+        """
+        block_quote_character_count = ParserHelper.count_characters_in_text(
+            parser_state.original_line_to_parse[:start_index], ">"
+        )
+        assert (
+            block_quote_character_count <= block_quote_data.current_count
+        ), "if not, overreach"
+
+        # Walk the stack until we have passed as many block quotes as there
+        # are `>` characters in the consumed prefix.
+        count_block_quotes = 0
+        matched_stack_index = -1
+        for stack_token_index, stack_token in enumerate(parser_state.token_stack):
+            if stack_token.is_block_quote:
+                count_block_quotes += 1
+                if count_block_quotes == block_quote_character_count:
+                    matched_stack_index = stack_token_index
+                    break
+
+        # A loop index can never equal the length of the sequence it iterates,
+        # so track the match explicitly to make this invariant check effective.
+        assert matched_stack_index != -1, "should have completed before this"
+        next_stack_index = matched_stack_index + 1
+        assert next_stack_index < len(
+            parser_state.token_stack
+        ), "matched block quote must not be the last entry on the stack"
+        process_fenced_block = parser_state.token_stack[next_stack_index].is_block_quote
+        return process_fenced_block, next_stack_index
+
+    @staticmethod
+    def __handle_existing_block_quote_fenced_special_part_two(
+        parser_state: ParserState,
+        stack_index: int,
+        line_to_parse: str,
+        start_index: int,
+        block_quote_data: BlockQuoteData,
+        leaf_tokens: List[MarkdownToken],
+        container_level_tokens: List[MarkdownToken],
+        avoid_block_starts: bool,
+    ) -> Tuple[
+        str,
+        int,
+        List[MarkdownToken],
+        List[MarkdownToken],
+        BlockQuoteData,
+        int,
+        bool,
+        int,
+        Optional[str],
+        bool,
+        Optional[RequeueLineInfo],
+        bool,
+    ]:
+        """
+        Consume the `>` that sits at the indent level of the innermost list in
+        the run of lists starting at `stack_index`, recording the removed prefix
+        on the block quote token that follows those lists on the stack.
+
+        :param parser_state: Parser state supplying the token stack and the
+            original line being parsed.
+        :param stack_index: Index of a list entry on the token stack.
+        :param line_to_parse: Line text that the returned remainder is sliced from.
+        :param start_index: Index into the original line where the unconsumed
+            text starts.
+        :param block_quote_data: Current/stack block quote counts; the stack
+            count must be exactly one more than the current count.
+        :param leaf_tokens: Passed through unchanged in the result.
+        :param container_level_tokens: Passed through unchanged in the result.
+        :param avoid_block_starts: Passed through unchanged in the result.
+        :return: The 12-element result tuple that the caller returns directly,
+            with the remaining text, the new start index, a `BlockQuoteData`
+            whose current count is incremented by one, and the text removed
+            from the front of the line.
+        """
+        # At this point, we have a "prefix", which may be partial, that has the
+        # current_count of > characters, and ends with a list. If we are here,
+        # we know that previous lines have had at least one more > character and
+        # counted block quote.
+        token_stack = parser_state.token_stack
+        original_line = parser_state.original_line_to_parse
+        assert token_stack[stack_index].is_list, "If not bq, must be a list."
+        while token_stack[stack_index].is_list:
+            stack_index += 1
+        embedded_list_stack_token = token_stack[stack_index - 1]
+        block_stack_token = token_stack[stack_index]
+        block_markdown_token = cast(
+            BlockQuoteMarkdownToken, block_stack_token.matching_markdown_token
+        )
+        list_markdown_token = cast(
+            ListStartMarkdownToken, embedded_list_stack_token.matching_markdown_token
+        )
+        indent_level = embedded_list_stack_token.indent_level
+
+        # Only whitespace may sit between the consumed prefix and the list indent.
+        assert not original_line[start_index:indent_level].strip()
+        assert block_quote_data.current_count + 1 == block_quote_data.stack_count
+        # Slice instead of index so that a short line trips the assertion rather
+        # than raising an IndexError.
+        assert original_line[indent_level : indent_level + 1] == ">"
+
+        last_block_quote_index = indent_level + 1
+        # The `>` may be the final character on the line, so check the bound
+        # before looking for the single optional space that follows it.
+        if (
+            last_block_quote_index < len(original_line)
+            and original_line[last_block_quote_index] == " "
+        ):
+            last_block_quote_index += 1
+
+        start_index = last_block_quote_index
+        text_removed_by_container = original_line[:start_index]
+        block_markdown_token.add_bleading_spaces(text_removed_by_container)
+        # Count how many times this special case has fired for the block quote;
+        # the counter starts out as None.
+        block_markdown_token.weird_kludge_one = (
+            block_markdown_token.weird_kludge_one or 0
+        ) + 1
+        list_markdown_token.add_leading_spaces("")
+        block_quote_data = BlockQuoteData(
+            block_quote_data.current_count + 1, block_quote_data.stack_count
+        )
+        return (
+            line_to_parse[start_index:],
+            start_index,
+            leaf_tokens,
+            container_level_tokens,
+            block_quote_data,
+            0,
+            False,
+            last_block_quote_index,
+            text_removed_by_container,
+            avoid_block_starts,
+            None,
+            False,
+        )
+
     # pylint: disable=too-many-arguments
     @staticmethod
     def __handle_existing_block_quote(
@@ -654,7 +731,13 @@ def __handle_existing_block_quote(
                 block_quote_data.stack_count,
             )
 
-        if not parser_state.token_stack[-1].is_fenced_code_block:
+        process_fenced_block = parser_state.token_stack[-1].is_fenced_code_block
+        if process_fenced_block and block_quote_data.current_count < block_quote_data.stack_count:
+            process_fenced_block, stack_index = BlockQuoteProcessor.__handle_existing_block_quote_fenced_special(parser_state, start_index, block_quote_data)
+            if not process_fenced_block:
+                return BlockQuoteProcessor.__handle_existing_block_quote_fenced_special_part_two(parser_state, stack_index, line_to_parse, start_index, block_quote_data, leaf_tokens,
+            container_level_tokens, avoid_block_starts)
+        if not process_fenced_block:
             return BlockQuoteNonFencedHelper.handle_non_fenced_code_section(
                 parser_state,
                 block_quote_data,

diff --git a/pymarkdown/container_blocks/container_block_non_leaf_processor.py b/pymarkdown/container_blocks/container_block_non_leaf_processor.py
@@ -715,6 +715,7 @@ def __get_block_start_index(
         )
         POGGER.debug("text_to_parse>$<", new_position_marker.text_to_parse)
         POGGER.debug("index_number>$<", new_position_marker.index_number)
+        POGGER.debug("container_start_bq_count>$<", grab_bag.container_start_bq_count)
         assert (
             grab_bag.container_start_bq_count is not None
         ), "If here, we should have a count of bq starts."
@@ -736,8 +737,14 @@ def __get_block_start_index(
             POGGER.debug(">>requeuing lines after looking for block start. returning.")
 
         if grab_bag.did_blank:
+            assert block_leaf_tokens and block_leaf_tokens[-1].is_blank_line, "should be a blank at the end"
             POGGER.debug(">>already handled blank line. returning.")
             grab_bag.extend_container_tokens_with_leaf_tokens()
+            stack_index = len(parser_state.token_stack) - 1
+            if stack_index > 2 and parser_state.token_stack[stack_index].is_block_quote and parser_state.token_stack[stack_index-1].is_block_quote and\
+                parser_state.token_stack[stack_index-2].is_list and \
+                parser_state.token_stack[stack_index-2].matching_markdown_token.line_number != block_leaf_tokens[-1].line_number:
+                    parser_state.token_stack[stack_index-2].matching_markdown_token.add_leading_spaces("")
 
         grab_bag.can_continue = (
             not grab_bag.requeue_line_info and not grab_bag.did_blank

diff --git a/pymarkdown/tokens/block_quote_markdown_token.py b/pymarkdown/tokens/block_quote_markdown_token.py
@@ -44,6 +44,7 @@ def __init__(
             position_marker=position_marker,
         )
         self.__compose_extra_data_field()
+        self.weird_kludge_one = None
 
     # pylint: disable=protected-access
     @staticmethod

diff --git a/pymarkdown/tokens/markdown_token.py b/pymarkdown/tokens/markdown_token.py
@@ -590,6 +590,18 @@ def is_inline_image(self) -> bool:
         """
         return self.token_name == MarkdownToken._token_inline_image
 
+    def adjust_line_number(
+        self, context: PluginModifyContext, adjust_delta: int
+    ) -> None:
+        """
+        Shift this token's line number by the given delta.
+
+        :param context: Plugin context; must be in fix mode and not in the
+            line pass.
+        :param adjust_delta: Amount added to the token's line number.
+        :raises BadPluginFixError: If not in fix mode, or if in fix mode but
+            called during the line pass.
+        """
+        # By design, tokens can only be modified in fix mode during the token pass.
+        if not context.in_fix_mode:
+            failure_message = (
+                f"Token '{self.__token_name}' can only be modified in fix mode."
+            )
+            raise BadPluginFixError(failure_message)
+        if context.is_during_line_pass:
+            failure_message = f"Token '{self.__token_name}' can only be modified during the token pass in fix mode."
+            raise BadPluginFixError(failure_message)
+        self.__line_number = self.__line_number + adjust_delta
+
     def modify_token(
         self,
         context: PluginModifyContext,

diff --git a/pymarkdown/transform_markdown/markdown_transform_context.py b/pymarkdown/transform_markdown/markdown_transform_context.py
@@ -2,6 +2,7 @@
 Module to provide context to markdown transforms.
 """
 
+from dataclasses import dataclass
 import logging
 from typing import List, Optional
 
@@ -14,6 +15,11 @@
 POGGER = ParserLogger(logging.getLogger(__name__))
 
 
+@dataclass
+class IndentAdjustment:
+    """
+    Mutable holder for a single integer indent adjustment, starting at 0.
+
+    NOTE(review): instances appear to be kept in a list parallel to the
+    transform context's container token stack -- confirm against callers.
+    """
+
+    # Running adjustment value; defaults to 0 for a new instance.
+    adjustment:int = 0
+
+
 # pylint: disable=too-few-public-methods
 class MarkdownTransformContext:
     """
@@ -23,6 +29,7 @@ class MarkdownTransformContext:
     def __init__(self) -> None:
         self.block_stack: List[MarkdownToken] = []
         self.container_token_stack: List[MarkdownToken] = []
+        self.container_token_indents: List[IndentAdjustment] = []
 
 
 # pylint: enable=too-few-public-methods

diff --git a/pymarkdown/transform_markdown/transform_block_quote.py b/pymarkdown/transform_markdown/transform_block_quote.py
@@ -8,6 +8,7 @@
 from pymarkdown.tokens.list_start_markdown_token import ListStartMarkdownToken
 from pymarkdown.tokens.markdown_token import EndMarkdownToken, MarkdownToken
 from pymarkdown.transform_markdown.markdown_transform_context import (
+    IndentAdjustment,
     MarkdownTransformContext,
 )
 
@@ -110,6 +111,8 @@ def __rehydrate_block_quote_start(
         )
         new_instance.leading_text_index = 0
         context.container_token_stack.append(new_instance)
+        context.container_token_indents.append(IndentAdjustment())
+
         POGGER.debug(f">bquote>{ParserHelper.make_value_visible(new_instance)}")
         POGGER.debug(
             f">self.container_token_stack>{ParserHelper.make_value_visible(context.container_token_stack)}"
@@ -193,6 +196,14 @@ def rehydrate_block_quote_end(
         any_non_container_end_tokens = search_index < len(actual_tokens)
         POGGER.debug(f">>{any_non_container_end_tokens}")
 
+        del context.container_token_indents[-1]
+        if context.container_token_indents and any_non_container_end_tokens:
+            indent_adjust = actual_tokens[search_index].line_number - current_start_token.line_number - 1
+
+            for indent_index in range(len(context.container_token_indents)-1, -1, -1):
+                if context.container_token_stack[indent_index].is_block_quote_start:
+                    context.container_token_indents[indent_index].adjustment += indent_adjust
+                    break
         del context.container_token_stack[-1]
 
         return adjusted_end_string
diff --git a/pymarkdown/transform_markdown/transform_containers.py b/pymarkdown/transform_markdown/transform_containers.py
@@ -550,7 +550,10 @@ def __adjust_for_list_check(
                 + f"fg={leading_spaces_newline_count} + "
                 + f"line={removed_block_token.line_number}"
             )
-            new_list_item_adjust = leading_spaces_newline_count > 1
+            weird_kludge_one_count = removed_tokens[-1].weird_kludge_one
+            new_list_item_adjust = leading_spaces_newline_count > 1 and (
+                weird_kludge_one_count is None or weird_kludge_one_count <= 1
+            )
             POGGER.debug(f"new_list_item_adjust:{new_list_item_adjust}")
 
         return (
@@ -685,7 +688,8 @@ def __apply_primary_transformation(
             ), "If an abrupt bq end, the change record's item_d field must be defined."
             was_abrupt_block_quote_end = bool(
                 current_changed_record.item_d.was_forced
-                and current_changed_record.item_d.extra_end_data == "> "
+                and current_changed_record.item_d.extra_end_data
+                and ">" in current_changed_record.item_d.extra_end_data
             )
 
         applied_leading_spaces_to_start_of_container_line = (