From 611ffc477b18d3a405b4f43b89ffb02cc74c766f Mon Sep 17 00:00:00 2001 From: elijahr Date: Sat, 25 Oct 2025 23:13:33 -0500 Subject: [PATCH 1/5] Fix #1245: Preserve comment formatting in fmt:off/on blocks This fixes an issue where comments between `# fmt: off` and `# fmt: on` directives were being reformatted even though they should be left untouched. Root Causes: 1. When ONLY comments exist between fmt directives (no code), the `generate_ignored_nodes()` function returns empty because comments don't create AST nodes - they exist as strings in node prefixes. 2. After processing the first comment-only block, subsequent blocks in the same function were still being reformatted due to: - Identity check bug: Used `c is comment` to find fmt:off, but after modifying the prefix, new comment objects were created, causing the identity check to fail - Whitespace check bug: STANDALONE_COMMENT nodes weren't treated as whitespace, preventing subsequent fmt:off blocks from being detected Changes: - Modified normalize_fmt_off() in src/black/comments.py to: * Detect and preserve comment-only blocks between fmt:off and fmt:on * Find first fmt:off by value check instead of identity check * Treat STANDALONE_COMMENT as whitespace for standalone detection - Updated visit_STANDALONE_COMMENT() in src/black/linegen.py to bypass prefix normalization for fmt:off/on blocks - Enhanced make_comment() in src/black/comments.py to preserve exact formatting of comments containing any fmt directive (off/on/skip) - Added comprehensive test coverage in tests/data/cases/fmtonoff.py - Updated fmtskip7.py test to reflect new preservation behavior This fix handles: - File-level and function-level comment-only blocks - Multiple consecutive fmt:off/on blocks in the same function - Comments with unusual indentation or spacing - Comments with blank lines between fmt:off and comment content - Comments containing fmt directives (skip/off/on/yapf) All 408 existing tests pass. --- CHANGES.md | 5 ++ src/black/comments.py | 131 +++++++++++++++++++++++++++++- src/black/linegen.py | 28 ++++++- tests/data/cases/fmtonoff.py | 150 ++++++++++++++++++++++++++++++----- tests/data/cases/fmtskip7.py | 10 +-- 5 files changed, 297 insertions(+), 27 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 2d7f8684ca8..694dddee764 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -15,6 +15,11 @@ +- Fix bug where comments between `# fmt: off` and `# fmt: on` were reformatted (#1245) +- Comments containing fmt directives (`fmt:skip`, `fmt:off`, `fmt:on`) now preserve their + exact formatting instead of being normalized. For example, `#fmt:skip` is no longer + changed to `# fmt:skip` (#1245) + ### Preview style diff --git a/src/black/comments.py b/src/black/comments.py index 25c20a8e677..19e203a45f5 100644 --- a/src/black/comments.py +++ b/src/black/comments.py @@ -143,6 +143,31 @@ def normalize_trailing_prefix(leaf: LN, total_consumed: int) -> None: leaf.prefix = "" +def _contains_fmt_directive(comment_line: str) -> bool: + """ + Checks if the given comment contains any fmt directive (skip, off, on, yapf). + Similar to _contains_fmt_skip_comment but checks all fmt directives. + """ + all_fmt_directives = FMT_OFF | FMT_ON | FMT_SKIP + + # Parse comment into semantic blocks (handles multiple comments and semicolon-separated lists) + semantic_comment_blocks = [ + comment_line, + *[ + _COMMENT_PREFIX + comment.strip() + for comment in comment_line.split(_COMMENT_PREFIX)[1:] + ], + *[ + _COMMENT_PREFIX + comment.strip() + for comment in comment_line.strip(_COMMENT_PREFIX).split( + _COMMENT_LIST_SEPARATOR + ) + ], + ] + + return any(comment in all_fmt_directives for comment in semantic_comment_blocks) + + def make_comment(content: str, mode: Mode) -> str: """Return a consistently formatted comment from the given `content` string. @@ -150,11 +175,18 @@ def make_comment(content: str, mode: Mode) -> str: space between the hash sign and the content. If `content` didn't start with a hash sign, one is provided. + + Comments containing fmt directives are preserved exactly as-is to respect + user intent (e.g., `#no space # fmt: skip` stays as-is). """ content = content.rstrip() if not content: return "#" + # Preserve comments with fmt directives exactly as-is + if content.startswith("#") and _contains_fmt_directive(content): + return content + if content[0] == "#": content = content[1:] if ( @@ -211,13 +243,110 @@ def convert_one_fmt_off_pair( if comment.type != STANDALONE_COMMENT: prev = preceding_leaf(leaf) if prev: - if is_fmt_off and prev.type not in WHITESPACE: + # Treat STANDALONE_COMMENT nodes as whitespace for this check, + # since they represent preserved fmt:off/on blocks + if is_fmt_off and prev.type not in WHITESPACE and prev.type != STANDALONE_COMMENT: continue if is_fmt_skip and prev.type in WHITESPACE: continue ignored_nodes = list(generate_ignored_nodes(leaf, comment, mode)) if not ignored_nodes: + # When there are ONLY comments between `# fmt: off` and `# fmt: on`, + # `generate_ignored_nodes()` returns empty because comments don't create + # AST nodes - they exist as strings in node prefixes. + # This is a special case where both directives and all content between them + # exist in the prefix of a single leaf (e.g., ENDMARKER for file-level, + # or the first leaf of the next statement for nested cases). + # We detect this and create a STANDALONE_COMMENT to preserve formatting. + # Note: visit_default in linegen.py has special handling to preserve + # the indentation of these STANDALONE_COMMENT nodes. + # See issue #1245. + if is_fmt_off: + all_comments = list_comments(leaf.prefix, is_endmarker=False, mode=mode) + + # Find the first fmt:off and its matching fmt:on in the same prefix + fmt_off_idx = None + fmt_on_idx = None + for idx, c in enumerate(all_comments): + # Find the first fmt:off directive + if fmt_off_idx is None and c.value in FMT_OFF: + fmt_off_idx = idx + # Find the first fmt:on after the fmt:off + if fmt_off_idx is not None and idx > fmt_off_idx and c.value in FMT_ON: + fmt_on_idx = idx + break + + # Only proceed if we found both directives in the same prefix + if fmt_on_idx is not None and fmt_off_idx is not None: + # Use the comments we found, not the original comment object + comment = all_comments[fmt_off_idx] + # Extract the original source between fmt: off and fmt: on + fmt_on_comment = all_comments[fmt_on_idx] + # Get the original text from the prefix + original_prefix = leaf.prefix + + # Extract content between fmt: off and fmt: on from original source. + # `consumed` represents the position in the prefix string after + # consuming this comment (including its newline). + # We include everything from after fmt: off through fmt: on. + start_pos = comment.consumed + end_pos = fmt_on_comment.consumed + + # Get the original content between the directives (including fmt: on) + content_between_and_fmt_on = original_prefix[start_pos:end_pos] + + # Build the hidden value with original formatting preserved. + # Structure: "# fmt: off\n\n# fmt: on\n" + # This entire block will be stored as a STANDALONE_COMMENT node, + # bypassing all comment normalization. + hidden_value = comment.value + "\n" + content_between_and_fmt_on + + # Remove trailing newline since visit_STANDALONE_COMMENT in linegen.py + # will call yield from self.line() after appending the node, which + # adds a newline. Keeping the trailing newline here would create + # an extra blank line. + if hidden_value.endswith("\n"): + hidden_value = hidden_value[:-1] + + # The new prefix should be everything before fmt: off + standalone_comment_prefix = original_prefix[:previous_consumed] + "\n" * comment.newlines + + # Keep indentation of fmt: off comment by preserving original whitespace. + # Extract the whitespace before the comment directive. + fmt_off_prefix = original_prefix.split(comment.value)[0] + if "\n" in fmt_off_prefix: + # Take only the whitespace after the last newline (i.e., the indentation) + fmt_off_prefix = fmt_off_prefix.split("\n")[-1] + standalone_comment_prefix += fmt_off_prefix + + # The new leaf prefix should be everything after fmt: on + leaf.prefix = original_prefix[fmt_on_comment.consumed:] + + # Insert the STANDALONE_COMMENT + parent = leaf.parent + assert parent is not None, "INTERNAL ERROR: fmt: on/off handling (prefix only)" + + # Find the index of the leaf in its parent + leaf_idx = None + for idx, child in enumerate(parent.children): + if child is leaf: + leaf_idx = idx + break + + assert leaf_idx is not None, "INTERNAL ERROR: fmt: on/off handling (leaf index)" + + parent.insert_child( + leaf_idx, + Leaf( + STANDALONE_COMMENT, + hidden_value, + prefix=standalone_comment_prefix, + fmt_pass_converted_first_leaf=None, + ), + ) + return True + continue first = ignored_nodes[0] # Can be a container node with the `leaf`. diff --git a/src/black/linegen.py b/src/black/linegen.py index 27601262604..5475a6a1897 100644 --- a/src/black/linegen.py +++ b/src/black/linegen.py @@ -17,7 +17,7 @@ get_leaves_inside_matching_brackets, max_delimiter_priority_in_atom, ) -from black.comments import FMT_OFF, generate_comments, list_comments +from black.comments import FMT_OFF, FMT_ON, generate_comments, list_comments from black.lines import ( Line, RHSResult, @@ -383,7 +383,31 @@ def visit_ENDMARKER(self, leaf: Leaf) -> Iterator[Line]: def visit_STANDALONE_COMMENT(self, leaf: Leaf) -> Iterator[Line]: if not self.current_line.bracket_tracker.any_open_brackets(): yield from self.line() - yield from self.visit_default(leaf) + # STANDALONE_COMMENT nodes created by our special handling in normalize_fmt_off + # for comment-only blocks have fmt:off as the first line and fmt:on as the last line + # (each directive on its own line, not embedded in other text). + # These should be appended directly without calling visit_default, which + # would process their prefix and lose indentation. + # Normal STANDALONE_COMMENT nodes go through visit_default. + value = leaf.value + lines = value.splitlines() + if len(lines) >= 2: + # Check if first line (after stripping whitespace) is exactly a fmt:off directive + first_line = lines[0].lstrip() + first_is_fmt_off = first_line in FMT_OFF + # Check if last line (after stripping whitespace) is exactly a fmt:on directive + last_line = lines[-1].lstrip() + last_is_fmt_on = last_line in FMT_ON + is_fmt_off_block = first_is_fmt_off and last_is_fmt_on + else: + is_fmt_off_block = False + if is_fmt_off_block: + # This is a fmt:off/on block from normalize_fmt_off - append directly + self.current_line.append(leaf) + yield from self.line() + else: + # Normal standalone comment - process through visit_default + yield from self.visit_default(leaf) def visit_factor(self, node: Node) -> Iterator[Line]: """Force parentheses between a unary op and a binary power: diff --git a/tests/data/cases/fmtonoff.py b/tests/data/cases/fmtonoff.py index 8af94563af8..bba49a0d760 100644 --- a/tests/data/cases/fmtonoff.py +++ b/tests/data/cases/fmtonoff.py @@ -91,20 +91,22 @@ def example(session): .all() # fmt: on def off_and_on_without_data(): - """All comments here are technically on the same prefix. - - The comments between will be formatted. This is a known limitation. - """ + """Test that comment-only fmt:off/on blocks preserve formatting.""" + # fmt: off + #should not be formatted + # fmt: on # fmt: off - - #hey, that won't work - + #should not be formatted # fmt: on + # fmt: off + #should not be formatted + #should not be formatted #also should not be formatted + # fmt: on pass -def on_and_off_broken(): - """Another known limitation.""" +def on_and_off_with_comment_only_blocks(): + """Test that fmt:off/on works with multiple directives and comment-only blocks.""" # fmt: on # fmt: off this=should.not_be.formatted() @@ -113,7 +115,16 @@ def on_and_off_broken(): now . considers . multiple . fmt . directives . within . one . prefix # fmt: on # fmt: off - # ...but comments still get reformatted even though they should not be + #should not be formatted + # fmt: on + # fmt: off + + #should not be formatted + + # fmt: on + # fmt: off + #should not be formatted + #should not be formatted #also should not be formatted # fmt: on def long_lines(): if True: @@ -178,6 +189,50 @@ def single_literal_yapf_disable(): # fmt: on xxxxxxxxxx_xxxxxxxxxxx_xxxxxxx_xxxxxxxxx=5 ) + +# Test comment-only blocks at file level with various spacing patterns +# fmt: off + #nospace + # twospaces +# fmt: on + + +# fmt: off +#nospaceatall + #extraspaces + #evenmorespaces +# fmt: on + + +# fmt: off +# fmt: on + + +# fmt: off +#SBATCH --job-name=test +#SBATCH --output=test.out +# fmt: on + + +# fmt: off + #first + + #second +# fmt: on + + +# fmt: off + #!@#$%^&*() + #<=>+-*/ +# fmt: on + + +# fmt: off + #x=1+2 + #y = 3 + #z = 4 +# fmt: on + # fmt: off yield 'hello' # No formatting to the end of the file @@ -185,8 +240,8 @@ def single_literal_yapf_disable(): d={'a':1, 'b':2} -# output +# output #!/usr/bin/env python3 import asyncio @@ -302,29 +357,42 @@ def example(session): def off_and_on_without_data(): - """All comments here are technically on the same prefix. - - The comments between will be formatted. This is a known limitation. - """ + """Test that comment-only fmt:off/on blocks preserve formatting.""" + # fmt: off + #should not be formatted + # fmt: on # fmt: off - # hey, that won't work + #should not be formatted # fmt: on + # fmt: off + #should not be formatted + #should not be formatted #also should not be formatted + # fmt: on pass -def on_and_off_broken(): - """Another known limitation.""" +def on_and_off_with_comment_only_blocks(): + """Test that fmt:off/on works with multiple directives and comment-only blocks.""" # fmt: on # fmt: off this=should.not_be.formatted() and_=indeed . it is not formatted because . the . handling . inside . generate_ignored_nodes() now . considers . multiple . fmt . directives . within . one . prefix + + # fmt: off + #should not be formatted # fmt: on # fmt: off - # ...but comments still get reformatted even though they should not be + + #should not be formatted + + # fmt: on + # fmt: off + #should not be formatted + #should not be formatted #also should not be formatted # fmt: on @@ -401,6 +469,50 @@ def single_literal_yapf_disable(): # fmt: on xxxxxxxxxx_xxxxxxxxxxx_xxxxxxx_xxxxxxxxx=5, ) + +# Test comment-only blocks at file level with various spacing patterns +# fmt: off + #nospace + # twospaces +# fmt: on + + +# fmt: off +#nospaceatall + #extraspaces + #evenmorespaces +# fmt: on + + +# fmt: off +# fmt: on + + +# fmt: off +#SBATCH --job-name=test +#SBATCH --output=test.out +# fmt: on + + +# fmt: off + #first + + #second +# fmt: on + + +# fmt: off + #!@#$%^&*() + #<=>+-*/ +# fmt: on + + +# fmt: off + #x=1+2 + #y = 3 + #z = 4 +# fmt: on + # fmt: off yield 'hello' # No formatting to the end of the file diff --git a/tests/data/cases/fmtskip7.py b/tests/data/cases/fmtskip7.py index 15ac0ad7080..89b29513152 100644 --- a/tests/data/cases/fmtskip7.py +++ b/tests/data/cases/fmtskip7.py @@ -1,11 +1,11 @@ a = "this is some code" -b = 5 #fmt:skip +b = 5 # fmt:skip c = 9 #fmt: skip -d = "thisisasuperlongstringthisisasuperlongstringthisisasuperlongstringthisisasuperlongstring" #fmt:skip +d = "thisisasuperlongstringthisisasuperlongstringthisisasuperlongstringthisisasuperlongstring" # fmt:skip # output a = "this is some code" -b = 5 # fmt:skip -c = 9 # fmt: skip -d = "thisisasuperlongstringthisisasuperlongstringthisisasuperlongstringthisisasuperlongstring" # fmt:skip +b = 5 # fmt:skip +c = 9 #fmt: skip +d = "thisisasuperlongstringthisisasuperlongstringthisisasuperlongstringthisisasuperlongstring" # fmt:skip From 36b6cd2165467ee0e82386aa72f85e25d4ff9209 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 26 Oct 2025 07:28:11 +0000 Subject: [PATCH 2/5] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- CHANGES.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 694dddee764..014f023790e 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -16,9 +16,9 @@ - Fix bug where comments between `# fmt: off` and `# fmt: on` were reformatted (#1245) -- Comments containing fmt directives (`fmt:skip`, `fmt:off`, `fmt:on`) now preserve their - exact formatting instead of being normalized. For example, `#fmt:skip` is no longer - changed to `# fmt:skip` (#1245) +- Comments containing fmt directives (`fmt:skip`, `fmt:off`, `fmt:on`) now preserve + their exact formatting instead of being normalized. For example, `#fmt:skip` is no + longer changed to `# fmt:skip` (#1245) ### Preview style From ea457c07f661975d87183fc8c2a713ce48d4ff3f Mon Sep 17 00:00:00 2001 From: elijahr Date: Sun, 26 Oct 2025 05:30:45 -0500 Subject: [PATCH 3/5] Formatting --- CHANGES.md | 6 +- src/black/comments.py | 369 ++++++++++++++++++++++++------------------ src/black/linegen.py | 19 ++- 3 files changed, 224 insertions(+), 170 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 694dddee764..014f023790e 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -16,9 +16,9 @@ - Fix bug where comments between `# fmt: off` and `# fmt: on` were reformatted (#1245) -- Comments containing fmt directives (`fmt:skip`, `fmt:off`, `fmt:on`) now preserve their - exact formatting instead of being normalized. For example, `#fmt:skip` is no longer - changed to `# fmt:skip` (#1245) +- Comments containing fmt directives (`fmt:skip`, `fmt:off`, `fmt:on`) now preserve + their exact formatting instead of being normalized. For example, `#fmt:skip` is no + longer changed to `# fmt:skip` (#1245) ### Preview style diff --git a/src/black/comments.py b/src/black/comments.py index 19e203a45f5..c714956528b 100644 --- a/src/black/comments.py +++ b/src/black/comments.py @@ -150,7 +150,8 @@ def _contains_fmt_directive(comment_line: str) -> bool: """ all_fmt_directives = FMT_OFF | FMT_ON | FMT_SKIP - # Parse comment into semantic blocks (handles multiple comments and semicolon-separated lists) + # Parse comment into semantic blocks (handles multiple comments and + # semicolon-separated lists) semantic_comment_blocks = [ comment_line, *[ @@ -218,6 +219,183 @@ def normalize_fmt_off( try_again = convert_one_fmt_off_pair(node, mode, lines) +def _should_process_fmt_comment( + comment: ProtoComment, leaf: Leaf, mode: Mode +) -> tuple[bool, bool, bool]: + """Check if comment should be processed for fmt handling. + + Returns (should_process, is_fmt_off, is_fmt_skip). + """ + is_fmt_off = comment.value in FMT_OFF + is_fmt_skip = _contains_fmt_skip_comment(comment.value, mode) + + if not is_fmt_off and not is_fmt_skip: + return False, False, False + + # Invalid use when `# fmt: off` is applied before a closing bracket + if is_fmt_off and leaf.type in CLOSING_BRACKETS: + return False, False, False + + return True, is_fmt_off, is_fmt_skip + + +def _is_valid_standalone_fmt_comment( + comment: ProtoComment, leaf: Leaf, is_fmt_off: bool, is_fmt_skip: bool +) -> bool: + """Check if comment is a valid standalone fmt directive.""" + if comment.type == STANDALONE_COMMENT: + return True + + prev = preceding_leaf(leaf) + if not prev: + return True + + # Treat STANDALONE_COMMENT nodes as whitespace for check + if is_fmt_off and prev.type not in WHITESPACE and prev.type != STANDALONE_COMMENT: + return False + if is_fmt_skip and prev.type in WHITESPACE: + return False + + return True + + +def _handle_comment_only_fmt_block( + leaf: Leaf, + comment: ProtoComment, + previous_consumed: int, + mode: Mode, +) -> bool: + """Handle fmt:off/on blocks that contain only comments. + + Returns True if a block was converted, False otherwise. + """ + all_comments = list_comments(leaf.prefix, is_endmarker=False, mode=mode) + + # Find the first fmt:off and its matching fmt:on + fmt_off_idx = None + fmt_on_idx = None + for idx, c in enumerate(all_comments): + if fmt_off_idx is None and c.value in FMT_OFF: + fmt_off_idx = idx + if fmt_off_idx is not None and idx > fmt_off_idx and c.value in FMT_ON: + fmt_on_idx = idx + break + + # Only proceed if we found both directives + if fmt_on_idx is None or fmt_off_idx is None: + return False + + comment = all_comments[fmt_off_idx] + fmt_on_comment = all_comments[fmt_on_idx] + original_prefix = leaf.prefix + + # Build the hidden value + start_pos = comment.consumed + end_pos = fmt_on_comment.consumed + content_between_and_fmt_on = original_prefix[start_pos:end_pos] + hidden_value = comment.value + "\n" + content_between_and_fmt_on + + if hidden_value.endswith("\n"): + hidden_value = hidden_value[:-1] + + # Build the standalone comment prefix + standalone_comment_prefix = ( + original_prefix[:previous_consumed] + "\n" * comment.newlines + ) + + fmt_off_prefix = original_prefix.split(comment.value)[0] + if "\n" in fmt_off_prefix: + fmt_off_prefix = fmt_off_prefix.split("\n")[-1] + standalone_comment_prefix += fmt_off_prefix + + # Update leaf prefix + leaf.prefix = original_prefix[fmt_on_comment.consumed:] + + # Insert the STANDALONE_COMMENT + parent = leaf.parent + assert parent is not None, "INTERNAL ERROR: fmt: on/off handling (prefix only)" + + leaf_idx = None + for idx, child in enumerate(parent.children): + if child is leaf: + leaf_idx = idx + break + + assert leaf_idx is not None, "INTERNAL ERROR: fmt: on/off handling (leaf index)" + + parent.insert_child( + leaf_idx, + Leaf( + STANDALONE_COMMENT, + hidden_value, + prefix=standalone_comment_prefix, + fmt_pass_converted_first_leaf=None, + ), + ) + return True + + +def _handle_regular_fmt_block( + ignored_nodes: list[LN], + first: LN, + prefix: str, + comment: ProtoComment, + previous_consumed: int, + is_fmt_skip: bool, + lines: Collection[tuple[int, int]], + leaf: Leaf, +) -> None: + """Handle fmt blocks with actual AST nodes.""" + parent = first.parent + + if comment.value in FMT_OFF: + first.prefix = prefix[comment.consumed :] + if is_fmt_skip: + first.prefix = "" + standalone_comment_prefix = prefix + else: + standalone_comment_prefix = prefix[:previous_consumed] + "\n" * comment.newlines + + hidden_value = "".join(str(n) for n in ignored_nodes) + comment_lineno = leaf.lineno - comment.newlines + + if comment.value in FMT_OFF: + fmt_off_prefix = "" + if len(lines) > 0 and not any( + line[0] <= comment_lineno <= line[1] for line in lines + ): + fmt_off_prefix = prefix.split(comment.value)[0] + if "\n" in fmt_off_prefix: + fmt_off_prefix = fmt_off_prefix.split("\n")[-1] + standalone_comment_prefix += fmt_off_prefix + hidden_value = comment.value + "\n" + hidden_value + + if is_fmt_skip: + hidden_value += comment.leading_whitespace + comment.value + + if hidden_value.endswith("\n"): + hidden_value = hidden_value[:-1] + + first_idx: Optional[int] = None + for ignored in ignored_nodes: + index = ignored.remove() + if first_idx is None: + first_idx = index + + assert parent is not None, "INTERNAL ERROR: fmt: on/off handling (1)" + assert first_idx is not None, "INTERNAL ERROR: fmt: on/off handling (2)" + + parent.insert_child( + first_idx, + Leaf( + STANDALONE_COMMENT, + hidden_value, + prefix=standalone_comment_prefix, + fmt_pass_converted_first_leaf=first_leaf_of(first), + ), + ) + + def convert_one_fmt_off_pair( node: Node, mode: Mode, lines: Collection[tuple[int, int]] ) -> bool: @@ -228,173 +406,46 @@ def convert_one_fmt_off_pair( for leaf in node.leaves(): previous_consumed = 0 for comment in list_comments(leaf.prefix, is_endmarker=False, mode=mode): - is_fmt_off = comment.value in FMT_OFF - is_fmt_skip = _contains_fmt_skip_comment(comment.value, mode) - if (not is_fmt_off and not is_fmt_skip) or ( - # Invalid use when `# fmt: off` is applied before a closing bracket. - is_fmt_off - and leaf.type in CLOSING_BRACKETS + should_process, is_fmt_off, is_fmt_skip = _should_process_fmt_comment( + comment, leaf, mode + ) + if not should_process: + previous_consumed = comment.consumed + continue + + if not _is_valid_standalone_fmt_comment( + comment, leaf, is_fmt_off, is_fmt_skip ): previous_consumed = comment.consumed continue - # We only want standalone comments. If there's no previous leaf or - # the previous leaf is indentation, it's a standalone comment in - # disguise. - if comment.type != STANDALONE_COMMENT: - prev = preceding_leaf(leaf) - if prev: - # Treat STANDALONE_COMMENT nodes as whitespace for this check, - # since they represent preserved fmt:off/on blocks - if is_fmt_off and prev.type not in WHITESPACE and prev.type != STANDALONE_COMMENT: - continue - if is_fmt_skip and prev.type in WHITESPACE: - continue ignored_nodes = list(generate_ignored_nodes(leaf, comment, mode)) - if not ignored_nodes: - # When there are ONLY comments between `# fmt: off` and `# fmt: on`, - # `generate_ignored_nodes()` returns empty because comments don't create - # AST nodes - they exist as strings in node prefixes. - # This is a special case where both directives and all content between them - # exist in the prefix of a single leaf (e.g., ENDMARKER for file-level, - # or the first leaf of the next statement for nested cases). - # We detect this and create a STANDALONE_COMMENT to preserve formatting. - # Note: visit_default in linegen.py has special handling to preserve - # the indentation of these STANDALONE_COMMENT nodes. - # See issue #1245. - if is_fmt_off: - all_comments = list_comments(leaf.prefix, is_endmarker=False, mode=mode) - - # Find the first fmt:off and its matching fmt:on in the same prefix - fmt_off_idx = None - fmt_on_idx = None - for idx, c in enumerate(all_comments): - # Find the first fmt:off directive - if fmt_off_idx is None and c.value in FMT_OFF: - fmt_off_idx = idx - # Find the first fmt:on after the fmt:off - if fmt_off_idx is not None and idx > fmt_off_idx and c.value in FMT_ON: - fmt_on_idx = idx - break - - # Only proceed if we found both directives in the same prefix - if fmt_on_idx is not None and fmt_off_idx is not None: - # Use the comments we found, not the original comment object - comment = all_comments[fmt_off_idx] - # Extract the original source between fmt: off and fmt: on - fmt_on_comment = all_comments[fmt_on_idx] - # Get the original text from the prefix - original_prefix = leaf.prefix - - # Extract content between fmt: off and fmt: on from original source. - # `consumed` represents the position in the prefix string after - # consuming this comment (including its newline). - # We include everything from after fmt: off through fmt: on. - start_pos = comment.consumed - end_pos = fmt_on_comment.consumed - - # Get the original content between the directives (including fmt: on) - content_between_and_fmt_on = original_prefix[start_pos:end_pos] - - # Build the hidden value with original formatting preserved. - # Structure: "# fmt: off\n\n# fmt: on\n" - # This entire block will be stored as a STANDALONE_COMMENT node, - # bypassing all comment normalization. - hidden_value = comment.value + "\n" + content_between_and_fmt_on - - # Remove trailing newline since visit_STANDALONE_COMMENT in linegen.py - # will call yield from self.line() after appending the node, which - # adds a newline. Keeping the trailing newline here would create - # an extra blank line. - if hidden_value.endswith("\n"): - hidden_value = hidden_value[:-1] - - # The new prefix should be everything before fmt: off - standalone_comment_prefix = original_prefix[:previous_consumed] + "\n" * comment.newlines - - # Keep indentation of fmt: off comment by preserving original whitespace. - # Extract the whitespace before the comment directive. - fmt_off_prefix = original_prefix.split(comment.value)[0] - if "\n" in fmt_off_prefix: - # Take only the whitespace after the last newline (i.e., the indentation) - fmt_off_prefix = fmt_off_prefix.split("\n")[-1] - standalone_comment_prefix += fmt_off_prefix - - # The new leaf prefix should be everything after fmt: on - leaf.prefix = original_prefix[fmt_on_comment.consumed:] - - # Insert the STANDALONE_COMMENT - parent = leaf.parent - assert parent is not None, "INTERNAL ERROR: fmt: on/off handling (prefix only)" - - # Find the index of the leaf in its parent - leaf_idx = None - for idx, child in enumerate(parent.children): - if child is leaf: - leaf_idx = idx - break - - assert leaf_idx is not None, "INTERNAL ERROR: fmt: on/off handling (leaf index)" - - parent.insert_child( - leaf_idx, - Leaf( - STANDALONE_COMMENT, - hidden_value, - prefix=standalone_comment_prefix, - fmt_pass_converted_first_leaf=None, - ), - ) - return True + # Handle comment-only blocks + if not ignored_nodes and is_fmt_off: + if _handle_comment_only_fmt_block( + leaf, comment, previous_consumed, mode + ): + return True + continue + + # Need actual nodes to process + if not ignored_nodes: continue - first = ignored_nodes[0] # Can be a container node with the `leaf`. - parent = first.parent + # Handle regular fmt blocks + first = ignored_nodes[0] prefix = first.prefix - if comment.value in FMT_OFF: - first.prefix = prefix[comment.consumed :] - if is_fmt_skip: - first.prefix = "" - standalone_comment_prefix = prefix - else: - standalone_comment_prefix = ( - prefix[:previous_consumed] + "\n" * comment.newlines - ) - hidden_value = "".join(str(n) for n in ignored_nodes) - comment_lineno = leaf.lineno - comment.newlines - if comment.value in FMT_OFF: - fmt_off_prefix = "" - if len(lines) > 0 and not any( - line[0] <= comment_lineno <= line[1] for line in lines - ): - # keeping indentation of comment by preserving original whitespaces. - fmt_off_prefix = prefix.split(comment.value)[0] - if "\n" in fmt_off_prefix: - fmt_off_prefix = fmt_off_prefix.split("\n")[-1] - standalone_comment_prefix += fmt_off_prefix - hidden_value = comment.value + "\n" + hidden_value - if is_fmt_skip: - hidden_value += comment.leading_whitespace + comment.value - if hidden_value.endswith("\n"): - # That happens when one of the `ignored_nodes` ended with a NEWLINE - # leaf (possibly followed by a DEDENT). - hidden_value = hidden_value[:-1] - first_idx: Optional[int] = None - for ignored in ignored_nodes: - index = ignored.remove() - if first_idx is None: - first_idx = index - assert parent is not None, "INTERNAL ERROR: fmt: on/off handling (1)" - assert first_idx is not None, "INTERNAL ERROR: fmt: on/off handling (2)" - parent.insert_child( - first_idx, - Leaf( - STANDALONE_COMMENT, - hidden_value, - prefix=standalone_comment_prefix, - fmt_pass_converted_first_leaf=first_leaf_of(first), - ), + + _handle_regular_fmt_block( + ignored_nodes, + first, + prefix, + comment, + previous_consumed, + is_fmt_skip, + lines, + leaf, ) return True diff --git a/src/black/linegen.py b/src/black/linegen.py index 5475a6a1897..c2fd5f6858e 100644 --- a/src/black/linegen.py +++ b/src/black/linegen.py @@ -383,19 +383,22 @@ def visit_ENDMARKER(self, leaf: Leaf) -> Iterator[Line]: def visit_STANDALONE_COMMENT(self, leaf: Leaf) -> Iterator[Line]: if not self.current_line.bracket_tracker.any_open_brackets(): yield from self.line() - # STANDALONE_COMMENT nodes created by our special handling in normalize_fmt_off - # for comment-only blocks have fmt:off as the first line and fmt:on as the last line - # (each directive on its own line, not embedded in other text). - # These should be appended directly without calling visit_default, which - # would process their prefix and lose indentation. - # Normal STANDALONE_COMMENT nodes go through visit_default. + # STANDALONE_COMMENT nodes created by our special handling in + # normalize_fmt_off for comment-only blocks have fmt:off as the first + # line and fmt:on as the last line (each directive on its own line, + # not embedded in other text). These should be appended directly + # without calling visit_default, which would process their prefix and + # lose indentation. Normal STANDALONE_COMMENT nodes go through + # visit_default. value = leaf.value lines = value.splitlines() if len(lines) >= 2: - # Check if first line (after stripping whitespace) is exactly a fmt:off directive + # Check if first line (after stripping whitespace) is exactly a + # fmt:off directive first_line = lines[0].lstrip() first_is_fmt_off = first_line in FMT_OFF - # Check if last line (after stripping whitespace) is exactly a fmt:on directive + # Check if last line (after stripping whitespace) is exactly a + # fmt:on directive last_line = lines[-1].lstrip() last_is_fmt_on = last_line in FMT_ON is_fmt_off_block = first_is_fmt_off and last_is_fmt_on From cacfd9a5edb95a1a682ea1e603bedc9d55d44196 Mon Sep 17 00:00:00 2001 From: cobalt <61329810+cobaltt7@users.noreply.github.com> Date: Sun, 26 Oct 2025 16:21:44 -0500 Subject: [PATCH 4/5] Refactors: merge `_contains_fmt_skip_comment` into `_contains_fmt_directive`, add back removed comments, move `convert_one_fmt_off_pair` to reduce the diff, and run Black Signed-off-by: cobalt <61329810+cobaltt7@users.noreply.github.com> --- src/black/comments.py | 172 ++++++++++++++++------------------- tests/data/cases/fmtonoff.py | 2 +- 2 files changed, 77 insertions(+), 97 deletions(-) diff --git a/src/black/comments.py b/src/black/comments.py index c714956528b..13b2a6fe4ed 100644 --- a/src/black/comments.py +++ b/src/black/comments.py @@ -143,32 +143,6 @@ def normalize_trailing_prefix(leaf: LN, total_consumed: int) -> None: leaf.prefix = "" -def _contains_fmt_directive(comment_line: str) -> bool: - """ - Checks if the given comment contains any fmt directive (skip, off, on, yapf). - Similar to _contains_fmt_skip_comment but checks all fmt directives. - """ - all_fmt_directives = FMT_OFF | FMT_ON | FMT_SKIP - - # Parse comment into semantic blocks (handles multiple comments and - # semicolon-separated lists) - semantic_comment_blocks = [ - comment_line, - *[ - _COMMENT_PREFIX + comment.strip() - for comment in comment_line.split(_COMMENT_PREFIX)[1:] - ], - *[ - _COMMENT_PREFIX + comment.strip() - for comment in comment_line.strip(_COMMENT_PREFIX).split( - _COMMENT_LIST_SEPARATOR - ) - ], - ] - - return any(comment in all_fmt_directives for comment in semantic_comment_blocks) - - def make_comment(content: str, mode: Mode) -> str: """Return a consistently formatted comment from the given `content` string. @@ -220,14 +194,14 @@ def normalize_fmt_off( def _should_process_fmt_comment( - comment: ProtoComment, leaf: Leaf, mode: Mode + comment: ProtoComment, leaf: Leaf ) -> tuple[bool, bool, bool]: """Check if comment should be processed for fmt handling. Returns (should_process, is_fmt_off, is_fmt_skip). """ - is_fmt_off = comment.value in FMT_OFF - is_fmt_skip = _contains_fmt_skip_comment(comment.value, mode) + is_fmt_off = _contains_fmt_directive(comment.value, FMT_OFF) + is_fmt_skip = _contains_fmt_directive(comment.value, FMT_SKIP) if not is_fmt_off and not is_fmt_skip: return False, False, False @@ -242,7 +216,11 @@ def _should_process_fmt_comment( def _is_valid_standalone_fmt_comment( comment: ProtoComment, leaf: Leaf, is_fmt_off: bool, is_fmt_skip: bool ) -> bool: - """Check if comment is a valid standalone fmt directive.""" + """Check if comment is a valid standalone fmt directive. + + We only want standalone comments. If there's no previous leaf or if + the previous leaf is indentation, it's a standalone comment in disguise. + """ if comment.type == STANDALONE_COMMENT: return True @@ -309,7 +287,7 @@ def _handle_comment_only_fmt_block( standalone_comment_prefix += fmt_off_prefix # Update leaf prefix - leaf.prefix = original_prefix[fmt_on_comment.consumed:] + leaf.prefix = original_prefix[fmt_on_comment.consumed :] # Insert the STANDALONE_COMMENT parent = leaf.parent @@ -335,10 +313,60 @@ def _handle_comment_only_fmt_block( return True +def convert_one_fmt_off_pair( + node: Node, mode: Mode, lines: Collection[tuple[int, int]] +) -> bool: + """Convert content of a single `# fmt: off`/`# fmt: on` into a standalone comment. + + Returns True if a pair was converted. + """ + for leaf in node.leaves(): + previous_consumed = 0 + for comment in list_comments(leaf.prefix, is_endmarker=False, mode=mode): + should_process, is_fmt_off, is_fmt_skip = _should_process_fmt_comment( + comment, leaf + ) + if not should_process: + previous_consumed = comment.consumed + continue + + if not _is_valid_standalone_fmt_comment( + comment, leaf, is_fmt_off, is_fmt_skip + ): + previous_consumed = comment.consumed + continue + + ignored_nodes = list(generate_ignored_nodes(leaf, comment, mode)) + + # Handle comment-only blocks + if not ignored_nodes and is_fmt_off: + if _handle_comment_only_fmt_block( + leaf, comment, previous_consumed, mode + ): + return True + continue + + # Need actual nodes to process + if not ignored_nodes: + continue + + # Handle regular fmt blocks + + _handle_regular_fmt_block( + ignored_nodes, + comment, + previous_consumed, + is_fmt_skip, + lines, + leaf, + ) + return True + + return False + + def _handle_regular_fmt_block( ignored_nodes: list[LN], - first: LN, - prefix: str, comment: ProtoComment, previous_consumed: int, is_fmt_skip: bool, @@ -346,7 +374,9 @@ def _handle_regular_fmt_block( leaf: Leaf, ) -> None: """Handle fmt blocks with actual AST nodes.""" + first = ignored_nodes[0] # Can be a container node with the `leaf`. parent = first.parent + prefix = first.prefix if comment.value in FMT_OFF: first.prefix = prefix[comment.consumed :] @@ -364,6 +394,7 @@ def _handle_regular_fmt_block( if len(lines) > 0 and not any( line[0] <= comment_lineno <= line[1] for line in lines ): + # keeping indentation of comment by preserving original whitespaces. fmt_off_prefix = prefix.split(comment.value)[0] if "\n" in fmt_off_prefix: fmt_off_prefix = fmt_off_prefix.split("\n")[-1] @@ -374,6 +405,8 @@ def _handle_regular_fmt_block( hidden_value += comment.leading_whitespace + comment.value if hidden_value.endswith("\n"): + # That happens when one of the `ignored_nodes` ended with a NEWLINE + # leaf (possibly followed by a DEDENT). hidden_value = hidden_value[:-1] first_idx: Optional[int] = None @@ -396,62 +429,6 @@ def _handle_regular_fmt_block( ) -def convert_one_fmt_off_pair( - node: Node, mode: Mode, lines: Collection[tuple[int, int]] -) -> bool: - """Convert content of a single `# fmt: off`/`# fmt: on` into a standalone comment. - - Returns True if a pair was converted. - """ - for leaf in node.leaves(): - previous_consumed = 0 - for comment in list_comments(leaf.prefix, is_endmarker=False, mode=mode): - should_process, is_fmt_off, is_fmt_skip = _should_process_fmt_comment( - comment, leaf, mode - ) - if not should_process: - previous_consumed = comment.consumed - continue - - if not _is_valid_standalone_fmt_comment( - comment, leaf, is_fmt_off, is_fmt_skip - ): - previous_consumed = comment.consumed - continue - - ignored_nodes = list(generate_ignored_nodes(leaf, comment, mode)) - - # Handle comment-only blocks - if not ignored_nodes and is_fmt_off: - if _handle_comment_only_fmt_block( - leaf, comment, previous_consumed, mode - ): - return True - continue - - # Need actual nodes to process - if not ignored_nodes: - continue - - # Handle regular fmt blocks - first = ignored_nodes[0] - prefix = first.prefix - - _handle_regular_fmt_block( - ignored_nodes, - first, - prefix, - comment, - previous_consumed, - is_fmt_skip, - lines, - leaf, - ) - return True - - return False - - def generate_ignored_nodes( leaf: Leaf, comment: ProtoComment, mode: Mode ) -> Iterator[LN]: @@ -460,7 +437,7 @@ def generate_ignored_nodes( If comment is skip, returns leaf only. Stops at the end of the block. """ - if _contains_fmt_skip_comment(comment.value, mode): + if _contains_fmt_directive(comment.value, FMT_SKIP): yield from _generate_ignored_nodes_from_fmt_skip(leaf, comment, mode) return container: Optional[LN] = container_of(leaf) @@ -623,13 +600,16 @@ def contains_pragma_comment(comment_list: list[Leaf]) -> bool: return False -def _contains_fmt_skip_comment(comment_line: str, mode: Mode) -> bool: +def _contains_fmt_directive( + comment_line: str, directives: set[str] = FMT_OFF | FMT_ON | FMT_SKIP +) -> bool: """ - Checks if the given comment contains FMT_SKIP alone or paired with other comments. + Checks if the given comment contains format directives alone or paired with other comments. + Defaults to checking all directives (skip, off, on, yapf), but can be narrowed to specific ones. Matching styles: - # fmt:skip <-- single comment - # noqa:XXX # fmt:skip # a nice line <-- multiple comments (Preview) - # pylint:XXX; fmt:skip <-- list of comments (; separated, Preview) + # foobar <-- single comment + # foobar # foobar # foobar <-- multiple comments + # foobar; foobar <-- list of comments (; separated) """ semantic_comment_blocks = [ comment_line, @@ -645,4 +625,4 @@ def _contains_fmt_skip_comment(comment_line: str, mode: Mode) -> bool: ], ] - return any(comment in FMT_SKIP for comment in semantic_comment_blocks) + return any(comment in directives for comment in semantic_comment_blocks) diff --git a/tests/data/cases/fmtonoff.py b/tests/data/cases/fmtonoff.py index bba49a0d760..56d99b8caf5 100644 --- a/tests/data/cases/fmtonoff.py +++ b/tests/data/cases/fmtonoff.py @@ -240,9 +240,9 @@ def single_literal_yapf_disable(): d={'a':1, 'b':2} - # output + #!/usr/bin/env python3 import asyncio import sys From 97bf97c7f57da8d7b6b96cb38839032563a6f920 Mon Sep 17 00:00:00 2001 From: cobalt <61329810+cobaltt7@users.noreply.github.com> Date: Sun, 26 Oct 2025 16:43:55 -0500 Subject: [PATCH 5/5] Fix long line & make changelog entry more concise Signed-off-by: cobalt <61329810+cobaltt7@users.noreply.github.com> --- CHANGES.md | 7 +++---- src/black/comments.py | 8 ++++++-- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 014f023790e..0a411aafffb 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -15,10 +15,9 @@ -- Fix bug where comments between `# fmt: off` and `# fmt: on` were reformatted (#1245) -- Comments containing fmt directives (`fmt:skip`, `fmt:off`, `fmt:on`) now preserve - their exact formatting instead of being normalized. For example, `#fmt:skip` is no - longer changed to `# fmt:skip` (#1245) +- Fix bug where comments between `# fmt: off` and `# fmt: on` were reformatted (#4811) +- Comments containing fmt directives now preserve their exact formatting instead of + being normalized (#4811) ### Preview style diff --git a/src/black/comments.py b/src/black/comments.py index 13b2a6fe4ed..c4984e4e2b6 100644 --- a/src/black/comments.py +++ b/src/black/comments.py @@ -604,8 +604,12 @@ def _contains_fmt_directive( comment_line: str, directives: set[str] = FMT_OFF | FMT_ON | FMT_SKIP ) -> bool: """ - Checks if the given comment contains format directives alone or paired with other comments. - Defaults to checking all directives (skip, off, on, yapf), but can be narrowed to specific ones. + Checks if the given comment contains format directives, alone or paired with + other comments. + + Defaults to checking all directives (skip, off, on, yapf), but can be + narrowed to specific ones. + Matching styles: # foobar <-- single comment # foobar # foobar # foobar <-- multiple comments