diff --git a/CHANGES.md b/CHANGES.md index 7953d0dc626..8bbaa13e2db 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -37,6 +37,8 @@ - Wrap the `in` clause of comprehensions across lines if necessary (#4699) - Remove parentheses around multiple exception types in `except` and `except*` without `as`. (#4720) +- Recognize `\r` (CR) line endings when detecting a file's newline style, and + normalize newlines both from and to that style (#4710) ### Configuration diff --git a/docs/the_black_code_style/future_style.md b/docs/the_black_code_style/future_style.md index 13bcaa94e5d..837aec457b0 100644 --- a/docs/the_black_code_style/future_style.md +++ b/docs/the_black_code_style/future_style.md @@ -33,6 +33,8 @@ Currently, the following features are included in the preview style: across lines if it would otherwise exceed the maximum line length. - `remove_parens_around_except_types`: Remove parentheses around multiple exception types in `except` and `except*` without `as`. See PEP 758 for details. +- `normalize_cr_newlines`: Recognize `\r` (CR) line endings when detecting a file's + newline style, and normalize newlines both from and to that style. 
(labels/unstable-features)= diff --git a/src/black/__init__.py b/src/black/__init__.py index 082f705f196..79541df2149 100644 --- a/src/black/__init__.py +++ b/src/black/__init__.py @@ -946,7 +946,7 @@ def format_file_in_place( with open(src, "rb") as buf: if mode.skip_source_first_line: header = buf.readline() - src_contents, encoding, newline = decode_bytes(buf.read()) + src_contents, encoding, newline = decode_bytes(buf.read(), mode) try: dst_contents = format_file_contents( src_contents, fast=fast, mode=mode, lines=lines @@ -1008,7 +1008,9 @@ def format_stdin_to_stdout( then = datetime.now(timezone.utc) if content is None: - src, encoding, newline = decode_bytes(sys.stdin.buffer.read()) + src, encoding, newline = decode_bytes(sys.stdin.buffer.read(), mode) + elif Preview.normalize_cr_newlines in mode: + src, encoding, newline = content, "utf-8", "\n" else: src, encoding, newline = content, "utf-8", "" @@ -1026,8 +1028,12 @@ def format_stdin_to_stdout( ) if write_back == WriteBack.YES: # Make sure there's a newline after the content - if dst and dst[-1] != "\n": - dst += "\n" + if Preview.normalize_cr_newlines in mode: + if dst and dst[-1] != "\n" and dst[-1] != "\r": + dst += newline + else: + if dst and dst[-1] != "\n": + dst += "\n" f.write(dst) elif write_back in (WriteBack.DIFF, WriteBack.COLOR_DIFF): now = datetime.now(timezone.utc) @@ -1217,7 +1223,17 @@ def f( def _format_str_once( src_contents: str, *, mode: Mode, lines: Collection[tuple[int, int]] = () ) -> str: - src_node = lib2to3_parse(src_contents.lstrip(), mode.target_versions) + if Preview.normalize_cr_newlines in mode: + normalized_contents, _, newline_type = decode_bytes( + src_contents.encode("utf-8"), mode + ) + + src_node = lib2to3_parse( + normalized_contents.lstrip(), target_versions=mode.target_versions + ) + else: + src_node = lib2to3_parse(src_contents.lstrip(), mode.target_versions) + dst_blocks: list[LinesBlock] = [] if mode.target_versions: versions = mode.target_versions @@ -1262,16 
+1278,25 @@ def _format_str_once( for block in dst_blocks: dst_contents.extend(block.all_lines()) if not dst_contents: - # Use decode_bytes to retrieve the correct source newline (CRLF or LF), - # and check if normalized_content has more than one line - normalized_content, _, newline = decode_bytes(src_contents.encode("utf-8")) - if "\n" in normalized_content: - return newline + if Preview.normalize_cr_newlines in mode: + if "\n" in normalized_contents: + return newline_type + else: + # Use decode_bytes to retrieve the correct source newline (CRLF or LF), + # and check if normalized_content has more than one line + normalized_content, _, newline = decode_bytes( + src_contents.encode("utf-8"), mode + ) + if "\n" in normalized_content: + return newline return "" - return "".join(dst_contents) + if Preview.normalize_cr_newlines in mode: + return "".join(dst_contents).replace("\n", newline_type) + else: + return "".join(dst_contents) -def decode_bytes(src: bytes) -> tuple[FileContent, Encoding, NewLine]: +def decode_bytes(src: bytes, mode: Mode) -> tuple[FileContent, Encoding, NewLine]: """Return a tuple of (decoded_contents, encoding, newline). 
`newline` is either CRLF or LF but `decoded_contents` is decoded with @@ -1282,7 +1307,25 @@ def decode_bytes(src: bytes) -> tuple[FileContent, Encoding, NewLine]: if not lines: return "", encoding, "\n" - newline = "\r\n" if lines[0][-2:] == b"\r\n" else "\n" + if Preview.normalize_cr_newlines in mode: + if lines[0][-2:] == b"\r\n": + if b"\r" in lines[0][:-2]: + newline = "\r" + else: + newline = "\r\n" + elif lines[0][-1:] == b"\n": + if b"\r" in lines[0][:-1]: + newline = "\r" + else: + newline = "\n" + else: + if b"\r" in lines[0]: + newline = "\r" + else: + newline = "\n" + else: + newline = "\r\n" if lines[0][-2:] == b"\r\n" else "\n" + srcbuf.seek(0) with io.TextIOWrapper(srcbuf, encoding) as tiow: return tiow.read(), encoding, newline diff --git a/src/black/mode.py b/src/black/mode.py index 4d85358d5c5..85a205949dc 100644 --- a/src/black/mode.py +++ b/src/black/mode.py @@ -235,6 +235,7 @@ class Preview(Enum): # Remove parentheses around multiple exception types in except and # except* without as. See PEP 758 for details. remove_parens_around_except_types = auto() + normalize_cr_newlines = auto() UNSTABLE_FEATURES: set[Preview] = { diff --git a/src/black/resources/black.schema.json b/src/black/resources/black.schema.json index c3d7d03d4cc..549e0e8049f 100644 --- a/src/black/resources/black.schema.json +++ b/src/black/resources/black.schema.json @@ -87,7 +87,8 @@ "always_one_newline_after_import", "fix_fmt_skip_in_one_liners", "wrap_comprehension_in", - "remove_parens_around_except_types" + "remove_parens_around_except_types", + "normalize_cr_newlines" ] }, "description": "Enable specific features included in the `--unstable` style. Requires `--preview`. No compatibility guarantees are provided on the behavior or existence of any unstable features." 
diff --git a/src/blackd/__init__.py b/src/blackd/__init__.py index 86309da0ef0..2f9a516d6e5 100644 --- a/src/blackd/__init__.py +++ b/src/blackd/__init__.py @@ -22,6 +22,7 @@ import black from _black_version import version as __version__ from black.concurrency import maybe_install_uvloop +from black.mode import Preview # This is used internally by tests to shut down the server prematurely _stop_signal = asyncio.Event() @@ -129,13 +130,14 @@ async def handle(request: web.Request, executor: Executor) -> web.Response: executor, partial(black.format_file_contents, req_str, fast=fast, mode=mode) ) - # Preserve CRLF line endings - nl = req_str.find("\n") - if nl > 0 and req_str[nl - 1] == "\r": - formatted_str = formatted_str.replace("\n", "\r\n") - # If, after swapping line endings, nothing changed, then say so - if formatted_str == req_str: - raise black.NothingChanged + if Preview.normalize_cr_newlines not in mode: + # Preserve CRLF line endings + nl = req_str.find("\n") + if nl > 0 and req_str[nl - 1] == "\r": + formatted_str = formatted_str.replace("\n", "\r\n") + # If, after swapping line endings, nothing changed, then say so + if formatted_str == req_str: + raise black.NothingChanged # Put the source first line back req_str = header + req_str diff --git a/tests/test_black.py b/tests/test_black.py index a9ea4b9a765..36ee7d9e1b9 100644 --- a/tests/test_black.py +++ b/tests/test_black.py @@ -3,6 +3,7 @@ import asyncio import inspect import io +import itertools import logging import multiprocessing import os @@ -2083,6 +2084,12 @@ def test_carriage_return_edge_cases(self) -> None: == "class A: ...\n" ) + def test_preview_newline_type_detection(self) -> None: + mode = Mode(enabled_features={Preview.normalize_cr_newlines}) + newline_types = ["A\n", "A\r\n", "A\r"] + for test_case in itertools.permutations(newline_types): + assert black.format_str("".join(test_case), mode=mode) == test_case[0] * 3 + class TestCaching: def test_get_cache_dir(