From ba618a307a30a119b4fafe526ebf7d5f092ba981 Mon Sep 17 00:00:00 2001 From: "Yilei \"Dolee\" Yang" Date: Tue, 30 Aug 2022 19:52:00 -0700 Subject: [PATCH] Add parens around implicit string concatenations where it increases readability (#3162) Adds parentheses around implicit string concatenations when it's inside a list, set, or tuple. Except when it's only element and there's no trailing comma. Looking at the order of the transformers here, we need to "wrap in parens" before string_split runs. So my solution is to introduce a "collaboration" between StringSplitter and StringParenWrapper where the splitter "skips" the split until the wrapper adds the parens (and then the line after the paren is split by StringSplitter) in another pass. I have also considered an alternative approach, where I tried to add a different "string paren wrapper" class, and it runs before string_split. Then I found out it requires a different do_transform implementation than StringParenWrapper.do_transform, since the later assumes it runs after the delimiter_split transform. So I stopped researching that route. Originally function calls were also included in this change, but given missing commas should usually result in a runtime error and the scary amount of changes this cause on downstream code, they were removed in later revisions. --- CHANGES.md | 2 + src/black/trans.py | 50 +++++++++++- tests/data/preview/comments7.py | 46 +++++++---- tests/data/preview/long_strings.py | 81 ++++++++++++++++++- .../data/preview/long_strings__regression.py | 32 +++++--- tests/test_black.py | 24 +++--- 6 files changed, 191 insertions(+), 44 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 34c54710775..a5ce3b1fbe2 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -26,6 +26,8 @@ normalized as expected (#3168) - When using `--skip-magic-trailing-comma` or `-C`, trailing commas are stripped from subscript expressions with more than 1 element (#3209) +- Implicitly concatenated strings inside a list, set, or tuple are now wrapped inside + parentheses (#3162) - Fix a string merging/split issue when a comment is present in the middle of implicitly concatenated strings on its own line (#3227) diff --git a/src/black/trans.py b/src/black/trans.py index 9e0284cefe3..7ecfcef703d 100644 --- a/src/black/trans.py +++ b/src/black/trans.py @@ -1043,6 +1043,41 @@ def _get_max_string_length(self, line: Line, string_idx: int) -> int: max_string_length = self.line_length - offset return max_string_length + @staticmethod + def _prefer_paren_wrap_match(LL: List[Leaf]) -> Optional[int]: + """ + Returns: + string_idx such that @LL[string_idx] is equal to our target (i.e. + matched) string, if this line matches the "prefer paren wrap" statement + requirements listed in the 'Requirements' section of the StringParenWrapper + class's docstring. + OR + None, otherwise. + """ + # The line must start with a string. + if LL[0].type != token.STRING: + return None + + matching_nodes = [ + syms.listmaker, + syms.dictsetmaker, + syms.testlist_gexp, + ] + # If the string is an immediate child of a list/set/tuple literal... + if ( + parent_type(LL[0]) in matching_nodes + or parent_type(LL[0].parent) in matching_nodes + ): + # And the string is surrounded by commas (or is the first/last child)... + prev_sibling = LL[0].prev_sibling + next_sibling = LL[0].next_sibling + if (not prev_sibling or prev_sibling.type == token.COMMA) and ( + not next_sibling or next_sibling.type == token.COMMA + ): + return 0 + + return None + def iter_fexpr_spans(s: str) -> Iterator[Tuple[int, int]]: """ @@ -1138,6 +1173,9 @@ class StringSplitter(BaseStringSplitter, CustomSplitMapMixin): def do_splitter_match(self, line: Line) -> TMatchResult: LL = line.leaves + if self._prefer_paren_wrap_match(LL) is not None: + return TErr("Line needs to be wrapped in parens first.") + is_valid_index = is_valid_index_factory(LL) idx = 0 @@ -1583,8 +1621,7 @@ def _get_string_operator_leaves(self, leaves: Iterable[Leaf]) -> List[Leaf]: class StringParenWrapper(BaseStringSplitter, CustomSplitMapMixin): """ - StringTransformer that splits non-"atom" strings (i.e. strings that do not - exist on lines by themselves). + StringTransformer that wraps strings in parens and then splits at the LPAR. Requirements: All of the requirements listed in BaseStringSplitter's docstring in @@ -1604,6 +1641,11 @@ class StringParenWrapper(BaseStringSplitter, CustomSplitMapMixin): OR * The line is a dictionary key assignment where some valid key is being assigned the value of some string. + OR + * The line starts with an "atom" string that prefers to be wrapped in + parens. It's preferred to be wrapped when it's is an immediate child of + a list/set/tuple literal, AND the string is surrounded by commas (or is + the first/last child). Transformations: The chosen string is wrapped in parentheses and then split at the LPAR. @@ -1628,6 +1670,9 @@ class StringParenWrapper(BaseStringSplitter, CustomSplitMapMixin): changed such that it no longer needs to be given its own line, StringParenWrapper relies on StringParenStripper to clean up the parentheses it created. + + For "atom" strings that prefers to be wrapped in parens, it requires + StringSplitter to hold the split until the string is wrapped in parens. """ def do_splitter_match(self, line: Line) -> TMatchResult: @@ -1644,6 +1689,7 @@ def do_splitter_match(self, line: Line) -> TMatchResult: or self._assert_match(LL) or self._assign_match(LL) or self._dict_match(LL) + or self._prefer_paren_wrap_match(LL) ) if string_idx is not None: diff --git a/tests/data/preview/comments7.py b/tests/data/preview/comments7.py index ca9d7c62b21..ec2dc501d8e 100644 --- a/tests/data/preview/comments7.py +++ b/tests/data/preview/comments7.py @@ -226,39 +226,53 @@ class C: # metadata_version errors. ( {}, - "None is an invalid value for Metadata-Version. Error: This field is" - " required. see" - " https://packaging.python.org/specifications/core-metadata", + ( + "None is an invalid value for Metadata-Version. Error: This field" + " is required. see" + " https://packaging.python.org/specifications/core-metadata" + ), ), ( {"metadata_version": "-1"}, - "'-1' is an invalid value for Metadata-Version. Error: Unknown Metadata" - " Version see" - " https://packaging.python.org/specifications/core-metadata", + ( + "'-1' is an invalid value for Metadata-Version. Error: Unknown" + " Metadata Version see" + " https://packaging.python.org/specifications/core-metadata" + ), ), # name errors. ( {"metadata_version": "1.2"}, - "'' is an invalid value for Name. Error: This field is required. see" - " https://packaging.python.org/specifications/core-metadata", + ( + "'' is an invalid value for Name. Error: This field is required." + " see https://packaging.python.org/specifications/core-metadata" + ), ), ( {"metadata_version": "1.2", "name": "foo-"}, - "'foo-' is an invalid value for Name. Error: Must start and end with a" - " letter or numeral and contain only ascii numeric and '.', '_' and" - " '-'. see https://packaging.python.org/specifications/core-metadata", + ( + "'foo-' is an invalid value for Name. Error: Must start and end" + " with a letter or numeral and contain only ascii numeric and '.'," + " '_' and '-'. see" + " https://packaging.python.org/specifications/core-metadata" + ), ), # version errors. ( {"metadata_version": "1.2", "name": "example"}, - "'' is an invalid value for Version. Error: This field is required. see" - " https://packaging.python.org/specifications/core-metadata", + ( + "'' is an invalid value for Version. Error: This field is required." + " see https://packaging.python.org/specifications/core-metadata" + ), ), ( {"metadata_version": "1.2", "name": "example", "version": "dog"}, - "'dog' is an invalid value for Version. Error: Must start and end with" - " a letter or numeral and contain only ascii numeric and '.', '_' and" - " '-'. see https://packaging.python.org/specifications/core-metadata", + ( + "'dog' is an invalid value for Version. Error: Must start and end" + " with a letter or numeral and contain only ascii numeric and '.'," + " '_' and '-'. see" + " https://packaging.python.org/specifications/core-metadata" + ), ), ], ) diff --git a/tests/data/preview/long_strings.py b/tests/data/preview/long_strings.py index 26400eea450..3ad5f355e33 100644 --- a/tests/data/preview/long_strings.py +++ b/tests/data/preview/long_strings.py @@ -18,6 +18,18 @@ D4 = {"A long and ridiculous {}".format(string_key): "This is a really really really long string that has to go i,side of a dictionary. It is soooo bad.", some_func("calling", "some", "stuff"): "This is a really really really long string that has to go inside of a dictionary. It is {soooo} bad (#{x}).".format(sooo="soooo", x=2), "A %s %s" % ("formatted", "string"): "This is a really really really long string that has to go inside of a dictionary. It is %s bad (#%d)." % ("soooo", 2)} +L1 = ["The is a short string", "This is a really long string that can't possibly be expected to fit all together on one line. Also it is inside a list literal, so it's expected to be wrapped in parens when spliting to avoid implicit str concatenation.", short_call("arg", {"key": "value"}), "This is another really really (not really) long string that also can't be expected to fit on one line and is, like the other string, inside a list literal.", ("parens should be stripped for short string in list")] + +L2 = ["This is a really long string that can't be expected to fit in one line and is the only child of a list literal."] + +S1 = {"The is a short string", "This is a really long string that can't possibly be expected to fit all together on one line. Also it is inside a set literal, so it's expected to be wrapped in parens when spliting to avoid implicit str concatenation.", short_call("arg", {"key": "value"}), "This is another really really (not really) long string that also can't be expected to fit on one line and is, like the other string, inside a set literal.", ("parens should be stripped for short string in set")} + +S2 = {"This is a really long string that can't be expected to fit in one line and is the only child of a set literal."} + +T1 = ("The is a short string", "This is a really long string that can't possibly be expected to fit all together on one line. Also it is inside a tuple literal, so it's expected to be wrapped in parens when spliting to avoid implicit str concatenation.", short_call("arg", {"key": "value"}), "This is another really really (not really) long string that also can't be expected to fit on one line and is, like the other string, inside a tuple literal.", ("parens should be stripped for short string in list")) + +T2 = ("This is a really long string that can't be expected to fit in one line and is the only child of a tuple literal.",) + func_with_keywords(my_arg, my_kwarg="Long keyword strings also need to be wrapped, but they will probably need to be handled a little bit differently.") bad_split1 = ( @@ -109,7 +121,7 @@ comment_string = "Long lines with inline comments should have their comments appended to the reformatted string's enclosing right parentheses." # This comment gets thrown to the top. -arg_comment_string = print("Long lines with inline comments which are apart of (and not the only member of) an argument list should have their comments appended to the reformatted string's enclosing left parentheses.", # This comment stays on the bottom. +arg_comment_string = print("Long lines with inline comments which are apart of (and not the only member of) an argument list should have their comments appended to the reformatted string's enclosing left parentheses.", # This comment gets thrown to the top. "Arg #2", "Arg #3", "Arg #4", "Arg #5") pragma_comment_string1 = "Lines which end with an inline pragma comment of the form `# : <...>` should be left alone." # noqa: E501 @@ -345,6 +357,71 @@ def foo(): % ("soooo", 2), } +L1 = [ + "The is a short string", + ( + "This is a really long string that can't possibly be expected to fit all" + " together on one line. Also it is inside a list literal, so it's expected to" + " be wrapped in parens when spliting to avoid implicit str concatenation." + ), + short_call("arg", {"key": "value"}), + ( + "This is another really really (not really) long string that also can't be" + " expected to fit on one line and is, like the other string, inside a list" + " literal." + ), + "parens should be stripped for short string in list", +] + +L2 = [ + "This is a really long string that can't be expected to fit in one line and is the" + " only child of a list literal." +] + +S1 = { + "The is a short string", + ( + "This is a really long string that can't possibly be expected to fit all" + " together on one line. Also it is inside a set literal, so it's expected to be" + " wrapped in parens when spliting to avoid implicit str concatenation." + ), + short_call("arg", {"key": "value"}), + ( + "This is another really really (not really) long string that also can't be" + " expected to fit on one line and is, like the other string, inside a set" + " literal." + ), + "parens should be stripped for short string in set", +} + +S2 = { + "This is a really long string that can't be expected to fit in one line and is the" + " only child of a set literal." +} + +T1 = ( + "The is a short string", + ( + "This is a really long string that can't possibly be expected to fit all" + " together on one line. Also it is inside a tuple literal, so it's expected to" + " be wrapped in parens when spliting to avoid implicit str concatenation." + ), + short_call("arg", {"key": "value"}), + ( + "This is another really really (not really) long string that also can't be" + " expected to fit on one line and is, like the other string, inside a tuple" + " literal." + ), + "parens should be stripped for short string in list", +) + +T2 = ( + ( + "This is a really long string that can't be expected to fit in one line and is" + " the only child of a tuple literal." + ), +) + func_with_keywords( my_arg, my_kwarg=( @@ -487,7 +564,7 @@ def foo(): arg_comment_string = print( "Long lines with inline comments which are apart of (and not the only member of) an" " argument list should have their comments appended to the reformatted string's" - " enclosing left parentheses.", # This comment stays on the bottom. + " enclosing left parentheses.", # This comment gets thrown to the top. "Arg #2", "Arg #3", "Arg #4", diff --git a/tests/data/preview/long_strings__regression.py b/tests/data/preview/long_strings__regression.py index 58ccc4ac0b1..634db46a5e0 100644 --- a/tests/data/preview/long_strings__regression.py +++ b/tests/data/preview/long_strings__regression.py @@ -763,20 +763,28 @@ def xxxx_xxx_xx_xxxxxxxxxx_xxxx_xxxxxxxxx(xxxx): some_dictionary = { "xxxxx006": [ - "xxx-xxx" - " xxxxx3xxxx1xx2xxxxxxxxxxxxxx0xx6xxxxxxxxxx2xxxxxx9xxxxxxxxxx0xxxxx1xxx2x/xx9xx6+x+xxxxxxxxxxxxxx4xxxxxxxxxxxxxxxxxxxxx43xxx2xx2x4x++xxx6xxxxxxxxx+xxxxx/xx9x+xxxxxxxxxxxxxx8x15xxxxxxxxxxxxxxxxx82xx/xxxxxxxxxxxxxx/x5xxxxxxxxxxxxxx6xxxxxx74x4/xxx4x+xxxxxxxxx2xxxxxxxx87xxxxx4xxxxxxxx3xx0xxxxx4xxx1xx9xx5xxxxxxx/xxxxx5xx6xx4xxxx1x/x2xxxxxxxxxxxx64xxxxxxx1x0xx5xxxxxxxxxxxxxx==" - " xxxxx000 xxxxxxxxxx\n", - "xxx-xxx" - " xxxxx3xxxx1xx2xxxxxxxxxxxxxx6xxxxxxxxxxxxxx9xxxxxxxxxxxxx3xxx9xxxxxxxxxxxxxxxx0xxxxxxxxxxxxxxxxx2xxxx2xxx6xxxxx/xx54xxxxxxxxx4xxx3xxxxxx9xx3xxxxx39xxxxxxxxx5xx91xxxx7xxxxxx8xxxxxxxxxxxxxxxx9xxx93xxxxxxxxxxxxxxxxx7xxx8xx8xx4/x1xxxxx1x3xxxxxxxxxxxxx3xxxxxx9xx4xx4x7xxxxxxxxxxxxx1xxxxxxxxx7xxxxxxxxxxxxxx4xx6xxxxxxxxx9xxx7xxxx2xxxxxxxxxxxxxxxxxxxxxx8xxxxxxxxxxxxxxxxxxxx6xx==" - " xxxxx010 xxxxxxxxxx\n", + ( + "xxx-xxx" + " xxxxx3xxxx1xx2xxxxxxxxxxxxxx0xx6xxxxxxxxxx2xxxxxx9xxxxxxxxxx0xxxxx1xxx2x/xx9xx6+x+xxxxxxxxxxxxxx4xxxxxxxxxxxxxxxxxxxxx43xxx2xx2x4x++xxx6xxxxxxxxx+xxxxx/xx9x+xxxxxxxxxxxxxx8x15xxxxxxxxxxxxxxxxx82xx/xxxxxxxxxxxxxx/x5xxxxxxxxxxxxxx6xxxxxx74x4/xxx4x+xxxxxxxxx2xxxxxxxx87xxxxx4xxxxxxxx3xx0xxxxx4xxx1xx9xx5xxxxxxx/xxxxx5xx6xx4xxxx1x/x2xxxxxxxxxxxx64xxxxxxx1x0xx5xxxxxxxxxxxxxx==" + " xxxxx000 xxxxxxxxxx\n" + ), + ( + "xxx-xxx" + " xxxxx3xxxx1xx2xxxxxxxxxxxxxx6xxxxxxxxxxxxxx9xxxxxxxxxxxxx3xxx9xxxxxxxxxxxxxxxx0xxxxxxxxxxxxxxxxx2xxxx2xxx6xxxxx/xx54xxxxxxxxx4xxx3xxxxxx9xx3xxxxx39xxxxxxxxx5xx91xxxx7xxxxxx8xxxxxxxxxxxxxxxx9xxx93xxxxxxxxxxxxxxxxx7xxx8xx8xx4/x1xxxxx1x3xxxxxxxxxxxxx3xxxxxx9xx4xx4x7xxxxxxxxxxxxx1xxxxxxxxx7xxxxxxxxxxxxxx4xx6xxxxxxxxx9xxx7xxxx2xxxxxxxxxxxxxxxxxxxxxx8xxxxxxxxxxxxxxxxxxxx6xx==" + " xxxxx010 xxxxxxxxxx\n" + ), ], "xxxxx016": [ - "xxx-xxx" - " xxxxx3xxxx1xx2xxxxxxxxxxxxxx0xx6xxxxxxxxxx2xxxxxx9xxxxxxxxxx0xxxxx1xxx2x/xx9xx6+x+xxxxxxxxxxxxxx4xxxxxxxxxxxxxxxxxxxxx43xxx2xx2x4x++xxx6xxxxxxxxx+xxxxx/xx9x+xxxxxxxxxxxxxx8x15xxxxxxxxxxxxxxxxx82xx/xxxxxxxxxxxxxx/x5xxxxxxxxxxxxxx6xxxxxx74x4/xxx4x+xxxxxxxxx2xxxxxxxx87xxxxx4xxxxxxxx3xx0xxxxx4xxx1xx9xx5xxxxxxx/xxxxx5xx6xx4xxxx1x/x2xxxxxxxxxxxx64xxxxxxx1x0xx5xxxxxxxxxxxxxx==" - " xxxxx000 xxxxxxxxxx\n", - "xxx-xxx" - " xxxxx3xxxx1xx2xxxxxxxxxxxxxx6xxxxxxxxxxxxxx9xxxxxxxxxxxxx3xxx9xxxxxxxxxxxxxxxx0xxxxxxxxxxxxxxxxx2xxxx2xxx6xxxxx/xx54xxxxxxxxx4xxx3xxxxxx9xx3xxxxx39xxxxxxxxx5xx91xxxx7xxxxxx8xxxxxxxxxxxxxxxx9xxx93xxxxxxxxxxxxxxxxx7xxx8xx8xx4/x1xxxxx1x3xxxxxxxxxxxxx3xxxxxx9xx4xx4x7xxxxxxxxxxxxx1xxxxxxxxx7xxxxxxxxxxxxxx4xx6xxxxxxxxx9xxx7xxxx2xxxxxxxxxxxxxxxxxxxxxx8xxxxxxxxxxxxxxxxxxxx6xx==" - " xxxxx010 xxxxxxxxxx\n", + ( + "xxx-xxx" + " xxxxx3xxxx1xx2xxxxxxxxxxxxxx0xx6xxxxxxxxxx2xxxxxx9xxxxxxxxxx0xxxxx1xxx2x/xx9xx6+x+xxxxxxxxxxxxxx4xxxxxxxxxxxxxxxxxxxxx43xxx2xx2x4x++xxx6xxxxxxxxx+xxxxx/xx9x+xxxxxxxxxxxxxx8x15xxxxxxxxxxxxxxxxx82xx/xxxxxxxxxxxxxx/x5xxxxxxxxxxxxxx6xxxxxx74x4/xxx4x+xxxxxxxxx2xxxxxxxx87xxxxx4xxxxxxxx3xx0xxxxx4xxx1xx9xx5xxxxxxx/xxxxx5xx6xx4xxxx1x/x2xxxxxxxxxxxx64xxxxxxx1x0xx5xxxxxxxxxxxxxx==" + " xxxxx000 xxxxxxxxxx\n" + ), + ( + "xxx-xxx" + " xxxxx3xxxx1xx2xxxxxxxxxxxxxx6xxxxxxxxxxxxxx9xxxxxxxxxxxxx3xxx9xxxxxxxxxxxxxxxx0xxxxxxxxxxxxxxxxx2xxxx2xxx6xxxxx/xx54xxxxxxxxx4xxx3xxxxxx9xx3xxxxx39xxxxxxxxx5xx91xxxx7xxxxxx8xxxxxxxxxxxxxxxx9xxx93xxxxxxxxxxxxxxxxx7xxx8xx8xx4/x1xxxxx1x3xxxxxxxxxxxxx3xxxxxx9xx4xx4x7xxxxxxxxxxxxx1xxxxxxxxx7xxxxxxxxxxxxxx4xx6xxxxxxxxx9xxx7xxxx2xxxxxxxxxxxxxxxxxxxxxx8xxxxxxxxxxxxxxxxxxxx6xx==" + " xxxxx010 xxxxxxxxxx\n" + ), ], } diff --git a/tests/test_black.py b/tests/test_black.py index 5da247b71b0..089e043d639 100644 --- a/tests/test_black.py +++ b/tests/test_black.py @@ -513,15 +513,15 @@ def err(msg: str, **kwargs: Any) -> None: report.check = True self.assertEqual( unstyle(str(report)), - "2 files would be reformatted, 3 files would be left unchanged, 2 files" - " would fail to reformat.", + "2 files would be reformatted, 3 files would be left unchanged, 2" + " files would fail to reformat.", ) report.check = False report.diff = True self.assertEqual( unstyle(str(report)), - "2 files would be reformatted, 3 files would be left unchanged, 2 files" - " would fail to reformat.", + "2 files would be reformatted, 3 files would be left unchanged, 2" + " files would fail to reformat.", ) def test_report_quiet(self) -> None: @@ -607,15 +607,15 @@ def err(msg: str, **kwargs: Any) -> None: report.check = True self.assertEqual( unstyle(str(report)), - "2 files would be reformatted, 3 files would be left unchanged, 2 files" - " would fail to reformat.", + "2 files would be reformatted, 3 files would be left unchanged, 2" + " files would fail to reformat.", ) report.check = False report.diff = True self.assertEqual( unstyle(str(report)), - "2 files would be reformatted, 3 files would be left unchanged, 2 files" - " would fail to reformat.", + "2 files would be reformatted, 3 files would be left unchanged, 2" + " files would fail to reformat.", ) def test_report_normal(self) -> None: @@ -704,15 +704,15 @@ def err(msg: str, **kwargs: Any) -> None: report.check = True self.assertEqual( unstyle(str(report)), - "2 files would be reformatted, 3 files would be left unchanged, 2 files" - " would fail to reformat.", + "2 files would be reformatted, 3 files would be left unchanged, 2" + " files would fail to reformat.", ) report.check = False report.diff = True self.assertEqual( unstyle(str(report)), - "2 files would be reformatted, 3 files would be left unchanged, 2 files" - " would fail to reformat.", + "2 files would be reformatted, 3 files would be left unchanged, 2" + " files would fail to reformat.", ) def test_lib2to3_parse(self) -> None: