Use ast to find candidate fstring expressions (#183)

* draft code between * fix range * rufff? * code_in_chunk + test * test cases for code_between * fix line num, more tests * rely more on original source code - for quote type * replace unneeded generator * ruff * WIP 106 failing * WIP 20 failing * WIP 8 failing * WIP 6 failing * WIP 4 failing * WIP 2 failing * it works! * rufff * black! * drop python 3.7; use legacy @cache
ikamensh · Jun 16, 2023 · cc98a97 · cc98a97
1 parent 0696643
commit cc98a97
Show file tree

Hide file tree

Showing 13 changed files with 200 additions and 28 deletions.
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -14,7 +14,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12-dev"]
+        python-version: ["3.8", "3.9", "3.10", "3.11", "3.12-dev"]
         os: [ubuntu-latest, macOS-latest, windows-latest]
     steps:
       - uses: actions/checkout@v3

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,3 +1,16 @@
+#### v.1.0.0
+
+Drop support for python 3.7.
+
+##### Moved `%` and `.format` expression identification to `ast`, replacing the legacy token state machine.
+This has led to small changes in the formatting of the output code, e.g. the type of quotes chosen in
+ambiguous cases might have changed. Example:
+`'first part {}'"second part {}".format(one, two)` used to result in `"` quotes,
+and now results in `'`, as in `f'first part {one}second part {two}'`. I think it's a minor change
+in the output. At the same time, it's a huge simplification of the source code that should help
+maintain and develop this project in the future.
+
+
 #### v.0.77
 
 *[Contributed by Aarni Koskela]* `--transform-joins` (`-tj`) will transform string join operations on static operands

diff --git a/pyproject.toml b/pyproject.toml
@@ -19,7 +19,6 @@ keywords = [
 classifiers = [
     "Programming Language :: Python",
     "Programming Language :: Python :: 3",
-    "Programming Language :: Python :: 3.7",
     "Programming Language :: Python :: 3.8",
     "Programming Language :: Python :: 3.9",
     "Programming Language :: Python :: 3.10",

diff --git a/src/flynt/__init__.py b/src/flynt/__init__.py
@@ -2,7 +2,7 @@
 from old "%-formatted" and .format(...) strings into Python 3.6+'s f-strings.
 Learn more about f-strings at https://www.python.org/dev/peps/pep-0498/"""
 
-__version__ = "0.78"
+__version__ = "1.0.0"
 
 from flynt.cli import main
 

diff --git a/src/flynt/candidates/ast_call_candidates.py b/src/flynt/candidates/ast_call_candidates.py
@@ -0,0 +1,40 @@
+import ast
+from typing import List
+
+from flynt.state import State
+
+from .ast_chunk import AstChunk
+
+
+def is_call_format(node):
+    return (
+        isinstance(node, ast.Call)
+        and isinstance(node.func, ast.Attribute)
+        and node.func.attr == "format"
+        and isinstance(node.func.value, (ast.Str, ast.Name))
+    )
+
+
+class CallFmtFinder(ast.NodeVisitor):
+    def __init__(self) -> None:
+        super().__init__()
+        self.candidates: List[AstChunk] = []
+
+    def visit_Call(self, node: ast.Call) -> None:
+        """
+        Collects every ``.format(...)`` call made on a string literal or a name.
+        """
+        if is_call_format(node):
+            self.candidates.append(AstChunk(node))
+        else:
+            self.generic_visit(node)
+
+
+def call_candidates(code: str, state: State) -> List[AstChunk]:
+    tree = ast.parse(code)
+
+    finder = CallFmtFinder()
+    finder.visit(tree)
+
+    state.call_candidates += len(finder.candidates)
+    return finder.candidates
diff --git a/src/flynt/candidates/ast_chunk.py b/src/flynt/candidates/ast_chunk.py
@@ -2,8 +2,6 @@
 
 import ast
 
-from flynt.format import QuoteTypes
-
 
 class AstChunk:
     def __init__(self, node: ast.AST) -> None:
@@ -39,7 +37,7 @@ def string_in_string(self) -> bool:
 
     @property
     def quote_type(self) -> str:
-        return QuoteTypes.double
+        raise NotImplementedError
 
     def __str__(self) -> str:
         from flynt.utils import ast_to_string

diff --git a/src/flynt/candidates/ast_percent_candidates.py b/src/flynt/candidates/ast_percent_candidates.py
@@ -0,0 +1,40 @@
+import ast
+from typing import List
+
+from flynt.state import State
+
+from .ast_chunk import AstChunk
+
+
+def is_percent_format(node):
+    return (
+        isinstance(node, ast.BinOp)
+        and isinstance(node.op, ast.Mod)
+        and isinstance(node.left, ast.Str)
+    )
+
+
+class PercentFmtFinder(ast.NodeVisitor):
+    def __init__(self) -> None:
+        super().__init__()
+        self.candidates: List[AstChunk] = []
+
+    def visit_BinOp(self, node: ast.BinOp) -> None:
+        """
+        Collects every ``%`` operation whose left operand is a string literal.
+        """
+        if is_percent_format(node):
+            self.candidates.append(AstChunk(node))
+        else:
+            self.generic_visit(node)
+
+
+def percent_candidates(code: str, state: State) -> List[AstChunk]:
+    tree = ast.parse(code)
+
+    finder = PercentFmtFinder()
+    finder.visit(tree)
+
+    state.percent_candidates += len(finder.candidates)
+
+    return finder.candidates
diff --git a/src/flynt/code_editor.py b/src/flynt/code_editor.py
@@ -2,11 +2,12 @@
 import re
 import string
 import sys
-from functools import partial
+from functools import lru_cache, partial
 from typing import Callable, List, Optional, Tuple, Union
 
-from flynt.candidates import split
+from flynt.candidates.ast_call_candidates import call_candidates
 from flynt.candidates.ast_chunk import AstChunk
+from flynt.candidates.ast_percent_candidates import percent_candidates
 from flynt.candidates.chunk import Chunk
 from flynt.exceptions import FlyntException
 from flynt.format import QuoteTypes as qt
@@ -17,6 +18,7 @@
 from flynt.string_concat.candidates import concat_candidates
 from flynt.string_concat.transformer import transform_concat
 from flynt.transform.transform import transform_chunk
+from flynt.utils import contains_comment
 
 noqa_regex = re.compile("#[ ]*noqa.*flynt")
 
@@ -74,6 +76,29 @@ def edit(self) -> Tuple[str, int]:
         self.output = "".join(self.results)[:-1]
         return self.output, self.count_expressions
 
+    def code_between(
+        self, start_line: int, start_idx: int, end_line: int, end_idx: int
+    ) -> str:
+        """get source code in the original between two locations."""
+        assert end_line >= start_line
+        result = []
+        if start_line == end_line:
+            assert end_idx >= start_idx
+            result.append(self.src_lines[start_line][start_idx:end_idx])
+        else:
+            result.append(self.src_lines[start_line][start_idx:])
+            full_lines = range(start_line + 1, end_line)
+            for line in full_lines:
+                result.append(self.src_lines[line])
+            result.append(self.src_lines[end_line][:end_idx])
+        return "\n".join(result)
+
+    @lru_cache(None)
+    def code_in_chunk(self, chunk: Union[Chunk, AstChunk]):
+        return self.code_between(
+            chunk.start_line, chunk.start_idx, chunk.end_line, chunk.end_idx
+        )
+
     def fill_up_to(self, chunk: Union[Chunk, AstChunk]) -> None:
         start_line, start_idx, _ = (chunk.start_line, chunk.start_idx, chunk.end_idx)
         if start_line == self.last_line:
@@ -100,16 +125,25 @@ def try_chunk(self, chunk: Union[Chunk, AstChunk]) -> None:
 
         Transformation function is free to decide to refuse conversion,
         e.g. in edge cases that are not supported."""
+
+        # if a chunk has a comment in it, we should abort.
+        if contains_comment(self.code_in_chunk(chunk)):
+            return
+
+        # skip raw strings
+        if self.code_in_chunk(chunk)[0] == "r":
+            return
+
+        # skip lines with # noqa comment
         for line in self.src_lines[chunk.start_line : chunk.end_line + 1]:
             if noqa_regex.findall(line):
-                # user does not wish for this line to be converted.
                 return
 
         try:
             quote_type = (
                 qt.double
                 if chunk.string_in_string and chunk.n_lines == 1
-                else chunk.quote_type
+                else get_quote_type(self.code_in_chunk(chunk))
             )
         except FlyntException:
             quote_type = qt.double
@@ -136,7 +170,10 @@ def maybe_replace(
 
         For example, we might not want to change multiple lines."""
         if contract_lines:
-            if get_quote_type(str(chunk)) in (qt.triple_double, qt.triple_single):
+            if get_quote_type(self.code_in_chunk(chunk)) in (
+                qt.triple_double,
+                qt.triple_single,
+            ):
                 lines = converted.split("\\n")
                 lines[-1] += rest
                 lines_fit = all(
@@ -196,9 +233,15 @@ def add_rest(self) -> None:
 
 def fstringify_code_by_line(code: str, state: State) -> Tuple[str, int]:
     """returns fstringified version of the code and amount of lines edited."""
+
+    def candidates(code, state):
+        chunks = percent_candidates(code, state) + call_candidates(code, state)
+        chunks.sort(key=lambda c: (c.start_line, c.start_idx))
+        return chunks
+
     return _transform_code(
         code,
-        partial(split.get_fstringify_chunks, lexer_context=state.lexer_context),
+        partial(candidates, state=state),
         partial(transform_chunk, state=state),
         state,
     )

diff --git a/src/flynt/transform/transform.py b/src/flynt/transform/transform.py
@@ -1,6 +1,7 @@
 import ast
 import copy
 import logging
+import traceback
 from typing import Tuple
 
 from flynt.exceptions import ConversionRefused
@@ -37,6 +38,7 @@ def transform_chunk(
         state.invalid_conversions += 1
         return code, False
     except Exception:
+        traceback.print_exc()
         log.exception("Exception during conversion of code '%s'", code)
         state.invalid_conversions += 1
         return code, False

diff --git a/src/flynt/utils.py b/src/flynt/utils.py
@@ -1,4 +1,6 @@
 import ast
+import io
+import tokenize
 from typing import Optional
 
 import astor
@@ -77,3 +79,11 @@ def fixup_transformed(tree: ast.AST, quote_type: Optional[str] = None) -> str:
     new_code = new_code.replace("\n", "\\n")
     new_code = new_code.replace("\t", "\\t")
     return new_code
+
+
+def contains_comment(code: str) -> bool:
+    tokens = tokenize.generate_tokens(io.StringIO(code).readline)
+    for token in tokens:
+        if token.type == tokenize.COMMENT:
+            return True
+    return False
diff --git a/test/integration/utils.py b/test/integration/utils.py
@@ -10,6 +10,7 @@
     "multiline_limit.py",
 }
 samples = {p.name for p in (int_test_path / "samples_in").glob("*.py")} - EXCLUDED
+# samples = {"multiline_1.py"}
 concat_samples = {p.name for p in (int_test_path / "samples_in_concat").glob("*.py")}
 
 

diff --git a/test/test_code_editor.py b/test/test_code_editor.py
@@ -0,0 +1,35 @@
+import pytest
+
+from flynt.candidates import split
+from flynt.code_editor import CodeEditor
+from flynt.format import get_quote_type
+
+s0 = """'%s' % (
+                    v['key'])"""
+s1 = """s = '%s' % (
+                    v['key'])"""
+
+s2 = """\"%(a)-6d %(a)s" % d"""
+
+
+@pytest.mark.parametrize(
+    "s_in",
+    [s1, s2],
+)
+def test_code_between_qoute_types(s_in):
+
+    chunk = set(split.get_fstringify_chunks(s_in)).pop()
+    editor = CodeEditor(s_in, None, lambda *args: None, None)
+
+    assert get_quote_type(editor.code_in_chunk(chunk)) == get_quote_type(str(chunk))
+
+
+@pytest.mark.parametrize(
+    "s_in",
+    [s0, s2],
+)
+def test_code_between_exact(s_in):
+    chunk = set(split.get_fstringify_chunks(s_in)).pop()
+    editor = CodeEditor(s_in, None, lambda *args: None, None)
+
+    assert editor.code_in_chunk(chunk) == s_in