Skip to content

Commit

Permalink
Use ast to find candidate fstring expressions (#183)
Browse files Browse the repository at this point in the history
* draft code between

* fix range

* rufff?

* code_in_chunk + test

* test cases for code_between

* fix line num, more tests

* rely more on original source code - for quote type

* replace unneeded generator

* ruff

* WIP 106 failing

* WIP 20 failing

* WIP 8 failing

* WIP 6 failing

* WIP 4 failing

* WIP 2 failing

* it works!

* rufff

* black!

* drop python 3.7; use legacy @cache
  • Loading branch information
ikamensh authored Jun 16, 2023
1 parent 0696643 commit cc98a97
Show file tree
Hide file tree
Showing 13 changed files with 200 additions and 28 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ jobs:
strategy:
fail-fast: false
matrix:
python-version: ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12-dev"]
python-version: ["3.8", "3.9", "3.10", "3.11", "3.12-dev"]
os: [ubuntu-latest, macOS-latest, windows-latest]
steps:
- uses: actions/checkout@v3
Expand Down
13 changes: 13 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,16 @@
#### v.1.0.0

Drop support for python 3.7.

##### Moved identification of `%` and `.format` expressions to `ast`, replacing the legacy token state machine.
This has led to small changes in the formatting of the output code, e.g. the type of quotes chosen
in ambiguous cases might have changed. Example:
`'first part {}'"second part {}".format(one, two)` used to result in `"` quotes,
and now results in `'`, as in `f'first part {one}second part {two}'`. I think it's a minor change
in the output. At the same time, it's a huge simplification of the source code that should help
to maintain and develop this project in the future.


#### v.0.77

*[Contributed by Aarni Koskela]* `--transform-joins` (`-tj`) will transform string join operations on static operands
Expand Down
1 change: 0 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ keywords = [
classifiers = [
"Programming Language :: Python",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.7",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
Expand Down
2 changes: 1 addition & 1 deletion src/flynt/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from old "%-formatted" and .format(...) strings into Python 3.6+'s f-strings.
Learn more about f-strings at https://www.python.org/dev/peps/pep-0498/"""

__version__ = "0.78"
__version__ = "1.0.0"

from flynt.cli import main

Expand Down
40 changes: 40 additions & 0 deletions src/flynt/candidates/ast_call_candidates.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
import ast
from typing import List

from flynt.state import State

from .ast_chunk import AstChunk


def is_call_format(node: ast.AST) -> bool:
    """Return True if *node* is a ``.format(...)`` call on a string literal or a name.

    Matches expressions such as ``"{}".format(x)`` and ``template.format(x)``.
    Calls whose receiver is anything else (an attribute access, a bytes
    literal, another call, ...) are rejected.
    """
    if not isinstance(node, ast.Call):
        return False

    func = node.func
    if not (isinstance(func, ast.Attribute) and func.attr == "format"):
        return False

    receiver = func.value
    if isinstance(receiver, ast.Name):
        return True
    # ``ast.Str`` is a deprecated alias (warns on 3.12, removed in 3.14);
    # string literals are ``ast.Constant`` nodes holding a ``str`` value.
    return isinstance(receiver, ast.Constant) and isinstance(receiver.value, str)


class CallFmtFinder(ast.NodeVisitor):
    """AST visitor that collects ``.format(...)`` call expressions."""

    def __init__(self) -> None:
        super().__init__()
        # Matching call nodes wrapped in AstChunk, in the order they are visited.
        self.candidates: List[AstChunk] = []

    def visit_Call(self, node: ast.Call) -> None:
        """Record *node* if it is a ``.format`` call, otherwise search its children.

        A matching call is not descended into, so ``.format`` calls nested
        inside its receiver or arguments are not recorded separately.
        """
        if is_call_format(node):
            self.candidates.append(AstChunk(node))
        else:
            self.generic_visit(node)


def call_candidates(code: str, state: State) -> List[AstChunk]:
    """Parse *code* and return the ``.format(...)`` call candidates found in it.

    The number of candidates found is added to ``state.call_candidates``.
    """
    parsed = ast.parse(code)
    visitor = CallFmtFinder()
    visitor.visit(parsed)

    found = visitor.candidates
    state.call_candidates += len(found)
    return found
4 changes: 1 addition & 3 deletions src/flynt/candidates/ast_chunk.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@

import ast

from flynt.format import QuoteTypes


class AstChunk:
def __init__(self, node: ast.AST) -> None:
Expand Down Expand Up @@ -39,7 +37,7 @@ def string_in_string(self) -> bool:

@property
def quote_type(self) -> str:
return QuoteTypes.double
raise NotImplementedError

def __str__(self) -> str:
from flynt.utils import ast_to_string
Expand Down
40 changes: 40 additions & 0 deletions src/flynt/candidates/ast_percent_candidates.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
import ast
from typing import List

from flynt.state import State

from .ast_chunk import AstChunk


def is_percent_format(node: ast.AST) -> bool:
    """Return True if *node* is a ``%`` operation whose left operand is a string literal.

    Matches expressions such as ``"%s" % x``. Bytes literals, f-strings and
    non-literal left operands are rejected.
    """
    if not (isinstance(node, ast.BinOp) and isinstance(node.op, ast.Mod)):
        return False

    left = node.left
    # ``ast.Str`` is a deprecated alias (warns on 3.12, removed in 3.14);
    # string literals are ``ast.Constant`` nodes holding a ``str`` value.
    return isinstance(left, ast.Constant) and isinstance(left.value, str)


class PercentFmtFinder(ast.NodeVisitor):
    """AST visitor that collects ``"..." % args`` formatting expressions."""

    def __init__(self) -> None:
        super().__init__()
        # Matching BinOp nodes wrapped in AstChunk, in the order they are visited.
        self.candidates: List[AstChunk] = []

    def visit_BinOp(self, node: ast.BinOp) -> None:
        """Record *node* if it is a %-format expression, otherwise search its children.

        A matching expression is not descended into, so %-format expressions
        nested inside its operands are not recorded separately.
        """
        if is_percent_format(node):
            self.candidates.append(AstChunk(node))
        else:
            self.generic_visit(node)


def percent_candidates(code: str, state: State) -> List[AstChunk]:
    """Parse *code* and return the %-format candidates found in it.

    The number of candidates found is added to ``state.percent_candidates``.
    """
    parsed = ast.parse(code)
    visitor = PercentFmtFinder()
    visitor.visit(parsed)

    found = visitor.candidates
    state.percent_candidates += len(found)
    return found
55 changes: 49 additions & 6 deletions src/flynt/code_editor.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,12 @@
import re
import string
import sys
from functools import partial
from functools import lru_cache, partial
from typing import Callable, List, Optional, Tuple, Union

from flynt.candidates import split
from flynt.candidates.ast_call_candidates import call_candidates
from flynt.candidates.ast_chunk import AstChunk
from flynt.candidates.ast_percent_candidates import percent_candidates
from flynt.candidates.chunk import Chunk
from flynt.exceptions import FlyntException
from flynt.format import QuoteTypes as qt
Expand All @@ -17,6 +18,7 @@
from flynt.string_concat.candidates import concat_candidates
from flynt.string_concat.transformer import transform_concat
from flynt.transform.transform import transform_chunk
from flynt.utils import contains_comment

noqa_regex = re.compile("#[ ]*noqa.*flynt")

Expand Down Expand Up @@ -74,6 +76,29 @@ def edit(self) -> Tuple[str, int]:
self.output = "".join(self.results)[:-1]
return self.output, self.count_expressions

def code_between(
    self, start_line: int, start_idx: int, end_line: int, end_idx: int
) -> str:
    """Return the original source text between two (line, column) positions.

    Lines are looked up in ``self.src_lines``; the start position is
    inclusive and the end column is exclusive. Pieces taken from different
    lines are joined with a newline character.
    """
    assert end_line >= start_line
    lines = self.src_lines

    if start_line == end_line:
        assert end_idx >= start_idx
        return lines[start_line][start_idx:end_idx]

    # Tail of the first line, every full line in between, head of the last line.
    head = lines[start_line][start_idx:]
    middle = [lines[n] for n in range(start_line + 1, end_line)]
    tail = lines[end_line][:end_idx]
    return "\n".join([head, *middle, tail])

def code_in_chunk(self, chunk: Union[Chunk, AstChunk]) -> str:
    """Return the original source text covered by *chunk*.

    Results are memoized per editor instance, keyed by the chunk's
    coordinates. A module-level ``lru_cache`` on a method would key on
    ``self`` and keep every editor (and its source lines) alive for the
    lifetime of the process; it would also require chunks to be hashable.
    """
    key = (chunk.start_line, chunk.start_idx, chunk.end_line, chunk.end_idx)

    try:
        cache = self._chunk_code_cache
    except AttributeError:
        # Created lazily so that ``__init__`` does not have to set it up.
        cache = self._chunk_code_cache = {}

    try:
        return cache[key]
    except KeyError:
        text = cache[key] = self.code_between(*key)
        return text

def fill_up_to(self, chunk: Union[Chunk, AstChunk]) -> None:
start_line, start_idx, _ = (chunk.start_line, chunk.start_idx, chunk.end_idx)
if start_line == self.last_line:
Expand All @@ -100,16 +125,25 @@ def try_chunk(self, chunk: Union[Chunk, AstChunk]) -> None:
Transformation function is free to decide to refuse conversion,
e.g. in edge cases that are not supported."""

# if a chunk has a comment in it, we should abort.
if contains_comment(self.code_in_chunk(chunk)):
return

# skip raw strings
if self.code_in_chunk(chunk)[0] == "r":
return

# skip lines with # noqa comment
for line in self.src_lines[chunk.start_line : chunk.end_line + 1]:
if noqa_regex.findall(line):
# user does not wish for this line to be converted.
return

try:
quote_type = (
qt.double
if chunk.string_in_string and chunk.n_lines == 1
else chunk.quote_type
else get_quote_type(self.code_in_chunk(chunk))
)
except FlyntException:
quote_type = qt.double
Expand All @@ -136,7 +170,10 @@ def maybe_replace(
For example, we might not want to change multiple lines."""
if contract_lines:
if get_quote_type(str(chunk)) in (qt.triple_double, qt.triple_single):
if get_quote_type(self.code_in_chunk(chunk)) in (
qt.triple_double,
qt.triple_single,
):
lines = converted.split("\\n")
lines[-1] += rest
lines_fit = all(
Expand Down Expand Up @@ -196,9 +233,15 @@ def add_rest(self) -> None:

def fstringify_code_by_line(code: str, state: State) -> Tuple[str, int]:
"""returns fstringified version of the code and amount of lines edited."""

def candidates(code, state):
chunks = percent_candidates(code, state) + call_candidates(code, state)
chunks.sort(key=lambda c: (c.start_line, c.start_idx))
return chunks

return _transform_code(
code,
partial(split.get_fstringify_chunks, lexer_context=state.lexer_context),
partial(candidates, state=state),
partial(transform_chunk, state=state),
state,
)
Expand Down
2 changes: 2 additions & 0 deletions src/flynt/transform/transform.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import ast
import copy
import logging
import traceback
from typing import Tuple

from flynt.exceptions import ConversionRefused
Expand Down Expand Up @@ -37,6 +38,7 @@ def transform_chunk(
state.invalid_conversions += 1
return code, False
except Exception:
traceback.print_exc()
log.exception("Exception during conversion of code '%s'", code)
state.invalid_conversions += 1
return code, False
Expand Down
10 changes: 10 additions & 0 deletions src/flynt/utils.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import ast
import io
import tokenize
from typing import Optional

import astor
Expand Down Expand Up @@ -77,3 +79,11 @@ def fixup_transformed(tree: ast.AST, quote_type: Optional[str] = None) -> str:
new_code = new_code.replace("\n", "\\n")
new_code = new_code.replace("\t", "\\t")
return new_code


def contains_comment(code: str) -> bool:
    """Return True if tokenizing *code* yields at least one comment token."""
    readline = io.StringIO(code).readline
    return any(
        tok.type == tokenize.COMMENT for tok in tokenize.generate_tokens(readline)
    )
1 change: 1 addition & 0 deletions test/integration/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
"multiline_limit.py",
}
samples = {p.name for p in (int_test_path / "samples_in").glob("*.py")} - EXCLUDED
# samples = {"multiline_1.py"}
concat_samples = {p.name for p in (int_test_path / "samples_in_concat").glob("*.py")}


Expand Down
35 changes: 35 additions & 0 deletions test/test_code_editor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
import pytest

from flynt.candidates import split
from flynt.code_editor import CodeEditor
from flynt.format import get_quote_type

# %-format expression whose parenthesised argument continues on a second line.
s0 = """'%s' % (
v['key'])"""
# The same expression as s0, used as the right-hand side of an assignment.
s1 = """s = '%s' % (
v['key'])"""

# Single-line %-format expression with named placeholders, in double quotes.
s2 = """\"%(a)-6d %(a)s" % d"""


@pytest.mark.parametrize("s_in", [s1, s2])
def test_code_between_qoute_types(s_in):
    """Quote type read from the original source must match the one from ``str(chunk)``."""
    found_chunks = set(split.get_fstringify_chunks(s_in))
    chunk = found_chunks.pop()
    editor = CodeEditor(s_in, None, lambda *args: None, None)

    from_source = get_quote_type(editor.code_in_chunk(chunk))
    from_chunk = get_quote_type(str(chunk))
    assert from_source == from_chunk


@pytest.mark.parametrize("s_in", [s0, s2])
def test_code_between_exact(s_in):
    """When the whole input is one chunk, ``code_in_chunk`` must return the input verbatim."""
    found_chunks = set(split.get_fstringify_chunks(s_in))
    chunk = found_chunks.pop()
    editor = CodeEditor(s_in, None, lambda *args: None, None)

    extracted = editor.code_in_chunk(chunk)
    assert extracted == s_in
Loading

0 comments on commit cc98a97

Please sign in to comment.