diff --git a/src/flynt/candidates/PyToken.py b/src/flynt/candidates/PyToken.py
deleted file mode 100644
index b268633..0000000
--- a/src/flynt/candidates/PyToken.py
+++ /dev/null
@@ -1,68 +0,0 @@
-import token
-from tokenize import TokenInfo
-from typing import Optional, Tuple
-
-from flynt.exceptions import FlyntException
-from flynt.format import QuoteTypes
-
-line_num = int
-char_idx = int
-
-
-class PyToken:
-    def __init__(self, t: TokenInfo) -> None:
-        toknum, tokval, start, end, line = t
-        self.toknum: int = toknum
-        self.tokval: str = tokval
-        self.start: Tuple[line_num, char_idx] = start
-        self.end: Tuple[line_num, char_idx] = end
-
-    def is_percent_op(self) -> bool:
-        return self.toknum == token.OP and self.tokval == "%"
-
-    def is_expr_continuation_op(self) -> bool:
-        return (
-            self.is_sq_brack_op()
-            or self.is_paren_op()
-            or self.is_dot_op()
-            or self.is_exponentiation_op()
-        )
-
-    def is_sq_brack_op(self) -> bool:
-        return self.toknum == token.OP and self.tokval == "["
-
-    def is_dot_op(self) -> bool:
-        return self.toknum == token.OP and self.tokval == "."
-
-    def is_paren_op(self) -> bool:
-        return self.toknum == token.OP and self.tokval == "("
-
-    def is_exponentiation_op(self) -> bool:
-        return self.toknum == token.OP and self.tokval == "**"
-
-    def is_string(self) -> bool:
-        return self.toknum == token.STRING
-
-    def get_quote_type(self) -> Optional[str]:
-        if self.toknum is not token.STRING:
-            return None
-
-        for qt in QuoteTypes.all:
-            if self.tokval[: len(qt)] == qt and self.tokval[-len(qt) :] == qt:
-                return qt
-
-        if self.is_legacy_unicode_string():
-            for qt in QuoteTypes.all:
-                if self.tokval[1 : len(qt) + 1] == qt and self.tokval[-len(qt) :] == qt:
-                    return qt
-
-        raise FlyntException(f"Can't determine quote type of the string {self.tokval}.")
-
-    def is_legacy_unicode_string(self) -> bool:
-        return self.toknum == token.STRING and self.tokval[0] == "u"
-
-    def is_raw_string(self) -> bool:
-        return self.toknum == token.STRING and self.tokval[0] == "r"
-
-    def __repr__(self):
-        return f"PyToken {self.toknum} : {self.tokval}"
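Note (not part of the patch): `PyToken` does not disappear entirely — a slimmed-down copy keeping only the quote-type logic is re-added inside `src/flynt/format.py` further down in this diff. A minimal sketch of the tokenize mechanics that logic relies on, using only the standard library:

```python
# Sketch: how a string literal's quote type is read off the raw token,
# mirroring the retained PyToken.get_quote_type() prefix/suffix slicing.
import io
import tokenize

g = tokenize.tokenize(io.BytesIO(b"'''abra'''").readline)
next(g)        # skip the ENCODING token
tok = next(g)  # the STRING token
assert tok.string[:3] == "'''" and tok.string[-3:] == "'''"
```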
diff --git a/src/flynt/candidates/chunk.py b/src/flynt/candidates/chunk.py
deleted file mode 100644
index e5b8b74..0000000
--- a/src/flynt/candidates/chunk.py
+++ /dev/null
@@ -1,171 +0,0 @@
-import ast
-from collections import deque
-from typing import Deque, Iterable, Iterator, Optional
-
-from flynt.candidates.context import LexerContext
-from flynt.candidates.PyToken import PyToken
-
-REUSE = "Token was not used"
-
-
-class Chunk:
-    def __init__(
-        self,
-        tokens: Iterable[PyToken] = (),
-        *,
-        lexer_context: LexerContext,
-    ) -> None:
-        self.lexer_context = lexer_context
-
-        self.tokens: Deque[PyToken] = deque(tokens)
-        self.complete = False
-
-        self.is_percent_chunk = False
-        self.percent_ongoing = False
-
-        self.is_call_chunk = False
-        self.successful = False
-
-        self.string_in_string = False
-
-    def empty_append(self, t: PyToken) -> None:
-        if not t.is_string() or t.is_raw_string():
-            self.complete = True
-
-        self.tokens.append(t)
-
-    def second_append(self, t: PyToken) -> None:
-        if t.is_string():
-            self.tokens[0].tokval += t.tokval
-            self.tokens[0].end = t.end
-        elif t.is_percent_op():
-            self.tokens.append(t)
-            self.is_percent_chunk = True
-        elif t.is_dot_op():
-            self.tokens.append(t)
-            self.is_call_chunk = True
-        else:
-            self.tokens.append(t)
-            self.complete = True
-
-    def percent_append(self, t: PyToken) -> Optional[str]:
-        # todo handle all cases?
-        if not self[0].is_string():
-            self.complete = True
-            return None
-
-        if len(self) == 2:
-            self.tokens.append(t)
-            if self.is_parseable:
-                self.successful = True
-            else:
-                self.percent_ongoing = True
-
-        else:
-            if self.percent_ongoing:
-                self.tokens.append(t)
-                if t.is_string() and "{" not in str(self):
-                    self.string_in_string = True
-                if self.is_parseable:
-                    self.percent_ongoing = False
-                    self.successful = True
-            elif t.is_expr_continuation_op():
-                self.tokens.append(t)
-                self.percent_ongoing = True
-            else:
-                self.complete = True
-                self.successful = self.is_parseable
-                return REUSE
-        return None
-
-    def call_append(self, t: PyToken) -> None:
-        if t.is_string():
-            self.string_in_string = True
-
-        if len(self) == 2 and t.tokval != "format":
-            self.complete = True
-            self.successful = False
-            return
-
-        self.tokens.append(t)
-        if len(self) > 3 and self.is_parseable:
-            self.complete = True
-            self.successful = True
-
-    def append(self, t: PyToken) -> Optional[str]:
-        # stop on a comment or too long chunk
-        if t.toknum in self.lexer_context.break_tokens:
-            self.complete = True
-            self.successful = self.is_parseable and (
-                self.is_percent_chunk or self.is_call_chunk
-            )
-            return None
-
-        if len(self) > 50:
-            self.complete = True
-            self.successful = False
-            return None
-
-        if t.toknum in self.lexer_context.skip_tokens:
-            return None
-
-        if len(self) == 0:
-            self.empty_append(t)
-        elif not (self.is_call_chunk or self.is_percent_chunk):
-            self.second_append(t)
-        elif self.is_call_chunk:
-            self.call_append(t)
-        else:
-            return self.percent_append(t)
-        return None
-
-    @property
-    def is_parseable(self) -> bool:
-        if len(self.tokens) < 1:
-            return False
-        try:
-            ast.parse(str(self))
-            return True
-        except SyntaxError:
-            return False
-
-    @property
-    def start_line(self) -> int:
-        return self.tokens[0].start[0] - 1
-
-    @property
-    def start_idx(self) -> int:
-        return self.tokens[0].start[1]
-
-    @property
-    def end_idx(self) -> int:
-        return self.tokens[-1].end[1]
-
-    @property
-    def end_line(self) -> int:
-        return self.tokens[-1].end[0] - 1
-
-    @property
-    def n_lines(self) -> int:
-        return 1 + self.end_line - self.start_line
-
-    @property
-    def quote_type(self) -> Optional[str]:
-        return self.tokens[0].get_quote_type()
-
-    def __getitem__(self, item: int) -> PyToken:
-        return self.tokens[item]
-
-    def __iter__(self) -> Iterator[PyToken]:
-        return iter(self.tokens)
-
-    def __len__(self) -> int:
-        return len(self.tokens)
-
-    def __str__(self) -> str:
-        return " ".join(t.tokval for t in self)
-
-    def __repr__(self):
-        if self.tokens:
-            return f"Chunk: {self}"
-        return "Empty Chunk"
diff --git a/src/flynt/candidates/context.py b/src/flynt/candidates/context.py
deleted file mode 100644
index 26b9e56..0000000
--- a/src/flynt/candidates/context.py
+++ /dev/null
@@ -1,23 +0,0 @@
-import dataclasses
-import token
-from typing import FrozenSet
-
-
-@dataclasses.dataclass(frozen=True)
-class LexerContext:
-    skip_tokens: FrozenSet[int]
-    break_tokens: FrozenSet[int]
-    multiline: bool
-
-
-single_line_context = LexerContext(
-    skip_tokens=frozenset(),
-    break_tokens=frozenset((token.COMMENT, token.NEWLINE, token.NL)),
-    multiline=False,
-)
-
-multi_line_context = LexerContext(
-    skip_tokens=frozenset((token.NEWLINE, token.NL)),
-    break_tokens=frozenset((token.COMMENT,)),
-    multiline=True,
-)
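Note (not part of the patch): the deleted `Chunk.is_parseable` check was the heart of what the lexer-based splitter did — accumulate tokens until the joined text parses as valid Python. A standalone sketch of that idea:

```python
# Sketch of the deleted Chunk.is_parseable property: a token chunk was
# accepted only once its accumulated source text parsed as valid Python.
import ast

def is_parseable(code: str) -> bool:
    try:
        ast.parse(code)
        return True
    except SyntaxError:
        return False

assert is_parseable("'%s' % var")
assert not is_parseable("'%s' %")  # dangling operator: keep consuming tokens
```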
diff --git a/src/flynt/candidates/split.py b/src/flynt/candidates/split.py
deleted file mode 100644
index ecbd1c1..0000000
--- a/src/flynt/candidates/split.py
+++ /dev/null
@@ -1,62 +0,0 @@
-import io
-import logging
-import tokenize
-from typing import Generator
-
-from flynt.candidates.chunk import Chunk
-from flynt.candidates.context import LexerContext, multi_line_context
-from flynt.candidates.PyToken import PyToken
-
-log = logging.getLogger(__name__)
-
-
-def get_chunks(
-    code: str,
-    *,
-    lexer_context: LexerContext,
-) -> Generator[Chunk, None, None]:
-    g = tokenize.tokenize(io.BytesIO(code.encode("utf-8")).readline)
-    chunk = Chunk(lexer_context=lexer_context)
-
-    try:
-        for item in g:
-            t = PyToken(item)
-            reuse = chunk.append(t)
-
-            if chunk.complete:
-
-                yield chunk
-                chunk = Chunk(lexer_context=lexer_context)
-                if reuse:
-                    reuse = chunk.append(t)
-                    # assert not reuse
-                    if chunk.complete:
-                        yield chunk
-                        chunk = Chunk(lexer_context=lexer_context)
-
-        yield chunk
-    except tokenize.TokenError as e:
-        log.error(
-            f"TokenError: {e}",
-            exc_info=True,
-        )
-
-
-def get_fstringify_chunks(
-    code: str,
-    lexer_context: LexerContext = multi_line_context,
-) -> Generator[Chunk, None, None]:
-    """
-    A generator yielding Chunks of the code where fstring can be formed.
-    """
-    last_concat = False
-
-    for chunk in get_chunks(code, lexer_context=lexer_context):
-        if chunk.successful and not last_concat:
-            yield chunk
-
-        if len(chunk) and chunk[-1].is_string():
-            last_concat = True
-        else:
-            if lexer_context.multiline or len(chunk) > 0:
-                last_concat = False
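Note (not part of the patch): with the splitter gone, candidates come from walking the AST instead of re-lexing the source. A simplified illustration of the idea — this is not flynt's actual `ast_percent_candidates` implementation, just the shape of it:

```python
# Simplified sketch: find "<string literal> % ..." expressions via the AST.
import ast

def find_percent_format_nodes(code: str):
    for node in ast.walk(ast.parse(code)):
        if (
            isinstance(node, ast.BinOp)
            and isinstance(node.op, ast.Mod)
            and isinstance(node.left, ast.Constant)
            and isinstance(node.left.value, str)
        ):
            yield node

assert len(list(find_percent_format_nodes("a = '%s' % var"))) == 1
```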
diff --git a/src/flynt/code_editor.py b/src/flynt/code_editor.py
index e2a3221..f71e804 100644
--- a/src/flynt/code_editor.py
+++ b/src/flynt/code_editor.py
@@ -3,12 +3,11 @@
 import string
 import sys
 from functools import lru_cache, partial
-from typing import Callable, List, Optional, Tuple, Union
+from typing import Callable, List, Optional, Tuple

 from flynt.candidates.ast_call_candidates import call_candidates
 from flynt.candidates.ast_chunk import AstChunk
 from flynt.candidates.ast_percent_candidates import percent_candidates
-from flynt.candidates.chunk import Chunk
 from flynt.exceptions import FlyntException
 from flynt.format import QuoteTypes as qt
 from flynt.format import get_quote_type
@@ -94,12 +93,12 @@ def code_between(
         return "\n".join(result)

     @lru_cache(None)
-    def code_in_chunk(self, chunk: Union[Chunk, AstChunk]):
+    def code_in_chunk(self, chunk: AstChunk):
         return self.code_between(
             chunk.start_line, chunk.start_idx, chunk.end_line, chunk.end_idx
         )

-    def fill_up_to(self, chunk: Union[Chunk, AstChunk]) -> None:
+    def fill_up_to(self, chunk: AstChunk) -> None:
         start_line, start_idx, _ = (chunk.start_line, chunk.start_idx, chunk.end_idx)
         if start_line == self.last_line:
             self.results.append(
@@ -120,7 +119,7 @@ def fill_up_to_line(self, line: int) -> None:
             self.results.append(self.src_lines[self.last_line] + "\n")
             self.last_line += 1

-    def try_chunk(self, chunk: Union[Chunk, AstChunk]) -> None:
+    def try_chunk(self, chunk: AstChunk) -> None:
         """Try applying a transform to a chunk of code.

         Transformation function is free to decide to refuse conversion,
@@ -161,7 +160,7 @@ def try_chunk(self, chunk: Union[Chunk, AstChunk]) -> None:

     def maybe_replace(
         self,
-        chunk: Union[Chunk, AstChunk],
+        chunk: AstChunk,
         contract_lines: int,
         converted: str,
         rest: str,
@@ -231,17 +230,18 @@ def add_rest(self) -> None:
             self.last_line += 1


+def fstring_candidates(code, state):
+    chunks = percent_candidates(code, state) + call_candidates(code, state)
+    chunks.sort(key=lambda c: (c.start_line, c.start_idx))
+    return chunks
+
+
 def fstringify_code_by_line(code: str, state: State) -> Tuple[str, int]:
     """returns fstringified version of the code and amount of lines edited."""

-    def candidates(code, state):
-        chunks = percent_candidates(code, state) + call_candidates(code, state)
-        chunks.sort(key=lambda c: (c.start_line, c.start_idx))
-        return chunks
-
     return _transform_code(
         code,
-        partial(candidates, state=state),
+        partial(fstring_candidates, state=state),
        partial(transform_chunk, state=state),
         state,
     )
diff --git a/src/flynt/format.py b/src/flynt/format.py
index f583c80..178d37b 100644
--- a/src/flynt/format.py
+++ b/src/flynt/format.py
@@ -4,8 +4,12 @@
 import io
 import re
+import token
 import tokenize
-from typing import Optional
+from tokenize import TokenInfo
+from typing import Optional, Tuple
+
+from flynt.exceptions import FlyntException

 lonely_quote = re.compile(r"(?<!\\)'")

@@ -17,14 +21,47 @@ class QuoteTypes:
     all = [triple_double, triple_single, single, double]


+line_num = int
+char_idx = int
+
+
+class PyToken:
+    def __init__(self, t: TokenInfo) -> None:
+        toknum, tokval, start, end, line = t
+        self.toknum: int = toknum
+        self.tokval: str = tokval
+        self.start: Tuple[line_num, char_idx] = start
+        self.end: Tuple[line_num, char_idx] = end
+
+    def get_quote_type(self) -> Optional[str]:
+        if self.toknum is not token.STRING:
+            return None
+
+        for qt in QuoteTypes.all:
+            if self.tokval[: len(qt)] == qt and self.tokval[-len(qt) :] == qt:
+                return qt
+
+        if self.is_legacy_unicode_string():
+            for qt in QuoteTypes.all:
+                if self.tokval[1 : len(qt) + 1] == qt and self.tokval[-len(qt) :] == qt:
+                    return qt
+
+        raise FlyntException(f"Can't determine quote type of the string {self.tokval}.")
+
+    def is_legacy_unicode_string(self) -> bool:
+        return self.toknum == token.STRING and self.tokval[0] == "u"
+
+    def __repr__(self):
+        return f"PyToken {self.toknum} : {self.tokval}"
+
+
 def get_quote_type(code: str) -> Optional[str]:
-    from flynt.candidates.PyToken import PyToken

     g = tokenize.tokenize(io.BytesIO(code.encode("utf-8")).readline)
     next(g)
-    token = PyToken(next(g))
+    t = PyToken(next(g))

-    return token.get_quote_type()
+    return t.get_quote_type()


 def remove_quotes(code: str) -> str:
diff --git a/src/flynt/state.py b/src/flynt/state.py
index 988c16f..f93c835 100644
--- a/src/flynt/state.py
+++ b/src/flynt/state.py
@@ -3,12 +3,6 @@
 import dataclasses
 from typing import Optional

-from flynt.candidates.context import (
-    LexerContext,
-    multi_line_context,
-    single_line_context,
-)
-

 @dataclasses.dataclass
 class State:
@@ -42,9 +36,3 @@ class State:
     def __post_init__(self):
         if not self.multiline:
             self.len_limit = 0
-
-    @property
-    def lexer_context(self) -> LexerContext:
-        if self.multiline:
-            return multi_line_context
-        return single_line_context
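Note (not part of the patch): the inner `candidates` closure is hoisted to a module-level `fstring_candidates` so the tests below can import it directly. Usage, mirroring how the renamed test module exercises it (a default `State()` is assumed here):

```python
# fstring_candidates merges percent- and .format()-candidates into one
# list, sorted by (start_line, start_idx).
from flynt.code_editor import fstring_candidates
from flynt.state import State

candidates = fstring_candidates("print('%d' % var)", State())
assert len(candidates) == 1
```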
diff --git a/test/test_lexer.py b/test/test_candidates.py
similarity index 59%
rename from test/test_lexer.py
rename to test/test_candidates.py
index 4ae1bc6..b931a25 100644
--- a/test/test_lexer.py
+++ b/test/test_candidates.py
@@ -1,25 +1,32 @@
-from flynt.candidates import split
+from functools import partial
+
+from flynt.candidates.ast_percent_candidates import percent_candidates
+from flynt.code_editor import fstring_candidates
+from flynt.state import State
+
+percent_candidates = partial(percent_candidates, state=State())
+fstring_candidates = partial(fstring_candidates, state=State())


 def test_str_newline():
     s_in = """a = '%s\\n' % var"""

-    generator = split.get_fstringify_chunks(s_in)
-    assert len(list(generator)) == 1
+    candidates = percent_candidates(s_in)
+    assert len(list(candidates)) == 1


 def test_triple():
     s_in = """print("{}".format(Bar + 1), '%d' % var, "{s}".format(s=foo))"""

-    generator = split.get_fstringify_chunks(s_in)
-    assert len(list(generator)) == 3
+    candidates = fstring_candidates(s_in)
+    assert len(list(candidates)) == 3


 def test_one_string():
     s = """"my string {}, but also {} and {}".format(var, f, cada_bra)"""

-    chunks_gen = split.get_fstringify_chunks(s)
+    chunks_gen = fstring_candidates(s)
     assert len(list(chunks_gen)) == 1

-    generator = split.get_fstringify_chunks(s)
-    chunk = next(generator)
+    candidates = fstring_candidates(s)
+    chunk = next(iter(candidates))
     assert chunk.start_line == 0
     assert s[: chunk.end_idx] == s
@@ -27,28 +34,27 @@ def test_yields_parsable():
     code_in = """attrs = {'r': '{}'.format(row_idx)}"""

-    generator = split.get_fstringify_chunks(code_in)
-    chunk = next(generator)
+    candidates = fstring_candidates(code_in)
+    chunk = next(iter(candidates))

-    assert chunk.is_parseable
     assert code_in[chunk.start_idx : chunk.end_idx] == "'{}'.format(row_idx)"


 def test_percent_attribute():
     code_in = """src_info = 'application "%s"' % srcobj.import_name"""

-    generator = split.get_fstringify_chunks(code_in)
-    chunk = next(generator)
+    candidates = fstring_candidates(code_in)
+    chunk = next(iter(candidates))

     expected = """'application "%s"' % srcobj.import_name"""
     assert code_in[chunk.start_idx : chunk.end_idx] == expected


 def test_percent_call():
-    code_in = """"filename*": "UTF-8''%s" % url_quote(attachment_filename)"""
+    code_in = """{"filename*": "UTF-8''%s" % url_quote(attachment_filename)}"""

-    generator = split.get_fstringify_chunks(code_in)
-    chunk = next(generator)
+    candidates = fstring_candidates(code_in)
+    chunk = next(iter(candidates))

     expected = """"UTF-8''%s" % url_quote(attachment_filename)"""
     assert code_in[chunk.start_idx : chunk.end_idx] == expected

@@ -60,35 +66,37 @@ def test_two_strings():
         + 'b = "my string {}, but also {} and {}".format(var, what, cada_bra)'
     )

-    chunks_gen = split.get_fstringify_chunks(s)
+    chunks_gen = fstring_candidates(s)
     assert len(list(chunks_gen)) == 2

-    generator = split.get_fstringify_chunks(s)
+    candidates = fstring_candidates(s)
     lines = s.split("\n")

-    chunk = next(generator)
+    chunk = candidates[0]
     assert chunk.start_line == 0
     assert lines[0][: chunk.end_idx] == lines[0]

-    chunk = next(generator)
+    chunk = candidates[1]
     assert chunk.start_line == 1
     assert lines[1][: chunk.end_idx] == lines[1]


+indented = """
+var = 5
+if var % 3 == 0:
+    a = "my string {}".format(var)""".strip()
+
+
 def test_indented():
-    indented = """
-    var = 5
-    if var % 3 == 0:
-        a = "my string {}".format(var)""".strip()
-    generator = split.get_fstringify_chunks(indented)
-    assert len(list(generator)) == 1
+    candidates = fstring_candidates(indented)
+    assert len(list(candidates)) == 1

     lines = indented.split("\n")

-    generator = split.get_fstringify_chunks(indented)
-    chunk = next(generator)
+    candidates = fstring_candidates(indented)
+    chunk = next(iter(candidates))
     assert chunk.start_line == 2
     assert lines[2][: chunk.end_idx] == lines[2]

@@ -100,10 +108,10 @@ def write_row(self, xf, row, row_idx):
         attrs = {'r': '{}'.format(row_idx)}""".strip()

-    generator = split.get_fstringify_chunks(code_empty_line)
+    candidates = fstring_candidates(code_empty_line)
     lines = code_empty_line.split("\n")

-    chunk = next(generator)
+    chunk = next(iter(candidates))
     assert chunk.start_line == 2
     assert lines[2][chunk.start_idx : chunk.end_idx] == "'{}'.format(row_idx)"

@@ -119,8 +127,8 @@ def write_row(self, xf, row, row_idx):


 def test_multiline():
-    generator = split.get_fstringify_chunks(multiline_code)
-    assert len(list(generator)) == 1
+    candidates = fstring_candidates(multiline_code)
+    assert len(list(candidates)) == 1


 not_implicit_concat = """
@@ -129,8 +137,8 @@ def test_multiline():


 def test_not_implicit_concat():
-    generator = split.get_fstringify_chunks(not_implicit_concat)
-    assert len(list(generator)) == 1
+    candidates = fstring_candidates(not_implicit_concat)
+    assert len(list(candidates)) == 1


 line_continuation = """
@@ -139,8 +147,8 @@ def test_not_implicit_concat():


 def test_line_continuation():
-    generator = split.get_fstringify_chunks(line_continuation)
-    assert len(list(generator)) == 1
+    candidates = fstring_candidates(line_continuation)
+    assert len(list(candidates)) == 1


 tuple_in_list = """
@@ -150,14 +158,14 @@ def test_line_continuation():


 def test_tuple_list():
-    generator = split.get_fstringify_chunks(tuple_in_list)
-    assert len(list(generator)) == 1
+    candidates = fstring_candidates(tuple_in_list)
+    assert len(list(candidates)) == 1


 def test_indexed_percent():
     code = 'return "Hello %s!" % flask.request.args[name]'
-    generator = split.get_fstringify_chunks(code)
-    chunk = next(generator)
+    candidates = fstring_candidates(code)
+    chunk = next(iter(candidates))

     assert (
         code[chunk.start_idx : chunk.end_idx]
@@ -167,5 +175,5 @@ def test_indexed_percent():

 def test_tuple_percent():
     code = """print("%s %s " % (var+var, abc))"""
-    generator = split.get_fstringify_chunks(tuple_in_list)
-    assert len(list(generator)) == 1
+    candidates = fstring_candidates(code)
+    assert len(list(candidates)) == 1
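Note (not part of the patch): unlike the old generator of chunks, the candidate helpers return a position-sorted list, which is why the rewritten tests can index `candidates[0]` and `candidates[1]` directly. The rewrite also quietly fixes a latent bug: the old `test_tuple_percent` lexed `tuple_in_list` instead of its own `code` variable. A self-contained sketch of the indexing pattern, mirroring `test_two_strings`:

```python
# List-returning API allows direct indexing; default State() assumed.
from flynt.code_editor import fstring_candidates
from flynt.state import State

s = 'a = "{}".format(var)\nb = "{}".format(what)'
first, second = fstring_candidates(s, State())
assert first.start_line == 0
assert second.start_line == 1
```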
diff --git a/test/test_code_editor.py b/test/test_code_editor.py
index b2043b9..3eeb8f8 100644
--- a/test/test_code_editor.py
+++ b/test/test_code_editor.py
@@ -1,35 +1,20 @@
 import pytest

-from flynt.candidates import split
+from flynt.candidates.ast_percent_candidates import percent_candidates
 from flynt.code_editor import CodeEditor
-from flynt.format import get_quote_type
+from flynt.state import State

 s0 = """'%s' % (
    v['key'])"""

-s1 = """s = '%s' % (
-    v['key'])"""
-
-s2 = """\"%(a)-6d %(a)s" % d"""
-
-
-@pytest.mark.parametrize(
-    "s_in",
-    [s1, s2],
-)
-def test_code_between_qoute_types(s_in):
-
-    chunk = set(split.get_fstringify_chunks(s_in)).pop()
-    editor = CodeEditor(s_in, None, lambda *args: None, None)
-
-    assert get_quote_type(editor.code_in_chunk(chunk)) == get_quote_type(str(chunk))
+s1 = """\"%(a)-6d %(a)s" % d"""


 @pytest.mark.parametrize(
     "s_in",
-    [s0, s2],
+    [s0, s1],
 )
 def test_code_between_exact(s_in):
-    chunk = set(split.get_fstringify_chunks(s_in)).pop()
+    chunk = set(percent_candidates(s_in, State())).pop()
     editor = CodeEditor(s_in, None, lambda *args: None, None)

     assert editor.code_in_chunk(chunk) == s_in
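Note (not part of the patch): the parametrized token-level quote-type test deleted below loses its subject — candidate chunks no longer carry tokens — but the same quote mapping can still be checked through the string-level `get_quote_type` helper, using the exact values from the removed test:

```python
from flynt.format import QuoteTypes, get_quote_type

assert get_quote_type("'abra'") == QuoteTypes.single
assert get_quote_type('"bobro"') == QuoteTypes.double
assert get_quote_type("'''abra'''") == QuoteTypes.triple_single
assert get_quote_type('"""bobro"""') == QuoteTypes.triple_double
```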
diff --git a/test/test_styles.py b/test/test_styles.py
index a4da333..3ad5834 100644
--- a/test/test_styles.py
+++ b/test/test_styles.py
@@ -3,27 +3,6 @@
 import pytest

 from flynt.format import QuoteTypes, get_quote_type, set_quote_type
-from flynt.candidates.context import multi_line_context
-from flynt.candidates.split import get_chunks
-
-
-@pytest.mark.parametrize(
-    argnames=["code", "quote_type"],
-    argvalues=[
-        ("'abra'", QuoteTypes.single),
-        ('"bobro"', QuoteTypes.double),
-        ("'''abra'''", QuoteTypes.triple_single),
-        ('"""bobro"""', QuoteTypes.triple_double),
-    ],
-)
-def test_get_quote_type_token(code, quote_type):
-
-    g = get_chunks(code, lexer_context=multi_line_context)
-    next(g)
-    chunk = next(g)
-    token = chunk.tokens[0]
-
-    assert token.get_quote_type() == quote_type


 @pytest.mark.parametrize(