Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix skipping Jupyter cells with unknown %% magic #4462

Merged
merged 3 commits into from
Sep 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@

- Fix crashes involving comments in parenthesised return types or `X | Y` style unions.
(#4453)
- Fix skipping Jupyter cells with unknown `%%` magic (#4462)

### Preview style

Expand Down
28 changes: 1 addition & 27 deletions src/black/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,12 +53,12 @@
)
from black.handle_ipynb_magics import (
PYTHON_CELL_MAGICS,
TRANSFORMED_MAGICS,
jupyter_dependencies_are_installed,
mask_cell,
put_trailing_semicolon_back,
remove_trailing_semicolon,
unmask_cell,
validate_cell,
)
from black.linegen import LN, LineGenerator, transform_line
from black.lines import EmptyLineTracker, LinesBlock
Expand Down Expand Up @@ -1084,32 +1084,6 @@ def format_file_contents(
return dst_contents


def validate_cell(src: str, mode: Mode) -> None:
"""Check that cell does not already contain TransformerManager transformations,
or non-Python cell magics, which might cause tokenizer_rt to break because of
indentations.

If a cell contains ``!ls``, then it'll be transformed to
``get_ipython().system('ls')``. However, if the cell originally contained
``get_ipython().system('ls')``, then it would get transformed in the same way:

>>> TransformerManager().transform_cell("get_ipython().system('ls')")
"get_ipython().system('ls')\n"
>>> TransformerManager().transform_cell("!ls")
"get_ipython().system('ls')\n"

Due to the impossibility of safely roundtripping in such situations, cells
containing transformed magics will be ignored.
"""
if any(transformed_magic in src for transformed_magic in TRANSFORMED_MAGICS):
raise NothingChanged
if (
src[:2] == "%%"
and src.split()[0][2:] not in PYTHON_CELL_MAGICS | mode.python_cell_magics
):
raise NothingChanged


def format_cell(src: str, *, fast: bool, mode: Mode) -> str:
"""Format code in given cell of Jupyter notebook.

Expand Down
45 changes: 45 additions & 0 deletions src/black/handle_ipynb_magics.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import ast
import collections
import dataclasses
import re
import secrets
import sys
from functools import lru_cache
Expand All @@ -14,6 +15,7 @@
else:
from typing_extensions import TypeGuard

from black.mode import Mode
from black.output import out
from black.report import NothingChanged

Expand Down Expand Up @@ -64,6 +66,34 @@ def jupyter_dependencies_are_installed(*, warn: bool) -> bool:
return installed


def validate_cell(src: str, mode: Mode) -> None:
"""Check that cell does not already contain TransformerManager transformations,
or non-Python cell magics, which might cause tokenizer_rt to break because of
indentations.

If a cell contains ``!ls``, then it'll be transformed to
``get_ipython().system('ls')``. However, if the cell originally contained
``get_ipython().system('ls')``, then it would get transformed in the same way:

>>> TransformerManager().transform_cell("get_ipython().system('ls')")
"get_ipython().system('ls')\n"
>>> TransformerManager().transform_cell("!ls")
"get_ipython().system('ls')\n"

Due to the impossibility of safely roundtripping in such situations, cells
containing transformed magics will be ignored.
"""
if any(transformed_magic in src for transformed_magic in TRANSFORMED_MAGICS):
raise NothingChanged

line = _get_code_start(src)
if line.startswith("%%") and (
line.split(maxsplit=1)[0][2:]
not in PYTHON_CELL_MAGICS | mode.python_cell_magics
):
raise NothingChanged


def remove_trailing_semicolon(src: str) -> tuple[str, bool]:
"""Remove trailing semicolon from Jupyter notebook cell.

Expand Down Expand Up @@ -276,6 +306,21 @@ def unmask_cell(src: str, replacements: list[Replacement]) -> str:
return src


def _get_code_start(src: str) -> str:
"""Provides the first line where the code starts.

Iterates over lines of code until it finds the first line that doesn't
contain only empty spaces and comments. It removes any empty spaces at the
start of the line and returns it. If such line doesn't exist, it returns an
empty string.
"""
for match in re.finditer(".+", src):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
for match in re.finditer(".+", src):
for match in src.splitlines():

Any reason not to use this?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There might be a slight performance difference because re.finditer will stop splitting string once the first line with code is found while src.splitlines() will always split the entire string by lines. But I haven't benchmarked anything and this probably won't affect much the performance of the entire formatting process.

I'm also ok with switching to src.splitlines() if that is the preferred way.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks, I guess this is fine.

line = match.group(0).lstrip()
if line and not line.startswith("#"):
return line
return ""


def _is_ipython_magic(node: ast.expr) -> TypeGuard[ast.Attribute]:
"""Check if attribute is IPython magic.

Expand Down
16 changes: 16 additions & 0 deletions tests/test_ipynb.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,22 @@ def test_cell_magic_with_custom_python_magic(
assert result == expected_output


@pytest.mark.parametrize(
"src",
(
" %%custom_magic \nx=2",
"\n\n%%custom_magic\nx=2",
"# comment\n%%custom_magic\nx=2",
"\n \n # comment with %%time\n\t\n %%custom_magic # comment \nx=2",
),
)
def test_cell_magic_with_custom_python_magic_after_spaces_and_comments_noop(
src: str,
) -> None:
with pytest.raises(NothingChanged):
format_cell(src, fast=True, mode=JUPYTER_MODE)


def test_cell_magic_nested() -> None:
src = "%%time\n%%time\n2+2"
result = format_cell(src, fast=True, mode=JUPYTER_MODE)
Expand Down
Loading