Skip to content

Commit

Permalink
Improve catch-all handling and avoid some pylint suppressions (#1919)
Browse files Browse the repository at this point in the history
## Changes

This PR is stacked on top of #1905; changes include:

 - Avoid some pylint-suppressions that aren't really necessary.
- Improve logging when catch-all handlers are hit so we can better
understand what went wrong.
- When an internal error occurs during magic-processing, work already
completed is returned instead of abandoned.

Incidental changes include some more type annotations amongst the unit
tests.

### Tests

- manually tested
- added unit tests

---------

Co-authored-by: Eric Vergnaud <[email protected]>
Co-authored-by: Eric Vergnaud <[email protected]>
Co-authored-by: Serge Smertin <[email protected]>
  • Loading branch information
4 people authored Jul 10, 2024
1 parent 65fdd02 commit 6dfcaaf
Show file tree
Hide file tree
Showing 4 changed files with 77 additions and 8 deletions.
17 changes: 11 additions & 6 deletions src/databricks/labs/ucx/source_code/notebooks/cells.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from pathlib import Path

from astroid import Call, Const, ImportFrom, Name, NodeNG # type: ignore
from astroid.exceptions import AstroidSyntaxError # type: ignore
from sqlglot import parse as parse_sql, ParseError as SQLParseError

from databricks.sdk.service.workspace import Language
Expand Down Expand Up @@ -403,7 +404,8 @@ def build_graph_from_python_source(self, python_code: str) -> list[DependencyPro
problems: list[DependencyProblem] = []
try:
tree = Tree.normalize_and_parse(python_code)
except Exception as e: # pylint: disable=broad-except
except AstroidSyntaxError as e:
logger.debug(f"Could not parse Python code: {python_code}", exc_info=True)
problems.append(DependencyProblem('parse-error', f"Could not parse Python code: {e}"))
return problems
syspath_changes = SysPathChange.extract_from_tree(self._context.session_state, tree)
Expand Down Expand Up @@ -485,11 +487,11 @@ def extract_from_tree(
nodes = tree.locate(Call, [("magic_command", Name)])
for command in cls._make_commands_for_magic_command_call_nodes(nodes):
commands.append(command)
return commands, problems
except Exception as e: # pylint: disable=broad-except
logger.debug(f"Internal error while checking magic commands in tree: {tree.root}", exc_info=True)
problem = problem_factory('internal-error', f"While checking magic commands: {e}", tree.root)
problems.append(problem)
return [], problems
return commands, problems

@classmethod
def _make_commands_for_magic_command_call_nodes(cls, nodes: list[Call]):
Expand Down Expand Up @@ -527,8 +529,11 @@ def build_dependency_graph(self, graph: DependencyGraph) -> list[DependencyProbl
return [DependencyProblem("library-install-failed", "Missing arguments after 'pip install'")]
return graph.register_library(*argv[2:]) # Skipping %pip install

@staticmethod
def _split(code) -> list[str]:
# Cache re-used regex (and ensure issues are raised during class init instead of upon first use).
_splitter = re.compile(r"(?<!\\)\n")

@classmethod
def _split(cls, code: str) -> list[str]:
"""Split pip cell code into multiple arguments
Note:
Expand All @@ -537,7 +542,7 @@ def _split(code) -> list[str]:
Sources:
https://docs.databricks.com/en/libraries/notebooks-python-libraries.html#manage-libraries-with-pip-commands
"""
match = re.search(r"(?<!\\)\n", code)
match = cls._splitter.search(code)
if match:
code = code[: match.start()] # Remove code after non-escaped newline
code = code.replace("\\\n", " ")
Expand Down
2 changes: 1 addition & 1 deletion tests/unit/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ def __repr__(self):


@pytest.fixture
def mock_path_lookup():
def mock_path_lookup() -> PathLookup:
return MockPathLookup()


Expand Down
3 changes: 2 additions & 1 deletion tests/unit/source_code/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from databricks.labs.ucx.source_code.known import KnownList
from databricks.labs.ucx.source_code.linters.files import ImportFileResolver, FileLoader
from databricks.labs.ucx.source_code.notebooks.loaders import NotebookLoader, NotebookResolver
from databricks.labs.ucx.source_code.path_lookup import PathLookup
from databricks.labs.ucx.source_code.python_libraries import PythonLibraryResolver


Expand Down Expand Up @@ -48,7 +49,7 @@ def extended_test_index():


@pytest.fixture
def simple_dependency_resolver(mock_path_lookup):
def simple_dependency_resolver(mock_path_lookup: PathLookup) -> DependencyResolver:
allow_list = KnownList()
library_resolver = PythonLibraryResolver(allow_list)
notebook_resolver = NotebookResolver(NotebookLoader())
Expand Down
63 changes: 63 additions & 0 deletions tests/unit/source_code/notebooks/test_cells.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,15 @@
from databricks.labs.ucx.source_code.linters.files import FileLoader, ImportFileResolver
from databricks.labs.ucx.source_code.linters.python_ast import Tree
from databricks.labs.ucx.source_code.notebooks.cells import CellLanguage, PipCell, PipMagic, MagicCommand
from databricks.labs.ucx.source_code.notebooks.cells import (
GraphBuilder,
PythonCell,
)
from databricks.labs.ucx.source_code.notebooks.loaders import (
NotebookResolver,
NotebookLoader,
)
from databricks.labs.ucx.source_code.path_lookup import PathLookup
from databricks.labs.ucx.source_code.python_libraries import PythonLibraryResolver
from databricks.labs.ucx.source_code.known import KnownList

Expand Down Expand Up @@ -167,6 +172,58 @@ def test_pip_cell_build_dependency_graph_handles_multiline_code():
graph.register_library.assert_called_once_with("databricks")


def test_graph_builder_parse_error(
simple_dependency_resolver: DependencyResolver, mock_path_lookup: PathLookup
) -> None:
"""Check that internal parsing errors are caught and logged."""
# Fixture.
dependency = Dependency(FileLoader(), Path(""))
graph = DependencyGraph(dependency, None, simple_dependency_resolver, mock_path_lookup, CurrentSessionState())
graph.new_graph_builder_context()
builder = GraphBuilder(graph.new_graph_builder_context())

# Run the test.
problems = builder.build_graph_from_python_source("this is not valid python")

# Check results.
assert [
problem
for problem in problems
if problem.code == "parse-error" and problem.message.startswith("Could not parse Python code")
]


def test_parses_python_cell_with_magic_commands(simple_dependency_resolver, mock_path_lookup):
code = """
a = 'something'
%pip install databricks
b = 'else'
"""
cell = PythonCell(code, original_offset=1)
dependency = Dependency(FileLoader(), Path(""))
graph = DependencyGraph(dependency, None, simple_dependency_resolver, mock_path_lookup, CurrentSessionState())
problems = cell.build_dependency_graph(graph)
assert not problems


@pytest.mark.xfail(reason="Line-magic as an expression is not supported ", strict=True)
def test_python_cell_with_expression_magic(
simple_dependency_resolver: DependencyResolver, mock_path_lookup: PathLookup
) -> None:
"""Line magic (%) can be used in places where expressions are expected; check that this is handled."""
# Fixture
code = "current_directory = %pwd"
cell = PythonCell(code, original_offset=1)
dependency = Dependency(FileLoader(), Path(""))
graph = DependencyGraph(dependency, None, simple_dependency_resolver, mock_path_lookup, CurrentSessionState())

# Run the test
problems = cell.build_dependency_graph(graph)

# Verify there were no problems.
assert not problems


@pytest.mark.parametrize(
"code,split",
[
Expand All @@ -193,6 +250,12 @@ def test_pip_cell_build_dependency_graph_handles_multiline_code():
],
)
def test_pip_magic_split(code, split):
# Avoid direct protected access to the _split method.
class _PipMagicFriend(PipMagic):
@classmethod
def split(cls, code: str) -> list[str]:
return cls._split(code)

assert PipMagic._split(code) == split # pylint: disable=protected-access


Expand Down

0 comments on commit 6dfcaaf

Please sign in to comment.