diff --git a/src/databricks/labs/ucx/source_code/notebooks/cells.py b/src/databricks/labs/ucx/source_code/notebooks/cells.py index 497aca7052..e190ae0422 100644 --- a/src/databricks/labs/ucx/source_code/notebooks/cells.py +++ b/src/databricks/labs/ucx/source_code/notebooks/cells.py @@ -10,6 +10,7 @@ from pathlib import Path from astroid import Call, Const, ImportFrom, Name, NodeNG # type: ignore +from astroid.exceptions import AstroidSyntaxError # type: ignore from sqlglot import parse as parse_sql, ParseError as SQLParseError from databricks.sdk.service.workspace import Language @@ -403,7 +404,8 @@ def build_graph_from_python_source(self, python_code: str) -> list[DependencyPro problems: list[DependencyProblem] = [] try: tree = Tree.normalize_and_parse(python_code) - except Exception as e: # pylint: disable=broad-except + except AstroidSyntaxError as e: + logger.debug(f"Could not parse Python code: {python_code}", exc_info=True) problems.append(DependencyProblem('parse-error', f"Could not parse Python code: {e}")) return problems syspath_changes = SysPathChange.extract_from_tree(self._context.session_state, tree) @@ -485,11 +487,11 @@ def extract_from_tree( nodes = tree.locate(Call, [("magic_command", Name)]) for command in cls._make_commands_for_magic_command_call_nodes(nodes): commands.append(command) - return commands, problems except Exception as e: # pylint: disable=broad-except + logger.debug(f"Internal error while checking magic commands in tree: {tree.root}", exc_info=True) problem = problem_factory('internal-error', f"While checking magic commands: {e}", tree.root) problems.append(problem) - return [], problems + return commands, problems @classmethod def _make_commands_for_magic_command_call_nodes(cls, nodes: list[Call]): @@ -527,8 +529,11 @@ def build_dependency_graph(self, graph: DependencyGraph) -> list[DependencyProbl return [DependencyProblem("library-install-failed", "Missing arguments after 'pip install'")] return graph.register_library(*argv[2:]) # Skipping %pip install - @staticmethod - def _split(code) -> list[str]: + # Cache re-used regex (and ensure issues are raised during class init instead of upon first use). + _splitter = re.compile(r"(? list[str]: """Split pip cell code into multiple arguments Note: @@ -537,7 +542,7 @@ def _split(code) -> list[str]: Sources: https://docs.databricks.com/en/libraries/notebooks-python-libraries.html#manage-libraries-with-pip-commands """ - match = re.search(r"(? PathLookup: return MockPathLookup() diff --git a/tests/unit/source_code/conftest.py b/tests/unit/source_code/conftest.py index 95ea18cb12..48d4544231 100644 --- a/tests/unit/source_code/conftest.py +++ b/tests/unit/source_code/conftest.py @@ -8,6 +8,7 @@ from databricks.labs.ucx.source_code.known import KnownList from databricks.labs.ucx.source_code.linters.files import ImportFileResolver, FileLoader from databricks.labs.ucx.source_code.notebooks.loaders import NotebookLoader, NotebookResolver +from databricks.labs.ucx.source_code.path_lookup import PathLookup from databricks.labs.ucx.source_code.python_libraries import PythonLibraryResolver @@ -48,7 +49,7 @@ def extended_test_index(): @pytest.fixture -def simple_dependency_resolver(mock_path_lookup): +def simple_dependency_resolver(mock_path_lookup: PathLookup) -> DependencyResolver: allow_list = KnownList() library_resolver = PythonLibraryResolver(allow_list) notebook_resolver = NotebookResolver(NotebookLoader()) diff --git a/tests/unit/source_code/notebooks/test_cells.py b/tests/unit/source_code/notebooks/test_cells.py index 6b5eeac7e1..6e0306dbf7 100644 --- a/tests/unit/source_code/notebooks/test_cells.py +++ b/tests/unit/source_code/notebooks/test_cells.py @@ -10,10 +10,15 @@ from databricks.labs.ucx.source_code.linters.files import FileLoader, ImportFileResolver from databricks.labs.ucx.source_code.linters.python_ast import Tree from databricks.labs.ucx.source_code.notebooks.cells import CellLanguage, PipCell, PipMagic, MagicCommand +from databricks.labs.ucx.source_code.notebooks.cells import ( + GraphBuilder, + PythonCell, +) from databricks.labs.ucx.source_code.notebooks.loaders import ( NotebookResolver, NotebookLoader, ) +from databricks.labs.ucx.source_code.path_lookup import PathLookup from databricks.labs.ucx.source_code.python_libraries import PythonLibraryResolver from databricks.labs.ucx.source_code.known import KnownList @@ -167,6 +172,58 @@ def test_pip_cell_build_dependency_graph_handles_multiline_code(): graph.register_library.assert_called_once_with("databricks") +def test_graph_builder_parse_error( + simple_dependency_resolver: DependencyResolver, mock_path_lookup: PathLookup +) -> None: + """Check that internal parsing errors are caught and logged.""" + # Fixture. + dependency = Dependency(FileLoader(), Path("")) + graph = DependencyGraph(dependency, None, simple_dependency_resolver, mock_path_lookup, CurrentSessionState()) + graph.new_graph_builder_context() + builder = GraphBuilder(graph.new_graph_builder_context()) + + # Run the test. + problems = builder.build_graph_from_python_source("this is not valid python") + + # Check results. + assert [ + problem + for problem in problems + if problem.code == "parse-error" and problem.message.startswith("Could not parse Python code") + ] + + +def test_parses_python_cell_with_magic_commands(simple_dependency_resolver, mock_path_lookup): + code = """ +a = 'something' +%pip install databricks +b = 'else' +""" + cell = PythonCell(code, original_offset=1) + dependency = Dependency(FileLoader(), Path("")) + graph = DependencyGraph(dependency, None, simple_dependency_resolver, mock_path_lookup, CurrentSessionState()) + problems = cell.build_dependency_graph(graph) + assert not problems + + +@pytest.mark.xfail(reason="Line-magic as an expression is not supported ", strict=True) +def test_python_cell_with_expression_magic( + simple_dependency_resolver: DependencyResolver, mock_path_lookup: PathLookup +) -> None: + """Line magic (%) can be used in places where expressions are expected; check that this is handled.""" + # Fixture + code = "current_directory = %pwd" + cell = PythonCell(code, original_offset=1) + dependency = Dependency(FileLoader(), Path("")) + graph = DependencyGraph(dependency, None, simple_dependency_resolver, mock_path_lookup, CurrentSessionState()) + + # Run the test + problems = cell.build_dependency_graph(graph) + + # Verify there were no problems. + assert not problems + + @pytest.mark.parametrize( "code,split", [ @@ -193,6 +250,12 @@ def test_pip_cell_build_dependency_graph_handles_multiline_code(): ], ) def test_pip_magic_split(code, split): + # Avoid direct protected access to the _split method. + class _PipMagicFriend(PipMagic): + @classmethod + def split(cls, code: str) -> list[str]: + return cls._split(code) + assert PipMagic._split(code) == split # pylint: disable=protected-access