diff --git a/src/databricks/labs/ucx/source_code/graph.py b/src/databricks/labs/ucx/source_code/graph.py index 960bc4cd11..1c61eb87dc 100644 --- a/src/databricks/labs/ucx/source_code/graph.py +++ b/src/databricks/labs/ucx/source_code/graph.py @@ -1,6 +1,7 @@ from __future__ import annotations import abc +import logging from dataclasses import dataclass from pathlib import Path from collections.abc import Callable @@ -13,10 +14,13 @@ ImportSource, NotebookRunCall, SysPathChange, + UnresolvedPath, ) from databricks.labs.ucx.source_code.linters.python_ast import Tree, NodeBase from databricks.labs.ucx.source_code.path_lookup import PathLookup +logger = logging.Logger(__file__) + class DependencyGraph: @@ -182,21 +186,42 @@ def build_graph_from_python_source(self, python_code: str) -> list[DependencyPro def _process_node(self, base_node: NodeBase): if isinstance(base_node, SysPathChange): - self._mutate_path_lookup(base_node) - if isinstance(base_node, NotebookRunCall): - strpath = base_node.get_notebook_path() - if strpath is None: - yield DependencyProblem('dependency-not-constant', "Can't check dependency not provided as a constant") - else: - yield from self.register_notebook(Path(strpath)) - if isinstance(base_node, ImportSource): - prefix = "" - if isinstance(base_node.node, ImportFrom) and base_node.node.level is not None: - prefix = "." * base_node.node.level - name = base_node.name or "" - yield from self.register_import(prefix + name) + yield from self._mutate_path_lookup(base_node) + elif isinstance(base_node, NotebookRunCall): + yield from self._register_notebook(base_node) + elif isinstance(base_node, ImportSource): + yield from self._register_import(base_node) + else: + logger.warning(f"Can't process {NodeBase.__name__} of type {type(base_node).__name__}") + + def _register_import(self, base_node: ImportSource): + prefix = "" + if isinstance(base_node.node, ImportFrom) and base_node.node.level is not None: + prefix = "." * base_node.node.level + name = base_node.name or "" + yield from self.register_import(prefix + name) + + def _register_notebook(self, base_node: NotebookRunCall): + paths = base_node.get_notebook_paths() + asserted = False + for path in paths: + if isinstance(path, str): + yield from self.register_notebook(Path(path)) + continue + if not asserted: + asserted = True + yield DependencyProblem( + 'dependency-cannot-compute', + f"Can't check dependency from {base_node.node.as_string()} because the expression cannot be computed", + ) def _mutate_path_lookup(self, change: SysPathChange): + if isinstance(change, UnresolvedPath): + yield DependencyProblem( + 'sys-path-cannot-compute', + f"Can't update sys.path from {change.node.as_string()} because the expression cannot be computed", + ) + return path = Path(change.path) if not path.is_absolute(): path = self._path_lookup.cwd / path diff --git a/src/databricks/labs/ucx/source_code/linters/imports.py b/src/databricks/labs/ucx/source_code/linters/imports.py index 1e62650dcc..9a7d17f772 100644 --- a/src/databricks/labs/ucx/source_code/linters/imports.py +++ b/src/databricks/labs/ucx/source_code/linters/imports.py @@ -9,10 +9,12 @@ Attribute, Call, Const, + InferenceError, Import, ImportFrom, Name, NodeNG, + Uninferable, ) from databricks.labs.ucx.source_code.base import Linter, Advice, Advisory @@ -83,12 +85,23 @@ class NotebookRunCall(NodeBase): def __init__(self, node: Call): super().__init__(node) - def get_notebook_path(self) -> str | None: - node = DbutilsLinter.get_dbutils_notebook_run_path_arg(cast(Call, self.node)) - inferred = next(node.infer(), None) - if isinstance(inferred, Const): - return inferred.value.strip().strip("'").strip('"') - return None + def get_notebook_paths(self) -> list[str | None]: + node = DbutilsLinter.get_dbutils_notebook_run_path_arg(self.node) + try: + return self._get_notebook_paths(node.infer()) + except InferenceError: + logger.debug(f"Can't infer value(s) of {node.as_string()}") + return [None] + + @classmethod + def _get_notebook_paths(cls, nodes: Iterable[NodeNG]) -> list[str | None]: + paths: list[str | None] = [] + for node in nodes: + if isinstance(node, Const): + paths.append(node.as_string().strip("'").strip('"')) + continue + paths.append(None) + return paths T = TypeVar("T", bound=Callable) @@ -104,19 +117,20 @@ def lint(self, code: str) -> Iterable[Advice]: @classmethod def _convert_dbutils_notebook_run_to_advice(cls, node: NodeNG) -> Advisory: assert isinstance(node, Call) - path = cls.get_dbutils_notebook_run_path_arg(node) - if isinstance(path, Const): + call = NotebookRunCall(cast(Call, node)) + paths = call.get_notebook_paths() + if None in paths: return Advisory( - 'dbutils-notebook-run-literal', - "Call to 'dbutils.notebook.run' will be migrated automatically", + 'dbutils-notebook-run-dynamic', + "Path for 'dbutils.notebook.run' is too complex and requires adjusting the notebook path(s)", node.lineno, node.col_offset, node.end_lineno or 0, node.end_col_offset or 0, ) return Advisory( - 'dbutils-notebook-run-dynamic', - "Path for 'dbutils.notebook.run' is not a constant and requires adjusting the notebook path", + 'dbutils-notebook-run-literal', + "Call to 'dbutils.notebook.run' will be migrated automatically", node.lineno, node.col_offset, node.end_lineno or 0, @@ -172,6 +186,11 @@ class RelativePath(SysPathChange): pass +class UnresolvedPath(SysPathChange): + # path added to sys.path that cannot be inferred + pass + + class SysPathChangesVisitor(TreeVisitor): def __init__(self): @@ -204,10 +223,27 @@ def visit_call(self, node: Call): return is_append = func.attrname == "append" changed = node.args[0] if is_append else node.args[1] - if isinstance(changed, Const): - self.sys_path_changes.append(AbsolutePath(node, changed.value, is_append)) - elif isinstance(changed, Call): - self._visit_relative_path(changed, is_append) + relative = False + if isinstance(changed, Call): + if not self._match_aliases(changed.func, ["os", "path", "abspath"]): + return + relative = True + changed = changed.args[0] + try: + all_inferred = changed.inferred() + for inferred in all_inferred: + self._visit_inferred(changed, inferred, relative, is_append) + except InferenceError: + self.sys_path_changes.append(UnresolvedPath(changed, changed.as_string(), is_append)) + + def _visit_inferred(self, changed: NodeNG, inferred: NodeNG, is_relative: bool, is_append: bool): + if inferred is Uninferable or not isinstance(inferred, Const): + self.sys_path_changes.append(UnresolvedPath(changed, changed.as_string(), is_append)) + return + if is_relative: + self.sys_path_changes.append(RelativePath(changed, inferred.value, is_append)) + else: + self.sys_path_changes.append(AbsolutePath(changed, inferred.value, is_append)) def _match_aliases(self, node: NodeNG, names: list[str]): if isinstance(node, Attribute): @@ -221,11 +257,3 @@ def _match_aliases(self, node: NodeNG, names: list[str]): alias = self._aliases.get(full_name, full_name) return node.name == alias return False - - def _visit_relative_path(self, node: Call, is_append: bool): - # check for 'os.path.abspath' - if not self._match_aliases(node.func, ["os", "path", "abspath"]): - return - changed = node.args[0] - if isinstance(changed, Const): - self.sys_path_changes.append(RelativePath(changed, changed.value, is_append)) diff --git a/src/databricks/labs/ucx/source_code/notebooks/loaders.py b/src/databricks/labs/ucx/source_code/notebooks/loaders.py index c26cfd9e39..5cfe6f2198 100644 --- a/src/databricks/labs/ucx/source_code/notebooks/loaders.py +++ b/src/databricks/labs/ucx/source_code/notebooks/loaders.py @@ -14,7 +14,7 @@ SourceContainer, ) from databricks.labs.ucx.source_code.notebooks.cells import CellLanguage -from databricks.labs.ucx.source_code.notebooks.sources import Notebook +from databricks.labs.ucx.source_code.notebooks.sources import Notebook, SUPPORTED_EXTENSION_LANGUAGES from databricks.labs.ucx.source_code.path_lookup import PathLookup logger = logging.getLogger(__name__) @@ -55,17 +55,20 @@ def load_dependency(self, path_lookup: PathLookup, dependency: Dependency) -> So except NotFound: logger.warning(f"Could not read notebook from workspace: {absolute_path}") return None - language = self._detect_language(content) + language = self.detect_language(absolute_path, content) if not language: logger.warning(f"Could not detect language for {absolute_path}") return None return Notebook.parse(absolute_path, content, language) @staticmethod - def _detect_language(content: str): - for language in CellLanguage: - if content.startswith(language.file_magic_header): - return language.language + def detect_language(path: Path, content: str): + language = SUPPORTED_EXTENSION_LANGUAGES.get(path.suffix, None) + if language: + return language + for cell_language in CellLanguage: + if content.startswith(cell_language.file_magic_header): + return cell_language.language return None @staticmethod diff --git a/tests/unit/source_code/linters/test_python_ast.py b/tests/unit/source_code/linters/test_python_ast.py new file mode 100644 index 0000000000..c143f2b328 --- /dev/null +++ b/tests/unit/source_code/linters/test_python_ast.py @@ -0,0 +1,123 @@ +import functools +import operator + +import pytest +from astroid import Attribute, Call, Const, Expr # type: ignore + +from databricks.labs.ucx.source_code.linters.imports import DbutilsLinter +from databricks.labs.ucx.source_code.linters.python_ast import Tree + + +def test_extract_call_by_name(): + tree = Tree.parse("o.m1().m2().m3()") + stmt = tree.first_statement() + assert isinstance(stmt, Expr) + assert isinstance(stmt.value, Call) + act = Tree.extract_call_by_name(stmt.value, "m2") + assert isinstance(act, Call) + assert isinstance(act.func, Attribute) + assert act.func.attrname == "m2" + + +def test_extract_call_by_name_none(): + tree = Tree.parse("o.m1().m2().m3()") + stmt = tree.first_statement() + assert isinstance(stmt, Expr) + assert isinstance(stmt.value, Call) + act = Tree.extract_call_by_name(stmt.value, "m5000") + assert act is None + + +@pytest.mark.parametrize( + "code, arg_index, arg_name, expected", + [ + ("o.m1()", 1, "second", None), + ("o.m1(3)", 1, "second", None), + ("o.m1(first=3)", 1, "second", None), + ("o.m1(4, 3)", None, None, None), + ("o.m1(4, 3)", None, "second", None), + ("o.m1(4, 3)", 1, "second", 3), + ("o.m1(4, 3)", 1, None, 3), + ("o.m1(first=4, second=3)", 1, "second", 3), + ("o.m1(second=3, first=4)", 1, "second", 3), + ("o.m1(second=3, first=4)", None, "second", 3), + ("o.m1(second=3)", 1, "second", 3), + ("o.m1(4, 3, 2)", 1, "second", 3), + ], +) +def test_linter_gets_arg(code, arg_index, arg_name, expected): + tree = Tree.parse(code) + stmt = tree.first_statement() + assert isinstance(stmt, Expr) + assert isinstance(stmt.value, Call) + act = Tree.get_arg(stmt.value, arg_index, arg_name) + if expected is None: + assert act is None + else: + assert isinstance(act, Const) + assert act.value == expected + + +@pytest.mark.parametrize( + "code, expected", + [ + ("o.m1()", 0), + ("o.m1(3)", 1), + ("o.m1(first=3)", 1), + ("o.m1(3, 3)", 2), + ("o.m1(first=3, second=3)", 2), + ("o.m1(3, second=3)", 2), + ("o.m1(3, *b, **c, second=3)", 4), + ], +) +def test_args_count(code, expected): + tree = Tree.parse(code) + stmt = tree.first_statement() + assert isinstance(stmt, Expr) + assert isinstance(stmt.value, Call) + act = Tree.args_count(stmt.value) + assert act == expected + + +def test_tree_walks_nodes_once(): + nodes = set() + count = 0 + tree = Tree.parse("o.m1().m2().m3()") + for node in tree.walk(): + nodes.add(node) + count += 1 + assert len(nodes) == count + + +@pytest.mark.parametrize( + "code, expected", + [ + ( + """ +name = "xyz" +dbutils.notebook.run(name) +""", + ["xyz"], + ), + ( + """ +name = "xyz" + "-" + "abc" +dbutils.notebook.run(name) +""", + ["xyz-abc"], + ), + ( + """ +names = ["abc", "xyz"] +for name in names: + dbutils.notebook.run(name) +""", + ["abc", "xyz"], + ), + ], +) +def test_infers_dbutils_notebook_run_dynamic_value(code, expected): + tree = Tree.parse(code) + calls = DbutilsLinter.list_dbutils_notebook_run_calls(tree) + actual = functools.reduce(operator.iconcat, list(call.get_notebook_paths() for call in calls), []) + assert expected == actual diff --git a/tests/unit/source_code/linters/test_python_imports.py b/tests/unit/source_code/linters/test_python_imports.py index aac3c693af..4c2d312603 100644 --- a/tests/unit/source_code/linters/test_python_imports.py +++ b/tests/unit/source_code/linters/test_python_imports.py @@ -1,8 +1,5 @@ from __future__ import annotations - -import pytest -from astroid import Attribute, Call, Const, Expr # type: ignore from databricks.labs.ucx.source_code.graph import DependencyProblem from databricks.labs.ucx.source_code.linters.imports import DbutilsLinter, ImportSource, SysPathChange @@ -137,101 +134,12 @@ def test_linter_returns_appended_relative_paths_with_os_path_abspath_alias(): assert "relative_path" in [p.path for p in appended] -def test_extract_call_by_name(): - tree = Tree.parse("o.m1().m2().m3()") - stmt = tree.first_statement() - assert isinstance(stmt, Expr) - assert isinstance(stmt.value, Call) - act = Tree.extract_call_by_name(stmt.value, "m2") - assert isinstance(act, Call) - assert isinstance(act.func, Attribute) - assert act.func.attrname == "m2" - - -def test_extract_call_by_name_none(): - tree = Tree.parse("o.m1().m2().m3()") - stmt = tree.first_statement() - assert isinstance(stmt, Expr) - assert isinstance(stmt.value, Call) - act = Tree.extract_call_by_name(stmt.value, "m5000") - assert act is None - - -@pytest.mark.parametrize( - "code, arg_index, arg_name, expected", - [ - ("o.m1()", 1, "second", None), - ("o.m1(3)", 1, "second", None), - ("o.m1(first=3)", 1, "second", None), - ("o.m1(4, 3)", None, None, None), - ("o.m1(4, 3)", None, "second", None), - ("o.m1(4, 3)", 1, "second", 3), - ("o.m1(4, 3)", 1, None, 3), - ("o.m1(first=4, second=3)", 1, "second", 3), - ("o.m1(second=3, first=4)", 1, "second", 3), - ("o.m1(second=3, first=4)", None, "second", 3), - ("o.m1(second=3)", 1, "second", 3), - ("o.m1(4, 3, 2)", 1, "second", 3), - ], -) -def test_linter_gets_arg(code, arg_index, arg_name, expected): - tree = Tree.parse(code) - stmt = tree.first_statement() - assert isinstance(stmt, Expr) - assert isinstance(stmt.value, Call) - act = Tree.get_arg(stmt.value, arg_index, arg_name) - if expected is None: - assert act is None - else: - assert isinstance(act, Const) - assert act.value == expected - - -@pytest.mark.parametrize( - "code, expected", - [ - ("o.m1()", 0), - ("o.m1(3)", 1), - ("o.m1(first=3)", 1), - ("o.m1(3, 3)", 2), - ("o.m1(first=3, second=3)", 2), - ("o.m1(3, second=3)", 2), - ("o.m1(3, *b, **c, second=3)", 4), - ], -) -def test_args_count(code, expected): - tree = Tree.parse(code) - stmt = tree.first_statement() - assert isinstance(stmt, Expr) - assert isinstance(stmt.value, Call) - act = Tree.args_count(stmt.value) - assert act == expected - - -@pytest.mark.parametrize( - "code, expected", - [ - ( - """ -name = "xyz" -dbutils.notebook.run(name) -""", - "xyz", - ) - ], -) -def test_infers_string_variable_value(code, expected): +def test_linter_returns_inferred_paths(): + code = """ +import sys +path = "absolute_path_1" +sys.path.append(path) +""" tree = Tree.parse(code) - calls = DbutilsLinter.list_dbutils_notebook_run_calls(tree) - actual = list(call.get_notebook_path() for call in calls) - assert [expected] == actual - - -def test_tree_walker_walks_nodes_once(): - nodes = set() - count = 0 - tree = Tree.parse("o.m1().m2().m3()") - for node in tree.walk(): - nodes.add(node) - count += 1 - assert len(nodes) == count + appended = SysPathChange.extract_from_tree(tree) + assert ["absolute_path_1"] == [p.path for p in appended] diff --git a/tests/unit/source_code/notebooks/test_loader.py b/tests/unit/source_code/notebooks/test_loader.py new file mode 100644 index 0000000000..c59371973f --- /dev/null +++ b/tests/unit/source_code/notebooks/test_loader.py @@ -0,0 +1,12 @@ +from pathlib import Path + +from databricks.labs.ucx.source_code.notebooks.loaders import NotebookLoader +from databricks.sdk.service.workspace import Language + + +def test_detects_language(): + assert NotebookLoader.detect_language(Path("hi.py"), "stuff") == Language.PYTHON + assert NotebookLoader.detect_language(Path("hi.sql"), "stuff") == Language.SQL + assert NotebookLoader.detect_language(Path("hi"), "# Databricks notebook source") == Language.PYTHON + assert NotebookLoader.detect_language(Path("hi"), "-- Databricks notebook source") == Language.SQL + assert not NotebookLoader.detect_language(Path("hi"), "stuff") diff --git a/tests/unit/source_code/samples/run_notebooks.py b/tests/unit/source_code/samples/run_notebooks.py index 3bd59fde10..46aae04ee7 100644 --- a/tests/unit/source_code/samples/run_notebooks.py +++ b/tests/unit/source_code/samples/run_notebooks.py @@ -1,10 +1,10 @@ +# Databricks notebook source import datetime # Updated List of Notebooks notebooks_list = [ - '/Production/data_solutions/accounts/companyA_10011111/de/companyA_de_report', - '/Production/data_solutions/accounts/companyB_10022222/de/companyB_de_report', - '/Production/data_solutions/accounts/companyC_10033333/de/companyC_de_report', - '/Production/data_solutions/accounts/companyD_10044444/de/companyD_de_report', + './leaf1.py', + './leaf2.py', + './leaf3.py', ] # Execution: for notebook in notebooks_list: diff --git a/tests/unit/source_code/samples/run_notebooks_with_fstring.py b/tests/unit/source_code/samples/run_notebooks_with_fstring.py new file mode 100644 index 0000000000..299228b116 --- /dev/null +++ b/tests/unit/source_code/samples/run_notebooks_with_fstring.py @@ -0,0 +1,19 @@ +# Databricks notebook source +import datetime +# Updated List of Notebooks +path = './leaf2.py' +notebooks_list = [ + './leaf1.py', + f"{path}", + './leaf3.py', + ] +# Execution: +for notebook in notebooks_list: + try: + start_time = datetime.datetime.now() + print("Running the report of " + str(notebook).split('/')[len(str(notebook).split('/'))-1]) + status = dbutils.notebook.run(notebook,100000) + end_time = datetime.datetime.now() + print("Finished, time taken: " + str(start_time-end_time)) + except: + print("The notebook {0} failed to run".format(notebook)) diff --git a/tests/unit/source_code/samples/sys-path-with-fstring.py b/tests/unit/source_code/samples/sys-path-with-fstring.py new file mode 100644 index 0000000000..9be41b98cb --- /dev/null +++ b/tests/unit/source_code/samples/sys-path-with-fstring.py @@ -0,0 +1,3 @@ +import sys +name = "whatever" +sys.path.append(f"{name}") diff --git a/tests/unit/source_code/test_dependencies.py b/tests/unit/source_code/test_dependencies.py index 1e5352de0b..93ffc444f7 100644 --- a/tests/unit/source_code/test_dependencies.py +++ b/tests/unit/source_code/test_dependencies.py @@ -1,10 +1,12 @@ from pathlib import Path +from unittest.mock import create_autospec from databricks.labs.ucx.source_code.graph import ( SourceContainer, DependencyResolver, DependencyProblem, Dependency, + BaseImportResolver, ) from databricks.labs.ucx.source_code.linters.files import FileLoader, ImportFileResolver from databricks.labs.ucx.source_code.notebooks.loaders import ( @@ -20,54 +22,46 @@ from tests.unit.conftest import MockPathLookup +def dependency_resolver(notebook_resolver, path_lookup): + library_resolver = PythonLibraryResolver(Whitelist()) + import_resolver = ImportFileResolver(FileLoader(), Whitelist()) + return DependencyResolver(library_resolver, notebook_resolver, import_resolver, path_lookup) + + def test_dependency_resolver_repr(mock_notebook_resolver, mock_path_lookup): """for improving test coverage""" - pip_resolver = PythonLibraryResolver(Whitelist()) - dependency_resolver = DependencyResolver(pip_resolver, mock_notebook_resolver, [], mock_path_lookup) - assert len(repr(dependency_resolver)) > 0 + resolver = dependency_resolver(mock_notebook_resolver, mock_path_lookup) + assert len(repr(resolver)) > 0 def test_dependency_resolver_visits_workspace_notebook_dependencies(mock_path_lookup): - notebook_loader = NotebookLoader() - notebook_resolver = NotebookResolver(notebook_loader) - pip_resolver = PythonLibraryResolver(Whitelist()) - dependency_resolver = DependencyResolver(pip_resolver, notebook_resolver, [], mock_path_lookup) - maybe = dependency_resolver.build_notebook_dependency_graph(Path("root3.run.py")) + notebook_resolver = NotebookResolver(NotebookLoader()) + resolver = dependency_resolver(notebook_resolver, mock_path_lookup) + maybe = resolver.build_notebook_dependency_graph(Path("root3.run.py")) assert not maybe.failed assert maybe.graph.all_relative_names() == {"root3.run.py", "root1.run.py", "leaf1.py", "leaf2.py"} def test_dependency_resolver_visits_local_notebook_dependencies(mock_path_lookup): - notebook_loader = NotebookLoader() - notebook_resolver = NotebookResolver(notebook_loader) - pip_resolver = PythonLibraryResolver(Whitelist()) - dependency_resolver = DependencyResolver(pip_resolver, notebook_resolver, [], mock_path_lookup) - maybe = dependency_resolver.build_notebook_dependency_graph(Path("root4.py")) + notebook_resolver = NotebookResolver(NotebookLoader()) + resolver = dependency_resolver(notebook_resolver, mock_path_lookup) + maybe = resolver.build_notebook_dependency_graph(Path("root4.py")) assert not maybe.failed assert maybe.graph.all_relative_names() == {"root4.py", "leaf3.py"} def test_dependency_resolver_visits_workspace_file_dependencies(mock_path_lookup): - whitelist = Whitelist() - file_loader = FileLoader() - notebook_loader = NotebookLoader() - notebook_resolver = NotebookResolver(notebook_loader) - import_resolver = ImportFileResolver(file_loader, whitelist) - pip_resolver = PythonLibraryResolver(whitelist) - dependency_resolver = DependencyResolver(pip_resolver, notebook_resolver, import_resolver, mock_path_lookup) - maybe = dependency_resolver.build_local_file_dependency_graph(Path('./root8.py')) + notebook_resolver = NotebookResolver(NotebookLoader()) + resolver = dependency_resolver(notebook_resolver, mock_path_lookup) + maybe = resolver.build_local_file_dependency_graph(Path('./root8.py')) assert not maybe.failed assert maybe.graph.all_relative_names() == {'leaf1.py', 'leaf2.py', 'root8.py'} def test_dependency_resolver_raises_problem_with_unfound_workspace_notebook_dependency(mock_path_lookup): - file_loader = FileLoader() - notebook_loader = NotebookLoader() - notebook_resolver = NotebookResolver(notebook_loader) - import_resolver = ImportFileResolver(file_loader, Whitelist()) - pip_resolver = PythonLibraryResolver(Whitelist()) - dependency_resolver = DependencyResolver(pip_resolver, notebook_resolver, import_resolver, mock_path_lookup) - maybe = dependency_resolver.build_notebook_dependency_graph(Path("root1-no-leaf.run.py")) + notebook_resolver = NotebookResolver(NotebookLoader()) + resolver = dependency_resolver(notebook_resolver, mock_path_lookup) + maybe = resolver.build_notebook_dependency_graph(Path("root1-no-leaf.run.py")) assert list(maybe.problems) == [ DependencyProblem( 'notebook-not-found', @@ -82,11 +76,9 @@ def test_dependency_resolver_raises_problem_with_unfound_workspace_notebook_depe def test_dependency_resolver_raises_problem_with_unfound_local_notebook_dependency(mock_path_lookup): - notebook_loader = NotebookLoader() - notebook_resolver = NotebookResolver(notebook_loader) - pip_resolver = PythonLibraryResolver(Whitelist()) - dependency_resolver = DependencyResolver(pip_resolver, notebook_resolver, [], mock_path_lookup) - maybe = dependency_resolver.build_notebook_dependency_graph(Path("root4-no-leaf.py")) + notebook_resolver = NotebookResolver(NotebookLoader()) + resolver = dependency_resolver(notebook_resolver, mock_path_lookup) + maybe = resolver.build_notebook_dependency_graph(Path("root4-no-leaf.py")) assert list(maybe.problems) == [ DependencyProblem( 'notebook-not-found', 'Notebook not found: __NO_LEAF__', Path('root4-no-leaf.py'), 1, 0, 1, 37 @@ -95,57 +87,43 @@ def test_dependency_resolver_raises_problem_with_unfound_local_notebook_dependen def test_dependency_resolver_raises_problem_with_invalid_run_cell(mock_path_lookup): - notebook_loader = NotebookLoader() - notebook_resolver = NotebookResolver(notebook_loader) - pip_resolver = PythonLibraryResolver(Whitelist()) - dependency_resolver = DependencyResolver(pip_resolver, notebook_resolver, [], mock_path_lookup) - maybe = dependency_resolver.build_notebook_dependency_graph(Path('leaf6.py')) + notebook_resolver = NotebookResolver(NotebookLoader()) + resolver = dependency_resolver(notebook_resolver, mock_path_lookup) + maybe = resolver.build_notebook_dependency_graph(Path('leaf6.py')) assert list(maybe.problems) == [ DependencyProblem('invalid-run-cell', 'Missing notebook path in %run command', Path('leaf6.py'), 5, 0, 5, 4) ] def test_dependency_resolver_visits_recursive_file_dependencies(mock_path_lookup): - notebook_loader = NotebookLoader() - notebook_resolver = NotebookResolver(notebook_loader) - import_resolver = ImportFileResolver(FileLoader(), Whitelist()) - dependency_resolver = DependencyResolver([], notebook_resolver, import_resolver, mock_path_lookup) - maybe = dependency_resolver.build_local_file_dependency_graph(Path("root6.py")) + notebook_resolver = NotebookResolver(NotebookLoader()) + resolver = dependency_resolver(notebook_resolver, mock_path_lookup) + maybe = resolver.build_local_file_dependency_graph(Path("root6.py")) assert not maybe.failed assert maybe.graph.all_relative_names() == {"root6.py", "root5.py", "leaf4.py"} def test_dependency_resolver_raises_problem_with_unresolved_import(mock_path_lookup): - notebook_loader = NotebookLoader() - notebook_resolver = NotebookResolver(notebook_loader) - whitelist = Whitelist() - import_resolver = ImportFileResolver(FileLoader(), whitelist) - pip_resolver = PythonLibraryResolver(whitelist) - dependency_resolver = DependencyResolver(pip_resolver, notebook_resolver, import_resolver, mock_path_lookup) - maybe = dependency_resolver.build_local_file_dependency_graph(Path('root7.py')) + notebook_resolver = NotebookResolver(NotebookLoader()) + resolver = dependency_resolver(notebook_resolver, mock_path_lookup) + maybe = resolver.build_local_file_dependency_graph(Path('root7.py')) assert list(maybe.problems) == [ DependencyProblem('import-not-found', 'Could not locate import: some_library', Path("root7.py"), 1, 0, 1, 19) ] def test_dependency_resolver_visits_file_dependencies(mock_path_lookup): - notebook_loader = NotebookLoader() - notebook_resolver = NotebookResolver(notebook_loader) - whitelist = Whitelist() - import_resolver = ImportFileResolver(FileLoader(), whitelist) - pip_resolver = PythonLibraryResolver(whitelist) - dependency_resolver = DependencyResolver(pip_resolver, notebook_resolver, import_resolver, mock_path_lookup) - maybe = dependency_resolver.build_local_file_dependency_graph(Path("root5.py")) + notebook_resolver = NotebookResolver(NotebookLoader()) + resolver = dependency_resolver(notebook_resolver, mock_path_lookup) + maybe = resolver.build_local_file_dependency_graph(Path("root5.py")) assert not maybe.failed assert maybe.graph.all_relative_names() == {"root5.py", "leaf4.py"} def test_dependency_resolver_skips_builtin_dependencies(mock_path_lookup): - notebook_loader = NotebookLoader() - notebook_resolver = NotebookResolver(notebook_loader) - import_resolver = ImportFileResolver(FileLoader(), Whitelist()) - dependency_resolver = DependencyResolver([], notebook_resolver, import_resolver, mock_path_lookup) - maybe = dependency_resolver.build_local_file_dependency_graph(Path("python_builtins.py")) + notebook_resolver = NotebookResolver(NotebookLoader()) + resolver = dependency_resolver(notebook_resolver, mock_path_lookup) + maybe = resolver.build_local_file_dependency_graph(Path("python_builtins.py")) assert not maybe.failed graph = maybe.graph maybe = graph.locate_dependency(Path("os")) @@ -155,27 +133,24 @@ def test_dependency_resolver_skips_builtin_dependencies(mock_path_lookup): def test_dependency_resolver_ignores_known_dependencies(mock_path_lookup): - notebook_loader = NotebookLoader() - notebook_resolver = NotebookResolver(notebook_loader) - import_resolver = ImportFileResolver(FileLoader(), Whitelist()) - pip_resolver = PythonLibraryResolver(Whitelist()) - dependency_resolver = DependencyResolver(pip_resolver, notebook_resolver, import_resolver, mock_path_lookup) - maybe = dependency_resolver.build_local_file_dependency_graph(Path("python_builtins.py")) + notebook_resolver = NotebookResolver(NotebookLoader()) + resolver = dependency_resolver(notebook_resolver, mock_path_lookup) + maybe = resolver.build_local_file_dependency_graph(Path("python_builtins.py")) assert maybe.graph graph = maybe.graph maybe_graph = graph.locate_dependency(Path("databricks")) assert not maybe_graph.graph -def test_dependency_resolver_terminates_at_known_libraries(empty_index, mock_notebook_resolver): +def test_dependency_resolver_terminates_at_known_libraries(empty_index, mock_notebook_resolver, mock_path_lookup): lookup = MockPathLookup() site_packages_path = locate_site_packages() lookup.append_path(site_packages_path) file_loader = FileLoader() import_resolver = ImportFileResolver(file_loader, Whitelist()) - pip_resolver = PythonLibraryResolver(Whitelist()) - dependency_resolver = DependencyResolver(pip_resolver, mock_notebook_resolver, import_resolver, lookup) - maybe = dependency_resolver.build_local_file_dependency_graph(Path("import-site-package.py")) + library_resolver = PythonLibraryResolver(Whitelist()) + resolver = DependencyResolver(library_resolver, mock_notebook_resolver, import_resolver, lookup) + maybe = resolver.build_local_file_dependency_graph(Path("import-site-package.py")) assert not maybe.failed graph = maybe.graph maybe = graph.locate_dependency(Path(site_packages_path, "certifi", "core.py")) @@ -184,21 +159,18 @@ def test_dependency_resolver_terminates_at_known_libraries(empty_index, mock_not def test_dependency_resolver_raises_problem_with_unfound_root_file(mock_path_lookup, mock_notebook_resolver): - import_resolver = ImportFileResolver(FileLoader(), Whitelist()) - pip_resolver = PythonLibraryResolver(Whitelist()) - dependency_resolver = DependencyResolver(pip_resolver, mock_notebook_resolver, import_resolver, mock_path_lookup) - maybe = dependency_resolver.build_local_file_dependency_graph(Path("non-existing.py")) + notebook_resolver = NotebookResolver(NotebookLoader()) + resolver = dependency_resolver(notebook_resolver, mock_path_lookup) + maybe = resolver.build_local_file_dependency_graph(Path("non-existing.py")) assert list(maybe.problems) == [ DependencyProblem('file-not-found', 'File not found: non-existing.py', Path("non-existing.py")) ] def test_dependency_resolver_raises_problem_with_unfound_root_notebook(mock_path_lookup): - notebook_loader = NotebookLoader() - notebook_resolver = NotebookResolver(notebook_loader) - pip_resolver = PythonLibraryResolver(Whitelist()) - dependency_resolver = DependencyResolver(pip_resolver, notebook_resolver, [], mock_path_lookup) - maybe = dependency_resolver.build_notebook_dependency_graph(Path("unknown_notebook")) + notebook_resolver = NotebookResolver(NotebookLoader()) + resolver = dependency_resolver(notebook_resolver, mock_path_lookup) + maybe = resolver.build_notebook_dependency_graph(Path("unknown_notebook")) assert list(maybe.problems) == [ DependencyProblem('notebook-not-found', 'Notebook not found: unknown_notebook', Path("unknown_notebook")) ] @@ -214,8 +186,8 @@ def load_dependency(self, path_lookup: PathLookup, dependency: Dependency) -> So whitelist = Whitelist() import_resolver = ImportFileResolver(file_loader, whitelist) pip_resolver = PythonLibraryResolver(whitelist) - dependency_resolver = DependencyResolver(pip_resolver, mock_notebook_resolver, import_resolver, mock_path_lookup) - maybe = dependency_resolver.build_local_file_dependency_graph(Path("import-sub-site-package.py")) + resolver = DependencyResolver(pip_resolver, mock_notebook_resolver, import_resolver, mock_path_lookup) + maybe = resolver.build_local_file_dependency_graph(Path("import-sub-site-package.py")) assert list(maybe.problems) == [ DependencyProblem( 'cannot-load-file', 'Could not load file import-sub-site-package.py', Path('') @@ -232,17 +204,35 @@ def load_dependency(self, path_lookup: PathLookup, dependency: Dependency) -> So notebook_loader = FailingNotebookLoader() notebook_resolver = NotebookResolver(notebook_loader) pip_resolver = PythonLibraryResolver(Whitelist()) - dependency_resolver = DependencyResolver(pip_resolver, notebook_resolver, [], mock_path_lookup) - maybe = dependency_resolver.build_notebook_dependency_graph(Path("root5.py")) + resolver = DependencyResolver(pip_resolver, notebook_resolver, [], mock_path_lookup) + maybe = resolver.build_notebook_dependency_graph(Path("root5.py")) assert list(maybe.problems) == [ DependencyProblem('cannot-load-notebook', 'Could not load notebook root5.py', Path('')) ] def test_dependency_resolver_raises_problem_with_missing_file_loader(mock_notebook_resolver, mock_path_lookup): - pip_resolver = PythonLibraryResolver(Whitelist()) - dependency_resolver = DependencyResolver(pip_resolver, mock_notebook_resolver, [], mock_path_lookup) - maybe = dependency_resolver.build_local_file_dependency_graph(Path("import-sub-site-package.py")) + library_resolver = PythonLibraryResolver(Whitelist()) + import_resolver = create_autospec(BaseImportResolver) + import_resolver.resolve_import.return_value = None + resolver = DependencyResolver(library_resolver, mock_notebook_resolver, import_resolver, mock_path_lookup) + maybe = resolver.build_local_file_dependency_graph(Path("import-sub-site-package.py")) assert list(maybe.problems) == [ DependencyProblem('missing-file-resolver', 'Missing resolver for local files', Path('')) ] + + +def test_dependency_resolver_raises_problem_for_non_inferable_sys_path(mock_notebook_resolver, mock_path_lookup): + resolver = dependency_resolver(mock_notebook_resolver, mock_path_lookup) + maybe = resolver.build_local_file_dependency_graph(Path("sys-path-with-fstring.py")) + assert list(maybe.problems) == [ + DependencyProblem( + 'sys-path-cannot-compute', + "Can't update sys.path from f'{name}' because the expression cannot be computed", + Path('sys-path-with-fstring.py'), + 3, + 16, + 3, + 25, + ) + ] diff --git a/tests/unit/source_code/test_notebook.py b/tests/unit/source_code/test_notebook.py index de3b00c309..9c198e609c 100644 --- a/tests/unit/source_code/test_notebook.py +++ b/tests/unit/source_code/test_notebook.py @@ -7,6 +7,7 @@ from databricks.labs.ucx.source_code.base import Advisory from databricks.labs.ucx.source_code.graph import DependencyGraph, SourceContainer, DependencyResolver from databricks.labs.ucx.source_code.known import Whitelist +from databricks.labs.ucx.source_code.linters.files import ImportFileResolver, FileLoader from databricks.labs.ucx.source_code.linters.imports import DbutilsLinter from databricks.labs.ucx.source_code.notebooks.sources import Notebook from databricks.labs.ucx.source_code.notebooks.loaders import ( @@ -127,13 +128,18 @@ def test_notebook_generates_runnable_cells(source: tuple[str, Language, list[str assert cell.is_runnable() -def test_notebook_builds_leaf_dependency_graph(mock_path_lookup): +def dependency_resolver(mock_path_lookup): notebook_loader = NotebookLoader() notebook_resolver = NotebookResolver(notebook_loader) - pip_resolver = PythonLibraryResolver(Whitelist()) - dependency_resolver = DependencyResolver(pip_resolver, notebook_resolver, [], mock_path_lookup) - maybe = dependency_resolver.resolve_notebook(mock_path_lookup, Path("leaf1.py")) - graph = DependencyGraph(maybe.dependency, None, dependency_resolver, mock_path_lookup) + library_resolver = PythonLibraryResolver(Whitelist()) + import_resolver = ImportFileResolver(FileLoader(), Whitelist()) + return DependencyResolver(library_resolver, notebook_resolver, import_resolver, mock_path_lookup) + + +def test_notebook_builds_leaf_dependency_graph(mock_path_lookup): + resolver = dependency_resolver(mock_path_lookup) + maybe = resolver.resolve_notebook(mock_path_lookup, Path("leaf1.py")) + graph = DependencyGraph(maybe.dependency, None, resolver, mock_path_lookup) container = maybe.dependency.load(mock_path_lookup) problems = container.build_dependency_graph(graph) assert not problems @@ -147,12 +153,9 @@ def get_status_side_effect(*args): def test_notebook_builds_depth1_dependency_graph(mock_path_lookup): paths = ["root1.run.py", "leaf1.py", "leaf2.py"] - notebook_loader = NotebookLoader() - notebook_resolver = NotebookResolver(notebook_loader) - pip_resolver = PythonLibraryResolver(Whitelist()) - dependency_resolver = DependencyResolver(pip_resolver, notebook_resolver, [], mock_path_lookup) - maybe = dependency_resolver.resolve_notebook(mock_path_lookup, Path(paths[0])) - graph = DependencyGraph(maybe.dependency, None, dependency_resolver, mock_path_lookup) + resolver = dependency_resolver(mock_path_lookup) + maybe = resolver.resolve_notebook(mock_path_lookup, Path(paths[0])) + graph = DependencyGraph(maybe.dependency, None, resolver, mock_path_lookup) container = maybe.dependency.load(mock_path_lookup) problems = container.build_dependency_graph(graph) assert not problems @@ -161,12 +164,9 @@ def test_notebook_builds_depth1_dependency_graph(mock_path_lookup): def test_notebook_builds_depth2_dependency_graph(mock_path_lookup): paths = ["root2.run.py", "root1.run.py", "leaf1.py", "leaf2.py"] - notebook_loader = NotebookLoader() - notebook_resolver = NotebookResolver(notebook_loader) - pip_resolver = PythonLibraryResolver(Whitelist()) - dependency_resolver = DependencyResolver(pip_resolver, notebook_resolver, [], mock_path_lookup) - maybe = dependency_resolver.resolve_notebook(mock_path_lookup, Path(paths[0])) - graph = DependencyGraph(maybe.dependency, None, dependency_resolver, mock_path_lookup) + resolver = dependency_resolver(mock_path_lookup) + maybe = resolver.resolve_notebook(mock_path_lookup, Path(paths[0])) + graph = DependencyGraph(maybe.dependency, None, resolver, mock_path_lookup) container = maybe.dependency.load(mock_path_lookup) problems = container.build_dependency_graph(graph) assert not problems @@ -175,12 +175,9 @@ def test_notebook_builds_depth2_dependency_graph(mock_path_lookup): def test_notebook_builds_dependency_graph_avoiding_duplicates(mock_path_lookup): paths = ["root3.run.py", "root1.run.py", "leaf1.py", "leaf2.py"] - notebook_loader = NotebookLoader() - notebook_resolver = NotebookResolver(notebook_loader) - pip_resolver = PythonLibraryResolver(Whitelist()) - dependency_resolver = DependencyResolver(pip_resolver, notebook_resolver, [], mock_path_lookup) - maybe = dependency_resolver.resolve_notebook(mock_path_lookup, Path(paths[0])) - graph = DependencyGraph(maybe.dependency, None, dependency_resolver, mock_path_lookup) + resolver = dependency_resolver(mock_path_lookup) + maybe = resolver.resolve_notebook(mock_path_lookup, Path(paths[0])) + graph = DependencyGraph(maybe.dependency, None, resolver, mock_path_lookup) container = maybe.dependency.load(mock_path_lookup) problems = container.build_dependency_graph(graph) assert not problems @@ -190,12 +187,9 @@ def test_notebook_builds_dependency_graph_avoiding_duplicates(mock_path_lookup): def test_notebook_builds_cyclical_dependency_graph(mock_path_lookup): paths = ["cyclical1.run.py", "cyclical2.run.py"] - notebook_loader = NotebookLoader() - notebook_resolver = NotebookResolver(notebook_loader) - pip_resolver = PythonLibraryResolver(Whitelist()) - dependency_resolver = DependencyResolver(pip_resolver, notebook_resolver, [], mock_path_lookup) - maybe = dependency_resolver.resolve_notebook(mock_path_lookup, Path(paths[0])) - graph = DependencyGraph(maybe.dependency, None, dependency_resolver, mock_path_lookup) + resolver = dependency_resolver(mock_path_lookup) + maybe = resolver.resolve_notebook(mock_path_lookup, Path(paths[0])) + graph = DependencyGraph(maybe.dependency, None, resolver, mock_path_lookup) container = maybe.dependency.load(mock_path_lookup) problems = container.build_dependency_graph(graph) assert not problems @@ -204,34 +198,38 @@ def test_notebook_builds_cyclical_dependency_graph(mock_path_lookup): def test_notebook_builds_python_dependency_graph(mock_path_lookup): paths = ["root4.py", "leaf3.py"] - notebook_loader = NotebookLoader() - notebook_resolver = NotebookResolver(notebook_loader) - dependency_resolver = DependencyResolver([], notebook_resolver, [], mock_path_lookup) - maybe = dependency_resolver.resolve_notebook(mock_path_lookup, Path(paths[0])) - graph = DependencyGraph(maybe.dependency, None, dependency_resolver, mock_path_lookup) + resolver = dependency_resolver(mock_path_lookup) + maybe = resolver.resolve_notebook(mock_path_lookup, Path(paths[0])) + graph = DependencyGraph(maybe.dependency, None, resolver, mock_path_lookup) container = maybe.dependency.load(mock_path_lookup) problems = container.build_dependency_graph(graph) assert not problems assert graph.all_paths == {mock_path_lookup.cwd / path for path in paths} -def test_detects_manual_migration_in_dbutils_notebook_run_in_python_code_(): - sources: list[str] = _load_sources(SourceContainer, "run_notebooks.py") - linter = DbutilsLinter() - advices = list(linter.lint(sources[0])) - assert [ - Advisory( - code='dbutils-notebook-run-dynamic', - message="Path for 'dbutils.notebook.run' is not a constant and requires adjusting the notebook path", - start_line=14, - start_col=13, - end_line=14, - end_col=50, - ) - ] == advices +def test_notebook_builds_python_dependency_graph_with_loop(mock_path_lookup): + path = "run_notebooks.py" + resolver = dependency_resolver(mock_path_lookup) + maybe = resolver.resolve_notebook(mock_path_lookup, Path(path)) + graph = DependencyGraph(maybe.dependency, None, resolver, mock_path_lookup) + container = maybe.dependency.load(mock_path_lookup) + container.build_dependency_graph(graph) + expected_paths = [path, "leaf1.py", "leaf2.py", "leaf3.py"] + assert graph.all_paths == {mock_path_lookup.cwd / path for path in expected_paths} + + +def test_notebook_builds_python_dependency_graph_with_fstring_loop(mock_path_lookup): + path = "run_notebooks_with_fstring.py" + resolver = dependency_resolver(mock_path_lookup) + maybe = resolver.resolve_notebook(mock_path_lookup, Path(path)) + graph = DependencyGraph(maybe.dependency, None, resolver, mock_path_lookup) + container = maybe.dependency.load(mock_path_lookup) + container.build_dependency_graph(graph) + expected_paths = [path, "leaf1.py", "leaf3.py"] + assert graph.all_paths == {mock_path_lookup.cwd / path for path in expected_paths} -def test_detects_automatic_migration_in_dbutils_notebook_run_in_python_code_(): +def test_detects_automatic_migration_in_dbutils_notebook_run_in_python_code(): sources: list[str] = _load_sources(SourceContainer, "root4.py") linter = DbutilsLinter() advices = list(linter.lint(sources[0])) @@ -247,7 +245,7 @@ def test_detects_automatic_migration_in_dbutils_notebook_run_in_python_code_(): ] == advices -def test_detects_multiple_calls_to_dbutils_notebook_run_in_python_code_(): +def test_detects_multiple_calls_to_dbutils_notebook_run_in_python_code(): source = """ import stuff do_something_with_stuff(stuff) @@ -268,3 +266,14 @@ def test_does_not_detect_partial_call_to_dbutils_notebook_run_in_python_code_(): linter = DbutilsLinter() advices = list(linter.lint(source)) assert len(advices) == 0 + + +def test_raises_advice_when_dbutils_notebook_run_is_too_complex(): + source = """ +name = "xyz" +dbutils.notebook.run(f"Hey {name}") + """ + linter = DbutilsLinter() + advices = list(linter.lint(source)) + assert len(advices) == 1 + assert advices[0].code == "dbutils-notebook-run-dynamic"