From 360ca86137221f0cad068f876790b0cd9e41a270 Mon Sep 17 00:00:00 2001 From: Serge Smertin Date: Thu, 2 May 2024 23:50:26 +0200 Subject: [PATCH] ... --- src/databricks/labs/ucx/source_code/files.py | 23 +- src/databricks/labs/ucx/source_code/graph.py | 202 ++++++------------ .../labs/ucx/source_code/notebooks/cells.py | 67 +++--- .../labs/ucx/source_code/notebooks/loaders.py | 10 +- .../labs/ucx/source_code/notebooks/sources.py | 4 +- .../labs/ucx/source_code/site_packages.py | 13 +- .../labs/ucx/source_code/whitelist.py | 30 +-- tests/unit/source_code/test_dependencies.py | 47 ++-- tests/unit/source_code/test_notebook.py | 36 ++-- tests/unit/source_code/test_s3fs.py | 61 +++--- 10 files changed, 219 insertions(+), 274 deletions(-) diff --git a/src/databricks/labs/ucx/source_code/files.py b/src/databricks/labs/ucx/source_code/files.py index 6c6c04a22f..6a31d2109b 100644 --- a/src/databricks/labs/ucx/source_code/files.py +++ b/src/databricks/labs/ucx/source_code/files.py @@ -4,7 +4,7 @@ import logging import sys from pathlib import Path -from collections.abc import Callable, Iterable +from collections.abc import Iterable from databricks.sdk.service.workspace import Language @@ -42,11 +42,10 @@ def build_dependency_graph(self, parent: DependencyGraph) -> list[DependencyProb linter = ASTLinter.parse(self._original_code) run_notebook_calls = PythonLinter.list_dbutils_notebook_run_calls(linter) for call in run_notebook_calls: - call_problems: list[DependencyProblem] = [] notebook_path_arg = PythonLinter.get_dbutils_notebook_run_path_arg(call) if isinstance(notebook_path_arg, ast.Constant): notebook_path = notebook_path_arg.value - parent.register_notebook(Path(notebook_path), call_problems.append) + maybe = parent.register_notebook(Path(notebook_path)) problems += [ problem.replace( source_path=self._path, @@ -55,7 +54,7 @@ def build_dependency_graph(self, parent: DependencyGraph) -> list[DependencyProb end_line=call.end_lineno or 0, end_col=call.end_col_offset or 0, ) - for problem in call_problems + for problem in maybe.problems ] continue problem = DependencyProblem( @@ -69,11 +68,8 @@ def build_dependency_graph(self, parent: DependencyGraph) -> list[DependencyProb ) problems.append(problem) # TODO https://github.com/databrickslabs/ucx/issues/1287 - for pair in PythonLinter.list_import_sources(linter): - import_name = pair[0] - import_problems: list[DependencyProblem] = [] - parent.register_import(import_name, import_problems.append) - node = pair[1] + for import_name, node in PythonLinter.list_import_sources(linter): + maybe = parent.register_import(import_name) problems += [ problem.replace( source_path=self._path, @@ -82,7 +78,7 @@ def build_dependency_graph(self, parent: DependencyGraph) -> list[DependencyProb end_line=node.end_lineno or 0, end_col=node.end_col_offset or 0, ) - for problem in import_problems + for problem in maybe.problems ] return problems @@ -182,11 +178,12 @@ def resolve_local_file(self, path: Path) -> MaybeDependency: return MaybeDependency(Dependency(self._file_loader, path), []) return super().resolve_local_file(path) - def resolve_import(self, name: str, problem_collector: Callable[[DependencyProblem], None]) -> Dependency | None: + def resolve_import(self, name: str) -> MaybeDependency: fullpath = self._file_loader.full_path(Path(f"{name}.py")) if fullpath is not None: - return Dependency(self._file_loader, fullpath) - return super().resolve_import(name, problem_collector) + dependency = Dependency(self._file_loader, fullpath) + return MaybeDependency(dependency, []) + return super().resolve_import(name) class SysPathProvider: diff --git a/src/databricks/labs/ucx/source_code/graph.py b/src/databricks/labs/ucx/source_code/graph.py index 8382205bf0..19d582c1a9 100644 --- a/src/databricks/labs/ucx/source_code/graph.py +++ b/src/databricks/labs/ucx/source_code/graph.py @@ -4,7 +4,7 @@ import ast from dataclasses import dataclass from pathlib import Path -from collections.abc import Callable, Iterable +from collections.abc import Callable from databricks.labs.ucx.source_code.python_linter import ASTLinter, PythonLinter @@ -30,48 +30,35 @@ def dependency(self): def path(self): return self._dependency.path - def add_problems(self, problems: list[DependencyProblem]): - problems = [problem.replace(source_path=self.dependency.path) for problem in problems] - self._resolver.add_problems(problems) - - # TODO problem_collector is tactical, pending https://github.com/databrickslabs/ucx/issues/1559 - def register_notebook( - self, path: Path, problem_collector: Callable[[DependencyProblem], None] - ) -> DependencyGraph | None: - resolved = self._resolver.resolve_notebook(path, problem_collector) - if resolved is None: - return None - return self.register_dependency(resolved) - - # TODO problem_collector is tactical, pending https://github.com/databrickslabs/ucx/issues/1559 - def register_import( - self, name: str, problem_collector: Callable[[DependencyProblem], None] - ) -> DependencyGraph | None: - resolved = self._resolver.resolve_import(name, problem_collector) - if resolved is None: - return None - return self.register_dependency(resolved) - - def register_dependency(self, dependency: Dependency): - # TODO: return MaybeGraph + def register_notebook(self, path: Path) -> MaybeGraph: + maybe = self._resolver.resolve_notebook(path) + if not maybe.dependency: + return MaybeGraph(None, maybe.problems) + return self.register_dependency(maybe.dependency) + + def register_import(self, name: str) -> MaybeGraph: + maybe = self._resolver.resolve_import(name) + if not maybe.dependency: + return MaybeGraph(None, maybe.problems) + return self.register_dependency(maybe.dependency) + + def register_dependency(self, dependency: Dependency) -> MaybeGraph: # already registered ? - child_graph = self._locate_dependency(dependency) - if child_graph is not None: - self._dependencies[dependency] = child_graph - return child_graph + maybe = self.locate_dependency(dependency.path) + if maybe.graph is not None: + self._dependencies[dependency] = maybe.graph + return maybe # nay, create the child graph and populate it child_graph = DependencyGraph(dependency, self, self._resolver) self._dependencies[dependency] = child_graph container = dependency.load() if not container: - return None - container.build_dependency_graph(child_graph) - return child_graph - - def _locate_dependency(self, dependency: Dependency) -> DependencyGraph | None: - return self.locate_dependency(dependency.path) + problem = DependencyProblem('dependency-register-failed', 'Failed to register dependency', dependency.path) + return MaybeGraph(child_graph, [problem]) + problems = container.build_dependency_graph(child_graph) + return MaybeGraph(child_graph, problems) - def locate_dependency(self, path: Path) -> DependencyGraph | None: + def locate_dependency(self, path: Path) -> MaybeGraph: # need a list since unlike JS, Python won't let you assign closure variables found: list[DependencyGraph] = [] # TODO https://github.com/databrickslabs/ucx/issues/1287 @@ -89,7 +76,7 @@ def check_registered_dependency(graph): return False self.root.visit(check_registered_dependency, set()) - return found[0] if len(found) > 0 else None + return MaybeGraph(found[0] if found else None, []) @property def root(self): @@ -117,7 +104,7 @@ def all_paths(self) -> set[Path]: return {d.path for d in self.all_dependencies} # when visit_node returns True it interrupts the visit - def visit(self, visit_node: Callable[[DependencyGraph], bool | None], visited: set[Path]) -> bool | None: + def visit(self, visit_node: Callable[[DependencyGraph], bool | None], visited: set[Path]) -> bool: if self.path in visited: return False visited.add(self.path) @@ -128,24 +115,24 @@ def visit(self, visit_node: Callable[[DependencyGraph], bool | None], visited: s return True return False - def build_graph_from_python_source(self, python_code: str, problem_collector: Callable[[DependencyProblem], None]): + def build_graph_from_python_source(self, python_code: str) -> MaybeGraph: linter = ASTLinter.parse(python_code) calls = linter.locate(ast.Call, [("run", ast.Attribute), ("notebook", ast.Attribute), ("dbutils", ast.Name)]) + problems: list[DependencyProblem] = [] for call in calls: assert isinstance(call, ast.Call) path = PythonLinter.get_dbutils_notebook_run_path_arg(call) if isinstance(path, ast.Constant): path = path.value.strip().strip("'").strip('"') - call_problems: list[DependencyProblem] = [] - self.register_notebook(Path(path), call_problems.append) - for problem in call_problems: + maybe = self.register_notebook(Path(path)) + for problem in maybe.problems: problem = problem.replace( start_line=call.lineno, start_col=call.col_offset, end_line=call.end_lineno or 0, end_col=call.end_col_offset or 0, ) - problem_collector(problem) + problems.append(problem) else: problem = DependencyProblem( code='dependency-not-constant', @@ -155,19 +142,18 @@ def build_graph_from_python_source(self, python_code: str, problem_collector: Ca end_line=call.end_lineno or 0, end_col=call.end_col_offset or 0, ) - problem_collector(problem) - for pair in PythonLinter.list_import_sources(linter): - import_problems: list[DependencyProblem] = [] - self.register_import(pair[0], import_problems.append) - node = pair[1] - for problem in import_problems: + problems.append(problem) + for name, node in PythonLinter.list_import_sources(linter): + maybe = self.register_import(name) + for problem in maybe.problems: problem = problem.replace( start_line=node.lineno, start_col=node.col_offset, end_line=node.end_lineno or 0, end_col=node.end_col_offset or 0, ) - problem_collector(problem) + problems.append(problem) + return MaybeGraph(self, problems) class Dependency(abc.ABC): @@ -224,34 +210,26 @@ class BaseDependencyResolver(abc.ABC): def __init__(self, next_resolver: BaseDependencyResolver | None): self._next_resolver = next_resolver - self._problems: list[DependencyProblem] = [] @abc.abstractmethod def with_next_resolver(self, resolver: BaseDependencyResolver) -> BaseDependencyResolver: raise NotImplementedError() - @property - def problems(self): - return self._problems - - def add_problems(self, problems: list[DependencyProblem]): - self._problems.extend(problems) - @property def next_resolver(self): return self._next_resolver - def resolve_notebook(self, path: Path, problem_collector: Callable[[DependencyProblem], None]) -> Dependency | None: + def resolve_notebook(self, path: Path) -> MaybeDependency: assert self._next_resolver is not None - return self._next_resolver.resolve_notebook(path, problem_collector) + return self._next_resolver.resolve_notebook(path) def resolve_local_file(self, path: Path) -> MaybeDependency: assert self._next_resolver is not None return self._next_resolver.resolve_local_file(path) - def resolve_import(self, name: str, problem_collector: Callable[[DependencyProblem], None]) -> Dependency | None: + def resolve_import(self, name: str) -> MaybeDependency: assert self._next_resolver is not None - return self._next_resolver.resolve_import(name, problem_collector) + return self._next_resolver.resolve_import(name) class StubResolver(BaseDependencyResolver): @@ -262,14 +240,18 @@ def __init__(self): def with_next_resolver(self, resolver: BaseDependencyResolver) -> BaseDependencyResolver: raise NotImplementedError("Should never happen!") - def resolve_notebook(self, path: Path, problem_collector: Callable[[DependencyProblem], None]) -> Dependency | None: - return None + def resolve_notebook(self, path: Path) -> MaybeDependency: + return self._fail('notebook-not-found', f"Notebook not found: {path.as_posix()}") def resolve_local_file(self, path: Path) -> MaybeDependency: - return FailedDependency(path, 'file-not-found', f"File not found: {path.as_posix()}") + return self._fail('file-not-found', f"File not found: {path.as_posix()}") + + def resolve_import(self, name: str) -> MaybeDependency: + return self._fail('import-not-found', f"Could not locate import: {name}") - def resolve_import(self, name: str, problem_collector: Callable[[DependencyProblem], None]) -> Dependency | None: - return None + @staticmethod + def _fail(code: str, message: str): + return MaybeDependency(None, [DependencyProblem(code, message)]) @dataclass @@ -278,16 +260,6 @@ class MaybeDependency: problems: list[DependencyProblem] -class SomeDependency(MaybeDependency): - def __init__(self, dependency: Dependency): - super().__init__(dependency, []) - - -class FailedDependency(MaybeDependency): - def __init__(self, path: Path, code: str, message: str): - super().__init__(None, [DependencyProblem(code, message, path)]) - - class DependencyResolver: def __init__(self, resolvers: list[BaseDependencyResolver]): previous: BaseDependencyResolver = StubResolver() @@ -296,48 +268,14 @@ def __init__(self, resolvers: list[BaseDependencyResolver]): previous = resolver self._resolver: BaseDependencyResolver = previous - def resolve_notebook( - self, path: Path, problem_collector: Callable[[DependencyProblem], None] | None = None - ) -> Dependency | None: - problems: list[DependencyProblem] = [] - dependency = self._resolver.resolve_notebook(path, problems.append) - if dependency is None: - problem = DependencyProblem('notebook-not-found', f"Notebook not found: {path.as_posix()}") - problems.append(problem) - if problem_collector: - for problem in problems: - problem_collector(problem) - else: - self.add_problems(problems) - return dependency + def resolve_notebook(self, path: Path) -> MaybeDependency: + return self._resolver.resolve_notebook(path) def resolve_local_file(self, path: Path) -> MaybeDependency: return self._resolver.resolve_local_file(path) - def resolve_import( - self, name: str, problem_collector: Callable[[DependencyProblem], None] | None = None - ) -> Dependency | None: - problems: list[DependencyProblem] = [] - dependency = self._resolver.resolve_import(name, problems.append) - if dependency is None: - problem = DependencyProblem('import-not-found', f"Could not locate import: {name}") - problems.append(problem) - if problem_collector: - for problem in problems: - problem_collector(problem) - else: - self.add_problems(problems) - return dependency - - @property - def problems(self) -> Iterable[DependencyProblem]: - resolver = self._resolver - while resolver is not None: - yield from resolver.problems - resolver = resolver.next_resolver - - def add_problems(self, problems: list[DependencyProblem]): - self._resolver.add_problems(problems) + def resolve_import(self, name: str) -> MaybeDependency: + return self._resolver.resolve_import(name) MISSING_SOURCE_PATH = "" @@ -381,7 +319,7 @@ class MaybeGraph: @property def failed(self): - return self.graph is None + return len(self.problems) > 0 class DependencyGraphBuilder: @@ -389,28 +327,26 @@ class DependencyGraphBuilder: def __init__(self, resolver: DependencyResolver): self._resolver = resolver - @property - def problems(self): - return self._resolver.problems - def build_local_file_dependency_graph(self, path: Path) -> MaybeGraph: - resolution = self._resolver.resolve_local_file(path) - if not resolution.dependency: - return MaybeGraph(None, resolution.problems) - graph = DependencyGraph(resolution.dependency, None, self._resolver) - container = resolution.dependency.load() + maybe = self._resolver.resolve_local_file(path) + if not maybe.dependency: + return MaybeGraph(None, maybe.problems) + graph = DependencyGraph(maybe.dependency, None, self._resolver) + container = maybe.dependency.load() if container is not None: problems = container.build_dependency_graph(graph) if problems: return MaybeGraph(None, problems) return MaybeGraph(graph, []) - def build_notebook_dependency_graph(self, path: Path) -> DependencyGraph | None: - dependency = self._resolver.resolve_notebook(path) - if dependency is None: - return None - graph = DependencyGraph(dependency, None, self._resolver) - container = dependency.load() + def build_notebook_dependency_graph(self, path: Path) -> MaybeGraph: + maybe = self._resolver.resolve_notebook(path) + if not maybe.dependency: + return MaybeGraph(None, maybe.problems) + graph = DependencyGraph(maybe.dependency, None, self._resolver) + container = maybe.dependency.load() if container is not None: - container.build_dependency_graph(graph) - return graph + problems = container.build_dependency_graph(graph) + if problems: + return MaybeGraph(None, problems) + return MaybeGraph(graph, []) diff --git a/src/databricks/labs/ucx/source_code/notebooks/cells.py b/src/databricks/labs/ucx/source_code/notebooks/cells.py index 49d4f82d5b..bd3c3fec23 100644 --- a/src/databricks/labs/ucx/source_code/notebooks/cells.py +++ b/src/databricks/labs/ucx/source_code/notebooks/cells.py @@ -5,11 +5,9 @@ from ast import parse as parse_python from enum import Enum from pathlib import Path -from collections.abc import Callable from sqlglot import parse as parse_sql, ParseError as SQLParseError from databricks.sdk.service.workspace import Language -from databricks.labs.ucx.source_code.graph import DependencyGraph, DependencyProblem - +from databricks.labs.ucx.source_code.graph import DependencyGraph, DependencyProblem, MaybeGraph # use a specific logger for sqlglot warnings so we can disable them selectively sqlglot_logger = logging.getLogger(f"{__name__}.sqlglot") @@ -55,7 +53,7 @@ def is_runnable(self) -> bool: raise NotImplementedError() @abstractmethod - def build_dependency_graph(self, parent: DependencyGraph, problem_collector: Callable[[DependencyProblem], None]): + def build_dependency_graph(self, parent: DependencyGraph) -> MaybeGraph: raise NotImplementedError() @@ -72,8 +70,8 @@ def is_runnable(self) -> bool: except SyntaxError: return True - def build_dependency_graph(self, parent: DependencyGraph, problem_collector: Callable[[DependencyProblem], None]): - parent.build_graph_from_python_source(self._original_code, problem_collector) + def build_dependency_graph(self, parent: DependencyGraph) -> MaybeGraph: + return parent.build_graph_from_python_source(self._original_code) class RCell(Cell): @@ -85,8 +83,8 @@ def language(self): def is_runnable(self) -> bool: return True # TODO - def build_dependency_graph(self, parent: DependencyGraph, problem_collector: Callable[[DependencyProblem], None]): - pass # not in scope + def build_dependency_graph(self, parent: DependencyGraph) -> MaybeGraph: + return MaybeGraph(None, []) class ScalaCell(Cell): @@ -98,8 +96,8 @@ def language(self): def is_runnable(self) -> bool: return True # TODO - def build_dependency_graph(self, parent: DependencyGraph, problem_collector: Callable[[DependencyProblem], None]): - pass # TODO + def build_dependency_graph(self, parent: DependencyGraph) -> MaybeGraph: + return MaybeGraph(None, []) class SQLCell(Cell): @@ -116,8 +114,8 @@ def is_runnable(self) -> bool: sqlglot_logger.warning(f"Failed to parse SQL using 'sqlglot': {self._original_code}", exc_info=e) return True - def build_dependency_graph(self, parent: DependencyGraph, problem_collector: Callable[[DependencyProblem], None]): - pass # not in scope + def build_dependency_graph(self, parent: DependencyGraph) -> MaybeGraph: + return MaybeGraph(None, []) class MarkdownCell(Cell): @@ -129,8 +127,8 @@ def language(self): def is_runnable(self) -> bool: return True # TODO - def build_dependency_graph(self, parent: DependencyGraph, problem_collector: Callable[[DependencyProblem], None]): - pass # not in scope + def build_dependency_graph(self, parent: DependencyGraph) -> MaybeGraph: + return MaybeGraph(None, []) class RunCell(Cell): @@ -142,7 +140,7 @@ def language(self): def is_runnable(self) -> bool: return True # TODO - def build_dependency_graph(self, parent: DependencyGraph, problem_collector: Callable[[DependencyProblem], None]): + def build_dependency_graph(self, parent: DependencyGraph) -> MaybeGraph: command = f'{LANGUAGE_PREFIX}{self.language.magic_name}' lines = self._original_code.split('\n') for idx, line in enumerate(lines): @@ -152,15 +150,22 @@ def build_dependency_graph(self, parent: DependencyGraph, problem_collector: Cal path = path.strip().strip("'").strip('"') if len(path) == 0: continue - problems: list[DependencyProblem] = [] - parent.register_notebook(Path(path), problems.append) + maybe = parent.register_notebook(Path(path)) + if not maybe.problems: + return maybe start_line = self._original_offset + idx + 1 - for problem in problems: - problem = problem.replace( - start_line=start_line, start_col=0, end_line=start_line, end_col=len(line) - ) - problem_collector(problem) - return + return MaybeGraph( + maybe.graph, + [ + problem.replace( + start_line=start_line, + start_col=0, + end_line=start_line, + end_col=len(line), + ) + for problem in maybe.problems + ], + ) start_line = self._original_offset + 1 problem = DependencyProblem( 'invalid-run-cell', @@ -170,7 +175,7 @@ def build_dependency_graph(self, parent: DependencyGraph, problem_collector: Cal end_line=start_line, end_col=len(self._original_code), ) - problem_collector(problem) + return MaybeGraph(None, [problem]) def migrate_notebook_path(self): pass @@ -185,11 +190,8 @@ def language(self): def is_runnable(self) -> bool: return True # TODO - def build_dependency_graph(self, parent: DependencyGraph, problem_collector: Callable[[DependencyProblem], None]): - pass # nothing to do - - def migrate_notebook_path(self): - pass # nothing to do + def build_dependency_graph(self, parent: DependencyGraph) -> MaybeGraph: + return MaybeGraph(None, []) class PipCell(Cell): @@ -201,11 +203,8 @@ def language(self): def is_runnable(self) -> bool: return True # TODO - def build_dependency_graph(self, parent: DependencyGraph, problem_collector: Callable[[DependencyProblem], None]): - pass # nothing to do - - def migrate_notebook_path(self): - pass + def build_dependency_graph(self, parent: DependencyGraph) -> MaybeGraph: + return MaybeGraph(None, []) class CellLanguage(Enum): diff --git a/src/databricks/labs/ucx/source_code/notebooks/loaders.py b/src/databricks/labs/ucx/source_code/notebooks/loaders.py index 6ae700f6cd..2e12d8b8c2 100644 --- a/src/databricks/labs/ucx/source_code/notebooks/loaders.py +++ b/src/databricks/labs/ucx/source_code/notebooks/loaders.py @@ -2,7 +2,6 @@ import abc from pathlib import Path -from collections.abc import Callable from databricks.sdk import WorkspaceClient from databricks.sdk.service.workspace import ObjectType, ObjectInfo, ExportFormat @@ -10,10 +9,10 @@ from databricks.labs.ucx.source_code.files import FileLoader from databricks.labs.ucx.source_code.graph import ( BaseDependencyResolver, - DependencyProblem, Dependency, DependencyLoader, SourceContainer, + MaybeDependency, ) from databricks.labs.ucx.source_code.notebooks.sources import Notebook @@ -27,10 +26,11 @@ def __init__(self, notebook_loader: NotebookLoader, next_resolver: BaseDependenc def with_next_resolver(self, resolver: BaseDependencyResolver) -> BaseDependencyResolver: return NotebookResolver(self._notebook_loader, resolver) - def resolve_notebook(self, path: Path, problem_collector: Callable[[DependencyProblem], None]) -> Dependency | None: + def resolve_notebook(self, path: Path) -> MaybeDependency: if self._notebook_loader.is_notebook(path): - return Dependency(self._notebook_loader, path) - return super().resolve_notebook(path, problem_collector) + dependency = Dependency(self._notebook_loader, path) + return MaybeDependency(dependency, []) + return super().resolve_notebook(path) class NotebookLoader(DependencyLoader, abc.ABC): diff --git a/src/databricks/labs/ucx/source_code/notebooks/sources.py b/src/databricks/labs/ucx/source_code/notebooks/sources.py index fa95de803d..58f282df04 100644 --- a/src/databricks/labs/ucx/source_code/notebooks/sources.py +++ b/src/databricks/labs/ucx/source_code/notebooks/sources.py @@ -61,7 +61,9 @@ def to_migrated_code(self): def build_dependency_graph(self, parent: DependencyGraph) -> list[DependencyProblem]: problems: list[DependencyProblem] = [] for cell in self._cells: - cell.build_dependency_graph(parent, problems.append) + maybe = cell.build_dependency_graph(parent) + if maybe.problems: + problems.extend(maybe.problems) return problems diff --git a/src/databricks/labs/ucx/source_code/site_packages.py b/src/databricks/labs/ucx/source_code/site_packages.py index 47c00e172b..6fbb75bc98 100644 --- a/src/databricks/labs/ucx/source_code/site_packages.py +++ b/src/databricks/labs/ucx/source_code/site_packages.py @@ -3,7 +3,6 @@ import os from dataclasses import dataclass from pathlib import Path -from collections.abc import Callable from databricks.labs.ucx.source_code.files import FileLoader, SysPathProvider from databricks.labs.ucx.source_code.graph import ( Dependency, @@ -12,6 +11,7 @@ DependencyGraph, BaseDependencyResolver, DependencyProblem, + MaybeDependency, ) @@ -32,12 +32,13 @@ def __init__( def with_next_resolver(self, resolver: BaseDependencyResolver) -> BaseDependencyResolver: return SitePackagesResolver(self._site_packages, self._file_loader, self._syspath_provider, resolver) - def resolve_import(self, name: str, problem_collector: Callable[[DependencyProblem], None]) -> Dependency | None: + def resolve_import(self, name: str) -> MaybeDependency: site_package = self._site_packages[name] if site_package is not None: container = SitePackageContainer(self._file_loader, site_package) - return Dependency(WrappingLoader(container), Path(name)) - return super().resolve_import(name, problem_collector) + dependency = Dependency(WrappingLoader(container), Path(name)) + return MaybeDependency(dependency, []) + return super().resolve_import(name) class SitePackageContainer(SourceContainer): @@ -49,7 +50,9 @@ def __init__(self, file_loader: FileLoader, site_package: SitePackage): def build_dependency_graph(self, parent: DependencyGraph) -> list[DependencyProblem]: problems: list[DependencyProblem] = [] for module_path in self._site_package.module_paths: - parent.register_dependency(Dependency(self._file_loader, module_path)) + maybe = parent.register_dependency(Dependency(self._file_loader, module_path)) + if maybe.problems: + problems.extend(maybe.problems) return problems diff --git a/src/databricks/labs/ucx/source_code/whitelist.py b/src/databricks/labs/ucx/source_code/whitelist.py index 64e10d6d1a..ceca61510e 100644 --- a/src/databricks/labs/ucx/source_code/whitelist.py +++ b/src/databricks/labs/ucx/source_code/whitelist.py @@ -5,7 +5,7 @@ from dataclasses import dataclass, field from enum import Enum from pathlib import Path -from collections.abc import Callable, Iterable +from collections.abc import Iterable from yaml import load_all as load_yaml, Loader from databricks.labs.ucx.source_code.graph import ( @@ -15,6 +15,7 @@ DependencyGraph, BaseDependencyResolver, DependencyProblem, + MaybeDependency, ) @@ -28,30 +29,31 @@ def with_next_resolver(self, resolver: BaseDependencyResolver) -> BaseDependency return WhitelistResolver(self._whitelist, resolver) # TODO problem_collector is tactical, pending https://github.com/databrickslabs/ucx/issues/1559 - def resolve_import(self, name: str, problem_collector: Callable[[DependencyProblem], None]) -> Dependency | None: - if self._is_whitelisted(name): + def resolve_import(self, name: str) -> MaybeDependency: + problem, ok = self._is_whitelisted(name) + if problem is not None: + return MaybeDependency(None, [problem]) + if ok: container = StubContainer() - return Dependency(WrappingLoader(container), Path(name)) - return super().resolve_import(name, problem_collector) + dependency = Dependency(WrappingLoader(container), Path(name)) + return MaybeDependency(dependency, []) + return super().resolve_import(name) - def _is_whitelisted(self, name: str) -> bool: + def _is_whitelisted(self, name: str) -> tuple[DependencyProblem | None, bool]: compatibility = self._whitelist.compatibility(name) # TODO attach compatibility to dependency, see https://github.com/databrickslabs/ucx/issues/1382 if compatibility is None: - return False + return None, False if compatibility == UCCompatibility.NONE: # TODO move to linter, see https://github.com/databrickslabs/ucx/issues/1527 - self._problems.append( + return ( DependencyProblem( code="dependency-check", message=f"Use of dependency {name} is deprecated", - start_line=0, - start_col=0, - end_line=0, - end_col=0, - ) + ), + True, ) - return True + return None, True class StubContainer(SourceContainer): diff --git a/tests/unit/source_code/test_dependencies.py b/tests/unit/source_code/test_dependencies.py index ef642d2d4a..a1bab3f211 100644 --- a/tests/unit/source_code/test_dependencies.py +++ b/tests/unit/source_code/test_dependencies.py @@ -136,8 +136,8 @@ def get_status_side_effect(*args): ] ) builder = DependencyGraphBuilder(dependency_resolver) - builder.build_notebook_dependency_graph(Path("root1.run.py.txt")) - assert list(builder.problems) == [ + maybe = builder.build_notebook_dependency_graph(Path("root1.run.py.txt")) + assert list(maybe.problems) == [ DependencyProblem( 'notebook-not-found', 'Notebook not found: leaf2.py.txt', Path("root1.run.py.txt"), 19, 0, 19, 21 ) @@ -172,8 +172,8 @@ def is_file_side_effect(*args): ] ) builder = DependencyGraphBuilder(dependency_resolver) - builder.build_notebook_dependency_graph(Path(paths[0])) - assert list(builder.problems) == [ + maybe = builder.build_notebook_dependency_graph(Path(paths[0])) + assert list(maybe.problems) == [ DependencyProblem('notebook-not-found', 'Notebook not found: leaf3.py.txt', Path(paths[0]), 1, 0, 1, 38) ] @@ -206,8 +206,8 @@ def is_file_side_effect(*args): ] ) builder = DependencyGraphBuilder(dependency_resolver) - builder.build_notebook_dependency_graph(Path(paths[0])) - assert list(builder.problems) == [ + maybe = builder.build_notebook_dependency_graph(Path(paths[0])) + assert list(maybe.problems) == [ DependencyProblem( 'dependency-not-constant', "Can't check dependency not provided as a constant", Path(paths[0]), 2, 0, 2, 35 ) @@ -237,8 +237,8 @@ def test_dependency_graph_builder_raises_problem_with_invalid_run_cell(): ] ) builder = DependencyGraphBuilder(dependency_resolver) - builder.build_notebook_dependency_graph(Path(paths[0])) - assert list(builder.problems) == [ + maybe = builder.build_notebook_dependency_graph(Path(paths[0])) + assert list(maybe.problems) == [ DependencyProblem('invalid-run-cell', 'Missing notebook path in %run command', Path("leaf6.py.txt"), 5, 0, 5, 4) ] @@ -283,8 +283,8 @@ def test_dependency_graph_builder_raises_problem_with_unresolved_import(): ] ) builder = DependencyGraphBuilder(dependency_resolver) - builder.build_local_file_dependency_graph(Path(paths[0])) - assert list(builder.problems) == [ + maybe = builder.build_local_file_dependency_graph(Path(paths[0])) + assert list(maybe.problems) == [ DependencyProblem( 'import-not-found', 'Could not locate import: some_library', Path("root7.py.txt"), 1, 0, 1, 19 ) @@ -309,8 +309,8 @@ def test_dependency_graph_builder_raises_problem_with_non_constant_notebook_argu ] ) builder = DependencyGraphBuilder(dependency_resolver) - builder.build_local_file_dependency_graph(Path(paths[0])) - assert list(builder.problems) == [ + maybe = builder.build_local_file_dependency_graph(Path(paths[0])) + assert list(maybe.problems) == [ DependencyProblem( 'dependency-not-constant', "Can't check dependency not provided as a constant", @@ -365,12 +365,12 @@ def test_dependency_graph_builder_skips_builtin_dependencies(): maybe = builder.build_local_file_dependency_graph(Path("python_builtins.py.txt")) assert not maybe.failed graph = maybe.graph - child = graph.locate_dependency(Path("os")) - assert child - assert len(child.local_dependencies) == 0 - child = graph.locate_dependency(Path("pathlib")) - assert child - assert len(child.local_dependencies) == 0 + maybe = graph.locate_dependency(Path("os")) + assert not maybe.failed + assert len(maybe.graph.local_dependencies) == 0 + maybe = graph.locate_dependency(Path("pathlib")) + assert not maybe.failed + assert len(maybe.graph.local_dependencies) == 0 def test_dependency_graph_builder_ignores_known_dependencies(): @@ -394,7 +394,8 @@ def test_dependency_graph_builder_ignores_known_dependencies(): maybe = builder.build_local_file_dependency_graph(Path("python_builtins.py.txt")) assert not maybe.failed graph = maybe.graph - assert not graph.locate_dependency(Path("databricks")) + maybe_graph = graph.locate_dependency(Path("databricks")) + assert maybe_graph.failed def test_dependency_graph_builder_visits_site_packages(empty_index): @@ -439,8 +440,8 @@ def test_dependency_graph_builder_raises_problem_with_unfound_root_file(empty_in ] ) builder = DependencyGraphBuilder(dependency_resolver) - builder.build_local_file_dependency_graph(Path("root1.run.py.txt")) - assert list(builder.problems) == [DependencyProblem('file-not-found', 'File not found: root1.run.py.txt')] + maybe = builder.build_local_file_dependency_graph(Path("root1.run.py.txt")) + assert list(maybe.problems) == [DependencyProblem('file-not-found', 'File not found: root1.run.py.txt')] file_loader.load_dependency.assert_not_called() @@ -464,6 +465,6 @@ def test_dependency_graph_builder_raises_problem_with_unfound_root_notebook(empt ] ) builder = DependencyGraphBuilder(dependency_resolver) - builder.build_notebook_dependency_graph(Path("root2.run.py.txt")) - assert list(builder.problems) == [DependencyProblem('notebook-not-found', 'Notebook not found: root2.run.py.txt')] + maybe = builder.build_notebook_dependency_graph(Path("root2.run.py.txt")) + assert list(maybe.problems) == [DependencyProblem('notebook-not-found', 'Notebook not found: root2.run.py.txt')] file_loader.load_dependency.assert_not_called() diff --git a/tests/unit/source_code/test_notebook.py b/tests/unit/source_code/test_notebook.py index ac6c0868b0..e03de6e05a 100644 --- a/tests/unit/source_code/test_notebook.py +++ b/tests/unit/source_code/test_notebook.py @@ -139,9 +139,9 @@ def test_notebook_builds_leaf_dependency_graph(): NotebookResolver(notebook_loader), ] ) - dependency = dependency_resolver.resolve_notebook(Path(paths[0])) - graph = DependencyGraph(dependency, None, dependency_resolver) - container = dependency.load() + maybe = dependency_resolver.resolve_notebook(Path(paths[0])) + graph = DependencyGraph(maybe.dependency, None, dependency_resolver) + container = maybe.dependency.load() container.build_dependency_graph(graph) assert {str(path) for path in graph.all_paths} == {"leaf1.py.txt"} @@ -163,9 +163,9 @@ def test_notebook_builds_depth1_dependency_graph(): NotebookResolver(notebook_loader), ] ) - dependency = dependency_resolver.resolve_notebook(Path(paths[0])) - graph = DependencyGraph(dependency, None, dependency_resolver) - container = dependency.load() + maybe = dependency_resolver.resolve_notebook(Path(paths[0])) + graph = DependencyGraph(maybe.dependency, None, dependency_resolver) + container = maybe.dependency.load() container.build_dependency_graph(graph) actual = {path[2:] if path.startswith('./') else path for path in (str(path) for path in graph.all_paths)} assert actual == set(paths) @@ -183,9 +183,9 @@ def test_notebook_builds_depth2_dependency_graph(): NotebookResolver(notebook_loader), ] ) - dependency = dependency_resolver.resolve_notebook(Path(paths[0])) - graph = DependencyGraph(dependency, None, dependency_resolver) - container = dependency.load() + maybe = dependency_resolver.resolve_notebook(Path(paths[0])) + graph = DependencyGraph(maybe.dependency, None, dependency_resolver) + container = maybe.dependency.load() container.build_dependency_graph(graph) actual = {path[2:] if path.startswith('./') else path for path in (str(path) for path in graph.all_paths)} assert actual == set(paths) @@ -204,9 +204,9 @@ def test_notebook_builds_dependency_graph_avoiding_duplicates(): NotebookResolver(notebook_loader), ] ) - dependency = dependency_resolver.resolve_notebook(Path(paths[0])) - graph = DependencyGraph(dependency, None, dependency_resolver) - container = dependency.load() + maybe = dependency_resolver.resolve_notebook(Path(paths[0])) + graph = DependencyGraph(maybe.dependency, None, dependency_resolver) + container = maybe.dependency.load() container.build_dependency_graph(graph) # if visited once only, set and list will have same len assert len(set(visited)) == len(visited) @@ -225,9 +225,9 @@ def test_notebook_builds_cyclical_dependency_graph(): NotebookResolver(notebook_loader), ] ) - dependency = dependency_resolver.resolve_notebook(Path(paths[0])) - graph = DependencyGraph(dependency, None, dependency_resolver) - container = dependency.load() + maybe = dependency_resolver.resolve_notebook(Path(paths[0])) + graph = DependencyGraph(maybe.dependency, None, dependency_resolver) + container = maybe.dependency.load() container.build_dependency_graph(graph) actual = {path[2:] if path.startswith('./') else path for path in (str(path) for path in graph.all_paths)} assert actual == set(paths) @@ -245,9 +245,9 @@ def test_notebook_builds_python_dependency_graph(): NotebookResolver(notebook_loader), ] ) - dependency = dependency_resolver.resolve_notebook(Path(paths[0])) - graph = DependencyGraph(dependency, None, dependency_resolver) - container = dependency.load() + maybe = dependency_resolver.resolve_notebook(Path(paths[0])) + graph = DependencyGraph(maybe.dependency, None, dependency_resolver) + container = maybe.dependency.load() container.build_dependency_graph(graph) actual = {path[2:] if path.startswith('./') else path for path in (str(path) for path in graph.all_paths)} assert actual == set(paths) diff --git a/tests/unit/source_code/test_s3fs.py b/tests/unit/source_code/test_s3fs.py index e101675e0c..4e0b2d31ee 100644 --- a/tests/unit/source_code/test_s3fs.py +++ b/tests/unit/source_code/test_s3fs.py @@ -28,10 +28,11 @@ DependencyProblem( code='dependency-check', message=S3FS_DEPRECATION_MESSAGE, - start_line=0, + source_path=Path('path.py.txt'), + start_line=1, start_col=0, - end_line=0, - end_col=0, + end_line=1, + end_col=11, ) ], ), @@ -41,10 +42,11 @@ DependencyProblem( code='dependency-check', message=S3FS_DEPRECATION_MESSAGE, - start_line=0, + source_path=Path('path.py.txt'), + start_line=1, start_col=0, - end_line=0, - end_col=0, + end_line=1, + end_col=26, ) ], ), @@ -56,10 +58,11 @@ DependencyProblem( code='dependency-check', message=S3FS_DEPRECATION_MESSAGE, - start_line=0, + source_path=Path('path.py.txt'), + start_line=1, start_col=0, - end_line=0, - end_col=0, + end_line=1, + end_col=18, ) ], ), @@ -70,10 +73,11 @@ DependencyProblem( code='dependency-check', message=S3FS_DEPRECATION_MESSAGE, - start_line=0, - start_col=0, - end_line=0, - end_col=0, + source_path=Path('path.py.txt'), + start_line=2, + start_col=4, + end_line=2, + end_col=15, ) ], ), @@ -83,10 +87,11 @@ DependencyProblem( code='dependency-check', message=S3FS_DEPRECATION_MESSAGE, - start_line=0, + source_path=Path('path.py.txt'), + start_line=1, start_col=0, - end_line=0, - end_col=0, + end_line=1, + end_col=16, ) ], ), @@ -96,10 +101,11 @@ DependencyProblem( code='dependency-check', message='Use of dependency s3fs.subpackage is deprecated', - start_line=0, + source_path=Path('path.py.txt'), + start_line=1, start_col=0, - end_line=0, - end_col=0, + end_line=1, + end_col=37, ) ], ), @@ -118,9 +124,8 @@ def test_detect_s3fs_import(empty_index, source: str, expected: list[DependencyP ] ) builder = DependencyGraphBuilder(dependency_resolver) - builder.build_local_file_dependency_graph(Path("path")) - problems: list[DependencyProblem] = list(dependency_resolver.problems) - assert problems == expected + maybe = builder.build_local_file_dependency_graph(Path("path")) + assert maybe.problems == expected @pytest.mark.parametrize( @@ -130,10 +135,11 @@ def test_detect_s3fs_import(empty_index, source: str, expected: list[DependencyP DependencyProblem( code='dependency-check', message='Use of dependency s3fs is deprecated', - start_line=0, + source_path=Path('root9.py.txt'), + start_line=1, start_col=0, - end_line=0, - end_col=0, + end_line=1, + end_col=12, ), ], ), @@ -151,6 +157,5 @@ def test_detect_s3fs_import_in_dependencies(empty_index, expected: list[Dependen ] ) builder = DependencyGraphBuilder(dependency_resolver) - builder.build_local_file_dependency_graph(Path("root9.py.txt")) - problems: list[DependencyProblem] = list(dependency_resolver.problems) - assert problems == expected + maybe = builder.build_local_file_dependency_graph(Path("root9.py.txt")) + assert maybe.problems == expected