Skip to content

Commit

Permalink
Added workflow linter
Browse files Browse the repository at this point in the history
  • Loading branch information
nfx committed May 3, 2024
1 parent 27835b1 commit a3f4c52
Show file tree
Hide file tree
Showing 7 changed files with 86 additions and 53 deletions.
24 changes: 11 additions & 13 deletions src/databricks/labs/ucx/source_code/files.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
DependencyLoader,
Dependency,
BaseDependencyResolver,
MaybeDependency,
)

logger = logging.getLogger(__name__)
Expand All @@ -31,12 +32,13 @@ def __init__(self, path: Path, source: str, language: Language):
# using CellLanguage so we can reuse the facilities it provides
self._language = CellLanguage.of_language(language)

def build_dependency_graph(self, parent: DependencyGraph) -> None:
def build_dependency_graph(self, parent: DependencyGraph) -> list[DependencyProblem]:
if self._language is not CellLanguage.PYTHON:
logger.warning(f"Unsupported language: {self._language.language}")
return
return []
# TODO replace the below with parent.build_graph_from_python_source
# can only be done after https://github.com/databrickslabs/ucx/issues/1287
problems: list[DependencyProblem] = []
linter = ASTLinter.parse(self._original_code)
run_notebook_calls = PythonLinter.list_dbutils_notebook_run_calls(linter)
for call in run_notebook_calls:
Expand All @@ -45,7 +47,7 @@ def build_dependency_graph(self, parent: DependencyGraph) -> None:
if isinstance(notebook_path_arg, ast.Constant):
notebook_path = notebook_path_arg.value
parent.register_notebook(Path(notebook_path), call_problems.append)
call_problems = [
problems += [
problem.replace(
source_path=self._path,
start_line=call.lineno,
Expand All @@ -55,7 +57,6 @@ def build_dependency_graph(self, parent: DependencyGraph) -> None:
)
for problem in call_problems
]
parent.add_problems(call_problems)
continue
problem = DependencyProblem(
code='dependency-not-constant',
Expand All @@ -66,14 +67,14 @@ def build_dependency_graph(self, parent: DependencyGraph) -> None:
end_line=call.end_lineno or 0,
end_col=call.end_col_offset or 0,
)
parent.add_problems([problem])
problems.append(problem)
# TODO https://github.com/databrickslabs/ucx/issues/1287
for pair in PythonLinter.list_import_sources(linter):
import_name = pair[0]
import_problems: list[DependencyProblem] = []
parent.register_import(import_name, import_problems.append)
node = pair[1]
import_problems = [
problems += [
problem.replace(
source_path=self._path,
start_line=node.lineno,
Expand All @@ -83,7 +84,7 @@ def build_dependency_graph(self, parent: DependencyGraph) -> None:
)
for problem in import_problems
]
parent.add_problems(import_problems)
return problems


class LocalFileMigrator:
Expand Down Expand Up @@ -176,13 +177,10 @@ def __init__(self, file_loader: FileLoader, next_resolver: BaseDependencyResolve
def with_next_resolver(self, resolver: BaseDependencyResolver) -> BaseDependencyResolver:
return LocalFileResolver(self._file_loader, resolver)

# TODO problem_collector is tactical, pending https://github.com/databrickslabs/ucx/issues/1559
def resolve_local_file(
self, path: Path, problem_collector: Callable[[DependencyProblem], None]
) -> Dependency | None:
def resolve_local_file(self, path: Path) -> MaybeDependency:
if self._file_loader.exists(path) and not self._file_loader.is_notebook(path):
return Dependency(self._file_loader, path)
return super().resolve_local_file(path, problem_collector)
return MaybeDependency(Dependency(self._file_loader, path), [])
return super().resolve_local_file(path)

def resolve_import(self, name: str, problem_collector: Callable[[DependencyProblem], None]) -> Dependency | None:
fullpath = self._file_loader.full_path(Path(f"{name}.py"))
Expand Down
75 changes: 44 additions & 31 deletions src/databricks/labs/ucx/source_code/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ def register_import(
return self.register_dependency(resolved)

def register_dependency(self, dependency: Dependency):
# TODO: return MaybeGraph
# already registered ?
child_graph = self._locate_dependency(dependency)
if child_graph is not None:
Expand Down Expand Up @@ -192,7 +193,7 @@ def load(self) -> SourceContainer | None:
class SourceContainer(abc.ABC):

@abc.abstractmethod
def build_dependency_graph(self, parent: DependencyGraph) -> None:
def build_dependency_graph(self, parent: DependencyGraph) -> list[DependencyProblem]:
raise NotImplementedError()


Expand Down Expand Up @@ -244,11 +245,9 @@ def resolve_notebook(self, path: Path, problem_collector: Callable[[DependencyPr
assert self._next_resolver is not None
return self._next_resolver.resolve_notebook(path, problem_collector)

def resolve_local_file(
self, path: Path, problem_collector: Callable[[DependencyProblem], None]
) -> Dependency | None:
def resolve_local_file(self, path: Path) -> MaybeDependency:
assert self._next_resolver is not None
return self._next_resolver.resolve_local_file(path, problem_collector)
return self._next_resolver.resolve_local_file(path)

def resolve_import(self, name: str, problem_collector: Callable[[DependencyProblem], None]) -> Dependency | None:
assert self._next_resolver is not None
Expand All @@ -266,15 +265,29 @@ def with_next_resolver(self, resolver: BaseDependencyResolver) -> BaseDependency
def resolve_notebook(self, path: Path, problem_collector: Callable[[DependencyProblem], None]) -> Dependency | None:
return None

def resolve_local_file(
self, path: Path, problem_collector: Callable[[DependencyProblem], None]
) -> Dependency | None:
return None
def resolve_local_file(self, path: Path) -> MaybeDependency:
return FailedDependency(path, 'file-not-found', f"File not found: {path.as_posix()}")

def resolve_import(self, name: str, problem_collector: Callable[[DependencyProblem], None]) -> Dependency | None:
return None


@dataclass
class MaybeDependency:
dependency: Dependency | None
problems: list[DependencyProblem]


class SomeDependency(MaybeDependency):
def __init__(self, dependency: Dependency):
super().__init__(dependency, [])


class FailedDependency(MaybeDependency):
def __init__(self, path: Path, code: str, message: str):
super().__init__(None, [DependencyProblem(code, message, path)])


class DependencyResolver:
def __init__(self, resolvers: list[BaseDependencyResolver]):
previous: BaseDependencyResolver = StubResolver()
Expand All @@ -298,20 +311,8 @@ def resolve_notebook(
self.add_problems(problems)
return dependency

def resolve_local_file(
self, path: Path, problem_collector: Callable[[DependencyProblem], None] | None = None
) -> Dependency | None:
problems: list[DependencyProblem] = []
dependency = self._resolver.resolve_local_file(path, problems.append)
if dependency is None:
problem = DependencyProblem('file-not-found', f"File not found: {path.as_posix()}")
problems.append(problem)
if problem_collector:
for problem in problems:
problem_collector(problem)
else:
self.add_problems(problems)
return dependency
def resolve_local_file(self, path: Path) -> MaybeDependency:
return self._resolver.resolve_local_file(path)

def resolve_import(
self, name: str, problem_collector: Callable[[DependencyProblem], None] | None = None
Expand Down Expand Up @@ -373,6 +374,16 @@ def replace(
)


@dataclass
class MaybeGraph:
graph: DependencyGraph | None
problems: list[DependencyProblem]

@property
def failed(self):
return self.graph is None


class DependencyGraphBuilder:

def __init__(self, resolver: DependencyResolver):
Expand All @@ -382,15 +393,17 @@ def __init__(self, resolver: DependencyResolver):
def problems(self):
return self._resolver.problems

def build_local_file_dependency_graph(self, path: Path) -> DependencyGraph | None:
dependency = self._resolver.resolve_local_file(path)
if dependency is None:
return None
graph = DependencyGraph(dependency, None, self._resolver)
container = dependency.load()
def build_local_file_dependency_graph(self, path: Path) -> MaybeGraph:
resolution = self._resolver.resolve_local_file(path)
if not resolution.dependency:
return MaybeGraph(None, resolution.problems)
graph = DependencyGraph(resolution.dependency, None, self._resolver)
container = resolution.dependency.load()
if container is not None:
container.build_dependency_graph(graph)
return graph
problems = container.build_dependency_graph(graph)
if problems:
return MaybeGraph(None, problems)
return MaybeGraph(graph, [])

def build_notebook_dependency_graph(self, path: Path) -> DependencyGraph | None:
dependency = self._resolver.resolve_notebook(path)
Expand Down
16 changes: 15 additions & 1 deletion src/databricks/labs/ucx/source_code/jobs.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,20 @@
from databricks.sdk import WorkspaceClient

from databricks.labs.ucx.source_code.graph import DependencyResolver
from databricks.labs.ucx.source_code.notebooks.loaders import NotebookLoader


class WorkflowLinter:
def __init__(self, ws: WorkspaceClient):
def __init__(self, ws: WorkspaceClient, dependency_resolver: DependencyResolver):
self._ws = ws
self._dependency_resolver = dependency_resolver

def lint(self, job_id: int):
job = self._ws.jobs.get(job_id)
for task in job.settings.tasks:
if task.notebook_task:
self._lint_notebook(job_id)

notebook = NotebookLoader(self._ws).load(job_id)
graph = self._dependency_graph_builder.build(notebook)
return graph.lint()
4 changes: 2 additions & 2 deletions src/databricks/labs/ucx/source_code/notebooks/sources.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,11 +58,11 @@ def to_migrated_code(self):
sources.append('') # following join will append lf
return '\n'.join(sources)

def build_dependency_graph(self, parent: DependencyGraph) -> None:
def build_dependency_graph(self, parent: DependencyGraph) -> list[DependencyProblem]:
problems: list[DependencyProblem] = []
for cell in self._cells:
cell.build_dependency_graph(parent, problems.append)
parent.add_problems(problems)
return problems


class NotebookLinter:
Expand Down
4 changes: 3 additions & 1 deletion src/databricks/labs/ucx/source_code/site_packages.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,11 @@ def __init__(self, file_loader: FileLoader, site_package: SitePackage):
self._file_loader = file_loader
self._site_package = site_package

def build_dependency_graph(self, parent: DependencyGraph) -> None:
def build_dependency_graph(self, parent: DependencyGraph) -> list[DependencyProblem]:
problems: list[DependencyProblem] = []
for module_path in self._site_package.module_paths:
parent.register_dependency(Dependency(self._file_loader, module_path))
return problems


class SitePackages:
Expand Down
4 changes: 2 additions & 2 deletions src/databricks/labs/ucx/source_code/whitelist.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,8 @@ def _is_whitelisted(self, name: str) -> bool:

class StubContainer(SourceContainer):

def build_dependency_graph(self, parent: DependencyGraph) -> None:
pass
def build_dependency_graph(self, parent: DependencyGraph) -> list[DependencyProblem]:
return []


class UCCompatibility(Enum):
Expand Down
12 changes: 9 additions & 3 deletions tests/unit/source_code/test_dependencies.py
Original file line number Diff line number Diff line change
Expand Up @@ -362,7 +362,9 @@ def test_dependency_graph_builder_skips_builtin_dependencies():
]
)
builder = DependencyGraphBuilder(dependency_resolver)
graph = builder.build_local_file_dependency_graph(Path("python_builtins.py.txt"))
maybe = builder.build_local_file_dependency_graph(Path("python_builtins.py.txt"))
assert not maybe.failed
graph = maybe.graph
child = graph.locate_dependency(Path("os"))
assert child
assert len(child.local_dependencies) == 0
Expand All @@ -389,7 +391,9 @@ def test_dependency_graph_builder_ignores_known_dependencies():
]
)
builder = DependencyGraphBuilder(dependency_resolver)
graph = builder.build_local_file_dependency_graph(Path("python_builtins.py.txt"))
maybe = builder.build_local_file_dependency_graph(Path("python_builtins.py.txt"))
assert not maybe.failed
graph = maybe.graph
assert not graph.locate_dependency(Path("databricks"))


Expand All @@ -412,7 +416,9 @@ def test_dependency_graph_builder_visits_site_packages(empty_index):
]
)
builder = DependencyGraphBuilder(dependency_resolver)
graph = builder.build_local_file_dependency_graph(Path("import-site-package.py.txt"))
maybe = builder.build_local_file_dependency_graph(Path("import-site-package.py.txt"))
assert not maybe.failed
graph = maybe.graph
assert graph.locate_dependency(Path(site_packages_path, "certifi/core.py"))
assert not graph.locate_dependency(Path("core.py"))
assert not graph.locate_dependency(Path("core"))
Expand Down

0 comments on commit a3f4c52

Please sign in to comment.