Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Infer values from child notebook loaded via dbutils.notebook.run() call #2083

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
7a3170f
append statements
ericvergnaud Jul 1, 2024
e56dda6
reuse python linters across cells
ericvergnaud Jul 1, 2024
fa9fb00
resolve cyclic import
ericvergnaud Jul 1, 2024
c8d05b0
fix merge issue
ericvergnaud Jul 2, 2024
f364b02
Merge branch 'main' into infer-values-across-notebook-cells
ericvergnaud Jul 3, 2024
c3cbc7b
successfully infers values from child notebook
ericvergnaud Jul 3, 2024
56e3530
successfully infers values from child notebook in run cell
ericvergnaud Jul 3, 2024
d9d3172
formatting
ericvergnaud Jul 3, 2024
1e5feff
formatting
ericvergnaud Jul 3, 2024
257fea6
PythonSequentialLinter is stateful so instantiate it when required
ericvergnaud Jul 4, 2024
dacf50a
Merge branch 'main' into infer-values-across-notebook-cells
ericvergnaud Jul 4, 2024
65072d6
Merge branch 'infer-values-across-notebook-cells' into infer-values-f…
ericvergnaud Jul 4, 2024
d244a5a
more tests
ericvergnaud Jul 3, 2024
1e9ed6a
peogress
ericvergnaud Jul 4, 2024
d6d9032
infer values from child notebooks loaded via dbutils.notebook.run
ericvergnaud Jul 4, 2024
d8089f0
fix crasher
ericvergnaud Jul 4, 2024
6c7f715
fix crasher
ericvergnaud Jul 4, 2024
e00d9fd
rename method
ericvergnaud Jul 8, 2024
14943fb
Merge branch 'main' into infer-values-from-child-notebook-in-dbutils-…
ericvergnaud Jul 8, 2024
e4fd7be
Merge branch 'main' into infer-values-from-child-notebook-in-dbutils-…
ericvergnaud Jul 9, 2024
fb358bf
Merge branch 'main' into infer-values-from-child-notebook-in-dbutils-…
ericvergnaud Jul 9, 2024
a6ac282
Merge branch 'main' into infer-values-from-child-notebook-in-dbutils-…
ericvergnaud Jul 10, 2024
5a8512d
Merge branch 'main' into infer-values-from-child-notebook-in-dbutils-…
ericvergnaud Jul 10, 2024
bea4042
address comments
ericvergnaud Jul 10, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 28 additions & 9 deletions src/databricks/labs/ucx/source_code/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from dataclasses import dataclass
from pathlib import Path

from astroid import AstroidSyntaxError, NodeNG # type: ignore
from astroid import AstroidSyntaxError, Module, NodeNG # type: ignore

from databricks.sdk.service import compute

Expand Down Expand Up @@ -205,23 +205,42 @@ def __init__(self, linters: list[PythonLinter]):

def lint(self, code: str) -> Iterable[Advice]:
try:
tree = Tree.normalize_and_parse(code)
if self._tree is None:
self._tree = tree
else:
tree = self._tree.append_statements(tree)
for linter in self._linters:
yield from linter.lint_tree(tree)
tree = self._parse_and_append(code)
yield from self.lint_tree(tree)
except AstroidSyntaxError as e:
yield Failure('syntax-error', str(e), 0, 0, 0, 0)

def lint_tree(self, tree: Tree) -> Iterable[Advice]:
for linter in self._linters:
yield from linter.lint_tree(tree)

def _parse_and_append(self, code: str) -> Tree:
tree = Tree.normalize_and_parse(code)
self.append_tree(tree)
return tree

def append_tree(self, tree: Tree):
if self._tree is None:
self._tree = Tree(Module("root"))
self._tree.append_tree(tree)

def append_nodes(self, nodes: list[NodeNG]):
if self._tree is None:
self._tree = Tree(Module("root"))
self._tree.append_nodes(nodes)

def append_globals(self, globs: dict):
if self._tree is None:
self._tree = Tree(Module("root"))
self._tree.append_globals(globs)

def process_child_cell(self, code: str):
try:
tree = Tree.normalize_and_parse(code)
if self._tree is None:
self._tree = tree
else:
self._tree.append_statements(tree)
self._tree.append_tree(tree)
except AstroidSyntaxError as e:
# error already reported when linting enclosing notebook
logger.warning(f"Failed to parse Python cell: {code}", exc_info=e)
12 changes: 6 additions & 6 deletions src/databricks/labs/ucx/source_code/jobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -381,16 +381,16 @@ def _lint_task(self, task: jobs.Task, job: jobs.Job):
if not container:
continue
if isinstance(container, Notebook):
yield from self._lint_notebook(container, ctx)
yield from self._lint_notebook(container, ctx, session_state)
if isinstance(container, LocalFile):
yield from self._lint_file(container, ctx)
yield from self._lint_file(container, ctx, session_state)

def _lint_file(self, file: LocalFile, ctx: LinterContext):
linter = FileLinter(ctx, self._path_lookup, file.path)
def _lint_file(self, file: LocalFile, ctx: LinterContext, session_state: CurrentSessionState):
linter = FileLinter(ctx, self._path_lookup, session_state, file.path)
for advice in linter.lint():
yield file.path, advice

def _lint_notebook(self, notebook: Notebook, ctx: LinterContext):
linter = NotebookLinter(ctx, self._path_lookup, notebook)
def _lint_notebook(self, notebook: Notebook, ctx: LinterContext, session_state: CurrentSessionState):
linter = NotebookLinter(ctx, self._path_lookup, session_state, notebook)
for advice in linter.lint():
yield notebook.path, advice
6 changes: 4 additions & 2 deletions src/databricks/labs/ucx/source_code/known.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from databricks.labs.blueprint.entrypoint import get_logger

from databricks.labs.ucx.hive_metastore.migration_status import MigrationIndex
from databricks.labs.ucx.source_code.base import CurrentSessionState
from databricks.labs.ucx.source_code.graph import DependencyProblem
from databricks.labs.ucx.source_code.linters.context import LinterContext
from databricks.labs.ucx.source_code.notebooks.sources import FileLinter
Expand Down Expand Up @@ -172,8 +173,9 @@ def _analyze_file(cls, known_distributions, library_root, dist_info, module_path
if module_ref.endswith(suffix):
module_ref = module_ref[: -len(suffix)]
logger.info(f"Processing module: {module_ref}")
ctx = LinterContext(empty_index)
linter = FileLinter(ctx, PathLookup.from_sys_path(module_path.parent), module_path)
session_state = CurrentSessionState()
ctx = LinterContext(empty_index, session_state)
linter = FileLinter(ctx, PathLookup.from_sys_path(module_path.parent), session_state, module_path)
known_problems = set()
for problem in linter.lint():
known_problems.add(KnownProblem(problem.code, problem.message))
Expand Down
2 changes: 1 addition & 1 deletion src/databricks/labs/ucx/source_code/linters/files.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ def _lint_one(self, path: Path) -> Iterable[LocatedAdvice]:
if path.is_dir():
return []
ctx = self._new_linter_context()
linter = FileLinter(ctx, self._path_lookup, path)
linter = FileLinter(ctx, self._path_lookup, self._session_state, path)
return [advice.for_path(path) for advice in linter.lint()]


Expand Down
17 changes: 10 additions & 7 deletions src/databricks/labs/ucx/source_code/linters/imports.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import abc
import logging
from collections.abc import Callable, Iterable
from pathlib import Path
from typing import TypeVar, cast

from astroid import ( # type: ignore
Expand All @@ -19,6 +20,7 @@
from databricks.labs.ucx.source_code.base import Advice, Advisory, CurrentSessionState, PythonLinter
from databricks.labs.ucx.source_code.linters.python_ast import Tree, NodeBase, TreeVisitor
from databricks.labs.ucx.source_code.linters.python_infer import InferredValue
from databricks.labs.ucx.source_code.path_lookup import PathLookup

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -161,17 +163,18 @@ def __init__(self, node: NodeNG, path: str, is_append: bool):
self._path = path
self._is_append = is_append

@property
def node(self):
return self._node

@property
def path(self):
return self._path

@property
def is_append(self):
return self._is_append
def apply_to(self, path_lookup: PathLookup):
path = Path(self._path)
if not path.is_absolute():
path = path_lookup.cwd / path
if self._is_append:
path_lookup.append_path(path)
return
path_lookup.prepend_path(path)


class AbsolutePath(SysPathChange):
Expand Down
27 changes: 18 additions & 9 deletions src/databricks/labs/ucx/source_code/linters/python_ast.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,20 +209,29 @@ def _get_attribute_value(cls, node: Attribute):
logger.debug(f"Missing handler for {name}")
return None

def append_statements(self, tree: Tree) -> Tree:
def append_tree(self, tree: Tree) -> Tree:
if not isinstance(tree.node, Module):
raise NotImplementedError(f"Can't append statements from {type(tree.node).__name__}")
raise NotImplementedError(f"Can't append tree from {type(tree.node).__name__}")
tree_module: Module = cast(Module, tree.node)
self.append_nodes(tree_module.body)
self.append_globals(tree_module.globals)
# the following may seem strange but it's actually ok to use the original module as tree root
return tree

def append_globals(self, globs: dict):
if not isinstance(self.node, Module):
raise NotImplementedError(f"Can't append statements to {type(self.node).__name__}")
raise NotImplementedError(f"Can't append globals to {type(self.node).__name__}")
self_module: Module = cast(Module, self.node)
for stmt in tree_module.body:
stmt.parent = self_module
self_module.body.append(stmt)
for name, value in tree_module.globals.items():
for name, value in globs.items():
self_module.globals[name] = value
# the following may seem strange but it's actually ok to use the original module as tree root
return tree

def append_nodes(self, nodes: list[NodeNG]):
if not isinstance(self.node, Module):
raise NotImplementedError(f"Can't append statements to {type(self.node).__name__}")
self_module: Module = cast(Module, self.node)
for node in nodes:
node.parent = self_module
self_module.body.append(node)

def is_from_module(self, module_name: str):
# if his is the call's root node, check it against the required module
Expand Down
10 changes: 2 additions & 8 deletions src/databricks/labs/ucx/source_code/notebooks/cells.py
Original file line number Diff line number Diff line change
Expand Up @@ -415,7 +415,7 @@ def build_graph_from_python_source(self, python_code: str) -> list[DependencyPro
magic_commands, command_problems = MagicCommand.extract_from_tree(tree, DependencyProblem.from_node)
problems.extend(command_problems)
nodes = syspath_changes + run_calls + import_sources + magic_commands
# need to execute things in intertwined sequence so concat and sort
# need to execute things in intertwined sequence so concat and sort them
for base_node in sorted(nodes, key=lambda node: (node.node.lineno, node.node.col_offset)):
for problem in self._process_node(base_node):
# Astroid line numbers are 1-based.
Expand Down Expand Up @@ -464,13 +464,7 @@ def _mutate_path_lookup(self, change: SysPathChange):
f"Can't update sys.path from {change.node.as_string()} because the expression cannot be computed",
)
return
path = Path(change.path)
if not path.is_absolute():
path = self._context.path_lookup.cwd / path
if change.is_append:
self._context.path_lookup.append_path(path)
return
self._context.path_lookup.prepend_path(path)
change.apply_to(self._context.path_lookup)


class MagicCommand(NodeBase):
Expand Down
Loading
Loading