Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve catch-all handling and avoid some pylint suppressions #1919

Merged
merged 28 commits into from
Jul 10, 2024
Merged
Show file tree
Hide file tree
Changes from 22 commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
2f7ec58
support magic commands in python cells
ericvergnaud Jun 13, 2024
e0796d6
address comments
ericvergnaud Jun 14, 2024
9a7c1bd
address comments
ericvergnaud Jun 14, 2024
fe7de63
Merge branch 'main' into support-magic-commands-in-python-cells
ericvergnaud Jun 14, 2024
f24ab05
fix merge issues
ericvergnaud Jun 14, 2024
5433d90
raise problem with unsupported magics
ericvergnaud Jun 14, 2024
13258d5
parse magic commands when linting
ericvergnaud Jun 14, 2024
5327656
remove cyclic imports
ericvergnaud Jun 17, 2024
330bcc6
move 'magic command' code to 'cells'
ericvergnaud Jun 17, 2024
8509813
Merge branch 'main' into support-magic-commands-in-python-cells
ericvergnaud Jun 17, 2024
a6a13c2
support ! as line magic prefix
ericvergnaud Jun 18, 2024
a862663
Merge branch 'main' into support-magic-commands-in-python-cells
ericvergnaud Jun 18, 2024
24851a2
Cache the regex used for some magic detection.
asnare Jun 19, 2024
da7133a
Access protected member under test via a friend class.
asnare Jun 19, 2024
5fce3ba
Target python parsing errors more narrowly, and test the way they're …
asnare Jun 19, 2024
2a169e1
Annotate some test fixture types.
asnare Jun 19, 2024
df1878c
Type hint the fixture.
asnare Jun 19, 2024
4f31198
Log exception on internal error for analysis and support.
asnare Jun 19, 2024
ef94c88
When an internal error occurs, return all commands/problems so far in…
asnare Jun 19, 2024
a42b314
Don't bother checking the logs; log-capturing seems to be flaky.
asnare Jun 19, 2024
274a2a9
Remove unused import.
asnare Jun 19, 2024
ef3ac4f
Add a (deliberately) failing test to highlight something that we don'…
asnare Jun 19, 2024
33de8a1
Merge branch 'main' into support-magic-commands-in-python-cells-no-cheat
asnare Jul 3, 2024
d023f05
Restore file from main; this branch does not intend to modify this file.
asnare Jul 3, 2024
63f6e49
Merge branch 'main' into support-magic-commands-in-python-cells-no-cheat
asnare Jul 3, 2024
3473b9f
Merge branch 'main' into support-magic-commands-in-python-cells-no-cheat
asnare Jul 9, 2024
6006d56
Merge branch 'main' into support-magic-commands-in-python-cells-no-cheat
asnare Jul 9, 2024
193a4d8
Merge branch 'main' into support-magic-commands-in-python-cells-no-cheat
nfx Jul 10, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
101 changes: 15 additions & 86 deletions src/databricks/labs/ucx/source_code/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,17 +6,10 @@
from pathlib import Path
from collections.abc import Callable

from astroid import ImportFrom, NodeNG # type: ignore

from databricks.labs.ucx.source_code.base import Advisory, CurrentSessionState
from databricks.labs.ucx.source_code.linters.imports import (
DbutilsLinter,
ImportSource,
NotebookRunCall,
SysPathChange,
UnresolvedPath,
from astroid import ( # type: ignore
NodeNG,
)
from databricks.labs.ucx.source_code.linters.python_ast import Tree, NodeBase
from databricks.labs.ucx.source_code.base import Advisory, CurrentSessionState
from databricks.labs.ucx.source_code.path_lookup import PathLookup

logger = logging.Logger(__name__)
Expand Down Expand Up @@ -138,7 +131,7 @@ def local_dependencies(self) -> set[Dependency]:
def all_paths(self) -> set[Path]:
# TODO: remove this public method, as it'll throw false positives
# for package imports, like certifi. a WorkflowTask is also a dependency,
# but it does not exist on a filesyste
# but it does not exist on a filesystem
return {d.path for d in self.all_dependencies}

def all_relative_names(self) -> set[str]:
Expand Down Expand Up @@ -167,84 +160,20 @@ def visit(self, visit_node: Callable[[DependencyGraph], bool | None], visited: s
return True
return False

def build_graph_from_python_source(self, python_code: str) -> list[DependencyProblem]:
"""Check python code for dependency-related problems.

Returns:
A list of dependency problems; position information is relative to the python code itself.
"""
problems: list[DependencyProblem] = []
try:
tree = Tree.parse(python_code)
except Exception as e: # pylint: disable=broad-except
problems.append(DependencyProblem('parse-error', f"Could not parse Python code: {e}"))
return problems
syspath_changes = SysPathChange.extract_from_tree(self._session_state, tree)
run_calls = DbutilsLinter.list_dbutils_notebook_run_calls(tree)
import_sources: list[ImportSource]
import_problems: list[DependencyProblem]
import_sources, import_problems = ImportSource.extract_from_tree(tree, DependencyProblem.from_node)
problems.extend(import_problems)
nodes = syspath_changes + run_calls + import_sources
# need to execute things in intertwined sequence so concat and sort
for base_node in sorted(nodes, key=lambda node: (node.node.lineno, node.node.col_offset)):
for problem in self._process_node(base_node):
# Astroid line numbers are 1-based.
problem = problem.replace(
start_line=base_node.node.lineno - 1,
start_col=base_node.node.col_offset,
end_line=(base_node.node.end_lineno or 1) - 1,
end_col=base_node.node.end_col_offset or 0,
)
problems.append(problem)
return problems

def _process_node(self, base_node: NodeBase):
if isinstance(base_node, SysPathChange):
yield from self._mutate_path_lookup(base_node)
elif isinstance(base_node, NotebookRunCall):
yield from self._register_notebook(base_node)
elif isinstance(base_node, ImportSource):
yield from self._register_import(base_node)
else:
logger.warning(f"Can't process {NodeBase.__name__} of type {type(base_node).__name__}")

def _register_import(self, base_node: ImportSource):
prefix = ""
if isinstance(base_node.node, ImportFrom) and base_node.node.level is not None:
prefix = "." * base_node.node.level
name = base_node.name or ""
yield from self.register_import(prefix + name)

def _register_notebook(self, base_node: NotebookRunCall):
has_unresolved, paths = base_node.get_notebook_paths(self._session_state)
if has_unresolved:
yield DependencyProblem(
'dependency-cannot-compute',
f"Can't check dependency from {base_node.node.as_string()} because the expression cannot be computed",
)
for path in paths:
yield from self.register_notebook(Path(path))

def _mutate_path_lookup(self, change: SysPathChange):
if isinstance(change, UnresolvedPath):
yield DependencyProblem(
'sys-path-cannot-compute',
f"Can't update sys.path from {change.node.as_string()} because the expression cannot be computed",
)
return
path = Path(change.path)
if not path.is_absolute():
path = self._path_lookup.cwd / path
if change.is_append:
self._path_lookup.append_path(path)
return
self._path_lookup.prepend_path(path)
def new_graph_builder_context(self):
return GraphBuilderContext(parent=self, path_lookup=self._path_lookup, session_state=self._session_state)

def __repr__(self):
return f"<DependencyGraph {self.path}>"


@dataclass
class GraphBuilderContext:
parent: DependencyGraph
path_lookup: PathLookup
session_state: CurrentSessionState


class Dependency(abc.ABC):

def __init__(self, loader: DependencyLoader, path: Path):
Expand Down Expand Up @@ -422,7 +351,7 @@ class DependencyProblem:
code: str
message: str
source_path: Path = Path(MISSING_SOURCE_PATH)
# Lines and colums are both 0-based: the first line is line 0.
# Lines and columns are both 0-based: the first line is line 0.
start_line: int = -1
start_col: int = -1
end_line: int = -1
Expand Down Expand Up @@ -470,7 +399,7 @@ def from_node(code: str, message: str, node: NodeNG) -> DependencyProblem:
start_line=(node.lineno or 1) - 1,
start_col=node.col_offset,
end_line=(node.end_lineno or 1) - 1,
end_col=(node.end_col_offset),
end_col=(node.end_col_offset or 0),
)


Expand Down
1 change: 1 addition & 0 deletions src/databricks/labs/ucx/source_code/linters/dbfs.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ def lint(self, code: str) -> Iterable[Advice]:
"""
Lints the code looking for file system paths that are deprecated
"""
code = Tree.convert_magic_lines_to_magic_commands(code)
tree = Tree.parse(code)
visitor = DetectDbfsVisitor(self._session_state)
visitor.visit(tree.node)
Expand Down
6 changes: 4 additions & 2 deletions src/databricks/labs/ucx/source_code/linters/files.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from databricks.labs.blueprint.tui import Prompts

from databricks.labs.ucx.source_code.linters.context import LinterContext
from databricks.labs.ucx.source_code.notebooks.cells import CellLanguage
from databricks.labs.ucx.source_code.notebooks.cells import CellLanguage, GraphBuilder
from databricks.labs.ucx.source_code.graph import (
BaseImportResolver,
BaseFileResolver,
Expand All @@ -40,7 +40,9 @@ def __init__(self, path: Path, source: str, language: Language):

def build_dependency_graph(self, parent: DependencyGraph) -> list[DependencyProblem]:
if self._language is CellLanguage.PYTHON:
return parent.build_graph_from_python_source(self._original_code)
context = parent.new_graph_builder_context()
builder = GraphBuilder(context)
return builder.build_graph_from_python_source(self._original_code)
# supported language that does not generate dependencies
if self._language is CellLanguage.SQL:
return []
Expand Down
11 changes: 6 additions & 5 deletions src/databricks/labs/ucx/source_code/linters/imports.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,15 +21,15 @@

logger = logging.getLogger(__name__)

P = TypeVar("P")
ProblemFactory = Callable[[str, str, NodeNG], P]
T = TypeVar("T")
ProblemFactory = Callable[[str, str, NodeNG], T]


class ImportSource(NodeBase):

@classmethod
def extract_from_tree(cls, tree: Tree, problem_factory: ProblemFactory) -> tuple[list[ImportSource], list[P]]:
problems: list[P] = []
def extract_from_tree(cls, tree: Tree, problem_factory: ProblemFactory) -> tuple[list[ImportSource], list[T]]:
problems: list[T] = []
sources: list[ImportSource] = []
try: # pylint: disable=too-many-try-statements
nodes = tree.locate(Import, [])
Expand Down Expand Up @@ -61,7 +61,7 @@ def _make_sources_for_import_from_nodes(cls, nodes: list[ImportFrom]) -> Iterabl
yield ImportSource(node, node.modname)

@classmethod
def _make_sources_for_import_call_nodes(cls, nodes: list[Call], problem_factory: ProblemFactory, problems: list[P]):
def _make_sources_for_import_call_nodes(cls, nodes: list[Call], problem_factory: ProblemFactory, problems: list[T]):
for node in nodes:
arg = node.args[0]
if isinstance(arg, Const):
Expand Down Expand Up @@ -116,6 +116,7 @@ def __init__(self, session_state: CurrentSessionState):
self._session_state = session_state

def lint(self, code: str) -> Iterable[Advice]:
code = Tree.convert_magic_lines_to_magic_commands(code)
tree = Tree.parse(code)
nodes = self.list_dbutils_notebook_run_calls(tree)
for node in nodes:
Expand Down
1 change: 1 addition & 0 deletions src/databricks/labs/ucx/source_code/linters/pyspark.py
Original file line number Diff line number Diff line change
Expand Up @@ -330,6 +330,7 @@ def name(self) -> str:

def lint(self, code: str) -> Iterable[Advice]:
try:
code = Tree.convert_magic_lines_to_magic_commands(code)
tree = Tree.parse(code)
except AstroidSyntaxError as e:
yield Failure('syntax-error', str(e), 0, 0, 0, 0)
Expand Down
10 changes: 10 additions & 0 deletions src/databricks/labs/ucx/source_code/linters/python_ast.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,16 @@ def parse(code: str):
root = parse(code)
return Tree(root)

@classmethod
def convert_magic_lines_to_magic_commands(cls, python_code: str):
lines = python_code.split("\n")
magic_markers = {"%", "!"}
for i, line in enumerate(lines):
if len(line) == 0 or line[0] not in magic_markers:
continue
lines[i] = f"magic_command({line.encode()!r})"
return "\n".join(lines)

def __init__(self, node: NodeNG):
self._node: NodeNG = node

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,7 @@ def __init__(self, is_serverless: bool = False):
]

def lint(self, code: str) -> Iterator[Advice]:
code = Tree.convert_magic_lines_to_magic_commands(code)
tree = Tree.parse(code)
for matcher in self._matchers:
yield from matcher.lint_tree(tree.node)
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ def __init__(self, dbr_version: tuple[int, int] | None):
def lint(self, code: str) -> Iterable[Advice]:
if self._skip_dbr:
return

code = Tree.convert_magic_lines_to_magic_commands(code)
tree = Tree.parse(code)
for node in tree.walk():
yield from self._linter.lint(node)
Loading
Loading