Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixed errors related to unsupported cell languages #3026

Merged
merged 2 commits into from
Oct 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 30 additions & 9 deletions src/databricks/labs/ucx/source_code/jobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import shutil
import tempfile
from abc import ABC, abstractmethod
from collections.abc import Generator, Iterable
from collections.abc import Generator, Iterable, Callable
from contextlib import contextmanager
from dataclasses import dataclass
from datetime import datetime, timezone
Expand Down Expand Up @@ -651,14 +651,15 @@ def _collect_from_source(
path: Path,
inherited_tree: Tree | None,
) -> Iterable[T]:
iterable: Iterable[T] | None = None
if language is CellLanguage.SQL:
iterable = self._collect_from_sql(source)
if language is CellLanguage.PYTHON:
iterable = self._collect_from_python(source, inherited_tree)
if iterable is None:
logger.warning(f"Language {language.name} not supported yet!")
return
else:
fn: Callable[[str], Iterable[T]] | None = getattr(self, f"_collect_from_{language.name.lower()}", None)
if not fn:
raise ValueError(f"Language {language.name} not supported yet!")
# the below is for disabling a false pylint positive
# pylint: disable=not-callable
iterable = fn(source)
src_timestamp = datetime.fromtimestamp(path.stat().st_mtime, timezone.utc)
src_id = str(path)
for item in iterable:
Expand All @@ -667,8 +668,28 @@ def _collect_from_source(
@abstractmethod
def _collect_from_python(self, source: str, inherited_tree: Tree | None) -> Iterable[T]:
    """Collect items from Python ``source``; concrete subclasses must implement this.

    Unlike the other ``_collect_from_<language>`` hooks, this one also receives
    ``inherited_tree``.
    NOTE(review): the meaning of ``inherited_tree`` is not visible here, confirm
    against the implementers.
    """
    ...

@abstractmethod
def _collect_from_sql(self, source: str) -> Iterable[T]: ...
def _collect_from_sql(self, _source: str) -> Iterable[T]:
    """Default SQL hook: ignore ``_source`` and return an empty list.

    NOTE(review): presumably subclasses that can analyse SQL override this, confirm.
    """
    return []

def _collect_from_r(self, _source: str) -> Iterable[T]:
    """Hook for R cells: log a warning that R is unsupported and return an empty list."""
    # Warn rather than raise, so an R cell yields no items instead of an error.
    logger.warning("Language R not supported yet!")
    return []

def _collect_from_scala(self, _source: str) -> Iterable[T]:
    """Hook for Scala cells: log a warning that Scala is unsupported and return an empty list."""
    # Warn rather than raise, so a Scala cell yields no items instead of an error.
    logger.warning("Language scala not supported yet!")
    return []

def _collect_from_shell(self, _source: str) -> Iterable[T]:
    """Hook for shell cells: ignore ``_source`` and return an empty list, without logging."""
    return []

def _collect_from_markdown(self, _source: str) -> Iterable[T]:
    """Hook for markdown cells: ignore ``_source`` and return an empty list, without logging."""
    return []

def _collect_from_run(self, _source: str) -> Iterable[T]:
    """Hook for run cells: ignore ``_source`` and return an empty list, without logging."""
    return []

def _collect_from_pip(self, _source: str) -> Iterable[T]:
    """Hook for pip cells: ignore ``_source`` and return an empty list, without logging."""
    return []


class DfsaCollectorWalker(_CollectorWalker[DirectFsAccess]):
Expand Down
24 changes: 23 additions & 1 deletion tests/unit/source_code/linters/test_directfs.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,18 @@
from collections.abc import Iterable
from pathlib import Path
from unittest.mock import create_autospec

import pytest

from databricks.labs.ucx.source_code.base import Deprecation, Advice, CurrentSessionState, Failure
from databricks.labs.ucx.source_code.base import Deprecation, Advice, CurrentSessionState, Failure, DirectFsAccess
from databricks.labs.ucx.source_code.graph import DependencyGraph
from databricks.labs.ucx.source_code.jobs import DfsaCollectorWalker
from databricks.labs.ucx.source_code.linters.directfs import (
DIRECT_FS_ACCESS_PATTERNS,
DirectFsAccessPyLinter,
DirectFsAccessSqlLinter,
)
from databricks.labs.ucx.source_code.notebooks.cells import CellLanguage


@pytest.mark.parametrize(
Expand Down Expand Up @@ -145,3 +152,18 @@ def test_dfsa_queries_failure(query: str) -> None:
end_col=1024,
),
]


class _TestCollectorWalker(DfsaCollectorWalker):
    """Test-only walker that exposes the protected ``_collect_from_source`` hook.

    It derives from ``DfsaCollectorWalker`` because that class is public.
    """

    def collect_from_source(self, language: CellLanguage) -> Iterable[DirectFsAccess]:
        """Run ``_collect_from_source`` for ``language`` with fixed placeholder inputs."""
        placeholder_source = "empty"
        placeholder_path = Path("")
        no_inherited_tree = None
        return self._collect_from_source(placeholder_source, language, placeholder_path, no_inherited_tree)


@pytest.mark.parametrize("language", list(CellLanguage))
def test_collector_supports_all_cell_languages(language, mock_path_lookup, migration_index):
    """Collecting from a source must not raise for any ``CellLanguage`` member.

    The test is parametrized over every member of ``CellLanguage``, so a newly
    added language without a matching collector hook fails here.
    """
    graph = create_autospec(DependencyGraph)
    collector = _TestCollectorWalker(graph, set(), mock_path_lookup, CurrentSessionState(), migration_index)
    # Drain the result so the collection runs to completion even if it is lazy.
    list(collector.collect_from_source(language))
    # Assert only after exercising the collector: checked before any use, a
    # fresh mock has trivially never been called and the assertion cannot fail.
    graph.assert_not_called()
Loading