Skip to content

Commit

Permalink
Handle notebook unicode decode error
Browse files: browse the repository at this point in the history
  • Loading branch information
JCZuurmond committed Nov 26, 2024
1 parent 0f03525 commit ee6b648
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 3 deletions.
3 changes: 3 additions & 0 deletions src/databricks/labs/ucx/source_code/notebooks/loaders.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,9 @@ def load_dependency(self, path_lookup: PathLookup, dependency: Dependency) -> So
exc_info=True,
)
return None
except UnicodeDecodeError:
logger.warning(f"Cannot decode non-UTF-8 encoded notebook from workspace: {absolute_path}")
return None
language = self._detect_language(absolute_path, content)
if not language:
logger.warning(f"Could not detect language for {absolute_path}")
Expand Down
17 changes: 14 additions & 3 deletions tests/unit/source_code/notebooks/test_loaders.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from pathlib import Path
from unittest.mock import create_autospec

import pytest
from databricks.sdk.service.workspace import Language

from databricks.labs.ucx.source_code.graph import Dependency
Expand All @@ -24,9 +25,19 @@ def detect_language(cls, path: Path, content: str):
assert not NotebookLoaderForTesting.detect_language(Path("hi"), "stuff")


def test_notebook_loader_loads_dependency_with_permission_error(caplog) -> None:
@pytest.mark.parametrize(
"error, message",
[
(PermissionError("Permission denied"), "Permission error while reading notebook from workspace"),
(
UnicodeDecodeError("utf-8", b"\x80\x81\x82", 0, 1, "invalid start byte"),
"Cannot decode non-UTF-8 encoded notebook from workspace",
),
],
)
def test_notebook_loader_loads_dependency_raises_error(caplog, error: Exception, message: str) -> None:
path = create_autospec(Path)
path.read_text.side_effect = PermissionError("Permission denied")
path.read_text.side_effect = error
path_lookup = create_autospec(PathLookup)
path_lookup.resolve.return_value = path
dependency = create_autospec(Dependency)
Expand All @@ -35,5 +46,5 @@ def test_notebook_loader_loads_dependency_with_permission_error(caplog) -> None:
with caplog.at_level(logging.WARNING, logger="databricks.labs.ucx.source_code.notebooks.loaders"):
found = NotebookLoader().load_dependency(path_lookup, dependency)

assert f"Permission error while reading notebook from workspace: {path}" in caplog.text
assert f"{message}: {path}" in caplog.text
assert found is None

0 comments on commit ee6b648

Please sign in to comment.