Skip to content

Commit

Permalink
Use stack of dependency resolvers (#1560)
Browse files Browse the repository at this point in the history
## Changes
Implement resolvers as a stack
Implements an initial version of SysPathProvider
Eliminates horrible hacks introduced in previous PRs 

### Linked issues
#1202
Resolves #1499
Resolves #1421


### Functionality 

- [ ] added relevant user documentation
- [ ] added new CLI command
- [ ] modified existing command: `databricks labs ucx ...`
- [ ] added a new workflow
- [ ] modified existing workflow: `...`
- [ ] added a new table
- [ ] modified existing table: `...`

### Tests

- [ ] manually tested
- [x] added unit tests
- [ ] added integration tests
- [ ] verified on staging environment (screenshot attached)

@nfx this PR does not address the tactical `problem_collector`
parameter, I have created a specific issue for that
#1559

---------

Co-authored-by: Serge Smertin <[email protected]>
  • Loading branch information
ericvergnaud and nfx authored May 1, 2024
1 parent 5af13a8 commit da71371
Show file tree
Hide file tree
Showing 24 changed files with 1,129 additions and 662 deletions.
4 changes: 4 additions & 0 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,10 @@ unit test coverage suite and the clear difference between _unit tests_ and _inte

See https://mypy.readthedocs.io/en/stable/cheat_sheet_py3.html for more details

### `if typing.TYPE_CHECKING:` usage

We don't use `if typing.TYPE_CHECKING:` in our codebase, because PyCharm and `mypy` can handle it without it and `Refactoring -> Move` may produce incorrect results.

### ..., expression has type "None", variable has type "str"

* Add `assert ... is not None` if it's a body of a method. Example:
Expand Down
58 changes: 58 additions & 0 deletions src/databricks/labs/ucx/contexts/application.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,11 @@
from databricks.labs.ucx.hive_metastore.udfs import UdfsCrawler
from databricks.labs.ucx.hive_metastore.verification import VerifyHasMetastore
from databricks.labs.ucx.installer.workflows import DeployedWorkflows
from databricks.labs.ucx.source_code.notebooks.loaders import NotebookResolver, NotebookLoader, WorkspaceNotebookLoader
from databricks.labs.ucx.source_code.files import FileLoader, LocalFileResolver, SysPathProvider
from databricks.labs.ucx.source_code.graph import DependencyResolver, DependencyGraphBuilder
from databricks.labs.ucx.source_code.whitelist import WhitelistResolver, Whitelist
from databricks.labs.ucx.source_code.site_packages import SitePackagesResolver, SitePackages
from databricks.labs.ucx.source_code.languages import Languages
from databricks.labs.ucx.workspace_access import generic, redash
from databricks.labs.ucx.workspace_access.groups import GroupManager
Expand Down Expand Up @@ -334,3 +339,56 @@ def workspace_info(self):
@cached_property
def verify_has_metastore(self):
return VerifyHasMetastore(self.workspace_client)

@cached_property
def notebook_loader(self) -> NotebookLoader:
return WorkspaceNotebookLoader(self.workspace_client)

@cached_property
def notebook_resolver(self):
return NotebookResolver(self.notebook_loader)

@cached_property
def site_packages(self):
# TODO: actually load the site packages
return SitePackages([])

@cached_property
def syspath_provider(self):
return SysPathProvider.from_sys_path()

@cached_property
def file_loader(self):
return FileLoader(self.syspath_provider)

@cached_property
def site_packages_resolver(self):
return SitePackagesResolver(self.site_packages, self.file_loader, self.syspath_provider)

@cached_property
def whitelist(self):
# TODO: fill in the whitelist
return Whitelist()

@cached_property
def whitelist_resolver(self):
return WhitelistResolver(self.whitelist)

@cached_property
def file_resolver(self):
return LocalFileResolver(self.file_loader)

@cached_property
def dependency_resolver(self):
return DependencyResolver(
[
self.notebook_resolver,
self.site_packages_resolver,
self.whitelist_resolver,
self.file_resolver,
]
)

@cached_property
def dependency_graph_builder(self):
return DependencyGraphBuilder(self.dependency_resolver)
5 changes: 5 additions & 0 deletions src/databricks/labs/ucx/contexts/cli_command.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from databricks.labs.ucx.azure.locations import ExternalLocationsMigration
from databricks.labs.ucx.azure.resources import AzureAPIClient, AzureResources
from databricks.labs.ucx.contexts.application import GlobalContext
from databricks.labs.ucx.source_code.notebooks.loaders import NotebookLoader, LocalNotebookLoader
from databricks.labs.ucx.source_code.files import LocalFileMigrator
from databricks.labs.ucx.workspace_access.clusters import ClusterAccess

Expand Down Expand Up @@ -168,6 +169,10 @@ def iam_role_migration(self):
self.iam_credential_manager,
)

@cached_property
def notebook_loader(self) -> NotebookLoader:
return LocalNotebookLoader(self.syspath_provider)


class AccountContext(CliContext):
def __init__(self, ac: AccountClient, named_parameters: dict[str, str] | None = None):
Expand Down
2 changes: 2 additions & 0 deletions src/databricks/labs/ucx/source_code/base.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from __future__ import annotations

from abc import abstractmethod
from collections.abc import Iterable
from dataclasses import dataclass
Expand Down
112 changes: 0 additions & 112 deletions src/databricks/labs/ucx/source_code/dependency_loaders.py

This file was deleted.

95 changes: 0 additions & 95 deletions src/databricks/labs/ucx/source_code/dependency_resolvers.py

This file was deleted.

Loading

0 comments on commit da71371

Please sign in to comment.