diff --git a/src/databricks/labs/ucx/assessment/azure.py b/src/databricks/labs/ucx/assessment/azure.py index 69b75c398e..1af38db7ab 100644 --- a/src/databricks/labs/ucx/assessment/azure.py +++ b/src/databricks/labs/ucx/assessment/azure.py @@ -6,7 +6,7 @@ from databricks.labs.lsql.backends import SqlBackend from databricks.sdk import WorkspaceClient from databricks.sdk.errors import NotFound -from databricks.sdk.service.compute import ClusterSource, Policy +from databricks.sdk.service.compute import ClusterSource, DataSecurityMode, Policy from databricks.labs.ucx.assessment.crawlers import azure_sp_conf_present_check, logger from databricks.labs.ucx.assessment.jobs import JobsMixin @@ -30,6 +30,15 @@ class AzureServicePrincipalInfo: storage_account: str | None = None +@dataclass +class ServicePrincipalClusterMapping: + # this class is created separately as we need cluster to spn mapping + # Cluster id where the spn is used + cluster_id: str + # spn info data class + spn_info: set[AzureServicePrincipalInfo] + + class AzureServicePrincipalCrawler(CrawlerBase[AzureServicePrincipalInfo], JobsMixin, SecretsMixin): def __init__(self, ws: WorkspaceClient, sbe: SqlBackend, schema): super().__init__(sbe, "hive_metastore", schema, "azure_service_principals", AzureServicePrincipalInfo) @@ -171,3 +180,16 @@ def _get_azure_spn_from_config(self, config: dict) -> set[AzureServicePrincipalI ) ) return set_service_principals + + def get_cluster_to_storage_mapping(self): + # this function gives a mapping between an interactive cluster and the spn used by it + # either directly or through a cluster policy. + set_service_principals = set[AzureServicePrincipalInfo]() + spn_cluster_mapping = [] + for cluster in self._ws.clusters.list(): + if cluster.cluster_source != ClusterSource.JOB and ( + cluster.data_security_mode in [DataSecurityMode.LEGACY_SINGLE_USER, DataSecurityMode.NONE] + ): + set_service_principals = self._get_azure_spn_from_cluster_config(cluster) + spn_cluster_mapping.append(ServicePrincipalClusterMapping(cluster.cluster_id, set_service_principals)) + return spn_cluster_mapping diff --git a/src/databricks/labs/ucx/hive_metastore/__init__.py b/src/databricks/labs/ucx/hive_metastore/__init__.py index df09a1be5a..a65ba1d779 100644 --- a/src/databricks/labs/ucx/hive_metastore/__init__.py +++ b/src/databricks/labs/ucx/hive_metastore/__init__.py @@ -1,4 +1,3 @@ -from databricks.labs.ucx.hive_metastore.grants import GrantsCrawler from databricks.labs.ucx.hive_metastore.locations import ( ExternalLocations, Mounts, @@ -6,4 +5,4 @@ ) from databricks.labs.ucx.hive_metastore.tables import TablesCrawler -__all__ = ["TablesCrawler", "GrantsCrawler", "Mounts", "ExternalLocations", "TablesInMounts"] +__all__ = ["TablesCrawler", "Mounts", "ExternalLocations", "TablesInMounts"] diff --git a/src/databricks/labs/ucx/hive_metastore/grants.py b/src/databricks/labs/ucx/hive_metastore/grants.py index b6b2913d64..6800b00537 100644 --- a/src/databricks/labs/ucx/hive_metastore/grants.py +++ b/src/databricks/labs/ucx/hive_metastore/grants.py @@ -4,17 +4,42 @@ from dataclasses import dataclass from functools import partial +from databricks.labs.blueprint.installation import Installation from databricks.labs.blueprint.parallel import ManyError, Threads -from databricks.sdk.service.catalog import SchemaInfo, TableInfo - +from databricks.labs.lsql.backends import SqlBackend, StatementExecutionBackend +from databricks.sdk import WorkspaceClient +from databricks.sdk.errors import ResourceDoesNotExist +from 
databricks.sdk.service.catalog import ExternalLocationInfo, SchemaInfo, TableInfo + +from databricks.labs.ucx.assessment.azure import ( + AzureServicePrincipalCrawler, + AzureServicePrincipalInfo, +) +from databricks.labs.ucx.azure.access import ( + AzureResourcePermissions, + StoragePermissionMapping, +) +from databricks.labs.ucx.azure.resources import AzureAPIClient, AzureResources +from databricks.labs.ucx.config import WorkspaceConfig from databricks.labs.ucx.framework.crawlers import CrawlerBase from databricks.labs.ucx.framework.utils import escape_sql_identifier -from databricks.labs.ucx.hive_metastore.tables import TablesCrawler +from databricks.labs.ucx.hive_metastore.locations import ( + ExternalLocations, + Mount, + Mounts, +) +from databricks.labs.ucx.hive_metastore.tables import Table, TablesCrawler from databricks.labs.ucx.hive_metastore.udfs import UdfsCrawler logger = logging.getLogger(__name__) +@dataclass +class ClusterLocationMapping: + cluster_id: str + locations: dict[str, str] + + @dataclass(frozen=True) class Grant: principal: str @@ -127,6 +152,7 @@ def uc_grant_sql(self, object_type: str | None = None, object_key: str | None = ("TABLE", "SELECT"): self._uc_action("SELECT"), ("TABLE", "MODIFY"): self._uc_action("MODIFY"), ("TABLE", "READ_METADATA"): self._uc_action("BROWSE"), + ("TABLE", "ALL PRIVILEGES"): self._uc_action("ALL PRIVILEGES"), ("TABLE", "OWN"): self._set_owner_sql, ("VIEW", "SELECT"): self._uc_action("SELECT"), ("VIEW", "READ_METADATA"): self._uc_action("BROWSE"), @@ -307,3 +333,225 @@ def grants( # TODO: https://github.com/databrickslabs/ucx/issues/406 logger.error(f"Couldn't fetch grants for object {on_type} {key}: {e}") return [] + + +class AzureACL: + def __init__( + self, + ws: WorkspaceClient, + backend: SqlBackend, + spn_crawler: AzureServicePrincipalCrawler, + resource_permissions: AzureResourcePermissions, + ): + self._backend = backend + self._ws = ws + self._spn_crawler = spn_crawler + self._resource_permissions = resource_permissions + + @classmethod + def for_cli(cls, ws: WorkspaceClient, installation: Installation): + config = installation.load(WorkspaceConfig) + sql_backend = StatementExecutionBackend(ws, config.warehouse_id) + locations = ExternalLocations(ws, sql_backend, config.inventory_database) + azure_client = AzureAPIClient( + ws.config.arm_environment.resource_manager_endpoint, + ws.config.arm_environment.service_management_endpoint, + ) + graph_client = AzureAPIClient("https://graph.microsoft.com", "https://graph.microsoft.com") + azurerm = AzureResources(azure_client, graph_client) + resource_permissions = AzureResourcePermissions(installation, ws, azurerm, locations) + spn_crawler = AzureServicePrincipalCrawler(ws, sql_backend, config.inventory_database) + return cls(ws, sql_backend, spn_crawler, resource_permissions) + + def get_eligible_locations_principals(self) -> dict[str, dict]: + cluster_locations = {} + eligible_locations = {} + spn_cluster_mapping = self._spn_crawler.get_cluster_to_storage_mapping() + if len(spn_cluster_mapping) == 0: + # if there are no interactive clusters , then return empty grants + logger.info("No interactive cluster found with spn configured") + return {} + external_locations = list(self._ws.external_locations.list()) + if len(external_locations) == 0: + # if there are no external locations, then throw an error to run migrate_locations cli command + msg = ( + "No external location found, If hive metastore tables are created in external storage, " + "ensure migrate-locations cli cmd is run to 
create the required locations." + ) + logger.error(msg) + raise ResourceDoesNotExist(msg) from None + + permission_mappings = self._resource_permissions.load() + if len(permission_mappings) == 0: + # if permission mapping is empty, raise an error to run principal_prefix cmd + msg = ( + "No storage permission file found. Please ensure principal-prefix-access cli " + "cmd is run to create the access permission file." + ) + logger.error(msg) + raise ResourceDoesNotExist(msg) from None + + for cluster_spn in spn_cluster_mapping: + for spn in cluster_spn.spn_info: + eligible_locations.update(self._get_external_locations(spn, external_locations, permission_mappings)) + cluster_locations[cluster_spn.cluster_id] = eligible_locations + return cluster_locations + + def _get_external_locations( + self, + spn: AzureServicePrincipalInfo, + external_locations: list[ExternalLocationInfo], + permission_mappings: list[StoragePermissionMapping], + ) -> dict[str, str]: + matching_location = {} + for location in external_locations: + if location.url is None: + continue + for permission_mapping in permission_mappings: + prefix = permission_mapping.prefix + if ( + location.url.startswith(permission_mapping.prefix) + and permission_mapping.client_id == spn.application_id + and spn.storage_account is not None + # check for storage account name starting after @ in the prefix url + and prefix[prefix.index('@') + 1 :].startswith(spn.storage_account) + ): + matching_location[location.url] = permission_mapping.privilege + return matching_location + + +class PrincipalACL: + def __init__( + self, + ws: WorkspaceClient, + backend: SqlBackend, + installation: Installation, + tables_crawler: TablesCrawler, + mounts_crawler: Mounts, + cluster_locations: dict[str, dict], + ): + self._backend = backend + self._ws = ws + self._installation = installation + self._tables_crawler = tables_crawler + self._mounts_crawler = mounts_crawler + self._cluster_locations = cluster_locations + + @classmethod + def for_cli(cls, ws: WorkspaceClient, installation: Installation, sql_backend: SqlBackend): + config = installation.load(WorkspaceConfig) + + tables_crawler = TablesCrawler(sql_backend, config.inventory_database) + mount_crawler = Mounts(sql_backend, ws, config.inventory_database) + if ws.config.is_azure: + azure_acl = AzureACL.for_cli(ws, installation) + return cls( + ws, + sql_backend, + installation, + tables_crawler, + mount_crawler, + azure_acl.get_eligible_locations_principals(), + ) + if ws.config.is_aws: + return None + if ws.config.is_gcp: + logger.error("UCX is not supported for GCP yet. 
Please run it on azure or aws") + return None + return None + + def get_interactive_cluster_grants(self) -> list[Grant]: + tables = self._tables_crawler.snapshot() + mounts = list(self._mounts_crawler.snapshot()) + grants: set[Grant] = set() + + for cluster_id, locations in self._cluster_locations.items(): + principals = self._get_cluster_principal_mapping(cluster_id) + if len(principals) == 0: + continue + cluster_usage = self._get_grants(locations, principals, tables, mounts) + grants.update(cluster_usage) + catalog_grants = [Grant(principal, "USE", "hive_metastore") for principal in principals] + grants.update(catalog_grants) + + return list(grants) + + def _get_privilege(self, table: Table, locations: dict[str, str], mounts: list[Mount]): + if table.view_text is not None: + # return nothing for view so that it goes to the separate view logic + return None + if table.location is None: + return None + if table.location.startswith('dbfs:/mnt') or table.location.startswith('/dbfs/mnt'): + mount_location = ExternalLocations.resolve_mount(table.location, mounts) + for loc, privilege in locations.items(): + if loc is not None and mount_location.startswith(loc): + return privilege + return None + if table.location.startswith('dbfs:/') or table.location.startswith('/dbfs/'): + return "WRITE_FILES" + + for loc, privilege in locations.items(): + if loc is not None and table.location.startswith(loc): + return privilege + return None + + def _get_database_grants(self, tables: list[Table], principals: list[str]) -> list[Grant]: + databases = {table.database for table in tables} + return [ + Grant(principal, "USE", "hive_metastore", database) for database in databases for principal in principals + ] + + def _get_grants( + self, locations: dict[str, str], principals: list[str], tables: list[Table], mounts: list[Mount] + ) -> list[Grant]: + grants = [] + filtered_tables = [] + for table in tables: + privilege = self._get_privilege(table, locations, mounts) + if privilege == "READ_FILES": + grants.extend( + [Grant(principal, "SELECT", table.catalog, table.database, table.name) for principal in principals] + ) + filtered_tables.append(table) + continue + if privilege == "WRITE_FILES": + grants.extend( + [ + Grant(principal, "ALL PRIVILEGES", table.catalog, table.database, table.name) + for principal in principals + ] + ) + filtered_tables.append(table) + continue + if table.view_text is not None: + grants.extend( + [ + Grant(principal, "ALL PRIVILEGES", table.catalog, table.database, view=table.name) + for principal in principals + ] + ) + filtered_tables.append(table) + + database_grants = self._get_database_grants(filtered_tables, principals) + + grants.extend(database_grants) + + return grants + + def _get_cluster_principal_mapping(self, cluster_id: str) -> list[str]: + # gets all the users,groups,spn which have access to the clusters and returns a dataclass of that mapping + principal_list = [] + cluster_permission = self._ws.permissions.get("clusters", cluster_id) + if cluster_permission.access_control_list is None: + return [] + for acl in cluster_permission.access_control_list: + if acl.user_name is not None: + principal_list.append(acl.user_name) + if acl.group_name is not None: + if acl.group_name == "admins": + continue + principal_list.append(acl.group_name) + if acl.service_principal_name is not None: + principal_list.append(acl.service_principal_name) + return principal_list diff --git a/src/databricks/labs/ucx/hive_metastore/locations.py b/src/databricks/labs/ucx/hive_metastore/locations.py 
index 687a8b0463..3505371982 100644 --- a/src/databricks/labs/ucx/hive_metastore/locations.py +++ b/src/databricks/labs/ucx/hive_metastore/locations.py @@ -47,7 +47,7 @@ def _external_locations(self, tables: list[Row], mounts) -> Iterable[ExternalLoc if not location: continue if location.startswith("dbfs:/mnt"): - location = self._resolve_mount(location, mounts) + location = self.resolve_mount(location, mounts) if ( not location.startswith("dbfs") and (self._prefix_size[0] < location.find(":/") < self._prefix_size[1]) @@ -58,7 +58,8 @@ def _external_locations(self, tables: list[Row], mounts) -> Iterable[ExternalLoc self._add_jdbc_location(external_locations, location, table) return external_locations - def _resolve_mount(self, location, mounts): + @staticmethod + def resolve_mount(location, mounts): for mount in mounts: if location[5:].startswith(mount.name.lower()): location = location[5:].replace(mount.name, mount.source) diff --git a/src/databricks/labs/ucx/hive_metastore/table_migrate.py b/src/databricks/labs/ucx/hive_metastore/table_migrate.py index 782419a284..11bcfa37c8 100644 --- a/src/databricks/labs/ucx/hive_metastore/table_migrate.py +++ b/src/databricks/labs/ucx/hive_metastore/table_migrate.py @@ -14,8 +14,8 @@ from databricks.labs.ucx.config import WorkspaceConfig from databricks.labs.ucx.framework.crawlers import CrawlerBase from databricks.labs.ucx.framework.utils import escape_sql_identifier -from databricks.labs.ucx.hive_metastore import GrantsCrawler, TablesCrawler -from databricks.labs.ucx.hive_metastore.grants import Grant +from databricks.labs.ucx.hive_metastore import TablesCrawler +from databricks.labs.ucx.hive_metastore.grants import Grant, GrantsCrawler, PrincipalACL from databricks.labs.ucx.hive_metastore.mapping import Rule, TableMapping from databricks.labs.ucx.hive_metastore.tables import ( AclMigrationWhat, @@ -52,6 +52,7 @@ def __init__( table_mapping: TableMapping, group_manager: GroupManager, migration_status_refresher: 'MigrationStatusRefresher', + principal_grants: PrincipalACL, ): self._tc = table_crawler self._gc = grant_crawler @@ -61,6 +62,7 @@ def __init__( self._group = group_manager self._migration_status_refresher = migration_status_refresher self._seen_tables: dict[str, str] = {} + self._principal_grants = principal_grants @classmethod def for_cli(cls, ws: WorkspaceClient, product='ucx'): @@ -72,9 +74,17 @@ def for_cli(cls, ws: WorkspaceClient, product='ucx'): grants_crawler = GrantsCrawler(table_crawler, udfs_crawler) table_mapping = TableMapping(installation, ws, sql_backend) group_manager = GroupManager(sql_backend, ws, config.inventory_database) + principal_grants = PrincipalACL.for_cli(ws, installation, sql_backend) migration_status_refresher = MigrationStatusRefresher(ws, sql_backend, config.inventory_database, table_crawler) return cls( - table_crawler, grants_crawler, ws, sql_backend, table_mapping, group_manager, migration_status_refresher + table_crawler, + grants_crawler, + ws, + sql_backend, + table_mapping, + group_manager, + migration_status_refresher, + principal_grants, ) def index(self): @@ -87,6 +97,7 @@ def migrate_tables(self, *, what: What | None = None, acl_strategy: list[AclMigr if acl_strategy is not None: grants_to_migrate = self._gc.snapshot() migrated_groups = self._group.snapshot() + principal_grants = self._principal_grants.get_interactive_cluster_grants() else: acl_strategy = [] for table in tables_to_migrate: @@ -95,6 +106,8 @@ def migrate_tables(self, *, what: What | None = None, acl_strategy: list[AclMigr continue 
if AclMigrationWhat.LEGACY_TACL in acl_strategy: grants.extend(self._match_grants(table.src, grants_to_migrate, migrated_groups)) + if AclMigrationWhat.PRINCIPAL in acl_strategy: + grants.extend(self._match_grants(table.src, principal_grants, migrated_groups)) tasks.append(partial(self._migrate_table, table.src, table.rule, grants)) Threads.strict("migrate tables", tasks) diff --git a/src/databricks/labs/ucx/runtime.py b/src/databricks/labs/ucx/runtime.py index c312372c2c..928c56fa41 100644 --- a/src/databricks/labs/ucx/runtime.py +++ b/src/databricks/labs/ucx/runtime.py @@ -11,13 +11,15 @@ from databricks.labs.ucx.assessment.init_scripts import GlobalInitScriptCrawler from databricks.labs.ucx.assessment.jobs import JobsCrawler, SubmitRunsCrawler from databricks.labs.ucx.assessment.pipelines import PipelinesCrawler +from databricks.labs.ucx.azure.access import AzureResourcePermissions +from databricks.labs.ucx.azure.resources import AzureAPIClient, AzureResources from databricks.labs.ucx.config import WorkspaceConfig from databricks.labs.ucx.framework.tasks import task, trigger -from databricks.labs.ucx.hive_metastore import ( - ExternalLocations, +from databricks.labs.ucx.hive_metastore import ExternalLocations, Mounts, TablesCrawler +from databricks.labs.ucx.hive_metastore.grants import ( + AzureACL, GrantsCrawler, - Mounts, - TablesCrawler, + PrincipalACL, ) from databricks.labs.ucx.hive_metastore.locations import TablesInMounts from databricks.labs.ucx.hive_metastore.mapping import TableMapping @@ -434,12 +436,35 @@ def migrate_external_tables_sync( table_crawler = TablesCrawler(sql_backend, cfg.inventory_database) udf_crawler = UdfsCrawler(sql_backend, cfg.inventory_database) grant_crawler = GrantsCrawler(table_crawler, udf_crawler) - table_mapping = TableMapping(install, ws, sql_backend) + table_mappings = TableMapping(install, ws, sql_backend) migration_status_refresher = MigrationStatusRefresher(ws, sql_backend, cfg.inventory_database, table_crawler) group_manager = GroupManager(sql_backend, ws, cfg.inventory_database) + mount_crawler = Mounts(sql_backend, ws, cfg.inventory_database) + cluster_locations = {} + if ws.config.is_azure: + locations = ExternalLocations(ws, sql_backend, cfg.inventory_database) + azure_client = AzureAPIClient( + ws.config.arm_environment.resource_manager_endpoint, + ws.config.arm_environment.service_management_endpoint, + ) + graph_client = AzureAPIClient("https://graph.microsoft.com", "https://graph.microsoft.com") + azurerm = AzureResources(azure_client, graph_client) + resource_permissions = AzureResourcePermissions(install, ws, azurerm, locations) + spn_crawler = AzureServicePrincipalCrawler(ws, sql_backend, cfg.inventory_database) + cluster_locations = AzureACL( + ws, sql_backend, spn_crawler, resource_permissions + ).get_eligible_locations_principals() + interactive_grants = PrincipalACL(ws, sql_backend, install, table_crawler, mount_crawler, cluster_locations) TablesMigrator( - table_crawler, grant_crawler, ws, sql_backend, table_mapping, group_manager, migration_status_refresher - ).migrate_tables(what=What.EXTERNAL_SYNC, acl_strategy=[AclMigrationWhat.LEGACY_TACL]) + table_crawler, + grant_crawler, + ws, + sql_backend, + table_mappings, + group_manager, + migration_status_refresher, + interactive_grants, + ).migrate_tables(what=What.EXTERNAL_SYNC, acl_strategy=[AclMigrationWhat.LEGACY_TACL, AclMigrationWhat.PRINCIPAL]) @task("migrate-tables", job_cluster="table_migration") @@ -454,12 +479,35 @@ def migrate_dbfs_root_delta_tables( table_crawler = 
TablesCrawler(sql_backend, cfg.inventory_database) udf_crawler = UdfsCrawler(sql_backend, cfg.inventory_database) grant_crawler = GrantsCrawler(table_crawler, udf_crawler) - table_mapping = TableMapping(install, ws, sql_backend) + table_mappings = TableMapping(install, ws, sql_backend) migration_status_refresher = MigrationStatusRefresher(ws, sql_backend, cfg.inventory_database, table_crawler) group_manager = GroupManager(sql_backend, ws, cfg.inventory_database) + mount_crawler = Mounts(sql_backend, ws, cfg.inventory_database) + cluster_locations = {} + if ws.config.is_azure: + locations = ExternalLocations(ws, sql_backend, cfg.inventory_database) + azure_client = AzureAPIClient( + ws.config.arm_environment.resource_manager_endpoint, + ws.config.arm_environment.service_management_endpoint, + ) + graph_client = AzureAPIClient("https://graph.microsoft.com", "https://graph.microsoft.com") + azurerm = AzureResources(azure_client, graph_client) + resource_permissions = AzureResourcePermissions(install, ws, azurerm, locations) + spn_crawler = AzureServicePrincipalCrawler(ws, sql_backend, cfg.inventory_database) + cluster_locations = AzureACL( + ws, sql_backend, spn_crawler, resource_permissions + ).get_eligible_locations_principals() + interactive_grants = PrincipalACL(ws, sql_backend, install, table_crawler, mount_crawler, cluster_locations) TablesMigrator( - table_crawler, grant_crawler, ws, sql_backend, table_mapping, group_manager, migration_status_refresher - ).migrate_tables(what=What.DBFS_ROOT_DELTA, acl_strategy=[AclMigrationWhat.LEGACY_TACL]) + table_crawler, + grant_crawler, + ws, + sql_backend, + table_mappings, + group_manager, + migration_status_refresher, + interactive_grants, + ).migrate_tables(what=What.DBFS_ROOT_DELTA, acl_strategy=[AclMigrationWhat.LEGACY_TACL, AclMigrationWhat.PRINCIPAL]) @task("migrate-groups-experimental", depends_on=[crawl_groups]) diff --git a/src/databricks/labs/ucx/workspace_access/manager.py b/src/databricks/labs/ucx/workspace_access/manager.py index f37762e90d..832d7e7471 100644 --- a/src/databricks/labs/ucx/workspace_access/manager.py +++ b/src/databricks/labs/ucx/workspace_access/manager.py @@ -14,7 +14,8 @@ Dataclass, DataclassInstance, ) -from databricks.labs.ucx.hive_metastore import GrantsCrawler, TablesCrawler +from databricks.labs.ucx.hive_metastore import TablesCrawler +from databricks.labs.ucx.hive_metastore.grants import GrantsCrawler from databricks.labs.ucx.hive_metastore.udfs import UdfsCrawler from databricks.labs.ucx.workspace_access import generic, redash, scim, secrets from databricks.labs.ucx.workspace_access.base import AclSupport, Permissions diff --git a/src/databricks/labs/ucx/workspace_access/tacl.py b/src/databricks/labs/ucx/workspace_access/tacl.py index b15e9f085e..32c3e8a3bd 100644 --- a/src/databricks/labs/ucx/workspace_access/tacl.py +++ b/src/databricks/labs/ucx/workspace_access/tacl.py @@ -9,8 +9,7 @@ from databricks.labs.lsql.backends import SqlBackend from databricks.sdk.retries import retried -from databricks.labs.ucx.hive_metastore import GrantsCrawler -from databricks.labs.ucx.hive_metastore.grants import Grant +from databricks.labs.ucx.hive_metastore.grants import Grant, GrantsCrawler from databricks.labs.ucx.workspace_access.base import AclSupport, Permissions from databricks.labs.ucx.workspace_access.groups import MigrationState diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index 328efed7df..9f3ee74b93 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ 
-15,8 +15,9 @@ AzureServicePrincipalCrawler, AzureServicePrincipalInfo, ) -from databricks.labs.ucx.hive_metastore import GrantsCrawler, TablesCrawler -from databricks.labs.ucx.hive_metastore.grants import Grant +from databricks.labs.ucx.hive_metastore import TablesCrawler +from databricks.labs.ucx.hive_metastore.grants import Grant, GrantsCrawler +from databricks.labs.ucx.hive_metastore.locations import Mount, Mounts from databricks.labs.ucx.hive_metastore.mapping import Rule, TableMapping from databricks.labs.ucx.hive_metastore.tables import Table from databricks.labs.ucx.hive_metastore.udfs import Udf, UdfsCrawler @@ -190,3 +191,12 @@ def __init__(self, spn_infos: list[AzureServicePrincipalInfo], *args): def snapshot(self) -> list[AzureServicePrincipalInfo]: return self._spn_infos + + +class StaticMountCrawler(Mounts): + def __init__(self, mounts: list[Mount], *args): + super().__init__(*args) + self._mounts = mounts + + def snapshot(self) -> list[Mount]: + return self._mounts diff --git a/tests/integration/hive_metastore/test_grants.py b/tests/integration/hive_metastore/test_grants.py index c739e8f1c3..0615b4f8af 100644 --- a/tests/integration/hive_metastore/test_grants.py +++ b/tests/integration/hive_metastore/test_grants.py @@ -5,7 +5,7 @@ from databricks.sdk.errors import NotFound from databricks.sdk.retries import retried -from databricks.labs.ucx.hive_metastore import GrantsCrawler +from databricks.labs.ucx.hive_metastore.grants import GrantsCrawler from ..conftest import StaticTablesCrawler, StaticUdfsCrawler diff --git a/tests/integration/hive_metastore/test_migrate.py b/tests/integration/hive_metastore/test_migrate.py index ed0a040a1e..b9bec4bd05 100644 --- a/tests/integration/hive_metastore/test_migrate.py +++ b/tests/integration/hive_metastore/test_migrate.py @@ -3,12 +3,20 @@ from unittest.mock import create_autospec import pytest +from databricks.labs.blueprint.installation import MockInstallation from databricks.sdk.errors import NotFound from databricks.sdk.retries import retried from databricks.sdk.service.catalog import Privilege, SecurableType - -from databricks.labs.ucx.hive_metastore import GrantsCrawler -from databricks.labs.ucx.hive_metastore.grants import Grant +from databricks.sdk.service.compute import DataSecurityMode +from databricks.sdk.service.iam import PermissionLevel + +from databricks.labs.ucx.hive_metastore.grants import ( + AzureACL, + Grant, + GrantsCrawler, + PrincipalACL, +) +from databricks.labs.ucx.hive_metastore.locations import Mount from databricks.labs.ucx.hive_metastore.mapping import Rule from databricks.labs.ucx.hive_metastore.table_migrate import ( MigrationStatusRefresher, @@ -19,12 +27,45 @@ from ..conftest import ( StaticGrantsCrawler, + StaticMountCrawler, StaticTableMapping, StaticTablesCrawler, StaticUdfsCrawler, ) logger = logging.getLogger(__name__) +_SPARK_CONF = { + "spark.databricks.cluster.profile": "singleNode", + "spark.master": "local[*]", + "fs.azure.account.auth.type.labsazurethings.dfs.core.windows.net": "OAuth", + "fs.azure.account.oauth.provider.type.labsazurethings.dfs.core.windows.net": "org.apache.hadoop.fs" + ".azurebfs.oauth2.ClientCredsTokenProvider", + "fs.azure.account.oauth2.client.id.labsazurethings.dfs.core.windows.net": "dummy_application_id", + "fs.azure.account.oauth2.client.secret.labsazurethings.dfs.core.windows.net": "dummy", + "fs.azure.account.oauth2.client.endpoint.labsazurethings.dfs.core.windows.net": "https://login" + ".microsoftonline.com/directory_12345/oauth2/token", +} + + +def 
principal_acl(ws, inventory_schema, sql_backend): + installation = MockInstallation( + { + "config.yml": { + 'inventory_database': inventory_schema, + }, + "azure_storage_account_info.csv": [ + { + 'prefix': 'dummy_prefix', + 'client_id': 'dummy_application_id', + 'principal': 'dummy_principal', + 'privilege': 'WRITE_FILES', + 'type': 'Application', + 'directory_id': 'dummy_directory', + } + ], + } + ) + return PrincipalACL.for_cli(ws, installation, sql_backend) @retried(on=[NotFound], timeout=timedelta(minutes=2)) @@ -56,8 +97,16 @@ def test_migrate_managed_tables(ws, sql_backend, inventory_schema, make_catalog, table_mapping = StaticTableMapping(ws, sql_backend, rules=rules) group_manager = GroupManager(sql_backend, ws, inventory_schema) migration_status_refresher = MigrationStatusRefresher(ws, sql_backend, inventory_schema, table_crawler) + principal_grants = principal_acl(ws, inventory_schema, sql_backend) table_migrate = TablesMigrator( - table_crawler, grant_crawler, ws, sql_backend, table_mapping, group_manager, migration_status_refresher + table_crawler, + grant_crawler, + ws, + sql_backend, + table_mapping, + group_manager, + migration_status_refresher, + principal_grants, ) table_migrate.migrate_tables() @@ -117,8 +166,16 @@ def test_migrate_tables_with_cache_should_not_create_table( table_mapping = StaticTableMapping(ws, sql_backend, rules=rules) group_manager = GroupManager(sql_backend, ws, inventory_schema) migration_status_refresher = MigrationStatusRefresher(ws, sql_backend, inventory_schema, table_crawler) + principal_grants = principal_acl(ws, inventory_schema, sql_backend) table_migrate = TablesMigrator( - table_crawler, grant_crawler, ws, sql_backend, table_mapping, group_manager, migration_status_refresher + table_crawler, + grant_crawler, + ws, + sql_backend, + table_mapping, + group_manager, + migration_status_refresher, + principal_grants, ) # FIXME: flaky: databricks.sdk.errors.platform.NotFound: Catalog 'ucx_cjazg' does not exist. 
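For context on the fixtures above: AzureACL.get_eligible_locations_principals() returns a dictionary keyed by interactive cluster id, whose values map each eligible external-location URL to the privilege the cluster's service principal holds on it; PrincipalACL then turns that mapping into grants. A minimal sketch of that shape follows — illustrative only, with a made-up cluster id and the dummy storage values used elsewhere in these tests.

# Illustrative only: the mapping produced by AzureACL.get_eligible_locations_principals()
# and consumed by PrincipalACL. The cluster id is a hypothetical placeholder; the URL and
# privilege mirror the dummy fixtures in this test module.
cluster_locations: dict[str, dict[str, str]] = {
    "0101-999999-abcdefgh": {  # interactive cluster id
        "abfss://things@labsazurethings.dfs.core.windows.net/a": "WRITE_FILES",
    },
}
# For a table stored under that prefix, PrincipalACL emits, per cluster principal,
# Grant(principal, "ALL PRIVILEGES", ...) for WRITE_FILES and Grant(principal, "SELECT", ...)
# for READ_FILES, plus USE grants on the database and on the hive_metastore catalog.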
@@ -169,6 +226,7 @@ def test_migrate_external_table( # pylint: disable=too-many-locals ] group_manager = GroupManager(sql_backend, ws, inventory_schema) migration_status_refresher = MigrationStatusRefresher(ws, sql_backend, inventory_schema, table_crawler) + principal_grants = principal_acl(ws, inventory_schema, sql_backend) table_migrate = TablesMigrator( table_crawler, grant_crawler, @@ -177,6 +235,7 @@ def test_migrate_external_table( # pylint: disable=too-many-locals StaticTableMapping(ws, sql_backend, rules=rules), group_manager, migration_status_refresher, + principal_grants, ) table_migrate.migrate_tables() @@ -227,6 +286,7 @@ def test_migrate_external_table_failed_sync( ] group_manager = GroupManager(sql_backend, ws, inventory_schema) migration_status_refresher = MigrationStatusRefresher(ws, sql_backend, inventory_schema, table_crawler) + principal_grants = principal_acl(ws, inventory_schema, sql_backend) table_migrate = TablesMigrator( table_crawler, grant_crawler, @@ -235,6 +295,7 @@ def test_migrate_external_table_failed_sync( StaticTableMapping(ws, sql_backend, rules=rules), group_manager, migration_status_refresher, + principal_grants, ) table_migrate.migrate_tables() @@ -281,8 +342,16 @@ def test_revert_migrated_table( table_mapping = StaticTableMapping(ws, sql_backend, rules=rules) group_manager = GroupManager(sql_backend, ws, inventory_schema) migration_status_refresher = MigrationStatusRefresher(ws, sql_backend, inventory_schema, table_crawler) + principal_grants = principal_acl(ws, inventory_schema, sql_backend) table_migrate = TablesMigrator( - table_crawler, grant_crawler, ws, sql_backend, table_mapping, group_manager, migration_status_refresher + table_crawler, + grant_crawler, + ws, + sql_backend, + table_mapping, + group_manager, + migration_status_refresher, + principal_grants, ) table_migrate.migrate_tables() @@ -393,8 +462,16 @@ def test_mapping_reverts_table( table_mapping = StaticTableMapping(ws, sql_backend, rules=rules) migration_status_refresher = MigrationStatusRefresher(ws, sql_backend, inventory_schema, table_crawler) group_manager = GroupManager(sql_backend, ws, inventory_schema) + principal_grants = principal_acl(ws, inventory_schema, sql_backend) table_migrate = TablesMigrator( - table_crawler, grant_crawler, ws, sql_backend, table_mapping, group_manager, migration_status_refresher + table_crawler, + grant_crawler, + ws, + sql_backend, + table_mapping, + group_manager, + migration_status_refresher, + principal_grants, ) table_migrate.migrate_tables() @@ -447,7 +524,7 @@ def test_mapping_reverts_table( assert "upgraded_to" not in results2 -@retried(on=[NotFound], timeout=timedelta(minutes=2)) +@retried(on=[NotFound], timeout=timedelta(minutes=3)) def test_migrate_managed_tables_with_acl( ws, sql_backend, inventory_schema, make_catalog, make_schema, make_table, make_user ): # pylint: disable=too-many-locals @@ -482,8 +559,45 @@ def test_migrate_managed_tables_with_acl( table_mapping = StaticTableMapping(ws, sql_backend, rules=rules) group_manager = GroupManager(sql_backend, ws, inventory_schema) migration_status_refresher = MigrationStatusRefresher(ws, sql_backend, inventory_schema, table_crawler) + installation = MockInstallation( + { + "config.yml": { + 'inventory_database': inventory_schema, + }, + "azure_storage_account_info.csv": [ + { + 'prefix': 'dummy_prefix', + 'client_id': 'dummy_application_id', + 'principal': 'dummy_principal', + 'privilege': 'WRITE_FILES', + 'type': 'Application', + 'directory_id': 'dummy_directory', + } + ], + } + ) + 
principal_grants = PrincipalACL( + ws, + sql_backend, + installation, + StaticTablesCrawler(sql_backend, inventory_schema, [src_managed_table]), + StaticMountCrawler( + [Mount('dummy_mount', 'abfss://dummy@dummy.dfs.core.windows.net/a')], + sql_backend, + ws, + inventory_schema, + ), + AzureACL.for_cli(ws, installation).get_eligible_locations_principals(), + ) table_migrate = TablesMigrator( - table_crawler, grant_crawler, ws, sql_backend, table_mapping, group_manager, migration_status_refresher + table_crawler, + grant_crawler, + ws, + sql_backend, + table_mapping, + group_manager, + migration_status_refresher, + principal_grants, ) table_migrate.migrate_tables(acl_strategy=[AclMigrationWhat.LEGACY_TACL]) @@ -497,3 +611,117 @@ def test_migrate_managed_tables_with_acl( assert target_table_properties[Table.UPGRADED_FROM_WS_PARAM] == str(ws.get_workspace_id()) assert target_table_grants.privilege_assignments[0].principal == user.user_name assert target_table_grants.privilege_assignments[0].privileges == [Privilege.MODIFY, Privilege.SELECT] + + +@pytest.fixture() +def test_prepare_principal_acl( + ws, + sql_backend, + inventory_schema, + env_or_skip, + make_dbfs_data_copy, + make_table, + make_catalog, + make_schema, + make_cluster, +): + cluster = make_cluster(single_node=True, spark_conf=_SPARK_CONF, data_security_mode=DataSecurityMode.NONE) + new_mounted_location = f'dbfs:/mnt/{env_or_skip("TEST_MOUNT_NAME")}/a/b/{inventory_schema}' + make_dbfs_data_copy(src_path=f'dbfs:/mnt/{env_or_skip("TEST_MOUNT_NAME")}/a/b/c', dst_path=new_mounted_location) + src_schema = make_schema(catalog_name="hive_metastore") + src_external_table = make_table( + catalog_name=src_schema.catalog_name, schema_name=src_schema.name, external_csv=new_mounted_location + ) + dst_catalog = make_catalog() + dst_schema = make_schema(catalog_name=dst_catalog.name, name=src_schema.name) + rules = [ + Rule( + "workspace", + dst_catalog.name, + src_schema.name, + dst_schema.name, + src_external_table.name, + src_external_table.name, + ), + ] + installation = MockInstallation( + { + "config.yml": { + 'warehouse_id': env_or_skip("TEST_DEFAULT_WAREHOUSE_ID"), + 'inventory_database': inventory_schema, + }, + "azure_storage_account_info.csv": [ + { + 'prefix': 'abfss://things@labsazurethings.dfs.core.windows.net', + 'client_id': 'dummy_application_id', + 'principal': 'principal_1', + 'privilege': 'WRITE_FILES', + 'type': 'Application', + 'directory_id': 'directory_id_ss1', + } + ], + } + ) + + principal_grants = PrincipalACL( + ws, + sql_backend, + installation, + StaticTablesCrawler(sql_backend, inventory_schema, [src_external_table]), + StaticMountCrawler( + [ + Mount( + f'/mnt/{env_or_skip("TEST_MOUNT_NAME")}/a', 'abfss://things@labsazurethings.dfs.core.windows.net/a' + ) + ], + sql_backend, + ws, + inventory_schema, + ), + AzureACL.for_cli(ws, installation).get_eligible_locations_principals(), + ) + table_migrate = TablesMigrator( + StaticTablesCrawler(sql_backend, inventory_schema, [src_external_table]), + StaticGrantsCrawler( + StaticTablesCrawler(sql_backend, inventory_schema, [src_external_table]), + StaticUdfsCrawler(sql_backend, inventory_schema, []), + [], + ), + ws, + sql_backend, + StaticTableMapping(ws, sql_backend, rules=rules), + GroupManager(sql_backend, ws, inventory_schema), + MigrationStatusRefresher( + ws, sql_backend, inventory_schema, StaticTablesCrawler(sql_backend, inventory_schema, [src_external_table]) + ), + principal_grants, + ) + return table_migrate, 
f"{dst_catalog.name}.{dst_schema.name}.{src_external_table.name}", cluster.cluster_id + + +@retried(on=[NotFound], timeout=timedelta(minutes=3)) +def test_migrate_managed_tables_with_principal_acl_azure( + ws, + make_user, + test_prepare_principal_acl, + make_cluster_permissions, + make_cluster, +): + if not ws.config.is_azure: + pytest.skip("temporary: only works in azure test env") + table_migrate, table_full_name, cluster_id = test_prepare_principal_acl + user = make_user() + make_cluster_permissions( + object_id=cluster_id, + permission_level=PermissionLevel.CAN_ATTACH_TO, + user_name=user.user_name, + ) + table_migrate.migrate_tables(acl_strategy=[AclMigrationWhat.PRINCIPAL]) + + target_table_grants = ws.grants.get(SecurableType.TABLE, table_full_name) + match = False + for _ in target_table_grants.privilege_assignments: + if _.principal == user.user_name and _.privileges == [Privilege.ALL_PRIVILEGES]: + match = True + break + assert match diff --git a/tests/integration/test_installation.py b/tests/integration/test_installation.py index a6a6f1095c..fcf9a1e772 100644 --- a/tests/integration/test_installation.py +++ b/tests/integration/test_installation.py @@ -24,8 +24,10 @@ from databricks.sdk.service.iam import PermissionLevel import databricks +from databricks.labs.ucx.azure.access import StoragePermissionMapping from databricks.labs.ucx.config import WorkspaceConfig from databricks.labs.ucx.hive_metastore.grants import Grant +from databricks.labs.ucx.hive_metastore.locations import Mount from databricks.labs.ucx.hive_metastore.mapping import Rule from databricks.labs.ucx.install import WorkspaceInstallation, WorkspaceInstaller from databricks.labs.ucx.installer.workflows import WorkflowsInstallation @@ -556,8 +558,17 @@ def test_check_inventory_database_exists(ws, new_installation): @retried(on=[NotFound], timeout=timedelta(minutes=10)) def test_table_migration_job( - ws, new_installation, make_catalog, make_schema, make_table, env_or_skip, make_random, make_dbfs_data_copy + ws, + new_installation, + make_catalog, + make_schema, + make_table, + env_or_skip, + make_random, + make_dbfs_data_copy, + sql_backend, ): + # skip this test if not in nightly test job or debug mode if os.path.basename(sys.argv[0]) not in {"_jb_pytest_runner.py", "testlauncher.py"}: env_or_skip("TEST_NIGHTLY") @@ -585,26 +596,46 @@ def test_table_migration_job( inventory_schema_suffix="_migrate_inventory", ) installation = product_info.current_installation(ws) - migrate_rules = [ - Rule( - "ws_name", - dst_catalog.name, - src_schema.name, - dst_schema.name, - src_managed_table.name, - src_managed_table.name, - ), - Rule( - "ws_name", - dst_catalog.name, - src_schema.name, - dst_schema.name, - src_external_table.name, - src_external_table.name, - ), - ] - installation.save(migrate_rules, filename='mapping.csv') + installation.save( + [ + Rule( + "ws_name", + dst_catalog.name, + src_schema.name, + dst_schema.name, + src_managed_table.name, + src_managed_table.name, + ), + Rule( + "ws_name", + dst_catalog.name, + src_schema.name, + dst_schema.name, + src_external_table.name, + src_external_table.name, + ), + ], + filename='mapping.csv', + ) + sql_backend.save_table( + f"{installation.load(WorkspaceConfig).inventory_database}.mounts", + [Mount(f'/mnt/{env_or_skip("TEST_MOUNT_NAME")}/a', 'abfss://things@labsazurethings.dfs.core.windows.net/a')], + Mount, + ) + installation.save( + [ + StoragePermissionMapping( + 'abfss://things@labsazurethings.dfs.core.windows.net', + 'dummy_application_id', + 'principal_1', + 
'WRITE_FILES', + 'Application', + 'directory_id_ss1', + ) + ], + filename='azure_storage_account_info.csv', + ) workflows_install.run_workflow("migrate-tables") # assert the workflow is successful assert workflows_install.validate_step("migrate-tables") @@ -620,7 +651,15 @@ def test_table_migration_job( @retried(on=[NotFound], timeout=timedelta(minutes=5)) def test_table_migration_job_cluster_override( # pylint: disable=too-many-locals - ws, new_installation, make_catalog, make_schema, make_table, env_or_skip, make_random, make_dbfs_data_copy + ws, + new_installation, + make_catalog, + make_schema, + make_table, + env_or_skip, + make_random, + make_dbfs_data_copy, + sql_backend, ): # create external and managed tables to be migrated src_schema = make_schema(catalog_name="hive_metastore", name=f"migrate_{make_random(5).lower()}") @@ -655,7 +694,24 @@ def test_table_migration_job_cluster_override( # pylint: disable=too-many-local ), ] installation.save(migrate_rules, filename='mapping.csv') - + sql_backend.save_table( + f"{installation.load(WorkspaceConfig).inventory_database}.mounts", + [Mount(f'/mnt/{env_or_skip("TEST_MOUNT_NAME")}/a', 'abfss://things@labsazurethings.dfs.core.windows.net/a')], + Mount, + ) + installation.save( + [ + StoragePermissionMapping( + 'abfss://things@labsazurethings.dfs.core.windows.net', + 'dummy_application_id', + 'principal_1', + 'WRITE_FILES', + 'Application', + 'directory_id_ss1', + ) + ], + filename='azure_storage_account_info.csv', + ) workflows_install.run_workflow("migrate-tables") # assert the workflow is successful assert workflows_install.validate_step("migrate-tables") diff --git a/tests/integration/workspace_access/test_groups.py b/tests/integration/workspace_access/test_groups.py index fc8ab45ef4..a158ded514 100644 --- a/tests/integration/workspace_access/test_groups.py +++ b/tests/integration/workspace_access/test_groups.py @@ -7,8 +7,7 @@ from databricks.sdk.retries import retried from databricks.sdk.service.iam import Group, PermissionLevel, ResourceMeta -from databricks.labs.ucx.hive_metastore import GrantsCrawler -from databricks.labs.ucx.hive_metastore.grants import Grant +from databricks.labs.ucx.hive_metastore.grants import Grant, GrantsCrawler from databricks.labs.ucx.workspace_access.generic import ( GenericPermissionsSupport, Listing, diff --git a/tests/integration/workspace_access/test_tacl.py b/tests/integration/workspace_access/test_tacl.py index 6851784c88..9e15907a4a 100644 --- a/tests/integration/workspace_access/test_tacl.py +++ b/tests/integration/workspace_access/test_tacl.py @@ -2,7 +2,7 @@ import logging from collections import defaultdict -from databricks.labs.ucx.hive_metastore import GrantsCrawler +from databricks.labs.ucx.hive_metastore.grants import GrantsCrawler from databricks.labs.ucx.workspace_access.base import Permissions from databricks.labs.ucx.workspace_access.groups import MigratedGroup, MigrationState from databricks.labs.ucx.workspace_access.tacl import TableAclSupport diff --git a/tests/unit/assessment/clusters/azure-spn-secret-interactive-multiple-spn.json b/tests/unit/assessment/clusters/azure-spn-secret-interactive-multiple-spn.json new file mode 100644 index 0000000000..fc0bdc4995 --- /dev/null +++ b/tests/unit/assessment/clusters/azure-spn-secret-interactive-multiple-spn.json @@ -0,0 +1,20 @@ +{ + "autoscale": { + "min_workers": 1, + "max_workers": 6 + }, + "cluster_id": "azure-spn-secret-interactive", + "cluster_name": "Azure SPN Secret", + "data_security_mode": "NONE", + "cluster_source": "UI", + 
"spark_conf": { + "spark.hadoop.fs.azure.account.oauth2.client.id.abcde.dfs.core.windows.net": "{{secrets/abcff/sp_app_client_id}}", + "spark.hadoop.fs.azure.account.oauth2.client.endpoint.abcde.dfs.core.windows.net": "https://login.microsoftonline.com/dedededede/oauth2/token", + "spark.hadoop.fs.azure.account.oauth2.client.secret.abcde.dfs.core.windows.net": "{{secrets/abcff/sp_secret}}", + "spark.hadoop.fs.azure.account.oauth2.client.id.fgh.dfs.core.windows.net": "{{secrets/fgh/sp_app_client_id2}}", + "spark.hadoop.fs.azure.account.oauth2.client.endpoint.fgh.dfs.core.windows.net": "https://login.microsoftonline.com/dedededede/oauth2/token", + "spark.hadoop.fs.azure.account.oauth2.client.secret.fgh.dfs.core.windows.net": "{{secrets/fgh/sp_secret2}}" + }, + "spark_context_id": 5134472582179565315, + "spark_version": "13.3.x-cpu-ml-scala2.12" +} \ No newline at end of file diff --git a/tests/unit/assessment/test_azure.py b/tests/unit/assessment/test_azure.py index ec6b1bb562..7e5b2fce04 100644 --- a/tests/unit/assessment/test_azure.py +++ b/tests/unit/assessment/test_azure.py @@ -1,6 +1,17 @@ -from databricks.labs.lsql.backends import MockBackend +from unittest.mock import create_autospec -from databricks.labs.ucx.assessment.azure import AzureServicePrincipalCrawler +from databricks.labs.lsql.backends import MockBackend +from databricks.sdk import WorkspaceClient +from databricks.sdk.service.compute import ( + ClusterDetails, + ClusterSource, + DataSecurityMode, +) + +from databricks.labs.ucx.assessment.azure import ( + AzureServicePrincipalCrawler, + AzureServicePrincipalInfo, +) from .. import workspace_client_mock @@ -199,3 +210,47 @@ def test_jobs_assessment_with_spn_cluster_policy_not_found(): ws = workspace_client_mock(job_ids=['policy-not-found']) crawler = AzureServicePrincipalCrawler(ws, MockBackend(), "ucx").snapshot() assert len(crawler) == 1 + + +def test_get_cluster_to_storage_mapping_no_cluster_return_empty(): + ws = create_autospec(WorkspaceClient) + ws.clusters.list.return_value = [] + crawler = AzureServicePrincipalCrawler(ws, MockBackend(), "ucx") + assert not crawler.get_cluster_to_storage_mapping() + + +def test_get_cluster_to_storage_mapping_no_interactive_cluster_return_empty(): + ws = workspace_client_mock(cluster_ids=['azure-spn-secret']) + ws.clusters.list.return_value = [ + ClusterDetails(cluster_source=ClusterSource.JOB), + ClusterDetails(cluster_source=ClusterSource.UI, data_security_mode=DataSecurityMode.SINGLE_USER), + ] + crawler = AzureServicePrincipalCrawler(ws, MockBackend(), "ucx") + assert not crawler.get_cluster_to_storage_mapping() + + +def test_get_cluster_to_storage_mapping_interactive_cluster_no_spn_return_empty(): + ws = workspace_client_mock(cluster_ids=['azure-spn-secret-interactive-multiple-spn']) + + crawler = AzureServicePrincipalCrawler(ws, MockBackend(), "ucx") + cluster_spn_info = crawler.get_cluster_to_storage_mapping() + spn_info = { + AzureServicePrincipalInfo( + application_id='Hello, World!', + secret_scope='abcff', + secret_key='sp_secret', + tenant_id='dedededede', + storage_account='abcde', + ), + AzureServicePrincipalInfo( + application_id='Hello, World!', + secret_scope='fgh', + secret_key='sp_secret2', + tenant_id='dedededede', + storage_account='fgh', + ), + } + + assert cluster_spn_info[0].cluster_id == "azure-spn-secret-interactive" + assert len(cluster_spn_info[0].spn_info) == 2 + assert cluster_spn_info[0].spn_info == spn_info diff --git a/tests/unit/hive_metastore/test_principal_grants.py 
b/tests/unit/hive_metastore/test_principal_grants.py new file mode 100644 index 0000000000..e485b34897 --- /dev/null +++ b/tests/unit/hive_metastore/test_principal_grants.py @@ -0,0 +1,282 @@ +from unittest.mock import create_autospec + +import pytest +from databricks.labs.blueprint.installation import MockInstallation +from databricks.labs.lsql.backends import StatementExecutionBackend +from databricks.sdk import WorkspaceClient +from databricks.sdk.errors import ResourceDoesNotExist +from databricks.sdk.service import iam +from databricks.sdk.service.catalog import ExternalLocationInfo + +from databricks.labs.ucx.assessment.azure import ( + AzureServicePrincipalCrawler, + AzureServicePrincipalInfo, + ServicePrincipalClusterMapping, +) +from databricks.labs.ucx.azure.access import AzureResourcePermissions +from databricks.labs.ucx.azure.resources import AzureAPIClient, AzureResources +from databricks.labs.ucx.config import WorkspaceConfig +from databricks.labs.ucx.hive_metastore import Mounts, TablesCrawler +from databricks.labs.ucx.hive_metastore.grants import AzureACL, Grant, PrincipalACL +from databricks.labs.ucx.hive_metastore.locations import ExternalLocations, Mount +from databricks.labs.ucx.hive_metastore.tables import Table + + +@pytest.fixture +def ws(): + w = create_autospec(WorkspaceClient) + w.config.is_azure = True + w.external_locations.list.return_value = [ + ExternalLocationInfo(url="abfss://container1@storage1.dfs.core.windows.net/folder1"), + ExternalLocationInfo(url="abfss://container1@storage2.dfs.core.windows.net/folder2"), + ExternalLocationInfo(url="abfss://container1@storage3.dfs.core.windows.net/folder3"), + ] + + permissions = { + 'cluster1': iam.ObjectPermissions( + object_id='cluster1', + object_type="clusters", + access_control_list=[ + iam.AccessControlResponse(group_name='group1', all_permissions=[iam.Permission(inherited=False)]), + iam.AccessControlResponse( + user_name='foo.bar@imagine.com', + all_permissions=[iam.Permission(permission_level=iam.PermissionLevel.CAN_USE)], + ), + ], + ), + 'cluster2': iam.ObjectPermissions( + object_id='cluster2', + object_type="clusters", + access_control_list=[ + iam.AccessControlResponse( + service_principal_name='spn1', + all_permissions=[iam.Permission(permission_level=iam.PermissionLevel.CAN_USE)], + ), + ], + ), + 'cluster3': iam.ObjectPermissions(object_id='cluster2', object_type="clusters"), + } + w.permissions.get.side_effect = lambda _, object_id: permissions[object_id] + return w + + +def azure_acl(w, install, cluster_spn: list): + config = install.load(WorkspaceConfig) + sql_backend = StatementExecutionBackend(w, config.warehouse_id) + locations = create_autospec(ExternalLocations) + azure_client = AzureAPIClient( + w.config.arm_environment.resource_manager_endpoint, + w.config.arm_environment.service_management_endpoint, + ) + graph_client = AzureAPIClient("https://graph.microsoft.com", "https://graph.microsoft.com") + azurerm = AzureResources(azure_client, graph_client) + resource_permissions = AzureResourcePermissions(install, w, azurerm, locations) + spn_crawler = create_autospec(AzureServicePrincipalCrawler) + spn_crawler.get_cluster_to_storage_mapping.return_value = cluster_spn + return AzureACL(w, sql_backend, spn_crawler, resource_permissions) + + +def principal_acl(w, install, cluster_spn: list): + config = install.load(WorkspaceConfig) + sql_backend = StatementExecutionBackend(w, config.warehouse_id) + table_crawler = create_autospec(TablesCrawler) + tables = [ + Table( + 'hive_metastore', + 
+            'schema1',
+            'table1',
+            'TABLE',
+            'delta',
+            location='abfss://container1@storage1.dfs.core.windows.net/folder1/table1',
+        ),
+        Table('hive_metastore', 'schema1', 'view1', 'VIEW', 'delta', view_text="select * from table1"),
+        Table(
+            'hive_metastore',
+            'schema1',
+            'table2',
+            'TABLE',
+            'delta',
+            location='abfss://container1@storage2.dfs.core.windows.net/folder2/table2',
+        ),
+        Table('hive_metastore', 'schema1', 'table3', 'TABLE', 'delta', location='dbfs:/mnt/folder1/table3'),
+        Table('hive_metastore', 'schema1', 'table5', 'TABLE', 'delta', location='dbfs:/hms/folder1/table1'),
+        Table(
+            'hive_metastore',
+            'schema2',
+            'table4',
+            'TABLE',
+            'delta',
+            location='abfss://container1@storage3.dfs.core.windows.net/folder3/table3',
+        ),
+    ]
+    table_crawler.snapshot.return_value = tables
+    mount_crawler = create_autospec(Mounts)
+    mount_crawler.snapshot.return_value = [
+        Mount('/mnt/folder1', 'abfss://container1@storage1.dfs.core.windows.net/folder1')
+    ]
+
+    spn_crawler = create_autospec(AzureServicePrincipalCrawler)
+    spn_crawler.get_cluster_to_storage_mapping.return_value = cluster_spn
+    azure_locations = azure_acl(w, install, cluster_spn)
+    return PrincipalACL(
+        w, sql_backend, install, table_crawler, mount_crawler, azure_locations.get_eligible_locations_principals()
+    )
+
+
+@pytest.fixture
+def installation():
+    return MockInstallation(
+        {
+            "config.yml": {'warehouse_id': 'abc', 'connect': {'host': 'a', 'token': 'b'}, 'inventory_database': 'ucx'},
+            "azure_storage_account_info.csv": [
+                {
+                    'prefix': 'abfss://container1@storage1.dfs.core.windows.net',
+                    'client_id': 'client1',
+                    'principal': 'principal_1',
+                    'privilege': 'WRITE_FILES',
+                    'type': 'Application',
+                    'directory_id': 'directory_id_ss1',
+                },
+                {
+                    'prefix': 'abfss://container1@storage2.dfs.core.windows.net',
+                    'client_id': 'client2',
+                    'principal': 'principal_1',
+                    'privilege': 'READ_FILES',
+                    'type': 'Application',
+                    'directory_id': 'directory_id_ss1',
+                },
+                {
+                    'prefix': 'abfss://container1@storage3.dfs.core.windows.net',
+                    'client_id': 'client2',
+                    'principal': 'principal_1',
+                    'privilege': 'WRITE_FILES',
+                    'type': 'Application',
+                    'directory_id': 'directory_id_ss1',
+                },
+            ],
+        }
+    )
+
+
+def test_for_cli_azure_acl(ws, installation):
+    assert isinstance(AzureACL.for_cli(ws, installation), AzureACL)
+
+
+def test_for_cli_azure(ws, installation):
+    ws.config.is_azure = True
+    sql_backend = StatementExecutionBackend(ws, ws.config.warehouse_id)
+    assert isinstance(PrincipalACL.for_cli(ws, installation, sql_backend), PrincipalACL)
+
+
+def test_for_cli_aws(ws, installation):
+    ws.config.is_azure = False
+    ws.config.is_aws = True
+    sql_backend = StatementExecutionBackend(ws, ws.config.warehouse_id)
+    assert PrincipalACL.for_cli(ws, installation, sql_backend) is None
+
+
+def test_for_cli_gcp(ws, installation):
+    ws.config.is_azure = False
+    ws.config.is_aws = False
+    ws.config.is_gcp = True
+    sql_backend = StatementExecutionBackend(ws, ws.config.warehouse_id)
+    assert PrincipalACL.for_cli(ws, installation, sql_backend) is None
+
+
+def test_get_eligible_locations_principals_no_cluster_mapping(ws, installation):
+    locations = azure_acl(ws, installation, [])
+    locations.get_eligible_locations_principals()
+    ws.external_locations.list.assert_not_called()
+
+
+def test_get_eligible_locations_principals_no_external_location(ws, installation):
+    cluster_spn = ServicePrincipalClusterMapping(
+        'abc', {AzureServicePrincipalInfo(application_id='Hello, World!', storage_account='abcde')}
+    )
+    locations = azure_acl(ws, installation, [cluster_spn])
+    ws.external_locations.list.return_value = []
+    with pytest.raises(ResourceDoesNotExist):
+        locations.get_eligible_locations_principals()
+
+
+def test_get_eligible_locations_principals_no_permission_mapping(ws):
+    cluster_spn = ServicePrincipalClusterMapping(
+        'abc', {AzureServicePrincipalInfo(application_id='Hello, World!', storage_account='abcde')}
+    )
+    install = MockInstallation(
+        {
+            "config.yml": {'warehouse_id': 'abc', 'connect': {'host': 'a', 'token': 'b'}, 'inventory_database': 'ucx'},
+            "azure_storage_account_info.csv": [],
+        }
+    )
+    locations = azure_acl(ws, install, [cluster_spn])
+
+    with pytest.raises(ResourceDoesNotExist):
+        locations.get_eligible_locations_principals()
+
+
+def test_get_eligible_locations_principals(ws, installation):
+    cluster_spn = ServicePrincipalClusterMapping(
+        'abc', {AzureServicePrincipalInfo(application_id='client1', storage_account='storage1')}
+    )
+    locations = azure_acl(ws, installation, [cluster_spn])
+    eligible_locations = locations.get_eligible_locations_principals()
+    assert len(eligible_locations) == 1
+    assert eligible_locations['abc'] == {'abfss://container1@storage1.dfs.core.windows.net/folder1': 'WRITE_FILES'}
+
+
+def test_interactive_cluster_no_acl(ws, installation):
+    cluster_spn = ServicePrincipalClusterMapping(
+        'cluster3', {AzureServicePrincipalInfo(application_id='client1', storage_account='storage1')}
+    )
+    grants = principal_acl(ws, installation, [cluster_spn])
+    actual_grants = grants.get_interactive_cluster_grants()
+    assert len(actual_grants) == 0
+
+
+def test_interactive_cluster_single_spn(ws, installation):
+    cluster_spn = ServicePrincipalClusterMapping(
+        'cluster1',
+        {AzureServicePrincipalInfo(application_id='client1', storage_account='storage1')},
+    )
+    grants = principal_acl(ws, installation, [cluster_spn])
+    expected_grants = [
+        Grant('group1', "ALL PRIVILEGES", "hive_metastore", 'schema1', 'table1'),
+        Grant('foo.bar@imagine.com', "ALL PRIVILEGES", "hive_metastore", 'schema1', 'table1'),
+        Grant('group1', "ALL PRIVILEGES", "hive_metastore", 'schema1', view='view1'),
+        Grant('foo.bar@imagine.com', "ALL PRIVILEGES", "hive_metastore", 'schema1', view='view1'),
+        Grant('group1', "ALL PRIVILEGES", "hive_metastore", 'schema1', 'table3'),
+        Grant('foo.bar@imagine.com', "ALL PRIVILEGES", "hive_metastore", 'schema1', 'table3'),
+        Grant('group1', "ALL PRIVILEGES", "hive_metastore", 'schema1', 'table5'),
+        Grant('foo.bar@imagine.com', "ALL PRIVILEGES", "hive_metastore", 'schema1', 'table5'),
+        Grant('group1', "USE", "hive_metastore", 'schema1'),
+        Grant('foo.bar@imagine.com', "USE", "hive_metastore", 'schema1'),
+        Grant('group1', "USE", "hive_metastore"),
+        Grant('foo.bar@imagine.com', "USE", "hive_metastore"),
+    ]
+    actual_grants = grants.get_interactive_cluster_grants()
+    for grant in expected_grants:
+        assert grant in actual_grants
+
+
+def test_interactive_cluster_multiple_spn(ws, installation):
+    cluster_spn = ServicePrincipalClusterMapping(
+        'cluster2',
+        {
+            AzureServicePrincipalInfo(application_id='client2', storage_account='storage2'),
+            AzureServicePrincipalInfo(application_id='client2', storage_account='storage3'),
+        },
+    )
+    grants = principal_acl(ws, installation, [cluster_spn])
+    expected_grants = [
+        Grant('spn1', "SELECT", "hive_metastore", 'schema1', 'table2'),
+        Grant('spn1', "ALL PRIVILEGES", "hive_metastore", 'schema2', 'table4'),
+        Grant('spn1', "ALL PRIVILEGES", "hive_metastore", 'schema1', 'table5'),
+        Grant('spn1', "ALL PRIVILEGES", "hive_metastore", 'schema1', view='view1'),
+        Grant('spn1', "USE", "hive_metastore", 'schema1'),
+        Grant('spn1', "USE", "hive_metastore", 'schema2'),
+        Grant('spn1', "USE", "hive_metastore"),
+    ]
+    actual_grants = grants.get_interactive_cluster_grants()
+    for grant in expected_grants:
+        assert grant in actual_grants
diff --git a/tests/unit/hive_metastore/test_table_migrate.py b/tests/unit/hive_metastore/test_table_migrate.py
index 90d22ec808..0d8dee2b32 100644
--- a/tests/unit/hive_metastore/test_table_migrate.py
+++ b/tests/unit/hive_metastore/test_table_migrate.py
@@ -8,7 +8,7 @@
 from databricks.sdk import WorkspaceClient
 from databricks.sdk.service.catalog import CatalogInfo, SchemaInfo, TableInfo

-from databricks.labs.ucx.hive_metastore import GrantsCrawler
+from databricks.labs.ucx.hive_metastore.grants import Grant, GrantsCrawler, PrincipalACL
 from databricks.labs.ucx.hive_metastore.mapping import (
     Rule,
     TableMapping,
@@ -50,8 +50,16 @@ def test_migrate_dbfs_root_tables_should_produce_proper_queries(ws):
     table_mapping = table_mapping_mock(["managed_dbfs", "managed_mnt", "managed_other"])
     group_manager = GroupManager(backend, ws, "inventory_database")
     migration_status_refresher = MigrationStatusRefresher(ws, backend, "inventory_database", table_crawler)
+    principal_grants = create_autospec(PrincipalACL)
     table_migrate = TablesMigrator(
-        table_crawler, grant_crawler, ws, backend, table_mapping, group_manager, migration_status_refresher
+        table_crawler,
+        grant_crawler,
+        ws,
+        backend,
+        table_mapping,
+        group_manager,
+        migration_status_refresher,
+        principal_grants,
     )
     table_migrate.migrate_tables()

@@ -83,8 +91,16 @@ def test_migrate_dbfs_root_tables_should_be_skipped_when_upgrading_external(ws):
     table_mapping = table_mapping_mock(["managed_dbfs"])
     group_manager = GroupManager(backend, ws, "inventory_database")
     migration_status_refresher = MigrationStatusRefresher(ws, backend, "inventory_database", table_crawler)
+    principal_grants = create_autospec(PrincipalACL)
     table_migrate = TablesMigrator(
-        table_crawler, grant_crawler, ws, backend, table_mapping, group_manager, migration_status_refresher
+        table_crawler,
+        grant_crawler,
+        ws,
+        backend,
+        table_mapping,
+        group_manager,
+        migration_status_refresher,
+        principal_grants,
     )
     table_migrate.migrate_tables(what=What.EXTERNAL_SYNC)

@@ -102,8 +118,16 @@ def test_migrate_external_tables_should_produce_proper_queries(ws):
     table_mapping = table_mapping_mock(["external_src"])
     group_manager = GroupManager(backend, ws, "inventory_database")
     migration_status_refresher = MigrationStatusRefresher(ws, backend, "inventory_database", table_crawler)
+    principal_grants = create_autospec(PrincipalACL)
     table_migrate = TablesMigrator(
-        table_crawler, grant_crawler, ws, backend, table_mapping, group_manager, migration_status_refresher
+        table_crawler,
+        grant_crawler,
+        ws,
+        backend,
+        table_mapping,
+        group_manager,
+        migration_status_refresher,
+        principal_grants,
     )
     table_migrate.migrate_tables()

@@ -128,8 +152,16 @@ def test_migrate_external_table_failed_sync(ws, caplog):
     table_mapping = table_mapping_mock(["external_src"])
     group_manager = GroupManager(backend, ws, "inventory_database")
     migration_status_refresher = MigrationStatusRefresher(ws, backend, "inventory_database", table_crawler)
+    principal_grants = create_autospec(PrincipalACL)
     table_migrate = TablesMigrator(
-        table_crawler, grant_crawler, ws, backend, table_mapping, group_manager, migration_status_refresher
+        table_crawler,
+        grant_crawler,
+        ws,
+        backend,
+        table_mapping,
+        group_manager,
+        migration_status_refresher,
+        principal_grants,
     )
     table_migrate.migrate_tables()
     assert "SYNC command failed to migrate" in caplog.text
@@ -166,8 +198,16 @@ def test_migrate_already_upgraded_table_should_produce_no_queries(ws):
     ]
     group_manager = GroupManager(backend, ws, "inventory_database")
     migration_status_refresher = MigrationStatusRefresher(ws, backend, "inventory_database", table_crawler)
+    principal_grants = create_autospec(PrincipalACL)
     table_migrate = TablesMigrator(
-        table_crawler, grant_crawler, ws, backend, table_mapping, group_manager, migration_status_refresher
+        table_crawler,
+        grant_crawler,
+        ws,
+        backend,
+        table_mapping,
+        group_manager,
+        migration_status_refresher,
+        principal_grants,
     )
     table_migrate.migrate_tables()

@@ -185,8 +225,16 @@ def test_migrate_unsupported_format_table_should_produce_no_queries(ws):
     table_mapping = table_mapping_mock(["external_src_unsupported"])
     group_manager = GroupManager(backend, ws, "inventory_database")
     migration_status_refresher = MigrationStatusRefresher(ws, backend, "inventory_database", table_crawler)
+    principal_grants = create_autospec(PrincipalACL)
     table_migrate = TablesMigrator(
-        table_crawler, grant_crawler, ws, backend, table_mapping, group_manager, migration_status_refresher
+        table_crawler,
+        grant_crawler,
+        ws,
+        backend,
+        table_mapping,
+        group_manager,
+        migration_status_refresher,
+        principal_grants,
     )
     table_migrate.migrate_tables()

@@ -203,8 +251,16 @@ def test_migrate_view_should_produce_proper_queries(ws):
     table_mapping = table_mapping_mock(["view"])
     group_manager = GroupManager(backend, ws, "inventory_database")
     migration_status_refresher = MigrationStatusRefresher(ws, backend, "inventory_database", table_crawler)
+    principal_grants = create_autospec(PrincipalACL)
     table_migrate = TablesMigrator(
-        table_crawler, grant_crawler, ws, backend, table_mapping, group_manager, migration_status_refresher
+        table_crawler,
+        grant_crawler,
+        ws,
+        backend,
+        table_mapping,
+        group_manager,
+        migration_status_refresher,
+        principal_grants,
     )
     table_migrate.migrate_tables()

@@ -309,8 +365,16 @@ def get_table_migrate(backend: SqlBackend) -> TablesMigrator:
     group_manager = GroupManager(backend, client, "inventory_database")
     table_mapping = table_mapping_mock()
     migration_status_refresher = MigrationStatusRefresher(client, backend, "inventory_database", table_crawler)
+    principal_grants = create_autospec(PrincipalACL)
     table_migrate = TablesMigrator(
-        table_crawler, grant_crawler, client, backend, table_mapping, group_manager, migration_status_refresher
+        table_crawler,
+        grant_crawler,
+        client,
+        backend,
+        table_mapping,
+        group_manager,
+        migration_status_refresher,
+        principal_grants,
     )
     return table_migrate

@@ -372,8 +436,16 @@ def test_no_migrated_tables(ws):
     ]
     group_manager = GroupManager(backend, ws, "inventory_database")
     migration_status_refresher = MigrationStatusRefresher(ws, backend, "inventory_database", table_crawler)
+    principal_grants = create_autospec(PrincipalACL)
     table_migrate = TablesMigrator(
-        table_crawler, grant_crawler, ws, backend, table_mapping, group_manager, migration_status_refresher
+        table_crawler,
+        grant_crawler,
+        ws,
+        backend,
+        table_mapping,
+        group_manager,
+        migration_status_refresher,
+        principal_grants,
     )
     table_migrate.migrate_tables()
     table_migrate.revert_migrated_tables("test_schema1", "test_table1")
@@ -406,8 +478,16 @@ def test_empty_revert_report(ws):
     table_mapping = table_mapping_mock()
     group_manager = GroupManager(backend, ws, "inventory_database")
     migration_status_refresher = MigrationStatusRefresher(ws, backend, "inventory_database", table_crawler)
+    principal_grants = create_autospec(PrincipalACL)
     table_migrate = TablesMigrator(
-        table_crawler, grant_crawler, ws, backend, table_mapping, group_manager, migration_status_refresher
+        table_crawler,
+        grant_crawler,
+        ws,
+        backend,
+        table_mapping,
+        group_manager,
+        migration_status_refresher,
+        principal_grants,
     )
     table_migrate.migrate_tables()
     assert not table_migrate.print_revert_report(delete_managed=False)
@@ -429,8 +509,16 @@ def test_is_upgraded(ws):
     table_mapping = table_mapping_mock()
     group_manager = GroupManager(backend, ws, "inventory_database")
     migration_status_refresher = MigrationStatusRefresher(ws, backend, "inventory_database", table_crawler)
+    principal_grants = create_autospec(PrincipalACL)
     table_migrate = TablesMigrator(
-        table_crawler, grant_crawler, ws, backend, table_mapping, group_manager, migration_status_refresher
+        table_crawler,
+        grant_crawler,
+        ws,
+        backend,
+        table_mapping,
+        group_manager,
+        migration_status_refresher,
+        principal_grants,
     )
     table_migrate.migrate_tables()
     assert table_migrate.is_migrated("schema1", "table1")
@@ -625,8 +713,16 @@ def test_migrate_acls_should_produce_proper_queries(ws, caplog):
     table_mapping = table_mapping_mock(["managed_dbfs", "managed_mnt", "managed_other", "view"])
     group_manager = GroupManager(backend, ws, "inventory_database")
     migration_status_refresher = MigrationStatusRefresher(ws, backend, "inventory_database", table_crawler)
+    principal_grants = create_autospec(PrincipalACL)
     table_migrate = TablesMigrator(
-        table_crawler, grant_crawler, ws, backend, table_mapping, group_manager, migration_status_refresher
+        table_crawler,
+        grant_crawler,
+        ws,
+        backend,
+        table_mapping,
+        group_manager,
+        migration_status_refresher,
+        principal_grants,
     )
     table_migrate.migrate_tables(acl_strategy=[AclMigrationWhat.LEGACY_TACL])

@@ -642,3 +738,35 @@
     assert "GRANT MODIFY ON VIEW ucx_default.db1_dst.view_dst TO `account group`" not in backend.queries

     assert "Cannot identify UC grant" in caplog.text
+
+
+def test_migrate_principal_acls_should_produce_proper_queries(ws):
+    errors = {}
+    rows = {r"SYNC .*": MockBackend.rows("status_code", "description")[("SUCCESS", "test")]}
+    backend = MockBackend(fails_on_first=errors, rows=rows)
+    table_crawler = TablesCrawler(backend, "inventory_database")
+    udf_crawler = UdfsCrawler(backend, "inventory_database")
+    grant_crawler = GrantsCrawler(table_crawler, udf_crawler)
+    table_mapping = table_mapping_mock(["managed_dbfs", "managed_mnt", "managed_other", "view"])
+    group_manager = GroupManager(backend, ws, "inventory_database")
+    migration_status_refresher = MigrationStatusRefresher(ws, backend, "inventory_database", table_crawler)
+    principal_grants = create_autospec(PrincipalACL)
+    expected_grants = [
+        Grant('spn1', "ALL PRIVILEGES", "hive_metastore", 'db1_src', 'managed_dbfs'),
+        Grant('spn1', "USE", "hive_metastore", 'db1_src'),
+        Grant('spn1', "USE", "hive_metastore"),
+    ]
+    principal_grants.get_interactive_cluster_grants.return_value = expected_grants
+    table_migrate = TablesMigrator(
+        table_crawler,
+        grant_crawler,
+        ws,
+        backend,
+        table_mapping,
+        group_manager,
+        migration_status_refresher,
+        principal_grants,
+    )
+    table_migrate.migrate_tables(acl_strategy=[AclMigrationWhat.PRINCIPAL])
+
+    assert "GRANT ALL PRIVILEGES ON TABLE ucx_default.db1_dst.managed_dbfs TO `spn1`" in backend.queries
diff --git a/tests/unit/test_runtime.py b/tests/unit/test_runtime.py
index b0267f7ddc..c6368145a2 100644
--- a/tests/unit/test_runtime.py
+++ b/tests/unit/test_runtime.py
@@ -55,6 +55,7 @@ def azure_mock_config() -> WorkspaceConfig:
 def mock_installation() -> MockInstallation:
     return MockInstallation(
         {
+            'config.yml': {'warehouse_id': 'abc', 'connect': {'host': 'a', 'token': 'b'}, 'inventory_database': 'ucx'},
             'mapping.csv': [
                 {
                     'catalog_name': 'catalog',
@@ -64,7 +65,7 @@ def mock_installation() -> MockInstallation:
                     'src_table': 'table',
                     'workspace_name': 'workspace',
                 },
-            ]
+            ],
         }
     )

diff --git a/tests/unit/workspace_access/test_tacl.py b/tests/unit/workspace_access/test_tacl.py
index 1bceef9060..e8c6b1519c 100644
--- a/tests/unit/workspace_access/test_tacl.py
+++ b/tests/unit/workspace_access/test_tacl.py
@@ -3,8 +3,8 @@
 import pytest
 from databricks.labs.lsql.backends import MockBackend

-from databricks.labs.ucx.hive_metastore import GrantsCrawler, TablesCrawler
-from databricks.labs.ucx.hive_metastore.grants import Grant
+from databricks.labs.ucx.hive_metastore import TablesCrawler
+from databricks.labs.ucx.hive_metastore.grants import Grant, GrantsCrawler
 from databricks.labs.ucx.hive_metastore.udfs import UdfsCrawler
 from databricks.labs.ucx.workspace_access.base import Permissions
 from databricks.labs.ucx.workspace_access.groups import MigratedGroup, MigrationState