From 63d9d4cd56e4477b0076e7241663e4fde1772f93 Mon Sep 17 00:00:00 2001
From: Ziyuan Qin
Date: Fri, 2 Feb 2024 15:56:15 -0800
Subject: [PATCH 01/76] init files for #339

---
 src/databricks/labs/ucx/migration/__init__.py          | 0
 src/databricks/labs/ucx/migration/azure_credentials.py | 0
 tests/integration/migration/__init__.py                | 0
 tests/integration/migration/test_azure_credentials.py  | 0
 tests/unit/migration/__init__.py                       | 0
 tests/unit/migration/test_azure_credentials.py         | 0
 6 files changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 src/databricks/labs/ucx/migration/__init__.py
 create mode 100644 src/databricks/labs/ucx/migration/azure_credentials.py
 create mode 100644 tests/integration/migration/__init__.py
 create mode 100644 tests/integration/migration/test_azure_credentials.py
 create mode 100644 tests/unit/migration/__init__.py
 create mode 100644 tests/unit/migration/test_azure_credentials.py

diff --git a/src/databricks/labs/ucx/migration/__init__.py b/src/databricks/labs/ucx/migration/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/src/databricks/labs/ucx/migration/azure_credentials.py b/src/databricks/labs/ucx/migration/azure_credentials.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/tests/integration/migration/__init__.py b/tests/integration/migration/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/tests/integration/migration/test_azure_credentials.py b/tests/integration/migration/test_azure_credentials.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/tests/unit/migration/__init__.py b/tests/unit/migration/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/tests/unit/migration/test_azure_credentials.py b/tests/unit/migration/test_azure_credentials.py
new file mode 100644
index 0000000000..e69de29bb2

From 3c02941e18dbe8d047138adf746de8b5b37e4104 Mon Sep 17 00:00:00 2001
From: Ziyuan Qin
Date: Fri, 2 Feb 2024 16:59:35 -0800
Subject: [PATCH 02/76] 1. Draft the azure_credentials.py framework: add
 AzureServicePrincipalMigration initialization, a function to fetch service
 principal client secrets from the Secrets API, and a function to save the
 action plan; add a draft CLI command. 2.
Resolve conflicts when rebasing onto main
---
 src/databricks/labs/ucx/azure/access.py | 6 +
 src/databricks/labs/ucx/cli.py | 37 ++++
 .../labs/ucx/migration/azure_credentials.py | 170 ++++++++++++++++++
 3 files changed, 213 insertions(+)

diff --git a/src/databricks/labs/ucx/azure/access.py b/src/databricks/labs/ucx/azure/access.py
index e489d967bb..e05e5370d8 100644
--- a/src/databricks/labs/ucx/azure/access.py
+++ b/src/databricks/labs/ucx/azure/access.py
@@ -100,3 +100,9 @@ def _get_storage_accounts(self) -> list[str]:
             if storage_acct not in storage_accounts:
                 storage_accounts.append(storage_acct)
         return storage_accounts
+
+    def load_spn_permission(self, customized_csv: str) -> list[StoragePermissionMapping]:
+        """
+        Load StoragePermissionMapping info from azure_storage_account_info.csv
+        """
+        return self._installation.load(list[StoragePermissionMapping], self._filename)
diff --git a/src/databricks/labs/ucx/cli.py b/src/databricks/labs/ucx/cli.py
index c4a7192117..a7bfe2cb5b 100644
--- a/src/databricks/labs/ucx/cli.py
+++ b/src/databricks/labs/ucx/cli.py
@@ -19,6 +19,7 @@
 from databricks.labs.ucx.hive_metastore.mapping import TableMapping
 from databricks.labs.ucx.hive_metastore.table_migrate import TableMove, TablesMigrate
 from databricks.labs.ucx.install import WorkspaceInstallation
+from databricks.labs.ucx.migration.azure_credentials import AzureServicePrincipalMigration
 from databricks.labs.ucx.workspace_access.groups import GroupManager

 ucx = App(__file__)
@@ -282,5 +283,41 @@ def _aws_principal_prefix_access(w: WorkspaceClient, aws_profile: str):
     logger.info(f"UC roles and bucket info saved {uc_role_path}")

+@ucx.command
+def migrate_azure_service_principals(w: WorkspaceClient):
+    """Migrate Azure Service Principals, which have Storage Blob Data Contributor,
+    Storage Blob Data Reader, Storage Blob Data Owner roles on ADLS Gen2 locations that are being used in
+    Databricks, to UC storage credentials.
+
+    The mapping of Azure Service Principals to locations is listed in /Users/{user_name}/.ucx/azure_storage_account_info.csv,
+    which is generated by the save_azure_storage_accounts command. Please review the file and delete the Service Principals
+    you do not want migrated.
+
+    The command will only migrate the Service Principals that have a client secret stored in a Databricks secret.
+    """
+    logger.info("Running migrate_azure_service_principals command")
+    prompts = Prompts()
+    if not w.config.is_azure:
+        logger.error("Workspace is not on azure, please run this command on azure databricks workspaces.")
+        return
+
+    csv_confirmed = prompts.confirm(f"Have you reviewed the azure_storage_account_info.csv "
+                                    f"and confirm listed service principals are allowed to be checked for migration?")
+    if csv_confirmed is not True:
+        return
+
+    service_principal_migration = AzureServicePrincipalMigration.for_cli(w)
+    action_plan_file = service_principal_migration.generate_migration_list()
+    logger.info("Azure Service Principals subject for migration are checked")
+
+    migration_list_confirmed = prompts.confirm(f"Service Principals subject to be migrated to UC storage credentials "
+                                               f"are listed in {action_plan_file}. 
Please confirm to execute the migration.") + if migration_list_confirmed is not True: + return + + service_principal_migration.execute_migration() + return + + if __name__ == "__main__": ucx() diff --git a/src/databricks/labs/ucx/migration/azure_credentials.py b/src/databricks/labs/ucx/migration/azure_credentials.py index e69de29bb2..2a079f7d1e 100644 --- a/src/databricks/labs/ucx/migration/azure_credentials.py +++ b/src/databricks/labs/ucx/migration/azure_credentials.py @@ -0,0 +1,170 @@ +import base64 +import logging +from dataclasses import dataclass + +from databricks.labs.blueprint.installation import Installation +from databricks.sdk import WorkspaceClient +from databricks.sdk.errors import Unauthenticated, PermissionDenied, ResourceDoesNotExist, InternalError + +from databricks.labs.ucx.assessment.azure import AzureResourcePermissions, AzureResources, \ + StoragePermissionMapping, AzureServicePrincipalCrawler +from databricks.labs.ucx.config import WorkspaceConfig +from databricks.labs.ucx.framework.crawlers import StatementExecutionBackend +from databricks.labs.ucx.hive_metastore.locations import ExternalLocations + +logger = logging.getLogger(__name__) + + +@dataclass +class ServicePrincipalMigrationInfo(StoragePermissionMapping): + # Service Principal's client_secret stored in Databricks secret + client_secret: str + + @classmethod + def from_storage_permission_mapping(cls, storage_permission_mapping: StoragePermissionMapping, client_secret: str): + return cls(prefix=storage_permission_mapping.prefix, + client_id=storage_permission_mapping.client_id, + principal=storage_permission_mapping.principal, + privilege=storage_permission_mapping.privilege, + client_secret=client_secret) + + +class AzureServicePrincipalMigration: + + def __init__(self, installation: Installation, ws: WorkspaceClient, azure_resource_permissions: AzureResourcePermissions, + azure_sp_crawler: AzureServicePrincipalCrawler): + self._final_sp_list = None + self._installation = installation + self._ws = ws + self._azure_resource_permissions = azure_resource_permissions + self._azure_sp_crawler = azure_sp_crawler + self._action_plan = 'service_principals_for_storage_credentials.csv' + + + @classmethod + def for_cli(cls, ws: WorkspaceClient, customized_csv: str, replace_with_ac: bool, product='ucx'): + installation = Installation.current(ws, product) + config = installation.load(WorkspaceConfig) + sql_backend = StatementExecutionBackend(ws, config.warehouse_id) + azurerm = AzureResources(ws) + locations = ExternalLocations(ws, sql_backend, config.inventory_database) + + azure_resource_permissions = AzureResourcePermissions(installation, ws, azurerm, locations) + azure_sp_crawler = AzureServicePrincipalCrawler(ws, sql_backend, config.inventory_database) + + return cls(installation, ws, azure_resource_permissions, azure_sp_crawler, customized_csv, replace_with_ac) + + + def _list_storage_credentials(self): + # list existed storage credentials that is using service principal, capture the service principal's application_id + return {} + + + def _check_sp_in_storage_credentials(self, sp_list, sc_set): + # if sp is already used, take it off from the sp_list + return list() + + + def _fetch_client_secret(self, sp_list: list[StoragePermissionMapping]) -> list[ServicePrincipalMigrationInfo]: + # check AzureServicePrincipalInfo from AzureServicePrincipalCrawler, if AzureServicePrincipalInfo + # has secret_scope and secret_key not empty, fetch the client_secret and put it to the client_secret field + # + # The input 
StoragePermissionMapping may have managed identity mixed in, we will ignore them for now, as + # they won't have any client_secret, we will process managed identity in the future. + + # fetch client_secrets of crawled service principal, if any + azure_sp_info_with_client_secret = {} + azure_sp_infos = self._azure_sp_crawler.snapshot() + + for azure_sp_info in azure_sp_infos: + if azure_sp_info.secret_scope is None: + continue + if azure_sp_info.secret_key is None: + continue + + try: + secret_response = self._ws.secrets.get_secret(azure_sp_info.secret_scope, azure_sp_info.secret_key) + except Unauthenticated: + logger.info(f"User is unauthenticated to fetch secret value. Cannot fetch the service principal " + f"client_secret for {azure_sp_info.application_id}. Will not reuse this client_secret") + continue + except PermissionDenied: + logger.info(f"User does not have permission to read secret value for {azure_sp_info.secret_scope}.{azure_sp_info.secret_key}. " + f"Cannot fetch the service principal client_secret for {azure_sp_info.application_id}. " + f"Will not reuse this client_secret") + continue + except ResourceDoesNotExist: + logger.info(f"Secret {azure_sp_info.secret_scope}.{azure_sp_info.secret_key} does not exists. " + f"Cannot fetch the service principal client_secret for {azure_sp_info.application_id}. " + f"Will not reuse this client_secret") + continue + except InternalError: + logger.info(f"InternalError while reading secret {azure_sp_info.secret_scope}.{azure_sp_info.secret_key}. " + f"Cannot fetch the service principal client_secret for {azure_sp_info.application_id}. " + f"Will not reuse this client_secret") + continue + + # decode the bytes string from GetSecretResponse to utf-8 string + # TODO: handle different encoding if we have feedback from the customer + try: + secret_value = base64.b64decode(secret_response.value).decode("utf-8") + azure_sp_info_with_client_secret.update(azure_sp_info.application_id, secret_value) + except UnicodeDecodeError: + logger.info(f"Secret {azure_sp_info.secret_scope}.{azure_sp_info.secret_key} has Base64 bytes that cannot be decoded to utf-8 string . " + f"Cannot fetch the service principal client_secret for {azure_sp_info.application_id}. " + f"Will not reuse this client_secret") + + # update the list of ServicePrincipalMigrationInfo if client_secret is found + for sp in sp_list: + if sp.client_id in azure_sp_info_with_client_secret: + yield ServicePrincipalMigrationInfo.from_storage_permission_mapping(sp, azure_sp_info_with_client_secret[sp.client_id]) + + + def _save_action_plan(self, sp_list_with_secret) -> str | None: + # save action plan to a file for customer to review. 
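+        # e.g. a saved plan row would keep only these fields (hypothetical values):
+        #   prefix=abfss://container@storageacct.dfs.core.windows.net/, client_id=<application-id>,
+        #   principal=my-spn-name, privilege=WRITE_FILES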
+ # client_secret need to be removed + sp_list_wo_secret = [] + for sp in sp_list_with_secret: + sp_list_wo_secret.append(StoragePermissionMapping(sp.prefix, sp.client_id, sp.principal, sp.privilege)) + + return self._installation.save(sp_list_wo_secret, filename=self._action_plan) + + + def generate_migration_list(self): + """ + Create the list of SP that need to be migrated, output an action plan as a csv file for users to confirm + :return: + """ + # load sp list from azure_storage_account_info.csv + loaded_sp_list = self._azure_resource_permissions.load_spn_permission() + # list existed storage credentials + sc_set = self._list_storage_credentials() + # check if the sp is already used in UC storage credential + filtered_sp_list = self._check_sp_in_storage_credentials(loaded_sp_list, sc_set) + # fetch sp client_secret if any + sp_list_with_secret = self._fetch_client_secret(filtered_sp_list) + self._final_sp_list = sp_list_with_secret + # output the action plan for customer to confirm + return self._save_action_plan(sp_list_with_secret) + + + def _create_sc_with_client_secret(self, sp): + + storage_credential="" + self._validate_sc(storage_credential) + return + + + def _validate_sc(self, storage_credential): + return + + def execute_migration(self): + """ + Execute the action plan after user confirmed + :return: + """ + for sp in self._final_sp_list: + self._create_sc_with_client_secret(sp) + return + + From 477b5df7eb3506ef8229a9d74c620cc265b682ca Mon Sep 17 00:00:00 2001 From: Ziyuan Qin Date: Sun, 4 Feb 2024 21:51:27 -0800 Subject: [PATCH 03/76] add _list_storage_credentials to azure service principal migration --- src/databricks/labs/ucx/migration/azure_credentials.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/databricks/labs/ucx/migration/azure_credentials.py b/src/databricks/labs/ucx/migration/azure_credentials.py index 2a079f7d1e..0ebea04f59 100644 --- a/src/databricks/labs/ucx/migration/azure_credentials.py +++ b/src/databricks/labs/ucx/migration/azure_credentials.py @@ -55,9 +55,15 @@ def for_cli(cls, ws: WorkspaceClient, customized_csv: str, replace_with_ac: bool return cls(installation, ws, azure_resource_permissions, azure_sp_crawler, customized_csv, replace_with_ac) - def _list_storage_credentials(self): + def _list_storage_credentials(self) -> set(str): # list existed storage credentials that is using service principal, capture the service principal's application_id - return {} + storage_credential_app_ids = set() + + storage_credentials = self._ws.storage_credentials.list(max_results=0) + for storage_credential in storage_credentials: + if storage_credential.azure_service_principal: + storage_credential_app_ids.add(storage_credential.azure_service_principal.application_id) + return storage_credential_app_ids def _check_sp_in_storage_credentials(self, sp_list, sc_set): From 3b9979a0f174c2e41bd9fb4d97ed95c8a381334f Mon Sep 17 00:00:00 2001 From: Ziyuan Qin Date: Sun, 4 Feb 2024 21:55:11 -0800 Subject: [PATCH 04/76] fix typo defining a set[str] for storage credentials --- src/databricks/labs/ucx/migration/azure_credentials.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/databricks/labs/ucx/migration/azure_credentials.py b/src/databricks/labs/ucx/migration/azure_credentials.py index 0ebea04f59..27b167b3a4 100644 --- a/src/databricks/labs/ucx/migration/azure_credentials.py +++ b/src/databricks/labs/ucx/migration/azure_credentials.py @@ -55,7 +55,7 @@ def for_cli(cls, ws: WorkspaceClient, customized_csv: str, 
replace_with_ac: bool return cls(installation, ws, azure_resource_permissions, azure_sp_crawler, customized_csv, replace_with_ac) - def _list_storage_credentials(self) -> set(str): + def _list_storage_credentials(self) -> set[str]: # list existed storage credentials that is using service principal, capture the service principal's application_id storage_credential_app_ids = set() From 808e12333c4c292f7b4766dade5b8737f25f3cd6 Mon Sep 17 00:00:00 2001 From: Ziyuan Qin Date: Sun, 4 Feb 2024 22:13:25 -0800 Subject: [PATCH 05/76] add _check_sp_in_storage_credentials to filter out service principals that are already used in UC storage credentials. --- .../labs/ucx/migration/azure_credentials.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/databricks/labs/ucx/migration/azure_credentials.py b/src/databricks/labs/ucx/migration/azure_credentials.py index 27b167b3a4..477f9a3571 100644 --- a/src/databricks/labs/ucx/migration/azure_credentials.py +++ b/src/databricks/labs/ucx/migration/azure_credentials.py @@ -61,14 +61,21 @@ def _list_storage_credentials(self) -> set[str]: storage_credentials = self._ws.storage_credentials.list(max_results=0) for storage_credential in storage_credentials: + # only add service principal's application_id, ignore managed identity based storage_credential if storage_credential.azure_service_principal: storage_credential_app_ids.add(storage_credential.azure_service_principal.application_id) + logger.info(f"Found {len(storage_credential_app_ids)} distinct service principals already used in UC storage credentials") return storage_credential_app_ids - def _check_sp_in_storage_credentials(self, sp_list, sc_set): + def _check_sp_in_storage_credentials(self, sp_list, sc_set) -> list[StoragePermissionMapping]: # if sp is already used, take it off from the sp_list - return list() + filtered_sp_list = [] + for service_principal in sp_list: + if service_principal.client_id not in sc_set: + filtered_sp_list.append(service_principal) + + return filtered_sp_list def _fetch_client_secret(self, sp_list: list[StoragePermissionMapping]) -> list[ServicePrincipalMigrationInfo]: From caaaec11ce0be767b6b62e9bdab7fbf56812c52b Mon Sep 17 00:00:00 2001 From: Ziyuan Qin Date: Mon, 5 Feb 2024 00:18:20 -0800 Subject: [PATCH 06/76] Add directory_id/tenant_id to crawled service principals. 
It is required when creating UC storage credentials using a service principal
---
 src/databricks/labs/ucx/assessment/azure.py | 1 +
 src/databricks/labs/ucx/azure/access.py | 3 +++
 src/databricks/labs/ucx/azure/resources.py | 7 ++++++-
 src/databricks/labs/ucx/migration/azure_credentials.py | 5 +++--
 4 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/src/databricks/labs/ucx/assessment/azure.py b/src/databricks/labs/ucx/assessment/azure.py
index cee554d58a..f6fc8eeba1 100644
--- a/src/databricks/labs/ucx/assessment/azure.py
+++ b/src/databricks/labs/ucx/assessment/azure.py
@@ -170,3 +170,4 @@ def _get_azure_spn_from_config(self, config: dict) -> set[AzureServicePrincipalI
                 )
             )
         return set_service_principals
+
diff --git a/src/databricks/labs/ucx/azure/access.py b/src/databricks/labs/ucx/azure/access.py
index e05e5370d8..179dcd7e31 100644
--- a/src/databricks/labs/ucx/azure/access.py
+++ b/src/databricks/labs/ucx/azure/access.py
@@ -17,6 +17,8 @@ class StoragePermissionMapping:
     client_id: str
     principal: str
     privilege: str
+    # Need this directory_id/tenant_id when creating UC storage credentials using a service principal
+    directory_id: str

 class AzureResourcePermissions:
@@ -63,6 +65,7 @@ def _map_storage(self, storage: AzureResource) -> list[StoragePermissionMapping]
                     client_id=role_assignment.principal.client_id,
                     principal=role_assignment.principal.display_name,
                     privilege=privilege,
+                    directory_id = role_assignment.principal.directory_id
                 )
             )
         return out
diff --git a/src/databricks/labs/ucx/azure/resources.py b/src/databricks/labs/ucx/azure/resources.py
index 9dc892a1a8..6c86d464b6 100644
--- a/src/databricks/labs/ucx/azure/resources.py
+++ b/src/databricks/labs/ucx/azure/resources.py
@@ -70,6 +70,8 @@ class Principal:
     client_id: str
     display_name: str
     object_id: str
+    # Need this directory_id/tenant_id when creating UC storage credentials using a service principal
+    directory_id: str

 @dataclass
@@ -171,10 +173,13 @@ def _get_principal(self, principal_id: str) -> Principal | None:
         client_id = raw.get("appId")
         display_name = raw.get("displayName")
         object_id = raw.get("id")
+        # Need this directory_id/tenant_id when creating UC storage credentials using a service principal
+        directory_id = raw.get("appOwnerOrganizationId")
         assert client_id is not None
         assert display_name is not None
         assert object_id is not None
-        self._principals[principal_id] = Principal(client_id, display_name, object_id)
+        assert directory_id is not None
+        self._principals[principal_id] = Principal(client_id, display_name, object_id, directory_id)
         return self._principals[principal_id]

     def role_assignments(
diff --git a/src/databricks/labs/ucx/migration/azure_credentials.py b/src/databricks/labs/ucx/migration/azure_credentials.py
index 477f9a3571..a573dbf0cb 100644
--- a/src/databricks/labs/ucx/migration/azure_credentials.py
+++ b/src/databricks/labs/ucx/migration/azure_credentials.py
@@ -26,6 +26,7 @@ def from_storage_permission_mapping(cls, storage_permission_mapping: StoragePerm
                    client_id=storage_permission_mapping.client_id,
                    principal=storage_permission_mapping.principal,
                    privilege=storage_permission_mapping.privilege,
+                   directory_id=storage_permission_mapping.directory_id,
                    client_secret=client_secret)

@@ -161,8 +162,8 @@ def generate_migration_list(self):
         return self._save_action_plan(sp_list_with_secret)

-    def _create_sc_with_client_secret(self, sp):
-
+    def _create_sc_with_client_secret(self, sp: ServicePrincipalMigrationInfo):
+        #self._ws.storage_credentials.create()
         storage_credential=""
         self._validate_sc(storage_credential)
return From d8dacfcf9d9a5efe3eb936bf11b04d59a320d650 Mon Sep 17 00:00:00 2001 From: Ziyuan Qin Date: Mon, 5 Feb 2024 00:20:43 -0800 Subject: [PATCH 07/76] remove unused parameter customized_csv from load_spn_permission() --- src/databricks/labs/ucx/azure/access.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/databricks/labs/ucx/azure/access.py b/src/databricks/labs/ucx/azure/access.py index 179dcd7e31..10949dcaa4 100644 --- a/src/databricks/labs/ucx/azure/access.py +++ b/src/databricks/labs/ucx/azure/access.py @@ -104,8 +104,11 @@ def _get_storage_accounts(self) -> list[str]: storage_accounts.append(storage_acct) return storage_accounts - def load_spn_permission(self, customized_csv: str) -> list[StoragePermissionMapping]: + def load_spn_permission(self) -> list[StoragePermissionMapping]: """ Load StoragePermissionMapping info from azure_storage_account_info.csv + :return: """ - return self._installation.load(list[StoragePermissionMapping], self._filename) + storage_account_infos = self._installation.load(list[StoragePermissionMapping], self._filename) + + return storage_account_infos From a7a78207171053e41f6a8b534b00809bf94e9bda Mon Sep 17 00:00:00 2001 From: Ziyuan Qin Date: Mon, 5 Feb 2024 01:03:35 -0800 Subject: [PATCH 08/76] add functions to create and validate the storage credential --- src/databricks/labs/ucx/cli.py | 1 + .../labs/ucx/migration/azure_credentials.py | 54 +++++++++++++++---- 2 files changed, 45 insertions(+), 10 deletions(-) diff --git a/src/databricks/labs/ucx/cli.py b/src/databricks/labs/ucx/cli.py index a7bfe2cb5b..4910b3af71 100644 --- a/src/databricks/labs/ucx/cli.py +++ b/src/databricks/labs/ucx/cli.py @@ -316,6 +316,7 @@ def migrate_azure_service_principals(w: WorkspaceClient): return service_principal_migration.execute_migration() + logger.info("Storage credentials created. 
Please check azure_service_principal_migration_result.csv for results ") return diff --git a/src/databricks/labs/ucx/migration/azure_credentials.py b/src/databricks/labs/ucx/migration/azure_credentials.py index a573dbf0cb..7d6cb46616 100644 --- a/src/databricks/labs/ucx/migration/azure_credentials.py +++ b/src/databricks/labs/ucx/migration/azure_credentials.py @@ -5,6 +5,7 @@ from databricks.labs.blueprint.installation import Installation from databricks.sdk import WorkspaceClient from databricks.sdk.errors import Unauthenticated, PermissionDenied, ResourceDoesNotExist, InternalError +from databricks.sdk.service.catalog import AzureServicePrincipal, Privilege, StorageCredentialInfo, ValidationResult from databricks.labs.ucx.assessment.azure import AzureResourcePermissions, AzureResources, \ StoragePermissionMapping, AzureServicePrincipalCrawler @@ -30,10 +31,25 @@ def from_storage_permission_mapping(cls, storage_permission_mapping: StoragePerm client_secret=client_secret) +@dataclass +class StorageCredentialValidationResult(StorageCredentialInfo, ValidationResult): + @classmethod + def from_storage_credential_validation(cls, storage_credential: StorageCredentialInfo, validation: ValidationResult): + return cls(name=storage_credential.name, + azure_service_principal=storage_credential.azure_service_principal, + created_by=storage_credential.created_by, + read_only=storage_credential.read_only, + message=validation.message, + operation=validation.operation, + result=validation.result + ) + + class AzureServicePrincipalMigration: def __init__(self, installation: Installation, ws: WorkspaceClient, azure_resource_permissions: AzureResourcePermissions, azure_sp_crawler: AzureServicePrincipalCrawler): + self._output_file = "azure_service_principal_migration_result.csv" self._final_sp_list = None self._installation = installation self._ws = ws @@ -162,23 +178,41 @@ def generate_migration_list(self): return self._save_action_plan(sp_list_with_secret) - def _create_sc_with_client_secret(self, sp: ServicePrincipalMigrationInfo): - #self._ws.storage_credentials.create() - storage_credential="" - self._validate_sc(storage_credential) - return + def _create_sc_with_client_secret(self, sp: ServicePrincipalMigrationInfo) -> list(StorageCredentialValidationResult): + # prepare the storage credential properties + name = sp.principal + azure_service_principal = AzureServicePrincipal(directory_id=sp.directory_id, + application_id=sp.client_id, + client_secret=sp.client_secret) + comment = f"Created by UCX during migration to UC using Azure Service Principal: {sp.principal}" + read_only = False + if sp.privilege == Privilege.READ_FILES: + read_only = True + # create the storage credential + storage_credential = self._ws.storage_credentials.create(name=name, + azure_service_principal=azure_service_principal, + comment=comment, + read_only=read_only) + + validation_result = self._validate_sc(storage_credential, sp.prefix) + yield validation_result + + def _validate_sc(self, storage_credential, location) -> StorageCredentialValidationResult: + validation = self._ws.storage_credentials.validate(storage_credential_name=storage_credential.name, + url=location) + return StorageCredentialValidationResult.from_storage_credential_validation(storage_credential, + validation) - def _validate_sc(self, storage_credential): - return - def execute_migration(self): + def execute_migration(self) -> str | None: """ Execute the action plan after user confirmed :return: """ + execution_result = [] for sp in self._final_sp_list: - 
self._create_sc_with_client_secret(sp)
-        return
+            execution_result.append(self._create_storage_credential(sp))
+        return self._installation.save(execution_result, filename=self._output_file)

From 76e82803fb12d66888c90ffcce25dda5baef7d8c Mon Sep 17 00:00:00 2001
From: Ziyuan Qin
Date: Mon, 5 Feb 2024 01:21:03 -0800
Subject: [PATCH 09/76] rename the function for loading StoragePermissionMapping

---
 src/databricks/labs/ucx/azure/access.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/databricks/labs/ucx/azure/access.py b/src/databricks/labs/ucx/azure/access.py
index 10949dcaa4..11554a7e54 100644
--- a/src/databricks/labs/ucx/azure/access.py
+++ b/src/databricks/labs/ucx/azure/access.py
@@ -104,7 +104,7 @@ def _get_storage_accounts(self) -> list[str]:
                 storage_accounts.append(storage_acct)
         return storage_accounts

-    def load_spn_permission(self) -> list[StoragePermissionMapping]:
+    def load(self) -> list[StoragePermissionMapping]:
         """
         Load StoragePermissionMapping info from azure_storage_account_info.csv
         :return:

From 38beb9c6603d816e5546912e4f1555ef1172d6fd Mon Sep 17 00:00:00 2001
From: Ziyuan Qin
Date: Mon, 5 Feb 2024 12:32:39 -0800
Subject: [PATCH 10/76] extract the logic of reading a Databricks secret and
 decoding its value into a separate function

---
 .../labs/ucx/migration/azure_credentials.py | 66 ++++++++++---------
 1 file changed, 35 insertions(+), 31 deletions(-)

diff --git a/src/databricks/labs/ucx/migration/azure_credentials.py b/src/databricks/labs/ucx/migration/azure_credentials.py
index 7d6cb46616..dd0ec3bc6e 100644
--- a/src/databricks/labs/ucx/migration/azure_credentials.py
+++ b/src/databricks/labs/ucx/migration/azure_credentials.py
@@ -95,6 +95,39 @@ def _check_sp_in_storage_credentials(self, sp_list, sc_set) -> list[StoragePermi
         return filtered_sp_list

+    def _read_databricks_secret(self, scope: str, key: str, application_id: str) -> str | None:
+        try:
+            secret_response = self._ws.secrets.get_secret(scope, key)
+        except Unauthenticated:
+            logger.error(f"User is unauthenticated to fetch Databricks secret value for service principal to storage credential migration.")
+            raise
+        except PermissionDenied:
+            logger.error(f"User does not have permission to Databricks secret value for service principal to storage credential migration.")
+            raise
+        except ResourceDoesNotExist:
+            logger.info(f"Secret {scope}.{key} does not exists. "
+                        f"Cannot fetch the service principal client_secret for {application_id}. "
+                        f"Will not reuse this client_secret")
+            return None
+        except InternalError:
+            logger.info(f"InternalError while reading secret {scope}.{key}. "
+                        f"Cannot fetch the service principal client_secret for {application_id}. "
+                        f"Will not reuse this client_secret")
+            print(f"{application_id} is not migrated due to InternalError while fetching the client_secret from Databricks secret."
+                  f"You may rerun the migration command later to retry this service principal")
+            return None
+
+        # decode the bytes string from GetSecretResponse to utf-8 string
+        # TODO: handle different encoding if we have feedback from the customer
+        try:
+            return base64.b64decode(secret_response.value).decode("utf-8")
+        except UnicodeDecodeError:
+            logger.info(f"Secret {scope}.{key} has Base64 bytes that cannot be decoded to utf-8 string . "
+                        f"Cannot fetch the service principal client_secret for {application_id}. 
" + f"Will not reuse this client_secret") + return None + + def _fetch_client_secret(self, sp_list: list[StoragePermissionMapping]) -> list[ServicePrincipalMigrationInfo]: # check AzureServicePrincipalInfo from AzureServicePrincipalCrawler, if AzureServicePrincipalInfo # has secret_scope and secret_key not empty, fetch the client_secret and put it to the client_secret field @@ -111,38 +144,9 @@ def _fetch_client_secret(self, sp_list: list[StoragePermissionMapping]) -> list[ continue if azure_sp_info.secret_key is None: continue - - try: - secret_response = self._ws.secrets.get_secret(azure_sp_info.secret_scope, azure_sp_info.secret_key) - except Unauthenticated: - logger.info(f"User is unauthenticated to fetch secret value. Cannot fetch the service principal " - f"client_secret for {azure_sp_info.application_id}. Will not reuse this client_secret") - continue - except PermissionDenied: - logger.info(f"User does not have permission to read secret value for {azure_sp_info.secret_scope}.{azure_sp_info.secret_key}. " - f"Cannot fetch the service principal client_secret for {azure_sp_info.application_id}. " - f"Will not reuse this client_secret") - continue - except ResourceDoesNotExist: - logger.info(f"Secret {azure_sp_info.secret_scope}.{azure_sp_info.secret_key} does not exists. " - f"Cannot fetch the service principal client_secret for {azure_sp_info.application_id}. " - f"Will not reuse this client_secret") - continue - except InternalError: - logger.info(f"InternalError while reading secret {azure_sp_info.secret_scope}.{azure_sp_info.secret_key}. " - f"Cannot fetch the service principal client_secret for {azure_sp_info.application_id}. " - f"Will not reuse this client_secret") - continue - - # decode the bytes string from GetSecretResponse to utf-8 string - # TODO: handle different encoding if we have feedback from the customer - try: - secret_value = base64.b64decode(secret_response.value).decode("utf-8") + secret_value = self._read_databricks_secret(azure_sp_info.secret_scope, azure_sp_info.secret_key, azure_sp_info.application_id) + if secret_value: azure_sp_info_with_client_secret.update(azure_sp_info.application_id, secret_value) - except UnicodeDecodeError: - logger.info(f"Secret {azure_sp_info.secret_scope}.{azure_sp_info.secret_key} has Base64 bytes that cannot be decoded to utf-8 string . " - f"Cannot fetch the service principal client_secret for {azure_sp_info.application_id}. 
" - f"Will not reuse this client_secret") # update the list of ServicePrincipalMigrationInfo if client_secret is found for sp in sp_list: From 94d83385f9091380300d9db0d5d9096a4847b1e5 Mon Sep 17 00:00:00 2001 From: Ziyuan Qin Date: Mon, 5 Feb 2024 14:02:50 -0800 Subject: [PATCH 11/76] simplify the cli function migrate_azure_service_principals by moving most logics and promots into execute_migration() --- src/databricks/labs/ucx/cli.py | 20 +------ .../labs/ucx/migration/azure_credentials.py | 57 ++++++++++++------- 2 files changed, 37 insertions(+), 40 deletions(-) diff --git a/src/databricks/labs/ucx/cli.py b/src/databricks/labs/ucx/cli.py index 4910b3af71..1318af36a3 100644 --- a/src/databricks/labs/ucx/cli.py +++ b/src/databricks/labs/ucx/cli.py @@ -297,26 +297,8 @@ def migrate_azure_service_principals(w: WorkspaceClient): """ logger.info("Running migrate_azure_service_principals command") prompts = Prompts() - if not w.config.is_azure: - logger.error("Workspace is not on azure, please run this command on azure databricks workspaces.") - return - - csv_confirmed = prompts.confirm(f"Have you reviewed the azure_storage_account_info.csv " - f"and confirm listed service principals are allowed to be checked for migration?") - if csv_confirmed is not True: - return - service_principal_migration = AzureServicePrincipalMigration.for_cli(w) - action_plan_file = service_principal_migration.generate_migration_list() - logger.info("Azure Service Principals subject for migration are checked") - - migration_list_confirmed = prompts.confirm(f"Service Principals subject to be migrated to UC storage credentials " - f"are listed in {action_plan_file}. Please confirm to execute the migration.") - if migration_list_confirmed is not True: - return - - service_principal_migration.execute_migration() - logger.info("Storage credentials created. Please check azure_service_principal_migration_result.csv for results ") + service_principal_migration.execute_migration(prompts) return diff --git a/src/databricks/labs/ucx/migration/azure_credentials.py b/src/databricks/labs/ucx/migration/azure_credentials.py index dd0ec3bc6e..80f578d0b5 100644 --- a/src/databricks/labs/ucx/migration/azure_credentials.py +++ b/src/databricks/labs/ucx/migration/azure_credentials.py @@ -3,6 +3,7 @@ from dataclasses import dataclass from databricks.labs.blueprint.installation import Installation +from databricks.labs.blueprint.tui import Prompts from databricks.sdk import WorkspaceClient from databricks.sdk.errors import Unauthenticated, PermissionDenied, ResourceDoesNotExist, InternalError from databricks.sdk.service.catalog import AzureServicePrincipal, Privilege, StorageCredentialInfo, ValidationResult @@ -154,35 +155,32 @@ def _fetch_client_secret(self, sp_list: list[StoragePermissionMapping]) -> list[ yield ServicePrincipalMigrationInfo.from_storage_permission_mapping(sp, azure_sp_info_with_client_secret[sp.client_id]) - def _save_action_plan(self, sp_list_with_secret) -> str | None: - # save action plan to a file for customer to review. - # client_secret need to be removed - sp_list_wo_secret = [] + def _print_action_plan(self, sp_list_with_secret): + # print action plan to console for customer to review. 
for sp in sp_list_with_secret: - sp_list_wo_secret.append(StoragePermissionMapping(sp.prefix, sp.client_id, sp.principal, sp.privilege)) + print(f"Service Principal name: {sp.principal}, application_id: {sp.client_id}, privilege {sp.privilege} on location {sp.prefix}") - return self._installation.save(sp_list_wo_secret, filename=self._action_plan) - - def generate_migration_list(self): + def _generate_migration_list(self): """ Create the list of SP that need to be migrated, output an action plan as a csv file for users to confirm :return: """ # load sp list from azure_storage_account_info.csv - loaded_sp_list = self._azure_resource_permissions.load_spn_permission() + sp_list = self._azure_resource_permissions.load() # list existed storage credentials sc_set = self._list_storage_credentials() # check if the sp is already used in UC storage credential - filtered_sp_list = self._check_sp_in_storage_credentials(loaded_sp_list, sc_set) + filtered_sp_list = self._check_sp_in_storage_credentials(sp_list, sc_set) # fetch sp client_secret if any sp_list_with_secret = self._fetch_client_secret(filtered_sp_list) self._final_sp_list = sp_list_with_secret # output the action plan for customer to confirm - return self._save_action_plan(sp_list_with_secret) + self._print_action_plan(sp_list_with_secret) + return - def _create_sc_with_client_secret(self, sp: ServicePrincipalMigrationInfo) -> list(StorageCredentialValidationResult): + def _create_storage_credential(self, sp: ServicePrincipalMigrationInfo) -> list(StorageCredentialValidationResult): # prepare the storage credential properties name = sp.principal azure_service_principal = AzureServicePrincipal(directory_id=sp.directory_id, @@ -198,25 +196,42 @@ def _create_sc_with_client_secret(self, sp: ServicePrincipalMigrationInfo) -> li comment=comment, read_only=read_only) - validation_result = self._validate_sc(storage_credential, sp.prefix) + validation_result = self._validate_storage_credential(storage_credential, sp.prefix) yield validation_result - def _validate_sc(self, storage_credential, location) -> StorageCredentialValidationResult: + def _validate_storage_credential(self, storage_credential, location) -> StorageCredentialValidationResult: validation = self._ws.storage_credentials.validate(storage_credential_name=storage_credential.name, url=location) return StorageCredentialValidationResult.from_storage_credential_validation(storage_credential, validation) - def execute_migration(self) -> str | None: - """ - Execute the action plan after user confirmed - :return: - """ + def execute_migration(self, prompts: Prompts): + if not self._w.config.is_azure: + logger.error("Workspace is not on azure, please run this command on azure databricks workspaces.") + return + + csv_confirmed = prompts.confirm(f"Have you reviewed the azure_storage_account_info.csv " + f"and confirm listed service principals are allowed to be checked for migration?") + if csv_confirmed is not True: + return + + self._generate_migration_list() + + plan_confirmed = prompts.confirm(f"Above Azure Service Principals will be migrated to UC storage credentials, please review and confirm.") + if plan_confirmed is not True: + return + execution_result = [] for sp in self._final_sp_list: - execution_result.append(self._create_sc_with_client_secret(sp)) - return self._installation.save(execution_result, filename=self._output_file) + execution_result.append(self._create_storage_credential(sp)) + + results_file = self._installation.save(execution_result, filename=self._output_file) + 
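+        # Installation.save returns the workspace path of the written file, e.g.
+        # /Users/<user>/.ucx/azure_service_principal_migration_result.csv (assuming the default 'ucx' install folder)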
logger.info("Completed migration from Azure Service Principal migrated to UC Storage credentials") + print(f"Completed migration from Azure Service Principal migrated to UC Storage credentials. " + f"Please check {results_file} for validation results") + return + From 79b30c8f028616890353bd8416e3cae7a324c6c0 Mon Sep 17 00:00:00 2001 From: Ziyuan Qin Date: Mon, 5 Feb 2024 14:22:07 -0800 Subject: [PATCH 12/76] fix a typo --- src/databricks/labs/ucx/migration/azure_credentials.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/databricks/labs/ucx/migration/azure_credentials.py b/src/databricks/labs/ucx/migration/azure_credentials.py index 80f578d0b5..47b4845508 100644 --- a/src/databricks/labs/ucx/migration/azure_credentials.py +++ b/src/databricks/labs/ucx/migration/azure_credentials.py @@ -180,7 +180,7 @@ def _generate_migration_list(self): return - def _create_storage_credential(self, sp: ServicePrincipalMigrationInfo) -> list(StorageCredentialValidationResult): + def _create_storage_credential(self, sp: ServicePrincipalMigrationInfo) -> list[StorageCredentialValidationResult]: # prepare the storage credential properties name = sp.principal azure_service_principal = AzureServicePrincipal(directory_id=sp.directory_id, From 922af42f82bd4b64f1d1ab646207ae88a0a15cd9 Mon Sep 17 00:00:00 2001 From: Ziyuan Qin Date: Mon, 5 Feb 2024 14:39:58 -0800 Subject: [PATCH 13/76] fix failed unit tests due to new directory_id added in StoragePermissionMapping, Principal --- tests/unit/azure/test_access.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/unit/azure/test_access.py b/tests/unit/azure/test_access.py index 0957f95c63..5a732e9009 100644 --- a/tests/unit/azure/test_access.py +++ b/tests/unit/azure/test_access.py @@ -132,12 +132,14 @@ def test_save_spn_permissions_valid_storage_accounts(caplog, mocker, az_token): 'prefix': 'abfss://container3@sto2.dfs.core.windows.net/', 'principal': 'disNameuser3', 'privilege': 'WRITE_FILES', + 'directory_id': '0000-0000', }, { 'client_id': 'appIduser3', 'prefix': 'abfss://container3@sto2.dfs.core.windows.net/', 'principal': 'disNameuser3', 'privilege': 'WRITE_FILES', + 'directory_id': '0000-0000', }, ], ) From 6254293261ed341694d6c64f4b4588b85c5ea1a2 Mon Sep 17 00:00:00 2001 From: Ziyuan Qin Date: Mon, 5 Feb 2024 23:11:49 -0800 Subject: [PATCH 14/76] remove unused parameters from for_cli in migration/azure_credentials.py --- src/databricks/labs/ucx/migration/azure_credentials.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/databricks/labs/ucx/migration/azure_credentials.py b/src/databricks/labs/ucx/migration/azure_credentials.py index 47b4845508..c8a299cb40 100644 --- a/src/databricks/labs/ucx/migration/azure_credentials.py +++ b/src/databricks/labs/ucx/migration/azure_credentials.py @@ -60,7 +60,7 @@ def __init__(self, installation: Installation, ws: WorkspaceClient, azure_resour @classmethod - def for_cli(cls, ws: WorkspaceClient, customized_csv: str, replace_with_ac: bool, product='ucx'): + def for_cli(cls, ws: WorkspaceClient, product='ucx'): installation = Installation.current(ws, product) config = installation.load(WorkspaceConfig) sql_backend = StatementExecutionBackend(ws, config.warehouse_id) @@ -70,7 +70,7 @@ def for_cli(cls, ws: WorkspaceClient, customized_csv: str, replace_with_ac: bool azure_resource_permissions = AzureResourcePermissions(installation, ws, azurerm, locations) azure_sp_crawler = AzureServicePrincipalCrawler(ws, sql_backend, config.inventory_database) - return cls(installation, 
ws, azure_resource_permissions, azure_sp_crawler, customized_csv, replace_with_ac) + return cls(installation, ws, azure_resource_permissions, azure_sp_crawler) def _list_storage_credentials(self) -> set[str]: From 2eced1e23254c6fe488a0ffc0467d1b1b2bb5cbf Mon Sep 17 00:00:00 2001 From: Ziyuan Qin Date: Tue, 6 Feb 2024 00:13:33 -0800 Subject: [PATCH 15/76] add unit tests for _list_storage_credentials and _check_sp_in_storage_credentials in migration/azure_credentials.py --- .../unit/migration/test_azure_credentials.py | 54 +++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/tests/unit/migration/test_azure_credentials.py b/tests/unit/migration/test_azure_credentials.py index e69de29bb2..4b026fb0aa 100644 --- a/tests/unit/migration/test_azure_credentials.py +++ b/tests/unit/migration/test_azure_credentials.py @@ -0,0 +1,54 @@ +from unittest.mock import create_autospec, MagicMock + +from databricks.sdk import WorkspaceClient +from databricks.sdk.service.catalog import AwsIamRole, AzureManagedIdentity, AzureServicePrincipal, StorageCredentialInfo + +from databricks.labs.ucx.assessment.azure import StoragePermissionMapping +from databricks.labs.ucx.migration.azure_credentials import AzureServicePrincipalMigration + + +def test_list_storage_credentials(): + w = create_autospec(WorkspaceClient) + + w.storage_credentials.list.return_value = [ + StorageCredentialInfo(aws_iam_role=AwsIamRole(role_arn="arn:aws:iam::123456789012:role/example-role-name")), + StorageCredentialInfo(azure_managed_identity=AzureManagedIdentity(access_connector_id="/subscriptions/.../providers/Microsoft.Databricks/...")), + StorageCredentialInfo(azure_service_principal=AzureServicePrincipal(application_id="b6420590-5e1c-4426-8950-a94cbe9b6115", + directory_id="62e43d7d-df53-4c64-86ed-c2c1a3ac60c3", + client_secret="secret")) + ] + + sp_migration = AzureServicePrincipalMigration(MagicMock(), w, MagicMock(), MagicMock()) + + expected = {"b6420590-5e1c-4426-8950-a94cbe9b6115"} + sp_migration._list_storage_credentials() + + assert expected == sp_migration._list_storage_credentials() + + +def test_sp_in_storage_credentials(): + storage_credentials_app_ids = {"no_match_id_1", "client_id_1", "client_id_2"} + + sp_no_match1 = StoragePermissionMapping(prefix="prefix3", client_id="client_id_3", principal="principal_3", privilege="READ_FILES", directory_id="directory_id_3") + sp_match1 = StoragePermissionMapping(prefix="prefix1", client_id="client_id_1", principal="principal_1", privilege="WRITE_FILES", directory_id="directory_id_1") + sp_match2 = StoragePermissionMapping(prefix="prefix2", client_id="client_id_2", principal="principal_2", privilege="WRITE_FILES", directory_id="directory_id_2") + service_principals = [sp_no_match1, sp_match1, sp_match2] + + sp_migration = AzureServicePrincipalMigration(MagicMock(), MagicMock(), MagicMock(), MagicMock()) + + filtered_sp_list = sp_migration._check_sp_in_storage_credentials(service_principals, storage_credentials_app_ids) + + assert filtered_sp_list == [sp_no_match1] + + +def test_sp_with_empty_storage_credentials(): + storage_credentials_app_ids = {} + + sp_no_match1 = StoragePermissionMapping(prefix="prefix3", client_id="client_id_3", principal="principal_3", privilege="READ_FILES", directory_id="directory_id_3") + service_principals = [sp_no_match1] + + sp_migration = AzureServicePrincipalMigration(MagicMock(), MagicMock(), MagicMock(), MagicMock()) + + filtered_sp_list = sp_migration._check_sp_in_storage_credentials(service_principals, storage_credentials_app_ids) + + 
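+    # the lone service principal is not used by any storage credential, so nothing should be filtered out: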
assert filtered_sp_list == [sp_no_match1] \ No newline at end of file From 5f0aa41b9bc9c5df9b2be304d5562e8bec0bff52 Mon Sep 17 00:00:00 2001 From: Ziyuan Qin Date: Tue, 6 Feb 2024 00:57:00 -0800 Subject: [PATCH 16/76] fix some fmt errors --- src/databricks/labs/ucx/cli.py | 4 +- .../labs/ucx/migration/azure_credentials.py | 196 +++++++++++------- .../unit/migration/test_azure_credentials.py | 63 ++++-- 3 files changed, 176 insertions(+), 87 deletions(-) diff --git a/src/databricks/labs/ucx/cli.py b/src/databricks/labs/ucx/cli.py index 1318af36a3..0c118d146c 100644 --- a/src/databricks/labs/ucx/cli.py +++ b/src/databricks/labs/ucx/cli.py @@ -19,7 +19,9 @@ from databricks.labs.ucx.hive_metastore.mapping import TableMapping from databricks.labs.ucx.hive_metastore.table_migrate import TableMove, TablesMigrate from databricks.labs.ucx.install import WorkspaceInstallation -from databricks.labs.ucx.migration.azure_credentials import AzureServicePrincipalMigration +from databricks.labs.ucx.migration.azure_credentials import ( + AzureServicePrincipalMigration, +) from databricks.labs.ucx.workspace_access.groups import GroupManager ucx = App(__file__) diff --git a/src/databricks/labs/ucx/migration/azure_credentials.py b/src/databricks/labs/ucx/migration/azure_credentials.py index c8a299cb40..71feba9ccf 100644 --- a/src/databricks/labs/ucx/migration/azure_credentials.py +++ b/src/databricks/labs/ucx/migration/azure_credentials.py @@ -5,11 +5,27 @@ from databricks.labs.blueprint.installation import Installation from databricks.labs.blueprint.tui import Prompts from databricks.sdk import WorkspaceClient -from databricks.sdk.errors import Unauthenticated, PermissionDenied, ResourceDoesNotExist, InternalError -from databricks.sdk.service.catalog import AzureServicePrincipal, Privilege, StorageCredentialInfo, ValidationResult - -from databricks.labs.ucx.assessment.azure import AzureResourcePermissions, AzureResources, \ - StoragePermissionMapping, AzureServicePrincipalCrawler +from databricks.sdk.errors import ( + InternalError, + PermissionDenied, + ResourceDoesNotExist, + Unauthenticated, +) +from databricks.sdk.service.catalog import ( + AzureServicePrincipal, + Privilege, + StorageCredentialInfo, + ValidateStorageCredentialResponse, + ValidationResult, + ValidationResultOperation, +) + +from databricks.labs.ucx.assessment.azure import ( + AzureResourcePermissions, + AzureResources, + AzureServicePrincipalCrawler, + StoragePermissionMapping, +) from databricks.labs.ucx.config import WorkspaceConfig from databricks.labs.ucx.framework.crawlers import StatementExecutionBackend from databricks.labs.ucx.hive_metastore.locations import ExternalLocations @@ -24,41 +40,56 @@ class ServicePrincipalMigrationInfo(StoragePermissionMapping): @classmethod def from_storage_permission_mapping(cls, storage_permission_mapping: StoragePermissionMapping, client_secret: str): - return cls(prefix=storage_permission_mapping.prefix, - client_id=storage_permission_mapping.client_id, - principal=storage_permission_mapping.principal, - privilege=storage_permission_mapping.privilege, - directory_id=storage_permission_mapping.directory_id, - client_secret=client_secret) + return cls( + prefix=storage_permission_mapping.prefix, + client_id=storage_permission_mapping.client_id, + principal=storage_permission_mapping.principal, + privilege=storage_permission_mapping.privilege, + directory_id=storage_permission_mapping.directory_id, + client_secret=client_secret, + ) @dataclass -class 
StorageCredentialValidationResult(StorageCredentialInfo, ValidationResult): +class StorageCredentialValidationResult: + name: str + azure_service_principal: AzureServicePrincipal + created_by: str + read_only: bool + message: str + operation: ValidationResultOperation + results: list[ValidationResult] + @classmethod - def from_storage_credential_validation(cls, storage_credential: StorageCredentialInfo, validation: ValidationResult): - return cls(name=storage_credential.name, - azure_service_principal=storage_credential.azure_service_principal, - created_by=storage_credential.created_by, - read_only=storage_credential.read_only, - message=validation.message, - operation=validation.operation, - result=validation.result - ) + def from_storage_credential_validation( + cls, storage_credential: StorageCredentialInfo, validation: ValidateStorageCredentialResponse + ): + return cls( + name=storage_credential.name, + azure_service_principal=storage_credential.azure_service_principal, + created_by=storage_credential.created_by, + read_only=storage_credential.read_only, + results=validation.results, + ) class AzureServicePrincipalMigration: - def __init__(self, installation: Installation, ws: WorkspaceClient, azure_resource_permissions: AzureResourcePermissions, - azure_sp_crawler: AzureServicePrincipalCrawler): + def __init__( + self, + installation: Installation, + ws: WorkspaceClient, + azure_resource_permissions: AzureResourcePermissions, + azure_sp_crawler: AzureServicePrincipalCrawler, + ): self._output_file = "azure_service_principal_migration_result.csv" - self._final_sp_list = None + self._final_sp_list: list[ServicePrincipalMigrationInfo] = [] self._installation = installation self._ws = ws self._azure_resource_permissions = azure_resource_permissions self._azure_sp_crawler = azure_sp_crawler self._action_plan = 'service_principals_for_storage_credentials.csv' - @classmethod def for_cli(cls, ws: WorkspaceClient, product='ucx'): installation = Installation.current(ws, product) @@ -72,7 +103,6 @@ def for_cli(cls, ws: WorkspaceClient, product='ucx'): return cls(installation, ws, azure_resource_permissions, azure_sp_crawler) - def _list_storage_credentials(self) -> set[str]: # list existed storage credentials that is using service principal, capture the service principal's application_id storage_credential_app_ids = set() @@ -82,10 +112,11 @@ def _list_storage_credentials(self) -> set[str]: # only add service principal's application_id, ignore managed identity based storage_credential if storage_credential.azure_service_principal: storage_credential_app_ids.add(storage_credential.azure_service_principal.application_id) - logger.info(f"Found {len(storage_credential_app_ids)} distinct service principals already used in UC storage credentials") + logger.info( + f"Found {len(storage_credential_app_ids)} distinct service principals already used in UC storage credentials" + ) return storage_credential_app_ids - def _check_sp_in_storage_credentials(self, sp_list, sc_set) -> list[StoragePermissionMapping]: # if sp is already used, take it off from the sp_list filtered_sp_list = [] @@ -95,27 +126,36 @@ def _check_sp_in_storage_credentials(self, sp_list, sc_set) -> list[StoragePermi return filtered_sp_list - def _read_databricks_secret(self, scope: str, key: str, application_id: str) -> str | None: try: secret_response = self._ws.secrets.get_secret(scope, key) except Unauthenticated: - logger.error(f"User is unauthenticated to fetch Databricks secret value for service principal to storage credential 
migration.") + logger.error( + "User is unauthenticated to fetch Databricks secret value for service principal to storage credential migration." + ) raise except PermissionDenied: - logger.error(f"User does not have permission to Databricks secret value for service principal to storage credential migration.") + logger.error( + "User does not have permission to Databricks secret value for service principal to storage credential migration." + ) raise except ResourceDoesNotExist: - logger.info(f"Secret {scope}.{key} does not exists. " - f"Cannot fetch the service principal client_secret for {application_id}. " - f"Will not reuse this client_secret") + logger.info( + f"Secret {scope}.{key} does not exists. " + f"Cannot fetch the service principal client_secret for {application_id}. " + f"Will not reuse this client_secret" + ) return None except InternalError: - logger.info(f"InternalError while reading secret {scope}.{key}. " - f"Cannot fetch the service principal client_secret for {application_id}. " - f"Will not reuse this client_secret") - print(f"{application_id} is not migrated due to InternalError while fetching the client_secret from Databricks secret." - f"You may rerun the migration command later to retry this service principal") + logger.info( + f"InternalError while reading secret {scope}.{key}. " + f"Cannot fetch the service principal client_secret for {application_id}. " + f"Will not reuse this client_secret" + ) + print( + f"{application_id} is not migrated due to InternalError while fetching the client_secret from Databricks secret." + f"You may rerun the migration command later to retry this service principal" + ) return None # decode the bytes string from GetSecretResponse to utf-8 string @@ -123,12 +163,13 @@ def _read_databricks_secret(self, scope: str, key: str, application_id: str) -> try: return base64.b64decode(secret_response.value).decode("utf-8") except UnicodeDecodeError: - logger.info(f"Secret {scope}.{key} has Base64 bytes that cannot be decoded to utf-8 string . " - f"Cannot fetch the service principal client_secret for {application_id}. " - f"Will not reuse this client_secret") + logger.info( + f"Secret {scope}.{key} has Base64 bytes that cannot be decoded to utf-8 string . " + f"Cannot fetch the service principal client_secret for {application_id}. " + f"Will not reuse this client_secret" + ) return None - def _fetch_client_secret(self, sp_list: list[StoragePermissionMapping]) -> list[ServicePrincipalMigrationInfo]: # check AzureServicePrincipalInfo from AzureServicePrincipalCrawler, if AzureServicePrincipalInfo # has secret_scope and secret_key not empty, fetch the client_secret and put it to the client_secret field @@ -137,7 +178,7 @@ def _fetch_client_secret(self, sp_list: list[StoragePermissionMapping]) -> list[ # they won't have any client_secret, we will process managed identity in the future. 
# fetch client_secrets of crawled service principal, if any - azure_sp_info_with_client_secret = {} + azure_sp_info_with_client_secret: dict[str, str] = {} azure_sp_infos = self._azure_sp_crawler.snapshot() for azure_sp_info in azure_sp_infos: @@ -145,21 +186,29 @@ def _fetch_client_secret(self, sp_list: list[StoragePermissionMapping]) -> list[ continue if azure_sp_info.secret_key is None: continue - secret_value = self._read_databricks_secret(azure_sp_info.secret_scope, azure_sp_info.secret_key, azure_sp_info.application_id) + secret_value = self._read_databricks_secret( + azure_sp_info.secret_scope, azure_sp_info.secret_key, azure_sp_info.application_id + ) if secret_value: - azure_sp_info_with_client_secret.update(azure_sp_info.application_id, secret_value) + azure_sp_info_with_client_secret[azure_sp_info.application_id] = secret_value # update the list of ServicePrincipalMigrationInfo if client_secret is found + sp_list_with_secret = [] for sp in sp_list: if sp.client_id in azure_sp_info_with_client_secret: - yield ServicePrincipalMigrationInfo.from_storage_permission_mapping(sp, azure_sp_info_with_client_secret[sp.client_id]) - + sp_list_with_secret.append( + ServicePrincipalMigrationInfo.from_storage_permission_mapping( + sp, azure_sp_info_with_client_secret[sp.client_id] + ) + ) + return sp_list_with_secret def _print_action_plan(self, sp_list_with_secret): # print action plan to console for customer to review. for sp in sp_list_with_secret: - print(f"Service Principal name: {sp.principal}, application_id: {sp.client_id}, privilege {sp.privilege} on location {sp.prefix}") - + print( + f"Service Principal name: {sp.principal}, application_id: {sp.client_id}, privilege {sp.privilege} on location {sp.prefix}" + ) def _generate_migration_list(self): """ @@ -179,47 +228,47 @@ def _generate_migration_list(self): self._print_action_plan(sp_list_with_secret) return - - def _create_storage_credential(self, sp: ServicePrincipalMigrationInfo) -> list[StorageCredentialValidationResult]: + def _create_storage_credential(self, sp: ServicePrincipalMigrationInfo): # prepare the storage credential properties name = sp.principal - azure_service_principal = AzureServicePrincipal(directory_id=sp.directory_id, - application_id=sp.client_id, - client_secret=sp.client_secret) + azure_service_principal = AzureServicePrincipal( + directory_id=sp.directory_id, application_id=sp.client_id, client_secret=sp.client_secret + ) comment = f"Created by UCX during migration to UC using Azure Service Principal: {sp.principal}" read_only = False if sp.privilege == Privilege.READ_FILES: read_only = True # create the storage credential - storage_credential = self._ws.storage_credentials.create(name=name, - azure_service_principal=azure_service_principal, - comment=comment, - read_only=read_only) + storage_credential = self._ws.storage_credentials.create( + name=name, azure_service_principal=azure_service_principal, comment=comment, read_only=read_only + ) validation_result = self._validate_storage_credential(storage_credential, sp.prefix) yield validation_result - def _validate_storage_credential(self, storage_credential, location) -> StorageCredentialValidationResult: - validation = self._ws.storage_credentials.validate(storage_credential_name=storage_credential.name, - url=location) - return StorageCredentialValidationResult.from_storage_credential_validation(storage_credential, - validation) - + validation = self._ws.storage_credentials.validate( + storage_credential_name=storage_credential.name, url=location + ) + 
return StorageCredentialValidationResult.from_storage_credential_validation(storage_credential, validation) def execute_migration(self, prompts: Prompts): - if not self._w.config.is_azure: + if not self._ws.config.is_azure: logger.error("Workspace is not on azure, please run this command on azure databricks workspaces.") return - csv_confirmed = prompts.confirm(f"Have you reviewed the azure_storage_account_info.csv " - f"and confirm listed service principals are allowed to be checked for migration?") + csv_confirmed = prompts.confirm( + "Have you reviewed the azure_storage_account_info.csv " + "and confirm listed service principals are allowed to be checked for migration?" + ) if csv_confirmed is not True: return self._generate_migration_list() - plan_confirmed = prompts.confirm(f"Above Azure Service Principals will be migrated to UC storage credentials, please review and confirm.") + plan_confirmed = prompts.confirm( + "Above Azure Service Principals will be migrated to UC storage credentials, please review and confirm." + ) if plan_confirmed is not True: return @@ -229,9 +278,8 @@ def execute_migration(self, prompts: Prompts): results_file = self._installation.save(execution_result, filename=self._output_file) logger.info("Completed migration from Azure Service Principal migrated to UC Storage credentials") - print(f"Completed migration from Azure Service Principal migrated to UC Storage credentials. " - f"Please check {results_file} for validation results") + print( + f"Completed migration from Azure Service Principal migrated to UC Storage credentials. " + f"Please check {results_file} for validation results" + ) return - - - diff --git a/tests/unit/migration/test_azure_credentials.py b/tests/unit/migration/test_azure_credentials.py index 4b026fb0aa..ecf4f54470 100644 --- a/tests/unit/migration/test_azure_credentials.py +++ b/tests/unit/migration/test_azure_credentials.py @@ -1,10 +1,17 @@ -from unittest.mock import create_autospec, MagicMock +from unittest.mock import MagicMock, create_autospec from databricks.sdk import WorkspaceClient -from databricks.sdk.service.catalog import AwsIamRole, AzureManagedIdentity, AzureServicePrincipal, StorageCredentialInfo +from databricks.sdk.service.catalog import ( + AwsIamRole, + AzureManagedIdentity, + AzureServicePrincipal, + StorageCredentialInfo, +) from databricks.labs.ucx.assessment.azure import StoragePermissionMapping -from databricks.labs.ucx.migration.azure_credentials import AzureServicePrincipalMigration +from databricks.labs.ucx.migration.azure_credentials import ( + AzureServicePrincipalMigration, +) def test_list_storage_credentials(): @@ -12,10 +19,18 @@ def test_list_storage_credentials(): w.storage_credentials.list.return_value = [ StorageCredentialInfo(aws_iam_role=AwsIamRole(role_arn="arn:aws:iam::123456789012:role/example-role-name")), - StorageCredentialInfo(azure_managed_identity=AzureManagedIdentity(access_connector_id="/subscriptions/.../providers/Microsoft.Databricks/...")), - StorageCredentialInfo(azure_service_principal=AzureServicePrincipal(application_id="b6420590-5e1c-4426-8950-a94cbe9b6115", - directory_id="62e43d7d-df53-4c64-86ed-c2c1a3ac60c3", - client_secret="secret")) + StorageCredentialInfo( + azure_managed_identity=AzureManagedIdentity( + access_connector_id="/subscriptions/.../providers/Microsoft.Databricks/..." 
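# [editor's note] The fixture above deliberately mixes AWS, managed-identity and
# service-principal credentials; only the AzureServicePrincipal entry's
# application_id should come back from _list_storage_credentials(), as the
# assertion below expects.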
+ ) + ), + StorageCredentialInfo( + azure_service_principal=AzureServicePrincipal( + application_id="b6420590-5e1c-4426-8950-a94cbe9b6115", + directory_id="62e43d7d-df53-4c64-86ed-c2c1a3ac60c3", + client_secret="secret", + ) + ), ] sp_migration = AzureServicePrincipalMigration(MagicMock(), w, MagicMock(), MagicMock()) @@ -29,9 +44,27 @@ def test_list_storage_credentials(): def test_sp_in_storage_credentials(): storage_credentials_app_ids = {"no_match_id_1", "client_id_1", "client_id_2"} - sp_no_match1 = StoragePermissionMapping(prefix="prefix3", client_id="client_id_3", principal="principal_3", privilege="READ_FILES", directory_id="directory_id_3") - sp_match1 = StoragePermissionMapping(prefix="prefix1", client_id="client_id_1", principal="principal_1", privilege="WRITE_FILES", directory_id="directory_id_1") - sp_match2 = StoragePermissionMapping(prefix="prefix2", client_id="client_id_2", principal="principal_2", privilege="WRITE_FILES", directory_id="directory_id_2") + sp_no_match1 = StoragePermissionMapping( + prefix="prefix3", + client_id="client_id_3", + principal="principal_3", + privilege="READ_FILES", + directory_id="directory_id_3", + ) + sp_match1 = StoragePermissionMapping( + prefix="prefix1", + client_id="client_id_1", + principal="principal_1", + privilege="WRITE_FILES", + directory_id="directory_id_1", + ) + sp_match2 = StoragePermissionMapping( + prefix="prefix2", + client_id="client_id_2", + principal="principal_2", + privilege="WRITE_FILES", + directory_id="directory_id_2", + ) service_principals = [sp_no_match1, sp_match1, sp_match2] sp_migration = AzureServicePrincipalMigration(MagicMock(), MagicMock(), MagicMock(), MagicMock()) @@ -44,11 +77,17 @@ def test_sp_in_storage_credentials(): def test_sp_with_empty_storage_credentials(): storage_credentials_app_ids = {} - sp_no_match1 = StoragePermissionMapping(prefix="prefix3", client_id="client_id_3", principal="principal_3", privilege="READ_FILES", directory_id="directory_id_3") + sp_no_match1 = StoragePermissionMapping( + prefix="prefix3", + client_id="client_id_3", + principal="principal_3", + privilege="READ_FILES", + directory_id="directory_id_3", + ) service_principals = [sp_no_match1] sp_migration = AzureServicePrincipalMigration(MagicMock(), MagicMock(), MagicMock(), MagicMock()) filtered_sp_list = sp_migration._check_sp_in_storage_credentials(service_principals, storage_credentials_app_ids) - assert filtered_sp_list == [sp_no_match1] \ No newline at end of file + assert filtered_sp_list == [sp_no_match1] From 40ab9cd6136b96c8bb2eec05e539b733fa592039 Mon Sep 17 00:00:00 2001 From: Ziyuan Qin Date: Sun, 11 Feb 2024 14:21:33 -0800 Subject: [PATCH 17/76] replace _check_sp_in_storage_credentials() by list comprehension --- src/databricks/labs/ucx/migration/azure_credentials.py | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/src/databricks/labs/ucx/migration/azure_credentials.py b/src/databricks/labs/ucx/migration/azure_credentials.py index 71feba9ccf..8da5f42149 100644 --- a/src/databricks/labs/ucx/migration/azure_credentials.py +++ b/src/databricks/labs/ucx/migration/azure_credentials.py @@ -117,14 +117,6 @@ def _list_storage_credentials(self) -> set[str]: ) return storage_credential_app_ids - def _check_sp_in_storage_credentials(self, sp_list, sc_set) -> list[StoragePermissionMapping]: - # if sp is already used, take it off from the sp_list - filtered_sp_list = [] - for service_principal in sp_list: - if service_principal.client_id not in sc_set: - filtered_sp_list.append(service_principal) - 
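
# [editor's note -- not part of the patch] The removed loop and the list
# comprehension introduced below are equivalent filters. Note, however, that the
# comprehension as first written tests `sp not in sc_set`, comparing the whole
# mapping object against a set of application-id strings; patch 25 later corrects
# it to `sp.client_id not in sc_set`. A quick check with made-up ids:
client_ids = ["app-1", "app-2", "app-3"]
used_by_credentials = {"app-2"}
assert [c for c in client_ids if c not in used_by_credentials] == ["app-1", "app-3"]
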
- return filtered_sp_list def _read_databricks_secret(self, scope: str, key: str, application_id: str) -> str | None: try: @@ -220,7 +212,7 @@ def _generate_migration_list(self): # list existed storage credentials sc_set = self._list_storage_credentials() # check if the sp is already used in UC storage credential - filtered_sp_list = self._check_sp_in_storage_credentials(sp_list, sc_set) + filtered_sp_list = [sp for sp in sp_list if sp not in sc_set] # fetch sp client_secret if any sp_list_with_secret = self._fetch_client_secret(filtered_sp_list) self._final_sp_list = sp_list_with_secret From fdccb909df6c2c3e3911e8f60d07364a2bb776eb Mon Sep 17 00:00:00 2001 From: Ziyuan Qin Date: Sun, 11 Feb 2024 14:44:52 -0800 Subject: [PATCH 18/76] change ServicePrincipalMigrationInfo from a dataclass to a namedtuple --- .../labs/ucx/migration/azure_credentials.py | 43 ++++++++----------- 1 file changed, 17 insertions(+), 26 deletions(-) diff --git a/src/databricks/labs/ucx/migration/azure_credentials.py b/src/databricks/labs/ucx/migration/azure_credentials.py index 8da5f42149..9a53fc58bc 100644 --- a/src/databricks/labs/ucx/migration/azure_credentials.py +++ b/src/databricks/labs/ucx/migration/azure_credentials.py @@ -1,5 +1,6 @@ import base64 import logging +from collections import namedtuple from dataclasses import dataclass from databricks.labs.blueprint.installation import Installation @@ -33,21 +34,8 @@ logger = logging.getLogger(__name__) -@dataclass -class ServicePrincipalMigrationInfo(StoragePermissionMapping): - # Service Principal's client_secret stored in Databricks secret - client_secret: str - - @classmethod - def from_storage_permission_mapping(cls, storage_permission_mapping: StoragePermissionMapping, client_secret: str): - return cls( - prefix=storage_permission_mapping.prefix, - client_id=storage_permission_mapping.client_id, - principal=storage_permission_mapping.principal, - privilege=storage_permission_mapping.privilege, - directory_id=storage_permission_mapping.directory_id, - client_secret=client_secret, - ) +# A namedtuple to host service_principal and its client_secret info +ServicePrincipalMigrationInfo = namedtuple("ServicePrincipalMigrationInfo", "service_principal client_secret") @dataclass @@ -189,17 +177,18 @@ def _fetch_client_secret(self, sp_list: list[StoragePermissionMapping]) -> list[ for sp in sp_list: if sp.client_id in azure_sp_info_with_client_secret: sp_list_with_secret.append( - ServicePrincipalMigrationInfo.from_storage_permission_mapping( - sp, azure_sp_info_with_client_secret[sp.client_id] - ) + ServicePrincipalMigrationInfo(service_principal=sp, client_secret=azure_sp_info_with_client_secret[sp.client_id]) ) return sp_list_with_secret - def _print_action_plan(self, sp_list_with_secret): + def _print_action_plan(self, sp_list_with_secret: list[ServicePrincipalMigrationInfo]): # print action plan to console for customer to review. 
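
# [editor's note -- not part of the patch] With ServicePrincipalMigrationInfo now a
# namedtuple, fields are reached by attribute and the original mapping sits one
# level down; a minimal usage sketch with hypothetical values:
#
#     info = ServicePrincipalMigrationInfo(service_principal=sp, client_secret="s3cr3t")
#     info.service_principal.principal  # fields of the wrapped StoragePermissionMapping
#     info.client_secret                # the secret fetched from Databricks
#
# Positional construction and tuple unpacking also work, since a namedtuple is
# still a plain tuple.
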
for sp in sp_list_with_secret: print( - f"Service Principal name: {sp.principal}, application_id: {sp.client_id}, privilege {sp.privilege} on location {sp.prefix}" + f"Service Principal name: {sp.service_principal.principal}, " + f"application_id: {sp.service_principal.client_id}, " + f"privilege {sp.service_principal.privilege} " + f"on location {sp.service_principal.prefix}" ) def _generate_migration_list(self): @@ -220,22 +209,24 @@ def _generate_migration_list(self): self._print_action_plan(sp_list_with_secret) return - def _create_storage_credential(self, sp: ServicePrincipalMigrationInfo): + def _create_storage_credential(self, sp_migration: ServicePrincipalMigrationInfo): # prepare the storage credential properties - name = sp.principal + name = sp_migration.service_principal.principal azure_service_principal = AzureServicePrincipal( - directory_id=sp.directory_id, application_id=sp.client_id, client_secret=sp.client_secret + directory_id=sp_migration.service_principal.directory_id, + application_id=sp_migration.service_principal.client_id, + client_secret=sp_migration.client_secret ) - comment = f"Created by UCX during migration to UC using Azure Service Principal: {sp.principal}" + comment = f"Created by UCX during migration to UC using Azure Service Principal: {sp_migration.service_principal.principal}" read_only = False - if sp.privilege == Privilege.READ_FILES: + if sp_migration.service_principal.privilege == Privilege.READ_FILES: read_only = True # create the storage credential storage_credential = self._ws.storage_credentials.create( name=name, azure_service_principal=azure_service_principal, comment=comment, read_only=read_only ) - validation_result = self._validate_storage_credential(storage_credential, sp.prefix) + validation_result = self._validate_storage_credential(storage_credential, sp_migration.service_principal.prefix) yield validation_result def _validate_storage_credential(self, storage_credential, location) -> StorageCredentialValidationResult: From 477d0d64e11adc1eef01212023aed4325bd0d4ea Mon Sep 17 00:00:00 2001 From: Ziyuan Qin Date: Sun, 11 Feb 2024 14:46:25 -0800 Subject: [PATCH 19/76] do not catch PermissionDenied and Unauthenticated exceptions when fetching the service principal client_secret from Databricks secret to make the code cleaner --- src/databricks/labs/ucx/migration/azure_credentials.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/src/databricks/labs/ucx/migration/azure_credentials.py b/src/databricks/labs/ucx/migration/azure_credentials.py index 9a53fc58bc..b5aa099b2e 100644 --- a/src/databricks/labs/ucx/migration/azure_credentials.py +++ b/src/databricks/labs/ucx/migration/azure_credentials.py @@ -109,16 +109,6 @@ def _list_storage_credentials(self) -> set[str]: def _read_databricks_secret(self, scope: str, key: str, application_id: str) -> str | None: try: secret_response = self._ws.secrets.get_secret(scope, key) - except Unauthenticated: - logger.error( - "User is unauthenticated to fetch Databricks secret value for service principal to storage credential migration." - ) - raise - except PermissionDenied: - logger.error( - "User does not have permission to Databricks secret value for service principal to storage credential migration." - ) - raise except ResourceDoesNotExist: logger.info( f"Secret {scope}.{key} does not exists. 
" From feb3d58e68b97c94ab9369383167cc5a47b71f58 Mon Sep 17 00:00:00 2001 From: Ziyuan Qin Date: Sun, 11 Feb 2024 14:51:01 -0800 Subject: [PATCH 20/76] use return not yield in _create_storage_credential --- src/databricks/labs/ucx/migration/azure_credentials.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/databricks/labs/ucx/migration/azure_credentials.py b/src/databricks/labs/ucx/migration/azure_credentials.py index b5aa099b2e..47a458079a 100644 --- a/src/databricks/labs/ucx/migration/azure_credentials.py +++ b/src/databricks/labs/ucx/migration/azure_credentials.py @@ -217,7 +217,7 @@ def _create_storage_credential(self, sp_migration: ServicePrincipalMigrationInfo ) validation_result = self._validate_storage_credential(storage_credential, sp_migration.service_principal.prefix) - yield validation_result + return validation_result def _validate_storage_credential(self, storage_credential, location) -> StorageCredentialValidationResult: validation = self._ws.storage_credentials.validate( From 7dc14a1cb435a1c7c7cdedff569c33e9fce96392 Mon Sep 17 00:00:00 2001 From: Ziyuan Qin Date: Sun, 11 Feb 2024 14:56:00 -0800 Subject: [PATCH 21/76] move azure check and service principal confirmation prompts to for_cli function --- src/databricks/labs/ucx/cli.py | 2 +- .../labs/ucx/migration/azure_credentials.py | 23 ++++++++++--------- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/src/databricks/labs/ucx/cli.py b/src/databricks/labs/ucx/cli.py index 0c118d146c..019e732889 100644 --- a/src/databricks/labs/ucx/cli.py +++ b/src/databricks/labs/ucx/cli.py @@ -299,7 +299,7 @@ def migrate_azure_service_principals(w: WorkspaceClient): """ logger.info("Running migrate_azure_service_principals command") prompts = Prompts() - service_principal_migration = AzureServicePrincipalMigration.for_cli(w) + service_principal_migration = AzureServicePrincipalMigration.for_cli(w, prompts) service_principal_migration.execute_migration(prompts) return diff --git a/src/databricks/labs/ucx/migration/azure_credentials.py b/src/databricks/labs/ucx/migration/azure_credentials.py index 47a458079a..e05a6047c7 100644 --- a/src/databricks/labs/ucx/migration/azure_credentials.py +++ b/src/databricks/labs/ucx/migration/azure_credentials.py @@ -79,7 +79,18 @@ def __init__( self._action_plan = 'service_principals_for_storage_credentials.csv' @classmethod - def for_cli(cls, ws: WorkspaceClient, product='ucx'): + def for_cli(cls, ws: WorkspaceClient, prompts: Prompts, product='ucx'): + if not ws.config.is_azure: + logger.error("Workspace is not on azure, please run this command on azure databricks workspaces.") + return + + csv_confirmed = prompts.confirm( + "Have you reviewed the azure_storage_account_info.csv " + "and confirm listed service principals are allowed to be checked for migration?" 
+ ) + if csv_confirmed is not True: + return + installation = Installation.current(ws, product) config = installation.load(WorkspaceConfig) sql_backend = StatementExecutionBackend(ws, config.warehouse_id) @@ -226,16 +237,6 @@ def _validate_storage_credential(self, storage_credential, location) -> StorageC return StorageCredentialValidationResult.from_storage_credential_validation(storage_credential, validation) def execute_migration(self, prompts: Prompts): - if not self._ws.config.is_azure: - logger.error("Workspace is not on azure, please run this command on azure databricks workspaces.") - return - - csv_confirmed = prompts.confirm( - "Have you reviewed the azure_storage_account_info.csv " - "and confirm listed service principals are allowed to be checked for migration?" - ) - if csv_confirmed is not True: - return self._generate_migration_list() From 9f6cfc99d30ab6c64f6ebb64599c86f343a40ea6 Mon Sep 17 00:00:00 2001 From: Ziyuan Qin Date: Sun, 11 Feb 2024 16:09:19 -0800 Subject: [PATCH 22/76] add unit test for _read_databricks_secret --- .../unit/migration/test_azure_credentials.py | 84 ++++++++----------- 1 file changed, 34 insertions(+), 50 deletions(-) diff --git a/tests/unit/migration/test_azure_credentials.py b/tests/unit/migration/test_azure_credentials.py index ecf4f54470..5a2a498bb0 100644 --- a/tests/unit/migration/test_azure_credentials.py +++ b/tests/unit/migration/test_azure_credentials.py @@ -1,12 +1,20 @@ +import logging +import pytest + from unittest.mock import MagicMock, create_autospec from databricks.sdk import WorkspaceClient +from databricks.sdk.errors import ( + InternalError, + ResourceDoesNotExist +) from databricks.sdk.service.catalog import ( AwsIamRole, AzureManagedIdentity, AzureServicePrincipal, StorageCredentialInfo, ) +from databricks.sdk.service.workspace import GetSecretResponse from databricks.labs.ucx.assessment.azure import StoragePermissionMapping from databricks.labs.ucx.migration.azure_credentials import ( @@ -41,53 +49,29 @@ def test_list_storage_credentials(): assert expected == sp_migration._list_storage_credentials() -def test_sp_in_storage_credentials(): - storage_credentials_app_ids = {"no_match_id_1", "client_id_1", "client_id_2"} - - sp_no_match1 = StoragePermissionMapping( - prefix="prefix3", - client_id="client_id_3", - principal="principal_3", - privilege="READ_FILES", - directory_id="directory_id_3", - ) - sp_match1 = StoragePermissionMapping( - prefix="prefix1", - client_id="client_id_1", - principal="principal_1", - privilege="WRITE_FILES", - directory_id="directory_id_1", - ) - sp_match2 = StoragePermissionMapping( - prefix="prefix2", - client_id="client_id_2", - principal="principal_2", - privilege="WRITE_FILES", - directory_id="directory_id_2", - ) - service_principals = [sp_no_match1, sp_match1, sp_match2] - - sp_migration = AzureServicePrincipalMigration(MagicMock(), MagicMock(), MagicMock(), MagicMock()) - - filtered_sp_list = sp_migration._check_sp_in_storage_credentials(service_principals, storage_credentials_app_ids) - - assert filtered_sp_list == [sp_no_match1] - - -def test_sp_with_empty_storage_credentials(): - storage_credentials_app_ids = {} - - sp_no_match1 = StoragePermissionMapping( - prefix="prefix3", - client_id="client_id_3", - principal="principal_3", - privilege="READ_FILES", - directory_id="directory_id_3", - ) - service_principals = [sp_no_match1] - - sp_migration = AzureServicePrincipalMigration(MagicMock(), MagicMock(), MagicMock(), MagicMock()) - - filtered_sp_list = 
sp_migration._check_sp_in_storage_credentials(service_principals, storage_credentials_app_ids)
-
-    assert filtered_sp_list == [sp_no_match1]
+@pytest.mark.parametrize("secret_bytes_value, expected_return",
+                         [(GetSecretResponse(value="aGVsbG8gd29ybGQ="), "hello world"),
+                          (GetSecretResponse(value="T2zhLCBNdW5kbyE="), None)
+                          ])
+def test_read_secret_value_decode(secret_bytes_value, expected_return):
+    w = create_autospec(WorkspaceClient)
+    w.secrets.get_secret.return_value = secret_bytes_value
+
+    sp_migration = AzureServicePrincipalMigration(MagicMock(), w, MagicMock(), MagicMock())
+    assert sp_migration._read_databricks_secret("test_scope","test_key", "000") == expected_return
+
+
+@pytest.mark.parametrize("exception, expected_log, expected_return",
+                         [(ResourceDoesNotExist(), "Will not reuse this client_secret", None),
+                          (InternalError(), "Will not reuse this client_secret", None)
+                          ])
+def test_read_secret_read_exception(caplog, exception, expected_log, expected_return):
+    caplog.set_level(logging.INFO)
+    w = create_autospec(WorkspaceClient)
+    w.secrets.get_secret.side_effect = exception
+
+    sp_migration = AzureServicePrincipalMigration(MagicMock(), w, MagicMock(), MagicMock())
+    secret_value = sp_migration._read_databricks_secret("test_scope","test_key", "000")
+
+    assert expected_log in caplog.text
+    assert secret_value == expected_return
\ No newline at end of file

From b365488093a4cd709dd323f2f65cf1135f468eac Mon Sep 17 00:00:00 2001
From: Ziyuan Qin
Date: Sun, 11 Feb 2024 18:38:11 -0800
Subject: [PATCH 23/76] add unit tests for _fetch_client_secret and
 _print_action_plan

---
 .../labs/ucx/migration/azure_credentials.py   |  4 +-
 .../unit/migration/test_azure_credentials.py  | 52 +++++++++++++++++--
 2 files changed, 50 insertions(+), 6 deletions(-)

diff --git a/src/databricks/labs/ucx/migration/azure_credentials.py b/src/databricks/labs/ucx/migration/azure_credentials.py
index e05a6047c7..1f88c48605 100644
--- a/src/databricks/labs/ucx/migration/azure_credentials.py
+++ b/src/databricks/labs/ucx/migration/azure_credentials.py
@@ -163,9 +163,9 @@ def _fetch_client_secret(self, sp_list: list[StoragePermissionMapping]) -> list[
         azure_sp_infos = self._azure_sp_crawler.snapshot()
 
         for azure_sp_info in azure_sp_infos:
-            if azure_sp_info.secret_scope is None:
+            if not azure_sp_info.secret_scope:
                 continue
-            if azure_sp_info.secret_key is None:
+            if not azure_sp_info.secret_key:
                 continue
             secret_value = self._read_databricks_secret(
                 azure_sp_info.secret_scope, azure_sp_info.secret_key, azure_sp_info.application_id
diff --git a/tests/unit/migration/test_azure_credentials.py b/tests/unit/migration/test_azure_credentials.py
index 5a2a498bb0..e8858fd4e0 100644
--- a/tests/unit/migration/test_azure_credentials.py
+++ b/tests/unit/migration/test_azure_credentials.py
@@ -1,7 +1,7 @@
 import logging
 import pytest
 
-from unittest.mock import MagicMock, create_autospec
+from unittest.mock import MagicMock, create_autospec, Mock
 
 from databricks.sdk import WorkspaceClient
 from databricks.sdk.errors import (
@@ -16,10 +16,12 @@
 )
 from databricks.sdk.service.workspace import GetSecretResponse
 
-from databricks.labs.ucx.assessment.azure import StoragePermissionMapping
+from databricks.labs.ucx.assessment.azure import StoragePermissionMapping, \
+    AzureServicePrincipalCrawler, AzureServicePrincipalInfo
 from databricks.labs.ucx.migration.azure_credentials import (
-    AzureServicePrincipalMigration,
+    AzureServicePrincipalMigration, ServicePrincipalMigrationInfo,
 )
+from tests.unit.framework.mocks import MockBackend
 
 
 def
test_list_storage_credentials(): @@ -74,4 +76,46 @@ def test_read_secret_read_exception(caplog, exception, expected_log, expected_re secret_value = sp_migration._read_databricks_secret("test_scope","test_key", "000") assert expected_log in caplog.text - assert secret_value == expected_return \ No newline at end of file + assert secret_value == expected_return + + +def test_fetch_client_secret(): + w = create_autospec(WorkspaceClient) + w.secrets.get_secret.return_value = GetSecretResponse(value="aGVsbG8gd29ybGQ=") + + crawled_sp = [AzureServicePrincipalInfo("app_secret1", "test_scope", "test_key", "tenant_id_1", "storage1"), + AzureServicePrincipalInfo("app_secret2", "test_scope", "test_key", "tenant_id_1", "storage1"), + AzureServicePrincipalInfo("app_no_secret1", "", "", "tenant_id_1", "storage1"), + AzureServicePrincipalInfo("app_no_secret2", "test_scope", "", "tenant_id_1", "storage1"),] + sp_crawler = AzureServicePrincipalCrawler(w, MockBackend(), "ucx") + sp_crawler._try_fetch = Mock(return_value=crawled_sp) + sp_crawler._crawl = Mock(return_value=crawled_sp) + + sp_to_be_checked = [StoragePermissionMapping(prefix="prefix1",client_id="app_secret1",principal="principal_1",privilege="WRITE_FILES",directory_id="directory_id_1"), + StoragePermissionMapping(prefix="prefix2",client_id="app_secret2",principal="principal_2",privilege="READ_FILES",directory_id="directory_id_1"), + StoragePermissionMapping(prefix="prefix3",client_id="app_no_secret1",principal="principal_3",privilege="WRITE_FILES",directory_id="directory_id_2"), + StoragePermissionMapping(prefix="prefix4",client_id="app_no_secret2",principal="principal_4",privilege="READ_FILES",directory_id="directory_id_2")] + + expected_sp_list = [ServicePrincipalMigrationInfo(StoragePermissionMapping(prefix="prefix1",client_id="app_secret1",principal="principal_1",privilege="WRITE_FILES",directory_id="directory_id_1"), "hello world"), + ServicePrincipalMigrationInfo(StoragePermissionMapping(prefix="prefix2",client_id="app_secret2",principal="principal_2",privilege="READ_FILES",directory_id="directory_id_1"), "hello world")] + + sp_migration = AzureServicePrincipalMigration(MagicMock(), w, MagicMock(), sp_crawler) + filtered_sp_list = sp_migration._fetch_client_secret(sp_to_be_checked) + + assert filtered_sp_list == expected_sp_list + + +def test_print_action_plan(capsys): + sp_list_with_secret = [ServicePrincipalMigrationInfo(StoragePermissionMapping(prefix="prefix1",client_id="app_secret1",principal="principal_1",privilege="WRITE_FILES",directory_id="directory_id_1"), "hello world")] + sp_migration = AzureServicePrincipalMigration(MagicMock(), MagicMock(), MagicMock(), MagicMock()) + sp_migration._print_action_plan(sp_list_with_secret) + + expected_print = (f"Service Principal name: principal_1, " + f"application_id: app_secret1, " + f"privilege WRITE_FILES " + f"on location prefix1\n") + assert expected_print == capsys.readouterr().out + + + + From a2a51cf85b71db846f129456edf3aac4ec8a014e Mon Sep 17 00:00:00 2001 From: Ziyuan Qin Date: Sun, 11 Feb 2024 19:05:05 -0800 Subject: [PATCH 24/76] add unit tests for for_cli in azure_credentials.py --- .../unit/migration/test_azure_credentials.py | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/tests/unit/migration/test_azure_credentials.py b/tests/unit/migration/test_azure_credentials.py index e8858fd4e0..4e9a336dfc 100644 --- a/tests/unit/migration/test_azure_credentials.py +++ b/tests/unit/migration/test_azure_credentials.py @@ -16,12 +16,34 @@ ) from 
databricks.sdk.service.workspace import GetSecretResponse +from databricks.labs.blueprint.tui import MockPrompts from databricks.labs.ucx.assessment.azure import StoragePermissionMapping, \ AzureServicePrincipalCrawler, AzureServicePrincipalInfo from databricks.labs.ucx.migration.azure_credentials import ( AzureServicePrincipalMigration, ServicePrincipalMigrationInfo, ) from tests.unit.framework.mocks import MockBackend +from tests.unit.test_cli import ws + + +def test_for_cli_not_azure(): + w = create_autospec(WorkspaceClient) + w.config.is_azure.return_value = False + assert AzureServicePrincipalMigration.for_cli(w, MagicMock()) is None + + +def test_for_cli_not_prompts(): + w = create_autospec(WorkspaceClient) + w.config.is_azure.return_value = True + prompts = MockPrompts({"Have you reviewed the azure_storage_account_info.csv *": "No"}) + assert AzureServicePrincipalMigration.for_cli(w, prompts) is None + + +def test_for_cli(ws): + ws.config.is_azure.return_value = True + prompts = MockPrompts({"Have you reviewed the azure_storage_account_info.csv *": "Yes"}) + + assert isinstance(AzureServicePrincipalMigration.for_cli(ws, prompts), AzureServicePrincipalMigration) def test_list_storage_credentials(): From 1fa336c9cba74a6a1b37abbcea5b8f5006a453ae Mon Sep 17 00:00:00 2001 From: Ziyuan Qin Date: Sun, 11 Feb 2024 21:19:41 -0800 Subject: [PATCH 25/76] add unit test for _generate_migration_list --- .../labs/ucx/migration/azure_credentials.py | 3 +-- .../unit/migration/test_azure_credentials.py | 25 ++++++++++++++++++- 2 files changed, 25 insertions(+), 3 deletions(-) diff --git a/src/databricks/labs/ucx/migration/azure_credentials.py b/src/databricks/labs/ucx/migration/azure_credentials.py index 1f88c48605..1fde03af5f 100644 --- a/src/databricks/labs/ucx/migration/azure_credentials.py +++ b/src/databricks/labs/ucx/migration/azure_credentials.py @@ -76,7 +76,6 @@ def __init__( self._ws = ws self._azure_resource_permissions = azure_resource_permissions self._azure_sp_crawler = azure_sp_crawler - self._action_plan = 'service_principals_for_storage_credentials.csv' @classmethod def for_cli(cls, ws: WorkspaceClient, prompts: Prompts, product='ucx'): @@ -202,7 +201,7 @@ def _generate_migration_list(self): # list existed storage credentials sc_set = self._list_storage_credentials() # check if the sp is already used in UC storage credential - filtered_sp_list = [sp for sp in sp_list if sp not in sc_set] + filtered_sp_list = [sp for sp in sp_list if sp.client_id not in sc_set] # fetch sp client_secret if any sp_list_with_secret = self._fetch_client_secret(filtered_sp_list) self._final_sp_list = sp_list_with_secret diff --git a/tests/unit/migration/test_azure_credentials.py b/tests/unit/migration/test_azure_credentials.py index 4e9a336dfc..7e76de970b 100644 --- a/tests/unit/migration/test_azure_credentials.py +++ b/tests/unit/migration/test_azure_credentials.py @@ -1,7 +1,7 @@ import logging import pytest -from unittest.mock import MagicMock, create_autospec, Mock +from unittest.mock import MagicMock, create_autospec, Mock, patch from databricks.sdk import WorkspaceClient from databricks.sdk.errors import ( @@ -139,5 +139,28 @@ def test_print_action_plan(capsys): assert expected_print == capsys.readouterr().out +def test_generate_migration_list(capsys, mocker, ws): + ws.config.is_azure.return_value = True + ws.secrets.get_secret.return_value = GetSecretResponse(value="aGVsbG8gd29ybGQ=") + ws.storage_credentials.list.return_value = [ + StorageCredentialInfo( + 
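# [editor's note] The credential registered just below (app_secret1) already
# exists in UC, so the test expects it to be filtered out and only app_secret2
# to reach the printed action plan -- see the capsys assertion at the end.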
azure_service_principal=AzureServicePrincipal( + application_id="app_secret1", + directory_id="directory_id_1", + client_secret="hello world", + ) + ) + ] + + prompts = MockPrompts({"Have you reviewed the azure_storage_account_info.csv *": "Yes"}) + + mocker.patch("databricks.labs.ucx.assessment.azure.AzureResourcePermissions.load", return_value = [StoragePermissionMapping(prefix="prefix1",client_id="app_secret1",principal="principal_1",privilege="WRITE_FILES",directory_id="directory_id_1"), + StoragePermissionMapping(prefix="prefix2",client_id="app_secret2",principal="principal_2",privilege="READ_FILES",directory_id="directory_id_1")]) + mocker.patch("databricks.labs.ucx.assessment.azure.AzureServicePrincipalCrawler.snapshot", return_value=[AzureServicePrincipalInfo("app_secret1", "test_scope", "test_key", "tenant_id_1", "storage1"), + AzureServicePrincipalInfo("app_secret2", "test_scope", "test_key", "tenant_id_1", "storage1")]) + + sp_migration = AzureServicePrincipalMigration.for_cli(ws, prompts) + sp_migration._generate_migration_list() + assert "app_secret2" in capsys.readouterr().out From 16de6835aaf649b32cca79736439f963cbac9672 Mon Sep 17 00:00:00 2001 From: Ziyuan Qin Date: Sun, 11 Feb 2024 22:55:05 -0800 Subject: [PATCH 26/76] add unit test for execute_migration, set keyword arg filename when calling installation.load --- src/databricks/labs/ucx/azure/access.py | 2 +- .../labs/ucx/migration/azure_credentials.py | 13 ++- .../unit/migration/test_azure_credentials.py | 85 +++++++++++++++++-- 3 files changed, 86 insertions(+), 14 deletions(-) diff --git a/src/databricks/labs/ucx/azure/access.py b/src/databricks/labs/ucx/azure/access.py index 11554a7e54..1e2022c61f 100644 --- a/src/databricks/labs/ucx/azure/access.py +++ b/src/databricks/labs/ucx/azure/access.py @@ -109,6 +109,6 @@ def load(self) -> list[StoragePermissionMapping]: Load StoragePermissionMapping info from azure_storage_account_info.csv :return: """ - storage_account_infos = self._installation.load(list[StoragePermissionMapping], self._filename) + storage_account_infos = self._installation.load(list[StoragePermissionMapping], filename=self._filename) return storage_account_infos diff --git a/src/databricks/labs/ucx/migration/azure_credentials.py b/src/databricks/labs/ucx/migration/azure_credentials.py index 1fde03af5f..048bcfa777 100644 --- a/src/databricks/labs/ucx/migration/azure_credentials.py +++ b/src/databricks/labs/ucx/migration/azure_credentials.py @@ -8,9 +8,7 @@ from databricks.sdk import WorkspaceClient from databricks.sdk.errors import ( InternalError, - PermissionDenied, - ResourceDoesNotExist, - Unauthenticated, + ResourceDoesNotExist ) from databricks.sdk.service.catalog import ( AzureServicePrincipal, @@ -18,7 +16,6 @@ StorageCredentialInfo, ValidateStorageCredentialResponse, ValidationResult, - ValidationResultOperation, ) from databricks.labs.ucx.assessment.azure import ( @@ -44,8 +41,6 @@ class StorageCredentialValidationResult: azure_service_principal: AzureServicePrincipal created_by: str read_only: bool - message: str - operation: ValidationResultOperation results: list[ValidationResult] @classmethod @@ -57,7 +52,7 @@ def from_storage_credential_validation( azure_service_principal=storage_credential.azure_service_principal, created_by=storage_credential.created_by, read_only=storage_credential.read_only, - results=validation.results, + results=validation.results ) @@ -219,7 +214,9 @@ def _create_storage_credential(self, sp_migration: ServicePrincipalMigrationInfo ) comment = f"Created by UCX during 
migration to UC using Azure Service Principal: {sp_migration.service_principal.principal}" read_only = False - if sp_migration.service_principal.privilege == Privilege.READ_FILES: + p = Privilege.READ_FILES + sp = sp_migration.service_principal.privilege + if sp_migration.service_principal.privilege == Privilege.READ_FILES.value: read_only = True # create the storage credential storage_credential = self._ws.storage_credentials.create( diff --git a/tests/unit/migration/test_azure_credentials.py b/tests/unit/migration/test_azure_credentials.py index 7e76de970b..b262161a05 100644 --- a/tests/unit/migration/test_azure_credentials.py +++ b/tests/unit/migration/test_azure_credentials.py @@ -13,6 +13,8 @@ AzureManagedIdentity, AzureServicePrincipal, StorageCredentialInfo, + ValidateStorageCredentialResponse, + ValidationResult ) from databricks.sdk.service.workspace import GetSecretResponse @@ -26,21 +28,22 @@ from tests.unit.test_cli import ws -def test_for_cli_not_azure(): +def test_for_cli_not_azure(caplog): w = create_autospec(WorkspaceClient) - w.config.is_azure.return_value = False + w.config.is_azure = False assert AzureServicePrincipalMigration.for_cli(w, MagicMock()) is None + assert "Workspace is not on azure, please run this command on azure databricks workspaces." in caplog.text def test_for_cli_not_prompts(): w = create_autospec(WorkspaceClient) - w.config.is_azure.return_value = True + w.config.is_azure = True prompts = MockPrompts({"Have you reviewed the azure_storage_account_info.csv *": "No"}) assert AzureServicePrincipalMigration.for_cli(w, prompts) is None def test_for_cli(ws): - ws.config.is_azure.return_value = True + ws.config.is_azure = True prompts = MockPrompts({"Have you reviewed the azure_storage_account_info.csv *": "Yes"}) assert isinstance(AzureServicePrincipalMigration.for_cli(ws, prompts), AzureServicePrincipalMigration) @@ -140,7 +143,7 @@ def test_print_action_plan(capsys): def test_generate_migration_list(capsys, mocker, ws): - ws.config.is_azure.return_value = True + ws.config.is_azure = True ws.secrets.get_secret.return_value = GetSecretResponse(value="aGVsbG8gd29ybGQ=") ws.storage_credentials.list.return_value = [ StorageCredentialInfo( @@ -164,3 +167,75 @@ def test_generate_migration_list(capsys, mocker, ws): assert "app_secret2" in capsys.readouterr().out + +def test_execute_migration_no_confirmation(mocker, ws): + ws.config.is_azure = True + prompts = MockPrompts({"Have you reviewed the azure_storage_account_info.csv *": "Yes", + "Above Azure Service Principals will be migrated to UC storage credentials*": "No"}) + + mocker.patch("databricks.labs.ucx.migration.azure_credentials.AzureServicePrincipalMigration._generate_migration_list") + + with patch("databricks.labs.ucx.migration.azure_credentials.AzureServicePrincipalMigration._create_storage_credential") as c: + sp_migration = AzureServicePrincipalMigration.for_cli(ws, prompts) + sp_migration.execute_migration(prompts) + c.assert_not_called() + + +def side_effect_create_storage_credential(name, azure_service_principal, comment, read_only): + return StorageCredentialInfo(name=name, azure_service_principal=azure_service_principal, comment=comment, read_only=read_only) + +def side_effect_validate_storage_credential(storage_credential_name, url): + if "read" in storage_credential_name: + response = { + "is_dir": True, + "results": [ + { + "message": "", + "operation":["DELETE", "LIST", "READ", "WRITE"], + "result": ["SKIP", "PASS", "PASS", "SKIP"] + } + ] + } + return 
ValidateStorageCredentialResponse.from_dict(response) + else: + response = { + "is_dir": True, + "results": [ + { + "message": "", + "operation":["DELETE", "LIST", "READ", "WRITE"], + "result": ["PASS", "PASS", "PASS", "PASS"] + } + ] + } + return ValidateStorageCredentialResponse.from_dict(response) + +def test_execute_migration(capsys, mocker, ws): + ws.config.is_azure = True + ws.secrets.get_secret.return_value = GetSecretResponse(value="aGVsbG8gd29ybGQ=") + ws.storage_credentials.list.return_value = [ + StorageCredentialInfo( + azure_service_principal=AzureServicePrincipal( + application_id="app_secret1", + directory_id="directory_id_1", + client_secret="hello world", + ) + ) + ] + ws.storage_credentials.create.side_effect = side_effect_create_storage_credential + ws.storage_credentials.validate.side_effect = side_effect_validate_storage_credential + + prompts = MockPrompts({"Have you reviewed the azure_storage_account_info.csv *": "Yes", + "Above Azure Service Principals will be migrated to UC storage credentials*": "Yes"}) + + mocker.patch("databricks.labs.ucx.assessment.azure.AzureResourcePermissions.load", return_value = [StoragePermissionMapping(prefix="prefix1",client_id="app_secret1",principal="principal_1",privilege="WRITE_FILES",directory_id="directory_id_1"), + StoragePermissionMapping(prefix="prefix2",client_id="app_secret2",principal="principal_read",privilege="READ_FILES",directory_id="directory_id_1"), + StoragePermissionMapping(prefix="prefix3",client_id="app_secret3",principal="principal_write",privilege="WRITE_FILES",directory_id="directory_id_2")]) + mocker.patch("databricks.labs.ucx.assessment.azure.AzureServicePrincipalCrawler.snapshot", return_value=[AzureServicePrincipalInfo("app_secret1", "test_scope", "test_key", "tenant_id_1", "storage1"), + AzureServicePrincipalInfo("app_secret2", "test_scope", "test_key", "tenant_id_1", "storage1"), + AzureServicePrincipalInfo("app_secret3", "test_scope", "test_key", "tenant_id_2", "storage1")]) + + sp_migration = AzureServicePrincipalMigration.for_cli(ws, prompts) + sp_migration.execute_migration(prompts) + + assert "Completed migration" in capsys.readouterr().out \ No newline at end of file From 54db4a3b2bf8ce5b9644cee9158d302d8b6bda3c Mon Sep 17 00:00:00 2001 From: Ziyuan Qin Date: Sun, 11 Feb 2024 23:14:55 -0800 Subject: [PATCH 27/76] add unit test for migrate_azure_service_principals cli --- tests/unit/migration/test_azure_credentials.py | 3 +-- tests/unit/test_cli.py | 11 +++++++++++ 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/tests/unit/migration/test_azure_credentials.py b/tests/unit/migration/test_azure_credentials.py index b262161a05..47b9cfdf37 100644 --- a/tests/unit/migration/test_azure_credentials.py +++ b/tests/unit/migration/test_azure_credentials.py @@ -13,8 +13,7 @@ AzureManagedIdentity, AzureServicePrincipal, StorageCredentialInfo, - ValidateStorageCredentialResponse, - ValidationResult + ValidateStorageCredentialResponse ) from databricks.sdk.service.workspace import GetSecretResponse diff --git a/tests/unit/test_cli.py b/tests/unit/test_cli.py index 1b552c9a25..7a857a56fb 100644 --- a/tests/unit/test_cli.py +++ b/tests/unit/test_cli.py @@ -15,6 +15,7 @@ ensure_assessment_run, installations, manual_workspace_info, + migrate_azure_service_principals, move, open_remote_config, principal_prefix_access, @@ -305,3 +306,13 @@ def test_save_storage_and_principal_gcp(ws, caplog): ws.config.is_gcp = True principal_prefix_access(ws) assert "This cmd is only supported for azure and aws workspaces" 
in caplog.messages + + +def test_migrate_azure_service_principals(ws): + ws.config.is_azure = True + with (patch("databricks.labs.blueprint.tui.Prompts.confirm", return_value=True), + patch("databricks.labs.blueprint.installation.Installation.load"), + patch("databricks.labs.blueprint.installation.Installation.save") as s): + migrate_azure_service_principals(ws) + s.assert_called_once() + From d856520825b5639ab89a310de48e64d5cadb4690 Mon Sep 17 00:00:00 2001 From: Ziyuan Qin Date: Mon, 12 Feb 2024 00:47:02 -0800 Subject: [PATCH 28/76] refactor unit tests and apply make fmt changes --- src/databricks/labs/ucx/cli.py | 1 - .../labs/ucx/migration/azure_credentials.py | 32 +- .../unit/migration/test_azure_credentials.py | 324 +++++++++++++----- tests/unit/test_cli.py | 8 +- 4 files changed, 255 insertions(+), 110 deletions(-) diff --git a/src/databricks/labs/ucx/cli.py b/src/databricks/labs/ucx/cli.py index 019e732889..871561e7a6 100644 --- a/src/databricks/labs/ucx/cli.py +++ b/src/databricks/labs/ucx/cli.py @@ -301,7 +301,6 @@ def migrate_azure_service_principals(w: WorkspaceClient): prompts = Prompts() service_principal_migration = AzureServicePrincipalMigration.for_cli(w, prompts) service_principal_migration.execute_migration(prompts) - return if __name__ == "__main__": diff --git a/src/databricks/labs/ucx/migration/azure_credentials.py b/src/databricks/labs/ucx/migration/azure_credentials.py index 048bcfa777..2eb36f3da7 100644 --- a/src/databricks/labs/ucx/migration/azure_credentials.py +++ b/src/databricks/labs/ucx/migration/azure_credentials.py @@ -6,10 +6,7 @@ from databricks.labs.blueprint.installation import Installation from databricks.labs.blueprint.tui import Prompts from databricks.sdk import WorkspaceClient -from databricks.sdk.errors import ( - InternalError, - ResourceDoesNotExist -) +from databricks.sdk.errors import InternalError, ResourceDoesNotExist from databricks.sdk.service.catalog import ( AzureServicePrincipal, Privilege, @@ -48,11 +45,11 @@ def from_storage_credential_validation( cls, storage_credential: StorageCredentialInfo, validation: ValidateStorageCredentialResponse ): return cls( - name=storage_credential.name, - azure_service_principal=storage_credential.azure_service_principal, - created_by=storage_credential.created_by, - read_only=storage_credential.read_only, - results=validation.results + name=storage_credential.name or "", + azure_service_principal=storage_credential.azure_service_principal or AzureServicePrincipal("", "", ""), + created_by=storage_credential.created_by or "", + read_only=storage_credential.read_only or False, + results=validation.results or [], ) @@ -76,14 +73,14 @@ def __init__( def for_cli(cls, ws: WorkspaceClient, prompts: Prompts, product='ucx'): if not ws.config.is_azure: logger.error("Workspace is not on azure, please run this command on azure databricks workspaces.") - return + return None csv_confirmed = prompts.confirm( "Have you reviewed the azure_storage_account_info.csv " "and confirm listed service principals are allowed to be checked for migration?" 
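# [editor's note] The refactored unit tests answer this question through
# MockPrompts, which matches the asked question against its pattern keys,
# e.g. {"Have you reviewed the azure_storage_account_info.csv *": "Yes"}.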
) if csv_confirmed is not True: - return + return None installation = Installation.current(ws, product) config = installation.load(WorkspaceConfig) @@ -110,7 +107,6 @@ def _list_storage_credentials(self) -> set[str]: ) return storage_credential_app_ids - def _read_databricks_secret(self, scope: str, key: str, application_id: str) -> str | None: try: secret_response = self._ws.secrets.get_secret(scope, key) @@ -136,6 +132,8 @@ def _read_databricks_secret(self, scope: str, key: str, application_id: str) -> # decode the bytes string from GetSecretResponse to utf-8 string # TODO: handle different encoding if we have feedback from the customer try: + if secret_response.value is None: + return None return base64.b64decode(secret_response.value).decode("utf-8") except UnicodeDecodeError: logger.info( @@ -172,7 +170,9 @@ def _fetch_client_secret(self, sp_list: list[StoragePermissionMapping]) -> list[ for sp in sp_list: if sp.client_id in azure_sp_info_with_client_secret: sp_list_with_secret.append( - ServicePrincipalMigrationInfo(service_principal=sp, client_secret=azure_sp_info_with_client_secret[sp.client_id]) + ServicePrincipalMigrationInfo( + service_principal=sp, client_secret=azure_sp_info_with_client_secret[sp.client_id] + ) ) return sp_list_with_secret @@ -189,7 +189,6 @@ def _print_action_plan(self, sp_list_with_secret: list[ServicePrincipalMigration def _generate_migration_list(self): """ Create the list of SP that need to be migrated, output an action plan as a csv file for users to confirm - :return: """ # load sp list from azure_storage_account_info.csv sp_list = self._azure_resource_permissions.load() @@ -202,7 +201,6 @@ def _generate_migration_list(self): self._final_sp_list = sp_list_with_secret # output the action plan for customer to confirm self._print_action_plan(sp_list_with_secret) - return def _create_storage_credential(self, sp_migration: ServicePrincipalMigrationInfo): # prepare the storage credential properties @@ -210,12 +208,10 @@ def _create_storage_credential(self, sp_migration: ServicePrincipalMigrationInfo azure_service_principal = AzureServicePrincipal( directory_id=sp_migration.service_principal.directory_id, application_id=sp_migration.service_principal.client_id, - client_secret=sp_migration.client_secret + client_secret=sp_migration.client_secret, ) comment = f"Created by UCX during migration to UC using Azure Service Principal: {sp_migration.service_principal.principal}" read_only = False - p = Privilege.READ_FILES - sp = sp_migration.service_principal.privilege if sp_migration.service_principal.privilege == Privilege.READ_FILES.value: read_only = True # create the storage credential diff --git a/tests/unit/migration/test_azure_credentials.py b/tests/unit/migration/test_azure_credentials.py index 47b9cfdf37..0e4f7dd799 100644 --- a/tests/unit/migration/test_azure_credentials.py +++ b/tests/unit/migration/test_azure_credentials.py @@ -1,44 +1,108 @@ +import csv +import io import logging -import pytest - -from unittest.mock import MagicMock, create_autospec, Mock, patch +from unittest.mock import MagicMock, Mock, create_autospec, patch +import pytest +import yaml +from databricks.labs.blueprint.tui import MockPrompts from databricks.sdk import WorkspaceClient -from databricks.sdk.errors import ( - InternalError, - ResourceDoesNotExist -) +from databricks.sdk.errors import InternalError, NotFound, ResourceDoesNotExist +from databricks.sdk.service import sql from databricks.sdk.service.catalog import ( AwsIamRole, AzureManagedIdentity, AzureServicePrincipal, 
StorageCredentialInfo, - ValidateStorageCredentialResponse + ValidateStorageCredentialResponse, ) from databricks.sdk.service.workspace import GetSecretResponse -from databricks.labs.blueprint.tui import MockPrompts -from databricks.labs.ucx.assessment.azure import StoragePermissionMapping, \ - AzureServicePrincipalCrawler, AzureServicePrincipalInfo +from databricks.labs.ucx.assessment.azure import ( + AzureServicePrincipalCrawler, + AzureServicePrincipalInfo, + StoragePermissionMapping, +) from databricks.labs.ucx.migration.azure_credentials import ( - AzureServicePrincipalMigration, ServicePrincipalMigrationInfo, + AzureServicePrincipalMigration, + ServicePrincipalMigrationInfo, ) from tests.unit.framework.mocks import MockBackend -from tests.unit.test_cli import ws -def test_for_cli_not_azure(caplog): - w = create_autospec(WorkspaceClient) - w.config.is_azure = False - assert AzureServicePrincipalMigration.for_cli(w, MagicMock()) is None +@pytest.fixture +def ws(): + storage_permission_mappings = [ + { + "prefix": "prefix1", + "client_id": "app_secret1", + "principal": "principal_1", + "privilege": "WRITE_FILES", + "directory_id": "directory_id_1", + }, + { + "prefix": "prefix2", + "client_id": "app_secret2", + "principal": "principal_read", + "privilege": "READ_FILES", + "directory_id": "directory_id_1", + }, + { + "prefix": "prefix3", + "client_id": "app_secret3", + "principal": "principal_write", + "privilege": "WRITE_FILES", + "directory_id": "directory_id_2", + }, + ] + csv_output = io.StringIO() + fieldnames = storage_permission_mappings[0].keys() + csv_writer = csv.DictWriter(csv_output, fieldnames=fieldnames, dialect="excel") + csv_writer.writeheader() + for mapping in storage_permission_mappings: + csv_writer.writerow(mapping) + + state = { + "/Users/foo/.ucx/azure_storage_account_info.csv": csv_output.getvalue(), + "/Users/foo/.ucx/config.yml": yaml.dump( + { + 'version': 2, + 'inventory_database': 'ucx', + 'warehouse_id': 'test', + 'connect': { + 'host': 'foo', + 'token': 'bar', + }, + } + ), + } + + def download(path: str) -> io.BytesIO: + if path not in state: + raise NotFound(path) + return io.BytesIO(state[path].encode('utf-8')) + + ws = create_autospec(WorkspaceClient) + ws.config.host = 'https://localhost' + ws.current_user.me().user_name = "foo" + ws.workspace.download = download + ws.statement_execution.execute_statement.return_value = sql.ExecuteStatementResponse( + status=sql.StatementStatus(state=sql.StatementState.SUCCEEDED), + manifest=sql.ResultManifest(schema=sql.ResultSchema()), + ) + return ws + + +def test_for_cli_not_azure(caplog, ws): + ws.config.is_azure = False + assert AzureServicePrincipalMigration.for_cli(ws, MagicMock()) is None assert "Workspace is not on azure, please run this command on azure databricks workspaces." 
in caplog.text -def test_for_cli_not_prompts(): - w = create_autospec(WorkspaceClient) - w.config.is_azure = True +def test_for_cli_not_prompts(ws): + ws.config.is_azure = True prompts = MockPrompts({"Have you reviewed the azure_storage_account_info.csv *": "No"}) - assert AzureServicePrincipalMigration.for_cli(w, prompts) is None + assert AzureServicePrincipalMigration.for_cli(ws, prompts) is None def test_for_cli(ws): @@ -48,10 +112,8 @@ def test_for_cli(ws): assert isinstance(AzureServicePrincipalMigration.for_cli(ws, prompts), AzureServicePrincipalMigration) -def test_list_storage_credentials(): - w = create_autospec(WorkspaceClient) - - w.storage_credentials.list.return_value = [ +def test_list_storage_credentials(ws): + ws.storage_credentials.list.return_value = [ StorageCredentialInfo(aws_iam_role=AwsIamRole(role_arn="arn:aws:iam::123456789012:role/example-role-name")), StorageCredentialInfo( azure_managed_identity=AzureManagedIdentity( @@ -67,7 +129,7 @@ def test_list_storage_credentials(): ), ] - sp_migration = AzureServicePrincipalMigration(MagicMock(), w, MagicMock(), MagicMock()) + sp_migration = AzureServicePrincipalMigration(MagicMock(), ws, MagicMock(), MagicMock()) expected = {"b6420590-5e1c-4426-8950-a94cbe9b6115"} sp_migration._list_storage_credentials() @@ -75,69 +137,134 @@ def test_list_storage_credentials(): assert expected == sp_migration._list_storage_credentials() -@pytest.mark.parametrize("secret_bytes_value, expected_return", - [(GetSecretResponse(value="aGVsbG8gd29ybGQ="), "hello world"), - (GetSecretResponse(value="T2zhLCBNdW5kbyE="), None) - ]) -def test_read_secret_value_decode(secret_bytes_value, expected_return): - w = create_autospec(WorkspaceClient) - w.secrets.get_secret.return_value = secret_bytes_value +@pytest.mark.parametrize( + "secret_bytes_value, expected_return", + [ + (GetSecretResponse(value="aGVsbG8gd29ybGQ="), "hello world"), + (GetSecretResponse(value="T2zhLCBNdW5kbyE="), None), + (GetSecretResponse(value=None), None), + ], +) +def test_read_secret_value_decode(ws, secret_bytes_value, expected_return): + ws.secrets.get_secret.return_value = secret_bytes_value - sp_migration = AzureServicePrincipalMigration(MagicMock(), w, MagicMock(), MagicMock()) - assert sp_migration._read_databricks_secret("test_scope","test_key", "000") == expected_return + sp_migration = AzureServicePrincipalMigration(MagicMock(), ws, MagicMock(), MagicMock()) + assert sp_migration._read_databricks_secret("test_scope", "test_key", "000") == expected_return -@pytest.mark.parametrize("exception, expected_log, expected_return", - [(ResourceDoesNotExist(), "Will not reuse this client_secret", None), - (InternalError(), "Will not reuse this client_secret", None) - ]) -def test_read_secret_read_exception(caplog, exception, expected_log, expected_return): +@pytest.mark.parametrize( + "exception, expected_log, expected_return", + [ + (ResourceDoesNotExist(), "Will not reuse this client_secret", None), + (InternalError(), "Will not reuse this client_secret", None), + ], +) +def test_read_secret_read_exception(caplog, ws, exception, expected_log, expected_return): caplog.set_level(logging.INFO) - w = create_autospec(WorkspaceClient) - w.secrets.get_secret.side_effect = exception + ws.secrets.get_secret.side_effect = exception - sp_migration = AzureServicePrincipalMigration(MagicMock(), w, MagicMock(), MagicMock()) - secret_value = sp_migration._read_databricks_secret("test_scope","test_key", "000") + sp_migration = AzureServicePrincipalMigration(MagicMock(), ws, MagicMock(), 
MagicMock()) + secret_value = sp_migration._read_databricks_secret("test_scope", "test_key", "000") assert expected_log in caplog.text assert secret_value == expected_return -def test_fetch_client_secret(): - w = create_autospec(WorkspaceClient) - w.secrets.get_secret.return_value = GetSecretResponse(value="aGVsbG8gd29ybGQ=") +def test_fetch_client_secret(ws): + ws.secrets.get_secret.return_value = GetSecretResponse(value="aGVsbG8gd29ybGQ=") - crawled_sp = [AzureServicePrincipalInfo("app_secret1", "test_scope", "test_key", "tenant_id_1", "storage1"), - AzureServicePrincipalInfo("app_secret2", "test_scope", "test_key", "tenant_id_1", "storage1"), - AzureServicePrincipalInfo("app_no_secret1", "", "", "tenant_id_1", "storage1"), - AzureServicePrincipalInfo("app_no_secret2", "test_scope", "", "tenant_id_1", "storage1"),] - sp_crawler = AzureServicePrincipalCrawler(w, MockBackend(), "ucx") + crawled_sp = [ + AzureServicePrincipalInfo("app_secret1", "test_scope", "test_key", "tenant_id_1", "storage1"), + AzureServicePrincipalInfo("app_secret2", "test_scope", "test_key", "tenant_id_1", "storage1"), + AzureServicePrincipalInfo("app_no_secret1", "", "", "tenant_id_1", "storage1"), + AzureServicePrincipalInfo("app_no_secret2", "test_scope", "", "tenant_id_1", "storage1"), + ] + sp_crawler = AzureServicePrincipalCrawler(ws, MockBackend(), "ucx") sp_crawler._try_fetch = Mock(return_value=crawled_sp) sp_crawler._crawl = Mock(return_value=crawled_sp) - sp_to_be_checked = [StoragePermissionMapping(prefix="prefix1",client_id="app_secret1",principal="principal_1",privilege="WRITE_FILES",directory_id="directory_id_1"), - StoragePermissionMapping(prefix="prefix2",client_id="app_secret2",principal="principal_2",privilege="READ_FILES",directory_id="directory_id_1"), - StoragePermissionMapping(prefix="prefix3",client_id="app_no_secret1",principal="principal_3",privilege="WRITE_FILES",directory_id="directory_id_2"), - StoragePermissionMapping(prefix="prefix4",client_id="app_no_secret2",principal="principal_4",privilege="READ_FILES",directory_id="directory_id_2")] + sp_to_be_checked = [ + StoragePermissionMapping( + prefix="prefix1", + client_id="app_secret1", + principal="principal_1", + privilege="WRITE_FILES", + directory_id="directory_id_1", + ), + StoragePermissionMapping( + prefix="prefix2", + client_id="app_secret2", + principal="principal_2", + privilege="READ_FILES", + directory_id="directory_id_1", + ), + StoragePermissionMapping( + prefix="prefix3", + client_id="app_no_secret1", + principal="principal_3", + privilege="WRITE_FILES", + directory_id="directory_id_2", + ), + StoragePermissionMapping( + prefix="prefix4", + client_id="app_no_secret2", + principal="principal_4", + privilege="READ_FILES", + directory_id="directory_id_2", + ), + ] - expected_sp_list = [ServicePrincipalMigrationInfo(StoragePermissionMapping(prefix="prefix1",client_id="app_secret1",principal="principal_1",privilege="WRITE_FILES",directory_id="directory_id_1"), "hello world"), - ServicePrincipalMigrationInfo(StoragePermissionMapping(prefix="prefix2",client_id="app_secret2",principal="principal_2",privilege="READ_FILES",directory_id="directory_id_1"), "hello world")] + expected_sp_list = [ + ServicePrincipalMigrationInfo( + StoragePermissionMapping( + prefix="prefix1", + client_id="app_secret1", + principal="principal_1", + privilege="WRITE_FILES", + directory_id="directory_id_1", + ), + "hello world", + ), + ServicePrincipalMigrationInfo( + StoragePermissionMapping( + prefix="prefix2", + client_id="app_secret2", + 
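# [editor's note] The "hello world" client_secret in these expected pairs is the
# utf-8 decoding of the mocked base64 payload "aGVsbG8gd29ybGQ=" that
# get_secret returns above.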
principal="principal_2", + privilege="READ_FILES", + directory_id="directory_id_1", + ), + "hello world", + ), + ] - sp_migration = AzureServicePrincipalMigration(MagicMock(), w, MagicMock(), sp_crawler) + sp_migration = AzureServicePrincipalMigration(MagicMock(), ws, MagicMock(), sp_crawler) filtered_sp_list = sp_migration._fetch_client_secret(sp_to_be_checked) assert filtered_sp_list == expected_sp_list def test_print_action_plan(capsys): - sp_list_with_secret = [ServicePrincipalMigrationInfo(StoragePermissionMapping(prefix="prefix1",client_id="app_secret1",principal="principal_1",privilege="WRITE_FILES",directory_id="directory_id_1"), "hello world")] + sp_list_with_secret = [ + ServicePrincipalMigrationInfo( + StoragePermissionMapping( + prefix="prefix1", + client_id="app_secret1", + principal="principal_1", + privilege="WRITE_FILES", + directory_id="directory_id_1", + ), + "hello world", + ) + ] sp_migration = AzureServicePrincipalMigration(MagicMock(), MagicMock(), MagicMock(), MagicMock()) sp_migration._print_action_plan(sp_list_with_secret) - expected_print = (f"Service Principal name: principal_1, " - f"application_id: app_secret1, " - f"privilege WRITE_FILES " - f"on location prefix1\n") + expected_print = ( + "Service Principal name: principal_1, " + "application_id: app_secret1, " + "privilege WRITE_FILES " + "on location prefix1\n" + ) assert expected_print == capsys.readouterr().out @@ -156,10 +283,13 @@ def test_generate_migration_list(capsys, mocker, ws): prompts = MockPrompts({"Have you reviewed the azure_storage_account_info.csv *": "Yes"}) - mocker.patch("databricks.labs.ucx.assessment.azure.AzureResourcePermissions.load", return_value = [StoragePermissionMapping(prefix="prefix1",client_id="app_secret1",principal="principal_1",privilege="WRITE_FILES",directory_id="directory_id_1"), - StoragePermissionMapping(prefix="prefix2",client_id="app_secret2",principal="principal_2",privilege="READ_FILES",directory_id="directory_id_1")]) - mocker.patch("databricks.labs.ucx.assessment.azure.AzureServicePrincipalCrawler.snapshot", return_value=[AzureServicePrincipalInfo("app_secret1", "test_scope", "test_key", "tenant_id_1", "storage1"), - AzureServicePrincipalInfo("app_secret2", "test_scope", "test_key", "tenant_id_1", "storage1")]) + mocker.patch( + "databricks.labs.ucx.assessment.azure.AzureServicePrincipalCrawler.snapshot", + return_value=[ + AzureServicePrincipalInfo("app_secret1", "test_scope", "test_key", "tenant_id_1", "storage1"), + AzureServicePrincipalInfo("app_secret2", "test_scope", "test_key", "tenant_id_1", "storage1"), + ], + ) sp_migration = AzureServicePrincipalMigration.for_cli(ws, prompts) sp_migration._generate_migration_list() @@ -169,19 +299,30 @@ def test_generate_migration_list(capsys, mocker, ws): def test_execute_migration_no_confirmation(mocker, ws): ws.config.is_azure = True - prompts = MockPrompts({"Have you reviewed the azure_storage_account_info.csv *": "Yes", - "Above Azure Service Principals will be migrated to UC storage credentials*": "No"}) + prompts = MockPrompts( + { + "Have you reviewed the azure_storage_account_info.csv *": "Yes", + "Above Azure Service Principals will be migrated to UC storage credentials*": "No", + } + ) - mocker.patch("databricks.labs.ucx.migration.azure_credentials.AzureServicePrincipalMigration._generate_migration_list") + mocker.patch( + "databricks.labs.ucx.migration.azure_credentials.AzureServicePrincipalMigration._generate_migration_list" + ) - with 
patch("databricks.labs.ucx.migration.azure_credentials.AzureServicePrincipalMigration._create_storage_credential") as c: + with patch( + "databricks.labs.ucx.migration.azure_credentials.AzureServicePrincipalMigration._create_storage_credential" + ) as c: sp_migration = AzureServicePrincipalMigration.for_cli(ws, prompts) sp_migration.execute_migration(prompts) c.assert_not_called() def side_effect_create_storage_credential(name, azure_service_principal, comment, read_only): - return StorageCredentialInfo(name=name, azure_service_principal=azure_service_principal, comment=comment, read_only=read_only) + return StorageCredentialInfo( + name=name, azure_service_principal=azure_service_principal, comment=comment, read_only=read_only + ) + def side_effect_validate_storage_credential(storage_credential_name, url): if "read" in storage_credential_name: @@ -190,10 +331,10 @@ def side_effect_validate_storage_credential(storage_credential_name, url): "results": [ { "message": "", - "operation":["DELETE", "LIST", "READ", "WRITE"], - "result": ["SKIP", "PASS", "PASS", "SKIP"] + "operation": ["DELETE", "LIST", "READ", "WRITE"], + "result": ["SKIP", "PASS", "PASS", "SKIP"], } - ] + ], } return ValidateStorageCredentialResponse.from_dict(response) else: @@ -202,13 +343,14 @@ def side_effect_validate_storage_credential(storage_credential_name, url): "results": [ { "message": "", - "operation":["DELETE", "LIST", "READ", "WRITE"], - "result": ["PASS", "PASS", "PASS", "PASS"] + "operation": ["DELETE", "LIST", "READ", "WRITE"], + "result": ["PASS", "PASS", "PASS", "PASS"], } - ] + ], } return ValidateStorageCredentialResponse.from_dict(response) + def test_execute_migration(capsys, mocker, ws): ws.config.is_azure = True ws.secrets.get_secret.return_value = GetSecretResponse(value="aGVsbG8gd29ybGQ=") @@ -224,17 +366,23 @@ def test_execute_migration(capsys, mocker, ws): ws.storage_credentials.create.side_effect = side_effect_create_storage_credential ws.storage_credentials.validate.side_effect = side_effect_validate_storage_credential - prompts = MockPrompts({"Have you reviewed the azure_storage_account_info.csv *": "Yes", - "Above Azure Service Principals will be migrated to UC storage credentials*": "Yes"}) + prompts = MockPrompts( + { + "Have you reviewed the azure_storage_account_info.csv *": "Yes", + "Above Azure Service Principals will be migrated to UC storage credentials*": "Yes", + } + ) - mocker.patch("databricks.labs.ucx.assessment.azure.AzureResourcePermissions.load", return_value = [StoragePermissionMapping(prefix="prefix1",client_id="app_secret1",principal="principal_1",privilege="WRITE_FILES",directory_id="directory_id_1"), - StoragePermissionMapping(prefix="prefix2",client_id="app_secret2",principal="principal_read",privilege="READ_FILES",directory_id="directory_id_1"), - StoragePermissionMapping(prefix="prefix3",client_id="app_secret3",principal="principal_write",privilege="WRITE_FILES",directory_id="directory_id_2")]) - mocker.patch("databricks.labs.ucx.assessment.azure.AzureServicePrincipalCrawler.snapshot", return_value=[AzureServicePrincipalInfo("app_secret1", "test_scope", "test_key", "tenant_id_1", "storage1"), - AzureServicePrincipalInfo("app_secret2", "test_scope", "test_key", "tenant_id_1", "storage1"), - AzureServicePrincipalInfo("app_secret3", "test_scope", "test_key", "tenant_id_2", "storage1")]) + mocker.patch( + "databricks.labs.ucx.assessment.azure.AzureServicePrincipalCrawler.snapshot", + return_value=[ + AzureServicePrincipalInfo("app_secret1", "test_scope", "test_key", 
"tenant_id_1", "storage1"), + AzureServicePrincipalInfo("app_secret2", "test_scope", "test_key", "tenant_id_1", "storage1"), + AzureServicePrincipalInfo("app_secret3", "test_scope", "test_key", "tenant_id_2", "storage1"), + ], + ) sp_migration = AzureServicePrincipalMigration.for_cli(ws, prompts) sp_migration.execute_migration(prompts) - assert "Completed migration" in capsys.readouterr().out \ No newline at end of file + assert "Completed migration" in capsys.readouterr().out diff --git a/tests/unit/test_cli.py b/tests/unit/test_cli.py index 7a857a56fb..27cbbde3a8 100644 --- a/tests/unit/test_cli.py +++ b/tests/unit/test_cli.py @@ -310,9 +310,11 @@ def test_save_storage_and_principal_gcp(ws, caplog): def test_migrate_azure_service_principals(ws): ws.config.is_azure = True - with (patch("databricks.labs.blueprint.tui.Prompts.confirm", return_value=True), - patch("databricks.labs.blueprint.installation.Installation.load"), - patch("databricks.labs.blueprint.installation.Installation.save") as s): + with ( + patch("databricks.labs.blueprint.tui.Prompts.confirm", return_value=True), + patch("databricks.labs.blueprint.installation.Installation.load"), + patch("databricks.labs.blueprint.installation.Installation.save") as s, + ): migrate_azure_service_principals(ws) s.assert_called_once() From e0fb9f806b8df7cd257023081b310478508cf89f Mon Sep 17 00:00:00 2001 From: Ziyuan Qin Date: Mon, 12 Feb 2024 01:32:16 -0800 Subject: [PATCH 29/76] fix failed unit tests in test_azure.py due to rebase to main. --- tests/unit/azure/azure/mappings.json | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/unit/azure/azure/mappings.json b/tests/unit/azure/azure/mappings.json index 59678edccd..7df33168fd 100644 --- a/tests/unit/azure/azure/mappings.json +++ b/tests/unit/azure/azure/mappings.json @@ -8,12 +8,14 @@ "/v1.0/directoryObjects/user2": { "appId": "appIduser2", "displayName": "disNameuser2", - "id": "Iduser2" + "id": "Iduser2", + "appOwnerOrganizationId": "0000-0000" }, "/v1.0/directoryObjects/user3": { "appId": "appIduser3", "displayName": "disNameuser3", - "id": "Iduser3" + "id": "Iduser3", + "appOwnerOrganizationId": "0000-0000" }, "/subscriptions": { "value": [ From 45e0c0ee500c1a665535fd287b1d4ca763617d3e Mon Sep 17 00:00:00 2001 From: Ziyuan Qin Date: Mon, 12 Feb 2024 01:44:18 -0800 Subject: [PATCH 30/76] moved all azure service principal to storage credential migration code to databricks.labs.ucx.azure from databricks.labs.ucx.migration --- .../labs/ucx/{migration => azure}/azure_credentials.py | 0 src/databricks/labs/ucx/cli.py | 2 +- src/databricks/labs/ucx/migration/__init__.py | 0 .../integration/{migration => azure}/test_azure_credentials.py | 0 tests/integration/migration/__init__.py | 0 tests/unit/{migration => azure}/test_azure_credentials.py | 2 +- tests/unit/migration/__init__.py | 0 7 files changed, 2 insertions(+), 2 deletions(-) rename src/databricks/labs/ucx/{migration => azure}/azure_credentials.py (100%) delete mode 100644 src/databricks/labs/ucx/migration/__init__.py rename tests/integration/{migration => azure}/test_azure_credentials.py (100%) delete mode 100644 tests/integration/migration/__init__.py rename tests/unit/{migration => azure}/test_azure_credentials.py (99%) delete mode 100644 tests/unit/migration/__init__.py diff --git a/src/databricks/labs/ucx/migration/azure_credentials.py b/src/databricks/labs/ucx/azure/azure_credentials.py similarity index 100% rename from src/databricks/labs/ucx/migration/azure_credentials.py rename to 
src/databricks/labs/ucx/azure/azure_credentials.py diff --git a/src/databricks/labs/ucx/cli.py b/src/databricks/labs/ucx/cli.py index 871561e7a6..dc94fa3661 100644 --- a/src/databricks/labs/ucx/cli.py +++ b/src/databricks/labs/ucx/cli.py @@ -19,7 +19,7 @@ from databricks.labs.ucx.hive_metastore.mapping import TableMapping from databricks.labs.ucx.hive_metastore.table_migrate import TableMove, TablesMigrate from databricks.labs.ucx.install import WorkspaceInstallation -from databricks.labs.ucx.migration.azure_credentials import ( +from databricks.labs.ucx.azure.azure_credentials import ( AzureServicePrincipalMigration, ) from databricks.labs.ucx.workspace_access.groups import GroupManager diff --git a/src/databricks/labs/ucx/migration/__init__.py b/src/databricks/labs/ucx/migration/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/tests/integration/migration/test_azure_credentials.py b/tests/integration/azure/test_azure_credentials.py similarity index 100% rename from tests/integration/migration/test_azure_credentials.py rename to tests/integration/azure/test_azure_credentials.py diff --git a/tests/integration/migration/__init__.py b/tests/integration/migration/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/tests/unit/migration/test_azure_credentials.py b/tests/unit/azure/test_azure_credentials.py similarity index 99% rename from tests/unit/migration/test_azure_credentials.py rename to tests/unit/azure/test_azure_credentials.py index 0e4f7dd799..4a390d1149 100644 --- a/tests/unit/migration/test_azure_credentials.py +++ b/tests/unit/azure/test_azure_credentials.py @@ -23,7 +23,7 @@ AzureServicePrincipalInfo, StoragePermissionMapping, ) -from databricks.labs.ucx.migration.azure_credentials import ( +from databricks.labs.ucx.azure.azure_credentials import ( AzureServicePrincipalMigration, ServicePrincipalMigrationInfo, ) diff --git a/tests/unit/migration/__init__.py b/tests/unit/migration/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 From 260ad5dbcb78ff70eb19b2b717267a9aa7c27e08 Mon Sep 17 00:00:00 2001 From: Ziyuan Qin Date: Tue, 13 Feb 2024 14:07:47 -0800 Subject: [PATCH 31/76] 1. catch the external location overlaps exception while validating the storage credentials. 2. 
add flag to accomodate integration test --- .../labs/ucx/azure/azure_credentials.py | 46 +++++++++++++++++-- src/databricks/labs/ucx/cli.py | 5 +- 2 files changed, 43 insertions(+), 8 deletions(-) diff --git a/src/databricks/labs/ucx/azure/azure_credentials.py b/src/databricks/labs/ucx/azure/azure_credentials.py index 2eb36f3da7..d6ebc808e1 100644 --- a/src/databricks/labs/ucx/azure/azure_credentials.py +++ b/src/databricks/labs/ucx/azure/azure_credentials.py @@ -7,6 +7,7 @@ from databricks.labs.blueprint.tui import Prompts from databricks.sdk import WorkspaceClient from databricks.sdk.errors import InternalError, ResourceDoesNotExist +from databricks.sdk.errors.platform import InvalidParameterValue from databricks.sdk.service.catalog import ( AzureServicePrincipal, Privilege, @@ -61,6 +62,7 @@ def __init__( ws: WorkspaceClient, azure_resource_permissions: AzureResourcePermissions, azure_sp_crawler: AzureServicePrincipalCrawler, + integration_test_flag="", ): self._output_file = "azure_service_principal_migration_result.csv" self._final_sp_list: list[ServicePrincipalMigrationInfo] = [] @@ -68,6 +70,7 @@ def __init__( self._ws = ws self._azure_resource_permissions = azure_resource_permissions self._azure_sp_crawler = azure_sp_crawler + self._integration_test_flag = integration_test_flag @classmethod def for_cli(cls, ws: WorkspaceClient, prompts: Prompts, product='ucx'): @@ -98,6 +101,18 @@ def _list_storage_credentials(self) -> set[str]: storage_credential_app_ids = set() storage_credentials = self._ws.storage_credentials.list(max_results=0) + + # if we are doing integration test + if self._integration_test_flag: + for storage_credential in storage_credentials: + if not storage_credential.azure_service_principal: + continue + if self._integration_test_flag == storage_credential.name: + # return the storage credential created during integration test + return {storage_credential.azure_service_principal.application_id} + # return no storage credential if there is none created during integration test + return {""} + for storage_credential in storage_credentials: # only add service principal's application_id, ignore managed identity based storage_credential if storage_credential.azure_service_principal: @@ -222,11 +237,29 @@ def _create_storage_credential(self, sp_migration: ServicePrincipalMigrationInfo validation_result = self._validate_storage_credential(storage_credential, sp_migration.service_principal.prefix) return validation_result - def _validate_storage_credential(self, storage_credential, location) -> StorageCredentialValidationResult: - validation = self._ws.storage_credentials.validate( - storage_credential_name=storage_credential.name, url=location - ) - return StorageCredentialValidationResult.from_storage_credential_validation(storage_credential, validation) + def _validate_storage_credential(self, storage_credential, location: str) -> StorageCredentialValidationResult: + # storage_credential validation creates a temp UC external location, which cannot overlap with + # existing UC external locations. 
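+        # (an overlapping location makes the validate call raise InvalidParameterValue, which the except branch below converts into a skipped validation result)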
So add a sub folder to the validation location just in case + try: + validation = self._ws.storage_credentials.validate( + storage_credential_name=storage_credential.name, url=location + ) + return StorageCredentialValidationResult.from_storage_credential_validation(storage_credential, validation) + except InvalidParameterValue: + logger.warning( + "There is an existing external location overlaps with the prefix that is mapped to the service principal and used for validating the migrated storage credential. Skip the validation" + ) + return StorageCredentialValidationResult.from_storage_credential_validation( + storage_credential, + ValidateStorageCredentialResponse( + is_dir=None, + results=[ + ValidationResult( + message="The validation is skipped because an existing external location overlaps with the location used for validation." + ) + ], + ), + ) def execute_migration(self, prompts: Prompts): @@ -242,6 +275,9 @@ def execute_migration(self, prompts: Prompts): for sp in self._final_sp_list: execution_result.append(self._create_storage_credential(sp)) + if self._integration_test_flag: + print(execution_result) + results_file = self._installation.save(execution_result, filename=self._output_file) logger.info("Completed migration from Azure Service Principal migrated to UC Storage credentials") print( diff --git a/src/databricks/labs/ucx/cli.py b/src/databricks/labs/ucx/cli.py index dc94fa3661..aeef5faa71 100644 --- a/src/databricks/labs/ucx/cli.py +++ b/src/databricks/labs/ucx/cli.py @@ -12,16 +12,15 @@ from databricks.labs.ucx.account import AccountWorkspaces, WorkspaceInfo from databricks.labs.ucx.assessment.aws import AWSResourcePermissions + from databricks.labs.ucx.azure.access import AzureResourcePermissions +from databricks.labs.ucx.azure.azure_credentials import AzureServicePrincipalMigration from databricks.labs.ucx.config import WorkspaceConfig from databricks.labs.ucx.framework.crawlers import StatementExecutionBackend from databricks.labs.ucx.hive_metastore import ExternalLocations, TablesCrawler from databricks.labs.ucx.hive_metastore.mapping import TableMapping from databricks.labs.ucx.hive_metastore.table_migrate import TableMove, TablesMigrate from databricks.labs.ucx.install import WorkspaceInstallation -from databricks.labs.ucx.azure.azure_credentials import ( - AzureServicePrincipalMigration, -) from databricks.labs.ucx.workspace_access.groups import GroupManager ucx = App(__file__) From c317767c62fa71e2b0f5fcc37496a610e2ae8c1f Mon Sep 17 00:00:00 2001 From: Ziyuan Qin Date: Tue, 13 Feb 2024 14:50:12 -0800 Subject: [PATCH 32/76] add unit tests for _integration_test_flag and storage credential validation exception when prefix overlaps with existing external location --- .../labs/ucx/azure/azure_credentials.py | 3 - tests/unit/azure/test_azure_credentials.py | 58 +++++++++++++++++-- 2 files changed, 53 insertions(+), 8 deletions(-) diff --git a/src/databricks/labs/ucx/azure/azure_credentials.py b/src/databricks/labs/ucx/azure/azure_credentials.py index d6ebc808e1..d4567d48d4 100644 --- a/src/databricks/labs/ucx/azure/azure_credentials.py +++ b/src/databricks/labs/ucx/azure/azure_credentials.py @@ -275,9 +275,6 @@ def execute_migration(self, prompts: Prompts): for sp in self._final_sp_list: execution_result.append(self._create_storage_credential(sp)) - if self._integration_test_flag: - print(execution_result) - results_file = self._installation.save(execution_result, filename=self._output_file) logger.info("Completed migration from Azure Service Principal migrated to UC 
Storage credentials") print( diff --git a/tests/unit/azure/test_azure_credentials.py b/tests/unit/azure/test_azure_credentials.py index 4a390d1149..211b824d4a 100644 --- a/tests/unit/azure/test_azure_credentials.py +++ b/tests/unit/azure/test_azure_credentials.py @@ -8,6 +8,7 @@ from databricks.labs.blueprint.tui import MockPrompts from databricks.sdk import WorkspaceClient from databricks.sdk.errors import InternalError, NotFound, ResourceDoesNotExist +from databricks.sdk.errors.platform import InvalidParameterValue from databricks.sdk.service import sql from databricks.sdk.service.catalog import ( AwsIamRole, @@ -54,6 +55,13 @@ def ws(): "privilege": "WRITE_FILES", "directory_id": "directory_id_2", }, + { + "prefix": "overlap_with_external_location", + "client_id": "app_secret4", + "principal": "principal_overlap", + "privilege": "WRITE_FILES", + "directory_id": "directory_id_2", + }, ] csv_output = io.StringIO() fieldnames = storage_permission_mappings[0].keys() @@ -137,6 +145,42 @@ def test_list_storage_credentials(ws): assert expected == sp_migration._list_storage_credentials() +def test_list_storage_credentials_for_integration_test(ws): + ws.storage_credentials.list.return_value = [ + StorageCredentialInfo(aws_iam_role=AwsIamRole(role_arn="arn:aws:iam::123456789012:role/example-role-name")), + StorageCredentialInfo( + azure_managed_identity=AzureManagedIdentity( + access_connector_id="/subscriptions/.../providers/Microsoft.Databricks/..." + ) + ), + StorageCredentialInfo( + name="spn_for_integration_test", + azure_service_principal=AzureServicePrincipal( + application_id="b6420590-5e1c-4426-8950-a94cbe9b6115", + directory_id="62e43d7d-df53-4c64-86ed-c2c1a3ac60c3", + client_secret="secret", + ), + ), + ] + + # test storage credential: spn_for_integration_test is picked up + # in integration test, we only pick up the existing storage credential created in integration test and ignore the others + sp_migration = AzureServicePrincipalMigration( + MagicMock(), ws, MagicMock(), MagicMock(), integration_test_flag="spn_for_integration_test" + ) + expected = {"b6420590-5e1c-4426-8950-a94cbe9b6115"} + sp_migration._list_storage_credentials() + assert expected == sp_migration._list_storage_credentials() + + # test storage credential is not picked up + # if integration test does not create storage credential, we use dummy integration_test_flag to filter out other existing storage credentials + sp_migration = AzureServicePrincipalMigration( + MagicMock(), ws, MagicMock(), MagicMock(), integration_test_flag="other_spn" + ) + sp_migration._list_storage_credentials() + assert {""} == sp_migration._list_storage_credentials() + + @pytest.mark.parametrize( "secret_bytes_value, expected_return", [ @@ -306,12 +350,10 @@ def test_execute_migration_no_confirmation(mocker, ws): } ) - mocker.patch( - "databricks.labs.ucx.migration.azure_credentials.AzureServicePrincipalMigration._generate_migration_list" - ) + mocker.patch("databricks.labs.ucx.azure.azure_credentials.AzureServicePrincipalMigration._generate_migration_list") with patch( - "databricks.labs.ucx.migration.azure_credentials.AzureServicePrincipalMigration._create_storage_credential" + "databricks.labs.ucx.azure.azure_credentials.AzureServicePrincipalMigration._create_storage_credential" ) as c: sp_migration = AzureServicePrincipalMigration.for_cli(ws, prompts) sp_migration.execute_migration(prompts) @@ -325,6 +367,8 @@ def side_effect_create_storage_credential(name, azure_service_principal, comment def 
side_effect_validate_storage_credential(storage_credential_name, url): + if "overlap" in storage_credential_name: + raise InvalidParameterValue if "read" in storage_credential_name: response = { "is_dir": True, @@ -351,7 +395,7 @@ def side_effect_validate_storage_credential(storage_credential_name, url): return ValidateStorageCredentialResponse.from_dict(response) -def test_execute_migration(capsys, mocker, ws): +def test_execute_migration(caplog, capsys, mocker, ws): ws.config.is_azure = True ws.secrets.get_secret.return_value = GetSecretResponse(value="aGVsbG8gd29ybGQ=") ws.storage_credentials.list.return_value = [ @@ -379,10 +423,14 @@ def test_execute_migration(capsys, mocker, ws): AzureServicePrincipalInfo("app_secret1", "test_scope", "test_key", "tenant_id_1", "storage1"), AzureServicePrincipalInfo("app_secret2", "test_scope", "test_key", "tenant_id_1", "storage1"), AzureServicePrincipalInfo("app_secret3", "test_scope", "test_key", "tenant_id_2", "storage1"), + AzureServicePrincipalInfo("app_secret4", "test_scope", "test_key", "tenant_id_2", "storage1"), ], ) sp_migration = AzureServicePrincipalMigration.for_cli(ws, prompts) sp_migration.execute_migration(prompts) + # assert migration is complete assert "Completed migration" in capsys.readouterr().out + # assert the validation exception is caught when prefix overlaps with existing external location + assert "Skip the validation" in caplog.text From 5301a442f9613a1be020d8c49c057eb69a24f050 Mon Sep 17 00:00:00 2001 From: Ziyuan Qin Date: Tue, 13 Feb 2024 16:08:02 -0800 Subject: [PATCH 33/76] improve unit test for test_execute_migration --- tests/unit/azure/test_azure_credentials.py | 28 +++++++--------------- 1 file changed, 8 insertions(+), 20 deletions(-) diff --git a/tests/unit/azure/test_azure_credentials.py b/tests/unit/azure/test_azure_credentials.py index 211b824d4a..c4b417f4ea 100644 --- a/tests/unit/azure/test_azure_credentials.py +++ b/tests/unit/azure/test_azure_credentials.py @@ -370,28 +370,10 @@ def side_effect_validate_storage_credential(storage_credential_name, url): if "overlap" in storage_credential_name: raise InvalidParameterValue if "read" in storage_credential_name: - response = { - "is_dir": True, - "results": [ - { - "message": "", - "operation": ["DELETE", "LIST", "READ", "WRITE"], - "result": ["SKIP", "PASS", "PASS", "SKIP"], - } - ], - } + response = {"isDir": True, "results": [{"message": "", "operation": "WRITE", "result": "SKIP"}]} return ValidateStorageCredentialResponse.from_dict(response) else: - response = { - "is_dir": True, - "results": [ - { - "message": "", - "operation": ["DELETE", "LIST", "READ", "WRITE"], - "result": ["PASS", "PASS", "PASS", "PASS"], - } - ], - } + response = {"isDir": True, "results": [{"message": "", "operation": "WRITE", "result": "PASS"}]} return ValidateStorageCredentialResponse.from_dict(response) @@ -428,9 +410,15 @@ def test_execute_migration(caplog, capsys, mocker, ws): ) sp_migration = AzureServicePrincipalMigration.for_cli(ws, prompts) + sp_migration._installation.save = MagicMock() sp_migration.execute_migration(prompts) # assert migration is complete assert "Completed migration" in capsys.readouterr().out # assert the validation exception is caught when prefix overlaps with existing external location assert "Skip the validation" in caplog.text + # assert validation results + save_args = sp_migration._installation.save.call_args.args[0] + assert any("The validation is skipped" in arg.results[0].message for arg in save_args) + assert any("PASS" in 
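+    # save_args holds the list of validation results that execute_migration passed to installation.save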
arg.results[0].result.value for arg in save_args) + assert any("SKIP" in arg.results[0].result.value for arg in save_args) From 87d1af8b0d63a4113d4de4d48ed0ff6cb96916a7 Mon Sep 17 00:00:00 2001 From: Ziyuan Qin Date: Wed, 14 Feb 2024 11:41:27 -0800 Subject: [PATCH 34/76] add read_only flag to storage credential validation --- .../labs/ucx/azure/azure_credentials.py | 6 +- .../azure/test_azure_credentials.py | 152 ++++++++++++++++++ 2 files changed, 155 insertions(+), 3 deletions(-) diff --git a/src/databricks/labs/ucx/azure/azure_credentials.py b/src/databricks/labs/ucx/azure/azure_credentials.py index d4567d48d4..77e081195e 100644 --- a/src/databricks/labs/ucx/azure/azure_credentials.py +++ b/src/databricks/labs/ucx/azure/azure_credentials.py @@ -234,15 +234,15 @@ def _create_storage_credential(self, sp_migration: ServicePrincipalMigrationInfo name=name, azure_service_principal=azure_service_principal, comment=comment, read_only=read_only ) - validation_result = self._validate_storage_credential(storage_credential, sp_migration.service_principal.prefix) + validation_result = self._validate_storage_credential(storage_credential, sp_migration.service_principal.prefix, read_only) return validation_result - def _validate_storage_credential(self, storage_credential, location: str) -> StorageCredentialValidationResult: + def _validate_storage_credential(self, storage_credential, location: str, read_only: bool) -> StorageCredentialValidationResult: # storage_credential validation creates a temp UC external location, which cannot overlap with # existing UC external locations. So add a sub folder to the validation location just in case try: validation = self._ws.storage_credentials.validate( - storage_credential_name=storage_credential.name, url=location + storage_credential_name=storage_credential.name, url=location, read_only=read_only ) return StorageCredentialValidationResult.from_storage_credential_validation(storage_credential, validation) except InvalidParameterValue: diff --git a/tests/integration/azure/test_azure_credentials.py b/tests/integration/azure/test_azure_credentials.py index e69de29bb2..131ed68cf4 100644 --- a/tests/integration/azure/test_azure_credentials.py +++ b/tests/integration/azure/test_azure_credentials.py @@ -0,0 +1,152 @@ +import base64 +import re +from unittest.mock import MagicMock + +import pytest +from databricks.labs.blueprint.tui import MockPrompts + +from databricks.labs.ucx.assessment.azure import ( + AzureServicePrincipalInfo, + StoragePermissionMapping, +) +from databricks.labs.ucx.assessment.crawlers import ( + _SECRET_PATTERN, +) +from databricks.labs.ucx.azure.azure_credentials import AzureServicePrincipalMigration + + +@pytest.fixture +def prepare_spn_migration_test(ws, debug_env, make_random): + def inner(read_only=False) -> dict: + # cluster_conf = ws.clusters.get(cluster_id=debug_env["TEST_DEFAULT_CLUSTER_ID"]) + spark_conf = ws.clusters.get(cluster_id="0214-064652-su15myvb").spark_conf + + application_id = spark_conf.get("fs.azure.account.oauth2.client.id") + + secret_matched = re.search(_SECRET_PATTERN, spark_conf.get("fs.azure.account.oauth2.client.secret")) + secret_scope, secret_key = ( + secret_matched.group(1).split("/")[1], + secret_matched.group(1).split("/")[2], + ) + + secret_response = ws.secrets.get_secret(secret_scope, secret_key) + client_secret = base64.b64decode(secret_response.value).decode("utf-8") + + end_point = spark_conf.get("fs.azure.account.oauth2.client.endpoint") + directory_id = end_point.split("/")[3] + + # application_id = 
"9cbd8a1a-8169-4ff8-a929-f1e9572c090c" + # directory_id = "9f37a392-f0ae-4280-9796-f1864a10effc" + name = f"testinfra_storageaccess_{make_random(4).lower()}" + + azure_resource_permissions = MagicMock() + azure_resource_permissions.load.return_value = [ + StoragePermissionMapping( + prefix="abfss://things@labsazurethings.dfs.core.windows.net/avoid_ext_loc_overlap", + client_id=application_id, + principal=name, + privilege="READ_FILES" if read_only else "WRITE_FILES", + directory_id=directory_id, + ) + ] + + azure_sp_crawler = MagicMock() + azure_sp_crawler.snapshot.return_value = [ + AzureServicePrincipalInfo( + application_id=application_id, + secret_scope=secret_scope, + secret_key=secret_key, + tenant_id="test", + storage_account="test", + ) + ] + + installation = MagicMock() + installation.save.return_value = "azure_service_principal_migration_result.csv" + + return { + "storage_credential_name": name, + "application_id": application_id, + "directory_id": directory_id, + "client_secret": client_secret, + "azure_resource_permissions": azure_resource_permissions, + "azure_sp_crawler": azure_sp_crawler, + "installation": installation, + } + + return inner + + +@pytest.fixture +def execute_migration(ws): + def inner(variables: dict, integration_test_flag: str) -> AzureServicePrincipalMigration: + spn_migration = AzureServicePrincipalMigration( + variables["installation"], + ws, + variables["azure_resource_permissions"], + variables["azure_sp_crawler"], + integration_test_flag=integration_test_flag, + ) + spn_migration.execute_migration( + MockPrompts({"Above Azure Service Principals will be migrated to UC storage credentials *": "Yes"}) + ) + return spn_migration + + return inner + + +def test_spn_migration_existed_storage_credential( + ws, execute_migration, make_storage_credential_from_spn, prepare_spn_migration_test +): + variables = prepare_spn_migration_test(read_only=False) + + # create a storage credential for this test + make_storage_credential_from_spn( + name=variables["storage_credential_name"], + application_id=variables["application_id"], + client_secret=variables["client_secret"], + directory_id=variables["directory_id"], + ) + + # test that the spn migration will be skipped due to above storage credential is existed + spn_migration = execute_migration(variables=variables, integration_test_flag=variables["storage_credential_name"]) + + # because storage_credential is existing, no spn should be migrated + assert not spn_migration._final_sp_list + + +@pytest.mark.parametrize("read_only", [False, True]) +def test_spn_migration(ws, execute_migration, prepare_spn_migration_test, read_only): + variables = prepare_spn_migration_test(read_only) + + try: + spn_migration = execute_migration(variables=variables, integration_test_flag="lets_migrate_the_spn") + + assert spn_migration._final_sp_list[0].service_principal.principal == variables["storage_credential_name"] + assert ws.storage_credentials.get(variables["storage_credential_name"]).read_only is read_only + + validation_result = spn_migration._installation.save.call_args.args[0][0] + if read_only: + # We only assert that write validation are not performed for read only storage credential here. + # In real life, the READ validation for read only storage credential may fail if there is no file, + # but that is fine, as the storage credential is created, and we just cannot validate it until it's really used. 
+ assert not any( + (res.operation is not None) + and ("WRITE" in res.operation.value) + for res in validation_result.results + ) + else: + assert any( + (res.operation is not None) + and ("WRITE" in res.operation.value) + and ("PASS" in res.result.value) + for res in validation_result.results + ) + assert any( + (res.operation is not None) + and ("DELETE" in res.operation.value) + and ("PASS" in res.result.value) + for res in validation_result.results + ) + finally: + ws.storage_credentials.delete(name=variables["storage_credential_name"], force=True) From 6099a56d263e7afc0b94da9c75f3af2beea6e7ea Mon Sep 17 00:00:00 2001 From: Ziyuan Qin Date: Wed, 14 Feb 2024 11:57:45 -0800 Subject: [PATCH 35/76] remove some redundants --- src/databricks/labs/ucx/azure/access.py | 5 +---- src/databricks/labs/ucx/azure/azure_credentials.py | 7 ++----- 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/src/databricks/labs/ucx/azure/access.py b/src/databricks/labs/ucx/azure/access.py index 1e2022c61f..ad55b24f0e 100644 --- a/src/databricks/labs/ucx/azure/access.py +++ b/src/databricks/labs/ucx/azure/access.py @@ -107,8 +107,5 @@ def _get_storage_accounts(self) -> list[str]: def load(self) -> list[StoragePermissionMapping]: """ Load StoragePermissionMapping info from azure_storage_account_info.csv - :return: """ - storage_account_infos = self._installation.load(list[StoragePermissionMapping], filename=self._filename) - - return storage_account_infos + return self._installation.load(list[StoragePermissionMapping], filename=self._filename) diff --git a/src/databricks/labs/ucx/azure/azure_credentials.py b/src/databricks/labs/ucx/azure/azure_credentials.py index 77e081195e..8f88bade89 100644 --- a/src/databricks/labs/ucx/azure/azure_credentials.py +++ b/src/databricks/labs/ucx/azure/azure_credentials.py @@ -136,10 +136,7 @@ def _read_databricks_secret(self, scope: str, key: str, application_id: str) -> logger.info( f"InternalError while reading secret {scope}.{key}. " f"Cannot fetch the service principal client_secret for {application_id}. " - f"Will not reuse this client_secret" - ) - print( - f"{application_id} is not migrated due to InternalError while fetching the client_secret from Databricks secret." + f"Will not reuse this client_secret. " f"You may rerun the migration command later to retry this service principal" ) return None @@ -186,7 +183,7 @@ def _fetch_client_secret(self, sp_list: list[StoragePermissionMapping]) -> list[ if sp.client_id in azure_sp_info_with_client_secret: sp_list_with_secret.append( ServicePrincipalMigrationInfo( - service_principal=sp, client_secret=azure_sp_info_with_client_secret[sp.client_id] + sp, azure_sp_info_with_client_secret[sp.client_id] ) ) return sp_list_with_secret From 1d8e69c5c9712b5b5c1099a33a9343e68af3f969 Mon Sep 17 00:00:00 2001 From: Ziyuan Qin Date: Wed, 14 Feb 2024 11:59:49 -0800 Subject: [PATCH 36/76] add integration tests for azure service principal migration. 
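
Note on the fixture pattern: make_storage_credential_from_spn below ends with
yield from factory("storage_credential_from_spn", create, remove). The factory
helper itself is not part of this diff; a minimal sketch of the create/teardown
pattern it implements, with the signature and behavior inferred from the call
sites here rather than copied from the ucx source, could look like this:

    import logging

    logger = logging.getLogger(__name__)

    def factory(name, create, remove):
        # hand out created objects and remember each one for teardown
        cleanup = []

        def inner(**kwargs):
            item = create(**kwargs)
            cleanup.append(item)
            return item

        yield inner
        # tear down everything the test created, tolerating individual failures
        for item in reversed(cleanup):
            try:
                remove(item)
            except Exception as err:
                logger.warning(f"ignoring error while removing {name} fixture: {err}")

A test requesting the fixture receives inner and can create any number of
storage credentials; each one is deleted again once the test finishes.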
--- src/databricks/labs/ucx/mixins/fixtures.py | 23 +++++++++++++++++++ .../azure/test_azure_credentials.py | 9 +++----- tests/unit/azure/test_azure_credentials.py | 14 +++++++---- 3 files changed, 36 insertions(+), 10 deletions(-) diff --git a/src/databricks/labs/ucx/mixins/fixtures.py b/src/databricks/labs/ucx/mixins/fixtures.py index ae22cbd27b..0479f1182b 100644 --- a/src/databricks/labs/ucx/mixins/fixtures.py +++ b/src/databricks/labs/ucx/mixins/fixtures.py @@ -19,10 +19,12 @@ from databricks.sdk.retries import retried from databricks.sdk.service import compute, iam, jobs, pipelines, sql, workspace from databricks.sdk.service.catalog import ( + AzureServicePrincipal, CatalogInfo, DataSourceFormat, FunctionInfo, SchemaInfo, + StorageCredentialInfo, TableInfo, TableType, ) @@ -1072,3 +1074,24 @@ def remove(query: Query): logger.info(f"Can't drop query {e}") yield from factory("query", create, remove) + + +@pytest.fixture +def make_storage_credential_from_spn(ws): + def create( + *, name: str, application_id: str, client_secret: str, directory_id: str, read_only=False + ) -> StorageCredentialInfo: + azure_service_principal = AzureServicePrincipal( + directory_id=directory_id, + application_id=application_id, + client_secret=client_secret, + ) + storage_credential = ws.storage_credentials.create( + name=name, azure_service_principal=azure_service_principal, read_only=read_only + ) + return storage_credential + + def remove(storage_credential: StorageCredentialInfo): + ws.storage_credentials.delete(name=storage_credential.name, force=True) + + yield from factory("storage_credential_from_spn", create, remove) diff --git a/tests/integration/azure/test_azure_credentials.py b/tests/integration/azure/test_azure_credentials.py index 131ed68cf4..807dafcaec 100644 --- a/tests/integration/azure/test_azure_credentials.py +++ b/tests/integration/azure/test_azure_credentials.py @@ -18,8 +18,7 @@ @pytest.fixture def prepare_spn_migration_test(ws, debug_env, make_random): def inner(read_only=False) -> dict: - # cluster_conf = ws.clusters.get(cluster_id=debug_env["TEST_DEFAULT_CLUSTER_ID"]) - spark_conf = ws.clusters.get(cluster_id="0214-064652-su15myvb").spark_conf + spark_conf = ws.clusters.get(cluster_id=debug_env["TEST_LEGACY_SPN_CLUSTER_ID"]).spark_conf application_id = spark_conf.get("fs.azure.account.oauth2.client.id") @@ -35,8 +34,6 @@ def inner(read_only=False) -> dict: end_point = spark_conf.get("fs.azure.account.oauth2.client.endpoint") directory_id = end_point.split("/")[3] - # application_id = "9cbd8a1a-8169-4ff8-a929-f1e9572c090c" - # directory_id = "9f37a392-f0ae-4280-9796-f1864a10effc" name = f"testinfra_storageaccess_{make_random(4).lower()}" azure_resource_permissions = MagicMock() @@ -109,7 +106,7 @@ def test_spn_migration_existed_storage_credential( ) # test that the spn migration will be skipped due to above storage credential is existed - spn_migration = execute_migration(variables=variables, integration_test_flag=variables["storage_credential_name"]) + spn_migration = execute_migration(variables, integration_test_flag=variables["storage_credential_name"]) # because storage_credential is existing, no spn should be migrated assert not spn_migration._final_sp_list @@ -120,7 +117,7 @@ def test_spn_migration(ws, execute_migration, prepare_spn_migration_test, read_o variables = prepare_spn_migration_test(read_only) try: - spn_migration = execute_migration(variables=variables, integration_test_flag="lets_migrate_the_spn") + spn_migration = execute_migration(variables, 
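+        # the flag matches no existing storage credential name, so _list_storage_credentials
+        # excludes nothing and this service principal does get migrated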
integration_test_flag="lets_migrate_the_spn") assert spn_migration._final_sp_list[0].service_principal.principal == variables["storage_credential_name"] assert ws.storage_credentials.get(variables["storage_credential_name"]).read_only is read_only diff --git a/tests/unit/azure/test_azure_credentials.py b/tests/unit/azure/test_azure_credentials.py index c4b417f4ea..655f172cec 100644 --- a/tests/unit/azure/test_azure_credentials.py +++ b/tests/unit/azure/test_azure_credentials.py @@ -366,11 +366,11 @@ def side_effect_create_storage_credential(name, azure_service_principal, comment ) -def side_effect_validate_storage_credential(storage_credential_name, url): +def side_effect_validate_storage_credential(storage_credential_name, url, read_only): if "overlap" in storage_credential_name: raise InvalidParameterValue if "read" in storage_credential_name: - response = {"isDir": True, "results": [{"message": "", "operation": "WRITE", "result": "SKIP"}]} + response = {"isDir": True, "results": [{"message": "", "operation": "READ", "result": "PASS"}]} return ValidateStorageCredentialResponse.from_dict(response) else: response = {"isDir": True, "results": [{"message": "", "operation": "WRITE", "result": "PASS"}]} @@ -420,5 +420,11 @@ def test_execute_migration(caplog, capsys, mocker, ws): # assert validation results save_args = sp_migration._installation.save.call_args.args[0] assert any("The validation is skipped" in arg.results[0].message for arg in save_args) - assert any("PASS" in arg.results[0].result.value for arg in save_args) - assert any("SKIP" in arg.results[0].result.value for arg in save_args) + assert any(("READ" in arg.results[0].operation.value) + and ("PASS" in arg.results[0].result.value) + for arg in save_args + ) + assert any(("WRITE" in arg.results[0].operation.value) + and ("PASS" in arg.results[0].result.value) + for arg in save_args + ) From b55a8122dc094436a040fe48e62e3ff2bda84728 Mon Sep 17 00:00:00 2001 From: Ziyuan Qin Date: Wed, 14 Feb 2024 12:07:09 -0800 Subject: [PATCH 37/76] apply make fmt changes --- .../labs/ucx/azure/azure_credentials.py | 12 +++++---- .../azure/test_azure_credentials.py | 27 ++++++++----------- tests/unit/azure/test_azure_credentials.py | 12 ++++----- 3 files changed, 23 insertions(+), 28 deletions(-) diff --git a/src/databricks/labs/ucx/azure/azure_credentials.py b/src/databricks/labs/ucx/azure/azure_credentials.py index 8f88bade89..031d6b184a 100644 --- a/src/databricks/labs/ucx/azure/azure_credentials.py +++ b/src/databricks/labs/ucx/azure/azure_credentials.py @@ -182,9 +182,7 @@ def _fetch_client_secret(self, sp_list: list[StoragePermissionMapping]) -> list[ for sp in sp_list: if sp.client_id in azure_sp_info_with_client_secret: sp_list_with_secret.append( - ServicePrincipalMigrationInfo( - sp, azure_sp_info_with_client_secret[sp.client_id] - ) + ServicePrincipalMigrationInfo(sp, azure_sp_info_with_client_secret[sp.client_id]) ) return sp_list_with_secret @@ -231,10 +229,14 @@ def _create_storage_credential(self, sp_migration: ServicePrincipalMigrationInfo name=name, azure_service_principal=azure_service_principal, comment=comment, read_only=read_only ) - validation_result = self._validate_storage_credential(storage_credential, sp_migration.service_principal.prefix, read_only) + validation_result = self._validate_storage_credential( + storage_credential, sp_migration.service_principal.prefix, read_only + ) return validation_result - def _validate_storage_credential(self, storage_credential, location: str, read_only: bool) -> 
StorageCredentialValidationResult: + def _validate_storage_credential( + self, storage_credential, location: str, read_only: bool + ) -> StorageCredentialValidationResult: # storage_credential validation creates a temp UC external location, which cannot overlap with # existing UC external locations. So add a sub folder to the validation location just in case try: diff --git a/tests/integration/azure/test_azure_credentials.py b/tests/integration/azure/test_azure_credentials.py index 807dafcaec..2a4d8cf187 100644 --- a/tests/integration/azure/test_azure_credentials.py +++ b/tests/integration/azure/test_azure_credentials.py @@ -9,9 +9,7 @@ AzureServicePrincipalInfo, StoragePermissionMapping, ) -from databricks.labs.ucx.assessment.crawlers import ( - _SECRET_PATTERN, -) +from databricks.labs.ucx.assessment.crawlers import _SECRET_PATTERN from databricks.labs.ucx.azure.azure_credentials import AzureServicePrincipalMigration @@ -23,10 +21,13 @@ def inner(read_only=False) -> dict: application_id = spark_conf.get("fs.azure.account.oauth2.client.id") secret_matched = re.search(_SECRET_PATTERN, spark_conf.get("fs.azure.account.oauth2.client.secret")) - secret_scope, secret_key = ( - secret_matched.group(1).split("/")[1], - secret_matched.group(1).split("/")[2], - ) + if secret_matched: + secret_scope, secret_key = ( + secret_matched.group(1).split("/")[1], + secret_matched.group(1).split("/")[2], + ) + assert secret_scope is not None + assert secret_key is not None secret_response = ws.secrets.get_secret(secret_scope, secret_key) client_secret = base64.b64decode(secret_response.value).decode("utf-8") @@ -128,21 +129,15 @@ def test_spn_migration(ws, execute_migration, prepare_spn_migration_test, read_o # In real life, the READ validation for read only storage credential may fail if there is no file, # but that is fine, as the storage credential is created, and we just cannot validate it until it's really used. 
assert not any( - (res.operation is not None) - and ("WRITE" in res.operation.value) - for res in validation_result.results + (res.operation is not None) and ("WRITE" in res.operation.value) for res in validation_result.results ) else: assert any( - (res.operation is not None) - and ("WRITE" in res.operation.value) - and ("PASS" in res.result.value) + (res.operation is not None) and ("WRITE" in res.operation.value) and ("PASS" in res.result.value) for res in validation_result.results ) assert any( - (res.operation is not None) - and ("DELETE" in res.operation.value) - and ("PASS" in res.result.value) + (res.operation is not None) and ("DELETE" in res.operation.value) and ("PASS" in res.result.value) for res in validation_result.results ) finally: diff --git a/tests/unit/azure/test_azure_credentials.py b/tests/unit/azure/test_azure_credentials.py index 655f172cec..30787650b6 100644 --- a/tests/unit/azure/test_azure_credentials.py +++ b/tests/unit/azure/test_azure_credentials.py @@ -420,11 +420,9 @@ def test_execute_migration(caplog, capsys, mocker, ws): # assert validation results save_args = sp_migration._installation.save.call_args.args[0] assert any("The validation is skipped" in arg.results[0].message for arg in save_args) - assert any(("READ" in arg.results[0].operation.value) - and ("PASS" in arg.results[0].result.value) - for arg in save_args + assert any( + ("READ" in arg.results[0].operation.value) and ("PASS" in arg.results[0].result.value) for arg in save_args + ) + assert any( + ("WRITE" in arg.results[0].operation.value) and ("PASS" in arg.results[0].result.value) for arg in save_args ) - assert any(("WRITE" in arg.results[0].operation.value) - and ("PASS" in arg.results[0].result.value) - for arg in save_args - ) From e7a38678a10b118f06d24c94e1ee762b7fbdf8ba Mon Sep 17 00:00:00 2001 From: Ziyuan Qin Date: Wed, 14 Feb 2024 13:15:52 -0800 Subject: [PATCH 38/76] fix leftover and failed unit tests after rebase --- src/databricks/labs/ucx/azure/access.py | 2 +- src/databricks/labs/ucx/azure/azure_credentials.py | 6 +++--- src/databricks/labs/ucx/cli.py | 1 - tests/integration/azure/test_azure_credentials.py | 6 ++---- tests/unit/azure/test_azure_credentials.py | 2 +- tests/unit/azure/test_resources.py | 4 ++-- 6 files changed, 9 insertions(+), 12 deletions(-) diff --git a/src/databricks/labs/ucx/azure/access.py b/src/databricks/labs/ucx/azure/access.py index ad55b24f0e..c2c4b4a94d 100644 --- a/src/databricks/labs/ucx/azure/access.py +++ b/src/databricks/labs/ucx/azure/access.py @@ -65,7 +65,7 @@ def _map_storage(self, storage: AzureResource) -> list[StoragePermissionMapping] client_id=role_assignment.principal.client_id, principal=role_assignment.principal.display_name, privilege=privilege, - directory_id = role_assignment.principal.directory_id + directory_id=role_assignment.principal.directory_id, ) ) return out diff --git a/src/databricks/labs/ucx/azure/azure_credentials.py b/src/databricks/labs/ucx/azure/azure_credentials.py index 031d6b184a..c8334269c3 100644 --- a/src/databricks/labs/ucx/azure/azure_credentials.py +++ b/src/databricks/labs/ucx/azure/azure_credentials.py @@ -16,12 +16,12 @@ ValidationResult, ) -from databricks.labs.ucx.assessment.azure import ( +from databricks.labs.ucx.assessment.azure import AzureServicePrincipalCrawler +from databricks.labs.ucx.azure.access import ( AzureResourcePermissions, - AzureResources, - AzureServicePrincipalCrawler, StoragePermissionMapping, ) +from databricks.labs.ucx.azure.resources import AzureResources from 
databricks.labs.ucx.config import WorkspaceConfig from databricks.labs.ucx.framework.crawlers import StatementExecutionBackend from databricks.labs.ucx.hive_metastore.locations import ExternalLocations diff --git a/src/databricks/labs/ucx/cli.py b/src/databricks/labs/ucx/cli.py index aeef5faa71..5b076a0b81 100644 --- a/src/databricks/labs/ucx/cli.py +++ b/src/databricks/labs/ucx/cli.py @@ -12,7 +12,6 @@ from databricks.labs.ucx.account import AccountWorkspaces, WorkspaceInfo from databricks.labs.ucx.assessment.aws import AWSResourcePermissions - from databricks.labs.ucx.azure.access import AzureResourcePermissions from databricks.labs.ucx.azure.azure_credentials import AzureServicePrincipalMigration from databricks.labs.ucx.config import WorkspaceConfig diff --git a/tests/integration/azure/test_azure_credentials.py b/tests/integration/azure/test_azure_credentials.py index 2a4d8cf187..a15b7f242c 100644 --- a/tests/integration/azure/test_azure_credentials.py +++ b/tests/integration/azure/test_azure_credentials.py @@ -5,11 +5,9 @@ import pytest from databricks.labs.blueprint.tui import MockPrompts -from databricks.labs.ucx.assessment.azure import ( - AzureServicePrincipalInfo, - StoragePermissionMapping, -) +from databricks.labs.ucx.assessment.azure import AzureServicePrincipalInfo from databricks.labs.ucx.assessment.crawlers import _SECRET_PATTERN +from databricks.labs.ucx.azure.access import StoragePermissionMapping from databricks.labs.ucx.azure.azure_credentials import AzureServicePrincipalMigration diff --git a/tests/unit/azure/test_azure_credentials.py b/tests/unit/azure/test_azure_credentials.py index 30787650b6..08e9b9f63c 100644 --- a/tests/unit/azure/test_azure_credentials.py +++ b/tests/unit/azure/test_azure_credentials.py @@ -22,8 +22,8 @@ from databricks.labs.ucx.assessment.azure import ( AzureServicePrincipalCrawler, AzureServicePrincipalInfo, - StoragePermissionMapping, ) +from databricks.labs.ucx.azure.access import StoragePermissionMapping from databricks.labs.ucx.azure.azure_credentials import ( AzureServicePrincipalMigration, ServicePrincipalMigrationInfo, diff --git a/tests/unit/azure/test_resources.py b/tests/unit/azure/test_resources.py index 6c1b067f44..c0c468a20c 100644 --- a/tests/unit/azure/test_resources.py +++ b/tests/unit/azure/test_resources.py @@ -61,7 +61,7 @@ def test_role_assignments_storage(mocker, az_token): assert len(role_assignments) == 1 for role_assignment in role_assignments: assert role_assignment.role_name == "Contributor" - assert role_assignment.principal == Principal("appIduser2", "disNameuser2", "Iduser2") + assert role_assignment.principal == Principal("appIduser2", "disNameuser2", "Iduser2", "0000-0000") assert str(role_assignment.scope) == resource_id assert role_assignment.resource == AzureResource(resource_id) @@ -75,6 +75,6 @@ def test_role_assignments_container(mocker, az_token): assert len(role_assignments) == 1 for role_assignment in role_assignments: assert role_assignment.role_name == "Contributor" - assert role_assignment.principal == Principal("appIduser2", "disNameuser2", "Iduser2") + assert role_assignment.principal == Principal("appIduser2", "disNameuser2", "Iduser2", "0000-0000") assert str(role_assignment.scope) == resource_id assert role_assignment.resource == AzureResource(resource_id) From ea73a1d5367495070a635c9d5c47480bb11f78a3 Mon Sep 17 00:00:00 2001 From: Ziyuan Qin Date: Wed, 14 Feb 2024 15:38:26 -0800 Subject: [PATCH 39/76] rename AzureServicePrincipalMigration to ServicePrincipalMigration, rename execute_migration to 
run --- .../labs/ucx/azure/azure_credentials.py | 21 ++++---- src/databricks/labs/ucx/cli.py | 6 +-- .../azure/test_azure_credentials.py | 27 +++++----- tests/unit/azure/test_azure_credentials.py | 51 ++++++++++--------- 4 files changed, 54 insertions(+), 51 deletions(-) diff --git a/src/databricks/labs/ucx/azure/azure_credentials.py b/src/databricks/labs/ucx/azure/azure_credentials.py index c8334269c3..5b2ef25efe 100644 --- a/src/databricks/labs/ucx/azure/azure_credentials.py +++ b/src/databricks/labs/ucx/azure/azure_credentials.py @@ -54,7 +54,7 @@ def from_storage_credential_validation( ) -class AzureServicePrincipalMigration: +class ServicePrincipalMigration: def __init__( self, @@ -65,7 +65,6 @@ def __init__( integration_test_flag="", ): self._output_file = "azure_service_principal_migration_result.csv" - self._final_sp_list: list[ServicePrincipalMigrationInfo] = [] self._installation = installation self._ws = ws self._azure_resource_permissions = azure_resource_permissions @@ -111,7 +110,7 @@ def _list_storage_credentials(self) -> set[str]: # return the storage credential created during integration test return {storage_credential.azure_service_principal.application_id} # return no storage credential if there is none created during integration test - return {""} + return {} for storage_credential in storage_credentials: # only add service principal's application_id, ignore managed identity based storage_credential @@ -186,17 +185,17 @@ def _fetch_client_secret(self, sp_list: list[StoragePermissionMapping]) -> list[ ) return sp_list_with_secret - def _print_action_plan(self, sp_list_with_secret: list[ServicePrincipalMigrationInfo]): + def _print_action_plan(self, sp_list: list[ServicePrincipalMigrationInfo]): # print action plan to console for customer to review. - for sp in sp_list_with_secret: - print( + for sp in sp_list: + logger.info( f"Service Principal name: {sp.service_principal.principal}, " f"application_id: {sp.service_principal.client_id}, " f"privilege {sp.service_principal.privilege} " f"on location {sp.service_principal.prefix}" ) - def _generate_migration_list(self): + def _generate_migration_list(self) -> list[ServicePrincipalMigrationInfo]: """ Create the list of SP that need to be migrated, output an action plan as a csv file for users to confirm """ @@ -208,9 +207,9 @@ def _generate_migration_list(self): filtered_sp_list = [sp for sp in sp_list if sp.client_id not in sc_set] # fetch sp client_secret if any sp_list_with_secret = self._fetch_client_secret(filtered_sp_list) - self._final_sp_list = sp_list_with_secret # output the action plan for customer to confirm self._print_action_plan(sp_list_with_secret) + return sp_list_with_secret def _create_storage_credential(self, sp_migration: ServicePrincipalMigrationInfo): # prepare the storage credential properties @@ -260,9 +259,9 @@ def _validate_storage_credential( ), ) - def execute_migration(self, prompts: Prompts): + def run(self, prompts: Prompts): - self._generate_migration_list() + sp_list_with_secret = self._generate_migration_list() plan_confirmed = prompts.confirm( "Above Azure Service Principals will be migrated to UC storage credentials, please review and confirm." 
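
Side note on the prompts that gate run(): the unit tests drive this
confirmation with MockPrompts from databricks-labs-blueprint, which matches the
question against regex patterns and returns a canned answer. A rough stand-in,
inferred from how these tests use it rather than taken from the blueprint
source, behaves like:

    import re

    class PromptStub:
        # patterns_to_answers maps a regex to a canned reply,
        # e.g. {"Above Azure Service Principals will be migrated *": "Yes"}
        def __init__(self, patterns_to_answers: dict[str, str]):
            self._answers = [(re.compile(k), v) for k, v in patterns_to_answers.items()]

        def confirm(self, question: str) -> bool:
            for pattern, answer in self._answers:
                if pattern.match(question):
                    return answer.lower() == "yes"
            raise ValueError(f"no mock answer for prompt: {question}")

Answering "No" to the confirmation pattern therefore makes run() return before
any storage credential is created.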
@@ -271,7 +270,7 @@ def execute_migration(self, prompts: Prompts): return execution_result = [] - for sp in self._final_sp_list: + for sp in sp_list_with_secret: execution_result.append(self._create_storage_credential(sp)) results_file = self._installation.save(execution_result, filename=self._output_file) diff --git a/src/databricks/labs/ucx/cli.py b/src/databricks/labs/ucx/cli.py index 5b076a0b81..dc671f09ee 100644 --- a/src/databricks/labs/ucx/cli.py +++ b/src/databricks/labs/ucx/cli.py @@ -13,7 +13,7 @@ from databricks.labs.ucx.account import AccountWorkspaces, WorkspaceInfo from databricks.labs.ucx.assessment.aws import AWSResourcePermissions from databricks.labs.ucx.azure.access import AzureResourcePermissions -from databricks.labs.ucx.azure.azure_credentials import AzureServicePrincipalMigration +from databricks.labs.ucx.azure.azure_credentials import ServicePrincipalMigration from databricks.labs.ucx.config import WorkspaceConfig from databricks.labs.ucx.framework.crawlers import StatementExecutionBackend from databricks.labs.ucx.hive_metastore import ExternalLocations, TablesCrawler @@ -297,8 +297,8 @@ def migrate_azure_service_principals(w: WorkspaceClient): """ logger.info("Running migrate_azure_service_principals command") prompts = Prompts() - service_principal_migration = AzureServicePrincipalMigration.for_cli(w, prompts) - service_principal_migration.execute_migration(prompts) + service_principal_migration = ServicePrincipalMigration.for_cli(w, prompts) + service_principal_migration.run(prompts) if __name__ == "__main__": diff --git a/tests/integration/azure/test_azure_credentials.py b/tests/integration/azure/test_azure_credentials.py index a15b7f242c..281707e673 100644 --- a/tests/integration/azure/test_azure_credentials.py +++ b/tests/integration/azure/test_azure_credentials.py @@ -1,6 +1,6 @@ import base64 import re -from unittest.mock import MagicMock +from unittest.mock import MagicMock, patch import pytest from databricks.labs.blueprint.tui import MockPrompts @@ -8,7 +8,7 @@ from databricks.labs.ucx.assessment.azure import AzureServicePrincipalInfo from databricks.labs.ucx.assessment.crawlers import _SECRET_PATTERN from databricks.labs.ucx.azure.access import StoragePermissionMapping -from databricks.labs.ucx.azure.azure_credentials import AzureServicePrincipalMigration +from databricks.labs.ucx.azure.azure_credentials import ServicePrincipalMigration @pytest.fixture @@ -75,15 +75,15 @@ def inner(read_only=False) -> dict: @pytest.fixture def execute_migration(ws): - def inner(variables: dict, integration_test_flag: str) -> AzureServicePrincipalMigration: - spn_migration = AzureServicePrincipalMigration( + def inner(variables: dict, integration_test_flag: str) -> ServicePrincipalMigration: + spn_migration = ServicePrincipalMigration( variables["installation"], ws, variables["azure_resource_permissions"], variables["azure_sp_crawler"], integration_test_flag=integration_test_flag, ) - spn_migration.execute_migration( + spn_migration.run( MockPrompts({"Above Azure Service Principals will be migrated to UC storage credentials *": "Yes"}) ) return spn_migration @@ -92,7 +92,7 @@ def inner(variables: dict, integration_test_flag: str) -> AzureServicePrincipalM def test_spn_migration_existed_storage_credential( - ws, execute_migration, make_storage_credential_from_spn, prepare_spn_migration_test + execute_migration, make_storage_credential_from_spn, prepare_spn_migration_test ): variables = prepare_spn_migration_test(read_only=False) @@ -104,11 +104,11 @@ def 
test_spn_migration_existed_storage_credential( directory_id=variables["directory_id"], ) - # test that the spn migration will be skipped due to above storage credential is existed - spn_migration = execute_migration(variables, integration_test_flag=variables["storage_credential_name"]) - - # because storage_credential is existing, no spn should be migrated - assert not spn_migration._final_sp_list + with patch("databricks.labs.ucx.azure.azure_credentials.ServicePrincipalMigration._create_storage_credential") as create_storage_credential: + # test that the spn migration will be skipped due to above storage credential is existed + execute_migration(variables, integration_test_flag=variables["storage_credential_name"]) + # because storage_credential is existing, no spn should be migrated + create_storage_credential.assert_not_called() @pytest.mark.parametrize("read_only", [False, True]) @@ -118,8 +118,9 @@ def test_spn_migration(ws, execute_migration, prepare_spn_migration_test, read_o try: spn_migration = execute_migration(variables, integration_test_flag="lets_migrate_the_spn") - assert spn_migration._final_sp_list[0].service_principal.principal == variables["storage_credential_name"] - assert ws.storage_credentials.get(variables["storage_credential_name"]).read_only is read_only + storage_credential = ws.storage_credentials.get(variables["storage_credential_name"]) + assert storage_credential is not None + assert storage_credential.read_only is read_only validation_result = spn_migration._installation.save.call_args.args[0][0] if read_only: diff --git a/tests/unit/azure/test_azure_credentials.py b/tests/unit/azure/test_azure_credentials.py index 08e9b9f63c..b247b85cc9 100644 --- a/tests/unit/azure/test_azure_credentials.py +++ b/tests/unit/azure/test_azure_credentials.py @@ -25,7 +25,7 @@ ) from databricks.labs.ucx.azure.access import StoragePermissionMapping from databricks.labs.ucx.azure.azure_credentials import ( - AzureServicePrincipalMigration, + ServicePrincipalMigration, ServicePrincipalMigrationInfo, ) from tests.unit.framework.mocks import MockBackend @@ -103,21 +103,21 @@ def download(path: str) -> io.BytesIO: def test_for_cli_not_azure(caplog, ws): ws.config.is_azure = False - assert AzureServicePrincipalMigration.for_cli(ws, MagicMock()) is None + assert ServicePrincipalMigration.for_cli(ws, MagicMock()) is None assert "Workspace is not on azure, please run this command on azure databricks workspaces." 
in caplog.text def test_for_cli_not_prompts(ws): ws.config.is_azure = True prompts = MockPrompts({"Have you reviewed the azure_storage_account_info.csv *": "No"}) - assert AzureServicePrincipalMigration.for_cli(ws, prompts) is None + assert ServicePrincipalMigration.for_cli(ws, prompts) is None def test_for_cli(ws): ws.config.is_azure = True prompts = MockPrompts({"Have you reviewed the azure_storage_account_info.csv *": "Yes"}) - assert isinstance(AzureServicePrincipalMigration.for_cli(ws, prompts), AzureServicePrincipalMigration) + assert isinstance(ServicePrincipalMigration.for_cli(ws, prompts), ServicePrincipalMigration) def test_list_storage_credentials(ws): @@ -137,7 +137,7 @@ def test_list_storage_credentials(ws): ), ] - sp_migration = AzureServicePrincipalMigration(MagicMock(), ws, MagicMock(), MagicMock()) + sp_migration = ServicePrincipalMigration(MagicMock(), ws, MagicMock(), MagicMock()) expected = {"b6420590-5e1c-4426-8950-a94cbe9b6115"} sp_migration._list_storage_credentials() @@ -165,7 +165,7 @@ def test_list_storage_credentials_for_integration_test(ws): # test storage credential: spn_for_integration_test is picked up # in integration test, we only pick up the existing storage credential created in integration test and ignore the others - sp_migration = AzureServicePrincipalMigration( + sp_migration = ServicePrincipalMigration( MagicMock(), ws, MagicMock(), MagicMock(), integration_test_flag="spn_for_integration_test" ) expected = {"b6420590-5e1c-4426-8950-a94cbe9b6115"} @@ -174,11 +174,11 @@ def test_list_storage_credentials_for_integration_test(ws): # test storage credential is not picked up # if integration test does not create storage credential, we use dummy integration_test_flag to filter out other existing storage credentials - sp_migration = AzureServicePrincipalMigration( + sp_migration = ServicePrincipalMigration( MagicMock(), ws, MagicMock(), MagicMock(), integration_test_flag="other_spn" ) sp_migration._list_storage_credentials() - assert {""} == sp_migration._list_storage_credentials() + assert set() == sp_migration._list_storage_credentials() @@ -192,7 +192,7 @@ def test_read_secret_value_decode(ws, secret_bytes_value, expected_return): ws.secrets.get_secret.return_value = secret_bytes_value - sp_migration = AzureServicePrincipalMigration(MagicMock(), ws, MagicMock(), MagicMock()) + sp_migration = ServicePrincipalMigration(MagicMock(), ws, MagicMock(), MagicMock()) assert sp_migration._read_databricks_secret("test_scope", "test_key", "000") == expected_return @@ -207,7 +207,7 @@ def test_read_secret_read_exception(caplog, ws, exception, expected_log, expecte caplog.set_level(logging.INFO) ws.secrets.get_secret.side_effect = exception - sp_migration = AzureServicePrincipalMigration(MagicMock(), ws, MagicMock(), MagicMock()) + sp_migration = ServicePrincipalMigration(MagicMock(), ws, MagicMock(), MagicMock()) secret_value = sp_migration._read_databricks_secret("test_scope", "test_key", "000") assert expected_log in caplog.text @@ -281,13 +281,14 @@ def test_fetch_client_secret(ws): ), ] - sp_migration = AzureServicePrincipalMigration(MagicMock(), ws, MagicMock(), sp_crawler) + sp_migration = ServicePrincipalMigration(MagicMock(), ws, MagicMock(), sp_crawler) filtered_sp_list = sp_migration._fetch_client_secret(sp_to_be_checked) assert filtered_sp_list == expected_sp_list -def test_print_action_plan(capsys): +def test_print_action_plan(caplog): + caplog.set_level(logging.INFO)
sp_list_with_secret = [ ServicePrincipalMigrationInfo( StoragePermissionMapping( @@ -300,19 +301,20 @@ def test_print_action_plan(capsys): "hello world", ) ] - sp_migration = AzureServicePrincipalMigration(MagicMock(), MagicMock(), MagicMock(), MagicMock()) + sp_migration = ServicePrincipalMigration(MagicMock(), MagicMock(), MagicMock(), MagicMock()) sp_migration._print_action_plan(sp_list_with_secret) expected_print = ( "Service Principal name: principal_1, " "application_id: app_secret1, " "privilege WRITE_FILES " - "on location prefix1\n" + "on location prefix1" ) - assert expected_print == capsys.readouterr().out + assert expected_print in caplog.text -def test_generate_migration_list(capsys, mocker, ws): +def test_generate_migration_list(caplog, mocker, ws): + caplog.set_level(logging.INFO) ws.config.is_azure = True ws.secrets.get_secret.return_value = GetSecretResponse(value="aGVsbG8gd29ybGQ=") ws.storage_credentials.list.return_value = [ @@ -335,10 +337,11 @@ def test_generate_migration_list(capsys, mocker, ws): ], ) - sp_migration = AzureServicePrincipalMigration.for_cli(ws, prompts) + sp_migration = ServicePrincipalMigration.for_cli(ws, prompts) sp_migration._generate_migration_list() - assert "app_secret2" in capsys.readouterr().out + assert "app_secret2" in caplog.text + assert "app_secret1" not in caplog.text def test_execute_migration_no_confirmation(mocker, ws): @@ -350,13 +353,13 @@ } ) - mocker.patch("databricks.labs.ucx.azure.azure_credentials.AzureServicePrincipalMigration._generate_migration_list") + mocker.patch("databricks.labs.ucx.azure.azure_credentials.ServicePrincipalMigration._generate_migration_list") with patch( - "databricks.labs.ucx.azure.azure_credentials.AzureServicePrincipalMigration._create_storage_credential" + "databricks.labs.ucx.azure.azure_credentials.ServicePrincipalMigration._create_storage_credential" ) as c: - sp_migration = AzureServicePrincipalMigration.for_cli(ws, prompts) - sp_migration.execute_migration(prompts) + sp_migration = ServicePrincipalMigration.for_cli(ws, prompts) + sp_migration.run(prompts) c.assert_not_called() @@ -409,9 +412,9 @@ def test_execute_migration(caplog, capsys, mocker, ws): ], ) - sp_migration = AzureServicePrincipalMigration.for_cli(ws, prompts) + sp_migration = ServicePrincipalMigration.for_cli(ws, prompts) sp_migration._installation.save = MagicMock() - sp_migration.execute_migration(prompts) + sp_migration.run(prompts) # assert migration is complete assert "Completed migration" in capsys.readouterr().out From decc1ee74dee40c7375e424c228b7a3dfa65c403 Mon Sep 17 00:00:00 2001 From: Ziyuan Qin Date: Wed, 14 Feb 2024 16:20:29 -0800 Subject: [PATCH 40/76] add guard rail to explicitly remove the client_secret from validation result, even though it is supposed to be None --- src/databricks/labs/ucx/azure/azure_credentials.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/databricks/labs/ucx/azure/azure_credentials.py b/src/databricks/labs/ucx/azure/azure_credentials.py index 5b2ef25efe..68f8e4bce9 100644 --- a/src/databricks/labs/ucx/azure/azure_credentials.py +++ b/src/databricks/labs/ucx/azure/azure_credentials.py @@ -45,9 +45,20 @@ class StorageCredentialValidationResult: def from_storage_credential_validation( cls, storage_credential: StorageCredentialInfo, validation: ValidateStorageCredentialResponse ): + if storage_credential.azure_service_principal: + # Guard rail to explicitly remove the client_secret, just in case the
azure_service_principal + # in StorageCredentialInfo returned by WorkspaceClient.storage_credentials.create exposes the + # client_secret due to potential bugs in the future. + service_principal = AzureServicePrincipal(storage_credential.azure_service_principal.directory_id, + storage_credential.azure_service_principal.application_id, + "" + ) + else: + service_principal = AzureServicePrincipal("", "", "") + return cls( name=storage_credential.name or "", - azure_service_principal=storage_credential.azure_service_principal or AzureServicePrincipal("", "", ""), + azure_service_principal=service_principal, created_by=storage_credential.created_by or "", read_only=storage_credential.read_only or False, results=validation.results or [], From bf0dbccf70ce802f4735fd87b799bbccae51125d Mon Sep 17 00:00:00 2001 From: Ziyuan Qin Date: Wed, 14 Feb 2024 17:17:50 -0800 Subject: [PATCH 41/76] switch ServicePrincipalMigrationInfo back to dataclass. --- .../labs/ucx/azure/azure_credentials.py | 27 ++++++++++--------- .../azure/test_azure_credentials.py | 5 +++- 2 files changed, 19 insertions(+), 13 deletions(-) diff --git a/src/databricks/labs/ucx/azure/azure_credentials.py b/src/databricks/labs/ucx/azure/azure_credentials.py index 68f8e4bce9..13364d3e67 100644 --- a/src/databricks/labs/ucx/azure/azure_credentials.py +++ b/src/databricks/labs/ucx/azure/azure_credentials.py @@ -29,8 +29,11 @@ logger = logging.getLogger(__name__) -# A namedtuple to host service_principal and its client_secret info -ServicePrincipalMigrationInfo = namedtuple("ServicePrincipalMigrationInfo", "service_principal client_secret") +# A dataclass to host service_principal info and its client_secret info +@dataclass +class ServicePrincipalMigrationInfo: + permission_mapping: StoragePermissionMapping + client_secret: str @dataclass @@ -200,10 +203,10 @@ def _print_action_plan(self, sp_list: list[ServicePrincipalMigrationInfo]): # print action plan to console for customer to review. 
for sp in sp_list: logger.info( - f"Service Principal name: {sp.service_principal.principal}, " - f"application_id: {sp.service_principal.client_id}, " - f"privilege {sp.service_principal.privilege} " - f"on location {sp.service_principal.prefix}" + f"Service Principal name: {sp.permission_mapping.principal}, " + f"application_id: {sp.permission_mapping.client_id}, " + f"privilege {sp.permission_mapping.privilege} " + f"on location {sp.permission_mapping.prefix}" ) def _generate_migration_list(self) -> list[ServicePrincipalMigrationInfo]: @@ -224,15 +227,15 @@ def _generate_migration_list(self) -> list[ServicePrincipalMigrationInfo]: def _create_storage_credential(self, sp_migration: ServicePrincipalMigrationInfo): # prepare the storage credential properties - name = sp_migration.service_principal.principal + name = sp_migration.permission_mapping.principal azure_service_principal = AzureServicePrincipal( - directory_id=sp_migration.service_principal.directory_id, - application_id=sp_migration.service_principal.client_id, + directory_id=sp_migration.permission_mapping.directory_id, + application_id=sp_migration.permission_mapping.client_id, client_secret=sp_migration.client_secret, ) - comment = f"Created by UCX during migration to UC using Azure Service Principal: {sp_migration.service_principal.principal}" + comment = f"Created by UCX during migration to UC using Azure Service Principal: {sp_migration.permission_mapping.principal}" read_only = False - if sp_migration.service_principal.privilege == Privilege.READ_FILES.value: + if sp_migration.permission_mapping.privilege == Privilege.READ_FILES.value: read_only = True # create the storage credential storage_credential = self._ws.storage_credentials.create( @@ -240,7 +243,7 @@ def _create_storage_credential(self, sp_migration: ServicePrincipalMigrationInfo ) validation_result = self._validate_storage_credential( - storage_credential, sp_migration.service_principal.prefix, read_only + storage_credential, sp_migration.permission_mapping.prefix, read_only ) return validation_result diff --git a/tests/integration/azure/test_azure_credentials.py b/tests/integration/azure/test_azure_credentials.py index 281707e673..80dc917a7d 100644 --- a/tests/integration/azure/test_azure_credentials.py +++ b/tests/integration/azure/test_azure_credentials.py @@ -11,10 +11,13 @@ from databricks.labs.ucx.azure.azure_credentials import ServicePrincipalMigration + + + @pytest.fixture def prepare_spn_migration_test(ws, debug_env, make_random): def inner(read_only=False) -> dict: - spark_conf = ws.clusters.get(cluster_id=debug_env["TEST_LEGACY_SPN_CLUSTER_ID"]).spark_conf + spark_conf = ws.clusters.get(debug_env["TEST_LEGACY_SPN_CLUSTER_ID"]).spark_conf application_id = spark_conf.get("fs.azure.account.oauth2.client.id") From 8f96530fc67f5af5fdce3748b1d16492ee6ab4a8 Mon Sep 17 00:00:00 2001 From: Ziyuan Qin Date: Wed, 14 Feb 2024 18:33:36 -0800 Subject: [PATCH 42/76] 1. remove azure_ prefix from all service principal migration files 2. split all storage credential management code out to a new class.
And a subclass of it is created to facilitate integration test --- .../{azure_credentials.py => credentials.py} | 152 +++++++++--------- src/databricks/labs/ucx/cli.py | 2 +- ...ure_credentials.py => test_credentials.py} | 34 ++-- tests/integration/conftest.py | 25 +++ ...ure_credentials.py => test_credentials.py} | 2 +- 5 files changed, 124 insertions(+), 91 deletions(-) rename src/databricks/labs/ucx/azure/{azure_credentials.py => credentials.py} (87%) rename tests/integration/azure/{test_azure_credentials.py => test_credentials.py} (82%) rename tests/unit/azure/{test_azure_credentials.py => test_credentials.py} (99%) diff --git a/src/databricks/labs/ucx/azure/azure_credentials.py b/src/databricks/labs/ucx/azure/credentials.py similarity index 87% rename from src/databricks/labs/ucx/azure/azure_credentials.py rename to src/databricks/labs/ucx/azure/credentials.py index 13364d3e67..ff6af42804 100644 --- a/src/databricks/labs/ucx/azure/azure_credentials.py +++ b/src/databricks/labs/ucx/azure/credentials.py @@ -68,6 +68,75 @@ def from_storage_credential_validation( ) +class StorageCredentialManager: + def __init__(self, ws: WorkspaceClient): + self._ws = ws + + def list_storage_credentials(self) -> set[str]: + # list existed storage credentials that is using service principal, capture the service principal's application_id + application_ids = set() + + storage_credentials = self._ws.storage_credentials.list(max_results=0) + + for storage_credential in storage_credentials: + # only add service principal's application_id, ignore managed identity based storage_credential + if storage_credential.azure_service_principal: + application_ids.add(storage_credential.azure_service_principal.application_id) + + logger.info( + f"Found {len(application_ids)} distinct service principals already used in UC storage credentials" + ) + return application_ids + + + def create_storage_credential(self, sp_migration: ServicePrincipalMigrationInfo) -> StorageCredentialInfo: + # prepare the storage credential properties + name = sp_migration.permission_mapping.principal + azure_service_principal = AzureServicePrincipal( + directory_id=sp_migration.permission_mapping.directory_id, + application_id=sp_migration.permission_mapping.client_id, + client_secret=sp_migration.client_secret, + ) + comment = f"Created by UCX during migration to UC using Azure Service Principal: {sp_migration.permission_mapping.principal}" + read_only = False + if sp_migration.permission_mapping.privilege == Privilege.READ_FILES.value: + read_only = True + # create the storage credential + return self._ws.storage_credentials.create( + name, azure_service_principal=azure_service_principal, comment=comment, read_only=read_only + ) + + + def validate_storage_credential( + self, storage_credential, sp_migration: ServicePrincipalMigrationInfo + ) -> StorageCredentialValidationResult: + read_only = False + if sp_migration.permission_mapping.privilege == Privilege.READ_FILES.value: + read_only = True + # storage_credential validation creates a temp UC external location, which cannot overlap with + # existing UC external locations. 
So add a sub folder to the validation location just in case + try: + validation = self._ws.storage_credentials.validate( + storage_credential_name=storage_credential.name, url=sp_migration.permission_mapping.prefix, read_only=read_only + ) + return StorageCredentialValidationResult.from_storage_credential_validation(storage_credential, validation) + except InvalidParameterValue: + logger.warning( + "There is an existing external location overlaps with the prefix that is mapped to the service principal and used for validating the migrated storage credential. Skip the validation" + ) + return StorageCredentialValidationResult.from_storage_credential_validation( + storage_credential, + ValidateStorageCredentialResponse( + is_dir=None, + results=[ + ValidationResult( + message="The validation is skipped because an existing external location overlaps with the location used for validation." + ) + ], + ), + ) + class ServicePrincipalMigration: def __init__( @@ -76,14 +145,14 @@ def __init__( ws: WorkspaceClient, azure_resource_permissions: AzureResourcePermissions, azure_sp_crawler: AzureServicePrincipalCrawler, - integration_test_flag="", + storage_credential_manager: StorageCredentialManager ): self._output_file = "azure_service_principal_migration_result.csv" self._installation = installation self._ws = ws self._azure_resource_permissions = azure_resource_permissions self._azure_sp_crawler = azure_sp_crawler - self._integration_test_flag = integration_test_flag + self._storage_credential_manager = storage_credential_manager @classmethod def for_cli(cls, ws: WorkspaceClient, prompts: Prompts, product='ucx'): @@ -107,33 +176,10 @@ def for_cli(cls, ws: WorkspaceClient, prompts: Prompts, product='ucx'): azure_resource_permissions = AzureResourcePermissions(installation, ws, azurerm, locations) azure_sp_crawler = AzureServicePrincipalCrawler(ws, sql_backend, config.inventory_database) - return cls(installation, ws, azure_resource_permissions, azure_sp_crawler) + storage_credential_manager = StorageCredentialManager(ws) - def _list_storage_credentials(self) -> set[str]: - # list existed storage credentials that is using service principal, capture the service principal's application_id - storage_credential_app_ids = set() - - storage_credentials = self._ws.storage_credentials.list(max_results=0) + return cls(installation, ws, azure_resource_permissions, azure_sp_crawler, storage_credential_manager) - # if we are doing integration test - for storage_credential in storage_credentials: - if not storage_credential.azure_service_principal: - continue - if self._integration_test_flag == storage_credential.name: - # return the storage credential created during integration test - return {storage_credential.azure_service_principal.application_id} - # return no storage credential if there is none created during integration test - return set() - - for storage_credential in storage_credentials: - # only add service principal's application_id, ignore managed identity based storage_credential - if storage_credential.azure_service_principal: - storage_credential_app_ids.add(storage_credential.azure_service_principal.application_id) - logger.info( - f"Found {len(storage_credential_app_ids)} distinct service principals already used in UC storage credentials" - ) - return storage_credential_app_ids @@ -216,7 +262,7 @@ def _generate_migration_list(self) ->
list[ServicePrincipalMigrationInfo]: # load sp list from azure_storage_account_info.csv sp_list = self._azure_resource_permissions.load() # list existed storage credentials - sc_set = self._list_storage_credentials() + sc_set = self._storage_credential_manager.list_storage_credentials() # check if the sp is already used in UC storage credential filtered_sp_list = [sp for sp in sp_list if sp.client_id not in sc_set] # fetch sp client_secret if any @@ -225,53 +271,6 @@ def _generate_migration_list(self) -> list[ServicePrincipalMigrationInfo]: self._print_action_plan(sp_list_with_secret) return sp_list_with_secret - def _create_storage_credential(self, sp_migration: ServicePrincipalMigrationInfo): - # prepare the storage credential properties - name = sp_migration.permission_mapping.principal - azure_service_principal = AzureServicePrincipal( - directory_id=sp_migration.permission_mapping.directory_id, - application_id=sp_migration.permission_mapping.client_id, - client_secret=sp_migration.client_secret, - ) - comment = f"Created by UCX during migration to UC using Azure Service Principal: {sp_migration.permission_mapping.principal}" - read_only = False - if sp_migration.permission_mapping.privilege == Privilege.READ_FILES.value: - read_only = True - # create the storage credential - storage_credential = self._ws.storage_credentials.create( - name=name, azure_service_principal=azure_service_principal, comment=comment, read_only=read_only - ) - - validation_result = self._validate_storage_credential( - storage_credential, sp_migration.permission_mapping.prefix, read_only - ) - return validation_result - - def _validate_storage_credential( - self, storage_credential, location: str, read_only: bool - ) -> StorageCredentialValidationResult: - # storage_credential validation creates a temp UC external location, which cannot overlap with - # existing UC external locations. So add a sub folder to the validation location just in case - try: - validation = self._ws.storage_credentials.validate( - storage_credential_name=storage_credential.name, url=location, read_only=read_only - ) - return StorageCredentialValidationResult.from_storage_credential_validation(storage_credential, validation) - except InvalidParameterValue: - logger.warning( - "There is an existing external location overlaps with the prefix that is mapped to the service principal and used for validating the migrated storage credential. Skip the validation" - ) - return StorageCredentialValidationResult.from_storage_credential_validation( - storage_credential, - ValidateStorageCredentialResponse( - is_dir=None, - results=[ - ValidationResult( - message="The validation is skipped because an existing external location overlaps with the location used for validation." 
- ) - ], - ), - ) def run(self, prompts: Prompts): @@ -285,7 +284,8 @@ def run(self, prompts: Prompts): execution_result = [] for sp in sp_list_with_secret: - execution_result.append(self._create_storage_credential(sp)) + storage_credential = self._storage_credential_manager.create_storage_credential(sp) + execution_result.append(self._storage_credential_manager.validate_storage_credential(storage_credential, sp)) results_file = self._installation.save(execution_result, filename=self._output_file) logger.info("Completed migration from Azure Service Principal migrated to UC Storage credentials") diff --git a/src/databricks/labs/ucx/cli.py b/src/databricks/labs/ucx/cli.py index dc671f09ee..ae3da50d1c 100644 --- a/src/databricks/labs/ucx/cli.py +++ b/src/databricks/labs/ucx/cli.py @@ -13,7 +13,7 @@ from databricks.labs.ucx.account import AccountWorkspaces, WorkspaceInfo from databricks.labs.ucx.assessment.aws import AWSResourcePermissions from databricks.labs.ucx.azure.access import AzureResourcePermissions -from databricks.labs.ucx.azure.azure_credentials import ServicePrincipalMigration +from databricks.labs.ucx.azure.credentials import ServicePrincipalMigration from databricks.labs.ucx.config import WorkspaceConfig from databricks.labs.ucx.framework.crawlers import StatementExecutionBackend from databricks.labs.ucx.hive_metastore import ExternalLocations, TablesCrawler diff --git a/tests/integration/azure/test_azure_credentials.py b/tests/integration/azure/test_credentials.py similarity index 82% rename from tests/integration/azure/test_azure_credentials.py rename to tests/integration/azure/test_credentials.py index 80dc917a7d..321aad9466 100644 --- a/tests/integration/azure/test_azure_credentials.py +++ b/tests/integration/azure/test_credentials.py @@ -1,4 +1,5 @@ import base64 +import logging import re from unittest.mock import MagicMock, patch @@ -8,10 +9,8 @@ from databricks.labs.ucx.assessment.azure import AzureServicePrincipalInfo from databricks.labs.ucx.assessment.crawlers import _SECRET_PATTERN from databricks.labs.ucx.azure.access import StoragePermissionMapping -from databricks.labs.ucx.azure.azure_credentials import ServicePrincipalMigration - - - +from databricks.labs.ucx.azure.credentials import ServicePrincipalMigration +from tests.integration.conftest import StaticStorageCredentialManager @pytest.fixture @@ -78,13 +77,13 @@ def inner(read_only=False) -> dict: @pytest.fixture def execute_migration(ws): - def inner(variables: dict, integration_test_flag: str) -> ServicePrincipalMigration: + def inner(variables: dict, credentials: list[str]) -> ServicePrincipalMigration: spn_migration = ServicePrincipalMigration( variables["installation"], ws, variables["azure_resource_permissions"], variables["azure_sp_crawler"], - integration_test_flag=integration_test_flag, + StaticStorageCredentialManager(ws, credentials) ) spn_migration.run( MockPrompts({"Above Azure Service Principals will be migrated to UC storage credentials *": "Yes"}) @@ -95,8 +94,9 @@ def inner(variables: dict, integration_test_flag: str) -> ServicePrincipalMigrat def test_spn_migration_existed_storage_credential( - execute_migration, make_storage_credential_from_spn, prepare_spn_migration_test + caplog, execute_migration, make_storage_credential_from_spn, prepare_spn_migration_test ): + caplog.set_level(logging.INFO) variables = prepare_spn_migration_test(read_only=False) # create a storage credential for this test @@ -107,11 +107,19 @@ def test_spn_migration_existed_storage_credential( 
directory_id=variables["directory_id"], ) - with patch("databricks.labs.ucx.azure.azure_credentials.ServicePrincipalMigration._create_storage_credential") as create_storage_credential: - # test that the spn migration will be skipped due to above storage credential is existed - execute_migration(variables, integration_test_flag=variables["storage_credential_name"]) - # because storage_credential is existing, no spn should be migrated - create_storage_credential.assert_not_called() + # test that the spn migration will be skipped due to above storage credential is existed + execute_migration(variables, [variables["storage_credential_name"]]) + + # assert no action plan is logged since no spn migrated + patterns = [ + "Service Principal name:", + "application_id:", + "privilege", + "on location" + ] + for record in caplog.records: + if all(pattern in record.message for pattern in patterns): + assert False, "Migration action plan should not be logged when no service principal will be migrated" @pytest.mark.parametrize("read_only", [False, True]) @@ -119,7 +127,7 @@ def test_spn_migration(ws, execute_migration, prepare_spn_migration_test, read_o variables = prepare_spn_migration_test(read_only) try: - spn_migration = execute_migration(variables, integration_test_flag="lets_migrate_the_spn") + spn_migration = execute_migration(variables, ["lets_migrate_the_spn"]) storage_credential = ws.storage_credentials.get(variables["storage_credential_name"]) assert storage_credential is not None diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index 4c8156c086..1050d8f530 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -10,6 +10,7 @@ from databricks.labs.ucx.__about__ import __version__ from databricks.labs.ucx.account import WorkspaceInfo +from databricks.labs.ucx.azure.credentials import StorageCredentialManager from databricks.labs.ucx.framework.crawlers import SqlBackend from databricks.labs.ucx.hive_metastore import TablesCrawler from databricks.labs.ucx.hive_metastore.mapping import Rule, TableMapping @@ -154,3 +155,27 @@ def load(self): def save(self, tables: TablesCrawler, workspace_info: WorkspaceInfo) -> str: raise RuntimeWarning("not available") + + +class StaticStorageCredentialManager(StorageCredentialManager): + # During integration test, we only want to list storage_credentials that are created during the test. + # So we provide a credential name list so the test can ignore credentials that are not in the list. 
+ def __init__(self, ws: WorkspaceClient, credential_names=[]): + super().__init__(ws) + self._credential_names = credential_names + + def list_storage_credentials(self) -> set[str]: + application_ids = set() + + storage_credentials = self._ws.storage_credentials.list(max_results=0) + + for storage_credential in storage_credentials: + if not storage_credential.azure_service_principal: + continue + if storage_credential.name in self._credential_names: + application_ids.add(storage_credential.azure_service_principal.application_id) + + logger.info( + f"Found {len(application_ids)} distinct service principals already used in storage credentials during integration test" + ) + return application_ids diff --git a/tests/unit/azure/test_azure_credentials.py b/tests/unit/azure/test_credentials.py similarity index 99% rename from tests/unit/azure/test_azure_credentials.py rename to tests/unit/azure/test_credentials.py index b247b85cc9..39f42d7024 100644 --- a/tests/unit/azure/test_azure_credentials.py +++ b/tests/unit/azure/test_credentials.py @@ -24,7 +24,7 @@ AzureServicePrincipalInfo, ) from databricks.labs.ucx.azure.access import StoragePermissionMapping -from databricks.labs.ucx.azure.azure_credentials import ( +from databricks.labs.ucx.azure.credentials import ( ServicePrincipalMigration, ServicePrincipalMigrationInfo, ) From f4ec60d44ef2db40f1729aa6b3f98896ea73d7b2 Mon Sep 17 00:00:00 2001 From: Ziyuan Qin Date: Wed, 14 Feb 2024 22:11:14 -0800 Subject: [PATCH 43/76] improve azure/credentials.py for testability --- src/databricks/labs/ucx/azure/credentials.py | 12 ++- tests/integration/azure/test_credentials.py | 84 +++++++++----------- tests/integration/conftest.py | 41 +++++++++- 3 files changed, 86 insertions(+), 51 deletions(-) diff --git a/src/databricks/labs/ucx/azure/credentials.py b/src/databricks/labs/ucx/azure/credentials.py index ff6af42804..5c85cc0d6e 100644 --- a/src/databricks/labs/ucx/azure/credentials.py +++ b/src/databricks/labs/ucx/azure/credentials.py @@ -272,7 +272,11 @@ def _generate_migration_list(self) -> list[ServicePrincipalMigrationInfo]: return sp_list_with_secret - def run(self, prompts: Prompts): + def save(self, migration_results: list[StorageCredentialValidationResult]) -> str: + return self._installation.save(migration_results, filename=self._output_file) + + + def run(self, prompts: Prompts) -> list[StorageCredentialValidationResult]: sp_list_with_secret = self._generate_migration_list() @@ -280,17 +284,17 @@ def run(self, prompts: Prompts): "Above Azure Service Principals will be migrated to UC storage credentials, please review and confirm." ) if plan_confirmed is not True: - return + return [] execution_result = [] for sp in sp_list_with_secret: storage_credential = self._storage_credential_manager.create_storage_credential(sp) execution_result.append(self._storage_credential_manager.validate_storage_credential(storage_credential, sp)) - results_file = self._installation.save(execution_result, filename=self._output_file) + results_file = self.save(execution_result) logger.info("Completed migration from Azure Service Principal migrated to UC Storage credentials") print( f"Completed migration from Azure Service Principal migrated to UC Storage credentials. 
" f"Please check {results_file} for validation results" ) - return + return execution_result diff --git a/tests/integration/azure/test_credentials.py b/tests/integration/azure/test_credentials.py index 321aad9466..f1e755a1c9 100644 --- a/tests/integration/azure/test_credentials.py +++ b/tests/integration/azure/test_credentials.py @@ -1,20 +1,24 @@ import base64 import logging import re -from unittest.mock import MagicMock, patch import pytest +from databricks.labs.blueprint.installation import Installation from databricks.labs.blueprint.tui import MockPrompts from databricks.labs.ucx.assessment.azure import AzureServicePrincipalInfo from databricks.labs.ucx.assessment.crawlers import _SECRET_PATTERN from databricks.labs.ucx.azure.access import StoragePermissionMapping from databricks.labs.ucx.azure.credentials import ServicePrincipalMigration -from tests.integration.conftest import StaticStorageCredentialManager +from databricks.labs.ucx.azure.resources import AzureResources +from databricks.labs.ucx.hive_metastore import ExternalLocations +from tests.integration.conftest import StaticStorageCredentialManager, \ + StaticAzureResourcePermissions, StaticAzureServicePrincipalCrawler, \ + StaticServicePrincipalMigration @pytest.fixture -def prepare_spn_migration_test(ws, debug_env, make_random): +def prepare_spn_migration_test(ws, debug_env, make_random, sql_backend): def inner(read_only=False) -> dict: spark_conf = ws.clusters.get(debug_env["TEST_LEGACY_SPN_CLUSTER_ID"]).spark_conf @@ -35,10 +39,15 @@ def inner(read_only=False) -> dict: end_point = spark_conf.get("fs.azure.account.oauth2.client.endpoint") directory_id = end_point.split("/")[3] - name = f"testinfra_storageaccess_{make_random(4).lower()}" + random = make_random(6).lower() + name = f"testinfra_storageaccess_{random}" - azure_resource_permissions = MagicMock() - azure_resource_permissions.load.return_value = [ + + installation = Installation(ws, 'ucx') + azurerm = AzureResources(ws) + locations = ExternalLocations(ws, sql_backend, "dont_need_a_schema") + + azure_resource_permissions = StaticAzureResourcePermissions(installation, ws, azurerm, locations, [ StoragePermissionMapping( prefix="abfss://things@labsazurethings.dfs.core.windows.net/avoid_ext_loc_overlap", client_id=application_id, @@ -46,10 +55,9 @@ def inner(read_only=False) -> dict: privilege="READ_FILES" if read_only else "WRITE_FILES", directory_id=directory_id, ) - ] + ]) - azure_sp_crawler = MagicMock() - azure_sp_crawler.snapshot.return_value = [ + azure_sp_crawler = StaticAzureServicePrincipalCrawler(ws, sql_backend, "dont_need_a_schema", [ AzureServicePrincipalInfo( application_id=application_id, secret_scope=secret_scope, @@ -57,10 +65,7 @@ def inner(read_only=False) -> dict: tenant_id="test", storage_account="test", ) - ] - - installation = MagicMock() - installation.save.return_value = "azure_service_principal_migration_result.csv" + ]) return { "storage_credential_name": name, @@ -78,17 +83,16 @@ def inner(read_only=False) -> dict: @pytest.fixture def execute_migration(ws): def inner(variables: dict, credentials: list[str]) -> ServicePrincipalMigration: - spn_migration = ServicePrincipalMigration( + spn_migration = StaticServicePrincipalMigration( variables["installation"], ws, variables["azure_resource_permissions"], variables["azure_sp_crawler"], StaticStorageCredentialManager(ws, credentials) ) - spn_migration.run( + return spn_migration.run( MockPrompts({"Above Azure Service Principals will be migrated to UC storage credentials *": "Yes"}) ) - return 
spn_migration return inner @@ -108,18 +112,10 @@ def test_spn_migration_existed_storage_credential( ) # test that the spn migration will be skipped due to above storage credential is existed - execute_migration(variables, [variables["storage_credential_name"]]) + migration_result = execute_migration(variables, [variables["storage_credential_name"]]) - # assert no action plan is logged since no spn migrated - patterns = [ - "Service Principal name:", - "application_id:", - "privilege", - "on location" - ] - for record in caplog.records: - if all(pattern in record.message for pattern in patterns): - assert False, "Migration action plan should not be logged when no service principal will be migrated" + # assert no spn migrated since migration_result will be empty + assert not migration_result @pytest.mark.parametrize("read_only", [False, True]) @@ -127,28 +123,26 @@ def test_spn_migration(ws, execute_migration, prepare_spn_migration_test, read_o variables = prepare_spn_migration_test(read_only) try: - spn_migration = execute_migration(variables, ["lets_migrate_the_spn"]) + migration_results = execute_migration(variables, ["lets_migrate_the_spn"]) storage_credential = ws.storage_credentials.get(variables["storage_credential_name"]) assert storage_credential is not None assert storage_credential.read_only is read_only - validation_result = spn_migration._installation.save.call_args.args[0][0] - if read_only: - # We only assert that write validation are not performed for read only storage credential here. - # In real life, the READ validation for read only storage credential may fail if there is no file, - # but that is fine, as the storage credential is created, and we just cannot validate it until it's really used. - assert not any( - (res.operation is not None) and ("WRITE" in res.operation.value) for res in validation_result.results - ) - else: - assert any( - (res.operation is not None) and ("WRITE" in res.operation.value) and ("PASS" in res.result.value) - for res in validation_result.results - ) - assert any( - (res.operation is not None) and ("DELETE" in res.operation.value) and ("PASS" in res.result.value) - for res in validation_result.results - ) + for res in migration_results[0].results: + if res.operation is None: + #TODO: file a ticket to SDK team, PATH_EXISTS and HIERARCHICAL_NAMESPACE_ENABLED + # should be added to the validation operations. They are None right now. + # Once it's fixed, the None check here can be removed + continue + if read_only: + if res.operation.value in ("WRITE", "DELETE"): + # We only assert that write validation are not performed for read only storage credential here. + # In real life, the READ validation for read only storage credential may fail if there is no file, + # but that is fine, as the storage credential is created, and we just cannot validate it until it's really used. 
+ assert False, "WRITE operation should not be checked for read-only storage credential" + if not read_only: + if res.result.value == "FAIL": + assert False, f"{res.operation.value} operation is failed while validating storage credential" finally: ws.storage_credentials.delete(name=variables["storage_credential_name"], force=True) diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index 1050d8f530..b6515ab633 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -10,9 +10,14 @@ from databricks.labs.ucx.__about__ import __version__ from databricks.labs.ucx.account import WorkspaceInfo -from databricks.labs.ucx.azure.credentials import StorageCredentialManager +from databricks.labs.ucx.assessment.azure import AzureServicePrincipalCrawler, \ + AzureServicePrincipalInfo +from databricks.labs.ucx.azure.access import AzureResourcePermissions, StoragePermissionMapping +from databricks.labs.ucx.azure.credentials import StorageCredentialManager, \ + ServicePrincipalMigration, StorageCredentialValidationResult +from databricks.labs.ucx.azure.resources import AzureResources from databricks.labs.ucx.framework.crawlers import SqlBackend -from databricks.labs.ucx.hive_metastore import TablesCrawler +from databricks.labs.ucx.hive_metastore import TablesCrawler, ExternalLocations from databricks.labs.ucx.hive_metastore.mapping import Rule, TableMapping from databricks.labs.ucx.hive_metastore.tables import Table from databricks.labs.ucx.hive_metastore.udfs import Udf, UdfsCrawler @@ -157,6 +162,21 @@ def save(self, tables: TablesCrawler, workspace_info: WorkspaceInfo) -> str: raise RuntimeWarning("not available") +class StaticServicePrincipalMigration(ServicePrincipalMigration): + def __init__( + self, + installation: Installation, + ws: WorkspaceClient, + azure_resource_permissions: AzureResourcePermissions, + azure_sp_crawler: AzureServicePrincipalCrawler, + storage_credential_manager: StorageCredentialManager + ): + super().__init__(installation, ws, azure_resource_permissions, azure_sp_crawler, storage_credential_manager) + + def save(self, migration_results: list[StorageCredentialValidationResult]) -> str: + return "azure_service_principal_migration_result.csv" + + class StaticStorageCredentialManager(StorageCredentialManager): # During integration test, we only want to list storage_credentials that are created during the test. # So we provide a credential name list so the test can ignore credentials that are not in the list. 
@@ -179,3 +199,20 @@ def list_storage_credentials(self) -> set[str]: f"Found {len(application_ids)} distinct service principals already used in storage credentials during integration test" ) return application_ids + + +class StaticAzureServicePrincipalCrawler(AzureServicePrincipalCrawler): + def __init__(self, ws: WorkspaceClient, sbe: SqlBackend, schema: str, spn_infos: list[AzureServicePrincipalInfo]): + super().__init__(ws, sbe, schema) + self.spn_infos = spn_infos + + def snapshot(self) -> list[AzureServicePrincipalInfo]: + return self.spn_infos + +class StaticAzureResourcePermissions(AzureResourcePermissions): + def __init__(self, installation: Installation, ws: WorkspaceClient, azurerm: AzureResources, lc: ExternalLocations, permission_mappings: list[StoragePermissionMapping]): + super().__init__(installation, ws, azurerm, lc) + self._permission_mappings = permission_mappings + + def load(self) -> list[StoragePermissionMapping]: + return self._permission_mappings \ No newline at end of file From 1eea9bcc7ba0a5bfe762e97568428343476926b1 Mon Sep 17 00:00:00 2001 From: Ziyuan Qin Date: Wed, 14 Feb 2024 23:05:07 -0800 Subject: [PATCH 44/76] improve readability of integration test integration/azure/test_credentials.py --- src/databricks/labs/ucx/azure/credentials.py | 1 - tests/integration/azure/test_credentials.py | 114 +++++++++---------- 2 files changed, 55 insertions(+), 60 deletions(-) diff --git a/src/databricks/labs/ucx/azure/credentials.py b/src/databricks/labs/ucx/azure/credentials.py index 5c85cc0d6e..daae49479c 100644 --- a/src/databricks/labs/ucx/azure/credentials.py +++ b/src/databricks/labs/ucx/azure/credentials.py @@ -1,6 +1,5 @@ import base64 import logging -from collections import namedtuple from dataclasses import dataclass from databricks.labs.blueprint.installation import Installation diff --git a/tests/integration/azure/test_credentials.py b/tests/integration/azure/test_credentials.py index f1e755a1c9..2d9b7d5256 100644 --- a/tests/integration/azure/test_credentials.py +++ b/tests/integration/azure/test_credentials.py @@ -1,6 +1,7 @@ import base64 import logging import re +from dataclasses import dataclass import pytest from databricks.labs.blueprint.installation import Installation @@ -9,7 +10,8 @@ from databricks.labs.ucx.assessment.azure import AzureServicePrincipalInfo from databricks.labs.ucx.assessment.crawlers import _SECRET_PATTERN from databricks.labs.ucx.azure.access import StoragePermissionMapping -from databricks.labs.ucx.azure.credentials import ServicePrincipalMigration +from databricks.labs.ucx.azure.credentials import ServicePrincipalMigration, \ + StorageCredentialValidationResult from databricks.labs.ucx.azure.resources import AzureResources from databricks.labs.ucx.hive_metastore import ExternalLocations from tests.integration.conftest import StaticStorageCredentialManager, \ StaticAzureResourcePermissions, StaticAzureServicePrincipalCrawler, \ StaticServicePrincipalMigration +@dataclass +class MigrationTestInfo: + credential_name: str + application_id: str + directory_id: str + secret_scope: str + secret_key: str + client_secret: str + + @pytest.fixture -def prepare_spn_migration_test(ws, debug_env, make_random, sql_backend): - def inner(read_only=False) -> dict: - spark_conf = ws.clusters.get(debug_env["TEST_LEGACY_SPN_CLUSTER_ID"]).spark_conf +def extract_test_info(ws, debug_env, make_random): + random = make_random(6).lower() + credential_name = f"testinfra_storageaccess_{random}" + spark_conf =
ws.clusters.get(debug_env["TEST_LEGACY_SPN_CLUSTER_ID"]).spark_conf - secret_matched = re.search(_SECRET_PATTERN, spark_conf.get("fs.azure.account.oauth2.client.secret")) - if secret_matched: - secret_scope, secret_key = ( - secret_matched.group(1).split("/")[1], - secret_matched.group(1).split("/")[2], - ) - assert secret_scope is not None - assert secret_key is not None + application_id = spark_conf.get("fs.azure.account.oauth2.client.id") - secret_response = ws.secrets.get_secret(secret_scope, secret_key) - client_secret = base64.b64decode(secret_response.value).decode("utf-8") + end_point = spark_conf.get("fs.azure.account.oauth2.client.endpoint") + directory_id = end_point.split("/")[3] - end_point = spark_conf.get("fs.azure.account.oauth2.client.endpoint") - directory_id = end_point.split("/")[3] + secret_matched = re.search(_SECRET_PATTERN, spark_conf.get("fs.azure.account.oauth2.client.secret")) + if secret_matched: + secret_scope, secret_key = ( + secret_matched.group(1).split("/")[1], + secret_matched.group(1).split("/")[2], + ) + assert secret_scope is not None + assert secret_key is not None - random = make_random(6).lower() - name = f"testinfra_storageaccess_{random}" + secret_response = ws.secrets.get_secret(secret_scope, secret_key) + client_secret = base64.b64decode(secret_response.value).decode("utf-8") + return MigrationTestInfo(credential_name, application_id, directory_id, secret_scope, secret_key, client_secret) + +@pytest.fixture +def run_migration(ws, sql_backend) -> ServicePrincipalMigration: + def inner(test_info: MigrationTestInfo, credentials: list[str], read_only=False) -> list[StorageCredentialValidationResult]: installation = Installation(ws, 'ucx') azurerm = AzureResources(ws) locations = ExternalLocations(ws, sql_backend, "dont_need_a_schema") @@ -50,44 +66,28 @@ def inner(read_only=False) -> dict: azure_resource_permissions = StaticAzureResourcePermissions(installation, ws, azurerm, locations, [ StoragePermissionMapping( prefix="abfss://things@labsazurethings.dfs.core.windows.net/avoid_ext_loc_overlap", - client_id=application_id, - principal=name, + client_id=test_info.application_id, + principal=test_info.credential_name, privilege="READ_FILES" if read_only else "WRITE_FILES", - directory_id=directory_id, + directory_id=test_info.directory_id ) ]) azure_sp_crawler = StaticAzureServicePrincipalCrawler(ws, sql_backend, "dont_need_a_schema", [ AzureServicePrincipalInfo( - application_id=application_id, - secret_scope=secret_scope, - secret_key=secret_key, + application_id=test_info.application_id, + secret_scope=test_info.secret_scope, + secret_key=test_info.secret_key, tenant_id="test", storage_account="test", ) ]) - return { - "storage_credential_name": name, - "application_id": application_id, - "directory_id": directory_id, - "client_secret": client_secret, - "azure_resource_permissions": azure_resource_permissions, - "azure_sp_crawler": azure_sp_crawler, - "installation": installation, - } - - return inner - - -@pytest.fixture -def execute_migration(ws): - def inner(variables: dict, credentials: list[str]) -> ServicePrincipalMigration: spn_migration = StaticServicePrincipalMigration( - variables["installation"], + installation, ws, - variables["azure_resource_permissions"], - variables["azure_sp_crawler"], + azure_resource_permissions, + azure_sp_crawler, StaticStorageCredentialManager(ws, credentials) ) return spn_migration.run( @@ -98,37 +98,33 @@ def inner(variables: dict, credentials: list[str]) -> ServicePrincipalMigration: def 
test_spn_migration_existed_storage_credential( - extract_test_info, make_storage_credential_from_spn, run_migration + caplog, execute_migration, make_storage_credential_from_spn, prepare_spn_migration_test ): - variables = prepare_spn_migration_test(read_only=False) - # create a storage credential for this test make_storage_credential_from_spn( - name=variables["storage_credential_name"], - application_id=variables["application_id"], - client_secret=variables["client_secret"], - directory_id=variables["directory_id"], + name=extract_test_info.credential_name, + application_id=extract_test_info.application_id, + client_secret=extract_test_info.client_secret, + directory_id=extract_test_info.directory_id ) # test that the spn migration will be skipped due to above storage credential is existed - migration_result = execute_migration(variables, [variables["storage_credential_name"]]) + migration_result = run_migration(extract_test_info, [extract_test_info.credential_name]) # assert no spn migrated since migration_result will be empty assert not migration_result @pytest.mark.parametrize("read_only", [False, True]) -def test_spn_migration(ws, execute_migration, prepare_spn_migration_test, read_only): - variables = prepare_spn_migration_test(read_only) - +def test_spn_migration(ws, extract_test_info, run_migration, read_only): try: - migration_results = execute_migration(variables, ["lets_migrate_the_spn"]) + migration_results = run_migration(extract_test_info, ["lets_migrate_the_spn"], read_only) - storage_credential = ws.storage_credentials.get(variables["storage_credential_name"]) + storage_credential = ws.storage_credentials.get(extract_test_info.credential_name) assert storage_credential is not None assert storage_credential.read_only is read_only + # assert the storage credential validation results for res in migration_results[0].results: if res.operation is None: #TODO: file a ticket to SDK team, PATH_EXISTS and HIERARCHICAL_NAMESPACE_ENABLED # should be added to the validation operations. They are None right now. # Once it's fixed, the None check here can be removed continue if read_only: if res.operation.value in ("WRITE", "DELETE"): # We only assert that write validation are not performed for read only storage credential here. # In real life, the READ validation for read only storage credential may fail if there is no file, # but that is fine, as the storage credential is created, and we just cannot validate it until it's really used. assert False, "WRITE operation should not be checked for read-only storage credential" if not read_only: if res.result.value == "FAIL": assert False, f"{res.operation.value} operation is failed while validating storage credential" finally: - ws.storage_credentials.delete(name=variables["storage_credential_name"], force=True) + ws.storage_credentials.delete(extract_test_info.credential_name, force=True) From 09d66a6a80a5217c4d099c2ace8e5091c6ea638c Mon Sep 17 00:00:00 2001 From: Ziyuan Qin Date: Thu, 15 Feb 2024 10:15:47 -0800 Subject: [PATCH 45/76] shorten the variable names; use proper `| None` in dataclass type definition; fix a wrong return type hint --- src/databricks/labs/ucx/azure/access.py | 4 +- src/databricks/labs/ucx/azure/credentials.py | 96 +++++++++----------- tests/integration/azure/test_credentials.py | 82 +++++++++-------- tests/integration/conftest.py | 44 ++++++--- tests/unit/azure/test_credentials.py | 4 +- 5 files changed, 122 insertions(+), 108 deletions(-) diff --git a/src/databricks/labs/ucx/azure/access.py b/src/databricks/labs/ucx/azure/access.py index c2c4b4a94d..113f58ede0 100644 --- a/src/databricks/labs/ucx/azure/access.py +++ b/src/databricks/labs/ucx/azure/access.py @@ -105,7 +105,5 @@ def _get_storage_accounts(self) -> list[str]: return storage_accounts def load(self) -> list[StoragePermissionMapping]: - """ - Load StoragePermissionMapping info from azure_storage_account_info.csv - """ + """Load StoragePermissionMapping info from azure_storage_account_info.csv""" return
self._installation.load(list[StoragePermissionMapping], filename=self._filename) diff --git a/src/databricks/labs/ucx/azure/credentials.py b/src/databricks/labs/ucx/azure/credentials.py index daae49479c..0c0cbc013b 100644 --- a/src/databricks/labs/ucx/azure/credentials.py +++ b/src/databricks/labs/ucx/azure/credentials.py @@ -37,11 +37,11 @@ class ServicePrincipalMigrationInfo: @dataclass class StorageCredentialValidationResult: - name: str - azure_service_principal: AzureServicePrincipal - created_by: str - read_only: bool - results: list[ValidationResult] + name: str | None = None + azure_service_principal: AzureServicePrincipal | None = None + created_by: str | None = None + read_only: bool | None = None + results: list[ValidationResult] | None = None @classmethod def from_storage_credential_validation( @@ -51,19 +51,18 @@ def from_storage_credential_validation( # Guard rail to explicitly remove the client_secret, just in case the azure_service_principal # in StorageCredentialInfo returned by WorkspaceClient.storage_credentials.create exposes the # client_secret due to potential bugs in the future. - service_principal = AzureServicePrincipal(storage_credential.azure_service_principal.directory_id, - storage_credential.azure_service_principal.application_id, - "" + service_principal = AzureServicePrincipal( + storage_credential.azure_service_principal.directory_id, + storage_credential.azure_service_principal.application_id, + "", ) - else: - service_principal = AzureServicePrincipal("", "", "") return cls( - name=storage_credential.name or "", + name=storage_credential.name, azure_service_principal=service_principal, - created_by=storage_credential.created_by or "", - read_only=storage_credential.read_only or False, - results=validation.results or [], + created_by=storage_credential.created_by, + read_only=storage_credential.read_only, + results=validation.results, ) @@ -82,16 +81,13 @@ def list_storage_credentials(self) -> set[str]: if storage_credential.azure_service_principal: application_ids.add(storage_credential.azure_service_principal.application_id) - logger.info( - f"Found {len(application_ids)} distinct service principals already used in UC storage credentials" - ) + logger.info(f"Found {len(application_ids)} distinct service principals already used in UC storage credentials") return application_ids - def create_storage_credential(self, sp_migration: ServicePrincipalMigrationInfo) -> StorageCredentialInfo: # prepare the storage credential properties name = sp_migration.permission_mapping.principal - azure_service_principal = AzureServicePrincipal( + service_principal = AzureServicePrincipal( directory_id=sp_migration.permission_mapping.directory_id, application_id=sp_migration.permission_mapping.client_id, client_secret=sp_migration.client_secret, @@ -102,12 +98,11 @@ def create_storage_credential(self, sp_migration: ServicePrincipalMigrationInfo) read_only = True # create the storage credential return self._ws.storage_credentials.create( - name, azure_service_principal=azure_service_principal, comment=comment, read_only=read_only + name, azure_service_principal=service_principal, comment=comment, read_only=read_only ) - def validate_storage_credential( - self, storage_credential, sp_migration: ServicePrincipalMigrationInfo + self, storage_credential, sp_migration: ServicePrincipalMigrationInfo ) -> StorageCredentialValidationResult: read_only = False if sp_migration.permission_mapping.privilege == Privilege.READ_FILES.value: @@ -116,7 +111,9 @@ def 
validate_storage_credential( # existing UC external locations. So add a sub folder to the validation location just in case try: validation = self._ws.storage_credentials.validate( - storage_credential_name=storage_credential.name, url=sp_migration.permission_mapping.prefix, read_only=read_only + storage_credential_name=storage_credential.name, + url=sp_migration.permission_mapping.prefix, + read_only=read_only, ) return StorageCredentialValidationResult.from_storage_credential_validation(storage_credential, validation) except InvalidParameterValue: @@ -142,29 +139,29 @@ def __init__( self, installation: Installation, ws: WorkspaceClient, - azure_resource_permissions: AzureResourcePermissions, - azure_sp_crawler: AzureServicePrincipalCrawler, - storage_credential_manager: StorageCredentialManager + resource_permissions: AzureResourcePermissions, + sp_crawler: AzureServicePrincipalCrawler, + storage_credential_manager: StorageCredentialManager, ): self._output_file = "azure_service_principal_migration_result.csv" self._installation = installation self._ws = ws - self._azure_resource_permissions = azure_resource_permissions - self._azure_sp_crawler = azure_sp_crawler + self._resource_permissions = resource_permissions + self._sp_crawler = sp_crawler self._storage_credential_manager = storage_credential_manager @classmethod def for_cli(cls, ws: WorkspaceClient, prompts: Prompts, product='ucx'): if not ws.config.is_azure: logger.error("Workspace is not on azure, please run this command on azure databricks workspaces.") - return None + raise SystemExit() - csv_confirmed = prompts.confirm( + msg = ( "Have you reviewed the azure_storage_account_info.csv " "and confirm listed service principals are allowed to be checked for migration?" ) - if csv_confirmed is not True: - return None + if not prompts.confirm(msg): + raise SystemExit() installation = Installation.current(ws, product) config = installation.load(WorkspaceConfig) @@ -172,13 +169,12 @@ def for_cli(cls, ws: WorkspaceClient, prompts: Prompts, product='ucx'): azurerm = AzureResources(ws) locations = ExternalLocations(ws, sql_backend, config.inventory_database) - azure_resource_permissions = AzureResourcePermissions(installation, ws, azurerm, locations) - azure_sp_crawler = AzureServicePrincipalCrawler(ws, sql_backend, config.inventory_database) + resource_permissions = AzureResourcePermissions(installation, ws, azurerm, locations) + sp_crawler = AzureServicePrincipalCrawler(ws, sql_backend, config.inventory_database) storage_credential_manager = StorageCredentialManager(ws) - return cls(installation, ws, azure_resource_permissions, azure_sp_crawler, storage_credential_manager) - + return cls(installation, ws, resource_permissions, sp_crawler, storage_credential_manager) def _read_databricks_secret(self, scope: str, key: str, application_id: str) -> str | None: try: @@ -221,27 +217,25 @@ def _fetch_client_secret(self, sp_list: list[StoragePermissionMapping]) -> list[ # they won't have any client_secret, we will process managed identity in the future. 
# fetch client_secrets of crawled service principal, if any - azure_sp_info_with_client_secret: dict[str, str] = {} - azure_sp_infos = self._azure_sp_crawler.snapshot() + sp_info_with_client_secret: dict[str, str] = {} + sp_infos = self._sp_crawler.snapshot() - for azure_sp_info in azure_sp_infos: - if not azure_sp_info.secret_scope: + for sp_info in sp_infos: + if not sp_info.secret_scope: continue - if not azure_sp_info.secret_key: + if not sp_info.secret_key: continue secret_value = self._read_databricks_secret( - azure_sp_info.secret_scope, azure_sp_info.secret_key, azure_sp_info.application_id + sp_info.secret_scope, sp_info.secret_key, sp_info.application_id ) if secret_value: - azure_sp_info_with_client_secret[azure_sp_info.application_id] = secret_value + sp_info_with_client_secret[sp_info.application_id] = secret_value # update the list of ServicePrincipalMigrationInfo if client_secret is found sp_list_with_secret = [] for sp in sp_list: - if sp.client_id in azure_sp_info_with_client_secret: - sp_list_with_secret.append( - ServicePrincipalMigrationInfo(sp, azure_sp_info_with_client_secret[sp.client_id]) - ) + if sp.client_id in sp_info_with_client_secret: + sp_list_with_secret.append(ServicePrincipalMigrationInfo(sp, sp_info_with_client_secret[sp.client_id])) return sp_list_with_secret def _print_action_plan(self, sp_list: list[ServicePrincipalMigrationInfo]): @@ -259,7 +253,7 @@ def _generate_migration_list(self) -> list[ServicePrincipalMigrationInfo]: Create the list of SP that need to be migrated, output an action plan as a csv file for users to confirm """ # load sp list from azure_storage_account_info.csv - sp_list = self._azure_resource_permissions.load() + sp_list = self._resource_permissions.load() # list existed storage credentials sc_set = self._storage_credential_manager.list_storage_credentials() # check if the sp is already used in UC storage credential @@ -270,11 +264,9 @@ def _generate_migration_list(self) -> list[ServicePrincipalMigrationInfo]: self._print_action_plan(sp_list_with_secret) return sp_list_with_secret - def save(self, migration_results: list[StorageCredentialValidationResult]) -> str: return self._installation.save(migration_results, filename=self._output_file) - def run(self, prompts: Prompts) -> list[StorageCredentialValidationResult]: sp_list_with_secret = self._generate_migration_list() @@ -288,7 +280,9 @@ def run(self, prompts: Prompts) -> list[StorageCredentialValidationResult]: execution_result = [] for sp in sp_list_with_secret: storage_credential = self._storage_credential_manager.create_storage_credential(sp) - execution_result.append(self._storage_credential_manager.validate_storage_credential(storage_credential, sp)) + execution_result.append( + self._storage_credential_manager.validate_storage_credential(storage_credential, sp) + ) results_file = self.save(execution_result) logger.info("Completed migration from Azure Service Principal migrated to UC Storage credentials") diff --git a/tests/integration/azure/test_credentials.py b/tests/integration/azure/test_credentials.py index 2d9b7d5256..22f4ceca65 100644 --- a/tests/integration/azure/test_credentials.py +++ b/tests/integration/azure/test_credentials.py @@ -1,5 +1,4 @@ import base64 -import logging import re from dataclasses import dataclass @@ -10,13 +9,15 @@ from databricks.labs.ucx.assessment.azure import AzureServicePrincipalInfo from databricks.labs.ucx.assessment.crawlers import _SECRET_PATTERN from databricks.labs.ucx.azure.access import StoragePermissionMapping -from 
databricks.labs.ucx.azure.credentials import ServicePrincipalMigration, \ - StorageCredentialValidationResult +from databricks.labs.ucx.azure.credentials import StorageCredentialValidationResult from databricks.labs.ucx.azure.resources import AzureResources from databricks.labs.ucx.hive_metastore import ExternalLocations -from tests.integration.conftest import StaticStorageCredentialManager, \ - StaticAzureResourcePermissions, StaticAzureServicePrincipalCrawler, \ - StaticServicePrincipalMigration +from tests.integration.conftest import ( + StaticAzureResourcePermissions, + StaticAzureServicePrincipalCrawler, + StaticServicePrincipalMigration, + StaticStorageCredentialManager, +) @dataclass @@ -57,38 +58,47 @@ def extract_test_info(ws, debug_env, make_random): @pytest.fixture -def run_migration(ws, sql_backend) -> ServicePrincipalMigration: - def inner(test_info: MigrationTestInfo, credentials: list[str], read_only=False) -> list[StorageCredentialValidationResult]: +def run_migration(ws, sql_backend): + def inner( + test_info: MigrationTestInfo, credentials: list[str], read_only=False + ) -> list[StorageCredentialValidationResult]: installation = Installation(ws, 'ucx') azurerm = AzureResources(ws) locations = ExternalLocations(ws, sql_backend, "dont_need_a_schema") - azure_resource_permissions = StaticAzureResourcePermissions(installation, ws, azurerm, locations, [ - StoragePermissionMapping( - prefix="abfss://things@labsazurethings.dfs.core.windows.net/avoid_ext_loc_overlap", - client_id=test_info.application_id, - principal=test_info.credential_name, - privilege="READ_FILES" if read_only else "WRITE_FILES", - directory_id=test_info.directory_id - ) - ]) - - azure_sp_crawler = StaticAzureServicePrincipalCrawler(ws, sql_backend, "dont_need_a_schema", [ - AzureServicePrincipalInfo( - application_id=test_info.application_id, - secret_scope=test_info.secret_scope, - secret_key=test_info.secret_key, - tenant_id="test", - storage_account="test", - ) - ]) - - spn_migration = StaticServicePrincipalMigration( + resource_permissions = StaticAzureResourcePermissions( installation, ws, - azure_resource_permissions, - azure_sp_crawler, - StaticStorageCredentialManager(ws, credentials) + azurerm, + locations, + [ + StoragePermissionMapping( + prefix="abfss://things@labsazurethings.dfs.core.windows.net/avoid_ext_loc_overlap", + client_id=test_info.application_id, + principal=test_info.credential_name, + privilege="READ_FILES" if read_only else "WRITE_FILES", + directory_id=test_info.directory_id, + ) + ], + ) + + sp_crawler = StaticAzureServicePrincipalCrawler( + ws, + sql_backend, + "dont_need_a_schema", + [ + AzureServicePrincipalInfo( + application_id=test_info.application_id, + secret_scope=test_info.secret_scope, + secret_key=test_info.secret_key, + tenant_id="test", + storage_account="test", + ) + ], + ) + + spn_migration = StaticServicePrincipalMigration( + installation, ws, resource_permissions, sp_crawler, StaticStorageCredentialManager(ws, credentials) ) return spn_migration.run( MockPrompts({"Above Azure Service Principals will be migrated to UC storage credentials *": "Yes"}) @@ -97,15 +107,13 @@ def inner(test_info: MigrationTestInfo, credentials: list[str], read_only=False) return inner -def test_spn_migration_existed_storage_credential( - extract_test_info, make_storage_credential_from_spn, run_migration -): +def test_spn_migration_existed_storage_credential(extract_test_info, make_storage_credential_from_spn, run_migration): # create a storage credential for this test 
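    # (the skip is expected because list_storage_credentials() will return this
    # credential's application_id, so _generate_migration_list filters the principal out)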
make_storage_credential_from_spn( name=extract_test_info.credential_name, application_id=extract_test_info.application_id, client_secret=extract_test_info.client_secret, - directory_id=extract_test_info.directory_id + directory_id=extract_test_info.directory_id, ) # test that the spn migration will be skipped due to above storage credential is existed @@ -127,7 +135,7 @@ def test_spn_migration(ws, extract_test_info, run_migration, read_only): # assert the storage credential validation results for res in migration_results[0].results: if res.operation is None: - #TODO: file a ticket to SDK team, PATH_EXISTS and HIERARCHICAL_NAMESPACE_ENABLED + # TODO: file a ticket to SDK team, PATH_EXISTS and HIERARCHICAL_NAMESPACE_ENABLED # should be added to the validation operations. They are None right now. # Once it's fixed, the None check here can be removed continue diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index b6515ab633..e7518abaa8 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -10,14 +10,22 @@ from databricks.labs.ucx.__about__ import __version__ from databricks.labs.ucx.account import WorkspaceInfo -from databricks.labs.ucx.assessment.azure import AzureServicePrincipalCrawler, \ - AzureServicePrincipalInfo -from databricks.labs.ucx.azure.access import AzureResourcePermissions, StoragePermissionMapping -from databricks.labs.ucx.azure.credentials import StorageCredentialManager, \ - ServicePrincipalMigration, StorageCredentialValidationResult +from databricks.labs.ucx.assessment.azure import ( + AzureServicePrincipalCrawler, + AzureServicePrincipalInfo, +) +from databricks.labs.ucx.azure.access import ( + AzureResourcePermissions, + StoragePermissionMapping, +) +from databricks.labs.ucx.azure.credentials import ( + ServicePrincipalMigration, + StorageCredentialManager, + StorageCredentialValidationResult, +) from databricks.labs.ucx.azure.resources import AzureResources from databricks.labs.ucx.framework.crawlers import SqlBackend -from databricks.labs.ucx.hive_metastore import TablesCrawler, ExternalLocations +from databricks.labs.ucx.hive_metastore import ExternalLocations, TablesCrawler from databricks.labs.ucx.hive_metastore.mapping import Rule, TableMapping from databricks.labs.ucx.hive_metastore.tables import Table from databricks.labs.ucx.hive_metastore.udfs import Udf, UdfsCrawler @@ -164,12 +172,12 @@ def save(self, tables: TablesCrawler, workspace_info: WorkspaceInfo) -> str: class StaticServicePrincipalMigration(ServicePrincipalMigration): def __init__( - self, - installation: Installation, - ws: WorkspaceClient, - azure_resource_permissions: AzureResourcePermissions, - azure_sp_crawler: AzureServicePrincipalCrawler, - storage_credential_manager: StorageCredentialManager + self, + installation: Installation, + ws: WorkspaceClient, + azure_resource_permissions: AzureResourcePermissions, + azure_sp_crawler: AzureServicePrincipalCrawler, + storage_credential_manager: StorageCredentialManager, ): super().__init__(installation, ws, azure_resource_permissions, azure_sp_crawler, storage_credential_manager) @@ -209,10 +217,18 @@ def __init__(self, ws: WorkspaceClient, sbe: SqlBackend, schema: str, spn_infos: def snapshot(self) -> list[AzureServicePrincipalInfo]: return self.spn_infos + class StaticAzureResourcePermissions(AzureResourcePermissions): - def __init__(self, installation: Installation, ws: WorkspaceClient, azurerm: AzureResources, lc: ExternalLocations, permission_mappings: list[StoragePermissionMapping]): + def 
__init__( + self, + installation: Installation, + ws: WorkspaceClient, + azurerm: AzureResources, + lc: ExternalLocations, + permission_mappings: list[StoragePermissionMapping], + ): super().__init__(installation, ws, azurerm, lc) self._permission_mappings = permission_mappings def load(self) -> list[StoragePermissionMapping]: - return self._permission_mappings \ No newline at end of file + return self._permission_mappings diff --git a/tests/unit/azure/test_credentials.py b/tests/unit/azure/test_credentials.py index 39f42d7024..cdf3a217aa 100644 --- a/tests/unit/azure/test_credentials.py +++ b/tests/unit/azure/test_credentials.py @@ -355,9 +355,7 @@ def test_execute_migration_no_confirmation(mocker, ws): mocker.patch("databricks.labs.ucx.azure.azure_credentials.ServicePrincipalMigration._generate_migration_list") - with patch( - "databricks.labs.ucx.azure.azure_credentials.ServicePrincipalMigration._create_storage_credential" - ) as c: + with patch("databricks.labs.ucx.azure.azure_credentials.ServicePrincipalMigration._create_storage_credential") as c: sp_migration = ServicePrincipalMigration.for_cli(ws, prompts) sp_migration.run(prompts) c.assert_not_called() From d86f91c41af2f84a1c1ccf14ff9c4303c64684ea Mon Sep 17 00:00:00 2001 From: Ziyuan Qin Date: Thu, 15 Feb 2024 13:57:20 -0800 Subject: [PATCH 46/76] shorten variable name when create storage credential --- src/databricks/labs/ucx/azure/credentials.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/databricks/labs/ucx/azure/credentials.py b/src/databricks/labs/ucx/azure/credentials.py index 0c0cbc013b..7366a704b7 100644 --- a/src/databricks/labs/ucx/azure/credentials.py +++ b/src/databricks/labs/ucx/azure/credentials.py @@ -84,17 +84,17 @@ def list_storage_credentials(self) -> set[str]: logger.info(f"Found {len(application_ids)} distinct service principals already used in UC storage credentials") return application_ids - def create_storage_credential(self, sp_migration: ServicePrincipalMigrationInfo) -> StorageCredentialInfo: + def create_storage_credential(self, sp: ServicePrincipalMigrationInfo) -> StorageCredentialInfo: # prepare the storage credential properties - name = sp_migration.permission_mapping.principal + name = sp.permission_mapping.principal service_principal = AzureServicePrincipal( - directory_id=sp_migration.permission_mapping.directory_id, - application_id=sp_migration.permission_mapping.client_id, - client_secret=sp_migration.client_secret, + directory_id=sp.permission_mapping.directory_id, + application_id=sp.permission_mapping.client_id, + client_secret=sp.client_secret, ) - comment = f"Created by UCX during migration to UC using Azure Service Principal: {sp_migration.permission_mapping.principal}" + comment = f"Created by UCX during migration to UC using Azure Service Principal: {sp.permission_mapping.principal}" read_only = False - if sp_migration.permission_mapping.privilege == Privilege.READ_FILES.value: + if sp.permission_mapping.privilege == Privilege.READ_FILES.value: read_only = True # create the storage credential return self._ws.storage_credentials.create( From 1397b442cd481e58d0cf439ddb683828dbdff259 Mon Sep 17 00:00:00 2001 From: Ziyuan Qin Date: Thu, 15 Feb 2024 21:14:51 -0800 Subject: [PATCH 47/76] refactor unit tests to test different cases by calling public functions only. 
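
For illustration, a test in the refactored style drives everything through the
public run() entry point and injects test doubles for each collaborator. The
sketch below is illustrative only: the mapping rows, secret value and workspace
responses are made up for the example, and GetSecretResponse is assumed to be
importable from databricks.sdk.service.workspace:

    from unittest.mock import create_autospec

    from databricks.labs.blueprint.installation import MockInstallation
    from databricks.labs.blueprint.tui import MockPrompts
    from databricks.sdk import WorkspaceClient
    from databricks.sdk.service.catalog import (
        AzureServicePrincipal,
        StorageCredentialInfo,
        ValidateStorageCredentialResponse,
    )
    from databricks.sdk.service.workspace import GetSecretResponse

    from databricks.labs.ucx.assessment.azure import (
        AzureServicePrincipalCrawler,
        AzureServicePrincipalInfo,
    )
    from databricks.labs.ucx.azure.access import (
        AzureResourcePermissions,
        StoragePermissionMapping,
    )
    from databricks.labs.ucx.azure.credentials import (
        ServicePrincipalMigration,
        StorageCredentialManager,
    )


    def test_run_migrates_spn_with_secret():
        ws = create_autospec(WorkspaceClient)
        # "aGVsbG8gd29ybGQ=" is base64 for "hello world"; run() decodes it into the client secret
        ws.secrets.get_secret.return_value = GetSecretResponse(value="aGVsbG8gd29ybGQ=")
        # no application_id is migrated yet, so nothing gets filtered out
        ws.storage_credentials.list.return_value = []
        ws.storage_credentials.create.return_value = StorageCredentialInfo(
            name="principal_1",
            azure_service_principal=AzureServicePrincipal("directory_id_1", "app_secret1", ""),
        )
        ws.storage_credentials.validate.return_value = ValidateStorageCredentialResponse(results=[])

        resource_permissions = create_autospec(AzureResourcePermissions)
        resource_permissions.load.return_value = [
            StoragePermissionMapping(
                prefix="prefix1",
                client_id="app_secret1",
                principal="principal_1",
                privilege="WRITE_FILES",
                directory_id="directory_id_1",
            )
        ]
        sp_crawler = create_autospec(AzureServicePrincipalCrawler)
        sp_crawler.snapshot.return_value = [
            AzureServicePrincipalInfo("app_secret1", "test_scope", "test_key", "tenant_id_1", "storage1")
        ]

        sp_migration = ServicePrincipalMigration(
            MockInstallation(), ws, resource_permissions, sp_crawler, StorageCredentialManager(ws)
        )
        prompts = MockPrompts(
            {"Above Azure Service Principals will be migrated to UC storage credentials*": "Yes"}
        )

        results = sp_migration.run(prompts)
        assert len(results) == 1
        assert results[0].name == "principal_1"

Because only the public entry point is exercised, renaming or inlining the
private helpers no longer breaks the suite.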
--- src/databricks/labs/ucx/azure/credentials.py | 25 +- tests/integration/azure/test_credentials.py | 55 +- tests/integration/conftest.py | 26 +- tests/unit/azure/test_credentials.py | 504 ++++++++----------- tests/unit/test_cli.py | 4 +- 5 files changed, 266 insertions(+), 348 deletions(-) diff --git a/src/databricks/labs/ucx/azure/credentials.py b/src/databricks/labs/ucx/azure/credentials.py index 7366a704b7..219fc4e809 100644 --- a/src/databricks/labs/ucx/azure/credentials.py +++ b/src/databricks/labs/ucx/azure/credentials.py @@ -92,7 +92,9 @@ def create_storage_credential(self, sp: ServicePrincipalMigrationInfo) -> Storag application_id=sp.permission_mapping.client_id, client_secret=sp.client_secret, ) - comment = f"Created by UCX during migration to UC using Azure Service Principal: {sp.permission_mapping.principal}" + comment = ( + f"Created by UCX during migration to UC using Azure Service Principal: {sp.permission_mapping.principal}" + ) read_only = False if sp.permission_mapping.privilege == Privilege.READ_FILES.value: read_only = True @@ -102,17 +104,17 @@ def create_storage_credential(self, sp: ServicePrincipalMigrationInfo) -> Storag ) def validate_storage_credential( - self, storage_credential, sp_migration: ServicePrincipalMigrationInfo + self, storage_credential: StorageCredentialInfo, sp: ServicePrincipalMigrationInfo ) -> StorageCredentialValidationResult: read_only = False - if sp_migration.permission_mapping.privilege == Privilege.READ_FILES.value: + if sp.permission_mapping.privilege == Privilege.READ_FILES.value: read_only = True # storage_credential validation creates a temp UC external location, which cannot overlap with # existing UC external locations. So add a sub folder to the validation location just in case try: validation = self._ws.storage_credentials.validate( storage_credential_name=storage_credential.name, - url=sp_migration.permission_mapping.prefix, + url=sp.permission_mapping.prefix, read_only=read_only, ) return StorageCredentialValidationResult.from_storage_credential_validation(storage_credential, validation) @@ -284,10 +286,13 @@ def run(self, prompts: Prompts) -> list[StorageCredentialValidationResult]: self._storage_credential_manager.validate_storage_credential(storage_credential, sp) ) - results_file = self.save(execution_result) - logger.info("Completed migration from Azure Service Principal migrated to UC Storage credentials") - print( - f"Completed migration from Azure Service Principal migrated to UC Storage credentials. " - f"Please check {results_file} for validation results" - ) + if execution_result: + results_file = self.save(execution_result) + logger.info("Completed migration from Azure Service Principal migrated to UC Storage credentials") + print( + f"Completed migration from Azure Service Principal migrated to UC Storage credentials. 
" + f"Please check {results_file} for validation results" + ) + else: + logger.info("No Azure Service Principal migrated to UC Storage credentials") return execution_result diff --git a/tests/integration/azure/test_credentials.py b/tests/integration/azure/test_credentials.py index 22f4ceca65..84bab48073 100644 --- a/tests/integration/azure/test_credentials.py +++ b/tests/integration/azure/test_credentials.py @@ -13,8 +13,8 @@ from databricks.labs.ucx.azure.resources import AzureResources from databricks.labs.ucx.hive_metastore import ExternalLocations from tests.integration.conftest import ( - StaticAzureResourcePermissions, - StaticAzureServicePrincipalCrawler, + StaticResourcePermissions, + StaticServicePrincipalCrawler, StaticServicePrincipalMigration, StaticStorageCredentialManager, ) @@ -66,36 +66,27 @@ def inner( azurerm = AzureResources(ws) locations = ExternalLocations(ws, sql_backend, "dont_need_a_schema") - resource_permissions = StaticAzureResourcePermissions( - installation, - ws, - azurerm, - locations, - [ - StoragePermissionMapping( - prefix="abfss://things@labsazurethings.dfs.core.windows.net/avoid_ext_loc_overlap", - client_id=test_info.application_id, - principal=test_info.credential_name, - privilege="READ_FILES" if read_only else "WRITE_FILES", - directory_id=test_info.directory_id, - ) - ], - ) - - sp_crawler = StaticAzureServicePrincipalCrawler( - ws, - sql_backend, - "dont_need_a_schema", - [ - AzureServicePrincipalInfo( - application_id=test_info.application_id, - secret_scope=test_info.secret_scope, - secret_key=test_info.secret_key, - tenant_id="test", - storage_account="test", - ) - ], - ) + permission_mappings = [ + StoragePermissionMapping( + prefix="abfss://things@labsazurethings.dfs.core.windows.net/avoid_ext_loc_overlap", + client_id=test_info.application_id, + principal=test_info.credential_name, + privilege="READ_FILES" if read_only else "WRITE_FILES", + directory_id=test_info.directory_id, + ) + ] + resource_permissions = StaticResourcePermissions(permission_mappings, installation, ws, azurerm, locations) + + sp_infos = [ + AzureServicePrincipalInfo( + application_id=test_info.application_id, + secret_scope=test_info.secret_scope, + secret_key=test_info.secret_key, + tenant_id="test", + storage_account="test", + ) + ] + sp_crawler = StaticServicePrincipalCrawler(sp_infos, ws, sql_backend, "dont_need_a_schema") spn_migration = StaticServicePrincipalMigration( installation, ws, resource_permissions, sp_crawler, StaticStorageCredentialManager(ws, credentials) diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index e7518abaa8..84c7641f00 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -23,9 +23,8 @@ StorageCredentialManager, StorageCredentialValidationResult, ) -from databricks.labs.ucx.azure.resources import AzureResources from databricks.labs.ucx.framework.crawlers import SqlBackend -from databricks.labs.ucx.hive_metastore import ExternalLocations, TablesCrawler +from databricks.labs.ucx.hive_metastore import TablesCrawler from databricks.labs.ucx.hive_metastore.mapping import Rule, TableMapping from databricks.labs.ucx.hive_metastore.tables import Table from databricks.labs.ucx.hive_metastore.udfs import Udf, UdfsCrawler @@ -209,25 +208,18 @@ def list_storage_credentials(self) -> set[str]: return application_ids -class StaticAzureServicePrincipalCrawler(AzureServicePrincipalCrawler): - def __init__(self, ws: WorkspaceClient, sbe: SqlBackend, schema: str, spn_infos: list[AzureServicePrincipalInfo]): 
- super().__init__(ws, sbe, schema) - self.spn_infos = spn_infos +class StaticServicePrincipalCrawler(AzureServicePrincipalCrawler): + def __init__(self, spn_infos: list[AzureServicePrincipalInfo], *args): + super().__init__(*args) + self._spn_infos = spn_infos def snapshot(self) -> list[AzureServicePrincipalInfo]: - return self.spn_infos + return self._spn_infos -class StaticAzureResourcePermissions(AzureResourcePermissions): - def __init__( - self, - installation: Installation, - ws: WorkspaceClient, - azurerm: AzureResources, - lc: ExternalLocations, - permission_mappings: list[StoragePermissionMapping], - ): - super().__init__(installation, ws, azurerm, lc) +class StaticResourcePermissions(AzureResourcePermissions): + def __init__(self, permission_mappings: list[StoragePermissionMapping], *args): + super().__init__(*args) self._permission_mappings = permission_mappings def load(self) -> list[StoragePermissionMapping]: diff --git a/tests/unit/azure/test_credentials.py b/tests/unit/azure/test_credentials.py index cdf3a217aa..04e06eafc1 100644 --- a/tests/unit/azure/test_credentials.py +++ b/tests/unit/azure/test_credentials.py @@ -1,15 +1,15 @@ -import csv import io import logging -from unittest.mock import MagicMock, Mock, create_autospec, patch +import re +from unittest.mock import MagicMock, create_autospec import pytest import yaml +from databricks.labs.blueprint.installation import MockInstallation from databricks.labs.blueprint.tui import MockPrompts from databricks.sdk import WorkspaceClient from databricks.sdk.errors import InternalError, NotFound, ResourceDoesNotExist from databricks.sdk.errors.platform import InvalidParameterValue -from databricks.sdk.service import sql from databricks.sdk.service.catalog import ( AwsIamRole, AzureManagedIdentity, @@ -23,55 +23,20 @@ AzureServicePrincipalCrawler, AzureServicePrincipalInfo, ) -from databricks.labs.ucx.azure.access import StoragePermissionMapping +from databricks.labs.ucx.azure.access import ( + AzureResourcePermissions, + StoragePermissionMapping, +) from databricks.labs.ucx.azure.credentials import ( ServicePrincipalMigration, ServicePrincipalMigrationInfo, + StorageCredentialManager, ) -from tests.unit.framework.mocks import MockBackend @pytest.fixture def ws(): - storage_permission_mappings = [ - { - "prefix": "prefix1", - "client_id": "app_secret1", - "principal": "principal_1", - "privilege": "WRITE_FILES", - "directory_id": "directory_id_1", - }, - { - "prefix": "prefix2", - "client_id": "app_secret2", - "principal": "principal_read", - "privilege": "READ_FILES", - "directory_id": "directory_id_1", - }, - { - "prefix": "prefix3", - "client_id": "app_secret3", - "principal": "principal_write", - "privilege": "WRITE_FILES", - "directory_id": "directory_id_2", - }, - { - "prefix": "overlap_with_external_location", - "client_id": "app_secret4", - "principal": "principal_overlap", - "privilege": "WRITE_FILES", - "directory_id": "directory_id_2", - }, - ] - csv_output = io.StringIO() - fieldnames = storage_permission_mappings[0].keys() - csv_writer = csv.DictWriter(csv_output, fieldnames=fieldnames, dialect="excel") - csv_writer.writeheader() - for mapping in storage_permission_mappings: - csv_writer.writerow(mapping) - state = { - "/Users/foo/.ucx/azure_storage_account_info.csv": csv_output.getvalue(), "/Users/foo/.ucx/config.yml": yaml.dump( { 'version': 2, @@ -85,48 +50,41 @@ def ws(): ), } - def download(path: str) -> io.BytesIO: + def download(path: str) -> io.StringIO: if path not in state: raise NotFound(path) - return 
io.BytesIO(state[path].encode('utf-8')) + return io.StringIO(state[path]) ws = create_autospec(WorkspaceClient) ws.config.host = 'https://localhost' ws.current_user.me().user_name = "foo" ws.workspace.download = download - ws.statement_execution.execute_statement.return_value = sql.ExecuteStatementResponse( - status=sql.StatementStatus(state=sql.StatementState.SUCCEEDED), - manifest=sql.ResultManifest(schema=sql.ResultSchema()), - ) return ws -def test_for_cli_not_azure(caplog, ws): - ws.config.is_azure = False - assert ServicePrincipalMigration.for_cli(ws, MagicMock()) is None - assert "Workspace is not on azure, please run this command on azure databricks workspaces." in caplog.text - - -def test_for_cli_not_prompts(ws): - ws.config.is_azure = True - prompts = MockPrompts({"Have you reviewed the azure_storage_account_info.csv *": "No"}) - assert ServicePrincipalMigration.for_cli(ws, prompts) is None - +def side_effect_create_storage_credential(name, azure_service_principal, comment, read_only): + return StorageCredentialInfo( + name=name, azure_service_principal=azure_service_principal, comment=comment, read_only=read_only + ) -def test_for_cli(ws): - ws.config.is_azure = True - prompts = MockPrompts({"Have you reviewed the azure_storage_account_info.csv *": "Yes"}) - assert isinstance(ServicePrincipalMigration.for_cli(ws, prompts), ServicePrincipalMigration) +def side_effect_validate_storage_credential(storage_credential_name, url, read_only): + if "overlap" in storage_credential_name: + raise InvalidParameterValue + if read_only: + response = {"isDir": True, "results": [{"message": "", "operation": "READ", "result": "PASS"}]} + return ValidateStorageCredentialResponse.from_dict(response) + else: + response = {"isDir": True, "results": [{"message": "", "operation": "WRITE", "result": "PASS"}]} + return ValidateStorageCredentialResponse.from_dict(response) -def test_list_storage_credentials(ws): +@pytest.fixture +def credential_manager(ws): ws.storage_credentials.list.return_value = [ StorageCredentialInfo(aws_iam_role=AwsIamRole(role_arn="arn:aws:iam::123456789012:role/example-role-name")), StorageCredentialInfo( - azure_managed_identity=AzureManagedIdentity( - access_connector_id="/subscriptions/.../providers/Microsoft.Databricks/..." 
- ) + azure_managed_identity=AzureManagedIdentity("/subscriptions/.../providers/Microsoft.Databricks/...") ), StorageCredentialInfo( azure_service_principal=AzureServicePrincipal( @@ -135,99 +93,140 @@ def test_list_storage_credentials(ws): client_secret="secret", ) ), + StorageCredentialInfo( + azure_service_principal=AzureServicePrincipal( + application_id="app_secret2", + directory_id="directory_id_1", + client_secret="secret", + ) + ), ] - sp_migration = ServicePrincipalMigration(MagicMock(), ws, MagicMock(), MagicMock()) + ws.storage_credentials.create.side_effect = side_effect_create_storage_credential + ws.storage_credentials.validate.side_effect = side_effect_validate_storage_credential - expected = {"b6420590-5e1c-4426-8950-a94cbe9b6115"} - sp_migration._list_storage_credentials() + return StorageCredentialManager(ws) - assert expected == sp_migration._list_storage_credentials() +def test_list_storage_credentials(credential_manager): + assert {"b6420590-5e1c-4426-8950-a94cbe9b6115", "app_secret2"} == credential_manager.list_storage_credentials() -def test_list_storage_credentials_for_integration_test(ws): - ws.storage_credentials.list.return_value = [ - StorageCredentialInfo(aws_iam_role=AwsIamRole(role_arn="arn:aws:iam::123456789012:role/example-role-name")), - StorageCredentialInfo( - azure_managed_identity=AzureManagedIdentity( - access_connector_id="/subscriptions/.../providers/Microsoft.Databricks/..." - ) - ), - StorageCredentialInfo( - name="spn_for_integration_test", - azure_service_principal=AzureServicePrincipal( - application_id="b6420590-5e1c-4426-8950-a94cbe9b6115", - directory_id="62e43d7d-df53-4c64-86ed-c2c1a3ac60c3", - client_secret="secret", - ), - ), - ] - # test storage credential: spn_for_integration_test is picked up - # in integration test, we only pick up the existing storage credential created in integration test and ignore the others - sp_migration = ServicePrincipalMigration( - MagicMock(), ws, MagicMock(), MagicMock(), integration_test_flag="spn_for_integration_test" +def test_create_storage_credentials(credential_manager): + sp_1 = ServicePrincipalMigrationInfo( + StoragePermissionMapping( + prefix="prefix1", + client_id="app_secret1", + principal="principal_write", + privilege="WRITE_FILES", + directory_id="directory_id_1", + ), + "test", ) - expected = {"b6420590-5e1c-4426-8950-a94cbe9b6115"} - sp_migration._list_storage_credentials() - assert expected == sp_migration._list_storage_credentials() - - # test storage credential is not picked up - # if integration test does not create storage credential, we use dummy integration_test_flag to filter out other existing storage credentials - sp_migration = ServicePrincipalMigration( - MagicMock(), ws, MagicMock(), MagicMock(), integration_test_flag="other_spn" + sp_2 = ServicePrincipalMigrationInfo( + StoragePermissionMapping( + prefix="prefix2", + client_id="app_secret2", + principal="principal_read", + privilege="READ_FILES", + directory_id="directory_id_1", + ), + "test", ) - sp_migration._list_storage_credentials() - assert {} == sp_migration._list_storage_credentials() + storage_credential = credential_manager.create_storage_credential(sp_1) + assert sp_1.permission_mapping.principal == storage_credential.name + assert storage_credential.read_only is False -@pytest.mark.parametrize( - "secret_bytes_value, expected_return", - [ - (GetSecretResponse(value="aGVsbG8gd29ybGQ="), "hello world"), - (GetSecretResponse(value="T2zhLCBNdW5kbyE="), None), - (GetSecretResponse(value=None), None), - ], -) -def 
test_read_secret_value_decode(ws, secret_bytes_value, expected_return): - ws.secrets.get_secret.return_value = secret_bytes_value + storage_credential = credential_manager.create_storage_credential(sp_2) + assert sp_2.permission_mapping.principal == storage_credential.name + assert storage_credential.read_only is True - sp_migration = ServicePrincipalMigration(MagicMock(), ws, MagicMock(), MagicMock()) - assert sp_migration._read_databricks_secret("test_scope", "test_key", "000") == expected_return +def test_validate_storage_credentials(credential_manager): + sp_1 = ServicePrincipalMigrationInfo( + StoragePermissionMapping( + prefix="prefix1", + client_id="app_secret1", + principal="principal_1", + privilege="WRITE_FILES", + directory_id="directory_id_1", + ), + "test", + ) + sc_1 = StorageCredentialInfo( + name=sp_1.permission_mapping.principal, + azure_service_principal=AzureServicePrincipal( + sp_1.permission_mapping.directory_id, sp_1.permission_mapping.client_id, sp_1.client_secret + ), + read_only=False, + ) -@pytest.mark.parametrize( - "exception, expected_log, expected_return", - [ - (ResourceDoesNotExist(), "Will not reuse this client_secret", None), - (InternalError(), "Will not reuse this client_secret", None), - ], -) -def test_read_secret_read_exception(caplog, ws, exception, expected_log, expected_return): - caplog.set_level(logging.INFO) - ws.secrets.get_secret.side_effect = exception + sp_2 = ServicePrincipalMigrationInfo( + StoragePermissionMapping( + prefix="prefix2", + client_id="app_secret2", + principal="principal_read", + privilege="READ_FILES", + directory_id="directory_id_1", + ), + "test", + ) + sc_2 = StorageCredentialInfo( + name=sp_2.permission_mapping.principal, + azure_service_principal=AzureServicePrincipal( + sp_2.permission_mapping.directory_id, sp_2.permission_mapping.client_id, sp_2.client_secret + ), + read_only=True, + ) - sp_migration = ServicePrincipalMigration(MagicMock(), ws, MagicMock(), MagicMock()) - secret_value = sp_migration._read_databricks_secret("test_scope", "test_key", "000") + sp_3 = ServicePrincipalMigrationInfo( + StoragePermissionMapping( + prefix="overlap_with_external_location", + client_id="app_secret4", + principal="principal_overlap", + privilege="WRITE_FILES", + directory_id="directory_id_2", + ), + "test", + ) + sc_3 = StorageCredentialInfo( + name=sp_3.permission_mapping.principal, + azure_service_principal=AzureServicePrincipal( + sp_3.permission_mapping.directory_id, sp_3.permission_mapping.client_id, sp_3.client_secret + ), + ) - assert expected_log in caplog.text - assert secret_value == expected_return + # validate normal storage credential + validation = credential_manager.validate_storage_credential(sc_1, sp_1) + assert validation.read_only is False + assert validation.name == sp_1.permission_mapping.principal + for result in validation.results: + if result.operation.value == "WRITE": + assert result.result.value == "PASS" + + # validate read-only storage credential + validation = credential_manager.validate_storage_credential(sc_2, sp_2) + assert validation.read_only is True + assert validation.name == sp_2.permission_mapping.principal + for result in validation.results: + if result.operation.value == "READ": + assert result.result.value == "PASS" + + # prefix used for validation overlaps with existing external location + validation = credential_manager.validate_storage_credential(sc_3, sp_3) + assert ( + validation.results[0].message + == "The validation is skipped because an existing external location overlaps with 
the location used for validation." + ) -def test_fetch_client_secret(ws): +@pytest.fixture +def sp_migration(ws, credential_manager): ws.secrets.get_secret.return_value = GetSecretResponse(value="aGVsbG8gd29ybGQ=") - crawled_sp = [ - AzureServicePrincipalInfo("app_secret1", "test_scope", "test_key", "tenant_id_1", "storage1"), - AzureServicePrincipalInfo("app_secret2", "test_scope", "test_key", "tenant_id_1", "storage1"), - AzureServicePrincipalInfo("app_no_secret1", "", "", "tenant_id_1", "storage1"), - AzureServicePrincipalInfo("app_no_secret2", "test_scope", "", "tenant_id_1", "storage1"), - ] - sp_crawler = AzureServicePrincipalCrawler(ws, MockBackend(), "ucx") - sp_crawler._try_fetch = Mock(return_value=crawled_sp) - sp_crawler._crawl = Mock(return_value=crawled_sp) - - sp_to_be_checked = [ + rp = create_autospec(AzureResourcePermissions) + rp.load.return_value = [ StoragePermissionMapping( prefix="prefix1", client_id="app_secret1", @@ -238,192 +237,123 @@ def test_fetch_client_secret(ws): StoragePermissionMapping( prefix="prefix2", client_id="app_secret2", - principal="principal_2", + principal="principal_read", privilege="READ_FILES", directory_id="directory_id_1", ), StoragePermissionMapping( prefix="prefix3", - client_id="app_no_secret1", - principal="principal_3", + client_id="app_secret3", + principal="principal_write", privilege="WRITE_FILES", directory_id="directory_id_2", ), StoragePermissionMapping( - prefix="prefix4", - client_id="app_no_secret2", - principal="principal_4", - privilege="READ_FILES", + prefix="overlap_with_external_location", + client_id="app_secret4", + principal="principal_overlap", + privilege="WRITE_FILES", directory_id="directory_id_2", ), ] - expected_sp_list = [ - ServicePrincipalMigrationInfo( - StoragePermissionMapping( - prefix="prefix1", - client_id="app_secret1", - principal="principal_1", - privilege="WRITE_FILES", - directory_id="directory_id_1", - ), - "hello world", - ), - ServicePrincipalMigrationInfo( - StoragePermissionMapping( - prefix="prefix2", - client_id="app_secret2", - principal="principal_2", - privilege="READ_FILES", - directory_id="directory_id_1", - ), - "hello world", - ), + sp_crawler = create_autospec(AzureServicePrincipalCrawler) + sp_crawler.snapshot.return_value = [ + AzureServicePrincipalInfo("app_secret1", "test_scope", "test_key", "tenant_id_1", "storage1"), + AzureServicePrincipalInfo("app_secret2", "test_scope", "test_key", "tenant_id_1", "storage1"), + AzureServicePrincipalInfo("app_secret3", "test_scope", "", "tenant_id_2", "storage1"), + AzureServicePrincipalInfo("app_secret4", "", "", "tenant_id_2", "storage1"), ] - sp_migration = ServicePrincipalMigration(MagicMock(), ws, MagicMock(), sp_crawler) - filtered_sp_list = sp_migration._fetch_client_secret(sp_to_be_checked) - - assert filtered_sp_list == expected_sp_list + return ServicePrincipalMigration(MockInstallation(), ws, rp, sp_crawler, credential_manager) -def test_print_action_plan(caplog): - caplog.set_level(logging.INFO) - sp_list_with_secret = [ - ServicePrincipalMigrationInfo( - StoragePermissionMapping( - prefix="prefix1", - client_id="app_secret1", - principal="principal_1", - privilege="WRITE_FILES", - directory_id="directory_id_1", - ), - "hello world", - ) - ] - sp_migration = ServicePrincipalMigration(MagicMock(), MagicMock(), MagicMock(), MagicMock()) - sp_migration._print_action_plan(sp_list_with_secret) - - expected_print = ( - "Service Principal name: principal_1, " - "application_id: app_secret1, " - "privilege WRITE_FILES " - "on location 
prefix1" - ) - assert expected_print in caplog.text +def test_for_cli_not_azure(caplog, ws): + ws.config.is_azure = False + with pytest.raises(SystemExit): + ServicePrincipalMigration.for_cli(ws, MagicMock()) + assert "Workspace is not on azure, please run this command on azure databricks workspaces." in caplog.text -def test_generate_migration_list(caplog, mocker, ws): - caplog.set_level(logging.INFO) +def test_for_cli_not_prompts(ws): ws.config.is_azure = True - ws.secrets.get_secret.return_value = GetSecretResponse(value="aGVsbG8gd29ybGQ=") - ws.storage_credentials.list.return_value = [ - StorageCredentialInfo( - azure_service_principal=AzureServicePrincipal( - application_id="app_secret1", - directory_id="directory_id_1", - client_secret="hello world", - ) - ) - ] + prompts = MockPrompts({"Have you reviewed the azure_storage_account_info.csv *": "No"}) + with pytest.raises(SystemExit): + ServicePrincipalMigration.for_cli(ws, prompts) + +def test_for_cli(ws): + ws.config.is_azure = True prompts = MockPrompts({"Have you reviewed the azure_storage_account_info.csv *": "Yes"}) - mocker.patch( - "databricks.labs.ucx.assessment.azure.AzureServicePrincipalCrawler.snapshot", - return_value=[ - AzureServicePrincipalInfo("app_secret1", "test_scope", "test_key", "tenant_id_1", "storage1"), - AzureServicePrincipalInfo("app_secret2", "test_scope", "test_key", "tenant_id_1", "storage1"), - ], - ) + assert isinstance(ServicePrincipalMigration.for_cli(ws, prompts), ServicePrincipalMigration) - sp_migration = ServicePrincipalMigration.for_cli(ws, prompts) - sp_migration._generate_migration_list() - assert "app_secret2" in caplog.text - assert "app_secret1" not in caplog.text +@pytest.mark.parametrize( + "secret_bytes_value, num_migrated", + [ + (GetSecretResponse(value="aGVsbG8gd29ybGQ="), 1), + (GetSecretResponse(value="T2zhLCBNdW5kbyE="), 0), + (GetSecretResponse(value=None), 0), + ], +) +def test_read_secret_value_decode(ws, sp_migration, secret_bytes_value, num_migrated): + ws.secrets.get_secret.return_value = secret_bytes_value + prompts = MockPrompts({"Above Azure Service Principals will be migrated to UC storage credentials*": "Yes"}) + assert len(sp_migration.run(prompts)) == num_migrated -def test_execute_migration_no_confirmation(mocker, ws): - ws.config.is_azure = True - prompts = MockPrompts( - { - "Have you reviewed the azure_storage_account_info.csv *": "Yes", - "Above Azure Service Principals will be migrated to UC storage credentials*": "No", - } - ) - mocker.patch("databricks.labs.ucx.azure.azure_credentials.ServicePrincipalMigration._generate_migration_list") +@pytest.mark.parametrize( + "exception, log_pattern, num_migrated", + [ + (ResourceDoesNotExist(), r"Secret.* does not exists", 0), + (InternalError(), r"InternalError while reading secret .*", 0), + ], +) +def test_read_secret_read_exception(caplog, ws, sp_migration, exception, log_pattern, num_migrated): + caplog.set_level(logging.INFO) + ws.secrets.get_secret.side_effect = exception - with patch("databricks.labs.ucx.azure.azure_credentials.ServicePrincipalMigration._create_storage_credential") as c: - sp_migration = ServicePrincipalMigration.for_cli(ws, prompts) - sp_migration.run(prompts) - c.assert_not_called() + prompts = MockPrompts({"Above Azure Service Principals will be migrated to UC storage credentials*": "Yes"}) + assert len(sp_migration.run(prompts)) == num_migrated + assert re.search(log_pattern, caplog.text) -def side_effect_create_storage_credential(name, azure_service_principal, comment, read_only): - return 
StorageCredentialInfo( - name=name, azure_service_principal=azure_service_principal, comment=comment, read_only=read_only - ) +def test_print_action_plan(caplog, ws, sp_migration): + caplog.set_level(logging.INFO) + ws.secrets.get_secret.return_value = GetSecretResponse(value="aGVsbG8gd29ybGQ=") -def side_effect_validate_storage_credential(storage_credential_name, url, read_only): - if "overlap" in storage_credential_name: - raise InvalidParameterValue - if "read" in storage_credential_name: - response = {"isDir": True, "results": [{"message": "", "operation": "READ", "result": "PASS"}]} - return ValidateStorageCredentialResponse.from_dict(response) - else: - response = {"isDir": True, "results": [{"message": "", "operation": "WRITE", "result": "PASS"}]} - return ValidateStorageCredentialResponse.from_dict(response) + prompts = MockPrompts({"Above Azure Service Principals will be migrated to UC storage credentials*": "Yes"}) + sp_migration.run(prompts) -def test_execute_migration(caplog, capsys, mocker, ws): - ws.config.is_azure = True - ws.secrets.get_secret.return_value = GetSecretResponse(value="aGVsbG8gd29ybGQ=") - ws.storage_credentials.list.return_value = [ - StorageCredentialInfo( - azure_service_principal=AzureServicePrincipal( - application_id="app_secret1", - directory_id="directory_id_1", - client_secret="hello world", - ) - ) - ] - ws.storage_credentials.create.side_effect = side_effect_create_storage_credential - ws.storage_credentials.validate.side_effect = side_effect_validate_storage_credential + log_pattern = r"Service Principal name: .*" "application_id: .*" "privilege .*" "on location .*" + for msg in caplog.messages: + if re.search(log_pattern, msg): + assert True + return + assert False, "Action plan is not logged" + +def test_run_without_confirmation(ws, sp_migration): + ws.secrets.get_secret.return_value = GetSecretResponse(value="aGVsbG8gd29ybGQ=") prompts = MockPrompts( { - "Have you reviewed the azure_storage_account_info.csv *": "Yes", - "Above Azure Service Principals will be migrated to UC storage credentials*": "Yes", + "Above Azure Service Principals will be migrated to UC storage credentials*": "No", } ) - mocker.patch( - "databricks.labs.ucx.assessment.azure.AzureServicePrincipalCrawler.snapshot", - return_value=[ - AzureServicePrincipalInfo("app_secret1", "test_scope", "test_key", "tenant_id_1", "storage1"), - AzureServicePrincipalInfo("app_secret2", "test_scope", "test_key", "tenant_id_1", "storage1"), - AzureServicePrincipalInfo("app_secret3", "test_scope", "test_key", "tenant_id_2", "storage1"), - AzureServicePrincipalInfo("app_secret4", "test_scope", "test_key", "tenant_id_2", "storage1"), - ], - ) + assert sp_migration.run(prompts) == [] - sp_migration = ServicePrincipalMigration.for_cli(ws, prompts) - sp_migration._installation.save = MagicMock() - sp_migration.run(prompts) - # assert migration is complete - assert "Completed migration" in capsys.readouterr().out - # assert the validation exception is caught when prefix overlaps with existing external location - assert "Skip the validation" in caplog.text - # assert validation results - save_args = sp_migration._installation.save.call_args.args[0] - assert any("The validation is skipped" in arg.results[0].message for arg in save_args) - assert any( - ("READ" in arg.results[0].operation.value) and ("PASS" in arg.results[0].result.value) for arg in save_args - ) - assert any( - ("WRITE" in arg.results[0].operation.value) and ("PASS" in arg.results[0].result.value) for arg in save_args - ) +def 
test_run(ws, sp_migration): + prompts = MockPrompts({"Above Azure Service Principals will be migrated to UC storage credentials*": "Yes"}) + + results = sp_migration.run(prompts) + for result in results: + if result.name != "principal_1": + assert ( + False + ), "Service principal with no client_secret in databricks secret or already be used in storage credential should not be migrated" diff --git a/tests/unit/test_cli.py b/tests/unit/test_cli.py index 27cbbde3a8..38726d32c7 100644 --- a/tests/unit/test_cli.py +++ b/tests/unit/test_cli.py @@ -313,8 +313,8 @@ def test_migrate_azure_service_principals(ws): with ( patch("databricks.labs.blueprint.tui.Prompts.confirm", return_value=True), patch("databricks.labs.blueprint.installation.Installation.load"), - patch("databricks.labs.blueprint.installation.Installation.save") as s, + patch("databricks.labs.blueprint.installation.Installation.save"), ): migrate_azure_service_principals(ws) - s.assert_called_once() + ws.storage_credentials.list.assert_called() From 64a5f75d6f04a2739063a87cef6a33b85f3e0f5a Mon Sep 17 00:00:00 2001 From: Ziyuan Qin Date: Thu, 15 Feb 2024 21:25:48 -0800 Subject: [PATCH 48/76] fix import error after merge from main --- tests/integration/azure/test_credentials.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/integration/azure/test_credentials.py b/tests/integration/azure/test_credentials.py index 84bab48073..203c12060d 100644 --- a/tests/integration/azure/test_credentials.py +++ b/tests/integration/azure/test_credentials.py @@ -7,7 +7,7 @@ from databricks.labs.blueprint.tui import MockPrompts from databricks.labs.ucx.assessment.azure import AzureServicePrincipalInfo -from databricks.labs.ucx.assessment.crawlers import _SECRET_PATTERN +from databricks.labs.ucx.assessment.crawlers import SECRET_PATTERN from databricks.labs.ucx.azure.access import StoragePermissionMapping from databricks.labs.ucx.azure.credentials import StorageCredentialValidationResult from databricks.labs.ucx.azure.resources import AzureResources @@ -42,7 +42,7 @@ def extract_test_info(ws, debug_env, make_random): end_point = spark_conf.get("fs.azure.account.oauth2.client.endpoint") directory_id = end_point.split("/")[3] - secret_matched = re.search(_SECRET_PATTERN, spark_conf.get("fs.azure.account.oauth2.client.secret")) + secret_matched = re.search(SECRET_PATTERN, spark_conf.get("fs.azure.account.oauth2.client.secret")) if secret_matched: secret_scope, secret_key = ( secret_matched.group(1).split("/")[1], From ae546c4811ca016d9be24e8897a436fb18fdbb58 Mon Sep 17 00:00:00 2001 From: Ziyuan Qin Date: Fri, 16 Feb 2024 09:28:31 -0800 Subject: [PATCH 49/76] change _print_action_plan to take StoragePermissionMapping as input to make the function no visibility to client_secret --- src/databricks/labs/ucx/azure/credentials.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/src/databricks/labs/ucx/azure/credentials.py b/src/databricks/labs/ucx/azure/credentials.py index 219fc4e809..bcbffd2961 100644 --- a/src/databricks/labs/ucx/azure/credentials.py +++ b/src/databricks/labs/ucx/azure/credentials.py @@ -240,14 +240,14 @@ def _fetch_client_secret(self, sp_list: list[StoragePermissionMapping]) -> list[ sp_list_with_secret.append(ServicePrincipalMigrationInfo(sp, sp_info_with_client_secret[sp.client_id])) return sp_list_with_secret - def _print_action_plan(self, sp_list: list[ServicePrincipalMigrationInfo]): + def _print_action_plan(self, sp_list: list[StoragePermissionMapping]): # print action plan 
to console for customer to review. for sp in sp_list: logger.info( - f"Service Principal name: {sp.permission_mapping.principal}, " - f"application_id: {sp.permission_mapping.client_id}, " - f"privilege {sp.permission_mapping.privilege} " - f"on location {sp.permission_mapping.prefix}" + f"Service Principal name: {sp.principal}, " + f"application_id: {sp.client_id}, " + f"privilege {sp.privilege} " + f"on location {sp.prefix}" ) def _generate_migration_list(self) -> list[ServicePrincipalMigrationInfo]: @@ -262,8 +262,12 @@ def _generate_migration_list(self) -> list[ServicePrincipalMigrationInfo]: filtered_sp_list = [sp for sp in sp_list if sp.client_id not in sc_set] # fetch sp client_secret if any sp_list_with_secret = self._fetch_client_secret(filtered_sp_list) + # output the action plan for customer to confirm - self._print_action_plan(sp_list_with_secret) + # but first make a copy of the list and strip out the client_secret + sp_candidates = [sp.permission_mapping for sp in sp_list_with_secret] + self._print_action_plan(sp_candidates) + return sp_list_with_secret def save(self, migration_results: list[StorageCredentialValidationResult]) -> str: From b86d1b0440de43cc4453f2c0b2d3abbb74adf977 Mon Sep 17 00:00:00 2001 From: Ziyuan Qin Date: Fri, 16 Feb 2024 09:45:30 -0800 Subject: [PATCH 50/76] decompose azure_service_principal returned by the storage credential validation into just application_id and directory_id, and pass them to the final migration result. So the client_secret field is stripped out from migration result, even though it has None value. --- src/databricks/labs/ucx/azure/credentials.py | 35 ++++++++++---------- 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/src/databricks/labs/ucx/azure/credentials.py b/src/databricks/labs/ucx/azure/credentials.py index bcbffd2961..05feee3efa 100644 --- a/src/databricks/labs/ucx/azure/credentials.py +++ b/src/databricks/labs/ucx/azure/credentials.py @@ -38,31 +38,29 @@ class ServicePrincipalMigrationInfo: @dataclass class StorageCredentialValidationResult: name: str | None = None - azure_service_principal: AzureServicePrincipal | None = None + application_id: str | None = None + directory_id: str | None = None created_by: str | None = None read_only: bool | None = None + validated_on: str | None = None results: list[ValidationResult] | None = None @classmethod - def from_storage_credential_validation( - cls, storage_credential: StorageCredentialInfo, validation: ValidateStorageCredentialResponse + def from_validation( + cls, storage_credential: StorageCredentialInfo, validation: ValidateStorageCredentialResponse, prefix: str ): if storage_credential.azure_service_principal: - # Guard rail to explicitly remove the client_secret, just in case the azure_service_principal - # in StorageCredentialInfo returned by WorkspaceClient.storage_credentials.create exposes the - # client_secret due to potential bugs in the future. 
- service_principal = AzureServicePrincipal( - storage_credential.azure_service_principal.directory_id, - storage_credential.azure_service_principal.application_id, - "", - ) + application_id = storage_credential.azure_service_principal.application_id + directory_id = storage_credential.azure_service_principal.directory_id return cls( - name=storage_credential.name, - azure_service_principal=service_principal, - created_by=storage_credential.created_by, - read_only=storage_credential.read_only, - results=validation.results, + storage_credential.name, + application_id, + directory_id, + storage_credential.created_by, + storage_credential.read_only, + prefix, + validation.results, ) @@ -117,12 +115,12 @@ def validate_storage_credential( url=sp.permission_mapping.prefix, read_only=read_only, ) - return StorageCredentialValidationResult.from_storage_credential_validation(storage_credential, validation) + return StorageCredentialValidationResult.from_validation(storage_credential, validation, sp.permission_mapping.prefix) except InvalidParameterValue: logger.warning( "There is an existing external location overlaps with the prefix that is mapped to the service principal and used for validating the migrated storage credential. Skip the validation" ) - return StorageCredentialValidationResult.from_storage_credential_validation( + return StorageCredentialValidationResult.from_validation( storage_credential, ValidateStorageCredentialResponse( is_dir=None, @@ -132,6 +130,7 @@ def validate_storage_credential( ) ], ), + sp.permission_mapping.prefix ) From 2b72a2f54b586e985d35da4df6fea607648c3cd0 Mon Sep 17 00:00:00 2001 From: Ziyuan Qin Date: Fri, 16 Feb 2024 10:02:38 -0800 Subject: [PATCH 51/76] small re-format --- src/databricks/labs/ucx/azure/credentials.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/databricks/labs/ucx/azure/credentials.py b/src/databricks/labs/ucx/azure/credentials.py index 05feee3efa..ab8472cd5d 100644 --- a/src/databricks/labs/ucx/azure/credentials.py +++ b/src/databricks/labs/ucx/azure/credentials.py @@ -115,7 +115,9 @@ def validate_storage_credential( url=sp.permission_mapping.prefix, read_only=read_only, ) - return StorageCredentialValidationResult.from_validation(storage_credential, validation, sp.permission_mapping.prefix) + return StorageCredentialValidationResult.from_validation( + storage_credential, validation, sp.permission_mapping.prefix + ) except InvalidParameterValue: logger.warning( "There is an existing external location overlaps with the prefix that is mapped to the service principal and used for validating the migrated storage credential. 
Skip the validation" @@ -130,7 +132,7 @@ def validate_storage_credential( ) ], ), - sp.permission_mapping.prefix + sp.permission_mapping.prefix, ) From f1c446db89a01a933333ccabb10e06f4d5c16de4 Mon Sep 17 00:00:00 2001 From: Ziyuan Qin Date: Fri, 16 Feb 2024 12:26:46 -0800 Subject: [PATCH 52/76] clean up unnecessary keyword parameter in make_storage_credential_from_spn fixture --- src/databricks/labs/ucx/mixins/fixtures.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/databricks/labs/ucx/mixins/fixtures.py b/src/databricks/labs/ucx/mixins/fixtures.py index 0479f1182b..740a45b9b2 100644 --- a/src/databricks/labs/ucx/mixins/fixtures.py +++ b/src/databricks/labs/ucx/mixins/fixtures.py @@ -1079,19 +1079,19 @@ def remove(query: Query): @pytest.fixture def make_storage_credential_from_spn(ws): def create( - *, name: str, application_id: str, client_secret: str, directory_id: str, read_only=False + *, credential_name: str, application_id: str, client_secret: str, directory_id: str, read_only=False ) -> StorageCredentialInfo: azure_service_principal = AzureServicePrincipal( - directory_id=directory_id, - application_id=application_id, - client_secret=client_secret, + directory_id, + application_id, + client_secret, ) storage_credential = ws.storage_credentials.create( - name=name, azure_service_principal=azure_service_principal, read_only=read_only + credential_name, azure_service_principal=azure_service_principal, read_only=read_only ) return storage_credential def remove(storage_credential: StorageCredentialInfo): - ws.storage_credentials.delete(name=storage_credential.name, force=True) + ws.storage_credentials.delete(storage_credential.name, force=True) yield from factory("storage_credential_from_spn", create, remove) From f26886ed9b4a26d40ee70d8e24156ddfe2ade53a Mon Sep 17 00:00:00 2001 From: Ziyuan Qin Date: Fri, 16 Feb 2024 12:28:45 -0800 Subject: [PATCH 53/76] fix integration test due to parameter change --- tests/integration/azure/test_credentials.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/azure/test_credentials.py b/tests/integration/azure/test_credentials.py index 203c12060d..16860e2270 100644 --- a/tests/integration/azure/test_credentials.py +++ b/tests/integration/azure/test_credentials.py @@ -101,7 +101,7 @@ def inner( def test_spn_migration_existed_storage_credential(extract_test_info, make_storage_credential_from_spn, run_migration): # create a storage credential for this test make_storage_credential_from_spn( - name=extract_test_info.credential_name, + credential_name=extract_test_info.credential_name, application_id=extract_test_info.application_id, client_secret=extract_test_info.client_secret, directory_id=extract_test_info.directory_id, From d9675d23f6125ef107f7dc5658d129d7475c4e86 Mon Sep 17 00:00:00 2001 From: Ziyuan Qin Date: Tue, 20 Feb 2024 00:14:59 -0800 Subject: [PATCH 54/76] remove patch("databricks.*") in unit test for migrate_azure_service_principals, except for prompts --- tests/unit/test_cli.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/tests/unit/test_cli.py b/tests/unit/test_cli.py index 38726d32c7..cd10aa563e 100644 --- a/tests/unit/test_cli.py +++ b/tests/unit/test_cli.py @@ -1,7 +1,8 @@ +import csv import io import json import subprocess -from unittest.mock import create_autospec, patch +from unittest.mock import create_autospec, patch, MagicMock import pytest import yaml @@ -44,11 +45,14 @@ def ws(): } ), '/Users/foo/.ucx/state.json': 
json.dumps({'resources': {'jobs': {'assessment': '123'}}}), + "/Users/foo/.ucx/azure_storage_account_info.csv": "prefix,client_id,principal,privilege,directory_id\ntest,test,test,test,test" } - def download(path: str) -> io.StringIO: + def download(path: str) -> io.StringIO | io.BytesIO: if path not in state: raise NotFound(path) + if ".csv" in path: + return io.BytesIO(state[path].encode('utf-8')) return io.StringIO(state[path]) workspace_client = create_autospec(WorkspaceClient) @@ -310,11 +314,8 @@ def test_save_storage_and_principal_gcp(ws, caplog): def test_migrate_azure_service_principals(ws): ws.config.is_azure = True - with ( - patch("databricks.labs.blueprint.tui.Prompts.confirm", return_value=True), - patch("databricks.labs.blueprint.installation.Installation.load"), - patch("databricks.labs.blueprint.installation.Installation.save"), - ): + ws.workspace.upload.return_value = "test" + with patch("databricks.labs.blueprint.tui.Prompts.confirm", return_value=True): migrate_azure_service_principals(ws) ws.storage_credentials.list.assert_called() From 718ca75fa452fbfea635fcc5bc60b80e1a900fa7 Mon Sep 17 00:00:00 2001 From: Ziyuan Qin Date: Tue, 20 Feb 2024 11:25:09 -0800 Subject: [PATCH 55/76] fix failed tests after rebase --- tests/integration/azure/test_credentials.py | 10 +++------- tests/unit/azure/test_access.py | 6 ++++-- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/tests/integration/azure/test_credentials.py b/tests/integration/azure/test_credentials.py index 16860e2270..335a8cb9e9 100644 --- a/tests/integration/azure/test_credentials.py +++ b/tests/integration/azure/test_credentials.py @@ -7,7 +7,6 @@ from databricks.labs.blueprint.tui import MockPrompts from databricks.labs.ucx.assessment.azure import AzureServicePrincipalInfo -from databricks.labs.ucx.assessment.crawlers import SECRET_PATTERN from databricks.labs.ucx.azure.access import StoragePermissionMapping from databricks.labs.ucx.azure.credentials import StorageCredentialValidationResult from databricks.labs.ucx.azure.resources import AzureResources @@ -42,12 +41,9 @@ def extract_test_info(ws, debug_env, make_random): end_point = spark_conf.get("fs.azure.account.oauth2.client.endpoint") directory_id = end_point.split("/")[3] - secret_matched = re.search(SECRET_PATTERN, spark_conf.get("fs.azure.account.oauth2.client.secret")) - if secret_matched: - secret_scope, secret_key = ( - secret_matched.group(1).split("/")[1], - secret_matched.group(1).split("/")[2], - ) + secret_matched = re.findall(r"{{secrets\/(.*)\/(.*)}}", spark_conf.get("fs.azure.account.oauth2.client.secret")) + secret_scope = secret_matched[0][0] + secret_key = secret_matched[0][1] assert secret_scope is not None assert secret_key is not None diff --git a/tests/unit/azure/test_access.py b/tests/unit/azure/test_access.py index 5a732e9009..04b84d0e72 100644 --- a/tests/unit/azure/test_access.py +++ b/tests/unit/azure/test_access.py @@ -68,13 +68,13 @@ def test_save_spn_permissions_valid_azure_storage_account(): AzureRoleAssignment( resource=AzureResource(f'{containers}/container1'), scope=AzureResource(f'{containers}/container1'), - principal=Principal('a', 'b', 'c'), + principal=Principal('a', 'b', 'c', '0000-0000'), role_name='Storage Blob Data Contributor', ), AzureRoleAssignment( resource=AzureResource(f'{storage_accounts}/storage1'), scope=AzureResource(f'{storage_accounts}/storage1'), - principal=Principal('d', 'e', 'f'), + principal=Principal('d', 'e', 'f', '0000-0000'), role_name='Button Clicker', ), ] @@ -88,12 +88,14 @@ def 
test_save_spn_permissions_valid_azure_storage_account(): 'prefix': 'abfss://container1@storage1.dfs.core.windows.net/', 'principal': 'b', 'privilege': 'WRITE_FILES', + 'directory_id': '0000-0000', }, { 'client_id': 'a', 'prefix': 'abfss://container2@storage1.dfs.core.windows.net/', 'principal': 'b', 'privilege': 'WRITE_FILES', + 'directory_id': '0000-0000', }, ], ) From dba32af3b97b23d181ecf2fa6fecd75f9744c57e Mon Sep 17 00:00:00 2001 From: Ziyuan Qin Date: Tue, 20 Feb 2024 11:32:53 -0800 Subject: [PATCH 56/76] apply updates from make fmt. Correct AzureResourcePermissions.load to use _installation.load instead of save --- src/databricks/labs/ucx/assessment/azure.py | 1 - src/databricks/labs/ucx/azure/access.py | 4 ---- tests/unit/test_cli.py | 6 ++---- 3 files changed, 2 insertions(+), 9 deletions(-) diff --git a/src/databricks/labs/ucx/assessment/azure.py b/src/databricks/labs/ucx/assessment/azure.py index f6fc8eeba1..cee554d58a 100644 --- a/src/databricks/labs/ucx/assessment/azure.py +++ b/src/databricks/labs/ucx/assessment/azure.py @@ -170,4 +170,3 @@ def _get_azure_spn_from_config(self, config: dict) -> set[AzureServicePrincipalI ) ) return set_service_principals - diff --git a/src/databricks/labs/ucx/azure/access.py b/src/databricks/labs/ucx/azure/access.py index 113f58ede0..78a8f7f917 100644 --- a/src/databricks/labs/ucx/azure/access.py +++ b/src/databricks/labs/ucx/azure/access.py @@ -103,7 +103,3 @@ def _get_storage_accounts(self) -> list[str]: if storage_acct not in storage_accounts: storage_accounts.append(storage_acct) return storage_accounts - - def load(self) -> list[StoragePermissionMapping]: - """Load StoragePermissionMapping info from azure_storage_account_info.csv""" - return self._installation.load(list[StoragePermissionMapping], filename=self._filename) diff --git a/tests/unit/test_cli.py b/tests/unit/test_cli.py index cd10aa563e..d1d1d62ee0 100644 --- a/tests/unit/test_cli.py +++ b/tests/unit/test_cli.py @@ -1,8 +1,7 @@ -import csv import io import json import subprocess -from unittest.mock import create_autospec, patch, MagicMock +from unittest.mock import create_autospec, patch import pytest import yaml @@ -45,7 +44,7 @@ def ws(): } ), '/Users/foo/.ucx/state.json': json.dumps({'resources': {'jobs': {'assessment': '123'}}}), - "/Users/foo/.ucx/azure_storage_account_info.csv": "prefix,client_id,principal,privilege,directory_id\ntest,test,test,test,test" + "/Users/foo/.ucx/azure_storage_account_info.csv": "prefix,client_id,principal,privilege,directory_id\ntest,test,test,test,test", } def download(path: str) -> io.StringIO | io.BytesIO: @@ -318,4 +317,3 @@ def test_migrate_azure_service_principals(ws): with patch("databricks.labs.blueprint.tui.Prompts.confirm", return_value=True): migrate_azure_service_principals(ws) ws.storage_credentials.list.assert_called() - From 958c37a623a2531c8fd86061ca3f65568cfe3bc9 Mon Sep 17 00:00:00 2001 From: Ziyuan Qin Date: Tue, 20 Feb 2024 12:21:40 -0800 Subject: [PATCH 57/76] update variable name sp to spn based on pylint suggestion --- src/databricks/labs/ucx/azure/credentials.py | 46 ++++++++++---------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/src/databricks/labs/ucx/azure/credentials.py b/src/databricks/labs/ucx/azure/credentials.py index ab8472cd5d..1313c9d7c5 100644 --- a/src/databricks/labs/ucx/azure/credentials.py +++ b/src/databricks/labs/ucx/azure/credentials.py @@ -82,19 +82,19 @@ def list_storage_credentials(self) -> set[str]: logger.info(f"Found {len(application_ids)} distinct service principals already 
used in UC storage credentials") return application_ids - def create_storage_credential(self, sp: ServicePrincipalMigrationInfo) -> StorageCredentialInfo: + def create_storage_credential(self, spn: ServicePrincipalMigrationInfo) -> StorageCredentialInfo: # prepare the storage credential properties - name = sp.permission_mapping.principal + name = spn.permission_mapping.principal service_principal = AzureServicePrincipal( - directory_id=sp.permission_mapping.directory_id, - application_id=sp.permission_mapping.client_id, - client_secret=sp.client_secret, + spn.permission_mapping.directory_id, + spn.permission_mapping.client_id, + spn.client_secret, ) comment = ( - f"Created by UCX during migration to UC using Azure Service Principal: {sp.permission_mapping.principal}" + f"Created by UCX during migration to UC using Azure Service Principal: {spn.permission_mapping.principal}" ) read_only = False - if sp.permission_mapping.privilege == Privilege.READ_FILES.value: + if spn.permission_mapping.privilege == Privilege.READ_FILES.value: read_only = True # create the storage credential return self._ws.storage_credentials.create( @@ -102,21 +102,21 @@ def create_storage_credential(self, sp: ServicePrincipalMigrationInfo) -> Storag ) def validate_storage_credential( - self, storage_credential: StorageCredentialInfo, sp: ServicePrincipalMigrationInfo + self, storage_credential: StorageCredentialInfo, spn: ServicePrincipalMigrationInfo ) -> StorageCredentialValidationResult: read_only = False - if sp.permission_mapping.privilege == Privilege.READ_FILES.value: + if spn.permission_mapping.privilege == Privilege.READ_FILES.value: read_only = True # storage_credential validation creates a temp UC external location, which cannot overlap with # existing UC external locations. So add a sub folder to the validation location just in case try: validation = self._ws.storage_credentials.validate( storage_credential_name=storage_credential.name, - url=sp.permission_mapping.prefix, + url=spn.permission_mapping.prefix, read_only=read_only, ) return StorageCredentialValidationResult.from_validation( - storage_credential, validation, sp.permission_mapping.prefix + storage_credential, validation, spn.permission_mapping.prefix ) except InvalidParameterValue: logger.warning( @@ -132,7 +132,7 @@ def validate_storage_credential( ) ], ), - sp.permission_mapping.prefix, + spn.permission_mapping.prefix, ) @@ -236,19 +236,19 @@ def _fetch_client_secret(self, sp_list: list[StoragePermissionMapping]) -> list[ # update the list of ServicePrincipalMigrationInfo if client_secret is found sp_list_with_secret = [] - for sp in sp_list: - if sp.client_id in sp_info_with_client_secret: - sp_list_with_secret.append(ServicePrincipalMigrationInfo(sp, sp_info_with_client_secret[sp.client_id])) + for spn in sp_list: + if spn.client_id in sp_info_with_client_secret: + sp_list_with_secret.append(ServicePrincipalMigrationInfo(spn, sp_info_with_client_secret[spn.client_id])) return sp_list_with_secret def _print_action_plan(self, sp_list: list[StoragePermissionMapping]): # print action plan to console for customer to review. 
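# --- illustrative sketch (not part of this patch): both create_storage_credential and
# validate_storage_credential above derive the credential's read_only flag from the
# mapping's privilege string. A minimal standalone version of that rule, assuming the
# databricks-sdk Privilege enum used elsewhere in this file:
#
#     from databricks.sdk.service.catalog import Privilege
#
#     def is_read_only(privilege: str) -> bool:
#         # only READ_FILES yields a read-only credential; anything else is read-write
#         return privilege == Privilege.READ_FILES.value
#
#     assert is_read_only(Privilege.READ_FILES.value) is True
#     assert is_read_only("WRITE_FILES") is False
# ---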
- for sp in sp_list: + for spn in sp_list: logger.info( - f"Service Principal name: {sp.principal}, " - f"application_id: {sp.client_id}, " - f"privilege {sp.privilege} " - f"on location {sp.prefix}" + f"Service Principal name: {spn.principal}, " + f"application_id: {spn.client_id}, " + f"privilege {spn.privilege} " + f"on location {spn.prefix}" ) def _generate_migration_list(self) -> list[ServicePrincipalMigrationInfo]: @@ -285,10 +285,10 @@ def run(self, prompts: Prompts) -> list[StorageCredentialValidationResult]: return [] execution_result = [] - for sp in sp_list_with_secret: - storage_credential = self._storage_credential_manager.create_storage_credential(sp) + for spn in sp_list_with_secret: + storage_credential = self._storage_credential_manager.create_storage_credential(spn) execution_result.append( - self._storage_credential_manager.validate_storage_credential(storage_credential, sp) + self._storage_credential_manager.validate_storage_credential(storage_credential, spn) ) if execution_result: From 0a7413ca332c0ad9165ba684617add34abcbba74 Mon Sep 17 00:00:00 2001 From: Ziyuan Qin Date: Tue, 20 Feb 2024 12:45:12 -0800 Subject: [PATCH 58/76] apply pylint recommendations on tests/unit/azure/test_credentials.py --- src/databricks/labs/ucx/azure/credentials.py | 4 +++- tests/unit/azure/test_credentials.py | 25 ++++++++++---------- 2 files changed, 15 insertions(+), 14 deletions(-) diff --git a/src/databricks/labs/ucx/azure/credentials.py b/src/databricks/labs/ucx/azure/credentials.py index 1313c9d7c5..b9052023f4 100644 --- a/src/databricks/labs/ucx/azure/credentials.py +++ b/src/databricks/labs/ucx/azure/credentials.py @@ -238,7 +238,9 @@ def _fetch_client_secret(self, sp_list: list[StoragePermissionMapping]) -> list[ sp_list_with_secret = [] for spn in sp_list: if spn.client_id in sp_info_with_client_secret: - sp_list_with_secret.append(ServicePrincipalMigrationInfo(spn, sp_info_with_client_secret[spn.client_id])) + sp_list_with_secret.append( + ServicePrincipalMigrationInfo(spn, sp_info_with_client_secret[spn.client_id]) + ) return sp_list_with_secret def _print_action_plan(self, sp_list: list[StoragePermissionMapping]): diff --git a/tests/unit/azure/test_credentials.py b/tests/unit/azure/test_credentials.py index 04e06eafc1..16328827b1 100644 --- a/tests/unit/azure/test_credentials.py +++ b/tests/unit/azure/test_credentials.py @@ -55,11 +55,11 @@ def download(path: str) -> io.StringIO: raise NotFound(path) return io.StringIO(state[path]) - ws = create_autospec(WorkspaceClient) - ws.config.host = 'https://localhost' - ws.current_user.me().user_name = "foo" - ws.workspace.download = download - return ws + ws_mock = create_autospec(WorkspaceClient) + ws_mock.config.host = 'https://localhost' + ws_mock.current_user.me().user_name = "foo" + ws_mock.workspace.download = download + return ws_mock def side_effect_create_storage_credential(name, azure_service_principal, comment, read_only): @@ -68,15 +68,14 @@ def side_effect_create_storage_credential(name, azure_service_principal, comment ) -def side_effect_validate_storage_credential(storage_credential_name, url, read_only): +def side_effect_validate_storage_credential(storage_credential_name, url, read_only): # pylint: disable=unused-argument if "overlap" in storage_credential_name: raise InvalidParameterValue if read_only: response = {"isDir": True, "results": [{"message": "", "operation": "READ", "result": "PASS"}]} return ValidateStorageCredentialResponse.from_dict(response) - else: - response = {"isDir": True, "results": [{"message": 
"", "operation": "WRITE", "result": "PASS"}]} - return ValidateStorageCredentialResponse.from_dict(response) + response = {"isDir": True, "results": [{"message": "", "operation": "WRITE", "result": "PASS"}]} + return ValidateStorageCredentialResponse.from_dict(response) @pytest.fixture @@ -225,8 +224,8 @@ def test_validate_storage_credentials(credential_manager): def sp_migration(ws, credential_manager): ws.secrets.get_secret.return_value = GetSecretResponse(value="aGVsbG8gd29ybGQ=") - rp = create_autospec(AzureResourcePermissions) - rp.load.return_value = [ + arp = create_autospec(AzureResourcePermissions) + arp.load.return_value = [ StoragePermissionMapping( prefix="prefix1", client_id="app_secret1", @@ -265,7 +264,7 @@ def sp_migration(ws, credential_manager): AzureServicePrincipalInfo("app_secret4", "", "", "tenant_id_2", "storage1"), ] - return ServicePrincipalMigration(MockInstallation(), ws, rp, sp_crawler, credential_manager) + return ServicePrincipalMigration(MockInstallation(), ws, arp, sp_crawler, credential_manager) def test_for_cli_not_azure(caplog, ws): @@ -329,7 +328,7 @@ def test_print_action_plan(caplog, ws, sp_migration): sp_migration.run(prompts) - log_pattern = r"Service Principal name: .*" "application_id: .*" "privilege .*" "on location .*" + log_pattern = r"Service Principal name: .* application_id: .* privilege .* on location .*" for msg in caplog.messages: if re.search(log_pattern, msg): assert True From 39a99bc75b0c33341910c10434171942f8b017c2 Mon Sep 17 00:00:00 2001 From: Ziyuan Qin Date: Tue, 20 Feb 2024 13:58:42 -0800 Subject: [PATCH 59/76] apply pylint recommendations for azure service principal to storage credential migration tests --- src/databricks/labs/ucx/mixins/fixtures.py | 2 +- tests/integration/azure/test_credentials.py | 49 +++++++++++---------- tests/integration/conftest.py | 14 +----- tests/unit/azure/test_credentials.py | 14 +----- 4 files changed, 29 insertions(+), 50 deletions(-) diff --git a/src/databricks/labs/ucx/mixins/fixtures.py b/src/databricks/labs/ucx/mixins/fixtures.py index 740a45b9b2..df870c1360 100644 --- a/src/databricks/labs/ucx/mixins/fixtures.py +++ b/src/databricks/labs/ucx/mixins/fixtures.py @@ -1077,7 +1077,7 @@ def remove(query: Query): @pytest.fixture -def make_storage_credential_from_spn(ws): +def make_storage_credential_spn(ws): def create( *, credential_name: str, application_id: str, client_secret: str, directory_id: str, read_only=False ) -> StorageCredentialInfo: diff --git a/tests/integration/azure/test_credentials.py b/tests/integration/azure/test_credentials.py index 335a8cb9e9..40b478ca05 100644 --- a/tests/integration/azure/test_credentials.py +++ b/tests/integration/azure/test_credentials.py @@ -56,7 +56,7 @@ def extract_test_info(ws, debug_env, make_random): @pytest.fixture def run_migration(ws, sql_backend): def inner( - test_info: MigrationTestInfo, credentials: list[str], read_only=False + test_info: MigrationTestInfo, credentials: set[str], read_only=False ) -> list[StorageCredentialValidationResult]: installation = Installation(ws, 'ucx') azurerm = AzureResources(ws) @@ -94,9 +94,9 @@ def inner( return inner -def test_spn_migration_existed_storage_credential(extract_test_info, make_storage_credential_from_spn, run_migration): +def test_spn_migration_existed_storage_credential(extract_test_info, make_storage_credential_spn, run_migration): # create a storage credential for this test - make_storage_credential_from_spn( + make_storage_credential_spn( credential_name=extract_test_info.credential_name, 
application_id=extract_test_info.application_id, client_secret=extract_test_info.client_secret, @@ -104,7 +104,7 @@ def test_spn_migration_existed_storage_credential(extract_test_info, make_storag ) # test that the spn migration will be skipped due to above storage credential is existed - migration_result = run_migration(extract_test_info, [extract_test_info.credential_name]) + migration_result = run_migration(extract_test_info, {extract_test_info.credential_name}) # assert no spn migrated since migration_result will be empty assert not migration_result @@ -113,27 +113,28 @@ def test_spn_migration_existed_storage_credential(extract_test_info, make_storag @pytest.mark.parametrize("read_only", [False, True]) def test_spn_migration(ws, extract_test_info, run_migration, read_only): try: - migration_results = run_migration(extract_test_info, ["lets_migrate_the_spn"], read_only) + migration_results = run_migration(extract_test_info, {"lets_migrate_the_spn"}, read_only) storage_credential = ws.storage_credentials.get(extract_test_info.credential_name) - assert storage_credential is not None - assert storage_credential.read_only is read_only - - # assert the storage credential validation results - for res in migration_results[0].results: - if res.operation is None: - # TODO: file a ticket to SDK team, PATH_EXISTS and HIERARCHICAL_NAMESPACE_ENABLED - # should be added to the validation operations. They are None right now. - # Once it's fixed, the None check here can be removed - continue - if read_only: - if res.operation.value in ("WRITE", "DELETE"): - # We only assert that write validation are not performed for read only storage credential here. - # In real life, the READ validation for read only storage credential may fail if there is no file, - # but that is fine, as the storage credential is created, and we just cannot validate it until it's really used. - assert False, "WRITE operation should not be checked for read-only storage credential" - if not read_only: - if res.result.value == "FAIL": - assert False, f"{res.operation.value} operation is failed while validating storage credential" finally: ws.storage_credentials.delete(extract_test_info.credential_name, force=True) + + assert storage_credential is not None + assert storage_credential.read_only is read_only + + # assert the storage credential validation results + for res in migration_results[0].results: + if res.operation is None: + # TODO: file a ticket to SDK team, PATH_EXISTS and HIERARCHICAL_NAMESPACE_ENABLED + # should be added to the validation operations. They are None right now. + # Once it's fixed, the None check here can be removed + continue + if read_only: + if res.operation.value in {"WRITE", "DELETE"}: + # We only assert that write validation are not performed for read only storage credential here. + # In real life, the READ validation for read only storage credential may fail if there is no file, + # but that is fine, as the storage credential is created, and we just cannot validate it until it's really used. 
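# --- illustrative sketch (not part of this patch): the surrounding change moves the
# assertions out of the try block so that the finally-clause cleanup always runs before
# the test can fail. The bare pattern, with hypothetical create/delete helpers standing
# in for the workspace calls:
#
#     try:
#         credential = create_credential()   # resource under test
#     finally:
#         delete_credential()                # always runs, even if creation raised
#     assert credential is not None          # asserted only after cleanup is guaranteed
# ---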
+                assert False, "WRITE operation should not be checked for read-only storage credential"
+        if not read_only:
+            if res.result.value == "FAIL":
+                assert False, f"{res.operation.value} operation is failed while validating storage credential"
diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py
index 84c7641f00..d6dd138362 100644
--- a/tests/integration/conftest.py
+++ b/tests/integration/conftest.py
@@ -170,16 +170,6 @@ def save(self, tables: TablesCrawler, workspace_info: WorkspaceInfo) -> str:

 class StaticServicePrincipalMigration(ServicePrincipalMigration):
-    def __init__(
-        self,
-        installation: Installation,
-        ws: WorkspaceClient,
-        azure_resource_permissions: AzureResourcePermissions,
-        azure_sp_crawler: AzureServicePrincipalCrawler,
-        storage_credential_manager: StorageCredentialManager,
-    ):
-        super().__init__(installation, ws, azure_resource_permissions, azure_sp_crawler, storage_credential_manager)
-
     def save(self, migration_results: list[StorageCredentialValidationResult]) -> str:
         return "azure_service_principal_migration_result.csv"

@@ -187,8 +177,8 @@ def save(self, migration_results: list[StorageCredentialValidationResult]) -> st
 class StaticStorageCredentialManager(StorageCredentialManager):
     # During integration test, we only want to list storage_credentials that are created during the test.
     # So we provide a credential name list so the test can ignore credentials that are not in the list.
-    def __init__(self, ws: WorkspaceClient, credential_names=[]):
-        super().__init__(ws)
+    def __init__(self, ws_client: WorkspaceClient, credential_names: set[str]):
+        super().__init__(ws_client)
         self._credential_names = credential_names

     def list_storage_credentials(self) -> set[str]:
diff --git a/tests/unit/azure/test_credentials.py b/tests/unit/azure/test_credentials.py
index 16328827b1..6764e850a7 100644
--- a/tests/unit/azure/test_credentials.py
+++ b/tests/unit/azure/test_credentials.py
@@ -36,19 +36,7 @@

 @pytest.fixture
 def ws():
-    state = {
-        "/Users/foo/.ucx/config.yml": yaml.dump(
-            {
-                'version': 2,
-                'inventory_database': 'ucx',
-                'warehouse_id': 'test',
-                'connect': {
-                    'host': 'foo',
-                    'token': 'bar',
-                },
-            }
-        ),
-    }
+    state = {"/Users/foo/.ucx/config.yml": yaml.dump({'version': 2, 'inventory_database': 'ucx'})}

     def download(path: str) -> io.StringIO:
         if path not in state:

From 7fadc28d050ed1055369f6ce3e9453411c9cc4bf Mon Sep 17 00:00:00 2001
From: Ziyuan Qin
Date: Tue, 20 Feb 2024 14:08:59 -0800
Subject: [PATCH 60/76] remove unnecessary keyword arguments in tests for azure service principal migration

---
 tests/integration/azure/test_credentials.py |  20 ++--
 tests/unit/azure/test_credentials.py        | 106 +++++++++----------
 2 files changed, 60 insertions(+), 66 deletions(-)

diff --git a/tests/integration/azure/test_credentials.py b/tests/integration/azure/test_credentials.py
index 40b478ca05..44fd597920 100644
--- a/tests/integration/azure/test_credentials.py
+++ b/tests/integration/azure/test_credentials.py
@@ -64,22 +64,22 @@ def inner(

         permission_mappings = [
             StoragePermissionMapping(
-                prefix="abfss://things@labsazurethings.dfs.core.windows.net/avoid_ext_loc_overlap",
-                client_id=test_info.application_id,
-                principal=test_info.credential_name,
-                privilege="READ_FILES" if read_only else "WRITE_FILES",
-                directory_id=test_info.directory_id,
+                "abfss://things@labsazurethings.dfs.core.windows.net/avoid_ext_loc_overlap",
+                test_info.application_id,
+                test_info.credential_name,
+                "READ_FILES" if read_only else "WRITE_FILES",
+                test_info.directory_id,
             )
         ]
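# --- illustrative sketch (not part of this patch): removing the keyword arguments
# above only works because StoragePermissionMapping declares its dataclass fields in
# exactly this order. Under that assumption, the positional call is equivalent to the
# keyword form it replaces:
#
#     StoragePermissionMapping(
#         prefix="abfss://things@labsazurethings.dfs.core.windows.net/avoid_ext_loc_overlap",
#         client_id=test_info.application_id,
#         principal=test_info.credential_name,
#         privilege="READ_FILES" if read_only else "WRITE_FILES",
#         directory_id=test_info.directory_id,
#     )
#
# The positional form is shorter, but it breaks silently if the dataclass field order
# ever changes, which is the trade-off being accepted here.
# ---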
resource_permissions = StaticResourcePermissions(permission_mappings, installation, ws, azurerm, locations) sp_infos = [ AzureServicePrincipalInfo( - application_id=test_info.application_id, - secret_scope=test_info.secret_scope, - secret_key=test_info.secret_key, - tenant_id="test", - storage_account="test", + test_info.application_id, + test_info.secret_scope, + test_info.secret_key, + "test", + "test", ) ] sp_crawler = StaticServicePrincipalCrawler(sp_infos, ws, sql_backend, "dont_need_a_schema") diff --git a/tests/unit/azure/test_credentials.py b/tests/unit/azure/test_credentials.py index 6764e850a7..160bae418d 100644 --- a/tests/unit/azure/test_credentials.py +++ b/tests/unit/azure/test_credentials.py @@ -69,24 +69,18 @@ def side_effect_validate_storage_credential(storage_credential_name, url, read_o @pytest.fixture def credential_manager(ws): ws.storage_credentials.list.return_value = [ - StorageCredentialInfo(aws_iam_role=AwsIamRole(role_arn="arn:aws:iam::123456789012:role/example-role-name")), + StorageCredentialInfo(aws_iam_role=AwsIamRole("arn:aws:iam::123456789012:role/example-role-name")), StorageCredentialInfo( azure_managed_identity=AzureManagedIdentity("/subscriptions/.../providers/Microsoft.Databricks/...") ), StorageCredentialInfo( azure_service_principal=AzureServicePrincipal( - application_id="b6420590-5e1c-4426-8950-a94cbe9b6115", - directory_id="62e43d7d-df53-4c64-86ed-c2c1a3ac60c3", - client_secret="secret", - ) - ), - StorageCredentialInfo( - azure_service_principal=AzureServicePrincipal( - application_id="app_secret2", - directory_id="directory_id_1", - client_secret="secret", + "62e43d7d-df53-4c64-86ed-c2c1a3ac60c3", + "b6420590-5e1c-4426-8950-a94cbe9b6115", + "secret", ) ), + StorageCredentialInfo(azure_service_principal=AzureServicePrincipal("directory_id_1", "app_secret2", "secret")), ] ws.storage_credentials.create.side_effect = side_effect_create_storage_credential @@ -102,21 +96,21 @@ def test_list_storage_credentials(credential_manager): def test_create_storage_credentials(credential_manager): sp_1 = ServicePrincipalMigrationInfo( StoragePermissionMapping( - prefix="prefix1", - client_id="app_secret1", - principal="principal_write", - privilege="WRITE_FILES", - directory_id="directory_id_1", + "prefix1", + "app_secret1", + "principal_write", + "WRITE_FILES", + "directory_id_1", ), "test", ) sp_2 = ServicePrincipalMigrationInfo( StoragePermissionMapping( - prefix="prefix2", - client_id="app_secret2", - principal="principal_read", - privilege="READ_FILES", - directory_id="directory_id_1", + "prefix2", + "app_secret2", + "principal_read", + "READ_FILES", + "directory_id_1", ), "test", ) @@ -133,11 +127,11 @@ def test_create_storage_credentials(credential_manager): def test_validate_storage_credentials(credential_manager): sp_1 = ServicePrincipalMigrationInfo( StoragePermissionMapping( - prefix="prefix1", - client_id="app_secret1", - principal="principal_1", - privilege="WRITE_FILES", - directory_id="directory_id_1", + "prefix1", + "app_secret1", + "principal_1", + "WRITE_FILES", + "directory_id_1", ), "test", ) @@ -151,11 +145,11 @@ def test_validate_storage_credentials(credential_manager): sp_2 = ServicePrincipalMigrationInfo( StoragePermissionMapping( - prefix="prefix2", - client_id="app_secret2", - principal="principal_read", - privilege="READ_FILES", - directory_id="directory_id_1", + "prefix2", + "app_secret2", + "principal_read", + "READ_FILES", + "directory_id_1", ), "test", ) @@ -169,11 +163,11 @@ def 
test_validate_storage_credentials(credential_manager): sp_3 = ServicePrincipalMigrationInfo( StoragePermissionMapping( - prefix="overlap_with_external_location", - client_id="app_secret4", - principal="principal_overlap", - privilege="WRITE_FILES", - directory_id="directory_id_2", + "overlap_with_external_location", + "app_secret4", + "principal_overlap", + "WRITE_FILES", + "directory_id_2", ), "test", ) @@ -215,32 +209,32 @@ def sp_migration(ws, credential_manager): arp = create_autospec(AzureResourcePermissions) arp.load.return_value = [ StoragePermissionMapping( - prefix="prefix1", - client_id="app_secret1", - principal="principal_1", - privilege="WRITE_FILES", - directory_id="directory_id_1", + "prefix1", + "app_secret1", + "principal_1", + "WRITE_FILES", + "directory_id_1", ), StoragePermissionMapping( - prefix="prefix2", - client_id="app_secret2", - principal="principal_read", - privilege="READ_FILES", - directory_id="directory_id_1", + "prefix2", + "app_secret2", + "principal_read", + "READ_FILES", + "directory_id_1", ), StoragePermissionMapping( - prefix="prefix3", - client_id="app_secret3", - principal="principal_write", - privilege="WRITE_FILES", - directory_id="directory_id_2", + "prefix3", + "app_secret3", + "principal_write", + "WRITE_FILES", + "directory_id_2", ), StoragePermissionMapping( - prefix="overlap_with_external_location", - client_id="app_secret4", - principal="principal_overlap", - privilege="WRITE_FILES", - directory_id="directory_id_2", + "overlap_with_external_location", + "app_secret4", + "principal_overlap", + "WRITE_FILES", + "directory_id_2", ), ] From 89bd9f6b1e89eafec529add1addd187baa2c137f Mon Sep 17 00:00:00 2001 From: Ziyuan Qin Date: Tue, 20 Feb 2024 14:15:29 -0800 Subject: [PATCH 61/76] flip an assertion order in a unit test for readability --- tests/unit/azure/test_credentials.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/azure/test_credentials.py b/tests/unit/azure/test_credentials.py index 160bae418d..8a72176591 100644 --- a/tests/unit/azure/test_credentials.py +++ b/tests/unit/azure/test_credentials.py @@ -90,7 +90,7 @@ def credential_manager(ws): def test_list_storage_credentials(credential_manager): - assert {"b6420590-5e1c-4426-8950-a94cbe9b6115", "app_secret2"} == credential_manager.list_storage_credentials() + assert credential_manager.list_storage_credentials() == {"b6420590-5e1c-4426-8950-a94cbe9b6115", "app_secret2"} def test_create_storage_credentials(credential_manager): From f396d22de0ff2b218bca8b4cd1524b0a84e9e083 Mon Sep 17 00:00:00 2001 From: Ziyuan Qin Date: Tue, 20 Feb 2024 14:57:20 -0800 Subject: [PATCH 62/76] Apply SecretsMixin from PR #950 --- src/databricks/labs/ucx/assessment/secrets.py | 5 +- src/databricks/labs/ucx/azure/credentials.py | 49 ++++--------------- tests/unit/azure/test_credentials.py | 30 +++++------- 3 files changed, 27 insertions(+), 57 deletions(-) diff --git a/src/databricks/labs/ucx/assessment/secrets.py b/src/databricks/labs/ucx/assessment/secrets.py index 20fed01bb1..60cb37c7c7 100644 --- a/src/databricks/labs/ucx/assessment/secrets.py +++ b/src/databricks/labs/ucx/assessment/secrets.py @@ -22,7 +22,10 @@ def _get_secret_if_exists(self, secret_scope, secret_key) -> str | None: assert secret.value is not None return base64.b64decode(secret.value).decode("utf-8") except NotFound: - logger.warning(f'removed on the backend: {secret_scope}{secret_key}') + logger.warning(f'removed on the backend: {secret_scope}/{secret_key}') + return None + except UnicodeDecodeError: + 
logger.info(f"Secret {secret_scope}/{secret_key} has Base64 bytes that cannot be decoded to utf-8 string.") return None def _get_value_from_config_key(self, config: dict, key: str, get_secret: bool = True) -> str | None: diff --git a/src/databricks/labs/ucx/azure/credentials.py b/src/databricks/labs/ucx/azure/credentials.py index b9052023f4..8e1bf31417 100644 --- a/src/databricks/labs/ucx/azure/credentials.py +++ b/src/databricks/labs/ucx/azure/credentials.py @@ -1,11 +1,9 @@ -import base64 import logging from dataclasses import dataclass from databricks.labs.blueprint.installation import Installation from databricks.labs.blueprint.tui import Prompts from databricks.sdk import WorkspaceClient -from databricks.sdk.errors import InternalError, ResourceDoesNotExist from databricks.sdk.errors.platform import InvalidParameterValue from databricks.sdk.service.catalog import ( AzureServicePrincipal, @@ -16,6 +14,7 @@ ) from databricks.labs.ucx.assessment.azure import AzureServicePrincipalCrawler +from databricks.labs.ucx.assessment.secrets import SecretsMixin from databricks.labs.ucx.azure.access import ( AzureResourcePermissions, StoragePermissionMapping, @@ -136,7 +135,7 @@ def validate_storage_credential( ) -class ServicePrincipalMigration: +class ServicePrincipalMigration(SecretsMixin): def __init__( self, @@ -179,39 +178,6 @@ def for_cli(cls, ws: WorkspaceClient, prompts: Prompts, product='ucx'): return cls(installation, ws, resource_permissions, sp_crawler, storage_credential_manager) - def _read_databricks_secret(self, scope: str, key: str, application_id: str) -> str | None: - try: - secret_response = self._ws.secrets.get_secret(scope, key) - except ResourceDoesNotExist: - logger.info( - f"Secret {scope}.{key} does not exists. " - f"Cannot fetch the service principal client_secret for {application_id}. " - f"Will not reuse this client_secret" - ) - return None - except InternalError: - logger.info( - f"InternalError while reading secret {scope}.{key}. " - f"Cannot fetch the service principal client_secret for {application_id}. " - f"Will not reuse this client_secret. " - f"You may rerun the migration command later to retry this service principal" - ) - return None - - # decode the bytes string from GetSecretResponse to utf-8 string - # TODO: handle different encoding if we have feedback from the customer - try: - if secret_response.value is None: - return None - return base64.b64decode(secret_response.value).decode("utf-8") - except UnicodeDecodeError: - logger.info( - f"Secret {scope}.{key} has Base64 bytes that cannot be decoded to utf-8 string . " - f"Cannot fetch the service principal client_secret for {application_id}. 
" - f"Will not reuse this client_secret" - ) - return None - def _fetch_client_secret(self, sp_list: list[StoragePermissionMapping]) -> list[ServicePrincipalMigrationInfo]: # check AzureServicePrincipalInfo from AzureServicePrincipalCrawler, if AzureServicePrincipalInfo # has secret_scope and secret_key not empty, fetch the client_secret and put it to the client_secret field @@ -228,11 +194,16 @@ def _fetch_client_secret(self, sp_list: list[StoragePermissionMapping]) -> list[ continue if not sp_info.secret_key: continue - secret_value = self._read_databricks_secret( - sp_info.secret_scope, sp_info.secret_key, sp_info.application_id - ) + + secret_value = self._get_secret_if_exists(sp_info.secret_scope, sp_info.secret_key) + if secret_value: sp_info_with_client_secret[sp_info.application_id] = secret_value + else: + logger.info( + f"Cannot fetch the service principal client_secret for {sp_info.application_id}. " + f"This service principal will be skipped for migration" + ) # update the list of ServicePrincipalMigrationInfo if client_secret is found sp_list_with_secret = [] diff --git a/tests/unit/azure/test_credentials.py b/tests/unit/azure/test_credentials.py index 8a72176591..d4d47d9322 100644 --- a/tests/unit/azure/test_credentials.py +++ b/tests/unit/azure/test_credentials.py @@ -8,7 +8,7 @@ from databricks.labs.blueprint.installation import MockInstallation from databricks.labs.blueprint.tui import MockPrompts from databricks.sdk import WorkspaceClient -from databricks.sdk.errors import InternalError, NotFound, ResourceDoesNotExist +from databricks.sdk.errors import NotFound, ResourceDoesNotExist from databricks.sdk.errors.platform import InvalidParameterValue from databricks.sdk.service.catalog import ( AwsIamRole, @@ -272,11 +272,7 @@ def test_for_cli(ws): @pytest.mark.parametrize( "secret_bytes_value, num_migrated", - [ - (GetSecretResponse(value="aGVsbG8gd29ybGQ="), 1), - (GetSecretResponse(value="T2zhLCBNdW5kbyE="), 0), - (GetSecretResponse(value=None), 0), - ], + [(GetSecretResponse(value="aGVsbG8gd29ybGQ="), 1), (GetSecretResponse(value="T2zhLCBNdW5kbyE="), 0)], ) def test_read_secret_value_decode(ws, sp_migration, secret_bytes_value, num_migrated): ws.secrets.get_secret.return_value = secret_bytes_value @@ -285,21 +281,21 @@ def test_read_secret_value_decode(ws, sp_migration, secret_bytes_value, num_migr assert len(sp_migration.run(prompts)) == num_migrated -@pytest.mark.parametrize( - "exception, log_pattern, num_migrated", - [ - (ResourceDoesNotExist(), r"Secret.* does not exists", 0), - (InternalError(), r"InternalError while reading secret .*", 0), - ], -) -def test_read_secret_read_exception(caplog, ws, sp_migration, exception, log_pattern, num_migrated): +def test_read_secret_value_none(ws, sp_migration): + ws.secrets.get_secret.return_value = GetSecretResponse(value=None) + prompts = MockPrompts({"Above Azure Service Principals will be migrated to UC storage credentials*": "Yes"}) + with pytest.raises(AssertionError): + sp_migration.run(prompts) + + +def test_read_secret_read_exception(caplog, ws, sp_migration): caplog.set_level(logging.INFO) - ws.secrets.get_secret.side_effect = exception + ws.secrets.get_secret.side_effect = ResourceDoesNotExist() prompts = MockPrompts({"Above Azure Service Principals will be migrated to UC storage credentials*": "Yes"}) - assert len(sp_migration.run(prompts)) == num_migrated - assert re.search(log_pattern, caplog.text) + assert len(sp_migration.run(prompts)) == 0 + assert re.search(r"removed on the backend: .*", caplog.text) def 
test_print_action_plan(caplog, ws, sp_migration): From ced30f184cd5d43693d41385414d2d911bb180e7 Mon Sep 17 00:00:00 2001 From: Ziyuan Qin Date: Wed, 21 Feb 2024 14:29:26 -0800 Subject: [PATCH 63/76] add auth type to unit test --- tests/unit/azure/test_credentials.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/unit/azure/test_credentials.py b/tests/unit/azure/test_credentials.py index d4d47d9322..c911bb58a0 100644 --- a/tests/unit/azure/test_credentials.py +++ b/tests/unit/azure/test_credentials.py @@ -265,6 +265,7 @@ def test_for_cli_not_prompts(ws): def test_for_cli(ws): ws.config.is_azure = True + ws.config.auth_type = "azure-cli" prompts = MockPrompts({"Have you reviewed the azure_storage_account_info.csv *": "Yes"}) assert isinstance(ServicePrincipalMigration.for_cli(ws, prompts), ServicePrincipalMigration) From 7074827a01bdd85b0d2182e240abf54644f1de7a Mon Sep 17 00:00:00 2001 From: Ziyuan Qin Date: Wed, 21 Feb 2024 15:10:32 -0800 Subject: [PATCH 64/76] add connect field to config.yml for unit test azure/test_credentials.py --- tests/unit/azure/test_credentials.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/tests/unit/azure/test_credentials.py b/tests/unit/azure/test_credentials.py index c911bb58a0..982d8851e1 100644 --- a/tests/unit/azure/test_credentials.py +++ b/tests/unit/azure/test_credentials.py @@ -36,7 +36,18 @@ @pytest.fixture def ws(): - state = {"/Users/foo/.ucx/config.yml": yaml.dump({'version': 2, 'inventory_database': 'ucx'})} + state = { + "/Users/foo/.ucx/config.yml": yaml.dump( + { + 'version': 2, + 'inventory_database': 'ucx', + 'connect': { + 'host': 'foo', + 'token': 'bar', + }, + } + ) + } def download(path: str) -> io.StringIO: if path not in state: From 791bb4e1de8218c621c83959fd36c387c3d133ed Mon Sep 17 00:00:00 2001 From: Ziyuan Qin Date: Wed, 21 Feb 2024 15:34:37 -0800 Subject: [PATCH 65/76] rename some functions and variables for readability --- src/databricks/labs/ucx/assessment/secrets.py | 2 +- src/databricks/labs/ucx/azure/credentials.py | 12 ++++++------ tests/integration/conftest.py | 2 +- tests/unit/azure/test_credentials.py | 6 +++--- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/databricks/labs/ucx/assessment/secrets.py b/src/databricks/labs/ucx/assessment/secrets.py index 60cb37c7c7..0a87fe82a3 100644 --- a/src/databricks/labs/ucx/assessment/secrets.py +++ b/src/databricks/labs/ucx/assessment/secrets.py @@ -25,7 +25,7 @@ def _get_secret_if_exists(self, secret_scope, secret_key) -> str | None: logger.warning(f'removed on the backend: {secret_scope}/{secret_key}') return None except UnicodeDecodeError: - logger.info(f"Secret {secret_scope}/{secret_key} has Base64 bytes that cannot be decoded to utf-8 string.") + logger.warning(f"Secret {secret_scope}/{secret_key} has Base64 bytes that cannot be decoded to utf-8 string.") return None def _get_value_from_config_key(self, config: dict, key: str, get_secret: bool = True) -> str | None: diff --git a/src/databricks/labs/ucx/azure/credentials.py b/src/databricks/labs/ucx/azure/credentials.py index 8e1bf31417..dc0adee79a 100644 --- a/src/databricks/labs/ucx/azure/credentials.py +++ b/src/databricks/labs/ucx/azure/credentials.py @@ -67,7 +67,7 @@ class StorageCredentialManager: def __init__(self, ws: WorkspaceClient): self._ws = ws - def list_storage_credentials(self) -> set[str]: + def list(self) -> set[str]: # list existed storage credentials that is using service principal, capture the service principal's application_id application_ids = 
set() @@ -81,7 +81,7 @@ def list_storage_credentials(self) -> set[str]: logger.info(f"Found {len(application_ids)} distinct service principals already used in UC storage credentials") return application_ids - def create_storage_credential(self, spn: ServicePrincipalMigrationInfo) -> StorageCredentialInfo: + def create_with_client_secret(self, spn: ServicePrincipalMigrationInfo) -> StorageCredentialInfo: # prepare the storage credential properties name = spn.permission_mapping.principal service_principal = AzureServicePrincipal( @@ -142,14 +142,14 @@ def __init__( installation: Installation, ws: WorkspaceClient, resource_permissions: AzureResourcePermissions, - sp_crawler: AzureServicePrincipalCrawler, + service_principal_crawler: AzureServicePrincipalCrawler, storage_credential_manager: StorageCredentialManager, ): self._output_file = "azure_service_principal_migration_result.csv" self._installation = installation self._ws = ws self._resource_permissions = resource_permissions - self._sp_crawler = sp_crawler + self._sp_crawler = service_principal_crawler self._storage_credential_manager = storage_credential_manager @classmethod @@ -231,7 +231,7 @@ def _generate_migration_list(self) -> list[ServicePrincipalMigrationInfo]: # load sp list from azure_storage_account_info.csv sp_list = self._resource_permissions.load() # list existed storage credentials - sc_set = self._storage_credential_manager.list_storage_credentials() + sc_set = self._storage_credential_manager.list() # check if the sp is already used in UC storage credential filtered_sp_list = [sp for sp in sp_list if sp.client_id not in sc_set] # fetch sp client_secret if any @@ -259,7 +259,7 @@ def run(self, prompts: Prompts) -> list[StorageCredentialValidationResult]: execution_result = [] for spn in sp_list_with_secret: - storage_credential = self._storage_credential_manager.create_storage_credential(spn) + storage_credential = self._storage_credential_manager.create_with_client_secret(spn) execution_result.append( self._storage_credential_manager.validate_storage_credential(storage_credential, spn) ) diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index d6dd138362..85d9ce9d77 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -181,7 +181,7 @@ def __init__(self, ws_client: WorkspaceClient, credential_names: set[str]): super().__init__(ws_client) self._credential_names = credential_names - def list_storage_credentials(self) -> set[str]: + def list(self) -> set[str]: application_ids = set() storage_credentials = self._ws.storage_credentials.list(max_results=0) diff --git a/tests/unit/azure/test_credentials.py b/tests/unit/azure/test_credentials.py index 982d8851e1..67a96e9e01 100644 --- a/tests/unit/azure/test_credentials.py +++ b/tests/unit/azure/test_credentials.py @@ -101,7 +101,7 @@ def credential_manager(ws): def test_list_storage_credentials(credential_manager): - assert credential_manager.list_storage_credentials() == {"b6420590-5e1c-4426-8950-a94cbe9b6115", "app_secret2"} + assert credential_manager.list() == {"b6420590-5e1c-4426-8950-a94cbe9b6115", "app_secret2"} def test_create_storage_credentials(credential_manager): @@ -126,11 +126,11 @@ def test_create_storage_credentials(credential_manager): "test", ) - storage_credential = credential_manager.create_storage_credential(sp_1) + storage_credential = credential_manager.create_with_client_secret(sp_1) assert sp_1.permission_mapping.principal == storage_credential.name assert storage_credential.read_only is False - 
storage_credential = credential_manager.create_storage_credential(sp_2) + storage_credential = credential_manager.create_with_client_secret(sp_2) assert sp_2.permission_mapping.principal == storage_credential.name assert storage_credential.read_only is True From d59abef025c3f52b7e9b6fb076f74bb0eea9eafa Mon Sep 17 00:00:00 2001 From: Ziyuan Qin Date: Wed, 21 Feb 2024 15:48:26 -0800 Subject: [PATCH 66/76] simplify side_effect_validate_storage_credential --- src/databricks/labs/ucx/assessment/secrets.py | 4 +++- tests/unit/azure/test_credentials.py | 15 +++++++++++---- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/src/databricks/labs/ucx/assessment/secrets.py b/src/databricks/labs/ucx/assessment/secrets.py index 0a87fe82a3..715d6d82aa 100644 --- a/src/databricks/labs/ucx/assessment/secrets.py +++ b/src/databricks/labs/ucx/assessment/secrets.py @@ -25,7 +25,9 @@ def _get_secret_if_exists(self, secret_scope, secret_key) -> str | None: logger.warning(f'removed on the backend: {secret_scope}/{secret_key}') return None except UnicodeDecodeError: - logger.warning(f"Secret {secret_scope}/{secret_key} has Base64 bytes that cannot be decoded to utf-8 string.") + logger.warning( + f"Secret {secret_scope}/{secret_key} has Base64 bytes that cannot be decoded to utf-8 string." + ) return None def _get_value_from_config_key(self, config: dict, key: str, get_secret: bool = True) -> str | None: diff --git a/tests/unit/azure/test_credentials.py b/tests/unit/azure/test_credentials.py index 67a96e9e01..4a60937556 100644 --- a/tests/unit/azure/test_credentials.py +++ b/tests/unit/azure/test_credentials.py @@ -16,6 +16,9 @@ AzureServicePrincipal, StorageCredentialInfo, ValidateStorageCredentialResponse, + ValidationResult, + ValidationResultOperation, + ValidationResultResult, ) from databricks.sdk.service.workspace import GetSecretResponse @@ -71,10 +74,14 @@ def side_effect_validate_storage_credential(storage_credential_name, url, read_o if "overlap" in storage_credential_name: raise InvalidParameterValue if read_only: - response = {"isDir": True, "results": [{"message": "", "operation": "READ", "result": "PASS"}]} - return ValidateStorageCredentialResponse.from_dict(response) - response = {"isDir": True, "results": [{"message": "", "operation": "WRITE", "result": "PASS"}]} - return ValidateStorageCredentialResponse.from_dict(response) + return ValidateStorageCredentialResponse( + is_dir=True, + results=[ValidationResult(operation=ValidationResultOperation.READ, result=ValidationResultResult.PASS)], + ) + return ValidateStorageCredentialResponse( + is_dir=True, + results=[ValidationResult(operation=ValidationResultOperation.WRITE, result=ValidationResultResult.PASS)], + ) @pytest.fixture From a27586372fe5d0e914f032bf1f05cf65916d92da Mon Sep 17 00:00:00 2001 From: Ziyuan Qin Date: Wed, 21 Feb 2024 23:12:34 -0800 Subject: [PATCH 67/76] split test_validate_storage_credentials into 3 test functions for better readability --- tests/unit/azure/test_credentials.py | 71 ++++++++++++++++------------ 1 file changed, 41 insertions(+), 30 deletions(-) diff --git a/tests/unit/azure/test_credentials.py b/tests/unit/azure/test_credentials.py index 4a60937556..6c5377fb2d 100644 --- a/tests/unit/azure/test_credentials.py +++ b/tests/unit/azure/test_credentials.py @@ -143,7 +143,7 @@ def test_create_storage_credentials(credential_manager): def test_validate_storage_credentials(credential_manager): - sp_1 = ServicePrincipalMigrationInfo( + service_principal = ServicePrincipalMigrationInfo( 
StoragePermissionMapping( "prefix1", "app_secret1", @@ -153,15 +153,27 @@ def test_validate_storage_credentials(credential_manager): ), "test", ) - sc_1 = StorageCredentialInfo( - name=sp_1.permission_mapping.principal, + storage_credential = StorageCredentialInfo( + name=service_principal.permission_mapping.principal, azure_service_principal=AzureServicePrincipal( - sp_1.permission_mapping.directory_id, sp_1.permission_mapping.client_id, sp_1.client_secret + service_principal.permission_mapping.directory_id, + service_principal.permission_mapping.client_id, + service_principal.client_secret, ), read_only=False, ) - sp_2 = ServicePrincipalMigrationInfo( + # validate normal storage credential + validation = credential_manager.validate_storage_credential(storage_credential, service_principal) + assert validation.read_only is False + assert validation.name == storage_credential.name + for result in validation.results: + if result.operation.value == "WRITE": + assert result.result.value == "PASS" + + +def test_validate_read_only_storage_credentials(credential_manager): + service_principal = ServicePrincipalMigrationInfo( StoragePermissionMapping( "prefix2", "app_secret2", @@ -171,15 +183,27 @@ def test_validate_storage_credentials(credential_manager): ), "test", ) - sc_2 = StorageCredentialInfo( - name=sp_2.permission_mapping.principal, + storage_credential = StorageCredentialInfo( + name=service_principal.permission_mapping.principal, azure_service_principal=AzureServicePrincipal( - sp_2.permission_mapping.directory_id, sp_2.permission_mapping.client_id, sp_2.client_secret + service_principal.permission_mapping.directory_id, + service_principal.permission_mapping.client_id, + service_principal.client_secret, ), read_only=True, ) - sp_3 = ServicePrincipalMigrationInfo( + # validate read-only storage credential + validation = credential_manager.validate_storage_credential(storage_credential, service_principal) + assert validation.read_only is True + assert validation.name == storage_credential.name + for result in validation.results: + if result.operation.value == "READ": + assert result.result.value == "PASS" + + +def test_validate_storage_credentials_overlap_location(credential_manager): + service_principal = ServicePrincipalMigrationInfo( StoragePermissionMapping( "overlap_with_external_location", "app_secret4", @@ -189,31 +213,18 @@ def test_validate_storage_credentials(credential_manager): ), "test", ) - sc_3 = StorageCredentialInfo( - name=sp_3.permission_mapping.principal, + storage_credential = StorageCredentialInfo( + name=service_principal.permission_mapping.principal, azure_service_principal=AzureServicePrincipal( - sp_3.permission_mapping.directory_id, sp_3.permission_mapping.client_id, sp_3.client_secret + service_principal.permission_mapping.directory_id, + service_principal.permission_mapping.client_id, + service_principal.client_secret, ), ) - # validate normal storage credential - validation = credential_manager.validate_storage_credential(sc_1, sp_1) - assert validation.read_only is False - assert validation.name == sp_1.permission_mapping.principal - for result in validation.results: - if result.operation.value == "WRITE": - assert result.result.value == "PASS" - - # validate read-only storage credential - validation = credential_manager.validate_storage_credential(sc_2, sp_2) - assert validation.read_only is True - assert validation.name == sp_2.permission_mapping.principal - for result in validation.results: - if result.operation.value == "READ": - assert 
result.result.value == "PASS" - - # prefix used for validation overlaps with existing external location - validation = credential_manager.validate_storage_credential(sc_3, sp_3) + # prefix used for validation overlaps with existing external location will raise InvalidParameterValue + # assert the exception is handled + validation = credential_manager.validate_storage_credential(storage_credential, service_principal) assert ( validation.results[0].message == "The validation is skipped because an existing external location overlaps with the location used for validation." From b42856c3346ccf3710b58d7314bc56143ba9856c Mon Sep 17 00:00:00 2001 From: Ziyuan Qin Date: Thu, 22 Feb 2024 00:55:50 -0800 Subject: [PATCH 68/76] simplify the storage credential validation result to only keep failure message if any. --- src/databricks/labs/ucx/azure/credentials.py | 53 ++++---- tests/integration/azure/test_credentials.py | 25 ++-- tests/unit/azure/test_credentials.py | 120 +++++++++++-------- 3 files changed, 103 insertions(+), 95 deletions(-) diff --git a/src/databricks/labs/ucx/azure/credentials.py b/src/databricks/labs/ucx/azure/credentials.py index dc0adee79a..6968cc036c 100644 --- a/src/databricks/labs/ucx/azure/credentials.py +++ b/src/databricks/labs/ucx/azure/credentials.py @@ -9,8 +9,7 @@ AzureServicePrincipal, Privilege, StorageCredentialInfo, - ValidateStorageCredentialResponse, - ValidationResult, + ValidationResultResult, ) from databricks.labs.ucx.assessment.azure import AzureServicePrincipalCrawler @@ -39,27 +38,18 @@ class StorageCredentialValidationResult: name: str | None = None application_id: str | None = None directory_id: str | None = None - created_by: str | None = None read_only: bool | None = None validated_on: str | None = None - results: list[ValidationResult] | None = None + failures: list[str] | None = None @classmethod - def from_validation( - cls, storage_credential: StorageCredentialInfo, validation: ValidateStorageCredentialResponse, prefix: str - ): + def from_validation(cls, storage_credential: StorageCredentialInfo, failures: list[str] | None, prefix: str): if storage_credential.azure_service_principal: application_id = storage_credential.azure_service_principal.application_id directory_id = storage_credential.azure_service_principal.directory_id return cls( - storage_credential.name, - application_id, - directory_id, - storage_credential.created_by, - storage_credential.read_only, - prefix, - validation.results, + storage_credential.name, application_id, directory_id, storage_credential.read_only, prefix, failures ) @@ -106,34 +96,41 @@ def validate_storage_credential( read_only = False if spn.permission_mapping.privilege == Privilege.READ_FILES.value: read_only = True - # storage_credential validation creates a temp UC external location, which cannot overlap with - # existing UC external locations. So add a sub folder to the validation location just in case + try: validation = self._ws.storage_credentials.validate( storage_credential_name=storage_credential.name, url=spn.permission_mapping.prefix, read_only=read_only, ) - return StorageCredentialValidationResult.from_validation( - storage_credential, validation, spn.permission_mapping.prefix - ) except InvalidParameterValue: logger.warning( - "There is an existing external location overlaps with the prefix that is mapped to the service principal and used for validating the migrated storage credential. 
Skip the validation" + "There is an existing external location overlaps with the prefix that is mapped to " + "the service principal and used for validating the migrated storage credential. Skip the validation" ) return StorageCredentialValidationResult.from_validation( storage_credential, - ValidateStorageCredentialResponse( - is_dir=None, - results=[ - ValidationResult( - message="The validation is skipped because an existing external location overlaps with the location used for validation." - ) - ], - ), + [ + "The validation is skipped because an existing external location overlaps with the location used for validation." + ], spn.permission_mapping.prefix, ) + if not validation.results: + return StorageCredentialValidationResult.from_validation( + storage_credential, ["Validation returned none results."], spn.permission_mapping.prefix + ) + + failures = [] + for result in validation.results: + if result.operation is None: + continue + if result.result == ValidationResultResult.FAIL: + failures.append(f"{result.operation.value} validation failed with message: {result.message}") + return StorageCredentialValidationResult.from_validation( + storage_credential, None if not failures else failures, spn.permission_mapping.prefix + ) + class ServicePrincipalMigration(SecretsMixin): diff --git a/tests/integration/azure/test_credentials.py b/tests/integration/azure/test_credentials.py index 44fd597920..6c27c28152 100644 --- a/tests/integration/azure/test_credentials.py +++ b/tests/integration/azure/test_credentials.py @@ -122,19 +122,12 @@ def test_spn_migration(ws, extract_test_info, run_migration, read_only): assert storage_credential is not None assert storage_credential.read_only is read_only - # assert the storage credential validation results - for res in migration_results[0].results: - if res.operation is None: - # TODO: file a ticket to SDK team, PATH_EXISTS and HIERARCHICAL_NAMESPACE_ENABLED - # should be added to the validation operations. They are None right now. - # Once it's fixed, the None check here can be removed - continue - if read_only: - if res.operation.value in {"WRITE", "DELETE"}: - # We only assert that write validation are not performed for read only storage credential here. - # In real life, the READ validation for read only storage credential may fail if there is no file, - # but that is fine, as the storage credential is created, and we just cannot validate it until it's really used. 
- assert False, "WRITE operation should not be checked for read-only storage credential" - if not read_only: - if res.result.value == "FAIL": - assert False, f"{res.operation.value} operation is failed while validating storage credential" + if read_only: + failures = migration_results[0].failures + # in this test LIST should fail as validation path does not exist + assert failures + match = re.match(r"LIST validation failed with message: .*The specified path does not exist", failures[0]) + assert match is not None, "LIST validation should fail" + else: + # all validation should pass + assert not migration_results[0].failures diff --git a/tests/unit/azure/test_credentials.py b/tests/unit/azure/test_credentials.py index 6c5377fb2d..cf0c0c0a35 100644 --- a/tests/unit/azure/test_credentials.py +++ b/tests/unit/azure/test_credentials.py @@ -73,6 +73,18 @@ def side_effect_create_storage_credential(name, azure_service_principal, comment def side_effect_validate_storage_credential(storage_credential_name, url, read_only): # pylint: disable=unused-argument if "overlap" in storage_credential_name: raise InvalidParameterValue + if "none" in storage_credential_name: + return ValidateStorageCredentialResponse() + if "fail" in storage_credential_name: + return ValidateStorageCredentialResponse( + is_dir=True, + results=[ + ValidationResult( + operation=ValidationResultOperation.LIST, result=ValidationResultResult.FAIL, message="fail" + ), + ValidationResult(operation=None, result=ValidationResultResult.FAIL, message="fail"), + ], + ) if read_only: return ValidateStorageCredentialResponse( is_dir=True, @@ -143,22 +155,15 @@ def test_create_storage_credentials(credential_manager): def test_validate_storage_credentials(credential_manager): - service_principal = ServicePrincipalMigrationInfo( - StoragePermissionMapping( - "prefix1", - "app_secret1", - "principal_1", - "WRITE_FILES", - "directory_id_1", - ), - "test", - ) + service_principal = MagicMock() + service_principal.permission_mapping.privilege = "WRITE_FILES" + storage_credential = StorageCredentialInfo( - name=service_principal.permission_mapping.principal, + name="principal_1", azure_service_principal=AzureServicePrincipal( - service_principal.permission_mapping.directory_id, - service_principal.permission_mapping.client_id, - service_principal.client_secret, + "directory_id_1", + "client_id", + "test", ), read_only=False, ) @@ -167,28 +172,19 @@ def test_validate_storage_credentials(credential_manager): validation = credential_manager.validate_storage_credential(storage_credential, service_principal) assert validation.read_only is False assert validation.name == storage_credential.name - for result in validation.results: - if result.operation.value == "WRITE": - assert result.result.value == "PASS" + assert not validation.failures def test_validate_read_only_storage_credentials(credential_manager): - service_principal = ServicePrincipalMigrationInfo( - StoragePermissionMapping( - "prefix2", - "app_secret2", - "principal_read", - "READ_FILES", - "directory_id_1", - ), - "test", - ) + service_principal = MagicMock() + service_principal.permission_mapping.privilege = "READ_FILES" + storage_credential = StorageCredentialInfo( - name=service_principal.permission_mapping.principal, + name="principal_read", azure_service_principal=AzureServicePrincipal( - service_principal.permission_mapping.directory_id, - service_principal.permission_mapping.client_id, - service_principal.client_secret, + "directory_id_1", + "client_id", + "test", ), read_only=True, 
) @@ -197,38 +193,60 @@ def test_validate_read_only_storage_credentials(credential_manager): validation = credential_manager.validate_storage_credential(storage_credential, service_principal) assert validation.read_only is True assert validation.name == storage_credential.name - for result in validation.results: - if result.operation.value == "READ": - assert result.result.value == "PASS" + assert not validation.failures def test_validate_storage_credentials_overlap_location(credential_manager): - service_principal = ServicePrincipalMigrationInfo( - StoragePermissionMapping( - "overlap_with_external_location", - "app_secret4", - "principal_overlap", - "WRITE_FILES", + service_principal = MagicMock() + service_principal.permission_mapping.privilege = "WRITE_FILES" + + storage_credential = StorageCredentialInfo( + name="overlap", + azure_service_principal=AzureServicePrincipal( "directory_id_2", + "client_id", + "test", ), - "test", ) + + # prefix used for validation overlaps with existing external location will raise InvalidParameterValue + # assert InvalidParameterValue is handled + validation = credential_manager.validate_storage_credential(storage_credential, service_principal) + assert validation.failures == [ + "The validation is skipped because an existing external location overlaps with the location used for validation." + ] + + +def test_validate_storage_credentials_non_response(credential_manager): + service_principal = MagicMock() + service_principal.permission_mapping.privilege = "WRITE_FILES" + storage_credential = StorageCredentialInfo( - name=service_principal.permission_mapping.principal, + name="none", azure_service_principal=AzureServicePrincipal( - service_principal.permission_mapping.directory_id, - service_principal.permission_mapping.client_id, - service_principal.client_secret, + "directory_id_2", + "client_id", + "test", ), ) - - # prefix used for validation overlaps with existing external location will raise InvalidParameterValue - # assert the exception is handled validation = credential_manager.validate_storage_credential(storage_credential, service_principal) - assert ( - validation.results[0].message - == "The validation is skipped because an existing external location overlaps with the location used for validation." 
+ assert validation.failures == ["Validation returned none results."] + + +def test_validate_storage_credentials_failed_operation(credential_manager): + service_principal = MagicMock() + service_principal.permission_mapping.privilege = "WRITE_FILES" + + storage_credential = StorageCredentialInfo( + name="fail", + azure_service_principal=AzureServicePrincipal( + "directory_id_2", + "client_id", + "test", + ), ) + validation = credential_manager.validate_storage_credential(storage_credential, service_principal) + assert validation.failures == ["LIST validation failed with message: fail"] @pytest.fixture From f58bd7cbcfc4d2bebd8e4650c402bc0954a4a819 Mon Sep 17 00:00:00 2001 From: Ziyuan Qin Date: Thu, 22 Feb 2024 01:22:52 -0800 Subject: [PATCH 69/76] further simplify function names and codes --- src/databricks/labs/ucx/azure/credentials.py | 32 +++++++++----------- tests/unit/azure/test_credentials.py | 10 +++--- 2 files changed, 19 insertions(+), 23 deletions(-) diff --git a/src/databricks/labs/ucx/azure/credentials.py b/src/databricks/labs/ucx/azure/credentials.py index 6968cc036c..9e6883f5d6 100644 --- a/src/databricks/labs/ucx/azure/credentials.py +++ b/src/databricks/labs/ucx/azure/credentials.py @@ -82,36 +82,35 @@ def create_with_client_secret(self, spn: ServicePrincipalMigrationInfo) -> Stora comment = ( f"Created by UCX during migration to UC using Azure Service Principal: {spn.permission_mapping.principal}" ) - read_only = False - if spn.permission_mapping.privilege == Privilege.READ_FILES.value: - read_only = True + # create the storage credential return self._ws.storage_credentials.create( - name, azure_service_principal=service_principal, comment=comment, read_only=read_only + name, + azure_service_principal=service_principal, + comment=comment, + read_only=spn.permission_mapping.privilege == Privilege.READ_FILES.value, ) - def validate_storage_credential( + def validate( self, storage_credential: StorageCredentialInfo, spn: ServicePrincipalMigrationInfo ) -> StorageCredentialValidationResult: - read_only = False - if spn.permission_mapping.privilege == Privilege.READ_FILES.value: - read_only = True - try: validation = self._ws.storage_credentials.validate( storage_credential_name=storage_credential.name, url=spn.permission_mapping.prefix, - read_only=read_only, + read_only=spn.permission_mapping.privilege == Privilege.READ_FILES.value, ) except InvalidParameterValue: logger.warning( "There is an existing external location overlaps with the prefix that is mapped to " - "the service principal and used for validating the migrated storage credential. Skip the validation" + "the service principal and used for validating the migrated storage credential. " + "Skip the validation" ) return StorageCredentialValidationResult.from_validation( storage_credential, [ - "The validation is skipped because an existing external location overlaps with the location used for validation." + "The validation is skipped because an existing external location overlaps " + "with the location used for validation." 
            ],
            spn.permission_mapping.prefix,
        )
@@ -257,15 +256,12 @@ def run(self, prompts: Prompts) -> list[StorageCredentialValidationResult]:
         execution_result = []
         for spn in sp_list_with_secret:
             storage_credential = self._storage_credential_manager.create_with_client_secret(spn)
-            execution_result.append(
-                self._storage_credential_manager.validate_storage_credential(storage_credential, spn)
-            )
+            execution_result.append(self._storage_credential_manager.validate(storage_credential, spn))
 
         if execution_result:
             results_file = self.save(execution_result)
-            logger.info("Completed migration from Azure Service Principal migrated to UC Storage credentials")
-            print(
-                f"Completed migration from Azure Service Principal migrated to UC Storage credentials. "
+            logger.info(
+                f"Completed migration from Azure Service Principal migrated to UC Storage credentials. "
                 f"Please check {results_file} for validation results"
             )
         else:
diff --git a/tests/unit/azure/test_credentials.py b/tests/unit/azure/test_credentials.py
index cf0c0c0a35..f02e86f4b2 100644
--- a/tests/unit/azure/test_credentials.py
+++ b/tests/unit/azure/test_credentials.py
@@ -169,7 +169,7 @@ def test_validate_storage_credentials(credential_manager):
     )
 
     # validate normal storage credential
-    validation = credential_manager.validate_storage_credential(storage_credential, service_principal)
+    validation = credential_manager.validate(storage_credential, service_principal)
     assert validation.read_only is False
     assert validation.name == storage_credential.name
     assert not validation.failures
@@ -190,7 +190,7 @@ def test_validate_read_only_storage_credentials(credential_manager):
     )
 
     # validate read-only storage credential
-    validation = credential_manager.validate_storage_credential(storage_credential, service_principal)
+    validation = credential_manager.validate(storage_credential, service_principal)
     assert validation.read_only is True
     assert validation.name == storage_credential.name
     assert not validation.failures
@@ -211,7 +211,7 @@ def test_validate_storage_credentials_overlap_location(credential_manager):
 
     # prefix used for validation overlaps with existing external location will raise InvalidParameterValue
     # assert InvalidParameterValue is handled
-    validation = credential_manager.validate_storage_credential(storage_credential, service_principal)
+    validation = credential_manager.validate(storage_credential, service_principal)
     assert validation.failures == [
         "The validation is skipped because an existing external location overlaps with the location used for validation."
     ]
@@ -229,7 +229,7 @@ def test_validate_storage_credentials_non_response(credential_manager):
             "test",
         ),
     )
-    validation = credential_manager.validate_storage_credential(storage_credential, service_principal)
+    validation = credential_manager.validate(storage_credential, service_principal)
     assert validation.failures == ["Validation returned none results."]
@@ -245,7 +245,7 @@ def test_validate_storage_credentials_failed_operation(credential_manager):
             "test",
         ),
     )
-    validation = credential_manager.validate_storage_credential(storage_credential, service_principal)
+    validation = credential_manager.validate(storage_credential, service_principal)
    assert validation.failures == ["LIST validation failed with message: fail"]

From ee84a78d4a47b5b62c56d025f2297d612dd466b3 Mon Sep 17 00:00:00 2001
From: Ziyuan Qin
Date: Thu, 22 Feb 2024 01:36:22 -0800
Subject: [PATCH 70/76] improve integration tests.
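
The tests now read TEST_LEGACY_SPN_CLUSTER_ID through the env_or_skip fixture,
so they skip cleanly instead of failing when the variable is unset, and they
install into a randomized product folder rather than the shared 'ucx' one. For
illustration only, a minimal sketch of what an env_or_skip-style fixture can
look like; the real fixture lives in the shared test fixtures and is not part
of this patch:

    import os

    import pytest

    @pytest.fixture
    def env_or_skip():
        """Return an environment variable's value, or skip the test if it is unset."""

        def inner(name: str) -> str:
            value = os.environ.get(name)
            if value is None:
                # skipping (instead of raising) keeps CI green when the
                # integration environment is not configured
                pytest.skip(f"environment variable {name} is not set")
            return value

        return inner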
---
 tests/integration/azure/test_credentials.py | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/tests/integration/azure/test_credentials.py b/tests/integration/azure/test_credentials.py
index 6c27c28152..37c593858a 100644
--- a/tests/integration/azure/test_credentials.py
+++ b/tests/integration/azure/test_credentials.py
@@ -30,11 +30,11 @@ class MigrationTestInfo:
 
 @pytest.fixture
-def extract_test_info(ws, debug_env, make_random):
+def extract_test_info(ws, env_or_skip, make_random):
     random = make_random(6).lower()
     credential_name = f"testinfra_storageaccess_{random}"
 
-    spark_conf = ws.clusters.get(debug_env["TEST_LEGACY_SPN_CLUSTER_ID"]).spark_conf
+    spark_conf = ws.clusters.get(env_or_skip("TEST_LEGACY_SPN_CLUSTER_ID")).spark_conf
 
     application_id = spark_conf.get("fs.azure.account.oauth2.client.id")
@@ -54,11 +54,11 @@
 
 @pytest.fixture
-def run_migration(ws, sql_backend):
+def run_migration(ws, sql_backend, make_random):
     def inner(
         test_info: MigrationTestInfo, credentials: set[str], read_only=False
     ) -> list[StorageCredentialValidationResult]:
-        installation = Installation(ws, 'ucx')
+        installation = Installation(ws, make_random(4))
         azurerm = AzureResources(ws)
         locations = ExternalLocations(ws, sql_backend, "dont_need_a_schema")
@@ -114,7 +114,6 @@ def test_spn_migration_existed_storage_credential(extract_test_info, make_storag
 def test_spn_migration(ws, extract_test_info, run_migration, read_only):
     try:
         migration_results = run_migration(extract_test_info, {"lets_migrate_the_spn"}, read_only)
-        storage_credential = ws.storage_credentials.get(extract_test_info.credential_name)
     finally:
         ws.storage_credentials.delete(extract_test_info.credential_name, force=True)

From 6f12b583f6b7d9d607afd4371e26643da1006626 Mon Sep 17 00:00:00 2001
From: qziyuan <91635877+qziyuan@users.noreply.github.com>
Date: Thu, 22 Feb 2024 13:18:57 -0800
Subject: [PATCH 71/76] fix logging wording

Co-authored-by: vuong-nguyen <44292934+nkvuong@users.noreply.github.com>
---
 src/databricks/labs/ucx/azure/credentials.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/databricks/labs/ucx/azure/credentials.py b/src/databricks/labs/ucx/azure/credentials.py
index 9e6883f5d6..52b637628c 100644
--- a/src/databricks/labs/ucx/azure/credentials.py
+++ b/src/databricks/labs/ucx/azure/credentials.py
@@ -261,7 +261,7 @@ def run(self, prompts: Prompts) -> list[StorageCredentialValidationResult]:
         if execution_result:
             results_file = self.save(execution_result)
             logger.info(
-                f"Completed migration from Azure Service Principal migrated to UC Storage credentials. "
+                f"Completed migration from Azure Service Principal to UC Storage credentials. "
                 f"Please check {results_file} for validation results"
             )
         else:

From 272d05b05698277d47786a6ea6d74ac16563cae2 Mon Sep 17 00:00:00 2001
From: qziyuan <91635877+qziyuan@users.noreply.github.com>
Date: Thu, 22 Feb 2024 13:19:41 -0800
Subject: [PATCH 72/76] update cli comment to use new method name principal_prefix_access

Co-authored-by: vuong-nguyen <44292934+nkvuong@users.noreply.github.com>
---
 src/databricks/labs/ucx/cli.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/databricks/labs/ucx/cli.py b/src/databricks/labs/ucx/cli.py
index ae3da50d1c..520af039d6 100644
--- a/src/databricks/labs/ucx/cli.py
+++ b/src/databricks/labs/ucx/cli.py
@@ -290,7 +290,7 @@ def migrate_azure_service_principals(w: WorkspaceClient):
     Databricks, to UC storage credentials.
The Azure Service Principals to location mapping are listed in /Users/{user_name}/.ucx/azure_storage_account_info.csv - which is generated by save_azure_storage_accounts command. Please review the file and delete the Service Principals + which is generated by principal_prefix_access command. Please review the file and delete the Service Principals you do not want to be migrated. The command will only migrate the Service Principals that have client secret stored in Databricks Secret. From cff18fadbef1d72cc32800811e097dc5dd9def77 Mon Sep 17 00:00:00 2001 From: Ziyuan Qin Date: Thu, 22 Feb 2024 15:21:32 -0800 Subject: [PATCH 73/76] update migrate_credentials cli; Refactor the code so MockInstallation can be used in unit test. --- labs.yml | 3 + src/databricks/labs/ucx/azure/credentials.py | 7 +- src/databricks/labs/ucx/cli.py | 18 ++- tests/unit/azure/test_credentials.py | 137 +++++++++---------- tests/unit/test_cli.py | 6 +- 5 files changed, 84 insertions(+), 87 deletions(-) diff --git a/labs.yml b/labs.yml index ae357b169b..b089593eec 100644 --- a/labs.yml +++ b/labs.yml @@ -114,3 +114,6 @@ commands: Workspace Group Name\tMembers Count\tAccount Group Name\tMembers Count {{range .}}{{.wf_group_name}}\t{{.wf_group_members_count}}\t{{.acc_group_name}}\t{{.acc_group_members_count}} {{end}} + + - name: migrate_credentials + description: Migrate credentials for storage access to UC storage credential diff --git a/src/databricks/labs/ucx/azure/credentials.py b/src/databricks/labs/ucx/azure/credentials.py index 52b637628c..9e20fd576b 100644 --- a/src/databricks/labs/ucx/azure/credentials.py +++ b/src/databricks/labs/ucx/azure/credentials.py @@ -149,11 +149,7 @@ def __init__( self._storage_credential_manager = storage_credential_manager @classmethod - def for_cli(cls, ws: WorkspaceClient, prompts: Prompts, product='ucx'): - if not ws.config.is_azure: - logger.error("Workspace is not on azure, please run this command on azure databricks workspaces.") - raise SystemExit() - + def for_cli(cls, ws: WorkspaceClient, installation: Installation, prompts: Prompts): msg = ( "Have you reviewed the azure_storage_account_info.csv " "and confirm listed service principals are allowed to be checked for migration?" @@ -161,7 +157,6 @@ def for_cli(cls, ws: WorkspaceClient, prompts: Prompts, product='ucx'): if not prompts.confirm(msg): raise SystemExit() - installation = Installation.current(ws, product) config = installation.load(WorkspaceConfig) sql_backend = StatementExecutionBackend(ws, config.warehouse_id) azurerm = AzureResources(ws) diff --git a/src/databricks/labs/ucx/cli.py b/src/databricks/labs/ucx/cli.py index 520af039d6..e9160a1f95 100644 --- a/src/databricks/labs/ucx/cli.py +++ b/src/databricks/labs/ucx/cli.py @@ -284,21 +284,25 @@ def _aws_principal_prefix_access(w: WorkspaceClient, aws_profile: str): @ucx.command -def migrate_azure_service_principals(w: WorkspaceClient): - """Migrate Azure Service Principals, which have Storage Blob Data Contributor, +def migrate_credentials(w: WorkspaceClient): + """For Azure, this command migrate Azure Service Principals, which have Storage Blob Data Contributor, Storage Blob Data Reader, Storage Blob Data Owner roles on ADLS Gen2 locations that are being used in Databricks, to UC storage credentials. - The Azure Service Principals to location mapping are listed in /Users/{user_name}/.ucx/azure_storage_account_info.csv which is generated by principal_prefix_access command. Please review the file and delete the Service Principals you do not want to be migrated. 
- The command will only migrate the Service Principals that have client secret stored in Databricks Secret. """ - logger.info("Running migrate_azure_service_principals command") prompts = Prompts() - service_principal_migration = ServicePrincipalMigration.for_cli(w, prompts) - service_principal_migration.run(prompts) + if w.config.is_azure: + logger.info("Running migrate_credentials for Azure") + installation = Installation.current(w, 'ucx') + service_principal_migration = ServicePrincipalMigration.for_cli(w, installation, prompts) + service_principal_migration.run(prompts) + if w.config.is_aws: + logger.error("migrate_credentials is not yet supported in AWS") + if w.config.is_gcp: + logger.error("migrate_credentials is not yet supported in GCP") if __name__ == "__main__": diff --git a/tests/unit/azure/test_credentials.py b/tests/unit/azure/test_credentials.py index f02e86f4b2..cb4163b004 100644 --- a/tests/unit/azure/test_credentials.py +++ b/tests/unit/azure/test_credentials.py @@ -1,14 +1,12 @@ -import io import logging import re from unittest.mock import MagicMock, create_autospec import pytest -import yaml from databricks.labs.blueprint.installation import MockInstallation from databricks.labs.blueprint.tui import MockPrompts from databricks.sdk import WorkspaceClient -from databricks.sdk.errors import NotFound, ResourceDoesNotExist +from databricks.sdk.errors import ResourceDoesNotExist from databricks.sdk.errors.platform import InvalidParameterValue from databricks.sdk.service.catalog import ( AwsIamRole, @@ -35,33 +33,59 @@ ServicePrincipalMigrationInfo, StorageCredentialManager, ) +from databricks.labs.ucx.azure.resources import AzureResources +from databricks.labs.ucx.hive_metastore import ExternalLocations @pytest.fixture def ws(): - state = { - "/Users/foo/.ucx/config.yml": yaml.dump( - { + return create_autospec(WorkspaceClient) + + +@pytest.fixture +def installation(): + return MockInstallation( + { + "config.yml": { 'version': 2, 'inventory_database': 'ucx', 'connect': { 'host': 'foo', 'token': 'bar', }, - } - ) - } - - def download(path: str) -> io.StringIO: - if path not in state: - raise NotFound(path) - return io.StringIO(state[path]) - - ws_mock = create_autospec(WorkspaceClient) - ws_mock.config.host = 'https://localhost' - ws_mock.current_user.me().user_name = "foo" - ws_mock.workspace.download = download - return ws_mock + }, + "azure_storage_account_info.csv": [ + { + 'prefix': 'prefix1', + 'client_id': 'app_secret1', + 'principal': 'principal_1', + 'privilege': 'WRITE_FILES', + 'directory_id': 'directory_id_1', + }, + { + 'prefix': 'prefix2', + 'client_id': 'app_secret2', + 'principal': 'principal_read', + 'privilege': 'READ_FILES', + 'directory_id': 'directory_id_1', + }, + { + 'prefix': 'prefix3', + 'client_id': 'app_secret3', + 'principal': 'principal_write', + 'privilege': 'WRITE_FILES', + 'directory_id': 'directory_id_2', + }, + { + 'prefix': 'overlap_with_external_location', + 'client_id': 'app_secret4', + 'principal': 'principal_overlap', + 'privilege': 'WRITE_FILES', + 'directory_id': 'directory_id_2', + }, + ], + } + ) def side_effect_create_storage_credential(name, azure_service_principal, comment, read_only): @@ -250,40 +274,12 @@ def test_validate_storage_credentials_failed_operation(credential_manager): @pytest.fixture -def sp_migration(ws, credential_manager): +def sp_migration(ws, installation, credential_manager): ws.secrets.get_secret.return_value = GetSecretResponse(value="aGVsbG8gd29ybGQ=") - arp = create_autospec(AzureResourcePermissions) - 
arp.load.return_value = [ - StoragePermissionMapping( - "prefix1", - "app_secret1", - "principal_1", - "WRITE_FILES", - "directory_id_1", - ), - StoragePermissionMapping( - "prefix2", - "app_secret2", - "principal_read", - "READ_FILES", - "directory_id_1", - ), - StoragePermissionMapping( - "prefix3", - "app_secret3", - "principal_write", - "WRITE_FILES", - "directory_id_2", - ), - StoragePermissionMapping( - "overlap_with_external_location", - "app_secret4", - "principal_overlap", - "WRITE_FILES", - "directory_id_2", - ), - ] + arp = AzureResourcePermissions( + installation, ws, create_autospec(AzureResources), create_autospec(ExternalLocations) + ) sp_crawler = create_autospec(AzureServicePrincipalCrawler) sp_crawler.snapshot.return_value = [ @@ -293,29 +289,22 @@ def sp_migration(ws, credential_manager): AzureServicePrincipalInfo("app_secret4", "", "", "tenant_id_2", "storage1"), ] - return ServicePrincipalMigration(MockInstallation(), ws, arp, sp_crawler, credential_manager) - + return ServicePrincipalMigration(installation, ws, arp, sp_crawler, credential_manager) -def test_for_cli_not_azure(caplog, ws): - ws.config.is_azure = False - with pytest.raises(SystemExit): - ServicePrincipalMigration.for_cli(ws, MagicMock()) - assert "Workspace is not on azure, please run this command on azure databricks workspaces." in caplog.text - -def test_for_cli_not_prompts(ws): +def test_for_cli_not_prompts(ws, installation): ws.config.is_azure = True prompts = MockPrompts({"Have you reviewed the azure_storage_account_info.csv *": "No"}) with pytest.raises(SystemExit): - ServicePrincipalMigration.for_cli(ws, prompts) + ServicePrincipalMigration.for_cli(ws, installation, prompts) -def test_for_cli(ws): +def test_for_cli(ws, installation): ws.config.is_azure = True ws.config.auth_type = "azure-cli" prompts = MockPrompts({"Have you reviewed the azure_storage_account_info.csv *": "Yes"}) - assert isinstance(ServicePrincipalMigration.for_cli(ws, prompts), ServicePrincipalMigration) + assert isinstance(ServicePrincipalMigration.for_cli(ws, installation, prompts), ServicePrincipalMigration) @pytest.mark.parametrize( @@ -373,12 +362,18 @@ def test_run_without_confirmation(ws, sp_migration): assert sp_migration.run(prompts) == [] -def test_run(ws, sp_migration): +def test_run(ws, installation, sp_migration): prompts = MockPrompts({"Above Azure Service Principals will be migrated to UC storage credentials*": "Yes"}) - results = sp_migration.run(prompts) - for result in results: - if result.name != "principal_1": - assert ( - False - ), "Service principal with no client_secret in databricks secret or already be used in storage credential should not be migrated" + sp_migration.run(prompts) + installation.assert_file_written( + "azure_service_principal_migration_result.csv", + [ + { + 'application_id': 'app_secret1', + 'directory_id': 'directory_id_1', + 'name': 'principal_1', + 'validated_on': 'prefix1', + } + ], + ) diff --git a/tests/unit/test_cli.py b/tests/unit/test_cli.py index d1d1d62ee0..2ddaddd9c2 100644 --- a/tests/unit/test_cli.py +++ b/tests/unit/test_cli.py @@ -15,7 +15,7 @@ ensure_assessment_run, installations, manual_workspace_info, - migrate_azure_service_principals, + migrate_credentials, move, open_remote_config, principal_prefix_access, @@ -311,9 +311,9 @@ def test_save_storage_and_principal_gcp(ws, caplog): assert "This cmd is only supported for azure and aws workspaces" in caplog.messages -def test_migrate_azure_service_principals(ws): +def test_migrate_credentials_azure(ws): 
ws.config.is_azure = True ws.workspace.upload.return_value = "test" with patch("databricks.labs.blueprint.tui.Prompts.confirm", return_value=True): - migrate_azure_service_principals(ws) + migrate_credentials(ws) ws.storage_credentials.list.assert_called() From 71643da8e73c0875f09457c2063cc22a2bb5240c Mon Sep 17 00:00:00 2001 From: Ziyuan Qin Date: Thu, 22 Feb 2024 16:13:56 -0800 Subject: [PATCH 74/76] Remove StaticStorageCredentialManager and add include_names for integration test --- src/databricks/labs/ucx/azure/credentials.py | 18 +++++++++++---- tests/integration/azure/test_credentials.py | 11 +++++---- tests/integration/conftest.py | 24 -------------------- tests/unit/azure/test_credentials.py | 5 ++++ 4 files changed, 24 insertions(+), 34 deletions(-) diff --git a/src/databricks/labs/ucx/azure/credentials.py b/src/databricks/labs/ucx/azure/credentials.py index 9e20fd576b..4592d4eb1f 100644 --- a/src/databricks/labs/ucx/azure/credentials.py +++ b/src/databricks/labs/ucx/azure/credentials.py @@ -57,12 +57,20 @@ class StorageCredentialManager: def __init__(self, ws: WorkspaceClient): self._ws = ws - def list(self) -> set[str]: + def list(self, include_names: set[str] | None = None) -> set[str]: # list existed storage credentials that is using service principal, capture the service principal's application_id application_ids = set() storage_credentials = self._ws.storage_credentials.list(max_results=0) + if include_names: + # we only check UC storage credentials listed in include_names + for storage_credential in storage_credentials: + if storage_credential.name in include_names: + application_ids.add(storage_credential.azure_service_principal.application_id) + logger.info(f"Found {len(application_ids)} distinct service principals already used in UC storage credentials listed in include_names") + return application_ids + for storage_credential in storage_credentials: # only add service principal's application_id, ignore managed identity based storage_credential if storage_credential.azure_service_principal: @@ -215,14 +223,14 @@ def _print_action_plan(self, sp_list: list[StoragePermissionMapping]): f"on location {spn.prefix}" ) - def _generate_migration_list(self) -> list[ServicePrincipalMigrationInfo]: + def _generate_migration_list(self, include_names: set[str] | None = None) -> list[ServicePrincipalMigrationInfo]: """ Create the list of SP that need to be migrated, output an action plan as a csv file for users to confirm """ # load sp list from azure_storage_account_info.csv sp_list = self._resource_permissions.load() # list existed storage credentials - sc_set = self._storage_credential_manager.list() + sc_set = self._storage_credential_manager.list(include_names) # check if the sp is already used in UC storage credential filtered_sp_list = [sp for sp in sp_list if sp.client_id not in sc_set] # fetch sp client_secret if any @@ -238,9 +246,9 @@ def _generate_migration_list(self) -> list[ServicePrincipalMigrationInfo]: def save(self, migration_results: list[StorageCredentialValidationResult]) -> str: return self._installation.save(migration_results, filename=self._output_file) - def run(self, prompts: Prompts) -> list[StorageCredentialValidationResult]: + def run(self, prompts: Prompts, include_names: set[str] | None = None) -> list[StorageCredentialValidationResult]: - sp_list_with_secret = self._generate_migration_list() + sp_list_with_secret = self._generate_migration_list(include_names) plan_confirmed = prompts.confirm( "Above Azure Service Principals will be migrated to UC storage 
credentials, please review and confirm." diff --git a/tests/integration/azure/test_credentials.py b/tests/integration/azure/test_credentials.py index 37c593858a..9446eed8d6 100644 --- a/tests/integration/azure/test_credentials.py +++ b/tests/integration/azure/test_credentials.py @@ -8,14 +8,14 @@ from databricks.labs.ucx.assessment.azure import AzureServicePrincipalInfo from databricks.labs.ucx.azure.access import StoragePermissionMapping -from databricks.labs.ucx.azure.credentials import StorageCredentialValidationResult +from databricks.labs.ucx.azure.credentials import StorageCredentialValidationResult, \ + StorageCredentialManager from databricks.labs.ucx.azure.resources import AzureResources from databricks.labs.ucx.hive_metastore import ExternalLocations from tests.integration.conftest import ( StaticResourcePermissions, StaticServicePrincipalCrawler, - StaticServicePrincipalMigration, - StaticStorageCredentialManager, + StaticServicePrincipalMigration ) @@ -85,10 +85,11 @@ def inner( sp_crawler = StaticServicePrincipalCrawler(sp_infos, ws, sql_backend, "dont_need_a_schema") spn_migration = StaticServicePrincipalMigration( - installation, ws, resource_permissions, sp_crawler, StaticStorageCredentialManager(ws, credentials) + installation, ws, resource_permissions, sp_crawler, StorageCredentialManager(ws) ) return spn_migration.run( - MockPrompts({"Above Azure Service Principals will be migrated to UC storage credentials *": "Yes"}) + MockPrompts({"Above Azure Service Principals will be migrated to UC storage credentials *": "Yes"}), + credentials ) return inner diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index 85d9ce9d77..f97fa3095e 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -174,30 +174,6 @@ def save(self, migration_results: list[StorageCredentialValidationResult]) -> st return "azure_service_principal_migration_result.csv" -class StaticStorageCredentialManager(StorageCredentialManager): - # During integration test, we only want to list storage_credentials that are created during the test. - # So we provide a credential name list so the test can ignore credentials that are not in the list. 
- def __init__(self, ws_client: WorkspaceClient, credential_names: set[str]): - super().__init__(ws_client) - self._credential_names = credential_names - - def list(self) -> set[str]: - application_ids = set() - - storage_credentials = self._ws.storage_credentials.list(max_results=0) - - for storage_credential in storage_credentials: - if not storage_credential.azure_service_principal: - continue - if storage_credential.name in self._credential_names: - application_ids.add(storage_credential.azure_service_principal.application_id) - - logger.info( - f"Found {len(application_ids)} distinct service principals already used in storage credentials during integration test" - ) - return application_ids - - class StaticServicePrincipalCrawler(AzureServicePrincipalCrawler): def __init__(self, spn_infos: list[AzureServicePrincipalInfo], *args): super().__init__(*args) diff --git a/tests/unit/azure/test_credentials.py b/tests/unit/azure/test_credentials.py index cb4163b004..bab89ca157 100644 --- a/tests/unit/azure/test_credentials.py +++ b/tests/unit/azure/test_credentials.py @@ -128,6 +128,7 @@ def credential_manager(ws): azure_managed_identity=AzureManagedIdentity("/subscriptions/.../providers/Microsoft.Databricks/...") ), StorageCredentialInfo( + name = "included_test", azure_service_principal=AzureServicePrincipal( "62e43d7d-df53-4c64-86ed-c2c1a3ac60c3", "b6420590-5e1c-4426-8950-a94cbe9b6115", @@ -146,6 +147,10 @@ def credential_manager(ws): def test_list_storage_credentials(credential_manager): assert credential_manager.list() == {"b6420590-5e1c-4426-8950-a94cbe9b6115", "app_secret2"} +def test_list_included_storage_credentials(credential_manager): + include_names = {"included_test"} + assert credential_manager.list(include_names) == {"b6420590-5e1c-4426-8950-a94cbe9b6115"} + def test_create_storage_credentials(credential_manager): sp_1 = ServicePrincipalMigrationInfo( From dab2bc56b5ba964e73f1ba47143423120bc21abc Mon Sep 17 00:00:00 2001 From: Ziyuan Qin Date: Thu, 22 Feb 2024 16:29:53 -0800 Subject: [PATCH 75/76] Remove StaticServicePrincipalMigration and StaticResourcePermissions and replace it with MockInstallation --- src/databricks/labs/ucx/azure/credentials.py | 6 ++- tests/integration/azure/test_credentials.py | 48 ++++++++++---------- tests/integration/conftest.py | 23 ---------- tests/unit/azure/test_credentials.py | 5 +- 4 files changed, 33 insertions(+), 49 deletions(-) diff --git a/src/databricks/labs/ucx/azure/credentials.py b/src/databricks/labs/ucx/azure/credentials.py index 4592d4eb1f..ee68b87f89 100644 --- a/src/databricks/labs/ucx/azure/credentials.py +++ b/src/databricks/labs/ucx/azure/credentials.py @@ -66,9 +66,13 @@ def list(self, include_names: set[str] | None = None) -> set[str]: if include_names: # we only check UC storage credentials listed in include_names for storage_credential in storage_credentials: + if not storage_credential.azure_service_principal: + continue if storage_credential.name in include_names: application_ids.add(storage_credential.azure_service_principal.application_id) - logger.info(f"Found {len(application_ids)} distinct service principals already used in UC storage credentials listed in include_names") + logger.info( + f"Found {len(application_ids)} distinct service principals already used in UC storage credentials listed in include_names" + ) return application_ids for storage_credential in storage_credentials: diff --git a/tests/integration/azure/test_credentials.py b/tests/integration/azure/test_credentials.py index 9446eed8d6..7c3ecbfa55 100644 
--- a/tests/integration/azure/test_credentials.py +++ b/tests/integration/azure/test_credentials.py @@ -3,20 +3,19 @@ from dataclasses import dataclass import pytest -from databricks.labs.blueprint.installation import Installation +from databricks.labs.blueprint.installation import MockInstallation from databricks.labs.blueprint.tui import MockPrompts from databricks.labs.ucx.assessment.azure import AzureServicePrincipalInfo -from databricks.labs.ucx.azure.access import StoragePermissionMapping -from databricks.labs.ucx.azure.credentials import StorageCredentialValidationResult, \ - StorageCredentialManager +from databricks.labs.ucx.azure.access import AzureResourcePermissions +from databricks.labs.ucx.azure.credentials import ( + ServicePrincipalMigration, + StorageCredentialManager, + StorageCredentialValidationResult, +) from databricks.labs.ucx.azure.resources import AzureResources from databricks.labs.ucx.hive_metastore import ExternalLocations -from tests.integration.conftest import ( - StaticResourcePermissions, - StaticServicePrincipalCrawler, - StaticServicePrincipalMigration -) +from tests.integration.conftest import StaticServicePrincipalCrawler @dataclass @@ -54,24 +53,27 @@ def extract_test_info(ws, env_or_skip, make_random): @pytest.fixture -def run_migration(ws, sql_backend, make_random): +def run_migration(ws, sql_backend): def inner( test_info: MigrationTestInfo, credentials: set[str], read_only=False ) -> list[StorageCredentialValidationResult]: - installation = Installation(ws, make_random(4)) azurerm = AzureResources(ws) locations = ExternalLocations(ws, sql_backend, "dont_need_a_schema") - permission_mappings = [ - StoragePermissionMapping( - "abfss://things@labsazurethings.dfs.core.windows.net/avoid_ext_loc_overlap", - test_info.application_id, - test_info.credential_name, - "READ_FILES" if read_only else "WRITE_FILES", - test_info.directory_id, - ) - ] - resource_permissions = StaticResourcePermissions(permission_mappings, installation, ws, azurerm, locations) + installation = MockInstallation( + { + "azure_storage_account_info.csv": [ + { + 'prefix': 'abfss://things@labsazurethings.dfs.core.windows.net/avoid_ext_loc_overlap', + 'client_id': test_info.application_id, + 'principal': test_info.credential_name, + 'privilege': "READ_FILES" if read_only else "WRITE_FILES", + 'directory_id': test_info.directory_id, + }, + ] + } + ) + resource_permissions = AzureResourcePermissions(installation, ws, azurerm, locations) sp_infos = [ AzureServicePrincipalInfo( @@ -84,12 +86,12 @@ def inner( ] sp_crawler = StaticServicePrincipalCrawler(sp_infos, ws, sql_backend, "dont_need_a_schema") - spn_migration = StaticServicePrincipalMigration( + spn_migration = ServicePrincipalMigration( installation, ws, resource_permissions, sp_crawler, StorageCredentialManager(ws) ) return spn_migration.run( MockPrompts({"Above Azure Service Principals will be migrated to UC storage credentials *": "Yes"}), - credentials + credentials, ) return inner diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index f97fa3095e..5f7beaf291 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -14,15 +14,6 @@ AzureServicePrincipalCrawler, AzureServicePrincipalInfo, ) -from databricks.labs.ucx.azure.access import ( - AzureResourcePermissions, - StoragePermissionMapping, -) -from databricks.labs.ucx.azure.credentials import ( - ServicePrincipalMigration, - StorageCredentialManager, - StorageCredentialValidationResult, -) from 
databricks.labs.ucx.framework.crawlers import SqlBackend from databricks.labs.ucx.hive_metastore import TablesCrawler from databricks.labs.ucx.hive_metastore.mapping import Rule, TableMapping @@ -169,11 +160,6 @@ def save(self, tables: TablesCrawler, workspace_info: WorkspaceInfo) -> str: raise RuntimeWarning("not available") -class StaticServicePrincipalMigration(ServicePrincipalMigration): - def save(self, migration_results: list[StorageCredentialValidationResult]) -> str: - return "azure_service_principal_migration_result.csv" - - class StaticServicePrincipalCrawler(AzureServicePrincipalCrawler): def __init__(self, spn_infos: list[AzureServicePrincipalInfo], *args): super().__init__(*args) @@ -181,12 +167,3 @@ def __init__(self, spn_infos: list[AzureServicePrincipalInfo], *args): def snapshot(self) -> list[AzureServicePrincipalInfo]: return self._spn_infos - - -class StaticResourcePermissions(AzureResourcePermissions): - def __init__(self, permission_mappings: list[StoragePermissionMapping], *args): - super().__init__(*args) - self._permission_mappings = permission_mappings - - def load(self) -> list[StoragePermissionMapping]: - return self._permission_mappings diff --git a/tests/unit/azure/test_credentials.py b/tests/unit/azure/test_credentials.py index bab89ca157..3a5682e1c9 100644 --- a/tests/unit/azure/test_credentials.py +++ b/tests/unit/azure/test_credentials.py @@ -128,12 +128,12 @@ def credential_manager(ws): azure_managed_identity=AzureManagedIdentity("/subscriptions/.../providers/Microsoft.Databricks/...") ), StorageCredentialInfo( - name = "included_test", + name="included_test", azure_service_principal=AzureServicePrincipal( "62e43d7d-df53-4c64-86ed-c2c1a3ac60c3", "b6420590-5e1c-4426-8950-a94cbe9b6115", "secret", - ) + ), ), StorageCredentialInfo(azure_service_principal=AzureServicePrincipal("directory_id_1", "app_secret2", "secret")), ] @@ -147,6 +147,7 @@ def credential_manager(ws): def test_list_storage_credentials(credential_manager): assert credential_manager.list() == {"b6420590-5e1c-4426-8950-a94cbe9b6115", "app_secret2"} + def test_list_included_storage_credentials(credential_manager): include_names = {"included_test"} assert credential_manager.list(include_names) == {"b6420590-5e1c-4426-8950-a94cbe9b6115"} From 448e6416c72c492472f31d7808fa4e2dade5988b Mon Sep 17 00:00:00 2001 From: Ziyuan Qin Date: Thu, 22 Feb 2024 17:39:21 -0800 Subject: [PATCH 76/76] let validate function take StoragePermissionMapping as input; Remove mock for dataclass in unit test. 
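
With this change the validator no longer needs the StorageCredentialInfo that
was just created: the name, application_id, directory_id and read_only flag
can all be derived from the StoragePermissionMapping row. A rough usage sketch
under that assumption (the manager variable and all field values below are
illustrative, not taken from a real workspace):

    mapping = StoragePermissionMapping(
        prefix="abfss://container@account.dfs.core.windows.net/path",
        client_id="00000000-0000-0000-0000-000000000000",
        principal="my_storage_credential",
        privilege="READ_FILES",
        directory_id="11111111-1111-1111-1111-111111111111",
    )
    # no StorageCredentialInfo argument any more
    result = manager.validate(mapping)
    # read_only is inferred from the privilege, as in create_with_client_secret
    assert result.read_only is (mapping.privilege == Privilege.READ_FILES.value)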
--- src/databricks/labs/ucx/azure/credentials.py | 46 +++++------ tests/unit/azure/test_credentials.py | 80 ++++---------------- 2 files changed, 37 insertions(+), 89 deletions(-) diff --git a/src/databricks/labs/ucx/azure/credentials.py b/src/databricks/labs/ucx/azure/credentials.py index ee68b87f89..c1182587c0 100644 --- a/src/databricks/labs/ucx/azure/credentials.py +++ b/src/databricks/labs/ucx/azure/credentials.py @@ -35,21 +35,22 @@ class ServicePrincipalMigrationInfo: @dataclass class StorageCredentialValidationResult: - name: str | None = None - application_id: str | None = None - directory_id: str | None = None - read_only: bool | None = None - validated_on: str | None = None + name: str + application_id: str + directory_id: str + read_only: bool + validated_on: str failures: list[str] | None = None @classmethod - def from_validation(cls, storage_credential: StorageCredentialInfo, failures: list[str] | None, prefix: str): - if storage_credential.azure_service_principal: - application_id = storage_credential.azure_service_principal.application_id - directory_id = storage_credential.azure_service_principal.directory_id - + def from_validation(cls, permission_mapping: StoragePermissionMapping, failures: list[str] | None): return cls( - storage_credential.name, application_id, directory_id, storage_credential.read_only, prefix, failures + permission_mapping.principal, + permission_mapping.client_id, + permission_mapping.directory_id, + permission_mapping.privilege == Privilege.READ_FILES.value, + permission_mapping.prefix, + failures, ) @@ -103,14 +104,12 @@ def create_with_client_secret(self, spn: ServicePrincipalMigrationInfo) -> Stora read_only=spn.permission_mapping.privilege == Privilege.READ_FILES.value, ) - def validate( - self, storage_credential: StorageCredentialInfo, spn: ServicePrincipalMigrationInfo - ) -> StorageCredentialValidationResult: + def validate(self, permission_mapping: StoragePermissionMapping) -> StorageCredentialValidationResult: try: validation = self._ws.storage_credentials.validate( - storage_credential_name=storage_credential.name, - url=spn.permission_mapping.prefix, - read_only=spn.permission_mapping.privilege == Privilege.READ_FILES.value, + storage_credential_name=permission_mapping.principal, + url=permission_mapping.prefix, + read_only=permission_mapping.privilege == Privilege.READ_FILES.value, ) except InvalidParameterValue: logger.warning( @@ -119,17 +118,16 @@ def validate( "Skip the validation" ) return StorageCredentialValidationResult.from_validation( - storage_credential, + permission_mapping, [ "The validation is skipped because an existing external location overlaps " "with the location used for validation." 
], - spn.permission_mapping.prefix, ) if not validation.results: return StorageCredentialValidationResult.from_validation( - storage_credential, ["Validation returned none results."], spn.permission_mapping.prefix + permission_mapping, ["Validation returned none results."] ) failures = [] @@ -138,9 +136,7 @@ def validate( continue if result.result == ValidationResultResult.FAIL: failures.append(f"{result.operation.value} validation failed with message: {result.message}") - return StorageCredentialValidationResult.from_validation( - storage_credential, None if not failures else failures, spn.permission_mapping.prefix - ) + return StorageCredentialValidationResult.from_validation(permission_mapping, None if not failures else failures) class ServicePrincipalMigration(SecretsMixin): @@ -262,8 +258,8 @@ def run(self, prompts: Prompts, include_names: set[str] | None = None) -> list[S execution_result = [] for spn in sp_list_with_secret: - storage_credential = self._storage_credential_manager.create_with_client_secret(spn) - execution_result.append(self._storage_credential_manager.validate(storage_credential, spn)) + self._storage_credential_manager.create_with_client_secret(spn) + execution_result.append(self._storage_credential_manager.validate(spn.permission_mapping)) if execution_result: results_file = self.save(execution_result) diff --git a/tests/unit/azure/test_credentials.py b/tests/unit/azure/test_credentials.py index 3a5682e1c9..c7996bf27f 100644 --- a/tests/unit/azure/test_credentials.py +++ b/tests/unit/azure/test_credentials.py @@ -1,6 +1,6 @@ import logging import re -from unittest.mock import MagicMock, create_autospec +from unittest.mock import create_autospec import pytest from databricks.labs.blueprint.installation import MockInstallation @@ -185,97 +185,49 @@ def test_create_storage_credentials(credential_manager): def test_validate_storage_credentials(credential_manager): - service_principal = MagicMock() - service_principal.permission_mapping.privilege = "WRITE_FILES" - - storage_credential = StorageCredentialInfo( - name="principal_1", - azure_service_principal=AzureServicePrincipal( - "directory_id_1", - "client_id", - "test", - ), - read_only=False, - ) + permission_mapping = StoragePermissionMapping("prefix", "client_id", "principal_1", "WRITE_FILES", "directory_id") # validate normal storage credential - validation = credential_manager.validate(storage_credential, service_principal) + validation = credential_manager.validate(permission_mapping) assert validation.read_only is False - assert validation.name == storage_credential.name + assert validation.name == permission_mapping.principal assert not validation.failures def test_validate_read_only_storage_credentials(credential_manager): - service_principal = MagicMock() - service_principal.permission_mapping.privilege = "READ_FILES" - - storage_credential = StorageCredentialInfo( - name="principal_read", - azure_service_principal=AzureServicePrincipal( - "directory_id_1", - "client_id", - "test", - ), - read_only=True, + permission_mapping = StoragePermissionMapping( + "prefix", "client_id", "principal_read", "READ_FILES", "directory_id_1" ) # validate read-only storage credential - validation = credential_manager.validate(storage_credential, service_principal) + validation = credential_manager.validate(permission_mapping) assert validation.read_only is True - assert validation.name == storage_credential.name + assert validation.name == permission_mapping.principal assert not validation.failures def 
test_validate_storage_credentials_overlap_location(credential_manager): - service_principal = MagicMock() - service_principal.permission_mapping.privilege = "WRITE_FILES" - - storage_credential = StorageCredentialInfo( - name="overlap", - azure_service_principal=AzureServicePrincipal( - "directory_id_2", - "client_id", - "test", - ), - ) + permission_mapping = StoragePermissionMapping("prefix", "client_id", "overlap", "WRITE_FILES", "directory_id_2") # prefix used for validation overlaps with existing external location will raise InvalidParameterValue # assert InvalidParameterValue is handled - validation = credential_manager.validate(storage_credential, service_principal) + validation = credential_manager.validate(permission_mapping) assert validation.failures == [ "The validation is skipped because an existing external location overlaps with the location used for validation." ] def test_validate_storage_credentials_non_response(credential_manager): - service_principal = MagicMock() - service_principal.permission_mapping.privilege = "WRITE_FILES" - - storage_credential = StorageCredentialInfo( - name="none", - azure_service_principal=AzureServicePrincipal( - "directory_id_2", - "client_id", - "test", - ), - ) - validation = credential_manager.validate(storage_credential, service_principal) + permission_mapping = StoragePermissionMapping("prefix", "client_id", "none", "WRITE_FILES", "directory_id") + + validation = credential_manager.validate(permission_mapping) assert validation.failures == ["Validation returned none results."] def test_validate_storage_credentials_failed_operation(credential_manager): - service_principal = MagicMock() - service_principal.permission_mapping.privilege = "WRITE_FILES" - - storage_credential = StorageCredentialInfo( - name="fail", - azure_service_principal=AzureServicePrincipal( - "directory_id_2", - "client_id", - "test", - ), - ) - validation = credential_manager.validate(storage_credential, service_principal) + permission_mapping = StoragePermissionMapping("prefix", "client_id", "fail", "WRITE_FILES", "directory_id_2") + + validation = credential_manager.validate(permission_mapping) assert validation.failures == ["LIST validation failed with message: fail"]
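
For reference, the failure strings these unit tests assert on come from a
small collection loop in the validate method above; reduced to its core, it
behaves like this sketch (ValidationResultResult is the Databricks SDK enum
the module imports):

    failures = []
    for result in validation.results:
        if result.operation is None:
            # a result without an operation cannot be attributed, so it is skipped
            continue
        if result.result == ValidationResultResult.FAIL:
            failures.append(f"{result.operation.value} validation failed with message: {result.message}")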