From d5a9d6631bee5cd169aab5548fd7415e0d5e9a23 Mon Sep 17 00:00:00 2001 From: Ziyuan Qin Date: Fri, 15 Mar 2024 17:37:38 -0700 Subject: [PATCH 1/3] skip unsupported location while migrating to external location. Remove mock.patch the api call in unit test --- src/databricks/labs/ucx/azure/locations.py | 11 +++ tests/unit/azure/test_locations.py | 98 ++++++++++++++++++---- 2 files changed, 91 insertions(+), 18 deletions(-) diff --git a/src/databricks/labs/ucx/azure/locations.py b/src/databricks/labs/ucx/azure/locations.py index 8397e20328..bc023f6d32 100644 --- a/src/databricks/labs/ucx/azure/locations.py +++ b/src/databricks/labs/ucx/azure/locations.py @@ -162,11 +162,22 @@ def _create_external_location( # if no credential found return None + def _rm_unsupported_location(self, location_urls: list[str]) -> list[str]: + # remove unsupported external location + supported_urls = [] + for url in location_urls: + if url.startswith("abfss://"): + supported_urls.append(url) + continue + logger.warning(f"Skip unsupported location: {url}") + return supported_urls + def run(self): # list missing external locations in UC _, missing_locations = self._hms_locations.match_table_external_locations() # Extract the location URLs from the missing locations missing_loc_urls = [loc.location for loc in missing_locations] + missing_loc_urls = self._rm_unsupported_location(missing_loc_urls) # get prefix to storage credential name mapping prefix_mapping_write, prefix_mapping_read = self._prefix_credential_name_mapping() diff --git a/tests/unit/azure/test_locations.py b/tests/unit/azure/test_locations.py index eb27b2ecb8..bde5a0b308 100644 --- a/tests/unit/azure/test_locations.py +++ b/tests/unit/azure/test_locations.py @@ -16,9 +16,9 @@ from databricks.labs.ucx.azure.access import AzureResourcePermissions from databricks.labs.ucx.azure.locations import ExternalLocationsMigration -from databricks.labs.ucx.azure.resources import AzureAPIClient, AzureResources +from 
databricks.labs.ucx.azure.resources import AzureResources from databricks.labs.ucx.hive_metastore import ExternalLocations -from tests.unit.azure import get_az_api_mapping +from tests.unit.azure import azure_api_client @pytest.fixture @@ -27,13 +27,7 @@ def ws(): def location_migration_for_test(ws, mock_backend, mock_installation): - azure_mgmt_client = AzureAPIClient( - ws.config.arm_environment.resource_manager_endpoint, - ws.config.arm_environment.service_management_endpoint, - ) - graph_client = AzureAPIClient("https://graph.microsoft.com", "https://graph.microsoft.com") - azurerm = AzureResources(azure_mgmt_client, graph_client) - + azurerm = AzureResources(azure_api_client(), azure_api_client()) location_crawler = ExternalLocations(ws, mock_backend, "location_test") return ExternalLocationsMigration( @@ -120,6 +114,83 @@ def test_run_service_principal(ws): ) +def test_run_unsupported_location(ws, caplog): + """test run with service principal based storage credentials""" + + # mock crawled HMS external locations with two unsupported locations adl and wasbs + row_factory = type("Row", (Row,), {"__columns__": ["location", "table_count"]}) + mock_backend = MockBackend( + rows={ + r"SELECT \* FROM location_test.external_locations": [ + row_factory(["abfss://container1@test.dfs.core.windows.net/one/", 1]), + row_factory(["adl://container2@test.dfs.core.windows.net/", 2]), + row_factory(["wasbs://container2@test.dfs.core.windows.net/", 2]), + ] + } + ) + + # mock listing storage credentials + ws.storage_credentials.list.return_value = [ + StorageCredentialInfo( + name="credential_sp1", + azure_service_principal=AzureServicePrincipal( + "directory_id_1", + "application_id_1", + "test_secret", + ), + ) + ] + + # mock listing UC external locations, no HMS external location will be matched + ws.external_locations.list.return_value = [ExternalLocationInfo(name="none", url="none")] + + # mock installation with permission mapping + mock_installation = MockInstallation( + { 
+ "azure_storage_account_info.csv": [ + { + 'prefix': 'abfss://container1@test.dfs.core.windows.net/', + 'client_id': 'application_id_1', + 'principal': 'credential_sp1', + 'privilege': 'WRITE_FILES', + 'type': 'Application', + 'directory_id': 'directory_id_1', + }, + { + 'prefix': 'adl://container2@test.dfs.core.windows.net/', + 'client_id': 'application_id_1', + 'principal': 'credential_sp1', + 'privilege': 'WRITE_FILES', + 'type': 'Application', + 'directory_id': 'directory_id_1', + }, + { + 'prefix': 'wasbs://container2@test.dfs.core.windows.net/', + 'client_id': 'application_id_1', + 'principal': 'credential_sp1', + 'privilege': 'WRITE_FILES', + 'type': 'Application', + 'directory_id': 'directory_id_1', + }, + ], + } + ) + + location_migration = location_migration_for_test(ws, mock_backend, mock_installation) + location_migration.run() + + ws.external_locations.create.assert_called_once_with( + "container1_test_one", + "abfss://container1@test.dfs.core.windows.net/one/", + "credential_sp1", + comment="Created by UCX", + read_only=False, + skip_validation=False, + ) + assert "Skip unsupported location: adl://container2@test.dfs.core.windows.net" in caplog.text + assert "Skip unsupported location: wasbs://container2@test.dfs.core.windows.net" in caplog.text + + def test_run_managed_identity(ws, mocker): """test run with managed identity based storage credentials""" @@ -177,11 +248,6 @@ def test_run_managed_identity(ws, mocker): } ) - # mock Azure resource manager and graph API calls for getting application_id of managed identity - # TODO: (qziyuan) use a better way to mock the API calls - # pylint: disable-next=prohibited-patch - mocker.patch("databricks.sdk.core.ApiClient.do", side_effect=get_az_api_mapping) - location_migration = location_migration_for_test(ws, mock_backend, mock_installation) location_migration.run() @@ -399,10 +465,6 @@ def test_corner_cases_with_missing_fields(ws, caplog, mocker): ], } ) - # return None when getting application_id of 
managed identity - # TODO: (qziyuan) use a better way to mock the API calls - # pylint: disable-next=prohibited-patch - mocker.patch("databricks.sdk.core.ApiClient.do", return_value={"dummy": "dummy"}) location_migration = location_migration_for_test(ws, mock_backend, mock_installation) location_migration.run() From 0ff38837b23d6c07491a948b9b32ce3aff91110d Mon Sep 17 00:00:00 2001 From: Ziyuan Qin Date: Fri, 15 Mar 2024 17:42:07 -0700 Subject: [PATCH 2/3] fix function comment --- tests/unit/azure/test_locations.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/azure/test_locations.py b/tests/unit/azure/test_locations.py index bde5a0b308..7fef0c990c 100644 --- a/tests/unit/azure/test_locations.py +++ b/tests/unit/azure/test_locations.py @@ -115,7 +115,7 @@ def test_run_service_principal(ws): def test_run_unsupported_location(ws, caplog): - """test run with service principal based storage credentials""" + """test unsupported location will not be migrated""" # mock crawled HMS external locations with two unsupported locations adl and wasbs row_factory = type("Row", (Row,), {"__columns__": ["location", "table_count"]}) From ebba5b7b144c1bf02f66f0141aba653431e818f3 Mon Sep 17 00:00:00 2001 From: Ziyuan Qin Date: Sun, 17 Mar 2024 18:59:37 -0700 Subject: [PATCH 3/3] update unit test to use new MockBackend.rows. Rename function name from _rm to _filter. 
--- src/databricks/labs/ucx/azure/locations.py | 4 +- tests/unit/azure/test_locations.py | 49 +++++++++------------- 2 files changed, 22 insertions(+), 31 deletions(-) diff --git a/src/databricks/labs/ucx/azure/locations.py b/src/databricks/labs/ucx/azure/locations.py index bc023f6d32..8faa013a24 100644 --- a/src/databricks/labs/ucx/azure/locations.py +++ b/src/databricks/labs/ucx/azure/locations.py @@ -162,7 +162,7 @@ def _create_external_location( # if no credential found return None - def _rm_unsupported_location(self, location_urls: list[str]) -> list[str]: + def _filter_unsupported_location(self, location_urls: list[str]) -> list[str]: # remove unsupported external location supported_urls = [] for url in location_urls: @@ -177,7 +177,7 @@ def run(self): _, missing_locations = self._hms_locations.match_table_external_locations() # Extract the location URLs from the missing locations missing_loc_urls = [loc.location for loc in missing_locations] - missing_loc_urls = self._rm_unsupported_location(missing_loc_urls) + missing_loc_urls = self._filter_unsupported_location(missing_loc_urls) # get prefix to storage credential name mapping prefix_mapping_write, prefix_mapping_read = self._prefix_credential_name_mapping() diff --git a/tests/unit/azure/test_locations.py b/tests/unit/azure/test_locations.py index 7fef0c990c..8bc024281f 100644 --- a/tests/unit/azure/test_locations.py +++ b/tests/unit/azure/test_locations.py @@ -3,7 +3,6 @@ import pytest from databricks.labs.blueprint.installation import MockInstallation -from databricks.labs.lsql import Row from databricks.labs.lsql.backends import MockBackend from databricks.sdk import WorkspaceClient from databricks.sdk.errors.platform import InvalidParameterValue, PermissionDenied @@ -39,12 +38,11 @@ def test_run_service_principal(ws): """test run with service principal based storage credentials""" # mock crawled HMS external locations - row_factory = type("Row", (Row,), {"__columns__": ["location", "table_count"]}) 
mock_backend = MockBackend( rows={ - r"SELECT \* FROM location_test.external_locations": [ - row_factory(["abfss://container1@test.dfs.core.windows.net/one/", 1]), - row_factory(["abfss://container2@test.dfs.core.windows.net/", 2]), + r"SELECT \* FROM location_test.external_locations": MockBackend.rows("location", "table_count")[ + ("abfss://container1@test.dfs.core.windows.net/one/", 1), + ("abfss://container2@test.dfs.core.windows.net/", 2), ] } ) @@ -114,17 +112,14 @@ def test_run_service_principal(ws): ) -def test_run_unsupported_location(ws, caplog): - """test unsupported location will not be migrated""" - +def test_skip_unsupported_location(ws, caplog): # mock crawled HMS external locations with two unsupported locations adl and wasbs - row_factory = type("Row", (Row,), {"__columns__": ["location", "table_count"]}) mock_backend = MockBackend( rows={ - r"SELECT \* FROM location_test.external_locations": [ - row_factory(["abfss://container1@test.dfs.core.windows.net/one/", 1]), - row_factory(["adl://container2@test.dfs.core.windows.net/", 2]), - row_factory(["wasbs://container2@test.dfs.core.windows.net/", 2]), + r"SELECT \* FROM location_test.external_locations": MockBackend.rows("location", "table_count")[ + ("abfss://container1@test.dfs.core.windows.net/one/", 1), + ("adl://container2@test.dfs.core.windows.net/", 2), + ("wasbs://container2@test.dfs.core.windows.net/", 2), ] } ) @@ -195,12 +190,11 @@ def test_run_managed_identity(ws, mocker): """test run with managed identity based storage credentials""" # mock crawled HMS external locations - row_factory = type("Row", (Row,), {"__columns__": ["location", "table_count"]}) mock_backend = MockBackend( rows={ - r"SELECT \* FROM location_test.external_locations": [ - row_factory(["abfss://container4@test.dfs.core.windows.net/", 4]), - row_factory(["abfss://container5@test.dfs.core.windows.net/a/b/", 5]), + r"SELECT \* FROM location_test.external_locations": MockBackend.rows("location", "table_count")[ + 
("abfss://container4@test.dfs.core.windows.net/", 4), + ("abfss://container5@test.dfs.core.windows.net/a/b/", 5), ] } ) @@ -286,12 +280,11 @@ def test_location_failed_to_read(ws): """If read-only location is empty, READ permission check will fail with PermissionDenied""" # mock crawled HMS external locations - row_factory = type("Row", (Row,), {"__columns__": ["location", "table_count"]}) mock_backend = MockBackend( rows={ - r"SELECT \* FROM location_test.external_locations": [ - row_factory(["abfss://empty@test.dfs.core.windows.net/", 1]), - row_factory(["abfss://other_permission_denied@test.dfs.core.windows.net/", 2]), + r"SELECT \* FROM location_test.external_locations": MockBackend.rows("location", "table_count")[ + ("abfss://empty@test.dfs.core.windows.net/", 1), + ("abfss://other_permission_denied@test.dfs.core.windows.net/", 2), ] } ) @@ -354,12 +347,11 @@ def test_overlapping_locations(ws, caplog): caplog.set_level(logging.INFO) # mock crawled HMS external locations - row_factory = type("Row", (Row,), {"__columns__": ["location", "table_count"]}) mock_backend = MockBackend( rows={ - r"SELECT \* FROM location_test.external_locations": [ - row_factory(["abfss://overlap_location@test.dfs.core.windows.net/a/", 1]), - row_factory(["abfss://other_invalid_parameter@test.dfs.core.windows.net/a/", 1]), + r"SELECT \* FROM location_test.external_locations": MockBackend.rows("location", "table_count")[ + ("abfss://overlap_location@test.dfs.core.windows.net/a/", 1), + ("abfss://other_invalid_parameter@test.dfs.core.windows.net/a/", 1), ] } ) @@ -419,12 +411,11 @@ def test_corner_cases_with_missing_fields(ws, caplog, mocker): caplog.set_level(logging.INFO) # mock crawled HMS external locations - row_factory = type("Row", (Row,), {"__columns__": ["location", "table_count"]}) mock_backend = MockBackend( rows={ - r"SELECT \* FROM location_test.external_locations": [ - row_factory(["abfss://container1@test.dfs.core.windows.net/", 1]), - 
row_factory(["abfss://container2@test.dfs.core.windows.net/", 2]), + r"SELECT \* FROM location_test.external_locations": MockBackend.rows("location", "table_count")[ + ("abfss://container1@test.dfs.core.windows.net/", 1), + ("abfss://container2@test.dfs.core.windows.net/", 2), ] } )