From 7133ca9be26d7b870b6c1a745ca11b7fafb4a7f2 Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Mon, 1 Jul 2024 08:45:50 +0200 Subject: [PATCH 01/16] Add type hints to regex sub strategy --- src/databricks/labs/ucx/workspace_access/groups.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/databricks/labs/ucx/workspace_access/groups.py b/src/databricks/labs/ucx/workspace_access/groups.py index 4f502de923..879c4193cd 100644 --- a/src/databricks/labs/ucx/workspace_access/groups.py +++ b/src/databricks/labs/ucx/workspace_access/groups.py @@ -281,11 +281,12 @@ def generate_migrated_groups(self): class RegexSubStrategy(GroupMigrationStrategy): def __init__( self, - workspace_groups_in_workspace, - account_groups_in_account, - /, - renamed_groups_prefix, - include_group_names=None, + workspace_groups_in_workspace: dict[str, Group], + account_groups_in_account: dict[str, Group], + *, + # TODO: Check if hints below could be non optional + renamed_groups_prefix: str | None, + include_group_names: list[str] | None = None, workspace_group_regex: str | None = None, workspace_group_replace: str | None = None, ): From 841aa76c8390860c1d94c77029c13802291cd265 Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Mon, 1 Jul 2024 09:08:39 +0200 Subject: [PATCH 02/16] Move temporary name down --- src/databricks/labs/ucx/workspace_access/groups.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/databricks/labs/ucx/workspace_access/groups.py b/src/databricks/labs/ucx/workspace_access/groups.py index 879c4193cd..533ea81333 100644 --- a/src/databricks/labs/ucx/workspace_access/groups.py +++ b/src/databricks/labs/ucx/workspace_access/groups.py @@ -302,7 +302,6 @@ def __init__( def generate_migrated_groups(self): workspace_groups = self.get_filtered_groups() for group in workspace_groups.values(): - temporary_name = f"{self.renamed_groups_prefix}{group.display_name}" name_in_account = self._safe_sub( group.display_name, self.workspace_group_regex, self.workspace_group_replace ) @@ -312,6 +311,7 @@ def generate_migrated_groups(self): f"Couldn't find a matching account group for {group.display_name} group with regex substitution" ) continue + temporary_name = f"{self.renamed_groups_prefix}{group.display_name}" yield MigratedGroup( id_in_workspace=group.id, name_in_workspace=group.display_name, From dc9f02ea3383078400d7476a08047bb5f7591d6b Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Mon, 1 Jul 2024 09:09:03 +0200 Subject: [PATCH 03/16] Reorder init --- src/databricks/labs/ucx/workspace_access/groups.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/databricks/labs/ucx/workspace_access/groups.py b/src/databricks/labs/ucx/workspace_access/groups.py index 533ea81333..97ac7c07d3 100644 --- a/src/databricks/labs/ucx/workspace_access/groups.py +++ b/src/databricks/labs/ucx/workspace_access/groups.py @@ -296,8 +296,8 @@ def __init__( include_group_names=include_group_names, renamed_groups_prefix=renamed_groups_prefix, ) - self.workspace_group_replace = workspace_group_replace self.workspace_group_regex = workspace_group_regex + self.workspace_group_replace = workspace_group_replace def generate_migrated_groups(self): workspace_groups = self.get_filtered_groups() From f8d61ec88385f76bd005eb9da1ce9e7666df8ef3 Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Mon, 1 Jul 2024 09:12:54 +0200 Subject: [PATCH 04/16] Add type hint for generate migrated groups --- src/databricks/labs/ucx/workspace_access/groups.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/databricks/labs/ucx/workspace_access/groups.py b/src/databricks/labs/ucx/workspace_access/groups.py index 97ac7c07d3..107c1cbb2d 100644 --- a/src/databricks/labs/ucx/workspace_access/groups.py +++ b/src/databricks/labs/ucx/workspace_access/groups.py @@ -299,7 +299,7 @@ def __init__( self.workspace_group_regex = workspace_group_regex self.workspace_group_replace = workspace_group_replace - def generate_migrated_groups(self): + def generate_migrated_groups(self) -> Iterable[MigratedGroup]: workspace_groups = self.get_filtered_groups() for group in workspace_groups.values(): name_in_account = self._safe_sub( From 0b2dc402a28a3b1e6db02e52b412aa3675c5221d Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Mon, 1 Jul 2024 09:15:14 +0200 Subject: [PATCH 05/16] Add test to replace regex with empty string --- tests/unit/workspace_access/test_groups.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/tests/unit/workspace_access/test_groups.py b/tests/unit/workspace_access/test_groups.py index 6235260520..06a5d62dad 100644 --- a/tests/unit/workspace_access/test_groups.py +++ b/tests/unit/workspace_access/test_groups.py @@ -16,6 +16,7 @@ GroupManager, MigratedGroup, MigrationState, + RegexSubStrategy, ) @@ -1108,3 +1109,23 @@ def test_migration_state_with_filtered_group(): roles='', ) ] + + +def test_regex_sub_strategy_replaces_with_empty_replace(): + workspace_groups = {"group_old": Group("group_old")} + account_groups = {"group": Group("group")} + strategy = RegexSubStrategy( + workspace_groups, + account_groups, + renamed_groups_prefix="ucx-renamed-", + include_group_names=["group_old"], + workspace_group_regex="_old", + workspace_group_replace="", + ) + + migrated_group = next(strategy.generate_migrated_groups(), None) + + assert migrated_group is not None + assert migrated_group.name_in_workspace == "group_old" + assert migrated_group.name_in_account == "group" + assert migrated_group.temporary_name == "ucx-renamed-group_old" From 3ab908ff6dff0e28c95ef4e712a36751fcca2bc0 Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Mon, 1 Jul 2024 09:22:18 +0200 Subject: [PATCH 06/16] Remove duplicate mock --- tests/unit/workspace_access/test_groups.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/unit/workspace_access/test_groups.py b/tests/unit/workspace_access/test_groups.py index 06a5d62dad..c908c60202 100644 --- a/tests/unit/workspace_access/test_groups.py +++ b/tests/unit/workspace_access/test_groups.py @@ -145,7 +145,6 @@ def test_snapshot_should_consider_groups_defined_in_conf(): wsclient = create_autospec(WorkspaceClient) group1 = Group(id="1", display_name="de", meta=ResourceMeta(resource_type="WorkspaceGroup")) group2 = Group(id="2", display_name="ds", meta=ResourceMeta(resource_type="WorkspaceGroup")) - wsclient.groups.list.return_value = [group1, group2] acc_group_1 = Group(id="11", display_name="de", external_id="1234") acc_group_2 = Group(id="12", display_name="ds", external_id="1235") wsclient.api_client.do.return_value = { From beeb1ba230795f6740958acfdcb497399d6a2c8f Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Mon, 1 Jul 2024 09:41:22 +0200 Subject: [PATCH 07/16] Test substitute groups with empty string --- tests/unit/workspace_access/test_groups.py | 25 ++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/tests/unit/workspace_access/test_groups.py b/tests/unit/workspace_access/test_groups.py index c908c60202..610f1144ad 100644 --- a/tests/unit/workspace_access/test_groups.py +++ b/tests/unit/workspace_access/test_groups.py @@ -771,6 +771,31 @@ def test_snapshot_with_group_matched_by_external_id_not_found(caplog): assert "Couldn't find a matching account group for de_(1234) group with external_id" in caplog.text +def test_snapshot_migrated_groups_when_substitute_with_empty_string(): + backend = MockBackend() + + workspace_group = Group(display_name="group_old", id="1") + ws = create_autospec(WorkspaceClient) + ws.groups.list.return_value = [workspace_group] + ws.groups.get.return_value = workspace_group + + account_group = Group(display_name="group") + ws.api_client.do.return_value = {"Resources": [account_group.as_dict()]} + + group_manager = GroupManager( + backend, + ws, + inventory_database="inv", + workspace_group_regex="_old", + workspace_group_replace="", + ) + migrated_groups = group_manager.snapshot() + + assert len(migrated_groups) == 1 + assert migrated_groups[0].name_in_workspace == "group_old" + assert migrated_groups[0].name_in_account == "group" + + def test_configure_include_groups(): configure_groups = ConfigureGroups( MockPrompts( From dec6b3ac36990b8930f3c9aa19a54cad5093239d Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Mon, 1 Jul 2024 10:03:13 +0200 Subject: [PATCH 08/16] Check for regexes to be not none --- src/databricks/labs/ucx/workspace_access/groups.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/databricks/labs/ucx/workspace_access/groups.py b/src/databricks/labs/ucx/workspace_access/groups.py index 107c1cbb2d..4b291daebe 100644 --- a/src/databricks/labs/ucx/workspace_access/groups.py +++ b/src/databricks/labs/ucx/workspace_access/groups.py @@ -673,7 +673,7 @@ def _reflect_account_group_to_workspace(self, account_group_id: str): def _get_strategy( self, workspace_groups_in_workspace: dict[str, Group], account_groups_in_account: dict[str, Group] ) -> GroupMigrationStrategy: - if self._workspace_group_regex and self._workspace_group_replace: + if self._workspace_group_regex is not None and self._workspace_group_replace is not None: return RegexSubStrategy( workspace_groups_in_workspace, account_groups_in_account, @@ -682,7 +682,7 @@ def _get_strategy( workspace_group_regex=self._workspace_group_regex, workspace_group_replace=self._workspace_group_replace, ) - if self._workspace_group_regex and self._account_group_regex: + if self._workspace_group_regex is not None and self._account_group_regex is not None: return RegexMatchStrategy( workspace_groups_in_workspace, account_groups_in_account, From 5ef5790b41f96a8fd10a7bb9adcfccf2e96113fc Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Mon, 1 Jul 2024 10:12:16 +0200 Subject: [PATCH 09/16] Make type hints stricter --- .../labs/ucx/workspace_access/groups.py | 31 +++++++++---------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/src/databricks/labs/ucx/workspace_access/groups.py b/src/databricks/labs/ucx/workspace_access/groups.py index 4b291daebe..d79c7db840 100644 --- a/src/databricks/labs/ucx/workspace_access/groups.py +++ b/src/databricks/labs/ucx/workspace_access/groups.py @@ -208,9 +208,9 @@ def __init__( self, workspace_groups_in_workspace, account_groups_in_account, - /, - renamed_groups_prefix, - include_group_names=None, + *, + renamed_groups_prefix: str, + include_group_names: list[str] | None, ): super().__init__( workspace_groups_in_workspace, @@ -246,9 +246,9 @@ def __init__( self, workspace_groups_in_workspace, account_groups_in_account, - /, - renamed_groups_prefix, - include_group_names=None, + *, + renamed_groups_prefix: str, + include_group_names: list[str] | None, ): super().__init__( workspace_groups_in_workspace, @@ -284,11 +284,10 @@ def __init__( workspace_groups_in_workspace: dict[str, Group], account_groups_in_account: dict[str, Group], *, - # TODO: Check if hints below could be non optional - renamed_groups_prefix: str | None, - include_group_names: list[str] | None = None, - workspace_group_regex: str | None = None, - workspace_group_replace: str | None = None, + renamed_groups_prefix: str, + include_group_names: list[str] | None, + workspace_group_regex: str, + workspace_group_replace: str, ): super().__init__( workspace_groups_in_workspace, @@ -329,11 +328,11 @@ def __init__( self, workspace_groups_in_workspace, account_groups_in_account, - /, - renamed_groups_prefix, - include_group_names=None, - workspace_group_regex: str | None = None, - account_group_regex: str | None = None, + *, + renamed_groups_prefix: str, + include_group_names: list[str] | None, + workspace_group_regex: str, + account_group_regex: str, ): super().__init__( workspace_groups_in_workspace, From f30c1fcb0872daad8418666ec8fe8540aedfe4b0 Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Mon, 1 Jul 2024 10:17:04 +0200 Subject: [PATCH 10/16] Add return type hint --- src/databricks/labs/ucx/workspace_access/groups.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/databricks/labs/ucx/workspace_access/groups.py b/src/databricks/labs/ucx/workspace_access/groups.py index d79c7db840..371e326fd5 100644 --- a/src/databricks/labs/ucx/workspace_access/groups.py +++ b/src/databricks/labs/ucx/workspace_access/groups.py @@ -167,7 +167,7 @@ def __init__( self.include_group_names = include_group_names @abstractmethod - def generate_migrated_groups(self): + def generate_migrated_groups(self) -> Iterable[MigratedGroup]: raise NotImplementedError def get_filtered_groups(self): @@ -219,7 +219,7 @@ def __init__( renamed_groups_prefix=renamed_groups_prefix, ) - def generate_migrated_groups(self): + def generate_migrated_groups(self) -> Iterable[MigratedGroup]: workspace_groups = self.get_filtered_groups() for group in workspace_groups.values(): temporary_name = f"{self.renamed_groups_prefix}{group.display_name}" @@ -257,7 +257,7 @@ def __init__( renamed_groups_prefix=renamed_groups_prefix, ) - def generate_migrated_groups(self): + def generate_migrated_groups(self) -> Iterable[MigratedGroup]: workspace_groups = self.get_filtered_groups() account_groups_by_id = {group.external_id: group for group in self.account_groups_in_account.values()} for group in workspace_groups.values(): @@ -343,7 +343,7 @@ def __init__( self.account_group_regex = account_group_regex self.workspace_group_regex = workspace_group_regex - def generate_migrated_groups(self): + def generate_migrated_groups(self) -> Iterable[MigratedGroup]: workspace_groups_by_match = { self._safe_match(group_name, self.workspace_group_regex): group for group_name, group in self.get_filtered_groups().items() From ee61d12788b98b318918065041d72646df140c0c Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Mon, 1 Jul 2024 10:18:14 +0200 Subject: [PATCH 11/16] Move temporary name down --- src/databricks/labs/ucx/workspace_access/groups.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/databricks/labs/ucx/workspace_access/groups.py b/src/databricks/labs/ucx/workspace_access/groups.py index 371e326fd5..fc4b0f8b05 100644 --- a/src/databricks/labs/ucx/workspace_access/groups.py +++ b/src/databricks/labs/ucx/workspace_access/groups.py @@ -222,13 +222,13 @@ def __init__( def generate_migrated_groups(self) -> Iterable[MigratedGroup]: workspace_groups = self.get_filtered_groups() for group in workspace_groups.values(): - temporary_name = f"{self.renamed_groups_prefix}{group.display_name}" account_group = self.account_groups_in_account.get(group.display_name) if not account_group: logger.info( f"Couldn't find a matching account group for {group.display_name} group using name matching" ) continue + temporary_name = f"{self.renamed_groups_prefix}{group.display_name}" yield MigratedGroup( id_in_workspace=group.id, name_in_workspace=group.display_name, @@ -261,11 +261,11 @@ def generate_migrated_groups(self) -> Iterable[MigratedGroup]: workspace_groups = self.get_filtered_groups() account_groups_by_id = {group.external_id: group for group in self.account_groups_in_account.values()} for group in workspace_groups.values(): - temporary_name = f"{self.renamed_groups_prefix}{group.display_name}" account_group = account_groups_by_id.get(group.external_id) if not account_group: logger.info(f"Couldn't find a matching account group for {group.display_name} group with external_id") continue + temporary_name = f"{self.renamed_groups_prefix}{group.display_name}" yield MigratedGroup( id_in_workspace=group.id, name_in_workspace=group.display_name, @@ -353,13 +353,13 @@ def generate_migrated_groups(self) -> Iterable[MigratedGroup]: for group_name, group in self.account_groups_in_account.items() } for group_match, ws_group in workspace_groups_by_match.items(): - temporary_name = f"{self.renamed_groups_prefix}{ws_group.display_name}" account_group = account_groups_by_match.get(group_match) if not account_group: logger.info( f"Couldn't find a matching account group for {ws_group.display_name} group with regex matching" ) continue + temporary_name = f"{self.renamed_groups_prefix}{ws_group.display_name}" yield MigratedGroup( id_in_workspace=ws_group.id, name_in_workspace=ws_group.display_name, From 297eb46776a6deebec786c213272dd348f09f2ad Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Mon, 1 Jul 2024 10:28:56 +0200 Subject: [PATCH 12/16] Allow empty substitution value on ConfigureGroups --- src/databricks/labs/ucx/workspace_access/groups.py | 6 +++--- tests/unit/workspace_access/test_groups.py | 7 ++++--- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/src/databricks/labs/ucx/workspace_access/groups.py b/src/databricks/labs/ucx/workspace_access/groups.py index fc4b0f8b05..cc43e74bc4 100644 --- a/src/databricks/labs/ucx/workspace_access/groups.py +++ b/src/databricks/labs/ucx/workspace_access/groups.py @@ -756,7 +756,7 @@ def _configure_substitution(self): if not match_value: return False sub_value = self._ask_for_group("Enter the substitution value") - if not sub_value: + if sub_value is None: return False self.workspace_group_regex = match_value self.workspace_group_replace = sub_value @@ -788,8 +788,8 @@ def _configure_external(self): return True @staticmethod - def _is_valid_group_str(group_str: str): - return group_str and not re.search(r"[\s#,+ \\<>;]", group_str) + def _is_valid_group_str(group_str: str | None) -> bool: + return group_str is not None and not re.search(r"[\s#,+ \\<>;]", group_str) @staticmethod def _validate_regex(regex_input: str) -> bool: diff --git a/tests/unit/workspace_access/test_groups.py b/tests/unit/workspace_access/test_groups.py index 610f1144ad..1ecf1eca92 100644 --- a/tests/unit/workspace_access/test_groups.py +++ b/tests/unit/workspace_access/test_groups.py @@ -856,21 +856,22 @@ def test_configure_external_id(): assert configure_groups.group_match_by_external_id -def test_configure_substitute(): +@pytest.mark.parametrize("substitution_value", ["business", ""]) +def test_configure_substitute(substitution_value): configure_groups = ConfigureGroups( MockPrompts( { "Backup prefix": "", r"Choose how to map the workspace groups.*": "4", # substitute r".*for substitution": "biz", - r".*substitution value": "business", + r".*substitution value": substitution_value, ".*": "", } ) ) configure_groups.run() assert configure_groups.workspace_group_regex == "biz" - assert configure_groups.workspace_group_replace == "business" + assert configure_groups.workspace_group_replace == substitution_value def test_configure_match(): From bf4f07dfb6ce3cc15c45ceab742cee94a64b7bef Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Mon, 1 Jul 2024 10:43:57 +0200 Subject: [PATCH 13/16] Separate group and substitute question --- src/databricks/labs/ucx/workspace_access/groups.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/databricks/labs/ucx/workspace_access/groups.py b/src/databricks/labs/ucx/workspace_access/groups.py index cc43e74bc4..7772f9a724 100644 --- a/src/databricks/labs/ucx/workspace_access/groups.py +++ b/src/databricks/labs/ucx/workspace_access/groups.py @@ -716,6 +716,7 @@ class ConfigureGroups: def __init__(self, prompts: Prompts): self._prompts = prompts self._ask_for_group = functools.partial(self._prompts.question, validate=self._is_valid_group_str) + self._ask_for_substitute = functools.partial(self._prompts.question, validate=self._is_valid_substitute_str) self._ask_for_regex = functools.partial(self._prompts.question, validate=self._validate_regex) def run(self): @@ -755,7 +756,7 @@ def _configure_substitution(self): match_value = self._ask_for_regex("Enter a regular expression for substitution") if not match_value: return False - sub_value = self._ask_for_group("Enter the substitution value") + sub_value = self._ask_for_substitute("Enter the substitution value") if sub_value is None: return False self.workspace_group_regex = match_value @@ -787,9 +788,12 @@ def _configure_external(self): self.group_match_by_external_id = True return True + def _is_valid_group_str(self, group_str: str) -> bool: + return len(group_str) > 0 and self._is_valid_substitute_str(group_str) + @staticmethod - def _is_valid_group_str(group_str: str | None) -> bool: - return group_str is not None and not re.search(r"[\s#,+ \\<>;]", group_str) + def _is_valid_substitute_str(substitute: str) -> bool: + return not re.search(r"[\s#,+ \\<>;]", substitute) @staticmethod def _validate_regex(regex_input: str) -> bool: From ae30f18aee5eeed89f8e7ce8cb44856cac2ce521 Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Mon, 1 Jul 2024 10:59:37 +0200 Subject: [PATCH 14/16] Compile regex pattern in strategy init --- .../labs/ucx/workspace_access/groups.py | 28 ++++++++++++------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/src/databricks/labs/ucx/workspace_access/groups.py b/src/databricks/labs/ucx/workspace_access/groups.py index 7772f9a724..d12802087a 100644 --- a/src/databricks/labs/ucx/workspace_access/groups.py +++ b/src/databricks/labs/ucx/workspace_access/groups.py @@ -182,9 +182,9 @@ def get_filtered_groups(self): } @staticmethod - def _safe_match(group_name: str, match_re: str) -> str: + def _safe_match(group_name: str, pattern: re.Pattern) -> str: try: - match = re.search(match_re, group_name) + match = pattern.search(group_name) if not match: return group_name match_groups = match.groups() @@ -195,11 +195,11 @@ def _safe_match(group_name: str, match_re: str) -> str: return group_name @staticmethod - def _safe_sub(group_name: str, match_re: str, replace: str) -> str: + def _safe_sub(group_name: str, pattern: re.Pattern, replace: str) -> str: try: - return re.sub(match_re, replace, group_name) + return pattern.sub(replace, group_name) except re.error: - logger.warning(f"Failed to apply Regex Expression {match_re} on Group Name {group_name}") + logger.warning(f"Failed to apply Regex Expression {pattern} on Group Name {group_name}") return group_name @@ -295,14 +295,18 @@ def __init__( include_group_names=include_group_names, renamed_groups_prefix=renamed_groups_prefix, ) - self.workspace_group_regex = workspace_group_regex + self.workspace_group_regex = workspace_group_regex # Keep to support legacy public API self.workspace_group_replace = workspace_group_replace + self._workspace_group_pattern = re.compile(self.workspace_group_regex) + def generate_migrated_groups(self) -> Iterable[MigratedGroup]: workspace_groups = self.get_filtered_groups() for group in workspace_groups.values(): name_in_account = self._safe_sub( - group.display_name, self.workspace_group_regex, self.workspace_group_replace + group.display_name, + self._workspace_group_pattern, + self.workspace_group_replace, ) account_group = self.account_groups_in_account.get(name_in_account) if not account_group: @@ -340,16 +344,20 @@ def __init__( include_group_names=include_group_names, renamed_groups_prefix=renamed_groups_prefix, ) - self.account_group_regex = account_group_regex + # Keep to support legacy public API self.workspace_group_regex = workspace_group_regex + self.account_group_regex = account_group_regex + + self._workspace_group_pattern = re.compile(self.workspace_group_regex) + self._account_group_pattern = re.compile(self.account_group_regex) def generate_migrated_groups(self) -> Iterable[MigratedGroup]: workspace_groups_by_match = { - self._safe_match(group_name, self.workspace_group_regex): group + self._safe_match(group_name, self._workspace_group_pattern): group for group_name, group in self.get_filtered_groups().items() } account_groups_by_match = { - self._safe_match(group_name, self.account_group_regex): group + self._safe_match(group_name, self._account_group_pattern): group for group_name, group in self.account_groups_in_account.items() } for group_match, ws_group in workspace_groups_by_match.items(): From 407fd0dc5936da9a0e6a8a9a8e7aa6a38e498c23 Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Mon, 1 Jul 2024 11:03:33 +0200 Subject: [PATCH 15/16] Compile substitute string --- src/databricks/labs/ucx/workspace_access/groups.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/databricks/labs/ucx/workspace_access/groups.py b/src/databricks/labs/ucx/workspace_access/groups.py index d12802087a..f941cfd5db 100644 --- a/src/databricks/labs/ucx/workspace_access/groups.py +++ b/src/databricks/labs/ucx/workspace_access/groups.py @@ -721,6 +721,8 @@ class ConfigureGroups: group_match_by_external_id = None include_group_names = None + _valid_substitute_pattern = re.compile(r"[\s#,+ \\<>;]") + def __init__(self, prompts: Prompts): self._prompts = prompts self._ask_for_group = functools.partial(self._prompts.question, validate=self._is_valid_group_str) @@ -799,9 +801,8 @@ def _configure_external(self): def _is_valid_group_str(self, group_str: str) -> bool: return len(group_str) > 0 and self._is_valid_substitute_str(group_str) - @staticmethod - def _is_valid_substitute_str(substitute: str) -> bool: - return not re.search(r"[\s#,+ \\<>;]", substitute) + def _is_valid_substitute_str(self, substitute: str) -> bool: + return not self._valid_substitute_pattern.search(substitute) @staticmethod def _validate_regex(regex_input: str) -> bool: From 842d363c26d02c65a30fbae9d3f668e638879ad0 Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Mon, 1 Jul 2024 11:28:55 +0200 Subject: [PATCH 16/16] Rename variable --- src/databricks/labs/ucx/workspace_access/groups.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/databricks/labs/ucx/workspace_access/groups.py b/src/databricks/labs/ucx/workspace_access/groups.py index f941cfd5db..ec9587fadf 100644 --- a/src/databricks/labs/ucx/workspace_access/groups.py +++ b/src/databricks/labs/ucx/workspace_access/groups.py @@ -766,11 +766,11 @@ def _configure_substitution(self): match_value = self._ask_for_regex("Enter a regular expression for substitution") if not match_value: return False - sub_value = self._ask_for_substitute("Enter the substitution value") - if sub_value is None: + substitute = self._ask_for_substitute("Enter the substitution value") + if substitute is None: return False self.workspace_group_regex = match_value - self.workspace_group_replace = sub_value + self.workspace_group_replace = substitute return True def _configure_matching(self):