@@ -42,6 +42,13 @@ def setup_tacl(_: WorkspaceConfig):
     """(Optimization) Starts `tacl` job cluster in parallel to crawling tables."""
 
 
+def _must_truncate_bytes(cfg: WorkspaceConfig) -> int:
+    if not cfg.connect:
+        return 96
+    truncate_bytes = cfg.connect.debug_truncate_bytes
+    return truncate_bytes if truncate_bytes else 96
+
+
 @task("assessment", depends_on=[crawl_tables, setup_tacl], job_cluster="tacl")
 def crawl_grants(cfg: WorkspaceConfig):
     """Scans the previously created Delta table named `$inventory_database.tables` and issues a `SHOW GRANTS`
@@ -52,9 +59,9 @@ def crawl_grants(cfg: WorkspaceConfig):
 
     Note: This job runs on a separate cluster (named `tacl`) as it requires the proper configuration to have the Table
     ACLs enabled and available for retrieval."""
-    backend = RuntimeBackend()
-    tables = TablesCrawler(backend, cfg.inventory_database)
-    udfs = UdfsCrawler(backend, cfg.inventory_database)
+    sql_backend = RuntimeBackend(debug_truncate_bytes=_must_truncate_bytes(cfg))
+    tables = TablesCrawler(sql_backend, cfg.inventory_database)
+    udfs = UdfsCrawler(sql_backend, cfg.inventory_database)
     grants = GrantsCrawler(tables, udfs)
     grants.snapshot()
 
@@ -65,8 +72,8 @@ def estimate_table_size_for_migration(cfg: WorkspaceConfig):
     "synced". These tables will have to be cloned in the migration process.
     Assesses the size of these tables and create `$inventory_database.table_size` table to list these sizes.
     The table size is a factor in deciding whether to clone these tables."""
-    backend = RuntimeBackend()
-    table_size = TableSizeCrawler(backend, cfg.inventory_database)
+    sql_backend = RuntimeBackend(debug_truncate_bytes=_must_truncate_bytes(cfg))
+    table_size = TableSizeCrawler(sql_backend, cfg.inventory_database)
     table_size.snapshot()
 
 
@@ -79,7 +86,8 @@ def crawl_mounts(cfg: WorkspaceConfig):
     The assessment involves scanning the workspace to compile a list of all existing mount points and subsequently
     storing this information in the `$inventory.mounts` table. This is crucial for planning the migration."""
     ws = WorkspaceClient(config=cfg.to_databricks_config())
-    mounts = Mounts(backend=RuntimeBackend(), ws=ws, inventory_database=cfg.inventory_database)
+    sql_backend = RuntimeBackend(debug_truncate_bytes=_must_truncate_bytes(cfg))
+    mounts = Mounts(backend=sql_backend, ws=ws, inventory_database=cfg.inventory_database)
     mounts.snapshot()
 
 
@@ -94,7 +102,8 @@ def guess_external_locations(cfg: WorkspaceConfig):
     - Scanning all these locations to identify folders that can act as shared path prefixes
     - These identified external locations will be created subsequently prior to the actual table migration"""
     ws = WorkspaceClient(config=cfg.to_databricks_config())
-    crawler = ExternalLocations(ws, RuntimeBackend(), cfg.inventory_database)
+    sql_backend = RuntimeBackend(debug_truncate_bytes=_must_truncate_bytes(cfg))
+    crawler = ExternalLocations(ws, sql_backend, cfg.inventory_database)
     crawler.snapshot()
 
 
@@ -110,7 +119,8 @@ def assess_jobs(cfg: WorkspaceConfig):
     - Clusters referencing DBFS locations in one or more config options
     """
     ws = WorkspaceClient(config=cfg.to_databricks_config())
-    crawler = JobsCrawler(ws, RuntimeBackend(), cfg.inventory_database)
+    sql_backend = RuntimeBackend(debug_truncate_bytes=_must_truncate_bytes(cfg))
+    crawler = JobsCrawler(ws, sql_backend, cfg.inventory_database)
     crawler.snapshot()
 
 
@@ -126,7 +136,8 @@ def assess_clusters(cfg: WorkspaceConfig):
     - Clusters referencing DBFS locations in one or more config options
     """
     ws = WorkspaceClient(config=cfg.to_databricks_config())
-    crawler = ClustersCrawler(ws, RuntimeBackend(), cfg.inventory_database)
+    sql_backend = RuntimeBackend(debug_truncate_bytes=_must_truncate_bytes(cfg))
+    crawler = ClustersCrawler(ws, sql_backend, cfg.inventory_database)
     crawler.snapshot()
 
 
@@ -142,7 +153,8 @@ def assess_pipelines(cfg: WorkspaceConfig):
     Subsequently, a list of all the pipelines with matching configurations are stored in the
     `$inventory.pipelines` table."""
     ws = WorkspaceClient(config=cfg.to_databricks_config())
-    crawler = PipelinesCrawler(ws, RuntimeBackend(), cfg.inventory_database)
+    sql_backend = RuntimeBackend(debug_truncate_bytes=_must_truncate_bytes(cfg))
+    crawler = PipelinesCrawler(ws, sql_backend, cfg.inventory_database)
     crawler.snapshot()
 
 
@@ -159,7 +171,8 @@ def assess_azure_service_principals(cfg: WorkspaceConfig):
     in the `$inventory.azure_service_principals` table."""
     ws = WorkspaceClient(config=cfg.to_databricks_config())
     if ws.config.is_azure:
-        crawler = AzureServicePrincipalCrawler(ws, RuntimeBackend(), cfg.inventory_database)
+        sql_backend = RuntimeBackend(debug_truncate_bytes=_must_truncate_bytes(cfg))
+        crawler = AzureServicePrincipalCrawler(ws, sql_backend, cfg.inventory_database)
         crawler.snapshot()
 
 
@@ -171,7 +184,8 @@ def assess_global_init_scripts(cfg: WorkspaceConfig):
     It looks in:
     - the list of all the global init scripts are saved in the `$inventory.azure_service_principals` table."""
     ws = WorkspaceClient(config=cfg.to_databricks_config())
-    crawler = GlobalInitScriptCrawler(ws, RuntimeBackend(), cfg.inventory_database)
+    sql_backend = RuntimeBackend(debug_truncate_bytes=_must_truncate_bytes(cfg))
+    crawler = GlobalInitScriptCrawler(ws, sql_backend, cfg.inventory_database)
     crawler.snapshot()
 
 
@@ -183,8 +197,9 @@ def workspace_listing(cfg: WorkspaceConfig):
     It uses multi-threading to parallelize the listing process to speed up execution on big workspaces.
     It accepts starting path as the parameter defaulted to the root path '/'."""
     ws = WorkspaceClient(config=cfg.to_databricks_config())
+    sql_backend = RuntimeBackend(debug_truncate_bytes=_must_truncate_bytes(cfg))
     crawler = WorkspaceListing(
-        ws, RuntimeBackend(), cfg.inventory_database, num_threads=cfg.num_threads, start_path=cfg.workspace_start_path
+        ws, sql_backend, cfg.inventory_database, num_threads=cfg.num_threads, start_path=cfg.workspace_start_path
     )
     crawler.snapshot()
 
@@ -197,9 +212,10 @@ def crawl_permissions(cfg: WorkspaceConfig):
     This is the first step for the _group migration_ process, which is continued in the `migrate-groups` workflow.
     This step includes preparing Legacy Table ACLs for local group migration."""
     ws = WorkspaceClient(config=cfg.to_databricks_config())
+    sql_backend = RuntimeBackend(debug_truncate_bytes=_must_truncate_bytes(cfg))
     permission_manager = PermissionManager.factory(
         ws,
-        RuntimeBackend(),
+        sql_backend,
         cfg.inventory_database,
         num_threads=cfg.num_threads,
         workspace_start_path=cfg.workspace_start_path,
@@ -211,7 +227,7 @@ def crawl_permissions(cfg: WorkspaceConfig):
 @task("assessment")
 def crawl_groups(cfg: WorkspaceConfig):
     """Scans all groups for the local group migration scope"""
-    sql_backend = RuntimeBackend()
+    sql_backend = RuntimeBackend(debug_truncate_bytes=_must_truncate_bytes(cfg))
     ws = WorkspaceClient(config=cfg.to_databricks_config())
     group_manager = GroupManager(
         sql_backend,
@@ -251,7 +267,7 @@ def assessment_report(_: WorkspaceConfig):
 @task("migrate-groups", depends_on=[crawl_groups])
 def rename_workspace_local_groups(cfg: WorkspaceConfig):
     """Renames workspace local groups by adding `ucx-renamed-` prefix."""
-    sql_backend = RuntimeBackend()
+    sql_backend = RuntimeBackend(debug_truncate_bytes=_must_truncate_bytes(cfg))
     ws = WorkspaceClient(config=cfg.to_databricks_config())
     verify_has_metastore = VerifyHasMetastore(ws)
     if verify_has_metastore.verify_metastore():
@@ -275,7 +291,7 @@ def rename_workspace_local_groups(cfg: WorkspaceConfig):
 def reflect_account_groups_on_workspace(cfg: WorkspaceConfig):
     """Adds matching account groups to this workspace. The matching account level group(s) must preexist(s) for this
     step to be successful. This process does not create the account level group(s)."""
-    sql_backend = RuntimeBackend()
+    sql_backend = RuntimeBackend(debug_truncate_bytes=_must_truncate_bytes(cfg))
     ws = WorkspaceClient(config=cfg.to_databricks_config())
     group_manager = GroupManager(
         sql_backend,
@@ -302,10 +318,10 @@ def apply_permissions_to_account_groups(cfg: WorkspaceConfig):
     permissions, Secret Scopes, Notebooks, Directories, Repos, Files.
 
     See [interactive tutorial here](https://app.getreprise.com/launch/myM3VNn/)."""
-    backend = RuntimeBackend()
+    sql_backend = RuntimeBackend(debug_truncate_bytes=_must_truncate_bytes(cfg))
     ws = WorkspaceClient(config=cfg.to_databricks_config())
     group_manager = GroupManager(
-        backend,
+        sql_backend,
         ws,
         cfg.inventory_database,
         cfg.include_group_names,
@@ -323,7 +339,7 @@ def apply_permissions_to_account_groups(cfg: WorkspaceConfig):
 
     permission_manager = PermissionManager.factory(
         ws,
-        backend,
+        sql_backend,
         cfg.inventory_database,
         num_threads=cfg.num_threads,
         workspace_start_path=cfg.workspace_start_path,
@@ -336,10 +352,10 @@ def delete_backup_groups(cfg: WorkspaceConfig):
     """Last step of the group migration process. Removes all workspace-level backup groups, along with their
     permissions. Execute this workflow only after you've confirmed that workspace-local migration worked
     successfully for all the groups involved."""
-    backend = RuntimeBackend()
+    sql_backend = RuntimeBackend(debug_truncate_bytes=_must_truncate_bytes(cfg))
     ws = WorkspaceClient(config=cfg.to_databricks_config())
     group_manager = GroupManager(
-        backend,
+        sql_backend,
         ws,
         cfg.inventory_database,
         cfg.include_group_names,
@@ -356,7 +372,8 @@ def delete_backup_groups(cfg: WorkspaceConfig):
 def destroy_schema(cfg: WorkspaceConfig):
     """This _clean-up_ workflow allows to removes the `$inventory` database, with all the inventory tables created by
     the previous workflow runs. Use this to reset the entire state and start with the assessment step again."""
-    RuntimeBackend().execute(f"DROP DATABASE {cfg.inventory_database} CASCADE")
+    sql_backend = RuntimeBackend(debug_truncate_bytes=_must_truncate_bytes(cfg))
+    sql_backend.execute(f"DROP DATABASE {cfg.inventory_database} CASCADE")
 
 
 def main(*argv):
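
For context, every task in this diff now constructs its RuntimeBackend with the truncation limit resolved by the new _must_truncate_bytes helper, which reads cfg.connect.debug_truncate_bytes and falls back to 96 bytes when the connection config is missing or the value is unset. Below is a minimal, self-contained sketch of that fallback behaviour; the StubConnectConfig and StubWorkspaceConfig dataclasses are hypothetical stand-ins for the real WorkspaceConfig and its SDK connection config, which are not shown in this diff.

from dataclasses import dataclass


@dataclass
class StubConnectConfig:
    # Hypothetical stand-in for the SDK connection config held on WorkspaceConfig.connect.
    debug_truncate_bytes: int | None = None


@dataclass
class StubWorkspaceConfig:
    # Hypothetical stand-in for WorkspaceConfig; only the field the helper touches.
    connect: StubConnectConfig | None = None


def _must_truncate_bytes(cfg) -> int:
    # Same logic as the helper added in this diff: default to 96 bytes
    # unless an explicit debug_truncate_bytes is configured.
    if not cfg.connect:
        return 96
    truncate_bytes = cfg.connect.debug_truncate_bytes
    return truncate_bytes if truncate_bytes else 96


assert _must_truncate_bytes(StubWorkspaceConfig()) == 96                           # no connection config
assert _must_truncate_bytes(StubWorkspaceConfig(StubConnectConfig())) == 96        # value unset
assert _must_truncate_bytes(StubWorkspaceConfig(StubConnectConfig(2048))) == 2048  # explicit value wins

Because any falsy value (None or 0) falls back to 96, the helper always hands RuntimeBackend a positive truncation limit.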