@@ -278,6 +278,26 @@ def _configure_new_installation(self) -> WorkspaceConfig:
 
         policy_id, instance_profile, spark_conf_dict = self._policy_installer.create(inventory_database)
 
+        # Save configurable spark_conf for table migration cluster
+        # parallelism will not be needed if backlog is fixed in https://databricks.atlassian.net/browse/ES-975874
+        parallelism = self._prompts.question(
+            "Parallelism for migrating dbfs root delta tables with deep clone", default="200", valid_number=True
+        )
+        if not spark_conf_dict:
+            spark_conf_dict = {}
+        spark_conf_dict.update({'spark.sql.sources.parallelPartitionDiscovery.parallelism': parallelism})
+        # min and max workers for auto-scale migration job cluster
+        min_workers = int(
+            self._prompts.question(
+                "Min workers for auto-scale job cluster for table migration", default="1", valid_number=True
+            )
+        )
+        max_workers = int(
+            self._prompts.question(
+                "Max workers for auto-scale job cluster for table migration", default="10", valid_number=True
+            )
+        )
+
         # Check if terraform is being used
         is_terraform_used = self._prompts.confirm("Do you use Terraform to deploy your infrastructure?")
 
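The three prompts above are collected during installation and persisted with the rest of the workspace config. Note that `parallelism` is deliberately kept as a string, since Spark conf values are strings, while the worker counts are cast to `int`. A minimal sketch of the collected state, assuming the operator accepts every default and `_policy_installer.create()` returned no spark conf (hypothetical values, not taken from the diff):

# Sketch only: all prompt defaults accepted, empty spark conf from the policy installer.
spark_conf_dict = {'spark.sql.sources.parallelPartitionDiscovery.parallelism': '200'}
min_workers = 1
max_workers = 10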
@@ -294,6 +314,8 @@ def _configure_new_installation(self) -> WorkspaceConfig:
             num_threads=num_threads,
             instance_profile=instance_profile,
             spark_conf=spark_conf_dict,
+            min_workers=min_workers,
+            max_workers=max_workers,
             policy_id=policy_id,
             is_terraform_used=is_terraform_used,
             include_databases=self._select_databases(),
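The two new keyword arguments imply matching fields on `WorkspaceConfig`. A minimal sketch of that addition, assuming the dataclass defaults mirror the prompt defaults (the real class carries many more fields):

from dataclasses import dataclass

@dataclass
class WorkspaceConfig:
    # Sketch: only the two fields this diff introduces. Defaults of 1/10
    # mirroring the prompt defaults are an assumption, not taken from the diff.
    min_workers: int = 1
    max_workers: int = 10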
@@ -798,38 +820,59 @@ def _job_wheel_task(self, jobs_task: jobs.Task, task: Task, remote_wheel: str) -
             ),
         )
 
+    def _job_cluster_spark_conf(self, cluster_key: str):
+        conf_from_installation = self._config.spark_conf if self._config.spark_conf else {}
+        if cluster_key == "main":
+            spark_conf = {
+                "spark.databricks.cluster.profile": "singleNode",
+                "spark.master": "local[*]",
+            }
+            return spark_conf | conf_from_installation
+        if cluster_key == "tacl":
+            return {"spark.databricks.acl.sqlOnly": "true"} | conf_from_installation
+        if cluster_key == "table_migration":
+            return {"spark.sql.sources.parallelPartitionDiscovery.parallelism": "200"} | conf_from_installation
+        return conf_from_installation
+
     def _job_clusters(self, names: set[str]):
         clusters = []
-        spark_conf = {
-            "spark.databricks.cluster.profile": "singleNode",
-            "spark.master": "local[*]",
-        }
-        if self._config.spark_conf is not None:
-            spark_conf = spark_conf | self._config.spark_conf
-        spec = compute.ClusterSpec(
-            data_security_mode=compute.DataSecurityMode.LEGACY_SINGLE_USER,
-            spark_conf=spark_conf,
-            custom_tags={"ResourceClass": "SingleNode"},
-            num_workers=0,
-            policy_id=self.config.policy_id,
-        )
         if "main" in names:
             clusters.append(
                 jobs.JobCluster(
                     job_cluster_key="main",
-                    new_cluster=spec,
+                    new_cluster=compute.ClusterSpec(
+                        data_security_mode=compute.DataSecurityMode.LEGACY_SINGLE_USER,
+                        spark_conf=self._job_cluster_spark_conf("main"),
+                        custom_tags={"ResourceClass": "SingleNode"},
+                        num_workers=0,
+                        policy_id=self.config.policy_id,
+                    ),
                 )
             )
         if "tacl" in names:
             clusters.append(
                 jobs.JobCluster(
                     job_cluster_key="tacl",
-                    new_cluster=replace(
-                        spec,
+                    new_cluster=compute.ClusterSpec(
                         data_security_mode=compute.DataSecurityMode.LEGACY_TABLE_ACL,
-                        spark_conf={"spark.databricks.acl.sqlOnly": "true"},
+                        spark_conf=self._job_cluster_spark_conf("tacl"),
                         num_workers=1,  # ShowPermissionsCommand needs a worker
-                        custom_tags={},
+                        policy_id=self.config.policy_id,
+                    ),
+                )
+            )
+        if "table_migration" in names:
+            clusters.append(
+                jobs.JobCluster(
+                    job_cluster_key="table_migration",
+                    new_cluster=compute.ClusterSpec(
+                        data_security_mode=compute.DataSecurityMode.SINGLE_USER,
+                        spark_conf=self._job_cluster_spark_conf("table_migration"),
+                        policy_id=self.config.policy_id,
+                        autoscale=compute.AutoScale(
+                            max_workers=self.config.max_workers,
+                            min_workers=self.config.min_workers,
+                        ),
                     ),
                 )
            )
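The refactored `_job_cluster_spark_conf` merges per-cluster defaults with whatever spark conf was captured at install time using dict union (PEP 584), where the right-hand operand wins on key collisions. That is what lets the prompted parallelism override the hard-coded "200": the install-time conf sits on the right of `|`. A standalone illustration of that precedence, with a hypothetical prompted value:

# PEP 584 dict union: the right operand wins on duplicate keys, so the
# conf saved during installation overrides the per-cluster default.
defaults = {"spark.sql.sources.parallelPartitionDiscovery.parallelism": "200"}
conf_from_installation = {"spark.sql.sources.parallelPartitionDiscovery.parallelism": "1000"}
merged = defaults | conf_from_installation
assert merged["spark.sql.sources.parallelPartitionDiscovery.parallelism"] == "1000"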