Extract command codes and unify the checks for spark_conf, cluster_policy, init_scripts #855
Changes from 7 commits
@@ -1,8 +1,12 @@
import base64
import json
import logging
import re

from databricks.sdk import WorkspaceClient
from databricks.sdk.errors import NotFound
from databricks.sdk.service import compute
from databricks.sdk.service.compute import ClusterDetails, Policy
Review comment: line 9 is redundant because you've imported the whole `compute` module on the line above.
Reply: fixed

logger = logging.getLogger(__name__)
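A minimal sketch of the import-redundancy point raised above, assuming only that `databricks-sdk` is installed; the assertions are illustrative and not part of the PR:

```python
from databricks.sdk.service import compute
from databricks.sdk.service.compute import ClusterDetails, Policy

# Both import styles expose the same classes, so keeping both lines in one
# module is redundant; either style alone is enough.
assert compute.ClusterDetails is ClusterDetails
assert compute.Policy is Policy
```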
@@ -92,3 +96,72 @@
    if (10, 0) <= version < (11, 3):
        return "kinda works"
    return "supported"
def _check_spark_conf(conf: dict[str, str], source) -> list[str]:
Review comment: these functions are private (starting with `_`) …
Reply: Moved those functions back to the cluster mixin. I make …
    failures = []
    for k in INCOMPATIBLE_SPARK_CONFIG_KEYS:
        if k in conf:
            failures.append(f"unsupported config: {k}")
    for value in conf.values():
        if "dbfs:/mnt" in value or "/dbfs/mnt" in value:
            failures.append(f"using DBFS mount in configuration: {value}")
    # Checking if Azure cluster config is present in spark config
    if _azure_sp_conf_present_check(conf):
        failures.append(f"{_AZURE_SP_CONF_FAILURE_MSG} {source}.")
    return failures
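As an illustration of what the helper above returns, a hypothetical call with a made-up Spark configuration; it assumes the key is not in `INCOMPATIBLE_SPARK_CONFIG_KEYS` and that it runs inside this module, where the Azure helpers are defined:

```python
# Hypothetical usage sketch: a Spark conf pointing at a DBFS mount
# should yield one failure message from _check_spark_conf above.
conf = {"spark.sql.warehouse.dir": "dbfs:/mnt/warehouse"}
print(_check_spark_conf(conf, "cluster"))
# expected: ['using DBFS mount in configuration: dbfs:/mnt/warehouse']
```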
def _safe_get_cluster_policy(ws: WorkspaceClient, policy_id: str) -> Policy | None:
Review comment: so, logical question - do we want to do diamond-shaped dependencies or not? E.g.:

    flowchart TD
      assessment --> crawlers
      crawlers --> clusters
      crawlers --> jobs
      crawlers --> init_scripts
      crawlers --> pipelines
      crawlers --> azure_spns
      clusters --> runtime.py
      jobs --> runtime.py
      init_scripts --> runtime.py
      pipelines --> runtime.py
      azure_spns --> runtime.py

or something like this:

    flowchart TD
      assessment --> crawlers
      azure_spns -->|mixin| clusters
      crawlers --> clusters
      clusters -->|mixin| jobs
      crawlers --> jobs
      azure_spns -->|mixin| init_scripts
      crawlers --> init_scripts
      jobs -->|mixin| pipelines
      crawlers --> pipelines
      crawlers --> azure_spns
      clusters --> runtime.py
      jobs --> runtime.py
      init_scripts --> runtime.py
      pipelines --> runtime.py
      azure_spns --> runtime.py

Reply: I'd like to select the first dependency structure, it's clearer to me.
    try:
        return ws.cluster_policies.get(policy_id)
    except NotFound:
        logger.warning(f"The cluster policy was deleted: {policy_id}")
        return None
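Following the thread above, the first (flat) structure was chosen: every assessment module imports shared checks from `crawlers` and never from a sibling module. A rough sketch of that import direction, reusing the function name introduced in this PR:

```python
# Sketch only: under the flat option, an assessment module such as jobs.py
# pulls the shared checks from crawlers and never from a sibling assessment
# module, so there are no mixin "diamonds".
from databricks.labs.ucx.assessment.crawlers import _check_cluster_failures
```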
def _check_cluster_policy(ws: WorkspaceClient, cluster, source):
Review comment: let's add type annotations to top-level members if we really don't want mixins.
Reply: fixed, type annotations added even though we went back to the mixin.
    failures = []
    policy = _safe_get_cluster_policy(ws, cluster.policy_id)
    if policy:
        if policy.definition:
            if _azure_sp_conf_present_check(json.loads(policy.definition)):
                failures.append(f"{_AZURE_SP_CONF_FAILURE_MSG} {source}.")
        if policy.policy_family_definition_overrides:
            if _azure_sp_conf_present_check(json.loads(policy.policy_family_definition_overrides)):
                failures.append(f"{_AZURE_SP_CONF_FAILURE_MSG} {source}.")
    return failures
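Per the annotation comment above, a hypothetical fully-annotated signature for this helper; the `ClusterDetails` parameter type and the `str`/`list[str]` annotations are inferred from how it is used here, not copied from the merged code:

```python
from databricks.sdk import WorkspaceClient
from databricks.sdk.service.compute import ClusterDetails

# Hypothetical annotated signature; body stays as in the hunk above.
def _check_cluster_policy(ws: WorkspaceClient, cluster: ClusterDetails, source: str) -> list[str]:
    ...
```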
def _check_cluster_init_script(ws: WorkspaceClient, init_scripts, source):
Review comment: if we're not doing mixins, then why is this function defined so far away from …
Reply: Moved them to the mixin and put these two functions next to each other.
    failures = []
    for init_script_info in init_scripts:
        init_script_data = _get_init_script_data(ws, init_script_info)
        failures.extend(_check_init_script(init_script_data, source))
    return failures
def _check_init_script(init_script_data, source):
    failures = []
    if not init_script_data:
        return failures
    if _azure_sp_conf_in_init_scripts(init_script_data):
        failures.append(f"{_AZURE_SP_CONF_FAILURE_MSG} {source}.")
    return failures
def _check_cluster_failures(ws: WorkspaceClient, cluster: ClusterDetails | compute.ClusterSpec, source):
Review comment: don't do union types, they have already caused a lot of bugs. Convert the cluster spec into cluster details instead: …
Reply: fixed, it was a miss when merging with #845
    failures = []

    support_status = spark_version_compatibility(cluster.spark_version)
    if support_status != "supported":
        failures.append(f"not supported DBR: {cluster.spark_version}")
    if cluster.spark_conf is not None:
        failures.extend(_check_spark_conf(cluster.spark_conf, source))
    # Checking if Azure cluster config is present in cluster policies
    if cluster.policy_id:
        failures.extend(_check_cluster_policy(ws, cluster, source))
    if cluster.init_scripts:
        failures.extend(_check_cluster_init_script(ws, cluster.init_scripts, source))

    return failures
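The union-type comment above suggests converting a `ClusterSpec` into `ClusterDetails` before running the check, which is also what the jobs code below used to do inline. A minimal sketch of that conversion; the helper name `_as_cluster_details` is made up:

```python
from databricks.sdk.service.compute import ClusterDetails, ClusterSpec

def _as_cluster_details(spec: ClusterSpec) -> ClusterDetails:
    # The two SDK dataclasses share their serialized field names, so a dict
    # round-trip avoids a ClusterDetails | ClusterSpec union in the checker.
    return ClusterDetails.from_dict(spec.as_dict())
```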
@@ -3,11 +3,9 @@
from dataclasses import dataclass

from databricks.sdk import WorkspaceClient
from databricks.sdk.service.compute import ClusterDetails
from databricks.sdk.service.jobs import BaseJob

-from databricks.labs.ucx.assessment.clusters import ClustersMixin
-from databricks.labs.ucx.assessment.crawlers import logger
+from databricks.labs.ucx.assessment.crawlers import _check_cluster_failures, logger
from databricks.labs.ucx.framework.crawlers import CrawlerBase, SqlBackend
@@ -20,7 +18,7 @@ class JobInfo:
    creator: str | None = None


-class JobsMixin(ClustersMixin):
+class JobsMixin:
Review comment: for consistency, shouldn't it be named CheckJobsMixin?
    @staticmethod
    def _get_cluster_configs_from_all_jobs(all_jobs, all_clusters_by_id):
        for j in all_jobs:
@@ -85,10 +83,8 @@ def _assess_jobs(self, all_jobs: list[BaseJob], all_clusters_by_id) -> Iterable[
            job_id = job.job_id
            if not job_id:
                continue
-            cluster_details = ClusterDetails.from_dict(cluster_config.as_dict())
-            cluster_failures = self._check_cluster_failures(cluster_details)
-            for failure in json.loads(cluster_failures.failures):
-                job_assessment[job_id].add(failure)
+            cluster_failures = _check_cluster_failures(self._ws, cluster_config, "Job cluster")
+            job_assessment[job_id].update(cluster_failures)

        # TODO: next person looking at this - rewrite, as this code makes no sense
        for job_key in job_details.keys():  # pylint: disable=consider-using-dict-items,consider-iterating-dictionary
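The switch above from parsing JSON and `add`-ing failures one by one to a single `update` works because the extracted `_check_cluster_failures` returns a plain list of strings. A tiny hypothetical illustration; the job id is a sample value and the failure text follows the message format from the crawlers hunk:

```python
# Hypothetical data: one job whose cluster runs an unsupported DBR version.
job_assessment: dict[int, set[str]] = {1234: set()}
cluster_failures = ["not supported DBR: 9.3.x-cpu-ml-scala2.12"]

# set.update() accepts any iterable, so the list of failure strings
# can be merged into the job's failure set in one call.
job_assessment[1234].update(cluster_failures)
print(job_assessment[1234])  # {'not supported DBR: 9.3.x-cpu-ml-scala2.12'}
```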
@@ -0,0 +1,33 @@
[
  {
    "autoscale": {
      "max_workers": 6,
      "min_workers": 1
    },
    "cluster_source": "JOB",
    "creator_user_name": "[email protected]",
    "cluster_id": "0123-190044-1122334422",
    "cluster_name": "Single User Cluster Name",
    "policy_id": "single-user-with-spn",
    "spark_version": "9.3.x-cpu-ml-scala2.12",
    "spark_conf": {
      "spark.databricks.delta.preview.enabled": "true"
    },
    "spark_context_id": "5134472582179565315"
  },
  {
    "autoscale": {
      "max_workers": 6,
      "min_workers": 1
    },
    "creator_user_name": "[email protected]",
    "cluster_id": "0123-190044-1122334411",
    "cluster_name": "Single User Cluster Name",
    "policy_id": "azure-oauth",
    "spark_version": "13.3.x-cpu-ml-scala2.12",
    "spark_conf": {
      "spark.databricks.delta.preview.enabled": "true"
    },
    "spark_context_id": "5134472582179565315"
  }
]
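The new fixture above is a plain JSON list of cluster payloads. A hypothetical way a test could turn it into SDK objects before running the unified check; `load_cluster_fixture` and the path handling are made up, only `ClusterDetails.from_dict` comes from the diff:

```python
import json
from pathlib import Path

from databricks.sdk.service.compute import ClusterDetails

def load_cluster_fixture(path: str) -> list[ClusterDetails]:
    # Each entry in the JSON list deserializes into a ClusterDetails via the
    # SDK's from_dict helper, ready to be passed to _check_cluster_failures.
    raw = json.loads(Path(path).read_text())
    return [ClusterDetails.from_dict(entry) for entry in raw]
```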
Review comment: don't import logger, initialize one at the top of the module.
Reply: fixed
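The pattern being asked for here is the standard module-level logger, the same one the crawlers hunk at the top of this diff already uses:

```python
import logging

# Each module initializes its own logger instead of importing one
# that was created in a different module.
logger = logging.getLogger(__name__)
```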