Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update migration progress workflow to also re-lint dashboards and jobs #3025

Merged
merged 2 commits into from
Oct 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions docs/table_persistence.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@ Table Utilization:
| permissions | RW | | RW | RO | | RO | | |
| jobs | RW | RW | | | RO | | | |
| clusters | RW | RW | | | | | | |
| directfs_in_paths | RW | | | | | | | RW |
| directfs_in_queries | RW | | | | | | | RW |
| directfs_in_paths | RW | RW | | | | | | RW |
| directfs_in_queries | RW | RW | | | | | | RW |
| external_locations | RW | | | RO | | | | |
| workspace | RW | | RO | | RO | | | |
| workspace_objects | RW | | | | | | | |
Expand All @@ -27,8 +27,8 @@ Table Utilization:
| submit_runs | RW | | | | | | | |
| policies | RW | RW | | | | | | |
| migration_status | | RW | | RW | | RW | | |
| query_problems | RW | | | | | | | RW |
| workflow_problems | RW | | | | | | | RW |
| query_problems | RW | RW | | | | | | RW |
| workflow_problems | RW | RW | | | | | | RW |
| udfs | RW | RW | RO | | | | | |
| logs | RW | | RW | RW | | RW | RW | |
| recon_results | | | | | | | RW | |
Expand Down
4 changes: 2 additions & 2 deletions src/databricks/labs/ucx/contexts/workflow_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from databricks.labs.ucx.assessment.pipelines import PipelinesCrawler
from databricks.labs.ucx.config import WorkspaceConfig
from databricks.labs.ucx.contexts.application import GlobalContext
from databricks.labs.ucx.hive_metastore import TablesInMounts
from databricks.labs.ucx.hive_metastore import TablesInMounts, TablesCrawler
from databricks.labs.ucx.hive_metastore.table_size import TableSizeCrawler
from databricks.labs.ucx.hive_metastore.tables import FasterTableScanCrawler
from databricks.labs.ucx.installer.logs import TaskRunWarningRecorder
Expand Down Expand Up @@ -84,7 +84,7 @@ def global_init_scripts_crawler(self) -> GlobalInitScriptCrawler:
return GlobalInitScriptCrawler(self.workspace_client, self.sql_backend, self.inventory_database)

@cached_property
def tables_crawler(self) -> FasterTableScanCrawler:
def tables_crawler(self) -> TablesCrawler:
return FasterTableScanCrawler(self.sql_backend, self.inventory_database, self.config.include_databases)

@cached_property
Expand Down
17 changes: 15 additions & 2 deletions src/databricks/labs/ucx/progress/workflows.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,12 @@
class MigrationProgress(Workflow):
"""Experimental workflow that rescans the environment to reflect and track progress that has been made.

This is a subset of the assessment workflow and covers:
It overlaps substantially with the assessment workflow, covering:

- Clusters
- Dashboards
- Grants
- Jobs
- Jobs (inventory & linting)
- Pipelines
- Policies
- Tables
Expand Down Expand Up @@ -122,6 +123,18 @@ def refresh_table_migration_status(self, ctx: RuntimeContext) -> None:
"""
ctx.migration_status_refresher.snapshot(force_refresh=True)

@job_task
def assess_dashboards(self, ctx: RuntimeContext) -> None:
    """Scans all dashboards for migration issues in SQL code of embedded widgets.
    Also stores direct filesystem accesses for display in the migration dashboard."""
    # Delegates to the query linter, handing it the SQL backend and the
    # inventory database name taken from the runtime context.
    ctx.query_linter.refresh_report(ctx.sql_backend, ctx.inventory_database)

@job_task
def assess_workflows(self, ctx: RuntimeContext) -> None:
    """Scans all jobs for migration issues in notebooks.
    Also stores direct filesystem accesses for display in the migration dashboard."""
    # Delegates to the workflow linter, handing it the SQL backend and the
    # inventory database name taken from the runtime context.
    ctx.workflow_linter.refresh_report(ctx.sql_backend, ctx.inventory_database)

@job_task(
depends_on=[
crawl_grants,
Expand Down
52 changes: 29 additions & 23 deletions tests/unit/progress/test_workflows.py
Original file line number Diff line number Diff line change
@@ -1,45 +1,51 @@
from typing import get_type_hints
from unittest.mock import create_autospec

import pytest
from databricks.sdk import WorkspaceClient
from databricks.sdk.service.catalog import CatalogInfo, MetastoreAssignment
from databricks.sdk.service.jobs import BaseRun, RunResultState, RunState

from databricks.labs.ucx.assessment.clusters import ClustersCrawler, PoliciesCrawler
from databricks.labs.ucx.assessment.jobs import JobsCrawler
from databricks.labs.ucx.assessment.pipelines import PipelinesCrawler
from databricks.labs.ucx.progress.workflows import MigrationProgress
from databricks.labs.ucx.contexts.workflow_task import RuntimeContext
from databricks.labs.ucx.hive_metastore import TablesCrawler
from databricks.labs.ucx.hive_metastore.grants import GrantsCrawler
from databricks.labs.ucx.hive_metastore.table_migration_status import TableMigrationStatusRefresher
from databricks.labs.ucx.hive_metastore.udfs import UdfsCrawler


@pytest.mark.parametrize(
"task, crawler, crawler_class",
[
(MigrationProgress.crawl_tables, RuntimeContext.tables_crawler, TablesCrawler),
(MigrationProgress.crawl_udfs, RuntimeContext.udfs_crawler, UdfsCrawler),
(MigrationProgress.crawl_grants, RuntimeContext.grants_crawler, GrantsCrawler),
(MigrationProgress.assess_jobs, RuntimeContext.jobs_crawler, JobsCrawler),
(MigrationProgress.assess_clusters, RuntimeContext.clusters_crawler, ClustersCrawler),
(MigrationProgress.assess_pipelines, RuntimeContext.pipelines_crawler, PipelinesCrawler),
(MigrationProgress.crawl_cluster_policies, RuntimeContext.policies_crawler, PoliciesCrawler),
(
MigrationProgress.refresh_table_migration_status,
RuntimeContext.migration_status_refresher,
TableMigrationStatusRefresher,
),
],
"task, crawler",
(
(MigrationProgress.crawl_tables, RuntimeContext.tables_crawler),
(MigrationProgress.crawl_udfs, RuntimeContext.udfs_crawler),
(MigrationProgress.crawl_grants, RuntimeContext.grants_crawler),
(MigrationProgress.assess_jobs, RuntimeContext.jobs_crawler),
(MigrationProgress.assess_clusters, RuntimeContext.clusters_crawler),
(MigrationProgress.assess_pipelines, RuntimeContext.pipelines_crawler),
(MigrationProgress.crawl_cluster_policies, RuntimeContext.policies_crawler),
(MigrationProgress.refresh_table_migration_status, RuntimeContext.migration_status_refresher),
),
)
def test_migration_progress_runtime_refresh(run_workflow, task, crawler, crawler_class) -> None:
def test_migration_progress_runtime_refresh(run_workflow, task, crawler) -> None:
crawler_class = get_type_hints(crawler.func)["return"]
mock_crawler = create_autospec(crawler_class)
crawler_name = crawler.attrname
run_workflow(task, **{crawler_name: mock_crawler})
mock_crawler.snapshot.assert_called_once_with(force_refresh=True)


@pytest.mark.parametrize(
    "task, linter",
    (
        (MigrationProgress.assess_dashboards, RuntimeContext.query_linter),
        (MigrationProgress.assess_workflows, RuntimeContext.workflow_linter),
    ),
)
def test_linter_runtime_refresh(run_workflow, task, linter) -> None:
    """Verify that each linting task refreshes the report of its linter.

    For every (task, linter) pair the linter on the runtime context is replaced
    by an autospec'd double, the task is run through the `run_workflow` fixture,
    and the double must have received exactly one `refresh_report` call with the
    context's SQL backend and inventory database.
    """
    # `linter` exposes `.func` and `.attrname`; presumably it is the
    # `cached_property` descriptor declared on RuntimeContext -- confirm.
    # The class to mock is read from the return annotation of the wrapped function.
    annotated_return = get_type_hints(linter.func)["return"]
    linter_double = create_autospec(annotated_return)
    # Inject the double under the attribute name the property is bound to.
    overrides = {linter.attrname: linter_double}
    ctx = run_workflow(task, **overrides)
    linter_double.refresh_report.assert_called_once_with(ctx.sql_backend, ctx.inventory_database)


def test_migration_progress_with_valid_prerequisites(run_workflow) -> None:
ws = create_autospec(WorkspaceClient)
ws.metastores.current.return_value = MetastoreAssignment(metastore_id="test", workspace_id=123456789)
Expand Down