Skip to content

Commit

Permalink
Automatically upgrade existing installations to avoid breaking changes (#985)
Browse files Browse the repository at this point in the history

This PR incorporates the work from
databrickslabs/blueprint#50, which enables
smoother cross-version upgrades.

Fix #471
  • Loading branch information
nfx authored and dmoore247 committed Mar 23, 2024
1 parent b82fe72 commit 55dd9e2
Show file tree
Hide file tree
Showing 3 changed files with 130 additions and 30 deletions.
96 changes: 66 additions & 30 deletions src/databricks/labs/ucx/install.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import sys
import time
import webbrowser
from collections.abc import Callable
from dataclasses import replace
from datetime import datetime, timedelta
from pathlib import Path
Expand All @@ -16,6 +17,7 @@
from databricks.labs.blueprint.installer import InstallState
from databricks.labs.blueprint.parallel import ManyError, Threads
from databricks.labs.blueprint.tui import Prompts
from databricks.labs.blueprint.upgrades import Upgrades
from databricks.labs.blueprint.wheels import ProductInfo, WheelsV2, find_project_root
from databricks.sdk import WorkspaceClient
from databricks.sdk.errors import ( # pylint: disable=redefined-builtin
Expand Down Expand Up @@ -176,54 +178,65 @@ def __init__(self, prompts: Prompts, installation: Installation, ws: WorkspaceCl
self._installation = installation
self._prompts = prompts

def run(self):
def run(
self,
verify_timeout=timedelta(minutes=2),
sql_backend_factory: Callable[[WorkspaceConfig], SqlBackend] | None = None,
wheel_builder_factory: Callable[[], WheelsV2] | None = None,
):
logger.info(f"Installing UCX v{PRODUCT_INFO.version()}")
config = self.configure()
sql_backend = StatementExecutionBackend(self._ws, config.warehouse_id)
wheels = WheelsV2(self._installation, PRODUCT_INFO)
if not sql_backend_factory:
sql_backend_factory = self._new_sql_backend
if not wheel_builder_factory:
wheel_builder_factory = self._new_wheel_builder
workspace_installation = WorkspaceInstallation(
config,
self._installation,
sql_backend,
wheels,
sql_backend_factory(config),
wheel_builder_factory(),
self._ws,
self._prompts,
verify_timeout=timedelta(minutes=2),
verify_timeout=verify_timeout,
)
workspace_installation.run()
try:
workspace_installation.run()
except ManyError as err:
if len(err.errs) == 1:
raise err.errs[0] from None
raise err

def _new_wheel_builder(self):
    """Create the default ``WheelsV2`` builder for this installation.

    ``run`` falls back to this factory when no ``wheel_builder_factory``
    is supplied by the caller.
    """
    builder = WheelsV2(self._installation, PRODUCT_INFO)
    return builder

def _new_sql_backend(self, config: WorkspaceConfig) -> SqlBackend:
    """Create the default SQL backend bound to the configured warehouse.

    ``run`` falls back to this factory when no ``sql_backend_factory``
    is supplied by the caller.
    """
    warehouse_id = config.warehouse_id
    return StatementExecutionBackend(self._ws, warehouse_id)

def configure(self) -> WorkspaceConfig:
try:
return self._installation.load(WorkspaceConfig)
config = self._installation.load(WorkspaceConfig)
self._apply_upgrades()
return config
except NotFound as err:
logger.debug(f"Cannot find previous installation: {err}")
return self._configure_new_installation()

def _apply_upgrades(self):
    """Run the blueprint ``Upgrades`` helper against this installation.

    A ``NotFound`` raised while constructing or applying the upgrades is
    logged as a warning and otherwise ignored, so installation continues.
    """
    try:
        Upgrades(PRODUCT_INFO, self._installation).apply(self._ws)
    except NotFound as err:
        logger.warning(f"Installed version is too old: {err}")

def _configure_new_installation(self) -> WorkspaceConfig:
logger.info("Please answer a couple of questions to configure Unity Catalog migration")
HiveMetastoreLineageEnabler(self._ws).apply(self._prompts)
inventory_database = self._prompts.question(
"Inventory Database stored in hive_metastore", default="ucx", valid_regex=r"^\w+$"
)

def warehouse_type(_):
return _.warehouse_type.value if not _.enable_serverless_compute else "SERVERLESS"

pro_warehouses = {"[Create new PRO SQL warehouse]": "create_new"} | {
f"{_.name} ({_.id}, {warehouse_type(_)}, {_.state.value})": _.id
for _ in self._ws.warehouses.list()
if _.warehouse_type == EndpointInfoWarehouseType.PRO
}
warehouse_id = self._prompts.choice_from_dict(
"Select PRO or SERVERLESS SQL warehouse to run assessment dashboards on", pro_warehouses
)
if warehouse_id == "create_new":
new_warehouse = self._ws.warehouses.create(
name=f"{WAREHOUSE_PREFIX} {time.time_ns()}",
spot_instance_policy=SpotInstancePolicy.COST_OPTIMIZED,
warehouse_type=CreateWarehouseRequestWarehouseType.PRO,
cluster_size="Small",
max_num_clusters=1,
)
warehouse_id = new_warehouse.id

warehouse_id = self._configure_warehouse()
configure_groups = ConfigureGroups(self._prompts)
configure_groups.run()
log_level = self._prompts.question("Log level", default="INFO").upper()
Expand Down Expand Up @@ -269,6 +282,29 @@ def warehouse_type(_):
webbrowser.open(ws_file_url)
return config

def _configure_warehouse(self):
    """Prompt for the SQL warehouse to use and return its id.

    The prompt offers every PRO warehouse returned by the workspace client
    plus a "create new" entry. Picking that entry creates a new PRO
    warehouse (size "Small", at most one cluster) and returns its id.
    """

    def kind_of(endpoint):
        # Serverless-enabled endpoints are labelled "SERVERLESS" rather
        # than by their declared warehouse type.
        if not endpoint.enable_serverless_compute:
            return endpoint.warehouse_type.value
        return "SERVERLESS"

    # The "create new" entry goes first so it keeps the first position.
    choices = {"[Create new PRO SQL warehouse]": "create_new"}
    for endpoint in self._ws.warehouses.list():
        if endpoint.warehouse_type == EndpointInfoWarehouseType.PRO:
            label = f"{endpoint.name} ({endpoint.id}, {kind_of(endpoint)}, {endpoint.state.value})"
            choices[label] = endpoint.id

    selected = self._prompts.choice_from_dict(
        "Select PRO or SERVERLESS SQL warehouse to run assessment dashboards on", choices
    )
    if selected != "create_new":
        return selected

    created = self._ws.warehouses.create(
        name=f"{WAREHOUSE_PREFIX} {time.time_ns()}",
        spot_instance_policy=SpotInstancePolicy.COST_OPTIMIZED,
        warehouse_type=CreateWarehouseRequestWarehouseType.PRO,
        cluster_size="Small",
        max_num_clusters=1,
    )
    return created.id

@staticmethod
def _policy_config(value: str):
return {"type": "fixed", "value": value}
Expand Down Expand Up @@ -370,7 +406,7 @@ def __init__(

@classmethod
def current(cls, ws: WorkspaceClient):
installation = Installation.current(ws, PRODUCT_INFO.product_name())
installation = PRODUCT_INFO.current_installation(ws)
config = installation.load(WorkspaceConfig)
sql_backend = StatementExecutionBackend(ws, config.warehouse_id)
wheels = WheelsV2(installation, PRODUCT_INFO)
Expand Down
11 changes: 11 additions & 0 deletions src/databricks/labs/ucx/upgrades/v0.4.0_added_log_dir.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# pylint: disable=invalid-name,unused-argument
import logging

from databricks.labs.blueprint.installation import Installation
from databricks.sdk import WorkspaceClient

logger = logging.getLogger(__name__)


def upgrade(installation: Installation, ws: WorkspaceClient):
    """Upload a placeholder ``logs/README.md`` into the installation.

    ``ws`` is not used by this upgrade step.
    """
    readme_path = 'logs/README.md'
    readme_body = b'# This folder contains logs from UCX workflows'
    installation.upload(readme_path, readme_body)
53 changes: 53 additions & 0 deletions tests/unit/test_install.py
Original file line number Diff line number Diff line change
Expand Up @@ -1339,3 +1339,56 @@ def test_open_config(ws, mocker, mock_installation):
install.configure()

webbrowser_open.assert_called_with('https://localhost/#workspace~/mock/config.yml')


def test_runs_upgrades_on_too_old_version(ws, any_prompt):
    """Installation without ``version.json`` is expected to raise ``InvalidParameterValue``."""
    previous_files = {
        'state.json': {'resources': {'dashboards': {'assessment_main': 'abc'}}},
        'config.yml': {
            'inventory_database': 'x',
            'warehouse_id': 'abc',
            'connect': {'host': '...', 'token': '...'},
        },
    }
    existing_installation = MockInstallation(previous_files)
    install = WorkspaceInstaller(any_prompt, existing_installation, ws)

    sql_backend = MockBackend()
    wheels = create_autospec(WheelsV2)

    def use_mock_backend(_config):
        return sql_backend

    def use_mock_wheels():
        return wheels

    # TODO: (HariGS-DB) remove this, once added the policy upgrade
    # TODO: fix along https://github.com/databrickslabs/ucx/issues/1012
    with pytest.raises(InvalidParameterValue):
        install.run(
            verify_timeout=timedelta(seconds=1),
            sql_backend_factory=use_mock_backend,
            wheel_builder_factory=use_mock_wheels,
        )


def test_runs_upgrades_on_more_recent_version(ws, any_prompt):
    """Installation recorded as version 0.3.0 gets ``logs/README.md`` uploaded during ``run``."""
    previous_files = {
        'version.json': {'version': '0.3.0', 'wheel': '...', 'date': '...'},
        'state.json': {'resources': {'dashboards': {'assessment_main': 'abc'}}},
        'config.yml': {
            'inventory_database': 'x',
            'warehouse_id': 'abc',
            'policy_id': 'abc',  # TODO: (HariGS-DB) remove this, once added the policy upgrade
            'connect': {'host': '...', 'token': '...'},
        },
    }
    existing_installation = MockInstallation(previous_files)
    install = WorkspaceInstaller(any_prompt, existing_installation, ws)

    sql_backend = MockBackend()
    wheels = create_autospec(WheelsV2)

    def use_mock_backend(_config):
        return sql_backend

    def use_mock_wheels():
        return wheels

    install.run(
        verify_timeout=timedelta(seconds=1),
        sql_backend_factory=use_mock_backend,
        wheel_builder_factory=use_mock_wheels,
    )

    existing_installation.assert_file_uploaded('logs/README.md')

0 comments on commit 55dd9e2

Please sign in to comment.