Skip to content

Commit

Permalink
Merge branch 'main' into whitelist-datasets
Browse files Browse the repository at this point in the history
# Conflicts:
#	src/databricks/labs/ucx/source_code/known.json
  • Loading branch information
ericvergnaud committed Jul 3, 2024
2 parents be43cc6 + f729f0c commit dfc8de0
Show file tree
Hide file tree
Showing 41 changed files with 21,776 additions and 119 deletions.
3 changes: 1 addition & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,6 @@ See [contributing instructions](CONTRIBUTING.md) to help improve this project.
- Account level Identity Setup. See instructions for [AWS](https://docs.databricks.com/en/administration-guide/users-groups/best-practices.html), [Azure](https://learn.microsoft.com/en-us/azure/databricks/administration-guide/users-groups/best-practices), and [GCP](https://docs.gcp.databricks.com/administration-guide/users-groups/best-practices.html).
- Unity Catalog Metastore Created (per region). See instructions for [AWS](https://docs.databricks.com/en/data-governance/unity-catalog/create-metastore.html), [Azure](https://learn.microsoft.com/en-us/azure/databricks/data-governance/unity-catalog/create-metastore), and [GCP](https://docs.gcp.databricks.com/data-governance/unity-catalog/create-metastore.html).
- If your Databricks Workspace relies on an external Hive Metastore (such as AWS Glue), make sure to read [this guide](docs/external_hms_glue.md).
- Databricks Workspace has to have network access to [pypi.org](https://pypi.org) to download `databricks-sdk`, `databricks-labs-lsql`, `databricks-labs-blueprint`, `sqlglot` and `pyyaml` packages.
- A PRO or Serverless SQL Warehouse to render the [report](docs/assessment.md) for the [assessment workflow](#assessment-workflow).

Once you [install UCX](#install-ucx), you can proceed to the [assessment workflow](#assessment-workflow) to ensure
Expand Down Expand Up @@ -171,7 +170,7 @@ For large organization with many workspaces, grouping workspaces into collection
User should be an account admin to be able to join a collection.

After this, UCX will be installed locally and a number of assets will be deployed in the selected workspace.
These assets are available under the installation folder, i.e. `/Users/<your user>/.ucx/`.
These assets are available under the installation folder; the default installation folder is `/Applications/ucx`. Please check [here](#advanced-force-install-over-existing-ucx) for more details.

You can also install a specific version by specifying it like `@v0.13.2` - `databricks labs install ucx@v0.13.2`.

Expand Down
9 changes: 9 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -51,9 +51,17 @@ dependencies = ["databricks-sdk>=0.27,<0.30",
"sqlglot>=25.4.1,<25.5",
"astroid>=3.2.2"]

[project.optional-dependencies]
pylsp = [
"python-lsp-server>=1.9.0"
]

[project.entry-points.databricks]
runtime = "databricks.labs.ucx.runtime:main"

[project.entry-points.pylsp]
plugin = "databricks.labs.ucx.source_code.lsp_plugin"

[project.urls]
Issues = "https://github.com/databricks/ucx/issues"
Source = "https://github.com/databricks/ucx"
Expand All @@ -74,6 +82,7 @@ dependencies = [
"pytest-mock~=3.14.0",
"pytest-timeout~=2.3.1",
"pytest-xdist~=3.5.0",
"python-lsp-server>=1.9.0",
"ruff~=0.3.4",
"types-PyYAML~=6.0.12",
"types-requests~=2.31.0",
Expand Down
8 changes: 8 additions & 0 deletions src/databricks/labs/ucx/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,11 @@
from databricks.sdk.core import with_user_agent_extra, with_product
from databricks.labs.blueprint.logger import install_logger
from databricks.labs.ucx.__about__ import __version__

install_logger()

# Add ucx/<version> for projects depending on ucx as a library
with_user_agent_extra("ucx", __version__)

# Add ucx/<version> for re-packaging of ucx, where product name is omitted
with_product("ucx", __version__)
52 changes: 26 additions & 26 deletions src/databricks/labs/ucx/install.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
from databricks.labs.lsql.backends import SqlBackend, StatementExecutionBackend
from databricks.labs.lsql.deployment import SchemaDeployer
from databricks.sdk import WorkspaceClient, AccountClient
from databricks.sdk.core import with_user_agent_extra
from databricks.sdk.errors import (
AlreadyExists,
BadRequest,
Expand Down Expand Up @@ -75,6 +76,7 @@
NUM_USER_ATTEMPTS = 10 # number of attempts user gets at answering a question

logger = logging.getLogger(__name__)
with_user_agent_extra("cmd", "install")


def deploy_schema(sql_backend: SqlBackend, inventory_schema: str):
Expand Down Expand Up @@ -690,43 +692,41 @@ def join_collection(
if self.is_account_install:
# skip joining collection when installer is running for all account workspaces
return None
collection_workspace: Workspace
prompt_message = "Do you want to join the current installation to an existing collection?"
if target_workspace_id is None and not self.prompts.confirm(prompt_message):
return None
account_client = self._get_safe_account_client()
ctx = AccountContext(account_client)
ids_to_workspace = self._get_workspace_info(current_workspace_id)
if target_workspace_id is None:
if self.prompts.confirm("Do you want to join the current installation to an existing collection?"):
# If joining a collection as part of the installation then collection_workspace_id would be empty
try:
# if user is account admin list and show available workspaces to select from
accessible_workspaces = ctx.account_workspaces.get_accessible_workspaces()
target_workspace = self._get_collection_workspace(
accessible_workspaces,
account_client,
)
assert target_workspace is not None
target_workspace_id = target_workspace.workspace_id

except PermissionDenied:
# if the user is not account admin, allow user to enter the workspace_id to join as collection.
# if no workspace_id is entered, then exit
logger.warning("User doesnt have account admin permission, cant list workspaces")
target_workspace_id = int(
self.prompts.question(
"Please enter, the workspace id to join as a collection (enter 0 to skip it)",
valid_number=True,
default="0",
)
# If joining a collection as part of the installation then collection_workspace_id would be empty
try:
# if user is account admin list and show available workspaces to select from
accessible_workspaces = ctx.account_workspaces.get_accessible_workspaces()
target_workspace = self._get_collection_workspace(
accessible_workspaces,
account_client,
)
assert target_workspace is not None
target_workspace_id = target_workspace.workspace_id
except PermissionDenied:
# if the user is not account admin, allow user to enter the workspace_id to join as collection.
# if no workspace_id is entered, then exit
logger.warning("User doesnt have account admin permission, cant list workspaces")
target_workspace_id = int(
self.prompts.question(
"Please enter, the workspace id to join as a collection (enter 0 to skip it)",
valid_number=True,
default="0",
)
else:
return None
)
if target_workspace_id == 0 or target_workspace_id is None:
# if user didn't enter workspace id
logger.info("Skipping joining collection...")
return None
# below code is executed if either joining an existing collection (through the cli)
# or selecting one while installing
collection_workspace = AccountInstaller._get_workspace(
collection_workspace: Workspace = AccountInstaller._get_workspace(
target_workspace_id,
ids_to_workspace,
)
Expand Down
2 changes: 1 addition & 1 deletion src/databricks/labs/ucx/queries/views/objects.sql
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ SELECT object_type, object_id, failures FROM (
SELECT "tables" as object_type, CONCAT(t.catalog, '.', t.database, '.', t.name) AS object_id,
TO_JSON(
FILTER(ARRAY(
IF(NOT STARTSWITH(t.table_format, "DELTA"), CONCAT("Non-DELTA format: ", t.table_format), NULL),
IF(NOT STARTSWITH(t.table_format, "DELTA") AND t.object_type != "VIEW", CONCAT("Non-DELTA format: ", t.table_format), NULL),
IF(STARTSWITH(t.location, "wasb"), "Unsupported Storage Type: wasb://", NULL),
IF(STARTSWITH(t.location, "adl"), "Unsupported Storage Type: adl://", NULL),
CASE
Expand Down
4 changes: 4 additions & 0 deletions src/databricks/labs/ucx/runtime.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
import sys
from pathlib import Path

from databricks.sdk.config import with_user_agent_extra

from databricks.labs.ucx.__about__ import __version__
from databricks.labs.ucx.assessment.workflows import Assessment, Failing
from databricks.labs.ucx.contexts.workflow_task import RuntimeContext
Expand Down Expand Up @@ -78,6 +80,8 @@ def trigger(self, *argv):
workflow = self._workflows[workflow_name]
if task_name == "parse_logs":
return ctx.task_run_warning_recorder.snapshot()
# both CLI commands and workflow names appear in telemetry under `cmd`
with_user_agent_extra("cmd", workflow_name)
# `{{parent_run_id}}` is the run of entire workflow, whereas `{{run_id}}` is the run of a task
workflow_run_id = named_parameters.get("parent_run_id", "unknown_run_id")
job_id = named_parameters.get("job_id", "unknown_job_id")
Expand Down
4 changes: 2 additions & 2 deletions src/databricks/labs/ucx/source_code/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,11 +113,11 @@ class Advisory(Advice):
"""A warning that does not prevent the code from running."""


class Failure(Advisory):
class Failure(Advice):
"""An error that prevents the code from running."""


class Deprecation(Advisory):
class Deprecation(Advice):
"""An advisory that suggests to replace the code with a newer version."""


Expand Down
Loading

0 comments on commit dfc8de0

Please sign in to comment.