diff --git a/.codegen.json b/.codegen.json index a00911e0b0..b2fd6857fd 100644 --- a/.codegen.json +++ b/.codegen.json @@ -11,4 +11,4 @@ "pytest -n 4 --cov src --cov-report=xml --timeout 30 tests/unit --durations 20" ] } -} \ No newline at end of file +} diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml index fc9d764b44..8eeef6eb4a 100644 --- a/.github/ISSUE_TEMPLATE/config.yml +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -3,7 +3,7 @@ contact_links: - name: General Databricks questions url: https://help.databricks.com/ about: Issues related to Databricks and not related to UCX - + - name: UCX Documentation url: https://github.com/databrickslabs/ucx/tree/main/docs about: Documentation about UCX diff --git a/.github/ISSUE_TEMPLATE/feature.yml b/.github/ISSUE_TEMPLATE/feature.yml index 8bf7ad146b..4572167772 100644 --- a/.github/ISSUE_TEMPLATE/feature.yml +++ b/.github/ISSUE_TEMPLATE/feature.yml @@ -33,4 +33,3 @@ body: description: Add any other context, references or screenshots about the feature request here. validations: required: false - \ No newline at end of file diff --git a/.github/codecov.yml b/.github/codecov.yml index aa8cf6b8d7..aaa25bf74e 100644 --- a/.github/codecov.yml +++ b/.github/codecov.yml @@ -7,4 +7,4 @@ coverage: patch: default: target: auto - threshold: 0.5% \ No newline at end of file + threshold: 0.5% diff --git a/.github/dependabot.yml b/.github/dependabot.yml index 8c763bf215..b728efb6e0 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -7,4 +7,4 @@ updates: - package-ecosystem: "github-actions" directory: "/" schedule: - interval: "daily" \ No newline at end of file + interval: "daily" diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index 26db60bacf..40fd8e0780 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -7,7 +7,7 @@ Resolves #.. -### Functionality +### Functionality - [ ] added relevant user documentation - [ ] added new CLI command diff --git a/.github/workflows/no-cheat.yml b/.github/workflows/no-cheat.yml index 3e150d0eeb..c1864acf45 100644 --- a/.github/workflows/no-cheat.yml +++ b/.github/workflows/no-cheat.yml @@ -27,4 +27,4 @@ jobs: if [ "${CHEAT}" -ne 0 ]; then echo "Do not cheat the linter: ${CHEAT}" exit 1 - fi \ No newline at end of file + fi diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 0a0fb71ce8..d645b0c9fc 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -22,12 +22,12 @@ jobs: cache: 'pip' cache-dependency-path: '**/pyproject.toml' python-version: '3.10' - + - name: Build wheels run: | pip install hatch==1.9.4 hatch build - + - name: Draft release uses: softprops/action-gh-release@v2 with: @@ -38,11 +38,11 @@ jobs: - uses: pypa/gh-action-pypi-publish@release/v1 name: Publish package distributions to PyPI - + - name: Sign artifacts with Sigstore uses: sigstore/gh-action-sigstore-python@v2.1.1 with: inputs: | dist/databricks_*.whl dist/databricks_*.tar.gz - release-signing-artifacts: true \ No newline at end of file + release-signing-artifacts: true diff --git a/docs/external_hms_glue.md b/docs/external_hms_glue.md index 4c1a149e44..69ca592cfc 100644 --- a/docs/external_hms_glue.md +++ b/docs/external_hms_glue.md @@ -10,14 +10,14 @@ External Hive Metastore Integration * [Additional Considerations](#additional-considerations) -UCX works with both the default workspace metastore, or an external Hive metastore. 
This document outlines the current +UCX works with both the default workspace metastore, or an external Hive metastore. This document outlines the current integration and how to set up UCX to work with your existing external metastore. # Installation The setup process follows the following steps -- UCX scan existing cluster policies, and Databricks SQL data access configuration for Spark configurations key that +- UCX scan existing cluster policies, and Databricks SQL data access configuration for Spark configurations key that enables external Hive metastore: - Spark config `spark.databricks.hive.metastore.glueCatalog.enabled=true` - for Glue Catalog - Spark config containing prefixes `spark.sql.hive.metastore` - for external Hive metastore @@ -25,15 +25,15 @@ enables external Hive metastore: _We have identified one or more cluster policies set up for an external metastore. Would you like to set UCX to connect to the external metastore?_ - Selecting **Yes** will display a list of the matching policies and allow the user to select the appropriate policies. -- The chosen policy will be used as the template to set up UCX job clusters via a new policy. UCX will clone the +- The chosen policy will be used as the template to set up UCX job clusters via a new policy. UCX will clone the necessary Spark configurations and data access configurations, e.g. Instance Profile over to this new policy. - When prompted for an inventory database, please specify a new name instead of the default `ucx` to avoid conflict. This is because the inventory database will be created in the external metastore, which is shared across multiple workspaces. -- UCX **DOES NOT** update the data access configuration for SQL Warehouses. This is because Databricks SQL settings apply +- UCX **DOES NOT** update the data access configuration for SQL Warehouses. This is because Databricks SQL settings apply to all warehouses in a workspace, and can introduce unexpected changes to existing workload. **Note** -As UCX uses both job clusters and SQL Warehouses, it is important to ensure that both are configured to use the same +As UCX uses both job clusters and SQL Warehouses, it is important to ensure that both are configured to use the same external Hive metastore. If the SQL Warehouses are not configured for external Hive metastore, please manually update the data access configuration. See [Enable data access configuration](https://learn.microsoft.com/en-us/azure/databricks/admin/sql/data-access-configuration) for more details @@ -41,7 +41,7 @@ the data access configuration. See [Enable data access configuration](https://le # Manual Override -If the workspace does not have a cluster policy or SQL data access configuration for external Hive metastore, there are +If the workspace does not have a cluster policy or SQL data access configuration for external Hive metastore, there are two options to manually enable this: - *Pre-installation*: create a custer policy with the appropriate Spark configuration and data access for external metastore: - See the following documentation pages for more details: [Glue catalog](https://docs.databricks.com/en/archive/external-metastores/aws-glue-metastore.html) and [External Hive Metastore](https://learn.microsoft.com/en-us/azure/databricks/archive/external-metastores/external-hive-metastore). @@ -70,13 +70,13 @@ following the post-installation steps above. 
# Assessment Workflow -Once UCX is set up with external Hive metastore the assessment workflow will scan tables & views from the external +Once UCX is set up with external Hive metastore the assessment workflow will scan tables & views from the external Hive metastore instead of the default workspace metastore. If the external Hive metastore is shared between multiple workspaces, please specify a different inventory database name for each UCX installation. This is to avoid conflicts between the inventory databases. -As the inventory database is stored in the external Hive metastore, it can only be queried from a cluster or SQL warehouse +As the inventory database is stored in the external Hive metastore, it can only be queried from a cluster or SQL warehouse with external Hive metastore configuration. The assessment dashboard will also fail if the SQL warehouse is not configured correctly. [[back to top](#external-hive-metastore-integration)] @@ -91,14 +91,14 @@ metastore is redundant and will be a no-op. # Additional Considerations -If a workspace is set up with multiple external Hive metastores, you will need to plan the approach carefully. Below are +If a workspace is set up with multiple external Hive metastores, you will need to plan the approach carefully. Below are a few considerations to keep in mind: - You can have multiple UCX installations in a workspace, each set up with a different external Hive metastore. As the SQL data access configuration is shared across the entire workspace, you will need to manually update them when running each UCX installation. - You can uninstall UCX and reinstall it with a different external Hive metastore. This still requires manual updates to the SQL data access configuration, but it is a cleaner approach. -- You can manually modify the cluster policy and SQL data access configuration to point to the correct external Hive +- You can manually modify the cluster policy and SQL data access configuration to point to the correct external Hive metastore, after UCX has been installed. This is the most flexible approach, but requires manual intervention. -[[back to top](#external-hive-metastore-integration)] \ No newline at end of file +[[back to top](#external-hive-metastore-integration)] diff --git a/docs/group_name_conflict.md b/docs/group_name_conflict.md index 07d022f7e8..d9223434ca 100644 --- a/docs/group_name_conflict.md +++ b/docs/group_name_conflict.md @@ -19,7 +19,7 @@ Choose how to map the workspace groups: [3] Match by External ID [4] Regex Substitution [5] Regex Matching -Enter a number between 0 and 5: +Enter a number between 0 and 5: ``` The user then input the Prefix/Suffix/Regular Expression. @@ -41,4 +41,4 @@ Group Translation Scenarios: | Prefix | prefix: [Prefix] | ^ | [Prefix] | [EMPTY] | data_engineers --> prod_data_engineers | | Suffix | suffix: [Prefix] | $ | [Suffix] | [EMPTY] | data_engineers --> data_engineers_prod | | Substitution | Search Regex: [Regex]
Replace Text:[Replacement_Text] | [WS_Regex] | [ [Replacement_Text] | [Empty] | corp_tech_data_engineers --> prod_data_engineers | -| Partial Lookup | Workspace Regex: [WS_Regex]
Account Regex: [Acct Regex] | [WS_Regex] | [Empty] | [Acct_Regex] | data_engineers(12345) --> data_engs(12345) | \ No newline at end of file +| Partial Lookup | Workspace Regex: [WS_Regex]
Account Regex: [Acct Regex] | [WS_Regex] | [Empty] | [Acct_Regex] | data_engineers(12345) --> data_engs(12345) | diff --git a/docs/local-group-migration.md b/docs/local-group-migration.md index 9323a604c9..74ea243049 100644 --- a/docs/local-group-migration.md +++ b/docs/local-group-migration.md @@ -15,16 +15,16 @@ Workspace Group Migration * [Troubleshooting](#troubleshooting) -This feature introduces the ability to migrate groups from workspace level to account level in +This feature introduces the ability to migrate groups from workspace level to account level in the [group migration workflow](../README.md#group-migration-workflow). It helps you to upgrade all Databricks workspace assets: -Legacy Table ACLs, Entitlements, AWS instance profiles, Clusters, Cluster policies, Instance Pools, +Legacy Table ACLs, Entitlements, AWS instance profiles, Clusters, Cluster policies, Instance Pools, Databricks SQL warehouses, Delta Live Tables, Jobs, MLflow experiments, MLflow registry, SQL Dashboards & Queries, SQL Alerts, Token and Password usage permissions that are set on the workspace level, Secret scopes, Notebooks, -Directories, Repos, and Files. +Directories, Repos, and Files. -It ensures that all the necessary groups are available in the workspace with the correct permissions, and removes any unnecessary groups and permissions. -The tasks in the group migration workflow depend on the output of the assessment workflow and can be executed in sequence to ensure a successful migration. -The output of each task is stored in Delta tables in the `$inventory_database` schema. +It ensures that all the necessary groups are available in the workspace with the correct permissions, and removes any unnecessary groups and permissions. +The tasks in the group migration workflow depend on the output of the assessment workflow and can be executed in sequence to ensure a successful migration. +The output of each task is stored in Delta tables in the `$inventory_database` schema. The group migration workflow can be executed multiple times to ensure that all the groups are migrated successfully and that all the necessary permissions are assigned. @@ -39,45 +39,45 @@ The group migration workflow can be executed multiple times to ensure that all t # Design -`MigratedGroup` class represents a group that has been migrated from one name to another and stores information about -the original and new names, as well as the group's members, external ID, and roles. The `MigrationState` class holds -the state of the migration process and provides methods for getting the target principal and temporary name for a given +`MigratedGroup` class represents a group that has been migrated from one name to another and stores information about +the original and new names, as well as the group's members, external ID, and roles. The `MigrationState` class holds +the state of the migration process and provides methods for getting the target principal and temporary name for a given group name. [[back to top](#workspace-group-migration)] ## Group Manager -The `GroupManager` class is a `CrawlerBase` subclass that manages groups in a Databricks workspace. It provides methods -for renaming groups, reflecting account groups on the workspace, deleting original workspace groups, and validating -group membership. The class also provides methods for listing workspace and account groups, getting group details, and +The `GroupManager` class is a `CrawlerBase` subclass that manages groups in a Databricks workspace. 
It provides methods +for renaming groups, reflecting account groups on the workspace, deleting original workspace groups, and validating +group membership. The class also provides methods for listing workspace and account groups, getting group details, and deleting groups. -The `GroupMigrationStrategy` abstract base class defines the interface for a strategy that generates a list -of `MigratedGroup` objects based on a mapping between workspace and account groups. -The `MatchingNamesStrategy`, `MatchByExternalIdStrategy`, `RegexSubStrategy`, and `RegexMatchStrategy` classes are +The `GroupMigrationStrategy` abstract base class defines the interface for a strategy that generates a list +of `MigratedGroup` objects based on a mapping between workspace and account groups. +The `MatchingNamesStrategy`, `MatchByExternalIdStrategy`, `RegexSubStrategy`, and `RegexMatchStrategy` classes are concrete implementations of this interface. See [group name conflicts](group_name_conflict.md) for more details. -The `ConfigureGroups` class provides a command-line interface for configuring the group migration process during [installation](../README.md#installation). -It prompts the user to enter information about the group migration strategy, such as the renamed group prefix, regular expressions -for matching and substitution, and a list of groups to migrate. The class also provides methods for validating user input +The `ConfigureGroups` class provides a command-line interface for configuring the group migration process during [installation](../README.md#installation). +It prompts the user to enter information about the group migration strategy, such as the renamed group prefix, regular expressions +for matching and substitution, and a list of groups to migrate. The class also provides methods for validating user input and setting class variables based on the user's responses. [[back to top](#workspace-group-migration)] ## Permission Manager -It enables to crawl, save, and apply permissions for [clusters](#generic-permissions), -[tables and UDFs (User-Defined Functions)](#legacy-table-access-controls), [secret scopes](#secret-scope-permissions), +It enables to crawl, save, and apply permissions for [clusters](#generic-permissions), +[tables and UDFs (User-Defined Functions)](#legacy-table-access-controls), [secret scopes](#secret-scope-permissions), [entitlements](#entitlements-and-roles), and [dashboards](#dashboard-permissions). -To use the module, you can create a `PermissionManager` instance by calling the `factory` method, which sets up -the necessary [`AclSupport` objects](#acl-support) for different types of objects in the workspace. Once the instance -is created, you can call the `inventorize_permissions` method to crawl and save the permissions for all objects to +To use the module, you can create a `PermissionManager` instance by calling the `factory` method, which sets up +the necessary [`AclSupport` objects](#acl-support) for different types of objects in the workspace. Once the instance +is created, you can call the `inventorize_permissions` method to crawl and save the permissions for all objects to the inventory database in the `permissions` table. -The `apply_group_permissions` method allows you to apply the permissions to a list of account groups, while -the [`verify_group_permissions` method](../README.md#validate-groups-membership-command) verifies that the permissions are valid. 
+The `apply_group_permissions` method allows you to apply the permissions to a list of account groups, while +the [`verify_group_permissions` method](../README.md#validate-groups-membership-command) verifies that the permissions are valid. [[back to top](#workspace-group-migration)] @@ -90,15 +90,15 @@ The `AclSupport` objects define how to crawl, save, and apply permissions for sp * `get_verify_task`: A method that returns a callable that verifies that the permissions for a given `Permissions` object are applied correctly to the destination group. This method can be used to ensure that permissions are applied as expected, helping to improve the reliability and security of your Databricks workspace. * `object_types`: An abstract method that returns a set of strings representing the object types that the `AclSupport` instance supports. This method should be implemented to provide the necessary information about the object types supported by the `AclSupport` class. -The `Permissions` dataclass is used to represent the permissions for a specific object type and ID. The dataclass includes a `raw` attribute +The `Permissions` dataclass is used to represent the permissions for a specific object type and ID. The dataclass includes a `raw` attribute that contains the raw permission data as a string, providing a convenient way to work with the underlying permission data. [[back to top](#workspace-group-migration)] ### Generic Permissions -The `GenericPermissionsSupport` class is a concrete implementation of the [`AclSupport` interface](#acl-support) for -migrating permissions on various objects in a Databricks workspace. It is designed to be flexible and support almost any +The `GenericPermissionsSupport` class is a concrete implementation of the [`AclSupport` interface](#acl-support) for +migrating permissions on various objects in a Databricks workspace. It is designed to be flexible and support almost any object type in the workspace: - clusters @@ -116,18 +116,18 @@ object type in the workspace: - notebooks - workspace folders -It takes in an instance of the `WorkspaceClient` class, a list of `Listing` objects, and a `verify_timeout` parameter in -its constructor. The `Listing` objects are responsible for listing the objects in the workspace, and +It takes in an instance of the `WorkspaceClient` class, a list of `Listing` objects, and a `verify_timeout` parameter in +its constructor. The `Listing` objects are responsible for listing the objects in the workspace, and the `GenericPermissionsSupport` class uses these listings to crawl the ACL permissions for each object. -The `_apply_grant` method applies the ACL permission to the target principal in the database, and the `_verify` method -checks if the ACL permission in the `Grant` object matches the ACL permission for that object and principal in the database. -If the ACL permission does not match, the method raises a `ValueError` with an error message. The `get_verify_task` method -takes in a `Permissions` object and returns a callable object that calls the `_verify` method with the object type, +The `_apply_grant` method applies the ACL permission to the target principal in the database, and the `_verify` method +checks if the ACL permission in the `Grant` object matches the ACL permission for that object and principal in the database. +If the ACL permission does not match, the method raises a `ValueError` with an error message. 
The `get_verify_task` method +takes in a `Permissions` object and returns a callable object that calls the `_verify` method with the object type, object ID, and `Grant` object from the `Permissions` object. -he `_safe_get_permissions` and `_safe_updatepermissions` methods are used to safely get and update the permissions for -a given object type and ID, respectively. These methods handle exceptions that may occur during the API calls and log +he `_safe_get_permissions` and `_safe_updatepermissions` methods are used to safely get and update the permissions for +a given object type and ID, respectively. These methods handle exceptions that may occur during the API calls and log appropriate warning messages. [[back to top](#workspace-group-migration)] @@ -140,8 +140,8 @@ Reflected in [RedashPermissionsSupport](../src/databricks/labs/ucx/workspace_acc ### Entitlements and Roles -The `ScimSupport` is [`AclSupport`](#acl-support) that creates a snapshot of all the groups in the workspace, including their display name, id, meta, roles, and entitlements. -The `_is_item_relevant` method checks if a permission item is relevant to the current migration state. The `get_crawler_tasks` method returns an iterator of partial functions +The `ScimSupport` is [`AclSupport`](#acl-support) that creates a snapshot of all the groups in the workspace, including their display name, id, meta, roles, and entitlements. +The `_is_item_relevant` method checks if a permission item is relevant to the current migration state. The `get_crawler_tasks` method returns an iterator of partial functions for crawling the permissions of each group in the snapshot. It checks if the group has any roles or entitlements and returns a partial function to crawl the corresponding property. See [examples](../tests/integration/workspace_access/test_scim.py) for more details on how to use it as a library. @@ -150,9 +150,9 @@ See [examples](../tests/integration/workspace_access/test_scim.py) for more deta ### Secret Scope Permissions -`SecretScopesSupport` is a concrete implementation of the [`AclSupport` interface](#acl-support) for crawling ACLs of -all secret scopes, applying and verifying ACLs, and checking if a `Permissions` object is relevant to the current -migration state. It simplifies the process of managing permissions on secret scopes by checking if the ACLs have been +`SecretScopesSupport` is a concrete implementation of the [`AclSupport` interface](#acl-support) for crawling ACLs of +all secret scopes, applying and verifying ACLs, and checking if a `Permissions` object is relevant to the current +migration state. It simplifies the process of managing permissions on secret scopes by checking if the ACLs have been applied correctly, and if not, automatically reapplying them. [[back to top](#workspace-group-migration)] @@ -160,11 +160,11 @@ applied correctly, and if not, automatically reapplying them. ### Legacy Table Access Controls The `TableAclSupport` class is initialized with an instance of `GrantsCrawler` and `SqlBackend` classes, along with a `verify_timeout` parameter. -The class offers methods for crawling table ACL permissions, applying and verifying ACL permissions, and checking if a `Permissions` object is relevant to the current migration state. -The `get_crawler_tasks` method returns an iterator of callable objects, each of which returns a `Permissions` object for a specific table ACL permission. 
+The class offers methods for crawling table ACL permissions, applying and verifying ACL permissions, and checking if a `Permissions` object is relevant to the current migration state. +The `get_crawler_tasks` method returns an iterator of callable objects, each of which returns a `Permissions` object for a specific table ACL permission. The `_from_reduced` method creates a `Grant` object for each set of folded actions, and the `get_apply_task` method applies the ACL permission in the `Permissions` object to the target principal in the `MigrationState` object. -Furthermore, the `_apply_grant` method applies the ACL permission to the target principal in the database, while the `_verify` method checks if the ACL permission in -the `Grant` object matches the ACL permission for that object and principal in the database. The `get_verify_task` method calls the `_verify` method with the object type, +Furthermore, the `_apply_grant` method applies the ACL permission to the target principal in the database, while the `_verify` method checks if the ACL permission in +the `Grant` object matches the ACL permission for that object and principal in the database. The `get_verify_task` method calls the `_verify` method with the object type, object ID, and `Grant` object from the `Permissions` object. [[back to top](#workspace-group-migration)] @@ -291,4 +291,4 @@ finally: file_handler.close() ``` -[[back to top](#workspace-group-migration)] \ No newline at end of file +[[back to top](#workspace-group-migration)] diff --git a/docs/table_upgrade.md b/docs/table_upgrade.md index 0e9a2279a7..315f1e144d 100644 --- a/docs/table_upgrade.md +++ b/docs/table_upgrade.md @@ -162,16 +162,16 @@ upgrade. This feedback is presented in the migration dashboard: ## Data Access Permissions -The code provided is a Python module that defines a `Grant` dataclass and a `GrantsCrawler` class. The `Grant` dataclass -represents a grant of privileges in a database system, with attributes for the principal, action type, catalog, database, -table, view, UDF, and flags for any file and anonymous function. The `GrantsCrawler` class is a crawler that fetches grants -for databases, tables, views, UDFs, and anonymous functions in a Hive metastore. - -It uses a `TablesCrawler` and `UdfsCrawler` to fetch table and UDF information, respectively. The `GrantsCrawler` class -provides methods for fetching grants based on different parameters and returning them as an iterable of `Grant` objects. -It also provides methods for getting grants for a specific table or schema. The code includes a `_type_and_key` method -that normalizes the input parameters and returns a tuple of the object type and key, which is used to fetch grants for -the specified object. The code also includes methods for generating SQL statements to grant and revoke privileges in +The code provided is a Python module that defines a `Grant` dataclass and a `GrantsCrawler` class. The `Grant` dataclass +represents a grant of privileges in a database system, with attributes for the principal, action type, catalog, database, +table, view, UDF, and flags for any file and anonymous function. The `GrantsCrawler` class is a crawler that fetches grants +for databases, tables, views, UDFs, and anonymous functions in a Hive metastore. + +It uses a `TablesCrawler` and `UdfsCrawler` to fetch table and UDF information, respectively. The `GrantsCrawler` class +provides methods for fetching grants based on different parameters and returning them as an iterable of `Grant` objects. 
+It also provides methods for getting grants for a specific table or schema. The code includes a `_type_and_key` method +that normalizes the input parameters and returns a tuple of the object type and key, which is used to fetch grants for +the specified object. The code also includes methods for generating SQL statements to grant and revoke privileges in Hive and Unity Catalog (UC) systems. [[back to top](#table-upgrade)] @@ -180,86 +180,86 @@ Hive and Unity Catalog (UC) systems. The module includes two classes, `ExternalLocations` and `Mounts`, which inherit from `CrawlerBase`. -`ExternalLocations` is a class for crawling and managing external locations used by tables in a Databricks workspace. -It has methods for creating a list of external locations based on tables in a given schema and a method for generating +`ExternalLocations` is a class for crawling and managing external locations used by tables in a Databricks workspace. +It has methods for creating a list of external locations based on tables in a given schema and a method for generating Terraform definitions for any missing external locations. The class has a `_external_locations` method that filters and p rocesses the external locations based on certain conditions. -`Mounts` is a class for managing mounts in a Databricks workspace. It has methods for listing and deduplicating mounts, -as well as a method for creating a snapshot of the current mounts. The `_deduplicate_mounts` method removes any duplicate +`Mounts` is a class for managing mounts in a Databricks workspace. It has methods for listing and deduplicating mounts, +as well as a method for creating a snapshot of the current mounts. The `_deduplicate_mounts` method removes any duplicate mounts based on their name and source. [[back to top](#table-upgrade)] ## Table Mapping -The module includes two dataclasses, `Rule` and `TableToMigrate`, which encapsulate information about the source and target tables for migration. -The `Rule` dataclass includes information about the source and target catalog, schema, and table names, as well as a method for generating -the unique key for the target table in the Unity Catalog (UC) and the Hive Metastore (HMS). The `TableToMigrate` dataclass includes +The module includes two dataclasses, `Rule` and `TableToMigrate`, which encapsulate information about the source and target tables for migration. +The `Rule` dataclass includes information about the source and target catalog, schema, and table names, as well as a method for generating +the unique key for the target table in the Unity Catalog (UC) and the Hive Metastore (HMS). The `TableToMigrate` dataclass includes a `Table` object representing the source table and a `Rule` object representing the migration rule for that table. -At the heart of the module is the `TableMapping` class, which is the main class for managing table mappings. -The `TableMapping` class includes several methods for managing the table mappings, such as loading and saving -the mappings to a file, skipping tables and schemas, and checking if a table is already migrated or marked to be skipped. -The `TableMapping` class is initialized with an `Installation` object, a `WorkspaceClient` object, and a `SqlBackend` object, +At the heart of the module is the `TableMapping` class, which is the main class for managing table mappings. 
+The `TableMapping` class includes several methods for managing the table mappings, such as loading and saving +the mappings to a file, skipping tables and schemas, and checking if a table is already migrated or marked to be skipped. +The `TableMapping` class is initialized with an `Installation` object, a `WorkspaceClient` object, and a `SqlBackend` object, which are used to interact with the Unity Catalog, the workspace, and to execute SQL queries. [[back to top](#table-upgrade)] ## Migrating Tables -The `TablesMigrate` class is designed for migrating tables from one schema to another within a Databricks workspace. -This class requires instances of `TablesCrawler`, `WorkspaceClient`, `SqlBackend`, and `TableMapping` as inputs. -The `migrate_tables` method is responsible for migrating tables and takes an optional argument `what` to filter tables -based on their type. This method internally calls the `_migrate_table` method which is responsible for migrating +The `TablesMigrate` class is designed for migrating tables from one schema to another within a Databricks workspace. +This class requires instances of `TablesCrawler`, `WorkspaceClient`, `SqlBackend`, and `TableMapping` as inputs. +The `migrate_tables` method is responsible for migrating tables and takes an optional argument `what` to filter tables +based on their type. This method internally calls the `_migrate_table` method which is responsible for migrating the actual table and determining the appropriate migration method based on the table's type. -The `_migrate_external_table`, `_migrate_dbfs_root_table`, and `_migrate_view` methods are used to migrate external -tables, DBFS root tables, and views, respectively. The `_init_seen_tables`, `_table_already_upgraded`, `_get_tables_to_revert`, -and `_revert_migrated_table` methods are used for managing the state of the migration process. The `_init_seen_tables` method -initializes the list of tables that have been seen during the migration process. The `_table_already_upgraded` method checks +The `_migrate_external_table`, `_migrate_dbfs_root_table`, and `_migrate_view` methods are used to migrate external +tables, DBFS root tables, and views, respectively. The `_init_seen_tables`, `_table_already_upgraded`, `_get_tables_to_revert`, +and `_revert_migrated_table` methods are used for managing the state of the migration process. The `_init_seen_tables` method +initializes the list of tables that have been seen during the migration process. The `_table_already_upgraded` method checks if a table has already been upgraded. The `_get_tables_to_revert` method retrieves the list of tables that can be reverted. The `_revert_migrated_table` method is responsible for reverting the migration of a table. -The `is_upgraded` method checks if a table has been upgraded or not. The `print_revert_report` method generates a report +The `is_upgraded` method checks if a table has been upgraded or not. The `print_revert_report` method generates a report of the tables that can be reverted. [[back to top](#table-upgrade)] ## Moving tables -The `TableMove` class is a newly developed feature that enables the movement or aliasing of tables and views from one -schema to another within UC. This class requires an instance of `WorkspaceClient` and `SqlBackend` as inputs and provides -two primary methods: `move_tables` and `alias_tables`. 
The `move_tables` method moves tables to a new schema, while +The `TableMove` class is a newly developed feature that enables the movement or aliasing of tables and views from one +schema to another within UC. This class requires an instance of `WorkspaceClient` and `SqlBackend` as inputs and provides +two primary methods: `move_tables` and `alias_tables`. The `move_tables` method moves tables to a new schema, while the `alias_tables` method creates aliases of tables and views in a different schema. The `_move_table`, `_alias_table`, and `_move_view` methods are responsible for performing the actual movement, aliasing, -and recreating of the table or view in the destination schema, taking into account any dependencies or permissions -associated with the object. The `_reapply_grants` method reapplies the grants on the migrated table or view, ensuring -that the necessary permissions are maintained. The `_recreate_table` and `_recreate_view` methods recreate the table or +and recreating of the table or view in the destination schema, taking into account any dependencies or permissions +associated with the object. The `_reapply_grants` method reapplies the grants on the migrated table or view, ensuring +that the necessary permissions are maintained. The `_recreate_table` and `_recreate_view` methods recreate the table or view in the destination schema, including any dependencies or permissions associated with the object. [[back to top](#table-upgrade)] ## Table Size Estimation -The Table Size Crawler is a new feature of the data crawler system that calculates the size of tables in a Hive Metastore. -The `TableSizeCrawler` class is developed to inherit from `CrawlerBase` and is initialized with a SQL Execution Backend -and a schema name. This class uses the `TablesCrawler` class to obtain a snapshot of tables and then iterates over them +The Table Size Crawler is a new feature of the data crawler system that calculates the size of tables in a Hive Metastore. +The `TableSizeCrawler` class is developed to inherit from `CrawlerBase` and is initialized with a SQL Execution Backend +and a schema name. This class uses the `TablesCrawler` class to obtain a snapshot of tables and then iterates over them to calculate the size of each table using the `_safe_get_table_size` method which queries the Spark SQL engine. -The `TableSizeCrawler` class has several methods, including `snapshot`, `_try_load`, and `_crawl`. The `snapshot` method +The `TableSizeCrawler` class has several methods, including `snapshot`, `_try_load`, and `_crawl`. The `snapshot` method returns a list of `TableSize` objects representing the snapshot of tables, filtered to include only those with a non-null -size. The `_try_load` method tries to load table information from the database and raises a `TABLE_OR_VIEW_NOT_FOUND` -error if the table cannot be found. The `_crawl` method crawls and lists tables using the `tables_crawler` object and +size. The `_try_load` method tries to load table information from the database and raises a `TABLE_OR_VIEW_NOT_FOUND` +error if the table cannot be found. The `_crawl` method crawls and lists tables using the `tables_crawler` object and calculates the size of DBFS root tables, skipping tables that are not of type `TABLE` or are not DBFS root tables. [[back to top](#table-upgrade)] ## Table Crawler -The `TablesCrawler` is designed for crawling and listing tables within Hive Metastore. 
It can fetch detailed information -about each table, including the table's name, external location, and storage format. This information can be used to +The `TablesCrawler` is designed for crawling and listing tables within Hive Metastore. It can fetch detailed information +about each table, including the table's name, external location, and storage format. This information can be used to better understand the structure and contents of the tables in the Databricks workspace. [[back to top](#table-upgrade)] diff --git a/docs/troubleshooting.md b/docs/troubleshooting.md index 9115543e2b..db85db5b0c 100644 --- a/docs/troubleshooting.md +++ b/docs/troubleshooting.md @@ -97,7 +97,7 @@ logs ``` ### Reading log files -Open the downloaded log files in a text editor or viewer. +Open the downloaded log files in a text editor or viewer. VSCode is an excellent example as it will allow you to search the entire folder for `ERROR` @@ -132,7 +132,7 @@ If Azure CLI has already been installed and authenticated, but you see the follo `14:50:33 ERROR [d.labs.ucx] In order to obtain AAD token, Please run azure cli to authenticate.` -Resolve this in macOS by running the command with an explicit auth type set: `DATABRICKS_AUTH_TYPE=azure-cli databricks labs ucx ...`. +Resolve this in macOS by running the command with an explicit auth type set: `DATABRICKS_AUTH_TYPE=azure-cli databricks labs ucx ...`. To resolve this issue in Windows, proceed with the following steps: 1. Open `%userprofile%` (the path like `C:\Users\`) @@ -143,7 +143,7 @@ To resolve this issue in Windows, proceed with the following steps: ### Resolving common errors on UCX install #### Error on installing the ucx inventory database -Your platform administrators may have implemented policies in one manner or another to prevent arbitrary database creation. +Your platform administrators may have implemented policies in one manner or another to prevent arbitrary database creation. - You may be prohibited from creating a database with a default location to `dbfs:/`. - You may be required to create a database on an external Hive Metastore (HMS) and need compute configured to do so. @@ -169,4 +169,4 @@ See the gathering log information sections elsewhere in this document. ### Resolving other common errors - If you have an external Hive Metastore (HMS) such as Glue Catalog or a MySQL, Postgres or SQL server database, please consult the [External Hive Metastore Integration guide](external_hms_glue.md) - If you are running table upgrade commands and workflows. 
Please consult the [Table Upgrade guide](table_upgrade.md) -- If you are trying to understand the Assessment report, please consult the [Assessment documentation](assessment.md) \ No newline at end of file +- If you are trying to understand the Assessment report, please consult the [Assessment documentation](assessment.md) diff --git a/src/databricks/labs/ucx/mixins/README.md b/src/databricks/labs/ucx/mixins/README.md index 2110e37f75..37fc37912c 100644 --- a/src/databricks/labs/ucx/mixins/README.md +++ b/src/databricks/labs/ucx/mixins/README.md @@ -1 +1 @@ -This file contains code that can potentially get backported to Databricks SDK for Python \ No newline at end of file +This file contains code that can potentially get backported to Databricks SDK for Python diff --git a/src/databricks/labs/ucx/queries/assessment/azure/05_0_azure_service_principals.sql b/src/databricks/labs/ucx/queries/assessment/azure/05_0_azure_service_principals.sql index 5f6970ea13..9d258c5813 100644 --- a/src/databricks/labs/ucx/queries/assessment/azure/05_0_azure_service_principals.sql +++ b/src/databricks/labs/ucx/queries/assessment/azure/05_0_azure_service_principals.sql @@ -7,4 +7,4 @@ select if(tenant_id = '', "NA", tenant_id) tenant_id, if(storage_account = '', "NA", storage_account) storage_account from - $inventory.azure_service_principals \ No newline at end of file + $inventory.azure_service_principals diff --git a/src/databricks/labs/ucx/queries/assessment/estimates/00_0_metastore_assignment.md b/src/databricks/labs/ucx/queries/assessment/estimates/00_0_metastore_assignment.md index 38bd7aa7e0..609f651a1c 100644 --- a/src/databricks/labs/ucx/queries/assessment/estimates/00_0_metastore_assignment.md +++ b/src/databricks/labs/ucx/queries/assessment/estimates/00_0_metastore_assignment.md @@ -8,8 +8,8 @@ This section assumes that your workspace has been attached to a UC metastore, it If you haven't created a metastore yet, follow the docs below to attach your workspace to the metastore: -[[AWS]](https://docs.databricks.com/en/data-governance/unity-catalog/enable-workspaces.html) -[[Azure]](https://learn.microsoft.com/en-us/azure/databricks/data-governance/unity-catalog/enable-workspaces) +[[AWS]](https://docs.databricks.com/en/data-governance/unity-catalog/enable-workspaces.html) +[[Azure]](https://learn.microsoft.com/en-us/azure/databricks/data-governance/unity-catalog/enable-workspaces) [[GCP]](https://docs.gcp.databricks.com/data-governance/unity-catalog/enable-workspaces.html) If any incompatible submit runs has been detected, follow the steps highlighted below: diff --git a/src/databricks/labs/ucx/queries/assessment/estimates/00_4_is_incompatible_submit_run_detected.sql b/src/databricks/labs/ucx/queries/assessment/estimates/00_4_is_incompatible_submit_run_detected.sql index 4ac37ca560..2101c967f1 100644 --- a/src/databricks/labs/ucx/queries/assessment/estimates/00_4_is_incompatible_submit_run_detected.sql +++ b/src/databricks/labs/ucx/queries/assessment/estimates/00_4_is_incompatible_submit_run_detected.sql @@ -2,7 +2,7 @@ -- widget title=Incompatible submit runs, row=0, col=2, size_x=4, size_y=8 SELECT * FROM (SELECT object_type, object_id, EXPLODE(from_json(failures, 'array')) AS finding -FROM $inventory.objects) -WHERE finding = "no data security mode specified" +FROM $inventory.objects) +WHERE finding = "no data security mode specified" AND object_type = "submit_runs" diff --git a/src/databricks/labs/ucx/queries/assessment/estimates/01_0_group_migration.md 
b/src/databricks/labs/ucx/queries/assessment/estimates/01_0_group_migration.md index 5bfa92ffb6..6dc4ab31fe 100644 --- a/src/databricks/labs/ucx/queries/assessment/estimates/01_0_group_migration.md +++ b/src/databricks/labs/ucx/queries/assessment/estimates/01_0_group_migration.md @@ -2,7 +2,7 @@ ## 2 - Group migration -The second step of succesfully adopting UC if migrating your workspace local groups to the account. +The second step of succesfully adopting UC if migrating your workspace local groups to the account. This step is a relatively low risk as it's an additive operation, it won't disturb your currently running pipelines. Follow those steps in order to successfully migrate your groups to the account: @@ -18,4 +18,4 @@ If you're using an Identity Provider: 3. Trigger a sync from your IdP to the account 1. To validate that all groups are properly setup for the group migration, run [validate-group-membership](https://github.com/databrickslabs/ucx/blob/main/README.md#validate-groups-membership-command) -Once the account groups are setup, perform the group migration by using the Group migration workflow, more information in the [docs](https://github.com/databrickslabs/ucx/blob/main/README.md#group-migration-workflow) \ No newline at end of file +Once the account groups are setup, perform the group migration by using the Group migration workflow, more information in the [docs](https://github.com/databrickslabs/ucx/blob/main/README.md#group-migration-workflow) diff --git a/src/databricks/labs/ucx/queries/assessment/estimates/01_2_group_migration.sql b/src/databricks/labs/ucx/queries/assessment/estimates/01_2_group_migration.sql index db32768594..1adb2d549a 100644 --- a/src/databricks/labs/ucx/queries/assessment/estimates/01_2_group_migration.sql +++ b/src/databricks/labs/ucx/queries/assessment/estimates/01_2_group_migration.sql @@ -1,3 +1,3 @@ -- viz type=table, name=Workspace local groups, columns=id_in_workspace,name_in_workspace,name_in_account,temporary_name,members,entitlements,external_id,roles -- widget title=Workspace local groups to migrate, row=1, col=2, size_x=3, size_y=8 -SELECT * FROM $inventory.groups \ No newline at end of file +SELECT * FROM $inventory.groups diff --git a/src/databricks/labs/ucx/queries/assessment/estimates/02_0_data_modeling.md b/src/databricks/labs/ucx/queries/assessment/estimates/02_0_data_modeling.md index 11f5250a6b..e0b8dedb83 100644 --- a/src/databricks/labs/ucx/queries/assessment/estimates/02_0_data_modeling.md +++ b/src/databricks/labs/ucx/queries/assessment/estimates/02_0_data_modeling.md @@ -1,10 +1,10 @@ -- widget title=Table estimates, row=2, col=0, size_x=2, size_y=8 ## 3 - UC Data modeling -The third step of a successful UC migration is defining your target data model on UC. +The third step of a successful UC migration is defining your target data model on UC. This step is required in order to choose in which catalogs the existing data in Hive Metastore will land. -As a starting point, consider creating a catalog that has the same name as your workspace. +As a starting point, consider creating a catalog that has the same name as your workspace. For example, a table `database.table1` will land in the `workspace_name.database.table1` table. The complexity factor is relative to the number of databases and tables identified during the assessment. 
diff --git a/src/databricks/labs/ucx/queries/assessment/estimates/02_2_uc_data_modeling.sql b/src/databricks/labs/ucx/queries/assessment/estimates/02_2_uc_data_modeling.sql index 63ba1aa20c..a8a4b76788 100644 --- a/src/databricks/labs/ucx/queries/assessment/estimates/02_2_uc_data_modeling.sql +++ b/src/databricks/labs/ucx/queries/assessment/estimates/02_2_uc_data_modeling.sql @@ -1,3 +1,3 @@ -- viz type=table, name=Tables to migrate, columns=catalog,database,name,object_type,table_format,location,view_text,upgraded_to,storage_properties -- widget title=Tables to migrate, row=2, col=2, size_x=3, size_y=8 -select * from $inventory.tables; \ No newline at end of file +select * from $inventory.tables; diff --git a/src/databricks/labs/ucx/queries/assessment/estimates/02_5_uc_data_modeling_complexity.sql b/src/databricks/labs/ucx/queries/assessment/estimates/02_5_uc_data_modeling_complexity.sql index 13b12530f6..14fbae8be3 100644 --- a/src/databricks/labs/ucx/queries/assessment/estimates/02_5_uc_data_modeling_complexity.sql +++ b/src/databricks/labs/ucx/queries/assessment/estimates/02_5_uc_data_modeling_complexity.sql @@ -6,4 +6,4 @@ when distinct_tables between 1 and 100 then "S" when distinct_tables between 101 and 300 then "M" when distinct_tables > 301 then "L" else NULL end as uc_model_complexity from -(select count(distinct concat(database,".",name)) as distinct_tables from $inventory.tables); \ No newline at end of file +(select count(distinct concat(database,".",name)) as distinct_tables from $inventory.tables); diff --git a/src/databricks/labs/ucx/queries/assessment/estimates/03_0_data_migration.md b/src/databricks/labs/ucx/queries/assessment/estimates/03_0_data_migration.md index dfe9869eb7..9565ed94d3 100644 --- a/src/databricks/labs/ucx/queries/assessment/estimates/03_0_data_migration.md +++ b/src/databricks/labs/ucx/queries/assessment/estimates/03_0_data_migration.md @@ -1,8 +1,8 @@ -- widget title=Table estimates, row=3, col=0, size_x=2, size_y=8 ## 4 - Data migration to UC -Once you have defined your data model in UC and that you've created appropriate Storage Credentials and External Locations, -you can then migrate your data to UC +Once you have defined your data model in UC and that you've created appropriate Storage Credentials and External Locations, +you can then migrate your data to UC Assumptions for a single table migration estimates: diff --git a/src/databricks/labs/ucx/queries/assessment/estimates/03_2_data_migration_summary.sql b/src/databricks/labs/ucx/queries/assessment/estimates/03_2_data_migration_summary.sql index b0c1c886a9..0c4b1d012d 100644 --- a/src/databricks/labs/ucx/queries/assessment/estimates/03_2_data_migration_summary.sql +++ b/src/databricks/labs/ucx/queries/assessment/estimates/03_2_data_migration_summary.sql @@ -1,3 +1,3 @@ -- viz type=table, name=Table estimates, columns=table_name,object_type,table_format,estimated_hours -- widget title=Table estimates, row=3, col=2, size_x=3, size_y=8 -SELECT * FROM $inventory.table_estimates \ No newline at end of file +SELECT * FROM $inventory.table_estimates diff --git a/src/databricks/labs/ucx/queries/assessment/estimates/03_5_data_migration_complexity.sql b/src/databricks/labs/ucx/queries/assessment/estimates/03_5_data_migration_complexity.sql index fc2f43bd1c..a2c46de2ce 100644 --- a/src/databricks/labs/ucx/queries/assessment/estimates/03_5_data_migration_complexity.sql +++ b/src/databricks/labs/ucx/queries/assessment/estimates/03_5_data_migration_complexity.sql @@ -8,4 +8,4 @@ CASE WHEN 
total_estimated_hours < 30 THEN "S" ELSE NULL END as data_migration_complexity FROM (SELECT sum(estimated_hours) AS total_estimated_hours -FROM $inventory.table_estimates) \ No newline at end of file +FROM $inventory.table_estimates) diff --git a/src/databricks/labs/ucx/queries/assessment/interactive/00_0_interactive.md b/src/databricks/labs/ucx/queries/assessment/interactive/00_0_interactive.md index dd4422372a..7b784805fc 100644 --- a/src/databricks/labs/ucx/queries/assessment/interactive/00_0_interactive.md +++ b/src/databricks/labs/ucx/queries/assessment/interactive/00_0_interactive.md @@ -16,4 +16,4 @@ To use this report: ### Compute Access Mode Limitation Summary -This widget will display a summary of the findings, the # workspaces, notebooks, clusters and users potentially impacted by [compute access mode limitations](https://docs.databricks.com/en/compute/access-mode-limitations.html#compute-access-mode-limitations) \ No newline at end of file +This widget will display a summary of the findings, the # workspaces, notebooks, clusters and users potentially impacted by [compute access mode limitations](https://docs.databricks.com/en/compute/access-mode-limitations.html#compute-access-mode-limitations) diff --git a/src/databricks/labs/ucx/queries/assessment/interactive/01_0_compute_access_mode_limitation_summary.sql b/src/databricks/labs/ucx/queries/assessment/interactive/01_0_compute_access_mode_limitation_summary.sql index 7a671bc0eb..0f5235f906 100644 --- a/src/databricks/labs/ucx/queries/assessment/interactive/01_0_compute_access_mode_limitation_summary.sql +++ b/src/databricks/labs/ucx/queries/assessment/interactive/01_0_compute_access_mode_limitation_summary.sql @@ -2,10 +2,10 @@ -- widget title=Compute Access Mode Limitation Summary, row=1, col=0, size_x=6, size_y=12 -- Scan notebook command history for potential paper cut issues -- https://docs.databricks.com/en/compute/access-mode-limitations.html#compute-access-mode-limitations --- -WITH +-- +WITH iteractive_cluster_commands ( - SELECT + SELECT a.event_id, a.request_params.notebookId AS notebook_id, a.request_params.clusterId AS cluster_id, @@ -18,7 +18,7 @@ iteractive_cluster_commands ( a.request_params.commandText, md5(a.request_params.commandText) commandHash FROM system.access.audit a - LEFT OUTER JOIN $inventory.clusters AS c + LEFT OUTER JOIN $inventory.clusters AS c ON a.request_params.clusterId = c.cluster_id AND a.action_name = 'runCommand' WHERE a.event_date >= DATE_SUB(CURRENT_DATE(), 90) @@ -31,23 +31,23 @@ pattern_matcher( array_except(array(p.issue, lp.issue, rv.issue,dbr_type.issue), array(null)) issues, a.* FROM iteractive_cluster_commands a - LEFT OUTER JOIN $inventory.code_patterns p + LEFT OUTER JOIN $inventory.code_patterns p ON a.commandLanguage in ('python','scala') AND contains(a.commandText, p.pattern) - LEFT OUTER JOIN misc_patterns lp + LEFT OUTER JOIN misc_patterns lp ON a.commandLanguage = lp.commandLanguage - LEFT OUTER JOIN misc_patterns rv -- runtime version + LEFT OUTER JOIN misc_patterns rv -- runtime version ON (a.commandLanguage = rv.commandLanguage OR rv.commandLanguage is null) AND a.dbr_version_major < rv.dbr_version_major AND rv.dbr_version_major is not null - LEFT OUTER JOIN misc_patterns dbr_type + LEFT OUTER JOIN misc_patterns dbr_type ON a.dbr_type = dbr_type.dbr_type and a.dbr_type in ('cpu','gpu') ), exp ( select distinct explode(issues) issue, workspace_id, notebook_id, cluster_id, email FROM pattern_matcher ) -SELECT +SELECT issue `Finding`, -- concat('',issue,'') as link, 
count(distinct workspace_id) `# workspaces`, @@ -56,4 +56,4 @@ SELECT count(distinct email) `# users` FROM exp group by 1 -order by 1 \ No newline at end of file +order by 1 diff --git a/src/databricks/labs/ucx/queries/assessment/interactive/02_0_cluster_summary.md b/src/databricks/labs/ucx/queries/assessment/interactive/02_0_cluster_summary.md index fc6ad0a60e..3fe2bbb439 100644 --- a/src/databricks/labs/ucx/queries/assessment/interactive/02_0_cluster_summary.md +++ b/src/databricks/labs/ucx/queries/assessment/interactive/02_0_cluster_summary.md @@ -12,4 +12,4 @@ Typical upgrade paths are: - For users with single node python ML requirements, Shared Compute with `%pip install` library support or Personal Compute with pools and compute controls may provide a better experience and better manageability. -- For single node ML users on a crowded driver node of a large shared cluster, will get a better experience with Personal Compute policies combined with (warm) Compute pools \ No newline at end of file +- For single node ML users on a crowded driver node of a large shared cluster, will get a better experience with Personal Compute policies combined with (warm) Compute pools diff --git a/src/databricks/labs/ucx/queries/assessment/interactive/03_0_cluster_summary.sql b/src/databricks/labs/ucx/queries/assessment/interactive/03_0_cluster_summary.sql index a46d89ccbe..14a588c4ca 100644 --- a/src/databricks/labs/ucx/queries/assessment/interactive/03_0_cluster_summary.sql +++ b/src/databricks/labs/ucx/queries/assessment/interactive/03_0_cluster_summary.sql @@ -1,9 +1,9 @@ -- viz type=table, name=Findings by Cluster, columns=distinct_findings,Commands,Users,First_command,Last_command,workspace_id,cluster_id,cluster_name,dbr_version,creator -- widget title=Findings by Cluster, row=3, col=0, size_x=6, size_y=12 --- -WITH +-- +WITH iteractive_cluster_commands ( - SELECT + SELECT a.event_id, a.request_params.notebookId AS notebook_id, a.request_params.clusterId AS cluster_id, @@ -32,16 +32,16 @@ pattern_matcher( explode(array_except(array(p.issue, lp.issue, rv.issue,dbr_type.issue), array(null))) issue, a.* FROM iteractive_cluster_commands a - LEFT OUTER JOIN $inventory.code_patterns p + LEFT OUTER JOIN $inventory.code_patterns p ON a.commandLanguage in ('python','scala') AND contains(a.commandText, p.pattern) - LEFT OUTER JOIN misc_patterns lp + LEFT OUTER JOIN misc_patterns lp ON a.commandLanguage = lp.commandLanguage - LEFT OUTER JOIN misc_patterns rv -- runtime version + LEFT OUTER JOIN misc_patterns rv -- runtime version ON (a.commandLanguage = rv.commandLanguage OR rv.commandLanguage is null) AND a.dbr_version_major < rv.dbr_version_major AND rv.dbr_version_major is not null - LEFT OUTER JOIN misc_patterns dbr_type + LEFT OUTER JOIN misc_patterns dbr_type ON a.dbr_type = dbr_type.dbr_type and a.dbr_type in ('cpu','gpu') ) SELECT @@ -58,4 +58,4 @@ SELECT FROM pattern_matcher GROUP BY ALL HAVING max(event_date) >= DATE_SUB(CURRENT_DATE(), 15) -- active in last N days -ORDER BY `Last command` desc, `First command` asc, coalesce(cluster_name,cluster_id) \ No newline at end of file +ORDER BY `Last command` desc, `First command` asc, coalesce(cluster_name,cluster_id) diff --git a/src/databricks/labs/ucx/queries/assessment/main/00_5_count_total_views.sql b/src/databricks/labs/ucx/queries/assessment/main/00_5_count_total_views.sql index bef2499c88..7d555f01ab 100644 --- a/src/databricks/labs/ucx/queries/assessment/main/00_5_count_total_views.sql +++ 
b/src/databricks/labs/ucx/queries/assessment/main/00_5_count_total_views.sql @@ -1,4 +1,4 @@ -- viz type=counter, name=Total View Count, counter_label=Total Views, value_column=count_total_views -- widget row=1, col=4, size_x=1, size_y=3 -SELECT count(*) AS count_total_views +SELECT count(*) AS count_total_views FROM $inventory.tables where object_type = 'VIEW' diff --git a/src/databricks/labs/ucx/queries/assessment/main/00___assessment_overview.md b/src/databricks/labs/ucx/queries/assessment/main/00___assessment_overview.md index 341f88148a..6f15f7fe81 100644 --- a/src/databricks/labs/ucx/queries/assessment/main/00___assessment_overview.md +++ b/src/databricks/labs/ucx/queries/assessment/main/00___assessment_overview.md @@ -2,4 +2,4 @@ # Assessment Overview -[Quick link to dashboard documentation](https://github.com/databrickslabs/ucx/blob/main/docs/assessment.md) \ No newline at end of file +[Quick link to dashboard documentation](https://github.com/databrickslabs/ucx/blob/main/docs/assessment.md) diff --git a/src/databricks/labs/ucx/queries/assessment/main/01_0_count_jobs.sql b/src/databricks/labs/ucx/queries/assessment/main/01_0_count_jobs.sql index 796759ad7d..0caada5641 100644 --- a/src/databricks/labs/ucx/queries/assessment/main/01_0_count_jobs.sql +++ b/src/databricks/labs/ucx/queries/assessment/main/01_0_count_jobs.sql @@ -1,4 +1,4 @@ -- viz type=counter, name=Total Job Count, counter_label=Total Jobs, value_column=count_total_jobs -- widget row=2, col=0, size_x=2, size_y=5 -SELECT count(*) AS count_total_jobs -FROM $inventory.jobs WHERE job_name not like '[UCX]%' \ No newline at end of file +SELECT count(*) AS count_total_jobs +FROM $inventory.jobs WHERE job_name not like '[UCX]%' diff --git a/src/databricks/labs/ucx/queries/assessment/main/02_2_count_table_by_storage.sql b/src/databricks/labs/ucx/queries/assessment/main/02_2_count_table_by_storage.sql index 4e484bb3c1..ff417146cb 100644 --- a/src/databricks/labs/ucx/queries/assessment/main/02_2_count_table_by_storage.sql +++ b/src/databricks/labs/ucx/queries/assessment/main/02_2_count_table_by_storage.sql @@ -16,4 +16,4 @@ SELECT END AS storage FROM $inventory.tables) GROUP BY storage -ORDER BY storage; \ No newline at end of file +ORDER BY storage; diff --git a/src/databricks/labs/ucx/queries/assessment/main/05_0_object_readiness.sql b/src/databricks/labs/ucx/queries/assessment/main/05_0_object_readiness.sql index 3ad263f8d4..b8ea4b0318 100644 --- a/src/databricks/labs/ucx/queries/assessment/main/05_0_object_readiness.sql +++ b/src/databricks/labs/ucx/queries/assessment/main/05_0_object_readiness.sql @@ -1,10 +1,10 @@ -- viz type=table, name=Object Type Readiness, columns=object_type,readiness -- widget title=Readiness, row=7, col=0, size_x=2, size_y=8 WITH raw AS ( - SELECT object_type, object_id, IF(failures == '[]', 1, 0) AS ready + SELECT object_type, object_id, IF(failures == '[]', 1, 0) AS ready FROM $inventory.objects ) SELECT object_type, CONCAT(ROUND(SUM(ready) / COUNT(*) * 100, 1), '%') AS readiness FROM raw GROUP BY object_type -ORDER BY readiness DESC \ No newline at end of file +ORDER BY readiness DESC diff --git a/src/databricks/labs/ucx/queries/assessment/main/05_2_assessment_summary.sql b/src/databricks/labs/ucx/queries/assessment/main/05_2_assessment_summary.sql index 309f48cb7b..9c214adc98 100644 --- a/src/databricks/labs/ucx/queries/assessment/main/05_2_assessment_summary.sql +++ b/src/databricks/labs/ucx/queries/assessment/main/05_2_assessment_summary.sql @@ -4,7 +4,7 @@ WITH raw AS ( SELECT 
EXPLODE(FROM_JSON(failures, 'array')) AS finding FROM $inventory.objects WHERE failures <> '[]' ) -SELECT finding as `finding`, COUNT(*) AS count -FROM raw +SELECT finding as `finding`, COUNT(*) AS count +FROM raw GROUP BY finding ORDER BY count DESC, finding DESC diff --git a/src/databricks/labs/ucx/queries/assessment/main/10_0_all_udfs.sql b/src/databricks/labs/ucx/queries/assessment/main/10_0_all_udfs.sql index 5315bab00a..ebea97de30 100644 --- a/src/databricks/labs/ucx/queries/assessment/main/10_0_all_udfs.sql +++ b/src/databricks/labs/ucx/queries/assessment/main/10_0_all_udfs.sql @@ -1,3 +1,3 @@ -- viz type=table, name=UDF Summary, search_by=name, columns=catalog,database,name,func_type,func_input,func_returns,deterministic,data_access,body,comment -- widget title=UDF Summary, row=14, col=0, size_x=8, size_y=8 -SELECT * FROM $inventory.udfs \ No newline at end of file +SELECT * FROM $inventory.udfs diff --git a/src/databricks/labs/ucx/queries/assessment/main/10_0_database_summary.sql b/src/databricks/labs/ucx/queries/assessment/main/10_0_database_summary.sql index 0dee651d02..090d5158c5 100644 --- a/src/databricks/labs/ucx/queries/assessment/main/10_0_database_summary.sql +++ b/src/databricks/labs/ucx/queries/assessment/main/10_0_database_summary.sql @@ -50,4 +50,4 @@ WITH table_stats AS ( GROUP BY `database` ) SELECT * FROM database_stats FULL JOIN grant_stats USING (`database`) -ORDER BY tables DESC \ No newline at end of file +ORDER BY tables DESC diff --git a/src/databricks/labs/ucx/queries/assessment/main/15_3_mount_points.sql b/src/databricks/labs/ucx/queries/assessment/main/15_3_mount_points.sql index 31acd404d6..52faf6d75a 100644 --- a/src/databricks/labs/ucx/queries/assessment/main/15_3_mount_points.sql +++ b/src/databricks/labs/ucx/queries/assessment/main/15_3_mount_points.sql @@ -2,4 +2,4 @@ -- widget title=Mount Points, row=17, col=3, size_x=3, size_y=8 SELECT name, source -FROM $inventory.mounts \ No newline at end of file +FROM $inventory.mounts diff --git a/src/databricks/labs/ucx/queries/assessment/main/20_0_cluster_policies.sql b/src/databricks/labs/ucx/queries/assessment/main/20_0_cluster_policies.sql index a59ff7e70e..ed8daed749 100644 --- a/src/databricks/labs/ucx/queries/assessment/main/20_0_cluster_policies.sql +++ b/src/databricks/labs/ucx/queries/assessment/main/20_0_cluster_policies.sql @@ -6,4 +6,4 @@ SELECT policy.spark_version as policy_spark_version FROM $inventory.clusters as cluster JOIN $inventory.policies as policy -ON cluster.policy_id=policy.policy_id \ No newline at end of file +ON cluster.policy_id=policy.policy_id diff --git a/src/databricks/labs/ucx/queries/assessment/main/20_0_clusters.sql b/src/databricks/labs/ucx/queries/assessment/main/20_0_clusters.sql index fc7819e71d..1e2fc27bf0 100644 --- a/src/databricks/labs/ucx/queries/assessment/main/20_0_clusters.sql +++ b/src/databricks/labs/ucx/queries/assessment/main/20_0_clusters.sql @@ -6,4 +6,4 @@ SELECT EXPLODE(FROM_JSON(failures, 'array')) AS finding, creator FROM $inventory.clusters WHERE NOT STARTSWITH(cluster_name, "job-") -ORDER BY cluster_id DESC \ No newline at end of file +ORDER BY cluster_id DESC diff --git a/src/databricks/labs/ucx/queries/assessment/main/30_3_job_details.sql b/src/databricks/labs/ucx/queries/assessment/main/30_3_job_details.sql index 1ccadc2672..17c2d48b91 100644 --- a/src/databricks/labs/ucx/queries/assessment/main/30_3_job_details.sql +++ b/src/databricks/labs/ucx/queries/assessment/main/30_3_job_details.sql @@ -8,4 +8,4 @@ SELECT creator FROM $inventory.jobs 
WHERE job_name not like '[UCX]%' -ORDER BY job_id DESC \ No newline at end of file +ORDER BY job_id DESC diff --git a/src/databricks/labs/ucx/queries/assessment/main/30_3_jobs.sql b/src/databricks/labs/ucx/queries/assessment/main/30_3_jobs.sql index 43b8364b74..d39973eec5 100644 --- a/src/databricks/labs/ucx/queries/assessment/main/30_3_jobs.sql +++ b/src/databricks/labs/ucx/queries/assessment/main/30_3_jobs.sql @@ -7,4 +7,4 @@ SELECT creator FROM $inventory.jobs WHERE job_name not like '[UCX]%' -ORDER BY job_id DESC \ No newline at end of file +ORDER BY job_id DESC diff --git a/src/databricks/labs/ucx/queries/assessment/main/30_4_submit_runs.sql b/src/databricks/labs/ucx/queries/assessment/main/30_4_submit_runs.sql index c9b4f1de2a..96817307fd 100644 --- a/src/databricks/labs/ucx/queries/assessment/main/30_4_submit_runs.sql +++ b/src/databricks/labs/ucx/queries/assessment/main/30_4_submit_runs.sql @@ -5,4 +5,4 @@ SELECT EXPLODE(FROM_JSON(failures, 'array')) AS finding, FROM_JSON(run_ids, 'array') AS run_ids FROM $inventory.submit_runs -ORDER BY hashed_id DESC \ No newline at end of file +ORDER BY hashed_id DESC diff --git a/src/databricks/labs/ucx/queries/assessment/main/40_0_pipelines.sql b/src/databricks/labs/ucx/queries/assessment/main/40_0_pipelines.sql index 9d1e10273a..1e5c78b826 100644 --- a/src/databricks/labs/ucx/queries/assessment/main/40_0_pipelines.sql +++ b/src/databricks/labs/ucx/queries/assessment/main/40_0_pipelines.sql @@ -5,4 +5,4 @@ SELECT pipeline_name, creator_name FROM $inventory.pipelines -ORDER BY pipeline_name DESC \ No newline at end of file +ORDER BY pipeline_name DESC diff --git a/src/databricks/labs/ucx/queries/assessment/main/40_2_logs.sql b/src/databricks/labs/ucx/queries/assessment/main/40_2_logs.sql index 4683dc12af..e1ff5e594e 100644 --- a/src/databricks/labs/ucx/queries/assessment/main/40_2_logs.sql +++ b/src/databricks/labs/ucx/queries/assessment/main/40_2_logs.sql @@ -13,4 +13,4 @@ FROM $inventory.logs WHERE job_run_id = ( SELECT DISTINCT job_run_id FROM $inventory.logs WHERE timestamp = (SELECT MAX(timestamp) FROM $inventory.logs) ) -ORDER BY timestamp ASC \ No newline at end of file +ORDER BY timestamp ASC diff --git a/src/databricks/labs/ucx/queries/assessment/main/40_3_global_init_scripts.sql b/src/databricks/labs/ucx/queries/assessment/main/40_3_global_init_scripts.sql index a9a306b0fe..b1e9f79f74 100644 --- a/src/databricks/labs/ucx/queries/assessment/main/40_3_global_init_scripts.sql +++ b/src/databricks/labs/ucx/queries/assessment/main/40_3_global_init_scripts.sql @@ -6,4 +6,4 @@ SELECT created_by FROM $inventory.global_init_scripts -ORDER BY script_name DESC \ No newline at end of file +ORDER BY script_name DESC diff --git a/src/databricks/labs/ucx/queries/assessment/main/README.md b/src/databricks/labs/ucx/queries/assessment/main/README.md index f83e1e03c4..2361d36f04 100644 --- a/src/databricks/labs/ucx/queries/assessment/main/README.md +++ b/src/databricks/labs/ucx/queries/assessment/main/README.md @@ -3,4 +3,4 @@ All files in this directory follow the virtual grid of a dashboard: * total width is 6 columns -* all files are named as `__something.sql` \ No newline at end of file +* all files are named as `__something.sql` diff --git a/src/databricks/labs/ucx/queries/migration/main/01_0_data_object_migration_status.md b/src/databricks/labs/ucx/queries/migration/main/01_0_data_object_migration_status.md index fa0352a2e6..60a441241b 100644 --- a/src/databricks/labs/ucx/queries/migration/main/01_0_data_object_migration_status.md +++ 
b/src/databricks/labs/ucx/queries/migration/main/01_0_data_object_migration_status.md @@ -2,11 +2,11 @@ ## Table migration status -The two widgets on the right show high-level summary of the table migration. The first widget shows the migration +The two widgets on the right show high-level summary of the table migration. The first widget shows the migration progress, and the second widget shows the data reconciliation results. -The table below assists with verifying if, how the tables are migrated and their correctness. It can be filtered on the +The table below assists with verifying if, how the tables are migrated and their correctness. It can be filtered on the table name and migration status. Next to table metadata, the table shows: - The table name before migrating - The migration status diff --git a/src/databricks/labs/ucx/queries/migration/main/01_1_data_object_migration_summary.sql b/src/databricks/labs/ucx/queries/migration/main/01_1_data_object_migration_summary.sql index 6c5d57798d..d92f61ae2e 100644 --- a/src/databricks/labs/ucx/queries/migration/main/01_1_data_object_migration_summary.sql +++ b/src/databricks/labs/ucx/queries/migration/main/01_1_data_object_migration_summary.sql @@ -13,4 +13,4 @@ FROM $inventory.tables AS tables LEFT JOIN $inventory.migration_status AS migration_status - ON tables.`database` = migration_status.src_schema AND tables.name = migration_status.src_table \ No newline at end of file + ON tables.`database` = migration_status.src_schema AND tables.name = migration_status.src_table diff --git a/src/databricks/labs/ucx/queries/migration/main/02_1_code_compatibility_problems.sql b/src/databricks/labs/ucx/queries/migration/main/02_1_code_compatibility_problems.sql index 70994baca7..02a5d18076 100644 --- a/src/databricks/labs/ucx/queries/migration/main/02_1_code_compatibility_problems.sql +++ b/src/databricks/labs/ucx/queries/migration/main/02_1_code_compatibility_problems.sql @@ -11,4 +11,4 @@ SELECT start_col, end_line, end_col -FROM $inventory.workflow_problems \ No newline at end of file +FROM $inventory.workflow_problems diff --git a/src/databricks/labs/ucx/queries/migration/main/02_1_data_reconciliation_summary.sql b/src/databricks/labs/ucx/queries/migration/main/02_1_data_reconciliation_summary.sql index a0acfd69aa..57ebfdc528 100644 --- a/src/databricks/labs/ucx/queries/migration/main/02_1_data_reconciliation_summary.sql +++ b/src/databricks/labs/ucx/queries/migration/main/02_1_data_reconciliation_summary.sql @@ -11,4 +11,4 @@ SELECT count(*) AS total, concat(round(success / total * 100, 2), '%') AS success_rate FROM - $inventory.reconciliation_results \ No newline at end of file + $inventory.reconciliation_results diff --git a/src/databricks/labs/ucx/queries/migration/main/03_1_data_reconciliation_status.sql b/src/databricks/labs/ucx/queries/migration/main/03_1_data_reconciliation_status.sql index 34e68e252a..aebe8dcacd 100644 --- a/src/databricks/labs/ucx/queries/migration/main/03_1_data_reconciliation_status.sql +++ b/src/databricks/labs/ucx/queries/migration/main/03_1_data_reconciliation_status.sql @@ -28,4 +28,4 @@ FROM ON tables.`database` = migration_status.src_schema AND tables.name = migration_status.src_table LEFT JOIN $inventory.reconciliation_results AS reconciliation_results - ON tables.`database` = reconciliation_results.src_schema AND tables.name = reconciliation_results.src_table \ No newline at end of file + ON tables.`database` = reconciliation_results.src_schema AND tables.name = reconciliation_results.src_table diff --git 
a/src/databricks/labs/ucx/queries/views/code_patterns.sql b/src/databricks/labs/ucx/queries/views/code_patterns.sql index 2baf021f0e..64fd69c3f5 100644 --- a/src/databricks/labs/ucx/queries/views/code_patterns.sql +++ b/src/databricks/labs/ucx/queries/views/code_patterns.sql @@ -9,22 +9,22 @@ SELECT col1 AS pattern, col2 AS issue FROM values ('._jvm', 'AF302.5 - Arbitrary Java'), ('._jvm.org.apache.log4j', 'AF302.6 - Arbitrary Java'), ('spark.udf.registerJavaFunction', 'AF303.1 - Java UDF'), - + ('spark.read.format("jdbc")', 'AF304.1 - JDBC datasource'), ('boto3', 'AF305.1 - boto3'), ('s3fs', 'AF305.2 - s3fs'), ('dbutils.notebook.entry_point.getDbutils().notebook().getContext().toJson()', 'AF306.1 - dbutils...getContext'), ('dbutils.notebook.entry_point.getDbutils().notebook().getContext()', 'AF306.2 - dbutils...getContext'), - + ('dbutils.credentials.', 'AF310.1 - credential passthrough'), ('dbutils.fs.', 'AF311.1 - dbutils.fs'), ('dbutils.fs.mount', 'AF311.2 - dbutils mount(s)'), ('dbutils.fs.refreshMounts', 'AF311.3 - dbutils mount(s)'), ('dbutils.fs.unmount', 'AF311.4 - dbutils mount(s)'), - ('dbfs:/mnt', 'AF311.5 - mount points'), - ('dbfs:/', 'AF311.6 - dbfs usage'), - ('/dbfs/', 'AF311.7 - dbfs usage'), + ('dbfs:/mnt', 'AF311.5 - mount points'), + ('dbfs:/', 'AF311.6 - dbfs usage'), + ('/dbfs/', 'AF311.7 - dbfs usage'), ('spark.sparkContext', 'AF313.1 - SparkContext'), @@ -54,7 +54,7 @@ SELECT col1 AS pattern, col2 AS issue FROM values ('.union', 'AF313.24 - SparkContext'), ('.wholeTextFiles', 'AF313.25 - SparkContext'), - ('sparknlp', 'AF314.1 - Distributed ML'), + ('sparknlp', 'AF314.1 - Distributed ML'), ('xgboost.spark', 'AF314.2 - Distributed ML'), ('catboost_spark', 'AF314.3 - Distributed ML'), ('ai.catboost:catboost-spark', 'AF314.4 - Distributed ML'), @@ -69,7 +69,7 @@ SELECT col1 AS pattern, col2 AS issue FROM values ('UserDefinedAggregateFunction', 'AF315.1 - UDAF scala issue'), ('applyInPandas', 'AF315.2 - applyInPandas'), ('mapInPandas', 'AF315.3 - mapInPandas'), - + ('.trigger(continuous', 'AF330.1 - Streaming'), ('kafka.sasl.client.callback.handler.class', 'AF330.2 - Streaming'), @@ -83,4 +83,4 @@ SELECT col1 AS pattern, col2 AS issue FROM values ('applyInPandasWithState', 'AF330.10 - Streaming'), ('.format("socket")', 'AF330.11 - Streaming'), ('StreamingQueryListener', 'AF330.12 - Streaming'), - ('applyInPandasWithState', 'AF330.13 - Streaming') \ No newline at end of file + ('applyInPandasWithState', 'AF330.13 - Streaming') diff --git a/src/databricks/labs/ucx/queries/views/misc_patterns.sql b/src/databricks/labs/ucx/queries/views/misc_patterns.sql index 7a3d8f8a80..9f91f1203e 100644 --- a/src/databricks/labs/ucx/queries/views/misc_patterns.sql +++ b/src/databricks/labs/ucx/queries/views/misc_patterns.sql @@ -1,6 +1,6 @@ -SELECT +SELECT col1 AS commandLanguage, -- r, scala, python, sql - col2 as dbr_version_major, -- INT + col2 as dbr_version_major, -- INT col3 as dbr_version_minor, -- INT col4 as dbr_type, -- STRING col5 AS pattern, -- expansion / compatibility with code patterns @@ -10,4 +10,4 @@ FROM VALUES ('scala', 13, 3, null, null, 'AF300.3 - Scala Language support'), (null, 11, 3, null, null, 'AF300.4 - Minimum DBR version'), (null, null, null, 'cpu', null, 'AF300.5 - ML Runtime cpu'), - (null, null, null, 'gpu', null, 'AF300.6 - ML Runtime gpu') \ No newline at end of file + (null, null, null, 'gpu', null, 'AF300.6 - ML Runtime gpu') diff --git a/src/databricks/labs/ucx/queries/views/reconciliation_results.sql 
b/src/databricks/labs/ucx/queries/views/reconciliation_results.sql index ef60632235..d54c6d3838 100644 --- a/src/databricks/labs/ucx/queries/views/reconciliation_results.sql +++ b/src/databricks/labs/ucx/queries/views/reconciliation_results.sql @@ -27,4 +27,4 @@ SELECT schema_comparison_result.data AS column_comparison, error_message FROM - flattened \ No newline at end of file + flattened diff --git a/src/databricks/labs/ucx/queries/views/table_estimates.sql b/src/databricks/labs/ucx/queries/views/table_estimates.sql index 3139020962..3903aa794c 100644 --- a/src/databricks/labs/ucx/queries/views/table_estimates.sql +++ b/src/databricks/labs/ucx/queries/views/table_estimates.sql @@ -10,4 +10,4 @@ when object_type == "EXTERNAL" and table_format != "DELTA" then 1 -- Can vary de when object_type == "VIEW" then 2 -- Can vary depending of view complexity and number of tables used in the view else NULL end as estimated_hours from $inventory.tables -where not startswith(name, "__apply_changes") \ No newline at end of file +where not startswith(name, "__apply_changes") diff --git a/src/databricks/labs/ucx/recon/data_comparator.py b/src/databricks/labs/ucx/recon/data_comparator.py index 7195058280..37e1beaa41 100644 --- a/src/databricks/labs/ucx/recon/data_comparator.py +++ b/src/databricks/labs/ucx/recon/data_comparator.py @@ -15,18 +15,18 @@ class StandardDataComparator(DataComparator): _DATA_COMPARISON_QUERY_TEMPLATE = """ WITH compare_results AS ( - SELECT - CASE + SELECT + CASE WHEN source.hash_value IS NULL AND target.hash_value IS NULL THEN TRUE WHEN source.hash_value IS NULL OR target.hash_value IS NULL THEN FALSE WHEN source.hash_value = target.hash_value THEN TRUE ELSE FALSE END AS is_match, - CASE + CASE WHEN target.hash_value IS NULL THEN 1 ELSE 0 END AS target_missing_count, - CASE + CASE WHEN source.hash_value IS NULL THEN 1 ELSE 0 END AS source_missing_count @@ -40,7 +40,7 @@ class StandardDataComparator(DataComparator): ) AS target ON source.hash_value = target.hash_value ) - SELECT + SELECT COUNT(*) AS total_mismatches, COALESCE(SUM(target_missing_count), 0) AS target_missing_count, COALESCE(SUM(source_missing_count), 0) AS source_missing_count diff --git a/src/databricks/labs/ucx/recon/metadata_retriever.py b/src/databricks/labs/ucx/recon/metadata_retriever.py index f3b3cfeb95..5e4a86b03a 100644 --- a/src/databricks/labs/ucx/recon/metadata_retriever.py +++ b/src/databricks/labs/ucx/recon/metadata_retriever.py @@ -37,14 +37,14 @@ def _build_metadata_query(cls, entity: TableIdentifier) -> str: return f"DESCRIBE TABLE {entity.fqn_escaped}" query = f""" - SELECT - LOWER(column_name) AS col_name, + SELECT + LOWER(column_name) AS col_name, full_data_type AS data_type - FROM + FROM {entity.catalog_escaped}.information_schema.columns WHERE - LOWER(table_catalog)='{entity.catalog}' AND - LOWER(table_schema)='{entity.schema}' AND + LOWER(table_catalog)='{entity.catalog}' AND + LOWER(table_schema)='{entity.schema}' AND LOWER(table_name) ='{entity.table}' ORDER BY col_name""" diff --git a/tests/unit/assessment/clusters/azure-spn-secret-interactive-multiple-spn.json b/tests/unit/assessment/clusters/azure-spn-secret-interactive-multiple-spn.json index fc0bdc4995..472e715a1d 100644 --- a/tests/unit/assessment/clusters/azure-spn-secret-interactive-multiple-spn.json +++ b/tests/unit/assessment/clusters/azure-spn-secret-interactive-multiple-spn.json @@ -17,4 +17,4 @@ }, "spark_context_id": 5134472582179565315, "spark_version": "13.3.x-cpu-ml-scala2.12" -} \ No newline at end of file +} diff --git 
a/tests/unit/assessment/clusters/azure-spn-secret.json b/tests/unit/assessment/clusters/azure-spn-secret.json index 25f180c99f..941c72f111 100644 --- a/tests/unit/assessment/clusters/azure-spn-secret.json +++ b/tests/unit/assessment/clusters/azure-spn-secret.json @@ -13,4 +13,4 @@ }, "spark_context_id": 5134472582179565315, "spark_version": "13.3.x-cpu-ml-scala2.12" -} \ No newline at end of file +} diff --git a/tests/unit/assessment/clusters/init-scripts-dbfs.json b/tests/unit/assessment/clusters/init-scripts-dbfs.json index 0012714caa..0dfb769362 100644 --- a/tests/unit/assessment/clusters/init-scripts-dbfs.json +++ b/tests/unit/assessment/clusters/init-scripts-dbfs.json @@ -29,4 +29,4 @@ } } ] -} \ No newline at end of file +} diff --git a/tests/unit/assessment/clusters/init-scripts-file.json b/tests/unit/assessment/clusters/init-scripts-file.json index 13c5ff631c..75769b1e57 100644 --- a/tests/unit/assessment/clusters/init-scripts-file.json +++ b/tests/unit/assessment/clusters/init-scripts-file.json @@ -24,4 +24,4 @@ } } ] -} \ No newline at end of file +} diff --git a/tests/unit/assessment/clusters/init-scripts-no-match.json b/tests/unit/assessment/clusters/init-scripts-no-match.json index 723312509f..f85a68df59 100644 --- a/tests/unit/assessment/clusters/init-scripts-no-match.json +++ b/tests/unit/assessment/clusters/init-scripts-no-match.json @@ -14,4 +14,4 @@ } } ] -} \ No newline at end of file +} diff --git a/tests/unit/assessment/clusters/job-cluster.json b/tests/unit/assessment/clusters/job-cluster.json index 3a7445e03a..199f7e21a9 100644 --- a/tests/unit/assessment/clusters/job-cluster.json +++ b/tests/unit/assessment/clusters/job-cluster.json @@ -13,4 +13,4 @@ "spark.databricks.delta.preview.enabled": "true" }, "spark_context_id":"5134472582179565315" -} \ No newline at end of file +} diff --git a/tests/unit/assessment/clusters/legacy-passthrough.json b/tests/unit/assessment/clusters/legacy-passthrough.json index ce3478e956..a243f29845 100644 --- a/tests/unit/assessment/clusters/legacy-passthrough.json +++ b/tests/unit/assessment/clusters/legacy-passthrough.json @@ -2,4 +2,4 @@ "cluster_name": "Passthrough cluster", "spark_version": "12.3.x-cpu-ml-scala2.12", "data_security_mode": "LEGACY_PASSTHROUGH" -} \ No newline at end of file +} diff --git a/tests/unit/assessment/clusters/no-isolation.json b/tests/unit/assessment/clusters/no-isolation.json index 8e549821f3..ae95fcf868 100644 --- a/tests/unit/assessment/clusters/no-isolation.json +++ b/tests/unit/assessment/clusters/no-isolation.json @@ -2,4 +2,4 @@ "cluster_name": "No isolation shared", "spark_version": "12.3.x-cpu-ml-scala2.12", "data_security_mode": "NONE" -} \ No newline at end of file +} diff --git a/tests/unit/assessment/clusters/outdated-autoscale.json b/tests/unit/assessment/clusters/outdated-autoscale.json index 2fe7ff61da..546d2dbd25 100644 --- a/tests/unit/assessment/clusters/outdated-autoscale.json +++ b/tests/unit/assessment/clusters/outdated-autoscale.json @@ -6,4 +6,4 @@ "cluster_id": "outdated-autoscale", "cluster_name": "Outdated Shared Autoscale", "spark_version": "9.3.x-cpu-ml-scala2.12" -} \ No newline at end of file +} diff --git a/tests/unit/assessment/clusters/passthrough.json b/tests/unit/assessment/clusters/passthrough.json index baf5ace1ce..396d7d7dc3 100644 --- a/tests/unit/assessment/clusters/passthrough.json +++ b/tests/unit/assessment/clusters/passthrough.json @@ -10,4 +10,4 @@ "spark_conf" : { "spark.databricks.passthrough.enabled": "True" } -} \ No newline at end of file +} diff --git 
a/tests/unit/assessment/clusters/policy-azure-oauth.json b/tests/unit/assessment/clusters/policy-azure-oauth.json index 1713f36c4e..26bf3f5b46 100644 --- a/tests/unit/assessment/clusters/policy-azure-oauth.json +++ b/tests/unit/assessment/clusters/policy-azure-oauth.json @@ -12,4 +12,4 @@ "spark.databricks.delta.preview.enabled": "true" }, "spark_context_id": "5134472582179565315" -} \ No newline at end of file +} diff --git a/tests/unit/assessment/clusters/policy-deleted.json b/tests/unit/assessment/clusters/policy-deleted.json index 389a7cf7da..4dd0336127 100644 --- a/tests/unit/assessment/clusters/policy-deleted.json +++ b/tests/unit/assessment/clusters/policy-deleted.json @@ -12,4 +12,4 @@ "spark.databricks.delta.preview.enabled": "true" }, "spark_context_id": "5134472582179565315" -} \ No newline at end of file +} diff --git a/tests/unit/assessment/clusters/policy-single-user-with-empty-appid-spn.json b/tests/unit/assessment/clusters/policy-single-user-with-empty-appid-spn.json index d168b63459..3d9cd56ed8 100644 --- a/tests/unit/assessment/clusters/policy-single-user-with-empty-appid-spn.json +++ b/tests/unit/assessment/clusters/policy-single-user-with-empty-appid-spn.json @@ -9,4 +9,4 @@ "cluster_id": "0810-225833-atlanta69", "cluster_name": "Tech Summit FY24 Cluster-1", "policy_id": "single-user-with-empty-appid-spn" -} \ No newline at end of file +} diff --git a/tests/unit/assessment/clusters/policy-single-user-with-spn.json b/tests/unit/assessment/clusters/policy-single-user-with-spn.json index 496703da49..9aca28d2b8 100644 --- a/tests/unit/assessment/clusters/policy-single-user-with-spn.json +++ b/tests/unit/assessment/clusters/policy-single-user-with-spn.json @@ -12,4 +12,4 @@ "spark.databricks.delta.preview.enabled": "true" }, "spark_context_id": "5134472582179565315" -} \ No newline at end of file +} diff --git a/tests/unit/assessment/clusters/policy-spn-in-policy-overrides.json b/tests/unit/assessment/clusters/policy-spn-in-policy-overrides.json index 8c475b629f..cf34e3a166 100644 --- a/tests/unit/assessment/clusters/policy-spn-in-policy-overrides.json +++ b/tests/unit/assessment/clusters/policy-spn-in-policy-overrides.json @@ -9,4 +9,4 @@ "cluster_id": "0810-225833-atlanta69", "cluster_name": "Tech Summit FY24 Cluster-1", "policy_id": "spn-in-policy-overrides" -} \ No newline at end of file +} diff --git a/tests/unit/assessment/clusters/simplest-autoscale.json b/tests/unit/assessment/clusters/simplest-autoscale.json index 7b36a9552b..34d583bb0f 100644 --- a/tests/unit/assessment/clusters/simplest-autoscale.json +++ b/tests/unit/assessment/clusters/simplest-autoscale.json @@ -7,4 +7,4 @@ "cluster_name": "Simplest Shared Autoscale", "policy_id": "single-user-with-spn", "spark_version": "13.3.x-cpu-ml-scala2.12" -} \ No newline at end of file +} diff --git a/tests/unit/assessment/jobruns/dbt_task.json b/tests/unit/assessment/jobruns/dbt_task.json index 4c2d32ce3b..38dcd46e25 100644 --- a/tests/unit/assessment/jobruns/dbt_task.json +++ b/tests/unit/assessment/jobruns/dbt_task.json @@ -20,4 +20,4 @@ } } ] -} \ No newline at end of file +} diff --git a/tests/unit/assessment/jobruns/gitsource_task.json b/tests/unit/assessment/jobruns/gitsource_task.json index cd7179b4c6..3d7d32d1d0 100644 --- a/tests/unit/assessment/jobruns/gitsource_task.json +++ b/tests/unit/assessment/jobruns/gitsource_task.json @@ -17,4 +17,4 @@ } } ] -} \ No newline at end of file +} diff --git a/tests/unit/assessment/jobruns/jar_task.json b/tests/unit/assessment/jobruns/jar_task.json index 
9db65106be..3220668898 100644 --- a/tests/unit/assessment/jobruns/jar_task.json +++ b/tests/unit/assessment/jobruns/jar_task.json @@ -13,4 +13,4 @@ } } ] -} \ No newline at end of file +} diff --git a/tests/unit/assessment/jobruns/notebook_dupe_task.json b/tests/unit/assessment/jobruns/notebook_dupe_task.json index 45a5bc57d4..eba5311f2f 100644 --- a/tests/unit/assessment/jobruns/notebook_dupe_task.json +++ b/tests/unit/assessment/jobruns/notebook_dupe_task.json @@ -12,4 +12,4 @@ } } ] -} \ No newline at end of file +} diff --git a/tests/unit/assessment/jobruns/notebook_no_failure_task.json b/tests/unit/assessment/jobruns/notebook_no_failure_task.json index 9ea4bad124..30826280dc 100644 --- a/tests/unit/assessment/jobruns/notebook_no_failure_task.json +++ b/tests/unit/assessment/jobruns/notebook_no_failure_task.json @@ -12,4 +12,4 @@ } } ] -} \ No newline at end of file +} diff --git a/tests/unit/assessment/jobruns/notebook_no_sec_comp_task.json b/tests/unit/assessment/jobruns/notebook_no_sec_comp_task.json index b502cc27f2..148229f3f7 100644 --- a/tests/unit/assessment/jobruns/notebook_no_sec_comp_task.json +++ b/tests/unit/assessment/jobruns/notebook_no_sec_comp_task.json @@ -18,4 +18,4 @@ } } ] -} \ No newline at end of file +} diff --git a/tests/unit/assessment/jobruns/notebook_no_sec_no_comp_task.json b/tests/unit/assessment/jobruns/notebook_no_sec_no_comp_task.json index 495a8c25bc..1ad58ac695 100644 --- a/tests/unit/assessment/jobruns/notebook_no_sec_no_comp_task.json +++ b/tests/unit/assessment/jobruns/notebook_no_sec_no_comp_task.json @@ -18,4 +18,4 @@ } } ] -} \ No newline at end of file +} diff --git a/tests/unit/assessment/jobruns/notebook_spark_conf_task.json b/tests/unit/assessment/jobruns/notebook_spark_conf_task.json index 30b49fd50b..024c0e574f 100644 --- a/tests/unit/assessment/jobruns/notebook_spark_conf_task.json +++ b/tests/unit/assessment/jobruns/notebook_spark_conf_task.json @@ -22,4 +22,4 @@ } } ] -} \ No newline at end of file +} diff --git a/tests/unit/assessment/jobruns/notebook_task.json b/tests/unit/assessment/jobruns/notebook_task.json index b1f4bf0f29..471453f337 100644 --- a/tests/unit/assessment/jobruns/notebook_task.json +++ b/tests/unit/assessment/jobruns/notebook_task.json @@ -12,4 +12,4 @@ } } ] -} \ No newline at end of file +} diff --git a/tests/unit/assessment/jobruns/python_wheel_task.json b/tests/unit/assessment/jobruns/python_wheel_task.json index 290ea45c09..081030c714 100644 --- a/tests/unit/assessment/jobruns/python_wheel_task.json +++ b/tests/unit/assessment/jobruns/python_wheel_task.json @@ -13,4 +13,4 @@ } } ] -} \ No newline at end of file +} diff --git a/tests/unit/assessment/jobruns/run_condition_task.json b/tests/unit/assessment/jobruns/run_condition_task.json index d6ecc5fa25..75cb398d20 100644 --- a/tests/unit/assessment/jobruns/run_condition_task.json +++ b/tests/unit/assessment/jobruns/run_condition_task.json @@ -15,4 +15,4 @@ } } ] -} \ No newline at end of file +} diff --git a/tests/unit/assessment/jobruns/spark_jar_task.json b/tests/unit/assessment/jobruns/spark_jar_task.json index 8166878d81..b25cbc08a3 100644 --- a/tests/unit/assessment/jobruns/spark_jar_task.json +++ b/tests/unit/assessment/jobruns/spark_jar_task.json @@ -13,4 +13,4 @@ } } ] -} \ No newline at end of file +} diff --git a/tests/unit/assessment/jobruns/sql_tasks.json b/tests/unit/assessment/jobruns/sql_tasks.json index c3d8fbc816..3f9e0ff57d 100644 --- a/tests/unit/assessment/jobruns/sql_tasks.json +++ b/tests/unit/assessment/jobruns/sql_tasks.json @@ -45,4 +45,4 
@@ } } ] -} \ No newline at end of file +} diff --git a/tests/unit/assessment/jobs/legacy-job-on-azure-spn-secret.json b/tests/unit/assessment/jobs/legacy-job-on-azure-spn-secret.json index 8997a788bc..118979d83f 100644 --- a/tests/unit/assessment/jobs/legacy-job-on-azure-spn-secret.json +++ b/tests/unit/assessment/jobs/legacy-job-on-azure-spn-secret.json @@ -9,4 +9,4 @@ "notebook_path": "/Users/foo.bar@databricks.com/Customers/Example/Test/Load" } } -} \ No newline at end of file +} diff --git a/tests/unit/assessment/jobs/no-settings.json b/tests/unit/assessment/jobs/no-settings.json index 366c51ebef..ccee446571 100644 --- a/tests/unit/assessment/jobs/no-settings.json +++ b/tests/unit/assessment/jobs/no-settings.json @@ -1,3 +1,3 @@ { "job_id": 9002 -} \ No newline at end of file +} diff --git a/tests/unit/assessment/jobs/no-tasks.json b/tests/unit/assessment/jobs/no-tasks.json index 04c6da54fa..0615479de1 100644 --- a/tests/unit/assessment/jobs/no-tasks.json +++ b/tests/unit/assessment/jobs/no-tasks.json @@ -3,4 +3,4 @@ "settings": { "name": "No Tasks" } -} \ No newline at end of file +} diff --git a/tests/unit/assessment/jobs/on-azure-spn-secret.json b/tests/unit/assessment/jobs/on-azure-spn-secret.json index 85b749f377..d79535c716 100644 --- a/tests/unit/assessment/jobs/on-azure-spn-secret.json +++ b/tests/unit/assessment/jobs/on-azure-spn-secret.json @@ -14,4 +14,4 @@ } ] } -} \ No newline at end of file +} diff --git a/tests/unit/assessment/jobs/on-outdated-autoscale.json b/tests/unit/assessment/jobs/on-outdated-autoscale.json index 09c1418e33..b2e3a45b16 100644 --- a/tests/unit/assessment/jobs/on-outdated-autoscale.json +++ b/tests/unit/assessment/jobs/on-outdated-autoscale.json @@ -14,4 +14,4 @@ } ] } -} \ No newline at end of file +} diff --git a/tests/unit/assessment/jobs/on-simplest-autoscale.json b/tests/unit/assessment/jobs/on-simplest-autoscale.json index 830af2f1cb..84c8849b64 100644 --- a/tests/unit/assessment/jobs/on-simplest-autoscale.json +++ b/tests/unit/assessment/jobs/on-simplest-autoscale.json @@ -14,4 +14,4 @@ } ] } -} \ No newline at end of file +} diff --git a/tests/unit/assessment/jobs/policy-single-job-with-spn.json b/tests/unit/assessment/jobs/policy-single-job-with-spn.json index b5eef5c653..5f408544ac 100644 --- a/tests/unit/assessment/jobs/policy-single-job-with-spn.json +++ b/tests/unit/assessment/jobs/policy-single-job-with-spn.json @@ -22,4 +22,4 @@ } ] } -} \ No newline at end of file +} diff --git a/tests/unit/assessment/jobs/single-job.json b/tests/unit/assessment/jobs/single-job.json index d7a599536c..4d37073542 100644 --- a/tests/unit/assessment/jobs/single-job.json +++ b/tests/unit/assessment/jobs/single-job.json @@ -13,4 +13,4 @@ } ] } -} \ No newline at end of file +} diff --git a/tests/unit/assessment/jobs/some-spn.json b/tests/unit/assessment/jobs/some-spn.json index f36d2a486c..8085c279a6 100644 --- a/tests/unit/assessment/jobs/some-spn.json +++ b/tests/unit/assessment/jobs/some-spn.json @@ -25,4 +25,4 @@ } ] } -} \ No newline at end of file +} diff --git a/tests/unit/assessment/jobs/spark-jar-task.json b/tests/unit/assessment/jobs/spark-jar-task.json index df70ea0420..163e89114c 100644 --- a/tests/unit/assessment/jobs/spark-jar-task.json +++ b/tests/unit/assessment/jobs/spark-jar-task.json @@ -15,4 +15,4 @@ } ] } -} \ No newline at end of file +} diff --git a/tests/unit/assessment/pipelines/empty-spec.json b/tests/unit/assessment/pipelines/empty-spec.json index 42956973fe..5f5312dbe3 100644 --- a/tests/unit/assessment/pipelines/empty-spec.json 
+++ b/tests/unit/assessment/pipelines/empty-spec.json @@ -6,4 +6,4 @@ "configuration": { } } -} \ No newline at end of file +} diff --git a/tests/unit/assessment/pipelines/spec-with-spn.json b/tests/unit/assessment/pipelines/spec-with-spn.json index a668ce7bd2..320b9229f9 100644 --- a/tests/unit/assessment/pipelines/spec-with-spn.json +++ b/tests/unit/assessment/pipelines/spec-with-spn.json @@ -49,4 +49,4 @@ "spark.hadoop.fs.azure.sas.fixed.token.abcde.dfs.core.windows.net": "{{secrets/abcde_access/sasFixedToken}}" } } -} \ No newline at end of file +} diff --git a/tests/unit/assessment/policies/ext-hms.json b/tests/unit/assessment/policies/ext-hms.json index 28579017a8..ccea72a477 100644 --- a/tests/unit/assessment/policies/ext-hms.json +++ b/tests/unit/assessment/policies/ext-hms.json @@ -33,4 +33,4 @@ }, "policy_family_definition_overrides": {}, "name": "ext_hms" -} \ No newline at end of file +} diff --git a/tests/unit/assessment/policies/single-job-with-spn.json b/tests/unit/assessment/policies/single-job-with-spn.json index 0ce429360c..11d34533f4 100644 --- a/tests/unit/assessment/policies/single-job-with-spn.json +++ b/tests/unit/assessment/policies/single-job-with-spn.json @@ -27,4 +27,4 @@ } }, "policy_family_definition_overrides": {} -} \ No newline at end of file +} diff --git a/tests/unit/assessment/policies/single-user-with-empty-appid-spn.json b/tests/unit/assessment/policies/single-user-with-empty-appid-spn.json index dabe6d4bc0..42e453eed6 100644 --- a/tests/unit/assessment/policies/single-user-with-empty-appid-spn.json +++ b/tests/unit/assessment/policies/single-user-with-empty-appid-spn.json @@ -28,4 +28,4 @@ }, "policy_family_definition_overrides": { } -} \ No newline at end of file +} diff --git a/tests/unit/assessment/policies/single-user-with-spn-no-sparkversion.json b/tests/unit/assessment/policies/single-user-with-spn-no-sparkversion.json index fd94845044..ae4f4640d1 100644 --- a/tests/unit/assessment/policies/single-user-with-spn-no-sparkversion.json +++ b/tests/unit/assessment/policies/single-user-with-spn-no-sparkversion.json @@ -10,4 +10,4 @@ }, "policy_family_definition_overrides": { } -} \ No newline at end of file +} diff --git a/tests/unit/assessment/policies/single-user-with-spn-policyid.json b/tests/unit/assessment/policies/single-user-with-spn-policyid.json index e152cc3325..dfb967b1b1 100644 --- a/tests/unit/assessment/policies/single-user-with-spn-policyid.json +++ b/tests/unit/assessment/policies/single-user-with-spn-policyid.json @@ -31,4 +31,4 @@ "name": "test_policy", "description": "test", "creator_user_name": "test_creator" -} \ No newline at end of file +} diff --git a/tests/unit/assessment/policies/single-user-with-spn.json b/tests/unit/assessment/policies/single-user-with-spn.json index 37b6726233..6f81a8403c 100644 --- a/tests/unit/assessment/policies/single-user-with-spn.json +++ b/tests/unit/assessment/policies/single-user-with-spn.json @@ -79,4 +79,4 @@ "hidden": true } } -} \ No newline at end of file +} diff --git a/tests/unit/assessment/policies/spn-in-policy-overrides.json b/tests/unit/assessment/policies/spn-in-policy-overrides.json index 89dfb89f0e..8f5fce4fd1 100644 --- a/tests/unit/assessment/policies/spn-in-policy-overrides.json +++ b/tests/unit/assessment/policies/spn-in-policy-overrides.json @@ -27,4 +27,4 @@ "hidden": "true" } } -} \ No newline at end of file +} diff --git a/tests/unit/assessment/warehouses/dupe-spn-config.json b/tests/unit/assessment/warehouses/dupe-spn-config.json index 0c0bac552a..4c02199df9 100644 --- 
a/tests/unit/assessment/warehouses/dupe-spn-config.json +++ b/tests/unit/assessment/warehouses/dupe-spn-config.json @@ -39,4 +39,4 @@ "key": "spark.hadoop.fs.azure.account.oauth2.client.endpoint.newstorageacct.dfs.core.windows.net", "value": "https://login.microsoftonline.com/directory_12345/oauth2/token" } -] \ No newline at end of file +] diff --git a/tests/unit/assessment/warehouses/single-config.json b/tests/unit/assessment/warehouses/single-config.json index 1d10bc5981..5dbf11fafc 100644 --- a/tests/unit/assessment/warehouses/single-config.json +++ b/tests/unit/assessment/warehouses/single-config.json @@ -3,4 +3,4 @@ "key": "spark.hadoop.fs.azure.account.auth.type.storage_acct1.dfs.core.windows.net", "value": "OAuth" } -] \ No newline at end of file +] diff --git a/tests/unit/assessment/warehouses/spn-config.json b/tests/unit/assessment/warehouses/spn-config.json index fee278f6bc..0c73f970bd 100644 --- a/tests/unit/assessment/warehouses/spn-config.json +++ b/tests/unit/assessment/warehouses/spn-config.json @@ -39,4 +39,4 @@ "key": "spark.hadoop.fs.azure.account.oauth2.client.endpoint.storage_acct1.dfs.core.windows.net", "value": "https://login.microsoftonline.com/dummy_tenant_id_2/oauth2/token" } -] \ No newline at end of file +] diff --git a/tests/unit/assessment/warehouses/spn-secret-config.json b/tests/unit/assessment/warehouses/spn-secret-config.json index 0154585c19..7045af8697 100644 --- a/tests/unit/assessment/warehouses/spn-secret-config.json +++ b/tests/unit/assessment/warehouses/spn-secret-config.json @@ -39,4 +39,4 @@ "key": "spark.hadoop.fs.azure.account.oauth2.client.endpoint.xyz.dfs.core.windows.net", "value": "https://login.microsoftonline.com/dummy_tenant_id2/oauth2/token" } -] \ No newline at end of file +] diff --git a/tests/unit/azure/azure/mappings.json b/tests/unit/azure/azure/mappings.json index 73141e7795..dc1e0e41ca 100644 --- a/tests/unit/azure/azure/mappings.json +++ b/tests/unit/azure/azure/mappings.json @@ -418,4 +418,4 @@ } } } -] \ No newline at end of file +] diff --git a/tests/unit/hive_metastore/tables/dbfs_parquet.json b/tests/unit/hive_metastore/tables/dbfs_parquet.json index a787ffa363..559eb1726b 100644 --- a/tests/unit/hive_metastore/tables/dbfs_parquet.json +++ b/tests/unit/hive_metastore/tables/dbfs_parquet.json @@ -15,4 +15,4 @@ "src_table": "managed_dbfs", "dst_table": "managed_dbfs" } -} \ No newline at end of file +} diff --git a/tests/unit/hive_metastore/tables/external_hiveserde.json b/tests/unit/hive_metastore/tables/external_hiveserde.json index 58b67468e0..f690fefd45 100644 --- a/tests/unit/hive_metastore/tables/external_hiveserde.json +++ b/tests/unit/hive_metastore/tables/external_hiveserde.json @@ -15,4 +15,4 @@ "src_table": "external_src", "dst_table": "external_dst" } -} \ No newline at end of file +} diff --git a/tests/unit/hive_metastore/tables/external_no_sync.json b/tests/unit/hive_metastore/tables/external_no_sync.json index 73e4c5fecc..3c8cc3f002 100644 --- a/tests/unit/hive_metastore/tables/external_no_sync.json +++ b/tests/unit/hive_metastore/tables/external_no_sync.json @@ -15,4 +15,4 @@ "src_table": "external_src", "dst_table": "external_dst" } -} \ No newline at end of file +} diff --git a/tests/unit/hive_metastore/tables/external_no_sync_missing_location.json b/tests/unit/hive_metastore/tables/external_no_sync_missing_location.json index 71f3c64ee7..2df0fe6ae4 100644 --- a/tests/unit/hive_metastore/tables/external_no_sync_missing_location.json +++ b/tests/unit/hive_metastore/tables/external_no_sync_missing_location.json 
@@ -14,4 +14,4 @@ "src_table": "external_src", "dst_table": "external_dst" } -} \ No newline at end of file +} diff --git a/tests/unit/hive_metastore/tables/external_src.json b/tests/unit/hive_metastore/tables/external_src.json index 46c29a6999..7b1c2f79e3 100644 --- a/tests/unit/hive_metastore/tables/external_src.json +++ b/tests/unit/hive_metastore/tables/external_src.json @@ -14,4 +14,4 @@ "src_table": "external_src", "dst_table": "external_dst" } -} \ No newline at end of file +} diff --git a/tests/unit/hive_metastore/tables/external_src_unsupported.json b/tests/unit/hive_metastore/tables/external_src_unsupported.json index 32b6cbbe43..4510a984d9 100644 --- a/tests/unit/hive_metastore/tables/external_src_unsupported.json +++ b/tests/unit/hive_metastore/tables/external_src_unsupported.json @@ -14,4 +14,4 @@ "src_table": "external_src", "dst_table": "external_dst" } -} \ No newline at end of file +} diff --git a/tests/unit/hive_metastore/tables/managed_dbfs.json b/tests/unit/hive_metastore/tables/managed_dbfs.json index 5b9bb84b90..1bac3c396a 100644 --- a/tests/unit/hive_metastore/tables/managed_dbfs.json +++ b/tests/unit/hive_metastore/tables/managed_dbfs.json @@ -15,4 +15,4 @@ "src_table": "managed_dbfs", "dst_table": "managed_dbfs" } -} \ No newline at end of file +} diff --git a/tests/unit/hive_metastore/tables/managed_mnt.json b/tests/unit/hive_metastore/tables/managed_mnt.json index d7b96db341..cc93561cc8 100644 --- a/tests/unit/hive_metastore/tables/managed_mnt.json +++ b/tests/unit/hive_metastore/tables/managed_mnt.json @@ -15,4 +15,4 @@ "src_table": "managed_mnt", "dst_table": "managed_mnt" } -} \ No newline at end of file +} diff --git a/tests/unit/hive_metastore/tables/managed_other.json b/tests/unit/hive_metastore/tables/managed_other.json index 3d97e942c6..3078b3b776 100644 --- a/tests/unit/hive_metastore/tables/managed_other.json +++ b/tests/unit/hive_metastore/tables/managed_other.json @@ -15,4 +15,4 @@ "src_table": "managed_other", "dst_table": "managed_other" } -} \ No newline at end of file +} diff --git a/tests/unit/hive_metastore/tables/tables_and_views.json b/tests/unit/hive_metastore/tables/tables_and_views.json index 0c9abd64c9..3435fd52bc 100644 --- a/tests/unit/hive_metastore/tables/tables_and_views.json +++ b/tests/unit/hive_metastore/tables/tables_and_views.json @@ -91,4 +91,4 @@ } -] \ No newline at end of file +] diff --git a/tests/unit/hive_metastore/tables/view.json b/tests/unit/hive_metastore/tables/view.json index d699377c8a..24c6efabcc 100644 --- a/tests/unit/hive_metastore/tables/view.json +++ b/tests/unit/hive_metastore/tables/view.json @@ -15,4 +15,4 @@ "src_table": "view_src", "dst_table": "view_dst" } -} \ No newline at end of file +} diff --git a/tests/unit/source_code/samples/3_SparkR_Fine Grained Demand Forecasting.r b/tests/unit/source_code/samples/3_SparkR_Fine Grained Demand Forecasting.r index 00d4fa9aa0..1cc70187b2 100644 --- a/tests/unit/source_code/samples/3_SparkR_Fine Grained Demand Forecasting.r +++ b/tests/unit/source_code/samples/3_SparkR_Fine Grained Demand Forecasting.r @@ -517,4 +517,4 @@ saveAsTable(forecast_evals, "forecast_evals_sparkr", "delta", "append", mergeSch # MAGIC | library | description | license | source | # MAGIC |----------------------------------------|-------------------------|------------|-----------------------------------------------------| # MAGIC | prophet |Implements a procedure for forecasting time series data based on an additive model | MIT | https://cran.r-project.org/web/packages/prophet/index.html | -# 
MAGIC | Metrics | An implementation of evaluation metrics in R that are commonly used in supervised machine learning | BSD 3 | https://cran.r-project.org/web/packages/Metrics/index.html | +# MAGIC | Metrics | An implementation of evaluation metrics in R that are commonly used in supervised machine learning | BSD 3 | https://cran.r-project.org/web/packages/Metrics/index.html | diff --git a/ucx.iml b/ucx.iml index b714dec3e7..5246a84846 100644 --- a/ucx.iml +++ b/ucx.iml @@ -10,4 +10,4 @@ - \ No newline at end of file +