Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix issues when running solacc #1902

Merged
merged 27 commits into from
Jun 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
74095eb
rename tree.root to tree.node
ericvergnaud Jun 12, 2024
62cb196
add root property to tree
ericvergnaud Jun 12, 2024
36d6dbb
successful unit tests
ericvergnaud Jun 12, 2024
b3b5140
use named parameters from sate, not from context
ericvergnaud Jun 12, 2024
0f081be
integrate session state to linters
ericvergnaud Jun 12, 2024
018eb26
Merge branch 'main' into infer-linted-values-that-resolve-to-dbutils.…
ericvergnaud Jun 12, 2024
84e13a9
drop obsolete file
ericvergnaud Jun 12, 2024
8fce5df
Merge branch 'infer-linted-values-that-resolve-to-dbutils.widgets.get…
ericvergnaud Jun 12, 2024
d0117eb
integrate session state to linters and addd functional tests
ericvergnaud Jun 12, 2024
cd993e7
improve test coverage
ericvergnaud Jun 12, 2024
efb7188
improve test coverage nd formatting
ericvergnaud Jun 12, 2024
1c7aa67
improve test coverage
ericvergnaud Jun 12, 2024
521c416
simplify
ericvergnaud Jun 12, 2024
fdbdbed
refactor API
ericvergnaud Jun 13, 2024
95082c3
Merge branch 'main' into infer-linted-values-that-resolve-to-dbutils.…
ericvergnaud Jun 13, 2024
43cf134
formatting
ericvergnaud Jun 13, 2024
8b6a0e8
fix failing test
ericvergnaud Jun 13, 2024
4759117
fix failing tests
ericvergnaud Jun 13, 2024
3ff9ba6
add support for dbutils.widgets.getAll
ericvergnaud Jun 13, 2024
939b42f
fix crasher when running solacc
ericvergnaud Jun 13, 2024
6e63ee2
fix the fix
ericvergnaud Jun 13, 2024
b59cbd3
fix issue when linting sql statement 'CREATE SCHEMA xxx'
ericvergnaud Jun 13, 2024
2f0e028
Infer table names from f-strings and improve Advice messages
ericvergnaud Jun 13, 2024
4602ac1
Infer table names from f-strings and improve Advice messages
ericvergnaud Jun 13, 2024
f8f9b21
formatting
ericvergnaud Jun 13, 2024
ba2cc83
Merge branch 'main' into fix-issues-when-running-solacc
ericvergnaud Jun 14, 2024
aa63ece
increase test coverage
ericvergnaud Jun 14, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 15 additions & 17 deletions src/databricks/labs/ucx/source_code/linters/pyspark.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,26 +113,24 @@ def lint(
self, from_table: FromTable, index: MigrationIndex, session_state: CurrentSessionState, node: Call
) -> Iterator[Advice]:
table_arg = self._get_table_arg(node)

if not isinstance(table_arg, Const):
assert isinstance(node.func, Attribute) # always true, avoids a pylint warning
yield Advisory.from_node(
table_name = table_arg.as_string().strip("'").strip('"')
for inferred in Tree(table_arg).infer_values(session_state):
if not inferred.is_inferred():
yield Advisory.from_node(
code='table-migrate',
message=f"Can't migrate '{node.as_string()}' because its table name argument cannot be computed",
node=node,
)
continue
dst = self._find_dest(index, inferred.as_string(), from_table.schema)
if dst is None:
continue
yield Deprecation.from_node(
code='table-migrate',
message=f"Can't migrate '{node.func.attrname}' because its table name argument is not a constant",
message=f"Table {table_name} is migrated to {dst.destination()} in Unity Catalog",
# SQLGlot does not propagate tokens yet. See https://github.com/tobymao/sqlglot/issues/3159
node=node,
)
return

dst = self._find_dest(index, table_arg.value, from_table.schema)
if dst is None:
return

yield Deprecation.from_node(
code='table-migrate',
message=f"Table {table_arg.value} is migrated to {dst.destination()} in Unity Catalog",
# SQLGlot does not propagate tokens yet. See https://github.com/tobymao/sqlglot/issues/3159
node=node,
)

def apply(self, from_table: FromTable, index: MigrationIndex, node: Call) -> None:
table_arg = self._get_table_arg(node)
Expand Down
7 changes: 6 additions & 1 deletion src/databricks/labs/ucx/source_code/queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import logging
import sqlglot
from sqlglot.expressions import Table, Expression, Use
from sqlglot.expressions import Table, Expression, Use, Create
from databricks.labs.ucx.hive_metastore.migration_status import MigrationIndex
from databricks.labs.ucx.source_code.base import Advice, Deprecation, Fixer, Linter, CurrentSessionState

Expand Down Expand Up @@ -52,6 +52,11 @@ def lint(self, code: str) -> Iterable[Advice]:
# the schema as the table name.
self._session_state.schema = table.name
continue
if isinstance(statement, Create) and statement.kind == "SCHEMA":
# Sqlglot captures the schema name in the Create statement as a Table, with
# the schema as the db name.
self._session_state.schema = table.db
continue

# we only migrate tables in the hive_metastore catalog
if self._catalog(table) != 'hive_metastore':
Expand Down
11 changes: 11 additions & 0 deletions tests/unit/source_code/linters/test_pyspark.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,17 @@ def test_spark_no_sql(empty_index):
assert not list(sqf.lint("print(1)"))


def test_spark_dynamic_sql(empty_index):
source = """
schema="some_schema"
df4.write.saveAsTable(f"{schema}.member_measure")
"""
session_state = CurrentSessionState()
ftf = FromTable(empty_index, session_state)
sqf = SparkSql(ftf, empty_index, session_state)
assert not list(sqf.lint(source))


def test_spark_sql_no_match(empty_index):
session_state = CurrentSessionState()
ftf = FromTable(empty_index, session_state)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@
spark.catalog.cacheTable(storageLevel=None, tableName="old.things")

## Some calls that use a variable whose value is unknown: they could potentially reference a migrated table.
# ucx[table-migrate:+1:0:+1:30] Can't migrate 'cacheTable' because its table name argument is not a constant
# ucx[table-migrate:+1:0:+1:30] Can't migrate 'spark.catalog.cacheTable(name)' because its table name argument cannot be computed
spark.catalog.cacheTable(name)
# ucx[table-migrate:+1:0:+1:40] Can't migrate 'cacheTable' because its table name argument is not a constant
# ucx[table-migrate:+1:0:+1:40] Can't migrate 'spark.catalog.cacheTable(f'boop{stuff}')' because its table name argument cannot be computed
spark.catalog.cacheTable(f"boop{stuff}")

## Some trivial references to the method or table in unrelated contexts that should not trigger warnigns.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,10 @@
do_stuff_with(df)

## Some calls that use a variable whose value is unknown: they could potentially reference a migrated table.
# ucx[table-migrate:+1:9:+1:48] Can't migrate 'createExternalTable' because its table name argument is not a constant
# ucx[table-migrate:+1:9:+1:48] Can't migrate 'spark.catalog.createExternalTable(name)' because its table name argument cannot be computed
df = spark.catalog.createExternalTable(name)
do_stuff_with(df)
# ucx[table-migrate:+1:9:+1:58] Can't migrate 'createExternalTable' because its table name argument is not a constant
# ucx[table-migrate:+1:9:+1:58] Can't migrate 'spark.catalog.createExternalTable(f'boop{stuff}')' because its table name argument cannot be computed
df = spark.catalog.createExternalTable(f"boop{stuff}")
do_stuff_with(df)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,10 @@
do_stuff_with(df)

## Some calls that use a variable whose value is unknown: they could potentially reference a migrated table.
# ucx[table-migrate:+1:9:+1:40] Can't migrate 'createTable' because its table name argument is not a constant
# ucx[table-migrate:+1:9:+1:40] Can't migrate 'spark.catalog.createTable(name)' because its table name argument cannot be computed
df = spark.catalog.createTable(name)
do_stuff_with(df)
# ucx[table-migrate:+1:9:+1:50] Can't migrate 'createTable' because its table name argument is not a constant
# ucx[table-migrate:+1:9:+1:50] Can't migrate 'spark.catalog.createTable(f'boop{stuff}')' because its table name argument cannot be computed
df = spark.catalog.createTable(f"boop{stuff}")
do_stuff_with(df)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,10 @@
do_stuff_with(table)

## Some calls that use a variable whose value is unknown: they could potentially reference a migrated table.
# ucx[table-migrate:+1:12:+1:40] Can't migrate 'getTable' because its table name argument is not a constant
# ucx[table-migrate:+1:12:+1:40] Can't migrate 'spark.catalog.getTable(name)' because its table name argument cannot be computed
table = spark.catalog.getTable(name)
do_stuff_with(table)
# ucx[table-migrate:+1:12:+1:50] Can't migrate 'getTable' because its table name argument is not a constant
# ucx[table-migrate:+1:12:+1:50] Can't migrate 'spark.catalog.getTable(f'boop{stuff}')' because its table name argument cannot be computed
table = spark.catalog.getTable(f"boop{stuff}")
do_stuff_with(table)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@
cached_previously = spark.catalog.isCached("old.things", "extra-argument")

## Some calls that use a variable whose value is unknown: they could potentially reference a migrated table.
# ucx[table-migrate:+1:24:+1:52] Can't migrate 'isCached' because its table name argument is not a constant
# ucx[table-migrate:+1:24:+1:52] Can't migrate 'spark.catalog.isCached(name)' because its table name argument cannot be computed
cached_previously = spark.catalog.isCached(name)
# ucx[table-migrate:+1:24:+1:62] Can't migrate 'isCached' because its table name argument is not a constant
# ucx[table-migrate:+1:24:+1:62] Can't migrate 'spark.catalog.isCached(f'boop{stuff}')' because its table name argument cannot be computed
cached_previously = spark.catalog.isCached(f"boop{stuff}")

## Some trivial references to the method or table in unrelated contexts that should not trigger warnigns.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,9 @@
columns = spark.catalog.listColumns(dbName="old", name="things")

## Some calls that use a variable whose value is unknown: they could potentially reference a migrated table.
# ucx[table-migrate:+1:14:+1:45] Can't migrate 'listColumns' because its table name argument is not a constant
# ucx[table-migrate:+1:14:+1:45] Can't migrate 'spark.catalog.listColumns(name)' because its table name argument cannot be computed
columns = spark.catalog.listColumns(name)
# ucx[table-migrate:+1:14:+1:55] Can't migrate 'listColumns' because its table name argument is not a constant
# ucx[table-migrate:+1:14:+1:55] Can't migrate 'spark.catalog.listColumns(f'boop{stuff}')' because its table name argument cannot be computed
columns = spark.catalog.listColumns(f"boop{stuff}")

## Some trivial references to the method or table in unrelated contexts that should not trigger warnigns.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,12 @@
## Some calls that use a variable whose value is unknown: they could potentially reference a migrated database.
# ucx[table-migrate:+3:13:+3:43] Call to 'listTables' will return a list of <catalog>.<database>.<table> instead of <database>.<table>.
## TODO: The following isn't yet implemented:
## ucx[table-migrate:+1:13:+1:0] Can't migrate 'listTables' because its database name argument is not a constant
## ucx[table-migrate:+1:13:+1:0] Can't migrate 'listTables' because its database name argument cannot be computed
for table in spark.catalog.listTables(name):
do_stuff_with_table(table)
# ucx[table-migrate:+3:13:+3:53] Call to 'listTables' will return a list of <catalog>.<database>.<table> instead of <database>.<table>.
## TODO: The following isn't yet implemented:
## ucx[table-migrate:+1:13:+1:0] Can't migrate 'listTables' because its database name argument is not a constant
## ucx[table-migrate:+1:13:+1:0] Can't migrate 'listTables' because its database name argument cannot be computed
for table in spark.catalog.listTables(f"boop{stuff}"):
do_stuff_with_table(table)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@
spark.catalog.recoverPartitions("old.things", "extra-argument")

## Some calls that use a variable whose value is unknown: they could potentially reference a migrated table.
# ucx[table-migrate:+1:4:+1:41] Can't migrate 'recoverPartitions' because its table name argument is not a constant
# ucx[table-migrate:+1:4:+1:41] Can't migrate 'spark.catalog.recoverPartitions(name)' because its table name argument cannot be computed
spark.catalog.recoverPartitions(name)
# ucx[table-migrate:+1:4:+1:51] Can't migrate 'recoverPartitions' because its table name argument is not a constant
# ucx[table-migrate:+1:4:+1:51] Can't migrate 'spark.catalog.recoverPartitions(f'boop{stuff}')' because its table name argument cannot be computed
spark.catalog.recoverPartitions(f"boop{stuff}")

## Some trivial references to the method or table in unrelated contexts that should not trigger warnigns.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@
spark.catalog.refreshTable("old.things", "extra-argument")

## Some calls that use a variable whose value is unknown: they could potentially reference a migrated table.
# ucx[table-migrate:+1:4:+1:36] Can't migrate 'refreshTable' because its table name argument is not a constant
# ucx[table-migrate:+1:4:+1:36] Can't migrate 'spark.catalog.refreshTable(name)' because its table name argument cannot be computed
spark.catalog.refreshTable(name)
# ucx[table-migrate:+1:4:+1:46] Can't migrate 'refreshTable' because its table name argument is not a constant
# ucx[table-migrate:+1:4:+1:46] Can't migrate 'spark.catalog.refreshTable(f'boop{stuff}')' because its table name argument cannot be computed
spark.catalog.refreshTable(f"boop{stuff}")

## Some trivial references to the method or table in unrelated contexts that should not trigger warnigns.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,10 @@
pass

## Some calls that use a variable whose value is unknown: they could potentially reference a migrated table.
# ucx[table-migrate:+1:7:+1:38] Can't migrate 'tableExists' because its table name argument is not a constant
# ucx[table-migrate:+1:7:+1:38] Can't migrate 'spark.catalog.tableExists(name)' because its table name argument cannot be computed
if spark.catalog.tableExists(name):
pass
# ucx[table-migrate:+1:7:+1:48] Can't migrate 'tableExists' because its table name argument is not a constant
# ucx[table-migrate:+1:7:+1:48] Can't migrate 'spark.catalog.tableExists(f'boot{stuff}')' because its table name argument cannot be computed
if spark.catalog.tableExists(f"boot{stuff}"):
pass

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@
spark.catalog.uncacheTable("old.things", "extra-argument")

## Some calls that use a variable whose value is unknown: they could potentially reference a migrated table.
# ucx[table-migrate:+1:4:+1:36] Can't migrate 'uncacheTable' because its table name argument is not a constant
# ucx[table-migrate:+1:4:+1:36] Can't migrate 'spark.catalog.uncacheTable(name)' because its table name argument cannot be computed
spark.catalog.uncacheTable(name)
# ucx[table-migrate:+1:4:+1:46] Can't migrate 'uncacheTable' because its table name argument is not a constant
# ucx[table-migrate:+1:4:+1:46] Can't migrate 'spark.catalog.uncacheTable(f'boop{stuff}')' because its table name argument cannot be computed
spark.catalog.uncacheTable(f"boop{stuff}")

## Some trivial references to the method or table in unrelated contexts that should not trigger warnigns.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,9 @@
df.write.insertInto(overwrite=None, tableName="old.things")

## Some calls that use a variable whose value is unknown: they could potentially reference a migrated table.
# ucx[table-migrate:+1:4:+1:29] Can't migrate 'insertInto' because its table name argument is not a constant
# ucx[table-migrate:+1:4:+1:29] Can't migrate 'df.write.insertInto(name)' because its table name argument cannot be computed
df.write.insertInto(name)
# ucx[table-migrate:+1:4:+1:39] Can't migrate 'insertInto' because its table name argument is not a constant
# ucx[table-migrate:+1:4:+1:39] Can't migrate 'df.write.insertInto(f'boop{stuff}')' because its table name argument cannot be computed
df.write.insertInto(f"boop{stuff}")

## Some trivial references to the method or table in unrelated contexts that should not trigger warnigns.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,9 @@
df.write.saveAsTable(format="xyz", name="old.things")

## Some calls that use a variable whose value is unknown: they could potentially reference a migrated table.
# ucx[table-migrate:+1:4:+1:46] Can't migrate 'saveAsTable' because its table name argument is not a constant
# ucx[table-migrate:+1:4:+1:46] Can't migrate 'df.write.format('delta').saveAsTable(name)' because its table name argument cannot be computed
df.write.format("delta").saveAsTable(name)
# ucx[table-migrate:+1:4:+1:56] Can't migrate 'saveAsTable' because its table name argument is not a constant
# ucx[table-migrate:+1:4:+1:56] Can't migrate 'df.write.format('delta').saveAsTable(f'boop{stuff}')' because its table name argument cannot be computed
df.write.format("delta").saveAsTable(f"boop{stuff}")

## Some trivial references to the method or table in unrelated contexts that should not trigger warnigns.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,12 @@
do_stuff_with(df)

## Some calls that use a variable whose value is unknown: they could potentially reference a migrated table.
# ucx[table-migrate:+3:9:+3:26] Can't migrate 'table' because its table name argument is not a constant
# ucx[table-migrate:+3:9:+3:26] Can't migrate 'spark.table(name)' because its table name argument cannot be computed
# TODO: Fix false positive:
# ucx[table-migrate:+1:9:+1:26] The default format changed in Databricks Runtime 8.0, from Parquet to Delta
df = spark.table(name)
do_stuff_with(df)
# ucx[table-migrate:+3:9:+3:36] Can't migrate 'table' because its table name argument is not a constant
# ucx[table-migrate:+3:9:+3:36] Can't migrate 'spark.table(f'boop{stuff}')' because its table name argument cannot be computed
# TODO: Fix false positive:
# ucx[table-migrate:+1:9:+1:36] The default format changed in Databricks Runtime 8.0, from Parquet to Delta
df = spark.table(f"boop{stuff}")
Expand Down
8 changes: 8 additions & 0 deletions tests/unit/source_code/test_queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,3 +77,11 @@ def test_use_database_stops_migration(migration_index):
)
transformed_query = ftf.apply(old_query)
assert transformed_query == new_query


def test_parses_create_schema(migration_index):
query = "CREATE SCHEMA xyz"
session_state = CurrentSessionState(schema="old")
ftf = FromTable(migration_index, session_state=session_state)
advices = ftf.lint(query)
assert not list(advices)
Loading