Skip to content

Commit

Permalink
Fix issues when running solacc (#1902)
Browse files Browse the repository at this point in the history
  • Loading branch information
ericvergnaud authored Jun 14, 2024
1 parent 4414463 commit 7c3dab9
Show file tree
Hide file tree
Showing 18 changed files with 68 additions and 46 deletions.
32 changes: 15 additions & 17 deletions src/databricks/labs/ucx/source_code/linters/pyspark.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,26 +113,24 @@ def lint(
self, from_table: FromTable, index: MigrationIndex, session_state: CurrentSessionState, node: Call
) -> Iterator[Advice]:
table_arg = self._get_table_arg(node)

if not isinstance(table_arg, Const):
assert isinstance(node.func, Attribute) # always true, avoids a pylint warning
yield Advisory.from_node(
table_name = table_arg.as_string().strip("'").strip('"')
for inferred in Tree(table_arg).infer_values(session_state):
if not inferred.is_inferred():
yield Advisory.from_node(
code='table-migrate',
message=f"Can't migrate '{node.as_string()}' because its table name argument cannot be computed",
node=node,
)
continue
dst = self._find_dest(index, inferred.as_string(), from_table.schema)
if dst is None:
continue
yield Deprecation.from_node(
code='table-migrate',
message=f"Can't migrate '{node.func.attrname}' because its table name argument is not a constant",
message=f"Table {table_name} is migrated to {dst.destination()} in Unity Catalog",
# SQLGlot does not propagate tokens yet. See https://github.com/tobymao/sqlglot/issues/3159
node=node,
)
return

dst = self._find_dest(index, table_arg.value, from_table.schema)
if dst is None:
return

yield Deprecation.from_node(
code='table-migrate',
message=f"Table {table_arg.value} is migrated to {dst.destination()} in Unity Catalog",
# SQLGlot does not propagate tokens yet. See https://github.com/tobymao/sqlglot/issues/3159
node=node,
)

def apply(self, from_table: FromTable, index: MigrationIndex, node: Call) -> None:
table_arg = self._get_table_arg(node)
Expand Down
7 changes: 6 additions & 1 deletion src/databricks/labs/ucx/source_code/queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import logging
import sqlglot
from sqlglot.expressions import Table, Expression, Use
from sqlglot.expressions import Table, Expression, Use, Create
from databricks.labs.ucx.hive_metastore.migration_status import MigrationIndex
from databricks.labs.ucx.source_code.base import Advice, Deprecation, Fixer, Linter, CurrentSessionState

Expand Down Expand Up @@ -52,6 +52,11 @@ def lint(self, code: str) -> Iterable[Advice]:
# the schema as the table name.
self._session_state.schema = table.name
continue
if isinstance(statement, Create) and statement.kind == "SCHEMA":
# Sqlglot captures the schema name in the Create statement as a Table, with
# the schema as the db name.
self._session_state.schema = table.db
continue

# we only migrate tables in the hive_metastore catalog
if self._catalog(table) != 'hive_metastore':
Expand Down
11 changes: 11 additions & 0 deletions tests/unit/source_code/linters/test_pyspark.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,17 @@ def test_spark_no_sql(empty_index):
assert not list(sqf.lint("print(1)"))


def test_spark_dynamic_sql(empty_index):
source = """
schema="some_schema"
df4.write.saveAsTable(f"{schema}.member_measure")
"""
session_state = CurrentSessionState()
ftf = FromTable(empty_index, session_state)
sqf = SparkSql(ftf, empty_index, session_state)
assert not list(sqf.lint(source))


def test_spark_sql_no_match(empty_index):
session_state = CurrentSessionState()
ftf = FromTable(empty_index, session_state)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@
spark.catalog.cacheTable(storageLevel=None, tableName="old.things")

## Some calls that use a variable whose value is unknown: they could potentially reference a migrated table.
# ucx[table-migrate:+1:0:+1:30] Can't migrate 'cacheTable' because its table name argument is not a constant
# ucx[table-migrate:+1:0:+1:30] Can't migrate 'spark.catalog.cacheTable(name)' because its table name argument cannot be computed
spark.catalog.cacheTable(name)
# ucx[table-migrate:+1:0:+1:40] Can't migrate 'cacheTable' because its table name argument is not a constant
# ucx[table-migrate:+1:0:+1:40] Can't migrate 'spark.catalog.cacheTable(f'boop{stuff}')' because its table name argument cannot be computed
spark.catalog.cacheTable(f"boop{stuff}")

## Some trivial references to the method or table in unrelated contexts that should not trigger warnigns.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,10 @@
do_stuff_with(df)

## Some calls that use a variable whose value is unknown: they could potentially reference a migrated table.
# ucx[table-migrate:+1:9:+1:48] Can't migrate 'createExternalTable' because its table name argument is not a constant
# ucx[table-migrate:+1:9:+1:48] Can't migrate 'spark.catalog.createExternalTable(name)' because its table name argument cannot be computed
df = spark.catalog.createExternalTable(name)
do_stuff_with(df)
# ucx[table-migrate:+1:9:+1:58] Can't migrate 'createExternalTable' because its table name argument is not a constant
# ucx[table-migrate:+1:9:+1:58] Can't migrate 'spark.catalog.createExternalTable(f'boop{stuff}')' because its table name argument cannot be computed
df = spark.catalog.createExternalTable(f"boop{stuff}")
do_stuff_with(df)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,10 @@
do_stuff_with(df)

## Some calls that use a variable whose value is unknown: they could potentially reference a migrated table.
# ucx[table-migrate:+1:9:+1:40] Can't migrate 'createTable' because its table name argument is not a constant
# ucx[table-migrate:+1:9:+1:40] Can't migrate 'spark.catalog.createTable(name)' because its table name argument cannot be computed
df = spark.catalog.createTable(name)
do_stuff_with(df)
# ucx[table-migrate:+1:9:+1:50] Can't migrate 'createTable' because its table name argument is not a constant
# ucx[table-migrate:+1:9:+1:50] Can't migrate 'spark.catalog.createTable(f'boop{stuff}')' because its table name argument cannot be computed
df = spark.catalog.createTable(f"boop{stuff}")
do_stuff_with(df)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,10 @@
do_stuff_with(table)

## Some calls that use a variable whose value is unknown: they could potentially reference a migrated table.
# ucx[table-migrate:+1:12:+1:40] Can't migrate 'getTable' because its table name argument is not a constant
# ucx[table-migrate:+1:12:+1:40] Can't migrate 'spark.catalog.getTable(name)' because its table name argument cannot be computed
table = spark.catalog.getTable(name)
do_stuff_with(table)
# ucx[table-migrate:+1:12:+1:50] Can't migrate 'getTable' because its table name argument is not a constant
# ucx[table-migrate:+1:12:+1:50] Can't migrate 'spark.catalog.getTable(f'boop{stuff}')' because its table name argument cannot be computed
table = spark.catalog.getTable(f"boop{stuff}")
do_stuff_with(table)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@
cached_previously = spark.catalog.isCached("old.things", "extra-argument")

## Some calls that use a variable whose value is unknown: they could potentially reference a migrated table.
# ucx[table-migrate:+1:24:+1:52] Can't migrate 'isCached' because its table name argument is not a constant
# ucx[table-migrate:+1:24:+1:52] Can't migrate 'spark.catalog.isCached(name)' because its table name argument cannot be computed
cached_previously = spark.catalog.isCached(name)
# ucx[table-migrate:+1:24:+1:62] Can't migrate 'isCached' because its table name argument is not a constant
# ucx[table-migrate:+1:24:+1:62] Can't migrate 'spark.catalog.isCached(f'boop{stuff}')' because its table name argument cannot be computed
cached_previously = spark.catalog.isCached(f"boop{stuff}")

## Some trivial references to the method or table in unrelated contexts that should not trigger warnigns.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,9 @@
columns = spark.catalog.listColumns(dbName="old", name="things")

## Some calls that use a variable whose value is unknown: they could potentially reference a migrated table.
# ucx[table-migrate:+1:14:+1:45] Can't migrate 'listColumns' because its table name argument is not a constant
# ucx[table-migrate:+1:14:+1:45] Can't migrate 'spark.catalog.listColumns(name)' because its table name argument cannot be computed
columns = spark.catalog.listColumns(name)
# ucx[table-migrate:+1:14:+1:55] Can't migrate 'listColumns' because its table name argument is not a constant
# ucx[table-migrate:+1:14:+1:55] Can't migrate 'spark.catalog.listColumns(f'boop{stuff}')' because its table name argument cannot be computed
columns = spark.catalog.listColumns(f"boop{stuff}")

## Some trivial references to the method or table in unrelated contexts that should not trigger warnigns.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,12 @@
## Some calls that use a variable whose value is unknown: they could potentially reference a migrated database.
# ucx[table-migrate:+3:13:+3:43] Call to 'listTables' will return a list of <catalog>.<database>.<table> instead of <database>.<table>.
## TODO: The following isn't yet implemented:
## ucx[table-migrate:+1:13:+1:0] Can't migrate 'listTables' because its database name argument is not a constant
## ucx[table-migrate:+1:13:+1:0] Can't migrate 'listTables' because its database name argument cannot be computed
for table in spark.catalog.listTables(name):
do_stuff_with_table(table)
# ucx[table-migrate:+3:13:+3:53] Call to 'listTables' will return a list of <catalog>.<database>.<table> instead of <database>.<table>.
## TODO: The following isn't yet implemented:
## ucx[table-migrate:+1:13:+1:0] Can't migrate 'listTables' because its database name argument is not a constant
## ucx[table-migrate:+1:13:+1:0] Can't migrate 'listTables' because its database name argument cannot be computed
for table in spark.catalog.listTables(f"boop{stuff}"):
do_stuff_with_table(table)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@
spark.catalog.recoverPartitions("old.things", "extra-argument")

## Some calls that use a variable whose value is unknown: they could potentially reference a migrated table.
# ucx[table-migrate:+1:4:+1:41] Can't migrate 'recoverPartitions' because its table name argument is not a constant
# ucx[table-migrate:+1:4:+1:41] Can't migrate 'spark.catalog.recoverPartitions(name)' because its table name argument cannot be computed
spark.catalog.recoverPartitions(name)
# ucx[table-migrate:+1:4:+1:51] Can't migrate 'recoverPartitions' because its table name argument is not a constant
# ucx[table-migrate:+1:4:+1:51] Can't migrate 'spark.catalog.recoverPartitions(f'boop{stuff}')' because its table name argument cannot be computed
spark.catalog.recoverPartitions(f"boop{stuff}")

## Some trivial references to the method or table in unrelated contexts that should not trigger warnigns.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@
spark.catalog.refreshTable("old.things", "extra-argument")

## Some calls that use a variable whose value is unknown: they could potentially reference a migrated table.
# ucx[table-migrate:+1:4:+1:36] Can't migrate 'refreshTable' because its table name argument is not a constant
# ucx[table-migrate:+1:4:+1:36] Can't migrate 'spark.catalog.refreshTable(name)' because its table name argument cannot be computed
spark.catalog.refreshTable(name)
# ucx[table-migrate:+1:4:+1:46] Can't migrate 'refreshTable' because its table name argument is not a constant
# ucx[table-migrate:+1:4:+1:46] Can't migrate 'spark.catalog.refreshTable(f'boop{stuff}')' because its table name argument cannot be computed
spark.catalog.refreshTable(f"boop{stuff}")

## Some trivial references to the method or table in unrelated contexts that should not trigger warnigns.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,10 @@
pass

## Some calls that use a variable whose value is unknown: they could potentially reference a migrated table.
# ucx[table-migrate:+1:7:+1:38] Can't migrate 'tableExists' because its table name argument is not a constant
# ucx[table-migrate:+1:7:+1:38] Can't migrate 'spark.catalog.tableExists(name)' because its table name argument cannot be computed
if spark.catalog.tableExists(name):
pass
# ucx[table-migrate:+1:7:+1:48] Can't migrate 'tableExists' because its table name argument is not a constant
# ucx[table-migrate:+1:7:+1:48] Can't migrate 'spark.catalog.tableExists(f'boot{stuff}')' because its table name argument cannot be computed
if spark.catalog.tableExists(f"boot{stuff}"):
pass

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@
spark.catalog.uncacheTable("old.things", "extra-argument")

## Some calls that use a variable whose value is unknown: they could potentially reference a migrated table.
# ucx[table-migrate:+1:4:+1:36] Can't migrate 'uncacheTable' because its table name argument is not a constant
# ucx[table-migrate:+1:4:+1:36] Can't migrate 'spark.catalog.uncacheTable(name)' because its table name argument cannot be computed
spark.catalog.uncacheTable(name)
# ucx[table-migrate:+1:4:+1:46] Can't migrate 'uncacheTable' because its table name argument is not a constant
# ucx[table-migrate:+1:4:+1:46] Can't migrate 'spark.catalog.uncacheTable(f'boop{stuff}')' because its table name argument cannot be computed
spark.catalog.uncacheTable(f"boop{stuff}")

## Some trivial references to the method or table in unrelated contexts that should not trigger warnigns.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,9 @@
df.write.insertInto(overwrite=None, tableName="old.things")

## Some calls that use a variable whose value is unknown: they could potentially reference a migrated table.
# ucx[table-migrate:+1:4:+1:29] Can't migrate 'insertInto' because its table name argument is not a constant
# ucx[table-migrate:+1:4:+1:29] Can't migrate 'df.write.insertInto(name)' because its table name argument cannot be computed
df.write.insertInto(name)
# ucx[table-migrate:+1:4:+1:39] Can't migrate 'insertInto' because its table name argument is not a constant
# ucx[table-migrate:+1:4:+1:39] Can't migrate 'df.write.insertInto(f'boop{stuff}')' because its table name argument cannot be computed
df.write.insertInto(f"boop{stuff}")

## Some trivial references to the method or table in unrelated contexts that should not trigger warnigns.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,9 @@
df.write.saveAsTable(format="xyz", name="old.things")

## Some calls that use a variable whose value is unknown: they could potentially reference a migrated table.
# ucx[table-migrate:+1:4:+1:46] Can't migrate 'saveAsTable' because its table name argument is not a constant
# ucx[table-migrate:+1:4:+1:46] Can't migrate 'df.write.format('delta').saveAsTable(name)' because its table name argument cannot be computed
df.write.format("delta").saveAsTable(name)
# ucx[table-migrate:+1:4:+1:56] Can't migrate 'saveAsTable' because its table name argument is not a constant
# ucx[table-migrate:+1:4:+1:56] Can't migrate 'df.write.format('delta').saveAsTable(f'boop{stuff}')' because its table name argument cannot be computed
df.write.format("delta").saveAsTable(f"boop{stuff}")

## Some trivial references to the method or table in unrelated contexts that should not trigger warnigns.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,12 @@
do_stuff_with(df)

## Some calls that use a variable whose value is unknown: they could potentially reference a migrated table.
# ucx[table-migrate:+3:9:+3:26] Can't migrate 'table' because its table name argument is not a constant
# ucx[table-migrate:+3:9:+3:26] Can't migrate 'spark.table(name)' because its table name argument cannot be computed
# TODO: Fix false positive:
# ucx[table-migrate:+1:9:+1:26] The default format changed in Databricks Runtime 8.0, from Parquet to Delta
df = spark.table(name)
do_stuff_with(df)
# ucx[table-migrate:+3:9:+3:36] Can't migrate 'table' because its table name argument is not a constant
# ucx[table-migrate:+3:9:+3:36] Can't migrate 'spark.table(f'boop{stuff}')' because its table name argument cannot be computed
# TODO: Fix false positive:
# ucx[table-migrate:+1:9:+1:36] The default format changed in Databricks Runtime 8.0, from Parquet to Delta
df = spark.table(f"boop{stuff}")
Expand Down
8 changes: 8 additions & 0 deletions tests/unit/source_code/test_queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,3 +77,11 @@ def test_use_database_stops_migration(migration_index):
)
transformed_query = ftf.apply(old_query)
assert transformed_query == new_query


def test_parses_create_schema(migration_index):
query = "CREATE SCHEMA xyz"
session_state = CurrentSessionState(schema="old")
ftf = FromTable(migration_index, session_state=session_state)
advices = ftf.lint(query)
assert not list(advices)

0 comments on commit 7c3dab9

Please sign in to comment.