Skip to content

Commit

Permalink
Fix grant visibility and classification (#1911)
Browse files Browse the repository at this point in the history
## Changes

This PR follows on from #1903 and fixes some problems in the underlying
inventory for grants and their details:

 - The `grant_details` view does not cover grants at the catalog level.
 - The object type for UDF and view grants is misclassified.

(These problems in the underlying inventory were present prior to #1903
but latent because they weren't directly exposed by the dashboard.)

### Functionality 

- [ ] added relevant user documentation
- [ ] added new CLI command
- [ ] modified existing command: `databricks labs ucx ...`
- [ ] added a new workflow
- [X] modified existing workflow: `assessment`
- [ ] added a new table
- [X] modified existing view: `grant_detail`

### Tests

- [X] manually tested
- [X] added unit tests
- [X] added integration tests
- [X] verified on staging environment (screenshot attached)
  • Loading branch information
asnare authored Jun 25, 2024
1 parent 78142ed commit 1a8c507
Show file tree
Hide file tree
Showing 6 changed files with 109 additions and 43 deletions.
34 changes: 22 additions & 12 deletions src/databricks/labs/ucx/hive_metastore/grants.py
Original file line number Diff line number Diff line change
Expand Up @@ -246,8 +246,12 @@ def _crawl(self) -> Iterable[Grant]:
tasks.append(partial(self.grants, catalog=catalog, database=database))
for table in self._tc.snapshot():
fn = partial(self.grants, catalog=catalog, database=table.database)
# views are recognized as tables
tasks.append(partial(fn, table=table.name))
# Views are treated as a type of table and enumerated by the table crawler.
if table.view_text is None:
task = partial(fn, table=table.name)
else:
task = partial(fn, view=table.name)
tasks.append(task)
for udf in self._udf.snapshot():
fn = partial(self.grants, catalog=catalog, database=udf.database)
tasks.append(partial(fn, udf=udf.name))
Expand All @@ -269,6 +273,17 @@ def for_schema_info(self, schema: SchemaInfo):
principal_permissions[grant.principal].add(grant.action_type)
return principal_permissions

# The reported ObjectType for grants doesn't necessary match the type of grants we asked for. This maps
# those that aren't themselves.
_grants_reported_as = {
# SHOW type: RESULT type
"SCHEMA": "DATABASE",
"CATALOG": "CATALOG$",
"VIEW": "TABLE",
"ANY FILE": "ANY_FILE",
"ANONYMOUS FUNCTION": "ANONYMOUS_FUNCTION",
}

def grants(
self,
*,
Expand Down Expand Up @@ -318,18 +333,13 @@ def grants(
any_file=any_file,
anonymous_function=anonymous_function,
)
try: # pylint: disable=too-many-try-statements
grants = []
object_type_normalization = {
"SCHEMA": "DATABASE",
"CATALOG$": "CATALOG",
"ANY_FILE": "ANY FILE",
"ANONYMOUS_FUNCTION": "ANONYMOUS FUNCTION",
}
expected_grant_object_type = self._grants_reported_as.get(on_type, on_type)
grants = []
try:
for row in self._fetch(f"SHOW GRANTS ON {on_type} {escape_sql_identifier(key)}"):
(principal, action_type, object_type, _) = row
object_type = object_type_normalization.get(object_type, object_type)
if on_type != object_type:
# This seems to be here to help unit tests that don't sufficiently narrow the target of mocked queries. -ajs
if object_type != expected_grant_object_type:
continue
# we have to return concrete list, as with yield we're executing
# everything on the main thread.
Expand Down
1 change: 1 addition & 0 deletions src/databricks/labs/ucx/hive_metastore/tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -400,6 +400,7 @@ def _crawl(self) -> Iterable[Table]:
for database in self._all_databases():
logger.debug(f"[{catalog}.{database}] listing tables")
try:
# Also returns views.
table_rows = self._fetch(
f"SHOW TABLES FROM {escape_sql_identifier(catalog)}.{escape_sql_identifier(database)}"
)
Expand Down
9 changes: 6 additions & 3 deletions src/databricks/labs/ucx/queries/views/grant_detail.sql
Original file line number Diff line number Diff line change
Expand Up @@ -6,27 +6,27 @@ WITH
array('Explicitly DENYing privileges is not supported in UC.'),
array()
) as failures
from $inventory.grants where database <> split("$inventory",'[.]')[1]
from $inventory.grants where database is null or database <> split("$inventory",'[.]')[1]
)
SELECT
CASE
WHEN anonymous_function THEN 'ANONYMOUS FUNCTION'
WHEN any_file THEN 'ANY FILE'
WHEN view IS NOT NULL THEN 'VIEW'
WHEN table IS NOT NULL THEN 'TABLE'
WHEN udf IS NOT NULL THEN 'UDF'
WHEN database IS NOT NULL THEN 'DATABASE'
WHEN catalog IS NOT NULL THEN 'CATALOG'
WHEN udf IS NOT NULL THEN 'UDF'
ELSE 'UNKNOWN'
END AS object_type,
CASE
WHEN anonymous_function THEN NULL
WHEN any_file THEN NULL
WHEN view IS NOT NULL THEN CONCAT(catalog, '.', database, '.', view)
WHEN table IS NOT NULL THEN CONCAT(catalog, '.', database, '.', table)
WHEN udf IS NOT NULL THEN CONCAT(catalog, '.', database, '.', udf)
WHEN database IS NOT NULL THEN CONCAT(catalog, '.', database)
WHEN catalog IS NOT NULL THEN catalog
WHEN udf IS NOT NULL THEN CONCAT(catalog, '.', database, '.', udf)
ELSE 'UNKNOWN'
END AS object_id,
action_type,
Expand All @@ -41,7 +41,10 @@ SELECT
catalog,
database,
table,
view,
udf,
any_file,
anonymous_function,
if(size(failures) < 1, 1, 0) as success,
to_json(failures) as failures
FROM raw
49 changes: 49 additions & 0 deletions tests/integration/hive_metastore/test_grant_detail.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,55 @@ def _deployed_schema(runtime_ctx) -> None:
deploy_schema(runtime_ctx.sql_backend, runtime_ctx.inventory_database)


@retried(on=[NotFound, TimeoutError], timeout=dt.timedelta(minutes=3))
def test_all_grant_types(
runtime_ctx: TestRuntimeContext, sql_backend: StatementExecutionBackend, _deployed_schema: None
):
"""Test that all types of grants are properly handled by the view when reporting the object type and identifier."""

# Fixture: a group and schema to hold all the objects, the objects themselves and a grant on each to the group.
group = runtime_ctx.make_group()
schema = runtime_ctx.make_schema()
table = runtime_ctx.make_table(schema_name=schema.name)
view = runtime_ctx.make_table(schema_name=schema.name, view=True, ctas="select 1")
udf = runtime_ctx.make_udf(schema_name=schema.name)
sql_backend.execute(f"GRANT SELECT ON CATALOG {schema.catalog_name} TO `{group.display_name}`")
sql_backend.execute(f"GRANT SELECT ON SCHEMA {schema.full_name} TO `{group.display_name}`")
sql_backend.execute(f"GRANT SELECT ON TABLE {table.full_name} TO `{group.display_name}`")
sql_backend.execute(f"GRANT SELECT ON VIEW {view.full_name} TO `{group.display_name}`")
sql_backend.execute(f"GRANT SELECT ON FUNCTION {udf.full_name} TO `{group.display_name}`")
sql_backend.execute(f"GRANT SELECT ON ANY FILE TO `{group.display_name}`")
sql_backend.execute(f"GRANT SELECT ON ANONYMOUS FUNCTION TO `{group.display_name}`")

# Ensure the view is populated (it's based on the crawled grants) and fetch the content.
GrantsCrawler(runtime_ctx.tables_crawler, runtime_ctx.udfs_crawler).snapshot()

rows = list(
sql_backend.fetch(
f"""
SELECT object_type, object_id
FROM {runtime_ctx.inventory_database}.grant_detail
WHERE principal_type='group' AND principal='{group.display_name}' and action_type='SELECT'
"""
)
)
grants = {(row.object_type, row.object_id) for row in rows}

# TODO: The types of objects targeted by grants is missclassified; this needs to be fixed.

# Test the results.
expected_grants = {
("TABLE", table.full_name),
("VIEW", view.full_name),
("DATABASE", schema.full_name),
("CATALOG", schema.catalog_name),
("UDF", udf.full_name),
("ANY FILE", None),
("ANONYMOUS FUNCTION", None),
}
assert grants == expected_grants


@retried(on=[NotFound, TimeoutError], timeout=dt.timedelta(minutes=3))
def test_grant_findings(
runtime_ctx: TestRuntimeContext, sql_backend: StatementExecutionBackend, _deployed_schema: None
Expand Down
39 changes: 21 additions & 18 deletions tests/integration/hive_metastore/test_grants.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,20 +40,21 @@ def test_all_grants_in_databases(runtime_ctx, sql_backend, make_group):
grants = GrantsCrawler(runtime_ctx.tables_crawler, runtime_ctx.udfs_crawler)

all_grants = {}
for grant in grants.snapshot():
for grant in list(grants.snapshot()):
logging.info(f"grant:\n{grant}\n hive: {grant.hive_grant_sql()}\n uc: {grant.uc_grant_sql()}")
all_grants[f"{grant.principal}.{grant.object_key}"] = grant.action_type
object_type, object_key = grant.this_type_and_key()
all_grants[f"{grant.principal}.{object_type}.{object_key}"] = grant.action_type

assert len(all_grants) >= 8, "must have at least three grants"
assert all_grants[f"{group_a.display_name}.hive_metastore.{schema_c.name}"] == "USAGE"
assert all_grants[f"{group_b.display_name}.hive_metastore.{schema_c.name}"] == "USAGE"
assert all_grants[f"{group_a.display_name}.{table_a.full_name}"] == "SELECT"
assert all_grants[f"{group_b.display_name}.{table_b.full_name}"] == "SELECT"
assert all_grants[f"{group_b.display_name}.{schema_b.full_name}"] == "MODIFY"
assert all_grants[f"{group_b.display_name}.{empty_schema.full_name}"] == "MODIFY"
assert all_grants[f"{group_b.display_name}.{view_c.full_name}"] == "MODIFY"
assert all_grants[f"{group_b.display_name}.{view_d.full_name}"] == "DENIED_MODIFY"
assert all_grants[f"{group_b.display_name}.{table_e.full_name}"] == "MODIFY"
assert len(all_grants) >= 9, "must have at least nine grants"
assert all_grants[f"{group_a.display_name}.DATABASE.hive_metastore.{schema_c.name}"] == "USAGE"
assert all_grants[f"{group_b.display_name}.DATABASE.hive_metastore.{schema_c.name}"] == "USAGE"
assert all_grants[f"{group_a.display_name}.TABLE.{table_a.full_name}"] == "SELECT"
assert all_grants[f"{group_b.display_name}.TABLE.{table_b.full_name}"] == "SELECT"
assert all_grants[f"{group_b.display_name}.DATABASE.{schema_b.full_name}"] == "MODIFY"
assert all_grants[f"{group_b.display_name}.DATABASE.{empty_schema.full_name}"] == "MODIFY"
assert all_grants[f"{group_b.display_name}.VIEW.{view_c.full_name}"] == "MODIFY"
assert all_grants[f"{group_b.display_name}.VIEW.{view_d.full_name}"] == "DENIED_MODIFY"
assert all_grants[f"{group_b.display_name}.TABLE.{table_e.full_name}"] == "MODIFY"


@retried(on=[NotFound], timeout=timedelta(minutes=3))
Expand All @@ -72,13 +73,15 @@ def test_all_grants_for_udfs_in_databases(runtime_ctx, sql_backend, make_group):

grants = GrantsCrawler(runtime_ctx.tables_crawler, runtime_ctx.udfs_crawler)

crawler_snapshot = list(grants.snapshot())
actual_grants = defaultdict(set)
for grant in grants.snapshot():
actual_grants[f"{grant.principal}.{grant.object_key}"].add(grant.action_type)
for grant in crawler_snapshot:
object_type, object_key = grant.this_type_and_key()
actual_grants[f"{grant.principal}.{object_type}.{object_key}"].add(grant.action_type)

assert {"SELECT", "READ_METADATA", "OWN"} == actual_grants[f"{group_a.display_name}.{udf_a.full_name}"]
assert {"SELECT", "READ_METADATA"} == actual_grants[f"{group_a.display_name}.{udf_b.full_name}"]
assert {"DENIED_READ_METADATA"} == actual_grants[f"{group_b.display_name}.{udf_b.full_name}"]
assert {"SELECT", "READ_METADATA", "OWN"} == actual_grants[f"{group_a.display_name}.FUNCTION.{udf_a.full_name}"]
assert {"SELECT", "READ_METADATA"} == actual_grants[f"{group_a.display_name}.FUNCTION.{udf_b.full_name}"]
assert {"DENIED_READ_METADATA"} == actual_grants[f"{group_b.display_name}.FUNCTION.{udf_b.full_name}"]


@retried(on=[NotFound], timeout=timedelta(minutes=3))
Expand All @@ -97,7 +100,7 @@ def test_all_grants_for_other_objects(
sql_backend.execute(f"DENY SELECT ON ANONYMOUS FUNCTION TO `{group_d.display_name}`")

crawler = GrantsCrawler(runtime_ctx.tables_crawler, runtime_ctx.udfs_crawler)
all_found_grants = crawler.snapshot()
all_found_grants = list(crawler.snapshot())

found_any_file_grants = defaultdict(set)
found_anonymous_function_grants = defaultdict(set)
Expand Down
20 changes: 10 additions & 10 deletions tests/unit/hive_metastore/test_grants.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ def test_crawler_no_data():
table = TablesCrawler(sql_backend, "schema")
udf = UdfsCrawler(sql_backend, "schema")
crawler = GrantsCrawler(table, udf)
grants = crawler.snapshot()
grants = list(crawler.snapshot())
assert len(grants) == 0


Expand Down Expand Up @@ -212,7 +212,7 @@ def test_crawler_crawl():
table = TablesCrawler(sql_backend, "schema")
udf = UdfsCrawler(sql_backend, "schema")
crawler = GrantsCrawler(table, udf)
grants = crawler.snapshot()
grants = list(crawler.snapshot())
assert len(grants) == 3


Expand Down Expand Up @@ -240,7 +240,7 @@ def test_crawler_udf_crawl():
table = TablesCrawler(sql_backend, "schema")
udf = UdfsCrawler(sql_backend, "schema")
crawler = GrantsCrawler(table, udf)
grants = crawler.snapshot()
grants = list(crawler.snapshot())

assert len(grants) == 2
assert Grant(
Expand Down Expand Up @@ -272,7 +272,7 @@ def test_crawler_snapshot_when_no_data():
table = TablesCrawler(sql_backend, "schema")
udf = UdfsCrawler(sql_backend, "schema")
crawler = GrantsCrawler(table, udf)
snapshot = crawler.snapshot()
snapshot = list(crawler.snapshot())
assert len(snapshot) == 0


Expand All @@ -281,7 +281,7 @@ def test_crawler_snapshot_with_data():
table = TablesCrawler(sql_backend, "schema")
udf = UdfsCrawler(sql_backend, "schema")
crawler = GrantsCrawler(table, udf)
snapshot = crawler.snapshot()
snapshot = list(crawler.snapshot())
assert len(snapshot) == 3


Expand All @@ -308,7 +308,7 @@ def test_grants_returning_error_when_showing_grants():
udf = UdfsCrawler(backend, "default")
crawler = GrantsCrawler(table_crawler, udf)

results = crawler.snapshot()
results = list(crawler.snapshot())
assert results == [
Grant(
principal="principal1",
Expand Down Expand Up @@ -342,7 +342,7 @@ def test_grants_returning_error_when_describing():
udf = UdfsCrawler(backend, "default")
crawler = GrantsCrawler(table_crawler, udf)

results = crawler.snapshot()
results = list(crawler.snapshot())
assert results == [
Grant(
principal="principal1",
Expand Down Expand Up @@ -381,7 +381,7 @@ def test_udf_grants_returning_error_when_showing_grants():
udf = UdfsCrawler(backend, "default")
crawler = GrantsCrawler(table_crawler, udf)

results = crawler.snapshot()
results = list(crawler.snapshot())
assert results == [
Grant(
principal="principal1",
Expand Down Expand Up @@ -417,7 +417,7 @@ def test_udf_grants_returning_error_when_describing():
udf = UdfsCrawler(backend, "default")
crawler = GrantsCrawler(table_crawler, udf)

results = crawler.snapshot()
results = list(crawler.snapshot())
assert results == [
Grant(
principal="principal1",
Expand Down Expand Up @@ -460,6 +460,6 @@ def test_crawler_should_filter_databases():
table = TablesCrawler(sql_backend, "schema", include_databases=["database_one"])
udf = UdfsCrawler(sql_backend, "schema", include_databases=["database_one"])
crawler = GrantsCrawler(table, udf, include_databases=["database_one"])
grants = crawler.snapshot()
grants = list(crawler.snapshot())
assert len(grants) == 3
assert 'SHOW TABLES FROM hive_metastore.database_one' in sql_backend.queries

0 comments on commit 1a8c507

Please sign in to comment.