Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 38 additions & 0 deletions superset/db_engine_specs/athena.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@

from flask_babel import gettext as __
from sqlalchemy import types
from sqlalchemy.engine.url import URL

from superset.constants import TimeGrain
from superset.db_engine_specs.base import BaseEngineSpec
Expand All @@ -38,6 +39,7 @@ class AthenaEngineSpec(BaseEngineSpec):
disable_ssh_tunneling = True
# Athena doesn't support IS true/false syntax, use = true/false instead
use_equality_for_boolean_filters = True
supports_dynamic_schema = True

_time_grain_expressions = {
None: "{col}",
Expand Down Expand Up @@ -92,3 +94,39 @@ def _mutate_label(label: str) -> str:
:return: Conditionally mutated label
"""
return label.lower()

@classmethod
def adjust_engine_params(
    cls,
    uri: URL,
    connect_args: dict[str, Any],
    catalog: str | None = None,
    schema: str | None = None,
) -> tuple[URL, dict[str, Any]]:
    """
    Point the Athena SQLAlchemy URI at the requested schema.

    For AWS Athena the SQLAlchemy URI looks like this:

        awsathena+rest://{aws_access_key_id}:{aws_secret_access_key}@athena.{region_name}.amazonaws.com:443/{schema_name}?s3_staging_dir={s3_staging_dir}&...

    The schema lives in the database component of the URI, so switching
    schemas only requires replacing that component.

    :param uri: SQLAlchemy URI to adjust
    :param connect_args: Connection arguments; returned as received
    :param catalog: Not used by this method
    :param schema: Schema to select; when empty or ``None`` the URI is left as is
    :return: Tuple of the (possibly updated) URI and the connection arguments
    """
    if schema:
        # Only the database component changes; everything else in the URI
        # (host, port, query parameters) is carried over.
        uri = uri.set(database=schema)

    return uri, connect_args
Comment thread
ishmulyan marked this conversation as resolved.
Outdated

@classmethod
def get_schema_from_engine_params(
    cls,
    sqlalchemy_uri: URL,
    connect_args: dict[str, Any],
) -> str | None:
    """
    Return the schema configured in the SQLAlchemy URI, if any.

    For AWS Athena the SQLAlchemy URI looks like this:

        awsathena+rest://{aws_access_key_id}:{aws_secret_access_key}@athena.{region_name}.amazonaws.com:443/{schema_name}?s3_staging_dir={s3_staging_dir}&...

    :param sqlalchemy_uri: SQLAlchemy URI of the database
    :param connect_args: Connection arguments; not used by this method
    :return: The schema name, or ``None`` when the URI does not name one
    """
    # A URI with a trailing slash but no schema (``...:443/?s3_staging_dir=...``)
    # can parse to an empty database string rather than ``None``; normalise it
    # so callers always get ``None`` for "no schema configured".
    return sqlalchemy_uri.database or None

Copilot AI Nov 5, 2025

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The get_schema_from_engine_params method should handle the case where sqlalchemy_uri.database is None or an empty string. Without this check, accessing .database on a URL without a database component could lead to issues. Consider adding a guard to return None if database is falsy, similar to how Presto and Snowflake handle this (they check if "/" not in database).

Suggested change
return sqlalchemy_uri.database
database = sqlalchemy_uri.database
if not database:
return None
return database

Copilot uses AI. Check for mistakes.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If sqlalchemy_uri.database is None, then None is returned as is, so there is no need to return None explicitly. No additional checks or guards are needed.
Presto and Snowflake check whether database contains / because they use different SQLAlchemy connection strings, in which the database can look like {catalog}/{schema}. If sqlalchemy_uri.database doesn't contain /, it means no schema was provided and None is returned.

Copilot AI Nov 5, 2025

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Consider adding explicit handling for empty strings to match the pattern used in similar implementations. The current code returns the database attribute directly, but if the database is an empty string, it should return None instead. Add: return sqlalchemy_uri.database or None to ensure consistent behavior.

Suggested change
return sqlalchemy_uri.database
return sqlalchemy_uri.database or None

Copilot uses AI. Check for mistakes.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I do not see the same pattern across the codebase and don't think it's needed. Could you point me to the code?

57 changes: 57 additions & 0 deletions tests/unit_tests/db_engine_specs/test_athena.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from typing import Optional

import pytest
from sqlalchemy.engine.url import make_url

from superset.errors import ErrorLevel, SupersetError, SupersetErrorType
from tests.unit_tests.db_engine_specs.utils import assert_convert_dttm
Expand Down Expand Up @@ -120,3 +121,59 @@ def test_handle_boolean_filter() -> None:
str(result_false.compile(compile_kwargs={"literal_binds": True}))
== "test_col = false"
)


def test_adjust_engine_params() -> None:
    """
    Test `adjust_engine_params`.

    Without a schema the URI must come back untouched; with a schema the
    database component of the URI must be replaced by it.
    """
    from superset.db_engine_specs.athena import AthenaEngineSpec

    default_uri = "awsathena+rest://athena.us-east-1.amazonaws.com:443/default?s3_staging_dir=s3%3A%2F%2Fathena-staging"
    new_schema_uri = "awsathena+rest://athena.us-east-1.amazonaws.com:443/new_schema?s3_staging_dir=s3%3A%2F%2Fathena-staging"

    url = make_url(default_uri)

    # No schema requested: the URI is returned as it was passed in.
    untouched, _ = AthenaEngineSpec.adjust_engine_params(url, {})
    assert str(untouched) == default_uri

    # Schema requested: only the database component changes.
    adjusted, _ = AthenaEngineSpec.adjust_engine_params(
        url,
        {},
        schema="new_schema",
    )
    assert str(adjusted) == new_schema_uri


def test_get_schema_from_engine_params() -> None:
    """
    Test the ``get_schema_from_engine_params`` method.

    Covers a URI that names a schema and a URI that has no schema at all.
    """
    from superset.db_engine_specs.athena import AthenaEngineSpec

    url_with_schema = make_url(
        "awsathena+rest://athena.us-east-1.amazonaws.com:443/default?s3_staging_dir=s3%3A%2F%2Fathena-staging"
    )
    url_without_schema = make_url(
        "awsathena+rest://athena.us-east-1.amazonaws.com:443?s3_staging_dir=s3%3A%2F%2Fathena-staging"
    )

    # The schema is read from the database component of the URI.
    found = AthenaEngineSpec.get_schema_from_engine_params(url_with_schema, {})
    assert found == "default"

    # With no database component there is no schema to report.
    missing = AthenaEngineSpec.get_schema_from_engine_params(url_without_schema, {})
    assert missing is None
Loading