From 732f66b54aa61ee49f5a71bcae4f17af385d9640 Mon Sep 17 00:00:00 2001 From: fabian_zse Date: Fri, 8 Aug 2025 15:42:09 +0200 Subject: [PATCH 1/5] add databricks oauth support --- docs/docs/configuration/databases.mdx | 59 ++- pyproject.toml | 4 +- superset/db_engine_specs/databricks.py | 46 ++ .../db_engine_specs/test_databricks.py | 420 +++++++++++++++++- 4 files changed, 525 insertions(+), 4 deletions(-) diff --git a/docs/docs/configuration/databases.mdx b/docs/docs/configuration/databases.mdx index 2b293e79c86a..267183b0909a 100644 --- a/docs/docs/configuration/databases.mdx +++ b/docs/docs/configuration/databases.mdx @@ -32,7 +32,7 @@ install new database drivers into your Superset configuration. ### Supported Databases and Dependencies Some of the recommended packages are shown below. Please refer to -[pyproject.toml](https://github.com/apache/superset/blob/master/pyproject.toml) for the versions that +[pyproject.toml](https://github.com/apache/superset/blob/master/pyproject.toml) for the versions thatdocs/docs/configuration/databases.mdx are compatible with Superset. |
Database
| PyPI package | Connection String | @@ -519,6 +519,63 @@ For a connection to a SQL endpoint you need to use the HTTP path from the endpoi {"connect_args": {"http_path": "/sql/1.0/endpoints/****", "driver_path": "/path/to/odbc/driver"}} ``` +##### OAuth2 Authentication + +Superset supports OAuth2 authentication for Databricks, allowing users to authenticate with their personal Databricks accounts instead of using shared access tokens. This provides better security and audit capabilities. + +###### Prerequisites + +1. Create an OAuth2 application in your Databricks account: + - Go to your Databricks account console + - Navigate to **Settings** → **Developer** → **OAuth apps** + - Create a new OAuth app with the redirect URI: `http://your-superset-host:port/api/v1/database/oauth2/` + +2. Configure OAuth2 in your `superset_config.py`: + +```python +from datetime import timedelta + +# OAuth2 configuration for Databricks +DATABASE_OAUTH2_CLIENTS = { + "Databricks (legacy)": { + "id": "your-databricks-client-id", + "secret": "your-databricks-client-secret", + "scope": "sql", + "authorization_request_uri": "https://accounts.cloud.databricks.com/oidc/accounts/{account_id}/v1/authorize", + "token_request_uri": "https://accounts.cloud.databricks.com/oidc/accounts/{account_id}/v1/token", + }, + "Databricks": { + "id": "your-databricks-client-id", + "secret": "your-databricks-client-secret", + "scope": "sql", + "authorization_request_uri": "https://accounts.cloud.databricks.com/oidc/accounts/{account_id}/v1/authorize", + "token_request_uri": "https://accounts.cloud.databricks.com/oidc/accounts/{account_id}/v1/token", + }, +} + +# OAuth2 redirect URI (adjust hostname/port for your setup) +DATABASE_OAUTH2_REDIRECT_URI = "http://your-superset-host:port/api/v1/database/oauth2/" + +# Optional: OAuth2 timeout +DATABASE_OAUTH2_TIMEOUT = timedelta(seconds=30) +``` + +Replace the following placeholders: +- `your-databricks-client-id`: Your Databricks OAuth2 application client ID +- 
`your-databricks-client-secret`: Your Databricks OAuth2 application client secret +- `{account_id}`: Your Databricks account ID (found in your workspace URL) +- `your-superset-host:port`: Your Superset instance hostname and port + +###### Usage + +Once configured, users can: + +1. Connect to Databricks databases normally using access tokens +2. When querying data, Superset will automatically redirect users to authenticate with Databricks if needed +3. User-specific OAuth2 tokens will be used for database connections, providing better security and audit trails + +This feature works with both "Databricks (legacy)" and "Databricks" engine types. + #### Denodo The recommended connector library for Denodo is diff --git a/pyproject.toml b/pyproject.toml index edfcd145aefd..6dcf34baec0d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -119,8 +119,8 @@ cockroachdb = ["cockroachdb>=0.3.5, <0.4"] crate = ["sqlalchemy-cratedb>=0.40.1, <1"] databend = ["databend-sqlalchemy>=0.3.2, <1.0"] databricks = [ - "databricks-sql-connector>=2.0.2, <3", - "sqlalchemy-databricks>=0.2.0", + "databricks-sql-connector>=4.0.5, <5", + "databricks-sqlalchemy>=1.0.5, <2", ] db2 = ["ibm-db-sa>0.3.8, <=0.4.0"] denodo = ["denodo-sqlalchemy~=1.0.6"] diff --git a/superset/db_engine_specs/databricks.py b/superset/db_engine_specs/databricks.py index a885684408f8..4cd3c0b6345b 100644 --- a/superset/db_engine_specs/databricks.py +++ b/superset/db_engine_specs/databricks.py @@ -33,6 +33,7 @@ from superset.db_engine_specs.base import BaseEngineSpec, BasicParametersMixin from superset.db_engine_specs.hive import HiveEngineSpec from superset.errors import ErrorLevel, SupersetError, SupersetErrorType +from superset.exceptions import OAuth2RedirectError from superset.utils import json from superset.utils.core import get_user_agent, QuerySource from superset.utils.network import is_hostname_valid, is_port_open @@ -244,6 +245,29 @@ class DatabricksDynamicBaseEngineSpec(BasicParametersMixin, 
DatabricksBaseEngine "port": "port", } + @classmethod + def impersonate_user( + cls, + database: Database, + username: str | None, + user_token: str | None, + url: URL, + engine_kwargs: dict[str, Any], + ) -> tuple[URL, dict[str, Any]]: + """ + Update connection with OAuth2 access token for user impersonation. + """ + if user_token: + # Replace the access token in the URL with the user's OAuth2 token + url = url.set(password=user_token) + + # Also update connect_args if they contain access token + connect_args = engine_kwargs.setdefault("connect_args", {}) + if "access_token" in connect_args: + connect_args["access_token"] = user_token + + return url, engine_kwargs + @staticmethod def get_extra_params( database: Database, source: QuerySource | None = None @@ -424,6 +448,17 @@ class DatabricksNativeEngineSpec(DatabricksDynamicBaseEngineSpec): supports_dynamic_catalog = True supports_cross_catalog_queries = True + # OAuth 2.0 support + supports_oauth2 = True + oauth2_exception = OAuth2RedirectError + oauth2_scope = "sql" + oauth2_authorization_request_uri = ( + "https://accounts.cloud.databricks.com/oidc/accounts/{}/v1/authorize" + ) + oauth2_token_request_uri = ( + "https://accounts.cloud.databricks.com/oidc/accounts/{}/v1/token" # noqa: S105 + ) + @classmethod def build_sqlalchemy_uri( # type: ignore cls, parameters: DatabricksNativeParametersType, *_ @@ -563,6 +598,17 @@ class DatabricksPythonConnectorEngineSpec(DatabricksDynamicBaseEngineSpec): supports_dynamic_schema = supports_catalog = supports_dynamic_catalog = True + # OAuth 2.0 support + supports_oauth2 = True + oauth2_exception = OAuth2RedirectError + oauth2_scope = "sql" + oauth2_authorization_request_uri = ( + "https://accounts.cloud.databricks.com/oidc/accounts/{}/v1/authorize" + ) + oauth2_token_request_uri = ( + "https://accounts.cloud.databricks.com/oidc/accounts/{}/v1/token" # noqa: S105 + ) + @classmethod def build_sqlalchemy_uri( # type: ignore cls, parameters: 
DatabricksPythonConnectorParametersType, *_ diff --git a/tests/unit_tests/db_engine_specs/test_databricks.py b/tests/unit_tests/db_engine_specs/test_databricks.py index a58500200977..53d5d62b3696 100644 --- a/tests/unit_tests/db_engine_specs/test_databricks.py +++ b/tests/unit_tests/db_engine_specs/test_databricks.py @@ -18,13 +18,21 @@ from datetime import datetime from typing import Optional +from urllib.parse import parse_qs, urlparse import pytest from pytest_mock import MockerFixture +from sqlalchemy.engine.url import make_url -from superset.db_engine_specs.databricks import DatabricksNativeEngineSpec +from superset.db_engine_specs.databricks import ( + DatabricksNativeEngineSpec, + DatabricksPythonConnectorEngineSpec, +) from superset.errors import ErrorLevel, SupersetError, SupersetErrorType +from superset.exceptions import OAuth2RedirectError +from superset.superset_typing import OAuth2ClientConfig from superset.utils import json +from superset.utils.oauth2 import decode_oauth2_state from tests.unit_tests.db_engine_specs.utils import assert_convert_dttm from tests.unit_tests.fixtures.common import dttm # noqa: F401 @@ -284,3 +292,413 @@ def test_get_prequeries(mocker: MockerFixture) -> None: "USE CATALOG `escaped-hyphen`", "USE SCHEMA `hyphen-escaped`", ] + + +# OAuth2 Tests + + +def test_oauth2_attributes() -> None: + """ + Test that OAuth2 attributes are properly set for both engine specs. 
+ """ + # Test DatabricksNativeEngineSpec + assert DatabricksNativeEngineSpec.supports_oauth2 is True + assert DatabricksNativeEngineSpec.oauth2_exception is OAuth2RedirectError + assert DatabricksNativeEngineSpec.oauth2_scope == "sql" + assert ( + DatabricksNativeEngineSpec.oauth2_authorization_request_uri + == "https://accounts.cloud.databricks.com/oidc/accounts/{}/v1/authorize" + ) + assert ( + DatabricksNativeEngineSpec.oauth2_token_request_uri + == "https://accounts.cloud.databricks.com/oidc/accounts/{}/v1/token" # noqa: S105 + ) + + # Test DatabricksPythonConnectorEngineSpec + assert DatabricksPythonConnectorEngineSpec.supports_oauth2 is True + assert DatabricksPythonConnectorEngineSpec.oauth2_exception is OAuth2RedirectError + assert DatabricksPythonConnectorEngineSpec.oauth2_scope == "sql" + assert ( + DatabricksPythonConnectorEngineSpec.oauth2_authorization_request_uri + == "https://accounts.cloud.databricks.com/oidc/accounts/{}/v1/authorize" + ) + assert ( + DatabricksPythonConnectorEngineSpec.oauth2_token_request_uri + == "https://accounts.cloud.databricks.com/oidc/accounts/{}/v1/token" # noqa: S105 + ) + + +def test_impersonate_user_with_token(mocker: MockerFixture) -> None: + """ + Test impersonate_user method with OAuth2 token for DatabricksNativeEngineSpec. 
+ """ + database = mocker.MagicMock() + original_url = make_url( + "databricks+connector://token:original-token@host:443/database" + ) + engine_kwargs = {"connect_args": {"access_token": "original-token"}} + + # Test with user token + url, kwargs = DatabricksNativeEngineSpec.impersonate_user( + database=database, + username="user1", + user_token="user-oauth-token", # noqa: S106 + url=original_url, + engine_kwargs=engine_kwargs, + ) + + # Check that the password (token) was updated in the URL + assert url.password == "user-oauth-token" # noqa: S105 + # Check that access_token was updated in connect_args + assert kwargs["connect_args"]["access_token"] == "user-oauth-token" # noqa: S105 + + +def test_impersonate_user_without_token(mocker: MockerFixture) -> None: + """ + Test impersonate_user method without OAuth2 token. + """ + database = mocker.MagicMock() + original_url = make_url( + "databricks+connector://token:original-token@host:443/database" + ) + engine_kwargs = {"connect_args": {"access_token": "original-token"}} + + # Test without user token + url, kwargs = DatabricksNativeEngineSpec.impersonate_user( + database=database, + username="user1", + user_token=None, + url=original_url, + engine_kwargs=engine_kwargs, + ) + + # Check that nothing was changed + assert url.password == "original-token" # noqa: S105 + assert kwargs["connect_args"]["access_token"] == "original-token" # noqa: S105 + + +def test_impersonate_user_python_connector(mocker: MockerFixture) -> None: + """ + Test impersonate_user method for DatabricksPythonConnectorEngineSpec. 
+ """ + database = mocker.MagicMock() + original_url = make_url( + "databricks://token:original-token@host:443?http_path=path&catalog=main&schema=default" + ) + engine_kwargs = {"connect_args": {"access_token": "original-token"}} + + # Test with user token + url, kwargs = DatabricksPythonConnectorEngineSpec.impersonate_user( + database=database, + username="user1", + user_token="user-oauth-token", # noqa: S106 + url=original_url, + engine_kwargs=engine_kwargs, + ) + + # Check that the password (token) was updated in the URL + assert url.password == "user-oauth-token" # noqa: S105 + # Check that access_token was updated in connect_args + assert kwargs["connect_args"]["access_token"] == "user-oauth-token" # noqa: S105 + + +@pytest.fixture +def oauth2_config_native() -> OAuth2ClientConfig: + """ + Config for Databricks Native OAuth2. + """ + return { + "id": "databricks-client-id", + "secret": "databricks-client-secret", + "scope": "sql", + "redirect_uri": "http://localhost:8088/api/v1/database/oauth2/", + "authorization_request_uri": "https://accounts.cloud.databricks.com/oidc/accounts/12345/v1/authorize", + "token_request_uri": "https://accounts.cloud.databricks.com/oidc/accounts/12345/v1/token", + "request_content_type": "json", + } + + +@pytest.fixture +def oauth2_config_python() -> OAuth2ClientConfig: + """ + Config for Databricks Python Connector OAuth2. + """ + return { + "id": "databricks-client-id", + "secret": "databricks-client-secret", + "scope": "sql", + "redirect_uri": "http://localhost:8088/api/v1/database/oauth2/", + "authorization_request_uri": "https://accounts.cloud.databricks.com/oidc/accounts/12345/v1/authorize", + "token_request_uri": "https://accounts.cloud.databricks.com/oidc/accounts/12345/v1/token", + "request_content_type": "json", + } + + +def test_is_oauth2_enabled_no_config_native(mocker: MockerFixture) -> None: + """ + Test `is_oauth2_enabled` when OAuth2 is not configured for Native engine. 
+ """ + mocker.patch( + "flask.current_app.config", + new={"DATABASE_OAUTH2_CLIENTS": {}}, + ) + + assert DatabricksNativeEngineSpec.is_oauth2_enabled() is False + + +def test_is_oauth2_enabled_config_native(mocker: MockerFixture) -> None: + """ + Test `is_oauth2_enabled` when OAuth2 is configured for Native engine. + """ + mocker.patch( + "flask.current_app.config", + new={ + "DATABASE_OAUTH2_CLIENTS": { + "Databricks (legacy)": { + "id": "client-id", + "secret": "client-secret", + }, + } + }, + ) + + assert DatabricksNativeEngineSpec.is_oauth2_enabled() is True + + +def test_is_oauth2_enabled_no_config_python(mocker: MockerFixture) -> None: + """ + Test `is_oauth2_enabled` when OAuth2 is not configured for Python Connector engine. + """ + mocker.patch( + "flask.current_app.config", + new={"DATABASE_OAUTH2_CLIENTS": {}}, + ) + + assert DatabricksPythonConnectorEngineSpec.is_oauth2_enabled() is False + + +def test_is_oauth2_enabled_config_python(mocker: MockerFixture) -> None: + """ + Test `is_oauth2_enabled` when OAuth2 is configured for Python Connector engine. + """ + mocker.patch( + "flask.current_app.config", + new={ + "DATABASE_OAUTH2_CLIENTS": { + "Databricks": { + "id": "client-id", + "secret": "client-secret", + }, + } + }, + ) + + assert DatabricksPythonConnectorEngineSpec.is_oauth2_enabled() is True + + +def test_get_oauth2_authorization_uri_native( + mocker: MockerFixture, + oauth2_config_native: OAuth2ClientConfig, +) -> None: + """ + Test `get_oauth2_authorization_uri` for Native engine. 
+ """ + from superset.db_engine_specs.base import OAuth2State + + state: OAuth2State = { + "database_id": 1, + "user_id": 1, + "default_redirect_uri": "http://localhost:8088/api/v1/database/oauth2/", + "tab_id": "1234", + } + + url = DatabricksNativeEngineSpec.get_oauth2_authorization_uri( + oauth2_config_native, state + ) + parsed = urlparse(url) + assert parsed.netloc == "accounts.cloud.databricks.com" + assert parsed.path == "/oidc/accounts/12345/v1/authorize" + + query = parse_qs(parsed.query) + assert query["scope"][0] == "sql" + encoded_state = query["state"][0].replace("%2E", ".") + assert decode_oauth2_state(encoded_state) == state + + +def test_get_oauth2_authorization_uri_python( + mocker: MockerFixture, + oauth2_config_python: OAuth2ClientConfig, +) -> None: + """ + Test `get_oauth2_authorization_uri` for Python Connector engine. + """ + from superset.db_engine_specs.base import OAuth2State + + state: OAuth2State = { + "database_id": 1, + "user_id": 1, + "default_redirect_uri": "http://localhost:8088/api/v1/database/oauth2/", + "tab_id": "1234", + } + + url = DatabricksPythonConnectorEngineSpec.get_oauth2_authorization_uri( + oauth2_config_python, state + ) + parsed = urlparse(url) + assert parsed.netloc == "accounts.cloud.databricks.com" + assert parsed.path == "/oidc/accounts/12345/v1/authorize" + + query = parse_qs(parsed.query) + assert query["scope"][0] == "sql" + encoded_state = query["state"][0].replace("%2E", ".") + assert decode_oauth2_state(encoded_state) == state + + +def test_get_oauth2_token_native( + mocker: MockerFixture, + oauth2_config_native: OAuth2ClientConfig, +) -> None: + """ + Test `get_oauth2_token` for Native engine. 
+ """ + requests = mocker.patch("superset.db_engine_specs.base.requests") + requests.post().json.return_value = { + "access_token": "access-token", + "expires_in": 3600, + "scope": "sql", + "token_type": "Bearer", + "refresh_token": "refresh-token", + } + + assert DatabricksNativeEngineSpec.get_oauth2_token( + oauth2_config_native, "authorization-code" + ) == { + "access_token": "access-token", + "expires_in": 3600, + "scope": "sql", + "token_type": "Bearer", + "refresh_token": "refresh-token", + } + requests.post.assert_called_with( + "https://accounts.cloud.databricks.com/oidc/accounts/12345/v1/token", + json={ + "code": "authorization-code", + "client_id": "databricks-client-id", + "client_secret": "databricks-client-secret", + "redirect_uri": "http://localhost:8088/api/v1/database/oauth2/", + "grant_type": "authorization_code", + }, + timeout=30.0, + ) + + +def test_get_oauth2_token_python( + mocker: MockerFixture, + oauth2_config_python: OAuth2ClientConfig, +) -> None: + """ + Test `get_oauth2_token` for Python Connector engine. 
+ """ + requests = mocker.patch("superset.db_engine_specs.base.requests") + requests.post().json.return_value = { + "access_token": "access-token", + "expires_in": 3600, + "scope": "sql", + "token_type": "Bearer", + "refresh_token": "refresh-token", + } + + assert DatabricksPythonConnectorEngineSpec.get_oauth2_token( + oauth2_config_python, "authorization-code" + ) == { + "access_token": "access-token", + "expires_in": 3600, + "scope": "sql", + "token_type": "Bearer", + "refresh_token": "refresh-token", + } + requests.post.assert_called_with( + "https://accounts.cloud.databricks.com/oidc/accounts/12345/v1/token", + json={ + "code": "authorization-code", + "client_id": "databricks-client-id", + "client_secret": "databricks-client-secret", + "redirect_uri": "http://localhost:8088/api/v1/database/oauth2/", + "grant_type": "authorization_code", + }, + timeout=30.0, + ) + + +def test_get_oauth2_fresh_token_native( + mocker: MockerFixture, + oauth2_config_native: OAuth2ClientConfig, +) -> None: + """ + Test `get_oauth2_fresh_token` for Native engine. 
+ """ + requests = mocker.patch("superset.db_engine_specs.base.requests") + requests.post().json.return_value = { + "access_token": "new-access-token", + "expires_in": 3600, + "scope": "sql", + "token_type": "Bearer", + "refresh_token": "new-refresh-token", + } + + assert DatabricksNativeEngineSpec.get_oauth2_fresh_token( + oauth2_config_native, "old-refresh-token" + ) == { + "access_token": "new-access-token", + "expires_in": 3600, + "scope": "sql", + "token_type": "Bearer", + "refresh_token": "new-refresh-token", + } + requests.post.assert_called_with( + "https://accounts.cloud.databricks.com/oidc/accounts/12345/v1/token", + json={ + "client_id": "databricks-client-id", + "client_secret": "databricks-client-secret", + "refresh_token": "old-refresh-token", + "grant_type": "refresh_token", + }, + timeout=30.0, + ) + + +def test_get_oauth2_fresh_token_python( + mocker: MockerFixture, + oauth2_config_python: OAuth2ClientConfig, +) -> None: + """ + Test `get_oauth2_fresh_token` for Python Connector engine. 
+ """ + requests = mocker.patch("superset.db_engine_specs.base.requests") + requests.post().json.return_value = { + "access_token": "new-access-token", + "expires_in": 3600, + "scope": "sql", + "token_type": "Bearer", + "refresh_token": "new-refresh-token", + } + + assert DatabricksPythonConnectorEngineSpec.get_oauth2_fresh_token( + oauth2_config_python, "old-refresh-token" + ) == { + "access_token": "new-access-token", + "expires_in": 3600, + "scope": "sql", + "token_type": "Bearer", + "refresh_token": "new-refresh-token", + } + requests.post.assert_called_with( + "https://accounts.cloud.databricks.com/oidc/accounts/12345/v1/token", + json={ + "client_id": "databricks-client-id", + "client_secret": "databricks-client-secret", + "refresh_token": "old-refresh-token", + "grant_type": "refresh_token", + }, + timeout=30.0, + ) From 7684aa792c40a3070c5d3f2ee467fdaac59e7e96 Mon Sep 17 00:00:00 2001 From: Fabian Halkivaha Date: Fri, 8 Aug 2025 15:46:49 +0200 Subject: [PATCH 2/5] fix docs slightly --- docs/docs/configuration/databases.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/docs/configuration/databases.mdx b/docs/docs/configuration/databases.mdx index 267183b0909a..cc21ecebdc13 100644 --- a/docs/docs/configuration/databases.mdx +++ b/docs/docs/configuration/databases.mdx @@ -32,7 +32,7 @@ install new database drivers into your Superset configuration. ### Supported Databases and Dependencies Some of the recommended packages are shown below. Please refer to -[pyproject.toml](https://github.com/apache/superset/blob/master/pyproject.toml) for the versions thatdocs/docs/configuration/databases.mdx +[pyproject.toml](https://github.com/apache/superset/blob/master/pyproject.toml) for the versions that are compatible with Superset. |
Database
| PyPI package | Connection String | From aa3d69668a8b72aeb509bdb36781a4d2e5c687c7 Mon Sep 17 00:00:00 2001 From: fabian_zse Date: Fri, 8 Aug 2025 16:16:20 +0200 Subject: [PATCH 3/5] revert databricks deps update --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 6dcf34baec0d..edfcd145aefd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -119,8 +119,8 @@ cockroachdb = ["cockroachdb>=0.3.5, <0.4"] crate = ["sqlalchemy-cratedb>=0.40.1, <1"] databend = ["databend-sqlalchemy>=0.3.2, <1.0"] databricks = [ - "databricks-sql-connector>=4.0.5, <5", - "databricks-sqlalchemy>=1.0.5, <2", + "databricks-sql-connector>=2.0.2, <3", + "sqlalchemy-databricks>=0.2.0", ] db2 = ["ibm-db-sa>0.3.8, <=0.4.0"] denodo = ["denodo-sqlalchemy~=1.0.6"] From fad98398472550dfb3eaea30ca65422e8dcfb5e5 Mon Sep 17 00:00:00 2001 From: fabian_zse Date: Fri, 8 Aug 2025 17:29:20 +0200 Subject: [PATCH 4/5] support all cloud providers --- docs/docs/configuration/databases.mdx | 31 +++- superset/db_engine_specs/databricks.py | 175 ++++++++++++++++-- .../db_engine_specs/test_databricks.py | 22 +-- 3 files changed, 194 insertions(+), 34 deletions(-) diff --git a/docs/docs/configuration/databases.mdx b/docs/docs/configuration/databases.mdx index cc21ecebdc13..247988f16448 100644 --- a/docs/docs/configuration/databases.mdx +++ b/docs/docs/configuration/databases.mdx @@ -536,20 +536,22 @@ Superset supports OAuth2 authentication for Databricks, allowing users to authen from datetime import timedelta # OAuth2 configuration for Databricks +# OAuth2 endpoints are automatically detected based on your Databricks cloud provider DATABASE_OAUTH2_CLIENTS = { "Databricks (legacy)": { "id": "your-databricks-client-id", "secret": "your-databricks-client-secret", "scope": "sql", - "authorization_request_uri": "https://accounts.cloud.databricks.com/oidc/accounts/{account_id}/v1/authorize", - "token_request_uri": 
"https://accounts.cloud.databricks.com/oidc/accounts/{account_id}/v1/token", + # OAuth2 endpoints are auto-detected based on hostname, but can be overridden: + # AWS: "authorization_request_uri": "https://accounts.cloud.databricks.com/oidc/accounts/{account_id}/v1/authorize", + # Azure: "authorization_request_uri": "https://login.microsoftonline.com/{tenant_id}/oauth2/v2.0/authorize", + # GCP: "authorization_request_uri": "https://accounts.gcp.databricks.com/oidc/accounts/{account_id}/v1/authorize", }, "Databricks": { "id": "your-databricks-client-id", "secret": "your-databricks-client-secret", "scope": "sql", - "authorization_request_uri": "https://accounts.cloud.databricks.com/oidc/accounts/{account_id}/v1/authorize", - "token_request_uri": "https://accounts.cloud.databricks.com/oidc/accounts/{account_id}/v1/token", + # OAuth2 endpoints are auto-detected based on hostname }, } @@ -563,9 +565,26 @@ DATABASE_OAUTH2_TIMEOUT = timedelta(seconds=30) Replace the following placeholders: - `your-databricks-client-id`: Your Databricks OAuth2 application client ID - `your-databricks-client-secret`: Your Databricks OAuth2 application client secret -- `{account_id}`: Your Databricks account ID (found in your workspace URL) - `your-superset-host:port`: Your Superset instance hostname and port +**Multi-Cloud Provider Support** + +Superset automatically detects your Databricks cloud provider and uses the appropriate OAuth2 endpoints: + +- **AWS**: Detected from hostnames containing `cloud.databricks.com` +- **Azure**: Detected from hostnames containing `azure` or `azuredatabricks` +- **GCP**: Detected from hostnames containing `gcp` or `googleusercontent` + +You can also explicitly specify the cloud provider in your database configuration under **Advanced** → **Other** → **ENGINE PARAMETERS**: + +```json +{ + "cloud_provider": "azure" +} +``` + +Valid cloud provider values are: `aws`, `azure`, `gcp`. 
+ ###### Usage Once configured, users can: @@ -574,7 +593,7 @@ Once configured, users can: 2. When querying data, Superset will automatically redirect users to authenticate with Databricks if needed 3. User-specific OAuth2 tokens will be used for database connections, providing better security and audit trails -This feature works with both "Databricks (legacy)" and "Databricks" engine types. +This feature works with both "Databricks (legacy)" and "Databricks" engine types and automatically supports all major cloud providers (AWS, Azure, GCP). #### Denodo diff --git a/superset/db_engine_specs/databricks.py b/superset/db_engine_specs/databricks.py index 4cd3c0b6345b..76b5dfd49bf5 100644 --- a/superset/db_engine_specs/databricks.py +++ b/superset/db_engine_specs/databricks.py @@ -40,6 +40,11 @@ if TYPE_CHECKING: from superset.models.core import Database + from superset.superset_typing import ( + OAuth2ClientConfig, + OAuth2State, + OAuth2TokenResponse, + ) try: @@ -245,6 +250,48 @@ class DatabricksDynamicBaseEngineSpec(BasicParametersMixin, DatabricksBaseEngine "port": "port", } + # OAuth2 endpoints for different cloud providers + _oauth2_endpoints = { + "aws": { + "authorization_request_uri": "https://accounts.cloud.databricks.com/oidc/accounts/{}/v1/authorize", + "token_request_uri": "https://accounts.cloud.databricks.com/oidc/accounts/{}/v1/token", + }, + "azure": { + "authorization_request_uri": "https://login.microsoftonline.com/{}/oauth2/v2.0/authorize", + "token_request_uri": "https://login.microsoftonline.com/{}/oauth2/v2.0/token", + }, + "gcp": { + "authorization_request_uri": "https://accounts.gcp.databricks.com/oidc/accounts/{}/v1/authorize", + "token_request_uri": "https://accounts.gcp.databricks.com/oidc/accounts/{}/v1/token", + }, + } + + @classmethod + def _detect_cloud_provider(cls, database: Database) -> str: + """ + Detect the cloud provider based on the database configuration. 
+ + Returns: + str: The cloud provider ('aws', 'azure', or 'gcp') + """ + # Check if cloud provider is explicitly configured in extra + if "cloud_provider" in (extra := cls.get_extra_params(database)): + provider = extra["cloud_provider"].lower() + if provider in cls._oauth2_endpoints: + return provider + + # Try to detect from hostname + hostname = database.url_object.host or "" + hostname = hostname.lower() + + if "azure" in hostname or "azuredatabricks" in hostname: + return "azure" + elif "gcp" in hostname or "googleusercontent" in hostname: + return "gcp" + else: + # Default to AWS for compatibility + return "aws" + @classmethod def impersonate_user( cls, @@ -452,12 +499,64 @@ class DatabricksNativeEngineSpec(DatabricksDynamicBaseEngineSpec): supports_oauth2 = True oauth2_exception = OAuth2RedirectError oauth2_scope = "sql" - oauth2_authorization_request_uri = ( - "https://accounts.cloud.databricks.com/oidc/accounts/{}/v1/authorize" - ) - oauth2_token_request_uri = ( - "https://accounts.cloud.databricks.com/oidc/accounts/{}/v1/token" # noqa: S105 - ) + + # OAuth2 endpoints are determined dynamically based on cloud provider + oauth2_authorization_request_uri = "" # Set dynamically + oauth2_token_request_uri = "" # Set dynamically + + @classmethod + def get_oauth2_authorization_uri( + cls, + config: "OAuth2ClientConfig", + state: "OAuth2State", + ) -> str: + """ + Return URI for initial OAuth2 request with dynamic endpoint detection. 
+ """ + from superset.models.core import Database + + # Get the database to detect cloud provider + database_id = state["database_id"] + if database := Database.query.get(database_id): + provider = cls._detect_cloud_provider(database) + # Update config with the correct authorization URI for the cloud provider + from typing import cast + + config = cast( + "OAuth2ClientConfig", + dict(config) + | { + "authorization_request_uri": cls._oauth2_endpoints[provider][ + "authorization_request_uri" + ] + }, + ) + + return super().get_oauth2_authorization_uri(config, state) + + @classmethod + def get_oauth2_token( + cls, + config: "OAuth2ClientConfig", + code: str, + ) -> "OAuth2TokenResponse": + """ + Exchange authorization code for refresh/access tokens with dynamic endpoint. + + Note: For token exchange, we need the database context from the state. + This is a limitation of the current OAuth2 flow design. + """ + # For now, fall back to AWS endpoints for token exchange + # TODO: Improve OAuth2 flow to pass database context to token exchange + from typing import cast + + config = cast( + "OAuth2ClientConfig", + dict(config) + | {"token_request_uri": cls._oauth2_endpoints["aws"]["token_request_uri"]}, + ) + + return super().get_oauth2_token(config, code) @classmethod def build_sqlalchemy_uri( # type: ignore @@ -602,12 +701,64 @@ class DatabricksPythonConnectorEngineSpec(DatabricksDynamicBaseEngineSpec): supports_oauth2 = True oauth2_exception = OAuth2RedirectError oauth2_scope = "sql" - oauth2_authorization_request_uri = ( - "https://accounts.cloud.databricks.com/oidc/accounts/{}/v1/authorize" - ) - oauth2_token_request_uri = ( - "https://accounts.cloud.databricks.com/oidc/accounts/{}/v1/token" # noqa: S105 - ) + + # OAuth2 endpoints are determined dynamically based on cloud provider + oauth2_authorization_request_uri = "" # Set dynamically + oauth2_token_request_uri = "" # Set dynamically + + @classmethod + def get_oauth2_authorization_uri( + cls, + config: 
"OAuth2ClientConfig", + state: "OAuth2State", + ) -> str: + """ + Return URI for initial OAuth2 request with dynamic endpoint detection. + """ + from superset.models.core import Database + + # Get the database to detect cloud provider + database_id = state["database_id"] + if database := Database.query.get(database_id): + provider = cls._detect_cloud_provider(database) + # Update config with the correct authorization URI for the cloud provider + from typing import cast + + config = cast( + "OAuth2ClientConfig", + dict(config) + | { + "authorization_request_uri": cls._oauth2_endpoints[provider][ + "authorization_request_uri" + ] + }, + ) + + return super().get_oauth2_authorization_uri(config, state) + + @classmethod + def get_oauth2_token( + cls, + config: "OAuth2ClientConfig", + code: str, + ) -> "OAuth2TokenResponse": + """ + Exchange authorization code for refresh/access tokens with dynamic endpoint. + + Note: For token exchange, we need the database context from the state. + This is a limitation of the current OAuth2 flow design. 
+ """ + # For now, fall back to AWS endpoints for token exchange + # TODO: Improve OAuth2 flow to pass database context to token exchange + from typing import cast + + config = cast( + "OAuth2ClientConfig", + dict(config) + | {"token_request_uri": cls._oauth2_endpoints["aws"]["token_request_uri"]}, + ) + + return super().get_oauth2_token(config, code) @classmethod def build_sqlalchemy_uri( # type: ignore diff --git a/tests/unit_tests/db_engine_specs/test_databricks.py b/tests/unit_tests/db_engine_specs/test_databricks.py index 53d5d62b3696..8b3baa5711b1 100644 --- a/tests/unit_tests/db_engine_specs/test_databricks.py +++ b/tests/unit_tests/db_engine_specs/test_databricks.py @@ -305,27 +305,17 @@ def test_oauth2_attributes() -> None: assert DatabricksNativeEngineSpec.supports_oauth2 is True assert DatabricksNativeEngineSpec.oauth2_exception is OAuth2RedirectError assert DatabricksNativeEngineSpec.oauth2_scope == "sql" - assert ( - DatabricksNativeEngineSpec.oauth2_authorization_request_uri - == "https://accounts.cloud.databricks.com/oidc/accounts/{}/v1/authorize" - ) - assert ( - DatabricksNativeEngineSpec.oauth2_token_request_uri - == "https://accounts.cloud.databricks.com/oidc/accounts/{}/v1/token" # noqa: S105 - ) + # OAuth2 endpoints are now dynamic and set at runtime + assert DatabricksNativeEngineSpec.oauth2_authorization_request_uri == "" + assert DatabricksNativeEngineSpec.oauth2_token_request_uri == "" # Test DatabricksPythonConnectorEngineSpec assert DatabricksPythonConnectorEngineSpec.supports_oauth2 is True assert DatabricksPythonConnectorEngineSpec.oauth2_exception is OAuth2RedirectError assert DatabricksPythonConnectorEngineSpec.oauth2_scope == "sql" - assert ( - DatabricksPythonConnectorEngineSpec.oauth2_authorization_request_uri - == "https://accounts.cloud.databricks.com/oidc/accounts/{}/v1/authorize" - ) - assert ( - DatabricksPythonConnectorEngineSpec.oauth2_token_request_uri - == "https://accounts.cloud.databricks.com/oidc/accounts/{}/v1/token" # 
noqa: S105 - ) + # OAuth2 endpoints are now dynamic and set at runtime + assert DatabricksPythonConnectorEngineSpec.oauth2_authorization_request_uri == "" + assert DatabricksPythonConnectorEngineSpec.oauth2_token_request_uri == "" def test_impersonate_user_with_token(mocker: MockerFixture) -> None: From af4dea928bac360265afd2a38d114169fcf95369 Mon Sep 17 00:00:00 2001 From: fabian_zse Date: Fri, 8 Aug 2025 17:29:49 +0200 Subject: [PATCH 5/5] cloud providers test --- .../test_databricks_multi_cloud.py | 268 ++++++++++++++++++ 1 file changed, 268 insertions(+) create mode 100644 tests/unit_tests/db_engine_specs/test_databricks_multi_cloud.py diff --git a/tests/unit_tests/db_engine_specs/test_databricks_multi_cloud.py b/tests/unit_tests/db_engine_specs/test_databricks_multi_cloud.py new file mode 100644 index 000000000000..f8afd60f65f8 --- /dev/null +++ b/tests/unit_tests/db_engine_specs/test_databricks_multi_cloud.py @@ -0,0 +1,268 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
# pylint: disable=unused-argument, import-outside-toplevel, protected-access

from urllib.parse import parse_qs, urlparse

import pytest
from pytest_mock import MockerFixture

from superset.db_engine_specs.databricks import (
    DatabricksNativeEngineSpec,
    DatabricksPythonConnectorEngineSpec,
)
from superset.superset_typing import OAuth2ClientConfig
from superset.utils.oauth2 import decode_oauth2_state

# Multi-Cloud Provider Tests

# Redirect URI shared by the OAuth2 client config and the OAuth2 state payload.
_REDIRECT_URI = "http://localhost:8088/api/v1/database/oauth2/"


def _stub_database(mocker: MockerFixture, host: str, database_id: int):
    """
    Build a ``MagicMock`` database exposing ``url_object.host`` and ``id``.
    """
    stub = mocker.MagicMock()
    stub.url_object.host = host
    stub.id = database_id
    return stub


def _request_authorization(mocker: MockerFixture, spec, config, database):
    """
    Ask ``spec`` for an OAuth2 authorization URI on behalf of ``database``.

    ``Database.query.get`` is patched to return ``database`` before the call.
    Returns the parsed URI together with the state dict that was passed in, so
    that callers can assert on both.
    """
    from superset.db_engine_specs.base import OAuth2State

    # Mock the database query
    mocker.patch("superset.models.core.Database.query.get", return_value=database)

    state: OAuth2State = {
        "database_id": database.id,
        "user_id": 1,
        "default_redirect_uri": _REDIRECT_URI,
        "tab_id": "1234",
    }
    uri = spec.get_oauth2_authorization_uri(config, state)
    return urlparse(uri), state


def _assert_scope_and_state(parsed, state) -> None:
    """
    Check that the query string carries the ``sql`` scope and the given state.
    """
    params = parse_qs(parsed.query)
    assert params["scope"][0] == "sql"
    raw_state = params["state"][0]
    assert decode_oauth2_state(raw_state.replace("%2E", ".")) == state


@pytest.fixture
def mock_database_aws(mocker: MockerFixture):
    """
    Mocked database (id 1) whose host is ``my-cluster.cloud.databricks.com``.
    """
    return _stub_database(mocker, "my-cluster.cloud.databricks.com", 1)


@pytest.fixture
def mock_database_azure(mocker: MockerFixture):
    """
    Mocked database (id 2) whose host is ``adb-123456789.12.azuredatabricks.net``.
    """
    return _stub_database(mocker, "adb-123456789.12.azuredatabricks.net", 2)


@pytest.fixture
def mock_database_gcp(mocker: MockerFixture):
    """
    Mocked database (id 3) whose host is ``123456789.gcp.databricks.com``.
    """
    return _stub_database(mocker, "123456789.gcp.databricks.com", 3)


@pytest.fixture
def oauth2_config() -> OAuth2ClientConfig:
    """
    OAuth2 client configuration handed to the Databricks engine specs.
    """
    accounts_base = "https://accounts.cloud.databricks.com/oidc/accounts/12345/v1"
    return {
        "id": "databricks-client-id",
        "secret": "databricks-client-secret",
        "scope": "sql",
        "redirect_uri": _REDIRECT_URI,
        "authorization_request_uri": accounts_base + "/authorize",
        "token_request_uri": accounts_base + "/token",
        "request_content_type": "json",
    }


def test_cloud_provider_detection_aws(mock_database_aws) -> None:
    """
    The AWS fixture host is detected as ``aws``.
    """
    spec = DatabricksNativeEngineSpec
    assert spec._detect_cloud_provider(mock_database_aws) == "aws"


def test_cloud_provider_detection_azure(mock_database_azure) -> None:
    """
    The Azure fixture host is detected as ``azure``.
    """
    spec = DatabricksNativeEngineSpec
    assert spec._detect_cloud_provider(mock_database_azure) == "azure"


def test_cloud_provider_detection_gcp(mock_database_gcp) -> None:
    """
    The GCP fixture host is detected as ``gcp``.
    """
    spec = DatabricksNativeEngineSpec
    assert spec._detect_cloud_provider(mock_database_gcp) == "gcp"


def test_cloud_provider_detection_explicit_config(mocker: MockerFixture) -> None:
    """
    With ``get_extra_params`` patched to report ``cloud_provider: azure``,
    detection yields ``azure`` for the host ``generic-host.com``.
    """
    spec = DatabricksNativeEngineSpec

    database = mocker.MagicMock()
    database.url_object.host = "generic-host.com"

    # Mock get_extra_params to return explicit cloud provider
    mocker.patch.object(
        spec,
        "get_extra_params",
        return_value={"cloud_provider": "azure"},
    )

    assert spec._detect_cloud_provider(database) == "azure"


def test_get_oauth2_authorization_uri_aws(
    mocker: MockerFixture,
    oauth2_config: OAuth2ClientConfig,
    mock_database_aws,
) -> None:
    """
    The native spec builds an ``accounts.cloud.databricks.com`` authorize URI
    for the AWS database.
    """
    parsed, state = _request_authorization(
        mocker, DatabricksNativeEngineSpec, oauth2_config, mock_database_aws
    )

    assert parsed.netloc == "accounts.cloud.databricks.com"
    for fragment in ("/oidc/accounts/", "/v1/authorize"):
        assert fragment in parsed.path
    _assert_scope_and_state(parsed, state)


def test_get_oauth2_authorization_uri_azure(
    mocker: MockerFixture,
    oauth2_config: OAuth2ClientConfig,
    mock_database_azure,
) -> None:
    """
    The native spec builds a ``login.microsoftonline.com`` authorize URI for
    the Azure database.
    """
    parsed, state = _request_authorization(
        mocker, DatabricksNativeEngineSpec, oauth2_config, mock_database_azure
    )

    assert parsed.netloc == "login.microsoftonline.com"
    assert "/oauth2/v2.0/authorize" in parsed.path
    _assert_scope_and_state(parsed, state)


def test_get_oauth2_authorization_uri_gcp(
    mocker: MockerFixture,
    oauth2_config: OAuth2ClientConfig,
    mock_database_gcp,
) -> None:
    """
    The native spec builds an ``accounts.gcp.databricks.com`` authorize URI
    for the GCP database.
    """
    parsed, state = _request_authorization(
        mocker, DatabricksNativeEngineSpec, oauth2_config, mock_database_gcp
    )

    assert parsed.netloc == "accounts.gcp.databricks.com"
    for fragment in ("/oidc/accounts/", "/v1/authorize"):
        assert fragment in parsed.path
    _assert_scope_and_state(parsed, state)


def test_python_connector_cloud_provider_detection_azure(mock_database_azure) -> None:
    """
    The Python Connector spec also detects the Azure fixture host as ``azure``.
    """
    spec = DatabricksPythonConnectorEngineSpec
    assert spec._detect_cloud_provider(mock_database_azure) == "azure"


def test_python_connector_oauth2_authorization_uri_azure(
    mocker: MockerFixture,
    oauth2_config: OAuth2ClientConfig,
    mock_database_azure,
) -> None:
    """
    The Python Connector spec builds a ``login.microsoftonline.com`` authorize
    URI for the Azure database.
    """
    parsed, state = _request_authorization(
        mocker,
        DatabricksPythonConnectorEngineSpec,
        oauth2_config,
        mock_database_azure,
    )

    assert parsed.netloc == "login.microsoftonline.com"
    assert "/oauth2/v2.0/authorize" in parsed.path
    _assert_scope_and_state(parsed, state)