Skip to content

Commit

Permalink
[FEAT] Add additional Azure authentication methods (#2333)
Browse files Browse the repository at this point in the history
Adds:
- login via shared access signatures (SAS)
- login via Entra ID (Active Directory) using tenant ID + client ID +
client secret
- fallback to environment variables using azure-sdk-for-rust
  • Loading branch information
kevinzwang authored Jun 3, 2024
1 parent 540e65c commit 55b0bc4
Show file tree
Hide file tree
Showing 9 changed files with 278 additions and 16 deletions.
113 changes: 112 additions & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 12 additions & 0 deletions daft/daft.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -503,6 +503,10 @@ class AzureConfig:

storage_account: str | None
access_key: str | None
sas_token: str | None
tenant_id: str | None
client_id: str | None
client_secret: str | None
anonymous: str | None
endpoint_url: str | None = None
use_ssl: bool | None = None
Expand All @@ -511,6 +515,10 @@ class AzureConfig:
self,
storage_account: str | None = None,
access_key: str | None = None,
sas_token: str | None = None,
tenant_id: str | None = None,
client_id: str | None = None,
client_secret: str | None = None,
anonymous: str | None = None,
endpoint_url: str | None = None,
use_ssl: bool | None = None,
Expand All @@ -519,6 +527,10 @@ class AzureConfig:
self,
storage_account: str | None = None,
access_key: str | None = None,
sas_token: str | None = None,
tenant_id: str | None = None,
client_id: str | None = None,
client_secret: str | None = None,
anonymous: str | None = None,
endpoint_url: str | None = None,
use_ssl: bool | None = None,
Expand Down
25 changes: 22 additions & 3 deletions daft/io/_iceberg.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def _convert_iceberg_file_io_properties_to_io_config(props: Dict[str, Any]) -> O
S3_SESSION_TOKEN,
)

from daft.io import GCSConfig, IOConfig, S3Config
from daft.io import AzureConfig, GCSConfig, IOConfig, S3Config

s3_mapping = {
S3_REGION: "region_name",
Expand All @@ -44,6 +44,7 @@ def _convert_iceberg_file_io_properties_to_io_config(props: Dict[str, Any]) -> O
s3_config = None

gcs_config = None
azure_config = None
if parse(pyiceberg.__version__) >= parse("0.5.0"):
from pyiceberg.io import GCS_PROJECT_ID

Expand All @@ -59,8 +60,26 @@ def _convert_iceberg_file_io_properties_to_io_config(props: Dict[str, Any]) -> O
if len(gcs_args) > 0:
gcs_config = GCSConfig(**gcs_args)

if s3_config is not None or gcs_config is not None:
return IOConfig(s3=s3_config, gcs=gcs_config)
azure_mapping = {
"adlfs.account-name": "storage_account",
"adlfs.account-key": "access_key",
"adlfs.sas-token": "sas_token",
"adlfs.tenant-id": "tenant_id",
"adlfs.client-id": "client_id",
"adlfs.client-secret": "client_secret",
}

azure_args = dict() # type: ignore
for pyiceberg_key, daft_key in azure_mapping.items():
value = props.get(pyiceberg_key, None)
if value is not None:
azure_args[daft_key] = value

if len(azure_args) > 0:
azure_config = AzureConfig(**azure_args)

if any([s3_config, gcs_config, azure_config]):
return IOConfig(s3=s3_config, gcs=gcs_config, azure=azure_config)
else:
return None

Expand Down
8 changes: 8 additions & 0 deletions daft/io/object_store_options.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,14 @@ def _azure_config_to_storage_options(azure_config: AzureConfig) -> dict[str, str
storage_options["account_name"] = azure_config.storage_account
if azure_config.access_key is not None:
storage_options["access_key"] = azure_config.access_key
if azure_config.sas_token is not None:
storage_options["sas_token"] = azure_config.sas_token
if azure_config.tenant_id is not None:
storage_options["tenant_id"] = azure_config.tenant_id
if azure_config.client_id is not None:
storage_options["client_id"] = azure_config.client_id
if azure_config.client_secret is not None:
storage_options["client_secret"] = azure_config.client_secret
if azure_config.endpoint_url is not None:
storage_options["endpoint"] = azure_config.endpoint_url
if azure_config.use_ssl is not None:
Expand Down
36 changes: 34 additions & 2 deletions src/common/io-config/src/azure.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@ use serde::Serialize;
pub struct AzureConfig {
pub storage_account: Option<String>,
pub access_key: Option<String>,
pub sas_token: Option<String>,
pub tenant_id: Option<String>,
pub client_id: Option<String>,
pub client_secret: Option<String>,
pub anonymous: bool,
pub endpoint_url: Option<String>,
pub use_ssl: bool,
Expand All @@ -18,6 +22,10 @@ impl Default for AzureConfig {
Self {
storage_account: None,
access_key: None,
sas_token: None,
tenant_id: None,
client_id: None,
client_secret: None,
anonymous: false,
endpoint_url: None,
use_ssl: true,
Expand All @@ -34,7 +42,19 @@ impl AzureConfig {
if let Some(access_key) = &self.access_key {
res.push(format!("Access key = {}", access_key));
}
res.push(format!("Anoynmous = {}", self.anonymous));
if let Some(sas_token) = &self.sas_token {
res.push(format!("Shared Access Signature = {}", sas_token));
}
if let Some(tenant_id) = &self.tenant_id {
res.push(format!("Tenant ID = {}", tenant_id));
}
if let Some(client_id) = &self.client_id {
res.push(format!("Client ID = {}", client_id));
}
if let Some(client_secret) = &self.client_secret {
res.push(format!("Client Secret = {}", client_secret));
}
res.push(format!("Anonymous = {}", self.anonymous));
if let Some(endpoint_url) = &self.endpoint_url {
res.push(format!("Endpoint URL = {}", endpoint_url));
}
Expand All @@ -50,10 +70,22 @@ impl Display for AzureConfig {
"AzureConfig
storage_account: {:?}
access_key: {:?}
sas_token: {:?}
tenant_id: {:?}
client_id: {:?}
client_secret: {:?}
anonymous: {:?}
endpoint_url: {:?}
use_ssl: {:?}",
self.storage_account, self.access_key, self.anonymous, self.endpoint_url, self.use_ssl
self.storage_account,
self.access_key,
self.sas_token,
self.tenant_id,
self.client_id,
self.client_secret,
self.anonymous,
self.endpoint_url,
self.use_ssl
)
}
}
2 changes: 1 addition & 1 deletion src/common/io-config/src/gcs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ impl GCSConfig {
if let Some(project_id) = &self.project_id {
res.push(format!("Project ID = {}", project_id));
}
res.push(format!("Anoynmous = {}", self.anonymous));
res.push(format!("Anonymous = {}", self.anonymous));
res
}
}
Expand Down
Loading

0 comments on commit 55b0bc4

Please sign in to comment.