From 5082d968ef7a31c021cc8a5008def6b1470f20ec Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Thu, 30 Jan 2025 14:13:44 -0500 Subject: [PATCH] Combine `config` and `kwargs` (#180) * Combine config and kwargs * Use `ObstoreError` --- README.md | 2 +- pyo3-object_store/src/aws.rs | 60 ++++++++++++++++++++------------- pyo3-object_store/src/azure.rs | 53 +++++++++++++++++++---------- pyo3-object_store/src/config.rs | 2 +- pyo3-object_store/src/gcp.rs | 53 +++++++++++++++++++---------- 5 files changed, 109 insertions(+), 61 deletions(-) diff --git a/README.md b/README.md index 8400e8d..6258df2 100644 --- a/README.md +++ b/README.md @@ -17,7 +17,7 @@ Simple, fast integration with object storage services like Amazon S3, Google Clo - **Streaming downloads** with configurable chunking. - **Streaming uploads** from async or sync iterators. - **Streaming list**, with no need to paginate. -- Automatically uses [**multipart uploads**](https://docs.aws.amazon.com/AmazonS3/latest/userguide/mpuoverview.html) for large file objects. +- Automatic [**multipart uploads**](https://docs.aws.amazon.com/AmazonS3/latest/userguide/mpuoverview.html) for large file objects. - Support for **conditional put** ("put if not exists"), as well as custom tags and attributes. - Optionally return list results in [Apache Arrow](https://arrow.apache.org/) format, which is faster and more memory-efficient than materializing Python `dict`s. - File-like object API and [fsspec](https://github.com/fsspec/filesystem_spec) integration. diff --git a/pyo3-object_store/src/aws.rs b/pyo3-object_store/src/aws.rs index 6f30f28..bc786c0 100644 --- a/pyo3-object_store/src/aws.rs +++ b/pyo3-object_store/src/aws.rs @@ -10,7 +10,7 @@ use pyo3::types::PyType; use crate::client::PyClientOptions; use crate::config::PyConfigValue; -use crate::error::{PyObjectStoreError, PyObjectStoreResult}; +use crate::error::{ObstoreError, PyObjectStoreError, PyObjectStoreResult}; use crate::retry::PyRetryConfig; /// A Python-facing wrapper around an [`AmazonS3`]. @@ -43,11 +43,8 @@ impl PyS3Store { kwargs: Option, ) -> PyObjectStoreResult { let mut builder = AmazonS3Builder::new().with_bucket_name(bucket); - if let Some(config) = config { - builder = config.apply_config(builder); - } - if let Some(kwargs) = kwargs { - builder = kwargs.apply_config(builder); + if let Some(config_kwargs) = combine_config_kwargs(config, kwargs)? { + builder = config_kwargs.apply_config(builder); } if let Some(client_options) = client_options { builder = builder.with_client_options(client_options.into()) @@ -73,11 +70,8 @@ impl PyS3Store { if let Some(bucket) = bucket { builder = builder.with_bucket_name(bucket); } - if let Some(config) = config { - builder = config.apply_config(builder); - } - if let Some(kwargs) = kwargs { - builder = kwargs.apply_config(builder); + if let Some(config_kwargs) = combine_config_kwargs(config, kwargs)? { + builder = config_kwargs.apply_config(builder); } if let Some(client_options) = client_options { builder = builder.with_client_options(client_options.into()) @@ -135,11 +129,8 @@ impl PyS3Store { if let Some(token) = token { builder = builder.with_token(token); } - if let Some(config) = config { - builder = config.apply_config(builder); - } - if let Some(kwargs) = kwargs { - builder = kwargs.apply_config(builder); + if let Some(config_kwargs) = combine_config_kwargs(config, kwargs)? { + builder = config_kwargs.apply_config(builder); } if let Some(client_options) = client_options { builder = builder.with_client_options(client_options.into()) @@ -162,11 +153,8 @@ impl PyS3Store { kwargs: Option, ) -> PyObjectStoreResult { let mut builder = AmazonS3Builder::from_env().with_url(url); - if let Some(config) = config { - builder = config.apply_config(builder); - } - if let Some(kwargs) = kwargs { - builder = kwargs.apply_config(builder); + if let Some(config_kwargs) = combine_config_kwargs(config, kwargs)? { + builder = config_kwargs.apply_config(builder); } if let Some(client_options) = client_options { builder = builder.with_client_options(client_options.into()) @@ -183,7 +171,7 @@ impl PyS3Store { } } -#[derive(Debug, PartialEq, Eq, Hash)] +#[derive(Clone, Debug, PartialEq, Eq, Hash)] pub struct PyAmazonS3ConfigKey(AmazonS3ConfigKey); impl<'py> FromPyObject<'py> for PyAmazonS3ConfigKey { @@ -194,7 +182,7 @@ impl<'py> FromPyObject<'py> for PyAmazonS3ConfigKey { } } -#[derive(Debug, PartialEq, Eq)] +#[derive(Clone, Debug, PartialEq, Eq)] pub struct PyAmazonS3Config(HashMap); impl<'py> FromPyObject<'py> for PyAmazonS3Config { @@ -210,4 +198,30 @@ impl PyAmazonS3Config { } builder } + + fn merge(mut self, other: PyAmazonS3Config) -> PyObjectStoreResult { + for (k, v) in other.0.into_iter() { + let old_value = self.0.insert(k.clone(), v); + if old_value.is_some() { + return Err(ObstoreError::new_err(format!( + "Duplicate key {} between config and kwargs", + k.0.as_ref() + )) + .into()); + } + } + + Ok(self) + } +} + +fn combine_config_kwargs( + config: Option, + kwargs: Option, +) -> PyObjectStoreResult> { + match (config, kwargs) { + (None, None) => Ok(None), + (Some(x), None) | (None, Some(x)) => Ok(Some(x)), + (Some(config), Some(kwargs)) => Ok(Some(config.merge(kwargs)?)), + } } diff --git a/pyo3-object_store/src/azure.rs b/pyo3-object_store/src/azure.rs index a3b2fc1..4c83f58 100644 --- a/pyo3-object_store/src/azure.rs +++ b/pyo3-object_store/src/azure.rs @@ -9,7 +9,7 @@ use pyo3::types::PyType; use crate::client::PyClientOptions; use crate::config::PyConfigValue; -use crate::error::{PyObjectStoreError, PyObjectStoreResult}; +use crate::error::{ObstoreError, PyObjectStoreError, PyObjectStoreResult}; use crate::retry::PyRetryConfig; /// A Python-facing wrapper around a [`MicrosoftAzure`]. @@ -42,11 +42,8 @@ impl PyAzureStore { kwargs: Option, ) -> PyObjectStoreResult { let mut builder = MicrosoftAzureBuilder::new().with_container_name(container); - if let Some(config) = config { - builder = config.apply_config(builder); - } - if let Some(kwargs) = kwargs { - builder = kwargs.apply_config(builder); + if let Some(config_kwargs) = combine_config_kwargs(config, kwargs)? { + builder = config_kwargs.apply_config(builder); } if let Some(client_options) = client_options { builder = builder.with_client_options(client_options.into()) @@ -69,11 +66,8 @@ impl PyAzureStore { kwargs: Option, ) -> PyObjectStoreResult { let mut builder = MicrosoftAzureBuilder::from_env().with_container_name(container); - if let Some(config) = config { - builder = config.apply_config(builder); - } - if let Some(kwargs) = kwargs { - builder = kwargs.apply_config(builder); + if let Some(config_kwargs) = combine_config_kwargs(config, kwargs)? { + builder = config_kwargs.apply_config(builder); } if let Some(client_options) = client_options { builder = builder.with_client_options(client_options.into()) @@ -95,11 +89,8 @@ impl PyAzureStore { kwargs: Option, ) -> PyObjectStoreResult { let mut builder = MicrosoftAzureBuilder::from_env().with_url(url); - if let Some(config) = config { - builder = config.apply_config(builder); - } - if let Some(kwargs) = kwargs { - builder = kwargs.apply_config(builder); + if let Some(config_kwargs) = combine_config_kwargs(config, kwargs)? { + builder = config_kwargs.apply_config(builder); } if let Some(client_options) = client_options { builder = builder.with_client_options(client_options.into()) @@ -116,7 +107,7 @@ impl PyAzureStore { } } -#[derive(Debug, PartialEq, Eq, Hash)] +#[derive(Clone, Debug, PartialEq, Eq, Hash)] pub struct PyAzureConfigKey(AzureConfigKey); impl<'py> FromPyObject<'py> for PyAzureConfigKey { @@ -127,7 +118,7 @@ impl<'py> FromPyObject<'py> for PyAzureConfigKey { } } -#[derive(Debug, PartialEq, Eq)] +#[derive(Clone, Debug, PartialEq, Eq)] pub struct PyAzureConfig(HashMap); impl<'py> FromPyObject<'py> for PyAzureConfig { @@ -143,4 +134,30 @@ impl PyAzureConfig { } builder } + + fn merge(mut self, other: PyAzureConfig) -> PyObjectStoreResult { + for (k, v) in other.0.into_iter() { + let old_value = self.0.insert(k.clone(), v); + if old_value.is_some() { + return Err(ObstoreError::new_err(format!( + "Duplicate key {} between config and kwargs", + k.0.as_ref() + )) + .into()); + } + } + + Ok(self) + } +} + +fn combine_config_kwargs( + config: Option, + kwargs: Option, +) -> PyObjectStoreResult> { + match (config, kwargs) { + (None, None) => Ok(None), + (Some(x), None) | (None, Some(x)) => Ok(Some(x)), + (Some(config), Some(kwargs)) => Ok(Some(config.merge(kwargs)?)), + } } diff --git a/pyo3-object_store/src/config.rs b/pyo3-object_store/src/config.rs index 6e129a5..3bcc286 100644 --- a/pyo3-object_store/src/config.rs +++ b/pyo3-object_store/src/config.rs @@ -10,7 +10,7 @@ use pyo3::prelude::*; /// - `True` and `False` (becomes `"true"` and `"false"`) /// - `timedelta` /// - `str` -#[derive(Debug, PartialEq, Eq, Hash)] +#[derive(Clone, Debug, PartialEq, Eq, Hash)] pub struct PyConfigValue(pub String); impl<'py> FromPyObject<'py> for PyConfigValue { diff --git a/pyo3-object_store/src/gcp.rs b/pyo3-object_store/src/gcp.rs index 910e629..c19adbd 100644 --- a/pyo3-object_store/src/gcp.rs +++ b/pyo3-object_store/src/gcp.rs @@ -9,7 +9,7 @@ use pyo3::types::PyType; use crate::client::PyClientOptions; use crate::config::PyConfigValue; -use crate::error::{PyObjectStoreError, PyObjectStoreResult}; +use crate::error::{ObstoreError, PyObjectStoreError, PyObjectStoreResult}; use crate::retry::PyRetryConfig; /// A Python-facing wrapper around a [`GoogleCloudStorage`]. @@ -42,11 +42,8 @@ impl PyGCSStore { kwargs: Option, ) -> PyObjectStoreResult { let mut builder = GoogleCloudStorageBuilder::new().with_bucket_name(bucket); - if let Some(config) = config { - builder = config.apply_config(builder); - } - if let Some(kwargs) = kwargs { - builder = kwargs.apply_config(builder); + if let Some(config_kwargs) = combine_config_kwargs(config, kwargs)? { + builder = config_kwargs.apply_config(builder); } if let Some(client_options) = client_options { builder = builder.with_client_options(client_options.into()) @@ -69,11 +66,8 @@ impl PyGCSStore { kwargs: Option, ) -> PyObjectStoreResult { let mut builder = GoogleCloudStorageBuilder::from_env().with_bucket_name(bucket); - if let Some(config) = config { - builder = config.apply_config(builder); - } - if let Some(kwargs) = kwargs { - builder = kwargs.apply_config(builder); + if let Some(config_kwargs) = combine_config_kwargs(config, kwargs)? { + builder = config_kwargs.apply_config(builder); } if let Some(client_options) = client_options { builder = builder.with_client_options(client_options.into()) @@ -95,11 +89,8 @@ impl PyGCSStore { kwargs: Option, ) -> PyObjectStoreResult { let mut builder = GoogleCloudStorageBuilder::from_env().with_url(url); - if let Some(config) = config { - builder = config.apply_config(builder); - } - if let Some(kwargs) = kwargs { - builder = kwargs.apply_config(builder); + if let Some(config_kwargs) = combine_config_kwargs(config, kwargs)? { + builder = config_kwargs.apply_config(builder); } if let Some(client_options) = client_options { builder = builder.with_client_options(client_options.into()) @@ -116,7 +107,7 @@ impl PyGCSStore { } } -#[derive(Debug, PartialEq, Eq, Hash)] +#[derive(Clone, Debug, PartialEq, Eq, Hash)] pub struct PyGoogleConfigKey(GoogleConfigKey); impl<'py> FromPyObject<'py> for PyGoogleConfigKey { @@ -127,7 +118,7 @@ impl<'py> FromPyObject<'py> for PyGoogleConfigKey { } } -#[derive(Debug, PartialEq, Eq)] +#[derive(Clone, Debug, PartialEq, Eq)] pub struct PyGoogleConfig(HashMap); impl<'py> FromPyObject<'py> for PyGoogleConfig { @@ -143,4 +134,30 @@ impl PyGoogleConfig { } builder } + + fn merge(mut self, other: PyGoogleConfig) -> PyObjectStoreResult { + for (k, v) in other.0.into_iter() { + let old_value = self.0.insert(k.clone(), v); + if old_value.is_some() { + return Err(ObstoreError::new_err(format!( + "Duplicate key {} between config and kwargs", + k.0.as_ref() + )) + .into()); + } + } + + Ok(self) + } +} + +fn combine_config_kwargs( + config: Option, + kwargs: Option, +) -> PyObjectStoreResult> { + match (config, kwargs) { + (None, None) => Ok(None), + (Some(x), None) | (None, Some(x)) => Ok(Some(x)), + (Some(config), Some(kwargs)) => Ok(Some(config.merge(kwargs)?)), + } }