Skip to content
2,763 changes: 1,547 additions & 1,216 deletions poetry.lock

Large diffs are not rendered by default.

31 changes: 21 additions & 10 deletions pyiceberg/catalog/dynamodb.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
)

import boto3
from mypy_boto3_dynamodb.client import DynamoDBClient

from pyiceberg.catalog import (
BOTOCORE_SESSION,
Expand Down Expand Up @@ -94,18 +95,28 @@


class DynamoDbCatalog(MetastoreCatalog):
def __init__(self, name: str, **properties: str):
def __init__(self, name: str, client: Optional[DynamoDBClient] = None, **properties: str):
"""Dynamodb catalog.
Args:
name: Name to identify the catalog.
client: An optional boto3 dynamodb client.
properties: Properties for dynamodb client construction and configuration.
"""
super().__init__(name, **properties)
if client is not None:
self.dynamodb = client
else:
session = boto3.Session(
profile_name=properties.get(DYNAMODB_PROFILE_NAME),
region_name=get_first_property_value(properties, DYNAMODB_REGION, AWS_REGION),
botocore_session=properties.get(BOTOCORE_SESSION),
aws_access_key_id=get_first_property_value(properties, DYNAMODB_ACCESS_KEY_ID, AWS_ACCESS_KEY_ID),
aws_secret_access_key=get_first_property_value(properties, DYNAMODB_SECRET_ACCESS_KEY, AWS_SECRET_ACCESS_KEY),
aws_session_token=get_first_property_value(properties, DYNAMODB_SESSION_TOKEN, AWS_SESSION_TOKEN),
)
self.dynamodb = session.client(DYNAMODB_CLIENT)

session = boto3.Session(
profile_name=properties.get(DYNAMODB_PROFILE_NAME),
region_name=get_first_property_value(properties, DYNAMODB_REGION, AWS_REGION),
botocore_session=properties.get(BOTOCORE_SESSION),
aws_access_key_id=get_first_property_value(properties, DYNAMODB_ACCESS_KEY_ID, AWS_ACCESS_KEY_ID),
aws_secret_access_key=get_first_property_value(properties, DYNAMODB_SECRET_ACCESS_KEY, AWS_SECRET_ACCESS_KEY),
aws_session_token=get_first_property_value(properties, DYNAMODB_SESSION_TOKEN, AWS_SESSION_TOKEN),
)
self.dynamodb = session.client(DYNAMODB_CLIENT)
self.dynamodb_table_name = self.properties.get(DYNAMODB_TABLE_NAME, DYNAMODB_TABLE_NAME_DEFAULT)
self._ensure_catalog_table_exists_or_create()

Expand Down
2 changes: 1 addition & 1 deletion pyiceberg/catalog/glue.py
Original file line number Diff line number Diff line change
Expand Up @@ -317,7 +317,7 @@ def __init__(self, name: str, client: Optional[GlueClient] = None, **properties:
"""
super().__init__(name, **properties)

if client:
if client is not None:
self.glue = client
else:
retry_mode_prop_value = get_first_property_value(properties, GLUE_RETRY_MODE)
Expand Down
16 changes: 12 additions & 4 deletions pyiceberg/catalog/rest/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@

from pyiceberg import __version__
from pyiceberg.catalog import (
BOTOCORE_SESSION,
TOKEN,
URI,
WAREHOUSE_LOCATION,
Expand All @@ -53,6 +54,7 @@
TableAlreadyExistsError,
UnauthorizedError,
)
from pyiceberg.io import AWS_ACCESS_KEY_ID, AWS_REGION, AWS_SECRET_ACCESS_KEY, AWS_SESSION_TOKEN
from pyiceberg.partitioning import UNPARTITIONED_PARTITION_SPEC, PartitionSpec, assign_fresh_partition_spec_ids
from pyiceberg.schema import Schema, assign_fresh_schema_ids
from pyiceberg.table import (
Expand All @@ -72,7 +74,7 @@
from pyiceberg.typedef import EMPTY_DICT, UTF8, IcebergBaseModel, Identifier, Properties
from pyiceberg.types import transform_dict_value_to_str
from pyiceberg.utils.deprecated import deprecation_message
from pyiceberg.utils.properties import get_header_properties, property_as_bool
from pyiceberg.utils.properties import get_first_property_value, get_header_properties, property_as_bool

if TYPE_CHECKING:
import pyarrow as pa
Expand Down Expand Up @@ -390,11 +392,17 @@ class SigV4Adapter(HTTPAdapter):
def __init__(self, **properties: str):
super().__init__()
self._properties = properties
self._boto_session = boto3.Session(
region_name=get_first_property_value(self._properties, AWS_REGION),
botocore_session=self._properties.get(BOTOCORE_SESSION),
aws_access_key_id=get_first_property_value(self._properties, AWS_ACCESS_KEY_ID),
aws_secret_access_key=get_first_property_value(self._properties, AWS_SECRET_ACCESS_KEY),
aws_session_token=get_first_property_value(self._properties, AWS_SESSION_TOKEN),
)

def add_headers(self, request: PreparedRequest, **kwargs: Any) -> None: # pylint: disable=W0613
boto_session = boto3.Session()
credentials = boto_session.get_credentials().get_frozen_credentials()
region = self._properties.get(SIGV4_REGION, boto_session.region_name)
credentials = self._boto_session.get_credentials().get_frozen_credentials()
region = self._properties.get(SIGV4_REGION, self._boto_session.region_name)
service = self._properties.get(SIGV4_SERVICE, "execute-api")

url = str(request.url).split("?")[0]
Expand Down
9 changes: 7 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,6 @@ ray = [
]
python-snappy = { version = ">=0.6.0,<1.0.0", optional = true }
thrift = { version = ">=0.13.0,<1.0.0", optional = true }
mypy-boto3-glue = { version = ">=1.28.18", optional = true }
boto3 = { version = ">=1.24.59", optional = true }
s3fs = { version = ">=2023.1.0", optional = true }
adlfs = { version = ">=2023.1.0", optional = true }
Expand Down Expand Up @@ -102,6 +101,8 @@ cython = "3.1.1"
deptry = ">=0.14,<0.24"
datafusion = ">=44,<48"
docutils = "!=0.21.post1" # https://github.com/python-poetry/poetry/issues/9248#issuecomment-2026240520
mypy-boto3-glue = { version = ">=1.28.18", optional = true }
mypy-boto3-dynamodb = { version = ">=1.28.18", optional = true }

[tool.poetry.group.docs.dependencies]
# for mkdocs
Expand Down Expand Up @@ -217,6 +218,10 @@ ignore_missing_imports = true
module = "mypy_boto3_glue.*"
ignore_missing_imports = true

[[tool.mypy.overrides]]
module = "mypy_boto3_dynamodb.*"
ignore_missing_imports = true

[[tool.mypy.overrides]]
module = "moto"
ignore_missing_imports = true
Expand Down Expand Up @@ -299,7 +304,7 @@ snappy = ["python-snappy"]
hive = ["thrift"]
hive-kerberos = ["thrift", "thrift_sasl", "kerberos"]
s3fs = ["s3fs"]
glue = ["boto3", "mypy-boto3-glue"]
glue = ["boto3"]
adlfs = ["adlfs"]
dynamodb = ["boto3"]
zstandard = ["zstandard"]
Expand Down
8 changes: 8 additions & 0 deletions tests/catalog/test_dynamodb.py
Original file line number Diff line number Diff line change
Expand Up @@ -626,3 +626,11 @@ def test_table_exists(
assert test_catalog.table_exists(identifier) is True
# Act and Assert for an non-existing table
assert test_catalog.table_exists(("non", "exist")) is False


@mock_aws
def test_dynamodb_client_override() -> None:
catalog_name = "glue"
test_client = boto3.client("dynamodb", region_name="us-west-2")
test_catalog = DynamoDbCatalog(catalog_name, test_client)
assert test_catalog.dynamodb is test_client