Skip to content
2,773 changes: 1,553 additions & 1,220 deletions poetry.lock

Large diffs are not rendered by default.

36 changes: 23 additions & 13 deletions pyiceberg/catalog/dynamodb.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
)

import boto3
from mypy_boto3_dynamodb.client import DynamoDBClient

from pyiceberg.catalog import (
BOTOCORE_SESSION,
Expand Down Expand Up @@ -94,20 +95,29 @@


class DynamoDbCatalog(MetastoreCatalog):
def __init__(self, name: str, **properties: str):
super().__init__(name, **properties)
def __init__(self, name: str, client: Optional[DynamoDBClient] = None, **properties: str):
"""Dynamodb catalog.
session = boto3.Session(
profile_name=properties.get(DYNAMODB_PROFILE_NAME),
region_name=get_first_property_value(properties, DYNAMODB_REGION, AWS_REGION),
botocore_session=properties.get(BOTOCORE_SESSION),
aws_access_key_id=get_first_property_value(properties, DYNAMODB_ACCESS_KEY_ID, AWS_ACCESS_KEY_ID),
aws_secret_access_key=get_first_property_value(properties, DYNAMODB_SECRET_ACCESS_KEY, AWS_SECRET_ACCESS_KEY),
aws_session_token=get_first_property_value(properties, DYNAMODB_SESSION_TOKEN, AWS_SESSION_TOKEN),
)
self.dynamodb = session.client(DYNAMODB_CLIENT)
self.dynamodb_table_name = self.properties.get(DYNAMODB_TABLE_NAME, DYNAMODB_TABLE_NAME_DEFAULT)
self._ensure_catalog_table_exists_or_create()
Args:
name: Name to identify the catalog.
client: An optional boto3 dynamodb client.
properties: Properties for dynamodb client construction and configuration.
"""
super().__init__(name, **properties)
if client is not None:
self.dynamodb = client
else:
session = boto3.Session(
profile_name=properties.get(DYNAMODB_PROFILE_NAME),
region_name=get_first_property_value(properties, DYNAMODB_REGION, AWS_REGION),
botocore_session=properties.get(BOTOCORE_SESSION),
aws_access_key_id=get_first_property_value(properties, DYNAMODB_ACCESS_KEY_ID, AWS_ACCESS_KEY_ID),
aws_secret_access_key=get_first_property_value(properties, DYNAMODB_SECRET_ACCESS_KEY, AWS_SECRET_ACCESS_KEY),
aws_session_token=get_first_property_value(properties, DYNAMODB_SESSION_TOKEN, AWS_SESSION_TOKEN),
)
self.dynamodb = session.client(DYNAMODB_CLIENT)
self.dynamodb_table_name = self.properties.get(DYNAMODB_TABLE_NAME, DYNAMODB_TABLE_NAME_DEFAULT)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we also want to assign this one when passing in an existing client. Adding the methods to the class for completeness may also be a good thing to do as part of this PR.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think they do exist (in the MetastoreCatalog) and were just never updated into this class. I will take a look

Copy link
Contributor Author

@jayceslesar jayceslesar Jun 10, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

After a second look I dont know if we need to make a change _ensure_catalog_table_exists_or_create makes the dynamo table which is needed to make + interact with iceberg tables

self._ensure_catalog_table_exists_or_create()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we want to execute this when passing in an existing client?


def _ensure_catalog_table_exists_or_create(self) -> None:
if self._dynamodb_table_exists():
Expand Down
2 changes: 1 addition & 1 deletion pyiceberg/catalog/glue.py
Original file line number Diff line number Diff line change
Expand Up @@ -317,7 +317,7 @@ def __init__(self, name: str, client: Optional[GlueClient] = None, **properties:
"""
super().__init__(name, **properties)

if client:
if client is not None:
self.glue = client
else:
retry_mode_prop_value = get_first_property_value(properties, GLUE_RETRY_MODE)
Expand Down
16 changes: 12 additions & 4 deletions pyiceberg/catalog/rest/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@

from pyiceberg import __version__
from pyiceberg.catalog import (
BOTOCORE_SESSION,
TOKEN,
URI,
WAREHOUSE_LOCATION,
Expand All @@ -53,6 +54,7 @@
TableAlreadyExistsError,
UnauthorizedError,
)
from pyiceberg.io import AWS_ACCESS_KEY_ID, AWS_REGION, AWS_SECRET_ACCESS_KEY, AWS_SESSION_TOKEN
from pyiceberg.partitioning import UNPARTITIONED_PARTITION_SPEC, PartitionSpec, assign_fresh_partition_spec_ids
from pyiceberg.schema import Schema, assign_fresh_schema_ids
from pyiceberg.table import (
Expand All @@ -72,7 +74,7 @@
from pyiceberg.typedef import EMPTY_DICT, UTF8, IcebergBaseModel, Identifier, Properties
from pyiceberg.types import transform_dict_value_to_str
from pyiceberg.utils.deprecated import deprecation_message
from pyiceberg.utils.properties import get_header_properties, property_as_bool
from pyiceberg.utils.properties import get_first_property_value, get_header_properties, property_as_bool

if TYPE_CHECKING:
import pyarrow as pa
Expand Down Expand Up @@ -390,11 +392,17 @@ class SigV4Adapter(HTTPAdapter):
def __init__(self, **properties: str):
super().__init__()
self._properties = properties
self._boto_session = boto3.Session(
region_name=get_first_property_value(self._properties, AWS_REGION),
botocore_session=self._properties.get(BOTOCORE_SESSION),
aws_access_key_id=get_first_property_value(self._properties, AWS_ACCESS_KEY_ID),
aws_secret_access_key=get_first_property_value(self._properties, AWS_SECRET_ACCESS_KEY),
aws_session_toxken=get_first_property_value(self._properties, AWS_SESSION_TOKEN),
)

def add_headers(self, request: PreparedRequest, **kwargs: Any) -> None: # pylint: disable=W0613
boto_session = boto3.Session()
credentials = boto_session.get_credentials().get_frozen_credentials()
region = self._properties.get(SIGV4_REGION, boto_session.region_name)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@Fokko @kevinjqliu probably worth refactoring this class out to get some proper test coverage?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hey @jayceslesar yes, I think that makes sense. Let's do that in a separate PR 👍

credentials = self._boto_session.get_credentials().get_frozen_credentials()
region = self._properties.get(SIGV4_REGION, self._boto_session.region_name)
service = self._properties.get(SIGV4_SERVICE, "execute-api")

url = str(request.url).split("?")[0]
Expand Down
7 changes: 6 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ ray = [
python-snappy = { version = ">=0.6.0,<1.0.0", optional = true }
thrift = { version = ">=0.13.0,<1.0.0", optional = true }
mypy-boto3-glue = { version = ">=1.28.18", optional = true }
mypy-boto3-dynamodb = { version = ">=1.28.18", optional = true }
boto3 = { version = ">=1.24.59", optional = true }
s3fs = { version = ">=2023.1.0", optional = true }
adlfs = { version = ">=2023.1.0", optional = true }
Expand Down Expand Up @@ -217,6 +218,10 @@ ignore_missing_imports = true
module = "mypy_boto3_glue.*"
ignore_missing_imports = true

[[tool.mypy.overrides]]
module = "mypy_boto3_dynamodb.*"
ignore_missing_imports = true

[[tool.mypy.overrides]]
module = "moto"
ignore_missing_imports = true
Expand Down Expand Up @@ -301,7 +306,7 @@ hive-kerberos = ["thrift", "thrift_sasl", "kerberos"]
s3fs = ["s3fs"]
glue = ["boto3", "mypy-boto3-glue"]
adlfs = ["adlfs"]
dynamodb = ["boto3"]
dynamodb = ["boto3", "mypy-boto3-dynamodb"]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shouldn't this be a dev-dependency?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

probably -- the glue one isnt but I will move both over

zstandard = ["zstandard"]
sql-postgres = ["sqlalchemy", "psycopg2-binary"]
sql-sqlite = ["sqlalchemy"]
Expand Down
8 changes: 8 additions & 0 deletions tests/catalog/test_dynamodb.py
Original file line number Diff line number Diff line change
Expand Up @@ -626,3 +626,11 @@ def test_table_exists(
assert test_catalog.table_exists(identifier) is True
# Act and Assert for an non-existing table
assert test_catalog.table_exists(("non", "exist")) is False


@mock_aws
def test_dynamodb_client_override() -> None:
catalog_name = "glue"
test_client = boto3.client("dynamodb", region_name="us-west-2")
test_catalog = DynamoDbCatalog(catalog_name, test_client)
assert test_catalog.dynamodb is test_client
Loading