diff --git a/python/src/iceberg/expressions/base.py b/python/src/iceberg/expressions/base.py index 63a087ec1245..06cc4d2bdbfd 100644 --- a/python/src/iceberg/expressions/base.py +++ b/python/src/iceberg/expressions/base.py @@ -14,14 +14,15 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. -from abc import ABC, abstractmethod +from abc import ABCMeta, abstractmethod from enum import Enum, auto from functools import reduce, singledispatch from typing import Any, Generic, TypeVar from iceberg.files import StructProtocol from iceberg.schema import Accessor, Schema -from iceberg.types import NestedField, Singleton +from iceberg.types import NestedField +from iceberg.utils.singleton import Singleton T = TypeVar("T") @@ -88,7 +89,7 @@ def negate(self) -> "Operation": } -class Literal(Generic[T], ABC): +class Literal(Generic[T], metaclass=ABCMeta): """Literal which has a value and can be converted between types""" def __init__(self, value: T, value_type: type): @@ -129,7 +130,7 @@ def __ge__(self, other): return self.value >= other.value -class BooleanExpression(ABC): +class BooleanExpression(metaclass=ABCMeta): """base class for all boolean expressions""" @abstractmethod @@ -241,7 +242,7 @@ def __str__(self) -> str: return f"(not {self.child})" -class AlwaysTrue(BooleanExpression, Singleton): +class AlwaysTrue(BooleanExpression, metaclass=Singleton): """TRUE expression""" def __invert__(self) -> "AlwaysFalse": @@ -254,7 +255,7 @@ def __str__(self) -> str: return "true" -class AlwaysFalse(BooleanExpression, Singleton): +class AlwaysFalse(BooleanExpression, metaclass=Singleton): """FALSE expression""" def __invert__(self) -> "AlwaysTrue": @@ -348,7 +349,7 @@ def bind(self, schema: Schema, case_sensitive: bool) -> BoundReference: return BoundReference(field=field, accessor=schema.accessor_for_field(field.field_id)) -class BooleanExpressionVisitor(Generic[T], ABC): +class BooleanExpressionVisitor(Generic[T], metaclass=ABCMeta): @abstractmethod def visit_true(self) -> T: """Visit method for an AlwaysTrue boolean expression diff --git a/python/src/iceberg/expressions/literals.py b/python/src/iceberg/expressions/literals.py index a6128ee6c699..c89894d8ad00 100644 --- a/python/src/iceberg/expressions/literals.py +++ b/python/src/iceberg/expressions/literals.py @@ -37,7 +37,6 @@ FloatType, IntegerType, LongType, - Singleton, StringType, TimestampType, TimestamptzType, @@ -51,6 +50,7 @@ timestamp_to_micros, timestamptz_to_micros, ) +from iceberg.utils.singleton import Singleton @singledispatch @@ -112,7 +112,7 @@ def _(value: Decimal) -> Literal[Decimal]: return DecimalLiteral(value) -class AboveMax(Singleton): +class AboveMax(metaclass=Singleton): @property def value(self): raise ValueError("AboveMax has no value") @@ -127,7 +127,7 @@ def __str__(self): return "AboveMax" -class BelowMin(Singleton): +class BelowMin(metaclass=Singleton): def __init__(self): pass diff --git a/python/src/iceberg/types.py b/python/src/iceberg/types.py index 46828e6d408e..3139d13f8530 100644 --- a/python/src/iceberg/types.py +++ b/python/src/iceberg/types.py @@ -31,25 +31,13 @@ """ from dataclasses import dataclass, field from functools import cached_property -from typing import ( - ClassVar, - Dict, - Optional, - Tuple, -) +from typing import ClassVar, Optional, Tuple - -class Singleton: - _instance = None - - def __new__(cls): - if not isinstance(cls._instance, cls): - cls._instance = super().__new__(cls) - return cls._instance +from iceberg.utils.singleton import Singleton @dataclass(frozen=True) -class IcebergType: +class IcebergType(metaclass=Singleton): """Base type for all Iceberg Types Example: @@ -94,12 +82,6 @@ class FixedType(PrimitiveType): length: int = field() - _instances: ClassVar[Dict[int, "FixedType"]] = {} - - def __new__(cls, length: int): - cls._instances[length] = cls._instances.get(length) or object.__new__(cls) - return cls._instances[length] - @property def string_type(self) -> str: return f"fixed[{self.length}]" @@ -119,13 +101,6 @@ class DecimalType(PrimitiveType): precision: int = field() scale: int = field() - _instances: ClassVar[Dict[Tuple[int, int], "DecimalType"]] = {} - - def __new__(cls, precision: int, scale: int): - key = (precision, scale) - cls._instances[key] = cls._instances.get(key) or object.__new__(cls) - return cls._instances[key] - @property def string_type(self) -> str: return f"decimal({self.precision}, {self.scale})" @@ -161,20 +136,6 @@ class NestedField(IcebergType): required: bool = field(default=True) doc: Optional[str] = field(default=None, repr=False) - _instances: ClassVar[Dict[Tuple[bool, int, str, IcebergType, Optional[str]], "NestedField"]] = {} - - def __new__( - cls, - field_id: int, - name: str, - field_type: IcebergType, - required: bool = True, - doc: Optional[str] = None, - ): - key = (required, field_id, name, field_type, doc) - cls._instances[key] = cls._instances.get(key) or object.__new__(cls) - return cls._instances[key] - @property def optional(self) -> bool: return not self.required @@ -200,14 +161,6 @@ class StructType(IcebergType): fields: Tuple[NestedField] = field() - _instances: ClassVar[Dict[Tuple[NestedField, ...], "StructType"]] = {} - - def __new__(cls, *fields: NestedField, **kwargs): - if not fields and "fields" in kwargs: - fields = kwargs["fields"] - cls._instances[fields] = cls._instances.get(fields) or object.__new__(cls) - return cls._instances[fields] - def __init__(self, *fields: NestedField, **kwargs): # pylint: disable=super-init-not-called if not fields and "fields" in kwargs: fields = kwargs["fields"] @@ -232,18 +185,6 @@ class ListType(IcebergType): element_required: bool = field(default=True) element: NestedField = field(init=False, repr=False) - _instances: ClassVar[Dict[Tuple[bool, int, IcebergType], "ListType"]] = {} - - def __new__( - cls, - element_id: int, - element_type: IcebergType, - element_required: bool = True, - ): - key = (element_required, element_id, element_type) - cls._instances[key] = cls._instances.get(key) or object.__new__(cls) - return cls._instances[key] - def __post_init__(self): object.__setattr__( self, @@ -278,21 +219,6 @@ class MapType(IcebergType): key: NestedField = field(init=False, repr=False) value: NestedField = field(init=False, repr=False) - # _type_string_def = lambda self: f"map<{self.key_type}, {self.value_type}>" - _instances: ClassVar[Dict[Tuple[int, IcebergType, int, IcebergType, bool], "MapType"]] = {} - - def __new__( - cls, - key_id: int, - key_type: IcebergType, - value_id: int, - value_type: IcebergType, - value_required: bool = True, - ): - impl_key = (key_id, key_type, value_id, value_type, value_required) - cls._instances[impl_key] = cls._instances.get(impl_key) or object.__new__(cls) - return cls._instances[impl_key] - def __post_init__(self): object.__setattr__(self, "key", NestedField(name="key", field_id=self.key_id, field_type=self.key_type, required=False)) object.__setattr__( @@ -308,7 +234,7 @@ def __post_init__(self): @dataclass(frozen=True) -class BooleanType(PrimitiveType, Singleton): +class BooleanType(PrimitiveType): """A boolean data type in Iceberg can be represented using an instance of this class. Example: @@ -325,7 +251,7 @@ def string_type(self) -> str: @dataclass(frozen=True) -class IntegerType(PrimitiveType, Singleton): +class IntegerType(PrimitiveType): """An Integer data type in Iceberg can be represented using an instance of this class. Integers in Iceberg are 32-bit signed and can be promoted to Longs. @@ -350,7 +276,7 @@ def string_type(self) -> str: @dataclass(frozen=True) -class LongType(PrimitiveType, Singleton): +class LongType(PrimitiveType): """A Long data type in Iceberg can be represented using an instance of this class. Longs in Iceberg are 64-bit signed integers. @@ -379,7 +305,7 @@ def string_type(self) -> str: @dataclass(frozen=True) -class FloatType(PrimitiveType, Singleton): +class FloatType(PrimitiveType): """A Float data type in Iceberg can be represented using an instance of this class. Floats in Iceberg are 32-bit IEEE 754 floating points and can be promoted to Doubles. @@ -406,7 +332,7 @@ def string_type(self) -> str: @dataclass(frozen=True) -class DoubleType(PrimitiveType, Singleton): +class DoubleType(PrimitiveType): """A Double data type in Iceberg can be represented using an instance of this class. Doubles in Iceberg are 64-bit IEEE 754 floating points. @@ -424,7 +350,7 @@ def string_type(self) -> str: @dataclass(frozen=True) -class DateType(PrimitiveType, Singleton): +class DateType(PrimitiveType): """A Date data type in Iceberg can be represented using an instance of this class. Dates in Iceberg are calendar dates without a timezone or time. @@ -442,7 +368,7 @@ def string_type(self) -> str: @dataclass(frozen=True) -class TimeType(PrimitiveType, Singleton): +class TimeType(PrimitiveType): """A Time data type in Iceberg can be represented using an instance of this class. Times in Iceberg have microsecond precision and are a time of day without a date or timezone. @@ -460,7 +386,7 @@ def string_type(self) -> str: @dataclass(frozen=True) -class TimestampType(PrimitiveType, Singleton): +class TimestampType(PrimitiveType): """A Timestamp data type in Iceberg can be represented using an instance of this class. Timestamps in Iceberg have microsecond precision and include a date and a time of day without a timezone. @@ -478,7 +404,7 @@ def string_type(self) -> str: @dataclass(frozen=True) -class TimestamptzType(PrimitiveType, Singleton): +class TimestamptzType(PrimitiveType): """A Timestamptz data type in Iceberg can be represented using an instance of this class. Timestamptzs in Iceberg are stored as UTC and include a date and a time of day with a timezone. @@ -496,7 +422,7 @@ def string_type(self) -> str: @dataclass(frozen=True) -class StringType(PrimitiveType, Singleton): +class StringType(PrimitiveType): """A String data type in Iceberg can be represented using an instance of this class. Strings in Iceberg are arbitrary-length character sequences and are encoded with UTF-8. @@ -514,7 +440,7 @@ def string_type(self) -> str: @dataclass(frozen=True) -class UUIDType(PrimitiveType, Singleton): +class UUIDType(PrimitiveType): """A UUID data type in Iceberg can be represented using an instance of this class. UUIDs in Iceberg are universally unique identifiers. @@ -532,7 +458,7 @@ def string_type(self) -> str: @dataclass(frozen=True) -class BinaryType(PrimitiveType, Singleton): +class BinaryType(PrimitiveType): """A Binary data type in Iceberg can be represented using an instance of this class. Binaries in Iceberg are arbitrary-length byte arrays. diff --git a/python/src/iceberg/utils/singleton.py b/python/src/iceberg/utils/singleton.py new file mode 100644 index 000000000000..f6c6912fab6e --- /dev/null +++ b/python/src/iceberg/utils/singleton.py @@ -0,0 +1,28 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from abc import ABCMeta +from typing import ClassVar, Dict + + +class Singleton(ABCMeta): + _instances: ClassVar[Dict] = {} + + def __call__(cls, *args, **kwargs): + key = (cls, args, tuple(sorted(kwargs.items()))) + if key not in cls._instances: + cls._instances[key] = super().__call__(*args, **kwargs) + return cls._instances[key] diff --git a/python/tests/expressions/test_expressions_base.py b/python/tests/expressions/test_expressions_base.py index 7fe3e9817e1a..3d9177bbfe11 100644 --- a/python/tests/expressions/test_expressions_base.py +++ b/python/tests/expressions/test_expressions_base.py @@ -22,7 +22,8 @@ import pytest from iceberg.expressions import base -from iceberg.types import NestedField, Singleton, StringType +from iceberg.types import NestedField, StringType +from iceberg.utils.singleton import Singleton @pytest.mark.parametrize( @@ -63,7 +64,7 @@ def test_raise_on_no_negation_for_operation(operation): assert str(exc_info.value) == f"No negation defined for operation {operation}" -class TestExpressionA(base.BooleanExpression, Singleton): +class TestExpressionA(base.BooleanExpression, metaclass=Singleton): def __invert__(self): return TestExpressionB() @@ -74,7 +75,7 @@ def __str__(self): return "testexpra" -class TestExpressionB(base.BooleanExpression, Singleton): +class TestExpressionB(base.BooleanExpression, metaclass=Singleton): def __invert__(self): return TestExpressionA() diff --git a/python/tests/test_types.py b/python/tests/test_types.py index 26f6f421a7fc..3d75b88a58d6 100644 --- a/python/tests/test_types.py +++ b/python/tests/test_types.py @@ -204,3 +204,10 @@ def test_non_parameterized_type_equality(input_index, input_type, check_index, c assert input_type() == check_type() else: assert input_type() != check_type() + + +def test_types_singleton(): + """The types are immutable so we can return the same instance multiple times""" + assert id(BooleanType()) == id(BooleanType()) + assert id(FixedType(22)) == id(FixedType(22)) + assert id(FixedType(19)) != id(FixedType(25))