Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 8 additions & 7 deletions python/src/iceberg/expressions/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,15 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from abc import ABC, abstractmethod
from abc import ABCMeta, abstractmethod
from enum import Enum, auto
from functools import reduce, singledispatch
from typing import Any, Generic, TypeVar

from iceberg.files import StructProtocol
from iceberg.schema import Accessor, Schema
from iceberg.types import NestedField, Singleton
from iceberg.types import NestedField
from iceberg.utils.singleton import Singleton

T = TypeVar("T")

Expand Down Expand Up @@ -88,7 +89,7 @@ def negate(self) -> "Operation":
}


class Literal(Generic[T], ABC):
class Literal(Generic[T], metaclass=ABCMeta):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do we want the reader to know that this is changing a metaclass and not just inheriting from ABC? It seems like bringing a metaclass into the code is more clear about what's happening, but introduces a distinction that people (like me) may not understand or need.

"""Literal which has a value and can be converted between types"""

def __init__(self, value: T, value_type: type):
Expand Down Expand Up @@ -129,7 +130,7 @@ def __ge__(self, other):
return self.value >= other.value


class BooleanExpression(ABC):
class BooleanExpression(metaclass=ABCMeta):
"""base class for all boolean expressions"""

@abstractmethod
Expand Down Expand Up @@ -241,7 +242,7 @@ def __str__(self) -> str:
return f"(not {self.child})"


class AlwaysTrue(BooleanExpression, Singleton):
class AlwaysTrue(BooleanExpression, metaclass=Singleton):
"""TRUE expression"""

def __invert__(self) -> "AlwaysFalse":
Expand All @@ -254,7 +255,7 @@ def __str__(self) -> str:
return "true"


class AlwaysFalse(BooleanExpression, Singleton):
class AlwaysFalse(BooleanExpression, metaclass=Singleton):
"""FALSE expression"""

def __invert__(self) -> "AlwaysTrue":
Expand Down Expand Up @@ -348,7 +349,7 @@ def bind(self, schema: Schema, case_sensitive: bool) -> BoundReference:
return BoundReference(field=field, accessor=schema.accessor_for_field(field.field_id))


class BooleanExpressionVisitor(Generic[T], ABC):
class BooleanExpressionVisitor(Generic[T], metaclass=ABCMeta):
@abstractmethod
def visit_true(self) -> T:
"""Visit method for an AlwaysTrue boolean expression
Expand Down
6 changes: 3 additions & 3 deletions python/src/iceberg/expressions/literals.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@
FloatType,
IntegerType,
LongType,
Singleton,
StringType,
TimestampType,
TimestamptzType,
Expand All @@ -51,6 +50,7 @@
timestamp_to_micros,
timestamptz_to_micros,
)
from iceberg.utils.singleton import Singleton


@singledispatch
Expand Down Expand Up @@ -112,7 +112,7 @@ def _(value: Decimal) -> Literal[Decimal]:
return DecimalLiteral(value)


class AboveMax(Singleton):
class AboveMax(metaclass=Singleton):
@property
def value(self):
raise ValueError("AboveMax has no value")
Expand All @@ -127,7 +127,7 @@ def __str__(self):
return "AboveMax"


class BelowMin(Singleton):
class BelowMin(metaclass=Singleton):
def __init__(self):
pass

Expand Down
104 changes: 15 additions & 89 deletions python/src/iceberg/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,25 +31,13 @@
"""
from dataclasses import dataclass, field
from functools import cached_property
from typing import (
ClassVar,
Dict,
Optional,
Tuple,
)
from typing import ClassVar, Optional, Tuple


class Singleton:
    """Base class that reuses one instance per class.

    The instance is stored on the class in ``_instance``. The ``isinstance``
    check means an instance inherited from a parent class is not reused by a
    subclass: the subclass creates and stores its own.
    """

    _instance = None

    def __new__(cls):
        existing = cls._instance
        if isinstance(existing, cls):
            return existing
        # Nothing cached yet (or only a parent's instance is visible): create one.
        cls._instance = super().__new__(cls)
        return cls._instance
from iceberg.utils.singleton import Singleton


@dataclass(frozen=True)
class IcebergType:
class IcebergType(metaclass=Singleton):
"""Base type for all Iceberg Types

Example:
Expand Down Expand Up @@ -94,12 +82,6 @@ class FixedType(PrimitiveType):

length: int = field()

_instances: ClassVar[Dict[int, "FixedType"]] = {}

def __new__(cls, length: int):
cls._instances[length] = cls._instances.get(length) or object.__new__(cls)
return cls._instances[length]

@property
def string_type(self) -> str:
return f"fixed[{self.length}]"
Expand All @@ -119,13 +101,6 @@ class DecimalType(PrimitiveType):
precision: int = field()
scale: int = field()

_instances: ClassVar[Dict[Tuple[int, int], "DecimalType"]] = {}

def __new__(cls, precision: int, scale: int):
key = (precision, scale)
cls._instances[key] = cls._instances.get(key) or object.__new__(cls)
return cls._instances[key]

@property
def string_type(self) -> str:
return f"decimal({self.precision}, {self.scale})"
Expand Down Expand Up @@ -161,20 +136,6 @@ class NestedField(IcebergType):
required: bool = field(default=True)
doc: Optional[str] = field(default=None, repr=False)

_instances: ClassVar[Dict[Tuple[bool, int, str, IcebergType, Optional[str]], "NestedField"]] = {}

def __new__(
cls,
field_id: int,
name: str,
field_type: IcebergType,
required: bool = True,
doc: Optional[str] = None,
):
key = (required, field_id, name, field_type, doc)
cls._instances[key] = cls._instances.get(key) or object.__new__(cls)
return cls._instances[key]

@property
def optional(self) -> bool:
return not self.required
Expand All @@ -200,14 +161,6 @@ class StructType(IcebergType):

fields: Tuple[NestedField] = field()

_instances: ClassVar[Dict[Tuple[NestedField, ...], "StructType"]] = {}

def __new__(cls, *fields: NestedField, **kwargs):
if not fields and "fields" in kwargs:
fields = kwargs["fields"]
cls._instances[fields] = cls._instances.get(fields) or object.__new__(cls)
return cls._instances[fields]

def __init__(self, *fields: NestedField, **kwargs): # pylint: disable=super-init-not-called
if not fields and "fields" in kwargs:
fields = kwargs["fields"]
Expand All @@ -232,18 +185,6 @@ class ListType(IcebergType):
element_required: bool = field(default=True)
element: NestedField = field(init=False, repr=False)

_instances: ClassVar[Dict[Tuple[bool, int, IcebergType], "ListType"]] = {}

def __new__(
cls,
element_id: int,
element_type: IcebergType,
element_required: bool = True,
):
key = (element_required, element_id, element_type)
cls._instances[key] = cls._instances.get(key) or object.__new__(cls)
return cls._instances[key]

def __post_init__(self):
object.__setattr__(
self,
Expand Down Expand Up @@ -278,21 +219,6 @@ class MapType(IcebergType):
key: NestedField = field(init=False, repr=False)
value: NestedField = field(init=False, repr=False)

# _type_string_def = lambda self: f"map<{self.key_type}, {self.value_type}>"
_instances: ClassVar[Dict[Tuple[int, IcebergType, int, IcebergType, bool], "MapType"]] = {}

def __new__(
cls,
key_id: int,
key_type: IcebergType,
value_id: int,
value_type: IcebergType,
value_required: bool = True,
):
impl_key = (key_id, key_type, value_id, value_type, value_required)
cls._instances[impl_key] = cls._instances.get(impl_key) or object.__new__(cls)
return cls._instances[impl_key]

def __post_init__(self):
object.__setattr__(self, "key", NestedField(name="key", field_id=self.key_id, field_type=self.key_type, required=False))
object.__setattr__(
Expand All @@ -308,7 +234,7 @@ def __post_init__(self):


@dataclass(frozen=True)
class BooleanType(PrimitiveType, Singleton):
class BooleanType(PrimitiveType):
"""A boolean data type in Iceberg can be represented using an instance of this class.

Example:
Expand All @@ -325,7 +251,7 @@ def string_type(self) -> str:


@dataclass(frozen=True)
class IntegerType(PrimitiveType, Singleton):
class IntegerType(PrimitiveType):
"""An Integer data type in Iceberg can be represented using an instance of this class. Integers in Iceberg are
32-bit signed and can be promoted to Longs.

Expand All @@ -350,7 +276,7 @@ def string_type(self) -> str:


@dataclass(frozen=True)
class LongType(PrimitiveType, Singleton):
class LongType(PrimitiveType):
"""A Long data type in Iceberg can be represented using an instance of this class. Longs in Iceberg are
64-bit signed integers.

Expand Down Expand Up @@ -379,7 +305,7 @@ def string_type(self) -> str:


@dataclass(frozen=True)
class FloatType(PrimitiveType, Singleton):
class FloatType(PrimitiveType):
"""A Float data type in Iceberg can be represented using an instance of this class. Floats in Iceberg are
32-bit IEEE 754 floating points and can be promoted to Doubles.

Expand All @@ -406,7 +332,7 @@ def string_type(self) -> str:


@dataclass(frozen=True)
class DoubleType(PrimitiveType, Singleton):
class DoubleType(PrimitiveType):
"""A Double data type in Iceberg can be represented using an instance of this class. Doubles in Iceberg are
64-bit IEEE 754 floating points.

Expand All @@ -424,7 +350,7 @@ def string_type(self) -> str:


@dataclass(frozen=True)
class DateType(PrimitiveType, Singleton):
class DateType(PrimitiveType):
"""A Date data type in Iceberg can be represented using an instance of this class. Dates in Iceberg are
calendar dates without a timezone or time.

Expand All @@ -442,7 +368,7 @@ def string_type(self) -> str:


@dataclass(frozen=True)
class TimeType(PrimitiveType, Singleton):
class TimeType(PrimitiveType):
"""A Time data type in Iceberg can be represented using an instance of this class. Times in Iceberg
have microsecond precision and are a time of day without a date or timezone.

Expand All @@ -460,7 +386,7 @@ def string_type(self) -> str:


@dataclass(frozen=True)
class TimestampType(PrimitiveType, Singleton):
class TimestampType(PrimitiveType):
"""A Timestamp data type in Iceberg can be represented using an instance of this class. Timestamps in
Iceberg have microsecond precision and include a date and a time of day without a timezone.

Expand All @@ -478,7 +404,7 @@ def string_type(self) -> str:


@dataclass(frozen=True)
class TimestamptzType(PrimitiveType, Singleton):
class TimestamptzType(PrimitiveType):
"""A Timestamptz data type in Iceberg can be represented using an instance of this class. Timestamptzs in
Iceberg are stored as UTC and include a date and a time of day with a timezone.

Expand All @@ -496,7 +422,7 @@ def string_type(self) -> str:


@dataclass(frozen=True)
class StringType(PrimitiveType, Singleton):
class StringType(PrimitiveType):
"""A String data type in Iceberg can be represented using an instance of this class. Strings in
Iceberg are arbitrary-length character sequences and are encoded with UTF-8.

Expand All @@ -514,7 +440,7 @@ def string_type(self) -> str:


@dataclass(frozen=True)
class UUIDType(PrimitiveType, Singleton):
class UUIDType(PrimitiveType):
"""A UUID data type in Iceberg can be represented using an instance of this class. UUIDs in
Iceberg are universally unique identifiers.

Expand All @@ -532,7 +458,7 @@ def string_type(self) -> str:


@dataclass(frozen=True)
class BinaryType(PrimitiveType, Singleton):
class BinaryType(PrimitiveType):
"""A Binary data type in Iceberg can be represented using an instance of this class. Binaries in
Iceberg are arbitrary-length byte arrays.

Expand Down
28 changes: 28 additions & 0 deletions python/src/iceberg/utils/singleton.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from abc import ABCMeta
from typing import ClassVar, Dict


class Singleton(ABCMeta):
    """Metaclass that returns one cached instance per (class, arguments) combination.

    Calling a class that uses this metaclass builds a cache key from the class
    itself, the positional arguments, and the keyword arguments as sorted
    ``(name, value)`` pairs. The first call with a given key constructs the
    instance; later calls with an equal key return that same object.

    The cache dict is declared on the metaclass; the class is part of the key,
    so entries for different classes do not collide. The key is used as a dict
    key, so every argument must be hashable. Passing an argument positionally
    and passing it by keyword produce different keys.
    """

    _instances: ClassVar[Dict] = {}

    def __call__(cls, *args, **kwargs):
        named_args = tuple(sorted(kwargs.items()))
        cache_key = (cls, args, named_args)
        if cache_key in cls._instances:
            return cls._instances[cache_key]
        # First request for this combination: construct normally and remember it.
        cls._instances[cache_key] = super().__call__(*args, **kwargs)
        return cls._instances[cache_key]
7 changes: 4 additions & 3 deletions python/tests/expressions/test_expressions_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@
import pytest

from iceberg.expressions import base
from iceberg.types import NestedField, Singleton, StringType
from iceberg.types import NestedField, StringType
from iceberg.utils.singleton import Singleton


@pytest.mark.parametrize(
Expand Down Expand Up @@ -63,7 +64,7 @@ def test_raise_on_no_negation_for_operation(operation):
assert str(exc_info.value) == f"No negation defined for operation {operation}"


class TestExpressionA(base.BooleanExpression, Singleton):
class TestExpressionA(base.BooleanExpression, metaclass=Singleton):
def __invert__(self):
return TestExpressionB()

Expand All @@ -74,7 +75,7 @@ def __str__(self):
return "testexpra"


class TestExpressionB(base.BooleanExpression, Singleton):
class TestExpressionB(base.BooleanExpression, metaclass=Singleton):
def __invert__(self):
return TestExpressionA()

Expand Down
Loading