From 2ad6d3f27049a224571d30df3030a69ff377531e Mon Sep 17 00:00:00 2001 From: Jun He Date: Sun, 8 May 2022 23:21:11 -0700 Subject: [PATCH 1/2] add VoidTransform --- python/src/iceberg/transforms.py | 30 ++++++++++++++++++++++++++++++ python/tests/test_transforms.py | 13 +++++++++++++ 2 files changed, 43 insertions(+) diff --git a/python/src/iceberg/transforms.py b/python/src/iceberg/transforms.py index 3ebc19219bb9..f65f596f98fe 100644 --- a/python/src/iceberg/transforms.py +++ b/python/src/iceberg/transforms.py @@ -244,6 +244,32 @@ def result_type(self, source: IcebergType) -> IcebergType: return StringType() +class VoidTransform(Transform): + """A transform that always returns None""" + + _instance = None + + def __new__(cls): + if cls._instance is None: + cls._instance = super(VoidTransform, cls).__new__(cls) + return cls._instance + + def __init__(self): + super().__init__("void", "transforms.always_null()") + + def apply(self, value): + return None + + def can_transform(self, target: IcebergType) -> bool: + return True + + def result_type(self, source: IcebergType) -> IcebergType: + return source + + def to_human_string(self, value) -> str: + return "null" + + def bucket(source_type: IcebergType, num_buckets: int) -> BaseBucketTransform: if type(source_type) in {IntegerType, LongType, DateType, TimeType, TimestampType, TimestamptzType}: return BucketNumberTransform(source_type, num_buckets) @@ -259,3 +285,7 @@ def bucket(source_type: IcebergType, num_buckets: int) -> BaseBucketTransform: return BucketUUIDTransform(num_buckets) else: raise ValueError(f"Cannot bucket by type: {source_type}") + + +def always_null() -> Transform: + return VoidTransform() diff --git a/python/tests/test_transforms.py b/python/tests/test_transforms.py index e27350cf8a1a..85a0f987dc99 100644 --- a/python/tests/test_transforms.py +++ b/python/tests/test_transforms.py @@ -136,3 +136,16 @@ def test_unknown_transform(): assert unknown_transform.can_transform(FixedType(8)) assert not unknown_transform.can_transform(FixedType(5)) assert isinstance(unknown_transform.result_type(BooleanType()), StringType) + + +def test_void_transform(): + void_transform = transforms.always_null() + assert void_transform == eval(repr(void_transform)) + assert void_transform.apply("test") is None + assert void_transform.can_transform(BooleanType()) + assert isinstance(void_transform.result_type(BooleanType()), BooleanType) + assert not void_transform.preserves_order + assert void_transform.satisfies_order_of(transforms.always_null()) + assert not void_transform.satisfies_order_of(transforms.bucket(DateType(), 100)) + assert void_transform.to_human_string("test") == "null" + assert void_transform.dedup_name == "void" From 7e13846f614ef243ada0e85305526965eaa5cf61 Mon Sep 17 00:00:00 2001 From: Jun He Date: Fri, 13 May 2022 11:03:58 -0700 Subject: [PATCH 2/2] address the comments --- python/src/iceberg/transforms.py | 6 +++--- python/tests/test_transforms.py | 1 + 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/python/src/iceberg/transforms.py b/python/src/iceberg/transforms.py index f65f596f98fe..8c54b214d8e9 100644 --- a/python/src/iceberg/transforms.py +++ b/python/src/iceberg/transforms.py @@ -234,7 +234,7 @@ def __init__(self, source_type: IcebergType, transform: str): self._type = source_type self._transform = transform - def apply(self, value): + def apply(self, value: Optional[S]): raise AttributeError(f"Cannot apply unsupported transform: {self}") def can_transform(self, target: IcebergType) -> bool: @@ -257,7 +257,7 @@ def __new__(cls): def __init__(self): super().__init__("void", "transforms.always_null()") - def apply(self, value): + def apply(self, value: Optional[S]) -> None: return None def can_transform(self, target: IcebergType) -> bool: @@ -266,7 +266,7 @@ def can_transform(self, target: IcebergType) -> bool: def result_type(self, source: IcebergType) -> IcebergType: return source - def to_human_string(self, value) -> str: + def to_human_string(self, value: Optional[S]) -> str: return "null" diff --git a/python/tests/test_transforms.py b/python/tests/test_transforms.py index 85a0f987dc99..a2271249455b 100644 --- a/python/tests/test_transforms.py +++ b/python/tests/test_transforms.py @@ -140,6 +140,7 @@ def test_unknown_transform(): def test_void_transform(): void_transform = transforms.always_null() + assert void_transform is transforms.always_null() assert void_transform == eval(repr(void_transform)) assert void_transform.apply("test") is None assert void_transform.can_transform(BooleanType())