class VoidTransform(Transform):
    """Transform whose ``apply`` yields ``None`` for every input.

    Construction is memoised on the class: ``VoidTransform()`` always hands
    back the one instance stored in ``_instance``.
    """

    # Shared instance; remains None until the first construction.
    _instance = None

    def __new__(cls):
        # Hand back the cached object when there is one; otherwise build it,
        # remember it on the class, and return it.
        cached = cls._instance
        if cached is not None:
            return cached
        cls._instance = super().__new__(cls)
        return cls._instance

    def __init__(self):
        # Python invokes __init__ after every VoidTransform() call, so this
        # also runs when __new__ returned the already-cached instance.
        # Both strings are forwarded unchanged to the base-class initializer.
        super().__init__("void", "transforms.always_null()")

    def apply(self, value: Optional[S]) -> None:
        """Ignore ``value`` and return ``None``."""
        return None

    def can_transform(self, target: IcebergType) -> bool:
        """Return ``True`` regardless of ``target``."""
        return True

    def result_type(self, source: IcebergType) -> IcebergType:
        """Return ``source`` unchanged."""
        return source

    def to_human_string(self, value: Optional[S]) -> str:
        """Return the literal string ``"null"`` regardless of ``value``."""
        return "null"


def always_null() -> Transform:
    """Return the result of ``VoidTransform()`` (the class-cached instance)."""
    void = VoidTransform()
    return void
def test_void_transform():
    """Check the behaviour of the transform returned by ``transforms.always_null()``."""
    subject = transforms.always_null()

    # Identity across factory calls, and repr() round-tripping through eval().
    assert subject is transforms.always_null()
    assert subject == eval(repr(subject))

    # Applying it to a value gives None; any source type is accepted and
    # reported back as the result type.
    assert subject.apply("test") is None
    assert subject.can_transform(BooleanType())
    assert isinstance(subject.result_type(BooleanType()), BooleanType)

    # Ordering-related checks.
    assert not subject.preserves_order
    assert subject.satisfies_order_of(transforms.always_null())
    assert not subject.satisfies_order_of(transforms.bucket(DateType(), 100))

    # Human-readable rendering and the dedup name.
    assert subject.to_human_string("test") == "null"
    assert subject.dedup_name == "void"