1515# specific language governing permissions and limitations
1616# under the License.
1717
18- import math
1918import struct
19+ from abc import ABC
2020from decimal import Decimal
21- from typing import Optional
21+ from typing import Generic , Optional , TypeVar
2222from uuid import UUID
2323
2424import mmh3 # type: ignore
3737 TimeType ,
3838 UUIDType ,
3939)
40+ from iceberg .utils .decimal import decimal_to_bytes
4041
42+ S = TypeVar ("S" )
43+ T = TypeVar ("T" )
4144
42- class Transform :
45+
46+ class Transform (ABC , Generic [S , T ]):
4347 """Transform base class for concrete transforms.
4448
4549 A base class to transform values and project predicates on partition values.
@@ -60,18 +64,19 @@ def __repr__(self):
6064 def __str__ (self ):
6165 return self ._transform_string
6266
63- def __call__ (self , value ) :
67+ def __call__ (self , value : S ) -> Optional [ T ] :
6468 return self .apply (value )
6569
66- def apply (self , value ) :
67- raise NotImplementedError ()
70+ def apply (self , value : S ) -> Optional [ T ] :
71+ ...
6872
6973 def can_transform (self , source : IcebergType ) -> bool :
7074 return False
7175
7276 def result_type (self , source : IcebergType ) -> IcebergType :
73- raise NotImplementedError ()
77+ ...
7478
79+ @property
7580 def preserves_order (self ) -> bool :
7681 return False
7782
@@ -83,11 +88,12 @@ def to_human_string(self, value) -> str:
8388 return "null"
8489 return str (value )
8590
91+ @property
8692 def dedup_name (self ) -> str :
8793 return self ._transform_string
8894
8995
90- class BaseBucketTransform (Transform ):
96+ class BaseBucketTransform (Transform [ S , int ] ):
9197 """Base Transform class to transform a value into a bucket partition value
9298
9399 Transforms are parameterized by a number of buckets. Bucket partition transforms use a 32-bit
@@ -110,18 +116,15 @@ def __init__(self, source_type: IcebergType, num_buckets: int):
110116 def num_buckets (self ) -> int :
111117 return self ._num_buckets
112118
113- def hash (self , value ) -> int :
119+ def hash (self , value : S ) -> Optional [ int ] :
114120 raise NotImplementedError ()
115121
116- def apply (self , value ) -> Optional [int ]:
122+ def apply (self , value : S ) -> Optional [int ]:
117123 if value is None :
118124 return None
119125
120126 return (self .hash (value ) & IntegerType .max ) % self ._num_buckets
121127
122- def can_transform (self , source : IcebergType ) -> bool :
123- raise NotImplementedError ()
124-
125128 def result_type (self , source : IcebergType ) -> IcebergType :
126129 return IntegerType ()
127130
@@ -156,11 +159,7 @@ def can_transform(self, source: IcebergType) -> bool:
156159 return isinstance (source , DecimalType )
157160
158161 def hash (self , value : Decimal ) -> int :
159- value_tuple = value .as_tuple ()
160- unscaled_value = int (("-" if value_tuple .sign else "" ) + "" .join ([str (d ) for d in value_tuple .digits ]))
161- number_of_bytes = int (math .ceil (unscaled_value .bit_length () / 8 ))
162- value_in_bytes = unscaled_value .to_bytes (length = number_of_bytes , byteorder = "big" )
163- return mmh3 .hash (value_in_bytes )
162+ return mmh3 .hash (decimal_to_bytes (value ))
164163
165164
166165class BucketStringTransform (BaseBucketTransform ):
0 commit comments