apache · rdblue · Oct 31, 2021 · Oct 27, 2021 · Oct 29, 2021 · jackye1995
diff --git a/python/setup.py b/python/setup.py
@@ -18,12 +18,12 @@
 from setuptools import setup
 
 setup(
-    name='py-iceberg',
+    name="py-iceberg",
     install_requires=[],
     extras_require={
         "dev": [
             "tox-travis==0.12",
             "pytest",
         ],
-    }
+    },
 )
diff --git a/python/src/iceberg/types.py b/python/src/iceberg/types.py
@@ -15,6 +15,7 @@
 # specific language governing permissions and limitations
 # under the License.
 
+
 class Type(object):
     def __init__(self, type_string: str, repr_string: str, is_primitive=False):
         self._type_string = type_string
@@ -34,7 +35,9 @@ def is_primitive(self) -> bool:
 
 class FixedType(Type):
     def __init__(self, length: int):
-        super().__init__(f"fixed[{length}]", f"FixedType(length={length})", is_primitive=True)
+        super().__init__(
+            f"fixed[{length}]", f"FixedType(length={length})", is_primitive=True
+        )
         self._length = length
 
     @property
@@ -44,8 +47,11 @@ def length(self) -> int:
 
 class DecimalType(Type):
     def __init__(self, precision: int, scale: int):
-        super().__init__(f"decimal({precision}, {scale})",
-                         f"DecimalType(precision={precision}, scale={scale})", is_primitive=True)
+        super().__init__(
+            f"decimal({precision}, {scale})",
+            f"DecimalType(precision={precision}, scale={scale})",
+            is_primitive=True,
+        )
         self._precision = precision
         self._scale = scale
 
@@ -59,7 +65,14 @@ def scale(self) -> int:
 
 
 class NestedField(object):
-    def __init__(self, is_optional: bool, field_id: int, name: str, field_type: Type, doc: str = None):
+    def __init__(
+        self,
+        is_optional: bool,
+        field_id: int,
+        name: str,
+        field_type: Type,
+        doc: str = None,
+    ):
         self._is_optional = is_optional
         self._id = field_id
         self._name = name
@@ -87,17 +100,26 @@ def type(self) -> Type:
         return self._type
 
     def __repr__(self):
-        return (f"NestedField(is_optional={self._is_optional}, field_id={self._id}, "
-                f"name={repr(self._name)}, field_type={repr(self._type)}, doc={repr(self._doc)})")
+        return (
+            f"NestedField(is_optional={self._is_optional}, field_id={self._id}, "
+            f"name={repr(self._name)}, field_type={repr(self._type)}, doc={repr(self._doc)})"
+        )
 
     def __str__(self):
-        return (f"{self._id}: {self._name}: {'optional' if self._is_optional else 'required'} {self._type}"
-                "" if self._doc is None else f" ({self._doc})")
+        """Format as 'id: name: optional|required type[ (doc)]'."""
+        # Build the suffix separately: a bare conditional here would bind to the
+        # whole concatenated string and drop the prefix whenever doc is set.
+        requiredness = "optional" if self._is_optional else "required"
+        doc_suffix = "" if self._doc is None else f" ({self._doc})"
+        return f"{self._id}: {self._name}: {requiredness} {self._type}{doc_suffix}"
 
 
 class StructType(Type):
     def __init__(self, fields: list):
-        super().__init__(f"struct<{', '.join(map(str, fields))}>", f"StructType(fields={repr(fields)})")
+        super().__init__(
+            f"struct<{', '.join(map(str, fields))}>",
+            f"StructType(fields={repr(fields)})",
+        )
         self._fields = fields
 
     @property
@@ -117,8 +139,10 @@ def element(self) -> NestedField:
 
 class MapType(Type):
     def __init__(self, key: NestedField, value: NestedField):
-        super().__init__(f"map<{key.type}, {value.type}>",
-                         f"MapType(key={repr(key)}, value={repr(value)})")
+        super().__init__(
+            f"map<{key.type}, {value.type}>",
+            f"MapType(key={repr(key)}, value={repr(value)})",
+        )
         self._key_field = key
         self._value_field = value
 

diff --git a/python/src/iceberg/utils/bin_packing.py b/python/src/iceberg/utils/bin_packing.py
@@ -15,8 +15,11 @@
 # specific language governing permissions and limitations
 # under the License.
 
+
 class PackingIterator:
-    def __init__(self, items, target_weight, lookback, weight_func, largest_bin_first=False):
+    def __init__(
+        self, items, target_weight, lookback, weight_func, largest_bin_first=False
+    ):
         self.items = iter(items)
         self.target_weight = target_weight
         self.lookback = lookback

diff --git a/python/tests/test_types.py b/python/tests/test_types.py
@@ -15,15 +15,47 @@
 # specific language governing permissions and limitations
 # under the License.
 
-from iceberg.types import (BinaryType, BooleanType, DateType, DecimalType, DoubleType, FixedType,
-                           FloatType, IntegerType, ListType, LongType, MapType, NestedField, StringType,
-                           StructType, TimestampType, TimestamptzType, TimeType, UUIDType)
 import pytest
 
+from iceberg.types import (
+    BinaryType,
+    BooleanType,
+    DateType,
+    DecimalType,
+    DoubleType,
+    FixedType,
+    FloatType,
+    IntegerType,
+    ListType,
+    LongType,
+    MapType,
+    NestedField,
+    StringType,
+    StructType,
+    TimestampType,
+    TimestamptzType,
+    TimeType,
+    UUIDType,
+)
 
-@pytest.mark.parametrize("input_type",
-                         [BooleanType, IntegerType, LongType, FloatType, DoubleType, DateType, TimeType,
-                          TimestampType, TimestamptzType, StringType, UUIDType, BinaryType])
+
+@pytest.mark.parametrize(
+    "input_type",
+    [
+        BooleanType,
+        IntegerType,
+        LongType,
+        FloatType,
+        DoubleType,
+        DateType,
+        TimeType,
+        TimestampType,
+        TimestamptzType,
+        StringType,
+        UUIDType,
+        BinaryType,
+    ],
+)
 def test_repr_primitive_types(input_type):
     assert input_type == eval(repr(input_type))
 
@@ -40,34 +72,58 @@ def test_decimal_type():
     type_var = DecimalType(precision=9, scale=2)
     assert type_var.precision == 9
     assert type_var.scale == 2
-    assert str(type_var) == 'decimal(9, 2)'
+    assert str(type_var) == "decimal(9, 2)"
     assert repr(type_var) == "DecimalType(precision=9, scale=2)"
     assert str(type_var) == str(eval(repr(type_var)))
 
 
 def test_struct_type():
-    type_var = StructType([NestedField(True, 1, "optional_field", IntegerType),
-                           NestedField(False, 2, "required_field", FixedType(5)),
-                           NestedField(False, 3, "required_field", StructType([
-                               NestedField(True, 4, "optional_field", DecimalType(8, 2)),
-                               NestedField(False, 5, "required_field", LongType)]))])
+    type_var = StructType(
+        [
+            NestedField(True, 1, "optional_field", IntegerType),
+            NestedField(False, 2, "required_field", FixedType(5)),
+            NestedField(
+                False,
+                3,
+                "required_field",
+                StructType(
+                    [
+                        NestedField(True, 4, "optional_field", DecimalType(8, 2)),
+                        NestedField(False, 5, "required_field", LongType),
+                    ]
+                ),
+            ),
+        ]
+    )
     assert len(type_var.fields) == 3
     assert str(type_var) == str(eval(repr(type_var)))
 
 
 def test_list_type():
-    type_var = ListType(NestedField(False, 1, "required_field", StructType([
-        NestedField(True, 2, "optional_field", DecimalType(8, 2)),
-        NestedField(False, 3, "required_field", LongType)])))
+    type_var = ListType(
+        NestedField(
+            False,
+            1,
+            "required_field",
+            StructType(
+                [
+                    NestedField(True, 2, "optional_field", DecimalType(8, 2)),
+                    NestedField(False, 3, "required_field", LongType),
+                ]
+            ),
+        )
+    )
     assert isinstance(type_var.element.type, StructType)
     assert len(type_var.element.type.fields) == 2
     assert type_var.element.field_id == 1
     assert str(type_var) == str(eval(repr(type_var)))
 
 
 def test_map_type():
-    type_var = MapType(NestedField(True, 1, "optional_field", DoubleType),
-                       NestedField(False, 2, "required_field", UUIDType))
+    type_var = MapType(
+        NestedField(True, 1, "optional_field", DoubleType),
+        NestedField(False, 2, "required_field", UUIDType),
+    )
     assert type_var.key.type == DoubleType
     assert type_var.key.field_id == 1
     assert type_var.value.type == UUIDType
@@ -76,12 +132,30 @@ def test_map_type():
 
 
 def test_nested_field():
-    field_var = NestedField(True, 1, "optional_field1", StructType([
-        NestedField(True, 2, "optional_field2", ListType(
-            NestedField(False, 3, "required_field3", DoubleType))),
-        NestedField(False, 4, "required_field4", MapType(
-            NestedField(True, 5, "optional_field5", TimeType),
-            NestedField(False, 6, "required_field6", UUIDType)))]))
+    field_var = NestedField(
+        True,
+        1,
+        "optional_field1",
+        StructType(
+            [
+                NestedField(
+                    True,
+                    2,
+                    "optional_field2",
+                    ListType(NestedField(False, 3, "required_field3", DoubleType)),
+                ),
+                NestedField(
+                    False,
+                    4,
+                    "required_field4",
+                    MapType(
+                        NestedField(True, 5, "optional_field5", TimeType),
+                        NestedField(False, 6, "required_field6", UUIDType),
+                    ),
+                ),
+            ]
+        ),
+    )
     assert field_var.is_optional
     assert not field_var.is_required
     assert field_var.field_id == 1

diff --git a/python/tests/utils/test_bin_packing.py b/python/tests/utils/test_bin_packing.py
@@ -17,34 +17,76 @@
 
 import random
 
-from iceberg.utils.bin_packing import PackingIterator
 import pytest
 
+from iceberg.utils.bin_packing import PackingIterator
+
 
-@pytest.mark.parametrize("splits, lookback, split_size, open_cost", [
-    ([random.randint(0, 64) for x in range(200)], 20, 128, 4),  # random splits
-    ([], 20, 128, 4),  # no splits
-    ([0] * 100 + [random.randint(0, 64) in range(10)] + [0] * 100, 20, 128, 4)  # sparse
-])
+@pytest.mark.parametrize(
+    "splits, lookback, split_size, open_cost",
+    [
+        ([random.randint(0, 64) for x in range(200)], 20, 128, 4),  # random splits
+        ([], 20, 128, 4),  # no splits
+        (
+            [0] * 100 + [random.randint(0, 64) for x in range(10)] + [0] * 100,
+            20,
+            128,
+            4,
+        ),  # sparse
+    ],
+)
 def test_bin_packing(splits, lookback, split_size, open_cost):
-
     def weight_func(x):
         return max(x, open_cost)
 
-    item_list_sums = [sum(item)
-                      for item in PackingIterator(splits, split_size, lookback, weight_func)]
+    item_list_sums = [
+        sum(item) for item in PackingIterator(splits, split_size, lookback, weight_func)
+    ]
     assert all([split_size >= item_sum >= 0 for item_sum in item_list_sums])
 
 
-@pytest.mark.parametrize("splits, target_weight, lookback, largest_bin_first, expected_lists", [
-    ([36, 36, 36, 36, 73, 110, 128], 128, 2, True, [[110], [128], [36, 73], [36, 36, 36]]),
-    ([36, 36, 36, 36, 73, 110, 128], 128, 2, False, [[36, 36, 36], [36, 73], [110], [128]]),
-    ([64, 64, 128, 32, 32, 32, 32], 128, 1, True, [[64, 64], [128], [32, 32, 32, 32]]),
-    ([64, 64, 128, 32, 32, 32, 32], 128, 1, False, [[64, 64], [128], [32, 32, 32, 32]]),
-])
-def test_bin_packing_lookback(splits, target_weight, lookback, largest_bin_first, expected_lists):
+@pytest.mark.parametrize(
+    "splits, target_weight, lookback, largest_bin_first, expected_lists",
+    [
+        (
+            [36, 36, 36, 36, 73, 110, 128],
+            128,
+            2,
+            True,
+            [[110], [128], [36, 73], [36, 36, 36]],
+        ),
+        (
+            [36, 36, 36, 36, 73, 110, 128],
+            128,
+            2,
+            False,
+            [[36, 36, 36], [36, 73], [110], [128]],
+        ),
+        (
+            [64, 64, 128, 32, 32, 32, 32],
+            128,
+            1,
+            True,
+            [[64, 64], [128], [32, 32, 32, 32]],
+        ),
+        (
+            [64, 64, 128, 32, 32, 32, 32],
+            128,
+            1,
+            False,
+            [[64, 64], [128], [32, 32, 32, 32]],
+        ),
+    ],
+)
+def test_bin_packing_lookback(
+    splits, target_weight, lookback, largest_bin_first, expected_lists
+):
     def weight_func(x):
         return x
 
-    assert [item for item in PackingIterator(
-        splits, target_weight, lookback, weight_func, largest_bin_first)] == expected_lists
+    assert [
+        item
+        for item in PackingIterator(
+            splits, target_weight, lookback, weight_func, largest_bin_first
+        )
+    ] == expected_lists