diff --git a/python/setup.py b/python/setup.py index 9acba2cc71fe..5073c558a252 100644 --- a/python/setup.py +++ b/python/setup.py @@ -18,12 +18,12 @@ from setuptools import setup setup( - name='py-iceberg', + name="py-iceberg", install_requires=[], extras_require={ "dev": [ "tox-travis==0.12", "pytest", ], - } + }, ) diff --git a/python/src/iceberg/types.py b/python/src/iceberg/types.py index 2113787da4b1..57856f92529f 100644 --- a/python/src/iceberg/types.py +++ b/python/src/iceberg/types.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. + class Type(object): def __init__(self, type_string: str, repr_string: str, is_primitive=False): self._type_string = type_string @@ -34,7 +35,9 @@ def is_primitive(self) -> bool: class FixedType(Type): def __init__(self, length: int): - super().__init__(f"fixed[{length}]", f"FixedType(length={length})", is_primitive=True) + super().__init__( + f"fixed[{length}]", f"FixedType(length={length})", is_primitive=True + ) self._length = length @property @@ -44,8 +47,11 @@ def length(self) -> int: class DecimalType(Type): def __init__(self, precision: int, scale: int): - super().__init__(f"decimal({precision}, {scale})", - f"DecimalType(precision={precision}, scale={scale})", is_primitive=True) + super().__init__( + f"decimal({precision}, {scale})", + f"DecimalType(precision={precision}, scale={scale})", + is_primitive=True, + ) self._precision = precision self._scale = scale @@ -59,7 +65,14 @@ def scale(self) -> int: class NestedField(object): - def __init__(self, is_optional: bool, field_id: int, name: str, field_type: Type, doc: str = None): + def __init__( + self, + is_optional: bool, + field_id: int, + name: str, + field_type: Type, + doc: str = None, + ): self._is_optional = is_optional self._id = field_id self._name = name @@ -87,17 +100,26 @@ def type(self) -> Type: return self._type def __repr__(self): - return (f"NestedField(is_optional={self._is_optional}, field_id={self._id}, 
" - f"name={repr(self._name)}, field_type={repr(self._type)}, doc={repr(self._doc)})") + return ( + f"NestedField(is_optional={self._is_optional}, field_id={self._id}, " + f"name={repr(self._name)}, field_type={repr(self._type)}, doc={repr(self._doc)})" + ) def __str__(self): - return (f"{self._id}: {self._name}: {'optional' if self._is_optional else 'required'} {self._type}" - "" if self._doc is None else f" ({self._doc})") + doc_suffix = "" if self._doc is None else f" ({self._doc})" + return ( + f"{self._id}: {self._name}: " + f"{'optional' if self._is_optional else 'required'} {self._type}" + f"{doc_suffix}" + ) class StructType(Type): def __init__(self, fields: list): - super().__init__(f"struct<{', '.join(map(str, fields))}>", f"StructType(fields={repr(fields)})") + super().__init__( + f"struct<{', '.join(map(str, fields))}>", + f"StructType(fields={repr(fields)})", + ) self._fields = fields @property @@ -117,8 +139,10 @@ def element(self) -> NestedField: class MapType(Type): def __init__(self, key: NestedField, value: NestedField): - super().__init__(f"map<{key.type}, {value.type}>", - f"MapType(key={repr(key)}, value={repr(value)})") + super().__init__( + f"map<{key.type}, {value.type}>", + f"MapType(key={repr(key)}, value={repr(value)})", + ) self._key_field = key self._value_field = value diff --git a/python/src/iceberg/utils/bin_packing.py b/python/src/iceberg/utils/bin_packing.py index 17d4faad260d..8e32710028e0 100644 --- a/python/src/iceberg/utils/bin_packing.py +++ b/python/src/iceberg/utils/bin_packing.py @@ -15,8 +15,11 @@ # specific language governing permissions and limitations # under the License. 
+ class PackingIterator: - def __init__(self, items, target_weight, lookback, weight_func, largest_bin_first=False): + def __init__( + self, items, target_weight, lookback, weight_func, largest_bin_first=False + ): self.items = iter(items) self.target_weight = target_weight self.lookback = lookback diff --git a/python/tests/test_types.py b/python/tests/test_types.py index b6d23dc5550f..3b1d834e05a8 100644 --- a/python/tests/test_types.py +++ b/python/tests/test_types.py @@ -15,15 +15,47 @@ # specific language governing permissions and limitations # under the License. -from iceberg.types import (BinaryType, BooleanType, DateType, DecimalType, DoubleType, FixedType, - FloatType, IntegerType, ListType, LongType, MapType, NestedField, StringType, - StructType, TimestampType, TimestamptzType, TimeType, UUIDType) import pytest +from iceberg.types import ( + BinaryType, + BooleanType, + DateType, + DecimalType, + DoubleType, + FixedType, + FloatType, + IntegerType, + ListType, + LongType, + MapType, + NestedField, + StringType, + StructType, + TimestampType, + TimestamptzType, + TimeType, + UUIDType, +) -@pytest.mark.parametrize("input_type", - [BooleanType, IntegerType, LongType, FloatType, DoubleType, DateType, TimeType, - TimestampType, TimestamptzType, StringType, UUIDType, BinaryType]) + +@pytest.mark.parametrize( + "input_type", + [ + BooleanType, + IntegerType, + LongType, + FloatType, + DoubleType, + DateType, + TimeType, + TimestampType, + TimestamptzType, + StringType, + UUIDType, + BinaryType, + ], +) def test_repr_primitive_types(input_type): assert input_type == eval(repr(input_type)) @@ -40,25 +72,47 @@ def test_decimal_type(): type_var = DecimalType(precision=9, scale=2) assert type_var.precision == 9 assert type_var.scale == 2 - assert str(type_var) == 'decimal(9, 2)' + assert str(type_var) == "decimal(9, 2)" assert repr(type_var) == "DecimalType(precision=9, scale=2)" assert str(type_var) == str(eval(repr(type_var))) def test_struct_type(): - type_var = 
StructType([NestedField(True, 1, "optional_field", IntegerType), - NestedField(False, 2, "required_field", FixedType(5)), - NestedField(False, 3, "required_field", StructType([ - NestedField(True, 4, "optional_field", DecimalType(8, 2)), - NestedField(False, 5, "required_field", LongType)]))]) + type_var = StructType( + [ + NestedField(True, 1, "optional_field", IntegerType), + NestedField(False, 2, "required_field", FixedType(5)), + NestedField( + False, + 3, + "required_field", + StructType( + [ + NestedField(True, 4, "optional_field", DecimalType(8, 2)), + NestedField(False, 5, "required_field", LongType), + ] + ), + ), + ] + ) assert len(type_var.fields) == 3 assert str(type_var) == str(eval(repr(type_var))) def test_list_type(): - type_var = ListType(NestedField(False, 1, "required_field", StructType([ - NestedField(True, 2, "optional_field", DecimalType(8, 2)), - NestedField(False, 3, "required_field", LongType)]))) + type_var = ListType( + NestedField( + False, + 1, + "required_field", + StructType( + [ + NestedField(True, 2, "optional_field", DecimalType(8, 2)), + NestedField(False, 3, "required_field", LongType), + ] + ), + ) + ) assert isinstance(type_var.element.type, StructType) assert len(type_var.element.type.fields) == 2 assert type_var.element.field_id == 1 @@ -66,8 +120,10 @@ def test_list_type(): def test_map_type(): - type_var = MapType(NestedField(True, 1, "optional_field", DoubleType), - NestedField(False, 2, "required_field", UUIDType)) + type_var = MapType( + NestedField(True, 1, "optional_field", DoubleType), + NestedField(False, 2, "required_field", UUIDType), + ) assert type_var.key.type == DoubleType assert type_var.key.field_id == 1 assert type_var.value.type == UUIDType @@ -76,12 +132,30 @@ def test_map_type(): def test_nested_field(): - field_var = NestedField(True, 1, "optional_field1", StructType([ - NestedField(True, 2, "optional_field2", ListType( - NestedField(False, 3, "required_field3", DoubleType))), - NestedField(False, 4, 
"required_field4", MapType( - NestedField(True, 5, "optional_field5", TimeType), - NestedField(False, 6, "required_field6", UUIDType)))])) + field_var = NestedField( + True, + 1, + "optional_field1", + StructType( + [ + NestedField( + True, + 2, + "optional_field2", + ListType(NestedField(False, 3, "required_field3", DoubleType)), + ), + NestedField( + False, + 4, + "required_field4", + MapType( + NestedField(True, 5, "optional_field5", TimeType), + NestedField(False, 6, "required_field6", UUIDType), + ), + ), + ] + ), + ) assert field_var.is_optional assert not field_var.is_required assert field_var.field_id == 1 diff --git a/python/tests/utils/test_bin_packing.py b/python/tests/utils/test_bin_packing.py index 0cf44f132ac8..7a5a0749fe76 100644 --- a/python/tests/utils/test_bin_packing.py +++ b/python/tests/utils/test_bin_packing.py @@ -17,34 +17,76 @@ import random -from iceberg.utils.bin_packing import PackingIterator import pytest +from iceberg.utils.bin_packing import PackingIterator + -@pytest.mark.parametrize("splits, lookback, split_size, open_cost", [ - ([random.randint(0, 64) for x in range(200)], 20, 128, 4), # random splits - ([], 20, 128, 4), # no splits - ([0] * 100 + [random.randint(0, 64) in range(10)] + [0] * 100, 20, 128, 4) # sparse -]) +@pytest.mark.parametrize( + "splits, lookback, split_size, open_cost", + [ + ([random.randint(0, 64) for x in range(200)], 20, 128, 4), # random splits + ([], 20, 128, 4), # no splits + ( + [0] * 100 + [random.randint(0, 64) for x in range(10)] + [0] * 100, + 20, + 128, + 4, + ), # sparse + ], +) def test_bin_packing(splits, lookback, split_size, open_cost): - def weight_func(x): return max(x, open_cost) - item_list_sums = [sum(item) - for item in PackingIterator(splits, split_size, lookback, weight_func)] + item_list_sums = [ + sum(item) for item in PackingIterator(splits, split_size, lookback, weight_func) + ] assert all([split_size >= item_sum >= 0 for item_sum in item_list_sums]) -@pytest.mark.parametrize("splits, 
target_weight, lookback, largest_bin_first, expected_lists", [ - ([36, 36, 36, 36, 73, 110, 128], 128, 2, True, [[110], [128], [36, 73], [36, 36, 36]]), - ([36, 36, 36, 36, 73, 110, 128], 128, 2, False, [[36, 36, 36], [36, 73], [110], [128]]), - ([64, 64, 128, 32, 32, 32, 32], 128, 1, True, [[64, 64], [128], [32, 32, 32, 32]]), - ([64, 64, 128, 32, 32, 32, 32], 128, 1, False, [[64, 64], [128], [32, 32, 32, 32]]), -]) -def test_bin_packing_lookback(splits, target_weight, lookback, largest_bin_first, expected_lists): +@pytest.mark.parametrize( + "splits, target_weight, lookback, largest_bin_first, expected_lists", + [ + ( + [36, 36, 36, 36, 73, 110, 128], + 128, + 2, + True, + [[110], [128], [36, 73], [36, 36, 36]], + ), + ( + [36, 36, 36, 36, 73, 110, 128], + 128, + 2, + False, + [[36, 36, 36], [36, 73], [110], [128]], + ), + ( + [64, 64, 128, 32, 32, 32, 32], + 128, + 1, + True, + [[64, 64], [128], [32, 32, 32, 32]], + ), + ( + [64, 64, 128, 32, 32, 32, 32], + 128, + 1, + False, + [[64, 64], [128], [32, 32, 32, 32]], + ), + ], +) +def test_bin_packing_lookback( + splits, target_weight, lookback, largest_bin_first, expected_lists +): def weight_func(x): return x - assert [item for item in PackingIterator( - splits, target_weight, lookback, weight_func, largest_bin_first)] == expected_lists + assert [ + item + for item in PackingIterator( + splits, target_weight, lookback, weight_func, largest_bin_first + ) + ] == expected_lists diff --git a/python/tox.ini b/python/tox.ini index d1596ea119ae..b099a001e362 100644 --- a/python/tox.ini +++ b/python/tox.ini @@ -34,20 +34,52 @@ commands = coverage report -m --fail-under=90 coverage html -d test-reports/{envname}/coverage-html coverage xml -o test-reports/{envname}/coverage.xml +[testenv:format] +description = reformat all source code +basepython = python3 +deps = + black + isort + flake8 +skip_install = true +commands = + isort --project iceberg --profile black setup.py src tests + black setup.py src tests + flake8 
setup.py src tests [testenv:linters] basepython = python3 skip_install = true deps = . + {[testenv:isort]deps} + {[testenv:black]deps} {[testenv:flake8]deps} {[testenv:bandit]deps} {[testenv:mypy]deps} commands = + {[testenv:isort]commands} + {[testenv:black]commands} {[testenv:flake8]commands} {[testenv:bandit]commands} {[testenv:mypy]commands} +[testenv:isort] +basepython = python3 +skip_install = true +deps = + isort +commands = + isort --project iceberg --profile black --check-only setup.py src tests + +[testenv:black] +basepython = python3 +skip_install = true +deps = + black +commands = + black --check --diff src setup.py tests + [testenv:flake8] basepython = python3 skip_install = true @@ -93,7 +125,7 @@ commands = python -m http.server {posargs} [flake8] -ignore = E501,W503 +ignore = E501,I100,I202,W503 exclude = *.egg-info, *.pyc,