From 9cb74a450cf976c5807e440eb9627df2afcb8b00 Mon Sep 17 00:00:00 2001 From: Fokko Date: Tue, 3 Dec 2024 14:13:57 +0100 Subject: [PATCH] Ignore partition fields that reference a dropped source-id --- pyiceberg/partitioning.py | 5 ++- tests/integration/test_writes/test_writes.py | 40 ++++++++++++++++++++ 2 files changed, 44 insertions(+), 1 deletion(-) diff --git a/pyiceberg/partitioning.py b/pyiceberg/partitioning.py index 5f9178ebf9..e1a121fbd3 100644 --- a/pyiceberg/partitioning.py +++ b/pyiceberg/partitioning.py @@ -218,7 +218,10 @@ def partition_type(self, schema: Schema) -> StructType: """ nested_fields = [] for field in self.fields: - source_type = schema.find_type(field.source_id) + try: + source_type = schema.find_type(field.source_id) + except ValueError: + continue result_type = field.transform.result_type(source_type) required = schema.find_field(field.source_id).required nested_fields.append(NestedField(field.field_id, field.name, result_type, required=required)) diff --git a/tests/integration/test_writes/test_writes.py b/tests/integration/test_writes/test_writes.py index 78ffc79c50..37d4fabada 100644 --- a/tests/integration/test_writes/test_writes.py +++ b/tests/integration/test_writes/test_writes.py @@ -1570,3 +1570,43 @@ def test_abort_table_transaction_on_exception( # Validate the transaction is aborted and no partial update is applied assert len(tbl.scan().to_pandas()) == table_size # type: ignore + + +@pytest.mark.integration +@pytest.mark.parametrize("format_version", [1, 2]) +def test_drop_field_from_partition_spec(session_catalog: Catalog, format_version: int) -> None: + identifier = f"default.drop_partition_field{format_version}" + + try: + session_catalog.drop_table(identifier=identifier) + except NoSuchTableError: + pass + + pa_table_with_column = pa.Table.from_pydict( + { + "foo": ["a", None, "z"], + "bar": [19, None, 25], + }, + schema=pa.schema([ + pa.field("foo", pa.string(), nullable=True), + pa.field("bar", pa.int32(), nullable=True), + ]), + ) + + tbl = session_catalog.create_table( + identifier=identifier, schema=pa_table_with_column.schema, properties={"format-version": str(format_version)} + ) + + with tbl.update_spec() as spec: + spec.add_field("foo", IdentityTransform(), "foo_identity") + + with tbl.transaction() as txn: + txn.append(pa_table_with_column) + + with tbl.update_spec() as spec: + spec.remove_field("foo_identity") + + with tbl.update_schema() as schema: + schema.delete_column("foo") + + assert tbl.scan().to_arrow()["bar"].to_pylist() == [19, 25, None]