diff --git a/aws/src/integration/java/org/apache/iceberg/aws/glue/TestGlueCatalogTable.java b/aws/src/integration/java/org/apache/iceberg/aws/glue/TestGlueCatalogTable.java index d202db4d8f3a..4dade77a3ac5 100644 --- a/aws/src/integration/java/org/apache/iceberg/aws/glue/TestGlueCatalogTable.java +++ b/aws/src/integration/java/org/apache/iceberg/aws/glue/TestGlueCatalogTable.java @@ -298,9 +298,17 @@ public void testColumnCommentsAndParameters() { String namespace = createNamespace(); String tableName = createTable(namespace); Table table = glueCatalog.loadTable(TableIdentifier.of(namespace, tableName)); - table.updateSchema().addColumn("c2", Types.StructType.of( - Types.NestedField.required(3, "z", Types.IntegerType.get())), "c2").commit(); + table.updateSchema() + .addColumn("c2", + Types.StructType.of(Types.NestedField.required(3, "z", Types.IntegerType.get())), "c2") + .addColumn("c3", Types.StringType.get()) + .addColumn("c4", Types.StringType.get()) + .commit(); table.updateSpec().addField(truncate("c1", 8)).commit(); + table.updateSchema() + .deleteColumn("c3") + .renameColumn("c4", "c5") + .commit(); GetTableResponse response = glue.getTable(GetTableRequest.builder() .databaseName(namespace).name(tableName).build()); List actualColumns = response.table().storageDescriptor().columns(); @@ -312,7 +320,8 @@ public void testColumnCommentsAndParameters() { .comment("c1") .parameters(ImmutableMap.of( IcebergToGlueConverter.ICEBERG_FIELD_ID, "1", - IcebergToGlueConverter.ICEBERG_FIELD_OPTIONAL, "false" + IcebergToGlueConverter.ICEBERG_FIELD_OPTIONAL, "false", + IcebergToGlueConverter.ICEBERG_FIELD_CURRENT, "true" )) .build(), Column.builder() @@ -321,7 +330,35 @@ public void testColumnCommentsAndParameters() { .comment("c2") .parameters(ImmutableMap.of( IcebergToGlueConverter.ICEBERG_FIELD_ID, "2", - IcebergToGlueConverter.ICEBERG_FIELD_OPTIONAL, "true" + IcebergToGlueConverter.ICEBERG_FIELD_OPTIONAL, "true", + IcebergToGlueConverter.ICEBERG_FIELD_CURRENT, "true" + )) + .build(), + Column.builder() + .name("c5") + .type("string") + .parameters(ImmutableMap.of( + IcebergToGlueConverter.ICEBERG_FIELD_ID, "5", + IcebergToGlueConverter.ICEBERG_FIELD_OPTIONAL, "true", + IcebergToGlueConverter.ICEBERG_FIELD_CURRENT, "true" + )) + .build(), + Column.builder() + .name("c3") + .type("string") + .parameters(ImmutableMap.of( + IcebergToGlueConverter.ICEBERG_FIELD_ID, "4", + IcebergToGlueConverter.ICEBERG_FIELD_OPTIONAL, "true", + IcebergToGlueConverter.ICEBERG_FIELD_CURRENT, "false" + )) + .build(), + Column.builder() + .name("c4") + .type("string") + .parameters(ImmutableMap.of( + IcebergToGlueConverter.ICEBERG_FIELD_ID, "5", + IcebergToGlueConverter.ICEBERG_FIELD_OPTIONAL, "true", + IcebergToGlueConverter.ICEBERG_FIELD_CURRENT, "false" )) .build() ); diff --git a/aws/src/main/java/org/apache/iceberg/aws/glue/IcebergToGlueConverter.java b/aws/src/main/java/org/apache/iceberg/aws/glue/IcebergToGlueConverter.java index 977a156d22a7..19c3a42eb975 100644 --- a/aws/src/main/java/org/apache/iceberg/aws/glue/IcebergToGlueConverter.java +++ b/aws/src/main/java/org/apache/iceberg/aws/glue/IcebergToGlueConverter.java @@ -25,6 +25,7 @@ import java.util.Set; import java.util.regex.Pattern; import java.util.stream.Collectors; +import org.apache.iceberg.Schema; import org.apache.iceberg.TableMetadata; import org.apache.iceberg.catalog.Namespace; import org.apache.iceberg.catalog.TableIdentifier; @@ -58,6 +59,7 @@ private IcebergToGlueConverter() { public static final String GLUE_DB_DESCRIPTION_KEY = "comment"; public static final String ICEBERG_FIELD_ID = "iceberg.field.id"; public static final String ICEBERG_FIELD_OPTIONAL = "iceberg.field.optional"; + public static final String ICEBERG_FIELD_CURRENT = "iceberg.field.current"; /** * A Glue database name cannot be longer than 252 characters. @@ -244,13 +246,22 @@ private static List toColumns(TableMetadata metadata) { Set addedNames = Sets.newHashSet(); for (NestedField field : metadata.schema().columns()) { - addColumnWithDedupe(columns, addedNames, field); + addColumnWithDedupe(columns, addedNames, field, true /* is current */); + } + + for (Schema schema : metadata.schemas()) { + if (schema.schemaId() != metadata.currentSchemaId()) { + for (NestedField field : schema.columns()) { + addColumnWithDedupe(columns, addedNames, field, false /* is not current */); + } + } } return columns; } - private static void addColumnWithDedupe(List columns, Set dedupe, NestedField field) { + private static void addColumnWithDedupe(List columns, Set dedupe, + NestedField field, boolean isCurrent) { if (!dedupe.contains(field.name())) { columns.add(Column.builder() .name(field.name()) @@ -258,7 +269,8 @@ private static void addColumnWithDedupe(List columns, Set dedupe .comment(field.doc()) .parameters(ImmutableMap.of( ICEBERG_FIELD_ID, Integer.toString(field.fieldId()), - ICEBERG_FIELD_OPTIONAL, Boolean.toString(field.isOptional()) + ICEBERG_FIELD_OPTIONAL, Boolean.toString(field.isOptional()), + ICEBERG_FIELD_CURRENT, Boolean.toString(isCurrent) )) .build()); dedupe.add(field.name()); diff --git a/aws/src/test/java/org/apache/iceberg/aws/glue/TestIcebergToGlueConverter.java b/aws/src/test/java/org/apache/iceberg/aws/glue/TestIcebergToGlueConverter.java index c4050435ae31..a7b025863b8b 100644 --- a/aws/src/test/java/org/apache/iceberg/aws/glue/TestIcebergToGlueConverter.java +++ b/aws/src/test/java/org/apache/iceberg/aws/glue/TestIcebergToGlueConverter.java @@ -138,7 +138,8 @@ public void testSetTableInputInformation() { .comment("comment1") .parameters(ImmutableMap.of( IcebergToGlueConverter.ICEBERG_FIELD_ID, "1", - IcebergToGlueConverter.ICEBERG_FIELD_OPTIONAL, "false" + IcebergToGlueConverter.ICEBERG_FIELD_OPTIONAL, "false", + IcebergToGlueConverter.ICEBERG_FIELD_CURRENT, "true" )) .build(), Column.builder() @@ -147,7 +148,69 @@ public void testSetTableInputInformation() { .comment("comment2") .parameters(ImmutableMap.of( IcebergToGlueConverter.ICEBERG_FIELD_ID, "2", - IcebergToGlueConverter.ICEBERG_FIELD_OPTIONAL, "false" + IcebergToGlueConverter.ICEBERG_FIELD_OPTIONAL, "false", + IcebergToGlueConverter.ICEBERG_FIELD_CURRENT, "true" + )) + .build())) + .build()) + .build(); + + Assert.assertEquals( + "Location do not match", + expectedTableInput.storageDescriptor().location(), + actualTableInput.storageDescriptor().location()); + Assert.assertEquals( + "Columns do not match", + expectedTableInput.storageDescriptor().columns(), + actualTableInput.storageDescriptor().columns()); + } + + @Test + public void testSetTableInputInformationWithRemovedColumns() { + // Actual TableInput + TableInput.Builder actualTableInputBuilder = TableInput.builder(); + Schema schema = new Schema( + Types.NestedField.required(1, "x", Types.StringType.get(), "comment1"), + Types.NestedField.required(2, "y", Types.StructType.of( + Types.NestedField.required(3, "z", Types.IntegerType.get())), "comment2") + ); + PartitionSpec partitionSpec = PartitionSpec.builderFor(schema) + .identity("x") + .withSpecId(1000) + .build(); + TableMetadata tableMetadata = TableMetadata + .newTableMetadata(schema, partitionSpec, "s3://test", ImmutableMap.of()); + + Schema newSchema = new Schema( + Types.NestedField.required(1, "x", Types.StringType.get(), "comment1") + ); + tableMetadata = tableMetadata.updateSchema(newSchema, 3); + IcebergToGlueConverter.setTableInputInformation(actualTableInputBuilder, tableMetadata); + TableInput actualTableInput = actualTableInputBuilder.build(); + + // Expected TableInput + TableInput expectedTableInput = TableInput.builder().storageDescriptor( + StorageDescriptor.builder() + .location("s3://test") + .columns(ImmutableList.of( + Column.builder() + .name("x") + .type("string") + .comment("comment1") + .parameters(ImmutableMap.of( + IcebergToGlueConverter.ICEBERG_FIELD_ID, "1", + IcebergToGlueConverter.ICEBERG_FIELD_OPTIONAL, "false", + IcebergToGlueConverter.ICEBERG_FIELD_CURRENT, "true" + )) + .build(), + Column.builder() + .name("y") + .type("struct") + .comment("comment2") + .parameters(ImmutableMap.of( + IcebergToGlueConverter.ICEBERG_FIELD_ID, "2", + IcebergToGlueConverter.ICEBERG_FIELD_OPTIONAL, "false", + IcebergToGlueConverter.ICEBERG_FIELD_CURRENT, "false" )) .build())) .build())