diff --git a/aws/src/integration/java/org/apache/iceberg/aws/glue/TestGlueCatalogTable.java b/aws/src/integration/java/org/apache/iceberg/aws/glue/TestGlueCatalogTable.java index bff07d589857..d202db4d8f3a 100644 --- a/aws/src/integration/java/org/apache/iceberg/aws/glue/TestGlueCatalogTable.java +++ b/aws/src/integration/java/org/apache/iceberg/aws/glue/TestGlueCatalogTable.java @@ -311,11 +311,8 @@ public void testColumnCommentsAndParameters() { .type("string") .comment("c1") .parameters(ImmutableMap.of( - IcebergToGlueConverter.ICEBERG_FIELD_USAGE, IcebergToGlueConverter.SCHEMA_COLUMN, IcebergToGlueConverter.ICEBERG_FIELD_ID, "1", - IcebergToGlueConverter.ICEBERG_FIELD_OPTIONAL, "false", - IcebergToGlueConverter.ICEBERG_FIELD_TYPE_STRING, "string", - IcebergToGlueConverter.ICEBERG_FIELD_TYPE_TYPE_ID, "STRING" + IcebergToGlueConverter.ICEBERG_FIELD_OPTIONAL, "false" )) .build(), Column.builder() @@ -323,37 +320,9 @@ public void testColumnCommentsAndParameters() { .type("struct") .comment("c2") .parameters(ImmutableMap.of( - IcebergToGlueConverter.ICEBERG_FIELD_USAGE, IcebergToGlueConverter.SCHEMA_COLUMN, IcebergToGlueConverter.ICEBERG_FIELD_ID, "2", - IcebergToGlueConverter.ICEBERG_FIELD_OPTIONAL, "true", - IcebergToGlueConverter.ICEBERG_FIELD_TYPE_STRING, "struct", - IcebergToGlueConverter.ICEBERG_FIELD_TYPE_TYPE_ID, "STRUCT" + IcebergToGlueConverter.ICEBERG_FIELD_OPTIONAL, "true" )) - .build(), - Column.builder() - .name("z") - .type("int") - .parameters(ImmutableMap.of( - IcebergToGlueConverter.ICEBERG_FIELD_USAGE, IcebergToGlueConverter.SCHEMA_SUBFIELD, - IcebergToGlueConverter.ICEBERG_FIELD_ID, "3", - IcebergToGlueConverter.ICEBERG_FIELD_OPTIONAL, "false", - IcebergToGlueConverter.ICEBERG_FIELD_TYPE_STRING, "int", - IcebergToGlueConverter.ICEBERG_FIELD_TYPE_TYPE_ID, "INTEGER" - )) - .build(), - Column.builder() - .name("c1_trunc_8") - .type("string") - .parameters(ImmutableMap.builder() - .put(IcebergToGlueConverter.ICEBERG_FIELD_USAGE, IcebergToGlueConverter.PARTITION_FIELD) - .put(IcebergToGlueConverter.ICEBERG_FIELD_TYPE_TYPE_ID, "STRING") - .put(IcebergToGlueConverter.ICEBERG_FIELD_TYPE_STRING, "string") - .put(IcebergToGlueConverter.ICEBERG_FIELD_ID, "1000") - .put(IcebergToGlueConverter.ICEBERG_PARTITION_FIELD_ID, "1000") - .put(IcebergToGlueConverter.ICEBERG_PARTITION_SOURCE_ID, "1") - .put(IcebergToGlueConverter.ICEBERG_PARTITION_TRANSFORM, "truncate[8]") - .build() - ) .build() ); Assert.assertEquals("Columns do not match", expectedColumns, actualColumns); diff --git a/aws/src/main/java/org/apache/iceberg/aws/glue/IcebergToGlueConverter.java b/aws/src/main/java/org/apache/iceberg/aws/glue/IcebergToGlueConverter.java index 6912ca78aee6..977a156d22a7 100644 --- a/aws/src/main/java/org/apache/iceberg/aws/glue/IcebergToGlueConverter.java +++ b/aws/src/main/java/org/apache/iceberg/aws/glue/IcebergToGlueConverter.java @@ -25,7 +25,6 @@ import java.util.Set; import java.util.regex.Pattern; import java.util.stream.Collectors; -import org.apache.iceberg.PartitionField; import org.apache.iceberg.TableMetadata; import org.apache.iceberg.catalog.Namespace; import org.apache.iceberg.catalog.TableIdentifier; @@ -37,7 +36,6 @@ import org.apache.iceberg.relocated.com.google.common.collect.Maps; import org.apache.iceberg.relocated.com.google.common.collect.Sets; import org.apache.iceberg.types.Type; -import org.apache.iceberg.types.TypeUtil; import org.apache.iceberg.types.Types; import org.apache.iceberg.types.Types.NestedField; import org.slf4j.Logger; @@ -58,17 +56,8 @@ private IcebergToGlueConverter() { private static final Pattern GLUE_TABLE_PATTERN = Pattern.compile("^[a-z0-9_]{1,255}$"); public static final String GLUE_DB_LOCATION_KEY = "location"; public static final String GLUE_DB_DESCRIPTION_KEY = "comment"; - public static final String ICEBERG_FIELD_USAGE = "iceberg.field.usage"; - public static final String ICEBERG_FIELD_TYPE_TYPE_ID = "iceberg.field.type.typeid"; - public static final String ICEBERG_FIELD_TYPE_STRING = "iceberg.field.type.string"; public static final String ICEBERG_FIELD_ID = "iceberg.field.id"; public static final String ICEBERG_FIELD_OPTIONAL = "iceberg.field.optional"; - public static final String ICEBERG_PARTITION_TRANSFORM = "iceberg.partition.transform"; - public static final String ICEBERG_PARTITION_FIELD_ID = "iceberg.partition.field-id"; - public static final String ICEBERG_PARTITION_SOURCE_ID = "iceberg.partition.source-id"; - public static final String SCHEMA_COLUMN = "schema-column"; - public static final String SCHEMA_SUBFIELD = "schema-subfield"; - public static final String PARTITION_FIELD = "partition-field"; /** * A Glue database name cannot be longer than 252 characters. @@ -252,59 +241,27 @@ private static String toTypeString(Type type) { private static List toColumns(TableMetadata metadata) { List columns = Lists.newArrayList(); - Set rootColumnSet = Sets.newHashSet(); - // Add schema-column fields + Set addedNames = Sets.newHashSet(); + for (NestedField field : metadata.schema().columns()) { - rootColumnSet.add(field); + addColumnWithDedupe(columns, addedNames, field); + } + + return columns; + } + + private static void addColumnWithDedupe(List columns, Set dedupe, NestedField field) { + if (!dedupe.contains(field.name())) { columns.add(Column.builder() .name(field.name()) .type(toTypeString(field.type())) .comment(field.doc()) - .parameters(convertToParameters(SCHEMA_COLUMN, field)) - .build()); - } - // Add schema-subfield - for (NestedField field : TypeUtil.indexById(metadata.schema().asStruct()).values()) { - if (!rootColumnSet.contains(field)) { - columns.add(Column.builder() - .name(field.name()) - .type(toTypeString(field.type())) - .comment(field.doc()) - .parameters(convertToParameters(SCHEMA_SUBFIELD, field)) - .build()); - } - } - // Add partition-field - for (PartitionField partitionField : metadata.spec().fields()) { - Type type = partitionField.transform() - .getResultType(metadata.schema().findField(partitionField.sourceId()).type()); - columns.add(Column.builder() - .name(partitionField.name()) - .type(toTypeString(type)) - .parameters(convertToPartitionFieldParameters(type, partitionField)) + .parameters(ImmutableMap.of( + ICEBERG_FIELD_ID, Integer.toString(field.fieldId()), + ICEBERG_FIELD_OPTIONAL, Boolean.toString(field.isOptional()) + )) .build()); + dedupe.add(field.name()); } - return columns; - } - - private static Map convertToParameters(String fieldUsage, NestedField field) { - return ImmutableMap.of(ICEBERG_FIELD_USAGE, fieldUsage, - ICEBERG_FIELD_TYPE_TYPE_ID, field.type().typeId().toString(), - ICEBERG_FIELD_TYPE_STRING, toTypeString(field.type()), - ICEBERG_FIELD_ID, Integer.toString(field.fieldId()), - ICEBERG_FIELD_OPTIONAL, Boolean.toString(field.isOptional()) - ); - } - - private static Map convertToPartitionFieldParameters(Type type, PartitionField partitionField) { - return ImmutableMap.builder() - .put(ICEBERG_FIELD_USAGE, PARTITION_FIELD) - .put(ICEBERG_FIELD_TYPE_TYPE_ID, type.typeId().toString()) - .put(ICEBERG_FIELD_TYPE_STRING, toTypeString(type)) - .put(ICEBERG_FIELD_ID, Integer.toString(partitionField.fieldId())) - .put(ICEBERG_PARTITION_TRANSFORM, partitionField.transform().toString()) - .put(ICEBERG_PARTITION_FIELD_ID, Integer.toString(partitionField.fieldId())) - .put(ICEBERG_PARTITION_SOURCE_ID, Integer.toString(partitionField.sourceId())) - .build(); } } diff --git a/aws/src/test/java/org/apache/iceberg/aws/glue/TestIcebergToGlueConverter.java b/aws/src/test/java/org/apache/iceberg/aws/glue/TestIcebergToGlueConverter.java index d6e8c8884d72..c4050435ae31 100644 --- a/aws/src/test/java/org/apache/iceberg/aws/glue/TestIcebergToGlueConverter.java +++ b/aws/src/test/java/org/apache/iceberg/aws/glue/TestIcebergToGlueConverter.java @@ -137,11 +137,8 @@ public void testSetTableInputInformation() { .type("string") .comment("comment1") .parameters(ImmutableMap.of( - IcebergToGlueConverter.ICEBERG_FIELD_USAGE, IcebergToGlueConverter.SCHEMA_COLUMN, IcebergToGlueConverter.ICEBERG_FIELD_ID, "1", - IcebergToGlueConverter.ICEBERG_FIELD_OPTIONAL, "false", - IcebergToGlueConverter.ICEBERG_FIELD_TYPE_STRING, "string", - IcebergToGlueConverter.ICEBERG_FIELD_TYPE_TYPE_ID, "STRING" + IcebergToGlueConverter.ICEBERG_FIELD_OPTIONAL, "false" )) .build(), Column.builder() @@ -149,40 +146,11 @@ public void testSetTableInputInformation() { .type("struct") .comment("comment2") .parameters(ImmutableMap.of( - IcebergToGlueConverter.ICEBERG_FIELD_USAGE, IcebergToGlueConverter.SCHEMA_COLUMN, IcebergToGlueConverter.ICEBERG_FIELD_ID, "2", - IcebergToGlueConverter.ICEBERG_FIELD_OPTIONAL, "false", - IcebergToGlueConverter.ICEBERG_FIELD_TYPE_STRING, "struct", - IcebergToGlueConverter.ICEBERG_FIELD_TYPE_TYPE_ID, "STRUCT" + IcebergToGlueConverter.ICEBERG_FIELD_OPTIONAL, "false" )) - .build(), - Column.builder() - .name("z") - .type("int") - .parameters(ImmutableMap.of( - IcebergToGlueConverter.ICEBERG_FIELD_USAGE, IcebergToGlueConverter.SCHEMA_SUBFIELD, - IcebergToGlueConverter.ICEBERG_FIELD_ID, "3", - IcebergToGlueConverter.ICEBERG_FIELD_OPTIONAL, "false", - IcebergToGlueConverter.ICEBERG_FIELD_TYPE_STRING, "int", - IcebergToGlueConverter.ICEBERG_FIELD_TYPE_TYPE_ID, "INTEGER" - )) - .build(), - Column.builder() - .name("x") - .type("string") - .parameters(ImmutableMap.builder() - .put(IcebergToGlueConverter.ICEBERG_FIELD_USAGE, IcebergToGlueConverter.PARTITION_FIELD) - .put(IcebergToGlueConverter.ICEBERG_FIELD_TYPE_TYPE_ID, "STRING") - .put(IcebergToGlueConverter.ICEBERG_FIELD_TYPE_STRING, "string") - .put(IcebergToGlueConverter.ICEBERG_FIELD_ID, "1000") - .put(IcebergToGlueConverter.ICEBERG_PARTITION_FIELD_ID, "1000") - .put(IcebergToGlueConverter.ICEBERG_PARTITION_SOURCE_ID, "1") - .put(IcebergToGlueConverter.ICEBERG_PARTITION_TRANSFORM, "identity") - .build() - ) - .build() - ) - ).build()) + .build())) + .build()) .build(); Assert.assertEquals(