Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -311,49 +311,18 @@ public void testColumnCommentsAndParameters() {
.type("string")
.comment("c1")
.parameters(ImmutableMap.of(
IcebergToGlueConverter.ICEBERG_FIELD_USAGE, IcebergToGlueConverter.SCHEMA_COLUMN,
IcebergToGlueConverter.ICEBERG_FIELD_ID, "1",
IcebergToGlueConverter.ICEBERG_FIELD_OPTIONAL, "false",
IcebergToGlueConverter.ICEBERG_FIELD_TYPE_STRING, "string",
IcebergToGlueConverter.ICEBERG_FIELD_TYPE_TYPE_ID, "STRING"
IcebergToGlueConverter.ICEBERG_FIELD_OPTIONAL, "false"
))
.build(),
Column.builder()
.name("c2")
.type("struct<z:int>")
.comment("c2")
.parameters(ImmutableMap.of(
IcebergToGlueConverter.ICEBERG_FIELD_USAGE, IcebergToGlueConverter.SCHEMA_COLUMN,
IcebergToGlueConverter.ICEBERG_FIELD_ID, "2",
IcebergToGlueConverter.ICEBERG_FIELD_OPTIONAL, "true",
IcebergToGlueConverter.ICEBERG_FIELD_TYPE_STRING, "struct<z:int>",
IcebergToGlueConverter.ICEBERG_FIELD_TYPE_TYPE_ID, "STRUCT"
IcebergToGlueConverter.ICEBERG_FIELD_OPTIONAL, "true"
))
.build(),
Column.builder()
.name("z")
.type("int")
.parameters(ImmutableMap.of(
IcebergToGlueConverter.ICEBERG_FIELD_USAGE, IcebergToGlueConverter.SCHEMA_SUBFIELD,
IcebergToGlueConverter.ICEBERG_FIELD_ID, "3",
IcebergToGlueConverter.ICEBERG_FIELD_OPTIONAL, "false",
IcebergToGlueConverter.ICEBERG_FIELD_TYPE_STRING, "int",
IcebergToGlueConverter.ICEBERG_FIELD_TYPE_TYPE_ID, "INTEGER"
))
.build(),
Column.builder()
.name("c1_trunc_8")
.type("string")
.parameters(ImmutableMap.<String, String>builder()
.put(IcebergToGlueConverter.ICEBERG_FIELD_USAGE, IcebergToGlueConverter.PARTITION_FIELD)
.put(IcebergToGlueConverter.ICEBERG_FIELD_TYPE_TYPE_ID, "STRING")
.put(IcebergToGlueConverter.ICEBERG_FIELD_TYPE_STRING, "string")
.put(IcebergToGlueConverter.ICEBERG_FIELD_ID, "1000")
.put(IcebergToGlueConverter.ICEBERG_PARTITION_FIELD_ID, "1000")
.put(IcebergToGlueConverter.ICEBERG_PARTITION_SOURCE_ID, "1")
.put(IcebergToGlueConverter.ICEBERG_PARTITION_TRANSFORM, "truncate[8]")
.build()
)
.build()
);
Assert.assertEquals("Columns do not match", expectedColumns, actualColumns);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@
import java.util.Set;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import org.apache.iceberg.PartitionField;
import org.apache.iceberg.TableMetadata;
import org.apache.iceberg.catalog.Namespace;
import org.apache.iceberg.catalog.TableIdentifier;
Expand All @@ -37,7 +36,6 @@
import org.apache.iceberg.relocated.com.google.common.collect.Maps;
import org.apache.iceberg.relocated.com.google.common.collect.Sets;
import org.apache.iceberg.types.Type;
import org.apache.iceberg.types.TypeUtil;
import org.apache.iceberg.types.Types;
import org.apache.iceberg.types.Types.NestedField;
import org.slf4j.Logger;
Expand All @@ -58,17 +56,8 @@ private IcebergToGlueConverter() {
private static final Pattern GLUE_TABLE_PATTERN = Pattern.compile("^[a-z0-9_]{1,255}$");
public static final String GLUE_DB_LOCATION_KEY = "location";
public static final String GLUE_DB_DESCRIPTION_KEY = "comment";
public static final String ICEBERG_FIELD_USAGE = "iceberg.field.usage";
public static final String ICEBERG_FIELD_TYPE_TYPE_ID = "iceberg.field.type.typeid";
public static final String ICEBERG_FIELD_TYPE_STRING = "iceberg.field.type.string";
public static final String ICEBERG_FIELD_ID = "iceberg.field.id";
public static final String ICEBERG_FIELD_OPTIONAL = "iceberg.field.optional";
public static final String ICEBERG_PARTITION_TRANSFORM = "iceberg.partition.transform";
public static final String ICEBERG_PARTITION_FIELD_ID = "iceberg.partition.field-id";
public static final String ICEBERG_PARTITION_SOURCE_ID = "iceberg.partition.source-id";
public static final String SCHEMA_COLUMN = "schema-column";
public static final String SCHEMA_SUBFIELD = "schema-subfield";
public static final String PARTITION_FIELD = "partition-field";

/**
* A Glue database name cannot be longer than 252 characters.
Expand Down Expand Up @@ -252,59 +241,27 @@ private static String toTypeString(Type type) {

private static List<Column> toColumns(TableMetadata metadata) {
List<Column> columns = Lists.newArrayList();
Set<NestedField> rootColumnSet = Sets.newHashSet();
// Add schema-column fields
Set<String> addedNames = Sets.newHashSet();

for (NestedField field : metadata.schema().columns()) {
rootColumnSet.add(field);
addColumnWithDedupe(columns, addedNames, field);
}

return columns;
}

private static void addColumnWithDedupe(List<Column> columns, Set<String> dedupe, NestedField field) {
if (!dedupe.contains(field.name())) {
columns.add(Column.builder()
.name(field.name())
.type(toTypeString(field.type()))
.comment(field.doc())
.parameters(convertToParameters(SCHEMA_COLUMN, field))
.build());
}
// Add schema-subfield
for (NestedField field : TypeUtil.indexById(metadata.schema().asStruct()).values()) {
if (!rootColumnSet.contains(field)) {
columns.add(Column.builder()
.name(field.name())
.type(toTypeString(field.type()))
.comment(field.doc())
.parameters(convertToParameters(SCHEMA_SUBFIELD, field))
.build());
}
}
// Add partition-field
for (PartitionField partitionField : metadata.spec().fields()) {
Type type = partitionField.transform()
.getResultType(metadata.schema().findField(partitionField.sourceId()).type());
columns.add(Column.builder()
.name(partitionField.name())
.type(toTypeString(type))
.parameters(convertToPartitionFieldParameters(type, partitionField))
.parameters(ImmutableMap.of(
ICEBERG_FIELD_ID, Integer.toString(field.fieldId()),
ICEBERG_FIELD_OPTIONAL, Boolean.toString(field.isOptional())
))
.build());
dedupe.add(field.name());
}
return columns;
}

private static Map<String, String> convertToParameters(String fieldUsage, NestedField field) {
return ImmutableMap.of(ICEBERG_FIELD_USAGE, fieldUsage,
ICEBERG_FIELD_TYPE_TYPE_ID, field.type().typeId().toString(),
ICEBERG_FIELD_TYPE_STRING, toTypeString(field.type()),
ICEBERG_FIELD_ID, Integer.toString(field.fieldId()),
ICEBERG_FIELD_OPTIONAL, Boolean.toString(field.isOptional())
);
}

private static Map<String, String> convertToPartitionFieldParameters(Type type, PartitionField partitionField) {
return ImmutableMap.<String, String>builder()
.put(ICEBERG_FIELD_USAGE, PARTITION_FIELD)
.put(ICEBERG_FIELD_TYPE_TYPE_ID, type.typeId().toString())
.put(ICEBERG_FIELD_TYPE_STRING, toTypeString(type))
.put(ICEBERG_FIELD_ID, Integer.toString(partitionField.fieldId()))
.put(ICEBERG_PARTITION_TRANSFORM, partitionField.transform().toString())
.put(ICEBERG_PARTITION_FIELD_ID, Integer.toString(partitionField.fieldId()))
.put(ICEBERG_PARTITION_SOURCE_ID, Integer.toString(partitionField.sourceId()))
.build();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -137,52 +137,20 @@ public void testSetTableInputInformation() {
.type("string")
.comment("comment1")
.parameters(ImmutableMap.of(
IcebergToGlueConverter.ICEBERG_FIELD_USAGE, IcebergToGlueConverter.SCHEMA_COLUMN,
IcebergToGlueConverter.ICEBERG_FIELD_ID, "1",
IcebergToGlueConverter.ICEBERG_FIELD_OPTIONAL, "false",
IcebergToGlueConverter.ICEBERG_FIELD_TYPE_STRING, "string",
IcebergToGlueConverter.ICEBERG_FIELD_TYPE_TYPE_ID, "STRING"
IcebergToGlueConverter.ICEBERG_FIELD_OPTIONAL, "false"
))
.build(),
Column.builder()
.name("y")
.type("struct<z:int>")
.comment("comment2")
.parameters(ImmutableMap.of(
IcebergToGlueConverter.ICEBERG_FIELD_USAGE, IcebergToGlueConverter.SCHEMA_COLUMN,
IcebergToGlueConverter.ICEBERG_FIELD_ID, "2",
IcebergToGlueConverter.ICEBERG_FIELD_OPTIONAL, "false",
IcebergToGlueConverter.ICEBERG_FIELD_TYPE_STRING, "struct<z:int>",
IcebergToGlueConverter.ICEBERG_FIELD_TYPE_TYPE_ID, "STRUCT"
IcebergToGlueConverter.ICEBERG_FIELD_OPTIONAL, "false"
))
.build(),
Column.builder()
.name("z")
.type("int")
.parameters(ImmutableMap.of(
IcebergToGlueConverter.ICEBERG_FIELD_USAGE, IcebergToGlueConverter.SCHEMA_SUBFIELD,
IcebergToGlueConverter.ICEBERG_FIELD_ID, "3",
IcebergToGlueConverter.ICEBERG_FIELD_OPTIONAL, "false",
IcebergToGlueConverter.ICEBERG_FIELD_TYPE_STRING, "int",
IcebergToGlueConverter.ICEBERG_FIELD_TYPE_TYPE_ID, "INTEGER"
))
.build(),
Column.builder()
.name("x")
.type("string")
.parameters(ImmutableMap.<String, String>builder()
.put(IcebergToGlueConverter.ICEBERG_FIELD_USAGE, IcebergToGlueConverter.PARTITION_FIELD)
.put(IcebergToGlueConverter.ICEBERG_FIELD_TYPE_TYPE_ID, "STRING")
.put(IcebergToGlueConverter.ICEBERG_FIELD_TYPE_STRING, "string")
.put(IcebergToGlueConverter.ICEBERG_FIELD_ID, "1000")
.put(IcebergToGlueConverter.ICEBERG_PARTITION_FIELD_ID, "1000")
.put(IcebergToGlueConverter.ICEBERG_PARTITION_SOURCE_ID, "1")
.put(IcebergToGlueConverter.ICEBERG_PARTITION_TRANSFORM, "identity")
.build()
)
.build()
)
).build())
.build()))
.build())
.build();

Assert.assertEquals(
Expand Down