Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -392,7 +392,8 @@ private static void populateMapping(
fieldNameToIdMappingForTableColumns.put(
identity.getId(),
identity.getChildren().stream()
.collect(toImmutableMap(ColumnIdentity::getName, ColumnIdentity::getId)));
// Lower casing is required here because ORC StructColumnReader does the same before mapping
.collect(toImmutableMap(child -> child.getName().toLowerCase(ENGLISH), ColumnIdentity::getId)));

for (ColumnIdentity child : identity.getChildren()) {
populateMapping(child, fieldNameToIdMappingForTableColumns);
Expand All @@ -415,7 +416,9 @@ public IdBasedFieldMapper(Map<Integer, OrcColumn> idToColumnMappingForFile, Map<
@Override
public OrcColumn get(String fieldName)
{
int fieldId = nameToIdMappingForTableColumns.get(fieldName);
int fieldId = requireNonNull(
nameToIdMappingForTableColumns.get(fieldName),
() -> format("Id mapping for field %s not found", fieldName));
return idToColumnMappingForFile.get(fieldId);
}
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,2 +1,4 @@
connector.name=iceberg
hive.metastore.uri=thrift://hadoop-master:9083
# TODO: Remove this config to test default read behavior once Spark writer version is fixed. See https://github.com/trinodb/trino/issues/6369 for details
iceberg.use-file-size-from-metadata=false
Comment on lines +3 to +4
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can this be removed now?
if Spark Iceberg still writes incorrect file sizes, we should reconsider whether iceberg.use-file-size-from-metadata should default to false.

Original file line number Diff line number Diff line change
Expand Up @@ -367,11 +367,18 @@ public void testIdBasedFieldMapping()
String prestoTableName = prestoTableName(baseTableName);
String sparkTableName = sparkTableName(baseTableName);

onPresto().executeQuery(format(
"CREATE TABLE %s (_struct ROW(rename BIGINT, keep BIGINT, drop_and_add BIGINT), _partition BIGINT) "
+ "WITH (partitioning = ARRAY['_partition'])",
prestoTableName));
onPresto().executeQuery(format("INSERT INTO %s VALUES (row(1, 2, 3), 1001)", prestoTableName));
onSpark().executeQuery(format(
"CREATE TABLE %s (_struct STRUCT<rename:BIGINT, keep:BIGINT, drop_and_add:BIGINT, CaseSensitive:BIGINT>, _partition BIGINT)"
+ " USING ICEBERG"
+ " partitioned by (_partition)"
+ " TBLPROPERTIES ('write.format.default' = 'orc')",
sparkTableName));

onSpark().executeQuery(format(
"INSERT INTO TABLE %s SELECT "
+ "named_struct('rename', 1, 'keep', 2, 'drop_and_add', 3, 'CaseSensitive', 4), "
+ "1001",
sparkTableName));

// Alter nested fields using Spark. Presto does not support this yet.
onSpark().executeQuery(format("ALTER TABLE %s RENAME COLUMN _struct.rename TO renamed", sparkTableName));
Expand All @@ -383,6 +390,7 @@ public void testIdBasedFieldMapping()
// Rename does not change id
.addField("renamed", 1L)
.addField("keep", 2L)
.addField("CaseSensitive", 4L)
// Dropping and re-adding changes id
.addField("drop_and_add", null)
.build(),
Expand Down