Commit 2cee9a0

Fix failure when reading missing fields in Parquet

1 parent 2829a6d

File tree

3 files changed: +71 -2 lines changed

lib/trino-parquet/src/main/java/io/trino/parquet/reader/ParquetReader.java

Lines changed: 7 additions & 2 deletions

@@ -337,7 +337,11 @@ private ColumnChunk readArray(GroupField field)
     {
         List<Type> parameters = field.getType().getTypeParameters();
         checkArgument(parameters.size() == 1, "Arrays must have a single type parameter, found %s", parameters.size());
-        Field elementField = field.getChildren().get(0).get();
+        Optional<Field> children = field.getChildren().get(0);
+        if (children.isEmpty()) {
+            return new ColumnChunk(field.getType().createNullBlock(), new int[] {}, new int[] {});
+        }
+        Field elementField = children.get();
         ColumnChunk columnChunk = readColumnChunk(elementField);

         ListColumnReader.BlockPositions collectionPositions = calculateCollectionOffsets(field, columnChunk.getDefinitionLevels(), columnChunk.getRepetitionLevels());

@@ -355,7 +359,8 @@ private ColumnChunk readMap(GroupField field)

         ColumnChunk columnChunk = readColumnChunk(field.getChildren().get(0).get());
         blocks[0] = columnChunk.getBlock();
-        blocks[1] = readColumnChunk(field.getChildren().get(1).get()).getBlock();
+        Optional<Field> valueField = field.getChildren().get(1);
+        blocks[1] = valueField.isPresent() ? readColumnChunk(valueField.get()).getBlock() : parameters.get(1).createNullBlock();
         ListColumnReader.BlockPositions collectionPositions = calculateCollectionOffsets(field, columnChunk.getDefinitionLevels(), columnChunk.getRepetitionLevels());
         Block mapBlock = ((MapType) field.getType()).createBlockFromKeyValue(collectionPositions.isNull(), collectionPositions.offsets(), blocks[0], blocks[1]);
         return new ColumnChunk(mapBlock, columnChunk.getDefinitionLevels(), columnChunk.getRepetitionLevels());
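
Both hunks follow the same pattern: when a nested field requested by the table schema is absent from the Parquet file (for example after the Iceberg schema evolution exercised in the tests below), the child slot returned by field.getChildren() is Optional.empty(), and the previous code dereferenced that empty Optional with get(). The reader now substitutes a block of nulls for the missing array element or map value. A minimal, hypothetical sketch of that fallback pattern, assuming Trino SPI's RunLengthEncodedBlock.create as the null-block source (the commit itself uses Type#createNullBlock, as shown in the diff above):

    import io.trino.spi.block.Block;
    import io.trino.spi.block.RunLengthEncodedBlock;
    import io.trino.spi.type.Type;

    import java.util.Optional;
    import java.util.function.Function;

    // Hypothetical helper, not part of the commit: read a child field if the file
    // contains it, otherwise return an all-null block of the expected type so the
    // missing field surfaces as NULL values instead of a reader failure.
    final class MissingFieldFallback
    {
        private MissingFieldFallback() {}

        static <F> Block readOrNulls(Optional<F> child, Function<F, Block> readChild, Type type, int positionCount)
        {
            return child
                    .map(readChild)
                    // RunLengthEncodedBlock.create(type, null, n) stands in for the
                    // Type#createNullBlock call used in the actual change.
                    .orElseGet(() -> RunLengthEncodedBlock.create(type, null, positionCount));
        }
    }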

plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/BaseIcebergConnectorTest.java

Lines changed: 39 additions & 0 deletions

@@ -8539,6 +8539,45 @@ public void testSetIllegalExtraPropertyKey()
         }
     }

+    @Test // regression test for https://github.com/trinodb/trino/issues/22922
+    void testArrayElementChange()
+    {
+        try (TestTable table = new TestTable(
+                getQueryRunner()::execute,
+                "test_array_schema_change",
+                "(col array(row(a varchar, b varchar)))",
+                List.of("CAST(array[row('a', 'b')] AS array(row(a varchar, b varchar)))"))) {
+            assertUpdate("ALTER TABLE " + table.getName() + " DROP COLUMN col.element.a");
+            assertUpdate("ALTER TABLE " + table.getName() + " ADD COLUMN col.element.c varchar");
+            assertUpdate("ALTER TABLE " + table.getName() + " DROP COLUMN col.element.b");
+
+            String expected = format == ORC ? "CAST(array[row(NULL)] AS array(row(c varchar)))" : "CAST(NULL AS array(row(c varchar)))";
+            assertThat(query("SELECT * FROM " + table.getName()))
+                    .matches("VALUES " + expected);
+        }
+    }
+
+    // MAP type is tested in TestIcebergV2.testMapValueSchemaChange
+
+    @Test
+    void testRowFieldChange()
+    {
+        try (TestTable table = new TestTable(
+                getQueryRunner()::execute,
+                "test_row_schema_change",
+                "(col row(a varchar, b varchar))")) {
+            assertUpdate("INSERT INTO " + table.getName() + " SELECT CAST(row('a', 'b') AS row(a varchar, b varchar))", 1);
+
+            assertUpdate("ALTER TABLE " + table.getName() + " DROP COLUMN col.a");
+            assertUpdate("ALTER TABLE " + table.getName() + " ADD COLUMN col.c varchar");
+            assertUpdate("ALTER TABLE " + table.getName() + " DROP COLUMN col.b");
+
+            String expected = format == ORC || format == AVRO ? "CAST(row(NULL) AS row(c varchar))" : "CAST(NULL AS row(c varchar))";
+            assertThat(query("SELECT * FROM " + table.getName()))
+                    .matches("SELECT " + expected);
+        }
+    }
+
     @Test
     public void testObjectStoreLayoutEnabledAndDataLocation()
             throws Exception

plugin/trino-iceberg/src/test/java/io/trino/plugin/iceberg/TestIcebergV2.java

Lines changed: 25 additions & 0 deletions

@@ -1466,6 +1466,31 @@ public void testHighlyNestedFieldPartitioningWithTimestampTransform()
                 ImmutableSet.of("grandparent.parent.ts_hour=2021-01-01-01/", "grandparent.parent.ts_hour=2022-02-02-02/", "grandparent.parent.ts_hour=2023-03-03-03/"));
     }

+    @Test
+    void testMapValueSchemaChange()
+    {
+        testMapValueSchemaChange("PARQUET", "map(array[1], array[NULL])");
+        testMapValueSchemaChange("ORC", "map(array[1], array[row(NULL)])");
+        testMapValueSchemaChange("AVRO", "NULL");
+    }
+
+    private void testMapValueSchemaChange(String format, String expectedValue)
+    {
+        try (TestTable table = new TestTable(
+                getQueryRunner()::execute,
+                "test_map_value_schema_change",
+                "WITH (format = '" + format + "') AS SELECT CAST(map(array[1], array[row(2)]) AS map(integer, row(field integer))) col")) {
+            Table icebergTable = loadTable(table.getName());
+            icebergTable.updateSchema()
+                    .addColumn("col.value", "new_field", Types.IntegerType.get())
+                    .deleteColumn("col.value.field")
+                    .commit();
+            assertThat(query("SELECT * FROM " + table.getName()))
+                    .as("Format: %s", format)
+                    .matches("SELECT CAST(" + expectedValue + " AS map(integer, row(new_field integer)))");
+        }
+    }
+
     @Test
     public void testUpdateAfterEqualityDelete()
             throws Exception
