|
33 | 33 | import io.trino.plugin.hive.AcidInfo; |
34 | 34 | import io.trino.plugin.hive.FileFormatDataSourceStats; |
35 | 35 | import io.trino.plugin.hive.HiveColumnHandle; |
| 36 | +import io.trino.plugin.hive.HiveColumnProjectionInfo; |
36 | 37 | import io.trino.plugin.hive.HiveConfig; |
37 | 38 | import io.trino.plugin.hive.HivePageSourceFactory; |
38 | 39 | import io.trino.plugin.hive.HiveType; |
@@ -331,30 +332,26 @@ public static Optional<MessageType> getParquetMessageType(List<HiveColumnHandle> |
331 | 332 |
|
332 | 333 | public static Optional<org.apache.parquet.schema.Type> getColumnType(HiveColumnHandle column, MessageType messageType, boolean useParquetColumnNames) |
333 | 334 | { |
334 | | - Optional<org.apache.parquet.schema.Type> columnType = getBaseColumnParquetType(column, messageType, useParquetColumnNames); |
335 | | - if (columnType.isEmpty() || column.getHiveColumnProjectionInfo().isEmpty()) { |
336 | | - return columnType; |
| 335 | + Optional<org.apache.parquet.schema.Type> baseColumnType = getBaseColumnParquetType(column, messageType, useParquetColumnNames); |
| 336 | + if (baseColumnType.isEmpty() || column.getHiveColumnProjectionInfo().isEmpty()) { |
| 337 | + return baseColumnType; |
337 | 338 | } |
338 | | - GroupType baseType = columnType.get().asGroupType(); |
339 | | - ImmutableList.Builder<org.apache.parquet.schema.Type> typeBuilder = ImmutableList.builder(); |
340 | | - org.apache.parquet.schema.Type parentType = baseType; |
| 339 | + GroupType baseType = baseColumnType.get().asGroupType(); |
| 340 | + Optional<List<org.apache.parquet.schema.Type>> subFieldTypesOptional = dereferenceSubFieldTypes(baseType, column.getHiveColumnProjectionInfo().get()); |
341 | 341 |
|
342 | | - for (String name : column.getHiveColumnProjectionInfo().get().getDereferenceNames()) { |
343 | | - org.apache.parquet.schema.Type childType = getParquetTypeByName(name, parentType.asGroupType()); |
344 | | - if (childType == null) { |
345 | | - return Optional.empty(); |
346 | | - } |
347 | | - typeBuilder.add(childType); |
348 | | - parentType = childType; |
| 342 | + // if there is a mismatch between parquet schema and the hive schema and the column cannot be dereferenced |
| 343 | + if (subFieldTypesOptional.isEmpty()) { |
| 344 | + return Optional.empty(); |
349 | 345 | } |
350 | | - |
351 | | - List<org.apache.parquet.schema.Type> subfieldTypes = typeBuilder.build(); |
352 | | - org.apache.parquet.schema.Type type = subfieldTypes.get(subfieldTypes.size() - 1); |
353 | | - for (int i = subfieldTypes.size() - 2; i >= 0; --i) { |
354 | | - GroupType groupType = subfieldTypes.get(i).asGroupType(); |
355 | | - type = new GroupType(groupType.getRepetition(), groupType.getName(), ImmutableList.of(type)); |
| 346 | + else { |
| 347 | + List<org.apache.parquet.schema.Type> subfieldTypes = subFieldTypesOptional.get(); |
| 348 | + org.apache.parquet.schema.Type type = subfieldTypes.get(subfieldTypes.size() - 1); |
| 349 | + for (int i = subfieldTypes.size() - 2; i >= 0; --i) { |
| 350 | + GroupType groupType = subfieldTypes.get(i).asGroupType(); |
| 351 | + type = new GroupType(groupType.getRepetition(), groupType.getName(), ImmutableList.of(type)); |
| 352 | + } |
| 353 | + return Optional.of(new GroupType(baseType.getRepetition(), baseType.getName(), ImmutableList.of(type))); |
356 | 354 | } |
357 | | - return Optional.of(new GroupType(baseType.getRepetition(), baseType.getName(), ImmutableList.of(type))); |
358 | 355 | } |
359 | 356 |
|
360 | 357 | public static Optional<ColumnIndexStore> getColumnIndexStore( |
@@ -509,4 +506,32 @@ private static Optional<org.apache.parquet.schema.Type> getBaseColumnParquetType |
509 | 506 |
|
510 | 507 | return Optional.empty(); |
511 | 508 | } |
| 509 | + |
| 510 | + /** |
| 511 | + * Dereferencing base parquet type based on projection info's dereference names. |
| 512 | + * For example, when dereferencing baseType(level1Field0, level1Field1, Level1Field2(Level2Field0, Level2Field1)) |
| 513 | + * with a projection info's dereferenceNames list as (basetype, Level1Field2, Level2Field1). |
| 514 | + * It would return a list of parquet types in the order of (level1Field2, Level2Field1) |
| 515 | + * |
| 516 | + * @return child fields on each level of dereferencing. Return Optional.empty when failed to do the lookup. |
| 517 | + */ |
| 518 | + private static Optional<List<org.apache.parquet.schema.Type>> dereferenceSubFieldTypes(GroupType baseType, HiveColumnProjectionInfo projectionInfo) |
| 519 | + { |
| 520 | + checkArgument(baseType != null, "base type cannot be null when dereferencing"); |
| 521 | + checkArgument(projectionInfo != null, "hive column projection info cannot be null when doing dereferencing"); |
| 522 | + |
| 523 | + ImmutableList.Builder<org.apache.parquet.schema.Type> typeBuilder = ImmutableList.builder(); |
| 524 | + org.apache.parquet.schema.Type parentType = baseType; |
| 525 | + |
| 526 | + for (String name : projectionInfo.getDereferenceNames()) { |
| 527 | + org.apache.parquet.schema.Type childType = getParquetTypeByName(name, parentType.asGroupType()); |
| 528 | + if (childType == null) { |
| 529 | + return Optional.empty(); |
| 530 | + } |
| 531 | + typeBuilder.add(childType); |
| 532 | + parentType = childType; |
| 533 | + } |
| 534 | + |
| 535 | + return Optional.of(typeBuilder.build()); |
| 536 | + } |
512 | 537 | } |
0 commit comments