From 8fee8139ce7ecf9271c6c40cd414c60a03d26eba Mon Sep 17 00:00:00 2001 From: "yuzhao.cyz" Date: Thu, 15 Sep 2022 15:23:51 +0800 Subject: [PATCH] [HUDI-4844] Skip partition value resolving when the field does not exists for MergeOnReadInputFormat#getReader --- .../hudi/table/format/cow/CopyOnWriteInputFormat.java | 8 +++++++- .../hudi/table/format/mor/MergeOnReadInputFormat.java | 8 +++++++- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/CopyOnWriteInputFormat.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/CopyOnWriteInputFormat.java index f04c23fe91e44..a36f9c914cb23 100644 --- a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/CopyOnWriteInputFormat.java +++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/CopyOnWriteInputFormat.java @@ -109,7 +109,13 @@ public void open(FileInputSplit fileSplit) throws IOException { fileSplit.getPath()); LinkedHashMap partObjects = new LinkedHashMap<>(); partSpec.forEach((k, v) -> { - DataType fieldType = fullFieldTypes[fieldNameList.indexOf(k)]; + final int idx = fieldNameList.indexOf(k); + if (idx == -1) { + // for any rare cases that the partition field does not exist in schema, + // fallback to file read + return; + } + DataType fieldType = fullFieldTypes[idx]; if (!DataTypeUtils.isDatetimeType(fieldType)) { // date time type partition field is formatted specifically, // read directly from the data file to avoid format mismatch or precision loss diff --git a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/format/mor/MergeOnReadInputFormat.java b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/format/mor/MergeOnReadInputFormat.java index 61cf52386b274..c9b6561bdef20 100644 --- a/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/format/mor/MergeOnReadInputFormat.java +++ b/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/format/mor/MergeOnReadInputFormat.java @@ -300,7 +300,13 @@ private ParquetColumnarRowSplitReader getReader(String path, int[] requiredPos) FilePathUtils.extractPartitionKeys(this.conf)); LinkedHashMap partObjects = new LinkedHashMap<>(); partSpec.forEach((k, v) -> { - DataType fieldType = fieldTypes.get(fieldNames.indexOf(k)); + final int idx = fieldNames.indexOf(k); + if (idx == -1) { + // for any rare cases that the partition field does not exist in schema, + // fallback to file read + return; + } + DataType fieldType = fieldTypes.get(idx); if (!DataTypeUtils.isDatetimeType(fieldType)) { // date time type partition field is formatted specifically, // read directly from the data file to avoid format mismatch or precision loss