@@ -314,9 +314,7 @@ protected HoodieTimeline getActiveTimeline() {
   private Object[] parsePartitionColumnValues(String[] partitionColumns, String partitionPath) {
     Object[] partitionColumnValues = doParsePartitionColumnValues(partitionColumns, partitionPath);
     if (shouldListLazily && partitionColumnValues.length != partitionColumns.length) {
-      throw new HoodieException("Failed to parse partition column values from the partition-path:"
-          + " likely non-encoded slashes being used in partition column's values. You can try to"
-          + " work this around by switching listing mode to eager");
+      LOG.warn(">>> PartitionColumns: " + partitionColumns + " PartitionValues: " + partitionColumnValues);
Contributor: So I assume we still need to fail here instead of printing the warning and letting it return?
     }

     return partitionColumnValues;
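If the consensus is to keep failing here, as the comment suggests, one option is to log the diagnostics and then throw — a sketch against the names in this hunk, not code from the PR. Note that concatenating the arrays directly, as the new LOG.warn does, prints JVM identity strings such as [Ljava.lang.String;@1b6d3586 rather than the contents, so Arrays.toString(...) is needed:

    if (shouldListLazily && partitionColumnValues.length != partitionColumns.length) {
      // Surface the mismatch details; Arrays.toString renders the array contents.
      LOG.warn("PartitionColumns: " + java.util.Arrays.toString(partitionColumns)
          + " PartitionValues: " + java.util.Arrays.toString(partitionColumnValues));
      throw new HoodieException("Failed to parse partition column values from the partition-path:"
          + " likely non-encoded slashes being used in partition column's values. You can try to"
          + " work this around by switching listing mode to eager");
    }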

@@ -58,7 +58,7 @@ public HiveHoodieTableFileIndex(HoodieEngineContext engineContext,
         shouldIncludePendingCommits,
         true,
         new NoopCache(),
-        false);
+        true);
Contributor: Now I remember we need to fix the lazy listing for the Hive File Index. Should this be in a separate PR?

Member Author: Yes, this should not have been part of this PR. Actually, it doesn't really matter for the Hudi connector, as it doesn't go through the COW input format code. And for the Hive connector, we already saw that the partition loader will instantiate this on every call. However, the actual perf issue for the Hive connector was fixed by #7527 (comment).

   }

   /**
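For context, the flipped boolean appears to be the parent BaseHoodieTableFileIndex's shouldListLazily flag — the same flag guarding the partition-parsing check in the first hunk — though the argument name is an assumption, not shown in this diff. A minimal, self-contained illustration of the eager-vs-lazy trade-off, with invented names:

    import java.util.List;

    // Invented types purely to illustrate eager vs. lazy partition listing.
    interface PartitionLister {
      List<String> listPartitionPaths(); // potentially expensive, e.g. a full storage scan
    }

    final class EagerIndex {
      private final List<String> partitions;

      EagerIndex(PartitionLister lister) {
        // Pays the full listing cost up front, even if no partition is ever read.
        this.partitions = lister.listPartitionPaths();
      }

      List<String> partitions() {
        return partitions;
      }
    }

    final class LazyIndex {
      private final PartitionLister lister;
      private List<String> partitions; // null until first access

      LazyIndex(PartitionLister lister) {
        this.lister = lister;
      }

      synchronized List<String> partitions() {
        if (partitions == null) {
          partitions = lister.listPartitionPaths(); // listed only when actually needed
        }
        return partitions;
      }
    }

Under lazy listing, partition values are also parsed on demand, which is why the mismatch check in the first file only fires when shouldListLazily is true.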

@@ -58,7 +58,6 @@
 import org.apache.hadoop.mapred.JobConf;
 import org.apache.log4j.LogManager;
 import org.apache.log4j.Logger;
-import java.io.IOException;

 import java.util.ArrayList;
 import java.util.Arrays;
@@ -82,32 +81,25 @@ public class SchemaEvolutionContext {

   private final InputSplit split;
   private final JobConf job;
-  private HoodieTableMetaClient metaClient;
+  private final HoodieTableMetaClient metaClient;
   public Option<InternalSchema> internalSchemaOption;

-  public SchemaEvolutionContext(InputSplit split, JobConf job) throws IOException {
+  public SchemaEvolutionContext(InputSplit split, JobConf job) {
     this(split, job, Option.empty());
   }

-  public SchemaEvolutionContext(InputSplit split, JobConf job, Option<HoodieTableMetaClient> metaClientOption) throws IOException {
+  public SchemaEvolutionContext(InputSplit split, JobConf job, Option<HoodieTableMetaClient> metaClientOption) {
     this.split = split;
     this.job = job;
     this.metaClient = metaClientOption.isPresent() ? metaClientOption.get() : setUpHoodieTableMetaClient();
Contributor: Now the initialization of internalSchemaOption has been removed, so callers like

    if (schemaEvolutionContext.internalSchemaOption.isPresent()) {

need to be modified.

Member Author: Oh, OK. Then there is no need for this PR.

-    if (this.metaClient == null) {
-      internalSchemaOption = Option.empty();
-      return;
-    }
-    try {
-      TableSchemaResolver schemaUtil = new TableSchemaResolver(metaClient);
-      this.internalSchemaOption = schemaUtil.getTableInternalSchemaFromCommitMetadata();
-    } catch (Exception e) {
-      internalSchemaOption = Option.empty();
-      LOG.warn(String.format("failed to get internal Schema from hudi table:%s", metaClient.getBasePathV2()), e);
-    }
-    LOG.info(String.format("finish init schema evolution for split: %s", split));
   }

-  private HoodieTableMetaClient setUpHoodieTableMetaClient() throws IOException {
+  private HoodieTableMetaClient setUpHoodieTableMetaClient() {
     try {
       Path inputPath = ((FileSplit) split).getPath();
       FileSystem fs = inputPath.getFileSystem(job);
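Since the constructor no longer assigns internalSchemaOption, existing internalSchemaOption.isPresent() call sites can now hit a null. One way to keep them safe — a sketch with an invented method name, reusing the try/catch logic the old constructor had — is a null-guarded lazy resolver:

    public Option<InternalSchema> getInternalSchemaLazily() {
      if (internalSchemaOption == null) {
        try {
          internalSchemaOption = new TableSchemaResolver(metaClient).getTableInternalSchemaFromCommitMetadata();
        } catch (Exception e) {
          internalSchemaOption = Option.empty();
          LOG.warn(String.format("failed to get internal Schema from hudi table:%s", metaClient.getBasePathV2()), e);
        }
      }
      return internalSchemaOption;
    }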
@@ -159,27 +151,26 @@ public void doEvolutionForRealtimeInputFormat(AbstractRealtimeRecordReader realt
    * Do schema evolution for ParquetFormat.
    */
   public void doEvolutionForParquetFormat() {
-    if (internalSchemaOption.isPresent()) {
-      List<String> requiredColumns = getRequireColumn(job);
-      // No need trigger schema evolution for count(*)/count(1) operation
-      boolean disableSchemaEvolution = requiredColumns.isEmpty() || (requiredColumns.size() == 1 && requiredColumns.get(0).isEmpty());
Contributor: To clarify, do requiredColumns contain the columns from the predicate(s), e.g., count(*) where col1 is not null?

Contributor: I see. This is existing logic. Still wondering the same question.
-      if (!disableSchemaEvolution) {
+    if (!internalSchemaOption.isPresent()) {
Contributor: Should this condition be internalSchemaOption == null, since it may not be initialized?
+      internalSchemaOption = new TableSchemaResolver(metaClient).getTableInternalSchemaFromCommitMetadata();
Contributor: Still do a try/catch here in case the internal schema cannot be read?
+    }
     // reading hoodie schema evolution table
     job.setBoolean(HIVE_EVOLUTION_ENABLE, true);
-      Path finalPath = ((FileSplit)split).getPath();
+    Path finalPath = ((FileSplit) split).getPath();
     InternalSchema prunedSchema;
+    List<String> requiredColumns = getRequireColumn(job);
+    // No need trigger schema evolution for count(*)/count(1) operation
+    boolean disableSchemaEvolution =
+        requiredColumns.isEmpty() || (requiredColumns.size() == 1 && requiredColumns.get(0).isEmpty());
+    if (!disableSchemaEvolution) {
-        prunedSchema = InternalSchemaUtils.pruneInternalSchema(internalSchemaOption.get(), requiredColumns);
-        InternalSchema querySchema = prunedSchema;
-        Long commitTime = Long.valueOf(FSUtils.getCommitTime(finalPath.getName()));
-        InternalSchema fileSchema = InternalSchemaCache.searchSchemaAndCache(commitTime, metaClient, false);
-        InternalSchema mergedInternalSchema = new InternalSchemaMerger(fileSchema, querySchema, true,
-            true).mergeSchema();
-        List<Types.Field> fields = mergedInternalSchema.columns();
-        setColumnNameList(job, fields);
-        setColumnTypeList(job, fields);
-        pushDownFilter(job, querySchema, fileSchema);
-      }
+      prunedSchema = InternalSchemaUtils.pruneInternalSchema(internalSchemaOption.get(), requiredColumns);
+      InternalSchema querySchema = prunedSchema;
+      Long commitTime = Long.valueOf(FSUtils.getCommitTime(finalPath.getName()));
+      InternalSchema fileSchema = InternalSchemaCache.searchSchemaAndCache(commitTime, metaClient, false);
+      InternalSchema mergedInternalSchema = new InternalSchemaMerger(fileSchema, querySchema, true, true).mergeSchema();
+      List<Types.Field> fields = mergedInternalSchema.columns();
+      setColumnNameList(job, fields);
+      setColumnTypeList(job, fields);
+      pushDownFilter(job, querySchema, fileSchema);
Contributor (on lines 161 to +173): And should this part be guarded by !internalSchemaOption.isPresent()?
     }
   }
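Folding the three review questions above into one shape — a null check because the field may never be assigned now, the try/catch the old constructor had, and an isPresent() guard before calling get() — could look like this sketch (not the PR's code):

    if (!disableSchemaEvolution) {
      if (internalSchemaOption == null || !internalSchemaOption.isPresent()) {
        try {
          internalSchemaOption = new TableSchemaResolver(metaClient).getTableInternalSchemaFromCommitMetadata();
        } catch (Exception e) {
          internalSchemaOption = Option.empty();
          LOG.warn(String.format("failed to get internal Schema from hudi table:%s", metaClient.getBasePathV2()), e);
        }
      }
      if (internalSchemaOption.isPresent()) {
        // prune, merge, set column names/types, and push down filters as above
      }
    }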

@@ -66,10 +66,10 @@ public abstract class AbstractRealtimeRecordReader {
   private Schema readerSchema;
   private Schema writerSchema;
   private Schema hiveSchema;
-  private HoodieTableMetaClient metaClient;
+  private final HoodieTableMetaClient metaClient;
   protected SchemaEvolutionContext schemaEvolutionContext;
   // support merge operation
-  protected boolean supportPayload = true;
+  protected boolean supportPayload;
   // handle hive type to avro record
   protected HiveAvroSerializer serializer;