diff --git a/.github/workflows/bot.yml b/.github/workflows/bot.yml index ebf3caccd9c62..20515f7c750ed 100644 --- a/.github/workflows/bot.yml +++ b/.github/workflows/bot.yml @@ -18,12 +18,16 @@ jobs: include: - scala: "scala-2.11" spark: "spark2" + skipModules: "" - scala: "scala-2.11" spark: "spark2,spark-shade-unbundle-avro" + skipModules: "" - scala: "scala-2.12" spark: "spark3.1.x" + skipModules: "!hudi-spark-datasource/hudi-spark3" - scala: "scala-2.12" spark: "spark3.1.x,spark-shade-unbundle-avro" + skipModules: "!hudi-spark-datasource/hudi-spark3" - scala: "scala-2.12" spark: "spark3" - scala: "scala-2.12" @@ -40,4 +44,5 @@ jobs: env: SCALA_PROFILE: ${{ matrix.scala }} SPARK_PROFILE: ${{ matrix.spark }} - run: mvn install -P "$SCALA_PROFILE,$SPARK_PROFILE" -DskipTests=true -Dmaven.javadoc.skip=true -B -V + SKIP_MODULES: ${{ matrix.skipModules }} + run: mvn install -P "$SCALA_PROFILE,$SPARK_PROFILE" -pl "$SKIP_MODULES" -DskipTests=true -Dmaven.javadoc.skip=true -B -V diff --git a/README.md b/README.md index af11e6a14d5df..6d3475755ff87 100644 --- a/README.md +++ b/README.md @@ -83,7 +83,7 @@ mvn clean package -DskipTests -Dscala-2.12 The default Spark version supported is 2.4.4. To build for different Spark 3 versions, use the corresponding profile ``` -# Build against Spark 3.2.0 (the default build shipped with the public Spark 3 bundle) +# Build against Spark 3.2.1 (the default build shipped with the public Spark 3 bundle) mvn clean package -DskipTests -Dspark3 # Build against Spark 3.1.2 diff --git a/doap_HUDI.rdf b/doap_HUDI.rdf index 33f64ecf82ecb..a3b958a5cd7d0 100644 --- a/doap_HUDI.rdf +++ b/doap_HUDI.rdf @@ -81,6 +81,11 @@ <created>2021-12-08</created> <revision>0.10.0</revision> </release> + <release> + <name>Apache Hudi 0.10.1</name> + <created>2022-01-26</created> + <revision>0.10.1</revision> + </release> diff --git a/docker/demo/config/test-suite/cow-spark-long-running.yaml b/docker/demo/config/test-suite/cow-spark-long-running.yaml index 493ad7a5578f6..8a1e58f840a37 100644 --- a/docker/demo/config/test-suite/cow-spark-long-running.yaml +++ b/docker/demo/config/test-suite/cow-spark-long-running.yaml @@ -13,13 +13,13 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License.
-dag_name: cow-spark-long-running-multi-partitions.yaml -dag_rounds: 50 -dag_intermittent_delay_mins: 1 +dag_name: cow-spark-deltastreamer-long-running-multi-partitions.yaml +dag_rounds: 30 +dag_intermittent_delay_mins: 0 dag_content: first_insert: config: - record_size: 1000 + record_size: 200 num_partitions_insert: 50 repeat_count: 1 num_records_insert: 10000 @@ -33,12 +33,12 @@ dag_content: deps: first_insert first_validate: config: - validate_hive: true + validate_hive: false type: ValidateDatasetNode deps: first_hive_sync first_upsert: config: - record_size: 1000 + record_size: 200 num_partitions_insert: 50 num_records_insert: 300 repeat_count: 1 @@ -60,13 +60,13 @@ dag_content: deps: first_delete second_validate: config: - validate_hive: true + validate_hive: false delete_input_data: true type: ValidateDatasetNode deps: second_hive_sync last_validate: config: - execute_itr_count: 50 + execute_itr_count: 30 validate_clean: true validate_archival: true type: ValidateAsyncOperations diff --git a/docker/demo/config/test-suite/cow-spark-simple.yaml b/docker/demo/config/test-suite/cow-spark-simple.yaml index 21e7e6bbe39bc..0859c63200203 100644 --- a/docker/demo/config/test-suite/cow-spark-simple.yaml +++ b/docker/demo/config/test-suite/cow-spark-simple.yaml @@ -14,7 +14,7 @@ # See the License for the specific language governing permissions and # limitations under the License. dag_name: cow-spark-simple.yaml -dag_rounds: 2 +dag_rounds: 1 dag_intermittent_delay_mins: 1 dag_content: first_insert: @@ -33,7 +33,7 @@ dag_content: deps: first_insert first_validate: config: - validate_hive: true + validate_hive: false type: ValidateDatasetNode deps: first_hive_sync first_upsert: @@ -60,7 +60,7 @@ dag_content: deps: first_delete second_validate: config: - validate_hive: true + validate_hive: false delete_input_data: false type: ValidateDatasetNode deps: second_hive_sync \ No newline at end of file diff --git a/docker/demo/config/test-suite/cow-long-running-multi-partitions.yaml b/docker/demo/config/test-suite/deltastreamer-long-running-multi-partitions-hive.yaml similarity index 96% rename from docker/demo/config/test-suite/cow-long-running-multi-partitions.yaml rename to docker/demo/config/test-suite/deltastreamer-long-running-multi-partitions-hive.yaml index 0ce529805567b..324a4b4a6d0d5 100644 --- a/docker/demo/config/test-suite/cow-long-running-multi-partitions.yaml +++ b/docker/demo/config/test-suite/deltastreamer-long-running-multi-partitions-hive.yaml @@ -13,7 +13,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -dag_name: cow-long-running-multi-partitions.yaml +dag_name: deltastreamer-long-running-multi-partitions.yaml dag_rounds: 50 dag_intermittent_delay_mins: 1 dag_content: @@ -76,7 +76,7 @@ dag_content: deps: first_delete second_validate: config: - validate_hive: false + validate_hive: true delete_input_data: true type: ValidateDatasetNode deps: second_hive_sync diff --git a/docker/demo/config/test-suite/deltastreamer-long-running-multi-partitions.yaml b/docker/demo/config/test-suite/deltastreamer-long-running-multi-partitions.yaml new file mode 100644 index 0000000000000..9d2766f1a5a7e --- /dev/null +++ b/docker/demo/config/test-suite/deltastreamer-long-running-multi-partitions.yaml @@ -0,0 +1,78 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +dag_name: deltastreamer-long-running-multi-partitions.yaml +dag_rounds: 50 +dag_intermittent_delay_mins: 1 +dag_content: + first_insert: + config: + record_size: 1000 + num_partitions_insert: 5 + repeat_count: 1 + num_records_insert: 1000 + type: InsertNode + deps: none + second_insert: + config: + record_size: 1000 + num_partitions_insert: 50 + repeat_count: 1 + num_records_insert: 10000 + deps: first_insert + type: InsertNode + third_insert: + config: + record_size: 1000 + num_partitions_insert: 2 + repeat_count: 1 + num_records_insert: 300 + deps: second_insert + type: InsertNode + first_upsert: + config: + record_size: 1000 + num_partitions_insert: 2 + num_records_insert: 300 + repeat_count: 1 + num_records_upsert: 100 + num_partitions_upsert: 1 + type: UpsertNode + deps: third_insert + first_delete: + config: + num_partitions_delete: 50 + num_records_delete: 8000 + type: DeleteNode + deps: first_upsert + second_hive_sync: + config: + queue_name: "adhoc" + engine: "mr" + type: HiveSyncNode + deps: first_delete + second_validate: + config: + validate_hive: false + delete_input_data: true + type: ValidateDatasetNode + deps: second_hive_sync + last_validate: + config: + execute_itr_count: 50 + validate_clean: true + validate_archival: true + type: ValidateAsyncOperations + deps: second_validate diff --git a/docker/demo/config/test-suite/deltastreamer-medium-full-dataset-validation.yaml b/docker/demo/config/test-suite/deltastreamer-medium-full-dataset-validation.yaml new file mode 100644 index 0000000000000..2fc4961e15c07 --- /dev/null +++ b/docker/demo/config/test-suite/deltastreamer-medium-full-dataset-validation.yaml @@ -0,0 +1,81 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
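The dag_content blocks in these test-suite files chain nodes linearly: each node's deps field names the node that must complete first, starting from deps: none. Below is a minimal, illustrative Java sketch (not the Hudi integration-test-suite API; node names mirror the deltastreamer-long-running-multi-partitions.yaml above) of resolving that ordering:

```java
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

// Illustrative only: derives the execution order implied by each node's
// `deps` entry in the DAG above. The real test suite builds DagNode objects;
// this simply shows how the chain from "none" to last_validate resolves.
public class DagOrderSketch {
  public static void main(String[] args) {
    Map<String, String> deps = new LinkedHashMap<>();
    deps.put("first_insert", "none");
    deps.put("second_insert", "first_insert");
    deps.put("third_insert", "second_insert");
    deps.put("first_upsert", "third_insert");
    deps.put("first_delete", "first_upsert");
    deps.put("second_hive_sync", "first_delete");
    deps.put("second_validate", "second_hive_sync");
    deps.put("last_validate", "second_validate");

    List<String> order = new ArrayList<>();
    String current = "none";
    for (int i = 0; i < deps.size(); i++) {
      for (Map.Entry<String, String> e : deps.entrySet()) {
        // Schedule the node whose dependency was scheduled last.
        if (e.getValue().equals(current) && !order.contains(e.getKey())) {
          order.add(e.getKey());
          current = e.getKey();
          break;
        }
      }
    }
    System.out.println(order); // [first_insert, second_insert, ..., last_validate]
  }
}
```

Each dag_rounds iteration replays this chain, with dag_intermittent_delay_mins of pause between rounds.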
+ + # to be used with test-aggressive-clean-archival.properties + dag_name: deltastreamer-long-running-multi-partitions.yaml + dag_rounds: 20 + dag_intermittent_delay_mins: 1 + dag_content: + first_insert: + config: + record_size: 1000 + num_partitions_insert: 5 + repeat_count: 1 + num_records_insert: 1000 + type: InsertNode + deps: none + second_insert: + config: + record_size: 1000 + num_partitions_insert: 50 + repeat_count: 1 + num_records_insert: 10000 + deps: first_insert + type: InsertNode + third_insert: + config: + record_size: 1000 + num_partitions_insert: 2 + repeat_count: 1 + num_records_insert: 300 + deps: second_insert + type: InsertNode + first_upsert: + config: + record_size: 1000 + num_partitions_insert: 2 + num_records_insert: 300 + repeat_count: 1 + num_records_upsert: 100 + num_partitions_upsert: 1 + type: UpsertNode + deps: third_insert + first_delete: + config: + num_partitions_delete: 50 + num_records_delete: 8000 + type: DeleteNode + deps: first_upsert + second_hive_sync: + config: + queue_name: "adhoc" + engine: "mr" + type: HiveSyncNode + deps: first_delete + second_validate: + config: + validate_hive: false + delete_input_data: false + type: ValidateDatasetNode + deps: second_hive_sync + last_validate: + config: + execute_itr_count: 20 + validate_clean: true + validate_archival: true + type: ValidateAsyncOperations + deps: second_validate diff --git a/docker/demo/config/test-suite/cow-long-running-example.yaml b/docker/demo/config/test-suite/deltastreamer-long-running-example.yaml similarity index 97% rename from docker/demo/config/test-suite/cow-long-running-example.yaml rename to docker/demo/config/test-suite/deltastreamer-long-running-example.yaml index 29b6858bf0506..28578eb9b687e 100644 --- a/docker/demo/config/test-suite/cow-long-running-example.yaml +++ b/docker/demo/config/test-suite/deltastreamer-long-running-example.yaml @@ -13,7 +13,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -dag_name: cow-long-running-example.yaml +dag_name: deltastreamer-long-running-example.yaml dag_rounds: 50 dag_intermittent_delay_mins: 1 dag_content: diff --git a/docker/demo/config/test-suite/insert-overwrite-table.yaml b/docker/demo/config/test-suite/insert-overwrite-table.yaml new file mode 100644 index 0000000000000..8b5a26e4683b7 --- /dev/null +++ b/docker/demo/config/test-suite/insert-overwrite-table.yaml @@ -0,0 +1,104 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
+dag_name: simple-deltastreamer.yaml +dag_rounds: 1 +dag_intermittent_delay_mins: 1 +dag_content: + first_insert: + config: + record_size: 1000 + num_partitions_insert: 10 + repeat_count: 1 + num_records_insert: 10000 + type: SparkInsertNode + deps: none + first_upsert: + config: + record_size: 1000 + num_partitions_insert: 10 + num_records_insert: 1000 + repeat_count: 1 + num_records_upsert: 8000 + num_partitions_upsert: 10 + type: SparkUpsertNode + deps: first_insert + second_insert: + config: + record_size: 1000 + num_partitions_insert: 10 + repeat_count: 1 + num_records_insert: 10000 + type: SparkInsertNode + deps: first_upsert + second_upsert: + config: + record_size: 1000 + num_partitions_insert: 10 + num_records_insert: 1000 + repeat_count: 1 + num_records_upsert: 8000 + num_partitions_upsert: 10 + type: SparkUpsertNode + deps: second_insert + first_hive_sync: + config: + queue_name: "adhoc" + engine: "mr" + type: HiveSyncNode + deps: second_upsert + first_insert_overwrite_table: + config: + record_size: 1000 + repeat_count: 10 + num_records_insert: 10 + type: SparkInsertOverwriteTableNode + deps: first_hive_sync + delete_all_input_except_last: + config: + delete_input_data_except_latest: true + type: DeleteInputDatasetNode + deps: first_insert_overwrite_table + third_insert: + config: + record_size: 1000 + num_partitions_insert: 10 + repeat_count: 1 + num_records_insert: 10000 + type: SparkInsertNode + deps: delete_all_input_except_last + third_upsert: + config: + record_size: 1000 + num_partitions_insert: 10 + num_records_insert: 1000 + repeat_count: 1 + num_records_upsert: 8000 + num_partitions_upsert: 10 + type: SparkUpsertNode + deps: third_insert + second_hive_sync: + config: + queue_name: "adhoc" + engine: "mr" + type: HiveSyncNode + deps: third_upsert + second_validate: + config: + validate_full_data : true + validate_hive: false + delete_input_data: false + type: ValidateDatasetNode + deps: second_hive_sync diff --git a/docker/demo/config/test-suite/insert-overwrite.yaml b/docker/demo/config/test-suite/insert-overwrite.yaml new file mode 100644 index 0000000000000..f2299c50c08f3 --- /dev/null +++ b/docker/demo/config/test-suite/insert-overwrite.yaml @@ -0,0 +1,106 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
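The Spark*Node types in the DAG above (SparkInsertNode, SparkUpsertNode, and so on) exercise the Hudi Spark datasource directly rather than going through DeltaStreamer. As a rough illustration of the write a SparkUpsertNode issues — the input dataframe and paths below are placeholders, not part of the test suite:

```java
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;

// Rough sketch of the datasource upsert that SparkUpsertNode wraps.
// Input path and table base path are placeholders.
public class SparkUpsertSketch {
  public static void main(String[] args) {
    SparkSession spark = SparkSession.builder().appName("upsert-sketch").getOrCreate();
    Dataset<Row> df = spark.read().json("/tmp/input-batch"); // placeholder input batch

    df.write().format("hudi")
        .option("hoodie.datasource.write.operation", "upsert")
        .option("hoodie.datasource.write.recordkey.field", "_row_key")
        .option("hoodie.datasource.write.partitionpath.field", "timestamp")
        .option("hoodie.table.name", "table1")
        .mode(SaveMode.Append)
        .save("/tmp/hudi-test-table"); // placeholder base path
  }
}
```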
+dag_name: simple-deltastreamer.yaml +dag_rounds: 1 +dag_intermittent_delay_mins: 1 +dag_content: + + first_insert: + config: + record_size: 1000 + num_partitions_insert: 10 + repeat_count: 1 + num_records_insert: 10000 + type: SparkInsertNode + deps: none + first_upsert: + config: + record_size: 1000 + num_partitions_insert: 10 + num_records_insert: 1000 + repeat_count: 1 + num_records_upsert: 8000 + num_partitions_upsert: 10 + type: SparkUpsertNode + deps: first_insert + second_insert: + config: + record_size: 1000 + num_partitions_insert: 10 + repeat_count: 1 + num_records_insert: 10000 + type: SparkInsertNode + deps: first_upsert + second_upsert: + config: + record_size: 1000 + num_partitions_insert: 10 + num_records_insert: 1000 + repeat_count: 1 + num_records_upsert: 8000 + num_partitions_upsert: 10 + type: SparkUpsertNode + deps: second_insert + first_hive_sync: + config: + queue_name: "adhoc" + engine: "mr" + type: HiveSyncNode + deps: second_upsert + first_insert_overwrite: + config: + record_size: 1000 + num_partitions_insert: 10 + repeat_count: 1 + num_records_insert: 10 + type: SparkInsertOverwriteNode + deps: first_hive_sync + delete_all_input_except_last: + config: + delete_input_data_except_latest: true + type: DeleteInputDatasetNode + deps: first_insert_overwrite + third_insert: + config: + record_size: 1000 + num_partitions_insert: 10 + repeat_count: 1 + num_records_insert: 10000 + type: SparkInsertNode + deps: delete_all_input_except_last + third_upsert: + config: + record_size: 1000 + num_partitions_insert: 10 + num_records_insert: 1000 + repeat_count: 1 + num_records_upsert: 8000 + num_partitions_upsert: 10 + type: SparkUpsertNode + deps: third_insert + second_hive_sync: + config: + queue_name: "adhoc" + engine: "mr" + type: HiveSyncNode + deps: third_upsert + second_validate: + config: + validate_full_data : true + validate_hive: false + delete_input_data: false + type: ValidateDatasetNode + deps: second_hive_sync diff --git a/docker/demo/config/test-suite/simple-clustering-hive.yaml b/docker/demo/config/test-suite/simple-clustering-hive.yaml new file mode 100644 index 0000000000000..e1f79bfe93c0f --- /dev/null +++ b/docker/demo/config/test-suite/simple-clustering-hive.yaml @@ -0,0 +1,76 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
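The two DAGs above differ only in the overwrite node they exercise: SparkInsertOverwriteNode maps to the "insert_overwrite" operation, which replaces only the partitions present in the incoming batch, while SparkInsertOverwriteTableNode maps to "insert_overwrite_table", which replaces the entire table. A hedged sketch of that switch (dataframe and base path are placeholders):

```java
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SaveMode;

// Sketch of the two overwrite operations the SparkInsertOverwrite*Node types
// exercise. "insert_overwrite" is partition-scoped; "insert_overwrite_table"
// replaces the whole table contents with the incoming batch.
public class InsertOverwriteSketch {
  static void overwrite(Dataset<Row> batch, boolean wholeTable) {
    batch.write().format("hudi")
        .option("hoodie.datasource.write.operation",
            wholeTable ? "insert_overwrite_table" : "insert_overwrite")
        .option("hoodie.datasource.write.recordkey.field", "_row_key")
        .option("hoodie.datasource.write.partitionpath.field", "timestamp")
        .option("hoodie.table.name", "table1")
        .mode(SaveMode.Append)
        .save("/tmp/hudi-test-table"); // placeholder base path
  }
}
```

This is why both DAGs delete the older input batches afterwards (delete_all_input_except_last): after an overwrite, only the latest batch is still expected to be queryable in full.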
+dag_name: simple-clustering-hive.yaml +dag_rounds: 30 +dag_intermittent_delay_mins: 0 +dag_content: + first_insert: + config: + record_size: 1000 + num_partitions_insert: 1 + repeat_count: 1 + num_records_insert: 1000 + type: InsertNode + deps: none + second_insert: + config: + record_size: 1000 + num_partitions_insert: 1 + repeat_count: 1 + num_records_insert: 10000 + deps: first_insert + type: InsertNode + third_insert: + config: + record_size: 1000 + num_partitions_insert: 1 + repeat_count: 1 + num_records_insert: 300 + deps: second_insert + type: InsertNode + first_delete: + config: + num_partitions_delete: 1 + num_records_delete: 9000 + type: DeleteNode + deps: third_insert + first_hive_sync: + config: + queue_name: "adhoc" + engine: "mr" + type: HiveSyncNode + deps: first_delete + first_validate: + config: + validate_hive: false + type: ValidateDatasetNode + deps: first_hive_sync + first_cluster: + config: + execute_itr_count: 20 + type: ClusteringNode + deps: first_validate + second_hive_sync: + config: + queue_name: "adhoc" + engine: "mr" + type: HiveSyncNode + deps: first_cluster + second_validate: + config: + validate_hive: true + type: ValidateDatasetNode + deps: second_hive_sync diff --git a/docker/demo/config/test-suite/cow-clustering-example.yaml b/docker/demo/config/test-suite/simple-clustering.yaml similarity index 96% rename from docker/demo/config/test-suite/cow-clustering-example.yaml rename to docker/demo/config/test-suite/simple-clustering.yaml index 95932317c04fd..7389ee3ebc34b 100644 --- a/docker/demo/config/test-suite/cow-clustering-example.yaml +++ b/docker/demo/config/test-suite/simple-clustering.yaml @@ -13,8 +13,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -dag_name: cow-clustering-example.yaml -dag_rounds: 3 +dag_name: simple-clustering.yaml +dag_rounds: 30 dag_intermittent_delay_mins: 0 dag_content: first_insert: @@ -60,7 +60,7 @@ dag_content: deps: first_hive_sync first_cluster: config: - execute_itr_count: 2 + execute_itr_count: 25 type: ClusteringNode deps: first_validate second_hive_sync: diff --git a/docker/demo/config/test-suite/simple-deltastreamer-hive.yaml b/docker/demo/config/test-suite/simple-deltastreamer-hive.yaml new file mode 100644 index 0000000000000..e6738b6942b35 --- /dev/null +++ b/docker/demo/config/test-suite/simple-deltastreamer-hive.yaml @@ -0,0 +1,82 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+dag_name: simple-deltastreamer.yaml +dag_rounds: 1 +dag_intermittent_delay_mins: 1 +dag_content: + first_insert: + config: + record_size: 1000 + num_partitions_insert: 1 + repeat_count: 1 + num_records_insert: 1000 + type: InsertNode + deps: none + second_insert: + config: + record_size: 1000 + num_partitions_insert: 1 + repeat_count: 1 + num_records_insert: 10000 + deps: first_insert + type: InsertNode + third_insert: + config: + record_size: 1000 + num_partitions_insert: 1 + repeat_count: 1 + num_records_insert: 300 + deps: second_insert + type: InsertNode + first_hive_sync: + config: + queue_name: "adhoc" + engine: "mr" + type: HiveSyncNode + deps: third_insert + first_validate: + config: + validate_hive: false + type: ValidateDatasetNode + deps: first_hive_sync + first_upsert: + config: + record_size: 1000 + num_partitions_insert: 1 + num_records_insert: 300 + repeat_count: 1 + num_records_upsert: 100 + num_partitions_upsert: 1 + type: UpsertNode + deps: first_validate + first_delete: + config: + num_partitions_delete: 1 + num_records_delete: 2000 + type: DeleteNode + deps: first_upsert + second_hive_sync: + config: + queue_name: "adhoc" + engine: "mr" + type: HiveSyncNode + deps: first_delete + second_validate: + config: + validate_hive: true + delete_input_data: true + type: ValidateDatasetNode + deps: second_hive_sync diff --git a/docker/demo/config/test-suite/complex-dag-cow.yaml b/docker/demo/config/test-suite/simple-deltastreamer.yaml similarity index 98% rename from docker/demo/config/test-suite/complex-dag-cow.yaml rename to docker/demo/config/test-suite/simple-deltastreamer.yaml index 3a84b0a0acecd..f49a41baf8541 100644 --- a/docker/demo/config/test-suite/complex-dag-cow.yaml +++ b/docker/demo/config/test-suite/simple-deltastreamer.yaml @@ -13,7 +13,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -dag_name: complex-dag-cow.yaml +dag_name: simple-deltastreamer.yaml dag_rounds: 1 dag_intermittent_delay_mins: 1 dag_content: diff --git a/docker/demo/config/test-suite/spark-clustering.yaml b/docker/demo/config/test-suite/spark-clustering.yaml new file mode 100644 index 0000000000000..e8e722ca77c7c --- /dev/null +++ b/docker/demo/config/test-suite/spark-clustering.yaml @@ -0,0 +1,73 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+dag_name: cow-spark-simple.yaml +dag_rounds: 1 +dag_intermittent_delay_mins: 1 +dag_content: + first_insert: + config: + record_size: 1000 + num_partitions_insert: 10 + repeat_count: 1 + num_records_insert: 10000 + type: SparkInsertNode + deps: none + first_upsert: + config: + record_size: 1000 + num_partitions_insert: 10 + num_records_insert: 1000 + repeat_count: 1 + num_records_upsert: 8000 + num_partitions_upsert: 10 + type: SparkUpsertNode + deps: first_insert + second_insert: + config: + record_size: 1000 + num_partitions_insert: 10 + repeat_count: 1 + num_records_insert: 10000 + type: SparkInsertNode + deps: first_upsert + second_upsert: + config: + record_size: 1000 + num_partitions_insert: 10 + num_records_insert: 1000 + repeat_count: 1 + num_records_upsert: 8000 + num_partitions_upsert: 10 + type: SparkUpsertNode + deps: second_insert + first_delete: + config: + num_partitions_delete: 10 + num_records_delete: 16000 + type: SparkDeleteNode + deps: second_upsert + second_hive_sync: + config: + queue_name: "adhoc" + engine: "mr" + type: HiveSyncNode + deps: first_delete + second_validate: + config: + validate_hive: false + delete_input_data: false + type: ValidateDatasetNode + deps: second_hive_sync \ No newline at end of file diff --git a/docker/demo/config/test-suite/test-aggressive-clean-archival.properties b/docker/demo/config/test-suite/test-aggressive-clean-archival.properties new file mode 100644 index 0000000000000..dcbbfb31c9936 --- /dev/null +++ b/docker/demo/config/test-suite/test-aggressive-clean-archival.properties @@ -0,0 +1,54 @@ + +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
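SparkDeleteNode in the DAG above issues a datasource delete: the incoming dataframe only needs to carry the keys of the records to remove. A rough sketch (placeholders for paths; selecting the victims by limit() mirrors num_records_delete, not the suite's actual selection logic):

```java
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;

// Rough sketch of the datasource delete that SparkDeleteNode wraps.
public class SparkDeleteSketch {
  public static void main(String[] args) {
    SparkSession spark = SparkSession.builder().appName("delete-sketch").getOrCreate();
    Dataset<Row> toDelete = spark.read().format("hudi")
        .load("/tmp/hudi-test-table") // placeholder base path
        .limit(16000);                // mirrors num_records_delete above

    toDelete.write().format("hudi")
        .option("hoodie.datasource.write.operation", "delete")
        .option("hoodie.datasource.write.recordkey.field", "_row_key")
        .option("hoodie.datasource.write.partitionpath.field", "timestamp")
        .option("hoodie.table.name", "table1")
        .mode(SaveMode.Append)
        .save("/tmp/hudi-test-table");
  }
}
```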
+# + +hoodie.insert.shuffle.parallelism=100 +hoodie.upsert.shuffle.parallelism=100 +hoodie.bulkinsert.shuffle.parallelism=100 + +hoodie.cleaner.commits.retained=5 +hoodie.keep.min.commits=9 +hoodie.keep.max.commits=10 + +hoodie.deltastreamer.source.test.num_partitions=100 +hoodie.deltastreamer.source.test.datagen.use_rocksdb_for_storing_existing_keys=false +hoodie.deltastreamer.source.test.max_unique_records=100000000 +hoodie.embed.timeline.server=false +hoodie.deltastreamer.source.input.selector=org.apache.hudi.integ.testsuite.helpers.DFSTestSuitePathSelector + +hoodie.deltastreamer.source.input.selector=org.apache.hudi.integ.testsuite.helpers.DFSTestSuitePathSelector +hoodie.datasource.hive_sync.skip_ro_suffix=true + +hoodie.datasource.write.recordkey.field=_row_key +hoodie.datasource.write.keygenerator.class=org.apache.hudi.keygen.TimestampBasedKeyGenerator +hoodie.datasource.write.partitionpath.field=timestamp + +hoodie.deltastreamer.source.dfs.root=/user/hive/warehouse/hudi-integ-test-suite/input +hoodie.deltastreamer.schemaprovider.target.schema.file=file:/var/hoodie/ws/docker/demo/config/test-suite/source.avsc +hoodie.deltastreamer.schemaprovider.source.schema.file=file:/var/hoodie/ws/docker/demo/config/test-suite/source.avsc +hoodie.deltastreamer.keygen.timebased.timestamp.type=UNIX_TIMESTAMP +hoodie.deltastreamer.keygen.timebased.output.dateformat=yyyy/MM/dd + +hoodie.datasource.hive_sync.jdbcurl=jdbc:hive2://hiveserver:10000/ +hoodie.datasource.hive_sync.database=testdb +hoodie.datasource.hive_sync.table=table1 +hoodie.datasource.hive_sync.assume_date_partitioning=false +hoodie.datasource.hive_sync.partition_fields=_hoodie_partition_path +hoodie.datasource.hive_sync.partition_extractor_class=org.apache.hudi.hive.SlashEncodedDayPartitionValueExtractor + diff --git a/docker/demo/config/test-suite/test-clustering-aggressive-clean-archival.properties b/docker/demo/config/test-suite/test-clustering-aggressive-clean-archival.properties new file mode 100644 index 0000000000000..abddd77ba327a --- /dev/null +++ b/docker/demo/config/test-suite/test-clustering-aggressive-clean-archival.properties @@ -0,0 +1,61 @@ + +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
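The three retention knobs in test-aggressive-clean-archival.properties above work as a pair of guardrails: the cleaner keeps file versions for the last 5 commits, and archival trims the active timeline down to 9 commits once it exceeds 10 — which is what the DAGs' validate_clean/validate_archival steps assert. A small sketch of the relationship (the consistency check reflects my reading that the cleaner must retain fewer commits than archival keeps):

```java
import java.util.Properties;

// Sketch of the aggressive clean/archival settings above and the ordering
// constraint between them (assumption: retained < keep.min.commits).
public class RetentionProps {
  public static void main(String[] args) {
    Properties props = new Properties();
    props.setProperty("hoodie.cleaner.commits.retained", "5"); // keep file slices of last 5 commits
    props.setProperty("hoodie.keep.min.commits", "9");         // archive timeline down to 9 commits...
    props.setProperty("hoodie.keep.max.commits", "10");        // ...whenever it grows past 10

    int retained = Integer.parseInt(props.getProperty("hoodie.cleaner.commits.retained"));
    int keepMin = Integer.parseInt(props.getProperty("hoodie.keep.min.commits"));
    if (retained >= keepMin) {
      throw new IllegalStateException("cleaner must retain fewer commits than archival keeps");
    }
  }
}
```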
+# + +hoodie.insert.shuffle.parallelism=100 +hoodie.upsert.shuffle.parallelism=100 +hoodie.bulkinsert.shuffle.parallelism=100 + +hoodie.cleaner.commits.retained=5 +hoodie.keep.min.commits=9 +hoodie.keep.max.commits=10 + +hoodie.deltastreamer.source.test.num_partitions=100 +hoodie.deltastreamer.source.test.datagen.use_rocksdb_for_storing_existing_keys=false +hoodie.deltastreamer.source.test.max_unique_records=100000000 +hoodie.embed.timeline.server=false +hoodie.deltastreamer.source.input.selector=org.apache.hudi.integ.testsuite.helpers.DFSTestSuitePathSelector + +hoodie.deltastreamer.source.input.selector=org.apache.hudi.integ.testsuite.helpers.DFSTestSuitePathSelector +hoodie.datasource.hive_sync.skip_ro_suffix=true + +hoodie.datasource.write.recordkey.field=_row_key +hoodie.datasource.write.keygenerator.class=org.apache.hudi.keygen.TimestampBasedKeyGenerator +hoodie.datasource.write.partitionpath.field=timestamp + +hoodie.clustering.inline=true +hoodie.clustering.inline.max.commits=4 +hoodie.clustering.plan.strategy.sort.columns=_hoodie_partition_path,_row_key +hoodie.clustering.plan.strategy.target.file.max.bytes=1073741824 +hoodie.clustering.plan.strategy.small.file.limit=629145600 +hoodie.clustering.execution.strategy.class=org.apache.hudi.client.clustering.run.strategy.SparkSortAndSizeExecutionStrategy + +hoodie.deltastreamer.source.dfs.root=/user/hive/warehouse/hudi-integ-test-suite/input +hoodie.deltastreamer.schemaprovider.target.schema.file=file:/var/hoodie/ws/docker/demo/config/test-suite/source.avsc +hoodie.deltastreamer.schemaprovider.source.schema.file=file:/var/hoodie/ws/docker/demo/config/test-suite/source.avsc +hoodie.deltastreamer.keygen.timebased.timestamp.type=UNIX_TIMESTAMP +hoodie.deltastreamer.keygen.timebased.output.dateformat=yyyy/MM/dd + +hoodie.datasource.hive_sync.jdbcurl=jdbc:hive2://hiveserver:10000/ +hoodie.datasource.hive_sync.database=testdb +hoodie.datasource.hive_sync.table=table1 +hoodie.datasource.hive_sync.assume_date_partitioning=false +hoodie.datasource.hive_sync.partition_fields=_hoodie_partition_path +hoodie.datasource.hive_sync.partition_extractor_class=org.apache.hudi.hive.SlashEncodedDayPartitionValueExtractor + diff --git a/docker/demo/config/test-suite/test-clustering-metadata-aggressive-clean-archival.properties b/docker/demo/config/test-suite/test-clustering-metadata-aggressive-clean-archival.properties new file mode 100644 index 0000000000000..931b1e3a09668 --- /dev/null +++ b/docker/demo/config/test-suite/test-clustering-metadata-aggressive-clean-archival.properties @@ -0,0 +1,63 @@ + +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
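As I read the clustering block in test-clustering-aggressive-clean-archival.properties above: every 4 commits an inline clustering plan is scheduled and executed, rewriting files smaller than the 600 MB small-file limit into files of up to 1 GB, sorted by the listed columns via SparkSortAndSizeExecutionStrategy. A sketch restating those values with their intent:

```java
import java.util.Properties;

// Sketch of the inline-clustering settings above; the byte values are the
// exact ones from the properties file (600 MB and 1 GB respectively).
public class ClusteringProps {
  public static void main(String[] args) {
    Properties p = new Properties();
    p.setProperty("hoodie.clustering.inline", "true");          // cluster as part of the write path
    p.setProperty("hoodie.clustering.inline.max.commits", "4"); // trigger every 4 commits
    p.setProperty("hoodie.clustering.plan.strategy.sort.columns",
        "_hoodie_partition_path,_row_key");                     // sort key for rewritten files
    p.setProperty("hoodie.clustering.plan.strategy.small.file.limit",
        String.valueOf(600L * 1024 * 1024));                    // 629145600: rewrite files below 600 MB
    p.setProperty("hoodie.clustering.plan.strategy.target.file.max.bytes",
        String.valueOf(1024L * 1024 * 1024));                   // 1073741824: pack up to ~1 GB per file
  }
}
```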
+# + +hoodie.insert.shuffle.parallelism=100 +hoodie.upsert.shuffle.parallelism=100 +hoodie.bulkinsert.shuffle.parallelism=100 + +hoodie.cleaner.commits.retained=5 +hoodie.keep.min.commits=9 +hoodie.keep.max.commits=10 + +hoodie.metadata.enable=true + +hoodie.deltastreamer.source.test.num_partitions=100 +hoodie.deltastreamer.source.test.datagen.use_rocksdb_for_storing_existing_keys=false +hoodie.deltastreamer.source.test.max_unique_records=100000000 +hoodie.embed.timeline.server=false +hoodie.deltastreamer.source.input.selector=org.apache.hudi.integ.testsuite.helpers.DFSTestSuitePathSelector + +hoodie.deltastreamer.source.input.selector=org.apache.hudi.integ.testsuite.helpers.DFSTestSuitePathSelector +hoodie.datasource.hive_sync.skip_ro_suffix=true + +hoodie.datasource.write.recordkey.field=_row_key +hoodie.datasource.write.keygenerator.class=org.apache.hudi.keygen.TimestampBasedKeyGenerator +hoodie.datasource.write.partitionpath.field=timestamp + +hoodie.clustering.inline=true +hoodie.clustering.inline.max.commits=4 +hoodie.clustering.plan.strategy.sort.columns=_hoodie_partition_path,_row_key +hoodie.clustering.plan.strategy.target.file.max.bytes=1073741824 +hoodie.clustering.plan.strategy.small.file.limit=629145600 +hoodie.clustering.execution.strategy.class=org.apache.hudi.client.clustering.run.strategy.SparkSortAndSizeExecutionStrategy + +hoodie.deltastreamer.source.dfs.root=/user/hive/warehouse/hudi-integ-test-suite/input +hoodie.deltastreamer.schemaprovider.target.schema.file=file:/var/hoodie/ws/docker/demo/config/test-suite/source.avsc +hoodie.deltastreamer.schemaprovider.source.schema.file=file:/var/hoodie/ws/docker/demo/config/test-suite/source.avsc +hoodie.deltastreamer.keygen.timebased.timestamp.type=UNIX_TIMESTAMP +hoodie.deltastreamer.keygen.timebased.output.dateformat=yyyy/MM/dd + +hoodie.datasource.hive_sync.jdbcurl=jdbc:hive2://hiveserver:10000/ +hoodie.datasource.hive_sync.database=testdb +hoodie.datasource.hive_sync.table=table1 +hoodie.datasource.hive_sync.assume_date_partitioning=false +hoodie.datasource.hive_sync.partition_fields=_hoodie_partition_path +hoodie.datasource.hive_sync.partition_extractor_class=org.apache.hudi.hive.SlashEncodedDayPartitionValueExtractor + diff --git a/docker/demo/config/test-suite/test-clustering.properties b/docker/demo/config/test-suite/test-clustering.properties new file mode 100644 index 0000000000000..9aa4843b2746e --- /dev/null +++ b/docker/demo/config/test-suite/test-clustering.properties @@ -0,0 +1,57 @@ + +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
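hoodie.metadata.enable=true in the file above makes writers maintain Hudi's internal metadata table, so this variant stresses metadata, clustering, and aggressive clean/archival together. As a hedged sketch — and assuming, on the read side, that the same key lets queries serve file listings from the metadata table instead of listing storage:

```java
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

// Sketch: reading a table with metadata-table-based file listing enabled.
// The read-side use of this key is an assumption; the base path is a placeholder.
public class MetadataReadSketch {
  public static void main(String[] args) {
    SparkSession spark = SparkSession.builder().appName("metadata-sketch").getOrCreate();
    Dataset<Row> rows = spark.read().format("hudi")
        .option("hoodie.metadata.enable", "true")
        .load("/tmp/hudi-test-table");
    rows.show();
  }
}
```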
+# + +hoodie.insert.shuffle.parallelism=100 +hoodie.upsert.shuffle.parallelism=100 +hoodie.bulkinsert.shuffle.parallelism=100 + +hoodie.deltastreamer.source.test.num_partitions=100 +hoodie.deltastreamer.source.test.datagen.use_rocksdb_for_storing_existing_keys=false +hoodie.deltastreamer.source.test.max_unique_records=100000000 +hoodie.embed.timeline.server=false +hoodie.deltastreamer.source.input.selector=org.apache.hudi.integ.testsuite.helpers.DFSTestSuitePathSelector + +hoodie.deltastreamer.source.input.selector=org.apache.hudi.integ.testsuite.helpers.DFSTestSuitePathSelector +hoodie.datasource.hive_sync.skip_ro_suffix=true + +hoodie.datasource.write.recordkey.field=_row_key +hoodie.datasource.write.keygenerator.class=org.apache.hudi.keygen.TimestampBasedKeyGenerator +hoodie.datasource.write.partitionpath.field=timestamp + +hoodie.clustering.inline=true +hoodie.clustering.inline.max.commits=4 +hoodie.clustering.plan.strategy.sort.columns=_hoodie_partition_path,_row_key +hoodie.clustering.plan.strategy.target.file.max.bytes=1073741824 +hoodie.clustering.plan.strategy.small.file.limit=629145600 +hoodie.clustering.execution.strategy.class=org.apache.hudi.client.clustering.run.strategy.SparkSortAndSizeExecutionStrategy + +hoodie.deltastreamer.source.dfs.root=/user/hive/warehouse/hudi-integ-test-suite/input +hoodie.deltastreamer.schemaprovider.target.schema.file=file:/var/hoodie/ws/docker/demo/config/test-suite/source.avsc +hoodie.deltastreamer.schemaprovider.source.schema.file=file:/var/hoodie/ws/docker/demo/config/test-suite/source.avsc +hoodie.deltastreamer.keygen.timebased.timestamp.type=UNIX_TIMESTAMP +hoodie.deltastreamer.keygen.timebased.output.dateformat=yyyy/MM/dd + +hoodie.datasource.hive_sync.jdbcurl=jdbc:hive2://hiveserver:10000/ +hoodie.datasource.hive_sync.database=testdb +hoodie.datasource.hive_sync.table=table1 +hoodie.datasource.hive_sync.assume_date_partitioning=false +hoodie.datasource.hive_sync.partition_fields=_hoodie_partition_path +hoodie.datasource.hive_sync.partition_extractor_class=org.apache.hudi.hive.SlashEncodedDayPartitionValueExtractor + diff --git a/docker/demo/config/test-suite/test-metadata-aggressive-clean-archival.properties b/docker/demo/config/test-suite/test-metadata-aggressive-clean-archival.properties new file mode 100644 index 0000000000000..8935ffb4264be --- /dev/null +++ b/docker/demo/config/test-suite/test-metadata-aggressive-clean-archival.properties @@ -0,0 +1,56 @@ + +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
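The key-generator settings shared by these properties files partition records by formatting the epoch-seconds timestamp field as yyyy/MM/dd — the slash-encoded day layout that SlashEncodedDayPartitionValueExtractor expects on the Hive-sync side. A plain-Java illustration of the transformation (this is not the TimestampBasedKeyGenerator class itself, just what its UNIX_TIMESTAMP + yyyy/MM/dd configuration produces):

```java
import java.time.Instant;
import java.time.ZoneOffset;
import java.time.format.DateTimeFormatter;

// Illustration of the partition path produced by the keygen settings above:
// a UNIX_TIMESTAMP value formatted with the yyyy/MM/dd output dateformat.
public class PartitionPathSketch {
  public static void main(String[] args) {
    long unixSeconds = 1_643_155_200L; // 2022-01-26T00:00:00Z, for example
    String partitionPath = DateTimeFormatter.ofPattern("yyyy/MM/dd")
        .withZone(ZoneOffset.UTC)
        .format(Instant.ofEpochSecond(unixSeconds));
    System.out.println(partitionPath); // 2022/01/26
  }
}
```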
+# + +hoodie.insert.shuffle.parallelism=100 +hoodie.upsert.shuffle.parallelism=100 +hoodie.bulkinsert.shuffle.parallelism=100 + +hoodie.cleaner.commits.retained=5 +hoodie.keep.min.commits=9 +hoodie.keep.max.commits=10 + +hoodie.metadata.enable=true + +hoodie.deltastreamer.source.test.num_partitions=100 +hoodie.deltastreamer.source.test.datagen.use_rocksdb_for_storing_existing_keys=false +hoodie.deltastreamer.source.test.max_unique_records=100000000 +hoodie.embed.timeline.server=false +hoodie.deltastreamer.source.input.selector=org.apache.hudi.integ.testsuite.helpers.DFSTestSuitePathSelector + +hoodie.deltastreamer.source.input.selector=org.apache.hudi.integ.testsuite.helpers.DFSTestSuitePathSelector +hoodie.datasource.hive_sync.skip_ro_suffix=true + +hoodie.datasource.write.recordkey.field=_row_key +hoodie.datasource.write.keygenerator.class=org.apache.hudi.keygen.TimestampBasedKeyGenerator +hoodie.datasource.write.partitionpath.field=timestamp + +hoodie.deltastreamer.source.dfs.root=/user/hive/warehouse/hudi-integ-test-suite/input +hoodie.deltastreamer.schemaprovider.target.schema.file=file:/var/hoodie/ws/docker/demo/config/test-suite/source.avsc +hoodie.deltastreamer.schemaprovider.source.schema.file=file:/var/hoodie/ws/docker/demo/config/test-suite/source.avsc +hoodie.deltastreamer.keygen.timebased.timestamp.type=UNIX_TIMESTAMP +hoodie.deltastreamer.keygen.timebased.output.dateformat=yyyy/MM/dd + +hoodie.datasource.hive_sync.jdbcurl=jdbc:hive2://hiveserver:10000/ +hoodie.datasource.hive_sync.database=testdb +hoodie.datasource.hive_sync.table=table1 +hoodie.datasource.hive_sync.assume_date_partitioning=false +hoodie.datasource.hive_sync.partition_fields=_hoodie_partition_path +hoodie.datasource.hive_sync.partition_extractor_class=org.apache.hudi.hive.SlashEncodedDayPartitionValueExtractor + diff --git a/docker/demo/config/test-suite/test-metadata.properties b/docker/demo/config/test-suite/test-metadata.properties new file mode 100644 index 0000000000000..48da77c511e93 --- /dev/null +++ b/docker/demo/config/test-suite/test-metadata.properties @@ -0,0 +1,56 @@ + +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
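The hoodie.datasource.hive_sync.* block that recurs in these files registers (or updates) table1 in the testdb Hive database over JDBC after each write. The same keys can be passed as datasource write options; a sketch, with the dataframe and base path as placeholders as in the earlier examples:

```java
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SaveMode;

// Sketch of the hive-sync settings above applied as datasource options.
public class HiveSyncSketch {
  static void write(Dataset<Row> df) {
    df.write().format("hudi")
        .option("hoodie.datasource.hive_sync.enable", "true")
        .option("hoodie.datasource.hive_sync.jdbcurl", "jdbc:hive2://hiveserver:10000/")
        .option("hoodie.datasource.hive_sync.database", "testdb")
        .option("hoodie.datasource.hive_sync.table", "table1")
        .option("hoodie.datasource.hive_sync.partition_fields", "_hoodie_partition_path")
        .option("hoodie.datasource.hive_sync.partition_extractor_class",
            "org.apache.hudi.hive.SlashEncodedDayPartitionValueExtractor")
        .mode(SaveMode.Append)
        .save("/tmp/hudi-test-table"); // placeholder base path
  }
}
```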
+# + +hoodie.insert.shuffle.parallelism=100 +hoodie.upsert.shuffle.parallelism=100 +hoodie.bulkinsert.shuffle.parallelism=100 + +hoodie.metadata.enable=true + +hoodie.deltastreamer.source.test.num_partitions=100 +hoodie.deltastreamer.source.test.datagen.use_rocksdb_for_storing_existing_keys=false +hoodie.deltastreamer.source.test.max_unique_records=100000000 +hoodie.embed.timeline.server=false +hoodie.deltastreamer.source.input.selector=org.apache.hudi.integ.testsuite.helpers.DFSTestSuitePathSelector + +hoodie.deltastreamer.source.input.selector=org.apache.hudi.integ.testsuite.helpers.DFSTestSuitePathSelector +hoodie.datasource.hive_sync.skip_ro_suffix=true + +hoodie.datasource.write.recordkey.field=_row_key +hoodie.datasource.write.keygenerator.class=org.apache.hudi.keygen.TimestampBasedKeyGenerator +hoodie.datasource.write.partitionpath.field=timestamp + +hoodie.clustering.plan.strategy.sort.columns=_row_key +hoodie.clustering.plan.strategy.daybased.lookback.partitions=0 +hoodie.clustering.inline.max.commits=1 + +hoodie.deltastreamer.source.dfs.root=/user/hive/warehouse/hudi-integ-test-suite/input +hoodie.deltastreamer.schemaprovider.target.schema.file=file:/var/hoodie/ws/docker/demo/config/test-suite/source.avsc +hoodie.deltastreamer.schemaprovider.source.schema.file=file:/var/hoodie/ws/docker/demo/config/test-suite/source.avsc +hoodie.deltastreamer.keygen.timebased.timestamp.type=UNIX_TIMESTAMP +hoodie.deltastreamer.keygen.timebased.output.dateformat=yyyy/MM/dd + +hoodie.datasource.hive_sync.jdbcurl=jdbc:hive2://hiveserver:10000/ +hoodie.datasource.hive_sync.database=testdb +hoodie.datasource.hive_sync.table=table1 +hoodie.datasource.hive_sync.assume_date_partitioning=false +hoodie.datasource.hive_sync.partition_fields=_hoodie_partition_path +hoodie.datasource.hive_sync.partition_extractor_class=org.apache.hudi.hive.SlashEncodedDayPartitionValueExtractor + diff --git a/docker/demo/config/test-suite/test.properties b/docker/demo/config/test-suite/test.properties index 30cd1c1f02f09..509b9f4ba628e 100644 --- a/docker/demo/config/test-suite/test.properties +++ b/docker/demo/config/test-suite/test.properties @@ -19,6 +19,8 @@ hoodie.insert.shuffle.parallelism=100 hoodie.upsert.shuffle.parallelism=100 hoodie.bulkinsert.shuffle.parallelism=100 +hoodie.metadata.enable=false + hoodie.deltastreamer.source.test.num_partitions=100 hoodie.deltastreamer.source.test.datagen.use_rocksdb_for_storing_existing_keys=false hoodie.deltastreamer.source.test.max_unique_records=100000000 @@ -32,10 +34,6 @@ hoodie.datasource.write.recordkey.field=_row_key hoodie.datasource.write.keygenerator.class=org.apache.hudi.keygen.TimestampBasedKeyGenerator hoodie.datasource.write.partitionpath.field=timestamp -hoodie.clustering.plan.strategy.sort.columns=_row_key -hoodie.clustering.plan.strategy.daybased.lookback.partitions=0 -hoodie.clustering.inline.max.commits=1 - hoodie.deltastreamer.source.dfs.root=/user/hive/warehouse/hudi-integ-test-suite/input hoodie.deltastreamer.schemaprovider.target.schema.file=file:/var/hoodie/ws/docker/demo/config/test-suite/source.avsc hoodie.deltastreamer.schemaprovider.source.schema.file=file:/var/hoodie/ws/docker/demo/config/test-suite/source.avsc diff --git a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/ArchivedCommitsCommand.java b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/ArchivedCommitsCommand.java index 102fcc2ae7a63..1747a59f4f366 100644 --- a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/ArchivedCommitsCommand.java +++ 
b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/ArchivedCommitsCommand.java @@ -30,6 +30,7 @@ import org.apache.hudi.common.table.log.HoodieLogFormat.Reader; import org.apache.hudi.common.table.log.block.HoodieAvroDataBlock; import org.apache.hudi.common.table.timeline.HoodieTimeline; +import org.apache.hudi.common.util.ClosableIterator; import org.apache.hudi.common.util.Option; import org.apache.avro.generic.GenericRecord; @@ -80,8 +81,7 @@ public String showArchivedCommits( // read the avro blocks while (reader.hasNext()) { HoodieAvroDataBlock blk = (HoodieAvroDataBlock) reader.next(); - List<IndexedRecord> records = blk.getRecords(); - readRecords.addAll(records); + blk.getRecordItr().forEachRemaining(readRecords::add); } List readCommits = readRecords.stream().map(r -> (GenericRecord) r) .filter(r -> r.get("actionType").toString().equals(HoodieTimeline.COMMIT_ACTION) @@ -155,8 +155,9 @@ public String showCommits( // read the avro blocks while (reader.hasNext()) { HoodieAvroDataBlock blk = (HoodieAvroDataBlock) reader.next(); - List<IndexedRecord> records = blk.getRecords(); - readRecords.addAll(records); + try (ClosableIterator<IndexedRecord> recordItr = blk.getRecordItr()) { + recordItr.forEachRemaining(readRecords::add); + } } List readCommits = readRecords.stream().map(r -> (GenericRecord) r) .map(r -> readCommit(r, skipMetadata)).collect(Collectors.toList()); diff --git a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/ClusteringCommand.java b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/ClusteringCommand.java index 9adae1daa5336..4163f0cb5a6a4 100644 --- a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/ClusteringCommand.java +++ b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/ClusteringCommand.java @@ -116,4 +116,40 @@ public String runClustering( } return "Succeeded to run clustering for " + clusteringInstantTime; } + + /** + * Run clustering table service. + * <p> + * Example: + * > connect --path {path to hudi table} + * > clustering scheduleAndExecute --sparkMaster local --sparkMemory 2g + */ + @CliCommand(value = "clustering scheduleAndExecute", help = "Run Clustering. Make a cluster plan first and execute that plan immediately") + public String runClustering( + @CliOption(key = "sparkMaster", unspecifiedDefaultValue = SparkUtil.DEFAULT_SPARK_MASTER, help = "Spark master") final String master, + @CliOption(key = "sparkMemory", help = "Spark executor memory", unspecifiedDefaultValue = "4g") final String sparkMemory, + @CliOption(key = "parallelism", help = "Parallelism for hoodie clustering", unspecifiedDefaultValue = "1") final String parallelism, + @CliOption(key = "retry", help = "Number of retries", unspecifiedDefaultValue = "1") final String retry, + @CliOption(key = "propsFilePath", help = "path to properties file on localfs or dfs with configurations for " + + "hoodie client for clustering", unspecifiedDefaultValue = "") final String propsFilePath, + @CliOption(key = "hoodieConfigs", help = "Any configuration that can be set in the properties file can be " + + "passed here in the form of an array", unspecifiedDefaultValue = "") final String[] configs) throws Exception { + HoodieTableMetaClient client = HoodieCLI.getTableMetaClient(); + boolean initialized = HoodieCLI.initConf(); + HoodieCLI.initFS(initialized); + + String sparkPropertiesPath = + Utils.getDefaultPropertiesFile(JavaConverters.mapAsScalaMapConverter(System.getenv()).asScala()); + SparkLauncher sparkLauncher = SparkUtil.initLauncher(sparkPropertiesPath); + sparkLauncher.addAppArgs(SparkCommand.CLUSTERING_SCHEDULE_AND_EXECUTE.toString(), master, sparkMemory, + client.getBasePath(), client.getTableConfig().getTableName(), parallelism, retry, propsFilePath); + UtilHelpers.validateAndAddProperties(configs, sparkLauncher); + Process process = sparkLauncher.launch(); + InputStreamConsumer.captureOutput(process); + int exitCode = process.waitFor(); + if (exitCode != 0) { + return "Failed to run clustering for scheduleAndExecute."; + } + return "Succeeded to run clustering for scheduleAndExecute"; + } } diff --git a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/CommitsCommand.java b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/CommitsCommand.java index 9517234a0bb60..db1cd207df5a1 100644 --- a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/CommitsCommand.java +++ b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/CommitsCommand.java @@ -232,7 +232,9 @@ public String rollbackCommit(@CliOption(key = {"commit"}, help = "Commit to roll @CliOption(key = {"sparkProperties"}, help = "Spark Properties File Path") final String sparkPropertiesPath, @CliOption(key = "sparkMaster", unspecifiedDefaultValue = "", help = "Spark Master") String master, @CliOption(key = "sparkMemory", unspecifiedDefaultValue = "4G", - help = "Spark executor memory") final String sparkMemory, + @CliOption(key = "rollbackUsingMarkers", unspecifiedDefaultValue = "true", + help = "Enabling marker based rollback") final String rollbackUsingMarkers) throws Exception { HoodieActiveTimeline activeTimeline = HoodieCLI.getTableMetaClient().getActiveTimeline(); HoodieTimeline completedTimeline = activeTimeline.getCommitsTimeline().filterCompletedInstants(); @@ -243,7 +245,7 @@ public String rollbackCommit(@CliOption(key = {"commit"}, help = "Commit to roll SparkLauncher sparkLauncher = SparkUtil.initLauncher(sparkPropertiesPath);
sparkLauncher.addAppArgs(SparkMain.SparkCommand.ROLLBACK.toString(), master, sparkMemory, instantTime, - HoodieCLI.getTableMetaClient().getBasePath()); + HoodieCLI.getTableMetaClient().getBasePath(), rollbackUsingMarkers); Process process = sparkLauncher.launch(); InputStreamConsumer.captureOutput(process); int exitCode = process.waitFor(); diff --git a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/CompactionCommand.java b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/CompactionCommand.java index b6a366bbb75ef..097c68a542c47 100644 --- a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/CompactionCommand.java +++ b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/CompactionCommand.java @@ -264,6 +264,41 @@ public String compact( return "Compaction successfully completed for " + compactionInstantTime; } + @CliCommand(value = "compaction scheduleAndExecute", help = "Schedule compaction plan and execute this plan") + public String compact( + @CliOption(key = {"parallelism"}, mandatory = true, + help = "Parallelism for hoodie compaction") final String parallelism, + @CliOption(key = "schemaFilePath", mandatory = true, + help = "Path for Avro schema file") final String schemaFilePath, + @CliOption(key = "sparkMaster", unspecifiedDefaultValue = "local", + help = "Spark Master") String master, + @CliOption(key = "sparkMemory", unspecifiedDefaultValue = "4G", + help = "Spark executor memory") final String sparkMemory, + @CliOption(key = "retry", unspecifiedDefaultValue = "1", help = "Number of retries") final String retry, + @CliOption(key = "propsFilePath", help = "path to properties file on localfs or dfs with configurations for hoodie client for compacting", + unspecifiedDefaultValue = "") final String propsFilePath, + @CliOption(key = "hoodieConfigs", help = "Any configuration that can be set in the properties file can be passed here in the form of an array", + unspecifiedDefaultValue = "") final String[] configs) + throws Exception { + HoodieTableMetaClient client = checkAndGetMetaClient(); + boolean initialized = HoodieCLI.initConf(); + HoodieCLI.initFS(initialized); + String sparkPropertiesPath = + Utils.getDefaultPropertiesFile(scala.collection.JavaConversions.propertiesAsScalaMap(System.getProperties())); + SparkLauncher sparkLauncher = SparkUtil.initLauncher(sparkPropertiesPath); + sparkLauncher.addAppArgs(SparkCommand.COMPACT_SCHEDULE_AND_EXECUTE.toString(), master, sparkMemory, client.getBasePath(), + client.getTableConfig().getTableName(), parallelism, schemaFilePath, + retry, propsFilePath); + UtilHelpers.validateAndAddProperties(configs, sparkLauncher); + Process process = sparkLauncher.launch(); + InputStreamConsumer.captureOutput(process); + int exitCode = process.waitFor(); + if (exitCode != 0) { + return "Failed to schedule and execute compaction "; + } + return "Schedule and execute compaction successfully completed"; + } + /** * Prints all compaction details. 
*/ diff --git a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/ExportCommand.java b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/ExportCommand.java index 119ccb0dcf039..1d8d6dcd6ae93 100644 --- a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/ExportCommand.java +++ b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/ExportCommand.java @@ -34,14 +34,16 @@ import org.apache.hudi.common.table.timeline.HoodieInstant; import org.apache.hudi.common.table.timeline.HoodieTimeline; import org.apache.hudi.common.table.timeline.TimelineMetadataUtils; +import org.apache.hudi.common.util.ClosableIterator; import org.apache.hudi.exception.HoodieException; -import org.apache.log4j.LogManager; -import org.apache.log4j.Logger; + import org.apache.avro.generic.GenericRecord; import org.apache.avro.generic.IndexedRecord; import org.apache.avro.specific.SpecificData; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.Path; +import org.apache.log4j.LogManager; +import org.apache.log4j.Logger; import org.springframework.shell.core.CommandMarker; import org.springframework.shell.core.annotation.CliCommand; import org.springframework.shell.core.annotation.CliOption; @@ -69,8 +71,8 @@ public class ExportCommand implements CommandMarker { @CliCommand(value = "export instants", help = "Export Instants and their metadata from the Timeline") public String exportInstants( @CliOption(key = {"limit"}, help = "Limit Instants", unspecifiedDefaultValue = "-1") final Integer limit, - @CliOption(key = {"actions"}, help = "Comma seperated list of Instant actions to export", - unspecifiedDefaultValue = "clean,commit,deltacommit,rollback,savepoint,restore") final String filter, + @CliOption(key = {"actions"}, help = "Comma separated list of Instant actions to export", + unspecifiedDefaultValue = "clean,commit,deltacommit,rollback,savepoint,restore") final String filter, @CliOption(key = {"desc"}, help = "Ordering", unspecifiedDefaultValue = "false") final boolean descending, @CliOption(key = {"localFolder"}, help = "Local Folder to export to", mandatory = true) String localFolder) throws Exception { @@ -122,44 +124,46 @@ private int copyArchivedInstants(List<FileStatus> statuses, Set<String> actionSe // read the avro blocks while (reader.hasNext() && copyCount < limit) { HoodieAvroDataBlock blk = (HoodieAvroDataBlock) reader.next(); - for (IndexedRecord ir : blk.getRecords()) { - // Archived instants are saved as arvo encoded HoodieArchivedMetaEntry records. We need to get the - // metadata record from the entry and convert it to json. + try (ClosableIterator<IndexedRecord> recordItr = blk.getRecordItr()) { + while (recordItr.hasNext()) { + IndexedRecord ir = recordItr.next(); + // Archived instants are saved as avro encoded HoodieArchivedMetaEntry records. We need to get the + // metadata record from the entry and convert it to json.
+ HoodieArchivedMetaEntry archiveEntryRecord = (HoodieArchivedMetaEntry) SpecificData.get() + .deepCopy(HoodieArchivedMetaEntry.SCHEMA$, ir); + final String action = archiveEntryRecord.get("actionType").toString(); + if (!actionSet.contains(action)) { + continue; + } + + GenericRecord metadata = null; + switch (action) { + case HoodieTimeline.CLEAN_ACTION: + metadata = archiveEntryRecord.getHoodieCleanMetadata(); + break; + case HoodieTimeline.COMMIT_ACTION: + case HoodieTimeline.DELTA_COMMIT_ACTION: + metadata = archiveEntryRecord.getHoodieCommitMetadata(); + break; + case HoodieTimeline.ROLLBACK_ACTION: + metadata = archiveEntryRecord.getHoodieRollbackMetadata(); + break; + case HoodieTimeline.SAVEPOINT_ACTION: + metadata = archiveEntryRecord.getHoodieSavePointMetadata(); + break; + case HoodieTimeline.COMPACTION_ACTION: + metadata = archiveEntryRecord.getHoodieCompactionMetadata(); + break; + default: + throw new HoodieException("Unknown type of action " + action); + } + + final String instantTime = archiveEntryRecord.get("commitTime").toString(); + final String outPath = localFolder + Path.SEPARATOR + instantTime + "." + action; + writeToFile(outPath, HoodieAvroUtils.avroToJson(metadata, true)); + if (++copyCount == limit) { break; - case HoodieTimeline.COMMIT_ACTION: - case HoodieTimeline.DELTA_COMMIT_ACTION: - metadata = archiveEntryRecord.getHoodieCommitMetadata(); - break; - case HoodieTimeline.ROLLBACK_ACTION: - metadata = archiveEntryRecord.getHoodieRollbackMetadata(); - break; - case HoodieTimeline.SAVEPOINT_ACTION: - metadata = archiveEntryRecord.getHoodieSavePointMetadata(); - break; - case HoodieTimeline.COMPACTION_ACTION: - metadata = archiveEntryRecord.getHoodieCompactionMetadata(); - break; - default: - throw new HoodieException("Unknown type of action " + action); - } - - final String instantTime = archiveEntryRecord.get("commitTime").toString(); - final String outPath = localFolder + Path.SEPARATOR + instantTime + "." 
+ action; - writeToFile(outPath, HoodieAvroUtils.avroToJson(metadata, true)); - if (++copyCount == limit) { - break; + } } } } diff --git a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/HoodieLogFileCommand.java b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/HoodieLogFileCommand.java index 27bcd81faefec..4a56858f3926a 100644 --- a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/HoodieLogFileCommand.java +++ b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/HoodieLogFileCommand.java @@ -37,6 +37,7 @@ import org.apache.hudi.common.table.log.block.HoodieLogBlock; import org.apache.hudi.common.table.log.block.HoodieLogBlock.HeaderMetadataType; import org.apache.hudi.common.table.log.block.HoodieLogBlock.HoodieLogBlockType; +import org.apache.hudi.common.util.ClosableIterator; import org.apache.hudi.common.util.Option; import org.apache.hudi.config.HoodieCompactionConfig; import org.apache.hudi.config.HoodieMemoryConfig; @@ -60,6 +61,7 @@ import java.util.List; import java.util.Map; import java.util.Objects; +import java.util.concurrent.atomic.AtomicInteger; import java.util.stream.Collectors; import scala.Tuple2; @@ -100,7 +102,7 @@ public String showLogFileCommits( while (reader.hasNext()) { HoodieLogBlock n = reader.next(); String instantTime; - int recordCount = 0; + AtomicInteger recordCount = new AtomicInteger(0); if (n instanceof HoodieCorruptBlock) { try { instantTime = n.getLogBlockHeader().get(HeaderMetadataType.INSTANT_TIME); @@ -120,17 +122,19 @@ instantTime = "dummy_instant_time_" + dummyInstantTimeCount; } if (n instanceof HoodieDataBlock) { - recordCount = ((HoodieDataBlock) n).getRecords().size(); + try (ClosableIterator<IndexedRecord> recordItr = ((HoodieDataBlock) n).getRecordItr()) { + recordItr.forEachRemaining(r -> recordCount.incrementAndGet()); + } } } if (commitCountAndMetadata.containsKey(instantTime)) { commitCountAndMetadata.get(instantTime).add( - new Tuple3<>(n.getBlockType(), new Tuple2<>(n.getLogBlockHeader(), n.getLogBlockFooter()), recordCount)); + new Tuple3<>(n.getBlockType(), new Tuple2<>(n.getLogBlockHeader(), n.getLogBlockFooter()), recordCount.get())); } else { List<Tuple3<HoodieLogBlockType, Tuple2<Map<HeaderMetadataType, String>, Map<HeaderMetadataType, String>>, Integer>> list = new ArrayList<>(); list.add( - new Tuple3<>(n.getBlockType(), new Tuple2<>(n.getLogBlockHeader(), n.getLogBlockFooter()), recordCount)); + new Tuple3<>(n.getBlockType(), new Tuple2<>(n.getLogBlockHeader(), n.getLogBlockFooter()), recordCount.get())); commitCountAndMetadata.put(instantTime, list); } } @@ -232,11 +236,12 @@ public String showLogFileRecords( HoodieLogBlock n = reader.next(); if (n instanceof HoodieDataBlock) { HoodieDataBlock blk = (HoodieDataBlock) n; - List<IndexedRecord> records = blk.getRecords(); - for (IndexedRecord record : records) { - if (allRecords.size() < limit) { - allRecords.add(record); - } + try (ClosableIterator<IndexedRecord> recordItr = blk.getRecordItr()) { + recordItr.forEachRemaining(record -> { + if (allRecords.size() < limit) { + allRecords.add(record); + } + }); } } }
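Both commands above now consume data-block records through the same streaming idiom instead of materializing the full record list in memory; a minimal sketch of the pattern (assuming a HoodieDataBlock named dataBlock is in scope):

    try (ClosableIterator<IndexedRecord> recordItr = dataBlock.getRecordItr()) {
      while (recordItr.hasNext()) {
        IndexedRecord record = recordItr.next();
        // process one record at a time; close() releases the underlying block reader
      }
    }

The try-with-resources block is what guarantees the iterator is closed even when the caller stops early, for example once a record limit is reached.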
diff --git a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/MarkersCommand.java b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/MarkersCommand.java new file mode 100644 index 0000000000000..57a4ee1879855 --- /dev/null +++ b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/MarkersCommand.java @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.cli.commands; + +import org.apache.hudi.cli.HoodieCLI; +import org.apache.hudi.cli.utils.InputStreamConsumer; +import org.apache.hudi.cli.utils.SparkUtil; +import org.apache.hudi.common.table.HoodieTableMetaClient; +import org.apache.spark.launcher.SparkLauncher; +import org.springframework.shell.core.CommandMarker; +import org.springframework.shell.core.annotation.CliCommand; +import org.springframework.shell.core.annotation.CliOption; +import org.springframework.stereotype.Component; + +/** + * CLI command for marker options. + */ +@Component +public class MarkersCommand implements CommandMarker { + + @CliCommand(value = "marker delete", help = "Delete the marker files of a commit") + public String deleteMarker(@CliOption(key = {"commit"}, help = "Instant time of the commit whose markers should be deleted") final String instantTime, + @CliOption(key = {"sparkProperties"}, help = "Spark Properties File Path") final String sparkPropertiesPath, + @CliOption(key = "sparkMaster", unspecifiedDefaultValue = "", help = "Spark Master") String master, + @CliOption(key = "sparkMemory", unspecifiedDefaultValue = "1G", + help = "Spark executor memory") final String sparkMemory) + throws Exception { + HoodieTableMetaClient metaClient = HoodieCLI.getTableMetaClient(); + SparkLauncher sparkLauncher = SparkUtil.initLauncher(sparkPropertiesPath); + sparkLauncher.addAppArgs(SparkMain.SparkCommand.DELETE_MARKER.toString(), master, sparkMemory, instantTime, + metaClient.getBasePath()); + Process process = sparkLauncher.launch(); + InputStreamConsumer.captureOutput(process); + int exitCode = process.waitFor(); + // Refresh the current table metadata + HoodieCLI.refreshTableMetadata(); + if (exitCode != 0) { + return String.format("Failed: Could not delete marker \"%s\".", instantTime); + } + return String.format("Marker \"%s\" deleted.", instantTime); + } +} diff --git a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/RepairsCommand.java b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/RepairsCommand.java index 2533562d8206e..6c068c898b9be 100644 --- a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/RepairsCommand.java +++ b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/RepairsCommand.java @@ -45,7 +45,6 @@ import org.springframework.stereotype.Component; import scala.collection.JavaConverters; -import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.util.List; @@ -153,10 +152,12 @@ public String overwriteHoodieProperties( HoodieTableMetaClient client = HoodieCLI.getTableMetaClient(); Properties newProps = new Properties(); - newProps.load(new FileInputStream(new File(overwriteFilePath))); + newProps.load(new FileInputStream(overwriteFilePath)); Map oldProps = client.getTableConfig().propsMap(); Path metaPathDir = new Path(client.getBasePath(), METAFOLDER_NAME); HoodieTableConfig.create(client.getFs(), metaPathDir, newProps); + // reload new props as checksum would have been added + newProps =
HoodieTableMetaClient.reload(HoodieCLI.getTableMetaClient()).getTableConfig().getProps(); TreeSet allPropKeys = new TreeSet<>(); allPropKeys.addAll(newProps.keySet().stream().map(Object::toString).collect(Collectors.toSet())); diff --git a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/SparkMain.java b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/SparkMain.java index d1ee109f59042..0de1a1adfe0be 100644 --- a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/SparkMain.java +++ b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/SparkMain.java @@ -18,6 +18,7 @@ package org.apache.hudi.cli.commands; +import org.apache.hadoop.fs.Path; import org.apache.hudi.DataSourceWriteOptions; import org.apache.hudi.cli.DeDupeType; import org.apache.hudi.cli.DedupeSparkJob; @@ -25,6 +26,7 @@ import org.apache.hudi.client.SparkRDDWriteClient; import org.apache.hudi.client.common.HoodieSparkEngineContext; import org.apache.hudi.common.config.TypedProperties; +import org.apache.hudi.common.engine.HoodieEngineContext; import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.table.HoodieTableMetaClient; import org.apache.hudi.common.table.HoodieTableVersion; @@ -38,7 +40,9 @@ import org.apache.hudi.exception.HoodieSavepointException; import org.apache.hudi.index.HoodieIndex; import org.apache.hudi.keygen.constant.KeyGeneratorType; +import org.apache.hudi.table.HoodieSparkTable; import org.apache.hudi.table.action.compact.strategy.UnBoundedCompactionStrategy; +import org.apache.hudi.table.marker.WriteMarkersFactory; import org.apache.hudi.table.upgrade.SparkUpgradeDowngradeHelper; import org.apache.hudi.table.upgrade.UpgradeDowngrade; import org.apache.hudi.utilities.HDFSParquetImporter; @@ -51,8 +55,6 @@ import org.apache.hudi.utilities.UtilHelpers; import org.apache.hudi.utilities.deltastreamer.BootstrapExecutor; import org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer; - -import org.apache.hadoop.fs.Path; import org.apache.log4j.Logger; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.sql.SQLContext; @@ -74,9 +76,9 @@ public class SparkMain { * Commands. 
*/ enum SparkCommand { - BOOTSTRAP, ROLLBACK, DEDUPLICATE, ROLLBACK_TO_SAVEPOINT, SAVEPOINT, IMPORT, UPSERT, COMPACT_SCHEDULE, COMPACT_RUN, + BOOTSTRAP, ROLLBACK, DEDUPLICATE, ROLLBACK_TO_SAVEPOINT, SAVEPOINT, IMPORT, UPSERT, COMPACT_SCHEDULE, COMPACT_RUN, COMPACT_SCHEDULE_AND_EXECUTE, COMPACT_UNSCHEDULE_PLAN, COMPACT_UNSCHEDULE_FILE, COMPACT_VALIDATE, COMPACT_REPAIR, CLUSTERING_SCHEDULE, - CLUSTERING_RUN, CLEAN, DELETE_SAVEPOINT, UPGRADE, DOWNGRADE + CLUSTERING_RUN, CLUSTERING_SCHEDULE_AND_EXECUTE, CLEAN, DELETE_MARKER, DELETE_SAVEPOINT, UPGRADE, DOWNGRADE } public static void main(String[] args) throws Exception { @@ -92,8 +94,8 @@ public static void main(String[] args) throws Exception { try { switch (cmd) { case ROLLBACK: - assert (args.length == 5); - returnCode = rollback(jsc, args[3], args[4]); + assert (args.length == 6); + returnCode = rollback(jsc, args[3], args[4], Boolean.parseBoolean(args[5])); break; case DEDUPLICATE: assert (args.length == 8); @@ -128,7 +130,21 @@ configs.addAll(Arrays.asList(args).subList(9, args.length)); } returnCode = compact(jsc, args[3], args[4], args[5], Integer.parseInt(args[6]), args[7], - Integer.parseInt(args[8]), false, propsFilePath, configs); + Integer.parseInt(args[8]), HoodieCompactor.EXECUTE, propsFilePath, configs); + break; + case COMPACT_SCHEDULE_AND_EXECUTE: + assert (args.length >= 9); + propsFilePath = null; + if (!StringUtils.isNullOrEmpty(args[8])) { + propsFilePath = args[8]; + } + configs = new ArrayList<>(); + if (args.length > 9) { + configs.addAll(Arrays.asList(args).subList(9, args.length)); + } + + returnCode = compact(jsc, args[3], args[4], null, Integer.parseInt(args[5]), args[6], + Integer.parseInt(args[7]), HoodieCompactor.SCHEDULE_AND_EXECUTE, propsFilePath, configs); break; case COMPACT_SCHEDULE: assert (args.length >= 7); @@ -140,7 +156,7 @@ if (args.length > 7) { configs.addAll(Arrays.asList(args).subList(7, args.length)); } - returnCode = compact(jsc, args[3], args[4], args[5], 1, "", 0, true, propsFilePath, configs); + returnCode = compact(jsc, args[3], args[4], args[5], 1, "", 0, HoodieCompactor.SCHEDULE, propsFilePath, configs); break; case COMPACT_VALIDATE: assert (args.length == 7); @@ -176,7 +192,20 @@ configs.addAll(Arrays.asList(args).subList(9, args.length)); } returnCode = cluster(jsc, args[3], args[4], args[5], Integer.parseInt(args[6]), args[2], - Integer.parseInt(args[7]), false, propsFilePath, configs); + Integer.parseInt(args[7]), HoodieClusteringJob.EXECUTE, propsFilePath, configs); + break; + case CLUSTERING_SCHEDULE_AND_EXECUTE: + assert (args.length >= 8); + propsFilePath = null; + if (!StringUtils.isNullOrEmpty(args[7])) { + propsFilePath = args[7]; + } + configs = new ArrayList<>(); + if (args.length > 8) { + configs.addAll(Arrays.asList(args).subList(8, args.length)); + } + returnCode = cluster(jsc, args[3], args[4], null, Integer.parseInt(args[5]), args[2], + Integer.parseInt(args[6]), HoodieClusteringJob.SCHEDULE_AND_EXECUTE, propsFilePath, configs); break; case CLUSTERING_SCHEDULE: assert (args.length >= 7); @@ -189,7 +218,7 @@ configs.addAll(Arrays.asList(args).subList(7, args.length)); } returnCode = cluster(jsc, args[3], args[4], args[5], 1, args[2], - 0, true, propsFilePath, configs); + 0, HoodieClusteringJob.SCHEDULE, propsFilePath, configs); break; case CLEAN:
assert (args.length >= 5); @@ -207,6 +236,10 @@ public static void main(String[] args) throws Exception { assert (args.length == 7); returnCode = createSavepoint(jsc, args[3], args[4], args[5], args[6]); break; + case DELETE_MARKER: + assert (args.length == 5); + returnCode = deleteMarker(jsc, args[3], args[4]); + break; case DELETE_SAVEPOINT: assert (args.length == 5); returnCode = deleteSavepoint(jsc, args[3], args[4]); @@ -250,6 +283,21 @@ protected static void clean(JavaSparkContext jsc, String basePath, String propsF new HoodieCleaner(cfg, jsc).run(); } + protected static int deleteMarker(JavaSparkContext jsc, String instantTime, String basePath) { + try { + SparkRDDWriteClient client = createHoodieClient(jsc, basePath); + HoodieWriteConfig config = client.getConfig(); + HoodieEngineContext context = client.getEngineContext(); + HoodieSparkTable table = HoodieSparkTable.create(config, context, true); + WriteMarkersFactory.get(config.getMarkersType(), table, instantTime) + .quietDeleteMarkerDir(context, config.getMarkersDeleteParallelism()); + return 0; + } catch (Exception e) { + LOG.warn(String.format("Failed: Could not clean marker instantTime: \"%s\".", instantTime), e); + return -1; + } + } + private static int dataLoad(JavaSparkContext jsc, String command, String srcPath, String targetPath, String tableName, String tableType, String rowKey, String partitionKey, int parallelism, String schemaFile, int retry, String propsFilePath, List configs) { @@ -320,7 +368,7 @@ private static void doCompactUnscheduleFile(JavaSparkContext jsc, String basePat } private static int compact(JavaSparkContext jsc, String basePath, String tableName, String compactionInstant, - int parallelism, String schemaFile, int retry, boolean schedule, String propsFilePath, + int parallelism, String schemaFile, int retry, String mode, String propsFilePath, List configs) { HoodieCompactor.Config cfg = new HoodieCompactor.Config(); cfg.basePath = basePath; @@ -330,20 +378,20 @@ private static int compact(JavaSparkContext jsc, String basePath, String tableNa cfg.strategyClassName = UnBoundedCompactionStrategy.class.getCanonicalName(); cfg.parallelism = parallelism; cfg.schemaFile = schemaFile; - cfg.runSchedule = schedule; + cfg.runningMode = mode; cfg.propsFilePath = propsFilePath; cfg.configs = configs; return new HoodieCompactor(jsc, cfg).compact(retry); } private static int cluster(JavaSparkContext jsc, String basePath, String tableName, String clusteringInstant, - int parallelism, String sparkMemory, int retry, boolean schedule, String propsFilePath, List configs) { + int parallelism, String sparkMemory, int retry, String runningMode, String propsFilePath, List configs) { HoodieClusteringJob.Config cfg = new HoodieClusteringJob.Config(); cfg.basePath = basePath; cfg.tableName = tableName; cfg.clusteringInstantTime = clusteringInstant; cfg.parallelism = parallelism; - cfg.runSchedule = schedule; + cfg.runningMode = runningMode; cfg.propsFilePath = propsFilePath; cfg.configs = configs; jsc.getConf().set("spark.executor.memory", sparkMemory); @@ -394,8 +442,8 @@ private static int doBootstrap(JavaSparkContext jsc, String tableName, String ta return 0; } - private static int rollback(JavaSparkContext jsc, String instantTime, String basePath) throws Exception { - SparkRDDWriteClient client = createHoodieClient(jsc, basePath); + private static int rollback(JavaSparkContext jsc, String instantTime, String basePath, Boolean rollbackUsingMarkers) throws Exception { + SparkRDDWriteClient client = createHoodieClient(jsc, 
basePath, rollbackUsingMarkers); if (client.rollback(instantTime)) { LOG.info(String.format("The commit \"%s\" rolled back.", instantTime)); return 0; @@ -425,7 +473,7 @@ private static int rollbackToSavepoint(JavaSparkContext jsc, String savepointTim LOG.info(String.format("The commit \"%s\" rolled back.", savepointTime)); return 0; } catch (Exception e) { - LOG.warn(String.format("The commit \"%s\" failed to roll back.", savepointTime)); + LOG.warn(String.format("The commit \"%s\" failed to roll back.", savepointTime), e); return -1; } } @@ -437,7 +485,7 @@ private static int deleteSavepoint(JavaSparkContext jsc, String savepointTime, S LOG.info(String.format("Savepoint \"%s\" deleted.", savepointTime)); return 0; } catch (Exception e) { - LOG.warn(String.format("Failed: Could not delete savepoint \"%s\".", savepointTime)); + LOG.warn(String.format("Failed: Could not delete savepoint \"%s\".", savepointTime), e); return -1; } } @@ -452,11 +500,12 @@ private static int deleteSavepoint(JavaSparkContext jsc, String savepointTime, S * @throws Exception */ protected static int upgradeOrDowngradeTable(JavaSparkContext jsc, String basePath, String toVersion) { - HoodieWriteConfig config = getWriteConfig(basePath); + HoodieWriteConfig config = getWriteConfig(basePath, Boolean.parseBoolean(HoodieWriteConfig.ROLLBACK_USING_MARKERS_ENABLE.defaultValue())); HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder().setConf(jsc.hadoopConfiguration()).setBasePath(config.getBasePath()) .setLoadActiveTimelineOnLoad(false).setConsistencyGuardConfig(config.getConsistencyGuardConfig()) - .setLayoutVersion(Option.of(new TimelineLayoutVersion(config.getTimelineLayoutVersion()))).build(); + .setLayoutVersion(Option.of(new TimelineLayoutVersion(config.getTimelineLayoutVersion()))) + .setFileSystemRetryConfig(config.getFileSystemRetryConfig()).build(); try { new UpgradeDowngrade(metaClient, config, new HoodieSparkEngineContext(jsc), SparkUpgradeDowngradeHelper.getInstance()) .run(HoodieTableVersion.valueOf(toVersion), null); @@ -468,13 +517,18 @@ protected static int upgradeOrDowngradeTable(JavaSparkContext jsc, String basePa } } - private static SparkRDDWriteClient createHoodieClient(JavaSparkContext jsc, String basePath) throws Exception { - HoodieWriteConfig config = getWriteConfig(basePath); + private static SparkRDDWriteClient createHoodieClient(JavaSparkContext jsc, String basePath, Boolean rollbackUsingMarkers) throws Exception { + HoodieWriteConfig config = getWriteConfig(basePath, rollbackUsingMarkers); return new SparkRDDWriteClient(new HoodieSparkEngineContext(jsc), config); } - private static HoodieWriteConfig getWriteConfig(String basePath) { + private static SparkRDDWriteClient createHoodieClient(JavaSparkContext jsc, String basePath) throws Exception { + return createHoodieClient(jsc, basePath, Boolean.parseBoolean(HoodieWriteConfig.ROLLBACK_USING_MARKERS_ENABLE.defaultValue())); + } + + private static HoodieWriteConfig getWriteConfig(String basePath, Boolean rollbackUsingMarkers) { return HoodieWriteConfig.newBuilder().withPath(basePath) + .withRollbackUsingMarkers(rollbackUsingMarkers) .withIndexConfig(HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.BLOOM).build()).build(); } } diff --git a/hudi-cli/src/main/java/org/apache/hudi/cli/utils/CommitUtil.java b/hudi-cli/src/main/java/org/apache/hudi/cli/utils/CommitUtil.java index a95cc53df329c..5d58aa9d2e498 100644 --- a/hudi-cli/src/main/java/org/apache/hudi/cli/utils/CommitUtil.java +++ 
b/hudi-cli/src/main/java/org/apache/hudi/cli/utils/CommitUtil.java @@ -25,9 +25,6 @@ import org.apache.hudi.common.table.timeline.HoodieTimeline; import java.io.IOException; -import java.text.ParseException; -import java.time.Instant; -import java.time.ZoneId; import java.time.ZonedDateTime; import java.util.Date; import java.util.List; @@ -53,16 +50,4 @@ public static String getTimeDaysAgo(int numberOfDays) { Date date = Date.from(ZonedDateTime.now().minusDays(numberOfDays).toInstant()); return HoodieActiveTimeline.formatDate(date); } - - /** - * Add hours to specified time. If hours <0, this acts as remove hours. - * example, say compactionCommitTime: "20200202020000" - * a) hours: +1, returns 20200202030000 - * b) hours: -1, returns 20200202010000 - */ - public static String addHours(String compactionCommitTime, int hours) throws ParseException { - Instant instant = HoodieActiveTimeline.parseDateFromInstantTime(compactionCommitTime).toInstant(); - ZonedDateTime commitDateTime = ZonedDateTime.ofInstant(instant, ZoneId.systemDefault()); - return HoodieActiveTimeline.formatDate(Date.from(commitDateTime.plusHours(hours).toInstant())); - } } diff --git a/hudi-cli/src/main/java/org/apache/hudi/cli/utils/SparkTempViewProvider.java b/hudi-cli/src/main/java/org/apache/hudi/cli/utils/SparkTempViewProvider.java index bbd8440448fd6..6f5a11ad6657f 100644 --- a/hudi-cli/src/main/java/org/apache/hudi/cli/utils/SparkTempViewProvider.java +++ b/hudi-cli/src/main/java/org/apache/hudi/cli/utils/SparkTempViewProvider.java @@ -20,8 +20,6 @@ import org.apache.hudi.exception.HoodieException; -import org.apache.log4j.LogManager; -import org.apache.log4j.Logger; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.sql.Dataset; @@ -31,27 +29,34 @@ import org.apache.spark.sql.types.DataType; import org.apache.spark.sql.types.DataTypes; import org.apache.spark.sql.types.StructType; +import org.springframework.shell.support.logging.HandlerUtils; import java.util.List; +import java.util.logging.Handler; +import java.util.logging.Level; +import java.util.logging.Logger; import java.util.stream.Collectors; public class SparkTempViewProvider implements TempViewProvider { - private static final Logger LOG = LogManager.getLogger(SparkTempViewProvider.class); + private static final Logger LOG = HandlerUtils.getLogger(SparkTempViewProvider.class); private JavaSparkContext jsc; private SQLContext sqlContext; public SparkTempViewProvider(String appName) { try { + Handler handler = LOG.getParent().getHandlers()[0]; SparkConf sparkConf = new SparkConf().setAppName(appName) .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer").setMaster("local[8]"); jsc = new JavaSparkContext(sparkConf); - jsc.setLogLevel("ERROR"); - sqlContext = new SQLContext(jsc); + if (handler != null) { + LOG.getParent().removeHandler(LOG.getParent().getHandlers()[0]); + LOG.getParent().addHandler(handler); + } } catch (Throwable ex) { // log full stack trace and rethrow. 
Without this its difficult to debug failures, if any - LOG.error("unable to initialize spark context ", ex); + LOG.log(Level.WARNING, "unable to initialize spark context ", ex); throw new HoodieException(ex); } } @@ -90,7 +95,7 @@ public void createOrReplace(String tableName, List headers, List entry : data.entrySet()) { String key = entry.getKey(); Integer[] value = entry.getValue(); @@ -279,8 +295,8 @@ public void testShowArchivedCommitsWithMultiCommitsFile() throws Exception { HoodieSparkTable table = HoodieSparkTable.create(cfg, context(), metaClient); // need to create multi archive files - HoodieTimelineArchiveLog archiveLog = new HoodieTimelineArchiveLog(cfg, table); - archiveLog.archiveIfRequired(context()); + HoodieTimelineArchiver archiver = new HoodieTimelineArchiver(cfg, table); + archiver.archiveIfRequired(context()); } CommandResult cr = shell().executeCommand(String.format("commits showarchived --startTs %s --endTs %s", "160", "174")); diff --git a/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestCompactionCommand.java b/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestCompactionCommand.java index 21841a5769450..17c1002f6b0dd 100644 --- a/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestCompactionCommand.java +++ b/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestCompactionCommand.java @@ -24,7 +24,9 @@ import org.apache.hudi.cli.TableHeader; import org.apache.hudi.cli.functional.CLIFunctionalTestHarness; import org.apache.hudi.cli.testutils.HoodieTestCommitMetadataGenerator; -import org.apache.hudi.common.config.HoodieMetadataConfig; +import org.apache.hudi.common.fs.FSUtils; +import org.apache.hudi.common.fs.HoodieWrapperFileSystem; +import org.apache.hudi.common.fs.NoOpConsistencyGuard; import org.apache.hudi.common.model.HoodieAvroPayload; import org.apache.hudi.common.model.HoodieTableType; import org.apache.hudi.common.table.HoodieTableMetaClient; @@ -35,12 +37,13 @@ import org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion; import org.apache.hudi.common.table.view.FileSystemViewStorageConfig; import org.apache.hudi.common.testutils.CompactionTestUtils; +import org.apache.hudi.common.testutils.HoodieTestUtils; import org.apache.hudi.common.util.Option; import org.apache.hudi.config.HoodieCompactionConfig; import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.exception.HoodieException; import org.apache.hudi.table.HoodieSparkTable; -import org.apache.hudi.table.HoodieTimelineArchiveLog; +import org.apache.hudi.client.HoodieTimelineArchiver; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Tag; @@ -152,7 +155,11 @@ private void generateCompactionInstances() throws IOException { activeTimeline.transitionCompactionInflightToComplete( new HoodieInstant(HoodieInstant.State.INFLIGHT, COMPACTION_ACTION, timestamp), Option.empty()); }); - + // Simulate a compaction commit in metadata table timeline + // so the archival in data table can happen + HoodieTestUtils.createCompactionCommitInMetadataTable(hadoopConf(), + new HoodieWrapperFileSystem( + FSUtils.getFs(tablePath, hadoopConf()), new NoOpConsistencyGuard()), tablePath, "007"); } private void generateArchive() throws IOException { @@ -162,13 +169,12 @@ private void generateArchive() throws IOException { .withCompactionConfig(HoodieCompactionConfig.newBuilder().retainCommits(1).archiveCommitsWith(2, 3).build()) .withFileSystemViewConfig(FileSystemViewStorageConfig.newBuilder() .withRemoteServerPort(timelineServicePort).build()) - 
.withMetadataConfig(HoodieMetadataConfig.newBuilder().enable(false).build()) .forTable("test-trip-table").build(); // archive HoodieTableMetaClient metaClient = HoodieTableMetaClient.reload(HoodieCLI.getTableMetaClient()); HoodieSparkTable table = HoodieSparkTable.create(cfg, context(), metaClient); - HoodieTimelineArchiveLog archiveLog = new HoodieTimelineArchiveLog(cfg, table); - archiveLog.archiveIfRequired(context()); + HoodieTimelineArchiver archiver = new HoodieTimelineArchiver(cfg, table); + archiver.archiveIfRequired(context()); } /** diff --git a/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestHoodieLogFileCommand.java b/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestHoodieLogFileCommand.java index f2571ce3598d6..ee7fbda11b783 100644 --- a/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestHoodieLogFileCommand.java +++ b/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestHoodieLogFileCommand.java @@ -108,7 +108,7 @@ public void init() throws IOException, InterruptedException, URISyntaxException Map header = new HashMap<>(); header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, INSTANT_TIME); header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, getSimpleSchema().toString()); - dataBlock = new HoodieAvroDataBlock(records, header); + dataBlock = new HoodieAvroDataBlock(records, header, HoodieRecord.RECORD_KEY_METADATA_FIELD); writer.appendBlock(dataBlock); } } @@ -188,7 +188,7 @@ public void testShowLogFileRecordsWithMerge() throws IOException, InterruptedExc Map header = new HashMap<>(); header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, INSTANT_TIME); header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, schema.toString()); - HoodieAvroDataBlock dataBlock = new HoodieAvroDataBlock(records1, header); + HoodieAvroDataBlock dataBlock = new HoodieAvroDataBlock(records1, header, HoodieRecord.RECORD_KEY_METADATA_FIELD); writer.appendBlock(dataBlock); } finally { if (writer != null) { diff --git a/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestRepairsCommand.java b/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestRepairsCommand.java index 048b2a20e6b2c..27cc31ccea2cf 100644 --- a/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestRepairsCommand.java +++ b/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestRepairsCommand.java @@ -39,7 +39,6 @@ import org.junit.jupiter.api.Test; import org.springframework.shell.core.CommandResult; -import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.net.URL; @@ -51,6 +50,14 @@ import java.util.Properties; import java.util.stream.Collectors; +import static org.apache.hudi.common.table.HoodieTableConfig.ARCHIVELOG_FOLDER; +import static org.apache.hudi.common.table.HoodieTableConfig.NAME; +import static org.apache.hudi.common.table.HoodieTableConfig.TABLE_CHECKSUM; +import static org.apache.hudi.common.table.HoodieTableConfig.TIMELINE_LAYOUT_VERSION; +import static org.apache.hudi.common.table.HoodieTableConfig.TYPE; +import static org.apache.hudi.common.table.HoodieTableConfig.VERSION; +import static org.apache.hudi.common.table.HoodieTableConfig.generateChecksum; +import static org.apache.hudi.common.table.HoodieTableConfig.validateChecksum; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertTrue; @@ -104,7 +111,7 @@ public void testAddPartitionMetaWithDryRun() throws IOException { // expected all 'No'. 
String[][] rows = FSUtils.getAllPartitionFoldersThreeLevelsDown(fs, tablePath) .stream() - .map(partition -> new String[]{partition, "No", "None"}) + .map(partition -> new String[] {partition, "No", "None"}) .toArray(String[][]::new); String expected = HoodiePrintHelper.print(new String[] {HoodieTableHeaderFields.HEADER_PARTITION_PATH, HoodieTableHeaderFields.HEADER_METADATA_PRESENT, HoodieTableHeaderFields.HEADER_ACTION}, rows); @@ -135,7 +142,7 @@ public void testAddPartitionMetaWithRealRun() throws IOException { List paths = FSUtils.getAllPartitionFoldersThreeLevelsDown(fs, tablePath); // after dry run, the action will be 'Repaired' String[][] rows = paths.stream() - .map(partition -> new String[]{partition, "No", "Repaired"}) + .map(partition -> new String[] {partition, "No", "Repaired"}) .toArray(String[][]::new); String expected = HoodiePrintHelper.print(new String[] {HoodieTableHeaderFields.HEADER_PARTITION_PATH, HoodieTableHeaderFields.HEADER_METADATA_PRESENT, HoodieTableHeaderFields.HEADER_ACTION}, rows); @@ -147,7 +154,7 @@ public void testAddPartitionMetaWithRealRun() throws IOException { // after real run, Metadata is present now. rows = paths.stream() - .map(partition -> new String[]{partition, "Yes", "None"}) + .map(partition -> new String[] {partition, "Yes", "None"}) .toArray(String[][]::new); expected = HoodiePrintHelper.print(new String[] {HoodieTableHeaderFields.HEADER_PARTITION_PATH, HoodieTableHeaderFields.HEADER_METADATA_PRESENT, HoodieTableHeaderFields.HEADER_ACTION}, rows); @@ -170,19 +177,24 @@ public void testOverwriteHoodieProperties() throws IOException { Map oldProps = HoodieCLI.getTableMetaClient().getTableConfig().propsMap(); // after overwrite, the stored value in .hoodie is equals to which read from properties. - Map result = HoodieTableMetaClient.reload(HoodieCLI.getTableMetaClient()).getTableConfig().propsMap(); + HoodieTableConfig tableConfig = HoodieTableMetaClient.reload(HoodieCLI.getTableMetaClient()).getTableConfig(); + Map result = tableConfig.propsMap(); + // validate table checksum + assertTrue(result.containsKey(TABLE_CHECKSUM.key())); + assertTrue(validateChecksum(tableConfig.getProps())); Properties expectProps = new Properties(); - expectProps.load(new FileInputStream(new File(newProps.getPath()))); + expectProps.load(new FileInputStream(newProps.getPath())); Map expected = expectProps.entrySet().stream() .collect(Collectors.toMap(e -> String.valueOf(e.getKey()), e -> String.valueOf(e.getValue()))); + expected.putIfAbsent(TABLE_CHECKSUM.key(), String.valueOf(generateChecksum(tableConfig.getProps()))); assertEquals(expected, result); // check result - List allPropsStr = Arrays.asList("hoodie.table.name", "hoodie.table.type", "hoodie.table.version", - "hoodie.archivelog.folder", "hoodie.timeline.layout.version"); - String[][] rows = allPropsStr.stream().sorted().map(key -> new String[]{key, - oldProps.getOrDefault(key, "null"), result.getOrDefault(key, "null")}) + List allPropsStr = Arrays.asList(NAME.key(), TYPE.key(), VERSION.key(), + ARCHIVELOG_FOLDER.key(), TIMELINE_LAYOUT_VERSION.key(), TABLE_CHECKSUM.key()); + String[][] rows = allPropsStr.stream().sorted().map(key -> new String[] {key, + oldProps.getOrDefault(key, "null"), result.getOrDefault(key, "null")}) .toArray(String[][]::new); String expect = HoodiePrintHelper.print(new String[] {HoodieTableHeaderFields.HEADER_HOODIE_PROPERTY, HoodieTableHeaderFields.HEADER_OLD_VALUE, HoodieTableHeaderFields.HEADER_NEW_VALUE}, rows); diff --git 
a/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestRollbacksCommand.java b/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestRollbacksCommand.java index 17bc48f66f0c4..9a10893b35e89 100644 --- a/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestRollbacksCommand.java +++ b/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestRollbacksCommand.java @@ -24,7 +24,7 @@ import org.apache.hudi.cli.HoodieTableHeaderFields; import org.apache.hudi.cli.TableHeader; import org.apache.hudi.cli.functional.CLIFunctionalTestHarness; -import org.apache.hudi.client.AbstractHoodieWriteClient; +import org.apache.hudi.client.BaseHoodieWriteClient; import org.apache.hudi.client.SparkRDDWriteClient; import org.apache.hudi.common.model.HoodieTableType; import org.apache.hudi.common.table.HoodieTableMetaClient; @@ -93,7 +93,7 @@ public void init() throws Exception { .withRollbackUsingMarkers(false) .withIndexConfig(HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.INMEMORY).build()).build(); - try (AbstractHoodieWriteClient client = new SparkRDDWriteClient(context(), config)) { + try (BaseHoodieWriteClient client = new SparkRDDWriteClient(context(), config)) { // Rollback inflight commit3 and commit2 client.rollback("102"); client.rollback("101"); diff --git a/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestUpgradeDowngradeCommand.java b/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestUpgradeDowngradeCommand.java index cba6d901b956d..b3650fa027626 100644 --- a/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestUpgradeDowngradeCommand.java +++ b/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestUpgradeDowngradeCommand.java @@ -106,7 +106,7 @@ public void testDowngradeCommand() throws Exception { assertEquals(metaClient.getTableConfig().getTableVersion().versionCode(), HoodieTableVersion.ZERO.versionCode()); assertTableVersionFromPropertyFile(); - // verify marker files are non existant + // verify marker files are non existent for (String partitionPath : DEFAULT_PARTITION_PATHS) { assertEquals(0, FileCreateUtils.getTotalMarkerFileCount(tablePath, partitionPath, "101", IOType.MERGE)); } diff --git a/hudi-cli/src/test/java/org/apache/hudi/cli/integ/ITTestClusteringCommand.java b/hudi-cli/src/test/java/org/apache/hudi/cli/integ/ITTestClusteringCommand.java new file mode 100644 index 0000000000000..17075f9d3dfb6 --- /dev/null +++ b/hudi-cli/src/test/java/org/apache/hudi/cli/integ/ITTestClusteringCommand.java @@ -0,0 +1,199 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hudi.cli.integ; + +import org.apache.hudi.cli.HoodieCLI; +import org.apache.hudi.cli.commands.TableCommand; +import org.apache.hudi.cli.testutils.AbstractShellIntegrationTest; +import org.apache.hudi.client.SparkRDDWriteClient; +import org.apache.hudi.client.WriteStatus; +import org.apache.hudi.client.common.HoodieSparkEngineContext; +import org.apache.hudi.common.model.HoodieAvroPayload; +import org.apache.hudi.common.model.HoodieRecord; +import org.apache.hudi.common.model.HoodieTableType; +import org.apache.hudi.common.table.HoodieTableMetaClient; +import org.apache.hudi.common.table.timeline.HoodieActiveTimeline; +import org.apache.hudi.common.table.timeline.HoodieInstant; +import org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion; +import org.apache.hudi.common.testutils.HoodieTestDataGenerator; +import org.apache.hudi.common.util.Option; +import org.apache.hudi.config.HoodieIndexConfig; +import org.apache.hudi.config.HoodieWriteConfig; +import org.apache.hudi.index.HoodieIndex; +import org.apache.hudi.testutils.HoodieClientTestBase; + +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.springframework.shell.core.CommandResult; + +import java.io.IOException; +import java.nio.file.Paths; +import java.util.List; +import java.util.stream.Collectors; + +import static org.junit.jupiter.api.Assertions.assertAll; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +/** + * Integration test class for {@link org.apache.hudi.cli.commands.ClusteringCommand}. + *

+ * A command using SparkLauncher needs to load the jars under lib, which are generated during mvn package. + * Use an integration test instead of a unit test. + */ +public class ITTestClusteringCommand extends AbstractShellIntegrationTest { + + private String tablePath; + private String tableName; + + @BeforeEach + public void init() throws IOException { + tableName = "test_table_" + ITTestClusteringCommand.class.getName(); + tablePath = Paths.get(basePath, tableName).toString(); + + HoodieCLI.conf = jsc.hadoopConfiguration(); + // Create table and connect + new TableCommand().createTable( + tablePath, tableName, HoodieTableType.COPY_ON_WRITE.name(), + "", TimelineLayoutVersion.VERSION_1, "org.apache.hudi.common.model.HoodieAvroPayload"); + metaClient.setBasePath(tablePath); + metaClient = HoodieTableMetaClient.reload(metaClient); + } + + /** + * Test case for command 'clustering schedule'. + */ + @Test + public void testScheduleClustering() throws IOException { + // generate commits + generateCommits(); + + CommandResult cr = scheduleClustering(); + assertAll("Command run failed", + () -> assertTrue(cr.isSuccess()), + () -> assertTrue( + cr.getResult().toString().startsWith("Succeeded to schedule clustering for"))); + + // there is 1 requested clustering + HoodieActiveTimeline timeline = HoodieCLI.getTableMetaClient().getActiveTimeline(); + assertEquals(1, timeline.filterPendingReplaceTimeline().countInstants()); + } + + /** + * Test case for command 'clustering run'. + */ + @Test + public void testClustering() throws IOException { + // generate commits + generateCommits(); + + CommandResult cr1 = scheduleClustering(); + assertTrue(cr1.isSuccess()); + + // get clustering instance + HoodieActiveTimeline timeline = HoodieCLI.getTableMetaClient().getActiveTimeline(); + Option<String> instance = + timeline.filterPendingReplaceTimeline().firstInstant().map(HoodieInstant::getTimestamp); + assertTrue(instance.isPresent(), "Must have pending clustering."); + + CommandResult cr2 = getShell().executeCommand( + String.format("clustering run --parallelism %s --clusteringInstant %s --sparkMaster %s", + 2, instance.get(), "local")); + + assertAll("Command run failed", + () -> assertTrue(cr2.isSuccess()), + () -> assertTrue( + cr2.getResult().toString().startsWith("Succeeded to run clustering for "))); + + // assert clustering complete + assertTrue(HoodieCLI.getTableMetaClient().getActiveTimeline().reload() + .filterCompletedInstants().getInstants() + .map(HoodieInstant::getTimestamp).collect(Collectors.toList()).contains(instance.get()), + "Pending clustering must be completed"); + + assertTrue(HoodieCLI.getTableMetaClient().getActiveTimeline().reload() + .getCompletedReplaceTimeline().getInstants() + .map(HoodieInstant::getTimestamp).collect(Collectors.toList()).contains(instance.get()), + "Pending clustering must be completed"); + } +
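As the two tests above illustrate, clustering is a two-step flow from the CLI: the instant produced by the schedule command feeds the run command (timestamp illustrative):

    clustering schedule --hoodieConfigs hoodie.clustering.inline.max.commits=1 --sparkMaster local
    clustering run --parallelism 2 --clusteringInstant 20220101010101 --sparkMaster local

The scheduleAndExecute variant tested next collapses both steps into a single command, so no pending instant has to be looked up by hand.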
+ /** + * Test case for command 'clustering scheduleAndExecute'. + */ + @Test + public void testClusteringScheduleAndExecute() throws IOException { + // generate commits + generateCommits(); + + CommandResult cr2 = getShell().executeCommand( + String.format("clustering scheduleAndExecute --parallelism %s --sparkMaster %s", 2, "local")); + + assertAll("Command run failed", + () -> assertTrue(cr2.isSuccess()), + () -> assertTrue( + cr2.getResult().toString().startsWith("Succeeded to run clustering for scheduleAndExecute"))); + + // assert clustering complete + assertTrue(HoodieCLI.getTableMetaClient().getActiveTimeline().reload() + .getCompletedReplaceTimeline().getInstants() + .map(HoodieInstant::getTimestamp).count() > 0, + "Completed clustering count should not be 0"); + } + + private CommandResult scheduleClustering() { + // generate requested clustering + return getShell().executeCommand( + String.format("clustering schedule --hoodieConfigs hoodie.clustering.inline.max.commits=1 --sparkMaster %s", "local")); + } + + private void generateCommits() throws IOException { + HoodieTestDataGenerator dataGen = new HoodieTestDataGenerator(); + + // Create the write client to write some records in + HoodieWriteConfig cfg = HoodieWriteConfig.newBuilder().withPath(tablePath) + .withSchema(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA).withParallelism(2, 2) + .withDeleteParallelism(2).forTable(tableName) + .withIndexConfig(HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.BLOOM).build()).build(); + + SparkRDDWriteClient<HoodieAvroPayload> client = new SparkRDDWriteClient<>(new HoodieSparkEngineContext(jsc), cfg); + + insert(jsc, client, dataGen, "001"); + insert(jsc, client, dataGen, "002"); + } + + private List<HoodieRecord> insert(JavaSparkContext jsc, SparkRDDWriteClient<HoodieAvroPayload> client, + HoodieTestDataGenerator dataGen, String newCommitTime) throws IOException { + // inserts + client.startCommitWithTime(newCommitTime); + + List<HoodieRecord> records = dataGen.generateInserts(newCommitTime, 10); + JavaRDD<HoodieRecord> writeRecords = jsc.parallelize(records, 1); + operateFunc(SparkRDDWriteClient::insert, client, writeRecords, newCommitTime); + return records; + } + + private JavaRDD<WriteStatus> operateFunc( + HoodieClientTestBase.Function3<JavaRDD<WriteStatus>, SparkRDDWriteClient, JavaRDD<HoodieRecord>, String> writeFn, + SparkRDDWriteClient client, JavaRDD<HoodieRecord> writeRecords, String commitTime) + throws IOException { + return writeFn.apply(client, writeRecords, commitTime); + } +} diff --git a/hudi-cli/src/test/java/org/apache/hudi/cli/integ/ITTestCommitsCommand.java b/hudi-cli/src/test/java/org/apache/hudi/cli/integ/ITTestCommitsCommand.java index b3c5c06be9a29..18f4a387d474e 100644 --- a/hudi-cli/src/test/java/org/apache/hudi/cli/integ/ITTestCommitsCommand.java +++ b/hudi-cli/src/test/java/org/apache/hudi/cli/integ/ITTestCommitsCommand.java @@ -100,5 +100,18 @@ public void testRollbackCommit() throws Exception { HoodieActiveTimeline timeline = metaClient.reloadActiveTimeline(); assertEquals(2, timeline.getCommitsTimeline().countInstants(), "There should have 2 instants."); + + // rollback complete commit + CommandResult cr2 = getShell().executeCommand(String.format("commit rollback --commit %s --sparkMaster %s --sparkMemory %s", + "101", "local", "4G")); + assertTrue(cr2.isSuccess()); + + metaClient = HoodieTableMetaClient.reload(HoodieCLI.getTableMetaClient()); + + HoodieActiveTimeline rollbackTimeline2 = new RollbacksCommand.RollbackTimeline(metaClient); + assertEquals(2, rollbackTimeline2.getRollbackTimeline().countInstants(), "There should have 2 rollback instants."); + + HoodieActiveTimeline timeline2 = metaClient.reloadActiveTimeline(); + assertEquals(1, timeline2.getCommitsTimeline().countInstants(), "There should have 1 instant."); } }
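Across SparkMain, HoodieCompactor and HoodieClusteringJob, this change set replaces the boolean runSchedule flag with a string runningMode. A minimal sketch of how the CLI path now drives HoodieCompactor (field names as in the SparkMain hunks above; the base path is illustrative and jsc is assumed to be an existing JavaSparkContext):

    HoodieCompactor.Config cfg = new HoodieCompactor.Config();
    cfg.basePath = "/tmp/hudi_table";
    cfg.tableName = "test_table";
    cfg.parallelism = 2;
    cfg.runningMode = HoodieCompactor.SCHEDULE_AND_EXECUTE; // previously cfg.runSchedule = true/false
    int exitCode = new HoodieCompactor(jsc, cfg).compact(1); // one retry, mirroring SparkMain

The three modes (SCHEDULE, EXECUTE, SCHEDULE_AND_EXECUTE) make the intent explicit where the old boolean could only express two of them.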
diff --git a/hudi-cli/src/test/java/org/apache/hudi/cli/integ/ITTestCompactionCommand.java b/hudi-cli/src/test/java/org/apache/hudi/cli/integ/ITTestCompactionCommand.java index 37a2098d0cd18..4734f45e7074b 100644 --- a/hudi-cli/src/test/java/org/apache/hudi/cli/integ/ITTestCompactionCommand.java +++ b/hudi-cli/src/test/java/org/apache/hudi/cli/integ/ITTestCompactionCommand.java @@ -140,6 +140,33 @@ public void testCompact() throws IOException { "Pending compaction must be completed"); } + /** + * Test case for command 'compaction scheduleAndExecute'. + */ + @Test + public void testCompactScheduleAndExecute() throws IOException { + // generate commits + generateCommits(); + + String schemaPath = Paths.get(basePath, "compaction.schema").toString(); + writeSchemaToTmpFile(schemaPath); + + CommandResult cr2 = getShell().executeCommand( + String.format("compaction scheduleAndExecute --parallelism %s --schemaFilePath %s --sparkMaster %s", + 2, schemaPath, "local")); + + assertAll("Command run failed", + () -> assertTrue(cr2.isSuccess()), + () -> assertTrue( + cr2.getResult().toString().startsWith("Schedule and execute compaction successfully completed"))); + + // assert compaction complete + assertTrue(HoodieCLI.getTableMetaClient().getActiveTimeline().reload() + .filterCompletedInstants().getInstants() + .map(HoodieInstant::getTimestamp).count() > 0, + "Completed compaction count should not be 0"); + } + /** * Test case for command 'compaction validate'. */
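The marker removal path added earlier is covered by the integration test below; from the hudi-cli shell the command takes the instant whose marker directory should be dropped (instant time illustrative):

    marker delete --commit 101 --sparkMaster local

Under the hood this dispatches SparkMain.DELETE_MARKER, which resolves the table and calls WriteMarkersFactory.get(config.getMarkersType(), table, instantTime).quietDeleteMarkerDir(context, config.getMarkersDeleteParallelism()).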

diff --git a/hudi-cli/src/test/java/org/apache/hudi/cli/integ/ITTestMarkersCommand.java b/hudi-cli/src/test/java/org/apache/hudi/cli/integ/ITTestMarkersCommand.java new file mode 100644 index 0000000000000..221a29f5250d2 --- /dev/null +++ b/hudi-cli/src/test/java/org/apache/hudi/cli/integ/ITTestMarkersCommand.java @@ -0,0 +1,77 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.cli.integ; + +import org.apache.hadoop.fs.Path; +import org.apache.hudi.cli.commands.TableCommand; +import org.apache.hudi.cli.testutils.AbstractShellIntegrationTest; +import org.apache.hudi.common.model.HoodieTableType; +import org.apache.hudi.common.model.IOType; +import org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion; +import org.apache.hudi.common.testutils.FileCreateUtils; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.springframework.shell.core.CommandResult; + +import java.io.IOException; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +/** + * Integration test class for {@link org.apache.hudi.cli.commands.MarkersCommand}. + * + * A command using SparkLauncher needs to load the jars under lib, which are generated during mvn package. + * Use an integration test instead of a unit test. + */ +public class ITTestMarkersCommand extends AbstractShellIntegrationTest { + + private String tablePath; + + @BeforeEach + public void init() throws IOException { + String tableName = "test_table"; + tablePath = basePath + Path.SEPARATOR + tableName; + + // Create table and connect + new TableCommand().createTable( + tablePath, "test_table", HoodieTableType.COPY_ON_WRITE.name(), + "", TimelineLayoutVersion.VERSION_1, "org.apache.hudi.common.model.HoodieAvroPayload"); + } + + /** + * Test case of command 'marker delete'. + */ + @Test + public void testDeleteMarker() throws IOException { + // generate markers + String instantTime1 = "101"; + + FileCreateUtils.createMarkerFile(tablePath, "partA", instantTime1, "f0", IOType.APPEND); + FileCreateUtils.createMarkerFile(tablePath, "partA", instantTime1, "f1", IOType.APPEND); + + assertEquals(2, FileCreateUtils.getTotalMarkerFileCount(tablePath, "partA", instantTime1, IOType.APPEND)); + + CommandResult cr = getShell().executeCommand( + String.format("marker delete --commit %s --sparkMaster %s", instantTime1, "local")); + assertTrue(cr.isSuccess()); + + assertEquals(0, FileCreateUtils.getTotalMarkerFileCount(tablePath, "partA", instantTime1, IOType.APPEND)); + } +} diff --git a/hudi-cli/src/test/java/org/apache/hudi/cli/testutils/HoodieTestCommitMetadataGenerator.java b/hudi-cli/src/test/java/org/apache/hudi/cli/testutils/HoodieTestCommitMetadataGenerator.java index 105a9f639c792..f59dca4e1ea9f 100644 --- a/hudi-cli/src/test/java/org/apache/hudi/cli/testutils/HoodieTestCommitMetadataGenerator.java +++ b/hudi-cli/src/test/java/org/apache/hudi/cli/testutils/HoodieTestCommitMetadataGenerator.java @@ -73,9 +73,9 @@ public static void createCommitFileWithMetadata(String basePath, String commitTi } public static void createCommitFileWithMetadata(String basePath, String commitTime, Configuration configuration, - Option writes, Option updates, Map extraMetdata) throws Exception { + Option writes, Option updates, Map extraMetadata) throws Exception { createCommitFileWithMetadata(basePath, commitTime, configuration, UUID.randomUUID().toString(), - UUID.randomUUID().toString(), writes, updates, extraMetdata); + UUID.randomUUID().toString(), writes, updates, extraMetadata); } public static void createCommitFileWithMetadata(String basePath, String commitTime, Configuration configuration, diff --git a/hudi-cli/src/test/java/org/apache/hudi/cli/testutils/SparkUtilTest.java b/hudi-cli/src/test/java/org/apache/hudi/cli/testutils/SparkUtilTest.java new file mode 100644 index 0000000000000..4966438292949 --- /dev/null +++ b/hudi-cli/src/test/java/org/apache/hudi/cli/testutils/SparkUtilTest.java @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.cli.testutils; + +import org.apache.hudi.common.util.Option; +import org.apache.hudi.cli.utils.SparkUtil; +import org.apache.spark.SparkConf; + +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +public class SparkUtilTest { + @Test + public void testGetDefaultSparkConf() { + SparkConf sparkConf = SparkUtil.getDefaultConf("test-spark-app", Option.of("")); + assertEquals(SparkUtil.DEFAULT_SPARK_MASTER, sparkConf.get("spark.master")); + } +} diff --git a/hudi-client/hudi-client-common/pom.xml b/hudi-client/hudi-client-common/pom.xml index a9209f5534df8..a55a136652728 100644 --- a/hudi-client/hudi-client-common/pom.xml +++ b/hudi-client/hudi-client-common/pom.xml @@ -163,7 +163,6 @@ org.awaitility awaitility - 3.1.2 test diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/async/AsyncArchiveService.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/async/AsyncArchiveService.java new file mode 100644 index 0000000000000..3fdc21dd21683 --- /dev/null +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/async/AsyncArchiveService.java @@ -0,0 +1,86 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.async; + +import org.apache.hudi.client.BaseHoodieWriteClient; +import org.apache.hudi.common.util.collection.Pair; +import org.apache.hudi.config.HoodieWriteConfig; +import org.apache.hudi.exception.HoodieException; + +import org.apache.log4j.LogManager; +import org.apache.log4j.Logger; + +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; + +/** + * Async archive service to run concurrently with write operation. 
+ */ +public class AsyncArchiveService extends HoodieAsyncTableService { + + private static final Logger LOG = LogManager.getLogger(AsyncArchiveService.class); + + private final BaseHoodieWriteClient writeClient; + private final transient ExecutorService executor = Executors.newSingleThreadExecutor(); + + protected AsyncArchiveService(BaseHoodieWriteClient writeClient) { + super(writeClient.getConfig()); + this.writeClient = writeClient; + } + + @Override + protected Pair startService() { + LOG.info("Starting async archive service..."); + return Pair.of(CompletableFuture.supplyAsync(() -> { + writeClient.archive(); + return true; + }, executor), executor); + } + + public static AsyncArchiveService startAsyncArchiveIfEnabled(BaseHoodieWriteClient writeClient) { + HoodieWriteConfig config = writeClient.getConfig(); + if (!config.isAutoArchive() || !config.isAsyncArchive()) { + LOG.info("The HoodieWriteClient is not configured to auto & async archive. Async archive service will not start."); + return null; + } + AsyncArchiveService asyncArchiveService = new AsyncArchiveService(writeClient); + asyncArchiveService.start(null); + return asyncArchiveService; + } + + public static void waitForCompletion(AsyncArchiveService asyncArchiveService) { + if (asyncArchiveService != null) { + LOG.info("Waiting for async archive service to finish"); + try { + asyncArchiveService.waitForShutdown(); + } catch (Exception e) { + throw new HoodieException("Error waiting for async archive service to finish", e); + } + } + } + + public static void forceShutdown(AsyncArchiveService asyncArchiveService) { + if (asyncArchiveService != null) { + LOG.info("Shutting down async archive service..."); + asyncArchiveService.shutdown(true); + } + } +} diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/AsyncCleanerService.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/async/AsyncCleanerService.java similarity index 56% rename from hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/AsyncCleanerService.java rename to hudi-client/hudi-client-common/src/main/java/org/apache/hudi/async/AsyncCleanerService.java index a5a38f2cc5949..72907e6d3fbcd 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/AsyncCleanerService.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/async/AsyncCleanerService.java @@ -7,21 +7,24 @@ * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
*/ -package org.apache.hudi.client; +package org.apache.hudi.async; -import org.apache.hudi.async.HoodieAsyncService; +import org.apache.hudi.client.BaseHoodieWriteClient; import org.apache.hudi.common.table.timeline.HoodieActiveTimeline; import org.apache.hudi.common.util.collection.Pair; +import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.exception.HoodieException; + import org.apache.log4j.LogManager; import org.apache.log4j.Logger; @@ -30,54 +33,55 @@ import java.util.concurrent.Executors; /** - * Clean service running concurrently with write operation. + * Async clean service to run concurrently with write operation. */ -class AsyncCleanerService extends HoodieAsyncService { +public class AsyncCleanerService extends HoodieAsyncTableService { private static final Logger LOG = LogManager.getLogger(AsyncCleanerService.class); - private final AbstractHoodieWriteClient writeClient; + private final BaseHoodieWriteClient writeClient; private final transient ExecutorService executor = Executors.newSingleThreadExecutor(); - protected AsyncCleanerService(AbstractHoodieWriteClient writeClient) { + protected AsyncCleanerService(BaseHoodieWriteClient writeClient) { + super(writeClient.getConfig()); this.writeClient = writeClient; } @Override protected Pair startService() { String instantTime = HoodieActiveTimeline.createNewInstantTime(); - LOG.info("Auto cleaning is enabled. Running cleaner async to write operation at instant time " + instantTime); + LOG.info(String.format("Starting async clean service with instant time %s...", instantTime)); return Pair.of(CompletableFuture.supplyAsync(() -> { writeClient.clean(instantTime); return true; }, executor), executor); } - public static AsyncCleanerService startAsyncCleaningIfEnabled(AbstractHoodieWriteClient writeClient) { - AsyncCleanerService asyncCleanerService = null; - if (writeClient.getConfig().isAutoClean() && writeClient.getConfig().isAsyncClean()) { - asyncCleanerService = new AsyncCleanerService(writeClient); - asyncCleanerService.start(null); - } else { - LOG.info("Async auto cleaning is not enabled. Not running cleaner now"); + public static AsyncCleanerService startAsyncCleaningIfEnabled(BaseHoodieWriteClient writeClient) { + HoodieWriteConfig config = writeClient.getConfig(); + if (!config.isAutoClean() || !config.isAsyncClean()) { + LOG.info("The HoodieWriteClient is not configured to auto & async clean. 
Async clean service will not start."); + return null; } + AsyncCleanerService asyncCleanerService = new AsyncCleanerService(writeClient); + asyncCleanerService.start(null); return asyncCleanerService; } public static void waitForCompletion(AsyncCleanerService asyncCleanerService) { if (asyncCleanerService != null) { - LOG.info("Waiting for async cleaner to finish"); + LOG.info("Waiting for async clean service to finish"); try { asyncCleanerService.waitForShutdown(); } catch (Exception e) { - throw new HoodieException("Error waiting for async cleaning to finish", e); + throw new HoodieException("Error waiting for async clean service to finish", e); } } } public static void forceShutdown(AsyncCleanerService asyncCleanerService) { if (asyncCleanerService != null) { - LOG.info("Shutting down async cleaner"); + LOG.info("Shutting down async clean service..."); asyncCleanerService.shutdown(true); } } diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/async/AsyncClusteringService.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/async/AsyncClusteringService.java index b9707bb6d82a7..1c1cf2bb9f74b 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/async/AsyncClusteringService.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/async/AsyncClusteringService.java @@ -19,8 +19,8 @@ package org.apache.hudi.async; -import org.apache.hudi.client.AbstractClusteringClient; -import org.apache.hudi.client.AbstractHoodieWriteClient; +import org.apache.hudi.client.BaseClusterer; +import org.apache.hudi.client.BaseHoodieWriteClient; import org.apache.hudi.common.table.timeline.HoodieInstant; import org.apache.hudi.common.util.collection.Pair; import org.apache.hudi.exception.HoodieIOException; @@ -38,25 +38,25 @@ * Async clustering service that runs in a separate thread. * Currently, only one clustering thread is allowed to run at any time. */ -public abstract class AsyncClusteringService extends HoodieAsyncService { +public abstract class AsyncClusteringService extends HoodieAsyncTableService { private static final long serialVersionUID = 1L; private static final Logger LOG = LogManager.getLogger(AsyncClusteringService.class); private final int maxConcurrentClustering; - private transient AbstractClusteringClient clusteringClient; + private transient BaseClusterer clusteringClient; - public AsyncClusteringService(AbstractHoodieWriteClient writeClient) { + public AsyncClusteringService(BaseHoodieWriteClient writeClient) { this(writeClient, false); } - public AsyncClusteringService(AbstractHoodieWriteClient writeClient, boolean runInDaemonMode) { - super(runInDaemonMode); + public AsyncClusteringService(BaseHoodieWriteClient writeClient, boolean runInDaemonMode) { + super(writeClient.getConfig(), runInDaemonMode); this.clusteringClient = createClusteringClient(writeClient); this.maxConcurrentClustering = 1; } - protected abstract AbstractClusteringClient createClusteringClient(AbstractHoodieWriteClient client); + protected abstract BaseClusterer createClusteringClient(BaseHoodieWriteClient client); /** * Start clustering service. @@ -82,10 +82,16 @@ protected Pair startService() { } LOG.info("Clustering executor shutting down properly"); } catch (InterruptedException ie) { + hasError = true; LOG.warn("Clustering executor got interrupted exception! 
Stopping", ie); } catch (IOException e) { - LOG.error("Clustering executor failed", e); + hasError = true; + LOG.error("Clustering executor failed due to IOException", e); throw new HoodieIOException(e.getMessage(), e); + } catch (Exception e) { + hasError = true; + LOG.error("Clustering executor failed", e); + throw e; } return true; }, executor)).toArray(CompletableFuture[]::new)), executor); @@ -94,7 +100,7 @@ protected Pair startService() { /** * Update the write client to be used for clustering. */ - public synchronized void updateWriteClient(AbstractHoodieWriteClient writeClient) { + public synchronized void updateWriteClient(BaseHoodieWriteClient writeClient) { this.clusteringClient.updateWriteClient(writeClient); } } diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/async/AsyncCompactService.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/async/AsyncCompactService.java index 2f63297210e14..f1f7f416e466c 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/async/AsyncCompactService.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/async/AsyncCompactService.java @@ -17,8 +17,8 @@ package org.apache.hudi.async; -import org.apache.hudi.client.AbstractCompactor; -import org.apache.hudi.client.AbstractHoodieWriteClient; +import org.apache.hudi.client.BaseCompactor; +import org.apache.hudi.client.BaseHoodieWriteClient; import org.apache.hudi.common.engine.EngineProperty; import org.apache.hudi.common.engine.HoodieEngineContext; import org.apache.hudi.common.table.timeline.HoodieInstant; @@ -37,7 +37,7 @@ /** * Async Compactor Service that runs in separate thread. Currently, only one compactor is allowed to run at any time. */ -public abstract class AsyncCompactService extends HoodieAsyncService { +public abstract class AsyncCompactService extends HoodieAsyncTableService { private static final long serialVersionUID = 1L; private static final Logger LOG = LogManager.getLogger(AsyncCompactService.class); @@ -48,21 +48,21 @@ public abstract class AsyncCompactService extends HoodieAsyncService { public static final String COMPACT_POOL_NAME = "hoodiecompact"; private final int maxConcurrentCompaction; - private transient AbstractCompactor compactor; + private transient BaseCompactor compactor; protected transient HoodieEngineContext context; - public AsyncCompactService(HoodieEngineContext context, AbstractHoodieWriteClient client) { + public AsyncCompactService(HoodieEngineContext context, BaseHoodieWriteClient client) { this(context, client, false); } - public AsyncCompactService(HoodieEngineContext context, AbstractHoodieWriteClient client, boolean runInDaemonMode) { - super(runInDaemonMode); + public AsyncCompactService(HoodieEngineContext context, BaseHoodieWriteClient client, boolean runInDaemonMode) { + super(client.getConfig(), runInDaemonMode); this.context = context; this.compactor = createCompactor(client); this.maxConcurrentCompaction = 1; } - protected abstract AbstractCompactor createCompactor(AbstractHoodieWriteClient client); + protected abstract BaseCompactor createCompactor(BaseHoodieWriteClient client); /** * Start Compaction Service. @@ -92,10 +92,16 @@ protected Pair startService() { } LOG.info("Compactor shutting down properly!!"); } catch (InterruptedException ie) { + hasError = true; LOG.warn("Compactor executor thread got interrupted exception. 
Stopping", ie); } catch (IOException e) { - LOG.error("Compactor executor failed", e); + hasError = true; + LOG.error("Compactor executor failed due to IOException", e); throw new HoodieIOException(e.getMessage(), e); + } catch (Exception e) { + hasError = true; + LOG.error("Compactor executor failed", e); + throw e; } return true; }, executor)).toArray(CompletableFuture[]::new)), executor); @@ -110,7 +116,7 @@ protected boolean shouldStopCompactor() { return false; } - public synchronized void updateWriteClient(AbstractHoodieWriteClient writeClient) { + public synchronized void updateWriteClient(BaseHoodieWriteClient writeClient) { this.compactor.updateWriteClient(writeClient); } } diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/async/HoodieAsyncService.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/async/HoodieAsyncService.java index f57484d886c9b..1ce6dfb288d62 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/async/HoodieAsyncService.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/async/HoodieAsyncService.java @@ -36,12 +36,15 @@ import java.util.function.Function; /** - * Base Class for running clean/delta-sync/compaction/clustering in separate thread and controlling their life-cycle. + * Base Class for running archive/clean/delta-sync/compaction/clustering in separate thread and controlling their life-cycles. */ public abstract class HoodieAsyncService implements Serializable { private static final Logger LOG = LogManager.getLogger(HoodieAsyncService.class); + private static final long POLLING_SECONDS = 10; + // Flag indicating whether an error is incurred in the service + protected boolean hasError; // Flag to track if the service is started. private boolean started; // Flag indicating shutdown is externally requested @@ -70,21 +73,32 @@ protected HoodieAsyncService(boolean runInDaemonMode) { this.runInDaemonMode = runInDaemonMode; } - protected boolean isShutdownRequested() { + public boolean isStarted() { + return started; + } + + public boolean isShutdownRequested() { return shutdownRequested; } - protected boolean isShutdown() { + public boolean isShutdown() { return shutdown; } + public boolean hasError() { + return hasError; + } + /** * Wait till the service shutdown. If the service shutdown with exception, it will be thrown - * + * * @throws ExecutionException * @throws InterruptedException */ public void waitForShutdown() throws ExecutionException, InterruptedException { + if (future == null) { + return; + } try { future.get(); } catch (ExecutionException ex) { @@ -102,6 +116,7 @@ public void waitForShutdown() throws ExecutionException, InterruptedException { public void shutdown(boolean force) { if (!shutdownRequested || force) { shutdownRequested = true; + shutdown = true; if (executor != null) { if (force) { executor.shutdownNow(); @@ -125,6 +140,10 @@ public void shutdown(boolean force) { * @param onShutdownCallback */ public void start(Function onShutdownCallback) { + if (started) { + LOG.warn("The async service already started."); + return; + } Pair res = startService(); future = res.getKey(); executor = res.getValue(); @@ -134,8 +153,6 @@ public void start(Function onShutdownCallback) { /** * Service implementation. 
- * - * @return */ protected abstract Pair startService(); @@ -146,6 +163,9 @@ public void start(Function onShutdownCallback) { */ @SuppressWarnings("unchecked") private void shutdownCallback(Function callback) { + if (future == null) { + return; + } future.whenComplete((resp, error) -> { if (null != callback) { callback.apply(null != error); @@ -166,8 +186,8 @@ public boolean isRunInDaemonMode() { public void waitTillPendingAsyncServiceInstantsReducesTo(int numPending) throws InterruptedException { try { queueLock.lock(); - while (!isShutdown() && (pendingInstants.size() > numPending)) { - consumed.await(); + while (!isShutdown() && !hasError() && (pendingInstants.size() > numPending)) { + consumed.await(POLLING_SECONDS, TimeUnit.SECONDS); } } finally { queueLock.unlock(); @@ -190,8 +210,8 @@ public void enqueuePendingAsyncServiceInstant(HoodieInstant instant) { * @throws InterruptedException */ HoodieInstant fetchNextAsyncServiceInstant() throws InterruptedException { - LOG.info("Waiting for next instant upto 10 seconds"); - HoodieInstant instant = pendingInstants.poll(10, TimeUnit.SECONDS); + LOG.info(String.format("Waiting for next instant up to %d seconds", POLLING_SECONDS)); + HoodieInstant instant = pendingInstants.poll(POLLING_SECONDS, TimeUnit.SECONDS); if (instant != null) { try { queueLock.lock(); diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/async/HoodieAsyncTableService.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/async/HoodieAsyncTableService.java new file mode 100644 index 0000000000000..6a53d30063c1d --- /dev/null +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/async/HoodieAsyncTableService.java @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
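One detail of the hunk above deserves emphasis: `waitTillPendingAsyncServiceInstantsReducesTo` now re-checks shutdown and error state every `POLLING_SECONDS` instead of blocking indefinitely on `consumed.await()`, so a worker that fails after setting `hasError` can no longer hang its waiters. A self-contained sketch of the pattern (field names mirror `HoodieAsyncService`, but the class itself is illustrative and simplifies the pending-instants queue to a counter):

```java
import java.util.concurrent.TimeUnit;
import java.util.concurrent.locks.Condition;
import java.util.concurrent.locks.ReentrantLock;

// Bounded-wait sketch: the loop wakes up periodically even when no producer
// signals the condition, so shutdown/error transitions are always observed.
class BoundedWaitSketch {
  private static final long POLLING_SECONDS = 10;
  private final ReentrantLock queueLock = new ReentrantLock();
  private final Condition consumed = queueLock.newCondition();
  private volatile boolean shutdown;
  private volatile boolean hasError;
  private volatile int pendingInstants; // stands in for the real pending-instants queue

  void waitTillPendingReducesTo(int numPending) throws InterruptedException {
    queueLock.lock();
    try {
      while (!shutdown && !hasError && pendingInstants > numPending) {
        consumed.await(POLLING_SECONDS, TimeUnit.SECONDS); // timed wait, not await()
      }
    } finally {
      queueLock.unlock();
    }
  }
}
```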
+ */ + +package org.apache.hudi.async; + +import org.apache.hudi.client.RunsTableService; +import org.apache.hudi.config.HoodieWriteConfig; + +import java.util.function.Function; + +public abstract class HoodieAsyncTableService extends HoodieAsyncService implements RunsTableService { + + protected HoodieWriteConfig writeConfig; + + protected HoodieAsyncTableService() { + } + + protected HoodieAsyncTableService(HoodieWriteConfig writeConfig) { + this.writeConfig = writeConfig; + } + + protected HoodieAsyncTableService(HoodieWriteConfig writeConfig, boolean runInDaemonMode) { + super(runInDaemonMode); + this.writeConfig = writeConfig; + } + + @Override + public void start(Function onShutdownCallback) { + if (!tableServicesEnabled(writeConfig)) { + return; + } + super.start(onShutdownCallback); + } +} diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/AbstractClusteringClient.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/BaseClusterer.java similarity index 80% rename from hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/AbstractClusteringClient.java rename to hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/BaseClusterer.java index 34234f546ed19..648ce805b0825 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/AbstractClusteringClient.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/BaseClusterer.java @@ -28,13 +28,13 @@ /** * Client will run one round of clustering. */ -public abstract class AbstractClusteringClient implements Serializable { +public abstract class BaseClusterer implements Serializable { private static final long serialVersionUID = 1L; - protected transient AbstractHoodieWriteClient clusteringClient; + protected transient BaseHoodieWriteClient clusteringClient; - public AbstractClusteringClient(AbstractHoodieWriteClient clusteringClient) { + public BaseClusterer(BaseHoodieWriteClient clusteringClient) { this.clusteringClient = clusteringClient; } @@ -49,7 +49,7 @@ public AbstractClusteringClient(AbstractHoodieWriteClient clustering * Update the write client used by async clustering. * @param writeClient */ - public void updateWriteClient(AbstractHoodieWriteClient writeClient) { + public void updateWriteClient(BaseHoodieWriteClient writeClient) { this.clusteringClient = writeClient; } } diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/AbstractCompactor.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/BaseCompactor.java similarity index 78% rename from hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/AbstractCompactor.java rename to hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/BaseCompactor.java index c80b34a3ef656..88737dbcf1d7e 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/AbstractCompactor.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/BaseCompactor.java @@ -27,19 +27,19 @@ /** * Run one round of compaction. 
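Because the new base class overrides `start()` to consult `tableServicesEnabled(writeConfig)` before delegating to `HoodieAsyncService`, every async table service inherits the global kill switch without extra code. A minimal, hypothetical subclass, assuming `startService()` keeps the `Pair<CompletableFuture, ExecutorService>` contract used by the services above:

```java
import org.apache.hudi.async.HoodieAsyncTableService;
import org.apache.hudi.common.util.collection.Pair;
import org.apache.hudi.config.HoodieWriteConfig;

import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

// Hypothetical no-op service, not part of the patch: when table services are
// globally disabled, start() returns early and this executor is never used.
class NoOpTableService extends HoodieAsyncTableService {
  private final transient ExecutorService executor = Executors.newSingleThreadExecutor();

  NoOpTableService(HoodieWriteConfig config) {
    super(config);
  }

  @Override
  protected Pair<CompletableFuture, ExecutorService> startService() {
    return Pair.of(CompletableFuture.supplyAsync(() -> true, executor), executor);
  }
}
```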
*/ -public abstract class AbstractCompactor implements Serializable { +public abstract class BaseCompactor implements Serializable { private static final long serialVersionUID = 1L; - protected transient AbstractHoodieWriteClient compactionClient; + protected transient BaseHoodieWriteClient compactionClient; - public AbstractCompactor(AbstractHoodieWriteClient compactionClient) { + public BaseCompactor(BaseHoodieWriteClient compactionClient) { this.compactionClient = compactionClient; } public abstract void compact(HoodieInstant instant) throws IOException; - public void updateWriteClient(AbstractHoodieWriteClient writeClient) { + public void updateWriteClient(BaseHoodieWriteClient writeClient) { this.compactionClient = writeClient; } diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/AbstractHoodieClient.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/BaseHoodieClient.java similarity index 91% rename from hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/AbstractHoodieClient.java rename to hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/BaseHoodieClient.java index 350fe0c9bf7e0..3f208a0f86a09 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/AbstractHoodieClient.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/BaseHoodieClient.java @@ -40,9 +40,9 @@ * Abstract class taking care of holding common member variables (FileSystem, SparkContext, HoodieConfigs) Also, manages * embedded timeline-server if enabled. */ -public abstract class AbstractHoodieClient implements Serializable, AutoCloseable { +public abstract class BaseHoodieClient implements Serializable, AutoCloseable { - private static final Logger LOG = LogManager.getLogger(AbstractHoodieClient.class); + private static final Logger LOG = LogManager.getLogger(BaseHoodieClient.class); protected final transient FileSystem fs; protected final transient HoodieEngineContext context; @@ -59,11 +59,11 @@ public abstract class AbstractHoodieClient implements Serializable, AutoCloseabl private transient Option timelineServer; private final boolean shouldStopTimelineServer; - protected AbstractHoodieClient(HoodieEngineContext context, HoodieWriteConfig clientConfig) { + protected BaseHoodieClient(HoodieEngineContext context, HoodieWriteConfig clientConfig) { this(context, clientConfig, Option.empty()); } - protected AbstractHoodieClient(HoodieEngineContext context, HoodieWriteConfig clientConfig, + protected BaseHoodieClient(HoodieEngineContext context, HoodieWriteConfig clientConfig, Option timelineServer) { this.hadoopConf = context.getHadoopConf().get(); this.fs = FSUtils.getFs(clientConfig.getBasePath(), hadoopConf); @@ -134,7 +134,8 @@ protected void initWrapperFSMetrics() { protected HoodieTableMetaClient createMetaClient(boolean loadActiveTimelineOnLoad) { return HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(config.getBasePath()) .setLoadActiveTimelineOnLoad(loadActiveTimelineOnLoad).setConsistencyGuardConfig(config.getConsistencyGuardConfig()) - .setLayoutVersion(Option.of(new TimelineLayoutVersion(config.getTimelineLayoutVersion()))).build(); + .setLayoutVersion(Option.of(new TimelineLayoutVersion(config.getTimelineLayoutVersion()))) + .setFileSystemRetryConfig(config.getFileSystemRetryConfig()).build(); } public Option getTimelineServer() { diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/AbstractHoodieWriteClient.java 
b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/BaseHoodieWriteClient.java similarity index 87% rename from hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/AbstractHoodieWriteClient.java rename to hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/BaseHoodieWriteClient.java index c9162de9e9ca1..7b67ff54a2aa5 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/AbstractHoodieWriteClient.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/BaseHoodieWriteClient.java @@ -18,11 +18,14 @@ package org.apache.hudi.client; +import org.apache.hudi.async.AsyncArchiveService; +import org.apache.hudi.async.AsyncCleanerService; import org.apache.hudi.avro.model.HoodieCleanMetadata; import org.apache.hudi.avro.model.HoodieCleanerPlan; import org.apache.hudi.avro.model.HoodieClusteringPlan; import org.apache.hudi.avro.model.HoodieCompactionPlan; import org.apache.hudi.avro.model.HoodieRestoreMetadata; +import org.apache.hudi.avro.model.HoodieRestorePlan; import org.apache.hudi.avro.model.HoodieRollbackMetadata; import org.apache.hudi.avro.model.HoodieRollbackPlan; import org.apache.hudi.callback.HoodieWriteCommitCallback; @@ -66,7 +69,6 @@ import org.apache.hudi.metrics.HoodieMetrics; import org.apache.hudi.table.BulkInsertPartitioner; import org.apache.hudi.table.HoodieTable; -import org.apache.hudi.table.HoodieTimelineArchiveLog; import org.apache.hudi.table.action.HoodieWriteMetadata; import org.apache.hudi.table.action.rollback.RollbackUtils; import org.apache.hudi.table.action.savepoint.SavepointHelpers; @@ -98,14 +100,15 @@ * @param Type of keys * @param Type of outputs */ -public abstract class AbstractHoodieWriteClient extends AbstractHoodieClient { +public abstract class BaseHoodieWriteClient extends BaseHoodieClient + implements RunsTableService { protected static final String LOOKUP_STR = "lookup"; private static final long serialVersionUID = 1L; - private static final Logger LOG = LogManager.getLogger(AbstractHoodieWriteClient.class); + private static final Logger LOG = LogManager.getLogger(BaseHoodieWriteClient.class); protected final transient HoodieMetrics metrics; - private final transient HoodieIndex index; + private final transient HoodieIndex index; protected transient Timer.Context writeTimer = null; protected transient Timer.Context compactionTimer; @@ -114,6 +117,7 @@ public abstract class AbstractHoodieWriteClient>> lastCompletedTxnAndMetadata = Option.empty(); @@ -123,7 +127,7 @@ public abstract class AbstractHoodieWriteClient timelineService) { super(context, writeConfig, timelineService); this.metrics = new HoodieMetrics(config); @@ -142,7 +146,7 @@ public AbstractHoodieWriteClient(HoodieEngineContext context, HoodieWriteConfig this.txnManager = new TransactionManager(config, fs); } - protected abstract HoodieIndex createIndex(HoodieWriteConfig writeConfig); + protected abstract HoodieIndex createIndex(HoodieWriteConfig writeConfig); public void setOperationType(WriteOperationType operationType) { this.operationType = operationType; @@ -359,7 +363,7 @@ public void rollbackFailedBootstrap() { * table for the very first time (e.g: converting an existing table to Hoodie). *

* This implementation uses sortBy (which does range partitioning based on reservoir sampling) and attempts to control - * the numbers of files with less memory compared to the {@link AbstractHoodieWriteClient#insert(I, String)} + * the numbers of files with less memory compared to the {@link BaseHoodieWriteClient#insert(I, String)} * * @param records HoodieRecords to insert * @param instantTime Instant time of the commit @@ -372,7 +376,7 @@ public void rollbackFailedBootstrap() { * table for the very first time (e.g: converting an existing table to Hoodie). *

* This implementation uses sortBy (which does range partitioning based on reservoir sampling) and attempts to control - * the numbers of files with less memory compared to the {@link AbstractHoodieWriteClient#insert(I, String)}. Optionally + * the numbers of files with less memory compared to the {@link BaseHoodieWriteClient#insert(I, String)}. Optionally * it allows users to specify their own partitioner. If specified then it will be used for repartitioning records. See * {@link BulkInsertPartitioner}. * @@ -392,7 +396,7 @@ public abstract O bulkInsert(I records, final String instantTime, * duplicates if needed. *

* This implementation uses sortBy (which does range partitioning based on reservoir sampling) and attempts to control - * the numbers of files with less memory compared to the {@link AbstractHoodieWriteClient#insert(I, String)}. Optionally + * the numbers of files with less memory compared to the {@link BaseHoodieWriteClient#insert(I, String)}. Optionally * it allows users to specify their own partitioner. If specified then it will be used for repartitioning records. See * {@link BulkInsertPartitioner}. * @@ -430,6 +434,11 @@ protected void preWrite(String instantTime, WriteOperationType writeOperationTyp } else { this.asyncCleanerService.start(null); } + if (null == this.asyncArchiveService) { + this.asyncArchiveService = AsyncArchiveService.startAsyncArchiveIfEnabled(this); + } else { + this.asyncArchiveService.start(null); + } } /** @@ -455,16 +464,17 @@ protected void postCommit(HoodieTable table, HoodieCommitMetadata me WriteMarkersFactory.get(config.getMarkersType(), table, instantTime) .quietDeleteMarkerDir(context, config.getMarkersDeleteParallelism()); autoCleanOnCommit(); - if (config.isAutoArchive()) { - archive(table); - } + autoArchiveOnCommit(table); } finally { this.heartbeatClient.stop(instantTime); } } protected void runTableServicesInline(HoodieTable table, HoodieCommitMetadata metadata, Option> extraMetadata) { - if (config.areAnyTableServicesInline()) { + if (!tableServicesEnabled(config)) { + return; + } + if (config.areAnyTableServicesExecutedInline() || config.areAnyTableServicesScheduledInline()) { if (config.isMetadataTableEnabled()) { table.getHoodieView().sync(); } @@ -472,19 +482,35 @@ protected void runTableServicesInline(HoodieTable table, HoodieCommi if (config.inlineCompactionEnabled()) { runAnyPendingCompactions(table); metadata.addMetadata(HoodieCompactionConfig.INLINE_COMPACT.key(), "true"); - inlineCompact(extraMetadata); + inlineCompaction(extraMetadata); } else { metadata.addMetadata(HoodieCompactionConfig.INLINE_COMPACT.key(), "false"); } + // if just inline schedule is enabled + if (!config.inlineCompactionEnabled() && config.scheduleInlineCompaction() + && !table.getActiveTimeline().getWriteTimeline().filterPendingCompactionTimeline().getInstants().findAny().isPresent()) { + // proceed only if there are no pending compactions + metadata.addMetadata(HoodieCompactionConfig.SCHEDULE_INLINE_COMPACT.key(), "true"); + inlineScheduleCompaction(extraMetadata); + } + // Do an inline clustering if enabled if (config.inlineClusteringEnabled()) { runAnyPendingClustering(table); metadata.addMetadata(HoodieClusteringConfig.INLINE_CLUSTERING.key(), "true"); - inlineCluster(extraMetadata); + inlineClustering(extraMetadata); } else { metadata.addMetadata(HoodieClusteringConfig.INLINE_CLUSTERING.key(), "false"); } + + // if just inline schedule is enabled + if (!config.inlineClusteringEnabled() && config.scheduleInlineClustering() + && !table.getActiveTimeline().filterPendingReplaceTimeline().getInstants().findAny().isPresent()) { + // proceed only if there are no pending clustering + metadata.addMetadata(HoodieClusteringConfig.SCHEDULE_INLINE_CLUSTERING.key(), "true"); + inlineScheduleClustering(extraMetadata); + } } } @@ -506,22 +532,34 @@ protected void runAnyPendingClustering(HoodieTable table) { }); } - /** - * Handle auto clean during commit. 
- * - */ protected void autoCleanOnCommit() { - if (config.isAutoClean()) { - // Call clean to cleanup if there is anything to cleanup after the commit, - if (config.isAsyncClean()) { - LOG.info("Cleaner has been spawned already. Waiting for it to finish"); - AsyncCleanerService.waitForCompletion(asyncCleanerService); - LOG.info("Cleaner has finished"); - } else { - // Do not reuse instantTime for clean as metadata table requires all changes to have unique instant timestamps. - LOG.info("Auto cleaning is enabled. Running cleaner now"); - clean(true); - } + if (!config.isAutoClean()) { + return; + } + + if (config.isAsyncClean()) { + LOG.info("Async cleaner has been spawned. Waiting for it to finish"); + AsyncCleanerService.waitForCompletion(asyncCleanerService); + LOG.info("Async cleaner has finished"); + } else { + LOG.info("Starting to clean synchronously."); + // Do not reuse instantTime for clean as metadata table requires all changes to have unique instant timestamps. + clean(true); + } + } + + protected void autoArchiveOnCommit(HoodieTable table) { + if (!config.isAutoArchive()) { + return; + } + + if (config.isAsyncArchive()) { + LOG.info("Async archiver has been spawned. Waiting for it to finish"); + AsyncArchiveService.waitForCompletion(asyncArchiveService); + LOG.info("Async archiver has finished"); + } else { + LOG.info("Starting to archive synchronously."); + archive(table); } } @@ -606,7 +644,7 @@ public boolean rollback(final String commitInstantTime) throws HoodieRollbackExc /** * @Deprecated * Rollback the inflight record changes with the given commit time. This - * will be removed in future in favor of {@link AbstractHoodieWriteClient#restoreToInstant(String)} + * will be removed in the future in favor of {@link BaseHoodieWriteClient#restoreToInstant(String)} * Adding this API for backwards compatibility. * @param commitInstantTime Instant time of the commit * @param skipLocking if this is triggered by another parent transaction, locking can be skipped. @@ -620,7 +658,7 @@ public boolean rollback(final String commitInstantTime, boolean skipLocking) thr /** * @Deprecated * Rollback the inflight record changes with the given commit time. This - * will be removed in future in favor of {@link AbstractHoodieWriteClient#restoreToInstant(String)} + * will be removed in the future in favor of {@link BaseHoodieWriteClient#restoreToInstant(String)} * * @param commitInstantTime Instant time of the commit * @param pendingRollbackInfo pending rollback instant and plan if rollback failed from previous attempt.
@@ -639,8 +677,8 @@ public boolean rollback(final String commitInstantTime, Option rollbackPlanOption = pendingRollbackInfo.map(entry -> Option.of(entry.getRollbackPlan())).orElse(table.scheduleRollback(context, rollbackInstantTime, - commitInstantOpt.get(), false, config.shouldRollbackUsingMarkers())); + Option rollbackPlanOption = pendingRollbackInfo.map(entry -> Option.of(entry.getRollbackPlan())) + .orElseGet(() -> table.scheduleRollback(context, rollbackInstantTime, commitInstantOpt.get(), false, config.shouldRollbackUsingMarkers())); if (rollbackPlanOption.isPresent()) { // execute rollback HoodieRollbackMetadata rollbackMetadata = table.rollback(context, rollbackInstantTime, commitInstantOpt.get(), true, @@ -674,16 +712,21 @@ public HoodieRestoreMetadata restoreToInstant(final String instantTime) throws H Timer.Context timerContext = metrics.getRollbackCtx(); try { HoodieTable table = createTable(config, hadoopConf, config.isMetadataTableEnabled()); - HoodieRestoreMetadata restoreMetadata = table.restore(context, restoreInstantTime, instantTime); - if (timerContext != null) { - final long durationInMs = metrics.getDurationInMs(timerContext.stop()); - final long totalFilesDeleted = restoreMetadata.getHoodieRestoreMetadata().values().stream() - .flatMap(Collection::stream) - .mapToLong(HoodieRollbackMetadata::getTotalFilesDeleted) - .sum(); - metrics.updateRollbackMetrics(durationInMs, totalFilesDeleted); + Option restorePlanOption = table.scheduleRestore(context, restoreInstantTime, instantTime); + if (restorePlanOption.isPresent()) { + HoodieRestoreMetadata restoreMetadata = table.restore(context, restoreInstantTime, instantTime); + if (timerContext != null) { + final long durationInMs = metrics.getDurationInMs(timerContext.stop()); + final long totalFilesDeleted = restoreMetadata.getHoodieRestoreMetadata().values().stream() + .flatMap(Collection::stream) + .mapToLong(HoodieRollbackMetadata::getTotalFilesDeleted) + .sum(); + metrics.updateRollbackMetrics(durationInMs, totalFilesDeleted); + } + return restoreMetadata; + } else { + throw new HoodieRestoreException("Failed to restore " + config.getBasePath() + " to commit " + instantTime); } - return restoreMetadata; } catch (Exception e) { throw new HoodieRestoreException("Failed to restore to " + instantTime, e); } @@ -714,28 +757,38 @@ public HoodieCleanMetadata clean(String cleanInstantTime, boolean skipLocking) t * Clean up any stale/old files/data lying around (either on file storage or index storage) based on the * configurations and CleaningPolicy used. (typically files that no longer can be used by a running query can be * cleaned). This API provides the flexibility to schedule clean instant asynchronously via - * {@link AbstractHoodieWriteClient#scheduleTableService(String, Option, TableServiceType)} and disable inline scheduling + * {@link BaseHoodieWriteClient#scheduleTableService(String, Option, TableServiceType)} and disable inline scheduling * of clean. * @param cleanInstantTime instant time for clean. * @param scheduleInline true if needs to be scheduled inline. false otherwise. * @param skipLocking if this is triggered by another parent transaction, locking can be skipped. 
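Restore now follows the same schedule-then-execute shape as the other table services: a `HoodieRestorePlan` is scheduled first, and execution proceeds only when a plan was actually produced. A condensed, illustrative rendering of that flow (metrics omitted; a raw `HoodieTable` is used for brevity):

```java
import org.apache.hudi.avro.model.HoodieRestoreMetadata;
import org.apache.hudi.avro.model.HoodieRestorePlan;
import org.apache.hudi.common.engine.HoodieEngineContext;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.exception.HoodieRestoreException;
import org.apache.hudi.table.HoodieTable;

// Two-phase restore sketch: schedule a plan, then execute it, surfacing the
// absence of a plan as an explicit failure instead of restoring blindly.
class TwoPhaseRestoreSketch {
  static HoodieRestoreMetadata restore(HoodieTable table, HoodieEngineContext context,
                                       String restoreInstantTime, String instantTime) {
    Option<HoodieRestorePlan> plan = table.scheduleRestore(context, restoreInstantTime, instantTime);
    if (!plan.isPresent()) {
      throw new HoodieRestoreException("Failed to restore to commit " + instantTime);
    }
    return table.restore(context, restoreInstantTime, instantTime);
  }
}
```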
*/ public HoodieCleanMetadata clean(String cleanInstantTime, boolean scheduleInline, boolean skipLocking) throws HoodieIOException { - if (scheduleInline) { - scheduleTableServiceInternal(cleanInstantTime, Option.empty(), TableServiceType.CLEAN); + if (!tableServicesEnabled(config)) { + return null; } - LOG.info("Cleaner started"); final Timer.Context timerContext = metrics.getCleanCtx(); - LOG.info("Cleaned failed attempts if any"); CleanerUtils.rollbackFailedWrites(config.getFailedWritesCleanPolicy(), HoodieTimeline.CLEAN_ACTION, () -> rollbackFailedWrites(skipLocking)); - HoodieCleanMetadata metadata = createTable(config, hadoopConf).clean(context, cleanInstantTime, skipLocking); - if (timerContext != null && metadata != null) { - long durationMs = metrics.getDurationInMs(timerContext.stop()); - metrics.updateCleanMetrics(durationMs, metadata.getTotalFilesDeleted()); - LOG.info("Cleaned " + metadata.getTotalFilesDeleted() + " files" - + " Earliest Retained Instant :" + metadata.getEarliestCommitToRetain() - + " cleanerElapsedMs" + durationMs); + + HoodieCleanMetadata metadata = null; + HoodieTable table = createTable(config, hadoopConf); + if (config.allowMultipleCleans() || !table.getActiveTimeline().getCleanerTimeline().filterInflightsAndRequested().firstInstant().isPresent()) { + LOG.info("Cleaner started"); + // proceed only if multiple clean schedules are enabled or if there are no pending cleans. + if (scheduleInline) { + scheduleTableServiceInternal(cleanInstantTime, Option.empty(), TableServiceType.CLEAN); + table.getMetaClient().reloadActiveTimeline(); + } + + metadata = table.clean(context, cleanInstantTime, skipLocking); + if (timerContext != null && metadata != null) { + long durationMs = metrics.getDurationInMs(timerContext.stop()); + metrics.updateCleanMetrics(durationMs, metadata.getTotalFilesDeleted()); + LOG.info("Cleaned " + metadata.getTotalFilesDeleted() + " files" + + " Earliest Retained Instant :" + metadata.getEarliestCommitToRetain() + + " cleanerElapsedMs" + durationMs); + } } return metadata; } @@ -760,10 +813,13 @@ public HoodieCleanMetadata clean(boolean skipLocking) { * @param table table to commit on. */ protected void archive(HoodieTable table) { + if (!tableServicesEnabled(config)) { + return; + } try { // We cannot have unbounded commit files. Archive commits if we have to archive - HoodieTimelineArchiveLog archiveLog = new HoodieTimelineArchiveLog(config, table); - archiveLog.archiveIfRequired(context); + HoodieTimelineArchiver archiver = new HoodieTimelineArchiver(config, table); + archiver.archiveIfRequired(context); } catch (IOException ioe) { throw new HoodieIOException("Failed to archive", ioe); } @@ -861,7 +917,7 @@ public boolean scheduleCompactionAtInstant(String instantTime, Option compact(String compactionInstantTime) { return compact(compactionInstantTime, config.shouldAutoCommit()); } @@ -869,17 +925,16 @@ public O compact(String compactionInstantTime) { * Commit a compaction operation. Allow passing additional meta-data to be stored in commit instant file. 
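The reworked `clean()` above also guards against piling up clean schedules: a new clean is only planned when multiple cleans are explicitly allowed or when nothing is already pending. That guard, extracted into a standalone helper for clarity (illustrative, using the timeline calls from the hunk above):

```java
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.table.HoodieTable;

// Returns true when a clean may be scheduled: either multiple concurrent
// clean schedules are allowed, or no clean action is currently in the
// requested or inflight state on the active timeline.
class CleanGuardSketch {
  static boolean canScheduleClean(HoodieWriteConfig config, HoodieTable table) {
    return config.allowMultipleCleans()
        || !table.getActiveTimeline().getCleanerTimeline()
            .filterInflightsAndRequested().firstInstant().isPresent();
  }
}
```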
* * @param compactionInstantTime Compaction Instant Time - * @param writeStatuses Collection of WriteStatus to inspect errors and counts + * @param metadata All the metadata that gets stored along with a commit * @param extraMetadata Extra Metadata to be stored */ - public abstract void commitCompaction(String compactionInstantTime, O writeStatuses, - Option> extraMetadata) throws IOException; + public abstract void commitCompaction(String compactionInstantTime, HoodieCommitMetadata metadata, + Option> extraMetadata); /** * Commit Compaction and track metrics. */ - protected abstract void completeCompaction(HoodieCommitMetadata metadata, O writeStatuses, - HoodieTable table, String compactionCommitTime); + protected abstract void completeCompaction(HoodieCommitMetadata metadata, HoodieTable table, String compactionCommitTime); /** * Get inflight timeline excluding compaction and clustering. @@ -1001,13 +1056,14 @@ protected List getInstantsToRollback(HoodieTableMetaClient metaClient, H * @param compactionInstantTime Compaction Instant Time * @return Collection of Write Status */ - protected abstract O compact(String compactionInstantTime, boolean shouldComplete); + protected abstract HoodieWriteMetadata compact(String compactionInstantTime, boolean shouldComplete); /** * Performs a compaction operation on a table, serially before or after an insert/upsert action. + * Scheduling and execution are done inline. */ - protected Option inlineCompact(Option> extraMetadata) { - Option compactionInstantTimeOpt = scheduleCompaction(extraMetadata); + protected Option inlineCompaction(Option> extraMetadata) { + Option compactionInstantTimeOpt = inlineScheduleCompaction(extraMetadata); compactionInstantTimeOpt.ifPresent(compactInstantTime -> { // inline compaction should auto commit as the user is never given control compact(compactInstantTime, true); @@ -1015,6 +1071,15 @@ protected Option inlineCompact(Option> extraMetadata return compactionInstantTimeOpt; } + /** + * Schedules compaction inline. + * @param extraMetadata extra metadata to be used. + * @return compaction instant if scheduled. + */ + protected Option inlineScheduleCompaction(Option> extraMetadata) { + return scheduleCompaction(extraMetadata); + } + /** * Schedules a new clustering instant. * @param extraMetadata Extra Metadata to be stored @@ -1093,7 +1158,13 @@ public Option scheduleTableService(String instantTime, Option scheduleTableServiceInternal(String instantTime, Option> extraMetadata, TableServiceType tableServiceType) { + if (!tableServicesEnabled(config)) { + return Option.empty(); + } switch (tableServiceType) { + case ARCHIVE: + LOG.info("Scheduling archiving is not supported. Skipping."); + return Option.empty(); case CLUSTER: LOG.info("Scheduling clustering at instant time :" + instantTime); Option clusteringPlan = createTable(config, hadoopConf, config.isMetadataTableEnabled()) @@ -1116,9 +1187,10 @@ private Option scheduleTableServiceInternal(String instantTime, Option inlineCluster(Option> extraMetadata) { - Option clusteringInstantOpt = scheduleClustering(extraMetadata); + protected Option inlineClustering(Option> extraMetadata) { + Option clusteringInstantOpt = inlineScheduleClustering(extraMetadata); clusteringInstantOpt.ifPresent(clusteringInstant -> { // inline cluster should auto commit as the user is never given control cluster(clusteringInstant, true); @@ -1126,6 +1198,15 @@ protected Option inlineCluster(Option> extraMetadata return clusteringInstantOpt; } + /** + * Schedules clustering inline.
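With scheduling split out into `inlineScheduleCompaction` and `inlineScheduleClustering`, a writer can plan table services inline after each commit while deferring execution to a separate job. A sketch of the corresponding writer properties, using the config constants referenced in these hunks (the combination shown is illustrative):

```java
import java.util.Properties;

import org.apache.hudi.config.HoodieClusteringConfig;
import org.apache.hudi.config.HoodieCompactionConfig;

// "Schedule only" mode: plans are written to the timeline inline, but the
// pending instants are left for an async compaction/clustering job to execute.
class ScheduleInlineConfigSketch {
  static Properties scheduleOnlyProps() {
    Properties props = new Properties();
    props.setProperty(HoodieCompactionConfig.INLINE_COMPACT.key(), "false");
    props.setProperty(HoodieCompactionConfig.SCHEDULE_INLINE_COMPACT.key(), "true");
    props.setProperty(HoodieClusteringConfig.INLINE_CLUSTERING.key(), "false");
    props.setProperty(HoodieClusteringConfig.SCHEDULE_INLINE_CLUSTERING.key(), "true");
    return props;
  }
}
```

Note that, per the hunks above, inline scheduling only proceeds when no compaction or clustering instant is already pending on the timeline.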
+ * @param extraMetadata extra metadata to use. + * @return clustering instant if scheduled. + */ + protected Option inlineScheduleClustering(Option> extraMetadata) { + return scheduleClustering(extraMetadata); + } + protected void rollbackInflightClustering(HoodieInstant inflightInstant, HoodieTable table) { String commitTime = HoodieActiveTimeline.createNewInstantTime(); table.scheduleRollback(context, commitTime, inflightInstant, false, config.shouldRollbackUsingMarkers()); @@ -1160,7 +1241,7 @@ public HoodieMetrics getMetrics() { return metrics; } - public HoodieIndex getIndex() { + public HoodieIndex getIndex() { return index; } @@ -1208,7 +1289,8 @@ protected void releaseResources() { @Override public void close() { - // release AsyncCleanerService + AsyncArchiveService.forceShutdown(asyncArchiveService); + asyncArchiveService = null; AsyncCleanerService.forceShutdown(asyncCleanerService); asyncCleanerService = null; // Stop timeline-server if running diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/CompactionAdminClient.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/CompactionAdminClient.java index 1c869e46f1cbf..40e8f85a3ac70 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/CompactionAdminClient.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/CompactionAdminClient.java @@ -61,7 +61,7 @@ /** * Client to perform admin operations related to compaction. */ -public class CompactionAdminClient extends AbstractHoodieClient { +public class CompactionAdminClient extends BaseHoodieClient { private static final Logger LOG = LogManager.getLogger(CompactionAdminClient.class); diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/HoodieTimelineArchiveLog.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/HoodieTimelineArchiver.java similarity index 97% rename from hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/HoodieTimelineArchiveLog.java rename to hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/HoodieTimelineArchiver.java index 138e40a90c6e0..15401c0292e14 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/HoodieTimelineArchiveLog.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/HoodieTimelineArchiver.java @@ -7,19 +7,18 @@ * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License.
*/ -package org.apache.hudi.table; +package org.apache.hudi.client; -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.FileSystem; import org.apache.hudi.avro.model.HoodieArchivedMetaEntry; import org.apache.hudi.avro.model.HoodieMergeArchiveFilePlan; import org.apache.hudi.client.utils.MetadataConversionUtils; @@ -52,11 +51,14 @@ import org.apache.hudi.exception.HoodieException; import org.apache.hudi.exception.HoodieIOException; import org.apache.hudi.metadata.HoodieTableMetadata; +import org.apache.hudi.table.HoodieTable; import org.apache.hudi.table.marker.WriteMarkers; import org.apache.hudi.table.marker.WriteMarkersFactory; import org.apache.avro.Schema; import org.apache.avro.generic.IndexedRecord; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; @@ -79,9 +81,9 @@ /** * Archiver to bound the growth of files under .hoodie meta path. */ -public class HoodieTimelineArchiveLog { +public class HoodieTimelineArchiver { - private static final Logger LOG = LogManager.getLogger(HoodieTimelineArchiveLog.class); + private static final Logger LOG = LogManager.getLogger(HoodieTimelineArchiver.class); private final Path archiveFilePath; private final HoodieWriteConfig config; @@ -91,7 +93,7 @@ public class HoodieTimelineArchiveLog { private final HoodieTable table; private final HoodieTableMetaClient metaClient; - public HoodieTimelineArchiveLog(HoodieWriteConfig config, HoodieTable table) { + public HoodieTimelineArchiver(HoodieWriteConfig config, HoodieTable table) { this.config = config; this.table = table; this.metaClient = table.getMetaClient(); @@ -319,8 +321,7 @@ public void mergeArchiveFiles(List compactCandidate) throws IOExcept // Read the avro blocks while (reader.hasNext()) { HoodieAvroDataBlock blk = (HoodieAvroDataBlock) reader.next(); - List recordsPerFile = blk.getRecords(); - records.addAll(recordsPerFile); + blk.getRecordItr().forEachRemaining(records::add); if (records.size() >= this.config.getCommitArchivalBatchSize()) { writeToFile(wrapperSchema, records); } @@ -427,7 +428,7 @@ private Stream getInstantsToArchive() { .collect(Collectors.groupingBy(i -> Pair.of(i.getTimestamp(), HoodieInstant.getComparableAction(i.getAction())))); - // If metadata table is enabled, do not archive instants which are more recent that the last compaction on the + // If metadata table is enabled, do not archive instants which are more recent than the last compaction on the // metadata table. if (config.isMetadataTableEnabled()) { try (HoodieTableMetadata tableMetadata = HoodieTableMetadata.create(table.getContext(), config.getMetadataConfig(), @@ -445,7 +446,7 @@ private Stream getInstantsToArchive() { throw new HoodieException("Error limiting instant archival based on metadata table", e); } } - + return instants.flatMap(hoodieInstant -> groupByTsAction.get(Pair.of(hoodieInstant.getTimestamp(), HoodieInstant.getComparableAction(hoodieInstant.getAction()))).stream()); diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/RunsTableService.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/RunsTableService.java new file mode 100644 index 0000000000000..64e540568e8dc --- /dev/null +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/RunsTableService.java @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.client; + +import org.apache.hudi.config.HoodieWriteConfig; + +import org.apache.log4j.LogManager; +import org.apache.log4j.Logger; + +public interface RunsTableService { + + Logger LOG = LogManager.getLogger(RunsTableService.class); + + default boolean tableServicesEnabled(HoodieWriteConfig config) { + boolean enabled = config.areTableServicesEnabled(); + if (!enabled) { + LOG.warn(String.format("Table services are disabled. Set `%s` to enable.", HoodieWriteConfig.TABLE_SERVICES_ENABLED)); + } + return enabled; + } +} diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/embedded/EmbeddedTimelineService.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/embedded/EmbeddedTimelineService.java index 20f9b75a910a5..72f8e29c9fa8e 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/embedded/EmbeddedTimelineService.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/embedded/EmbeddedTimelineService.java @@ -112,8 +112,12 @@ public FileSystemViewStorageConfig getRemoteFileSystemViewConfig() { FileSystemViewStorageType viewStorageType = writeConfig.getClientSpecifiedViewStorageConfig() .shouldEnableBackupForRemoteFileSystemView() ? 
FileSystemViewStorageType.REMOTE_FIRST : FileSystemViewStorageType.REMOTE_ONLY; - return FileSystemViewStorageConfig.newBuilder().withStorageType(viewStorageType) - .withRemoteServerHost(hostAddr).withRemoteServerPort(serverPort).build(); + return FileSystemViewStorageConfig.newBuilder() + .withStorageType(viewStorageType) + .withRemoteServerHost(hostAddr) + .withRemoteServerPort(serverPort) + .withRemoteTimelineClientTimeoutSecs(writeConfig.getClientSpecifiedViewStorageConfig().getRemoteTimelineClientTimeoutSecs()) + .build(); } public FileSystemViewManager getViewManager() { diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/transaction/lock/InProcessLockProvider.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/transaction/lock/InProcessLockProvider.java index 426d1cfaf4020..9e5a2379c4c93 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/transaction/lock/InProcessLockProvider.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/transaction/lock/InProcessLockProvider.java @@ -19,13 +19,14 @@ package org.apache.hudi.client.transaction.lock; -import org.apache.hadoop.conf.Configuration; import org.apache.hudi.common.config.LockConfiguration; import org.apache.hudi.common.config.TypedProperties; import org.apache.hudi.common.lock.LockProvider; import org.apache.hudi.common.lock.LockState; import org.apache.hudi.common.util.StringUtils; import org.apache.hudi.exception.HoodieLockException; + +import org.apache.hadoop.conf.Configuration; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; import org.jetbrains.annotations.NotNull; @@ -92,7 +93,11 @@ public boolean tryLock(long time, @NotNull TimeUnit unit) { public void unlock() { LOG.info(getLogMessage(LockState.RELEASING)); try { - LOCK.writeLock().unlock(); + if (LOCK.isWriteLockedByCurrentThread()) { + LOCK.writeLock().unlock(); + } else { + LOG.warn("Cannot unlock because the current thread does not hold the lock."); + } } catch (Exception e) { throw new HoodieLockException(getLogMessage(LockState.FAILED_TO_RELEASE), e); } diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/transaction/lock/LockManager.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/transaction/lock/LockManager.java index 773685980af43..913736cad8a91 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/transaction/lock/LockManager.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/transaction/lock/LockManager.java @@ -83,6 +83,10 @@ public void lock() { } } + /** + * Handle the case where the current thread may not be the holder of this lock + * but still calls unlock(). + */ public void unlock() { if (writeConfig.getWriteConcurrencyMode().supportsOptimisticConcurrencyControl()) { getLockProvider().unlock(); diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/utils/TransactionUtils.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/utils/TransactionUtils.java index ed2ea457764fb..9d7683128fc8c 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/utils/TransactionUtils.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/utils/TransactionUtils.java @@ -60,9 +60,31 @@ public static Option resolveWriteConflictIfAny( final Option thisCommitMetadata, final HoodieWriteConfig config, Option lastCompletedTxnOwnerInstant)
throws HoodieWriteConflictException { + return resolveWriteConflictIfAny(table, currentTxnOwnerInstant, thisCommitMetadata, config, lastCompletedTxnOwnerInstant, false); + } + + /** + * Resolve any write conflicts when committing data. + * + * @param table the Hoodie table being committed to + * @param currentTxnOwnerInstant instant owned by the current transaction + * @param thisCommitMetadata commit metadata of the current transaction + * @param config the write config + * @param lastCompletedTxnOwnerInstant instant owned by the last completed transaction + * @param reloadActiveTimeline whether to reload the active timeline before fetching candidate instants + * @return the commit metadata of the current transaction, if any + * @throws HoodieWriteConflictException when a write conflict is detected + */ + public static Option resolveWriteConflictIfAny( + final HoodieTable table, + final Option currentTxnOwnerInstant, + final Option thisCommitMetadata, + final HoodieWriteConfig config, + Option lastCompletedTxnOwnerInstant, + boolean reloadActiveTimeline) throws HoodieWriteConflictException { if (config.getWriteConcurrencyMode().supportsOptimisticConcurrencyControl()) { ConflictResolutionStrategy resolutionStrategy = config.getWriteConflictResolutionStrategy(); - Stream instantStream = resolutionStrategy.getCandidateInstants(table.getActiveTimeline(), currentTxnOwnerInstant.get(), lastCompletedTxnOwnerInstant); + Stream instantStream = resolutionStrategy.getCandidateInstants(reloadActiveTimeline + ? table.getMetaClient().reloadActiveTimeline() : table.getActiveTimeline(), currentTxnOwnerInstant.get(), lastCompletedTxnOwnerInstant); final ConcurrentOperation thisOperation = new ConcurrentOperation(currentTxnOwnerInstant.get(), thisCommitMetadata.orElse(new HoodieCommitMetadata())); instantStream.forEach(instant -> { try { diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/log/HoodieFileSliceReader.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/common/table/log/HoodieFileSliceReader.java similarity index 85% rename from hudi-common/src/main/java/org/apache/hudi/common/table/log/HoodieFileSliceReader.java rename to hudi-client/hudi-client-common/src/main/java/org/apache/hudi/common/table/log/HoodieFileSliceReader.java index a786e8305bc27..a042255cdcb1a 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/log/HoodieFileSliceReader.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/common/table/log/HoodieFileSliceReader.java @@ -7,13 +7,14 @@ * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License.
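The `InProcessLockProvider` fix above boils down to a standard `ReentrantReadWriteLock` idiom: only the thread that actually holds the write lock may release it, otherwise `unlock()` throws `IllegalMonitorStateException`. A standalone, runnable sketch of the guard:

```java
import java.util.concurrent.locks.ReentrantReadWriteLock;

// Guarded unlock: releasing a write lock from a non-owning thread would throw
// IllegalMonitorStateException, so check ownership first and warn otherwise.
class GuardedUnlockSketch {
  private static final ReentrantReadWriteLock LOCK = new ReentrantReadWriteLock();

  static void unlockSafely() {
    if (LOCK.isWriteLockedByCurrentThread()) {
      LOCK.writeLock().unlock();
    } else {
      System.err.println("Current thread does not hold the lock; skipping unlock.");
    }
  }
}
```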
*/ package org.apache.hudi.common.table.log; @@ -23,6 +24,7 @@ import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.SpillableMapUtils; import org.apache.hudi.common.util.collection.Pair; +import org.apache.hudi.config.HoodiePayloadConfig; import org.apache.hudi.exception.HoodieIOException; import org.apache.hudi.io.storage.HoodieFileReader; @@ -53,10 +55,11 @@ public static HoodieFileSliceReader getFileSliceReader( return new HoodieFileSliceReader(scanner.iterator()); } else { Iterable> iterable = () -> scanner.iterator(); + HoodiePayloadConfig payloadConfig = HoodiePayloadConfig.newBuilder().withPayloadOrderingField(preCombineField).build(); return new HoodieFileSliceReader(StreamSupport.stream(iterable.spliterator(), false) .map(e -> { try { - GenericRecord record = (GenericRecord) e.getData().getInsertValue(schema).get(); + GenericRecord record = (GenericRecord) e.getData().getInsertValue(schema, payloadConfig.getProps()).get(); return transform(record, scanner, payloadClass, preCombineField, simpleKeyGenFieldsOpt); } catch (IOException io) { throw new HoodieIOException("Error while creating reader for file slice with no base file.", io); diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieClusteringConfig.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieClusteringConfig.java index 057b4a6f61299..41b1812c08151 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieClusteringConfig.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieClusteringConfig.java @@ -25,6 +25,7 @@ import org.apache.hudi.common.config.TypedProperties; import org.apache.hudi.common.engine.EngineType; import org.apache.hudi.common.util.TypeUtils; +import org.apache.hudi.common.util.ValidationUtils; import org.apache.hudi.exception.HoodieException; import org.apache.hudi.exception.HoodieNotSupportedException; import org.apache.hudi.table.action.cluster.ClusteringPlanPartitionFilterMode; @@ -59,7 +60,7 @@ public class HoodieClusteringConfig extends HoodieConfig { "hoodie.clustering.plan.partition.filter.mode"; // Any Space-filling curves optimize(z-order/hilbert) params can be saved with this prefix - public static final String LAYOUT_OPTIMIZE_PARAM_PREFIX = "hoodie.layout.optimize."; + private static final String LAYOUT_OPTIMIZE_PARAM_PREFIX = "hoodie.layout.optimize."; public static final ConfigProperty DAYBASED_LOOKBACK_PARTITIONS = ConfigProperty .key(CLUSTERING_STRATEGY_PARAM_PREFIX + "daybased.lookback.partitions") @@ -83,7 +84,7 @@ public class HoodieClusteringConfig extends HoodieConfig { public static final ConfigProperty PLAN_STRATEGY_SMALL_FILE_LIMIT = ConfigProperty .key(CLUSTERING_STRATEGY_PARAM_PREFIX + "small.file.limit") - .defaultValue(String.valueOf(600 * 1024 * 1024L)) + .defaultValue(String.valueOf(300 * 1024 * 1024L)) .sinceVersion("0.7.0") .withDocumentation("Files smaller than the size specified here are candidates for clustering"); @@ -113,7 +114,8 @@ public class HoodieClusteringConfig extends HoodieConfig { .key("hoodie.clustering.inline") .defaultValue("false") .sinceVersion("0.7.0") - .withDocumentation("Turn on inline clustering - clustering will be run after each write operation is complete"); + .withDocumentation("Turn on inline clustering - clustering will be run after each write operation is complete") + .withAlternatives("hoodie.datasource.clustering.inline.enable"); public static final ConfigProperty INLINE_CLUSTERING_MAX_COMMITS 
= ConfigProperty .key("hoodie.clustering.inline.max.commits") @@ -177,11 +179,22 @@ .withDocumentation("Determines how to handle updates, deletes to file groups that are under clustering." + " Default strategy just rejects the update"); + public static final ConfigProperty SCHEDULE_INLINE_CLUSTERING = ConfigProperty + .key("hoodie.clustering.schedule.inline") + .defaultValue("false") + .withDocumentation("When set to true, the clustering service will attempt to schedule clustering inline after each write. Users have to ensure " + + "they have a separate job to run async clustering (execution) for the one scheduled by this writer. Users can choose to set both " + + "`hoodie.clustering.inline` and `hoodie.clustering.schedule.inline` to false and have both scheduling and execution triggered by any async process, in which " + + "case `hoodie.clustering.async.enabled` is expected to be set to true. But if `hoodie.clustering.inline` is set to false, and `hoodie.clustering.schedule.inline` " + + "is set to true, regular writers will schedule clustering inline, but users are expected to trigger an async job for execution. If `hoodie.clustering.inline` is set " + + "to true, regular writers will do both scheduling and execution inline for clustering"); + public static final ConfigProperty ASYNC_CLUSTERING_ENABLE = ConfigProperty .key("hoodie.clustering.async.enabled") .defaultValue("false") .sinceVersion("0.7.0") - .withDocumentation("Enable running of clustering service, asynchronously as inserts happen on the table."); + .withDocumentation("Enable running of clustering service, asynchronously as inserts happen on the table.") + .withAlternatives("hoodie.datasource.clustering.async.enable"); public static final ConfigProperty PRESERVE_COMMIT_METADATA = ConfigProperty .key("hoodie.clustering.preserve.commit.metadata") @@ -190,63 +203,88 @@ .withDocumentation("When rewriting data, preserves existing hoodie_commit_time"); /** - * Using space-filling curves to optimize the layout of table to boost query performance. - * The table data which sorted by space-filling curve has better aggregation; - * combine with min-max filtering, it can achieve good performance improvement. - * - * Notice: - * when we use this feature, we need specify the sort columns. - * The more columns involved in sorting, the worse the aggregation, and the smaller the query performance improvement. - * Choose the filter columns which commonly used in query sql as sort columns. - * It is recommend that 2 ~ 4 columns participate in sorting. + * @deprecated this setting has no effect. Please refer to clustering configuration, as well as + * {@link #LAYOUT_OPTIMIZE_STRATEGY} config to enable advanced record layout optimization strategies */ public static final ConfigProperty LAYOUT_OPTIMIZE_ENABLE = ConfigProperty .key(LAYOUT_OPTIMIZE_PARAM_PREFIX + "enable") .defaultValue(false) .sinceVersion("0.10.0") - .withDocumentation("Enable use z-ordering/space-filling curves to optimize the layout of table to boost query performance. " + "This parameter takes precedence over clustering strategy set using " + EXECUTION_STRATEGY_CLASS_NAME.key()); + .deprecatedAfter("0.11.0") + .withDocumentation("This setting has no effect.
Please refer to clustering configuration, as well as " + + "LAYOUT_OPTIMIZE_STRATEGY config to enable advanced record layout optimization strategies"); - public static final ConfigProperty LAYOUT_OPTIMIZE_STRATEGY = ConfigProperty + /** + * Determines the ordering strategy for records layout optimization. + * Currently, the following strategies are supported: + *
+ * <ul>
+ *   <li>Linear: simply orders records lexicographically</li>
+ *   <li>Z-order: orders records along Z-order spatial-curve</li>
+ *   <li>Hilbert: orders records along Hilbert's spatial-curve</li>
+ * </ul>
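+ * <p>A minimal sketch of selecting a strategy through the builder method exposed further below
+ * in this class (the {@code newBuilder()} entry point is assumed from Hudi's config classes):
+ * <pre>{@code
+ *   HoodieClusteringConfig clusteringConfig = HoodieClusteringConfig.newBuilder()
+ *       .withDataOptimizeStrategy("z-order") // "linear" (default) and "hilbert" also supported
+ *       .build();
+ * }</pre>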
+ * + * NOTE: "z-order", "hilbert" strategies may consume considerably more compute than "linear". + * Make sure to perform small-scale local testing for your dataset before applying globally. + */ + public static final ConfigProperty LAYOUT_OPTIMIZE_STRATEGY = ConfigProperty .key(LAYOUT_OPTIMIZE_PARAM_PREFIX + "strategy") - .defaultValue("z-order") + .defaultValue("linear") .sinceVersion("0.10.0") - .withDocumentation("Type of layout optimization to be applied, current only supports `z-order` and `hilbert` curves."); + .withDocumentation("Determines the ordering strategy used in records layout optimization. " + + "Currently \"linear\", \"z-order\" and \"hilbert\" strategies are supported."); /** - * There exists two method to build z-curve. - * one is directly mapping sort cols to z-value to build z-curve; - * we can find this method in Amazon DynamoDB https://aws.amazon.com/cn/blogs/database/tag/z-order/ - * the other one is Boundary-based Interleaved Index method which we proposed. simply call it sample method. - * Refer to rfc-28 for specific algorithm flow. - * Boundary-based Interleaved Index method has better generalization, but the build speed is slower than direct method. + * NOTE: This setting only has effect if {@link #LAYOUT_OPTIMIZE_STRATEGY} value is set to + * either "z-order" or "hilbert" (i.e. leveraging space-filling curves). + * + * Currently, two methods to order records along the curve are supported, "direct" and "sample": + *
+ * <ul>
+ *   <li>Direct: entails that spatial curve will be built in full, "filling in" all of the individual
+ *   points corresponding to each individual record</li>
+ *   <li>Sample: leverages boundary-based interleaved index method (described in more detail in
+ *   Amazon DynamoDB blog [1])</li>
+ * </ul>
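+ * <p>A minimal sketch of opting into the sampling method via the builder methods shown further
+ * below in this class (the sample size given is just the default, for illustration):
+ * <pre>{@code
+ *   HoodieClusteringConfig clusteringConfig = HoodieClusteringConfig.newBuilder()
+ *       .withDataOptimizeBuildCurveStrategy("sample")
+ *       .withDataOptimizeBuildCurveSampleNumber(200000)
+ *       .build();
+ * }</pre>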
+ * + * NOTE: The boundary-based interleaved index method has better generalization, + * but is slower than the direct method. + * + * Please refer to RFC-28 for specific elaboration on both flows. + * + * [1] https://aws.amazon.com/cn/blogs/database/tag/z-order/ */ - public static final ConfigProperty LAYOUT_OPTIMIZE_CURVE_BUILD_METHOD = ConfigProperty + public static final ConfigProperty LAYOUT_OPTIMIZE_SPATIAL_CURVE_BUILD_METHOD = ConfigProperty .key(LAYOUT_OPTIMIZE_PARAM_PREFIX + "curve.build.method") .defaultValue("direct") .sinceVersion("0.10.0") - .withDocumentation("Controls how data is sampled to build the space filling curves. two methods: `direct`,`sample`." - + "The direct method is faster than the sampling, however sample method would produce a better data layout."); + .withDocumentation("Controls how data is sampled to build the space-filling curves. " + + "Two methods: \"direct\", \"sample\". The direct method is faster than the sample method, " + + "however the sample method would produce a better data layout."); + /** - * Doing sample for table data is the first step in Boundary-based Interleaved Index method. - * larger sample number means better optimize result, but more memory consumption + * NOTE: This setting only has effect if {@link #LAYOUT_OPTIMIZE_SPATIAL_CURVE_BUILD_METHOD} value + * is set to "sample". + * + * Determines target sample size used by the Boundary-based Interleaved Index method. + * Larger sample size entails better layout optimization outcomes, at the expense of higher memory + * footprint. */ - public static final ConfigProperty LAYOUT_OPTIMIZE_BUILD_CURVE_SAMPLE_SIZE = ConfigProperty + public static final ConfigProperty LAYOUT_OPTIMIZE_BUILD_CURVE_SAMPLE_SIZE = ConfigProperty .key(LAYOUT_OPTIMIZE_PARAM_PREFIX + "build.curve.sample.size") .defaultValue("200000") .sinceVersion("0.10.0") - .withDocumentation("when setting" + LAYOUT_OPTIMIZE_CURVE_BUILD_METHOD.key() + " to `sample`, the amount of sampling to be done." - + "Large sample size leads to better results, at the expense of more memory usage."); + .withDocumentation("Determines target sample size used by the Boundary-based Interleaved Index method " + + "of building the space-filling curve. Larger sample size entails better layout optimization outcomes, " + + "at the expense of higher memory footprint."); /** - * The best way to use Z-order/Space-filling curves is to cooperate with Data-Skipping - * with data-skipping query engine can greatly reduce the number of table files to be read.
- * otherwise query engine can only do row-group skipping for files (parquet/orc) + * @deprecated this setting has no effect */ public static final ConfigProperty LAYOUT_OPTIMIZE_DATA_SKIPPING_ENABLE = ConfigProperty .key(LAYOUT_OPTIMIZE_PARAM_PREFIX + "data.skipping.enable") .defaultValue(true) .sinceVersion("0.10.0") + .deprecatedAfter("0.11.0") .withDocumentation("Enable data skipping by collecting statistics once layout optimization is complete."); public static final ConfigProperty ROLLBACK_PENDING_CLUSTERING_ON_CONFLICT = ConfigProperty @@ -480,6 +518,11 @@ public Builder withInlineClustering(Boolean inlineClustering) { return this; } + public Builder withScheduleInlineClustering(Boolean scheduleInlineClustering) { + clusteringConfig.setValue(SCHEDULE_INLINE_CLUSTERING, String.valueOf(scheduleInlineClustering)); + return this; + } + public Builder withInlineClusteringNumCommits(int numCommits) { clusteringConfig.setValue(INLINE_CLUSTERING_MAX_COMMITS, String.valueOf(numCommits)); return this; @@ -516,18 +559,13 @@ public Builder withRollbackPendingClustering(Boolean rollbackPendingClustering) return this; } - public Builder withSpaceFillingCurveDataOptimizeEnable(Boolean enable) { - clusteringConfig.setValue(LAYOUT_OPTIMIZE_ENABLE, String.valueOf(enable)); - return this; - } - public Builder withDataOptimizeStrategy(String strategy) { clusteringConfig.setValue(LAYOUT_OPTIMIZE_STRATEGY, strategy); return this; } public Builder withDataOptimizeBuildCurveStrategy(String method) { - clusteringConfig.setValue(LAYOUT_OPTIMIZE_CURVE_BUILD_METHOD, method); + clusteringConfig.setValue(LAYOUT_OPTIMIZE_SPATIAL_CURVE_BUILD_METHOD, method); return this; } @@ -536,17 +574,18 @@ public Builder withDataOptimizeBuildCurveSampleNumber(int sampleNumber) { return this; } - public Builder withDataOptimizeDataSkippingEnable(boolean dataSkipping) { - clusteringConfig.setValue(LAYOUT_OPTIMIZE_DATA_SKIPPING_ENABLE, String.valueOf(dataSkipping)); - return this; - } - public HoodieClusteringConfig build() { clusteringConfig.setDefaultValue( PLAN_STRATEGY_CLASS_NAME, getDefaultPlanStrategyClassName(engineType)); clusteringConfig.setDefaultValue( EXECUTION_STRATEGY_CLASS_NAME, getDefaultExecutionStrategyClassName(engineType)); clusteringConfig.setDefaults(HoodieClusteringConfig.class.getName()); + + boolean inlineCluster = clusteringConfig.getBoolean(HoodieClusteringConfig.INLINE_CLUSTERING); + boolean inlineClusterSchedule = clusteringConfig.getBoolean(HoodieClusteringConfig.SCHEDULE_INLINE_CLUSTERING); + ValidationUtils.checkArgument(!(inlineCluster && inlineClusterSchedule), String.format("Only one of inline clustering (%s) and " + + "schedule inline clustering (%s) can be enabled. Both can't be set to true at the same time. %s, %s", HoodieClusteringConfig.INLINE_CLUSTERING.key(), + HoodieClusteringConfig.SCHEDULE_INLINE_CLUSTERING.key(), inlineCluster, inlineClusterSchedule)); return clusteringConfig; } @@ -578,21 +617,21 @@ private String getDefaultExecutionStrategyClassName(EngineType engineType) { /** * Type of a strategy for building Z-order/Hilbert space-filling curves.
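 * <p>Values are resolved from their string form; a minimal sketch:
 * <pre>{@code
 *   SpatialCurveCompositionStrategyType method =
 *       SpatialCurveCompositionStrategyType.fromValue("direct"); // or "sample"
 * }</pre>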
*/ - public enum BuildCurveStrategyType { + public enum SpatialCurveCompositionStrategyType { DIRECT("direct"), SAMPLE("sample"); - private static final Map VALUE_TO_ENUM_MAP = - TypeUtils.getValueToEnumMap(BuildCurveStrategyType.class, e -> e.value); + private static final Map VALUE_TO_ENUM_MAP = + TypeUtils.getValueToEnumMap(SpatialCurveCompositionStrategyType.class, e -> e.value); private final String value; - BuildCurveStrategyType(String value) { + SpatialCurveCompositionStrategyType(String value) { this.value = value; } - public static BuildCurveStrategyType fromValue(String value) { - BuildCurveStrategyType enumValue = VALUE_TO_ENUM_MAP.get(value); + public static SpatialCurveCompositionStrategyType fromValue(String value) { + SpatialCurveCompositionStrategyType enumValue = VALUE_TO_ENUM_MAP.get(value); if (enumValue == null) { throw new HoodieException(String.format("Invalid value (%s)", value)); } @@ -605,6 +644,7 @@ public static BuildCurveStrategyType fromValue(String value) { * Layout optimization strategies such as Z-order/Hilbert space-curves, etc */ public enum LayoutOptimizationStrategy { + LINEAR("linear"), ZORDER("z-order"), HILBERT("hilbert"); diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieCompactionConfig.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieCompactionConfig.java index 4d1e197cf8a1e..0aac9308da439 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieCompactionConfig.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieCompactionConfig.java @@ -50,13 +50,6 @@ + "cleaning (reclamation of older/unused file groups/slices).") public class HoodieCompactionConfig extends HoodieConfig { - public static final ConfigProperty AUTO_CLEAN = ConfigProperty - .key("hoodie.clean.automatic") - .defaultValue("true") - .withDocumentation("When enabled, the cleaner table service is invoked immediately after each commit," - + " to delete older file slices. It's recommended to enable this, to ensure metadata and data storage" - + " growth is bounded."); - public static final ConfigProperty AUTO_ARCHIVE = ConfigProperty .key("hoodie.archive.automatic") .defaultValue("true") @@ -64,6 +57,20 @@ public class HoodieCompactionConfig extends HoodieConfig { + " to archive commits if we cross a maximum value of commits." + " It's recommended to enable this, to ensure number of active commits is bounded."); + public static final ConfigProperty ASYNC_ARCHIVE = ConfigProperty + .key("hoodie.archive.async") + .defaultValue("false") + .sinceVersion("0.11.0") + .withDocumentation("Only applies when " + AUTO_ARCHIVE.key() + " is turned on. " + + "When turned on runs archiver async with writing, which can speed up overall write performance."); + + public static final ConfigProperty AUTO_CLEAN = ConfigProperty + .key("hoodie.clean.automatic") + .defaultValue("true") + .withDocumentation("When enabled, the cleaner table service is invoked immediately after each commit," + + " to delete older file slices. It's recommended to enable this, to ensure metadata and data storage" + + " growth is bounded."); + public static final ConfigProperty ASYNC_CLEAN = ConfigProperty .key("hoodie.clean.async") .defaultValue("false") @@ -76,6 +83,12 @@ public class HoodieCompactionConfig extends HoodieConfig { .withDocumentation("Number of commits to retain, without cleaning. This will be retained for num_of_commits * time_between_commits " + "(scheduled). 
This also directly translates into how much data retention the table supports for incremental queries."); + public static final ConfigProperty CLEANER_HOURS_RETAINED = ConfigProperty.key("hoodie.cleaner.hours.retained") + .defaultValue("24") + .withDocumentation("Number of hours for which commits need to be retained. This config provides a more flexible option as " + + "compared to the number of commits retained by the cleaning service. Setting this property ensures that all files in a file group, except the latest one, " + + "corresponding to commits with commit times older than the configured number of hours, are cleaned."); + public static final ConfigProperty CLEANER_POLICY = ConfigProperty .key("hoodie.cleaner.policy") .defaultValue(HoodieCleaningPolicy.KEEP_LATEST_COMMITS.name()) @@ -90,6 +103,16 @@ .withDocumentation("When set to true, compaction service is triggered after each write. While being " + " simpler operationally, this adds extra latency on the write path."); + public static final ConfigProperty SCHEDULE_INLINE_COMPACT = ConfigProperty + .key("hoodie.compact.schedule.inline") + .defaultValue("false") + .withDocumentation("When set to true, the compaction service will attempt to schedule compaction inline after each write. Users have to ensure " + + "they have a separate job to run async compaction (execution) for the one scheduled by this writer. Users can choose to set both " + + "`hoodie.compact.inline` and `hoodie.compact.schedule.inline` to false and have both scheduling and execution triggered by any async process. " + + "But if `hoodie.compact.inline` is set to false, and `hoodie.compact.schedule.inline` is set to true, regular writers will schedule compaction inline, " + + "but users are expected to trigger an async job for execution. If `hoodie.compact.inline` is set to true, regular writers will do both scheduling and " + + "execution inline for compaction"); + public static final ConfigProperty INLINE_COMPACT_NUM_DELTA_COMMITS = ConfigProperty .key("hoodie.compact.inline.max.delta.commits") .defaultValue("5") @@ -156,7 +179,8 @@ public class HoodieCompactionConfig extends HoodieConfig { .defaultValue(String.valueOf(104857600)) .withDocumentation("During upsert operation, we opportunistically expand existing small files on storage, instead of writing" + " new files, to keep number of files to an optimum. This config sets the file size limit below which a file on storage " - + " becomes a candidate to be selected as such a `small file`. By default, treat any file <= 100MB as a small file."); + + " becomes a candidate to be selected as such a `small file`. By default, treat any file <= 100MB as a small file." + + " Also note that if this is set to <= 0, Hudi will not try to expand small files and will directly write new files"); public static final ConfigProperty RECORD_SIZE_ESTIMATION_THRESHOLD = ConfigProperty .key("hoodie.record.size.estimation.threshold") @@ -200,7 +224,7 @@ public class HoodieCompactionConfig extends HoodieConfig { public static final ConfigProperty COMPACTION_LAZY_BLOCK_READ_ENABLE = ConfigProperty .key("hoodie.compaction.lazy.block.read") - .defaultValue("false") + .defaultValue("true") .withDocumentation("When merging the delta log files, this config helps to choose whether the log blocks " + "should be read lazily or not.
Choose true to use lazy block reading (low memory usage, but incurs seeks to each block" + " header) or false for immediate block read (higher memory usage)"); @@ -254,6 +278,13 @@ .withDocumentation("The average record size. If not explicitly specified, hudi will compute the " + "record size estimate compute dynamically based on commit metadata. " + " This is critical in computing the insert parallelism and bin-packing inserts into small files."); + + public static final ConfigProperty ALLOW_MULTIPLE_CLEANS = ConfigProperty + .key("hoodie.clean.allow.multiple") + .defaultValue(true) + .sinceVersion("0.11.0") + .withDocumentation("Allows scheduling/executing multiple cleans by enabling this config. If users prefer to strictly ensure clean requests are mutually exclusive, " + + "i.e. a 2nd clean will not be scheduled if another clean is not yet completed (to avoid repeat cleaning of the same files), they might want to disable this config."); public static final ConfigProperty ARCHIVE_MERGE_FILES_BATCH_SIZE = ConfigProperty .key("hoodie.archive.merge.files.batch.size") @@ -512,6 +543,16 @@ public Builder fromProperties(Properties props) { return this; } + public Builder withAutoArchive(Boolean autoArchive) { + compactionConfig.setValue(AUTO_ARCHIVE, String.valueOf(autoArchive)); + return this; + } + + public Builder withAsyncArchive(Boolean asyncArchive) { + compactionConfig.setValue(ASYNC_ARCHIVE, String.valueOf(asyncArchive)); + return this; + } + public Builder withAutoClean(Boolean autoClean) { compactionConfig.setValue(AUTO_CLEAN, String.valueOf(autoClean)); return this; @@ -522,11 +563,6 @@ public Builder withAsyncClean(Boolean asyncClean) { return this; } - public Builder withAutoArchive(Boolean autoArchive) { - compactionConfig.setValue(AUTO_ARCHIVE, String.valueOf(autoArchive)); - return this; - } - public Builder withIncrementalCleaningMode(Boolean incrementalCleaningMode) { compactionConfig.setValue(CLEANER_INCREMENTAL_MODE_ENABLE, String.valueOf(incrementalCleaningMode)); return this; @@ -537,6 +573,11 @@ public Builder withInlineCompaction(Boolean inlineCompaction) { return this; } + public Builder withScheduleInlineCompaction(Boolean scheduleInlineCompaction) { + compactionConfig.setValue(SCHEDULE_INLINE_COMPACT, String.valueOf(scheduleInlineCompaction)); + return this; + } + public Builder withInlineCompactionTriggerStrategy(CompactionTriggerStrategy compactionTriggerStrategy) { compactionConfig.setValue(INLINE_COMPACT_TRIGGER_STRATEGY, compactionTriggerStrategy.name()); return this; @@ -557,6 +598,11 @@ public Builder retainCommits(int commitsRetained) { return this; } + public Builder cleanerNumHoursRetained(int cleanerHoursRetained) { + compactionConfig.setValue(CLEANER_HOURS_RETAINED, String.valueOf(cleanerHoursRetained)); + return this; + } + public Builder archiveCommitsWith(int minToKeep, int maxToKeep) { compactionConfig.setValue(MIN_COMMITS_TO_KEEP, String.valueOf(minToKeep)); compactionConfig.setValue(MAX_COMMITS_TO_KEEP, String.valueOf(maxToKeep)); @@ -603,6 +649,11 @@ public Builder approxRecordSize(int recordSizeEstimate) { return this; } + public Builder allowMultipleCleans(boolean allowMultipleCleanSchedules) { + compactionConfig.setValue(ALLOW_MULTIPLE_CLEANS, String.valueOf(allowMultipleCleanSchedules)); + return this; + } + public Builder withCleanerParallelism(int cleanerParallelism) { compactionConfig.setValue(CLEANER_PARALLELISM_VALUE, String.valueOf(cleanerParallelism)); return this; @@ -700,6
+751,12 @@ public HoodieCompactionConfig build() { + "missing data from few instants.", HoodieCompactionConfig.MIN_COMMITS_TO_KEEP.key(), minInstantsToKeep, HoodieCompactionConfig.CLEANER_COMMITS_RETAINED.key(), cleanerCommitsRetained)); + + boolean inlineCompact = compactionConfig.getBoolean(HoodieCompactionConfig.INLINE_COMPACT); + boolean inlineCompactSchedule = compactionConfig.getBoolean(HoodieCompactionConfig.SCHEDULE_INLINE_COMPACT); + ValidationUtils.checkArgument(!(inlineCompact && inlineCompactSchedule), String.format("Only one of inline compaction (%s) and " + + "schedule inline compaction (%s) can be enabled. Both can't be set to true at the same time. %s, %s", HoodieCompactionConfig.INLINE_COMPACT.key(), + HoodieCompactionConfig.SCHEDULE_INLINE_COMPACT.key(), inlineCompact, inlineCompactSchedule)); return compactionConfig; } } diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieIndexConfig.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieIndexConfig.java index e156310c736b0..f82f14d5a9c64 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieIndexConfig.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieIndexConfig.java @@ -152,7 +152,7 @@ public class HoodieIndexConfig extends HoodieConfig { public static final ConfigProperty SIMPLE_INDEX_PARALLELISM = ConfigProperty .key("hoodie.simple.index.parallelism") - .defaultValue("50") + .defaultValue("100") .withDocumentation("Only applies if index type is SIMPLE. " + "This is the amount of parallelism for index lookup, which involves a Spark Shuffle"); @@ -568,7 +568,7 @@ public HoodieIndexConfig build() { private String getDefaultIndexType(EngineType engineType) { switch (engineType) { case SPARK: - return HoodieIndex.IndexType.BLOOM.name(); + return HoodieIndex.IndexType.SIMPLE.name(); case FLINK: case JAVA: return HoodieIndex.IndexType.INMEMORY.name(); diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieStorageConfig.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieStorageConfig.java index 42689ec18e948..6447a039cc069 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieStorageConfig.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieStorageConfig.java @@ -83,14 +83,17 @@ public class HoodieStorageConfig extends HoodieConfig { .withDocumentation("Lower values increase the size of metadata tracked within HFile, but can offer potentially " + "faster lookup times."); - // used to size log files + public static final ConfigProperty LOGFILE_DATA_BLOCK_FORMAT = ConfigProperty + .key("hoodie.logfile.data.block.format") + .noDefaultValue() + .withDocumentation("Format of the data block within delta logs. The following formats are currently supported: \"avro\", \"hfile\", \"parquet\"") + public static final ConfigProperty LOGFILE_MAX_SIZE = ConfigProperty .key("hoodie.logfile.max.size") .defaultValue(String.valueOf(1024 * 1024 * 1024)) // 1 GB .withDocumentation("LogFile max size.
This is the maximum size allowed for a log file " + "before it is rolled over to the next version."); - // used to size data blocks in log file public static final ConfigProperty LOGFILE_DATA_BLOCK_MAX_SIZE = ConfigProperty .key("hoodie.logfile.data.block.max.size") .defaultValue(String.valueOf(256 * 1024 * 1024)) @@ -124,7 +127,7 @@ public class HoodieStorageConfig extends HoodieConfig { public static final ConfigProperty PARQUET_OUTPUT_TIMESTAMP_TYPE = ConfigProperty .key("hoodie.parquet.outputtimestamptype") - .defaultValue("TIMESTAMP_MILLIS") + .defaultValue("TIMESTAMP_MICROS") .withDocumentation("Sets spark.sql.parquet.outputTimestampType. Parquet timestamp type to use when Spark writes data to Parquet files."); public static final ConfigProperty HFILE_COMPRESSION_ALGORITHM_NAME = ConfigProperty diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java index 5896ac49ea69d..b7b410817b2fd 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java @@ -31,15 +31,18 @@ import org.apache.hudi.common.config.TypedProperties; import org.apache.hudi.common.engine.EngineType; import org.apache.hudi.common.fs.ConsistencyGuardConfig; +import org.apache.hudi.common.fs.FileSystemRetryConfig; import org.apache.hudi.common.model.HoodieCleaningPolicy; import org.apache.hudi.common.model.HoodieFailedWritesCleaningPolicy; import org.apache.hudi.common.model.HoodieFileFormat; import org.apache.hudi.common.model.OverwriteWithLatestAvroPayload; import org.apache.hudi.common.model.WriteConcurrencyMode; import org.apache.hudi.common.table.HoodieTableConfig; +import org.apache.hudi.common.table.log.block.HoodieLogBlock; import org.apache.hudi.common.table.marker.MarkerType; import org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion; import org.apache.hudi.common.table.view.FileSystemViewStorageConfig; +import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.ReflectionUtils; import org.apache.hudi.common.util.ValidationUtils; import org.apache.hudi.config.metrics.HoodieMetricsConfig; @@ -59,9 +62,9 @@ import org.apache.hudi.table.action.cluster.ClusteringPlanPartitionFilterMode; import org.apache.hudi.table.action.compact.CompactionTriggerStrategy; import org.apache.hudi.table.action.compact.strategy.CompactionStrategy; +import org.apache.hudi.table.storage.HoodieStorageLayout; import org.apache.hadoop.hbase.io.compress.Compression; -import org.apache.hudi.table.storage.HoodieStorageLayout; import org.apache.orc.CompressionKind; import org.apache.parquet.hadoop.metadata.CompressionCodecName; @@ -438,7 +441,20 @@ public class HoodieWriteConfig extends HoodieConfig { .sinceVersion("0.10.0") .withDocumentation("File Id Prefix provider class, that implements `org.apache.hudi.fileid.FileIdPrefixProvider`"); + public static final ConfigProperty TABLE_SERVICES_ENABLED = ConfigProperty + .key("hoodie.table.services.enabled") + .defaultValue(true) + .sinceVersion("0.11.0") + .withDocumentation("Master control to disable all table services including archive, clean, compact, cluster, etc."); + + public static final ConfigProperty RELEASE_RESOURCE_ENABLE = ConfigProperty + .key("hoodie.release.resource.on.completion.enable") + .defaultValue(true) + .sinceVersion("0.11.0") + .withDocumentation("Control to 
enable releasing all persisted RDDs when the Spark job finishes."); + private ConsistencyGuardConfig consistencyGuardConfig; + private FileSystemRetryConfig fileSystemRetryConfig; // Hoodie Write Client transparently rewrites File System View config when embedded mode is enabled // We keep track of original config and rewritten config @@ -832,6 +848,7 @@ protected HoodieWriteConfig(EngineType engineType, Properties props) { newProps.putAll(props); this.engineType = engineType; this.consistencyGuardConfig = ConsistencyGuardConfig.newBuilder().fromProperties(newProps).build(); + this.fileSystemRetryConfig = FileSystemRetryConfig.newBuilder().fromProperties(newProps).build(); this.clientSpecifiedViewStorageConfig = FileSystemViewStorageConfig.newBuilder().fromProperties(newProps).build(); this.viewStorageConfig = clientSpecifiedViewStorageConfig; this.hoodiePayloadConfig = HoodiePayloadConfig.newBuilder().fromProperties(newProps).build(); @@ -1074,6 +1091,10 @@ public int getCleanerCommitsRetained() { return getInt(HoodieCompactionConfig.CLEANER_COMMITS_RETAINED); } + public int getCleanerHoursRetained() { + return getInt(HoodieCompactionConfig.CLEANER_HOURS_RETAINED); + } + public int getMaxCommitsToKeep() { return getInt(HoodieCompactionConfig.MAX_COMMITS_TO_KEEP); } @@ -1102,6 +1123,10 @@ public int getCopyOnWriteRecordSizeEstimate() { return getInt(HoodieCompactionConfig.COPY_ON_WRITE_RECORD_SIZE_ESTIMATE); } + public boolean allowMultipleCleans() { + return getBoolean(HoodieCompactionConfig.ALLOW_MULTIPLE_CLEANS); + } + public boolean shouldAutoTuneInsertSplits() { return getBoolean(HoodieCompactionConfig.COPY_ON_WRITE_AUTO_SPLIT_INSERTS); } @@ -1110,10 +1135,6 @@ public int getCleanerParallelism() { return getInt(HoodieCompactionConfig.CLEANER_PARALLELISM_VALUE); } - public boolean isAutoClean() { - return getBoolean(HoodieCompactionConfig.AUTO_CLEAN); - } - public boolean getArchiveMergeEnable() { return getBoolean(HoodieCompactionConfig.ARCHIVE_MERGE_ENABLE); } @@ -1126,6 +1147,14 @@ public boolean isAutoArchive() { return getBoolean(HoodieCompactionConfig.AUTO_ARCHIVE); } + public boolean isAsyncArchive() { + return getBoolean(HoodieCompactionConfig.ASYNC_ARCHIVE); + } + + public boolean isAutoClean() { + return getBoolean(HoodieCompactionConfig.AUTO_CLEAN); + } + public boolean isAsyncClean() { return getBoolean(HoodieCompactionConfig.ASYNC_CLEAN); } @@ -1138,6 +1167,10 @@ public boolean inlineCompactionEnabled() { return getBoolean(HoodieCompactionConfig.INLINE_COMPACT); } + public boolean scheduleInlineCompaction() { + return getBoolean(HoodieCompactionConfig.SCHEDULE_INLINE_COMPACT); + } + public CompactionTriggerStrategy getInlineCompactTriggerStrategy() { return CompactionTriggerStrategy.valueOf(getString(HoodieCompactionConfig.INLINE_COMPACT_TRIGGER_STRATEGY)); } @@ -1178,6 +1211,10 @@ public boolean inlineClusteringEnabled() { return getBoolean(HoodieClusteringConfig.INLINE_CLUSTERING); } + public boolean scheduleInlineClustering() { + return getBoolean(HoodieClusteringConfig.SCHEDULE_INLINE_CLUSTERING); + } + public boolean isAsyncClusteringEnabled() { return getBoolean(HoodieClusteringConfig.ASYNC_CLUSTERING_ENABLE); } @@ -1288,30 +1325,21 @@ public String getClusteringSortColumns() { return getString(HoodieClusteringConfig.PLAN_STRATEGY_SORT_COLUMNS); } - /** - * Data layout optimize properties.
- */ - public boolean isLayoutOptimizationEnabled() { - return getBoolean(HoodieClusteringConfig.LAYOUT_OPTIMIZE_ENABLE); + public HoodieClusteringConfig.LayoutOptimizationStrategy getLayoutOptimizationStrategy() { + return HoodieClusteringConfig.LayoutOptimizationStrategy.fromValue( + getStringOrDefault(HoodieClusteringConfig.LAYOUT_OPTIMIZE_STRATEGY) + ); } - public String getLayoutOptimizationStrategy() { - return getString(HoodieClusteringConfig.LAYOUT_OPTIMIZE_STRATEGY); - } - - public HoodieClusteringConfig.BuildCurveStrategyType getLayoutOptimizationCurveBuildMethod() { - return HoodieClusteringConfig.BuildCurveStrategyType.fromValue( - getString(HoodieClusteringConfig.LAYOUT_OPTIMIZE_CURVE_BUILD_METHOD)); + public HoodieClusteringConfig.SpatialCurveCompositionStrategyType getLayoutOptimizationCurveBuildMethod() { + return HoodieClusteringConfig.SpatialCurveCompositionStrategyType.fromValue( + getString(HoodieClusteringConfig.LAYOUT_OPTIMIZE_SPATIAL_CURVE_BUILD_METHOD)); } public int getLayoutOptimizationSampleSize() { return getInt(HoodieClusteringConfig.LAYOUT_OPTIMIZE_BUILD_CURVE_SAMPLE_SIZE); } - public boolean isDataSkippingEnabled() { - return getBoolean(HoodieClusteringConfig.LAYOUT_OPTIMIZE_DATA_SKIPPING_ENABLE); - } - /** * index properties. */ @@ -1444,6 +1472,14 @@ public boolean useBloomIndexBucketizedChecking() { return getBoolean(HoodieIndexConfig.BLOOM_INDEX_BUCKETIZED_CHECKING); } + public boolean isMetadataBloomFilterIndexEnabled() { + return isMetadataTableEnabled() && getMetadataConfig().isBloomFilterIndexEnabled(); + } + + public boolean isMetadataIndexColumnStatsForAllColumnsEnabled() { + return isMetadataTableEnabled() && getMetadataConfig().isMetadataColumnStatsIndexForAllColumnsEnabled(); + } + public int getBloomIndexKeysPerBucket() { return getInt(HoodieIndexConfig.BLOOM_INDEX_KEYS_PER_BUCKET); } @@ -1515,6 +1551,11 @@ public String parquetOutputTimestampType() { return getString(HoodieStorageConfig.PARQUET_OUTPUT_TIMESTAMP_TYPE); } + public Option getLogDataBlockFormat() { + return Option.ofNullable(getString(HoodieStorageConfig.LOGFILE_DATA_BLOCK_FORMAT)) + .map(HoodieLogBlock.HoodieLogBlockType::fromId); + } + public long getLogFileMaxSize() { return getLong(HoodieStorageConfig.LOGFILE_MAX_SIZE); } @@ -1681,7 +1722,7 @@ public boolean getPushGatewayRandomJobNameSuffix() { public String getMetricReporterMetricsNamePrefix() { return getStringOrDefault(HoodieMetricsConfig.METRICS_REPORTER_PREFIX); } - + /** * memory configs. */ @@ -1701,6 +1742,10 @@ public ConsistencyGuardConfig getConsistencyGuardConfig() { return consistencyGuardConfig; } + public FileSystemRetryConfig getFileSystemRetryConfig() { + return fileSystemRetryConfig; + } + public void setConsistencyGuardConfig(ConsistencyGuardConfig consistencyGuardConfig) { this.consistencyGuardConfig = consistencyGuardConfig; } @@ -1853,12 +1898,12 @@ public WriteConcurrencyMode getWriteConcurrencyMode() { } /** - * Are any table services configured to run inline? + * Are any table services configured to run inline for both scheduling and execution? * * @return True if any table services are configured to run inline, false otherwise. 
*/ - public Boolean areAnyTableServicesInline() { - return inlineClusteringEnabled() || inlineCompactionEnabled() || isAutoClean(); + public Boolean areAnyTableServicesExecutedInline() { + return inlineClusteringEnabled() || inlineCompactionEnabled() || isAutoClean() || isAutoArchive(); } /** @@ -1867,7 +1912,11 @@ public Boolean areAnyTableServicesInline() { * @return True if any table services are configured to run async, false otherwise. */ public Boolean areAnyTableServicesAsync() { - return isAsyncClusteringEnabled() || !inlineCompactionEnabled() || isAsyncClean(); + return isAsyncClusteringEnabled() || !inlineCompactionEnabled() || isAsyncClean() || isAsyncArchive(); + } + + public Boolean areAnyTableServicesScheduledInline() { + return scheduleInlineCompaction() || scheduleInlineClustering(); } public String getPreCommitValidators() { @@ -1898,6 +1947,14 @@ public String getFileIdPrefixProviderClassName() { return getString(FILEID_PREFIX_PROVIDER_CLASS); } + public boolean areTableServicesEnabled() { + return getBooleanOrDefault(TABLE_SERVICES_ENABLED); + } + + public boolean areReleaseResourceEnabled() { + return getBooleanOrDefault(RELEASE_RESOURCE_ENABLE); + } + /** * Layout configs. */ @@ -2263,6 +2320,16 @@ public Builder withFileIdPrefixProviderClassName(String fileIdPrefixProviderClas return this; } + public Builder withTableServicesEnabled(boolean enabled) { + writeConfig.setValue(TABLE_SERVICES_ENABLED, Boolean.toString(enabled)); + return this; + } + + public Builder withReleaseResourceEnabled(boolean enabled) { + writeConfig.setValue(RELEASE_RESOURCE_ENABLE, Boolean.toString(enabled)); + return this; + } + public Builder withProperties(Properties properties) { this.writeConfig.getProps().putAll(properties); return this; diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/exception/HoodieRestoreException.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/exception/HoodieRestoreException.java index c6c9076f51bae..baad53aba5941 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/exception/HoodieRestoreException.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/exception/HoodieRestoreException.java @@ -23,4 +23,8 @@ public class HoodieRestoreException extends HoodieException { public HoodieRestoreException(String msg, Throwable e) { super(msg, e); } + + public HoodieRestoreException(String msg) { + super(msg); + } } diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/execution/HoodieLazyInsertIterable.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/execution/HoodieLazyInsertIterable.java index 0d28c74e13f9b..b078076b864f5 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/execution/HoodieLazyInsertIterable.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/execution/HoodieLazyInsertIterable.java @@ -18,7 +18,6 @@ package org.apache.hudi.execution; -import java.util.Properties; import org.apache.hudi.client.WriteStatus; import org.apache.hudi.client.utils.LazyIterableIterator; import org.apache.hudi.common.engine.TaskContextSupplier; @@ -36,6 +35,7 @@ import java.util.Iterator; import java.util.List; +import java.util.Properties; import java.util.function.Function; /** @@ -87,7 +87,7 @@ public static class HoodieInsertValueGenResult { public HoodieInsertValueGenResult(T record, Schema schema, Properties properties) { this.record = record; try { - this.insertValue = record.getData().getInsertValue(schema, 
properties); + this.insertValue = ((HoodieRecordPayload) record.getData()).getInsertValue(schema, properties); } catch (Exception e) { this.exception = Option.of(e); } diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/HoodieIndex.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/HoodieIndex.java index 0428d12c40306..922371c4a0f45 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/HoodieIndex.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/HoodieIndex.java @@ -27,7 +27,6 @@ import org.apache.hudi.common.model.FileSlice; import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.model.HoodieRecord; -import org.apache.hudi.common.model.HoodieRecordPayload; import org.apache.hudi.common.model.WriteOperationType; import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.exception.HoodieIndexException; @@ -39,13 +38,11 @@ /** * Base class for different types of indexes to determine the mapping from uuid. * - * @param <T> Sub type of HoodieRecordPayload * @param <I> Type of inputs for deprecated APIs - * @param <K> Type of keys for deprecated APIs * @param <O> Type of outputs for deprecated APIs */ @PublicAPIClass(maturity = ApiMaturityLevel.EVOLVING) -public abstract class HoodieIndex<T extends HoodieRecordPayload, I, K, O> implements Serializable { +public abstract class HoodieIndex<I, O> implements Serializable { protected final HoodieWriteConfig config; @@ -60,7 +57,7 @@ protected HoodieIndex(HoodieWriteConfig config) { @Deprecated @PublicAPIMethod(maturity = ApiMaturityLevel.DEPRECATED) public I tagLocation(I records, HoodieEngineContext context, - HoodieTable<T, I, K, O> hoodieTable) throws HoodieIndexException { + HoodieTable hoodieTable) throws HoodieIndexException { throw new HoodieNotSupportedException("Deprecated API should not be called"); } @@ -70,7 +67,7 @@ public I tagLocation(I records, HoodieEngineContext context, @Deprecated @PublicAPIMethod(maturity = ApiMaturityLevel.DEPRECATED) public O updateLocation(O writeStatuses, HoodieEngineContext context, - HoodieTable<T, I, K, O> hoodieTable) throws HoodieIndexException { + HoodieTable hoodieTable) throws HoodieIndexException { throw new HoodieNotSupportedException("Deprecated API should not be called"); } @@ -79,8 +76,8 @@ public O updateLocation(O writeStatuses, HoodieEngineContext context, * the row (if it is actually present).
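 * <p>A hypothetical call-site sketch (engine-specific subclasses supply the actual lookup):
 * <pre>{@code
 *   HoodieData<HoodieRecord<R>> taggedRecords = index.tagLocation(records, context, hoodieTable);
 * }</pre>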
*/ @PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING) - public abstract HoodieData> tagLocation( - HoodieData> records, HoodieEngineContext context, + public abstract HoodieData> tagLocation( + HoodieData> records, HoodieEngineContext context, HoodieTable hoodieTable) throws HoodieIndexException; /** @@ -144,6 +141,6 @@ public void close() { } public enum IndexType { - HBASE, INMEMORY, BLOOM, GLOBAL_BLOOM, SIMPLE, GLOBAL_SIMPLE, BUCKET + HBASE, INMEMORY, BLOOM, GLOBAL_BLOOM, SIMPLE, GLOBAL_SIMPLE, BUCKET, FLINK_STATE } } diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/HoodieIndexUtils.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/HoodieIndexUtils.java index e5426ca1161f9..b714c50334b4f 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/HoodieIndexUtils.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/HoodieIndexUtils.java @@ -18,17 +18,30 @@ package org.apache.hudi.index; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; import org.apache.hudi.common.engine.HoodieEngineContext; +import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.model.HoodieBaseFile; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.model.HoodieRecordLocation; import org.apache.hudi.common.table.timeline.HoodieInstant; +import org.apache.hudi.common.util.HoodieTimer; import org.apache.hudi.common.util.Option; +import org.apache.hudi.common.util.ValidationUtils; import org.apache.hudi.common.util.collection.Pair; +import org.apache.hudi.exception.HoodieIndexException; +import org.apache.hudi.io.storage.HoodieFileReader; +import org.apache.hudi.io.storage.HoodieFileReaderFactory; import org.apache.hudi.table.HoodieTable; +import org.apache.log4j.LogManager; +import org.apache.log4j.Logger; +import java.util.ArrayList; import java.util.Collections; import java.util.List; +import java.util.Set; +import java.util.TreeSet; import static java.util.stream.Collectors.toList; @@ -37,6 +50,8 @@ */ public class HoodieIndexUtils { + private static final Logger LOG = LogManager.getLogger(HoodieIndexUtils.class); + /** * Fetches Pair of partition path and {@link HoodieBaseFile}s for interested partitions. * @@ -87,18 +102,48 @@ public static List> getLatestBaseFilesForAllPartiti * @return the tagged {@link HoodieRecord} */ public static HoodieRecord getTaggedRecord(HoodieRecord inputRecord, Option location) { - HoodieRecord record = inputRecord; + HoodieRecord record = inputRecord; if (location.isPresent()) { // When you have a record in multiple files in the same partition, then collection // will have 2 entries with the same exact in memory copy of the HoodieRecord and the 2 // separate filenames that the record is found in. This will result in setting // currentLocation 2 times and it will fail the second time. So creating a new in memory // copy of the hoodie record. - record = new HoodieRecord<>(inputRecord); + record = inputRecord.newInstance(); record.unseal(); record.setCurrentLocation(location.get()); record.seal(); } return record; } + + /** + * Given a list of row keys and one file, return only row keys existing in that file. 
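+ * <p>A hypothetical usage sketch (the path and candidate keys are illustrative):
+ * <pre>{@code
+ *   List<String> foundKeys = HoodieIndexUtils.filterKeysFromFile(
+ *       new Path(partitionPath, baseFileName), Arrays.asList("key1", "key2"), hadoopConf);
+ * }</pre>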
+ * + * @param filePath - File to filter keys from + * @param candidateRecordKeys - Candidate keys to filter + * @return List of candidate keys that are available in the file + */ + public static List filterKeysFromFile(Path filePath, List candidateRecordKeys, + Configuration configuration) throws HoodieIndexException { + ValidationUtils.checkArgument(FSUtils.isBaseFile(filePath)); + List foundRecordKeys = new ArrayList<>(); + try { + // Load all rowKeys from the file, to double-confirm + if (!candidateRecordKeys.isEmpty()) { + HoodieTimer timer = new HoodieTimer().startTimer(); + HoodieFileReader fileReader = HoodieFileReaderFactory.getFileReader(configuration, filePath); + Set fileRowKeys = fileReader.filterRowKeys(new TreeSet<>(candidateRecordKeys)); + foundRecordKeys.addAll(fileRowKeys); + LOG.info(String.format("Checked keys against file %s, in %d ms. #candidates (%d) #found (%d)", filePath, + timer.endTimer(), candidateRecordKeys.size(), foundRecordKeys.size())); + if (LOG.isDebugEnabled()) { + LOG.debug("Keys matching for file " + filePath + " => " + foundRecordKeys); + } + } + } catch (Exception e) { + throw new HoodieIndexException("Error checking candidate keys against file.", e); + } + return foundRecordKeys; + } } diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/bloom/BaseHoodieBloomIndexHelper.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/bloom/BaseHoodieBloomIndexHelper.java index 9f0e815632f38..9430d9bb5e50b 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/bloom/BaseHoodieBloomIndexHelper.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/bloom/BaseHoodieBloomIndexHelper.java @@ -24,7 +24,7 @@ import org.apache.hudi.common.engine.HoodieEngineContext; import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.model.HoodieRecordLocation; -import org.apache.hudi.common.util.collection.ImmutablePair; +import org.apache.hudi.common.util.collection.Pair; import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.table.HoodieTable; @@ -51,7 +51,7 @@ public abstract class BaseHoodieBloomIndexHelper implements Serializable { public abstract HoodiePairData findMatchingFilesForRecordKeys( HoodieWriteConfig config, HoodieEngineContext context, HoodieTable hoodieTable, HoodiePairData partitionRecordKeyPairs, - HoodieData> fileComparisonPairs, + HoodieData> fileComparisonPairs, Map> partitionToFileInfo, Map recordsPerPartition); } diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/bloom/HoodieBaseBloomIndexCheckFunction.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/bloom/HoodieBaseBloomIndexCheckFunction.java index 441a212c59f40..80031f4e8f025 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/bloom/HoodieBaseBloomIndexCheckFunction.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/bloom/HoodieBaseBloomIndexCheckFunction.java @@ -25,7 +25,7 @@ import org.apache.hudi.exception.HoodieException; import org.apache.hudi.exception.HoodieIndexException; import org.apache.hudi.io.HoodieKeyLookupHandle; -import org.apache.hudi.io.HoodieKeyLookupHandle.KeyLookupResult; +import org.apache.hudi.io.HoodieKeyLookupResult; import org.apache.hudi.table.HoodieTable; import java.util.function.Function; @@ -37,7 +37,7 @@ * Function performing actual checking of list containing (fileId, hoodieKeys) against the actual files. 
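 * <p>Consecutive keys belonging to the same file are funneled into a single
 * {@link HoodieKeyLookupHandle}, so each file is opened and checked only once; a minimal sketch
 * of driving it (the input iterator is illustrative):
 * <pre>{@code
 *   Iterator<List<HoodieKeyLookupResult>> results =
 *       new HoodieBaseBloomIndexCheckFunction(hoodieTable, config).apply(fileIdToKeyPairs);
 * }</pre>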
*/ public class HoodieBaseBloomIndexCheckFunction - implements Function>, Iterator>> { + implements Function>, Iterator>> { private final HoodieTable hoodieTable; @@ -49,11 +49,11 @@ public HoodieBaseBloomIndexCheckFunction(HoodieTable hoodieTable, HoodieWriteCon } @Override - public Iterator> apply(Iterator> filePartitionRecordKeyTripletItr) { + public Iterator> apply(Iterator> filePartitionRecordKeyTripletItr) { return new LazyKeyCheckIterator(filePartitionRecordKeyTripletItr); } - class LazyKeyCheckIterator extends LazyIterableIterator, List> { + class LazyKeyCheckIterator extends LazyIterableIterator, List> { private HoodieKeyLookupHandle keyLookupHandle; @@ -66,8 +66,8 @@ protected void start() { } @Override - protected List computeNext() { - List ret = new ArrayList<>(); + protected List computeNext() { + List ret = new ArrayList<>(); try { // process one file in each go. while (inputItr.hasNext()) { @@ -83,7 +83,7 @@ protected List computeNext() { } // if continue on current file - if (keyLookupHandle.getPartitionPathFilePair().equals(partitionPathFilePair)) { + if (keyLookupHandle.getPartitionPathFileIDPair().equals(partitionPathFilePair)) { keyLookupHandle.addKey(recordKey); } else { // do the actual checking of file & break out diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/bloom/HoodieBloomIndex.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/bloom/HoodieBloomIndex.java index a223d695cc03a..d3e73c058cc56 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/bloom/HoodieBloomIndex.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/bloom/HoodieBloomIndex.java @@ -19,20 +19,22 @@ package org.apache.hudi.index.bloom; +import org.apache.hudi.avro.model.HoodieMetadataColumnStats; import org.apache.hudi.client.WriteStatus; import org.apache.hudi.common.config.HoodieConfig; import org.apache.hudi.common.data.HoodieData; import org.apache.hudi.common.data.HoodiePairData; import org.apache.hudi.common.engine.HoodieEngineContext; +import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.model.HoodieRecordLocation; -import org.apache.hudi.common.model.HoodieRecordPayload; import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.collection.ImmutablePair; import org.apache.hudi.common.util.collection.Pair; import org.apache.hudi.config.HoodieIndexConfig; import org.apache.hudi.config.HoodieWriteConfig; +import org.apache.hudi.exception.HoodieMetadataException; import org.apache.hudi.exception.MetadataNotFoundException; import org.apache.hudi.index.HoodieIndex; import org.apache.hudi.index.HoodieIndexUtils; @@ -46,6 +48,7 @@ import java.util.List; import java.util.Map; import java.util.stream.Collectors; +import java.util.stream.Stream; import static java.util.stream.Collectors.groupingBy; import static java.util.stream.Collectors.mapping; @@ -55,8 +58,7 @@ /** * Indexing mechanism based on bloom filter. Each parquet file includes its row_key bloom filter in its metadata. 
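 * <p>A minimal sketch of enabling this index through the write config (builder method names
 * assumed from Hudi's config classes):
 * <pre>{@code
 *   HoodieWriteConfig writeConfig = HoodieWriteConfig.newBuilder()
 *       .withPath(basePath)
 *       .withIndexConfig(HoodieIndexConfig.newBuilder()
 *           .withIndexType(HoodieIndex.IndexType.BLOOM).build())
 *       .build();
 * }</pre>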
*/ -public class HoodieBloomIndex> - extends HoodieIndex { +public class HoodieBloomIndex extends HoodieIndex { private static final Logger LOG = LogManager.getLogger(HoodieBloomIndex.class); private final BaseHoodieBloomIndexHelper bloomIndexHelper; @@ -67,8 +69,8 @@ public HoodieBloomIndex(HoodieWriteConfig config, BaseHoodieBloomIndexHelper blo } @Override - public HoodieData> tagLocation( - HoodieData> records, HoodieEngineContext context, + public HoodieData> tagLocation( + HoodieData> records, HoodieEngineContext context, HoodieTable hoodieTable) { // Step 0: cache the input records if needed if (config.getBloomIndexUseCaching()) { @@ -94,7 +96,7 @@ record -> new ImmutablePair<>(record.getPartitionPath(), record.getRecordKey())) } // Step 3: Tag the incoming records, as inserts or updates, by joining with existing record keys - HoodieData> taggedRecords = tagLocationBacktoRecords(keyFilenamePairs, records); + HoodieData> taggedRecords = tagLocationBacktoRecords(keyFilenamePairs, records); if (config.getBloomIndexUseCaching()) { records.unpersist(); @@ -111,19 +113,25 @@ record -> new ImmutablePair<>(record.getPartitionPath(), record.getRecordKey())) private HoodiePairData lookupIndex( HoodiePairData partitionRecordKeyPairs, final HoodieEngineContext context, final HoodieTable hoodieTable) { - // Obtain records per partition, in the incoming records + // Step 1: Obtain records per partition, in the incoming records Map recordsPerPartition = partitionRecordKeyPairs.countByKey(); List affectedPartitionPathList = new ArrayList<>(recordsPerPartition.keySet()); // Step 2: Load all involved files as pairs - List> fileInfoList = - loadInvolvedFiles(affectedPartitionPathList, context, hoodieTable); + List> fileInfoList; + if (config.getBloomIndexPruneByRanges()) { + fileInfoList = (config.getMetadataConfig().isColumnStatsIndexEnabled() + ? loadColumnRangesFromMetaIndex(affectedPartitionPathList, context, hoodieTable) + : loadColumnRangesFromFiles(affectedPartitionPathList, context, hoodieTable)); + } else { + fileInfoList = getFileInfoForLatestBaseFiles(affectedPartitionPathList, context, hoodieTable); + } final Map> partitionToFileInfo = fileInfoList.stream().collect(groupingBy(Pair::getLeft, mapping(Pair::getRight, toList()))); // Step 3: Obtain a HoodieData, for each incoming record, that already exists, with the file id, // that contains it. - HoodieData> fileComparisonPairs = + HoodieData> fileComparisonPairs = explodeRecordsWithFileComparisons(partitionToFileInfo, partitionRecordKeyPairs); return bloomIndexHelper.findMatchingFilesForRecordKeys(config, context, hoodieTable, @@ -133,30 +141,84 @@ private HoodiePairData lookupIndex( /** * Load all involved files as pair List. */ - List> loadInvolvedFiles( + List> loadColumnRangesFromFiles( List partitions, final HoodieEngineContext context, final HoodieTable hoodieTable) { // Obtain the latest data files from all the partitions. 
List> partitionPathFileIDList = getLatestBaseFilesForAllPartitions(partitions, context, hoodieTable).stream() .map(pair -> Pair.of(pair.getKey(), pair.getValue().getFileId())) .collect(toList()); - if (config.getBloomIndexPruneByRanges()) { - // also obtain file ranges, if range pruning is enabled - context.setJobStatus(this.getClass().getName(), "Obtain key ranges for file slices (range pruning=on)"); - return context.map(partitionPathFileIDList, pf -> { - try { - HoodieRangeInfoHandle rangeInfoHandle = new HoodieRangeInfoHandle(config, hoodieTable, pf); - String[] minMaxKeys = rangeInfoHandle.getMinMaxKeys(); - return Pair.of(pf.getKey(), new BloomIndexFileInfo(pf.getValue(), minMaxKeys[0], minMaxKeys[1])); - } catch (MetadataNotFoundException me) { - LOG.warn("Unable to find range metadata in file :" + pf); - return Pair.of(pf.getKey(), new BloomIndexFileInfo(pf.getValue())); + context.setJobStatus(this.getClass().getName(), "Obtain key ranges for file slices (range pruning=on)"); + return context.map(partitionPathFileIDList, pf -> { + try { + HoodieRangeInfoHandle rangeInfoHandle = new HoodieRangeInfoHandle(config, hoodieTable, pf); + String[] minMaxKeys = rangeInfoHandle.getMinMaxKeys(); + return Pair.of(pf.getKey(), new BloomIndexFileInfo(pf.getValue(), minMaxKeys[0], minMaxKeys[1])); + } catch (MetadataNotFoundException me) { + LOG.warn("Unable to find range metadata in file :" + pf); + return Pair.of(pf.getKey(), new BloomIndexFileInfo(pf.getValue())); + } + }, Math.max(partitionPathFileIDList.size(), 1)); + } + + /** + * Get BloomIndexFileInfo for all the latest base files for the requested partitions. + * + * @param partitions - List of partitions to get the base files for + * @param context - Engine context + * @param hoodieTable - Hoodie Table + * @return List of partition and file column range info pairs + */ + private List> getFileInfoForLatestBaseFiles( + List partitions, final HoodieEngineContext context, final HoodieTable hoodieTable) { + List> partitionPathFileIDList = getLatestBaseFilesForAllPartitions(partitions, context, + hoodieTable).stream() + .map(pair -> Pair.of(pair.getKey(), pair.getValue().getFileId())) + .collect(toList()); + return partitionPathFileIDList.stream() + .map(pf -> Pair.of(pf.getKey(), new BloomIndexFileInfo(pf.getValue()))).collect(toList()); + } + + /** + * Load the column stats index as BloomIndexFileInfo for all the involved files in the partition. 
+ * + * @param partitions - List of partitions for which column stats need to be loaded + * @param context - Engine context + * @param hoodieTable - Hoodie table + * @return List of partition and file column range info pairs + */ + protected List> loadColumnRangesFromMetaIndex( + List partitions, final HoodieEngineContext context, final HoodieTable hoodieTable) { + // Obtain the record key ranges from the metadata table's column stats index; this path is only taken when range pruning is enabled + context.setJobStatus(this.getClass().getName(), "Load meta index key ranges for file slices"); + + final String keyField = hoodieTable.getMetaClient().getTableConfig().getRecordKeyFieldProp(); + return context.flatMap(partitions, partitionName -> { + // Partition and file name pairs + List> partitionFileNameList = HoodieIndexUtils.getLatestBaseFilesForPartition(partitionName, + hoodieTable).stream().map(baseFile -> Pair.of(partitionName, baseFile.getFileName())) + .sorted() + .collect(toList()); + if (partitionFileNameList.isEmpty()) { + return Stream.empty(); + } + try { + Map, HoodieMetadataColumnStats> fileToColumnStatsMap = hoodieTable + .getMetadataTable().getColumnStats(partitionFileNameList, keyField); + List> result = new ArrayList<>(); + for (Map.Entry, HoodieMetadataColumnStats> entry : fileToColumnStatsMap.entrySet()) { + result.add(Pair.of(entry.getKey().getLeft(), + new BloomIndexFileInfo( + FSUtils.getFileId(entry.getKey().getRight()), + entry.getValue().getMinValue(), + entry.getValue().getMaxValue() + ))); } - }, Math.max(partitionPathFileIDList.size(), 1)); - } else { - return partitionPathFileIDList.stream() - .map(pf -> Pair.of(pf.getKey(), new BloomIndexFileInfo(pf.getValue()))).collect(toList()); - } + return result.stream(); + } catch (MetadataNotFoundException me) { + throw new HoodieMetadataException("Unable to find column range metadata for partition: " + partitionName, me); + } + }, Math.max(partitions.size(), 1)); } @Override @@ -197,7 +259,7 @@ public boolean isImplicitWithStorage() { * Sub-partition to ensure the records can be looked up against files & also prune file<=>record comparisons based on * recordKey ranges in the index info. */ - HoodieData> explodeRecordsWithFileComparisons( + HoodieData> explodeRecordsWithFileComparisons( final Map> partitionToFileIndexInfo, HoodiePairData partitionRecordKeyPairs) { IndexFileFilter indexFileFilter = @@ -209,7 +271,7 @@ HoodieData> explodeRecordsWithFileComparisons( String partitionPath = partitionRecordKeyPair.getLeft(); return indexFileFilter.getMatchingFilesAndPartition(partitionPath, recordKey).stream() - .map(partitionFileIdPair -> new ImmutablePair<>(partitionFileIdPair.getRight(), + .map(partitionFileIdPair -> (Pair) new ImmutablePair<>(partitionFileIdPair.getRight(), new HoodieKey(recordKey, partitionPath))) .collect(Collectors.toList()); }).flatMap(List::iterator); @@ -218,10 +280,10 @@ HoodieData> explodeRecordsWithFileComparisons( /** * Tag the looked-up locations back onto the original HoodieRecord list. */ - protected HoodieData> tagLocationBacktoRecords( + protected HoodieData> tagLocationBacktoRecords( HoodiePairData keyFilenamePair, - HoodieData> records) { - HoodiePairData> keyRecordPairs = + HoodieData> records) { + HoodiePairData> keyRecordPairs = records.mapToPair(record -> new ImmutablePair<>(record.getKey(), record)); // Here as the records might have more data than keyFilenamePairs (some row keys' fileId is null), // so we do left outer join.
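The refactored lookupIndex dispatch above resolves per-file key ranges from one of three sources: the metadata table's column stats index, the base file footers, or, when range pruning is disabled, a plain listing of the latest base files with no ranges attached. The sketch below illustrates the pruning predicate those ranges make possible; FileRange, mightContain, and candidateFiles are simplified hypothetical stand-ins, not the actual Hudi classes:

```java
import java.util.ArrayList;
import java.util.List;

// Simplified stand-in for BloomIndexFileInfo-style range pruning (hypothetical types):
// a file stays a candidate for a record key only if the key falls inside the file's
// [minKey, maxKey] range, or if no range is known for the file.
public class RangePruningSketch {

  static final class FileRange {
    final String fileId;
    final String minKey; // null when ranges were not loaded (pruning disabled / missing stats)
    final String maxKey;

    FileRange(String fileId, String minKey, String maxKey) {
      this.fileId = fileId;
      this.minKey = minKey;
      this.maxKey = maxKey;
    }

    boolean mightContain(String recordKey) {
      if (minKey == null || maxKey == null) {
        return true; // no range info: the file's bloom filter must be consulted anyway
      }
      return minKey.compareTo(recordKey) <= 0 && recordKey.compareTo(maxKey) <= 0;
    }
  }

  // Returns only the files whose key range could contain the record key; these are the
  // (fileId, recordKey) comparison pairs that the bloom filter lookup then verifies.
  static List<String> candidateFiles(List<FileRange> filesInPartition, String recordKey) {
    List<String> candidates = new ArrayList<>();
    for (FileRange file : filesInPartition) {
      if (file.mightContain(recordKey)) {
        candidates.add(file.fileId);
      }
    }
    return candidates;
  }

  public static void main(String[] args) {
    List<FileRange> files = new ArrayList<>();
    files.add(new FileRange("f1", "key_000", "key_499"));
    files.add(new FileRange("f2", "key_500", "key_999"));
    files.add(new FileRange("f3", null, null)); // stats unavailable: always a candidate
    System.out.println(candidateFiles(files, "key_742")); // prints [f2, f3]
  }
}
```

Files with no range information always remain candidates, so missing column stats can only cost extra bloom filter checks, never correctness.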
diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/bloom/HoodieGlobalBloomIndex.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/bloom/HoodieGlobalBloomIndex.java index 39fa72a329fe3..5f2007ea53668 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/bloom/HoodieGlobalBloomIndex.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/bloom/HoodieGlobalBloomIndex.java @@ -24,6 +24,7 @@ import org.apache.hudi.common.engine.HoodieEngineContext; import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.model.EmptyHoodieRecordPayload; +import org.apache.hudi.common.model.HoodieAvroRecord; import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.model.HoodieRecordLocation; @@ -46,7 +47,7 @@ * This filter will only work with hoodie table since it will only load partitions * with .hoodie_partition_metadata file in it. */ -public class HoodieGlobalBloomIndex> extends HoodieBloomIndex { +public class HoodieGlobalBloomIndex extends HoodieBloomIndex { public HoodieGlobalBloomIndex(HoodieWriteConfig config, BaseHoodieBloomIndexHelper bloomIndexHelper) { super(config, bloomIndexHelper); } @@ -55,11 +56,11 @@ public HoodieGlobalBloomIndex(HoodieWriteConfig config, BaseHoodieBloomIndexHelp * Load all involved files as pairs from all partitions in the table. */ @Override - List> loadInvolvedFiles(List partitions, final HoodieEngineContext context, - final HoodieTable hoodieTable) { + List> loadColumnRangesFromFiles(List partitions, final HoodieEngineContext context, + final HoodieTable hoodieTable) { HoodieTableMetaClient metaClient = hoodieTable.getMetaClient(); List allPartitionPaths = FSUtils.getAllPartitionPaths(context, config.getMetadataConfig(), metaClient.getBasePath()); - return super.loadInvolvedFiles(allPartitionPaths, context, hoodieTable); + return super.loadColumnRangesFromFiles(allPartitionPaths, context, hoodieTable); } /** @@ -73,7 +74,7 @@ List> loadInvolvedFiles(List partitions */ @Override - HoodieData> explodeRecordsWithFileComparisons( + HoodieData> explodeRecordsWithFileComparisons( final Map> partitionToFileIndexInfo, HoodiePairData partitionRecordKeyPairs) { @@ -86,7 +87,7 @@ HoodieData> explodeRecordsWithFileComparisons( String partitionPath = partitionRecordKeyPair.getLeft(); return indexFileFilter.getMatchingFilesAndPartition(partitionPath, recordKey).stream() - .map(partitionFileIdPair -> new ImmutablePair<>(partitionFileIdPair.getRight(), + .map(partitionFileIdPair -> (Pair) new ImmutablePair<>(partitionFileIdPair.getRight(), new HoodieKey(recordKey, partitionFileIdPair.getLeft()))) .collect(Collectors.toList()); }).flatMap(List::iterator); @@ -96,11 +97,11 @@ HoodieData> explodeRecordsWithFileComparisons( * Tagging for global index should only consider the record key. */ @Override - protected HoodieData> tagLocationBacktoRecords( + protected HoodieData> tagLocationBacktoRecords( HoodiePairData keyLocationPairs, - HoodieData> records) { + HoodieData> records) { - HoodiePairData> incomingRowKeyRecordPairs = + HoodiePairData> incomingRowKeyRecordPairs = records.mapToPair(record -> new ImmutablePair<>(record.getRecordKey(), record)); HoodiePairData> existingRecordKeyToRecordLocationHoodieKeyMap = @@ -109,29 +110,29 @@ protected HoodieData> tagLocationBacktoRecords( // Here as the records might have more data than rowKeys (some rowKeys' fileId is null), so we do left outer join. 
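// The join below yields one of three outcomes per incoming record: (1) key found under a different partition with bloom index update-partition-path enabled: emit a delete for the old partition plus an untagged insert for the new one; (2) key found otherwise: tag the record with its existing location in the old partition; (3) key not found: pass the record through untagged so it is treated as an insert.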
return incomingRowKeyRecordPairs.leftOuterJoin(existingRecordKeyToRecordLocationHoodieKeyMap).values().flatMap(record -> { - final HoodieRecord hoodieRecord = record.getLeft(); + final HoodieRecord hoodieRecord = record.getLeft(); final Option> recordLocationHoodieKeyPair = record.getRight(); if (recordLocationHoodieKeyPair.isPresent()) { // Record key matched to file if (config.getBloomIndexUpdatePartitionPath() && !recordLocationHoodieKeyPair.get().getRight().getPartitionPath().equals(hoodieRecord.getPartitionPath())) { // Create an empty record to delete the record in the old partition - HoodieRecord deleteRecord = new HoodieRecord(recordLocationHoodieKeyPair.get().getRight(), + HoodieRecord deleteRecord = new HoodieAvroRecord(recordLocationHoodieKeyPair.get().getRight(), new EmptyHoodieRecordPayload()); deleteRecord.setCurrentLocation(recordLocationHoodieKeyPair.get().getLeft()); deleteRecord.seal(); // Tag the incoming record for inserting to the new partition - HoodieRecord insertRecord = HoodieIndexUtils.getTaggedRecord(hoodieRecord, Option.empty()); + HoodieRecord insertRecord = HoodieIndexUtils.getTaggedRecord(hoodieRecord, Option.empty()); return Arrays.asList(deleteRecord, insertRecord).iterator(); } else { // Ignore the incoming record's partition, regardless of whether it differs from its old partition or not. // When it differs, the record will still be updated at its old partition. return Collections.singletonList( - (HoodieRecord) HoodieIndexUtils.getTaggedRecord(new HoodieRecord<>(recordLocationHoodieKeyPair.get().getRight(), hoodieRecord.getData()), + (HoodieRecord) HoodieIndexUtils.getTaggedRecord(new HoodieAvroRecord(recordLocationHoodieKeyPair.get().getRight(), (HoodieRecordPayload) hoodieRecord.getData()), Option.ofNullable(recordLocationHoodieKeyPair.get().getLeft()))).iterator(); } } else { - return Collections.singletonList((HoodieRecord) HoodieIndexUtils.getTaggedRecord(hoodieRecord, Option.empty())).iterator(); + return Collections.singletonList((HoodieRecord) HoodieIndexUtils.getTaggedRecord(hoodieRecord, Option.empty())).iterator(); } }); } diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/bloom/ListBasedHoodieBloomIndexHelper.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/bloom/ListBasedHoodieBloomIndexHelper.java index 74191df523659..c42d80c62e758 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/bloom/ListBasedHoodieBloomIndexHelper.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/bloom/ListBasedHoodieBloomIndexHelper.java @@ -28,7 +28,7 @@ import org.apache.hudi.common.util.collection.ImmutablePair; import org.apache.hudi.common.util.collection.Pair; import org.apache.hudi.config.HoodieWriteConfig; -import org.apache.hudi.io.HoodieKeyLookupHandle; +import org.apache.hudi.io.HoodieKeyLookupResult; import org.apache.hudi.table.HoodieTable; import java.util.ArrayList; @@ -57,15 +57,14 @@ public static ListBasedHoodieBloomIndexHelper getInstance() { public HoodiePairData findMatchingFilesForRecordKeys( HoodieWriteConfig config, HoodieEngineContext context, HoodieTable hoodieTable, HoodiePairData partitionRecordKeyPairs, - HoodieData> fileComparisonPairs, + HoodieData> fileComparisonPairs, Map> partitionToFileInfo, Map recordsPerPartition) { List> fileComparisonPairList = HoodieList.getList(fileComparisonPairs).stream() - .sorted(Comparator.comparing(ImmutablePair::getLeft)).collect(toList()); + 
.sorted(Comparator.comparing(Pair::getLeft)).collect(toList()); - List keyLookupResults = new ArrayList<>(); - - Iterator> iterator = new HoodieBaseBloomIndexCheckFunction( + List keyLookupResults = new ArrayList<>(); + Iterator> iterator = new HoodieBaseBloomIndexCheckFunction( hoodieTable, config).apply(fileComparisonPairList.iterator()); while (iterator.hasNext()) { keyLookupResults.addAll(iterator.next()); @@ -77,7 +76,7 @@ public HoodiePairData findMatchingFilesForRecor lookupResult.getMatchingRecordKeys().stream() .map(recordKey -> new ImmutablePair<>(lookupResult, recordKey)).iterator() ).mapToPair(pair -> { - HoodieKeyLookupHandle.KeyLookupResult lookupResult = pair.getLeft(); + HoodieKeyLookupResult lookupResult = pair.getLeft(); String recordKey = pair.getRight(); return new ImmutablePair<>( new HoodieKey(recordKey, lookupResult.getPartitionPath()), diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/bucket/BucketIdentifier.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/bucket/BucketIdentifier.java index 7dee9f3cdfa33..ddd95721a46b6 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/bucket/BucketIdentifier.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/bucket/BucketIdentifier.java @@ -23,6 +23,7 @@ import org.apache.hudi.common.model.HoodieRecord; import java.util.Arrays; +import java.util.Collections; import java.util.List; import java.util.Map; import java.util.regex.Pattern; @@ -39,7 +40,7 @@ public static int getBucketId(HoodieRecord record, String indexKeyFields, int nu public static int getBucketId(HoodieKey hoodieKey, String indexKeyFields, int numBuckets) { List hashKeyFields; if (!hoodieKey.getRecordKey().contains(":")) { - hashKeyFields = Arrays.asList(hoodieKey.getRecordKey()); + hashKeyFields = Collections.singletonList(hoodieKey.getRecordKey()); } else { Map recordKeyPairs = Arrays.stream(hoodieKey.getRecordKey().split(",")) .map(p -> p.split(":")) @@ -56,6 +57,10 @@ public static int getBucketId(List hashKeyFields, int numBuckets) { return hashKeyFields.hashCode() % numBuckets; } + public static String partitionBucketIdStr(String partition, int bucketId) { + return String.format("%s_%s", partition, bucketIdStr(bucketId)); + } + public static int bucketIdFromFileId(String fileId) { return Integer.parseInt(fileId.substring(0, 8)); } @@ -64,6 +69,10 @@ public static String bucketIdStr(int n) { return String.format("%08d", n); } + public static String newBucketFileIdPrefix(int bucketId) { + return newBucketFileIdPrefix(bucketIdStr(bucketId)); + } + public static String newBucketFileIdPrefix(String bucketId) { return FSUtils.createNewFileIdPfx().replaceFirst(".{8}", bucketId); } @@ -71,4 +80,8 @@ public static String newBucketFileIdPrefix(String bucketId) { public static boolean isBucketFileName(String name) { return BUCKET_NAME.matcher(name).matches(); } + + public static int mod(int x, int y) { + return x % y; + } } diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/bucket/HoodieBucketIndex.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/bucket/HoodieBucketIndex.java index acb06ea48bed1..a243eea767856 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/bucket/HoodieBucketIndex.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/bucket/HoodieBucketIndex.java @@ -24,7 +24,6 @@ import org.apache.hudi.common.engine.HoodieEngineContext; import 
org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.model.HoodieRecordLocation; -import org.apache.hudi.common.model.HoodieRecordPayload; import org.apache.hudi.common.model.WriteOperationType; import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.collection.Pair; @@ -34,6 +33,7 @@ import org.apache.hudi.index.HoodieIndex; import org.apache.hudi.index.HoodieIndexUtils; import org.apache.hudi.table.HoodieTable; + import org.apache.log4j.LogManager; import org.apache.log4j.Logger; @@ -42,10 +42,8 @@ /** * Hash indexing mechanism. - * @param */ -public class HoodieBucketIndex> - extends HoodieIndex { +public class HoodieBucketIndex extends HoodieIndex { private static final Logger LOG = LogManager.getLogger(HoodieBucketIndex.class); @@ -66,14 +64,14 @@ public HoodieData updateLocation(HoodieData writeStatu } @Override - public HoodieData> tagLocation(HoodieData> records, - HoodieEngineContext context, + public HoodieData> tagLocation( + HoodieData> records, HoodieEngineContext context, HoodieTable hoodieTable) throws HoodieIndexException { - HoodieData> taggedRecords = records.mapPartitions(recordIter -> { + HoodieData> taggedRecords = records.mapPartitions(recordIter -> { // partitionPath -> bucketId -> fileInfo Map>> partitionPathFileIDList = new HashMap<>(); - return new LazyIterableIterator, HoodieRecord>(recordIter) { + return new LazyIterableIterator, HoodieRecord>(recordIter) { @Override protected void start() { @@ -81,7 +79,7 @@ protected void start() { } @Override - protected HoodieRecord computeNext() { + protected HoodieRecord computeNext() { HoodieRecord record = recordIter.next(); int bucketId = BucketIdentifier.getBucketId(record, config.getBucketIndexHashField(), numBuckets); String partitionPath = record.getPartitionPath(); diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/inmemory/HoodieInMemoryHashIndex.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/inmemory/HoodieInMemoryHashIndex.java index bec675c102ff5..42dcc1b97d760 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/inmemory/HoodieInMemoryHashIndex.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/inmemory/HoodieInMemoryHashIndex.java @@ -25,7 +25,6 @@ import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.model.HoodieRecordLocation; -import org.apache.hudi.common.model.HoodieRecordPayload; import org.apache.hudi.common.util.Option; import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.index.HoodieIndex; @@ -41,8 +40,8 @@ *

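 * Locations are kept in a static in-process ConcurrentMap keyed by HoodieKey, so the index is shared only within a single JVM and its state is lost on restart.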
* ONLY USE FOR LOCAL TESTING */ -public class HoodieInMemoryHashIndex> - extends HoodieIndex { +public class HoodieInMemoryHashIndex + extends HoodieIndex { private static ConcurrentMap recordLocationMap; @@ -56,13 +55,13 @@ public HoodieInMemoryHashIndex(HoodieWriteConfig config) { } @Override - public HoodieData> tagLocation( - HoodieData> records, HoodieEngineContext context, + public HoodieData> tagLocation( + HoodieData> records, HoodieEngineContext context, HoodieTable hoodieTable) { return records.mapPartitions(hoodieRecordIterator -> { - List> taggedRecords = new ArrayList<>(); + List> taggedRecords = new ArrayList<>(); while (hoodieRecordIterator.hasNext()) { - HoodieRecord record = hoodieRecordIterator.next(); + HoodieRecord record = hoodieRecordIterator.next(); if (recordLocationMap.containsKey(record.getKey())) { record.unseal(); record.setCurrentLocation(recordLocationMap.get(record.getKey())); diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/simple/HoodieGlobalSimpleIndex.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/simple/HoodieGlobalSimpleIndex.java index 8935fcb02fec2..805ae462a1128 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/simple/HoodieGlobalSimpleIndex.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/simple/HoodieGlobalSimpleIndex.java @@ -24,6 +24,7 @@ import org.apache.hudi.common.engine.HoodieEngineContext; import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.model.EmptyHoodieRecordPayload; +import org.apache.hudi.common.model.HoodieAvroRecord; import org.apache.hudi.common.model.HoodieBaseFile; import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.model.HoodieRecord; @@ -47,17 +48,15 @@ /** * A global simple index which reads the fields of interest (record key and partition path) from base files and * joins with incoming records to find the tagged location.
- * - * @param */ -public class HoodieGlobalSimpleIndex> extends HoodieSimpleIndex { +public class HoodieGlobalSimpleIndex extends HoodieSimpleIndex { public HoodieGlobalSimpleIndex(HoodieWriteConfig config, Option keyGeneratorOpt) { super(config, keyGeneratorOpt); } @Override - public HoodieData> tagLocation( - HoodieData> records, HoodieEngineContext context, + public HoodieData> tagLocation( + HoodieData> records, HoodieEngineContext context, HoodieTable hoodieTable) { return tagLocationInternal(records, context, hoodieTable); } @@ -71,11 +70,11 @@ public HoodieData> tagLocation( * @return {@link HoodieData} of records with record locations set */ @Override - protected HoodieData> tagLocationInternal( - HoodieData> inputRecords, HoodieEngineContext context, + protected HoodieData> tagLocationInternal( + HoodieData> inputRecords, HoodieEngineContext context, HoodieTable hoodieTable) { - HoodiePairData> keyedInputRecords = + HoodiePairData> keyedInputRecords = inputRecords.mapToPair(entry -> new ImmutablePair<>(entry.getRecordKey(), entry)); HoodiePairData allRecordLocationsInTable = fetchAllRecordLocations(context, hoodieTable, config.getGlobalSimpleIndexParallelism()); @@ -114,8 +113,8 @@ protected List> getAllBaseFilesInTable( * @param existingRecords existing records with {@link HoodieRecordLocation}s * @return {@link HoodieData} of {@link HoodieRecord}s with tagged {@link HoodieRecordLocation}s */ - private HoodieData> getTaggedRecords( - HoodiePairData> incomingRecords, + private HoodieData> getTaggedRecords( + HoodiePairData> incomingRecords, HoodiePairData existingRecords) { HoodiePairData> existingRecordByRecordKey = existingRecords.mapToPair( @@ -124,29 +123,29 @@ private HoodieData> getTaggedRecords( return incomingRecords.leftOuterJoin(existingRecordByRecordKey).values() .flatMap(entry -> { - HoodieRecord inputRecord = entry.getLeft(); + HoodieRecord inputRecord = entry.getLeft(); Option> partitionPathLocationPair = Option.ofNullable(entry.getRight().orElse(null)); - List> taggedRecords; + List> taggedRecords; if (partitionPathLocationPair.isPresent()) { String partitionPath = partitionPathLocationPair.get().getKey(); HoodieRecordLocation location = partitionPathLocationPair.get().getRight(); if (config.getGlobalSimpleIndexUpdatePartitionPath() && !(inputRecord.getPartitionPath().equals(partitionPath))) { // Create an empty record to delete the record in the old partition - HoodieRecord deleteRecord = new HoodieRecord(new HoodieKey(inputRecord.getRecordKey(), partitionPath), new EmptyHoodieRecordPayload()); + HoodieRecord deleteRecord = new HoodieAvroRecord(new HoodieKey(inputRecord.getRecordKey(), partitionPath), new EmptyHoodieRecordPayload()); deleteRecord.setCurrentLocation(location); deleteRecord.seal(); // Tag the incoming record for inserting to the new partition - HoodieRecord insertRecord = (HoodieRecord) HoodieIndexUtils.getTaggedRecord(inputRecord, Option.empty()); + HoodieRecord insertRecord = (HoodieRecord) HoodieIndexUtils.getTaggedRecord(inputRecord, Option.empty()); taggedRecords = Arrays.asList(deleteRecord, insertRecord); } else { // Ignore the incoming record's partition, regardless of whether it differs from its old partition or not. // When it differs, the record will still be updated at its old partition. 
- HoodieRecord newRecord = new HoodieRecord<>(new HoodieKey(inputRecord.getRecordKey(), partitionPath), inputRecord.getData()); - taggedRecords = Collections.singletonList((HoodieRecord) HoodieIndexUtils.getTaggedRecord(newRecord, Option.ofNullable(location))); + HoodieRecord newRecord = new HoodieAvroRecord(new HoodieKey(inputRecord.getRecordKey(), partitionPath), (HoodieRecordPayload) inputRecord.getData()); + taggedRecords = Collections.singletonList((HoodieRecord) HoodieIndexUtils.getTaggedRecord(newRecord, Option.ofNullable(location))); } } else { - taggedRecords = Collections.singletonList((HoodieRecord) HoodieIndexUtils.getTaggedRecord(inputRecord, Option.empty())); + taggedRecords = Collections.singletonList((HoodieRecord) HoodieIndexUtils.getTaggedRecord(inputRecord, Option.empty())); } return taggedRecords.iterator(); }); diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/simple/HoodieSimpleIndex.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/simple/HoodieSimpleIndex.java index dfefe5adabfe9..95823ff51e35d 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/simple/HoodieSimpleIndex.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/index/simple/HoodieSimpleIndex.java @@ -28,7 +28,6 @@ import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.model.HoodieRecordLocation; -import org.apache.hudi.common.model.HoodieRecordPayload; import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.collection.ImmutablePair; import org.apache.hudi.common.util.collection.Pair; @@ -47,11 +46,9 @@ /** * A simple index which reads the fields of interest (record key and partition path) from base files and * joins with incoming records to find the tagged location.
- * - * @param type of {@link HoodieRecordPayload} */ -public class HoodieSimpleIndex> - extends HoodieIndex { +public class HoodieSimpleIndex + extends HoodieIndex { private final Option keyGeneratorOpt; @@ -88,8 +85,8 @@ public boolean isImplicitWithStorage() { } @Override - public HoodieData> tagLocation( - HoodieData> records, HoodieEngineContext context, + public HoodieData> tagLocation( + HoodieData> records, HoodieEngineContext context, HoodieTable hoodieTable) { return tagLocationInternal(records, context, hoodieTable); } @@ -102,23 +99,23 @@ public HoodieData> tagLocation( * @param hoodieTable instance of {@link HoodieTable} to use * @return {@link HoodieData} of records with record locations set */ - protected HoodieData> tagLocationInternal( - HoodieData> inputRecords, HoodieEngineContext context, + protected HoodieData> tagLocationInternal( + HoodieData> inputRecords, HoodieEngineContext context, HoodieTable hoodieTable) { if (config.getSimpleIndexUseCaching()) { inputRecords.persist(new HoodieConfig(config.getProps()) .getString(HoodieIndexConfig.SIMPLE_INDEX_INPUT_STORAGE_LEVEL_VALUE)); } - HoodiePairData> keyedInputRecords = + HoodiePairData> keyedInputRecords = inputRecords.mapToPair(record -> new ImmutablePair<>(record.getKey(), record)); HoodiePairData existingLocationsOnTable = fetchRecordLocationsForAffectedPartitions(keyedInputRecords.keys(), context, hoodieTable, config.getSimpleIndexParallelism()); - HoodieData> taggedRecords = + HoodieData> taggedRecords = keyedInputRecords.leftOuterJoin(existingLocationsOnTable).map(entry -> { - final HoodieRecord untaggedRecord = entry.getRight().getLeft(); + final HoodieRecord untaggedRecord = entry.getRight().getLeft(); final Option location = Option.ofNullable(entry.getRight().getRight().orElse(null)); return HoodieIndexUtils.getTaggedRecord(untaggedRecord, location); }); @@ -151,7 +148,7 @@ protected HoodiePairData fetchRecordLocationsFo protected HoodiePairData fetchRecordLocations( HoodieEngineContext context, HoodieTable hoodieTable, int parallelism, List> baseFiles) { - int fetchParallelism = Math.max(1, Math.max(baseFiles.size(), parallelism)); + int fetchParallelism = Math.max(1, Math.min(baseFiles.size(), parallelism)); return context.parallelize(baseFiles, fetchParallelism) .flatMap(partitionPathBaseFile -> new HoodieKeyLocationFetchHandle(config, hoodieTable, partitionPathBaseFile, keyGeneratorOpt) diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieAppendHandle.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieAppendHandle.java index 6df05a7c6bd72..7eafe268ba8e8 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieAppendHandle.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieAppendHandle.java @@ -38,10 +38,12 @@ import org.apache.hudi.common.table.log.AppendResult; import org.apache.hudi.common.table.log.HoodieLogFormat; import org.apache.hudi.common.table.log.HoodieLogFormat.Writer; -import org.apache.hudi.common.table.log.block.HoodieDataBlock; +import org.apache.hudi.common.table.log.block.HoodieAvroDataBlock; import org.apache.hudi.common.table.log.block.HoodieDeleteBlock; +import org.apache.hudi.common.table.log.block.HoodieHFileDataBlock; import org.apache.hudi.common.table.log.block.HoodieLogBlock; import org.apache.hudi.common.table.log.block.HoodieLogBlock.HeaderMetadataType; +import org.apache.hudi.common.table.log.block.HoodieParquetDataBlock; import 
org.apache.hudi.common.table.view.TableFileSystemView.SliceView; import org.apache.hudi.common.util.DefaultSizeEstimator; import org.apache.hudi.common.util.Option; @@ -49,6 +51,7 @@ import org.apache.hudi.common.util.SizeEstimator; import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.exception.HoodieAppendException; +import org.apache.hudi.exception.HoodieException; import org.apache.hudi.exception.HoodieUpsertException; import org.apache.hudi.table.HoodieTable; @@ -360,13 +363,13 @@ protected void appendDataAndDeleteBlocks(Map header) header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, writeSchemaWithMetaFields.toString()); List blocks = new ArrayList<>(2); if (recordList.size() > 0) { - if (config.populateMetaFields()) { - blocks.add(HoodieDataBlock.getBlock(hoodieTable.getLogDataBlockFormat(), recordList, header)); - } else { - final String keyField = hoodieTable.getMetaClient().getTableConfig().getRecordKeyFieldProp(); - blocks.add(HoodieDataBlock.getBlock(hoodieTable.getLogDataBlockFormat(), recordList, header, keyField)); - } + String keyField = config.populateMetaFields() + ? HoodieRecord.RECORD_KEY_METADATA_FIELD + : hoodieTable.getMetaClient().getTableConfig().getRecordKeyFieldProp(); + + blocks.add(getBlock(config, pickLogDataBlockFormat(), recordList, header, keyField)); } + if (keysToDelete.size() > 0) { blocks.add(new HoodieDeleteBlock(keysToDelete.toArray(new HoodieKey[keysToDelete.size()]), header)); } @@ -390,7 +393,7 @@ public boolean canWrite(HoodieRecord record) { @Override public void write(HoodieRecord record, Option insertValue) { - Option> recordMetadata = record.getData().getMetadata(); + Option> recordMetadata = ((HoodieRecordPayload) record.getData()).getMetadata(); try { init(record); flushToDiskIfRequired(record); @@ -497,4 +500,40 @@ private void flushToDiskIfRequired(HoodieRecord record) { numberOfRecords = 0; } } + + private HoodieLogBlock.HoodieLogBlockType pickLogDataBlockFormat() { + Option logBlockTypeOpt = config.getLogDataBlockFormat(); + if (logBlockTypeOpt.isPresent()) { + return logBlockTypeOpt.get(); + } + + // Fallback to deduce data-block type based on the base file format + switch (hoodieTable.getBaseFileFormat()) { + case PARQUET: + case ORC: + return HoodieLogBlock.HoodieLogBlockType.AVRO_DATA_BLOCK; + case HFILE: + return HoodieLogBlock.HoodieLogBlockType.HFILE_DATA_BLOCK; + default: + throw new HoodieException("Base file format " + hoodieTable.getBaseFileFormat() + + " does not have associated log block type"); + } + } + + private static HoodieLogBlock getBlock(HoodieWriteConfig writeConfig, + HoodieLogBlock.HoodieLogBlockType logDataBlockFormat, + List recordList, + Map header, + String keyField) { + switch (logDataBlockFormat) { + case AVRO_DATA_BLOCK: + return new HoodieAvroDataBlock(recordList, header, keyField); + case HFILE_DATA_BLOCK: + return new HoodieHFileDataBlock(recordList, header, writeConfig.getHFileCompressionAlgorithm()); + case PARQUET_DATA_BLOCK: + return new HoodieParquetDataBlock(recordList, header, keyField, writeConfig.getParquetCompressionCodec()); + default: + throw new HoodieException("Data block format " + logDataBlockFormat + " not implemented"); + } + } } diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieCreateHandle.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieCreateHandle.java index a9ff1f85478cb..096c257b1f797 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieCreateHandle.java +++ 
b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieCreateHandle.java @@ -18,7 +18,6 @@ package org.apache.hudi.io; -import org.apache.avro.Schema; import org.apache.hudi.client.WriteStatus; import org.apache.hudi.common.engine.TaskContextSupplier; import org.apache.hudi.common.fs.FSUtils; @@ -37,6 +36,7 @@ import org.apache.hudi.io.storage.HoodieFileWriterFactory; import org.apache.hudi.table.HoodieTable; +import org.apache.avro.Schema; import org.apache.avro.generic.GenericRecord; import org.apache.avro.generic.IndexedRecord; import org.apache.hadoop.fs.Path; @@ -128,7 +128,7 @@ public boolean canWrite(HoodieRecord record) { */ @Override public void write(HoodieRecord record, Option avroRecord) { - Option recordMetadata = record.getData().getMetadata(); + Option recordMetadata = ((HoodieRecordPayload) record.getData()).getMetadata(); if (HoodieOperation.isDelete(record.getOperation())) { avroRecord = Option.empty(); } diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieIOHandle.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieIOHandle.java index c6f9dddef30db..1ad28d14b3a8d 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieIOHandle.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieIOHandle.java @@ -19,6 +19,8 @@ package org.apache.hudi.io; import org.apache.hudi.common.model.HoodieRecordPayload; +import org.apache.hudi.common.util.Option; +import org.apache.hudi.common.util.StringUtils; import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.table.HoodieTable; @@ -31,8 +33,8 @@ public abstract class HoodieIOHandle { protected final FileSystem fs; protected final HoodieTable hoodieTable; - HoodieIOHandle(HoodieWriteConfig config, String instantTime, HoodieTable hoodieTable) { - this.instantTime = instantTime; + HoodieIOHandle(HoodieWriteConfig config, Option instantTime, HoodieTable hoodieTable) { + this.instantTime = instantTime.orElse(StringUtils.EMPTY_STRING); this.config = config; this.hoodieTable = hoodieTable; this.fs = getFileSystem(); diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieKeyLocationFetchHandle.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieKeyLocationFetchHandle.java index c33931f503a36..ab8b83c14aeec 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieKeyLocationFetchHandle.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieKeyLocationFetchHandle.java @@ -47,7 +47,7 @@ public class HoodieKeyLocationFetchHandle hoodieTable, Pair partitionPathBaseFilePair, Option keyGeneratorOpt) { - super(config, null, hoodieTable, Pair.of(partitionPathBaseFilePair.getLeft(), partitionPathBaseFilePair.getRight().getFileId())); + super(config, hoodieTable, Pair.of(partitionPathBaseFilePair.getLeft(), partitionPathBaseFilePair.getRight().getFileId())); this.partitionPathBaseFilePair = partitionPathBaseFilePair; this.keyGeneratorOpt = keyGeneratorOpt; } @@ -57,9 +57,9 @@ public Stream> locations() { BaseFileUtils baseFileUtils = BaseFileUtils.getInstance(baseFile.getPath()); List hoodieKeyList = new ArrayList<>(); if (keyGeneratorOpt.isPresent()) { - hoodieKeyList = baseFileUtils.fetchRecordKeyPartitionPath(hoodieTable.getHadoopConf(), new Path(baseFile.getPath()), keyGeneratorOpt); + hoodieKeyList = baseFileUtils.fetchHoodieKeys(hoodieTable.getHadoopConf(), new Path(baseFile.getPath()), keyGeneratorOpt); } 
else { - hoodieKeyList = baseFileUtils.fetchRecordKeyPartitionPath(hoodieTable.getHadoopConf(), new Path(baseFile.getPath())); + hoodieKeyList = baseFileUtils.fetchHoodieKeys(hoodieTable.getHadoopConf(), new Path(baseFile.getPath())); } return hoodieKeyList.stream() .map(entry -> Pair.of(entry, diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieKeyLookupHandle.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieKeyLookupHandle.java index ad84e3e974af8..12d075e0cb532 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieKeyLookupHandle.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieKeyLookupHandle.java @@ -19,25 +19,30 @@ package org.apache.hudi.io; import org.apache.hudi.common.bloom.BloomFilter; +import org.apache.hudi.common.bloom.BloomFilterTypeCode; +import org.apache.hudi.common.bloom.HoodieDynamicBoundedBloomFilter; +import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.model.HoodieBaseFile; import org.apache.hudi.common.model.HoodieRecordPayload; -import org.apache.hudi.common.model.HoodieTableType; import org.apache.hudi.common.util.HoodieTimer; +import org.apache.hudi.common.util.Option; +import org.apache.hudi.common.util.ValidationUtils; import org.apache.hudi.common.util.collection.Pair; import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.exception.HoodieIndexException; +import org.apache.hudi.index.HoodieIndexUtils; +import org.apache.hudi.io.storage.HoodieFileReader; import org.apache.hudi.table.HoodieTable; -import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; -import java.util.HashSet; import java.util.List; -import java.util.Set; /** * Takes a bunch of keys and returns ones that are present in the file group. 
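With the changes below, HoodieKeyLookupHandle can source its bloom filter either from the base file footer or, when useMetadataTableIndex is set, from the metadata table's bloom filter index. Either way the lookup flow is the same two-phase screen-then-verify: keys are first tested against the probabilistic filter, and only the surviving candidates are checked against the keys actually stored in the file. A minimal self-contained sketch of that flow, using a hypothetical BloomLike interface and a plain Set in place of Hudi's BloomFilter and file reader:

```java
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

// Minimal sketch of the key-lookup flow (hypothetical interfaces, not Hudi's API):
// a bloom filter first screens keys cheaply, then the surviving candidates are
// verified against the keys actually present in the base file.
public class KeyLookupSketch {

  interface BloomLike {
    boolean mightContain(String key); // may return false positives, never false negatives
  }

  static List<String> lookup(BloomLike bloom, Set<String> keysInFile, List<String> incomingKeys) {
    List<String> candidates = new ArrayList<>();
    for (String key : incomingKeys) {
      if (bloom.mightContain(key)) { // cheap probabilistic screen
        candidates.add(key);
      }
    }
    // Expensive exact verification, only over the candidates that survived the screen
    List<String> matches = new ArrayList<>(candidates);
    matches.retainAll(keysInFile);
    return matches;
  }

  public static void main(String[] args) {
    Set<String> fileKeys = new HashSet<>(List.of("a", "b"));
    // A degenerate "bloom filter" that reports every key as possible (all false positives)
    BloomLike alwaysMaybe = key -> true;
    System.out.println(lookup(alwaysMaybe, fileKeys, List.of("a", "c"))); // prints [a]
  }
}
```

Because a bloom filter can produce false positives but never false negatives, the exact verification pass can only shrink the candidate set; no true match is ever lost to the screen.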
@@ -46,52 +51,58 @@ public class HoodieKeyLookupHandle exten private static final Logger LOG = LogManager.getLogger(HoodieKeyLookupHandle.class); - private final HoodieTableType tableType; - private final BloomFilter bloomFilter; - private final List candidateRecordKeys; - + private final boolean useMetadataTableIndex; + private Option fileName = Option.empty(); private long totalKeysChecked; public HoodieKeyLookupHandle(HoodieWriteConfig config, HoodieTable hoodieTable, - Pair partitionPathFilePair) { - super(config, null, hoodieTable, partitionPathFilePair); - this.tableType = hoodieTable.getMetaClient().getTableType(); + Pair partitionPathFileIDPair) { + this(config, hoodieTable, partitionPathFileIDPair, Option.empty(), false); + } + + public HoodieKeyLookupHandle(HoodieWriteConfig config, HoodieTable hoodieTable, + Pair partitionPathFileIDPair, Option fileName, + boolean useMetadataTableIndex) { + super(config, hoodieTable, partitionPathFileIDPair); this.candidateRecordKeys = new ArrayList<>(); this.totalKeysChecked = 0; - HoodieTimer timer = new HoodieTimer().startTimer(); - - try { - this.bloomFilter = createNewFileReader().readBloomFilter(); - } catch (IOException e) { - throw new HoodieIndexException(String.format("Error reading bloom filter from %s: %s", partitionPathFilePair, e)); + if (fileName.isPresent()) { + ValidationUtils.checkArgument(FSUtils.getFileId(fileName.get()).equals(getFileId()), + "File name '" + fileName.get() + "' doesn't match this lookup handle fileid '" + getFileId() + "'"); + this.fileName = fileName; } - LOG.info(String.format("Read bloom filter from %s in %d ms", partitionPathFilePair, timer.endTimer())); + this.useMetadataTableIndex = useMetadataTableIndex; + this.bloomFilter = getBloomFilter(); } - /** - * Given a list of row keys and one file, return only row keys existing in that file. - */ - public List checkCandidatesAgainstFile(Configuration configuration, List candidateRecordKeys, - Path filePath) throws HoodieIndexException { - List foundRecordKeys = new ArrayList<>(); + private BloomFilter getBloomFilter() { + BloomFilter bloomFilter = null; + HoodieTimer timer = new HoodieTimer().startTimer(); try { - // Load all rowKeys from the file, to double-confirm - if (!candidateRecordKeys.isEmpty()) { - HoodieTimer timer = new HoodieTimer().startTimer(); - Set fileRowKeys = createNewFileReader().filterRowKeys(new HashSet<>(candidateRecordKeys)); - foundRecordKeys.addAll(fileRowKeys); - LOG.info(String.format("Checked keys against file %s, in %d ms. 
#candidates (%d) #found (%d)", filePath, - timer.endTimer(), candidateRecordKeys.size(), foundRecordKeys.size())); - if (LOG.isDebugEnabled()) { - LOG.debug("Keys matching for file " + filePath + " => " + foundRecordKeys); + if (this.useMetadataTableIndex) { + ValidationUtils.checkArgument(this.fileName.isPresent(), + "File name not available to fetch bloom filter from the metadata table index."); + Option bloomFilterByteBuffer = + hoodieTable.getMetadataTable().getBloomFilter(partitionPathFileIDPair.getLeft(), fileName.get()); + if (!bloomFilterByteBuffer.isPresent()) { + throw new HoodieIndexException("BloomFilter missing for " + partitionPathFileIDPair.getRight()); + } + bloomFilter = + new HoodieDynamicBoundedBloomFilter(StandardCharsets.UTF_8.decode(bloomFilterByteBuffer.get()).toString(), + BloomFilterTypeCode.DYNAMIC_V0); + } else { + try (HoodieFileReader reader = createNewFileReader()) { + bloomFilter = reader.readBloomFilter(); } } - } catch (Exception e) { - throw new HoodieIndexException("Error checking candidate keys against file.", e); + } catch (IOException e) { + throw new HoodieIndexException(String.format("Error reading bloom filter from %s/%s - %s", + getPartitionPathFileIDPair().getLeft(), this.fileName, e)); } - return foundRecordKeys; + LOG.info(String.format("Read bloom filter from %s in %d ms", partitionPathFileIDPair, timer.endTimer())); + return bloomFilter; } /** @@ -101,7 +112,7 @@ public void addKey(String recordKey) { // check record key against bloom filter of current file & add to possible keys if needed if (bloomFilter.mightContain(recordKey)) { if (LOG.isDebugEnabled()) { - LOG.debug("Record key " + recordKey + " matches bloom filter in " + partitionPathFilePair); + LOG.debug("Record key " + recordKey + " matches bloom filter in " + partitionPathFileIDPair); } candidateRecordKeys.add(recordKey); } @@ -111,53 +122,18 @@ public void addKey(String recordKey) { /** * Of all the keys, that were added, return a list of keys that were actually found in the file group. */ - public KeyLookupResult getLookupResult() { + public HoodieKeyLookupResult getLookupResult() { if (LOG.isDebugEnabled()) { - LOG.debug("#The candidate row keys for " + partitionPathFilePair + " => " + candidateRecordKeys); + LOG.debug("#The candidate row keys for " + partitionPathFileIDPair + " => " + candidateRecordKeys); } HoodieBaseFile dataFile = getLatestDataFile(); - List matchingKeys = - checkCandidatesAgainstFile(hoodieTable.getHadoopConf(), candidateRecordKeys, new Path(dataFile.getPath())); + List matchingKeys = HoodieIndexUtils.filterKeysFromFile(new Path(dataFile.getPath()), candidateRecordKeys, + hoodieTable.getHadoopConf()); LOG.info( String.format("Total records (%d), bloom filter candidates (%d)/fp(%d), actual matches (%d)", totalKeysChecked, candidateRecordKeys.size(), candidateRecordKeys.size() - matchingKeys.size(), matchingKeys.size())); - return new KeyLookupResult(partitionPathFilePair.getRight(), partitionPathFilePair.getLeft(), + return new HoodieKeyLookupResult(partitionPathFileIDPair.getRight(), partitionPathFileIDPair.getLeft(), dataFile.getCommitTime(), matchingKeys); } - - /** - * Encapsulates the result from a key lookup. 
- */ - public static class KeyLookupResult { - - private final String fileId; - private final String baseInstantTime; - private final List matchingRecordKeys; - private final String partitionPath; - - public KeyLookupResult(String fileId, String partitionPath, String baseInstantTime, - List matchingRecordKeys) { - this.fileId = fileId; - this.partitionPath = partitionPath; - this.baseInstantTime = baseInstantTime; - this.matchingRecordKeys = matchingRecordKeys; - } - - public String getFileId() { - return fileId; - } - - public String getBaseInstantTime() { - return baseInstantTime; - } - - public String getPartitionPath() { - return partitionPath; - } - - public List getMatchingRecordKeys() { - return matchingRecordKeys; - } - } } diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieKeyLookupResult.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieKeyLookupResult.java new file mode 100644 index 0000000000000..19096a21d8700 --- /dev/null +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieKeyLookupResult.java @@ -0,0 +1,57 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.io; + +import java.util.List; + +/** + * Encapsulates the result from a key lookup. + */ +public class HoodieKeyLookupResult { + + private final String fileId; + private final String baseInstantTime; + private final List matchingRecordKeys; + private final String partitionPath; + + public HoodieKeyLookupResult(String fileId, String partitionPath, String baseInstantTime, + List matchingRecordKeys) { + this.fileId = fileId; + this.partitionPath = partitionPath; + this.baseInstantTime = baseInstantTime; + this.matchingRecordKeys = matchingRecordKeys; + } + + public String getFileId() { + return fileId; + } + + public String getBaseInstantTime() { + return baseInstantTime; + } + + public String getPartitionPath() { + return partitionPath; + } + + public List getMatchingRecordKeys() { + return matchingRecordKeys; + } +} + diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieMergeHandle.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieMergeHandle.java index 87a8d133f0dd5..32d4ec2a6d794 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieMergeHandle.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieMergeHandle.java @@ -324,7 +324,7 @@ public void write(GenericRecord oldRecord) { if (keyToNewRecords.containsKey(key)) { // If we have duplicate records that we are updating, then the hoodie record will be deflated after // writing the first record. 
So make a copy of the record to be merged - HoodieRecord hoodieRecord = new HoodieRecord<>(keyToNewRecords.get(key)); + HoodieRecord hoodieRecord = keyToNewRecords.get(key).newInstance(); try { Option combinedAvroRecord = hoodieRecord.getData().combineAndGetUpdateValue(oldRecord, diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieRangeInfoHandle.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieRangeInfoHandle.java index 78fa9be690367..abe4a9befef9b 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieRangeInfoHandle.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieRangeInfoHandle.java @@ -21,6 +21,7 @@ import org.apache.hudi.common.model.HoodieRecordPayload; import org.apache.hudi.common.util.collection.Pair; import org.apache.hudi.config.HoodieWriteConfig; +import org.apache.hudi.io.storage.HoodieFileReader; import org.apache.hudi.table.HoodieTable; import java.io.IOException; @@ -32,10 +33,12 @@ public class HoodieRangeInfoHandle exten public HoodieRangeInfoHandle(HoodieWriteConfig config, HoodieTable hoodieTable, Pair partitionPathFilePair) { - super(config, null, hoodieTable, partitionPathFilePair); + super(config, hoodieTable, partitionPathFilePair); } public String[] getMinMaxKeys() throws IOException { - return createNewFileReader().readMinMaxRecordKeys(); + try (HoodieFileReader reader = createNewFileReader()) { + return reader.readMinMaxRecordKeys(); + } } } diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieReadHandle.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieReadHandle.java index a771c33c40661..fee75b22decd7 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieReadHandle.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieReadHandle.java @@ -18,8 +18,11 @@ package org.apache.hudi.io; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; import org.apache.hudi.common.model.HoodieBaseFile; import org.apache.hudi.common.model.HoodieRecordPayload; +import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.collection.Pair; import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.io.storage.HoodieFileReader; @@ -28,20 +31,17 @@ import java.io.IOException; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; - /** * Base class for read operations done logically on the file group. 
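 * A read handle is scoped to a (partitionPath, fileID) pair and, unlike write handles, carries no instant time.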
*/ public abstract class HoodieReadHandle extends HoodieIOHandle { - protected final Pair partitionPathFilePair; + protected final Pair partitionPathFileIDPair; - public HoodieReadHandle(HoodieWriteConfig config, String instantTime, HoodieTable hoodieTable, - Pair partitionPathFilePair) { - super(config, instantTime, hoodieTable); - this.partitionPathFilePair = partitionPathFilePair; + public HoodieReadHandle(HoodieWriteConfig config, HoodieTable hoodieTable, + Pair partitionPathFileIDPair) { + super(config, Option.empty(), hoodieTable); + this.partitionPathFileIDPair = partitionPathFileIDPair; } @Override @@ -49,17 +49,17 @@ protected FileSystem getFileSystem() { return hoodieTable.getMetaClient().getFs(); } - public Pair getPartitionPathFilePair() { - return partitionPathFilePair; + public Pair getPartitionPathFileIDPair() { + return partitionPathFileIDPair; } public String getFileId() { - return partitionPathFilePair.getRight(); + return partitionPathFileIDPair.getRight(); } protected HoodieBaseFile getLatestDataFile() { return hoodieTable.getBaseFileOnlyView() - .getLatestBaseFile(partitionPathFilePair.getLeft(), partitionPathFilePair.getRight()).get(); + .getLatestBaseFile(partitionPathFileIDPair.getLeft(), partitionPathFileIDPair.getRight()).get(); } protected HoodieFileReader createNewFileReader() throws IOException { diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieSortedMergeHandle.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieSortedMergeHandle.java index 533611df2b765..897491b906aae 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieSortedMergeHandle.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieSortedMergeHandle.java @@ -85,7 +85,7 @@ public void write(GenericRecord oldRecord) { } // This is a new insert - HoodieRecord hoodieRecord = new HoodieRecord<>(keyToNewRecords.get(keyToPreWrite)); + HoodieRecord hoodieRecord = keyToNewRecords.get(keyToPreWrite).newInstance(); if (writtenRecordKeys.contains(keyToPreWrite)) { throw new HoodieUpsertException("Insert/Update not in sorted order"); } @@ -108,8 +108,9 @@ public void write(GenericRecord oldRecord) { @Override public List close() { // write out any pending records (this can happen when inserts are turned into updates) - newRecordKeysSorted.stream().forEach(key -> { + while (!newRecordKeysSorted.isEmpty()) { try { + String key = newRecordKeysSorted.poll(); HoodieRecord hoodieRecord = keyToNewRecords.get(key); if (!writtenRecordKeys.contains(hoodieRecord.getRecordKey())) { if (useWriterSchema) { @@ -122,7 +123,7 @@ public List close() { } catch (IOException e) { throw new HoodieUpsertException("Failed to close UpdateHandle", e); } - }); + } newRecordKeysSorted.clear(); keyToNewRecords.clear(); diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieWriteHandle.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieWriteHandle.java index 37721611e2c9a..28e88e16a6482 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieWriteHandle.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieWriteHandle.java @@ -108,7 +108,7 @@ public HoodieWriteHandle(HoodieWriteConfig config, String instantTime, String pa protected HoodieWriteHandle(HoodieWriteConfig config, String instantTime, String partitionPath, String fileId, HoodieTable hoodieTable, Option overriddenSchema, TaskContextSupplier taskContextSupplier) { 
- super(config, instantTime, hoodieTable); + super(config, Option.of(instantTime), hoodieTable); this.partitionPath = partitionPath; this.fileId = fileId; this.tableSchema = overriddenSchema.orElseGet(() -> getSpecifiedTableSchema(config)); @@ -210,7 +210,7 @@ public void write(HoodieRecord record, Option insertValue) { * Perform the actual writing of the given record into the backing file. */ public void write(HoodieRecord record, Option avroRecord, Option exception) { - Option recordMetadata = record.getData().getMetadata(); + Option recordMetadata = ((HoodieRecordPayload) record.getData()).getMetadata(); if (exception.isPresent() && exception.get() instanceof Throwable) { // Not throwing exception from here, since we don't want to fail the entire job for a single record writeStatus.markFailure(record, exception.get(), recordMetadata); diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieFileWriterFactory.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieFileWriterFactory.java index 0b6afd4d28b92..38db1cde41226 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieFileWriterFactory.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieFileWriterFactory.java @@ -87,7 +87,8 @@ private static HoodieFi BloomFilter filter = createBloomFilter(config); HoodieHFileConfig hfileConfig = new HoodieHFileConfig(hoodieTable.getHadoopConf(), config.getHFileCompressionAlgorithm(), config.getHFileBlockSize(), config.getHFileMaxFileSize(), - PREFETCH_ON_OPEN, CACHE_DATA_IN_L1, DROP_BEHIND_CACHE_COMPACTION, filter, HFILE_COMPARATOR); + HoodieHFileReader.KEY_FIELD_NAME, PREFETCH_ON_OPEN, CACHE_DATA_IN_L1, DROP_BEHIND_CACHE_COMPACTION, + filter, HFILE_COMPARATOR); return new HoodieHFileWriter<>(instantTime, path, hfileConfig, schema, taskContextSupplier, config.populateMetaFields()); } diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileConfig.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileConfig.java index 7e4c519a8fafc..1079566b782f1 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileConfig.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileConfig.java @@ -43,9 +43,10 @@ public class HoodieHFileConfig { private final Configuration hadoopConf; private final BloomFilter bloomFilter; private final KeyValue.KVComparator hfileComparator; + private final String keyFieldName; public HoodieHFileConfig(Configuration hadoopConf, Compression.Algorithm compressionAlgorithm, int blockSize, - long maxFileSize, boolean prefetchBlocksOnOpen, boolean cacheDataInL1, + long maxFileSize, String keyFieldName, boolean prefetchBlocksOnOpen, boolean cacheDataInL1, boolean dropBehindCacheCompaction, BloomFilter bloomFilter, KeyValue.KVComparator hfileComparator) { this.hadoopConf = hadoopConf; this.compressionAlgorithm = compressionAlgorithm; @@ -56,6 +57,7 @@ public HoodieHFileConfig(Configuration hadoopConf, Compression.Algorithm compres this.dropBehindCacheCompaction = dropBehindCacheCompaction; this.bloomFilter = bloomFilter; this.hfileComparator = hfileComparator; + this.keyFieldName = keyFieldName; } public Configuration getHadoopConf() { @@ -97,4 +99,8 @@ public BloomFilter getBloomFilter() { public KeyValue.KVComparator getHfileComparator() { return hfileComparator; } + + public String getKeyFieldName() { + 
return keyFieldName; + } } diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileWriter.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileWriter.java index a719bcb8f334f..2ad6d7f9220b0 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileWriter.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileWriter.java @@ -38,6 +38,8 @@ import org.apache.hadoop.hbase.io.hfile.HFileContext; import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder; import org.apache.hadoop.io.Writable; +import org.apache.hudi.common.util.Option; +import org.apache.hudi.common.util.StringUtils; import java.io.DataInput; import java.io.DataOutput; @@ -63,6 +65,8 @@ public class HoodieHFileWriter keyFieldSchema; private HFile.Writer writer; private String minRecordKey; private String maxRecordKey; @@ -77,6 +81,8 @@ public HoodieHFileWriter(String instantTime, Path file, HoodieHFileConfig hfileC this.file = HoodieWrapperFileSystem.convertToHoodiePath(file, conf); this.fs = (HoodieWrapperFileSystem) this.file.getFileSystem(conf); this.hfileConfig = hfileConfig; + this.schema = schema; + this.keyFieldSchema = Option.ofNullable(schema.getField(hfileConfig.getKeyFieldName())); // TODO - compute this compression ratio dynamically by looking at the bytes written to the // stream and the actual file size reported by HDFS @@ -121,8 +127,25 @@ public boolean canWrite() { } @Override - public void writeAvro(String recordKey, IndexedRecord object) throws IOException { - byte[] value = HoodieAvroUtils.avroToBytes((GenericRecord)object); + public void writeAvro(String recordKey, IndexedRecord record) throws IOException { + byte[] value = null; + boolean isRecordSerialized = false; + if (keyFieldSchema.isPresent()) { + GenericRecord keyExcludedRecord = (GenericRecord) record; + int keyFieldPos = this.keyFieldSchema.get().pos(); + boolean isKeyAvailable = (record.get(keyFieldPos) != null && !(record.get(keyFieldPos).toString().isEmpty())); + if (isKeyAvailable) { + Object originalKey = keyExcludedRecord.get(keyFieldPos); + keyExcludedRecord.put(keyFieldPos, StringUtils.EMPTY_STRING); + value = HoodieAvroUtils.avroToBytes(keyExcludedRecord); + keyExcludedRecord.put(keyFieldPos, originalKey); + isRecordSerialized = true; + } + } + if (!isRecordSerialized) { + value = HoodieAvroUtils.avroToBytes((GenericRecord) record); + } + KeyValue kv = new KeyValue(recordKey.getBytes(), null, null, value); writer.append(kv); diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieParquetWriter.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieParquetWriter.java index 4f51de35d24a9..3cee8c816d41f 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieParquetWriter.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieParquetWriter.java @@ -51,13 +51,23 @@ public class HoodieParquetWriter !s.isEmpty()).collect(Collectors.toList()); - this.partitionPathFields = Arrays.stream(props.getString(KeyGeneratorOptions.PARTITIONPATH_FIELD_NAME.key()) - .split(",")).map(String::trim).filter(s -> !s.isEmpty()).collect(Collectors.toList()); + this.recordKeyFields = Arrays.stream(props.getString(KeyGeneratorOptions.RECORDKEY_FIELD_NAME.key()).split(",")) + .map(String::trim) + .filter(s -> !s.isEmpty()) + .collect(Collectors.toList()); + this.partitionPathFields = 
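The new writeAvro path in HoodieHFileWriter serializes each record with its key field temporarily blanked out and then restored. Since the HFile KeyValue cell already carries the record key, this avoids storing the key a second time inside the Avro value bytes; a reader can repopulate the field from the cell key. A rough sketch of the round trip, using an illustrative two-field schema rather than a real Hudi one:

```java
import org.apache.avro.Schema;
import org.apache.avro.SchemaBuilder;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericRecord;
import org.apache.hudi.avro.HoodieAvroUtils;

public class KeyExclusionSketch {
  public static void main(String[] args) {
    Schema schema = SchemaBuilder.record("rec").fields()
        .requiredString("key").requiredString("value").endRecord();
    GenericRecord record = new GenericData.Record(schema);
    record.put("key", "uuid-0001");
    record.put("value", "payload");

    int keyPos = schema.getField("key").pos();
    Object originalKey = record.get(keyPos);
    record.put(keyPos, "");                              // blank the key field
    byte[] value = HoodieAvroUtils.avroToBytes(record);  // value bytes carry no key
    record.put(keyPos, originalKey);                     // restore for the caller
    // 'value' becomes the HFile cell value; the cell key carries the record key.
    System.out.println(value.length + " bytes without the key inlined");
  }
}
```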
Arrays.stream(props.getString(KeyGeneratorOptions.PARTITIONPATH_FIELD_NAME.key()).split(",")) + .map(String::trim) + .filter(s -> !s.isEmpty()) + .collect(Collectors.toList()); } @Override diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/keygen/KeyGenUtils.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/keygen/KeyGenUtils.java index d9de544d29b29..f1e41296f1dd3 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/keygen/KeyGenUtils.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/keygen/KeyGenUtils.java @@ -29,7 +29,7 @@ import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.exception.HoodieKeyException; import org.apache.hudi.exception.HoodieNotSupportedException; -import org.apache.hudi.keygen.parser.AbstractHoodieDateTimeParser; +import org.apache.hudi.keygen.parser.BaseHoodieDateTimeParser; import java.io.IOException; import java.util.Arrays; @@ -161,9 +161,9 @@ public static String getPartitionPath(GenericRecord record, String partitionPath /** * Create a date time parser class for TimestampBasedKeyGenerator, passing in any configs needed. */ - public static AbstractHoodieDateTimeParser createDateTimeParser(TypedProperties props, String parserClass) throws IOException { + public static BaseHoodieDateTimeParser createDateTimeParser(TypedProperties props, String parserClass) throws IOException { try { - return (AbstractHoodieDateTimeParser) ReflectionUtils.loadClass(parserClass, props); + return (BaseHoodieDateTimeParser) ReflectionUtils.loadClass(parserClass, props); } catch (Throwable e) { throw new IOException("Could not load date time parser class " + parserClass, e); } @@ -196,4 +196,4 @@ public static KeyGenerator createKeyGeneratorByClassName(TypedProperties props) } return keyGenerator; } -} \ No newline at end of file +} diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/keygen/TimestampBasedAvroKeyGenerator.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/keygen/TimestampBasedAvroKeyGenerator.java index bc84ece503487..bce7e24c57a5f 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/keygen/TimestampBasedAvroKeyGenerator.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/keygen/TimestampBasedAvroKeyGenerator.java @@ -26,8 +26,8 @@ import org.apache.hudi.exception.HoodieKeyGeneratorException; import org.apache.hudi.exception.HoodieNotSupportedException; import org.apache.hudi.keygen.constant.KeyGeneratorOptions; -import org.apache.hudi.keygen.parser.AbstractHoodieDateTimeParser; -import org.apache.hudi.keygen.parser.HoodieDateTimeParserImpl; +import org.apache.hudi.keygen.parser.BaseHoodieDateTimeParser; +import org.apache.hudi.keygen.parser.HoodieDateTimeParser; import org.joda.time.DateTime; import org.joda.time.DateTimeZone; import org.joda.time.format.DateTimeFormat; @@ -56,7 +56,7 @@ public enum TimestampType implements Serializable { private final String outputDateFormat; private transient Option inputFormatter; private transient DateTimeFormatter partitionFormatter; - private final AbstractHoodieDateTimeParser parser; + private final BaseHoodieDateTimeParser parser; // TimeZone detailed settings reference // https://docs.oracle.com/javase/8/docs/api/java/util/TimeZone.html @@ -65,29 +65,6 @@ public enum TimestampType implements Serializable { protected final boolean encodePartitionPath; - /** - * Supported configs. 
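The reformatted constructor above parses the comma-separated record-key and partition-path field configs; the trim and filter steps make it tolerant of stray whitespace and empty tokens. For example:

```java
import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;

public class FieldListParsing {
  public static void main(String[] args) {
    String configured = "id, ts,,region ";     // a value a user might set
    List<String> fields = Arrays.stream(configured.split(","))
        .map(String::trim)                     // "ts", not " ts"
        .filter(s -> !s.isEmpty())             // drops the empty token from ",,"
        .collect(Collectors.toList());
    System.out.println(fields);                // [id, ts, region]
  }
}
```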
- */ - public static class Config { - - // One value from TimestampType above - public static final String TIMESTAMP_TYPE_FIELD_PROP = "hoodie.deltastreamer.keygen.timebased.timestamp.type"; - public static final String INPUT_TIME_UNIT = - "hoodie.deltastreamer.keygen.timebased.timestamp.scalar.time.unit"; - //This prop can now accept list of input date formats. - public static final String TIMESTAMP_INPUT_DATE_FORMAT_PROP = - "hoodie.deltastreamer.keygen.timebased.input.dateformat"; - public static final String TIMESTAMP_INPUT_DATE_FORMAT_LIST_DELIMITER_REGEX_PROP = "hoodie.deltastreamer.keygen.timebased.input.dateformat.list.delimiter.regex"; - public static final String TIMESTAMP_INPUT_TIMEZONE_FORMAT_PROP = "hoodie.deltastreamer.keygen.timebased.input.timezone"; - public static final String TIMESTAMP_OUTPUT_DATE_FORMAT_PROP = - "hoodie.deltastreamer.keygen.timebased.output.dateformat"; - //still keeping this prop for backward compatibility so that functionality for existing users does not break. - public static final String TIMESTAMP_TIMEZONE_FORMAT_PROP = - "hoodie.deltastreamer.keygen.timebased.timezone"; - public static final String TIMESTAMP_OUTPUT_TIMEZONE_FORMAT_PROP = "hoodie.deltastreamer.keygen.timebased.output.timezone"; - static final String DATE_TIME_PARSER_PROP = "hoodie.deltastreamer.keygen.datetime.parser.class"; - } - public TimestampBasedAvroKeyGenerator(TypedProperties config) throws IOException { this(config, config.getString(KeyGeneratorOptions.RECORDKEY_FIELD_NAME.key()), config.getString(KeyGeneratorOptions.PARTITIONPATH_FIELD_NAME.key())); @@ -99,12 +76,12 @@ public TimestampBasedAvroKeyGenerator(TypedProperties config) throws IOException TimestampBasedAvroKeyGenerator(TypedProperties config, String recordKeyField, String partitionPathField) throws IOException { super(config, recordKeyField, partitionPathField); - String dateTimeParserClass = config.getString(Config.DATE_TIME_PARSER_PROP, HoodieDateTimeParserImpl.class.getName()); + String dateTimeParserClass = config.getString(KeyGeneratorOptions.Config.DATE_TIME_PARSER_PROP, HoodieDateTimeParser.class.getName()); this.parser = KeyGenUtils.createDateTimeParser(config, dateTimeParserClass); this.inputDateTimeZone = parser.getInputDateTimeZone(); this.outputDateTimeZone = parser.getOutputDateTimeZone(); this.outputDateFormat = parser.getOutputDateFormat(); - this.timestampType = TimestampType.valueOf(config.getString(Config.TIMESTAMP_TYPE_FIELD_PROP)); + this.timestampType = TimestampType.valueOf(config.getString(KeyGeneratorOptions.Config.TIMESTAMP_TYPE_FIELD_PROP)); switch (this.timestampType) { case EPOCHMILLISECONDS: @@ -114,7 +91,7 @@ public TimestampBasedAvroKeyGenerator(TypedProperties config) throws IOException timeUnit = SECONDS; break; case SCALAR: - String timeUnitStr = config.getString(Config.INPUT_TIME_UNIT, TimeUnit.SECONDS.toString()); + String timeUnitStr = config.getString(KeyGeneratorOptions.Config.INPUT_TIME_UNIT, TimeUnit.SECONDS.toString()); timeUnit = TimeUnit.valueOf(timeUnitStr.toUpperCase()); break; default: @@ -148,7 +125,7 @@ public Object getDefaultPartitionVal() { // {Config.TIMESTAMP_INPUT_DATE_FORMAT_PROP} won't be null, it has been checked in the initialization process of // inputFormatter String delimiter = parser.getConfigInputDateFormatDelimiter(); - String format = config.getString(Config.TIMESTAMP_INPUT_DATE_FORMAT_PROP, "").split(delimiter)[0]; + String format = config.getString(KeyGeneratorOptions.Config.TIMESTAMP_INPUT_DATE_FORMAT_PROP, "").split(delimiter)[0]; // if both 
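The Config constants removed here move to KeyGeneratorOptions.Config with the property keys unchanged, so existing user configurations keep working. A hedged usage sketch (field names and values are illustrative; the property keys are the ones listed in the removed block):

```java
import java.io.IOException;

import org.apache.hudi.common.config.TypedProperties;
import org.apache.hudi.keygen.TimestampBasedAvroKeyGenerator;

public class TimestampKeyGenExample {
  public static void main(String[] args) throws IOException {
    TypedProperties props = new TypedProperties();
    // Record key / partition path fields, per KeyGeneratorOptions:
    props.setProperty("hoodie.datasource.write.recordkey.field", "id");
    props.setProperty("hoodie.datasource.write.partitionpath.field", "ts");
    // Timestamp props: the same keys as in the removed Config block:
    props.setProperty("hoodie.deltastreamer.keygen.timebased.timestamp.type", "EPOCHMILLISECONDS");
    props.setProperty("hoodie.deltastreamer.keygen.timebased.output.dateformat", "yyyy/MM/dd");
    TimestampBasedAvroKeyGenerator keyGen = new TimestampBasedAvroKeyGenerator(props);
  }
}
```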
input and output timeZone are not configured, use GMT. if (null != inputDateTimeZone) { @@ -200,7 +177,7 @@ public String getPartitionPath(Object partitionVal) { timeMs = convertLongTimeToMillis(((BigDecimal) partitionVal).longValue()); } else if (partitionVal instanceof CharSequence) { if (!inputFormatter.isPresent()) { - throw new HoodieException("Missing inputformatter. Ensure " + Config.TIMESTAMP_INPUT_DATE_FORMAT_PROP + " config is set when timestampType is DATE_STRING or MIXED!"); + throw new HoodieException("Missing input formatter. Ensure " + KeyGeneratorOptions.Config.TIMESTAMP_INPUT_DATE_FORMAT_PROP + " config is set when timestampType is DATE_STRING or MIXED!"); } DateTime parsedDateTime = inputFormatter.get().parseDateTime(partitionVal.toString()); if (this.outputDateTimeZone == null) { @@ -224,7 +201,7 @@ public String getPartitionPath(Object partitionVal) { private long convertLongTimeToMillis(Long partitionVal) { if (timeUnit == null) { // should not be possible - throw new RuntimeException(Config.INPUT_TIME_UNIT + " is not specified but scalar it supplied as time value"); + throw new RuntimeException(KeyGeneratorOptions.Config.INPUT_TIME_UNIT + " is not specified but a scalar is supplied as the time value"); } return MILLISECONDS.convert(partitionVal, timeUnit); } diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/keygen/parser/AbstractHoodieDateTimeParser.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/keygen/parser/BaseHoodieDateTimeParser.java similarity index 84% rename from hudi-client/hudi-client-common/src/main/java/org/apache/hudi/keygen/parser/AbstractHoodieDateTimeParser.java rename to hudi-client/hudi-client-common/src/main/java/org/apache/hudi/keygen/parser/BaseHoodieDateTimeParser.java index 6fb05c30be11a..74c62fc63f537 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/keygen/parser/AbstractHoodieDateTimeParser.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/keygen/parser/BaseHoodieDateTimeParser.java @@ -19,24 +19,24 @@ import org.apache.hudi.common.config.TypedProperties; import org.apache.hudi.common.util.Option; -import org.apache.hudi.keygen.TimestampBasedAvroKeyGenerator.Config; +import org.apache.hudi.keygen.constant.KeyGeneratorOptions; import org.joda.time.DateTimeZone; import org.joda.time.format.DateTimeFormatter; import java.io.Serializable; -public abstract class AbstractHoodieDateTimeParser implements Serializable { +public abstract class BaseHoodieDateTimeParser implements Serializable { protected final TypedProperties config; protected final String configInputDateFormatDelimiter; - public AbstractHoodieDateTimeParser(TypedProperties config) { + public BaseHoodieDateTimeParser(TypedProperties config) { this.config = config; this.configInputDateFormatDelimiter = initInputDateFormatDelimiter(); } private String initInputDateFormatDelimiter() { - String inputDateFormatDelimiter = config.getString(Config.TIMESTAMP_INPUT_DATE_FORMAT_LIST_DELIMITER_REGEX_PROP, ",").trim(); + String inputDateFormatDelimiter = config.getString(KeyGeneratorOptions.Config.TIMESTAMP_INPUT_DATE_FORMAT_LIST_DELIMITER_REGEX_PROP, ",").trim(); inputDateFormatDelimiter = inputDateFormatDelimiter.isEmpty() ? "," : inputDateFormatDelimiter; return inputDateFormatDelimiter; } @@ -45,7 +45,7 @@ private String initInputDateFormatDelimiter() { * Returns the output date format in which the partition paths will be created for the hudi dataset.
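convertLongTimeToMillis, shown above, normalizes SCALAR timestamps with the configured input time unit before the partition path is formatted. The conversion is plain java.util.concurrent.TimeUnit arithmetic:

```java
import static java.util.concurrent.TimeUnit.MILLISECONDS;

import java.util.concurrent.TimeUnit;

public class ScalarToMillis {
  public static void main(String[] args) {
    long partitionVal = 1_643_155_200L;   // scalar value in seconds (illustrative)
    TimeUnit timeUnit = TimeUnit.SECONDS; // from ...timebased.timestamp.scalar.time.unit
    long timeMs = MILLISECONDS.convert(partitionVal, timeUnit);
    System.out.println(timeMs);           // 1643155200000
  }
}
```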
*/ public String getOutputDateFormat() { - return config.getString(Config.TIMESTAMP_OUTPUT_DATE_FORMAT_PROP); + return config.getString(KeyGeneratorOptions.Config.TIMESTAMP_OUTPUT_DATE_FORMAT_PROP); } /** diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/keygen/parser/HoodieDateTimeParserImpl.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/keygen/parser/HoodieDateTimeParser.java similarity index 68% rename from hudi-client/hudi-client-common/src/main/java/org/apache/hudi/keygen/parser/HoodieDateTimeParserImpl.java rename to hudi-client/hudi-client-common/src/main/java/org/apache/hudi/keygen/parser/HoodieDateTimeParser.java index 81960ea168391..c15d484df7a53 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/keygen/parser/HoodieDateTimeParserImpl.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/keygen/parser/HoodieDateTimeParser.java @@ -20,8 +20,8 @@ import org.apache.hudi.common.config.TypedProperties; import org.apache.hudi.common.util.Option; import org.apache.hudi.keygen.TimestampBasedAvroKeyGenerator.TimestampType; -import org.apache.hudi.keygen.TimestampBasedAvroKeyGenerator.Config; import org.apache.hudi.keygen.KeyGenUtils; +import org.apache.hudi.keygen.constant.KeyGeneratorOptions; import org.joda.time.DateTimeZone; import org.joda.time.format.DateTimeFormat; import org.joda.time.format.DateTimeFormatter; @@ -32,7 +32,7 @@ import java.util.Collections; import java.util.TimeZone; -public class HoodieDateTimeParserImpl extends AbstractHoodieDateTimeParser { +public class HoodieDateTimeParser extends BaseHoodieDateTimeParser { private String configInputDateFormatList; @@ -40,15 +40,15 @@ public class HoodieDateTimeParserImpl extends AbstractHoodieDateTimeParser { // https://docs.oracle.com/javase/8/docs/api/java/util/TimeZone.html private final DateTimeZone inputDateTimeZone; - public HoodieDateTimeParserImpl(TypedProperties config) { + public HoodieDateTimeParser(TypedProperties config) { super(config); - KeyGenUtils.checkRequiredProperties(config, Arrays.asList(Config.TIMESTAMP_TYPE_FIELD_PROP, Config.TIMESTAMP_OUTPUT_DATE_FORMAT_PROP)); + KeyGenUtils.checkRequiredProperties(config, Arrays.asList(KeyGeneratorOptions.Config.TIMESTAMP_TYPE_FIELD_PROP, KeyGeneratorOptions.Config.TIMESTAMP_OUTPUT_DATE_FORMAT_PROP)); this.inputDateTimeZone = getInputDateTimeZone(); } private DateTimeFormatter getInputDateFormatter() { if (this.configInputDateFormatList.isEmpty()) { - throw new IllegalArgumentException(Config.TIMESTAMP_INPUT_DATE_FORMAT_PROP + " configuration is required"); + throw new IllegalArgumentException(KeyGeneratorOptions.Config.TIMESTAMP_INPUT_DATE_FORMAT_PROP + " configuration is required"); } DateTimeFormatter formatter = new DateTimeFormatterBuilder() @@ -72,16 +72,16 @@ private DateTimeFormatter getInputDateFormatter() { @Override public String getOutputDateFormat() { - return config.getString(Config.TIMESTAMP_OUTPUT_DATE_FORMAT_PROP); + return config.getString(KeyGeneratorOptions.Config.TIMESTAMP_OUTPUT_DATE_FORMAT_PROP); } @Override public Option getInputFormatter() { - TimestampType timestampType = TimestampType.valueOf(config.getString(Config.TIMESTAMP_TYPE_FIELD_PROP)); + TimestampType timestampType = TimestampType.valueOf(config.getString(KeyGeneratorOptions.Config.TIMESTAMP_TYPE_FIELD_PROP)); if (timestampType == TimestampType.DATE_STRING || timestampType == TimestampType.MIXED) { KeyGenUtils.checkRequiredProperties(config, - 
Collections.singletonList(Config.TIMESTAMP_INPUT_DATE_FORMAT_PROP)); - this.configInputDateFormatList = config.getString(Config.TIMESTAMP_INPUT_DATE_FORMAT_PROP, ""); + Collections.singletonList(KeyGeneratorOptions.Config.TIMESTAMP_INPUT_DATE_FORMAT_PROP)); + this.configInputDateFormatList = config.getString(KeyGeneratorOptions.Config.TIMESTAMP_INPUT_DATE_FORMAT_PROP, ""); return Option.of(getInputDateFormatter()); } @@ -91,10 +91,10 @@ public Option getInputFormatter() { @Override public DateTimeZone getInputDateTimeZone() { String inputTimeZone; - if (config.containsKey(Config.TIMESTAMP_TIMEZONE_FORMAT_PROP)) { - inputTimeZone = config.getString(Config.TIMESTAMP_TIMEZONE_FORMAT_PROP, "GMT"); + if (config.containsKey(KeyGeneratorOptions.Config.TIMESTAMP_TIMEZONE_FORMAT_PROP)) { + inputTimeZone = config.getString(KeyGeneratorOptions.Config.TIMESTAMP_TIMEZONE_FORMAT_PROP, "GMT"); } else { - inputTimeZone = config.getString(Config.TIMESTAMP_INPUT_TIMEZONE_FORMAT_PROP, ""); + inputTimeZone = config.getString(KeyGeneratorOptions.Config.TIMESTAMP_INPUT_TIMEZONE_FORMAT_PROP, ""); } return !inputTimeZone.trim().isEmpty() ? DateTimeZone.forTimeZone(TimeZone.getTimeZone(inputTimeZone)) : null; } @@ -102,10 +102,10 @@ public DateTimeZone getInputDateTimeZone() { @Override public DateTimeZone getOutputDateTimeZone() { String outputTimeZone; - if (config.containsKey(Config.TIMESTAMP_TIMEZONE_FORMAT_PROP)) { - outputTimeZone = config.getString(Config.TIMESTAMP_TIMEZONE_FORMAT_PROP, "GMT"); + if (config.containsKey(KeyGeneratorOptions.Config.TIMESTAMP_TIMEZONE_FORMAT_PROP)) { + outputTimeZone = config.getString(KeyGeneratorOptions.Config.TIMESTAMP_TIMEZONE_FORMAT_PROP, "GMT"); } else { - outputTimeZone = config.getString(Config.TIMESTAMP_OUTPUT_TIMEZONE_FORMAT_PROP, ""); + outputTimeZone = config.getString(KeyGeneratorOptions.Config.TIMESTAMP_OUTPUT_TIMEZONE_FORMAT_PROP, ""); } return !outputTimeZone.trim().isEmpty() ? 
DateTimeZone.forTimeZone(TimeZone.getTimeZone(outputTimeZone)) : null; } diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java index 32f05cbad870c..eee676822a8aa 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java @@ -24,13 +24,14 @@ import org.apache.hudi.avro.model.HoodieMetadataRecord; import org.apache.hudi.avro.model.HoodieRestoreMetadata; import org.apache.hudi.avro.model.HoodieRollbackMetadata; -import org.apache.hudi.client.AbstractHoodieWriteClient; +import org.apache.hudi.client.BaseHoodieWriteClient; import org.apache.hudi.common.config.HoodieMetadataConfig; import org.apache.hudi.common.config.SerializableConfiguration; import org.apache.hudi.common.data.HoodieData; import org.apache.hudi.common.engine.HoodieEngineContext; import org.apache.hudi.common.fs.ConsistencyGuardConfig; import org.apache.hudi.common.fs.FSUtils; +import org.apache.hudi.common.model.FileSlice; import org.apache.hudi.common.model.HoodieCleaningPolicy; import org.apache.hudi.common.model.HoodieCommitMetadata; import org.apache.hudi.common.model.HoodieFailedWritesCleaningPolicy; @@ -39,6 +40,7 @@ import org.apache.hudi.common.model.HoodieLogFile; import org.apache.hudi.common.model.HoodiePartitionMetadata; import org.apache.hudi.common.model.HoodieRecord; +import org.apache.hudi.common.model.HoodieRecordLocation; import org.apache.hudi.common.model.HoodieTableType; import org.apache.hudi.common.model.WriteConcurrencyMode; import org.apache.hudi.common.table.HoodieTableConfig; @@ -50,6 +52,7 @@ import org.apache.hudi.common.table.timeline.HoodieInstant; import org.apache.hudi.common.table.timeline.HoodieTimeline; import org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion; +import org.apache.hudi.common.table.view.HoodieTableFileSystemView; import org.apache.hudi.common.util.HoodieTimer; import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.ValidationUtils; @@ -96,7 +99,7 @@ public abstract class HoodieBackedTableMetadataWriter implements HoodieTableMeta // Virtual keys support for metadata table. This Field is // from the metadata payload schema. - private static final String RECORD_KEY_FIELD = HoodieMetadataPayload.SCHEMA_FIELD_ID_KEY; + private static final String RECORD_KEY_FIELD_NAME = HoodieMetadataPayload.KEY_FIELD_NAME; protected HoodieWriteConfig metadataWriteConfig; protected HoodieWriteConfig dataWriteConfig; @@ -109,6 +112,8 @@ public abstract class HoodieBackedTableMetadataWriter implements HoodieTableMeta protected boolean enabled; protected SerializableConfiguration hadoopConf; protected final transient HoodieEngineContext engineContext; + // TODO: HUDI-3258 Support secondary key via multiple partitions within a single type + protected final List enabledPartitionTypes; /** * Hudi backed table metadata writer. 
@@ -128,6 +133,8 @@ protected HoodieBackedTableMetadataWriter(Configu this.dataWriteConfig = writeConfig; this.engineContext = engineContext; this.hadoopConf = new SerializableConfiguration(hadoopConf); + this.metrics = Option.empty(); + this.enabledPartitionTypes = new ArrayList<>(); if (writeConfig.isMetadataTableEnabled()) { this.tableName = writeConfig.getTableName() + METADATA_TABLE_NAME_SUFFIX; @@ -145,22 +152,67 @@ protected HoodieBackedTableMetadataWriter(Configu ValidationUtils.checkArgument(!this.metadataWriteConfig.isMetadataTableEnabled(), "File listing cannot be used for Metadata Table"); - initRegistry(); this.dataMetaClient = HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(dataWriteConfig.getBasePath()).build(); + enablePartitions(); + initRegistry(); initialize(engineContext, actionMetadata, inflightInstantTimestamp); initTableMetadata(); } else { enabled = false; - this.metrics = Option.empty(); } } public HoodieBackedTableMetadataWriter(Configuration hadoopConf, HoodieWriteConfig writeConfig, - HoodieEngineContext engineContext) { + HoodieEngineContext engineContext) { this(hadoopConf, writeConfig, engineContext, Option.empty(), Option.empty()); } + /** + * Enable metadata table partitions based on config. + */ + private void enablePartitions() { + final HoodieMetadataConfig metadataConfig = dataWriteConfig.getMetadataConfig(); + boolean isBootstrapCompleted; + Option metaClient = Option.empty(); + try { + isBootstrapCompleted = dataMetaClient.getFs().exists(new Path(metadataWriteConfig.getBasePath(), HoodieTableMetaClient.METAFOLDER_NAME)); + if (isBootstrapCompleted) { + metaClient = Option.of(HoodieTableMetaClient.builder().setConf(hadoopConf.get()) + .setBasePath(metadataWriteConfig.getBasePath()).build()); + } + } catch (IOException e) { + throw new HoodieException("Failed to enable metadata partitions!", e); + } + + Option fsView = Option.ofNullable( + metaClient.isPresent() ? HoodieTableMetadataUtil.getFileSystemView(metaClient.get()) : null); + enablePartition(MetadataPartitionType.FILES, metadataConfig, metaClient, fsView, isBootstrapCompleted); + if (metadataConfig.isBloomFilterIndexEnabled()) { + enablePartition(MetadataPartitionType.BLOOM_FILTERS, metadataConfig, metaClient, fsView, isBootstrapCompleted); + } + if (metadataConfig.isColumnStatsIndexEnabled()) { + enablePartition(MetadataPartitionType.COLUMN_STATS, metadataConfig, metaClient, fsView, isBootstrapCompleted); + } + } + + /** + * Enable metadata table partition. 
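enablePartitions() above always registers the FILES partition once the metadata table is on, and adds BLOOM_FILTERS and COLUMN_STATS only when the corresponding index configs are enabled. Stripped of the meta-client and file-system-view plumbing, the gating reduces to a sketch like this:

```java
import java.util.ArrayList;
import java.util.List;

public class PartitionGatingSketch {
  enum PartitionType { FILES, BLOOM_FILTERS, COLUMN_STATS }

  // FILES is unconditional; the index partitions are opt-in via config.
  static List<PartitionType> enabledPartitions(boolean bloomIndexOn, boolean columnStatsIndexOn) {
    List<PartitionType> enabled = new ArrayList<>();
    enabled.add(PartitionType.FILES);
    if (bloomIndexOn) {
      enabled.add(PartitionType.BLOOM_FILTERS);
    }
    if (columnStatsIndexOn) {
      enabled.add(PartitionType.COLUMN_STATS);
    }
    return enabled;
  }

  public static void main(String[] args) {
    System.out.println(enabledPartitions(true, false)); // [FILES, BLOOM_FILTERS]
  }
}
```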
+ * + * @param partitionType - Metadata table partition type + * @param metadataConfig - Table config + * @param metaClient - Meta client for the metadata table + * @param fsView - Metadata table filesystem view to use + * @param isBootstrapCompleted - Is metadata table bootstrap completed + */ + private void enablePartition(final MetadataPartitionType partitionType, final HoodieMetadataConfig metadataConfig, + final Option metaClient, Option fsView, boolean isBootstrapCompleted) { + final int fileGroupCount = HoodieTableMetadataUtil.getPartitionFileGroupCount(partitionType, metaClient, fsView, + metadataConfig, isBootstrapCompleted); + partitionType.setFileGroupCount(fileGroupCount); + this.enabledPartitionTypes.add(partitionType); + } + protected abstract void initRegistry(); /** @@ -217,8 +269,8 @@ private HoodieWriteConfig createMetadataWriteConfig(HoodieWriteConfig writeConfi // RecordKey properties are needed for the metadata table records final Properties properties = new Properties(); - properties.put(HoodieTableConfig.RECORDKEY_FIELDS.key(), RECORD_KEY_FIELD); - properties.put("hoodie.datasource.write.recordkey.field", RECORD_KEY_FIELD); + properties.put(HoodieTableConfig.RECORDKEY_FIELDS.key(), RECORD_KEY_FIELD_NAME); + properties.put("hoodie.datasource.write.recordkey.field", RECORD_KEY_FIELD_NAME); builder.withProperties(properties); if (writeConfig.isMetricsOn()) { @@ -257,10 +309,14 @@ public HoodieWriteConfig getWriteConfig() { return metadataWriteConfig; } - public HoodieBackedTableMetadata metadata() { + public HoodieBackedTableMetadata getTableMetadata() { return metadata; } + public List getEnabledPartitionTypes() { + return this.enabledPartitionTypes; + } + /** * Initialize the metadata table if it does not exist. * @@ -454,13 +510,13 @@ private boolean bootstrapFromFilesystem(HoodieEngineContext engineContext, Hoodi .setArchiveLogFolder(ARCHIVELOG_FOLDER.defaultValue()) .setPayloadClassName(HoodieMetadataPayload.class.getName()) .setBaseFileFormat(HoodieFileFormat.HFILE.toString()) - .setRecordKeyFields(RECORD_KEY_FIELD) + .setRecordKeyFields(RECORD_KEY_FIELD_NAME) .setPopulateMetaFields(dataWriteConfig.getMetadataConfig().populateMetaFields()) .setKeyGeneratorClassProp(HoodieTableMetadataKeyGenerator.class.getCanonicalName()) .initTable(hadoopConf.get(), metadataWriteConfig.getBasePath()); initTableMetadata(); - initializeFileGroups(dataMetaClient, MetadataPartitionType.FILES, createInstantTime, 1); + initializeEnabledFileGroups(dataMetaClient, createInstantTime); // List all partitions in the basePath of the containing dataset LOG.info("Initializing metadata table by using file listings in " + dataWriteConfig.getBasePath()); @@ -529,13 +585,27 @@ private List listAllPartitions(HoodieTableMetaClient datasetMetaC return partitionsToBootstrap; } + /** + * Initialize file groups for all the enabled partition types. + * + * @param dataMetaClient - Meta client for the data table + * @param createInstantTime - Metadata table create instant time + * @throws IOException + */ + private void initializeEnabledFileGroups(HoodieTableMetaClient dataMetaClient, String createInstantTime) throws IOException { + for (MetadataPartitionType enabledPartitionType : this.enabledPartitionTypes) { + initializeFileGroups(dataMetaClient, enabledPartitionType, createInstantTime, + enabledPartitionType.getFileGroupCount()); + } + } + /** * Initialize file groups for a partition. For file listing, we just have one file group. 
* * All FileGroups for a given metadata partition have a fixed prefix as per the {@link MetadataPartitionType#getFileIdPrefix()}. * Each file group is suffixed with 4 digits with increments of 1 starting with 0000. * - * Lets say we configure 10 file groups for record level index partittion, and prefix as "record-index-bucket-" + * Let's say we configure 10 file groups for record level index partition, and prefix as "record-index-bucket-" * File groups will be named as: * record-index-bucket-0000, .... -> ..., record-index-bucket-0009 */ @@ -550,12 +620,12 @@ private void initializeFileGroups(HoodieTableMetaClient dataMetaClient, Metadata final HoodieDeleteBlock block = new HoodieDeleteBlock(new HoodieKey[0], blockHeader); LOG.info(String.format("Creating %d file groups for partition %s with base fileId %s at instant time %s", - fileGroupCount, metadataPartition.partitionPath(), metadataPartition.getFileIdPrefix(), instantTime)); + fileGroupCount, metadataPartition.getPartitionPath(), metadataPartition.getFileIdPrefix(), instantTime)); for (int i = 0; i < fileGroupCount; ++i) { final String fileGroupFileId = String.format("%s%04d", metadataPartition.getFileIdPrefix(), i); try { HoodieLogFormat.Writer writer = HoodieLogFormat.newWriterBuilder() - .onParentPath(FSUtils.getPartitionPath(metadataWriteConfig.getBasePath(), metadataPartition.partitionPath())) + .onParentPath(FSUtils.getPartitionPath(metadataWriteConfig.getBasePath(), metadataPartition.getPartitionPath())) .withFileId(fileGroupFileId).overBaseCommit(instantTime) .withLogVersion(HoodieLogFile.LOGFILE_BASE_VERSION) .withFileSize(0L) @@ -567,7 +637,7 @@ private void initializeFileGroups(HoodieTableMetaClient dataMetaClient, Metadata writer.appendBlock(block); writer.close(); } catch (InterruptedException e) { - throw new HoodieException("Failed to created fileGroup " + fileGroupFileId + " for partition " + metadataPartition.partitionPath(), e); + throw new HoodieException("Failed to create fileGroup " + fileGroupFileId + " for partition " + metadataPartition.getPartitionPath(), e); } } } @@ -577,7 +647,7 @@ private void initializeFileGroups(HoodieTableMetaClient dataMetaClient, Metadata * Updates from different commit metadata types use the same method to convert them to HoodieRecords.
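As the javadoc above describes, file group IDs are the partition's fileId prefix plus a zero-padded four-digit suffix; the %s%04d format string in initializeFileGroups produces exactly that sequence:

```java
public class FileGroupNaming {
  public static void main(String[] args) {
    String fileIdPrefix = "record-index-bucket-";
    for (int i = 0; i < 10; i++) {
      System.out.println(String.format("%s%04d", fileIdPrefix, i));
    }
    // record-index-bucket-0000, record-index-bucket-0001, ..., record-index-bucket-0009
  }
}
```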
*/ private interface ConvertMetadataFunction { - List convertMetadata(); + Map> convertMetadata(); } /** @@ -589,8 +659,8 @@ private interface ConvertMetadataFunction { */ private void processAndCommit(String instantTime, ConvertMetadataFunction convertMetadataFunction, boolean canTriggerTableService) { if (enabled && metadata != null) { - List records = convertMetadataFunction.convertMetadata(); - commit(engineContext.parallelize(records, 1), MetadataPartitionType.FILES.partitionPath(), instantTime, canTriggerTableService); + Map> partitionRecordsMap = convertMetadataFunction.convertMetadata(); + commit(instantTime, partitionRecordsMap, canTriggerTableService); } } @@ -602,7 +672,8 @@ private void processAndCommit(String instantTime, ConvertMetadataFunction co */ @Override public void update(HoodieCommitMetadata commitMetadata, String instantTime, boolean isTableServiceAction) { - processAndCommit(instantTime, () -> HoodieTableMetadataUtil.convertMetadataToRecords(commitMetadata, instantTime), !isTableServiceAction); + processAndCommit(instantTime, () -> HoodieTableMetadataUtil.convertMetadataToRecords(engineContext, enabledPartitionTypes, + commitMetadata, dataMetaClient, dataWriteConfig.isMetadataIndexColumnStatsForAllColumnsEnabled(), instantTime), !isTableServiceAction); } /** @@ -613,8 +684,8 @@ public void update(HoodieCommitMetadata commitMetadata, String instantTime, bool */ @Override public void update(HoodieCleanMetadata cleanMetadata, String instantTime) { - processAndCommit(instantTime, () -> HoodieTableMetadataUtil.convertMetadataToRecords(cleanMetadata, instantTime), - false); + processAndCommit(instantTime, () -> HoodieTableMetadataUtil.convertMetadataToRecords(engineContext, enabledPartitionTypes, + cleanMetadata, dataMetaClient, instantTime), false); } /** @@ -625,8 +696,9 @@ public void update(HoodieCleanMetadata cleanMetadata, String instantTime) { */ @Override public void update(HoodieRestoreMetadata restoreMetadata, String instantTime) { - processAndCommit(instantTime, () -> HoodieTableMetadataUtil.convertMetadataToRecords(metadataMetaClient.getActiveTimeline(), - restoreMetadata, instantTime, metadata.getSyncedInstantTime()), false); + processAndCommit(instantTime, () -> HoodieTableMetadataUtil.convertMetadataToRecords(engineContext, + enabledPartitionTypes, metadataMetaClient.getActiveTimeline(), restoreMetadata, dataMetaClient, instantTime, + metadata.getSyncedInstantTime()), false); } /** @@ -650,9 +722,11 @@ public void update(HoodieRollbackMetadata rollbackMetadata, String instantTime) } } - List records = HoodieTableMetadataUtil.convertMetadataToRecords(metadataMetaClient.getActiveTimeline(), rollbackMetadata, instantTime, - metadata.getSyncedInstantTime(), wasSynced); - commit(engineContext.parallelize(records, 1), MetadataPartitionType.FILES.partitionPath(), instantTime, false); + Map> records = + HoodieTableMetadataUtil.convertMetadataToRecords(engineContext, enabledPartitionTypes, + metadataMetaClient.getActiveTimeline(), rollbackMetadata, dataMetaClient, instantTime, + metadata.getSyncedInstantTime(), wasSynced); + commit(instantTime, records, false); } } @@ -665,12 +739,47 @@ public void close() throws Exception { /** * Commit the {@code HoodieRecord}s to Metadata Table as a new delta-commit. - * @param records The HoodieData of records to be written. - * @param partitionName The partition to which the records are to be written. - * @param instantTime The timestamp to use for the deltacommit. 
+ * + * @param instantTime - Action instant time for this commit + * @param partitionRecordsMap - Map of partition name to its records to commit * @param canTriggerTableService true if table services can be scheduled and executed. false otherwise. */ - protected abstract void commit(HoodieData records, String partitionName, String instantTime, boolean canTriggerTableService); + protected abstract void commit( + String instantTime, Map> partitionRecordsMap, + boolean canTriggerTableService); + + /** + * Tag each record with the location in the given partition. + * The record is tagged with respective file slice's location based on its record key. + */ + protected HoodieData prepRecords(Map> partitionRecordsMap) { + // The result set + HoodieData allPartitionRecords = engineContext.emptyHoodieData(); + + HoodieTableFileSystemView fsView = HoodieTableMetadataUtil.getFileSystemView(metadataMetaClient); + for (Map.Entry> entry : partitionRecordsMap.entrySet()) { + final String partitionName = entry.getKey().getPartitionPath(); + final int fileGroupCount = entry.getKey().getFileGroupCount(); + HoodieData records = entry.getValue(); + + List fileSlices = + HoodieTableMetadataUtil.getPartitionLatestFileSlices(metadataMetaClient, Option.ofNullable(fsView), partitionName); + ValidationUtils.checkArgument(fileSlices.size() == fileGroupCount, + String.format("Invalid number of file groups for partition:%s, found=%d, required=%d", + partitionName, fileSlices.size(), fileGroupCount)); + + HoodieData rddSinglePartitionRecords = records.map(r -> { + FileSlice slice = fileSlices.get(HoodieTableMetadataUtil.mapRecordKeyToFileGroupIndex(r.getRecordKey(), + fileGroupCount)); + r.setCurrentLocation(new HoodieRecordLocation(slice.getBaseInstantTime(), slice.getFileId())); + return r; + }); + + allPartitionRecords = allPartitionRecords.union(rddSinglePartitionRecords); + } + return allPartitionRecords; + } /** * Perform a compaction on the Metadata Table. @@ -682,7 +791,7 @@ public void close() throws Exception { * 2. In multi-writer scenario, a parallel operation with a greater instantTime may have completed creating a * deltacommit. */ - protected void compactIfNecessary(AbstractHoodieWriteClient writeClient, String instantTime) { + protected void compactIfNecessary(BaseHoodieWriteClient writeClient, String instantTime) { // finish off any pending compactions if any from previous attempt. writeClient.runAnyPendingCompactions(); @@ -706,7 +815,7 @@ protected void compactIfNecessary(AbstractHoodieWriteClient writeClient, String } } - protected void cleanIfNecessary(AbstractHoodieWriteClient writeClient, String instantTime) { + protected void cleanIfNecessary(BaseHoodieWriteClient writeClient, String instantTime) { Option lastCompletedCompactionInstant = metadataMetaClient.reloadActiveTimeline() .getCommitTimeline().filterCompletedInstants().lastInstant(); if (lastCompletedCompactionInstant.isPresent() @@ -735,14 +844,19 @@ protected void bootstrapCommit(List partitionInfoList, String cre List partitions = partitionInfoList.stream().map(p -> p.getRelativePath().isEmpty() ? 
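prepRecords, defined above, tags each metadata record with the file slice it must land in by hashing the record key into one of the partition's fixed file groups via HoodieTableMetadataUtil.mapRecordKeyToFileGroupIndex. The real hash lives in that utility; the idea is stable modulo bucketing, sketched here under that assumption:

```java
public class FileGroupBucketing {
  // Illustrative only: the actual mapping is in HoodieTableMetadataUtil and may
  // use a different hash; what matters is a stable key -> [0, fileGroupCount) index.
  static int fileGroupIndex(String recordKey, int fileGroupCount) {
    return Math.floorMod(recordKey.hashCode(), fileGroupCount);
  }

  public static void main(String[] args) {
    int fileGroupCount = 10;
    System.out.println(fileGroupIndex("2022/01/26/part-0001.parquet", fileGroupCount));
  }
}
```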
NON_PARTITIONED_NAME : p.getRelativePath()).collect(Collectors.toList()); final int totalFiles = partitionInfoList.stream().mapToInt(p -> p.getTotalFiles()).sum(); + final Map> partitionToRecordsMap = new HashMap<>(); // Record which saves the list of all partitions HoodieRecord allPartitionRecord = HoodieMetadataPayload.createPartitionListRecord(partitions); if (partitions.isEmpty()) { - // in case of boostrapping of a fresh table, there won't be any partitions, but we need to make a boostrap commit - commit(engineContext.parallelize(Collections.singletonList(allPartitionRecord), 1), MetadataPartitionType.FILES.partitionPath(), createInstantTime, false); + // in case of bootstrapping of a fresh table, there won't be any partitions, but we need to make a bootstrap commit + final HoodieData allPartitionRecordsRDD = engineContext.parallelize( + Collections.singletonList(allPartitionRecord), 1); + partitionToRecordsMap.put(MetadataPartitionType.FILES, allPartitionRecordsRDD); + commit(createInstantTime, partitionToRecordsMap, false); return; } + HoodieData partitionRecords = engineContext.parallelize(Arrays.asList(allPartitionRecord), 1); if (!partitionInfoList.isEmpty()) { HoodieData fileListRecords = engineContext.parallelize(partitionInfoList, partitionInfoList.size()).map(partitionInfo -> { @@ -762,7 +876,8 @@ protected void bootstrapCommit(List partitionInfoList, String cre LOG.info("Committing " + partitions.size() + " partitions and " + totalFiles + " files to metadata"); ValidationUtils.checkState(partitionRecords.count() == (partitions.size() + 1)); - commit(partitionRecords, MetadataPartitionType.FILES.partitionPath(), createInstantTime, false); + partitionToRecordsMap.put(MetadataPartitionType.FILES, partitionRecords); + commit(createInstantTime, partitionToRecordsMap, false); } /** diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataKeyGenerator.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataKeyGenerator.java index 4ec143bf06789..332be73b14f57 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataKeyGenerator.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataKeyGenerator.java @@ -42,7 +42,7 @@ public HoodieTableMetadataKeyGenerator(TypedProperties config) { @Override public String getRecordKey(GenericRecord record) { - return KeyGenUtils.getRecordKey(record, HoodieMetadataPayload.SCHEMA_FIELD_ID_KEY, isConsistentLogicalTimestampEnabled()); + return KeyGenUtils.getRecordKey(record, HoodieMetadataPayload.KEY_FIELD_NAME, isConsistentLogicalTimestampEnabled()); } @Override diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metrics/HoodieMetrics.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metrics/HoodieMetrics.java index e874047b8c644..d13110feef228 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metrics/HoodieMetrics.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metrics/HoodieMetrics.java @@ -130,6 +130,26 @@ public Timer.Context getIndexCtx() { return indexTimer == null ? null : indexTimer.time(); } + public void updateMetricsForEmptyData(String actionType) { + if (!config.isMetricsOn() || !config.getMetricsReporterType().equals(MetricsReporterType.PROMETHEUS_PUSHGATEWAY)) { + // No-op if metrics are not of type PROMETHEUS_PUSHGATEWAY.
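updateMetricsForEmptyData appears aimed at the push-gateway model, where the gateway keeps serving the last pushed values: after a round that writes no data, pushing explicit zero gauges keeps dashboards from reading stale counts (hence the guard restricting it to PROMETHEUS_PUSHGATEWAY). In Dropwizard terms, each Metrics.registerGauge(name, 0) call amounts to a constant-zero gauge, roughly:

```java
import com.codahale.metrics.Gauge;
import com.codahale.metrics.MetricRegistry;

public class ZeroGaugeSketch {
  public static void main(String[] args) {
    MetricRegistry registry = new MetricRegistry();
    // Rough equivalent of Metrics.registerGauge(name, 0): a constant-zero gauge,
    // so the next push overwrites whatever value the gateway last retained.
    registry.register("commit.totalRecordsWritten", (Gauge<Long>) () -> 0L);
    System.out.println(registry.getGauges().keySet());
  }
}
```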
+ return; + } + Metrics.registerGauge(getMetricsName(actionType, "totalPartitionsWritten"), 0); + Metrics.registerGauge(getMetricsName(actionType, "totalFilesInsert"), 0); + Metrics.registerGauge(getMetricsName(actionType, "totalFilesUpdate"), 0); + Metrics.registerGauge(getMetricsName(actionType, "totalRecordsWritten"), 0); + Metrics.registerGauge(getMetricsName(actionType, "totalUpdateRecordsWritten"), 0); + Metrics.registerGauge(getMetricsName(actionType, "totalInsertRecordsWritten"), 0); + Metrics.registerGauge(getMetricsName(actionType, "totalBytesWritten"), 0); + Metrics.registerGauge(getMetricsName(actionType, "totalScanTime"), 0); + Metrics.registerGauge(getMetricsName(actionType, "totalCreateTime"), 0); + Metrics.registerGauge(getMetricsName(actionType, "totalUpsertTime"), 0); + Metrics.registerGauge(getMetricsName(actionType, "totalCompactedRecordsUpdated"), 0); + Metrics.registerGauge(getMetricsName(actionType, "totalLogFilesCompacted"), 0); + Metrics.registerGauge(getMetricsName(actionType, "totalLogFilesSize"), 0); + } + public void updateCommitMetrics(long commitEpochTimeInMs, long durationInMs, HoodieCommitMetadata metadata, String actionType) { updateCommitTimingMetrics(commitEpochTimeInMs, durationInMs, metadata, actionType); diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metrics/MetricsReporterFactory.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metrics/MetricsReporterFactory.java index dc9e80431b8d7..d81e337b28d7a 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metrics/MetricsReporterFactory.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metrics/MetricsReporterFactory.java @@ -23,12 +23,12 @@ import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.exception.HoodieException; import org.apache.hudi.metrics.cloudwatch.CloudWatchMetricsReporter; +import org.apache.hudi.metrics.custom.CustomizableMetricsReporter; import org.apache.hudi.metrics.datadog.DatadogMetricsReporter; - -import com.codahale.metrics.MetricRegistry; import org.apache.hudi.metrics.prometheus.PrometheusReporter; import org.apache.hudi.metrics.prometheus.PushGatewayMetricsReporter; -import org.apache.hudi.metrics.userdefined.AbstractUserDefinedMetricsReporter; + +import com.codahale.metrics.MetricRegistry; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; @@ -47,9 +47,9 @@ public static MetricsReporter createReporter(HoodieWriteConfig config, MetricReg if (!StringUtils.isNullOrEmpty(reporterClassName)) { Object instance = ReflectionUtils.loadClass( reporterClassName, new Class[] {Properties.class, MetricRegistry.class}, config.getProps(), registry); - if (!(instance instanceof AbstractUserDefinedMetricsReporter)) { + if (!(instance instanceof CustomizableMetricsReporter)) { throw new HoodieException(config.getMetricReporterClassName() - + " is not a subclass of AbstractUserDefinedMetricsReporter"); + + " is not a subclass of CustomizableMetricsReporter"); } return (MetricsReporter) instance; } diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metrics/custom/CustomizableMetricsReporter.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metrics/custom/CustomizableMetricsReporter.java new file mode 100644 index 0000000000000..13574b1e15693 --- /dev/null +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metrics/custom/CustomizableMetricsReporter.java @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation 
(ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.metrics.custom; + +import org.apache.hudi.metrics.MetricsReporter; + +import com.codahale.metrics.MetricRegistry; + +import java.util.Properties; + +/** + * Extensible metrics reporter for custom implementation. + */ +public abstract class CustomizableMetricsReporter extends MetricsReporter { + private Properties props; + private MetricRegistry registry; + + public CustomizableMetricsReporter(Properties props, MetricRegistry registry) { + this.props = props; + this.registry = registry; + } + + public Properties getProps() { + return props; + } + + public MetricRegistry getRegistry() { + return registry; + } +} diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metrics/userdefined/AbstractUserDefinedMetricsReporter.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metrics/userdefined/AbstractUserDefinedMetricsReporter.java index 0a0d7bbe123a6..715b9564c5f70 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metrics/userdefined/AbstractUserDefinedMetricsReporter.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metrics/userdefined/AbstractUserDefinedMetricsReporter.java @@ -7,38 +7,31 @@ * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. */ package org.apache.hudi.metrics.userdefined; +import org.apache.hudi.metrics.custom.CustomizableMetricsReporter; + import com.codahale.metrics.MetricRegistry; -import org.apache.hudi.metrics.MetricsReporter; + import java.util.Properties; /** - * Abstract class of user defined metrics reporter. + * @deprecated Extend {@link CustomizableMetricsReporter} instead. 
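With this rename, user code should extend CustomizableMetricsReporter; MetricsReporterFactory instantiates the configured class reflectively through a (Properties, MetricRegistry) constructor and rejects anything that is not a CustomizableMetricsReporter. A hypothetical reporter could look like the sketch below; the start/report/stop method set is an assumption for illustration, so check MetricsReporter for the exact abstract methods:

```java
import com.codahale.metrics.MetricRegistry;
import org.apache.hudi.metrics.custom.CustomizableMetricsReporter;

import java.util.Properties;

// Hypothetical example class; not part of Hudi.
public class LoggingMetricsReporter extends CustomizableMetricsReporter {
  public LoggingMetricsReporter(Properties props, MetricRegistry registry) {
    // The (Properties, MetricRegistry) constructor shape is required, matching
    // what MetricsReporterFactory reflects on.
    super(props, registry);
  }

  @Override
  public void start() { /* open sinks, schedule periodic reporting, etc. */ }

  @Override
  public void report() {
    getRegistry().getGauges().forEach((name, gauge) ->
        System.out.println(name + " = " + gauge.getValue()));
  }

  @Override
  public void stop() { /* flush and close */ }
}
```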
*/ -public abstract class AbstractUserDefinedMetricsReporter extends MetricsReporter { - private Properties props; - private MetricRegistry registry; +@Deprecated +public abstract class AbstractUserDefinedMetricsReporter extends CustomizableMetricsReporter { public AbstractUserDefinedMetricsReporter(Properties props, MetricRegistry registry) { - this.props = props; - this.registry = registry; - } - - public Properties getProps() { - return props; - } - - public MetricRegistry getRegistry() { - return registry; + super(props, registry); } -} \ No newline at end of file +} diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/HoodieTable.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/HoodieTable.java index 40e3a316db107..bb4ae962038fe 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/HoodieTable.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/HoodieTable.java @@ -18,13 +18,13 @@ package org.apache.hudi.table; -import org.apache.avro.specific.SpecificRecordBase; import org.apache.hudi.avro.HoodieAvroUtils; import org.apache.hudi.avro.model.HoodieCleanMetadata; import org.apache.hudi.avro.model.HoodieCleanerPlan; import org.apache.hudi.avro.model.HoodieClusteringPlan; import org.apache.hudi.avro.model.HoodieCompactionPlan; import org.apache.hudi.avro.model.HoodieRestoreMetadata; +import org.apache.hudi.avro.model.HoodieRestorePlan; import org.apache.hudi.avro.model.HoodieRollbackMetadata; import org.apache.hudi.avro.model.HoodieRollbackPlan; import org.apache.hudi.avro.model.HoodieSavepointMetadata; @@ -44,7 +44,6 @@ import org.apache.hudi.common.model.HoodieWriteStat; import org.apache.hudi.common.table.HoodieTableMetaClient; import org.apache.hudi.common.table.TableSchemaResolver; -import org.apache.hudi.common.table.log.block.HoodieLogBlock.HoodieLogBlockType; import org.apache.hudi.common.table.timeline.HoodieActiveTimeline; import org.apache.hudi.common.table.timeline.HoodieInstant; import org.apache.hudi.common.table.timeline.HoodieTimeline; @@ -55,6 +54,7 @@ import org.apache.hudi.common.table.view.TableFileSystemView; import org.apache.hudi.common.table.view.TableFileSystemView.BaseFileOnlyView; import org.apache.hudi.common.table.view.TableFileSystemView.SliceView; +import org.apache.hudi.common.util.Functions; import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.collection.Pair; import org.apache.hudi.config.HoodieWriteConfig; @@ -69,17 +69,19 @@ import org.apache.hudi.table.action.bootstrap.HoodieBootstrapWriteMetadata; import org.apache.hudi.table.marker.WriteMarkers; import org.apache.hudi.table.marker.WriteMarkersFactory; +import org.apache.hudi.table.storage.HoodieLayoutFactory; +import org.apache.hudi.table.storage.HoodieStorageLayout; import org.apache.avro.Schema; +import org.apache.avro.specific.SpecificRecordBase; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import org.apache.hudi.table.storage.HoodieLayoutFactory; -import org.apache.hudi.table.storage.HoodieStorageLayout; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; import javax.annotation.Nonnull; + import java.io.IOException; import java.io.Serializable; import java.util.ArrayList; @@ -104,7 +106,7 @@ public abstract class HoodieTable implem protected final HoodieWriteConfig config; protected final HoodieTableMetaClient metaClient; - protected final HoodieIndex index; + protected final 
HoodieIndex index; private SerializableConfiguration hadoopConfiguration; protected final TaskContextSupplier taskContextSupplier; private final HoodieTableMetadata metadata; @@ -130,7 +132,7 @@ protected HoodieTable(HoodieWriteConfig config, HoodieEngineContext context, Hoo this.taskContextSupplier = context.getTaskContextSupplier(); } - protected abstract HoodieIndex getIndex(HoodieWriteConfig config, HoodieEngineContext context); + protected abstract HoodieIndex getIndex(HoodieWriteConfig config, HoodieEngineContext context); protected HoodieStorageLayout getStorageLayout(HoodieWriteConfig config) { return HoodieLayoutFactory.createLayout(config); @@ -143,6 +145,10 @@ private synchronized FileSystemViewManager getViewManager() { return viewManager; } + public HoodieTableMetadata getMetadata() { + return metadata; + } + /** * Upsert a batch of new records into Hoodie table at the supplied instantTime. * @param context HoodieEngineContext @@ -348,6 +354,13 @@ public HoodieTimeline getRollbackTimeline() { return getActiveTimeline().getRollbackTimeline(); } + /** + * Get restore timeline. + */ + public HoodieTimeline getRestoreTimeline() { + return getActiveTimeline().getRestoreTimeline(); + } + /** * Get only the completed (no-inflights) savepoint timeline. */ @@ -369,7 +382,7 @@ public HoodieActiveTimeline getActiveTimeline() { /** * Return the index. */ - public HoodieIndex getIndex() { + public HoodieIndex getIndex() { return index; } @@ -499,6 +512,13 @@ public abstract HoodieRestoreMetadata restore(HoodieEngineContext context, String restoreInstantTime, String instantToRestore); + /** + * Schedules Restore for the table to the given instant. + */ + public abstract Option scheduleRestore(HoodieEngineContext context, + String restoreInstantTime, + String instantToRestore); + /** * Rollback failed compactions. Inflight rollbacks for compactions revert the .inflight file * to the .requested file. 
@@ -681,9 +701,9 @@ private void validateSchema() throws HoodieUpsertException, HoodieInsertExceptio Schema writerSchema; boolean isValid; try { - TableSchemaResolver schemaUtil = new TableSchemaResolver(getMetaClient()); + TableSchemaResolver schemaResolver = new TableSchemaResolver(getMetaClient()); writerSchema = HoodieAvroUtils.createHoodieWriteSchema(config.getSchema()); - tableSchema = HoodieAvroUtils.createHoodieWriteSchema(schemaUtil.getTableAvroSchemaWithoutMetadataFields()); + tableSchema = HoodieAvroUtils.createHoodieWriteSchema(schemaResolver.getTableAvroSchemaWithoutMetadataFields()); isValid = TableSchemaResolver.isSchemaCompatible(tableSchema, writerSchema); } catch (Exception e) { throw new HoodieException("Failed to read schema/check compatibility for base path " + metaClient.getBasePath(), e); @@ -719,19 +739,6 @@ public HoodieFileFormat getLogFileFormat() { return metaClient.getTableConfig().getLogFileFormat(); } - public HoodieLogBlockType getLogDataBlockFormat() { - switch (getBaseFileFormat()) { - case PARQUET: - case ORC: - return HoodieLogBlockType.AVRO_DATA_BLOCK; - case HFILE: - return HoodieLogBlockType.HFILE_DATA_BLOCK; - default: - throw new HoodieException("Base file format " + getBaseFileFormat() - + " does not have associated log block format"); - } - } - public String getBaseFileExtension() { return getBaseFileFormat().getFileExtension(); } @@ -776,11 +783,18 @@ public final Option getMetadataWriter(String triggeri * @param triggeringInstantTimestamp - The instant that is triggering this metadata write * @return instance of {@link HoodieTableMetadataWriter} */ - public Option getMetadataWriter(String triggeringInstantTimestamp, - Option actionMetadata) { + public Option getMetadataWriter(String triggeringInstantTimestamp, + Option actionMetadata) { // Each engine is expected to override this and // provide the actual metadata writer, if enabled. return Option.empty(); } + public HoodieTableMetadata getMetadataTable() { + return this.metadata; + } + + public Runnable getPreExecuteRunnable() { + return Functions.noop(); + } } diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/WorkloadProfile.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/WorkloadProfile.java index 7700e95d1d707..8e6160b095483 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/WorkloadProfile.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/WorkloadProfile.java @@ -33,9 +33,14 @@ public class WorkloadProfile implements Serializable { /** - * Computed workload profile. + * Computed workload stats. */ - protected final HashMap partitionPathStatMap; + protected final HashMap inputPartitionPathStatMap; + + /** + * Execution/Output workload stats + */ + protected final HashMap outputPartitionPathStatMap; /** * Global workloadStat. 
@@ -47,13 +52,21 @@ public class WorkloadProfile implements Serializable { */ private WriteOperationType operationType; + private final boolean hasOutputWorkLoadStats; + public WorkloadProfile(Pair, WorkloadStat> profile) { - this.partitionPathStatMap = profile.getLeft(); + this(profile, false); + } + + public WorkloadProfile(Pair, WorkloadStat> profile, boolean hasOutputWorkLoadStats) { + this.inputPartitionPathStatMap = profile.getLeft(); this.globalStat = profile.getRight(); + this.outputPartitionPathStatMap = new HashMap<>(); + this.hasOutputWorkLoadStats = hasOutputWorkLoadStats; } - public WorkloadProfile(Pair, WorkloadStat> profile, WriteOperationType operationType) { - this(profile); + public WorkloadProfile(Pair, WorkloadStat> profile, WriteOperationType operationType, boolean hasOutputWorkLoadStats) { + this(profile, hasOutputWorkLoadStats); this.operationType = operationType; } @@ -62,15 +75,37 @@ public WorkloadStat getGlobalStat() { } public Set getPartitionPaths() { - return partitionPathStatMap.keySet(); + return inputPartitionPathStatMap.keySet(); + } + + public Set getOutputPartitionPaths() { + return hasOutputWorkLoadStats ? outputPartitionPathStatMap.keySet() : inputPartitionPathStatMap.keySet(); } - public HashMap getPartitionPathStatMap() { - return partitionPathStatMap; + public HashMap getInputPartitionPathStatMap() { + return inputPartitionPathStatMap; + } + + public HashMap getOutputPartitionPathStatMap() { + return outputPartitionPathStatMap; + } + + public boolean hasOutputWorkLoadStats() { + return hasOutputWorkLoadStats; + } + + public void updateOutputPartitionPathStatMap(String partitionPath, WorkloadStat workloadStat) { + if (hasOutputWorkLoadStats) { + outputPartitionPathStatMap.put(partitionPath, workloadStat); + } } public WorkloadStat getWorkloadStat(String partitionPath) { - return partitionPathStatMap.get(partitionPath); + return inputPartitionPathStatMap.get(partitionPath); + } + + public WorkloadStat getOutputWorkloadStat(String partitionPath) { + return hasOutputWorkLoadStats ? 
outputPartitionPathStatMap.get(partitionPath) : inputPartitionPathStatMap.get(partitionPath); } public WriteOperationType getOperationType() { @@ -81,7 +116,8 @@ public WriteOperationType getOperationType() { public String toString() { final StringBuilder sb = new StringBuilder("WorkloadProfile {"); sb.append("globalStat=").append(globalStat).append(", "); - sb.append("partitionStat=").append(partitionPathStatMap).append(", "); + sb.append("InputPartitionStat=").append(inputPartitionPathStatMap).append(", "); + sb.append("OutputPartitionStat=").append(outputPartitionPathStatMap).append(", "); sb.append("operationType=").append(operationType); sb.append('}'); return sb.toString(); diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/WorkloadStat.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/WorkloadStat.java index c3371bab092db..327a5a3ae7980 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/WorkloadStat.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/WorkloadStat.java @@ -33,9 +33,12 @@ public class WorkloadStat implements Serializable { private long numUpdates = 0L; + private HashMap> insertLocationToCount; + private HashMap> updateLocationToCount; public WorkloadStat() { + insertLocationToCount = new HashMap<>(); updateLocationToCount = new HashMap<>(); } @@ -43,6 +46,17 @@ public long addInserts(long numInserts) { return this.numInserts += numInserts; } + public long addInserts(HoodieRecordLocation location, long numInserts) { + long accNumInserts = 0; + if (insertLocationToCount.containsKey(location.getFileId())) { + accNumInserts = insertLocationToCount.get(location.getFileId()).getRight(); + } + insertLocationToCount.put( + location.getFileId(), + Pair.of(location.getInstantTime(), numInserts + accNumInserts)); + return this.numInserts += numInserts; + } + public long addUpdates(HoodieRecordLocation location, long numUpdates) { long accNumUpdates = 0; if (updateLocationToCount.containsKey(location.getFileId())) { @@ -66,6 +80,10 @@ public HashMap> getUpdateLocationToCount() { return updateLocationToCount; } + public HashMap> getInsertLocationToCount() { + return insertLocationToCount; + } + @Override public String toString() { final StringBuilder sb = new StringBuilder("WorkloadStat {"); diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/BaseActionExecutor.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/BaseActionExecutor.java index 221f970cb5132..f893b4ccd5c4e 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/BaseActionExecutor.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/BaseActionExecutor.java @@ -65,7 +65,7 @@ protected final void writeTableMetadata(HoodieCommitMetadata metadata, String ac * Writes clean metadata to table metadata. * @param metadata clean metadata of interest. 
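+ * @param instantTime instant time of the clean action being written.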
*/ - protected final void writeTableMetadata(HoodieCleanMetadata metadata) { + protected final void writeTableMetadata(HoodieCleanMetadata metadata, String instantTime) { table.getMetadataWriter(instantTime).ifPresent(w -> w.update(metadata, instantTime)); } diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/bootstrap/BootstrapRecordConsumer.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/bootstrap/BootstrapRecordConsumer.java index 4d3cd479de09d..8966a5d51c7cb 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/bootstrap/BootstrapRecordConsumer.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/bootstrap/BootstrapRecordConsumer.java @@ -19,6 +19,7 @@ package org.apache.hudi.table.action.bootstrap; import org.apache.hudi.common.model.HoodieRecord; +import org.apache.hudi.common.model.HoodieRecordPayload; import org.apache.hudi.common.util.queue.BoundedInMemoryQueueConsumer; import org.apache.hudi.exception.HoodieIOException; import org.apache.hudi.io.HoodieBootstrapHandle; @@ -39,7 +40,8 @@ public BootstrapRecordConsumer(HoodieBootstrapHandle bootstrapHandle) { @Override protected void consumeOneRecord(HoodieRecord record) { try { - bootstrapHandle.write(record, record.getData().getInsertValue(bootstrapHandle.getWriterSchemaWithMetaFields())); + bootstrapHandle.write(record, ((HoodieRecordPayload) record.getData()) + .getInsertValue(bootstrapHandle.getWriterSchemaWithMetaFields())); } catch (IOException e) { throw new HoodieIOException(e.getMessage(), e); } diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanActionExecutor.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanActionExecutor.java index 9813b2b659677..4ae8009c9a88e 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanActionExecutor.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanActionExecutor.java @@ -208,7 +208,7 @@ private HoodieCleanMetadata runClean(HoodieTable table, HoodieInstan if (!skipLocking) { this.txnManager.beginTransaction(Option.empty(), Option.empty()); } - writeTableMetadata(metadata); + writeTableMetadata(metadata, inflightInstant.getTimestamp()); table.getActiveTimeline().transitionCleanInflightToComplete(inflightInstant, TimelineMetadataUtils.serializeCleanMetadata(metadata)); LOG.info("Marked clean started on " + inflightInstant.getTimestamp() + " as complete"); @@ -240,9 +240,13 @@ public HoodieCleanMetadata execute() { LOG.warn("Failed to perform previous clean operation, instant: " + hoodieInstant, e); } } + table.getMetaClient().reloadActiveTimeline(); + if (config.isMetadataTableEnabled()) { + table.getHoodieView().sync(); + } }); - table.getMetaClient().reloadActiveTimeline(); } + // return the last clean metadata for now // TODO (NA) : Clean only the earliest pending clean just like how we do for other table services // This requires the CleanActionExecutor to be refactored as BaseCommitActionExecutor diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanPlanner.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanPlanner.java index 27937af880c40..7e56d3456a0a4 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanPlanner.java +++ 
b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/clean/CleanPlanner.java @@ -33,6 +33,7 @@ import org.apache.hudi.common.model.HoodieRecordPayload; import org.apache.hudi.common.model.HoodieReplaceCommitMetadata; import org.apache.hudi.common.model.HoodieTableType; +import org.apache.hudi.common.table.timeline.HoodieActiveTimeline; import org.apache.hudi.common.table.timeline.HoodieInstant; import org.apache.hudi.common.table.timeline.HoodieTimeline; import org.apache.hudi.common.table.timeline.TimelineMetadataUtils; @@ -50,8 +51,12 @@ import java.io.IOException; import java.io.Serializable; +import java.time.Instant; +import java.time.ZoneId; +import java.time.ZonedDateTime; import java.util.ArrayList; import java.util.Collections; +import java.util.Date; import java.util.Iterator; import java.util.List; import java.util.Map; @@ -123,6 +128,7 @@ public Stream getSavepointedDataFiles(String savepointTime) { public List getPartitionPathsToClean(Option earliestRetainedInstant) throws IOException { switch (config.getCleanerPolicy()) { case KEEP_LATEST_COMMITS: + case KEEP_LATEST_BY_HOURS: return getPartitionPathsForCleanByCommits(earliestRetainedInstant); case KEEP_LATEST_FILE_VERSIONS: return getPartitionPathsForFullCleaning(); @@ -251,6 +257,10 @@ private List getFilesToCleanKeepingLatestVersions(String partitio return deletePaths; } + private List getFilesToCleanKeepingLatestCommits(String partitionPath) { + return getFilesToCleanKeepingLatestCommits(partitionPath, config.getCleanerCommitsRetained(), HoodieCleaningPolicy.KEEP_LATEST_COMMITS); + } + /** * Selects the versions for file for cleaning, such that it *
@@ -265,8 +275,7 @@ private List getFilesToCleanKeepingLatestVersions(String partitio * - leaves the latest version of each file untouched, and * - retains every file version touched by the last config.getCleanerCommitsRetained() commits, plus one version before that window, * since max(query execution time) is assumed to be bounded by that retention window. *
* This policy is the default. */ - private List getFilesToCleanKeepingLatestCommits(String partitionPath) { - int commitsRetained = config.getCleanerCommitsRetained(); + private List getFilesToCleanKeepingLatestCommits(String partitionPath, int commitsRetained, HoodieCleaningPolicy policy) { LOG.info("Cleaning " + partitionPath + ", retaining latest " + commitsRetained + " commits. "); List deletePaths = new ArrayList<>(); @@ -303,14 +312,24 @@ private List getFilesToCleanKeepingLatestCommits(String partition // do not clean up a savepoint data file continue; } - // Dont delete the latest commit and also the last commit before the earliest commit we - // are retaining - // The window of commit retain == max query run time. So a query could be running which - // still - // uses this file. - if (fileCommitTime.equals(lastVersion) || (fileCommitTime.equals(lastVersionBeforeEarliestCommitToRetain))) { - // move on to the next file - continue; + + if (policy == HoodieCleaningPolicy.KEEP_LATEST_COMMITS) { + // Don't delete the latest commit, nor the last commit before the earliest commit we + // are retaining. The window of commits retained == max query run time, so a query + // could still be running that uses this file. + if (fileCommitTime.equals(lastVersion) || (fileCommitTime.equals(lastVersionBeforeEarliestCommitToRetain))) { + // move on to the next file + continue; + } + } else if (policy == HoodieCleaningPolicy.KEEP_LATEST_BY_HOURS) { + // This block corresponds to the KEEP_LATEST_BY_HOURS policy: never delete the latest commit. + if (fileCommitTime.equals(lastVersion)) { + // move on to the next file + continue; + } } // Always keep the last commit @@ -334,6 +353,18 @@ private List getFilesToCleanKeepingLatestCommits(String partition } return deletePaths; } + + /** + * This method finds the files to be cleaned based on their age in hours. If {@code config.getCleanerHoursRetained()} is set to 5, + * all file versions with a commit time older than 5 hours will be removed, while the latest version of each file group is always retained. + * This policy gives users much more flexibility in retaining data for incremental queries than the + * KEEP_LATEST_COMMITS cleaning policy. The default number of hours is 5.
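+ * For example, with 5 hours retained, a file version committed 6 hours ago becomes eligible for cleaning unless it is the latest version of its file group.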
+ @param partitionPath partition path to check + @return list of files to clean + */ + private List getFilesToCleanKeepingLatestHours(String partitionPath) { + return getFilesToCleanKeepingLatestCommits(partitionPath, 0, HoodieCleaningPolicy.KEEP_LATEST_BY_HOURS); + } private List getReplacedFilesEligibleToClean(List savepointedFiles, String partitionPath, Option earliestCommitToRetain) { final Stream replacedGroups; @@ -392,6 +423,8 @@ public List getDeletePaths(String partitionPath) { deletePaths = getFilesToCleanKeepingLatestCommits(partitionPath); } else if (policy == HoodieCleaningPolicy.KEEP_LATEST_FILE_VERSIONS) { deletePaths = getFilesToCleanKeepingLatestVersions(partitionPath); + } else if (policy == HoodieCleaningPolicy.KEEP_LATEST_BY_HOURS) { + deletePaths = getFilesToCleanKeepingLatestHours(partitionPath); } else { throw new IllegalArgumentException("Unknown cleaning policy : " + policy.name()); } @@ -406,9 +439,16 @@ public List getDeletePaths(String partitionPath) { public Option getEarliestCommitToRetain() { Option earliestCommitToRetain = Option.empty(); int commitsRetained = config.getCleanerCommitsRetained(); + int hoursRetained = config.getCleanerHoursRetained(); if (config.getCleanerPolicy() == HoodieCleaningPolicy.KEEP_LATEST_COMMITS && commitTimeline.countInstants() > commitsRetained) { - earliestCommitToRetain = commitTimeline.nthInstant(commitTimeline.countInstants() - commitsRetained); + earliestCommitToRetain = commitTimeline.nthInstant(commitTimeline.countInstants() - commitsRetained); // e.g. with 15 instants in total and 10 commits to retain, this picks the 6th instant in the list + } else if (config.getCleanerPolicy() == HoodieCleaningPolicy.KEEP_LATEST_BY_HOURS) { + Instant instant = Instant.now(); + ZonedDateTime currentDateTime = ZonedDateTime.ofInstant(instant, ZoneId.systemDefault()); + String earliestTimeToRetain = HoodieActiveTimeline.formatDate(Date.from(currentDateTime.minusHours(hoursRetained).toInstant())); + earliestCommitToRetain = Option.fromJavaOptional(commitTimeline.getInstants().filter(i -> HoodieTimeline.compareTimestamps(i.getTimestamp(), + HoodieTimeline.GREATER_THAN_OR_EQUALS, earliestTimeToRetain)).findFirst()); } return earliestCommitToRetain; } diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/cluster/BaseClusteringPlanActionExecutor.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/cluster/ClusteringPlanActionExecutor.java similarity index 93% rename from hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/cluster/BaseClusteringPlanActionExecutor.java rename to hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/cluster/ClusteringPlanActionExecutor.java index a1820ed93b7ca..15ead5efb0080 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/cluster/BaseClusteringPlanActionExecutor.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/cluster/ClusteringPlanActionExecutor.java @@ -41,13 +41,13 @@ import java.util.Collections; import java.util.Map; -public abstract class BaseClusteringPlanActionExecutor extends BaseActionExecutor> { +public class ClusteringPlanActionExecutor extends BaseActionExecutor> { - private static final Logger LOG = LogManager.getLogger(BaseClusteringPlanActionExecutor.class); + private static final Logger LOG = LogManager.getLogger(ClusteringPlanActionExecutor.class); private final Option> extraMetadata; - public 
BaseClusteringPlanActionExecutor(HoodieEngineContext context, + public ClusteringPlanActionExecutor(HoodieEngineContext context, HoodieWriteConfig config, HoodieTable table, String instantTime, diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/commit/AbstractBulkInsertHelper.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/commit/BaseBulkInsertHelper.java similarity index 91% rename from hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/commit/AbstractBulkInsertHelper.java rename to hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/commit/BaseBulkInsertHelper.java index 6e1ddeb72e0a6..dffd926aee3d5 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/commit/AbstractBulkInsertHelper.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/commit/BaseBulkInsertHelper.java @@ -26,7 +26,7 @@ import org.apache.hudi.table.HoodieTable; import org.apache.hudi.table.action.HoodieWriteMetadata; -public abstract class AbstractBulkInsertHelper { +public abstract class BaseBulkInsertHelper { /** * Mark instant as inflight, write input records, update index and return result. @@ -34,7 +34,7 @@ public abstract class AbstractBulkInsertHelper bulkInsert(I inputRecords, String instantTime, HoodieTable table, HoodieWriteConfig config, BaseCommitActionExecutor executor, boolean performDedupe, - Option> userDefinedBulkInsertPartitioner); + Option> userDefinedBulkInsertPartitioner); /** * Only write input records. Does not change timeline/index. Return information about new files created. @@ -42,7 +42,7 @@ public abstract HoodieWriteMetadata bulkInsert(I inputRecords, String instant public abstract O bulkInsert(I inputRecords, String instantTime, HoodieTable table, HoodieWriteConfig config, boolean performDedupe, - Option> userDefinedBulkInsertPartitioner, + Option> userDefinedBulkInsertPartitioner, boolean addMetadataFields, int parallelism, WriteHandleFactory writeHandleFactory); diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/commit/BaseCommitActionExecutor.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/commit/BaseCommitActionExecutor.java index 7449f3f8045a3..b8d5948c1f453 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/commit/BaseCommitActionExecutor.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/commit/BaseCommitActionExecutor.java @@ -18,12 +18,18 @@ package org.apache.hudi.table.action.commit; +import org.apache.hudi.avro.HoodieAvroUtils; +import org.apache.hudi.avro.model.HoodieClusteringGroup; +import org.apache.hudi.avro.model.HoodieClusteringPlan; import org.apache.hudi.client.WriteStatus; import org.apache.hudi.client.transaction.TransactionManager; import org.apache.hudi.client.utils.TransactionUtils; +import org.apache.hudi.common.data.HoodieData; import org.apache.hudi.common.engine.HoodieEngineContext; import org.apache.hudi.common.engine.TaskContextSupplier; import org.apache.hudi.common.model.HoodieCommitMetadata; +import org.apache.hudi.common.model.HoodieFileGroupId; +import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.model.HoodieRecordPayload; import org.apache.hudi.common.model.HoodieWriteStat; @@ -32,10 +38,14 @@ import org.apache.hudi.common.table.timeline.HoodieInstant; import 
org.apache.hudi.common.table.timeline.HoodieInstant.State; import org.apache.hudi.common.table.timeline.HoodieTimeline; +import org.apache.hudi.common.util.ClusteringUtils; +import org.apache.hudi.common.util.CommitUtils; import org.apache.hudi.common.util.Option; +import org.apache.hudi.common.util.ReflectionUtils; import org.apache.hudi.common.util.StringUtils; import org.apache.hudi.common.util.collection.Pair; import org.apache.hudi.config.HoodieWriteConfig; +import org.apache.hudi.exception.HoodieClusteringException; import org.apache.hudi.exception.HoodieCommitException; import org.apache.hudi.exception.HoodieIOException; import org.apache.hudi.table.HoodieTable; @@ -43,7 +53,9 @@ import org.apache.hudi.table.WorkloadStat; import org.apache.hudi.table.action.BaseActionExecutor; import org.apache.hudi.table.action.HoodieWriteMetadata; +import org.apache.hudi.table.action.cluster.strategy.ClusteringExecutionStrategy; +import org.apache.avro.Schema; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; @@ -54,6 +66,9 @@ import java.util.Iterator; import java.util.List; import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; +import java.util.stream.Stream; public abstract class BaseCommitActionExecutor extends BaseActionExecutor { @@ -73,7 +88,7 @@ public BaseCommitActionExecutor(HoodieEngineContext context, HoodieWriteConfig c this.operationType = operationType; this.extraMetadata = extraMetadata; this.taskContextSupplier = context.getTaskContextSupplier(); - // TODO : Remove this once we refactor and move out autoCommit method from here, since the TxnManager is held in {@link AbstractHoodieWriteClient}. + // TODO : Remove this once we refactor and move out autoCommit method from here, since the TxnManager is held in {@link BaseHoodieWriteClient}. this.txnManager = new TransactionManager(config, table.getMetaClient().getFs()); this.lastCompletedTxn = TransactionUtils.getLastCompletedTxnInstantAndMetadata(table.getMetaClient()); if (table.getStorageLayout().doesNotSupport(operationType)) { @@ -94,22 +109,32 @@ void saveWorkloadProfileMetadataToInflight(WorkloadProfile profile, String insta throws HoodieCommitException { try { HoodieCommitMetadata metadata = new HoodieCommitMetadata(); - profile.getPartitionPaths().forEach(path -> { - WorkloadStat partitionStat = profile.getWorkloadStat(path); + profile.getOutputPartitionPaths().forEach(path -> { + WorkloadStat partitionStat = profile.getOutputWorkloadStat(path); HoodieWriteStat insertStat = new HoodieWriteStat(); insertStat.setNumInserts(partitionStat.getNumInserts()); insertStat.setFileId(""); insertStat.setPrevCommit(HoodieWriteStat.NULL_COMMIT); metadata.addWriteStat(path, insertStat); - - partitionStat.getUpdateLocationToCount().forEach((key, value) -> { - HoodieWriteStat writeStat = new HoodieWriteStat(); - writeStat.setFileId(key); - // TODO : Write baseCommitTime is possible here ? 
- writeStat.setPrevCommit(value.getKey()); - writeStat.setNumUpdateWrites(value.getValue()); - metadata.addWriteStat(path, writeStat); - }); + Map> updateLocationMap = partitionStat.getUpdateLocationToCount(); + Map> insertLocationMap = partitionStat.getInsertLocationToCount(); + Stream.concat(updateLocationMap.keySet().stream(), insertLocationMap.keySet().stream()) + .distinct() + .forEach(fileId -> { + HoodieWriteStat writeStat = new HoodieWriteStat(); + writeStat.setFileId(fileId); + Pair updateLocation = updateLocationMap.get(fileId); + Pair insertLocation = insertLocationMap.get(fileId); + // TODO: is writing the baseCommitTime possible here? + writeStat.setPrevCommit(updateLocation != null ? updateLocation.getKey() : insertLocation.getKey()); + if (updateLocation != null) { + writeStat.setNumUpdateWrites(updateLocation.getValue()); + } + if (insertLocation != null) { + writeStat.setNumInserts(insertLocation.getValue()); + } + metadata.addWriteStat(path, writeStat); + }); }); metadata.setOperationType(operationType); @@ -152,18 +177,22 @@ protected void commitOnAutoCommit(HoodieWriteMetadata result) { protected void autoCommit(Option> extraMetadata, HoodieWriteMetadata result) { final Option inflightInstant = Option.of(new HoodieInstant(State.INFLIGHT, - HoodieTimeline.COMMIT_ACTION, instantTime)); + getCommitActionType(), instantTime)); this.txnManager.beginTransaction(inflightInstant, lastCompletedTxn.isPresent() ? Option.of(lastCompletedTxn.get().getLeft()) : Option.empty()); try { + setCommitMetadata(result); + // Reload the active timeline so as to pick up all updates made after the current transaction started; hence the last arg is set to true. TransactionUtils.resolveWriteConflictIfAny(table, this.txnManager.getCurrentTransactionOwner(), - result.getCommitMetadata(), config, this.txnManager.getLastCompletedTransactionOwner()); + result.getCommitMetadata(), config, this.txnManager.getLastCompletedTransactionOwner(), true); commit(extraMetadata, result); } finally { this.txnManager.endTransaction(inflightInstant); } } + protected abstract void setCommitMetadata(HoodieWriteMetadata result); + protected abstract void commit(Option> extraMetadata, HoodieWriteMetadata result); /** @@ -197,4 +226,65 @@ protected abstract Iterator> handleInsert(String idPfx, protected abstract Iterator> handleUpdate(String partitionPath, String fileId, Iterator> recordItr) throws IOException; + + protected HoodieWriteMetadata> executeClustering(HoodieClusteringPlan clusteringPlan) { + HoodieInstant instant = HoodieTimeline.getReplaceCommitRequestedInstant(instantTime); + // Mark instant as clustering inflight + table.getActiveTimeline().transitionReplaceRequestedToInflight(instant, Option.empty()); + table.getMetaClient().reloadActiveTimeline(); + + final Schema schema = HoodieAvroUtils.addMetadataFields(new Schema.Parser().parse(config.getSchema())); + HoodieWriteMetadata> writeMetadata = ( + (ClusteringExecutionStrategy>, HoodieData, HoodieData>) + ReflectionUtils.loadClass(config.getClusteringExecutionStrategyClass(), + new Class[] {HoodieTable.class, HoodieEngineContext.class, HoodieWriteConfig.class}, table, context, config)) + .performClustering(clusteringPlan, schema, instantTime); + HoodieData writeStatusList = writeMetadata.getWriteStatuses(); + HoodieData statuses = updateIndex(writeStatusList, writeMetadata); + writeMetadata.setWriteStats(statuses.map(WriteStatus::getStat).collectAsList()); + writeMetadata.setPartitionToReplaceFileIds(getPartitionToReplacedFileIds(clusteringPlan, writeMetadata)); + 
validateWriteResult(clusteringPlan, writeMetadata); + commitOnAutoCommit(writeMetadata); + if (!writeMetadata.getCommitMetadata().isPresent()) { + HoodieCommitMetadata commitMetadata = CommitUtils.buildMetadata(writeMetadata.getWriteStats().get(), writeMetadata.getPartitionToReplaceFileIds(), + extraMetadata, operationType, getSchemaToStoreInCommit(), getCommitActionType()); + writeMetadata.setCommitMetadata(Option.of(commitMetadata)); + } + return writeMetadata; + } + + private HoodieData updateIndex(HoodieData writeStatuses, HoodieWriteMetadata> result) { + Instant indexStartTime = Instant.now(); + // Update the index back + HoodieData statuses = table.getIndex().updateLocation(writeStatuses, context, table); + result.setIndexUpdateDuration(Duration.between(indexStartTime, Instant.now())); + result.setWriteStatuses(statuses); + return statuses; + } + + private Map> getPartitionToReplacedFileIds(HoodieClusteringPlan clusteringPlan, HoodieWriteMetadata> writeMetadata) { + Set newFilesWritten = writeMetadata.getWriteStats().get().stream() + .map(s -> new HoodieFileGroupId(s.getPartitionPath(), s.getFileId())).collect(Collectors.toSet()); + + return ClusteringUtils.getFileGroupsFromClusteringPlan(clusteringPlan) + .filter(fg -> "org.apache.hudi.client.clustering.run.strategy.SparkSingleFileSortExecutionStrategy" + .equals(config.getClusteringExecutionStrategyClass()) + || !newFilesWritten.contains(fg)) + .collect(Collectors.groupingBy(HoodieFileGroupId::getPartitionPath, Collectors.mapping(HoodieFileGroupId::getFileId, Collectors.toList()))); + } + + /** + * Validate actions taken by clustering. In the first implementation, we validate at least one new file is written. + * But we can extend this to add more validation. E.g. number of records read = number of records written etc. + * We can also make these validations in BaseCommitActionExecutor to reuse pre-commit hooks for multiple actions. + */ + private void validateWriteResult(HoodieClusteringPlan clusteringPlan, HoodieWriteMetadata> writeMetadata) { + if (writeMetadata.getWriteStatuses().isEmpty()) { + throw new HoodieClusteringException("Clustering plan produced 0 WriteStatus for " + instantTime + + " #groups: " + clusteringPlan.getInputGroups().size() + " expected at least " + + clusteringPlan.getInputGroups().stream().mapToInt(HoodieClusteringGroup::getNumOutputFileGroups).sum() + + " write statuses"); + } + } + } diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/commit/AbstractDeleteHelper.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/commit/BaseDeleteHelper.java similarity index 95% rename from hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/commit/AbstractDeleteHelper.java rename to hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/commit/BaseDeleteHelper.java index ac0f2596f490e..b119587f47535 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/commit/AbstractDeleteHelper.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/commit/BaseDeleteHelper.java @@ -29,7 +29,7 @@ * * @param */ -public abstract class AbstractDeleteHelper { +public abstract class BaseDeleteHelper { /** * Deduplicate Hoodie records, using the given deduplication function. 
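Stepping back to getPartitionToReplacedFileIds in BaseCommitActionExecutor above: the Collectors pipeline there reduces the plan's file groups to a partition-path-to-file-ids map. A self-contained sketch of just that grouping, with made-up partition and file-id values standing in for HoodieFileGroupId (map iteration order may vary):

import java.util.AbstractMap;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

public class GroupReplacedFileIds {
  public static void main(String[] args) {
    // Stand-in for HoodieFileGroupId: (partitionPath, fileId) pairs.
    List<Map.Entry<String, String>> fileGroups = Arrays.asList(
        new AbstractMap.SimpleEntry<>("2022/01/26", "fg-1"),
        new AbstractMap.SimpleEntry<>("2022/01/26", "fg-2"),
        new AbstractMap.SimpleEntry<>("2022/01/27", "fg-3"));

    // Same shape as the executor's result: partition path -> replaced file ids.
    Map<String, List<String>> partitionToReplacedFileIds = fileGroups.stream()
        .collect(Collectors.groupingBy(Map.Entry::getKey,
            Collectors.mapping(Map.Entry::getValue, Collectors.toList())));

    System.out.println(partitionToReplacedFileIds);
    // {2022/01/26=[fg-1, fg-2], 2022/01/27=[fg-3]}
  }
}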
diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/commit/AbstractMergeHelper.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/commit/BaseMergeHelper.java similarity index 98% rename from hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/commit/AbstractMergeHelper.java rename to hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/commit/BaseMergeHelper.java index 59a3323bcb73e..5ead348140aa3 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/commit/AbstractMergeHelper.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/commit/BaseMergeHelper.java @@ -47,7 +47,7 @@ /** * Helper to read records from previous version of base file and run Merge. */ -public abstract class AbstractMergeHelper { +public abstract class BaseMergeHelper { /** * Read records from previous version of base file and merge. diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/commit/AbstractWriteHelper.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/commit/BaseWriteHelper.java similarity index 95% rename from hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/commit/AbstractWriteHelper.java rename to hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/commit/BaseWriteHelper.java index 3f241944c3af7..6d5372b47297d 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/commit/AbstractWriteHelper.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/commit/BaseWriteHelper.java @@ -30,7 +30,7 @@ import java.time.Duration; import java.time.Instant; -public abstract class AbstractWriteHelper { +public abstract class BaseWriteHelper { public HoodieWriteMetadata write(String instantTime, I inputRecords, @@ -86,5 +86,5 @@ public I deduplicateRecords( } public abstract I deduplicateRecords( - I records, HoodieIndex index, int parallelism); + I records, HoodieIndex index, int parallelism); } diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/compact/HoodieCompactor.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/compact/HoodieCompactor.java index 73e1413d9dde0..e238d40683b64 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/compact/HoodieCompactor.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/compact/HoodieCompactor.java @@ -111,13 +111,13 @@ public HoodieData compact( table.getMetaClient().reloadActiveTimeline(); HoodieTableMetaClient metaClient = table.getMetaClient(); - TableSchemaResolver schemaUtil = new TableSchemaResolver(metaClient); + TableSchemaResolver schemaResolver = new TableSchemaResolver(metaClient); // Here we first use the table schema as the reader schema to read the // log files. That is because, in the case of MergeInto, config.getSchema may not // be the same as the table schema. try { - Schema readerSchema = schemaUtil.getTableAvroSchema(false); + Schema readerSchema = schemaResolver.getTableAvroSchema(false); config.setSchema(readerSchema.toString()); } catch (Exception e) { // If there is no commit in the table, just ignore the exception.
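Put differently, the hunk above makes the compactor trust the table schema rather than config.getSchema() when scanning log files. A condensed, method-shaped recap under that reading (the Hudi/Avro types are real; the wrapper class and method name are invented for illustration):

import org.apache.avro.Schema;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.TableSchemaResolver;
import org.apache.hudi.config.HoodieWriteConfig;

class ReaderSchemaRecap {
  // Prefer the latest table schema over config.getSchema() when scanning log files,
  // since a MergeInto write schema may differ from the table schema.
  static void preferTableSchemaAsReaderSchema(HoodieTableMetaClient metaClient, HoodieWriteConfig config) {
    TableSchemaResolver schemaResolver = new TableSchemaResolver(metaClient);
    try {
      Schema readerSchema = schemaResolver.getTableAvroSchema(false);
      config.setSchema(readerSchema.toString());
    } catch (Exception e) {
      // No commit in the table yet; keep whatever schema the config already carries.
    }
  }
}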
@@ -182,14 +182,30 @@ public List compact(HoodieCompactionHandler compactionHandler, .withOperationField(config.allowOperationMetadataField()) .withPartition(operation.getPartitionPath()) .build(); - if (!scanner.iterator().hasNext()) { - scanner.close(); - return new ArrayList<>(); - } Option oldDataFileOpt = operation.getBaseFile(metaClient.getBasePath(), operation.getPartitionPath()); + // Consider the following scenario: if all log blocks in this file slice were rolled back, the scanner comes back empty. + // But in that case we still need to hand over a base file; otherwise the base file would be lost in the following file slice. + if (!scanner.iterator().hasNext()) { + if (!oldDataFileOpt.isPresent()) { + scanner.close(); + return new ArrayList<>(); + } else { + // TODO: we may directly rename the original parquet file if there is no schema evolution/devolution + /* + TaskContextSupplier taskContextSupplier = hoodieCopyOnWriteTable.getTaskContextSupplier(); + String newFileName = FSUtils.makeDataFileName(instantTime, + FSUtils.makeWriteToken(taskContextSupplier.getPartitionIdSupplier().get(), taskContextSupplier.getStageIdSupplier().get(), taskContextSupplier.getAttemptIdSupplier().get()), + operation.getFileId(), hoodieCopyOnWriteTable.getBaseFileExtension()); + Path oldFilePath = new Path(oldDataFileOpt.get().getPath()); + Path newFilePath = new Path(oldFilePath.getParent(), newFileName); + FileUtil.copy(fs, oldFilePath, fs, newFilePath, false, fs.getConf()); + */ + } + } + // Compacting is very similar to applying updates to existing file Iterator> result; // If the dataFile is present, perform updates else perform inserts into a new base file. diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/restore/BaseRestoreActionExecutor.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/restore/BaseRestoreActionExecutor.java index 58247bb8ea0be..9025623e86916 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/restore/BaseRestoreActionExecutor.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/restore/BaseRestoreActionExecutor.java @@ -18,7 +18,9 @@ package org.apache.hudi.table.action.restore; +import org.apache.hudi.avro.model.HoodieInstantInfo; import org.apache.hudi.avro.model.HoodieRestoreMetadata; +import org.apache.hudi.avro.model.HoodieRestorePlan; import org.apache.hudi.avro.model.HoodieRollbackMetadata; import org.apache.hudi.client.transaction.TransactionManager; import org.apache.hudi.common.engine.HoodieEngineContext; @@ -29,14 +31,18 @@ import org.apache.hudi.common.table.timeline.TimelineMetadataUtils; import org.apache.hudi.common.util.HoodieTimer; import org.apache.hudi.common.util.Option; +import org.apache.hudi.common.util.ValidationUtils; import org.apache.hudi.config.HoodieWriteConfig; +import org.apache.hudi.exception.HoodieRestoreException; import org.apache.hudi.exception.HoodieRollbackException; import org.apache.hudi.table.HoodieTable; import org.apache.hudi.table.action.BaseActionExecutor; + import org.apache.log4j.LogManager; import org.apache.log4j.Logger; import java.io.IOException; +import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; import java.util.List; @@ -65,27 +71,51 @@ public HoodieRestoreMetadata execute() { HoodieTimer restoreTimer = new HoodieTimer(); restoreTimer.startTimer(); - // Get all the commits on the timeline after the provided commit time - List instantsToRollback = 
table.getActiveTimeline().getWriteTimeline() - .getReverseOrderedInstants() - .filter(instant -> HoodieActiveTimeline.GREATER_THAN.test(instant.getTimestamp(), restoreInstantTime)) - .collect(Collectors.toList()); + Option restoreInstant = table.getRestoreTimeline() + .filterInflightsAndRequested() + .filter(instant -> instant.getTimestamp().equals(instantTime)) + .firstInstant(); + if (!restoreInstant.isPresent()) { + throw new HoodieRollbackException("No pending restore instants found to execute restore"); + } + try { + List instantsToRollback = getInstantsToRollback(restoreInstant.get()); + ValidationUtils.checkArgument(restoreInstant.get().getState().equals(HoodieInstant.State.REQUESTED) + || restoreInstant.get().getState().equals(HoodieInstant.State.INFLIGHT)); + Map> instantToMetadata = new HashMap<>(); + if (restoreInstant.get().isRequested()) { + table.getActiveTimeline().transitionRestoreRequestedToInflight(restoreInstant.get()); + } - Map> instantToMetadata = new HashMap<>(); - table.getActiveTimeline().createNewInstant(new HoodieInstant(true, HoodieTimeline.RESTORE_ACTION, instantTime)); - instantsToRollback.forEach(instant -> { - instantToMetadata.put(instant.getTimestamp(), Collections.singletonList(rollbackInstant(instant))); - LOG.info("Deleted instant " + instant); - }); + instantsToRollback.forEach(instant -> { + instantToMetadata.put(instant.getTimestamp(), Collections.singletonList(rollbackInstant(instant))); + LOG.info("Deleted instant " + instant); + }); - try { return finishRestore(instantToMetadata, instantsToRollback, restoreTimer.endTimer() ); } catch (IOException io) { - throw new HoodieRollbackException("unable to rollback instants " + instantsToRollback, io); + throw new HoodieRestoreException("Unable to restore instant " + restoreInstant.get(), io); + } + } + + private List getInstantsToRollback(HoodieInstant restoreInstant) throws IOException { + List instantsToRollback = new ArrayList<>(); + HoodieRestorePlan restorePlan = RestoreUtils.getRestorePlan(table.getMetaClient(), restoreInstant); + for (HoodieInstantInfo instantInfo : restorePlan.getInstantsToRollback()) { + // If the restore crashed mid-way, chances are that some commits were already rolled back + // but some were not, so we can skip any commits that were fully rolled back in a previous attempt.
+ Option rollbackInstantOpt = table.getActiveTimeline().getWriteTimeline() + .filter(instant -> instant.getTimestamp().equals(instantInfo.getCommitTime()) && instant.getAction().equals(instantInfo.getAction())).firstInstant(); + if (rollbackInstantOpt.isPresent()) { + instantsToRollback.add(rollbackInstantOpt.get()); + } else { + LOG.warn("Ignoring already rolled-back instant " + instantInfo.toString()); + } } return instantsToRollback; } protected abstract HoodieRollbackMetadata rollbackInstant(HoodieInstant rollbackInstant); @@ -99,7 +129,7 @@ private HoodieRestoreMetadata finishRestore(Map instantsToRollback = table.getActiveTimeline().getRollbackTimeline() .getReverseOrderedInstants() @@ -115,6 +145,7 @@ private HoodieRestoreMetadata finishRestore(Map> maybeDeleteAndCollectStats(HoodieEngineCo rollbackStats.forEach(entry -> partitionToRollbackStats.add(Pair.of(entry.getPartitionPath(), entry))); return partitionToRollbackStats.stream(); } else if (!rollbackRequest.getLogBlocksToBeDeleted().isEmpty()) { - Map logFilesToBeDeleted = rollbackRequest.getLogBlocksToBeDeleted(); - String fileId = rollbackRequest.getFileId(); - String latestBaseInstant = rollbackRequest.getLatestBaseInstant(); - FileSystem fs = metaClient.getFs(); - // collect all log files that is supposed to be deleted with this rollback - // what happens if file was deleted when invoking fs.getFileStatus(?) below. - // I understand we don't delete log files. but just curious if we need to handle this case. - Map writtenLogFileSizeMap = new HashMap<>(); - for (Map.Entry entry : logFilesToBeDeleted.entrySet()) { - writtenLogFileSizeMap.put(fs.getFileStatus(new Path(entry.getKey())), entry.getValue()); - } HoodieLogFormat.Writer writer = null; try { + String fileId = rollbackRequest.getFileId(); + String latestBaseInstant = rollbackRequest.getLatestBaseInstant(); + writer = HoodieLogFormat.newWriterBuilder() .onParentPath(FSUtils.getPartitionPath(metaClient.getBasePath(), rollbackRequest.getPartitionPath())) .withFileId(fileId) @@ -156,7 +146,7 @@ List> maybeDeleteAndCollectStats(HoodieEngineCo writer.close(); } } catch (IOException io) { - throw new HoodieIOException("Error appending rollback block..", io); + throw new HoodieIOException("Error appending rollback block", io); } } @@ -167,15 +157,21 @@ List> maybeDeleteAndCollectStats(HoodieEngineCo metaClient.getFs().getFileStatus(Objects.requireNonNull(writer).getLogFile().getPath()), 1L ); - return Collections.singletonList(Pair.of(rollbackRequest.getPartitionPath(), - HoodieRollbackStat.newBuilder().withPartitionPath(rollbackRequest.getPartitionPath()) - .withRollbackBlockAppendResults(filesToNumBlocksRollback) - .withWrittenLogFileSizeMap(writtenLogFileSizeMap).build())).stream(); + + return Collections.singletonList( + Pair.of(rollbackRequest.getPartitionPath(), + HoodieRollbackStat.newBuilder() + .withPartitionPath(rollbackRequest.getPartitionPath()) + .withRollbackBlockAppendResults(filesToNumBlocksRollback) + .build())) + .stream(); } else { - return Collections - .singletonList(Pair.of(rollbackRequest.getPartitionPath(), - HoodieRollbackStat.newBuilder().withPartitionPath(rollbackRequest.getPartitionPath()) - .build())).stream(); + return Collections.singletonList( + Pair.of(rollbackRequest.getPartitionPath(), + HoodieRollbackStat.newBuilder() + .withPartitionPath(rollbackRequest.getPartitionPath()) + .build())) + .stream(); } }, numPartitions); } diff --git 
a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/rollback/ListingBasedRollbackHelper.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/rollback/ListingBasedRollbackHelper.java index b47136fa02a58..628b2fc3720f8 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/rollback/ListingBasedRollbackHelper.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/rollback/ListingBasedRollbackHelper.java @@ -19,18 +19,17 @@ package org.apache.hudi.table.action.rollback; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.PathFilter; import org.apache.hudi.avro.model.HoodieRollbackRequest; import org.apache.hudi.common.engine.HoodieEngineContext; import org.apache.hudi.common.fs.FSUtils; -import org.apache.hudi.common.model.HoodieFileFormat; -import org.apache.hudi.common.model.HoodieLogFile; +import org.apache.hudi.common.model.HoodieWriteStat; import org.apache.hudi.common.table.HoodieTableMetaClient; import org.apache.hudi.common.table.timeline.HoodieInstant; import org.apache.hudi.config.HoodieWriteConfig; - -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.PathFilter; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; @@ -38,7 +37,6 @@ import java.io.Serializable; import java.util.Arrays; import java.util.Collections; -import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.stream.Collectors; @@ -104,22 +102,20 @@ private List getListingBasedRollbackRequests(HoodieEngine case APPEND_ROLLBACK_BLOCK: { String fileId = rollbackRequest.getFileId().get(); String latestBaseInstant = rollbackRequest.getLatestBaseInstant().get(); - // collect all log files that is supposed to be deleted with this rollback - Map writtenLogFileSizeMap = FSUtils.getAllLogFiles(metaClient.getFs(), - FSUtils.getPartitionPath(config.getBasePath(), rollbackRequest.getPartitionPath()), - fileId, HoodieFileFormat.HOODIE_LOG.getFileExtension(), latestBaseInstant) - .collect(Collectors.toMap(HoodieLogFile::getFileStatus, value -> value.getFileStatus().getLen())); - Map logFilesToBeDeleted = new HashMap<>(); - for (Map.Entry fileToBeDeleted : writtenLogFileSizeMap.entrySet()) { - logFilesToBeDeleted.put(fileToBeDeleted.getKey().getPath().toString(), fileToBeDeleted.getValue()); - } + HoodieWriteStat writeStat = rollbackRequest.getWriteStat().get(); + + Path fullLogFilePath = FSUtils.getPartitionPath(config.getBasePath(), writeStat.getPath()); + + Map logFilesWithBlocksToRollback = + Collections.singletonMap(fullLogFilePath.toString(), writeStat.getTotalWriteBytes()); + return new HoodieRollbackRequest(rollbackRequest.getPartitionPath(), fileId, latestBaseInstant, - Collections.EMPTY_LIST, logFilesToBeDeleted); + Collections.EMPTY_LIST, logFilesWithBlocksToRollback); } default: throw new IllegalStateException("Unknown Rollback action " + rollbackRequest); } - }, numPartitions).stream().collect(Collectors.toList()); + }, numPartitions); } private FileStatus[] getBaseFilesToBeDeleted(HoodieTableMetaClient metaClient, HoodieWriteConfig config, diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/rollback/ListingBasedRollbackRequest.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/rollback/ListingBasedRollbackRequest.java index 
fc369a46711cf..7411231bb7d79 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/rollback/ListingBasedRollbackRequest.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/rollback/ListingBasedRollbackRequest.java @@ -18,12 +18,15 @@ package org.apache.hudi.table.action.rollback; +import org.apache.hudi.common.model.HoodieWriteStat; import org.apache.hudi.common.util.Option; +import java.io.Serializable; + /** * Request for performing one rollback action. */ -public class ListingBasedRollbackRequest { +public class ListingBasedRollbackRequest implements Serializable { /** * Rollback commands that trigger a specific handling for rollback. @@ -49,32 +52,42 @@ public enum Type { */ private final Option latestBaseInstant; + /** + * Stat of the write being rolled back; for APPEND_ROLLBACK_BLOCK requests it carries the path and size of the targeted log file. + */ + private final Option writeStat; + private final Type type; + public ListingBasedRollbackRequest(String partitionPath, Type type) { + this(partitionPath, Option.empty(), Option.empty(), Option.empty(), type); + } + public ListingBasedRollbackRequest(String partitionPath, Option fileId, Option latestBaseInstant, + Option writeStat, Type type) { this.partitionPath = partitionPath; this.fileId = fileId; this.latestBaseInstant = latestBaseInstant; + this.writeStat = writeStat; this.type = type; } public static ListingBasedRollbackRequest createRollbackRequestWithDeleteDataFilesOnlyAction(String partitionPath) { - return new ListingBasedRollbackRequest(partitionPath, Option.empty(), Option.empty(), - Type.DELETE_DATA_FILES_ONLY); + return new ListingBasedRollbackRequest(partitionPath, Type.DELETE_DATA_FILES_ONLY); } public static ListingBasedRollbackRequest createRollbackRequestWithDeleteDataAndLogFilesAction(String partitionPath) { - return new ListingBasedRollbackRequest(partitionPath, Option.empty(), Option.empty(), - Type.DELETE_DATA_AND_LOG_FILES); + return new ListingBasedRollbackRequest(partitionPath, Type.DELETE_DATA_AND_LOG_FILES); } - public static ListingBasedRollbackRequest createRollbackRequestWithAppendRollbackBlockAction(String partitionPath, String fileId, - String baseInstant) { - return new ListingBasedRollbackRequest(partitionPath, Option.of(fileId), Option.of(baseInstant), - Type.APPEND_ROLLBACK_BLOCK); + public static ListingBasedRollbackRequest createRollbackRequestWithAppendRollbackBlockAction(String partitionPath, + String fileId, + String baseInstant, + HoodieWriteStat writeStat) { + return new ListingBasedRollbackRequest(partitionPath, Option.of(fileId), Option.of(baseInstant), Option.of(writeStat), Type.APPEND_ROLLBACK_BLOCK); } public String getPartitionPath() { @@ -89,6 +102,10 @@ public Option getLatestBaseInstant() { return latestBaseInstant; } + public Option getWriteStat() { + return writeStat; + } + public Type getType() { return type; } diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/rollback/MarkerBasedRollbackStrategy.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/rollback/MarkerBasedRollbackStrategy.java index 9d04e3036f204..e7a4170ec7871 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/rollback/MarkerBasedRollbackStrategy.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/rollback/MarkerBasedRollbackStrategy.java @@ -18,6 +18,7 @@ package org.apache.hudi.table.action.rollback; +import org.apache.hadoop.fs.Path; import org.apache.hudi.avro.model.HoodieRollbackRequest; import 
org.apache.hudi.common.engine.HoodieEngineContext; import org.apache.hudi.common.fs.FSUtils; @@ -31,18 +32,13 @@ import org.apache.hudi.table.HoodieTable; import org.apache.hudi.table.marker.MarkerBasedRollbackUtils; import org.apache.hudi.table.marker.WriteMarkers; - -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.Path; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; import java.io.IOException; import java.util.Collections; -import java.util.HashMap; import java.util.List; import java.util.Map; -import java.util.stream.Collectors; import static org.apache.hudi.table.action.rollback.BaseRollbackHelper.EMPTY_STRING; @@ -90,42 +86,41 @@ public List getRollbackRequests(HoodieInstant instantToRo Collections.singletonList(fullDeletePath.toString()), Collections.emptyMap()); case APPEND: + // NOTE: This marker file-path does NOT correspond to a log-file, but rather is a phony + // path serving as a "container" for the following components: + // - Base file's file-id + // - Base file's commit instant + // - Partition path return getRollbackRequestForAppend(WriteMarkers.stripMarkerSuffix(markerFilePath)); default: throw new HoodieRollbackException("Unknown marker type, during rollback of " + instantToRollback); } - }, parallelism).stream().collect(Collectors.toList()); + }, parallelism); } catch (Exception e) { throw new HoodieRollbackException("Error rolling back using marker files written for " + instantToRollback, e); } } - protected HoodieRollbackRequest getRollbackRequestForAppend(String appendBaseFilePath) throws IOException { - Path baseFilePathForAppend = new Path(basePath, appendBaseFilePath); + protected HoodieRollbackRequest getRollbackRequestForAppend(String markerFilePath) throws IOException { + Path baseFilePathForAppend = new Path(basePath, markerFilePath); String fileId = FSUtils.getFileIdFromFilePath(baseFilePathForAppend); String baseCommitTime = FSUtils.getCommitTime(baseFilePathForAppend.getName()); - String partitionPath = FSUtils.getRelativePartitionPath(new Path(basePath), new Path(basePath, appendBaseFilePath).getParent()); - Map writtenLogFileSizeMap = getWrittenLogFileSizeMap(partitionPath, baseCommitTime, fileId); - Map writtenLogFileStrSizeMap = new HashMap<>(); - for (Map.Entry entry : writtenLogFileSizeMap.entrySet()) { - writtenLogFileStrSizeMap.put(entry.getKey().getPath().toString(), entry.getValue()); - } - return new HoodieRollbackRequest(partitionPath, fileId, baseCommitTime, Collections.emptyList(), writtenLogFileStrSizeMap); + String relativePartitionPath = FSUtils.getRelativePartitionPath(new Path(basePath), baseFilePathForAppend.getParent()); + Path partitionPath = FSUtils.getPartitionPath(config.getBasePath(), relativePartitionPath); + + // NOTE: Since we're rolling back an incomplete delta commit, it could only have appended its + // block to the latest log file + // TODO(HUDI-1517) use provided marker-file's path instead + HoodieLogFile latestLogFile = FSUtils.getLatestLogFile(table.getMetaClient().getFs(), partitionPath, fileId, + HoodieFileFormat.HOODIE_LOG.getFileExtension(), baseCommitTime).get(); + + // NOTE: Markers don't carry information about the cumulative size of the blocks that have been appended, + // therefore we simply stub this value.
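+ // The -1L below is a deliberate stub: the rollback request only needs the log file's path here, not an accurate size.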
+ Map logFilesWithBlocksToRollback = + Collections.singletonMap(latestLogFile.getFileStatus().getPath().toString(), -1L); + + return new HoodieRollbackRequest(relativePartitionPath, fileId, baseCommitTime, Collections.emptyList(), + logFilesWithBlocksToRollback); } - /** - * Returns written log file size map for the respective baseCommitTime to assist in metadata table syncing. - * - * @param partitionPathStr partition path of interest - * @param baseCommitTime base commit time of interest - * @param fileId fileId of interest - * @return Map - * @throws IOException - */ - private Map getWrittenLogFileSizeMap(String partitionPathStr, String baseCommitTime, String fileId) throws IOException { - // collect all log files that is supposed to be deleted with this rollback - return FSUtils.getAllLogFiles(table.getMetaClient().getFs(), - FSUtils.getPartitionPath(config.getBasePath(), partitionPathStr), fileId, HoodieFileFormat.HOODIE_LOG.getFileExtension(), baseCommitTime) - .collect(Collectors.toMap(HoodieLogFile::getFileStatus, value -> value.getFileStatus().getLen())); - } } diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/rollback/RestorePlanActionExecutor.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/rollback/RestorePlanActionExecutor.java new file mode 100644 index 0000000000000..e33dffcb7b953 --- /dev/null +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/rollback/RestorePlanActionExecutor.java @@ -0,0 +1,101 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.table.action.rollback; + +import org.apache.hudi.avro.model.HoodieInstantInfo; +import org.apache.hudi.avro.model.HoodieRestorePlan; +import org.apache.hudi.common.engine.HoodieEngineContext; +import org.apache.hudi.common.model.HoodieRecordPayload; +import org.apache.hudi.common.table.timeline.HoodieActiveTimeline; +import org.apache.hudi.common.table.timeline.HoodieInstant; +import org.apache.hudi.common.table.timeline.HoodieTimeline; +import org.apache.hudi.common.table.timeline.TimelineMetadataUtils; +import org.apache.hudi.common.util.ClusteringUtils; +import org.apache.hudi.common.util.Option; +import org.apache.hudi.config.HoodieWriteConfig; +import org.apache.hudi.exception.HoodieIOException; +import org.apache.hudi.table.HoodieTable; +import org.apache.hudi.table.action.BaseActionExecutor; + +import org.apache.log4j.LogManager; +import org.apache.log4j.Logger; + +import java.io.IOException; +import java.util.List; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +/** + * Plans the restore action and adds a restore.requested meta file to the timeline.
+ */ +public class RestorePlanActionExecutor extends BaseActionExecutor> { + + + private static final Logger LOG = LogManager.getLogger(RestorePlanActionExecutor.class); + + public static final Integer RESTORE_PLAN_VERSION_1 = 1; + public static final Integer LATEST_RESTORE_PLAN_VERSION = RESTORE_PLAN_VERSION_1; + private final String restoreInstantTime; + + public RestorePlanActionExecutor(HoodieEngineContext context, + HoodieWriteConfig config, + HoodieTable table, + String instantTime, + String restoreInstantTime) { + super(context, config, table, instantTime); + this.restoreInstantTime = restoreInstantTime; + } + + @Override + public Option execute() { + final HoodieInstant restoreInstant = new HoodieInstant(HoodieInstant.State.REQUESTED, HoodieTimeline.RESTORE_ACTION, instantTime); + try { + // Roll back pending clustering instants first, before other instants (see HUDI-3362) + List pendingClusteringInstantsToRollback = table.getActiveTimeline().filterPendingReplaceTimeline() + // filter only clustering-related replacecommits (not insert_overwrite related commits) + .filter(instant -> ClusteringUtils.isPendingClusteringInstant(table.getMetaClient(), instant)) + .getReverseOrderedInstants() + .filter(instant -> HoodieActiveTimeline.GREATER_THAN.test(instant.getTimestamp(), restoreInstantTime)) + .collect(Collectors.toList()); + + // Get all the commits on the timeline after the provided commit time + List commitInstantsToRollback = table.getActiveTimeline().getWriteTimeline() + .getReverseOrderedInstants() + .filter(instant -> HoodieActiveTimeline.GREATER_THAN.test(instant.getTimestamp(), restoreInstantTime)) + .filter(instant -> !pendingClusteringInstantsToRollback.contains(instant)) + .collect(Collectors.toList()); + + // Combine both lists - first rollback pending clustering and then rollback all other commits + List instantsToRollback = Stream.concat(pendingClusteringInstantsToRollback.stream(), commitInstantsToRollback.stream()) + .map(entry -> new HoodieInstantInfo(entry.getTimestamp(), entry.getAction())) + .collect(Collectors.toList()); + + HoodieRestorePlan restorePlan = new HoodieRestorePlan(instantsToRollback, LATEST_RESTORE_PLAN_VERSION); + table.getActiveTimeline().saveToRestoreRequested(restoreInstant, TimelineMetadataUtils.serializeRestorePlan(restorePlan)); + table.getMetaClient().reloadActiveTimeline(); + LOG.info("Requesting restore with instant time " + restoreInstant); + return Option.of(restorePlan); + } catch (IOException e) { + LOG.error("Got exception when saving restore requested file", e); + throw new HoodieIOException(e.getMessage(), e); + } + } +} diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/rollback/RollbackUtils.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/rollback/RollbackUtils.java index a4b59a88b92c4..2bc9b59b0d1f1 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/rollback/RollbackUtils.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/rollback/RollbackUtils.java @@ -18,6 +18,7 @@ package org.apache.hudi.table.action.rollback; +import org.apache.hadoop.fs.FileStatus; import org.apache.hudi.avro.model.HoodieRollbackPlan; import org.apache.hudi.common.HoodieRollbackStat; import org.apache.hudi.common.engine.HoodieEngineContext; @@ -33,12 +34,8 @@ import org.apache.hudi.common.table.timeline.HoodieTimeline; import 
org.apache.hudi.common.table.timeline.TimelineMetadataUtils; import org.apache.hudi.common.util.Option; -import org.apache.hudi.common.util.ValidationUtils; import org.apache.hudi.config.HoodieWriteConfig; -import org.apache.hudi.exception.HoodieIOException; import org.apache.hudi.table.HoodieTable; - -import org.apache.hadoop.fs.FileStatus; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; @@ -48,8 +45,11 @@ import java.util.List; import java.util.Map; import java.util.Objects; +import java.util.function.Function; import java.util.stream.Collectors; +import static org.apache.hudi.common.util.ValidationUtils.checkArgument; + public class RollbackUtils { private static final Logger LOG = LogManager.getLogger(RollbackUtils.class); @@ -88,7 +88,7 @@ static Map generateHeader(String inst * @return Merged HoodieRollbackStat */ static HoodieRollbackStat mergeRollbackStat(HoodieRollbackStat stat1, HoodieRollbackStat stat2) { - ValidationUtils.checkArgument(stat1.getPartitionPath().equals(stat2.getPartitionPath())); + checkArgument(stat1.getPartitionPath().equals(stat2.getPartitionPath())); final List successDeleteFiles = new ArrayList<>(); final List failedDeleteFiles = new ArrayList<>(); final Map commandBlocksCount = new HashMap<>(); @@ -99,9 +99,7 @@ static HoodieRollbackStat mergeRollbackStat(HoodieRollbackStat stat1, HoodieRoll Option.ofNullable(stat2.getFailedDeleteFiles()).ifPresent(failedDeleteFiles::addAll); Option.ofNullable(stat1.getCommandBlocksCount()).ifPresent(commandBlocksCount::putAll); Option.ofNullable(stat2.getCommandBlocksCount()).ifPresent(commandBlocksCount::putAll); - Option.ofNullable(stat1.getWrittenLogFileSizeMap()).ifPresent(writtenLogFileSizeMap::putAll); - Option.ofNullable(stat2.getWrittenLogFileSizeMap()).ifPresent(writtenLogFileSizeMap::putAll); - return new HoodieRollbackStat(stat1.getPartitionPath(), successDeleteFiles, failedDeleteFiles, commandBlocksCount, writtenLogFileSizeMap); + return new HoodieRollbackStat(stat1.getPartitionPath(), successDeleteFiles, failedDeleteFiles, commandBlocksCount); } /** @@ -191,28 +189,22 @@ public static List generateRollbackRequestsUsingFil // (B.3) Rollback triggered for first commit - Same as (B.1) // (B.4) Rollback triggered for recurring commits - Same as (B.2) plus we need to delete the log files // as well if the base file gets deleted.
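To make the (A.x)/(B.x) cases concrete, here is a minimal sketch of the per-partition decision the refactored hunk below implements; the factory and helper calls are the ones visible in this diff, while the surrounding variables (partitionPath, commitMetadata, instantToRollback, table) are assumed to be in scope:

    List<ListingBasedRollbackRequest> requests = new ArrayList<>();
    // Inserts: fileIds of first writes are unknown (they may be log or base files),
    // so delete every file written by the failed commit, same as COW.
    requests.add(ListingBasedRollbackRequest.createRollbackRequestWithDeleteDataAndLogFilesAction(partitionPath));
    // Updates: appends to existing log files are undone by appending rollback
    // command blocks rather than by deleting files.
    if (commitMetadata.getPartitionToWriteStats().containsKey(partitionPath)) {
      requests.addAll(generateAppendRollbackBlocksAction(partitionPath, instantToRollback, commitMetadata, table));
    }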
- try { - HoodieCommitMetadata commitMetadata = HoodieCommitMetadata.fromBytes( - table.getMetaClient().getCommitTimeline() - .getInstantDetails(new HoodieInstant(true, instantToRollback.getAction(), instantToRollback.getTimestamp())) - .get(), - HoodieCommitMetadata.class); + HoodieCommitMetadata commitMetadata = HoodieCommitMetadata.fromBytes( + table.getMetaClient().getCommitTimeline().getInstantDetails(instantToRollback).get(), + HoodieCommitMetadata.class); - // In case all data was inserts and the commit failed, delete the file belonging to that commit - // We do not know fileIds for inserts (first inserts are either log files or base files), - // delete all files for the corresponding failed commit, if present (same as COW) - partitionRollbackRequests.add( - ListingBasedRollbackRequest.createRollbackRequestWithDeleteDataAndLogFilesAction(partitionPath)); + // In case all data was inserts and the commit failed, delete the file belonging to that commit + // We do not know fileIds for inserts (first inserts are either log files or base files), + // delete all files for the corresponding failed commit, if present (same as COW) + partitionRollbackRequests.add( + ListingBasedRollbackRequest.createRollbackRequestWithDeleteDataAndLogFilesAction(partitionPath)); - // append rollback blocks for updates - if (commitMetadata.getPartitionToWriteStats().containsKey(partitionPath)) { - partitionRollbackRequests - .addAll(generateAppendRollbackBlocksAction(partitionPath, instantToRollback, commitMetadata, table)); - } - break; - } catch (IOException io) { - throw new HoodieIOException("Failed to collect rollback actions for commit " + commit, io); + // append rollback blocks for updates and inserts, as in (A.2) and (B.2) + if (commitMetadata.getPartitionToWriteStats().containsKey(partitionPath)) { + partitionRollbackRequests + .addAll(generateAppendRollbackBlocksAction(partitionPath, instantToRollback, commitMetadata, table)); } + break; default: break; } @@ -222,7 +214,7 @@ private static List generateAppendRollbackBlocksAct private static List generateAppendRollbackBlocksAction(String partitionPath, HoodieInstant rollbackInstant, HoodieCommitMetadata commitMetadata, HoodieTable table) { - ValidationUtils.checkArgument(rollbackInstant.getAction().equals(HoodieTimeline.DELTA_COMMIT_ACTION)); + checkArgument(rollbackInstant.getAction().equals(HoodieTimeline.DELTA_COMMIT_ACTION)); // wStat.getPrevCommit() might not give the right commit time in the following // scenario: If a compaction was scheduled, the new commitTime associated with the requested compaction will be @@ -230,30 +222,40 @@ private static List generateAppendRollbackBlocksAct // But the index (global) might store the baseCommit of the base and not the requested, hence get the // baseCommit always by listing the file slice // With multi writers, rollbacks could be lazy.
and so we need to use getLatestFileSlicesBeforeOrOn() instead of getLatestFileSlices() - Map fileIdToBaseCommitTimeForLogMap = table.getSliceView().getLatestFileSlicesBeforeOrOn(partitionPath, rollbackInstant.getTimestamp(), - true).collect(Collectors.toMap(FileSlice::getFileId, FileSlice::getBaseInstantTime)); + Map latestFileSlices = table.getSliceView() + .getLatestFileSlicesBeforeOrOn(partitionPath, rollbackInstant.getTimestamp(), true) + .collect(Collectors.toMap(FileSlice::getFileId, Function.identity())); + + return commitMetadata.getPartitionToWriteStats().get(partitionPath) + .stream() + .filter(writeStat -> { + // Filter out stats without prevCommit since they are all inserts + boolean validForRollback = (writeStat != null) && (!writeStat.getPrevCommit().equals(HoodieWriteStat.NULL_COMMIT)) + && (writeStat.getPrevCommit() != null) && latestFileSlices.containsKey(writeStat.getFileId()); - return commitMetadata.getPartitionToWriteStats().get(partitionPath).stream().filter(wStat -> { + if (!validForRollback) { + return false; + } - // Filter out stats without prevCommit since they are all inserts - boolean validForRollback = (wStat != null) && (!wStat.getPrevCommit().equals(HoodieWriteStat.NULL_COMMIT)) - && (wStat.getPrevCommit() != null) && fileIdToBaseCommitTimeForLogMap.containsKey(wStat.getFileId()); + FileSlice latestFileSlice = latestFileSlices.get(writeStat.getFileId()); - if (validForRollback) { - // For sanity, log instant time can never be less than base-commit on which we are rolling back - ValidationUtils - .checkArgument(HoodieTimeline.compareTimestamps(fileIdToBaseCommitTimeForLogMap.get(wStat.getFileId()), - HoodieTimeline.LESSER_THAN_OR_EQUALS, rollbackInstant.getTimestamp())); - } + // For sanity, the log-file's base-instant can never be greater than the delta commit being rolled back + checkArgument( + HoodieTimeline.compareTimestamps(latestFileSlice.getBaseInstantTime(), + HoodieTimeline.LESSER_THAN_OR_EQUALS, rollbackInstant.getTimestamp()), + "Log-file base-instant cannot be greater than the instant being rolled back"); - return validForRollback && HoodieTimeline.compareTimestamps(fileIdToBaseCommitTimeForLogMap.get( - // Base Ts should be strictly less. If equal (for inserts-to-logs), the caller employs another option - // to delete and we should not step on it - wStat.getFileId()), HoodieTimeline.LESSER_THAN, rollbackInstant.getTimestamp()); - }).map(wStat -> { - String baseCommitTime = fileIdToBaseCommitTimeForLogMap.get(wStat.getFileId()); - return ListingBasedRollbackRequest.createRollbackRequestWithAppendRollbackBlockAction(partitionPath, wStat.getFileId(), - baseCommitTime); - }).collect(Collectors.toList()); + // Command block "rolling back" the preceding block {@link HoodieCommandBlockTypeEnum#ROLLBACK_PREVIOUS_BLOCK} + // within the latest file-slice is appended iff base-instant of the log-file is _strictly_ less + // than the instant of the Delta Commit being rolled back. Otherwise, the log-file will be cleaned up + // in a different branch of the flow.
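The strict and non-strict timestamp guards above are easiest to read side by side. A small illustration with made-up instant times (compareTimestamps and the two operators are exactly the ones used in this hunk):

    String baseInstant = "20220101120000"; // base-instant of the latest file slice (illustrative)
    String rollbackTs = "20220101130000";  // delta commit being rolled back (illustrative)
    // Sanity guard: the slice's base-instant may never come after the rolled-back commit.
    checkArgument(HoodieTimeline.compareTimestamps(baseInstant, HoodieTimeline.LESSER_THAN_OR_EQUALS, rollbackTs));
    // A rollback command block is appended only in the strictly-less case; on equality
    // the log file was created by the rolled-back commit itself and is deleted instead.
    boolean appendCommandBlock = HoodieTimeline.compareTimestamps(baseInstant, HoodieTimeline.LESSER_THAN, rollbackTs);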
+ return HoodieTimeline.compareTimestamps(latestFileSlice.getBaseInstantTime(), HoodieTimeline.LESSER_THAN, rollbackInstant.getTimestamp()); + }) + .map(writeStat -> { + FileSlice latestFileSlice = latestFileSlices.get(writeStat.getFileId()); + return ListingBasedRollbackRequest.createRollbackRequestWithAppendRollbackBlockAction(partitionPath, + writeStat.getFileId(), latestFileSlice.getBaseInstantTime(), writeStat); + }) + .collect(Collectors.toList()); } } diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/rollback/SerializableHoodieRollbackRequest.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/rollback/SerializableHoodieRollbackRequest.java index acd1c50badbc7..8f19692ed7c72 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/rollback/SerializableHoodieRollbackRequest.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/rollback/SerializableHoodieRollbackRequest.java @@ -20,6 +20,7 @@ import org.apache.hudi.avro.model.HoodieRollbackRequest; +import java.io.Serializable; import java.util.ArrayList; import java.util.HashMap; import java.util.List; @@ -29,7 +30,7 @@ * HoodieRollbackRequest in HoodieRollbackPlan (avro pojo) is not operable directly within spark parallel engine. * Hence it is converted to this {@link SerializableHoodieRollbackRequest} and then used within spark.parallelize. */ -public class SerializableHoodieRollbackRequest { +public class SerializableHoodieRollbackRequest implements Serializable { private final String partitionPath; private final String fileId; diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/upgrade/DowngradeHandler.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/upgrade/DowngradeHandler.java index 24b9d6f5da6f4..45bbd78c3fb36 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/upgrade/DowngradeHandler.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/upgrade/DowngradeHandler.java @@ -35,10 +35,10 @@ public interface DowngradeHandler { * @param config instance of {@link HoodieWriteConfig} to be used. * @param context instance of {@link HoodieEngineContext} to be used. * @param instantTime current instant time that should not be touched. - * @param upgradeDowngradeHelper instance of {@link BaseUpgradeDowngradeHelper} to be used. + * @param upgradeDowngradeHelper instance of {@link SupportsUpgradeDowngrade} to be used. * @return Map of config properties and their values to be added to table properties.
*/ Map downgrade( HoodieWriteConfig config, HoodieEngineContext context, String instantTime, - BaseUpgradeDowngradeHelper upgradeDowngradeHelper); + SupportsUpgradeDowngrade upgradeDowngradeHelper); } diff --git a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/cluster/JavaClusteringPlanActionExecutor.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/upgrade/FourToThreeDowngradeHandler.java similarity index 52% rename from hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/cluster/JavaClusteringPlanActionExecutor.java rename to hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/upgrade/FourToThreeDowngradeHandler.java index 1d78ecc2bf41c..17dc01d0213e7 100644 --- a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/cluster/JavaClusteringPlanActionExecutor.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/upgrade/FourToThreeDowngradeHandler.java @@ -17,27 +17,22 @@ * under the License. */ -package org.apache.hudi.table.action.cluster; +package org.apache.hudi.table.upgrade; -import org.apache.hudi.client.WriteStatus; +import org.apache.hudi.common.config.ConfigProperty; import org.apache.hudi.common.engine.HoodieEngineContext; -import org.apache.hudi.common.model.HoodieKey; -import org.apache.hudi.common.model.HoodieRecord; -import org.apache.hudi.common.model.HoodieRecordPayload; -import org.apache.hudi.common.util.Option; import org.apache.hudi.config.HoodieWriteConfig; -import org.apache.hudi.table.HoodieTable; -import java.util.List; +import java.util.Collections; import java.util.Map; -public class JavaClusteringPlanActionExecutor extends - BaseClusteringPlanActionExecutor>, List, List> { +/** + * DowngradeHandler to assist in downgrading {@link org.apache.hudi.table.HoodieTable} from version 4 to 3. 
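+ * Version 4 only adds the table checksum property on upgrade, so there is no table state to unwind here and the handler returns an empty property map.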
+ */ +public class FourToThreeDowngradeHandler implements DowngradeHandler { - public JavaClusteringPlanActionExecutor( - HoodieEngineContext context, HoodieWriteConfig config, - HoodieTable>, List, List> table, - String instantTime, Option> extraMetadata) { - super(context, config, table, instantTime, extraMetadata); + @Override + public Map downgrade(HoodieWriteConfig config, HoodieEngineContext context, String instantTime, SupportsUpgradeDowngrade upgradeDowngradeHelper) { + return Collections.emptyMap(); } } diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/upgrade/OneToTwoUpgradeHandler.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/upgrade/OneToTwoUpgradeHandler.java index efa0fe472c52c..dbf4d6159dcbd 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/upgrade/OneToTwoUpgradeHandler.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/upgrade/OneToTwoUpgradeHandler.java @@ -35,7 +35,7 @@ public class OneToTwoUpgradeHandler implements UpgradeHandler { @Override public Map upgrade( HoodieWriteConfig config, HoodieEngineContext context, String instantTime, - BaseUpgradeDowngradeHelper upgradeDowngradeHelper) { + SupportsUpgradeDowngrade upgradeDowngradeHelper) { Map tablePropsToAdd = new Hashtable<>(); tablePropsToAdd.put(HoodieTableConfig.PARTITION_FIELDS, upgradeDowngradeHelper.getPartitionColumns(config)); tablePropsToAdd.put(HoodieTableConfig.RECORDKEY_FIELDS, config.getString(KeyGeneratorOptions.RECORDKEY_FIELD_NAME.key())); diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/upgrade/OneToZeroDowngradeHandler.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/upgrade/OneToZeroDowngradeHandler.java index e6051cf321b50..14fe8e2b88713 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/upgrade/OneToZeroDowngradeHandler.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/upgrade/OneToZeroDowngradeHandler.java @@ -40,7 +40,7 @@ public class OneToZeroDowngradeHandler implements DowngradeHandler { @Override public Map downgrade( HoodieWriteConfig config, HoodieEngineContext context, String instantTime, - BaseUpgradeDowngradeHelper upgradeDowngradeHelper) { + SupportsUpgradeDowngrade upgradeDowngradeHelper) { HoodieTable table = upgradeDowngradeHelper.getTable(config, context); // fetch pending commit info HoodieTimeline inflightTimeline = table.getMetaClient().getCommitsTimeline().filterPendingExcludingCompaction(); diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/upgrade/BaseUpgradeDowngradeHelper.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/upgrade/SupportsUpgradeDowngrade.java similarity index 97% rename from hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/upgrade/BaseUpgradeDowngradeHelper.java rename to hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/upgrade/SupportsUpgradeDowngrade.java index d3f157be954da..5e6b9db913fa5 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/upgrade/BaseUpgradeDowngradeHelper.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/upgrade/SupportsUpgradeDowngrade.java @@ -26,7 +26,7 @@ /** * Interface for engine-specific logic needed for upgrade and downgrade actions. */ -public interface BaseUpgradeDowngradeHelper { +public interface SupportsUpgradeDowngrade { /** * @param config Write config. 
* @param context {@link HoodieEngineContext} instance to use. diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/upgrade/ThreeToFourUpgradeHandler.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/upgrade/ThreeToFourUpgradeHandler.java new file mode 100644 index 0000000000000..72e96bb4103bc --- /dev/null +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/upgrade/ThreeToFourUpgradeHandler.java @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.table.upgrade; + +import org.apache.hudi.common.config.ConfigProperty; +import org.apache.hudi.common.engine.HoodieEngineContext; +import org.apache.hudi.common.table.HoodieTableConfig; +import org.apache.hudi.config.HoodieWriteConfig; + +import java.util.Hashtable; +import java.util.Map; + +/** + * UpgradeHandler to assist in upgrading {@link org.apache.hudi.table.HoodieTable} from version 3 to 4. + */ +public class ThreeToFourUpgradeHandler implements UpgradeHandler { + + @Override + public Map upgrade(HoodieWriteConfig config, HoodieEngineContext context, String instantTime, SupportsUpgradeDowngrade upgradeDowngradeHelper) { + Map tablePropsToAdd = new Hashtable<>(); + tablePropsToAdd.put(HoodieTableConfig.TABLE_CHECKSUM, String.valueOf(HoodieTableConfig.generateChecksum(config.getProps()))); + return tablePropsToAdd; + } +} diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/upgrade/ThreeToTwoDowngradeHandler.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/upgrade/ThreeToTwoDowngradeHandler.java index 964859c0ae07d..4f209f05ffc9b 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/upgrade/ThreeToTwoDowngradeHandler.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/upgrade/ThreeToTwoDowngradeHandler.java @@ -33,7 +33,7 @@ public class ThreeToTwoDowngradeHandler implements DowngradeHandler { @Override - public Map downgrade(HoodieWriteConfig config, HoodieEngineContext context, String instantTime, BaseUpgradeDowngradeHelper upgradeDowngradeHelper) { + public Map downgrade(HoodieWriteConfig config, HoodieEngineContext context, String instantTime, SupportsUpgradeDowngrade upgradeDowngradeHelper) { if (config.isMetadataTableEnabled()) { // Metadata Table in version 3 is synchronous and in version 2 is asynchronous. Downgrading to asynchronous // removes the checks in code to decide whether to use a LogBlock or not. 
Also, the schema for the diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/upgrade/TwoToOneDowngradeHandler.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/upgrade/TwoToOneDowngradeHandler.java index ee638a16f8633..de1a1067fe111 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/upgrade/TwoToOneDowngradeHandler.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/upgrade/TwoToOneDowngradeHandler.java @@ -54,7 +54,7 @@ public class TwoToOneDowngradeHandler implements DowngradeHandler { @Override public Map downgrade( HoodieWriteConfig config, HoodieEngineContext context, String instantTime, - BaseUpgradeDowngradeHelper upgradeDowngradeHelper) { + SupportsUpgradeDowngrade upgradeDowngradeHelper) { HoodieTable table = upgradeDowngradeHelper.getTable(config, context); HoodieTableMetaClient metaClient = table.getMetaClient(); @@ -115,9 +115,11 @@ private void convertToDirectMarkers(final String commitInstantTime, + "\" is not supported for rollback."); } } else { - // In case of partial failures during downgrade, there is a chance that marker type file was deleted, - // but timeline server based marker files are left. So deletes them if any - deleteTimelineBasedMarkerFiles(context, markerDir, fileSystem, parallelism); + if (fileSystem.exists(new Path(markerDir))) { + // In case of partial failures during downgrade, there is a chance that the marker type file was deleted, + // but timeline server based marker files are left. So delete them, if any are left + deleteTimelineBasedMarkerFiles(context, markerDir, fileSystem, parallelism); + } } } diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/upgrade/TwoToThreeUpgradeHandler.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/upgrade/TwoToThreeUpgradeHandler.java index bff3788d56cfe..c13d21ec201a0 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/upgrade/TwoToThreeUpgradeHandler.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/upgrade/TwoToThreeUpgradeHandler.java @@ -35,7 +35,7 @@ */ public class TwoToThreeUpgradeHandler implements UpgradeHandler { @Override - public Map upgrade(HoodieWriteConfig config, HoodieEngineContext context, String instantTime, BaseUpgradeDowngradeHelper upgradeDowngradeHelper) { + public Map upgrade(HoodieWriteConfig config, HoodieEngineContext context, String instantTime, SupportsUpgradeDowngrade upgradeDowngradeHelper) { if (config.isMetadataTableEnabled()) { // Metadata Table in version 2 is asynchronous and in version 3 is synchronous. Synchronous table will not // sync any instants not already synced. So it's simpler to re-bootstrap the table.
Also, the schema for the diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/upgrade/UpgradeDowngrade.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/upgrade/UpgradeDowngrade.java index 0e8f752a8f682..1a75ff51cabd4 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/upgrade/UpgradeDowngrade.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/upgrade/UpgradeDowngrade.java @@ -42,7 +42,7 @@ public class UpgradeDowngrade { private static final Logger LOG = LogManager.getLogger(UpgradeDowngrade.class); public static final String HOODIE_UPDATED_PROPERTY_FILE = "hoodie.properties.updated"; - private final BaseUpgradeDowngradeHelper upgradeDowngradeHelper; + private final SupportsUpgradeDowngrade upgradeDowngradeHelper; private HoodieTableMetaClient metaClient; protected HoodieWriteConfig config; protected HoodieEngineContext context; @@ -52,7 +52,7 @@ public class UpgradeDowngrade { public UpgradeDowngrade( HoodieTableMetaClient metaClient, HoodieWriteConfig config, HoodieEngineContext context, - BaseUpgradeDowngradeHelper upgradeDowngradeHelper) { + SupportsUpgradeDowngrade upgradeDowngradeHelper) { this.metaClient = metaClient; this.config = config; this.context = context; @@ -143,6 +143,8 @@ protected Map upgrade(HoodieTableVersion fromVersion, Ho return new OneToTwoUpgradeHandler().upgrade(config, context, instantTime, upgradeDowngradeHelper); } else if (fromVersion == HoodieTableVersion.TWO && toVersion == HoodieTableVersion.THREE) { return new TwoToThreeUpgradeHandler().upgrade(config, context, instantTime, upgradeDowngradeHelper); + } else if (fromVersion == HoodieTableVersion.THREE && toVersion == HoodieTableVersion.FOUR) { + return new ThreeToFourUpgradeHandler().upgrade(config, context, instantTime, upgradeDowngradeHelper); } else { throw new HoodieUpgradeDowngradeException(fromVersion.versionCode(), toVersion.versionCode(), true); } @@ -155,6 +157,8 @@ protected Map downgrade(HoodieTableVersion fromVersion, return new TwoToOneDowngradeHandler().downgrade(config, context, instantTime, upgradeDowngradeHelper); } else if (fromVersion == HoodieTableVersion.THREE && toVersion == HoodieTableVersion.TWO) { return new ThreeToTwoDowngradeHandler().downgrade(config, context, instantTime, upgradeDowngradeHelper); + } else if (fromVersion == HoodieTableVersion.FOUR && toVersion == HoodieTableVersion.THREE) { + return new FourToThreeDowngradeHandler().downgrade(config, context, instantTime, upgradeDowngradeHelper); } else { throw new HoodieUpgradeDowngradeException(fromVersion.versionCode(), toVersion.versionCode(), false); } diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/upgrade/UpgradeHandler.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/upgrade/UpgradeHandler.java index 9dc477ffc9dc6..147aa4d8ab2dd 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/upgrade/UpgradeHandler.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/upgrade/UpgradeHandler.java @@ -35,10 +35,10 @@ public interface UpgradeHandler { * @param config instance of {@link HoodieWriteConfig} to be used. * @param context instance of {@link HoodieEngineContext} to be used. * @param instantTime current instant time that should not be touched. - * @param upgradeDowngradeHelper instance of {@link BaseUpgradeDowngradeHelper} to be used. 
+ * @param upgradeDowngradeHelper instance of {@link SupportsUpgradeDowngrade} to be used. * @return Map of config properties and their values to be added to table properties. */ Map upgrade( HoodieWriteConfig config, HoodieEngineContext context, String instantTime, - BaseUpgradeDowngradeHelper upgradeDowngradeHelper); + SupportsUpgradeDowngrade upgradeDowngradeHelper); } diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/upgrade/ZeroToOneUpgradeHandler.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/upgrade/ZeroToOneUpgradeHandler.java index 1aebbf6b4c42d..6a114154c8778 100644 --- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/upgrade/ZeroToOneUpgradeHandler.java +++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/upgrade/ZeroToOneUpgradeHandler.java @@ -57,7 +57,7 @@ public class ZeroToOneUpgradeHandler implements UpgradeHandler { @Override public Map upgrade( HoodieWriteConfig config, HoodieEngineContext context, String instantTime, - BaseUpgradeDowngradeHelper upgradeDowngradeHelper) { + SupportsUpgradeDowngrade upgradeDowngradeHelper) { // fetch pending commit info HoodieTable table = upgradeDowngradeHelper.getTable(config, context); HoodieTimeline inflightTimeline = table.getMetaClient().getCommitsTimeline().filterPendingExcludingCompaction(); diff --git a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/async/TestAsyncArchiveService.java b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/async/TestAsyncArchiveService.java new file mode 100644 index 0000000000000..9dad8b8020a1f --- /dev/null +++ b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/async/TestAsyncArchiveService.java @@ -0,0 +1,75 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License.
+ */ + +package org.apache.hudi.async; + +import org.apache.hudi.client.BaseHoodieWriteClient; +import org.apache.hudi.config.HoodieWriteConfig; + +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; + +import java.util.concurrent.ExecutionException; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +@ExtendWith(MockitoExtension.class) +class TestAsyncArchiveService { + + @Mock + BaseHoodieWriteClient writeClient; + @Mock + HoodieWriteConfig config; + + @Test + void startAsyncArchiveReturnsNullWhenAutoArchiveDisabled() { + when(config.isAutoArchive()).thenReturn(false); + when(writeClient.getConfig()).thenReturn(config); + assertNull(AsyncArchiveService.startAsyncArchiveIfEnabled(writeClient)); + } + + @Test + void startAsyncArchiveReturnsNullWhenAsyncArchiveDisabled() { + when(config.isAutoArchive()).thenReturn(true); + when(config.isAsyncArchive()).thenReturn(false); + when(writeClient.getConfig()).thenReturn(config); + assertNull(AsyncArchiveService.startAsyncArchiveIfEnabled(writeClient)); + } + + @Test + void startAsyncArchiveIfEnabled() { + when(config.isAutoArchive()).thenReturn(true); + when(config.isAsyncArchive()).thenReturn(true); + when(writeClient.getConfig()).thenReturn(config); + assertNotNull(AsyncArchiveService.startAsyncArchiveIfEnabled(writeClient)); + } + + @Test + void startServiceShouldInvokeCallArchiveMethod() throws ExecutionException, InterruptedException { + AsyncArchiveService service = new AsyncArchiveService(writeClient); + assertEquals(true, service.startService().getLeft().get()); + verify(writeClient).archive(); + } +} diff --git a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/async/TestHoodieAsyncTableService.java b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/async/TestHoodieAsyncTableService.java new file mode 100644 index 0000000000000..0c19576d042bf --- /dev/null +++ b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/async/TestHoodieAsyncTableService.java @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hudi.async; + +import org.apache.hudi.common.util.collection.Pair; +import org.apache.hudi.config.HoodieWriteConfig; + +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; + +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ExecutorService; + +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.mockito.Mockito.when; + +@ExtendWith(MockitoExtension.class) +class TestHoodieAsyncTableService { + + @Test + void tableServiceShouldNotStartIfDisabled(@Mock HoodieWriteConfig config) { + when(config.areTableServicesEnabled()).thenReturn(false); + HoodieAsyncTableService service = new DummyAsyncTableService(config); + service.start(null); + assertFalse(service.isStarted()); + } + + private static class DummyAsyncTableService extends HoodieAsyncTableService { + + protected DummyAsyncTableService(HoodieWriteConfig writeConfig) { + super(writeConfig); + } + + @Override + protected Pair startService() { + return null; + } + } +} diff --git a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/client/transaction/TestInProcessLockProvider.java b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/client/transaction/TestInProcessLockProvider.java index 9e7472c13db98..6c245787449d9 100644 --- a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/client/transaction/TestInProcessLockProvider.java +++ b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/client/transaction/TestInProcessLockProvider.java @@ -238,7 +238,7 @@ public void testRedundantUnlock() { assertDoesNotThrow(() -> { inProcessLockProvider.unlock(); }); - assertThrows(HoodieLockException.class, () -> { + assertDoesNotThrow(() -> { inProcessLockProvider.unlock(); }); } @@ -246,7 +246,7 @@ public void testRedundantUnlock() { @Test public void testUnlockWithoutLock() { InProcessLockProvider inProcessLockProvider = new InProcessLockProvider(lockConfiguration, hadoopConfiguration); - assertThrows(HoodieLockException.class, () -> { + assertDoesNotThrow(() -> { inProcessLockProvider.unlock(); }); } diff --git a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/client/transaction/TestTransactionManager.java b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/client/transaction/TestTransactionManager.java index a1a7f6a3122d0..22f8017841a83 100644 --- a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/client/transaction/TestTransactionManager.java +++ b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/client/transaction/TestTransactionManager.java @@ -81,7 +81,7 @@ public void testSingleWriterNestedTransaction() { }); transactionManager.endTransaction(); - assertThrows(HoodieLockException.class, () -> { + assertDoesNotThrow(() -> { transactionManager.endTransaction(); }); } diff --git a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/config/TestHoodieWriteConfig.java b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/config/TestHoodieWriteConfig.java index c86a34a609f1b..2c3ae98c6e6be 100644 --- a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/config/TestHoodieWriteConfig.java +++ b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/config/TestHoodieWriteConfig.java @@ -82,7 +82,7 @@ public void testPropertyLoading(boolean withAlternative) throws IOException { public void testDefaultIndexAccordingToEngineType() { 
testEngineSpecificConfig(HoodieWriteConfig::getIndexType, constructConfigMap( - EngineType.SPARK, HoodieIndex.IndexType.BLOOM, + EngineType.SPARK, HoodieIndex.IndexType.SIMPLE, EngineType.FLINK, HoodieIndex.IndexType.INMEMORY, EngineType.JAVA, HoodieIndex.IndexType.INMEMORY)); } @@ -167,7 +167,7 @@ public void testDefaultLockProviderWhenAsyncServicesEnabled() { } }); assertFalse(writeConfig.areAnyTableServicesAsync()); - assertTrue(writeConfig.areAnyTableServicesInline()); + assertTrue(writeConfig.areAnyTableServicesExecutedInline()); assertEquals(HoodieLockConfig.LOCK_PROVIDER_CLASS_NAME.defaultValue(), writeConfig.getLockProviderClass()); // 5. User override for the lock provider should always take the precedence diff --git a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/io/storage/TestHoodieHFileReaderWriter.java b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/io/storage/TestHoodieHFileReaderWriter.java index 86a0886de664d..fd25d92cba62e 100644 --- a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/io/storage/TestHoodieHFileReaderWriter.java +++ b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/io/storage/TestHoodieHFileReaderWriter.java @@ -23,6 +23,7 @@ import org.apache.hudi.common.bloom.BloomFilterTypeCode; import org.apache.hudi.common.engine.TaskContextSupplier; import org.apache.hudi.common.model.EmptyHoodieRecordPayload; +import org.apache.hudi.common.model.HoodieAvroRecord; import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.model.HoodieRecord; @@ -103,7 +104,7 @@ private HoodieHFileWriter createHFileWriter(Schema avroSchema, boolean populateM String instantTime = "000"; HoodieHFileConfig hoodieHFileConfig = new HoodieHFileConfig(conf, Compression.Algorithm.GZ, 1024 * 1024, 120 * 1024 * 1024, - PREFETCH_ON_OPEN, CACHE_DATA_IN_L1, DROP_BEHIND_CACHE_COMPACTION, filter, HFILE_COMPARATOR); + HoodieHFileReader.KEY_FIELD_NAME, PREFETCH_ON_OPEN, CACHE_DATA_IN_L1, DROP_BEHIND_CACHE_COMPACTION, filter, HFILE_COMPARATOR); return new HoodieHFileWriter(instantTime, filePath, hoodieHFileConfig, avroSchema, mockTaskContextSupplier, populateMetaFields); } @@ -122,7 +123,7 @@ public void testWriteReadHFile(boolean populateMetaFields, boolean testAvroWithM record.put("time", Integer.toString(RANDOM.nextInt())); record.put("number", i); if (testAvroWithMeta) { - writer.writeAvroWithMetadata(record, new HoodieRecord(new HoodieKey((String) record.get("_row_key"), + writer.writeAvroWithMetadata(record, new HoodieAvroRecord(new HoodieKey((String) record.get("_row_key"), Integer.toString((Integer) record.get("number"))), new EmptyHoodieRecordPayload())); // payload does not matter. GenericRecord passed in is what matters // only HoodieKey will be looked up from the 2nd arg(HoodieRecord). 
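+ // HoodieRecord construction now goes through the HoodieAvroRecord subclass; a minimal, + // illustrative stand-alone construction (key values here are made up): + // new HoodieAvroRecord(new HoodieKey("row-key-1", "2022/01/01"), new EmptyHoodieRecordPayload());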
} else { @@ -170,4 +171,4 @@ private Set getRandomKeys(int count, List keys) { } return rowKeys; } -} \ No newline at end of file +} diff --git a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/metrics/TestMetricsReporterFactory.java b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/metrics/TestMetricsReporterFactory.java index edd2302a6ecc7..3689755e4447d 100644 --- a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/metrics/TestMetricsReporterFactory.java +++ b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/metrics/TestMetricsReporterFactory.java @@ -21,10 +21,10 @@ import org.apache.hudi.common.config.TypedProperties; import org.apache.hudi.config.HoodieWriteConfig; +import org.apache.hudi.exception.HoodieException; +import org.apache.hudi.metrics.custom.CustomizableMetricsReporter; import com.codahale.metrics.MetricRegistry; -import org.apache.hudi.exception.HoodieException; -import org.apache.hudi.metrics.userdefined.AbstractUserDefinedMetricsReporter; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; import org.mockito.Mock; @@ -63,7 +63,7 @@ public void metricsReporterFactoryShouldReturnUserDefinedReporter() { when(config.getProps()).thenReturn(props); MetricsReporter reporter = MetricsReporterFactory.createReporter(config, registry); - assertTrue(reporter instanceof AbstractUserDefinedMetricsReporter); + assertTrue(reporter instanceof CustomizableMetricsReporter); assertEquals(props, ((DummyMetricsReporter) reporter).getProps()); assertEquals(registry, ((DummyMetricsReporter) reporter).getRegistry()); } @@ -75,7 +75,7 @@ public void metricsReporterFactoryShouldThrowExceptionWhenMetricsReporterClassIs assertThrows(HoodieException.class, () -> MetricsReporterFactory.createReporter(config, registry)); } - public static class DummyMetricsReporter extends AbstractUserDefinedMetricsReporter { + public static class DummyMetricsReporter extends CustomizableMetricsReporter { public DummyMetricsReporter(Properties props, MetricRegistry registry) { super(props, registry); diff --git a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/testutils/HoodieWriteableTestTable.java b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/testutils/HoodieWriteableTestTable.java index 32fd200145e9b..3488a1365ce88 100644 --- a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/testutils/HoodieWriteableTestTable.java +++ b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/testutils/HoodieWriteableTestTable.java @@ -27,6 +27,7 @@ import org.apache.hudi.common.model.HoodieLogFile; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.model.HoodieRecordLocation; +import org.apache.hudi.common.model.HoodieRecordPayload; import org.apache.hudi.common.table.HoodieTableConfig; import org.apache.hudi.common.table.HoodieTableMetaClient; import org.apache.hudi.common.table.log.HoodieLogFormat; @@ -41,6 +42,7 @@ import org.apache.hudi.io.storage.HoodieOrcConfig; import org.apache.hudi.io.storage.HoodieOrcWriter; import org.apache.hudi.io.storage.HoodieParquetWriter; +import org.apache.hudi.metadata.HoodieTableMetadataWriter; import org.apache.avro.Schema; import org.apache.avro.generic.GenericRecord; @@ -48,7 +50,6 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import org.apache.hudi.metadata.HoodieTableMetadataWriter; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; import 
org.apache.orc.CompressionKind; @@ -118,7 +119,7 @@ public HoodieWriteableTestTable withInserts(String partition, String fileId, Lis config, schema, contextSupplier, populateMetaFields)) { int seqId = 1; for (HoodieRecord record : records) { - GenericRecord avroRecord = (GenericRecord) record.getData().getInsertValue(schema).get(); + GenericRecord avroRecord = (GenericRecord) ((HoodieRecordPayload) record.getData()).getInsertValue(schema).get(); if (populateMetaFields) { HoodieAvroUtils.addCommitMetadataToRecord(avroRecord, currentInstantTime, String.valueOf(seqId++)); HoodieAvroUtils.addHoodieKeyToRecord(avroRecord, record.getRecordKey(), record.getPartitionPath(), fileName); @@ -141,7 +142,7 @@ public HoodieWriteableTestTable withInserts(String partition, String fileId, Lis config, schema, contextSupplier)) { int seqId = 1; for (HoodieRecord record : records) { - GenericRecord avroRecord = (GenericRecord) record.getData().getInsertValue(schema).get(); + GenericRecord avroRecord = (GenericRecord) ((HoodieRecordPayload) record.getData()).getInsertValue(schema).get(); HoodieAvroUtils.addCommitMetadataToRecord(avroRecord, currentInstantTime, String.valueOf(seqId++)); HoodieAvroUtils.addHoodieKeyToRecord(avroRecord, record.getRecordKey(), record.getPartitionPath(), fileName); writer.writeAvro(record.getRecordKey(), avroRecord); @@ -175,14 +176,14 @@ private Pair appendRecordsToLogFile(List gr header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, schema.toString()); logWriter.appendBlock(new HoodieAvroDataBlock(groupedRecords.stream().map(r -> { try { - GenericRecord val = (GenericRecord) r.getData().getInsertValue(schema).get(); + GenericRecord val = (GenericRecord) ((HoodieRecordPayload) r.getData()).getInsertValue(schema).get(); HoodieAvroUtils.addHoodieKeyToRecord(val, r.getRecordKey(), r.getPartitionPath(), ""); return (IndexedRecord) val; } catch (IOException e) { LOG.warn("Failed to convert record " + r.toString(), e); return null; } - }).collect(Collectors.toList()), header)); + }).collect(Collectors.toList()), header, HoodieRecord.RECORD_KEY_METADATA_FIELD)); return Pair.of(partitionPath, logWriter.getLogFile()); } } diff --git a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/testutils/providers/HoodieWriteClientProvider.java b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/testutils/providers/HoodieWriteClientProvider.java index 9bc559deb5ba4..f67e158c8395d 100644 --- a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/testutils/providers/HoodieWriteClientProvider.java +++ b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/testutils/providers/HoodieWriteClientProvider.java @@ -19,12 +19,12 @@ package org.apache.hudi.testutils.providers; -import org.apache.hudi.client.AbstractHoodieWriteClient; +import org.apache.hudi.client.BaseHoodieWriteClient; import org.apache.hudi.config.HoodieWriteConfig; import java.io.IOException; public interface HoodieWriteClientProvider { - AbstractHoodieWriteClient getHoodieWriteClient(HoodieWriteConfig cfg) throws IOException; + BaseHoodieWriteClient getHoodieWriteClient(HoodieWriteConfig cfg) throws IOException; } diff --git a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/utils/TestMetadataConversionUtils.java b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/utils/TestMetadataConversionUtils.java index c0952bc5a7204..415c12a6407c6 100644 --- a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/utils/TestMetadataConversionUtils.java +++ 
b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/utils/TestMetadataConversionUtils.java @@ -188,7 +188,6 @@ private void createRollbackMetadata(String instantTime) throws Exception { rollbackPartitionMetadata.setPartitionPath("p1"); rollbackPartitionMetadata.setSuccessDeleteFiles(Arrays.asList("f1")); rollbackPartitionMetadata.setFailedDeleteFiles(new ArrayList<>()); - rollbackPartitionMetadata.setWrittenLogFiles(new HashMap<>()); rollbackPartitionMetadata.setRollbackLogFiles(new HashMap<>()); Map partitionMetadataMap = new HashMap<>(); partitionMetadataMap.put("p1", rollbackPartitionMetadata); diff --git a/hudi-client/hudi-flink-client/pom.xml b/hudi-client/hudi-flink-client/pom.xml index b2cc6949bb08b..b6f1f3d372d28 100644 --- a/hudi-client/hudi-flink-client/pom.xml +++ b/hudi-client/hudi-flink-client/pom.xml @@ -60,7 +60,7 @@ org.apache.flink - flink-table-runtime-blink_${scala.binary.version} + flink-table-runtime_${scala.binary.version} ${flink.version} provided @@ -159,7 +159,7 @@ org.apache.flink - flink-runtime_${scala.binary.version} + flink-runtime ${flink.version} test tests diff --git a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/client/HoodieFlinkWriteClient.java b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/client/HoodieFlinkWriteClient.java index 4108ba425e8ca..1f5d14af744fb 100644 --- a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/client/HoodieFlinkWriteClient.java +++ b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/client/HoodieFlinkWriteClient.java @@ -18,6 +18,7 @@ package org.apache.hudi.client; +import org.apache.hudi.async.AsyncCleanerService; import org.apache.hudi.client.common.HoodieFlinkEngineContext; import org.apache.hudi.common.data.HoodieList; import org.apache.hudi.common.engine.HoodieEngineContext; @@ -39,7 +40,6 @@ import org.apache.hudi.common.util.Option; import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.exception.HoodieCommitException; -import org.apache.hudi.exception.HoodieException; import org.apache.hudi.exception.HoodieNotSupportedException; import org.apache.hudi.index.FlinkHoodieIndexFactory; import org.apache.hudi.index.HoodieIndex; @@ -68,7 +68,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.IOException; import java.text.ParseException; import java.util.HashMap; import java.util.Iterator; @@ -78,7 +77,7 @@ @SuppressWarnings("checkstyle:LineLength") public class HoodieFlinkWriteClient extends - AbstractHoodieWriteClient>, List, List> { + BaseHoodieWriteClient>, List, List> { private static final Logger LOG = LoggerFactory.getLogger(HoodieFlinkWriteClient.class); @@ -334,10 +333,7 @@ protected void postCommit(HoodieTable>, List, // Delete the marker directory for the instant. WriteMarkersFactory.get(config.getMarkersType(), createTable(config, hadoopConf), instantTime) .quietDeleteMarkerDir(context, config.getMarkersDeleteParallelism()); - if (config.isAutoArchive()) { - // We cannot have unbounded commit files. 
Archive commits if we have to archive - archive(table); - } + autoArchiveOnCommit(table); } finally { this.heartbeatClient.stop(instantTime); } @@ -346,23 +342,20 @@ protected void postCommit(HoodieTable>, List, @Override public void commitCompaction( String compactionInstantTime, - List writeStatuses, - Option> extraMetadata) throws IOException { + HoodieCommitMetadata metadata, + Option> extraMetadata) { HoodieFlinkTable table = getHoodieTable(); - HoodieCommitMetadata metadata = CompactHelpers.getInstance().createCompactionMetadata( - table, compactionInstantTime, HoodieList.of(writeStatuses), config.getSchema()); extraMetadata.ifPresent(m -> m.forEach(metadata::addMetadata)); - completeCompaction(metadata, writeStatuses, table, compactionInstantTime); + completeCompaction(metadata, table, compactionInstantTime); } @Override public void completeCompaction( HoodieCommitMetadata metadata, - List writeStatuses, HoodieTable>, List, List> table, String compactionCommitTime) { this.context.setJobStatus(this.getClass().getSimpleName(), "Collect compaction write status and commit compaction"); - List writeStats = writeStatuses.stream().map(WriteStatus::getStat).collect(Collectors.toList()); + List writeStats = metadata.getWriteStats(); final HoodieInstant compactionInstant = new HoodieInstant(HoodieInstant.State.INFLIGHT, HoodieTimeline.COMPACTION_ACTION, compactionCommitTime); try { this.txnManager.beginTransaction(Option.of(compactionInstant), Option.empty()); @@ -391,16 +384,11 @@ public void completeCompaction( } @Override - protected List compact(String compactionInstantTime, boolean shouldComplete) { + protected HoodieWriteMetadata> compact(String compactionInstantTime, boolean shouldComplete) { // only used for metadata table, the compaction happens in single thread - try { - List writeStatuses = - getHoodieTable().compact(context, compactionInstantTime).getWriteStatuses(); - commitCompaction(compactionInstantTime, writeStatuses, Option.empty()); - return writeStatuses; - } catch (IOException e) { - throw new HoodieException("Error while compacting instant: " + compactionInstantTime); - } + HoodieWriteMetadata> compactionMetadata = getHoodieTable().compact(context, compactionInstantTime); + commitCompaction(compactionInstantTime, compactionMetadata.getCommitMetadata().get(), Option.empty()); + return compactionMetadata; } @Override diff --git a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/index/FlinkHoodieIndex.java b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/index/FlinkHoodieIndex.java index 847a2183a156d..66c1b07793ee7 100644 --- a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/index/FlinkHoodieIndex.java +++ b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/index/FlinkHoodieIndex.java @@ -25,7 +25,6 @@ import org.apache.hudi.common.data.HoodieData; import org.apache.hudi.common.data.HoodieList; import org.apache.hudi.common.engine.HoodieEngineContext; -import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.model.HoodieRecordPayload; import org.apache.hudi.config.HoodieWriteConfig; @@ -33,12 +32,12 @@ import org.apache.hudi.table.HoodieTable; import java.util.List; +import java.util.stream.Collectors; /** * Base flink implementation of {@link HoodieIndex}. 
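+ * Records are now exchanged through {@code HoodieData}; the legacy list-based overrides below are adapted to it via {@code HoodieList}, so the payload type parameter is no longer needed.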
- * @param payload type */ -public abstract class FlinkHoodieIndex extends HoodieIndex>, List, List> { +public abstract class FlinkHoodieIndex extends HoodieIndex>, List> { protected FlinkHoodieIndex(HoodieWriteConfig config) { super(config); } @@ -48,21 +47,22 @@ protected FlinkHoodieIndex(HoodieWriteConfig config) { @PublicAPIMethod(maturity = ApiMaturityLevel.DEPRECATED) public abstract List updateLocation(List writeStatuses, HoodieEngineContext context, - HoodieTable>, List, List> hoodieTable) throws HoodieIndexException; + HoodieTable hoodieTable) throws HoodieIndexException; @Override @Deprecated @PublicAPIMethod(maturity = ApiMaturityLevel.DEPRECATED) public abstract List> tagLocation(List> records, HoodieEngineContext context, - HoodieTable>, List, List> hoodieTable) throws HoodieIndexException; + HoodieTable hoodieTable) throws HoodieIndexException; @Override @PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING) - public HoodieData> tagLocation( - HoodieData> records, HoodieEngineContext context, + public HoodieData> tagLocation( + HoodieData> records, HoodieEngineContext context, HoodieTable hoodieTable) throws HoodieIndexException { - return HoodieList.of(tagLocation(HoodieList.getList(records), context, hoodieTable)); + List> hoodieRecords = tagLocation(HoodieList.getList(records.map(record -> (HoodieRecord) record)), context, hoodieTable); + return HoodieList.of(hoodieRecords.stream().map(r -> (HoodieRecord) r).collect(Collectors.toList())); } @Override diff --git a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/index/FlinkHoodieIndexFactory.java b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/index/FlinkHoodieIndexFactory.java index a9196ca9a3d20..54110d93506d9 100644 --- a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/index/FlinkHoodieIndexFactory.java +++ b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/index/FlinkHoodieIndexFactory.java @@ -46,11 +46,11 @@ public static HoodieIndex createIndex(HoodieFlinkEngineContext context, HoodieWr // TODO more indexes to be added switch (config.getIndexType()) { case INMEMORY: - return new FlinkInMemoryStateIndex<>(context, config); + return new FlinkInMemoryStateIndex(context, config); case BLOOM: - return new HoodieBloomIndex<>(config, ListBasedHoodieBloomIndexHelper.getInstance()); + return new HoodieBloomIndex(config, ListBasedHoodieBloomIndexHelper.getInstance()); case SIMPLE: - return new HoodieSimpleIndex<>(config, Option.empty()); + return new HoodieSimpleIndex(config, Option.empty()); default: throw new HoodieIndexException("Unsupported index type " + config.getIndexType()); } diff --git a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/index/state/FlinkInMemoryStateIndex.java b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/index/state/FlinkInMemoryStateIndex.java index aa779c4252fcd..af9785edbeb0c 100644 --- a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/index/state/FlinkInMemoryStateIndex.java +++ b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/index/state/FlinkInMemoryStateIndex.java @@ -22,9 +22,7 @@ import org.apache.hudi.client.common.HoodieFlinkEngineContext; import org.apache.hudi.common.data.HoodieData; import org.apache.hudi.common.engine.HoodieEngineContext; -import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.model.HoodieRecord; -import org.apache.hudi.common.model.HoodieRecordPayload; import org.apache.hudi.config.HoodieWriteConfig; import 
org.apache.hudi.exception.HoodieIndexException; import org.apache.hudi.index.HoodieIndex; @@ -37,11 +35,8 @@ /** * Hoodie index implementation backed by flink state. - * - * @param type of payload */ -public class FlinkInMemoryStateIndex> - extends HoodieIndex>, List, List> { +public class FlinkInMemoryStateIndex extends HoodieIndex, List> { private static final Logger LOG = LogManager.getLogger(FlinkInMemoryStateIndex.class); @@ -50,8 +45,8 @@ public FlinkInMemoryStateIndex(HoodieFlinkEngineContext context, HoodieWriteConf } @Override - public HoodieData> tagLocation( - HoodieData> records, HoodieEngineContext context, + public HoodieData> tagLocation( + HoodieData> records, HoodieEngineContext context, HoodieTable hoodieTable) throws HoodieIndexException { throw new UnsupportedOperationException("No need to tag location for FlinkInMemoryStateIndex"); } @@ -88,4 +83,4 @@ public boolean canIndexLogFiles() { public boolean isImplicitWithStorage() { return true; } -} \ No newline at end of file +} diff --git a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/metadata/FlinkHoodieBackedTableMetadataWriter.java b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/metadata/FlinkHoodieBackedTableMetadataWriter.java index af9fee0688049..275ab4f5e0a33 100644 --- a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/metadata/FlinkHoodieBackedTableMetadataWriter.java +++ b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/metadata/FlinkHoodieBackedTableMetadataWriter.java @@ -22,11 +22,10 @@ import org.apache.hudi.client.HoodieFlinkWriteClient; import org.apache.hudi.client.WriteStatus; import org.apache.hudi.common.data.HoodieData; +import org.apache.hudi.common.data.HoodieList; import org.apache.hudi.common.engine.HoodieEngineContext; import org.apache.hudi.common.metrics.Registry; -import org.apache.hudi.common.model.FileSlice; import org.apache.hudi.common.model.HoodieRecord; -import org.apache.hudi.common.model.HoodieRecordLocation; import org.apache.hudi.common.table.timeline.HoodieActiveTimeline; import org.apache.hudi.common.table.timeline.HoodieInstant; import org.apache.hudi.common.util.Option; @@ -41,7 +40,7 @@ import java.io.IOException; import java.util.Collections; import java.util.List; -import java.util.stream.Collectors; +import java.util.Map; public class FlinkHoodieBackedTableMetadataWriter extends HoodieBackedTableMetadataWriter { @@ -101,10 +100,12 @@ protected void initialize(HoodieEngineContext eng } @Override - protected void commit(HoodieData hoodieDataRecords, String partitionName, String instantTime, boolean canTriggerTableService) { + protected void commit(String instantTime, Map> partitionRecordsMap, + boolean canTriggerTableService) { ValidationUtils.checkState(enabled, "Metadata table cannot be committed to as it is not enabled"); - List records = (List) hoodieDataRecords.get(); - List recordList = prepRecords(records, partitionName, 1); + ValidationUtils.checkState(metadataMetaClient != null, "Metadata table is not fully initialized yet."); + HoodieData preppedRecords = prepRecords(partitionRecordsMap); + List preppedRecordList = HoodieList.getList(preppedRecords); try (HoodieFlinkWriteClient writeClient = new HoodieFlinkWriteClient(engineContext, metadataWriteConfig)) { if (!metadataMetaClient.getActiveTimeline().filterCompletedInstants().containsInstant(instantTime)) { @@ -119,20 +120,21 @@ protected void commit(HoodieData hoodieDataRecords, String partiti // once rollback is complete, compaction will be retried again, which will 
eventually hit this code block where the respective commit is // already part of completed commit. So, we have to manually remove the completed instant and proceed. // and it is for the same reason we enabled withAllowMultiWriteOnSameInstant for metadata table. - HoodieInstant alreadyCompletedInstant = metadataMetaClient.getActiveTimeline().filterCompletedInstants().filter(entry -> entry.getTimestamp().equals(instantTime)).lastInstant().get(); + HoodieInstant alreadyCompletedInstant = + metadataMetaClient.getActiveTimeline().filterCompletedInstants().filter(entry -> entry.getTimestamp().equals(instantTime)).lastInstant().get(); HoodieActiveTimeline.deleteInstantFile(metadataMetaClient.getFs(), metadataMetaClient.getMetaPath(), alreadyCompletedInstant); metadataMetaClient.reloadActiveTimeline(); } - List statuses = records.size() > 0 - ? writeClient.upsertPreppedRecords(recordList, instantTime) + List statuses = preppedRecordList.size() > 0 + ? writeClient.upsertPreppedRecords(preppedRecordList, instantTime) : Collections.emptyList(); statuses.forEach(writeStatus -> { if (writeStatus.hasErrors()) { throw new HoodieMetadataException("Failed to commit metadata table records at instant " + instantTime); } }); - // flink does not support auto-commit yet, also the auto commit logic is not complete as AbstractHoodieWriteClient now. + // flink does not support auto-commit yet, also the auto commit logic is not complete as BaseHoodieWriteClient now. writeClient.commit(instantTime, statuses, Option.empty(), HoodieActiveTimeline.DELTA_COMMIT_ACTION, Collections.emptyMap()); // reload timeline @@ -147,21 +149,4 @@ protected void commit(HoodieData hoodieDataRecords, String partiti // Update total size of the metadata and count of base/log files metrics.ifPresent(m -> m.updateSizeMetrics(metadataMetaClient, metadata)); } - - /** - * Tag each record with the location in the given partition. - * - * The record is tagged with respective file slice's location based on its record key. - */ - private List prepRecords(List records, String partitionName, int numFileGroups) { - List fileSlices = HoodieTableMetadataUtil.getPartitionLatestFileSlices(metadataMetaClient, partitionName); - ValidationUtils.checkArgument(fileSlices.size() == numFileGroups, String.format("Invalid number of file groups: found=%d, required=%d", fileSlices.size(), numFileGroups)); - - return records.stream().map(r -> { - FileSlice slice = fileSlices.get(HoodieTableMetadataUtil.mapRecordKeyToFileGroupIndex(r.getRecordKey(), numFileGroups)); - final String instantTime = slice.isEmpty() ? 
"I" : "U"; - r.setCurrentLocation(new HoodieRecordLocation(instantTime, slice.getFileId())); - return r; - }).collect(Collectors.toList()); - } -} +} \ No newline at end of file diff --git a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/HoodieFlinkCopyOnWriteTable.java b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/HoodieFlinkCopyOnWriteTable.java index 9aceffe44f86a..7e41ab150fbf2 100644 --- a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/HoodieFlinkCopyOnWriteTable.java +++ b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/HoodieFlinkCopyOnWriteTable.java @@ -23,6 +23,7 @@ import org.apache.hudi.avro.model.HoodieClusteringPlan; import org.apache.hudi.avro.model.HoodieCompactionPlan; import org.apache.hudi.avro.model.HoodieRestoreMetadata; +import org.apache.hudi.avro.model.HoodieRestorePlan; import org.apache.hudi.avro.model.HoodieRollbackMetadata; import org.apache.hudi.avro.model.HoodieRollbackPlan; import org.apache.hudi.avro.model.HoodieSavepointMetadata; @@ -342,6 +343,11 @@ public HoodieSavepointMetadata savepoint(HoodieEngineContext context, String ins throw new HoodieNotSupportedException("Savepoint is not supported yet"); } + @Override + public Option scheduleRestore(HoodieEngineContext context, String restoreInstantTime, String instantToRestore) { + throw new HoodieNotSupportedException("Restore is not supported yet"); + } + @Override public HoodieRestoreMetadata restore(HoodieEngineContext context, String restoreInstantTime, String instantToRestore) { throw new HoodieNotSupportedException("Savepoint and restore is not supported yet"); diff --git a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/HoodieFlinkTable.java b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/HoodieFlinkTable.java index 164b00e2d6ce4..2f08a55c956fb 100644 --- a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/HoodieFlinkTable.java +++ b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/HoodieFlinkTable.java @@ -53,7 +53,8 @@ public static HoodieFlinkTable create(HoodieW HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder().setConf(context.getHadoopConf().get()).setBasePath(config.getBasePath()) .setLoadActiveTimelineOnLoad(true).setConsistencyGuardConfig(config.getConsistencyGuardConfig()) - .setLayoutVersion(Option.of(new TimelineLayoutVersion(config.getTimelineLayoutVersion()))).build(); + .setLayoutVersion(Option.of(new TimelineLayoutVersion(config.getTimelineLayoutVersion()))) + .setFileSystemRetryConfig(config.getFileSystemRetryConfig()).build(); return HoodieFlinkTable.create(config, context, metaClient); } diff --git a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/action/commit/BaseFlinkCommitActionExecutor.java b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/action/commit/BaseFlinkCommitActionExecutor.java index 5dfa511a8823f..51138cd29daa6 100644 --- a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/action/commit/BaseFlinkCommitActionExecutor.java +++ b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/action/commit/BaseFlinkCommitActionExecutor.java @@ -134,6 +134,12 @@ protected void commit(Option> extraMetadata, HoodieWriteMeta commit(extraMetadata, result, result.getWriteStatuses().stream().map(WriteStatus::getStat).collect(Collectors.toList())); } + protected void setCommitMetadata(HoodieWriteMetadata> result) { + 
result.setCommitMetadata(Option.of(CommitUtils.buildMetadata(result.getWriteStatuses().stream().map(WriteStatus::getStat).collect(Collectors.toList()), + result.getPartitionToReplaceFileIds(), + extraMetadata, operationType, getSchemaToStoreInCommit(), getCommitActionType()))); + } + protected void commit(Option> extraMetadata, HoodieWriteMetadata> result, List writeStats) { String actionType = getCommitActionType(); LOG.info("Committing " + instantTime + ", action Type " + actionType); @@ -144,8 +150,7 @@ protected void commit(Option> extraMetadata, HoodieWriteMeta try { LOG.info("Committing " + instantTime + ", action Type " + getCommitActionType()); HoodieActiveTimeline activeTimeline = table.getActiveTimeline(); - HoodieCommitMetadata metadata = CommitUtils.buildMetadata(writeStats, result.getPartitionToReplaceFileIds(), - extraMetadata, operationType, getSchemaToStoreInCommit(), getCommitActionType()); + HoodieCommitMetadata metadata = result.getCommitMetadata().get(); writeTableMetadata(metadata, actionType); diff --git a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/action/commit/FlinkDeleteHelper.java b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/action/commit/FlinkDeleteHelper.java index 05ac93725bfc9..8dd0c99bae299 100644 --- a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/action/commit/FlinkDeleteHelper.java +++ b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/action/commit/FlinkDeleteHelper.java @@ -22,6 +22,7 @@ import org.apache.hudi.common.data.HoodieList; import org.apache.hudi.common.engine.HoodieEngineContext; import org.apache.hudi.common.model.EmptyHoodieRecordPayload; +import org.apache.hudi.common.model.HoodieAvroRecord; import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.util.collection.Pair; @@ -43,7 +44,7 @@ @SuppressWarnings("checkstyle:LineLength") public class FlinkDeleteHelper extends - AbstractDeleteHelper>, List, List, R> { + BaseDeleteHelper>, List, List, R> { private FlinkDeleteHelper() { } @@ -93,7 +94,7 @@ public HoodieWriteMetadata> execute(String instantTime, } List> dedupedRecords = - dedupedKeys.stream().map(key -> new HoodieRecord<>(key, new EmptyHoodieRecordPayload())).collect(Collectors.toList()); + dedupedKeys.stream().map(key -> new HoodieAvroRecord<>(key, new EmptyHoodieRecordPayload())).collect(Collectors.toList()); Instant beginTag = Instant.now(); // perform index look up to get existing location of records List> taggedRecords = HoodieList.getList( diff --git a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/action/commit/FlinkMergeHelper.java b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/action/commit/FlinkMergeHelper.java index 5ed6d5d529ba3..38d4e60f648ec 100644 --- a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/action/commit/FlinkMergeHelper.java +++ b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/action/commit/FlinkMergeHelper.java @@ -45,7 +45,7 @@ import scala.collection.immutable.List; -public class FlinkMergeHelper extends AbstractMergeHelper>, +public class FlinkMergeHelper extends BaseMergeHelper>, List, List> { private FlinkMergeHelper() { @@ -91,7 +91,7 @@ public void runMerge(HoodieTable>, List, List ThreadLocal encoderCache = new ThreadLocal<>(); ThreadLocal decoderCache = new ThreadLocal<>(); - wrapper = new BoundedInMemoryExecutor(table.getConfig().getWriteBufferLimitBytes(), new 
IteratorBasedQueueProducer<>(readerIterator), + wrapper = new BoundedInMemoryExecutor<>(table.getConfig().getWriteBufferLimitBytes(), new IteratorBasedQueueProducer<>(readerIterator), Option.of(new UpdateHandler(mergeHandle)), record -> { if (!externalSchemaTransformation) { return record; diff --git a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/action/commit/FlinkWriteHelper.java b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/action/commit/FlinkWriteHelper.java index 3914e486f8a86..d28aafcc4abf8 100644 --- a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/action/commit/FlinkWriteHelper.java +++ b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/action/commit/FlinkWriteHelper.java @@ -21,6 +21,7 @@ import org.apache.hudi.client.WriteStatus; import org.apache.hudi.common.data.HoodieList; import org.apache.hudi.common.engine.HoodieEngineContext; +import org.apache.hudi.common.model.HoodieAvroRecord; import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.model.HoodieOperation; import org.apache.hudi.common.model.HoodieRecord; @@ -48,7 +49,7 @@ *
Computing the records batch locations all at a time is a pressure to the engine, * we should avoid that in streaming system. */ -public class FlinkWriteHelper extends AbstractWriteHelper>, +public class FlinkWriteHelper extends BaseWriteHelper>, List, List, R> { private FlinkWriteHelper() { @@ -89,7 +90,7 @@ protected List> tag(List> dedupedRecords, Hoodie @Override public List> deduplicateRecords( - List> records, HoodieIndex index, int parallelism) { + List> records, HoodieIndex index, int parallelism) { Map>>> keyedRecords = records.stream().map(record -> { // If index used is global, then records are expected to differ in their partitionPath final Object key = record.getKey().getRecordKey(); @@ -107,7 +108,7 @@ public List> deduplicateRecords( boolean choosePrev = data1.equals(reducedData); HoodieKey reducedKey = choosePrev ? rec1.getKey() : rec2.getKey(); HoodieOperation operation = choosePrev ? rec1.getOperation() : rec2.getOperation(); - HoodieRecord hoodieRecord = new HoodieRecord<>(reducedKey, reducedData, operation); + HoodieRecord hoodieRecord = new HoodieAvroRecord<>(reducedKey, reducedData, operation); // reuse the location from the first record. hoodieRecord.setCurrentLocation(rec1.getCurrentLocation()); return hoodieRecord; diff --git a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/upgrade/FlinkUpgradeDowngradeHelper.java b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/upgrade/FlinkUpgradeDowngradeHelper.java index d097d2e60057c..69acce5627543 100644 --- a/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/upgrade/FlinkUpgradeDowngradeHelper.java +++ b/hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/upgrade/FlinkUpgradeDowngradeHelper.java @@ -29,7 +29,7 @@ /** * Flink upgrade and downgrade helper. 
*/ -public class FlinkUpgradeDowngradeHelper implements BaseUpgradeDowngradeHelper { +public class FlinkUpgradeDowngradeHelper implements SupportsUpgradeDowngrade { private static final FlinkUpgradeDowngradeHelper SINGLETON_INSTANCE = new FlinkUpgradeDowngradeHelper(); diff --git a/hudi-client/hudi-flink-client/src/test/java/org/apache/hudi/index/bloom/TestFlinkHoodieBloomIndex.java b/hudi-client/hudi-flink-client/src/test/java/org/apache/hudi/index/bloom/TestFlinkHoodieBloomIndex.java index 7b4e3b675ea05..50adabbd585ea 100644 --- a/hudi-client/hudi-flink-client/src/test/java/org/apache/hudi/index/bloom/TestFlinkHoodieBloomIndex.java +++ b/hudi-client/hudi-flink-client/src/test/java/org/apache/hudi/index/bloom/TestFlinkHoodieBloomIndex.java @@ -23,6 +23,7 @@ import org.apache.hudi.common.bloom.BloomFilterTypeCode; import org.apache.hudi.common.data.HoodieList; import org.apache.hudi.common.data.HoodieMapPair; +import org.apache.hudi.common.model.HoodieAvroRecord; import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.table.HoodieTableMetaClient; @@ -31,7 +32,7 @@ import org.apache.hudi.common.util.collection.Pair; import org.apache.hudi.config.HoodieIndexConfig; import org.apache.hudi.config.HoodieWriteConfig; -import org.apache.hudi.io.HoodieKeyLookupHandle; +import org.apache.hudi.index.HoodieIndexUtils; import org.apache.hudi.table.HoodieFlinkTable; import org.apache.hudi.table.HoodieTable; import org.apache.hudi.testutils.HoodieFlinkClientTestHarness; @@ -115,22 +116,22 @@ public void testLoadInvolvedFiles(boolean rangePruning, boolean treeFiltering, b RawTripTestPayload rowChange1 = new RawTripTestPayload("{\"_row_key\":\"000\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12}"); HoodieRecord record1 = - new HoodieRecord(new HoodieKey(rowChange1.getRowKey(), rowChange1.getPartitionPath()), rowChange1); + new HoodieAvroRecord(new HoodieKey(rowChange1.getRowKey(), rowChange1.getPartitionPath()), rowChange1); RawTripTestPayload rowChange2 = new RawTripTestPayload("{\"_row_key\":\"001\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12}"); HoodieRecord record2 = - new HoodieRecord(new HoodieKey(rowChange2.getRowKey(), rowChange2.getPartitionPath()), rowChange2); + new HoodieAvroRecord(new HoodieKey(rowChange2.getRowKey(), rowChange2.getPartitionPath()), rowChange2); RawTripTestPayload rowChange3 = new RawTripTestPayload("{\"_row_key\":\"002\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12}"); HoodieRecord record3 = - new HoodieRecord(new HoodieKey(rowChange3.getRowKey(), rowChange3.getPartitionPath()), rowChange3); + new HoodieAvroRecord(new HoodieKey(rowChange3.getRowKey(), rowChange3.getPartitionPath()), rowChange3); RawTripTestPayload rowChange4 = new RawTripTestPayload("{\"_row_key\":\"003\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12}"); HoodieRecord record4 = - new HoodieRecord(new HoodieKey(rowChange4.getRowKey(), rowChange4.getPartitionPath()), rowChange4); + new HoodieAvroRecord(new HoodieKey(rowChange4.getRowKey(), rowChange4.getPartitionPath()), rowChange4); List partitions = asList("2016/01/21", "2016/04/01", "2015/03/12"); - List> filesList = index.loadInvolvedFiles(partitions, context, hoodieTable); + List> filesList = index.loadColumnRangesFromFiles(partitions, context, hoodieTable); // Still 0, as no valid commit assertEquals(0, filesList.size()); @@ -140,7 +141,7 @@ public void testLoadInvolvedFiles(boolean rangePruning, boolean treeFiltering, b .withInserts("2015/03/12", "4", 
record2, record3, record4); metaClient.reloadActiveTimeline(); - filesList = index.loadInvolvedFiles(partitions, context, hoodieTable); + filesList = index.loadColumnRangesFromFiles(partitions, context, hoodieTable); assertEquals(4, filesList.size()); if (rangePruning) { @@ -212,16 +213,16 @@ public void testCheckUUIDsAgainstOneFile() throws Exception { + "\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":32}"; RawTripTestPayload rowChange1 = new RawTripTestPayload(recordStr1); HoodieRecord record1 = - new HoodieRecord(new HoodieKey(rowChange1.getRowKey(), rowChange1.getPartitionPath()), rowChange1); + new HoodieAvroRecord(new HoodieKey(rowChange1.getRowKey(), rowChange1.getPartitionPath()), rowChange1); RawTripTestPayload rowChange2 = new RawTripTestPayload(recordStr2); HoodieRecord record2 = - new HoodieRecord(new HoodieKey(rowChange2.getRowKey(), rowChange2.getPartitionPath()), rowChange2); + new HoodieAvroRecord(new HoodieKey(rowChange2.getRowKey(), rowChange2.getPartitionPath()), rowChange2); RawTripTestPayload rowChange3 = new RawTripTestPayload(recordStr3); HoodieRecord record3 = - new HoodieRecord(new HoodieKey(rowChange3.getRowKey(), rowChange3.getPartitionPath()), rowChange3); + new HoodieAvroRecord(new HoodieKey(rowChange3.getRowKey(), rowChange3.getPartitionPath()), rowChange3); RawTripTestPayload rowChange4 = new RawTripTestPayload(recordStr4); HoodieRecord record4 = - new HoodieRecord(new HoodieKey(rowChange4.getRowKey(), rowChange4.getPartitionPath()), rowChange4); + new HoodieAvroRecord(new HoodieKey(rowChange4.getRowKey(), rowChange4.getPartitionPath()), rowChange4); // We write record1, record2 to a base file, but the bloom filter contains (record1, // record2, record3). @@ -242,9 +243,8 @@ public void testCheckUUIDsAgainstOneFile() throws Exception { HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath).build(); HoodieFlinkTable table = HoodieFlinkTable.create(config, context, metaClient); - HoodieKeyLookupHandle keyHandle = new HoodieKeyLookupHandle<>(config, table, Pair.of(partition, fileId)); - List results = keyHandle.checkCandidatesAgainstFile(hadoopConf, uuids, - new Path(java.nio.file.Paths.get(basePath, partition, filename).toString())); + List results = HoodieIndexUtils.filterKeysFromFile( + new Path(java.nio.file.Paths.get(basePath, partition, filename).toString()), uuids, hadoopConf); assertEquals(results.size(), 2); assertTrue(results.get(0).equals("1eb5b87a-1feh-4edd-87b4-6ec96dc405a0") || results.get(1).equals("1eb5b87a-1feh-4edd-87b4-6ec96dc405a0")); @@ -287,16 +287,16 @@ public void testTagLocation(boolean rangePruning, boolean treeFiltering, boolean String recordStr4 = "{\"_row_key\":\"" + rowKey1 + "\",\"time\":\"2015-01-31T03:16:41.415Z\",\"number\":32}"; RawTripTestPayload rowChange1 = new RawTripTestPayload(recordStr1); HoodieRecord record1 = - new HoodieRecord(new HoodieKey(rowChange1.getRowKey(), rowChange1.getPartitionPath()), rowChange1); + new HoodieAvroRecord(new HoodieKey(rowChange1.getRowKey(), rowChange1.getPartitionPath()), rowChange1); RawTripTestPayload rowChange2 = new RawTripTestPayload(recordStr2); HoodieRecord record2 = - new HoodieRecord(new HoodieKey(rowChange2.getRowKey(), rowChange2.getPartitionPath()), rowChange2); + new HoodieAvroRecord(new HoodieKey(rowChange2.getRowKey(), rowChange2.getPartitionPath()), rowChange2); RawTripTestPayload rowChange3 = new RawTripTestPayload(recordStr3); HoodieRecord record3 = - new HoodieRecord(new HoodieKey(rowChange3.getRowKey(), rowChange3.getPartitionPath()), rowChange3); 
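A note on the lookup change above: the move from HoodieKeyLookupHandle#checkCandidatesAgainstFile to HoodieIndexUtils.filterKeysFromFile keeps the same two-phase contract that this test exercises. A bloom filter can only answer "maybe present" (here it deliberately contains record3, which was never written to the base file), so candidate keys must still be verified against the keys actually stored in the file. A minimal, self-contained sketch of that contract, using toy types rather than Hudi's API:

import java.util.ArrayList;
import java.util.List;
import java.util.Set;
import java.util.function.Predicate;

class TwoPhaseKeyCheckSketch {
  // Phase 1: the bloom filter prunes keys that are definitely absent (it has no
  // false negatives), but may pass false positives through.
  // Phase 2: survivors are verified against the record keys actually read back
  // from the base file, which removes the false positives.
  static List<String> filterCandidateKeys(List<String> candidates,
                                          Predicate<String> bloomMightContain,
                                          Set<String> keysInFile) {
    List<String> confirmed = new ArrayList<>();
    for (String key : candidates) {
      if (bloomMightContain.test(key) && keysInFile.contains(key)) {
        confirmed.add(key); // a bloom-only hit would be a false positive
      }
    }
    return confirmed;
  }
}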
+ new HoodieAvroRecord(new HoodieKey(rowChange3.getRowKey(), rowChange3.getPartitionPath()), rowChange3); RawTripTestPayload rowChange4 = new RawTripTestPayload(recordStr4); HoodieRecord record4 = - new HoodieRecord(new HoodieKey(rowChange4.getRowKey(), rowChange4.getPartitionPath()), rowChange4); + new HoodieAvroRecord(new HoodieKey(rowChange4.getRowKey(), rowChange4.getPartitionPath()), rowChange4); List records = asList(record1, record2, record3, record4); // Also create the metadata and config @@ -355,15 +355,15 @@ public void testCheckExists(boolean rangePruning, boolean treeFiltering, boolean + "\"time\":\"2015-01-31T03:16:41.415Z\",\"number\":32}"; RawTripTestPayload rowChange1 = new RawTripTestPayload(recordStr1); HoodieKey key1 = new HoodieKey(rowChange1.getRowKey(), rowChange1.getPartitionPath()); - HoodieRecord record1 = new HoodieRecord(key1, rowChange1); + HoodieRecord record1 = new HoodieAvroRecord(key1, rowChange1); RawTripTestPayload rowChange2 = new RawTripTestPayload(recordStr2); HoodieKey key2 = new HoodieKey(rowChange2.getRowKey(), rowChange2.getPartitionPath()); - HoodieRecord record2 = new HoodieRecord(key2, rowChange2); + HoodieRecord record2 = new HoodieAvroRecord(key2, rowChange2); RawTripTestPayload rowChange3 = new RawTripTestPayload(recordStr3); HoodieKey key3 = new HoodieKey(rowChange3.getRowKey(), rowChange3.getPartitionPath()); RawTripTestPayload rowChange4 = new RawTripTestPayload(recordStr4); HoodieKey key4 = new HoodieKey(rowChange4.getRowKey(), rowChange4.getPartitionPath()); - HoodieRecord record4 = new HoodieRecord(key4, rowChange4); + HoodieRecord record4 = new HoodieAvroRecord(key4, rowChange4); List keys = asList(key1, key2, key3, key4); // Also create the metadata and config @@ -374,7 +374,7 @@ public void testCheckExists(boolean rangePruning, boolean treeFiltering, boolean // Let's tag HoodieBloomIndex bloomIndex = new HoodieBloomIndex(config, ListBasedHoodieBloomIndexHelper.getInstance()); List toTagRecords = new ArrayList<>(); - toTagRecords.add(new HoodieRecord(record4.getKey(), null)); + toTagRecords.add(new HoodieAvroRecord(record4.getKey(), null)); List taggedRecords = tagLocation(bloomIndex, toTagRecords, hoodieTable); Map>> recordLocations = new HashMap<>(); for (HoodieRecord taggedRecord : taggedRecords) { @@ -397,7 +397,7 @@ public void testCheckExists(boolean rangePruning, boolean treeFiltering, boolean hoodieTable = HoodieFlinkTable.create(config, context, metaClient); List toTagRecords1 = new ArrayList<>(); for (HoodieKey key : keys) { - taggedRecords.add(new HoodieRecord(key, null)); + taggedRecords.add(new HoodieAvroRecord(key, null)); } taggedRecords = tagLocation(bloomIndex, toTagRecords1, hoodieTable); @@ -437,9 +437,9 @@ public void testBloomFilterFalseError(boolean rangePruning, boolean treeFilterin // We write record1 to a base file, using a bloom filter having both records RawTripTestPayload rowChange1 = new RawTripTestPayload(recordStr1); - HoodieRecord record1 = new HoodieRecord(new HoodieKey(rowChange1.getRowKey(), rowChange1.getPartitionPath()), rowChange1); + HoodieRecord record1 = new HoodieAvroRecord(new HoodieKey(rowChange1.getRowKey(), rowChange1.getPartitionPath()), rowChange1); RawTripTestPayload rowChange2 = new RawTripTestPayload(recordStr2); - HoodieRecord record2 = new HoodieRecord(new HoodieKey(rowChange2.getRowKey(), rowChange2.getPartitionPath()), rowChange2); + HoodieRecord record2 = new HoodieAvroRecord(new HoodieKey(rowChange2.getRowKey(), rowChange2.getPartitionPath()), rowChange2); BloomFilter filter = 
BloomFilterFactory.createBloomFilter(10000, 0.0000001, -1, BloomFilterTypeCode.SIMPLE.name()); filter.add(record2.getRecordKey()); diff --git a/hudi-client/hudi-flink-client/src/test/java/org/apache/hudi/testutils/HoodieFlinkWriteableTestTable.java b/hudi-client/hudi-flink-client/src/test/java/org/apache/hudi/testutils/HoodieFlinkWriteableTestTable.java index 50e8f776ac635..de95520854646 100644 --- a/hudi-client/hudi-flink-client/src/test/java/org/apache/hudi/testutils/HoodieFlinkWriteableTestTable.java +++ b/hudi-client/hudi-flink-client/src/test/java/org/apache/hudi/testutils/HoodieFlinkWriteableTestTable.java @@ -19,6 +19,7 @@ package org.apache.hudi.testutils; +import org.apache.avro.generic.IndexedRecord; import org.apache.hudi.avro.HoodieAvroUtils; import org.apache.hudi.common.bloom.BloomFilter; import org.apache.hudi.common.bloom.BloomFilterFactory; @@ -26,6 +27,7 @@ import org.apache.hudi.common.model.HoodieLogFile; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.model.HoodieRecordLocation; +import org.apache.hudi.common.model.HoodieRecordPayload; import org.apache.hudi.common.table.HoodieTableMetaClient; import org.apache.hudi.common.table.log.HoodieLogFormat; import org.apache.hudi.common.table.log.block.HoodieAvroDataBlock; @@ -39,6 +41,7 @@ import org.apache.log4j.LogManager; import org.apache.log4j.Logger; +import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; @@ -130,14 +133,14 @@ private Pair appendRecordsToLogFile(List gr header.put(HeaderMetadataType.SCHEMA, schema.toString()); logWriter.appendBlock(new HoodieAvroDataBlock(groupedRecords.stream().map(r -> { try { - GenericRecord val = (GenericRecord) r.getData().getInsertValue(schema).get(); + GenericRecord val = (GenericRecord) ((HoodieRecordPayload) r.getData()).getInsertValue(schema).get(); HoodieAvroUtils.addHoodieKeyToRecord(val, r.getRecordKey(), r.getPartitionPath(), ""); - return (org.apache.avro.generic.IndexedRecord) val; - } catch (java.io.IOException e) { + return (IndexedRecord) val; + } catch (IOException e) { LOG.warn("Failed to convert record " + r.toString(), e); return null; } - }).collect(Collectors.toList()), header)); + }).collect(Collectors.toList()), header, HoodieRecord.RECORD_KEY_METADATA_FIELD)); return Pair.of(partitionPath, logWriter.getLogFile()); } } diff --git a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/client/HoodieJavaWriteClient.java b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/client/HoodieJavaWriteClient.java index 212187b2d7552..f365f29329782 100644 --- a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/client/HoodieJavaWriteClient.java +++ b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/client/HoodieJavaWriteClient.java @@ -44,13 +44,12 @@ import com.codahale.metrics.Timer; import org.apache.hadoop.conf.Configuration; -import java.io.IOException; import java.util.List; import java.util.Map; import java.util.stream.Collectors; public class HoodieJavaWriteClient extends - AbstractHoodieWriteClient>, List, List> { + BaseHoodieWriteClient>, List, List> { public HoodieJavaWriteClient(HoodieEngineContext context, HoodieWriteConfig clientConfig) { super(context, clientConfig); @@ -210,21 +209,20 @@ protected List postWrite(HoodieWriteMetadata> res @Override public void commitCompaction(String compactionInstantTime, - List writeStatuses, - Option> extraMetadata) throws IOException { + HoodieCommitMetadata metadata, + Option> extraMetadata) { throw new 
HoodieNotSupportedException("CommitCompaction is not supported in HoodieJavaClient"); } @Override protected void completeCompaction(HoodieCommitMetadata metadata, - List writeStatuses, HoodieTable>, List, List> table, String compactionCommitTime) { throw new HoodieNotSupportedException("CompleteCompaction is not supported in HoodieJavaClient"); } @Override - protected List compact(String compactionInstantTime, + protected HoodieWriteMetadata> compact(String compactionInstantTime, boolean shouldComplete) { throw new HoodieNotSupportedException("Compact is not supported in HoodieJavaClient"); } diff --git a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/client/clustering/run/strategy/JavaExecutionStrategy.java b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/client/clustering/run/strategy/JavaExecutionStrategy.java index a7dc4a3c0fa73..7d7609f0fa0a9 100644 --- a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/client/clustering/run/strategy/JavaExecutionStrategy.java +++ b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/client/clustering/run/strategy/JavaExecutionStrategy.java @@ -24,8 +24,11 @@ import org.apache.hudi.avro.model.HoodieClusteringPlan; import org.apache.hudi.client.WriteStatus; import org.apache.hudi.client.common.JavaTaskContextSupplier; +import org.apache.hudi.common.data.HoodieData; +import org.apache.hudi.common.data.HoodieList; import org.apache.hudi.common.engine.HoodieEngineContext; import org.apache.hudi.common.model.ClusteringOperation; +import org.apache.hudi.common.model.HoodieAvroRecord; import org.apache.hudi.common.model.HoodieFileGroupId; import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.model.HoodieRecord; @@ -70,7 +73,7 @@ * Clustering strategy for Java engine. */ public abstract class JavaExecutionStrategy> - extends ClusteringExecutionStrategy>, List, List> { + extends ClusteringExecutionStrategy>, HoodieData, HoodieData> { private static final Logger LOG = LogManager.getLogger(JavaExecutionStrategy.class); @@ -80,7 +83,7 @@ public JavaExecutionStrategy( } @Override - public HoodieWriteMetadata> performClustering( + public HoodieWriteMetadata> performClustering( HoodieClusteringPlan clusteringPlan, Schema schema, String instantTime) { // execute clustering for each group and collect WriteStatus List writeStatusList = new ArrayList<>(); @@ -89,8 +92,8 @@ public HoodieWriteMetadata> performClustering( inputGroup, clusteringPlan.getStrategy().getStrategyParams(), Option.ofNullable(clusteringPlan.getPreserveHoodieMetadata()).orElse(false), instantTime))); - HoodieWriteMetadata> writeMetadata = new HoodieWriteMetadata<>(); - writeMetadata.setWriteStatuses(writeStatusList); + HoodieWriteMetadata> writeMetadata = new HoodieWriteMetadata<>(); + writeMetadata.setWriteStatuses(HoodieList.of(writeStatusList)); return writeMetadata; } @@ -120,7 +123,7 @@ public abstract List performClusteringWithRecordList( * @param schema Schema of the data including metadata fields. * @return empty for now. 
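 * (Note: despite "empty for now", the method body below does return a partitioner when PLAN_STRATEGY_SORT_COLUMNS is configured, namely a JavaCustomColumnsSortPartitioner built from the comma-separated sort columns and the write schema.)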
*/ - protected Option> getPartitioner(Map strategyParams, Schema schema) { + protected Option>>> getPartitioner(Map strategyParams, Schema schema) { if (strategyParams.containsKey(PLAN_STRATEGY_SORT_COLUMNS.key())) { return Option.of(new JavaCustomColumnsSortPartitioner( strategyParams.get(PLAN_STRATEGY_SORT_COLUMNS.key()).split(","), @@ -237,7 +240,7 @@ private HoodieRecord transform(IndexedRecord indexedRecord) { HoodieKey hoodieKey = new HoodieKey(key, partition); HoodieRecordPayload avroPayload = new RewriteAvroPayload(record); - HoodieRecord hoodieRecord = new HoodieRecord(hoodieKey, avroPayload); + HoodieRecord hoodieRecord = new HoodieAvroRecord(hoodieKey, avroPayload); return hoodieRecord; } } diff --git a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/index/JavaHoodieIndex.java b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/index/JavaHoodieIndex.java index 7f8b83f5c7d5d..dd64859cad7e5 100644 --- a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/index/JavaHoodieIndex.java +++ b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/index/JavaHoodieIndex.java @@ -25,7 +25,6 @@ import org.apache.hudi.common.data.HoodieData; import org.apache.hudi.common.data.HoodieList; import org.apache.hudi.common.engine.HoodieEngineContext; -import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.model.HoodieRecordPayload; import org.apache.hudi.config.HoodieWriteConfig; @@ -33,8 +32,9 @@ import org.apache.hudi.table.HoodieTable; import java.util.List; +import java.util.stream.Collectors; -public abstract class JavaHoodieIndex extends HoodieIndex>, List, List> { +public abstract class JavaHoodieIndex extends HoodieIndex>, List> { protected JavaHoodieIndex(HoodieWriteConfig config) { super(config); } @@ -44,21 +44,22 @@ protected JavaHoodieIndex(HoodieWriteConfig config) { @PublicAPIMethod(maturity = ApiMaturityLevel.DEPRECATED) public abstract List updateLocation(List writeStatuses, HoodieEngineContext context, - HoodieTable>, List, List> hoodieTable) throws HoodieIndexException; + HoodieTable hoodieTable) throws HoodieIndexException; @Override @Deprecated @PublicAPIMethod(maturity = ApiMaturityLevel.DEPRECATED) public abstract List> tagLocation(List> records, HoodieEngineContext context, - HoodieTable>, List, List> hoodieTable) throws HoodieIndexException; + HoodieTable hoodieTable) throws HoodieIndexException; @Override @PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING) - public HoodieData> tagLocation( - HoodieData> records, HoodieEngineContext context, + public HoodieData> tagLocation( + HoodieData> records, HoodieEngineContext context, HoodieTable hoodieTable) throws HoodieIndexException { - return HoodieList.of(tagLocation(HoodieList.getList(records), context, hoodieTable)); + List> hoodieRecords = tagLocation(HoodieList.getList(records.map(record -> (HoodieRecord) record)), context, hoodieTable); + return HoodieList.of(hoodieRecords.stream().map(r -> (HoodieRecord) r).collect(Collectors.toList())); } @Override diff --git a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/index/JavaHoodieIndexFactory.java b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/index/JavaHoodieIndexFactory.java index f6135fb132afa..9f4adad8ecf8a 100644 --- a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/index/JavaHoodieIndexFactory.java +++ b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/index/JavaHoodieIndexFactory.java @@ -44,9 +44,9 @@ public static 
HoodieIndex createIndex(HoodieWriteConfig config) { // TODO more indexes to be added switch (config.getIndexType()) { case INMEMORY: - return new HoodieInMemoryHashIndex<>(config); + return new HoodieInMemoryHashIndex(config); case BLOOM: - return new HoodieBloomIndex<>(config, ListBasedHoodieBloomIndexHelper.getInstance()); + return new HoodieBloomIndex(config, ListBasedHoodieBloomIndexHelper.getInstance()); default: throw new HoodieIndexException("Unsupported index type " + config.getIndexType()); } diff --git a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/HoodieJavaCopyOnWriteTable.java b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/HoodieJavaCopyOnWriteTable.java index 62a6980d509ab..447ed3e96cd9e 100644 --- a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/HoodieJavaCopyOnWriteTable.java +++ b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/HoodieJavaCopyOnWriteTable.java @@ -23,6 +23,7 @@ import org.apache.hudi.avro.model.HoodieClusteringPlan; import org.apache.hudi.avro.model.HoodieCompactionPlan; import org.apache.hudi.avro.model.HoodieRestoreMetadata; +import org.apache.hudi.avro.model.HoodieRestorePlan; import org.apache.hudi.avro.model.HoodieRollbackMetadata; import org.apache.hudi.avro.model.HoodieRollbackPlan; import org.apache.hudi.avro.model.HoodieSavepointMetadata; @@ -48,7 +49,7 @@ import org.apache.hudi.table.action.bootstrap.HoodieBootstrapWriteMetadata; import org.apache.hudi.table.action.clean.CleanActionExecutor; import org.apache.hudi.table.action.clean.CleanPlanActionExecutor; -import org.apache.hudi.table.action.cluster.JavaClusteringPlanActionExecutor; +import org.apache.hudi.table.action.cluster.ClusteringPlanActionExecutor; import org.apache.hudi.table.action.cluster.JavaExecuteClusteringCommitActionExecutor; import org.apache.hudi.table.action.commit.JavaBulkInsertCommitActionExecutor; import org.apache.hudi.table.action.commit.JavaBulkInsertPreppedCommitActionExecutor; @@ -63,15 +64,17 @@ import org.apache.hudi.table.action.restore.CopyOnWriteRestoreActionExecutor; import org.apache.hudi.table.action.rollback.BaseRollbackPlanActionExecutor; import org.apache.hudi.table.action.rollback.CopyOnWriteRollbackActionExecutor; +import org.apache.hudi.table.action.rollback.RestorePlanActionExecutor; import org.apache.hudi.table.action.savepoint.SavepointActionExecutor; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import javax.annotation.Nonnull; + import java.io.IOException; import java.util.Collections; import java.util.Iterator; -import javax.annotation.Nonnull; import java.util.List; import java.util.Map; @@ -190,7 +193,7 @@ public HoodieWriteMetadata> compact(HoodieEngineContext contex @Override public Option scheduleClustering(final HoodieEngineContext context, final String instantTime, final Option> extraMetadata) { - return new JavaClusteringPlanActionExecutor<>(context, config, this, instantTime, extraMetadata).execute(); + return new ClusteringPlanActionExecutor<>(context, config, this, instantTime, extraMetadata).execute(); } @Override @@ -247,6 +250,11 @@ public HoodieSavepointMetadata savepoint(HoodieEngineContext context, context, config, this, instantToSavepoint, user, comment).execute(); } + @Override + public Option scheduleRestore(HoodieEngineContext context, String restoreInstantTime, String instantToRestore) { + return new RestorePlanActionExecutor(context, config, this, restoreInstantTime, instantToRestore).execute(); + } + @Override public HoodieRestoreMetadata 
restore(HoodieEngineContext context, String restoreInstantTime, diff --git a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/cluster/JavaExecuteClusteringCommitActionExecutor.java b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/cluster/JavaExecuteClusteringCommitActionExecutor.java index 83364bdc3ad35..168d558143bd3 100644 --- a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/cluster/JavaExecuteClusteringCommitActionExecutor.java +++ b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/cluster/JavaExecuteClusteringCommitActionExecutor.java @@ -19,46 +19,32 @@ package org.apache.hudi.table.action.cluster; -import org.apache.hudi.avro.HoodieAvroUtils; -import org.apache.hudi.avro.model.HoodieClusteringGroup; import org.apache.hudi.avro.model.HoodieClusteringPlan; import org.apache.hudi.client.WriteStatus; +import org.apache.hudi.common.data.HoodieData; import org.apache.hudi.common.engine.HoodieEngineContext; -import org.apache.hudi.common.model.HoodieCommitMetadata; -import org.apache.hudi.common.model.HoodieFileGroupId; -import org.apache.hudi.common.model.HoodieKey; -import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.model.HoodieRecordPayload; import org.apache.hudi.common.model.WriteOperationType; -import org.apache.hudi.common.table.timeline.HoodieInstant; import org.apache.hudi.common.table.timeline.HoodieTimeline; import org.apache.hudi.common.util.ClusteringUtils; -import org.apache.hudi.common.util.CommitUtils; -import org.apache.hudi.common.util.Option; -import org.apache.hudi.common.util.ReflectionUtils; import org.apache.hudi.common.util.collection.Pair; import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.exception.HoodieClusteringException; import org.apache.hudi.table.HoodieTable; import org.apache.hudi.table.action.HoodieWriteMetadata; -import org.apache.hudi.table.action.cluster.strategy.ClusteringExecutionStrategy; import org.apache.hudi.table.action.commit.BaseJavaCommitActionExecutor; -import org.apache.avro.Schema; - import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.stream.Collectors; public class JavaExecuteClusteringCommitActionExecutor> extends BaseJavaCommitActionExecutor { private final HoodieClusteringPlan clusteringPlan; - public JavaExecuteClusteringCommitActionExecutor( - HoodieEngineContext context, HoodieWriteConfig config, HoodieTable table, - String instantTime) { + public JavaExecuteClusteringCommitActionExecutor(HoodieEngineContext context, + HoodieWriteConfig config, + HoodieTable table, + String instantTime) { super(context, config, table, instantTime, WriteOperationType.CLUSTER); this.clusteringPlan = ClusteringUtils.getClusteringPlan( table.getMetaClient(), HoodieTimeline.getReplaceCommitRequestedInstant(instantTime)) @@ -68,56 +54,13 @@ public JavaExecuteClusteringCommitActionExecutor( @Override public HoodieWriteMetadata> execute() { - HoodieInstant instant = HoodieTimeline.getReplaceCommitRequestedInstant(instantTime); - // Mark instant as clustering inflight - table.getActiveTimeline().transitionReplaceRequestedToInflight(instant, Option.empty()); - table.getMetaClient().reloadActiveTimeline(); - - final Schema schema = HoodieAvroUtils.addMetadataFields(new Schema.Parser().parse(config.getSchema())); - HoodieWriteMetadata> writeMetadata = ( - (ClusteringExecutionStrategy>, List, List>) - ReflectionUtils.loadClass(config.getClusteringExecutionStrategyClass(), - new Class[] 
{HoodieTable.class, HoodieEngineContext.class, HoodieWriteConfig.class}, table, context, config)) - .performClustering(clusteringPlan, schema, instantTime); - List writeStatusList = writeMetadata.getWriteStatuses(); - List statuses = updateIndex(writeStatusList, writeMetadata); - writeMetadata.setWriteStats(statuses.stream().map(WriteStatus::getStat).collect(Collectors.toList())); - writeMetadata.setPartitionToReplaceFileIds(getPartitionToReplacedFileIds(writeMetadata)); - validateWriteResult(writeMetadata); - commitOnAutoCommit(writeMetadata); - if (!writeMetadata.getCommitMetadata().isPresent()) { - HoodieCommitMetadata commitMetadata = CommitUtils.buildMetadata(writeMetadata.getWriteStats().get(), writeMetadata.getPartitionToReplaceFileIds(), - extraMetadata, operationType, getSchemaToStoreInCommit(), getCommitActionType()); - writeMetadata.setCommitMetadata(Option.of(commitMetadata)); - } - return writeMetadata; - } - - /** - * Validate actions taken by clustering. In the first implementation, we validate at least one new file is written. - * But we can extend this to add more validation. E.g. number of records read = number of records written etc. - * We can also make these validations in BaseCommitActionExecutor to reuse pre-commit hooks for multiple actions. - */ - private void validateWriteResult(HoodieWriteMetadata> writeMetadata) { - if (writeMetadata.getWriteStatuses().isEmpty()) { - throw new HoodieClusteringException("Clustering plan produced 0 WriteStatus for " + instantTime - + " #groups: " + clusteringPlan.getInputGroups().size() + " expected at least " - + clusteringPlan.getInputGroups().stream().mapToInt(HoodieClusteringGroup::getNumOutputFileGroups).sum() - + " write statuses"); - } + HoodieWriteMetadata> writeMetadata = executeClustering(clusteringPlan); + List transformedWriteStatuses = writeMetadata.getWriteStatuses().collectAsList(); + return writeMetadata.clone(transformedWriteStatuses); } @Override protected String getCommitActionType() { return HoodieTimeline.REPLACE_COMMIT_ACTION; } - - @Override - protected Map> getPartitionToReplacedFileIds(HoodieWriteMetadata> writeMetadata) { - Set newFilesWritten = writeMetadata.getWriteStats().get().stream() - .map(s -> new HoodieFileGroupId(s.getPartitionPath(), s.getFileId())).collect(Collectors.toSet()); - return ClusteringUtils.getFileGroupsFromClusteringPlan(clusteringPlan) - .filter(fg -> !newFilesWritten.contains(fg)) - .collect(Collectors.groupingBy(fg -> fg.getPartitionPath(), Collectors.mapping(fg -> fg.getFileId(), Collectors.toList()))); - } } diff --git a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/commit/BaseJavaCommitActionExecutor.java b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/commit/BaseJavaCommitActionExecutor.java index 2a93c5012ce1e..dc6994d315f02 100644 --- a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/commit/BaseJavaCommitActionExecutor.java +++ b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/commit/BaseJavaCommitActionExecutor.java @@ -42,6 +42,7 @@ import org.apache.hudi.io.CreateHandleFactory; import org.apache.hudi.io.HoodieMergeHandle; import org.apache.hudi.io.HoodieSortedMergeHandle; +import org.apache.hudi.io.HoodieConcatHandle; import org.apache.hudi.table.HoodieTable; import org.apache.hudi.table.WorkloadProfile; import org.apache.hudi.table.WorkloadStat; @@ -90,27 +91,27 @@ public BaseJavaCommitActionExecutor(HoodieEngineContext context, public HoodieWriteMetadata> execute(List> 
inputRecords) { HoodieWriteMetadata> result = new HoodieWriteMetadata<>(); - WorkloadProfile profile = null; + WorkloadProfile workloadProfile = null; if (isWorkloadProfileNeeded()) { - profile = new WorkloadProfile(buildProfile(inputRecords)); - LOG.info("Workload profile :" + profile); + workloadProfile = new WorkloadProfile(buildProfile(inputRecords), table.getIndex().canIndexLogFiles()); + LOG.info("Input workload profile :" + workloadProfile); + } + + final Partitioner partitioner = getPartitioner(workloadProfile); + try { + saveWorkloadProfileMetadataToInflight(workloadProfile, instantTime); + } catch (Exception e) { + HoodieTableMetaClient metaClient = table.getMetaClient(); + HoodieInstant inflightInstant = new HoodieInstant(HoodieInstant.State.INFLIGHT, metaClient.getCommitActionType(), instantTime); try { - saveWorkloadProfileMetadataToInflight(profile, instantTime); - } catch (Exception e) { - HoodieTableMetaClient metaClient = table.getMetaClient(); - HoodieInstant inflightInstant = new HoodieInstant(HoodieInstant.State.INFLIGHT, metaClient.getCommitActionType(), instantTime); - try { - if (!metaClient.getFs().exists(new Path(metaClient.getMetaPath(), inflightInstant.getFileName()))) { - throw new HoodieCommitException("Failed to commit " + instantTime + " unable to save inflight metadata ", e); - } - } catch (IOException ex) { - LOG.error("Check file exists failed"); - throw new HoodieCommitException("Failed to commit " + instantTime + " unable to save inflight metadata ", ex); + if (!metaClient.getFs().exists(new Path(metaClient.getMetaPath(), inflightInstant.getFileName()))) { + throw new HoodieCommitException("Failed to commit " + instantTime + " unable to save inflight metadata ", e); } + } catch (IOException ex) { + LOG.error("Check file exists failed"); + throw new HoodieCommitException("Failed to commit " + instantTime + " unable to save inflight metadata ", ex); } } - - final Partitioner partitioner = getPartitioner(profile); Map>> partitionedRecords = partition(inputRecords, partitioner); List writeStatuses = new LinkedList<>(); @@ -196,6 +197,11 @@ protected void commit(Option> extraMetadata, HoodieWriteMeta commit(extraMetadata, result, result.getWriteStatuses().stream().map(WriteStatus::getStat).collect(Collectors.toList())); } + protected void setCommitMetadata(HoodieWriteMetadata> result) { + result.setCommitMetadata(Option.of(CommitUtils.buildMetadata(result.getWriteStatuses().stream().map(WriteStatus::getStat).collect(Collectors.toList()), + result.getPartitionToReplaceFileIds(), extraMetadata, operationType, getSchemaToStoreInCommit(), getCommitActionType()))); + } + protected void commit(Option> extraMetadata, HoodieWriteMetadata> result, List writeStats) { String actionType = getCommitActionType(); LOG.info("Committing " + instantTime + ", action Type " + actionType); @@ -206,8 +212,7 @@ protected void commit(Option> extraMetadata, HoodieWriteMeta try { LOG.info("Committing " + instantTime + ", action Type " + getCommitActionType()); HoodieActiveTimeline activeTimeline = table.getActiveTimeline(); - HoodieCommitMetadata metadata = CommitUtils.buildMetadata(writeStats, result.getPartitionToReplaceFileIds(), - extraMetadata, operationType, getSchemaToStoreInCommit(), getCommitActionType()); + HoodieCommitMetadata metadata = result.getCommitMetadata().get(); writeTableMetadata(metadata, actionType); @@ -289,6 +294,8 @@ protected Iterator> handleUpdateInternal(HoodieMergeHandle> recordItr) { if (table.requireSortedRecords()) { return new 
HoodieSortedMergeHandle<>(config, instantTime, table, recordItr, partitionPath, fileId, taskContextSupplier, Option.empty()); + } else if (!WriteOperationType.isChangingRecords(operationType) && config.allowDuplicateInserts()) { + return new HoodieConcatHandle<>(config, instantTime, table, recordItr, partitionPath, fileId, taskContextSupplier, Option.empty()); } else { return new HoodieMergeHandle<>(config, instantTime, table, recordItr, partitionPath, fileId, taskContextSupplier, Option.empty()); } diff --git a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/commit/JavaBulkInsertHelper.java b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/commit/JavaBulkInsertHelper.java index cdfa303cd738e..de7afdf00ebeb 100644 --- a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/commit/JavaBulkInsertHelper.java +++ b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/commit/JavaBulkInsertHelper.java @@ -39,12 +39,12 @@ import java.util.List; /** - * A java implementation of {@link AbstractBulkInsertHelper}. + * A java implementation of {@link BaseBulkInsertHelper}. * * @param */ @SuppressWarnings("checkstyle:LineLength") -public class JavaBulkInsertHelper extends AbstractBulkInsertHelper>, +public class JavaBulkInsertHelper extends BaseBulkInsertHelper>, List, List, R> { private JavaBulkInsertHelper() { @@ -65,7 +65,7 @@ public HoodieWriteMetadata> bulkInsert(final List>, List, List, R> executor, final boolean performDedupe, - final Option> userDefinedBulkInsertPartitioner) { + final Option>>> userDefinedBulkInsertPartitioner) { HoodieWriteMetadata result = new HoodieWriteMetadata(); // It's possible the transition to inflight could have already happened. @@ -89,7 +89,7 @@ public List bulkInsert(List> inputRecords, HoodieTable>, List, List> table, HoodieWriteConfig config, boolean performDedupe, - Option> userDefinedBulkInsertPartitioner, + Option>>> userDefinedBulkInsertPartitioner, boolean useWriterSchema, int parallelism, WriteHandleFactory writeHandleFactory) { diff --git a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/commit/JavaBulkInsertPreppedCommitActionExecutor.java b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/commit/JavaBulkInsertPreppedCommitActionExecutor.java index 37b56b6325bc3..ed72fbe7850e2 100644 --- a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/commit/JavaBulkInsertPreppedCommitActionExecutor.java +++ b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/commit/JavaBulkInsertPreppedCommitActionExecutor.java @@ -26,9 +26,8 @@ import org.apache.hudi.common.util.Option; import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.exception.HoodieInsertException; -import org.apache.hudi.table.HoodieTable; import org.apache.hudi.table.BulkInsertPartitioner; - +import org.apache.hudi.table.HoodieTable; import org.apache.hudi.table.action.HoodieWriteMetadata; import java.util.List; @@ -37,12 +36,12 @@ public class JavaBulkInsertPreppedCommitActionExecutor { private final List> preppedInputRecord; - private final Option> userDefinedBulkInsertPartitioner; + private final Option>>> userDefinedBulkInsertPartitioner; public JavaBulkInsertPreppedCommitActionExecutor(HoodieJavaEngineContext context, HoodieWriteConfig config, HoodieTable table, String instantTime, List> preppedInputRecord, - Option> userDefinedBulkInsertPartitioner) { + Option>>> userDefinedBulkInsertPartitioner) { 
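    // Prepped records have already been tagged with their index locations upstream,
    // so this executor hands them straight to the bulk-insert helper with
    // deduplication disabled, skipping the tag/dedupe pass of the non-prepped path.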
super(context, config, table, instantTime, WriteOperationType.BULK_INSERT); this.preppedInputRecord = preppedInputRecord; this.userDefinedBulkInsertPartitioner = userDefinedBulkInsertPartitioner; @@ -60,4 +59,4 @@ public HoodieWriteMetadata> execute() { throw new HoodieInsertException("Failed to bulk insert for commit time " + instantTime, e); } } -} \ No newline at end of file +} diff --git a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/commit/JavaDeleteHelper.java b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/commit/JavaDeleteHelper.java index fc81b787f4737..f82c1c561b2c5 100644 --- a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/commit/JavaDeleteHelper.java +++ b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/commit/JavaDeleteHelper.java @@ -22,6 +22,7 @@ import org.apache.hudi.common.data.HoodieList; import org.apache.hudi.common.engine.HoodieEngineContext; import org.apache.hudi.common.model.EmptyHoodieRecordPayload; +import org.apache.hudi.common.model.HoodieAvroRecord; import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.util.collection.Pair; @@ -43,7 +44,7 @@ @SuppressWarnings("checkstyle:LineLength") public class JavaDeleteHelper extends - AbstractDeleteHelper>, List, List, R> { + BaseDeleteHelper>, List, List, R> { private JavaDeleteHelper() { } @@ -95,7 +96,7 @@ public HoodieWriteMetadata> execute(String instantTime, } List> dedupedRecords = - dedupedKeys.stream().map(key -> new HoodieRecord<>(key, new EmptyHoodieRecordPayload())).collect(Collectors.toList()); + dedupedKeys.stream().map(key -> new HoodieAvroRecord<>(key, new EmptyHoodieRecordPayload())).collect(Collectors.toList()); Instant beginTag = Instant.now(); // perform index look up to get existing location of records List> taggedRecords = HoodieList.getList( diff --git a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/commit/JavaMergeHelper.java b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/commit/JavaMergeHelper.java index a55121472310d..7878d857761ea 100644 --- a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/commit/JavaMergeHelper.java +++ b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/commit/JavaMergeHelper.java @@ -44,7 +44,7 @@ import java.util.Iterator; import java.util.List; -public class JavaMergeHelper extends AbstractMergeHelper>, +public class JavaMergeHelper extends BaseMergeHelper>, List, List> { private JavaMergeHelper() { @@ -91,7 +91,7 @@ public void runMerge(HoodieTable>, List, List ThreadLocal encoderCache = new ThreadLocal<>(); ThreadLocal decoderCache = new ThreadLocal<>(); - wrapper = new BoundedInMemoryExecutor(table.getConfig().getWriteBufferLimitBytes(), new IteratorBasedQueueProducer<>(readerIterator), + wrapper = new BoundedInMemoryExecutor<>(table.getConfig().getWriteBufferLimitBytes(), new IteratorBasedQueueProducer<>(readerIterator), Option.of(new UpdateHandler(mergeHandle)), record -> { if (!externalSchemaTransformation) { return record; diff --git a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/commit/JavaUpsertPartitioner.java b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/commit/JavaUpsertPartitioner.java index 33f59f4406f39..deaf934cf5d03 100644 --- a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/commit/JavaUpsertPartitioner.java 
+++ b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/commit/JavaUpsertPartitioner.java @@ -25,6 +25,7 @@ import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.model.HoodieRecordLocation; import org.apache.hudi.common.model.HoodieRecordPayload; +import org.apache.hudi.common.model.HoodieWriteStat; import org.apache.hudi.common.table.timeline.HoodieInstant; import org.apache.hudi.common.table.timeline.HoodieTimeline; import org.apache.hudi.common.util.NumericUtils; @@ -64,9 +65,9 @@ public class JavaUpsertPartitioner> implements */ private int totalBuckets = 0; /** - * Stat for the current workload. Helps in determining inserts, upserts etc. + * Stat for the input and output workload. Describe the workload before and after being assigned buckets. */ - private WorkloadProfile profile; + private WorkloadProfile workloadProfile; /** * Helps decide which bucket an incoming update should go to. */ @@ -84,16 +85,16 @@ public class JavaUpsertPartitioner> implements protected final HoodieWriteConfig config; - public JavaUpsertPartitioner(WorkloadProfile profile, HoodieEngineContext context, HoodieTable table, + public JavaUpsertPartitioner(WorkloadProfile workloadProfile, HoodieEngineContext context, HoodieTable table, HoodieWriteConfig config) { updateLocationToBucket = new HashMap<>(); partitionPathToInsertBucketInfos = new HashMap<>(); bucketInfoMap = new HashMap<>(); - this.profile = profile; + this.workloadProfile = workloadProfile; this.table = table; this.config = config; - assignUpdates(profile); - assignInserts(profile, context); + assignUpdates(workloadProfile); + assignInserts(workloadProfile, context); LOG.info("Total Buckets :" + totalBuckets + ", buckets info => " + bucketInfoMap + ", \n" + "Partition to insert buckets => " + partitionPathToInsertBucketInfos + ", \n" @@ -102,11 +103,19 @@ public JavaUpsertPartitioner(WorkloadProfile profile, HoodieEngineContext contex private void assignUpdates(WorkloadProfile profile) { // each update location gets a partition - Set> partitionStatEntries = profile.getPartitionPathStatMap().entrySet(); + Set> partitionStatEntries = profile.getInputPartitionPathStatMap().entrySet(); for (Map.Entry partitionStat : partitionStatEntries) { + WorkloadStat outputWorkloadStats = profile.getOutputPartitionPathStatMap().getOrDefault(partitionStat.getKey(), new WorkloadStat()); for (Map.Entry> updateLocEntry : partitionStat.getValue().getUpdateLocationToCount().entrySet()) { addUpdateBucket(partitionStat.getKey(), updateLocEntry.getKey()); + if (profile.hasOutputWorkLoadStats()) { + HoodieRecordLocation hoodieRecordLocation = new HoodieRecordLocation(updateLocEntry.getValue().getKey(), updateLocEntry.getKey()); + outputWorkloadStats.addUpdates(hoodieRecordLocation, updateLocEntry.getValue().getValue()); + } + } + if (profile.hasOutputWorkLoadStats()) { + profile.updateOutputPartitionPathStatMap(partitionStat.getKey(), outputWorkloadStats); } } } @@ -133,9 +142,10 @@ private void assignInserts(WorkloadProfile profile, HoodieEngineContext context) for (String partitionPath : partitionPaths) { WorkloadStat pStat = profile.getWorkloadStat(partitionPath); + WorkloadStat outputWorkloadStats = profile.getOutputPartitionPathStatMap().getOrDefault(partitionPath, new WorkloadStat()); if (pStat.getNumInserts() > 0) { - List smallFiles = partitionSmallFilesMap.get(partitionPath); + List smallFiles = partitionSmallFilesMap.getOrDefault(partitionPath, new ArrayList<>()); this.smallFiles.addAll(smallFiles); LOG.info("For 
partitionPath : " + partitionPath + " Small Files => " + smallFiles); @@ -158,6 +168,9 @@ private void assignInserts(WorkloadProfile profile, HoodieEngineContext context) bucket = addUpdateBucket(partitionPath, smallFile.location.getFileId()); LOG.info("Assigning " + recordsToAppend + " inserts to new update bucket " + bucket); } + if (profile.hasOutputWorkLoadStats()) { + outputWorkloadStats.addInserts(smallFile.location, recordsToAppend); + } bucketNumbers.add(bucket); recordsPerBucket.add(recordsToAppend); totalUnassignedInserts -= recordsToAppend; @@ -183,6 +196,9 @@ private void assignInserts(WorkloadProfile profile, HoodieEngineContext context) } BucketInfo bucketInfo = new BucketInfo(BucketType.INSERT, FSUtils.createNewFileIdPfx(), partitionPath); bucketInfoMap.put(totalBuckets, bucketInfo); + if (profile.hasOutputWorkLoadStats()) { + outputWorkloadStats.addInserts(new HoodieRecordLocation(HoodieWriteStat.NULL_COMMIT, bucketInfo.getFileIdPrefix()), recordsPerBucket.get(recordsPerBucket.size() - 1)); + } totalBuckets++; } } @@ -200,11 +216,19 @@ private void assignInserts(WorkloadProfile profile, HoodieEngineContext context) LOG.info("Total insert buckets for partition path " + partitionPath + " => " + insertBuckets); partitionPathToInsertBucketInfos.put(partitionPath, insertBuckets); } + if (profile.hasOutputWorkLoadStats()) { + profile.updateOutputPartitionPathStatMap(partitionPath, outputWorkloadStats); + } } } private Map> getSmallFilesForPartitions(List partitionPaths, HoodieEngineContext context) { Map> partitionSmallFilesMap = new HashMap<>(); + + if (config.getParquetSmallFileLimit() <= 0) { + return partitionSmallFilesMap; + } + if (partitionPaths != null && partitionPaths.size() > 0) { context.setJobStatus(this.getClass().getSimpleName(), "Getting small files from partitions"); partitionSmallFilesMap = context.mapToPair(partitionPaths, @@ -266,7 +290,7 @@ public int getPartition(Object key) { String partitionPath = keyLocation.getLeft().getPartitionPath(); List targetBuckets = partitionPathToInsertBucketInfos.get(partitionPath); // pick the target bucket to use based on the weights. 
- final long totalInserts = Math.max(1, profile.getWorkloadStat(partitionPath).getNumInserts()); + final long totalInserts = Math.max(1, workloadProfile.getWorkloadStat(partitionPath).getNumInserts()); final long hashOfKey = NumericUtils.getMessageDigestHash("MD5", keyLocation.getLeft().getRecordKey()); final double r = 1.0 * Math.floorMod(hashOfKey, totalInserts) / totalInserts; diff --git a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/commit/JavaWriteHelper.java b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/commit/JavaWriteHelper.java index 8af7707ea2f98..3a1fa4b884fd0 100644 --- a/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/commit/JavaWriteHelper.java +++ b/hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/commit/JavaWriteHelper.java @@ -21,6 +21,7 @@ import org.apache.hudi.client.WriteStatus; import org.apache.hudi.common.data.HoodieList; import org.apache.hudi.common.engine.HoodieEngineContext; +import org.apache.hudi.common.model.HoodieAvroRecord; import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.model.HoodieRecordPayload; @@ -33,7 +34,7 @@ import java.util.Objects; import java.util.stream.Collectors; -public class JavaWriteHelper extends AbstractWriteHelper>, +public class JavaWriteHelper extends BaseWriteHelper>, List, List, R> { private JavaWriteHelper() { @@ -55,7 +56,7 @@ protected List> tag(List> dedupedRecords, Hoodie @Override public List> deduplicateRecords( - List> records, HoodieIndex index, int parallelism) { + List> records, HoodieIndex index, int parallelism) { boolean isIndexingGlobal = index.isGlobal(); Map>>> keyedRecords = records.stream().map(record -> { HoodieKey hoodieKey = record.getKey(); @@ -70,7 +71,7 @@ public List> deduplicateRecords( // we cannot allow the user to change the key or partitionPath, since that will affect // everything // so pick it from one of the records. - return new HoodieRecord(rec1.getKey(), reducedData); + return new HoodieAvroRecord(rec1.getKey(), reducedData); }).orElse(null)).filter(Objects::nonNull).collect(Collectors.toList()); } } diff --git a/hudi-client/hudi-java-client/src/test/java/org/apache/hudi/table/action/commit/TestHoodieConcatHandle.java b/hudi-client/hudi-java-client/src/test/java/org/apache/hudi/table/action/commit/TestHoodieConcatHandle.java new file mode 100644 index 0000000000000..d81b76b0f4577 --- /dev/null +++ b/hudi-client/hudi-java-client/src/test/java/org/apache/hudi/table/action/commit/TestHoodieConcatHandle.java @@ -0,0 +1,228 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hudi.table.action.commit; + +import org.apache.hudi.client.HoodieJavaWriteClient; +import org.apache.hudi.common.bloom.BloomFilter; +import org.apache.hudi.common.engine.EngineType; +import org.apache.hudi.common.fs.FSUtils; +import org.apache.hudi.common.model.HoodieAvroRecord; +import org.apache.hudi.common.model.HoodieKey; +import org.apache.hudi.common.model.HoodieRecord; +import org.apache.hudi.common.model.HoodieTableType; +import org.apache.hudi.common.table.HoodieTableMetaClient; +import org.apache.hudi.common.testutils.HoodieTestDataGenerator; +import org.apache.hudi.common.testutils.HoodieTestUtils; +import org.apache.hudi.common.testutils.RawTripTestPayload; +import org.apache.hudi.common.util.BaseFileUtils; +import org.apache.hudi.config.HoodieWriteConfig; +import org.apache.hudi.hadoop.HoodieParquetInputFormat; +import org.apache.hudi.hadoop.utils.HoodieHiveUtils; +import org.apache.hudi.testutils.HoodieJavaClientTestBase; + +import org.apache.avro.Schema; +import org.apache.avro.generic.GenericRecord; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.mapred.FileInputFormat; +import org.apache.hadoop.mapred.JobConf; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; + +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.List; + +import static org.apache.hudi.common.testutils.HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA; +import static org.apache.hudi.common.testutils.HoodieTestTable.makeNewCommitTime; +import static org.apache.hudi.common.testutils.SchemaTestUtil.getSchemaFromResource; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +public class TestHoodieConcatHandle extends HoodieJavaClientTestBase { + private static final Schema SCHEMA = getSchemaFromResource(TestJavaCopyOnWriteActionExecutor.class, "/exampleSchema.avsc"); + + private HoodieWriteConfig.Builder makeHoodieClientConfigBuilder() { + return makeHoodieClientConfigBuilder(SCHEMA.toString()); + } + + private HoodieWriteConfig.Builder makeHoodieClientConfigBuilder(String schema) { + // Prepare the AvroParquetIO + return HoodieWriteConfig.newBuilder() + .withEngineType(EngineType.JAVA) + .withPath(basePath) + .withSchema(schema); + } + + private FileStatus[] getIncrementalFiles(String partitionPath, String startCommitTime, int numCommitsToPull) + throws Exception { + // initialize parquet input format + HoodieParquetInputFormat hoodieInputFormat = new HoodieParquetInputFormat(); + JobConf jobConf = new JobConf(hadoopConf); + hoodieInputFormat.setConf(jobConf); + HoodieTestUtils.init(hadoopConf, basePath, HoodieTableType.COPY_ON_WRITE); + setupIncremental(jobConf, startCommitTime, numCommitsToPull); + FileInputFormat.setInputPaths(jobConf, Paths.get(basePath, partitionPath).toString()); + return hoodieInputFormat.listStatus(jobConf); + } + + private void setupIncremental(JobConf jobConf, String startCommit, int numberOfCommitsToPull) { + String modePropertyName = + String.format(HoodieHiveUtils.HOODIE_CONSUME_MODE_PATTERN, HoodieTestUtils.RAW_TRIPS_TEST_NAME); + jobConf.set(modePropertyName, HoodieHiveUtils.INCREMENTAL_SCAN_MODE); + + String startCommitTimestampName = + String.format(HoodieHiveUtils.HOODIE_START_COMMIT_PATTERN, HoodieTestUtils.RAW_TRIPS_TEST_NAME); + jobConf.set(startCommitTimestampName, startCommit); + + String maxCommitPulls = + 
String.format(HoodieHiveUtils.HOODIE_MAX_COMMIT_PATTERN, HoodieTestUtils.RAW_TRIPS_TEST_NAME); + jobConf.setInt(maxCommitPulls, numberOfCommitsToPull); + } + + @Test + public void testInsert() throws Exception { + HoodieWriteConfig config = makeHoodieClientConfigBuilder().withMergeAllowDuplicateOnInserts(true).build(); + + HoodieJavaWriteClient writeClient = getHoodieWriteClient(config); + metaClient = HoodieTableMetaClient.reload(metaClient); + BaseFileUtils fileUtils = BaseFileUtils.getInstance(metaClient); + + // Get some records belonging to the same partition (2021/09/11) + String insertRecordStr1 = "{\"_row_key\":\"1\"," + + "\"time\":\"2021-09-11T16:16:41.415Z\",\"number\":1}"; + String insertRecordStr2 = "{\"_row_key\":\"2\"," + + "\"time\":\"2021-09-11T16:16:41.415Z\",\"number\":2}"; + List records1 = new ArrayList<>(); + RawTripTestPayload insertRow1 = new RawTripTestPayload(insertRecordStr1); + RawTripTestPayload insertRow2 = new RawTripTestPayload(insertRecordStr2); + records1.add(new HoodieAvroRecord(new HoodieKey(insertRow1.getRowKey(), insertRow1.getPartitionPath()), insertRow1)); + records1.add(new HoodieAvroRecord(new HoodieKey(insertRow2.getRowKey(), insertRow2.getPartitionPath()), insertRow2)); + + int startInstant = 1; + String firstCommitTime = makeNewCommitTime(startInstant++); + // First insert + writeClient.startCommitWithTime(firstCommitTime); + writeClient.insert(records1, firstCommitTime); + + String partitionPath = "2021/09/11"; + FileStatus[] allFiles = getIncrementalFiles(partitionPath, "0", -1); + assertEquals(1, allFiles.length); + + // Read out the bloom filter and make sure it can answer whether a record exists + Path filePath = allFiles[0].getPath(); + BloomFilter filter = fileUtils.readBloomFilterFromMetadata(hadoopConf, filePath); + for (HoodieRecord record : records1) { + assertTrue(filter.mightContain(record.getRecordKey())); + } + + insertRecordStr1 = "{\"_row_key\":\"1\"," + + "\"time\":\"2021-09-11T16:39:41.415Z\",\"number\":3}"; + insertRecordStr2 = "{\"_row_key\":\"2\"," + + "\"time\":\"2021-09-11T16:39:41.415Z\",\"number\":4}"; + + List records2 = new ArrayList<>(); + insertRow1 = new RawTripTestPayload(insertRecordStr1); + insertRow2 = new RawTripTestPayload(insertRecordStr2); + // The record keys of records2 and records1 are the same, but the values of other fields differ + records2.add(new HoodieAvroRecord(new HoodieKey(insertRow1.getRowKey(), insertRow1.getPartitionPath()), insertRow1)); + records2.add(new HoodieAvroRecord(new HoodieKey(insertRow2.getRowKey(), insertRow2.getPartitionPath()), insertRow2)); + + String newCommitTime = makeNewCommitTime(startInstant++); + writeClient.startCommitWithTime(newCommitTime); + // The second insert has the same _row_key as the first one; tests allowDuplicateInserts + writeClient.insert(records2, newCommitTime); + + allFiles = getIncrementalFiles(partitionPath, firstCommitTime, -1); + assertEquals(1, allFiles.length); + // verify the new incremental file group is the same as the previous one + assertEquals(FSUtils.getFileId(filePath.getName()), FSUtils.getFileId(allFiles[0].getPath().getName())); + + filePath = allFiles[0].getPath(); + // The final result should be a collection of records1 and records2 + records1.addAll(records2); + + // Read the base file, check the record content + List fileRecords = fileUtils.readAvroRecords(hadoopConf, filePath); + int index = 0; + for (GenericRecord record : fileRecords) { + assertEquals(records1.get(index).getRecordKey(), record.get("_row_key").toString()); + assertEquals(index + 1, record.get("number")); + index++; + } + } + + @ParameterizedTest + @ValueSource(booleans = {false, true}) + public void testInsertWithDataGenerator(boolean mergeAllowDuplicateOnInsertsEnable) throws Exception { + HoodieWriteConfig config = makeHoodieClientConfigBuilder(TRIP_EXAMPLE_SCHEMA) + .withMergeAllowDuplicateOnInserts(mergeAllowDuplicateOnInsertsEnable).build(); + + HoodieJavaWriteClient writeClient = getHoodieWriteClient(config); + metaClient = HoodieTableMetaClient.reload(metaClient); + BaseFileUtils fileUtils = BaseFileUtils.getInstance(metaClient); + + String partitionPath = "2021/09/11"; + HoodieTestDataGenerator dataGenerator = new HoodieTestDataGenerator(new String[]{partitionPath}); + + int startInstant = 1; + String firstCommitTime = makeNewCommitTime(startInstant++); + List records1 = dataGenerator.generateInserts(firstCommitTime, 100); + + // First insert + writeClient.startCommitWithTime(firstCommitTime); + writeClient.insert(records1, firstCommitTime); + + FileStatus[] allFiles = getIncrementalFiles(partitionPath, "0", -1); + assertEquals(1, allFiles.length); + + // Read out the bloom filter and make sure it can answer whether a record exists + Path filePath = allFiles[0].getPath(); + BloomFilter filter = fileUtils.readBloomFilterFromMetadata(hadoopConf, filePath); + for (HoodieRecord record : records1) { + assertTrue(filter.mightContain(record.getRecordKey())); + } + + String newCommitTime = makeNewCommitTime(startInstant++); + List records2 = dataGenerator.generateUpdates(newCommitTime, 100); + writeClient.startCommitWithTime(newCommitTime); + // The second insert has the same _row_key as the first one; tests allowDuplicateInserts + writeClient.insert(records2, newCommitTime); + + allFiles = getIncrementalFiles(partitionPath, firstCommitTime, -1); + assertEquals(1, allFiles.length); + // verify the new incremental file group is the same as the previous one + assertEquals(FSUtils.getFileId(filePath.getName()), FSUtils.getFileId(allFiles[0].getPath().getName())); + + filePath = allFiles[0].getPath(); + // If mergeAllowDuplicateOnInsertsEnable is true, the final result should be a collection of records1 and records2 + records1.addAll(records2); + + // Read the base file, check the record content + List fileRecords = fileUtils.readAvroRecords(hadoopConf, filePath); + assertEquals(fileRecords.size(), mergeAllowDuplicateOnInsertsEnable ?
records1.size() : records2.size()); + + int index = 0; + for (GenericRecord record : fileRecords) { + assertEquals(records1.get(index).getRecordKey(), record.get("_row_key").toString()); + index++; + } + } +} diff --git a/hudi-client/hudi-java-client/src/test/java/org/apache/hudi/table/action/commit/TestJavaCopyOnWriteActionExecutor.java b/hudi-client/hudi-java-client/src/test/java/org/apache/hudi/table/action/commit/TestJavaCopyOnWriteActionExecutor.java index 796d7b74a83c5..793b26703011e 100644 --- a/hudi-client/hudi-java-client/src/test/java/org/apache/hudi/table/action/commit/TestJavaCopyOnWriteActionExecutor.java +++ b/hudi-client/hudi-java-client/src/test/java/org/apache/hudi/table/action/commit/TestJavaCopyOnWriteActionExecutor.java @@ -23,6 +23,7 @@ import org.apache.hudi.common.bloom.BloomFilter; import org.apache.hudi.common.engine.EngineType; import org.apache.hudi.common.fs.FSUtils; +import org.apache.hudi.common.model.HoodieAvroRecord; import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.model.HoodieTableType; @@ -42,6 +43,8 @@ import org.apache.hudi.table.HoodieJavaCopyOnWriteTable; import org.apache.hudi.table.HoodieJavaTable; import org.apache.hudi.table.HoodieTable; +import org.apache.hudi.testutils.HoodieJavaClientTestBase; +import org.apache.hudi.testutils.MetadataMergeWriteStatus; import org.apache.avro.Schema; import org.apache.avro.generic.GenericRecord; @@ -49,8 +52,6 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.mapred.FileInputFormat; import org.apache.hadoop.mapred.JobConf; -import org.apache.hudi.testutils.HoodieJavaClientTestBase; -import org.apache.hudi.testutils.MetadataMergeWriteStatus; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; import org.apache.parquet.avro.AvroReadSupport; @@ -121,14 +122,14 @@ private HoodieWriteConfig.Builder makeHoodieClientConfigBuilder() { public void testUpdateRecords() throws Exception { // Prepare the AvroParquetIO HoodieWriteConfig config = makeHoodieClientConfig(); - String firstCommitTime = makeNewCommitTime(); + int startInstant = 1; + String firstCommitTime = makeNewCommitTime(startInstant++); HoodieJavaWriteClient writeClient = getHoodieWriteClient(config); writeClient.startCommitWithTime(firstCommitTime); metaClient = HoodieTableMetaClient.reload(metaClient); BaseFileUtils fileUtils = BaseFileUtils.getInstance(metaClient); String partitionPath = "2016/01/31"; - HoodieJavaCopyOnWriteTable table = (HoodieJavaCopyOnWriteTable) HoodieJavaTable.create(config, context, metaClient); // Get some records belong to the same partition (2016/01/31) String recordStr1 = "{\"_row_key\":\"8eb5b87a-1feh-4edd-87b4-6ec96dc405a0\"," @@ -142,14 +143,13 @@ public void testUpdateRecords() throws Exception { List records = new ArrayList<>(); RawTripTestPayload rowChange1 = new RawTripTestPayload(recordStr1); - records.add(new HoodieRecord(new HoodieKey(rowChange1.getRowKey(), rowChange1.getPartitionPath()), rowChange1)); + records.add(new HoodieAvroRecord(new HoodieKey(rowChange1.getRowKey(), rowChange1.getPartitionPath()), rowChange1)); RawTripTestPayload rowChange2 = new RawTripTestPayload(recordStr2); - records.add(new HoodieRecord(new HoodieKey(rowChange2.getRowKey(), rowChange2.getPartitionPath()), rowChange2)); + records.add(new HoodieAvroRecord(new HoodieKey(rowChange2.getRowKey(), rowChange2.getPartitionPath()), rowChange2)); RawTripTestPayload rowChange3 = new RawTripTestPayload(recordStr3); - records.add(new HoodieRecord(new 
HoodieKey(rowChange3.getRowKey(), rowChange3.getPartitionPath()), rowChange3)); + records.add(new HoodieAvroRecord(new HoodieKey(rowChange3.getRowKey(), rowChange3.getPartitionPath()), rowChange3)); // Insert new records - final HoodieJavaCopyOnWriteTable cowTable = table; writeClient.insert(records, firstCommitTime); FileStatus[] allFiles = getIncrementalFiles(partitionPath, "0", -1); @@ -176,17 +176,16 @@ public void testUpdateRecords() throws Exception { String updateRecordStr1 = "{\"_row_key\":\"8eb5b87a-1feh-4edd-87b4-6ec96dc405a0\"," + "\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":15}"; RawTripTestPayload updateRowChanges1 = new RawTripTestPayload(updateRecordStr1); - HoodieRecord updatedRecord1 = new HoodieRecord( + HoodieRecord updatedRecord1 = new HoodieAvroRecord( new HoodieKey(updateRowChanges1.getRowKey(), updateRowChanges1.getPartitionPath()), updateRowChanges1); RawTripTestPayload rowChange4 = new RawTripTestPayload(recordStr4); HoodieRecord insertedRecord1 = - new HoodieRecord(new HoodieKey(rowChange4.getRowKey(), rowChange4.getPartitionPath()), rowChange4); + new HoodieAvroRecord(new HoodieKey(rowChange4.getRowKey(), rowChange4.getPartitionPath()), rowChange4); List updatedRecords = Arrays.asList(updatedRecord1, insertedRecord1); - Thread.sleep(1000); - String newCommitTime = makeNewCommitTime(); + String newCommitTime = makeNewCommitTime(startInstant++); metaClient = HoodieTableMetaClient.reload(metaClient); writeClient.startCommitWithTime(newCommitTime); List statuses = writeClient.upsert(updatedRecords, newCommitTime); @@ -197,9 +196,9 @@ public void testUpdateRecords() throws Exception { assertEquals(FSUtils.getFileId(filePath.getName()), FSUtils.getFileId(allFiles[0].getPath().getName())); // Check whether the record has been updated - Path updatedfilePath = allFiles[0].getPath(); + Path updatedFilePath = allFiles[0].getPath(); BloomFilter updatedFilter = - fileUtils.readBloomFilterFromMetadata(hadoopConf, updatedfilePath); + fileUtils.readBloomFilterFromMetadata(hadoopConf, updatedFilePath); for (HoodieRecord record : records) { // No change to the _row_key assertTrue(updatedFilter.mightContain(record.getRecordKey())); @@ -208,7 +207,7 @@ public void testUpdateRecords() throws Exception { assertTrue(updatedFilter.mightContain(insertedRecord1.getRecordKey())); records.add(insertedRecord1);// add this so it can further check below - ParquetReader updatedReader = ParquetReader.builder(new AvroReadSupport<>(), updatedfilePath).build(); + ParquetReader updatedReader = ParquetReader.builder(new AvroReadSupport<>(), updatedFilePath).build(); index = 0; while ((newRecord = (GenericRecord) updatedReader.read()) != null) { assertEquals(newRecord.get("_row_key").toString(), records.get(index).getRecordKey()); @@ -256,7 +255,7 @@ private List newHoodieRecords(int n, String time) throws Exception String recordStr = String.format("{\"_row_key\":\"%s\",\"time\":\"%s\",\"number\":%d}", UUID.randomUUID().toString(), time, i); RawTripTestPayload rowChange = new RawTripTestPayload(recordStr); - records.add(new HoodieRecord(new HoodieKey(rowChange.getRowKey(), rowChange.getPartitionPath()), rowChange)); + records.add(new HoodieAvroRecord(new HoodieKey(rowChange.getRowKey(), rowChange.getPartitionPath()), rowChange)); } return records; } @@ -282,11 +281,11 @@ public void testMetadataAggregateFromWriteStatus() throws Exception { List records = new ArrayList<>(); RawTripTestPayload rowChange1 = new RawTripTestPayload(recordStr1); - records.add(new HoodieRecord(new 
HoodieKey(rowChange1.getRowKey(), rowChange1.getPartitionPath()), rowChange1)); + records.add(new HoodieAvroRecord(new HoodieKey(rowChange1.getRowKey(), rowChange1.getPartitionPath()), rowChange1)); RawTripTestPayload rowChange2 = new RawTripTestPayload(recordStr2); - records.add(new HoodieRecord(new HoodieKey(rowChange2.getRowKey(), rowChange2.getPartitionPath()), rowChange2)); + records.add(new HoodieAvroRecord(new HoodieKey(rowChange2.getRowKey(), rowChange2.getPartitionPath()), rowChange2)); RawTripTestPayload rowChange3 = new RawTripTestPayload(recordStr3); - records.add(new HoodieRecord(new HoodieKey(rowChange3.getRowKey(), rowChange3.getPartitionPath()), rowChange3)); + records.add(new HoodieAvroRecord(new HoodieKey(rowChange3.getRowKey(), rowChange3.getPartitionPath()), rowChange3)); // Insert new records BaseJavaCommitActionExecutor actionExecutor = new JavaInsertCommitActionExecutor(context, config, table, @@ -384,7 +383,7 @@ public void testFileSizeUpsertRecords() throws Exception { String recordStr = "{\"_row_key\":\"" + UUID.randomUUID().toString() + "\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":" + i + "}"; RawTripTestPayload rowChange = new RawTripTestPayload(recordStr); - records.add(new HoodieRecord(new HoodieKey(rowChange.getRowKey(), rowChange.getPartitionPath()), rowChange)); + records.add(new HoodieAvroRecord(new HoodieKey(rowChange.getRowKey(), rowChange.getPartitionPath()), rowChange)); } // Insert new records diff --git a/hudi-client/hudi-spark-client/pom.xml b/hudi-client/hudi-spark-client/pom.xml index e4a8fd56b6a65..d6c60cb61bc45 100644 --- a/hudi-client/hudi-spark-client/pom.xml +++ b/hudi-client/hudi-spark-client/pom.xml @@ -169,9 +169,9 @@ - org.awaitility - awaitility - test + org.awaitility + awaitility + test diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/async/SparkAsyncClusteringService.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/async/SparkAsyncClusteringService.java index ce436ba034d98..8f6535b11d9b3 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/async/SparkAsyncClusteringService.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/async/SparkAsyncClusteringService.java @@ -19,8 +19,8 @@ package org.apache.hudi.async; -import org.apache.hudi.client.AbstractClusteringClient; -import org.apache.hudi.client.AbstractHoodieWriteClient; +import org.apache.hudi.client.BaseClusterer; +import org.apache.hudi.client.BaseHoodieWriteClient; import org.apache.hudi.client.HoodieSparkClusteringClient; /** @@ -28,12 +28,12 @@ */ public class SparkAsyncClusteringService extends AsyncClusteringService { - public SparkAsyncClusteringService(AbstractHoodieWriteClient writeClient) { + public SparkAsyncClusteringService(BaseHoodieWriteClient writeClient) { super(writeClient); } @Override - protected AbstractClusteringClient createClusteringClient(AbstractHoodieWriteClient client) { + protected BaseClusterer createClusteringClient(BaseHoodieWriteClient client) { return new HoodieSparkClusteringClient(client); } } diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/async/SparkAsyncCompactService.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/async/SparkAsyncCompactService.java index 5235a3cd0a15d..d54fe386bd06b 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/async/SparkAsyncCompactService.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/async/SparkAsyncCompactService.java @@ -18,19 +18,19 @@ 
package org.apache.hudi.async; -import org.apache.hudi.client.AbstractCompactor; -import org.apache.hudi.client.AbstractHoodieWriteClient; +import org.apache.hudi.client.BaseCompactor; +import org.apache.hudi.client.BaseHoodieWriteClient; import org.apache.hudi.client.HoodieSparkCompactor; import org.apache.hudi.common.engine.HoodieEngineContext; public class SparkAsyncCompactService extends AsyncCompactService { - public SparkAsyncCompactService(HoodieEngineContext context, AbstractHoodieWriteClient client) { + public SparkAsyncCompactService(HoodieEngineContext context, BaseHoodieWriteClient client) { super(context, client); } @Override - protected AbstractCompactor createCompactor(AbstractHoodieWriteClient client) { + protected BaseCompactor createCompactor(BaseHoodieWriteClient client) { return new HoodieSparkCompactor(client, this.context); } } diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/HoodieReadClient.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/HoodieReadClient.java index 84040f906ce32..e9bdc427e8356 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/HoodieReadClient.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/HoodieReadClient.java @@ -18,9 +18,9 @@ package org.apache.hudi.client; -import org.apache.hadoop.conf.Configuration; import org.apache.hudi.avro.model.HoodieCompactionPlan; import org.apache.hudi.client.common.HoodieSparkEngineContext; +import org.apache.hudi.common.model.HoodieAvroRecord; import org.apache.hudi.common.model.HoodieBaseFile; import org.apache.hudi.common.model.HoodieFileFormat; import org.apache.hudi.common.model.HoodieKey; @@ -39,6 +39,7 @@ import org.apache.hudi.table.HoodieSparkTable; import org.apache.hudi.table.HoodieTable; +import org.apache.hadoop.conf.Configuration; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.JavaRDD; @@ -64,9 +65,9 @@ public class HoodieReadClient> implements Seria /** * TODO: We need to persist the index type into hoodie.properties and be able to access the index just with a simple - * basepath pointing to the table. Until, then just always assume a BloomIndex + * base path pointing to the table. Until then, just always assume a BloomIndex */ - private final transient HoodieIndex index; + private final transient HoodieIndex index; private HoodieTable>, JavaRDD, JavaRDD> hoodieTable; private transient Option sqlContextOpt; private final transient HoodieSparkEngineContext context; @@ -172,7 +173,7 @@ public Dataset readROView(JavaRDD hoodieKeys, int parallelism) { */ public JavaPairRDD>> checkExists(JavaRDD hoodieKeys) { return HoodieJavaRDD.getJavaRDD( - index.tagLocation(HoodieJavaRDD.of(hoodieKeys.map(k -> new HoodieRecord<>(k, null))), + index.tagLocation(HoodieJavaRDD.of(hoodieKeys.map(k -> new HoodieAvroRecord<>(k, null))), context, hoodieTable)) .mapToPair(hr -> new Tuple2<>(hr.getKey(), hr.isCurrentLocationKnown() ?
Option.of(Pair.of(hr.getPartitionPath(), hr.getCurrentLocation().getFileId())) diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/HoodieSparkClusteringClient.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/HoodieSparkClusteringClient.java index 16e54a21551c6..0812b366aadac 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/HoodieSparkClusteringClient.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/HoodieSparkClusteringClient.java @@ -38,12 +38,12 @@ * Async clustering client for Spark datasource. */ public class HoodieSparkClusteringClient extends - AbstractClusteringClient>, JavaRDD, JavaRDD> { + BaseClusterer>, JavaRDD, JavaRDD> { private static final Logger LOG = LogManager.getLogger(HoodieSparkClusteringClient.class); public HoodieSparkClusteringClient( - AbstractHoodieWriteClient>, JavaRDD, JavaRDD> clusteringClient) { + BaseHoodieWriteClient>, JavaRDD, JavaRDD> clusteringClient) { super(clusteringClient); } diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/HoodieSparkCompactor.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/HoodieSparkCompactor.java index 60a064ab32bf9..b3dc27b6fc65b 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/HoodieSparkCompactor.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/HoodieSparkCompactor.java @@ -22,33 +22,36 @@ import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.model.HoodieRecordPayload; +import org.apache.hudi.common.model.HoodieWriteStat; import org.apache.hudi.common.table.timeline.HoodieInstant; import org.apache.hudi.common.util.Option; import org.apache.hudi.exception.HoodieException; +import org.apache.hudi.table.action.HoodieWriteMetadata; + import org.apache.log4j.LogManager; import org.apache.log4j.Logger; import org.apache.spark.api.java.JavaRDD; -import java.io.IOException; +import java.util.List; -public class HoodieSparkCompactor extends AbstractCompactor extends BaseCompactor>, JavaRDD, JavaRDD> { private static final Logger LOG = LogManager.getLogger(HoodieSparkCompactor.class); private transient HoodieEngineContext context; - public HoodieSparkCompactor(AbstractHoodieWriteClient>, JavaRDD, JavaRDD> compactionClient, + public HoodieSparkCompactor(BaseHoodieWriteClient>, JavaRDD, JavaRDD> compactionClient, HoodieEngineContext context) { super(compactionClient); this.context = context; } @Override - public void compact(HoodieInstant instant) throws IOException { + public void compact(HoodieInstant instant) { LOG.info("Compactor executing compaction " + instant); SparkRDDWriteClient writeClient = (SparkRDDWriteClient) compactionClient; - JavaRDD res = writeClient.compact(instant.getTimestamp()); - this.context.setJobStatus(this.getClass().getSimpleName(), "Collect compaction write status"); - long numWriteErrors = res.collect().stream().filter(WriteStatus::hasErrors).count(); + HoodieWriteMetadata> compactionMetadata = writeClient.compact(instant.getTimestamp()); + List writeStats = compactionMetadata.getCommitMetadata().get().getWriteStats(); + long numWriteErrors = writeStats.stream().mapToLong(HoodieWriteStat::getTotalWriteErrors).sum(); if (numWriteErrors != 0) { // We treat even a single error in compaction as fatal LOG.error("Compaction for instant (" + instant + ") failed with write errors. 
Errors :" + numWriteErrors); @@ -56,6 +59,6 @@ public void compact(HoodieInstant instant) throws IOException { "Compaction for instant (" + instant + ") failed with write errors. Errors :" + numWriteErrors); } // Commit compaction - writeClient.commitCompaction(instant.getTimestamp(), res, Option.empty()); + writeClient.commitCompaction(instant.getTimestamp(), compactionMetadata.getCommitMetadata().get(), Option.empty()); } } diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/SparkRDDWriteClient.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/SparkRDDWriteClient.java index 9b2aad3ebafa1..d51d25616c70d 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/SparkRDDWriteClient.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/SparkRDDWriteClient.java @@ -65,7 +65,6 @@ import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; -import java.io.IOException; import java.nio.charset.StandardCharsets; import java.text.ParseException; import java.util.List; @@ -74,7 +73,7 @@ @SuppressWarnings("checkstyle:LineLength") public class SparkRDDWriteClient extends - AbstractHoodieWriteClient>, JavaRDD, JavaRDD> { + BaseHoodieWriteClient>, JavaRDD, JavaRDD> { private static final Logger LOG = LogManager.getLogger(SparkRDDWriteClient.class); @@ -286,26 +285,24 @@ protected JavaRDD postWrite(HoodieWriteMetadata writeStatuses, Option> extraMetadata) throws IOException { + public void commitCompaction(String compactionInstantTime, HoodieCommitMetadata metadata, Option> extraMetadata) { HoodieSparkTable table = HoodieSparkTable.create(config, context); - HoodieCommitMetadata metadata = CompactHelpers.getInstance().createCompactionMetadata( - table, compactionInstantTime, HoodieJavaRDD.of(writeStatuses), config.getSchema()); extraMetadata.ifPresent(m -> m.forEach(metadata::addMetadata)); - completeCompaction(metadata, writeStatuses, table, compactionInstantTime); + completeCompaction(metadata, table, compactionInstantTime); } @Override - protected void completeCompaction(HoodieCommitMetadata metadata, JavaRDD writeStatuses, + protected void completeCompaction(HoodieCommitMetadata metadata, HoodieTable>, JavaRDD, JavaRDD> table, String compactionCommitTime) { this.context.setJobStatus(this.getClass().getSimpleName(), "Collect compaction write status and commit compaction"); - List writeStats = writeStatuses.map(WriteStatus::getStat).collect(); + List writeStats = metadata.getWriteStats(); final HoodieInstant compactionInstant = new HoodieInstant(HoodieInstant.State.INFLIGHT, HoodieTimeline.COMPACTION_ACTION, compactionCommitTime); try { this.txnManager.beginTransaction(Option.of(compactionInstant), Option.empty()); finalizeWrite(table, compactionCommitTime, writeStats); // commit to data table after committing to metadata table. - writeTableMetadataForTableServices(table, metadata, compactionInstant); + updateTableMetadata(table, metadata, compactionInstant); LOG.info("Committing Compaction " + compactionCommitTime + ". 
Finished with result " + metadata); CompactHelpers.getInstance().completeInflightCompaction(table, compactionCommitTime, metadata); } finally { @@ -327,7 +324,7 @@ protected void completeCompaction(HoodieCommitMetadata metadata, JavaRDD compact(String compactionInstantTime, boolean shouldComplete) { + protected HoodieWriteMetadata> compact(String compactionInstantTime, boolean shouldComplete) { HoodieSparkTable table = HoodieSparkTable.create(config, context, true); preWrite(compactionInstantTime, WriteOperationType.COMPACT, table.getMetaClient()); HoodieTimeline pendingCompactionTimeline = table.getActiveTimeline().filterPendingCompactionTimeline(); @@ -339,11 +336,10 @@ protected JavaRDD compact(String compactionInstantTime, boolean sho compactionTimer = metrics.getCompactionCtx(); HoodieWriteMetadata> compactionMetadata = table.compact(context, compactionInstantTime); - JavaRDD statuses = compactionMetadata.getWriteStatuses(); if (shouldComplete && compactionMetadata.getCommitMetadata().isPresent()) { - completeTableService(TableServiceType.COMPACT, compactionMetadata.getCommitMetadata().get(), statuses, table, compactionInstantTime); + completeTableService(TableServiceType.COMPACT, compactionMetadata.getCommitMetadata().get(), table, compactionInstantTime); } - return statuses; + return compactionMetadata; } @Override @@ -359,15 +355,14 @@ public HoodieWriteMetadata> cluster(String clusteringInstan clusteringTimer = metrics.getClusteringCtx(); LOG.info("Starting clustering at " + clusteringInstant); HoodieWriteMetadata> clusteringMetadata = table.cluster(context, clusteringInstant); - JavaRDD statuses = clusteringMetadata.getWriteStatuses(); // TODO : Where is shouldComplete used ? if (shouldComplete && clusteringMetadata.getCommitMetadata().isPresent()) { - completeTableService(TableServiceType.CLUSTER, clusteringMetadata.getCommitMetadata().get(), statuses, table, clusteringInstant); + completeTableService(TableServiceType.CLUSTER, clusteringMetadata.getCommitMetadata().get(), table, clusteringInstant); } return clusteringMetadata; } - private void completeClustering(HoodieReplaceCommitMetadata metadata, JavaRDD writeStatuses, + private void completeClustering(HoodieReplaceCommitMetadata metadata, HoodieTable>, JavaRDD, JavaRDD> table, String clusteringCommitTime) { @@ -378,17 +373,20 @@ private void completeClustering(HoodieReplaceCommitMetadata metadata, JavaRDD s.getTotalWriteErrors() > 0L).map(s -> s.getFileId()).collect(Collectors.joining(","))); } + final HoodieInstant clusteringInstant = new HoodieInstant(HoodieInstant.State.INFLIGHT, HoodieTimeline.REPLACE_COMMIT_ACTION, clusteringCommitTime); try { this.txnManager.beginTransaction(Option.of(clusteringInstant), Option.empty()); + finalizeWrite(table, clusteringCommitTime, writeStats); - writeTableMetadataForTableServices(table, metadata,clusteringInstant); - // Update outstanding metadata indexes - if (config.isLayoutOptimizationEnabled() - && !config.getClusteringSortColumns().isEmpty()) { - table.updateMetadataIndexes(context, writeStats, clusteringCommitTime); - } + // Update table's metadata (table) + updateTableMetadata(table, metadata, clusteringInstant); + // Update tables' metadata indexes + // NOTE: This overlaps w/ metadata table (above) and will be reconciled in the future + table.updateMetadataIndexes(context, writeStats, clusteringCommitTime); + LOG.info("Committing Clustering " + clusteringCommitTime + ". 
Finished with result " + metadata); + table.getActiveTimeline().transitionReplaceInflightToComplete( HoodieTimeline.getReplaceCommitInflightInstant(clusteringCommitTime), Option.of(metadata.toJsonString().getBytes(StandardCharsets.UTF_8))); @@ -412,17 +410,18 @@ private void completeClustering(HoodieReplaceCommitMetadata metadata, JavaRDD>, JavaRDD, JavaRDD> table, HoodieCommitMetadata commitMetadata, - HoodieInstant hoodieInstant) { + private void updateTableMetadata(HoodieTable>, JavaRDD, JavaRDD> table, HoodieCommitMetadata commitMetadata, + HoodieInstant hoodieInstant) { boolean isTableServiceAction = table.isTableServiceAction(hoodieInstant.getAction()); // Do not do any conflict resolution here as we do with regular writes. We take the lock here to ensure all writes to metadata table happens within a // single lock (single writer). Because more than one write to metadata table will result in conflicts since all of them updates the same partition. - table.getMetadataWriter(hoodieInstant.getTimestamp()).ifPresent( - w -> w.update(commitMetadata, hoodieInstant.getTimestamp(), isTableServiceAction)); + table.getMetadataWriter(hoodieInstant.getTimestamp()) + .ifPresent(writer -> writer.update(commitMetadata, hoodieInstant.getTimestamp(), isTableServiceAction)); } @Override - protected HoodieTable>, JavaRDD, JavaRDD> getTableAndInitCtx(WriteOperationType operationType, String instantTime) { + protected HoodieTable>, JavaRDD, JavaRDD> getTableAndInitCtx(WriteOperationType operationType, + String instantTime) { HoodieTableMetaClient metaClient = createMetaClient(true); UpgradeDowngrade upgradeDowngrade = new UpgradeDowngrade( metaClient, config, context, SparkUpgradeDowngradeHelper.getInstance()); @@ -439,8 +438,11 @@ protected HoodieTable>, JavaRDD, JavaRDD inFlightInstantTimestamp) { } // TODO : To enforce priority between table service and ingestion writer, use transactions here and invoke strategy - private void completeTableService(TableServiceType tableServiceType, HoodieCommitMetadata metadata, JavaRDD writeStatuses, + private void completeTableService(TableServiceType tableServiceType, HoodieCommitMetadata metadata, HoodieTable>, JavaRDD, JavaRDD> table, String commitInstant) { switch (tableServiceType) { case CLUSTER: - completeClustering((HoodieReplaceCommitMetadata) metadata, writeStatuses, table, commitInstant); + completeClustering((HoodieReplaceCommitMetadata) metadata, table, commitInstant); break; case COMPACT: - completeCompaction(metadata, writeStatuses, table, commitInstant); + completeCompaction(metadata, table, commitInstant); break; default: throw new IllegalArgumentException("This table service is not valid " + tableServiceType); @@ -497,7 +499,7 @@ private HoodieTable>, JavaRDD, JavaRDD rdd.unpersist()); + // If we do not explicitly release the resource, spark will automatically manage the resource and clean it up automatically + // see: https://spark.apache.org/docs/latest/rdd-programming-guide.html#removing-data + if (config.areReleaseResourceEnabled()) { + ((HoodieSparkEngineContext) context).getJavaSparkContext().getPersistentRDDs().values() + .forEach(JavaRDD::unpersist); + } } } diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/clustering/plan/strategy/SparkSizeBasedClusteringPlanStrategy.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/clustering/plan/strategy/SparkSizeBasedClusteringPlanStrategy.java index b38931c2d93d1..6629569d096b3 100644 --- 
a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/clustering/plan/strategy/SparkSizeBasedClusteringPlanStrategy.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/clustering/plan/strategy/SparkSizeBasedClusteringPlanStrategy.java @@ -87,7 +87,7 @@ protected Stream buildClusteringGroupsForPartition(String // Add to the current file-group currentGroup.add(currentSlice); - // assume each filegroup size is ~= parquet.max.file.size + // assume each file group size is ~= parquet.max.file.size totalSizeSoFar += currentSlice.getBaseFile().isPresent() ? currentSlice.getBaseFile().get().getFileSize() : writeConfig.getParquetMaxFileSize(); } @@ -118,7 +118,7 @@ protected Map getStrategyParams() { @Override protected Stream getFileSlicesEligibleForClustering(final String partition) { return super.getFileSlicesEligibleForClustering(partition) - // Only files that have basefile size smaller than small file size are eligible. + // Only files that have base file size smaller than small file size are eligible. .filter(slice -> slice.getBaseFile().map(HoodieBaseFile::getFileSize).orElse(0L) < getWriteConfig().getClusteringSmallFileLimit()); } diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/clustering/run/strategy/MultipleSparkJobExecutionStrategy.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/clustering/run/strategy/MultipleSparkJobExecutionStrategy.java index c88b848ddf8cf..91d1f4e4e4fa2 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/clustering/run/strategy/MultipleSparkJobExecutionStrategy.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/clustering/run/strategy/MultipleSparkJobExecutionStrategy.java @@ -26,8 +26,10 @@ import org.apache.hudi.client.common.HoodieSparkEngineContext; import org.apache.hudi.client.utils.ConcatenatingIterator; import org.apache.hudi.common.config.TypedProperties; +import org.apache.hudi.common.data.HoodieData; import org.apache.hudi.common.engine.HoodieEngineContext; import org.apache.hudi.common.model.ClusteringOperation; +import org.apache.hudi.common.model.HoodieAvroRecord; import org.apache.hudi.common.model.HoodieFileGroupId; import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.model.HoodieRecord; @@ -39,11 +41,13 @@ import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.StringUtils; import org.apache.hudi.common.util.collection.Pair; +import org.apache.hudi.config.HoodieClusteringConfig; import org.apache.hudi.config.HoodieWriteConfig; +import org.apache.hudi.data.HoodieJavaRDD; import org.apache.hudi.exception.HoodieClusteringException; import org.apache.hudi.exception.HoodieIOException; import org.apache.hudi.execution.bulkinsert.RDDCustomColumnsSortPartitioner; -import org.apache.hudi.execution.bulkinsert.RDDSpatialCurveOptimizationSortPartitioner; +import org.apache.hudi.execution.bulkinsert.RDDSpatialCurveSortPartitioner; import org.apache.hudi.io.IOUtils; import org.apache.hudi.io.storage.HoodieFileReader; import org.apache.hudi.io.storage.HoodieFileReaderFactory; @@ -80,7 +84,7 @@ * Clustering strategy to submit multiple spark jobs and union the results. 
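For context on the `getPartitioner` change in this strategy, shown below: the sort partitioner is now chosen by dispatching on the configured layout optimization strategy. A hedged sketch follows; the enum mirrors `HoodieClusteringConfig.LayoutOptimizationStrategy`, but `Partitioner` and the two implementations are simplified stand-ins, not Hudi's classes.

```java
import java.util.Optional;

// Hypothetical sketch of the strategy-to-partitioner dispatch used below.
final class PartitionerDispatch {
  enum LayoutOptimizationStrategy { LINEAR, ZORDER, HILBERT }

  interface Partitioner {}

  static final class SpatialCurveSortPartitioner implements Partitioner {
    SpatialCurveSortPartitioner(String[] orderByColumns, LayoutOptimizationStrategy strategy) {
      // would sort records along a Z-order/Hilbert space-filling curve
    }
  }

  static final class CustomColumnsSortPartitioner implements Partitioner {
    CustomColumnsSortPartitioner(String[] orderByColumns) {
      // would sort records lexicographically on the given columns
    }
  }

  // No sort columns configured => no partitioner; otherwise pick by strategy.
  static Optional<Partitioner> getPartitioner(String sortColumnsCsv,
                                              LayoutOptimizationStrategy strategy) {
    return Optional.ofNullable(sortColumnsCsv)
        .map(csv -> csv.split(","))
        .map(columns -> {
          switch (strategy) {
            case ZORDER:
            case HILBERT:
              // Space-filling curves cluster data on several columns at once.
              return (Partitioner) new SpatialCurveSortPartitioner(columns, strategy);
            case LINEAR:
              return new CustomColumnsSortPartitioner(columns);
            default:
              throw new UnsupportedOperationException("Unsupported strategy: " + strategy);
          }
        });
  }
}
```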
*/ public abstract class MultipleSparkJobExecutionStrategy> - extends ClusteringExecutionStrategy>, JavaRDD, JavaRDD> { + extends ClusteringExecutionStrategy>, HoodieData, HoodieData> { private static final Logger LOG = LogManager.getLogger(MultipleSparkJobExecutionStrategy.class); public MultipleSparkJobExecutionStrategy(HoodieTable table, HoodieEngineContext engineContext, HoodieWriteConfig writeConfig) { @@ -88,7 +92,7 @@ public MultipleSparkJobExecutionStrategy(HoodieTable table, HoodieEngineContext } @Override - public HoodieWriteMetadata> performClustering(final HoodieClusteringPlan clusteringPlan, final Schema schema, final String instantTime) { + public HoodieWriteMetadata> performClustering(final HoodieClusteringPlan clusteringPlan, final Schema schema, final String instantTime) { JavaSparkContext engineContext = HoodieSparkEngineContext.getSparkContext(getEngineContext()); // execute clustering for each group async and collect WriteStatus Stream> writeStatusRDDStream = FutureUtils.allOf( @@ -103,8 +107,8 @@ public HoodieWriteMetadata> performClustering(final HoodieC JavaRDD[] writeStatuses = convertStreamToArray(writeStatusRDDStream); JavaRDD writeStatusRDD = engineContext.union(writeStatuses); - HoodieWriteMetadata> writeMetadata = new HoodieWriteMetadata<>(); - writeMetadata.setWriteStatuses(writeStatusRDD); + HoodieWriteMetadata> writeMetadata = new HoodieWriteMetadata<>(); + writeMetadata.setWriteStatuses(HoodieJavaRDD.of(writeStatusRDD)); return writeMetadata; } @@ -133,17 +137,29 @@ public abstract JavaRDD performClusteringWithRecordsRDD(final JavaR * @param schema Schema of the data including metadata fields. * @return {@link RDDCustomColumnsSortPartitioner} if sort columns are provided, otherwise empty. */ - protected Option> getPartitioner(Map strategyParams, Schema schema) { - if (getWriteConfig().isLayoutOptimizationEnabled()) { - // sort input records by z-order/hilbert - return Option.of(new RDDSpatialCurveOptimizationSortPartitioner((HoodieSparkEngineContext) getEngineContext(), - getWriteConfig(), HoodieAvroUtils.addMetadataFields(schema))); - } else if (strategyParams.containsKey(PLAN_STRATEGY_SORT_COLUMNS.key())) { - return Option.of(new RDDCustomColumnsSortPartitioner(strategyParams.get(PLAN_STRATEGY_SORT_COLUMNS.key()).split(","), - HoodieAvroUtils.addMetadataFields(schema), getWriteConfig().isConsistentLogicalTimestampEnabled())); - } else { - return Option.empty(); - } + protected Option>>> getPartitioner(Map strategyParams, Schema schema) { + Option orderByColumnsOpt = + Option.ofNullable(strategyParams.get(PLAN_STRATEGY_SORT_COLUMNS.key())) + .map(listStr -> listStr.split(",")); + + return orderByColumnsOpt.map(orderByColumns -> { + HoodieClusteringConfig.LayoutOptimizationStrategy layoutOptStrategy = getWriteConfig().getLayoutOptimizationStrategy(); + switch (layoutOptStrategy) { + case ZORDER: + case HILBERT: + return new RDDSpatialCurveSortPartitioner( + (HoodieSparkEngineContext) getEngineContext(), + orderByColumns, + layoutOptStrategy, + getWriteConfig().getLayoutOptimizationCurveBuildMethod(), + HoodieAvroUtils.addMetadataFields(schema)); + case LINEAR: + return new RDDCustomColumnsSortPartitioner(orderByColumns, HoodieAvroUtils.addMetadataFields(schema), + getWriteConfig().isConsistentLogicalTimestampEnabled()); + default: + throw new UnsupportedOperationException(String.format("Layout optimization strategy '%s' is not supported", layoutOptStrategy)); + } + }); } /** @@ -278,7 +294,7 @@ private HoodieRecord transform(IndexedRecord indexedRecord) { 
HoodieKey hoodieKey = new HoodieKey(key, partition); HoodieRecordPayload avroPayload = new RewriteAvroPayload(record); - HoodieRecord hoodieRecord = new HoodieRecord(hoodieKey, avroPayload); + HoodieRecord hoodieRecord = new HoodieAvroRecord(hoodieKey, avroPayload); return hoodieRecord; } } diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/clustering/run/strategy/SingleSparkJobExecutionStrategy.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/clustering/run/strategy/SingleSparkJobExecutionStrategy.java index 98bf9151fc9ef..1158d0ada42f0 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/clustering/run/strategy/SingleSparkJobExecutionStrategy.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/clustering/run/strategy/SingleSparkJobExecutionStrategy.java @@ -25,10 +25,12 @@ import org.apache.hudi.client.utils.ConcatenatingIterator; import org.apache.hudi.common.config.SerializableSchema; import org.apache.hudi.common.config.TypedProperties; +import org.apache.hudi.common.data.HoodieData; import org.apache.hudi.common.engine.HoodieEngineContext; import org.apache.hudi.common.engine.TaskContextSupplier; import org.apache.hudi.common.model.ClusteringGroupInfo; import org.apache.hudi.common.model.ClusteringOperation; +import org.apache.hudi.common.model.HoodieAvroRecord; import org.apache.hudi.common.model.HoodieFileGroupId; import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.model.HoodieRecord; @@ -36,6 +38,7 @@ import org.apache.hudi.common.model.RewriteAvroPayload; import org.apache.hudi.common.util.Option; import org.apache.hudi.config.HoodieWriteConfig; +import org.apache.hudi.data.HoodieJavaRDD; import org.apache.hudi.exception.HoodieClusteringException; import org.apache.hudi.exception.HoodieIOException; import org.apache.hudi.io.storage.HoodieFileReaderFactory; @@ -70,7 +73,7 @@ * MultipleSparkJobExecution strategy is not ideal for use cases that require large number of clustering groups */ public abstract class SingleSparkJobExecutionStrategy> - extends ClusteringExecutionStrategy>, JavaRDD, JavaRDD> { + extends ClusteringExecutionStrategy>, HoodieData, HoodieData> { private static final Logger LOG = LogManager.getLogger(SingleSparkJobExecutionStrategy.class); public SingleSparkJobExecutionStrategy(HoodieTable table, HoodieEngineContext engineContext, HoodieWriteConfig writeConfig) { @@ -78,7 +81,7 @@ public SingleSparkJobExecutionStrategy(HoodieTable table, HoodieEngineContext en } @Override - public HoodieWriteMetadata> performClustering(final HoodieClusteringPlan clusteringPlan, final Schema schema, final String instantTime) { + public HoodieWriteMetadata> performClustering(final HoodieClusteringPlan clusteringPlan, final Schema schema, final String instantTime) { JavaSparkContext engineContext = HoodieSparkEngineContext.getSparkContext(getEngineContext()); final TaskContextSupplier taskContextSupplier = getEngineContext().getTaskContextSupplier(); final SerializableSchema serializableSchema = new SerializableSchema(schema); @@ -103,8 +106,8 @@ public HoodieWriteMetadata> performClustering(final HoodieC ).iterator(); }); - HoodieWriteMetadata> writeMetadata = new HoodieWriteMetadata<>(); - writeMetadata.setWriteStatuses(writeStatusRDD); + HoodieWriteMetadata> writeMetadata = new HoodieWriteMetadata<>(); + writeMetadata.setWriteStatuses(HoodieJavaRDD.of(writeStatusRDD)); return writeMetadata; } @@ -124,8 +127,8 @@ private Stream 
runClusteringForGroup(ClusteringGroupInfo clustering Iterator> writeStatuses = performClusteringWithRecordsIterator(inputRecords, clusteringOps.getNumOutputGroups(), instantTime, strategyParams, schema.get(), inputFileIds, preserveHoodieMetadata, taskContextSupplier); - Iterable> writestatusIterable = () -> writeStatuses; - return StreamSupport.stream(writestatusIterable.spliterator(), false) + Iterable> writeStatusIterable = () -> writeStatuses; + return StreamSupport.stream(writeStatusIterable.spliterator(), false) .flatMap(writeStatusList -> writeStatusList.stream()); } @@ -181,7 +184,7 @@ private HoodieRecord transform(IndexedRecord indexedRecord) { HoodieKey hoodieKey = new HoodieKey(key, partition); HoodieRecordPayload avroPayload = new RewriteAvroPayload(record); - HoodieRecord hoodieRecord = new HoodieRecord(hoodieKey, avroPayload); + HoodieRecord hoodieRecord = new HoodieAvroRecord(hoodieKey, avroPayload); return hoodieRecord; } } diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/clustering/update/strategy/BaseSparkUpdateStrategy.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/clustering/update/strategy/BaseSparkUpdateStrategy.java new file mode 100644 index 0000000000000..655c11983e46b --- /dev/null +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/clustering/update/strategy/BaseSparkUpdateStrategy.java @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.client.clustering.update.strategy; + +import org.apache.hudi.client.common.HoodieSparkEngineContext; +import org.apache.hudi.common.model.HoodieFileGroupId; +import org.apache.hudi.common.model.HoodieRecord; +import org.apache.hudi.common.model.HoodieRecordPayload; +import org.apache.hudi.table.action.cluster.strategy.UpdateStrategy; + +import org.apache.spark.api.java.JavaRDD; + +import java.util.List; +import java.util.Set; + +/** + * Base Spark update strategy: records written to file groups that are under pending + * clustering need to be checked. Spark-specific implementations should extend this base class. + */ +public abstract class BaseSparkUpdateStrategy> extends UpdateStrategy>> { + + public BaseSparkUpdateStrategy(HoodieSparkEngineContext engineContext, + Set fileGroupsInPendingClustering) { + super(engineContext, fileGroupsInPendingClustering); + } + + /** + * Get the file group ids that the given records update.
+ * @param inputRecords the records to write, tagged with target file id + * @return the file group ids matched by the given records + */ + protected List getGroupIdsWithUpdate(JavaRDD> inputRecords) { + return inputRecords + .filter(record -> record.getCurrentLocation() != null) + .map(record -> new HoodieFileGroupId(record.getPartitionPath(), record.getCurrentLocation().getFileId())).distinct().collect(); + } +} diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/clustering/update/strategy/SparkAllowUpdateStrategy.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/clustering/update/strategy/SparkAllowUpdateStrategy.java index 403a0c2e1ca87..92a5fb69a7cd9 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/clustering/update/strategy/SparkAllowUpdateStrategy.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/clustering/update/strategy/SparkAllowUpdateStrategy.java @@ -23,7 +23,6 @@ import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.model.HoodieRecordPayload; import org.apache.hudi.common.util.collection.Pair; -import org.apache.hudi.table.action.cluster.strategy.UpdateStrategy; import org.apache.spark.api.java.JavaRDD; @@ -35,20 +34,13 @@ /** * Allow ingestion commits during clustering job. */ -public class SparkAllowUpdateStrategy> extends UpdateStrategy>> { +public class SparkAllowUpdateStrategy> extends BaseSparkUpdateStrategy { - public SparkAllowUpdateStrategy( - HoodieSparkEngineContext engineContext, HashSet fileGroupsInPendingClustering) { + public SparkAllowUpdateStrategy(HoodieSparkEngineContext engineContext, + HashSet fileGroupsInPendingClustering) { super(engineContext, fileGroupsInPendingClustering); } - private List getGroupIdsWithUpdate(JavaRDD> inputRecords) { - List fileGroupIdsWithUpdates = inputRecords - .filter(record -> record.getCurrentLocation() != null) - .map(record -> new HoodieFileGroupId(record.getPartitionPath(), record.getCurrentLocation().getFileId())).distinct().collect(); - return fileGroupIdsWithUpdates; - } - @Override public Pair>, Set> handleUpdate(JavaRDD> taggedRecordsRDD) { List fileGroupIdsWithRecordUpdate = getGroupIdsWithUpdate(taggedRecordsRDD); diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/clustering/update/strategy/SparkRejectUpdateStrategy.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/clustering/update/strategy/SparkRejectUpdateStrategy.java index b12d9ad435713..ac058a4d85095 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/clustering/update/strategy/SparkRejectUpdateStrategy.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/clustering/update/strategy/SparkRejectUpdateStrategy.java @@ -24,7 +24,6 @@ import org.apache.hudi.common.model.HoodieRecordPayload; import org.apache.hudi.common.util.collection.Pair; import org.apache.hudi.exception.HoodieClusteringUpdateException; -import org.apache.hudi.table.action.cluster.strategy.UpdateStrategy; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; @@ -37,22 +36,16 @@ /** * Update strategy based on following.
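The `getGroupIdsWithUpdate` helper hoisted into the base class above reduces, in essence, to a filter-map-distinct over the index-tagged records. An engine-agnostic sketch using plain Java streams in place of a `JavaRDD`; `TaggedRecord` and `FileGroupId` are simplified stand-ins for Hudi's types:

```java
import java.util.List;
import java.util.Objects;
import java.util.stream.Collectors;

// Hypothetical sketch of collecting the distinct file groups that updates land in.
final class UpdateGroupIds {
  interface TaggedRecord {
    String getPartitionPath();
    String getCurrentFileId(); // null when index lookup found no existing location (an insert)
  }

  static final class FileGroupId {
    final String partitionPath;
    final String fileId;
    FileGroupId(String partitionPath, String fileId) {
      this.partitionPath = partitionPath;
      this.fileId = fileId;
    }
    @Override
    public boolean equals(Object o) {
      if (!(o instanceof FileGroupId)) {
        return false;
      }
      FileGroupId that = (FileGroupId) o;
      return partitionPath.equals(that.partitionPath) && fileId.equals(that.fileId);
    }
    @Override
    public int hashCode() {
      return Objects.hash(partitionPath, fileId);
    }
  }

  // Records with a known current location are updates; collect the distinct
  // file groups those updates would touch.
  static List<FileGroupId> groupIdsWithUpdate(List<TaggedRecord> taggedRecords) {
    return taggedRecords.stream()
        .filter(record -> record.getCurrentFileId() != null)
        .map(record -> new FileGroupId(record.getPartitionPath(), record.getCurrentFileId()))
        .distinct()
        .collect(Collectors.toList());
  }
}
```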
- * if some file group have update record, throw exception + * if some file groups have update records, throw an exception */ -public class SparkRejectUpdateStrategy> extends UpdateStrategy>> { +public class SparkRejectUpdateStrategy> extends BaseSparkUpdateStrategy { private static final Logger LOG = LogManager.getLogger(SparkRejectUpdateStrategy.class); - public SparkRejectUpdateStrategy(HoodieSparkEngineContext engineContext, HashSet fileGroupsInPendingClustering) { + public SparkRejectUpdateStrategy(HoodieSparkEngineContext engineContext, + HashSet fileGroupsInPendingClustering) { super(engineContext, fileGroupsInPendingClustering); } - private List getGroupIdsWithUpdate(JavaRDD> inputRecords) { - List fileGroupIdsWithUpdates = inputRecords - .filter(record -> record.getCurrentLocation() != null) - .map(record -> new HoodieFileGroupId(record.getPartitionPath(), record.getCurrentLocation().getFileId())).distinct().collect(); - return fileGroupIdsWithUpdates; - } - @Override public Pair>, Set> handleUpdate(JavaRDD> taggedRecordsRDD) { List fileGroupIdsWithRecordUpdate = getGroupIdsWithUpdate(taggedRecordsRDD); diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/utils/SparkValidatorUtils.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/utils/SparkValidatorUtils.java index 604abbd5c0282..9e72390e49f55 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/utils/SparkValidatorUtils.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/utils/SparkValidatorUtils.java @@ -31,13 +31,13 @@ import org.apache.hudi.table.HoodieTable; import org.apache.hudi.table.action.HoodieWriteMetadata; import org.apache.hudi.table.action.commit.BaseSparkCommitActionExecutor; + import org.apache.log4j.LogManager; import org.apache.log4j.Logger; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; import org.apache.spark.sql.SQLContext; -import scala.collection.JavaConverters; import java.util.Arrays; import java.util.HashSet; @@ -47,6 +47,8 @@ import java.util.stream.Collectors; import java.util.stream.Stream; +import scala.collection.JavaConverters; + /** * Spark validator utils to verify and run any precommit validators configured. */ @@ -97,7 +99,7 @@ public static void runValidators(HoodieWriteConfig config, } /** - * Run validators in a separate threadpool for parallelism. Each of validator can submit a distributed spark job if needed. + * Run validators in a separate thread pool for parallelism. Each validator can submit a distributed spark job if needed.
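The thread-pool idea described in the javadoc above can be sketched in isolation as follows. `Validator` is a hypothetical stand-in for `SparkPreCommitValidator` (which additionally receives the before/after table states), and the pool sizing is illustrative:

```
import java.util.List;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

public class ParallelValidatorSketch {

  // Hypothetical stand-in for SparkPreCommitValidator: real validators get the
  // before/after table states and may launch distributed Spark jobs internally.
  interface Validator {
    void validate() throws Exception;
  }

  static void runAll(List<Validator> validators) {
    ExecutorService pool = Executors.newFixedThreadPool(Math.max(1, validators.size()));
    try {
      // Launch every validator asynchronously, then block until all finish;
      // any failure is rethrown when the combined future is joined.
      CompletableFuture<?>[] futures = validators.stream()
          .map(v -> CompletableFuture.runAsync(() -> {
            try {
              v.validate();
            } catch (Exception e) {
              throw new RuntimeException("Validation failed", e);
            }
          }, pool))
          .toArray(CompletableFuture[]::new);
      CompletableFuture.allOf(futures).join();
    } finally {
      pool.shutdown();
    }
  }

  public static void main(String[] args) {
    runAll(List.of(
        () -> System.out.println("sql equality validator passed"),
        () -> System.out.println("sql inequality validator passed")));
  }
}
```

A failure in any one validator propagates out of `join()`, which matches the intent here: the commit is rejected as soon as any configured validator reports a violation.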
*/ private static CompletableFuture runValidatorAsync(SparkPreCommitValidator validator, HoodieWriteMetadata writeMetadata, Dataset beforeState, Dataset afterState, String instantTime) { diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/validator/SqlQueryInequalityPreCommitValidator.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/validator/SqlQueryInequalityPreCommitValidator.java index 454638c2daa70..026334fde0cde 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/validator/SqlQueryInequalityPreCommitValidator.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/validator/SqlQueryInequalityPreCommitValidator.java @@ -34,11 +34,11 @@ import org.apache.spark.sql.SQLContext; /** - * Validator to run sql query and compare table state + * Validator to run sql query and compare table state * 1) before new commit started. * 2) current inflight commit (if successful). - * - * Expects query results dont match. + *
<p>
+ * Expects that the query results do not match. */ public class SqlQueryInequalityPreCommitValidator> extends SqlQueryPreCommitValidator { private static final Logger LOG = LogManager.getLogger(SqlQueryInequalityPreCommitValidator.class); @@ -66,7 +66,7 @@ protected void validateUsingQuery(String query, String prevTableSnapshot, String LOG.info("Completed Inequality Validation, datasets equal? " + areDatasetsEqual); if (areDatasetsEqual) { LOG.error("query validation failed. See stdout for sample query results. Query: " + query); - System.out.println("Expected query results to be inequal, but they are same. Result (sample records only):"); + System.out.println("Expected query results to be different, but they are the same. Result (sample records only):"); prevRows.show(); throw new HoodieValidationException("Query validation failed for '" + query + "'. Expected " + prevRows.count() + " rows, Found " + newRows.count()); diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/validator/SqlQuerySingleResultPreCommitValidator.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/validator/SqlQuerySingleResultPreCommitValidator.java index 631f0e633c025..66e956dc59650 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/validator/SqlQuerySingleResultPreCommitValidator.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/validator/SqlQuerySingleResultPreCommitValidator.java @@ -35,9 +35,9 @@ import java.util.List; /** - * Validator to run sql queries on new table state and expects a single result. If the result doesnt match expected result, - * throw validation error. - * + * Validator to run sql queries on the new table state, expecting a single result. If the result does not match the expected result, + * throw a validation error. + *
<p>
* Example configuration: "query1#expectedResult1;query2#expectedResult2;" */ public class SqlQuerySingleResultPreCommitValidator> extends SqlQueryPreCommitValidator { diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/data/HoodieJavaRDD.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/data/HoodieJavaRDD.java index d4eb25963e5be..1381ea86e481c 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/data/HoodieJavaRDD.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/data/HoodieJavaRDD.java @@ -83,8 +83,8 @@ public JavaRDD get() { } @Override - public void persist(String storageLevel) { - rddData.persist(StorageLevel.fromString(storageLevel)); + public void persist(String cacheConfig) { + rddData.persist(StorageLevel.fromString(cacheConfig)); } @Override diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/execution/SparkBoundedInMemoryExecutor.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/execution/SparkBoundedInMemoryExecutor.java deleted file mode 100644 index d240c065d0834..0000000000000 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/execution/SparkBoundedInMemoryExecutor.java +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hudi.execution; - -import org.apache.hudi.common.util.Option; -import org.apache.hudi.common.util.queue.BoundedInMemoryExecutor; -import org.apache.hudi.common.util.queue.BoundedInMemoryQueueConsumer; -import org.apache.hudi.common.util.queue.BoundedInMemoryQueueProducer; -import org.apache.hudi.common.util.queue.IteratorBasedQueueProducer; -import org.apache.hudi.config.HoodieWriteConfig; - -import org.apache.spark.TaskContext; -import org.apache.spark.TaskContext$; - -import java.util.Iterator; -import java.util.function.Function; - -public class SparkBoundedInMemoryExecutor extends BoundedInMemoryExecutor { - - // Need to set current spark thread's TaskContext into newly launched thread so that new thread can access - // TaskContext properties. 
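The deleted comment above is the whole reason `SparkBoundedInMemoryExecutor` existed: Spark's `TaskContext` is thread-local, so a producer thread launched inside a task cannot see it unless it is re-installed. A minimal sketch of that hand-off, using the same two Spark calls the deleted class used (`TaskContext.get()` and `TaskContext$.MODULE$.setTaskContext`); the class name is illustrative and spark-core is assumed on the classpath:

```
import org.apache.spark.TaskContext;
import org.apache.spark.TaskContext$;

public class TaskContextHandoff {

  // Capture the TaskContext on the Spark task thread that builds the executor,
  // and return a Runnable that re-installs it on the producer thread later.
  // This mirrors preExecute() in the deleted SparkBoundedInMemoryExecutor.
  public static Runnable capturePreExecuteRunnable() {
    final TaskContext sparkTaskContext = TaskContext.get();
    return () -> TaskContext$.MODULE$.setTaskContext(sparkTaskContext);
  }
}
```

The refactored `SparkLazyInsertIterable` below achieves the same effect without a Spark-specific subclass, by passing `hoodieTable.getPreExecuteRunnable()` into the engine-agnostic `BoundedInMemoryExecutor`.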
- final TaskContext sparkThreadTaskContext; - - public SparkBoundedInMemoryExecutor(final HoodieWriteConfig hoodieConfig, final Iterator inputItr, - BoundedInMemoryQueueConsumer consumer, Function bufferedIteratorTransform) { - this(hoodieConfig, new IteratorBasedQueueProducer<>(inputItr), consumer, bufferedIteratorTransform); - } - - public SparkBoundedInMemoryExecutor(final HoodieWriteConfig hoodieConfig, BoundedInMemoryQueueProducer producer, - BoundedInMemoryQueueConsumer consumer, Function bufferedIteratorTransform) { - super(hoodieConfig.getWriteBufferLimitBytes(), producer, Option.of(consumer), bufferedIteratorTransform); - this.sparkThreadTaskContext = TaskContext.get(); - } - - @Override - public void preExecute() { - // Passing parent thread's TaskContext to newly launched thread for it to access original TaskContext properties. - TaskContext$.MODULE$.setTaskContext(sparkThreadTaskContext); - } -} diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/execution/SparkLazyInsertIterable.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/execution/SparkLazyInsertIterable.java index 088872bbd4381..a8a9e49c01c00 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/execution/SparkLazyInsertIterable.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/execution/SparkLazyInsertIterable.java @@ -18,7 +18,6 @@ package org.apache.hudi.execution; -import org.apache.avro.Schema; import org.apache.hudi.avro.HoodieAvroUtils; import org.apache.hudi.client.WriteStatus; import org.apache.hudi.common.engine.TaskContextSupplier; @@ -30,6 +29,8 @@ import org.apache.hudi.io.WriteHandleFactory; import org.apache.hudi.table.HoodieTable; +import org.apache.avro.Schema; + import java.util.Iterator; import java.util.List; @@ -84,8 +85,8 @@ protected List computeNext() { schema = HoodieAvroUtils.addMetadataFields(schema); } bufferedIteratorExecutor = - new SparkBoundedInMemoryExecutor<>(hoodieConfig, inputItr, getInsertHandler(), - getTransformFunction(schema, hoodieConfig)); + new BoundedInMemoryExecutor<>(hoodieConfig.getWriteBufferLimitBytes(), inputItr, getInsertHandler(), + getTransformFunction(schema, hoodieConfig), hoodieTable.getPreExecuteRunnable()); final List result = bufferedIteratorExecutor.execute(); assert result != null && !result.isEmpty() && !bufferedIteratorExecutor.isRemaining(); return result; diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/execution/bulkinsert/RDDSpatialCurveOptimizationSortPartitioner.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/execution/bulkinsert/RDDSpatialCurveSortPartitioner.java similarity index 51% rename from hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/execution/bulkinsert/RDDSpatialCurveOptimizationSortPartitioner.java rename to hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/execution/bulkinsert/RDDSpatialCurveSortPartitioner.java index ca7dfa3e7f2cd..219fb0b165972 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/execution/bulkinsert/RDDSpatialCurveOptimizationSortPartitioner.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/execution/bulkinsert/RDDSpatialCurveSortPartitioner.java @@ -18,100 +18,96 @@ package org.apache.hudi.execution.bulkinsert; -import org.apache.avro.Schema; -import org.apache.avro.generic.GenericRecord; import org.apache.hudi.AvroConversionUtils; import org.apache.hudi.HoodieSparkUtils; import org.apache.hudi.client.common.HoodieSparkEngineContext; 
import org.apache.hudi.common.config.SerializableSchema; +import org.apache.hudi.common.model.HoodieAvroRecord; import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.model.HoodieRecordPayload; import org.apache.hudi.common.model.RewriteAvroPayload; +import org.apache.hudi.common.util.Option; import org.apache.hudi.config.HoodieClusteringConfig; -import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.sort.SpaceCurveSortingHelper; import org.apache.hudi.table.BulkInsertPartitioner; + +import org.apache.avro.Schema; +import org.apache.avro.generic.GenericRecord; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; import java.util.Arrays; import java.util.List; -import java.util.stream.Collectors; - -import static org.apache.hudi.common.util.StringUtils.isNullOrEmpty; /** * A partitioner that does spatial curve optimization sorting based on specified column values for each RDD partition. * support z-curve optimization, hilbert will come soon. * @param HoodieRecordPayload type */ -public class RDDSpatialCurveOptimizationSortPartitioner +public class RDDSpatialCurveSortPartitioner implements BulkInsertPartitioner>> { - private final HoodieSparkEngineContext sparkEngineContext; - private final SerializableSchema serializableSchema; - private final HoodieWriteConfig config; - public RDDSpatialCurveOptimizationSortPartitioner(HoodieSparkEngineContext sparkEngineContext, HoodieWriteConfig config, Schema schema) { + private final HoodieSparkEngineContext sparkEngineContext; + private final String[] orderByColumns; + private final Schema schema; + private final HoodieClusteringConfig.LayoutOptimizationStrategy layoutOptStrategy; + private final HoodieClusteringConfig.SpatialCurveCompositionStrategyType curveCompositionStrategyType; + + public RDDSpatialCurveSortPartitioner(HoodieSparkEngineContext sparkEngineContext, + String[] orderByColumns, + HoodieClusteringConfig.LayoutOptimizationStrategy layoutOptStrategy, + HoodieClusteringConfig.SpatialCurveCompositionStrategyType curveCompositionStrategyType, + Schema schema) { this.sparkEngineContext = sparkEngineContext; - this.config = config; - this.serializableSchema = new SerializableSchema(schema); + this.orderByColumns = orderByColumns; + this.layoutOptStrategy = layoutOptStrategy; + this.curveCompositionStrategyType = curveCompositionStrategyType; + this.schema = schema; } @Override public JavaRDD> repartitionRecords(JavaRDD> records, int outputSparkPartitions) { - JavaRDD preparedRecord = prepareGenericRecord(records, outputSparkPartitions, serializableSchema.get()); - return preparedRecord.map(record -> { - String key = record.get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString(); - String partition = record.get(HoodieRecord.PARTITION_PATH_METADATA_FIELD).toString(); - HoodieKey hoodieKey = new HoodieKey(key, partition); - HoodieRecord hoodieRecord = new HoodieRecord(hoodieKey, new RewriteAvroPayload(record)); - return hoodieRecord; - }); - } - - private JavaRDD prepareGenericRecord(JavaRDD> inputRecords, final int numOutputGroups, final Schema schema) { SerializableSchema serializableSchema = new SerializableSchema(schema); - JavaRDD genericRecordJavaRDD = inputRecords.map(f -> (GenericRecord) f.getData().getInsertValue(serializableSchema.get()).get()); - Dataset originDF = + JavaRDD genericRecordsRDD = + records.map(f -> (GenericRecord) f.getData().getInsertValue(serializableSchema.get()).get()); 
+ + Dataset sourceDataset = AvroConversionUtils.createDataFrame( - genericRecordJavaRDD.rdd(), + genericRecordsRDD.rdd(), schema.toString(), sparkEngineContext.getSqlContext().sparkSession() ); - Dataset sortedDF = reorder(originDF, numOutputGroups); - - return HoodieSparkUtils.createRdd(sortedDF, schema.getName(), - schema.getNamespace(), false, org.apache.hudi.common.util.Option.empty()).toJavaRDD(); + Dataset sortedDataset = reorder(sourceDataset, outputSparkPartitions); + + return HoodieSparkUtils.createRdd(sortedDataset, schema.getName(), schema.getNamespace(), false, Option.empty()) + .toJavaRDD() + .map(record -> { + String key = record.get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString(); + String partition = record.get(HoodieRecord.PARTITION_PATH_METADATA_FIELD).toString(); + HoodieKey hoodieKey = new HoodieKey(key, partition); + HoodieRecord hoodieRecord = new HoodieAvroRecord(hoodieKey, new RewriteAvroPayload(record)); + return hoodieRecord; + }); } - private Dataset reorder(Dataset originDF, int numOutputGroups) { - String orderedColumnsListConfig = config.getClusteringSortColumns(); - - if (isNullOrEmpty(orderedColumnsListConfig) || numOutputGroups <= 0) { + private Dataset reorder(Dataset dataset, int numOutputGroups) { + if (orderByColumns.length == 0) { // No-op - return originDF; + return dataset; } - List orderedCols = - Arrays.stream(orderedColumnsListConfig.split(",")) - .map(String::trim) - .collect(Collectors.toList()); - - HoodieClusteringConfig.LayoutOptimizationStrategy layoutOptStrategy = - HoodieClusteringConfig.LayoutOptimizationStrategy.fromValue(config.getLayoutOptimizationStrategy()); - - HoodieClusteringConfig.BuildCurveStrategyType curveBuildStrategyType = config.getLayoutOptimizationCurveBuildMethod(); + List orderedCols = Arrays.asList(orderByColumns); - switch (curveBuildStrategyType) { + switch (curveCompositionStrategyType) { case DIRECT: - return SpaceCurveSortingHelper.orderDataFrameByMappingValues(originDF, layoutOptStrategy, orderedCols, numOutputGroups); + return SpaceCurveSortingHelper.orderDataFrameByMappingValues(dataset, layoutOptStrategy, orderedCols, numOutputGroups); case SAMPLE: - return SpaceCurveSortingHelper.orderDataFrameBySamplingValues(originDF, layoutOptStrategy, orderedCols, numOutputGroups); + return SpaceCurveSortingHelper.orderDataFrameBySamplingValues(dataset, layoutOptStrategy, orderedCols, numOutputGroups); default: - throw new UnsupportedOperationException(String.format("Unsupported space-curve curve building strategy (%s)", curveBuildStrategyType)); + throw new UnsupportedOperationException(String.format("Unsupported space-curve curve building strategy (%s)", curveCompositionStrategyType)); } } diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/index/SparkHoodieIndex.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/index/SparkHoodieIndex.java index 62bf5c100a949..aece86a3878ee 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/index/SparkHoodieIndex.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/index/SparkHoodieIndex.java @@ -24,7 +24,6 @@ import org.apache.hudi.client.WriteStatus; import org.apache.hudi.common.data.HoodieData; import org.apache.hudi.common.engine.HoodieEngineContext; -import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.model.HoodieRecordPayload; import org.apache.hudi.config.HoodieWriteConfig; @@ -36,7 +35,7 @@ 
@SuppressWarnings("checkstyle:LineLength") public abstract class SparkHoodieIndex> - extends HoodieIndex>, JavaRDD, JavaRDD> { + extends HoodieIndex>, JavaRDD> { protected SparkHoodieIndex(HoodieWriteConfig config) { super(config); } @@ -46,21 +45,23 @@ protected SparkHoodieIndex(HoodieWriteConfig config) { @PublicAPIMethod(maturity = ApiMaturityLevel.DEPRECATED) public abstract JavaRDD updateLocation(JavaRDD writeStatusRDD, HoodieEngineContext context, - HoodieTable>, JavaRDD, JavaRDD> hoodieTable) throws HoodieIndexException; + HoodieTable hoodieTable) throws HoodieIndexException; @Override @Deprecated @PublicAPIMethod(maturity = ApiMaturityLevel.DEPRECATED) public abstract JavaRDD> tagLocation(JavaRDD> records, HoodieEngineContext context, - HoodieTable>, JavaRDD, JavaRDD> hoodieTable) throws HoodieIndexException; + HoodieTable hoodieTable) throws HoodieIndexException; @Override @PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING) - public HoodieData> tagLocation( - HoodieData> records, HoodieEngineContext context, + public HoodieData> tagLocation( + HoodieData> records, HoodieEngineContext context, HoodieTable hoodieTable) throws HoodieIndexException { - return HoodieJavaRDD.of(tagLocation(HoodieJavaRDD.getJavaRDD(records), context, hoodieTable)); + return HoodieJavaRDD.of(tagLocation( + HoodieJavaRDD.getJavaRDD(records.map(record -> (HoodieRecord) record)), context, hoodieTable) + .map(r -> (HoodieRecord) r)); } @Override diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/index/SparkHoodieIndexFactory.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/index/SparkHoodieIndexFactory.java index 69e18714c49c2..d1f40dca484c5 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/index/SparkHoodieIndexFactory.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/index/SparkHoodieIndexFactory.java @@ -53,19 +53,19 @@ public static HoodieIndex createIndex(HoodieWriteConfig config) { } switch (config.getIndexType()) { case HBASE: - return new SparkHoodieHBaseIndex<>(config); + return new SparkHoodieHBaseIndex(config); case INMEMORY: - return new HoodieInMemoryHashIndex<>(config); + return new HoodieInMemoryHashIndex(config); case BUCKET: return new HoodieBucketIndex(config); case BLOOM: - return new HoodieBloomIndex<>(config, SparkHoodieBloomIndexHelper.getInstance()); + return new HoodieBloomIndex(config, SparkHoodieBloomIndexHelper.getInstance()); case GLOBAL_BLOOM: - return new HoodieGlobalBloomIndex<>(config, SparkHoodieBloomIndexHelper.getInstance()); + return new HoodieGlobalBloomIndex(config, SparkHoodieBloomIndexHelper.getInstance()); case SIMPLE: - return new HoodieSimpleIndex<>(config, getKeyGeneratorForSimpleIndex(config)); + return new HoodieSimpleIndex(config, getKeyGeneratorForSimpleIndex(config)); case GLOBAL_SIMPLE: - return new HoodieGlobalSimpleIndex<>(config, getKeyGeneratorForSimpleIndex(config)); + return new HoodieGlobalSimpleIndex(config, getKeyGeneratorForSimpleIndex(config)); default: throw new HoodieIndexException("Index type unspecified, set " + config.getIndexType()); } diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/index/bloom/HoodieBloomIndexCheckFunction.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/index/bloom/HoodieBloomIndexCheckFunction.java index 148203c9b71f1..e19a429ea7234 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/index/bloom/HoodieBloomIndexCheckFunction.java +++ 
b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/index/bloom/HoodieBloomIndexCheckFunction.java @@ -25,7 +25,7 @@ import org.apache.hudi.exception.HoodieException; import org.apache.hudi.exception.HoodieIndexException; import org.apache.hudi.io.HoodieKeyLookupHandle; -import org.apache.hudi.io.HoodieKeyLookupHandle.KeyLookupResult; +import org.apache.hudi.io.HoodieKeyLookupResult; import org.apache.hudi.table.HoodieTable; import org.apache.spark.api.java.function.Function2; @@ -40,7 +40,7 @@ * Function performing actual checking of RDD partition containing (fileId, hoodieKeys) against the actual files. */ public class HoodieBloomIndexCheckFunction - implements Function2>, Iterator>> { + implements Function2>, Iterator>> { private final HoodieTable hoodieTable; @@ -52,12 +52,12 @@ public HoodieBloomIndexCheckFunction(HoodieTable hoodieTable, HoodieWriteConfig } @Override - public Iterator> call(Integer partition, - Iterator> filePartitionRecordKeyTripletItr) { + public Iterator> call(Integer partition, + Iterator> filePartitionRecordKeyTripletItr) { return new LazyKeyCheckIterator(filePartitionRecordKeyTripletItr); } - class LazyKeyCheckIterator extends LazyIterableIterator, List> { + class LazyKeyCheckIterator extends LazyIterableIterator, List> { private HoodieKeyLookupHandle keyLookupHandle; @@ -70,9 +70,9 @@ protected void start() { } @Override - protected List computeNext() { + protected List computeNext() { - List ret = new ArrayList<>(); + List ret = new ArrayList<>(); try { // process one file in each go. while (inputItr.hasNext()) { @@ -88,7 +88,7 @@ protected List computeNext() { } // if continue on current file - if (keyLookupHandle.getPartitionPathFilePair().equals(partitionPathFilePair)) { + if (keyLookupHandle.getPartitionPathFileIDPair().equals(partitionPathFilePair)) { keyLookupHandle.addKey(recordKey); } else { // do the actual checking of file & break out diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/index/bloom/HoodieMetadataBloomIndexCheckFunction.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/index/bloom/HoodieMetadataBloomIndexCheckFunction.java new file mode 100644 index 0000000000000..32bca55099eda --- /dev/null +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/index/bloom/HoodieMetadataBloomIndexCheckFunction.java @@ -0,0 +1,161 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hudi.index.bloom; + +import org.apache.hadoop.fs.Path; +import org.apache.hudi.client.utils.LazyIterableIterator; +import org.apache.hudi.common.bloom.BloomFilterTypeCode; +import org.apache.hudi.common.bloom.HoodieDynamicBoundedBloomFilter; +import org.apache.hudi.common.fs.FSUtils; +import org.apache.hudi.common.model.HoodieBaseFile; +import org.apache.hudi.common.model.HoodieKey; +import org.apache.hudi.common.util.Option; +import org.apache.hudi.common.util.ValidationUtils; +import org.apache.hudi.common.util.collection.Pair; +import org.apache.hudi.exception.HoodieIndexException; +import org.apache.hudi.index.HoodieIndexUtils; +import org.apache.hudi.io.HoodieKeyLookupResult; +import org.apache.hudi.table.HoodieTable; +import org.apache.log4j.LogManager; +import org.apache.log4j.Logger; +import org.apache.spark.api.java.function.Function2; +import scala.Tuple2; + +import java.nio.ByteBuffer; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.concurrent.atomic.AtomicInteger; + +/** + * Spark Function2 implementation for checking bloom filters for the + * requested keys from the metadata table index. The bloom filter + * checking for keys and the actual file verification for the + * candidate keys is done in an iterative fashion. In each iteration, + * bloom filters are requested for a batch of partition files and the + * keys are checked against them. + */ +public class HoodieMetadataBloomIndexCheckFunction implements + Function2>, Iterator>> { + + private static final Logger LOG = LogManager.getLogger(HoodieMetadataBloomIndexCheckFunction.class); + + // Assuming each file bloom filter takes up 512K, sizing the max file count + // per batch so that the total fetched bloom filters would not cross 128 MB. 
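As a quick sanity check of the sizing comment above (both numbers come straight from the comment and the constant that follows; nothing else is assumed):

```
public class BloomFilterBatchSizing {
  public static void main(String[] args) {
    long assumedFilterSizeBytes = 512 * 1024L; // 512 KB per file, per the comment
    long maxFilesPerBatch = 256;               // BLOOM_FILTER_CHECK_MAX_FILE_COUNT_PER_BATCH
    long batchBytes = assumedFilterSizeBytes * maxFilesPerBatch;
    // 256 files x 512 KB = 134217728 bytes = 128 MB of bloom filters per batch, at most
    System.out.println(batchBytes / (1024 * 1024) + " MB"); // prints "128 MB"
  }
}
```

Capping the per-batch file count, rather than measuring actual filter bytes, keeps the check cheap while still bounding memory.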
+ private static final long BLOOM_FILTER_CHECK_MAX_FILE_COUNT_PER_BATCH = 256; + private final HoodieTable hoodieTable; + + public HoodieMetadataBloomIndexCheckFunction(HoodieTable hoodieTable) { + this.hoodieTable = hoodieTable; + } + + @Override + public Iterator> call(Integer integer, Iterator> tuple2Iterator) throws Exception { + return new BloomIndexLazyKeyCheckIterator(tuple2Iterator); + } + + private class BloomIndexLazyKeyCheckIterator extends LazyIterableIterator, List> { + public BloomIndexLazyKeyCheckIterator(Iterator> tuple2Iterator) { + super(tuple2Iterator); + } + + @Override + protected void start() { + } + + @Override + protected List computeNext() { + // Partition path and file name pair to list of keys + final Map, List> fileToKeysMap = new HashMap<>(); + final Map fileIDBaseFileMap = new HashMap<>(); + final List resultList = new ArrayList<>(); + + while (inputItr.hasNext()) { + Tuple2 entry = inputItr.next(); + final String partitionPath = entry._2.getPartitionPath(); + final String fileId = entry._1; + if (!fileIDBaseFileMap.containsKey(fileId)) { + Option baseFile = hoodieTable.getBaseFileOnlyView().getLatestBaseFile(partitionPath, fileId); + if (!baseFile.isPresent()) { + throw new HoodieIndexException("Failed to find the base file for partition: " + partitionPath + + ", fileId: " + fileId); + } + fileIDBaseFileMap.put(fileId, baseFile.get()); + } + fileToKeysMap.computeIfAbsent(Pair.of(partitionPath, fileIDBaseFileMap.get(fileId).getFileName()), + k -> new ArrayList<>()).add(entry._2); + if (fileToKeysMap.size() > BLOOM_FILTER_CHECK_MAX_FILE_COUNT_PER_BATCH) { + break; + } + } + if (fileToKeysMap.isEmpty()) { + return Collections.emptyList(); + } + + List> partitionNameFileNameList = new ArrayList<>(fileToKeysMap.keySet()); + Map, ByteBuffer> fileToBloomFilterMap = + hoodieTable.getMetadataTable().getBloomFilters(partitionNameFileNameList); + + final AtomicInteger totalKeys = new AtomicInteger(0); + fileToKeysMap.forEach((partitionPathFileNamePair, hoodieKeyList) -> { + final String partitionPath = partitionPathFileNamePair.getLeft(); + final String fileName = partitionPathFileNamePair.getRight(); + final String fileId = FSUtils.getFileId(fileName); + ValidationUtils.checkState(!fileId.isEmpty()); + + if (!fileToBloomFilterMap.containsKey(partitionPathFileNamePair)) { + throw new HoodieIndexException("Failed to get the bloom filter for " + partitionPathFileNamePair); + } + final ByteBuffer fileBloomFilterByteBuffer = fileToBloomFilterMap.get(partitionPathFileNamePair); + + HoodieDynamicBoundedBloomFilter fileBloomFilter = + new HoodieDynamicBoundedBloomFilter(StandardCharsets.UTF_8.decode(fileBloomFilterByteBuffer).toString(), + BloomFilterTypeCode.DYNAMIC_V0); + + List candidateRecordKeys = new ArrayList<>(); + hoodieKeyList.forEach(hoodieKey -> { + totalKeys.incrementAndGet(); + if (fileBloomFilter.mightContain(hoodieKey.getRecordKey())) { + candidateRecordKeys.add(hoodieKey.getRecordKey()); + } + }); + + final HoodieBaseFile dataFile = fileIDBaseFileMap.get(fileId); + List matchingKeys = + HoodieIndexUtils.filterKeysFromFile(new Path(dataFile.getPath()), candidateRecordKeys, + hoodieTable.getHadoopConf()); + LOG.debug( + String.format("Total records (%d), bloom filter candidates (%d)/fp(%d), actual matches (%d)", + hoodieKeyList.size(), candidateRecordKeys.size(), + candidateRecordKeys.size() - matchingKeys.size(), matchingKeys.size())); + + resultList.add(new HoodieKeyLookupResult(fileId, partitionPath, dataFile.getCommitTime(), matchingKeys)); + }); + return 
resultList; + } + + @Override + protected void end() { + } + } +} diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/index/bloom/SparkHoodieBloomIndexHelper.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/index/bloom/SparkHoodieBloomIndexHelper.java index bbb50d5cf6fff..1659fe016ca1d 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/index/bloom/SparkHoodieBloomIndexHelper.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/index/bloom/SparkHoodieBloomIndexHelper.java @@ -24,25 +24,23 @@ import org.apache.hudi.common.engine.HoodieEngineContext; import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.model.HoodieRecordLocation; -import org.apache.hudi.common.util.collection.ImmutablePair; import org.apache.hudi.common.util.collection.Pair; import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.data.HoodieJavaPairRDD; import org.apache.hudi.data.HoodieJavaRDD; +import org.apache.hudi.io.HoodieKeyLookupResult; import org.apache.hudi.table.HoodieTable; - import org.apache.log4j.LogManager; import org.apache.log4j.Logger; import org.apache.spark.Partitioner; import org.apache.spark.api.java.JavaRDD; +import scala.Tuple2; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.stream.Collectors; -import scala.Tuple2; - /** * Helper for {@link HoodieBloomIndex} containing Spark-specific logic. */ @@ -64,32 +62,44 @@ public static SparkHoodieBloomIndexHelper getInstance() { public HoodiePairData findMatchingFilesForRecordKeys( HoodieWriteConfig config, HoodieEngineContext context, HoodieTable hoodieTable, HoodiePairData partitionRecordKeyPairs, - HoodieData> fileComparisonPairs, + HoodieData> fileComparisonPairs, Map> partitionToFileInfo, Map recordsPerPartition) { JavaRDD> fileComparisonsRDD = HoodieJavaRDD.getJavaRDD(fileComparisonPairs) .map(pair -> new Tuple2<>(pair.getLeft(), pair.getRight())); - Map comparisonsPerFileGroup = computeComparisonsPerFileGroup( - config, recordsPerPartition, partitionToFileInfo, fileComparisonsRDD, context); - int inputParallelism = - HoodieJavaPairRDD.getJavaPairRDD(partitionRecordKeyPairs).partitions().size(); + + int inputParallelism = HoodieJavaPairRDD.getJavaPairRDD(partitionRecordKeyPairs).partitions().size(); int joinParallelism = Math.max(inputParallelism, config.getBloomIndexParallelism()); LOG.info("InputParallelism: ${" + inputParallelism + "}, IndexParallelism: ${" + config.getBloomIndexParallelism() + "}"); - if (config.useBloomIndexBucketizedChecking()) { + JavaRDD> keyLookupResultRDD; + if (config.isMetadataBloomFilterIndexEnabled()) { + // Step 1: Sort by file id + JavaRDD> sortedFileIdAndKeyPairs = + fileComparisonsRDD.sortBy(Tuple2::_1, true, joinParallelism); + + // Step 2: Use the bloom filter to prune keys, then check the actual base file to get the record location + keyLookupResultRDD = sortedFileIdAndKeyPairs.mapPartitionsWithIndex( + new HoodieMetadataBloomIndexCheckFunction(hoodieTable), true); + } else if (config.useBloomIndexBucketizedChecking()) { + Map comparisonsPerFileGroup = computeComparisonsPerFileGroup( + config, recordsPerPartition, partitionToFileInfo, fileComparisonsRDD, context); Partitioner partitioner = new BucketizedBloomCheckPartitioner(joinParallelism, comparisonsPerFileGroup, config.getBloomIndexKeysPerBucket()); - fileComparisonsRDD = fileComparisonsRDD.mapToPair(t -> new Tuple2<>(Pair.of(t._1, t._2.getRecordKey()), t)) -
.repartitionAndSortWithinPartitions(partitioner).map(Tuple2::_2); + keyLookupResultRDD = fileComparisonsRDD.mapToPair(t -> new Tuple2<>(Pair.of(t._1, t._2.getRecordKey()), t)) + .repartitionAndSortWithinPartitions(partitioner) + .map(Tuple2::_2) + .mapPartitionsWithIndex(new HoodieBloomIndexCheckFunction(hoodieTable, config), true); } else { - fileComparisonsRDD = fileComparisonsRDD.sortBy(Tuple2::_1, true, joinParallelism); + keyLookupResultRDD = fileComparisonsRDD.sortBy(Tuple2::_1, true, joinParallelism) + .mapPartitionsWithIndex(new HoodieBloomIndexCheckFunction(hoodieTable, config), true); } - return HoodieJavaPairRDD.of(fileComparisonsRDD.mapPartitionsWithIndex(new HoodieBloomIndexCheckFunction(hoodieTable, config), true) - .flatMap(List::iterator).filter(lr -> lr.getMatchingRecordKeys().size() > 0) + return HoodieJavaPairRDD.of(keyLookupResultRDD.flatMap(List::iterator) + .filter(lr -> lr.getMatchingRecordKeys().size() > 0) .flatMapToPair(lookupResult -> lookupResult.getMatchingRecordKeys().stream() .map(recordKey -> new Tuple2<>(new HoodieKey(recordKey, lookupResult.getPartitionPath()), new HoodieRecordLocation(lookupResult.getBaseInstantTime(), lookupResult.getFileId()))) diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/index/columnstats/ColumnStatsIndexHelper.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/index/columnstats/ColumnStatsIndexHelper.java index d92bac4d84714..521bdb20c58fc 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/index/columnstats/ColumnStatsIndexHelper.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/index/columnstats/ColumnStatsIndexHelper.java @@ -29,7 +29,6 @@ import org.apache.hudi.exception.HoodieException; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; -import org.apache.parquet.io.api.Binary; import org.apache.spark.SparkContext; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; @@ -62,6 +61,7 @@ import javax.annotation.Nonnull; import java.io.IOException; import java.math.BigDecimal; +import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -200,7 +200,7 @@ public static Dataset buildColumnStatsTableFor( indexRow.add(minMaxValue.getLeft()); // min indexRow.add(minMaxValue.getRight()); // max - indexRow.add(colMetadata.getNumNulls()); + indexRow.add(colMetadata.getNullCount()); }); return Row$.MODULE$.apply(JavaConversions.asScalaBuffer(indexRow)); @@ -262,10 +262,10 @@ public static void updateColumnStatsIndexFor( // │ │ ├── .parquet // │ │ └── ... 
// - // If index is currently empty (no persisted tables), we simply create one - // using clustering operation's commit instance as it's name Path newIndexTablePath = new Path(indexFolderPath, commitTime); + // If index is currently empty (no persisted tables), we simply create one + // using the clustering operation's commit instant as its name if (!fs.exists(new Path(indexFolderPath))) { newColStatsIndexDf.repartition(1) .write() @@ -326,6 +326,9 @@ public static void updateColumnStatsIndexFor( .repartition(1) .write() .format("parquet") + // NOTE: We intend to potentially overwrite index-table from the previous Clustering + // operation that has failed to commit + .mode("overwrite") .save(newIndexTablePath.toString()); // Clean up residual col-stats-index tables that have might have been dangling since @@ -419,9 +422,8 @@ private static String composeZIndexColName(String col, String statName) { ); } else if (colType instanceof StringType) { return Pair.of( - new String(((Binary) colMetadata.getMinValue()).getBytes()), - new String(((Binary) colMetadata.getMaxValue()).getBytes()) - ); + colMetadata.getMinValue().toString(), + colMetadata.getMaxValue().toString()); } else if (colType instanceof DecimalType) { return Pair.of( new BigDecimal(colMetadata.getMinValue().toString()), @@ -444,8 +446,8 @@ private static String composeZIndexColName(String col, String statName) { new Float(colMetadata.getMaxValue().toString())); } else if (colType instanceof BinaryType) { return Pair.of( - ((Binary) colMetadata.getMinValue()).getBytes(), - ((Binary) colMetadata.getMaxValue()).getBytes()); + ((ByteBuffer) colMetadata.getMinValue()).array(), + ((ByteBuffer) colMetadata.getMaxValue()).array()); } else if (colType instanceof BooleanType) { return Pair.of( Boolean.valueOf(colMetadata.getMinValue().toString()), diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/index/hbase/SparkHoodieHBaseIndex.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/index/hbase/SparkHoodieHBaseIndex.java index e940c0b8211c1..fc73a0aed7d70 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/index/hbase/SparkHoodieHBaseIndex.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/index/hbase/SparkHoodieHBaseIndex.java @@ -24,6 +24,7 @@ import org.apache.hudi.common.data.HoodieData; import org.apache.hudi.common.engine.HoodieEngineContext; import org.apache.hudi.common.model.EmptyHoodieRecordPayload; +import org.apache.hudi.common.model.HoodieAvroRecord; import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.model.HoodieRecordLocation; @@ -85,8 +86,7 @@ /** * Hoodie Index implementation backed by HBase. */ -public class SparkHoodieHBaseIndex> - extends HoodieIndex>, JavaRDD, JavaRDD> { +public class SparkHoodieHBaseIndex extends HoodieIndex { public static final String DEFAULT_SPARK_EXECUTOR_INSTANCES_CONFIG_NAME = "spark.executor.instances"; public static final String DEFAULT_SPARK_DYNAMIC_ALLOCATION_ENABLED_CONFIG_NAME = "spark.dynamicAllocation.enabled"; @@ -203,15 +203,13 @@ private boolean checkIfValidCommit(HoodieTableMetaClient metaClient, String comm /** * Function that tags each HoodieRecord with an existing location, if known. */ - private Function2>, Iterator>> locationTagFunction( + private Function2>, Iterator>> locationTagFunction( HoodieTableMetaClient metaClient) { // `multiGetBatchSize` is intended to be a batch per 100ms.
To create a rate limiter that measures // operations per second, we need to multiply `multiGetBatchSize` by 10. Integer multiGetBatchSize = config.getHbaseIndexGetBatchSize(); - return (Function2>, Iterator>>) (partitionNum, - hoodieRecordIterator) -> { - + return (partitionNum, hoodieRecordIterator) -> { boolean updatePartitionPath = config.getHbaseIndexUpdatePartitionPath(); RateLimiter limiter = RateLimiter.create(multiGetBatchSize * 10, TimeUnit.SECONDS); // Grab the global HBase connection @@ -220,7 +218,7 @@ private Function2>, Iterator>> hbaseConnection = getHBaseConnection(); } } - List> taggedRecords = new ArrayList<>(); + List> taggedRecords = new ArrayList<>(); try (HTable hTable = (HTable) hbaseConnection.getTable(TableName.valueOf(tableName))) { List statements = new ArrayList<>(); List currentBatchOfRecords = new LinkedList<>(); @@ -256,19 +254,19 @@ private Function2>, Iterator>> // check whether to do partition change processing if (updatePartitionPath && !partitionPath.equals(currentRecord.getPartitionPath())) { // delete partition old data record - HoodieRecord emptyRecord = new HoodieRecord(new HoodieKey(currentRecord.getRecordKey(), partitionPath), + HoodieRecord emptyRecord = new HoodieAvroRecord(new HoodieKey(currentRecord.getRecordKey(), partitionPath), new EmptyHoodieRecordPayload()); emptyRecord.unseal(); emptyRecord.setCurrentLocation(new HoodieRecordLocation(commitTs, fileId)); emptyRecord.seal(); // insert partition new data record - currentRecord = new HoodieRecord(new HoodieKey(currentRecord.getRecordKey(), currentRecord.getPartitionPath()), - currentRecord.getData()); + currentRecord = new HoodieAvroRecord(new HoodieKey(currentRecord.getRecordKey(), currentRecord.getPartitionPath()), + (HoodieRecordPayload) currentRecord.getData()); taggedRecords.add(emptyRecord); taggedRecords.add(currentRecord); } else { - currentRecord = new HoodieRecord(new HoodieKey(currentRecord.getRecordKey(), partitionPath), - currentRecord.getData()); + currentRecord = new HoodieAvroRecord(new HoodieKey(currentRecord.getRecordKey(), partitionPath), + (HoodieRecordPayload) currentRecord.getData()); currentRecord.unseal(); currentRecord.setCurrentLocation(new HoodieRecordLocation(commitTs, fileId)); currentRecord.seal(); @@ -294,8 +292,8 @@ private Result[] doGet(HTable hTable, List keys, RateLimiter limiter) throw } @Override - public HoodieData> tagLocation( - HoodieData> records, HoodieEngineContext context, + public HoodieData> tagLocation( + HoodieData> records, HoodieEngineContext context, HoodieTable hoodieTable) { return HoodieJavaRDD.of(HoodieJavaRDD.getJavaRDD(records) .mapPartitionsWithIndex(locationTagFunction(hoodieTable.getMetaClient()), true)); @@ -303,7 +301,7 @@ public HoodieData> tagLocation( private Function2, Iterator> updateLocationFunction() { - return (Function2, Iterator>) (partition, statusIterator) -> { + return (partition, statusIterator) -> { List writeStatusList = new ArrayList<>(); // Grab the global HBase connection @@ -385,7 +383,7 @@ private void doMutations(BufferedMutator mutator, List mutations, Rate mutations.clear(); } - public Map mapFileWithInsertsToUniquePartition(JavaRDD writeStatusRDD) { + Map mapFileWithInsertsToUniquePartition(JavaRDD writeStatusRDD) { final Map fileIdPartitionMap = new HashMap<>(); int partitionIndex = 0; // Map each fileId that has inserts to a unique partition Id. 
This will be used while @@ -466,7 +464,7 @@ private void acquireQPSResourcesAndSetBatchSize(final Option desiredQPSFr } } - public Tuple2 getHBasePutAccessParallelism(final JavaRDD writeStatusRDD) { + Tuple2 getHBasePutAccessParallelism(final JavaRDD writeStatusRDD) { final JavaPairRDD insertOnlyWriteStatusRDD = writeStatusRDD .filter(w -> w.getStat().getNumInserts() > 0).mapToPair(w -> new Tuple2<>(w.getStat().getNumInserts(), 1)); return insertOnlyWriteStatusRDD.fold(new Tuple2<>(0L, 0), (w, c) -> new Tuple2<>(w._1 + c._1, w._2 + c._2)); diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/row/HoodieRowCreateHandle.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/row/HoodieRowCreateHandle.java index 3566a8d8f4120..5cdb2ff68fc63 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/row/HoodieRowCreateHandle.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/row/HoodieRowCreateHandle.java @@ -45,7 +45,7 @@ import java.util.concurrent.atomic.AtomicLong; /** - * Create handle with InternalRow for datasource implemention of bulk insert. + * Create handle with InternalRow for datasource implementation of bulk insert. */ public class HoodieRowCreateHandle implements Serializable { diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/keygen/BuiltinKeyGenerator.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/keygen/BuiltinKeyGenerator.java index 0b30f19899ec3..fe03f60ee816c 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/keygen/BuiltinKeyGenerator.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/keygen/BuiltinKeyGenerator.java @@ -18,28 +18,26 @@ package org.apache.hudi.keygen; +import org.apache.avro.generic.GenericRecord; import org.apache.hudi.ApiMaturityLevel; -import org.apache.hudi.AvroConversionHelper; +import org.apache.hudi.AvroConversionUtils; import org.apache.hudi.HoodieSparkUtils; import org.apache.hudi.PublicAPIMethod; import org.apache.hudi.client.utils.SparkRowSerDe; import org.apache.hudi.common.config.TypedProperties; import org.apache.hudi.exception.HoodieIOException; import org.apache.hudi.exception.HoodieKeyException; - -import org.apache.avro.generic.GenericRecord; import org.apache.spark.sql.Row; import org.apache.spark.sql.catalyst.InternalRow; import org.apache.spark.sql.types.DataType; import org.apache.spark.sql.types.StructType; +import scala.Function1; import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; -import scala.Function1; - /** * Base class for the built-in key generators. Contains methods structured for * code reuse amongst them. 
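The `BuiltinKeyGenerator` hunk below swaps `AvroConversionHelper` for `AvroConversionUtils` but keeps the same lazy-caching shape: the Row-to-Avro converter is built once, on first use, and then reused for every subsequent row. A generic sketch of that pattern (the type parameters stand in for Spark's `Row` and Avro's `GenericRecord`; the class is illustrative, not Hudi API):

```
import java.util.function.Function;

public class LazyConverterCache<I, O> {

  private final Function<I, Function<I, O>> converterFactory;
  private transient Function<I, O> converterFn; // built on first use, then reused

  public LazyConverterCache(Function<I, Function<I, O>> converterFactory) {
    this.converterFactory = converterFactory;
  }

  public O convert(I input) {
    if (converterFn == null) {
      // The expensive, schema-dependent setup runs exactly once, mirroring the
      // createConverterToAvro(row.schema(), ...) call in the hunk below.
      converterFn = converterFactory.apply(input);
    }
    return converterFn.apply(input);
  }

  public static void main(String[] args) {
    LazyConverterCache<String, Integer> cache =
        new LazyConverterCache<>(sample -> Integer::parseInt);
    System.out.println(cache.convert("41") + 1); // prints 42
  }
}
```

Keeping the cached converter `transient` matters in the Spark setting: the key generator is serialized to executors, and each executor rebuilds the converter locally rather than shipping it over the wire.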
@@ -48,7 +46,7 @@ public abstract class BuiltinKeyGenerator extends BaseKeyGenerator implements Sp private static final String STRUCT_NAME = "hoodieRowTopLevelField"; private static final String NAMESPACE = "hoodieRow"; - private transient Function1 converterFn = null; + private transient Function1 converterFn = null; private SparkRowSerDe sparkRowSerDe; protected StructType structType; @@ -69,10 +67,9 @@ protected BuiltinKeyGenerator(TypedProperties config) { @PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING) public String getRecordKey(Row row) { if (null == converterFn) { - converterFn = AvroConversionHelper.createConverterToAvro(row.schema(), STRUCT_NAME, NAMESPACE); + converterFn = AvroConversionUtils.createConverterToAvro(row.schema(), STRUCT_NAME, NAMESPACE); } - GenericRecord genericRecord = (GenericRecord) converterFn.apply(row); - return getKey(genericRecord).getRecordKey(); + return getKey(converterFn.apply(row)).getRecordKey(); } /** @@ -84,10 +81,9 @@ public String getRecordKey(Row row) { @PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING) public String getPartitionPath(Row row) { if (null == converterFn) { - converterFn = AvroConversionHelper.createConverterToAvro(row.schema(), STRUCT_NAME, NAMESPACE); + converterFn = AvroConversionUtils.createConverterToAvro(row.schema(), STRUCT_NAME, NAMESPACE); } - GenericRecord genericRecord = (GenericRecord) converterFn.apply(row); - return getKey(genericRecord).getPartitionPath(); + return getKey(converterFn.apply(row)).getPartitionPath(); } /** diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/keygen/ComplexKeyGenerator.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/keygen/ComplexKeyGenerator.java index 8d02ce60f3bec..2e2167f9379f0 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/keygen/ComplexKeyGenerator.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/keygen/ComplexKeyGenerator.java @@ -37,10 +37,14 @@ public class ComplexKeyGenerator extends BuiltinKeyGenerator { public ComplexKeyGenerator(TypedProperties props) { super(props); - this.recordKeyFields = Arrays.stream(props.getString(KeyGeneratorOptions.RECORDKEY_FIELD_NAME.key()) - .split(",")).map(String::trim).filter(s -> !s.isEmpty()).collect(Collectors.toList()); - this.partitionPathFields = Arrays.stream(props.getString(KeyGeneratorOptions.PARTITIONPATH_FIELD_NAME.key()) - .split(",")).map(String::trim).filter(s -> !s.isEmpty()).collect(Collectors.toList()); + this.recordKeyFields = Arrays.stream(props.getString(KeyGeneratorOptions.RECORDKEY_FIELD_NAME.key()).split(",")) + .map(String::trim) + .filter(s -> !s.isEmpty()) + .collect(Collectors.toList()); + this.partitionPathFields = Arrays.stream(props.getString(KeyGeneratorOptions.PARTITIONPATH_FIELD_NAME.key()).split(",")) + .map(String::trim) + .filter(s -> !s.isEmpty()) + .collect(Collectors.toList()); complexAvroKeyGenerator = new ComplexAvroKeyGenerator(props); } diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/keygen/NonpartitionedKeyGenerator.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/keygen/NonpartitionedKeyGenerator.java index 1664c86f9baa5..032c750f03240 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/keygen/NonpartitionedKeyGenerator.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/keygen/NonpartitionedKeyGenerator.java @@ -18,9 +18,10 @@ package org.apache.hudi.keygen; -import org.apache.avro.generic.GenericRecord; import 
org.apache.hudi.common.config.TypedProperties; import org.apache.hudi.keygen.constant.KeyGeneratorOptions; + +import org.apache.avro.generic.GenericRecord; import org.apache.spark.sql.Row; import org.apache.spark.sql.catalyst.InternalRow; import org.apache.spark.sql.types.StructType; @@ -31,7 +32,7 @@ import java.util.stream.Collectors; /** - * Simple Key generator for unpartitioned Hive Tables. + * Simple Key generator for non-partitioned Hive Tables. */ public class NonpartitionedKeyGenerator extends BuiltinKeyGenerator { diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/keygen/RowKeyGeneratorHelper.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/keygen/RowKeyGeneratorHelper.java index 24f6e7a4fa4b5..6a28fbe9501a9 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/keygen/RowKeyGeneratorHelper.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/keygen/RowKeyGeneratorHelper.java @@ -40,9 +40,9 @@ import scala.Option; -import static org.apache.hudi.keygen.KeyGenUtils.HUDI_DEFAULT_PARTITION_PATH; import static org.apache.hudi.keygen.KeyGenUtils.DEFAULT_PARTITION_PATH_SEPARATOR; import static org.apache.hudi.keygen.KeyGenUtils.EMPTY_RECORDKEY_PLACEHOLDER; +import static org.apache.hudi.keygen.KeyGenUtils.HUDI_DEFAULT_PARTITION_PATH; import static org.apache.hudi.keygen.KeyGenUtils.NULL_RECORDKEY_PLACEHOLDER; /** @@ -230,9 +230,10 @@ public static Object getNestedFieldVal(Row row, List positions) { /** * Generate the tree style positions for the field requested for as per the defined struct type. - * @param structType schema of interest - * @param field field of interest for which the positions are requested for - * @param isRecordKey {@code true} if the field requested for is a record key. {@code false} incase of a partition path. + * + * @param structType schema of interest + * @param field field of interest for which the positions are requested + * @param isRecordKey {@code true} if the requested field is a record key. {@code false} in case of a partition path. * @return the positions of the field as per the struct type.
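The tree-style positions described in the javadoc above can be illustrated with a small sketch against Spark's `StructType` API. This is a simplified take on the idea behind `getNestedFieldIndices`, not the actual implementation (which also handles the record-key/partition-path distinction); spark-sql is assumed on the classpath:

```
import java.util.ArrayList;
import java.util.List;

import org.apache.spark.sql.types.DataType;
import org.apache.spark.sql.types.StructType;

public class NestedFieldPositions {

  // Walk a dot-separated field path (e.g. "address.city") through a StructType
  // and collect the ordinal of each level, producing something like [2, 0].
  public static List<Integer> positionsOf(StructType structType, String fieldPath) {
    List<Integer> positions = new ArrayList<>();
    DataType current = structType;
    for (String part : fieldPath.split("\\.")) {
      if (!(current instanceof StructType)) {
        throw new IllegalArgumentException("Not a struct at segment: " + part);
      }
      StructType struct = (StructType) current;
      int idx = struct.fieldIndex(part); // throws if the field does not exist
      positions.add(idx);
      current = struct.fields()[idx].dataType();
    }
    return positions;
  }
}
```

Precomputing these ordinals once per schema lets the key generator read nested values by position on every row instead of resolving field names repeatedly.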
*/ public static List getNestedFieldIndices(StructType structType, String field, boolean isRecordKey) { diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/metadata/SparkHoodieBackedTableMetadataWriter.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/metadata/SparkHoodieBackedTableMetadataWriter.java index ccb258a8cdc61..c905f92c2eac0 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/metadata/SparkHoodieBackedTableMetadataWriter.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/metadata/SparkHoodieBackedTableMetadataWriter.java @@ -18,24 +18,23 @@ package org.apache.hudi.metadata; -import org.apache.avro.specific.SpecificRecordBase; import org.apache.hudi.client.SparkRDDWriteClient; import org.apache.hudi.client.WriteStatus; import org.apache.hudi.client.common.HoodieSparkEngineContext; import org.apache.hudi.common.data.HoodieData; import org.apache.hudi.common.engine.HoodieEngineContext; import org.apache.hudi.common.metrics.Registry; -import org.apache.hudi.common.model.FileSlice; import org.apache.hudi.common.model.HoodieRecord; -import org.apache.hudi.common.model.HoodieRecordLocation; import org.apache.hudi.common.table.timeline.HoodieActiveTimeline; import org.apache.hudi.common.table.timeline.HoodieInstant; import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.ValidationUtils; import org.apache.hudi.config.HoodieWriteConfig; +import org.apache.hudi.data.HoodieJavaRDD; import org.apache.hudi.exception.HoodieMetadataException; import org.apache.hudi.metrics.DistributedRegistry; +import org.apache.avro.specific.SpecificRecordBase; import org.apache.hadoop.conf.Configuration; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; @@ -43,6 +42,7 @@ import java.io.IOException; import java.util.List; +import java.util.Map; public class SparkHoodieBackedTableMetadataWriter extends HoodieBackedTableMetadataWriter { @@ -51,8 +51,8 @@ public class SparkHoodieBackedTableMetadataWriter extends HoodieBackedTableMetad /** * Return a Spark based implementation of {@code HoodieTableMetadataWriter} which can be used to * write to the metadata table. - * - * If the metadata table does not exist, an attempt is made to bootstrap it but there is no guarantted that + *
- * If the metadata table does not exist, an attempt is made to bootstrap it but there is no guarantted that
+ * <p>
+ * If the metadata table does not exist, an attempt is made to bootstrap it but there is no guarantee that * table will end up bootstrapping at this time. * * @param conf @@ -121,11 +121,12 @@ protected void initialize(HoodieEngineContext eng } } - protected void commit(HoodieData hoodieDataRecords, String partitionName, String instantTime, boolean canTriggerTableService) { + @Override + protected void commit(String instantTime, Map> partitionRecordsMap, boolean canTriggerTableService) { ValidationUtils.checkState(metadataMetaClient != null, "Metadata table is not fully initialized yet."); ValidationUtils.checkState(enabled, "Metadata table cannot be committed to as it is not enabled"); - JavaRDD records = (JavaRDD) hoodieDataRecords.get(); - JavaRDD recordRDD = prepRecords(records, partitionName, 1); + HoodieData preppedRecords = prepRecords(partitionRecordsMap); + JavaRDD preppedRecordRDD = HoodieJavaRDD.getJavaRDD(preppedRecords); try (SparkRDDWriteClient writeClient = new SparkRDDWriteClient(engineContext, metadataWriteConfig, true)) { if (canTriggerTableService) { @@ -150,7 +151,7 @@ HoodieActiveTimeline.deleteInstantFile(metadataMetaClient.getFs(), metadataMetaClient.getMetaPath(), alreadyCompletedInstant); metadataMetaClient.reloadActiveTimeline(); } - List statuses = writeClient.upsertPreppedRecords(recordRDD, instantTime).collect(); + List statuses = writeClient.upsertPreppedRecords(preppedRecordRDD, instantTime).collect(); statuses.forEach(writeStatus -> { if (writeStatus.hasErrors()) { throw new HoodieMetadataException("Failed to commit metadata table records at instant " + instantTime); @@ -168,20 +169,4 @@ protected void commit(HoodieData hoodieDataRecords, String partiti // Update total size of the metadata and count of base/log files metrics.ifPresent(m -> m.updateSizeMetrics(metadataMetaClient, metadata)); } - - /** - * Tag each record with the location in the given partition. - * - * The record is tagged with respective file slice's location based on its record key.
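The deleted `prepRecords` above pinned each metadata record to a file slice by hashing its record key into one of `numFileGroups` buckets. A minimal sketch of that mapping; the hash used here is illustrative and differs from Hudi's actual `mapRecordKeyToFileGroupIndex`:

```
public class FileGroupIndexSketch {

  // Deterministically map a record key to one of numFileGroups buckets, so the
  // same key always lands in the same file group. Illustrative hash only.
  static int mapRecordKeyToFileGroupIndex(String recordKey, int numFileGroups) {
    return Math.floorMod(recordKey.hashCode(), numFileGroups); // floorMod keeps the index non-negative
  }

  public static void main(String[] args) {
    // The deleted caller passed numFileGroups = 1, so every key maps to bucket 0.
    System.out.println(mapRecordKeyToFileGroupIndex("partition/record-001", 1)); // prints 0
  }
}
```

Because the mapping is deterministic, upserts for the same key always route to the same file group, which is what lets the metadata table maintain a single record per key across commits.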
- */ - private JavaRDD prepRecords(JavaRDD recordsRDD, String partitionName, int numFileGroups) { - List fileSlices = HoodieTableMetadataUtil.getPartitionLatestFileSlices(metadataMetaClient, partitionName); - ValidationUtils.checkArgument(fileSlices.size() == numFileGroups, String.format("Invalid number of file groups: found=%d, required=%d", fileSlices.size(), numFileGroups)); - - return recordsRDD.map(r -> { - FileSlice slice = fileSlices.get(HoodieTableMetadataUtil.mapRecordKeyToFileGroupIndex(r.getRecordKey(), numFileGroups)); - r.setCurrentLocation(new HoodieRecordLocation(slice.getBaseInstantTime(), slice.getFileId())); - return r; - }); - } } diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/HoodieSparkCopyOnWriteTable.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/HoodieSparkCopyOnWriteTable.java index aa9a924ed6925..31bd436612c11 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/HoodieSparkCopyOnWriteTable.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/HoodieSparkCopyOnWriteTable.java @@ -25,6 +25,7 @@ import org.apache.hudi.avro.model.HoodieClusteringPlan; import org.apache.hudi.avro.model.HoodieCompactionPlan; import org.apache.hudi.avro.model.HoodieRestoreMetadata; +import org.apache.hudi.avro.model.HoodieRestorePlan; import org.apache.hudi.avro.model.HoodieRollbackMetadata; import org.apache.hudi.avro.model.HoodieRollbackPlan; import org.apache.hudi.avro.model.HoodieSavepointMetadata; @@ -58,7 +59,7 @@ import org.apache.hudi.table.action.bootstrap.SparkBootstrapCommitActionExecutor; import org.apache.hudi.table.action.clean.CleanActionExecutor; import org.apache.hudi.table.action.clean.CleanPlanActionExecutor; -import org.apache.hudi.table.action.cluster.SparkClusteringPlanActionExecutor; +import org.apache.hudi.table.action.cluster.ClusteringPlanActionExecutor; import org.apache.hudi.table.action.cluster.SparkExecuteClusteringCommitActionExecutor; import org.apache.hudi.table.action.commit.SparkBulkInsertCommitActionExecutor; import org.apache.hudi.table.action.commit.SparkBulkInsertPreppedCommitActionExecutor; @@ -74,6 +75,7 @@ import org.apache.hudi.table.action.restore.CopyOnWriteRestoreActionExecutor; import org.apache.hudi.table.action.rollback.BaseRollbackPlanActionExecutor; import org.apache.hudi.table.action.rollback.CopyOnWriteRollbackActionExecutor; +import org.apache.hudi.table.action.rollback.RestorePlanActionExecutor; import org.apache.hudi.table.action.savepoint.SavepointActionExecutor; import org.apache.avro.Schema; @@ -184,13 +186,6 @@ private void updateColumnsStatsIndex( String basePath = metaClient.getBasePath(); String indexPath = metaClient.getColumnStatsIndexPath(); - List completedCommits = - metaClient.getCommitsTimeline() - .filterCompletedInstants() - .getInstants() - .map(HoodieInstant::getTimestamp) - .collect(Collectors.toList()); - List touchedFiles = updatedFilesStats.stream() .map(s -> new Path(basePath, s.getPath()).toString()) @@ -214,6 +209,13 @@ private void updateColumnsStatsIndex( new TableSchemaResolver(metaClient).getTableAvroSchemaWithoutMetadataFields() ); + List completedCommits = + metaClient.getCommitsTimeline() + .filterCompletedInstants() + .getInstants() + .map(HoodieInstant::getTimestamp) + .collect(Collectors.toList()); + ColumnStatsIndexHelper.updateColumnStatsIndexFor( sparkEngineContext.getSqlContext().sparkSession(), AvroConversionUtils.convertAvroSchemaToStructType(tableWriteSchema), @@ -242,7 +244,7 @@ public 
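The prepRecords refactor above replaces a single-partition helper with a map-based overload; the removed code shows the underlying mechanics: each metadata record is routed to a file group by hashing its record key, then tagged with that file slice's location so the later upsertPreppedRecords call needs no index lookup. A minimal sketch of that tagging step, reusing the helper names from the removed code (the wrapper class and method are illustrative, not part of the PR):

import java.util.List;
import org.apache.hudi.common.model.FileSlice;
import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.model.HoodieRecordLocation;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.metadata.HoodieTableMetadataUtil;
import org.apache.spark.api.java.JavaRDD;

final class FileGroupTaggingSketch {
  // Tag each record with the location of the file group its key hashes to,
  // mirroring the removed prepRecords(...) logic for one metadata partition.
  static JavaRDD<HoodieRecord> tagRecords(JavaRDD<HoodieRecord> recordsRDD,
                                          HoodieTableMetaClient metadataMetaClient,
                                          String partitionName) {
    List<FileSlice> fileSlices =
        HoodieTableMetadataUtil.getPartitionLatestFileSlices(metadataMetaClient, partitionName);
    int numFileGroups = fileSlices.size();
    return recordsRDD.map(r -> {
      FileSlice slice = fileSlices.get(
          HoodieTableMetadataUtil.mapRecordKeyToFileGroupIndex(r.getRecordKey(), numFileGroups));
      r.setCurrentLocation(new HoodieRecordLocation(slice.getBaseInstantTime(), slice.getFileId()));
      return r;
    });
  }
}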
HoodieWriteMetadata> compact( public Option scheduleClustering(HoodieEngineContext context, String instantTime, Option> extraMetadata) { - return new SparkClusteringPlanActionExecutor<>(context, config,this, instantTime, extraMetadata).execute(); + return new ClusteringPlanActionExecutor<>(context, config,this, instantTime, extraMetadata).execute(); } @Override @@ -258,6 +260,7 @@ public HoodieBootstrapWriteMetadata> bootstrap(HoodieEngine @Override public void rollbackBootstrap(HoodieEngineContext context, String instantTime) { + new RestorePlanActionExecutor<>(context, config, this, instantTime, HoodieTimeline.INIT_INSTANT_TS).execute(); new CopyOnWriteRestoreActionExecutor(context, config, this, instantTime, HoodieTimeline.INIT_INSTANT_TS).execute(); } @@ -353,4 +356,8 @@ public HoodieRestoreMetadata restore(HoodieEngineContext context, String restore return new CopyOnWriteRestoreActionExecutor(context, config, this, restoreInstantTime, instantToRestore).execute(); } + @Override + public Option scheduleRestore(HoodieEngineContext context, String restoreInstantTime, String instantToRestore) { + return new RestorePlanActionExecutor(context, config, this, restoreInstantTime, instantToRestore).execute(); + } } diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/HoodieSparkMergeOnReadTable.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/HoodieSparkMergeOnReadTable.java index 75af5d0f685fc..334efa7fc91f4 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/HoodieSparkMergeOnReadTable.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/HoodieSparkMergeOnReadTable.java @@ -52,6 +52,7 @@ import org.apache.hudi.table.action.restore.MergeOnReadRestoreActionExecutor; import org.apache.hudi.table.action.rollback.BaseRollbackPlanActionExecutor; import org.apache.hudi.table.action.rollback.MergeOnReadRollbackActionExecutor; +import org.apache.hudi.table.action.rollback.RestorePlanActionExecutor; import org.apache.spark.api.java.JavaRDD; @@ -150,6 +151,7 @@ public HoodieBootstrapWriteMetadata> bootstrap(HoodieEngine @Override public void rollbackBootstrap(HoodieEngineContext context, String instantTime) { + new RestorePlanActionExecutor<>(context, config, this, instantTime, HoodieTimeline.INIT_INSTANT_TS).execute(); new MergeOnReadRestoreActionExecutor(context, config, this, instantTime, HoodieTimeline.INIT_INSTANT_TS).execute(); } diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/HoodieSparkTable.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/HoodieSparkTable.java index 35c9ab3a0fe94..bb8c95d745ab1 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/HoodieSparkTable.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/HoodieSparkTable.java @@ -18,8 +18,6 @@ package org.apache.hudi.table; -import org.apache.avro.specific.SpecificRecordBase; -import org.apache.hadoop.fs.Path; import org.apache.hudi.client.WriteStatus; import org.apache.hudi.client.common.HoodieSparkEngineContext; import org.apache.hudi.common.data.HoodieData; @@ -39,6 +37,11 @@ import org.apache.hudi.metadata.HoodieTableMetadataWriter; import org.apache.hudi.metadata.SparkHoodieBackedTableMetadataWriter; import org.apache.hudi.table.action.HoodieWriteMetadata; + +import org.apache.avro.specific.SpecificRecordBase; +import org.apache.hadoop.fs.Path; +import org.apache.spark.TaskContext; +import org.apache.spark.TaskContext$; import 
org.apache.spark.api.java.JavaRDD; import java.io.IOException; @@ -63,7 +66,8 @@ public static HoodieSparkTable create(HoodieW HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder().setConf(context.getHadoopConf().get()).setBasePath(config.getBasePath()) .setLoadActiveTimelineOnLoad(true).setConsistencyGuardConfig(config.getConsistencyGuardConfig()) - .setLayoutVersion(Option.of(new TimelineLayoutVersion(config.getTimelineLayoutVersion()))).build(); + .setLayoutVersion(Option.of(new TimelineLayoutVersion(config.getTimelineLayoutVersion()))) + .setFileSystemRetryConfig(config.getFileSystemRetryConfig()).build(); return HoodieSparkTable.create(config, (HoodieSparkEngineContext) context, metaClient, refreshTimeline); } @@ -110,8 +114,8 @@ protected HoodieIndex getIndex(HoodieWriteConfig config, HoodieEngineContext con * @return instance of {@link HoodieTableMetadataWriter} */ @Override - public Option getMetadataWriter(String triggeringInstantTimestamp, - Option actionMetadata) { + public Option getMetadataWriter(String triggeringInstantTimestamp, + Option actionMetadata) { if (config.isMetadataTableEnabled()) { // Create the metadata table writer. First time after the upgrade this creation might trigger // metadata table bootstrapping. Bootstrapping process could fail and checking the table @@ -131,4 +135,10 @@ public Option getMetad return Option.empty(); } + + @Override + public Runnable getPreExecuteRunnable() { + final TaskContext taskContext = TaskContext.get(); + return () -> TaskContext$.MODULE$.setTaskContext(taskContext); + } } diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/bootstrap/BootstrapMetadataHandler.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/bootstrap/BootstrapMetadataHandler.java index 75daca739c8f5..237fe6cf84849 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/bootstrap/BootstrapMetadataHandler.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/bootstrap/BootstrapMetadataHandler.java @@ -26,10 +26,11 @@ */ public interface BootstrapMetadataHandler { /** - * Execute bootstrap with only metatata. + * Execute bootstrap with only metadata. + * * @param srcPartitionPath source partition path. - * @param partitionPath destination partition path. - * @param keyGenerator key generator to use. + * @param partitionPath destination partition path. + * @param keyGenerator key generator to use. * @return the {@link BootstrapWriteStatus} which has the result of execution. 
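The getPreExecuteRunnable override added above captures the Spark TaskContext on the task thread and re-installs it on whichever worker thread later runs the returned Runnable; that is what lets the bounded in-memory executors further down in this diff spawn producer/consumer threads without losing access to TaskContext.get(). A stripped-down sketch of the pattern (only TaskContext and TaskContext$ come from Spark; the wrapper class is illustrative):

import org.apache.spark.TaskContext;
import org.apache.spark.TaskContext$;

final class TaskContextPropagationSketch {
  static Runnable capture() {
    // Captured on the Spark task thread that constructs the executor...
    final TaskContext taskContext = TaskContext.get();
    // ...restored on the worker thread that eventually runs this hook, so
    // TaskContext.get() (accumulators, metrics, etc.) keeps working there.
    return () -> TaskContext$.MODULE$.setTaskContext(taskContext);
  }
}

Passed as the pre-execute hook of BoundedInMemoryExecutor (as in the Orc/Parquet bootstrap handlers and SparkMergeHelper below), this presumably runs once per queue worker thread before any records are consumed.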
*/ BootstrapWriteStatus runMetadataBootstrap(String srcPartitionPath, String partitionPath, KeyGeneratorInterface keyGenerator); diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/bootstrap/OrcBootstrapMetadataHandler.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/bootstrap/OrcBootstrapMetadataHandler.java index 9587c5b30cb74..e3d0e9b3c69d4 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/bootstrap/OrcBootstrapMetadataHandler.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/bootstrap/OrcBootstrapMetadataHandler.java @@ -21,6 +21,7 @@ import org.apache.hudi.avro.HoodieAvroUtils; import org.apache.hudi.avro.model.HoodieFileStatus; import org.apache.hudi.client.bootstrap.BootstrapRecordPayload; +import org.apache.hudi.common.model.HoodieAvroRecord; import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.util.AvroOrcUtils; @@ -28,7 +29,6 @@ import org.apache.hudi.common.util.queue.BoundedInMemoryExecutor; import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.exception.HoodieException; -import org.apache.hudi.execution.SparkBoundedInMemoryExecutor; import org.apache.hudi.io.HoodieBootstrapHandle; import org.apache.hudi.keygen.KeyGeneratorInterface; import org.apache.hudi.table.HoodieTable; @@ -67,15 +67,15 @@ void executeBootstrap(HoodieBootstrapHandle bootstrapHandle, Path so Reader orcReader = OrcFile.createReader(sourceFilePath, OrcFile.readerOptions(table.getHadoopConf())); TypeDescription orcSchema = orcReader.getSchema(); try (RecordReader reader = orcReader.rows(new Reader.Options(table.getHadoopConf()).schema(orcSchema))) { - wrapper = new SparkBoundedInMemoryExecutor(config, + wrapper = new BoundedInMemoryExecutor(config.getWriteBufferLimitBytes(), new OrcReaderIterator(reader, avroSchema, orcSchema), new BootstrapRecordConsumer(bootstrapHandle), inp -> { String recKey = keyGenerator.getKey(inp).getRecordKey(); GenericRecord gr = new GenericData.Record(HoodieAvroUtils.RECORD_KEY_SCHEMA); gr.put(HoodieRecord.RECORD_KEY_METADATA_FIELD, recKey); BootstrapRecordPayload payload = new BootstrapRecordPayload(gr); - HoodieRecord rec = new HoodieRecord(new HoodieKey(recKey, partitionPath), payload); + HoodieRecord rec = new HoodieAvroRecord(new HoodieKey(recKey, partitionPath), payload); return rec; - }); + }, table.getPreExecuteRunnable()); wrapper.execute(); } catch (Exception e) { throw new HoodieException(e); diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/bootstrap/ParquetBootstrapMetadataHandler.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/bootstrap/ParquetBootstrapMetadataHandler.java index 058c2d4267abb..d07ea771bc557 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/bootstrap/ParquetBootstrapMetadataHandler.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/bootstrap/ParquetBootstrapMetadataHandler.java @@ -21,13 +21,13 @@ import org.apache.hudi.avro.HoodieAvroUtils; import org.apache.hudi.avro.model.HoodieFileStatus; import org.apache.hudi.client.bootstrap.BootstrapRecordPayload; +import org.apache.hudi.common.model.HoodieAvroRecord; import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.util.ParquetReaderIterator; import 
org.apache.hudi.common.util.queue.BoundedInMemoryExecutor; import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.exception.HoodieException; -import org.apache.hudi.execution.SparkBoundedInMemoryExecutor; import org.apache.hudi.io.HoodieBootstrapHandle; import org.apache.hudi.keygen.KeyGeneratorInterface; import org.apache.hudi.table.HoodieTable; @@ -71,15 +71,15 @@ void executeBootstrap(HoodieBootstrapHandle bootstrapHandle, try { ParquetReader reader = AvroParquetReader.builder(sourceFilePath).withConf(table.getHadoopConf()).build(); - wrapper = new SparkBoundedInMemoryExecutor(config, + wrapper = new BoundedInMemoryExecutor(config.getWriteBufferLimitBytes(), new ParquetReaderIterator(reader), new BootstrapRecordConsumer(bootstrapHandle), inp -> { String recKey = keyGenerator.getKey(inp).getRecordKey(); GenericRecord gr = new GenericData.Record(HoodieAvroUtils.RECORD_KEY_SCHEMA); gr.put(HoodieRecord.RECORD_KEY_METADATA_FIELD, recKey); BootstrapRecordPayload payload = new BootstrapRecordPayload(gr); - HoodieRecord rec = new HoodieRecord(new HoodieKey(recKey, partitionPath), payload); + HoodieRecord rec = new HoodieAvroRecord(new HoodieKey(recKey, partitionPath), payload); return rec; - }); + }, table.getPreExecuteRunnable()); wrapper.execute(); } catch (Exception e) { throw new HoodieException(e); diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/bootstrap/SparkBootstrapCommitActionExecutor.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/bootstrap/SparkBootstrapCommitActionExecutor.java index ea558c39275da..a970e8f0f97d3 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/bootstrap/SparkBootstrapCommitActionExecutor.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/bootstrap/SparkBootstrapCommitActionExecutor.java @@ -113,9 +113,9 @@ public HoodieBootstrapWriteMetadata execute() { validate(); try { HoodieTableMetaClient metaClient = table.getMetaClient(); - Option completetedInstant = + Option completedInstant = metaClient.getActiveTimeline().getCommitsTimeline().filterCompletedInstants().lastInstant(); - ValidationUtils.checkArgument(!completetedInstant.isPresent(), + ValidationUtils.checkArgument(!completedInstant.isPresent(), "Active Timeline is expected to be empty for bootstrap to be performed. " + "If you want to re-bootstrap, please rollback bootstrap first !!"); Map>>> partitionSelections = listAndProcessSourcePartitions(); @@ -181,6 +181,11 @@ public HoodieWriteMetadata> execute(JavaRDD return null; } + @Override + protected void setCommitMetadata(HoodieWriteMetadata> result) { + result.setCommitMetadata(Option.of(new HoodieCommitMetadata())); + } + @Override protected void commit(Option> extraMetadata, HoodieWriteMetadata> result) { // Perform bootstrap index write and then commit. Make sure both record-key and bootstrap-index diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/cluster/SparkClusteringPlanActionExecutor.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/cluster/SparkClusteringPlanActionExecutor.java deleted file mode 100644 index 81a0a74aee1d3..0000000000000 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/cluster/SparkClusteringPlanActionExecutor.java +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hudi.table.action.cluster; - -import org.apache.hudi.client.WriteStatus; -import org.apache.hudi.common.engine.HoodieEngineContext; -import org.apache.hudi.common.model.HoodieKey; -import org.apache.hudi.common.model.HoodieRecord; -import org.apache.hudi.common.model.HoodieRecordPayload; -import org.apache.hudi.common.util.Option; -import org.apache.hudi.config.HoodieWriteConfig; -import org.apache.hudi.table.HoodieTable; -import org.apache.spark.api.java.JavaRDD; - -import java.util.Map; - -@SuppressWarnings("checkstyle:LineLength") -public class SparkClusteringPlanActionExecutor extends - BaseClusteringPlanActionExecutor>, JavaRDD, JavaRDD> { - - public SparkClusteringPlanActionExecutor(HoodieEngineContext context, - HoodieWriteConfig config, - HoodieTable>, JavaRDD, JavaRDD> table, - String instantTime, - Option> extraMetadata) { - super(context, config, table, instantTime, extraMetadata); - } -} diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/cluster/SparkExecuteClusteringCommitActionExecutor.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/cluster/SparkExecuteClusteringCommitActionExecutor.java index 5b0224b0f0050..594a910428aad 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/cluster/SparkExecuteClusteringCommitActionExecutor.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/cluster/SparkExecuteClusteringCommitActionExecutor.java @@ -18,111 +18,48 @@ package org.apache.hudi.table.action.cluster; -import org.apache.hudi.avro.HoodieAvroUtils; -import org.apache.hudi.avro.model.HoodieClusteringGroup; import org.apache.hudi.avro.model.HoodieClusteringPlan; import org.apache.hudi.client.WriteStatus; -import org.apache.hudi.client.clustering.run.strategy.SparkSingleFileSortExecutionStrategy; +import org.apache.hudi.common.data.HoodieData; import org.apache.hudi.common.engine.HoodieEngineContext; -import org.apache.hudi.common.model.HoodieCommitMetadata; -import org.apache.hudi.common.model.HoodieFileGroupId; -import org.apache.hudi.common.model.HoodieKey; -import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.model.HoodieRecordPayload; import org.apache.hudi.common.model.WriteOperationType; -import org.apache.hudi.common.table.timeline.HoodieInstant; import org.apache.hudi.common.table.timeline.HoodieTimeline; import org.apache.hudi.common.util.ClusteringUtils; -import org.apache.hudi.common.util.CommitUtils; -import org.apache.hudi.common.util.Option; -import org.apache.hudi.common.util.ReflectionUtils; import org.apache.hudi.common.util.collection.Pair; import org.apache.hudi.config.HoodieWriteConfig; +import org.apache.hudi.data.HoodieJavaRDD; import org.apache.hudi.exception.HoodieClusteringException; import 
org.apache.hudi.table.HoodieTable; import org.apache.hudi.table.action.HoodieWriteMetadata; -import org.apache.hudi.table.action.cluster.strategy.ClusteringExecutionStrategy; import org.apache.hudi.table.action.commit.BaseSparkCommitActionExecutor; -import org.apache.avro.Schema; -import org.apache.log4j.LogManager; -import org.apache.log4j.Logger; import org.apache.spark.api.java.JavaRDD; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.stream.Collectors; - public class SparkExecuteClusteringCommitActionExecutor> extends BaseSparkCommitActionExecutor { - private static final Logger LOG = LogManager.getLogger(SparkExecuteClusteringCommitActionExecutor.class); private final HoodieClusteringPlan clusteringPlan; public SparkExecuteClusteringCommitActionExecutor(HoodieEngineContext context, HoodieWriteConfig config, HoodieTable table, String instantTime) { super(context, config, table, instantTime, WriteOperationType.CLUSTER); - this.clusteringPlan = ClusteringUtils.getClusteringPlan(table.getMetaClient(), HoodieTimeline.getReplaceCommitRequestedInstant(instantTime)) - .map(Pair::getRight).orElseThrow(() -> new HoodieClusteringException("Unable to read clustering plan for instant: " + instantTime)); + this.clusteringPlan = ClusteringUtils.getClusteringPlan( + table.getMetaClient(), HoodieTimeline.getReplaceCommitRequestedInstant(instantTime)) + .map(Pair::getRight).orElseThrow(() -> new HoodieClusteringException( + "Unable to read clustering plan for instant: " + instantTime)); } @Override public HoodieWriteMetadata> execute() { - HoodieInstant instant = HoodieTimeline.getReplaceCommitRequestedInstant(instantTime); - // Mark instant as clustering inflight - table.getActiveTimeline().transitionReplaceRequestedToInflight(instant, Option.empty()); - table.getMetaClient().reloadActiveTimeline(); - - final Schema schema = HoodieAvroUtils.addMetadataFields(new Schema.Parser().parse(config.getSchema())); - HoodieWriteMetadata> writeMetadata = ((ClusteringExecutionStrategy>, JavaRDD, JavaRDD>) - ReflectionUtils.loadClass(config.getClusteringExecutionStrategyClass(), - new Class[] {HoodieTable.class, HoodieEngineContext.class, HoodieWriteConfig.class}, table, context, config)) - .performClustering(clusteringPlan, schema, instantTime); - JavaRDD writeStatusRDD = writeMetadata.getWriteStatuses(); - JavaRDD statuses = updateIndex(writeStatusRDD, writeMetadata); - writeMetadata.setWriteStats(statuses.map(WriteStatus::getStat).collect()); - writeMetadata.setPartitionToReplaceFileIds(getPartitionToReplacedFileIds(writeMetadata)); - commitOnAutoCommit(writeMetadata); - if (!writeMetadata.getCommitMetadata().isPresent()) { - HoodieCommitMetadata commitMetadata = CommitUtils.buildMetadata(writeMetadata.getWriteStats().get(), writeMetadata.getPartitionToReplaceFileIds(), - extraMetadata, operationType, getSchemaToStoreInCommit(), getCommitActionType()); - writeMetadata.setCommitMetadata(Option.of(commitMetadata)); - } - return writeMetadata; - } - - /** - * Validate actions taken by clustering. In the first implementation, we validate at least one new file is written. - * But we can extend this to add more validation. E.g. number of records read = number of records written etc. - * We can also make these validations in BaseCommitActionExecutor to reuse pre-commit hooks for multiple actions. 
- */ - private void validateWriteResult(HoodieWriteMetadata> writeMetadata) { - if (writeMetadata.getWriteStatuses().isEmpty()) { - throw new HoodieClusteringException("Clustering plan produced 0 WriteStatus for " + instantTime - + " #groups: " + clusteringPlan.getInputGroups().size() + " expected at least " - + clusteringPlan.getInputGroups().stream().mapToInt(HoodieClusteringGroup::getNumOutputFileGroups).sum() - + " write statuses"); - } + HoodieWriteMetadata> writeMetadata = executeClustering(clusteringPlan); + JavaRDD transformedWriteStatuses = HoodieJavaRDD.getJavaRDD(writeMetadata.getWriteStatuses()); + return writeMetadata.clone(transformedWriteStatuses); } @Override protected String getCommitActionType() { return HoodieTimeline.REPLACE_COMMIT_ACTION; } - - @Override - protected Map> getPartitionToReplacedFileIds(HoodieWriteMetadata> writeMetadata) { - Set newFilesWritten = writeMetadata.getWriteStats().get().stream() - .map(s -> new HoodieFileGroupId(s.getPartitionPath(), s.getFileId())).collect(Collectors.toSet()); - // for the below execution strategy, new filegroup id would be same as old filegroup id - if (SparkSingleFileSortExecutionStrategy.class.getName().equals(config.getClusteringExecutionStrategyClass())) { - return ClusteringUtils.getFileGroupsFromClusteringPlan(clusteringPlan) - .collect(Collectors.groupingBy(fg -> fg.getPartitionPath(), Collectors.mapping(fg -> fg.getFileId(), Collectors.toList()))); - } - return ClusteringUtils.getFileGroupsFromClusteringPlan(clusteringPlan) - .filter(fg -> !newFilesWritten.contains(fg)) - .collect(Collectors.groupingBy(fg -> fg.getPartitionPath(), Collectors.mapping(fg -> fg.getFileId(), Collectors.toList()))); - } } diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/BaseSparkCommitActionExecutor.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/BaseSparkCommitActionExecutor.java index c551310bafdd1..ba3b0be1641ee 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/BaseSparkCommitActionExecutor.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/BaseSparkCommitActionExecutor.java @@ -20,16 +20,15 @@ import org.apache.hudi.client.WriteStatus; import org.apache.hudi.client.utils.SparkMemoryUtils; -import org.apache.hudi.common.config.TypedProperties; import org.apache.hudi.client.utils.SparkValidatorUtils; import org.apache.hudi.common.engine.HoodieEngineContext; import org.apache.hudi.common.model.HoodieCommitMetadata; +import org.apache.hudi.common.model.HoodieFileGroupId; import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.model.HoodieRecordLocation; import org.apache.hudi.common.model.HoodieRecordPayload; import org.apache.hudi.common.model.HoodieWriteStat; -import org.apache.hudi.common.model.HoodieFileGroupId; import org.apache.hudi.common.model.WriteOperationType; import org.apache.hudi.common.table.timeline.HoodieActiveTimeline; import org.apache.hudi.common.table.timeline.HoodieInstant; @@ -44,9 +43,9 @@ import org.apache.hudi.exception.HoodieUpsertException; import org.apache.hudi.execution.SparkLazyInsertIterable; import org.apache.hudi.io.CreateHandleFactory; +import org.apache.hudi.io.HoodieConcatHandle; import org.apache.hudi.io.HoodieMergeHandle; import org.apache.hudi.io.HoodieSortedMergeHandle; -import org.apache.hudi.io.HoodieConcatHandle; import org.apache.hudi.keygen.BaseKeyGenerator; 
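The removed validateWriteResult above checked only that clustering produced at least one WriteStatus; its javadoc suggests extending this to compare records read against records written. A hedged sketch of such an extension (HoodieWriteStat::getNumWrites is assumed to report the per-file write count; the other names reuse the removed code and are not part of this PR):

// Sketch only: fail the clustering commit when nothing was written, reporting
// the plan's expected output size for easier debugging.
long expectedFileGroups = clusteringPlan.getInputGroups().stream()
    .mapToInt(HoodieClusteringGroup::getNumOutputFileGroups).sum();
long recordsWritten = writeMetadata.getWriteStats().get().stream()
    .mapToLong(HoodieWriteStat::getNumWrites).sum();
if (recordsWritten == 0) {
  throw new HoodieClusteringException("Clustering plan produced 0 records for " + instantTime
      + ", expected output across " + expectedFileGroups + " file groups");
}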
import org.apache.hudi.keygen.factory.HoodieSparkKeyGeneratorFactory; import org.apache.hudi.table.HoodieSparkTable; @@ -68,14 +67,14 @@ import java.nio.charset.StandardCharsets; import java.time.Duration; import java.time.Instant; -import java.util.stream.Collectors; import java.util.Collections; import java.util.Comparator; import java.util.HashMap; import java.util.Iterator; import java.util.List; -import java.util.Set; import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; import static org.apache.hudi.common.util.ClusteringUtils.getAllFileGroupsInPendingClusteringPlans; @@ -83,15 +82,14 @@ public abstract class BaseSparkCommitActionExecutor>, JavaRDD, JavaRDD, HoodieWriteMetadata> { private static final Logger LOG = LogManager.getLogger(BaseSparkCommitActionExecutor.class); - protected Option keyGeneratorOpt = Option.empty(); + protected final Option keyGeneratorOpt; public BaseSparkCommitActionExecutor(HoodieEngineContext context, HoodieWriteConfig config, HoodieTable table, String instantTime, WriteOperationType operationType) { - super(context, config, table, instantTime, operationType, Option.empty()); - initKeyGenIfNeeded(config.populateMetaFields()); + this(context, config, table, instantTime, operationType, Option.empty()); } public BaseSparkCommitActionExecutor(HoodieEngineContext context, @@ -101,16 +99,12 @@ public BaseSparkCommitActionExecutor(HoodieEngineContext context, WriteOperationType operationType, Option extraMetadata) { super(context, config, table, instantTime, operationType, extraMetadata); - initKeyGenIfNeeded(config.populateMetaFields()); - } - - private void initKeyGenIfNeeded(boolean populateMetaFields) { - if (!populateMetaFields) { - try { - keyGeneratorOpt = Option.of((BaseKeyGenerator) HoodieSparkKeyGeneratorFactory.createKeyGenerator(new TypedProperties(config.getProps()))); - } catch (IOException e) { - throw new HoodieIOException("Only BaseKeyGenerators are supported when meta columns are disabled ", e); - } + try { + keyGeneratorOpt = config.populateMetaFields() + ? 
Option.empty() + : Option.of((BaseKeyGenerator) HoodieSparkKeyGeneratorFactory.createKeyGenerator(this.config.getProps())); + } catch (IOException e) { + throw new HoodieIOException("Only BaseKeyGenerators are supported when meta columns are disabled ", e); } } @@ -126,7 +120,7 @@ private JavaRDD> clusteringHandleUpdate(JavaRDD> if (fileGroupsWithUpdatesAndPendingClustering.isEmpty()) { return recordsAndPendingClusteringFileGroups.getLeft(); } - // there are filegroups pending clustering and receiving updates, so rollback the pending clustering instants + // there are file groups pending clustering and receiving updates, so rollback the pending clustering instants // there could be race condition, for example, if the clustering completes after instants are fetched but before rollback completed if (config.isRollbackPendingClustering()) { Set pendingClusteringInstantsToRollback = getAllFileGroupsInPendingClusteringPlans(table.getMetaClient()).entrySet().stream() @@ -154,19 +148,22 @@ public HoodieWriteMetadata> execute(JavaRDD LOG.info("RDD PreppedRecords was persisted at: " + inputRecordsRDD.getStorageLevel()); } - WorkloadProfile profile = null; + WorkloadProfile workloadProfile = null; if (isWorkloadProfileNeeded()) { context.setJobStatus(this.getClass().getSimpleName(), "Building workload profile"); - profile = new WorkloadProfile(buildProfile(inputRecordsRDD), operationType); - LOG.info("Workload profile :" + profile); - saveWorkloadProfileMetadataToInflight(profile, instantTime); + workloadProfile = new WorkloadProfile(buildProfile(inputRecordsRDD), operationType, table.getIndex().canIndexLogFiles()); + LOG.info("Input workload profile :" + workloadProfile); + } + + // partition using the insert partitioner + final Partitioner partitioner = getPartitioner(workloadProfile); + if (isWorkloadProfileNeeded()) { + saveWorkloadProfileMetadataToInflight(workloadProfile, instantTime); } // handle records update with clustering JavaRDD> inputRecordsRDDWithClusteringUpdate = clusteringHandleUpdate(inputRecordsRDD); - // partition using the insert partitioner - final Partitioner partitioner = getPartitioner(profile); context.setJobStatus(this.getClass().getSimpleName(), "Doing partition and writing data"); JavaRDD> partitionedRecords = partition(inputRecordsRDDWithClusteringUpdate, partitioner); JavaRDD writeStatusRDD = partitionedRecords.mapPartitionsWithIndex((partition, recordItr) -> { @@ -273,6 +270,13 @@ protected String getCommitActionType() { return table.getMetaClient().getCommitActionType(); } + @Override + protected void setCommitMetadata(HoodieWriteMetadata> result) { + result.setCommitMetadata(Option.of(CommitUtils.buildMetadata(result.getWriteStatuses().map(WriteStatus::getStat).collect(), + result.getPartitionToReplaceFileIds(), + extraMetadata, operationType, getSchemaToStoreInCommit(), getCommitActionType()))); + } + @Override protected void commit(Option> extraMetadata, HoodieWriteMetadata> result) { context.setJobStatus(this.getClass().getSimpleName(), "Commit write status collect"); @@ -288,8 +292,7 @@ protected void commit(Option> extraMetadata, HoodieWriteMeta finalizeWrite(instantTime, writeStats, result); try { HoodieActiveTimeline activeTimeline = table.getActiveTimeline(); - HoodieCommitMetadata metadata = CommitUtils.buildMetadata(writeStats, result.getPartitionToReplaceFileIds(), - extraMetadata, operationType, getSchemaToStoreInCommit(), getCommitActionType()); + HoodieCommitMetadata metadata = result.getCommitMetadata().get(); writeTableMetadata(metadata, actionType); 
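With the new setCommitMetadata override, commit() above consumes a HoodieCommitMetadata that was already built once from the write statuses instead of rebuilding it inline, so pre-commit validators and the final commit see the same object. The flow implied by the two hunks, condensed (every name here appears in this diff):

// Built once, up front, by the setCommitMetadata override:
result.setCommitMetadata(Option.of(CommitUtils.buildMetadata(
    result.getWriteStatuses().map(WriteStatus::getStat).collect(),
    result.getPartitionToReplaceFileIds(),
    extraMetadata, operationType, getSchemaToStoreInCommit(), getCommitActionType())));
// ...then simply read back at commit time:
HoodieCommitMetadata metadata = result.getCommitMetadata().get();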
activeTimeline.saveAsComplete(new HoodieInstant(true, getCommitActionType(), instantTime), Option.of(metadata.toJsonString().getBytes(StandardCharsets.UTF_8))); diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkBucketIndexPartitioner.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkBucketIndexPartitioner.java index 71da2244db56f..65a45e1c6a047 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkBucketIndexPartitioner.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkBucketIndexPartitioner.java @@ -74,7 +74,7 @@ public SparkBucketIndexPartitioner(WorkloadProfile profile, " Bucket index partitioner should only be used by BucketIndex other than " + table.getIndex().getClass().getSimpleName()); } - this.numBuckets = ((HoodieBucketIndex) table.getIndex()).getNumBuckets(); + this.numBuckets = ((HoodieBucketIndex) table.getIndex()).getNumBuckets(); this.indexKeyField = config.getBucketIndexHashField(); this.totalPartitionPaths = profile.getPartitionPaths().size(); partitionPaths = new ArrayList<>(profile.getPartitionPaths()); @@ -90,7 +90,7 @@ public SparkBucketIndexPartitioner(WorkloadProfile profile, private void assignUpdates(WorkloadProfile profile) { updatePartitionPathFileIds = new HashMap<>(); // each update location gets a partition - Set> partitionStatEntries = profile.getPartitionPathStatMap() + Set> partitionStatEntries = profile.getInputPartitionPathStatMap() .entrySet(); for (Entry partitionStat : partitionStatEntries) { if (!updatePartitionPathFileIds.containsKey(partitionStat.getKey())) { diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkBulkInsertCommitActionExecutor.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkBulkInsertCommitActionExecutor.java index 2b00d47b01564..f4f1d3ad06ccf 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkBulkInsertCommitActionExecutor.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkBulkInsertCommitActionExecutor.java @@ -29,6 +29,7 @@ import org.apache.hudi.table.BulkInsertPartitioner; import org.apache.hudi.table.HoodieTable; import org.apache.hudi.table.action.HoodieWriteMetadata; + import org.apache.spark.api.java.JavaRDD; import java.util.Map; @@ -36,17 +37,17 @@ public class SparkBulkInsertCommitActionExecutor> extends BaseSparkCommitActionExecutor { private final JavaRDD> inputRecordsRDD; - private final Option> bulkInsertPartitioner; + private final Option>>> bulkInsertPartitioner; public SparkBulkInsertCommitActionExecutor(HoodieSparkEngineContext context, HoodieWriteConfig config, HoodieTable table, String instantTime, JavaRDD> inputRecordsRDD, - Option> bulkInsertPartitioner) { + Option>>> bulkInsertPartitioner) { this(context, config, table, instantTime, inputRecordsRDD, bulkInsertPartitioner, Option.empty()); } public SparkBulkInsertCommitActionExecutor(HoodieSparkEngineContext context, HoodieWriteConfig config, HoodieTable table, String instantTime, JavaRDD> inputRecordsRDD, - Option> bulkInsertPartitioner, + Option>>> bulkInsertPartitioner, Option> extraMetadata) { super(context, config, table, instantTime, WriteOperationType.BULK_INSERT, extraMetadata); this.inputRecordsRDD = inputRecordsRDD; diff --git 
a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkBulkInsertHelper.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkBulkInsertHelper.java index 4644d29e00fa1..d0c5ddef5e71d 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkBulkInsertHelper.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkBulkInsertHelper.java @@ -41,12 +41,12 @@ import java.util.stream.IntStream; /** - * A spark implementation of {@link AbstractBulkInsertHelper}. + * A spark implementation of {@link BaseBulkInsertHelper}. * * @param */ @SuppressWarnings("checkstyle:LineLength") -public class SparkBulkInsertHelper extends AbstractBulkInsertHelper>, +public class SparkBulkInsertHelper extends BaseBulkInsertHelper>, JavaRDD, JavaRDD, R> { private SparkBulkInsertHelper() { @@ -67,7 +67,7 @@ public HoodieWriteMetadata> bulkInsert(final JavaRDD>, JavaRDD, JavaRDD, R> executor, final boolean performDedupe, - final Option> userDefinedBulkInsertPartitioner) { + final Option>>> userDefinedBulkInsertPartitioner) { HoodieWriteMetadata result = new HoodieWriteMetadata(); //transition bulk_insert state to inflight @@ -88,7 +88,7 @@ public JavaRDD bulkInsert(JavaRDD> inputRecords, HoodieTable>, JavaRDD, JavaRDD> table, HoodieWriteConfig config, boolean performDedupe, - Option> userDefinedBulkInsertPartitioner, + Option>>> userDefinedBulkInsertPartitioner, boolean useWriterSchema, int parallelism, WriteHandleFactory writeHandleFactory) { diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkBulkInsertPreppedCommitActionExecutor.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkBulkInsertPreppedCommitActionExecutor.java index e6b6809498e29..28d8cb0b26422 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkBulkInsertPreppedCommitActionExecutor.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkBulkInsertPreppedCommitActionExecutor.java @@ -26,22 +26,22 @@ import org.apache.hudi.common.util.Option; import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.exception.HoodieInsertException; -import org.apache.hudi.table.HoodieTable; import org.apache.hudi.table.BulkInsertPartitioner; - +import org.apache.hudi.table.HoodieTable; import org.apache.hudi.table.action.HoodieWriteMetadata; + import org.apache.spark.api.java.JavaRDD; public class SparkBulkInsertPreppedCommitActionExecutor> extends BaseSparkCommitActionExecutor { private final JavaRDD> preppedInputRecordRdd; - private final Option> userDefinedBulkInsertPartitioner; + private final Option>>> userDefinedBulkInsertPartitioner; public SparkBulkInsertPreppedCommitActionExecutor(HoodieSparkEngineContext context, HoodieWriteConfig config, HoodieTable table, String instantTime, JavaRDD> preppedInputRecordRdd, - Option> userDefinedBulkInsertPartitioner) { + Option>>> userDefinedBulkInsertPartitioner) { super(context, config, table, instantTime, WriteOperationType.BULK_INSERT); this.preppedInputRecordRdd = preppedInputRecordRdd; this.userDefinedBulkInsertPartitioner = userDefinedBulkInsertPartitioner; @@ -60,4 +60,4 @@ public HoodieWriteMetadata> execute() { } } -} \ No newline at end of file +} diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkDeleteHelper.java 
b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkDeleteHelper.java index 5c3b4ca22f845..381c115533897 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkDeleteHelper.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkDeleteHelper.java @@ -22,6 +22,7 @@ import org.apache.hudi.client.common.HoodieSparkEngineContext; import org.apache.hudi.common.engine.HoodieEngineContext; import org.apache.hudi.common.model.EmptyHoodieRecordPayload; +import org.apache.hudi.common.model.HoodieAvroRecord; import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.model.HoodieRecordPayload; @@ -42,13 +43,13 @@ import java.util.HashMap; /** - * A spark implementation of {@link AbstractDeleteHelper}. + * A spark implementation of {@link BaseDeleteHelper}. * * @param */ @SuppressWarnings("checkstyle:LineLength") public class SparkDeleteHelper extends - AbstractDeleteHelper>, JavaRDD, JavaRDD, R> { + BaseDeleteHelper>, JavaRDD, JavaRDD, R> { private SparkDeleteHelper() { } @@ -93,7 +94,7 @@ public HoodieWriteMetadata> execute(String instantTime, } JavaRDD> dedupedRecords = - dedupedKeys.map(key -> new HoodieRecord(key, new EmptyHoodieRecordPayload())); + dedupedKeys.map(key -> new HoodieAvroRecord(key, new EmptyHoodieRecordPayload())); Instant beginTag = Instant.now(); // perform index loop up to get existing location of records JavaRDD> taggedRecords = HoodieJavaRDD.getJavaRDD( diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkInsertOverwritePartitioner.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkInsertOverwritePartitioner.java index 75dfbda30b7fb..dd545d5262846 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkInsertOverwritePartitioner.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkInsertOverwritePartitioner.java @@ -22,6 +22,7 @@ import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.table.HoodieTable; import org.apache.hudi.table.WorkloadProfile; + import org.apache.log4j.LogManager; import org.apache.log4j.Logger; @@ -44,7 +45,7 @@ public SparkInsertOverwritePartitioner(WorkloadProfile profile, HoodieEngineCont * Returns a list of small files in the given partition path. */ protected List getSmallFiles(String partitionPath) { - // for overwrite, we ignore all existing files. So dont consider any file to be smallFiles + // for overwrite, we ignore all existing files. 
So do not consider any file to be smallFiles return Collections.emptyList(); } } diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkMergeHelper.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkMergeHelper.java index 5e82dbd8c566d..e87c3ef5ba77e 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkMergeHelper.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkMergeHelper.java @@ -25,7 +25,6 @@ import org.apache.hudi.common.model.HoodieRecordPayload; import org.apache.hudi.common.util.queue.BoundedInMemoryExecutor; import org.apache.hudi.exception.HoodieException; -import org.apache.hudi.execution.SparkBoundedInMemoryExecutor; import org.apache.hudi.io.HoodieMergeHandle; import org.apache.hudi.io.storage.HoodieFileReader; import org.apache.hudi.io.storage.HoodieFileReaderFactory; @@ -43,7 +42,7 @@ import java.io.IOException; import java.util.Iterator; -public class SparkMergeHelper extends AbstractMergeHelper>, +public class SparkMergeHelper extends BaseMergeHelper>, JavaRDD, JavaRDD> { private SparkMergeHelper() { @@ -90,13 +89,13 @@ public void runMerge(HoodieTable>, JavaRDD ThreadLocal encoderCache = new ThreadLocal<>(); ThreadLocal decoderCache = new ThreadLocal<>(); - wrapper = new SparkBoundedInMemoryExecutor(table.getConfig(), readerIterator, + wrapper = new BoundedInMemoryExecutor(table.getConfig().getWriteBufferLimitBytes(), readerIterator, new UpdateHandler(mergeHandle), record -> { if (!externalSchemaTransformation) { return record; } return transformRecordBasedOnNewSchema(gReader, gWriter, encoderCache, decoderCache, (GenericRecord) record); - }); + }, table.getPreExecuteRunnable()); wrapper.execute(); } catch (Exception e) { throw new HoodieException(e); diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkWriteHelper.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkWriteHelper.java index f4eff44a26f3a..23dceb1382f34 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkWriteHelper.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/SparkWriteHelper.java @@ -20,6 +20,7 @@ import org.apache.hudi.client.WriteStatus; import org.apache.hudi.common.engine.HoodieEngineContext; +import org.apache.hudi.common.model.HoodieAvroRecord; import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.model.HoodieRecordPayload; @@ -32,11 +33,11 @@ import scala.Tuple2; /** - * A spark implementation of {@link AbstractWriteHelper}. + * A spark implementation of {@link BaseWriteHelper}. * * @param */ -public class SparkWriteHelper extends AbstractWriteHelper>, +public class SparkWriteHelper extends BaseWriteHelper>, JavaRDD, JavaRDD, R> { private SparkWriteHelper() { } @@ -58,7 +59,7 @@ protected JavaRDD> tag(JavaRDD> dedupedRecords, @Override public JavaRDD> deduplicateRecords( - JavaRDD> records, HoodieIndex index, int parallelism) { + JavaRDD> records, HoodieIndex index, int parallelism) { boolean isIndexingGlobal = index.isGlobal(); return records.mapToPair(record -> { HoodieKey hoodieKey = record.getKey(); @@ -70,7 +71,7 @@ public JavaRDD> deduplicateRecords( T reducedData = (T) rec2.getData().preCombine(rec1.getData()); HoodieKey reducedKey = rec1.getData().equals(reducedData) ? 
rec1.getKey() : rec2.getKey(); - return new HoodieRecord(reducedKey, reducedData); + return new HoodieAvroRecord(reducedKey, reducedData); }, parallelism).map(Tuple2::_2); } diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/UpsertPartitioner.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/UpsertPartitioner.java index 6729da72d65eb..c54c526253f0b 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/UpsertPartitioner.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/commit/UpsertPartitioner.java @@ -26,6 +26,7 @@ import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.model.HoodieRecordLocation; import org.apache.hudi.common.model.HoodieRecordPayload; +import org.apache.hudi.common.model.HoodieWriteStat; import org.apache.hudi.common.table.timeline.HoodieInstant; import org.apache.hudi.common.table.timeline.HoodieTimeline; import org.apache.hudi.common.util.NumericUtils; @@ -100,11 +101,19 @@ public UpsertPartitioner(WorkloadProfile profile, HoodieEngineContext context, H private void assignUpdates(WorkloadProfile profile) { // each update location gets a partition - Set> partitionStatEntries = profile.getPartitionPathStatMap().entrySet(); + Set> partitionStatEntries = profile.getInputPartitionPathStatMap().entrySet(); for (Map.Entry partitionStat : partitionStatEntries) { + WorkloadStat outputWorkloadStats = profile.getOutputPartitionPathStatMap().getOrDefault(partitionStat.getKey(), new WorkloadStat()); for (Map.Entry> updateLocEntry : partitionStat.getValue().getUpdateLocationToCount().entrySet()) { addUpdateBucket(partitionStat.getKey(), updateLocEntry.getKey()); + if (profile.hasOutputWorkLoadStats()) { + HoodieRecordLocation hoodieRecordLocation = new HoodieRecordLocation(updateLocEntry.getValue().getKey(), updateLocEntry.getKey()); + outputWorkloadStats.addUpdates(hoodieRecordLocation, updateLocEntry.getValue().getValue()); + } + } + if (profile.hasOutputWorkLoadStats()) { + profile.updateOutputPartitionPathStatMap(partitionStat.getKey(), outputWorkloadStats); } } } @@ -161,11 +170,12 @@ private void assignInserts(WorkloadProfile profile, HoodieEngineContext context) for (String partitionPath : partitionPaths) { WorkloadStat pStat = profile.getWorkloadStat(partitionPath); + WorkloadStat outputWorkloadStats = profile.getOutputPartitionPathStatMap().getOrDefault(partitionPath, new WorkloadStat()); if (pStat.getNumInserts() > 0) { List smallFiles = filterSmallFilesInClustering(partitionPathToPendingClusteringFileGroupsId.getOrDefault(partitionPath, Collections.emptySet()), - partitionSmallFilesMap.get(partitionPath)); + partitionSmallFilesMap.getOrDefault(partitionPath, new ArrayList<>())); this.smallFiles.addAll(smallFiles); @@ -189,6 +199,9 @@ private void assignInserts(WorkloadProfile profile, HoodieEngineContext context) bucket = addUpdateBucket(partitionPath, smallFile.location.getFileId()); LOG.info("Assigning " + recordsToAppend + " inserts to new update bucket " + bucket); } + if (profile.hasOutputWorkLoadStats()) { + outputWorkloadStats.addInserts(smallFile.location, recordsToAppend); + } bucketNumbers.add(bucket); recordsPerBucket.add(recordsToAppend); totalUnassignedInserts -= recordsToAppend; @@ -218,6 +231,9 @@ private void assignInserts(WorkloadProfile profile, HoodieEngineContext context) } BucketInfo bucketInfo = new BucketInfo(BucketType.INSERT, FSUtils.createNewFileIdPfx(), partitionPath); 
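The outputWorkloadStats bookkeeping introduced above records where each insert will physically land. Two cases fall out of the hunk: inserts bin-packed into an existing small file reuse that file's real location, while inserts routed to a brand-new file group have no commit yet, so their location pairs the NULL_COMMIT sentinel with the freshly created fileId prefix. Condensed from the diff (the two sites are separate in the real code; guard and accessors as shown there):

if (profile.hasOutputWorkLoadStats()) {
  // Case 1: inserts appended to an existing small file keep its real location.
  outputWorkloadStats.addInserts(smallFile.location, recordsToAppend);
  // Case 2: inserts routed to a new file group; no commit exists yet, so the
  // NULL_COMMIT sentinel is paired with the new fileId prefix.
  outputWorkloadStats.addInserts(
      new HoodieRecordLocation(HoodieWriteStat.NULL_COMMIT, bucketInfo.getFileIdPrefix()),
      recordsPerBucket.get(recordsPerBucket.size() - 1));
}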
bucketInfoMap.put(totalBuckets, bucketInfo); + if (profile.hasOutputWorkLoadStats()) { + outputWorkloadStats.addInserts(new HoodieRecordLocation(HoodieWriteStat.NULL_COMMIT, bucketInfo.getFileIdPrefix()), recordsPerBucket.get(recordsPerBucket.size() - 1)); + } totalBuckets++; } } @@ -235,12 +251,20 @@ private void assignInserts(WorkloadProfile profile, HoodieEngineContext context) LOG.info("Total insert buckets for partition path " + partitionPath + " => " + insertBuckets); partitionPathToInsertBucketInfos.put(partitionPath, insertBuckets); } + if (profile.hasOutputWorkLoadStats()) { + profile.updateOutputPartitionPathStatMap(partitionPath, outputWorkloadStats); + } } } private Map> getSmallFilesForPartitions(List partitionPaths, HoodieEngineContext context) { JavaSparkContext jsc = HoodieSparkEngineContext.getSparkContext(context); Map> partitionSmallFilesMap = new HashMap<>(); + + if (config.getParquetSmallFileLimit() <= 0) { + return partitionSmallFilesMap; + } + if (partitionPaths != null && partitionPaths.size() > 0) { context.setJobStatus(this.getClass().getSimpleName(), "Getting small files from partitions"); JavaRDD partitionPathRdds = jsc.parallelize(partitionPaths, partitionPaths.size()); @@ -297,6 +321,11 @@ public int numPartitions() { return totalBuckets; } + @Override + public int getNumPartitions() { + return totalBuckets; + } + @Override public int getPartition(Object key) { Tuple2> keyLocation = diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/deltacommit/AbstractSparkDeltaCommitActionExecutor.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/deltacommit/BaseSparkDeltaCommitActionExecutor.java similarity index 90% rename from hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/deltacommit/AbstractSparkDeltaCommitActionExecutor.java rename to hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/deltacommit/BaseSparkDeltaCommitActionExecutor.java index 3b3edd3084572..222506e7bbb36 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/deltacommit/AbstractSparkDeltaCommitActionExecutor.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/deltacommit/BaseSparkDeltaCommitActionExecutor.java @@ -43,19 +43,19 @@ import java.util.Iterator; import java.util.List; -public abstract class AbstractSparkDeltaCommitActionExecutor> +public abstract class BaseSparkDeltaCommitActionExecutor> extends BaseSparkCommitActionExecutor { - private static final Logger LOG = LogManager.getLogger(AbstractSparkDeltaCommitActionExecutor.class); + private static final Logger LOG = LogManager.getLogger(BaseSparkDeltaCommitActionExecutor.class); // UpsertPartitioner for MergeOnRead table type private SparkUpsertDeltaCommitPartitioner mergeOnReadUpsertPartitioner; - public AbstractSparkDeltaCommitActionExecutor(HoodieSparkEngineContext context, HoodieWriteConfig config, HoodieTable table, + public BaseSparkDeltaCommitActionExecutor(HoodieSparkEngineContext context, HoodieWriteConfig config, HoodieTable table, String instantTime, WriteOperationType operationType) { this(context, config, table, instantTime, operationType, Option.empty()); } - public AbstractSparkDeltaCommitActionExecutor(HoodieSparkEngineContext context, HoodieWriteConfig config, HoodieTable table, + public BaseSparkDeltaCommitActionExecutor(HoodieSparkEngineContext context, HoodieWriteConfig config, HoodieTable table, String instantTime, WriteOperationType operationType, 
Option> extraMetadata) { super(context, config, table, instantTime, operationType, extraMetadata); diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/deltacommit/SparkBulkInsertDeltaCommitActionExecutor.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/deltacommit/SparkBulkInsertDeltaCommitActionExecutor.java index 281304d957620..6f23e41773bbd 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/deltacommit/SparkBulkInsertDeltaCommitActionExecutor.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/deltacommit/SparkBulkInsertDeltaCommitActionExecutor.java @@ -18,8 +18,6 @@ package org.apache.hudi.table.action.deltacommit; -import java.util.Map; - import org.apache.hudi.client.WriteStatus; import org.apache.hudi.client.common.HoodieSparkEngineContext; import org.apache.hudi.common.model.HoodieRecord; @@ -28,28 +26,30 @@ import org.apache.hudi.common.util.Option; import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.exception.HoodieInsertException; -import org.apache.hudi.table.HoodieTable; import org.apache.hudi.table.BulkInsertPartitioner; - +import org.apache.hudi.table.HoodieTable; import org.apache.hudi.table.action.HoodieWriteMetadata; import org.apache.hudi.table.action.commit.SparkBulkInsertHelper; + import org.apache.spark.api.java.JavaRDD; +import java.util.Map; + public class SparkBulkInsertDeltaCommitActionExecutor> - extends AbstractSparkDeltaCommitActionExecutor { + extends BaseSparkDeltaCommitActionExecutor { private final JavaRDD> inputRecordsRDD; - private final Option> bulkInsertPartitioner; + private final Option>>> bulkInsertPartitioner; public SparkBulkInsertDeltaCommitActionExecutor(HoodieSparkEngineContext context, HoodieWriteConfig config, HoodieTable table, String instantTime, JavaRDD> inputRecordsRDD, - Option> bulkInsertPartitioner) { + Option>>> bulkInsertPartitioner) { this(context, config, table, instantTime, inputRecordsRDD, bulkInsertPartitioner, Option.empty()); } public SparkBulkInsertDeltaCommitActionExecutor(HoodieSparkEngineContext context, HoodieWriteConfig config, HoodieTable table, String instantTime, JavaRDD> inputRecordsRDD, - Option> bulkInsertPartitioner, + Option>>> bulkInsertPartitioner, Option> extraMetadata) { super(context, config, table, instantTime, WriteOperationType.BULK_INSERT, extraMetadata); this.inputRecordsRDD = inputRecordsRDD; diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/deltacommit/SparkBulkInsertPreppedDeltaCommitActionExecutor.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/deltacommit/SparkBulkInsertPreppedDeltaCommitActionExecutor.java index 21fc013af69c9..be5b903c7642d 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/deltacommit/SparkBulkInsertPreppedDeltaCommitActionExecutor.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/deltacommit/SparkBulkInsertPreppedDeltaCommitActionExecutor.java @@ -26,23 +26,23 @@ import org.apache.hudi.common.util.Option; import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.exception.HoodieInsertException; -import org.apache.hudi.table.HoodieTable; import org.apache.hudi.table.BulkInsertPartitioner; - +import org.apache.hudi.table.HoodieTable; import org.apache.hudi.table.action.HoodieWriteMetadata; import org.apache.hudi.table.action.commit.SparkBulkInsertHelper; + import 
org.apache.spark.api.java.JavaRDD; public class SparkBulkInsertPreppedDeltaCommitActionExecutor> - extends AbstractSparkDeltaCommitActionExecutor { + extends BaseSparkDeltaCommitActionExecutor { private final JavaRDD> preppedInputRecordRdd; - private final Option> bulkInsertPartitioner; + private final Option>>> bulkInsertPartitioner; public SparkBulkInsertPreppedDeltaCommitActionExecutor(HoodieSparkEngineContext context, HoodieWriteConfig config, HoodieTable table, String instantTime, JavaRDD> preppedInputRecordRdd, - Option> bulkInsertPartitioner) { + Option>>> bulkInsertPartitioner) { super(context, config, table, instantTime, WriteOperationType.BULK_INSERT); this.preppedInputRecordRdd = preppedInputRecordRdd; this.bulkInsertPartitioner = bulkInsertPartitioner; @@ -61,4 +61,4 @@ public HoodieWriteMetadata> execute() { } } -} \ No newline at end of file +} diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/deltacommit/SparkDeleteDeltaCommitActionExecutor.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/deltacommit/SparkDeleteDeltaCommitActionExecutor.java index 4fb6a90f90a41..7cff563571459 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/deltacommit/SparkDeleteDeltaCommitActionExecutor.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/deltacommit/SparkDeleteDeltaCommitActionExecutor.java @@ -25,13 +25,13 @@ import org.apache.hudi.common.model.WriteOperationType; import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.table.HoodieTable; - import org.apache.hudi.table.action.HoodieWriteMetadata; import org.apache.hudi.table.action.commit.SparkDeleteHelper; + import org.apache.spark.api.java.JavaRDD; public class SparkDeleteDeltaCommitActionExecutor> - extends AbstractSparkDeltaCommitActionExecutor { + extends BaseSparkDeltaCommitActionExecutor { private final JavaRDD keys; diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/deltacommit/SparkInsertDeltaCommitActionExecutor.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/deltacommit/SparkInsertDeltaCommitActionExecutor.java index 7dd91710d66e9..7e38823fc8838 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/deltacommit/SparkInsertDeltaCommitActionExecutor.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/deltacommit/SparkInsertDeltaCommitActionExecutor.java @@ -25,13 +25,13 @@ import org.apache.hudi.common.model.WriteOperationType; import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.table.HoodieTable; - import org.apache.hudi.table.action.HoodieWriteMetadata; import org.apache.hudi.table.action.commit.SparkWriteHelper; + import org.apache.spark.api.java.JavaRDD; public class SparkInsertDeltaCommitActionExecutor> - extends AbstractSparkDeltaCommitActionExecutor { + extends BaseSparkDeltaCommitActionExecutor { private final JavaRDD> inputRecordsRDD; diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/deltacommit/SparkInsertPreppedDeltaCommitActionExecutor.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/deltacommit/SparkInsertPreppedDeltaCommitActionExecutor.java index 1f1e0165b494a..e401d9555e434 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/deltacommit/SparkInsertPreppedDeltaCommitActionExecutor.java +++ 
b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/deltacommit/SparkInsertPreppedDeltaCommitActionExecutor.java @@ -26,10 +26,11 @@ import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.table.HoodieTable; import org.apache.hudi.table.action.HoodieWriteMetadata; + import org.apache.spark.api.java.JavaRDD; public class SparkInsertPreppedDeltaCommitActionExecutor> - extends AbstractSparkDeltaCommitActionExecutor { + extends BaseSparkDeltaCommitActionExecutor { private final JavaRDD> preppedRecords; diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/deltacommit/SparkUpsertDeltaCommitActionExecutor.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/deltacommit/SparkUpsertDeltaCommitActionExecutor.java index c6f3901a352b8..c63be6289004d 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/deltacommit/SparkUpsertDeltaCommitActionExecutor.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/deltacommit/SparkUpsertDeltaCommitActionExecutor.java @@ -24,13 +24,13 @@ import org.apache.hudi.common.model.WriteOperationType; import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.table.HoodieTable; - import org.apache.hudi.table.action.HoodieWriteMetadata; import org.apache.hudi.table.action.commit.SparkWriteHelper; + import org.apache.spark.api.java.JavaRDD; public class SparkUpsertDeltaCommitActionExecutor> - extends AbstractSparkDeltaCommitActionExecutor { + extends BaseSparkDeltaCommitActionExecutor { private JavaRDD> inputRecordsRDD; diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/deltacommit/SparkUpsertDeltaCommitPartitioner.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/deltacommit/SparkUpsertDeltaCommitPartitioner.java index 8dd3146f5161d..e498019c415d8 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/deltacommit/SparkUpsertDeltaCommitPartitioner.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/deltacommit/SparkUpsertDeltaCommitPartitioner.java @@ -97,6 +97,10 @@ private List getSmallFileCandidates(String partitionPath, HoodieInsta .collect(Collectors.toList()); } + if (config.getParquetSmallFileLimit() <= 0) { + return Collections.emptyList(); + } + // If we cannot index log files, then we choose the smallest parquet file in the partition and add inserts to // it. 
Doing this over time for a partition, we ensure that we handle small file issues return table.getSliceView() diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/deltacommit/SparkUpsertPreppedDeltaCommitActionExecutor.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/deltacommit/SparkUpsertPreppedDeltaCommitActionExecutor.java index 3509efa6bfa9f..f593fea779029 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/deltacommit/SparkUpsertPreppedDeltaCommitActionExecutor.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/deltacommit/SparkUpsertPreppedDeltaCommitActionExecutor.java @@ -26,10 +26,11 @@ import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.table.HoodieTable; import org.apache.hudi.table.action.HoodieWriteMetadata; + import org.apache.spark.api.java.JavaRDD; public class SparkUpsertPreppedDeltaCommitActionExecutor> - extends AbstractSparkDeltaCommitActionExecutor { + extends BaseSparkDeltaCommitActionExecutor { private final JavaRDD> preppedRecords; diff --git a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/upgrade/SparkUpgradeDowngradeHelper.java b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/upgrade/SparkUpgradeDowngradeHelper.java index f943b701757ed..1a911d5b42bba 100644 --- a/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/upgrade/SparkUpgradeDowngradeHelper.java +++ b/hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/upgrade/SparkUpgradeDowngradeHelper.java @@ -28,7 +28,7 @@ /** * Spark upgrade and downgrade helper. */ -public class SparkUpgradeDowngradeHelper implements BaseUpgradeDowngradeHelper { +public class SparkUpgradeDowngradeHelper implements SupportsUpgradeDowngrade { private static final SparkUpgradeDowngradeHelper SINGLETON_INSTANCE = new SparkUpgradeDowngradeHelper(); diff --git a/hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/AvroConversionHelper.scala b/hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/AvroConversionHelper.scala deleted file mode 100644 index f968cbe1c77bd..0000000000000 --- a/hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/AvroConversionHelper.scala +++ /dev/null @@ -1,380 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License.
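As a rough illustration of the guard added to SparkUpsertDeltaCommitPartitioner above: once the Parquet small-file limit is non-positive, getSmallFileCandidates returns an empty list and inserts are routed to new file groups instead of existing small files. A minimal sketch, assuming the standard hoodie.parquet.small.file.limit key (which backs config.getParquetSmallFileLimit()) and an illustrative base path:

    import java.util.Properties
    import org.apache.hudi.config.HoodieWriteConfig

    val props = new Properties()
    // Assumed config key; setting it to 0 disables small-file candidate
    // selection in the delta-commit partitioner per the guard above.
    props.setProperty("hoodie.parquet.small.file.limit", "0")

    val writeConfig = HoodieWriteConfig.newBuilder()
      .withPath("/tmp/hudi_table") // hypothetical base path
      .withProperties(props)
      .build()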
- */ - -package org.apache.hudi - -import java.nio.ByteBuffer -import java.sql.{Date, Timestamp} -import java.time.Instant - -import org.apache.avro.Conversions.DecimalConversion -import org.apache.avro.LogicalTypes.{TimestampMicros, TimestampMillis} -import org.apache.avro.Schema.Type._ -import org.apache.avro.generic.GenericData.{Fixed, Record} -import org.apache.avro.generic.{GenericData, GenericFixed, GenericRecord} -import org.apache.avro.{LogicalTypes, Schema} - -import org.apache.spark.sql.Row -import org.apache.spark.sql.avro.SchemaConverters -import org.apache.spark.sql.catalyst.expressions.GenericRow -import org.apache.spark.sql.catalyst.util.DateTimeUtils -import org.apache.spark.sql.types._ - -import org.apache.hudi.AvroConversionUtils._ -import org.apache.hudi.exception.HoodieIncompatibleSchemaException - -import scala.collection.JavaConverters._ - -object AvroConversionHelper { - - private def createDecimal(decimal: java.math.BigDecimal, precision: Int, scale: Int): Decimal = { - if (precision <= Decimal.MAX_LONG_DIGITS) { - // Constructs a `Decimal` with an unscaled `Long` value if possible. - Decimal(decimal.unscaledValue().longValue(), precision, scale) - } else { - // Otherwise, resorts to an unscaled `BigInteger` instead. - Decimal(decimal, precision, scale) - } - } - - /** - * - * Returns a converter function to convert row in avro format to GenericRow of catalyst. - * - * @param sourceAvroSchema Source schema before conversion inferred from avro file by passed in - * by user. - * @param targetSqlType Target catalyst sql type after the conversion. - * @return returns a converter function to convert row in avro format to GenericRow of catalyst. - */ - def createConverterToRow(sourceAvroSchema: Schema, - targetSqlType: DataType): AnyRef => AnyRef = { - - def createConverter(avroSchema: Schema, sqlType: DataType, path: List[String]): AnyRef => AnyRef = { - val avroType = avroSchema.getType - (sqlType, avroType) match { - // Avro strings are in Utf8, so we have to call toString on them - case (StringType, STRING) | (StringType, ENUM) => - (item: AnyRef) => if (item == null) null else item.toString - // Byte arrays are reused by avro, so we have to make a copy of them. 
- case (IntegerType, INT) | (BooleanType, BOOLEAN) | (DoubleType, DOUBLE) | - (FloatType, FLOAT) | (LongType, LONG) => - identity - case (BinaryType, FIXED) => - (item: AnyRef) => - if (item == null) { - null - } else { - item.asInstanceOf[Fixed].bytes().clone() - } - case (BinaryType, BYTES) => - (item: AnyRef) => - if (item == null) { - null - } else { - val byteBuffer = item.asInstanceOf[ByteBuffer] - val bytes = new Array[Byte](byteBuffer.remaining) - byteBuffer.get(bytes) - bytes - } - case (d: DecimalType, FIXED) => - (item: AnyRef) => - if (item == null) { - null - } else { - val decimalConversion = new DecimalConversion - val bigDecimal = decimalConversion.fromFixed(item.asInstanceOf[GenericFixed], avroSchema, - LogicalTypes.decimal(d.precision, d.scale)) - createDecimal(bigDecimal, d.precision, d.scale) - } - case (d: DecimalType, BYTES) => - (item: AnyRef) => - if (item == null) { - null - } else { - val decimalConversion = new DecimalConversion - val bigDecimal = decimalConversion.fromBytes(item.asInstanceOf[ByteBuffer], avroSchema, - LogicalTypes.decimal(d.precision, d.scale)) - createDecimal(bigDecimal, d.precision, d.scale) - } - case (DateType, INT) => - (item: AnyRef) => - if (item == null) { - null - } else { - item match { - case integer: Integer => DateTimeUtils.toJavaDate(integer) - case _ => new Date(item.asInstanceOf[Long]) - } - } - case (TimestampType, LONG) => - (item: AnyRef) => - if (item == null) { - null - } else { - avroSchema.getLogicalType match { - case _: TimestampMillis => - new Timestamp(item.asInstanceOf[Long]) - case _: TimestampMicros => - new Timestamp(item.asInstanceOf[Long] / 1000) - case null => - new Timestamp(item.asInstanceOf[Long]) - case other => - throw new HoodieIncompatibleSchemaException( - s"Cannot convert Avro logical type $other to Catalyst Timestamp type.") - } - } - case (struct: StructType, RECORD) => - val length = struct.fields.length - val converters = new Array[AnyRef => AnyRef](length) - val avroFieldIndexes = new Array[Int](length) - var i = 0 - while (i < length) { - val sqlField = struct.fields(i) - val avroField = avroSchema.getField(sqlField.name) - if (avroField != null) { - val converter = createConverter(avroField.schema(), sqlField.dataType, - path :+ sqlField.name) - converters(i) = converter - avroFieldIndexes(i) = avroField.pos() - } else if (!sqlField.nullable) { - throw new HoodieIncompatibleSchemaException( - s"Cannot find non-nullable field ${sqlField.name} at path ${path.mkString(".")} " + - "in Avro schema\n" + - s"Source Avro schema: $sourceAvroSchema.\n" + - s"Target Catalyst type: $targetSqlType") - } - i += 1 - } - - (item: AnyRef) => { - if (item == null) { - null - } else { - val record = item.asInstanceOf[GenericRecord] - - val result = new Array[Any](length) - var i = 0 - while (i < converters.length) { - if (converters(i) != null) { - val converter = converters(i) - result(i) = converter(record.get(avroFieldIndexes(i))) - } - i += 1 - } - new GenericRow(result) - } - } - case (arrayType: ArrayType, ARRAY) => - val elementConverter = createConverter(avroSchema.getElementType, arrayType.elementType, - path) - val allowsNull = arrayType.containsNull - (item: AnyRef) => { - if (item == null) { - null - } else { - item.asInstanceOf[java.lang.Iterable[AnyRef]].asScala.map { element => - if (element == null && !allowsNull) { - throw new RuntimeException(s"Array value at path ${path.mkString(".")} is not " + - "allowed to be null") - } else { - elementConverter(element) - } - } - } - } - case (mapType: 
MapType, MAP) if mapType.keyType == StringType => - val valueConverter = createConverter(avroSchema.getValueType, mapType.valueType, path) - val allowsNull = mapType.valueContainsNull - (item: AnyRef) => { - if (item == null) { - null - } else { - item.asInstanceOf[java.util.Map[AnyRef, AnyRef]].asScala.map { x => - if (x._2 == null && !allowsNull) { - throw new RuntimeException(s"Map value at path ${path.mkString(".")} is not " + - "allowed to be null") - } else { - (x._1.toString, valueConverter(x._2)) - } - }.toMap - } - } - case (sqlType, UNION) => - if (avroSchema.getTypes.asScala.exists(_.getType == NULL)) { - val remainingUnionTypes = avroSchema.getTypes.asScala.filterNot(_.getType == NULL) - if (remainingUnionTypes.size == 1) { - createConverter(remainingUnionTypes.head, sqlType, path) - } else { - createConverter(Schema.createUnion(remainingUnionTypes.asJava), sqlType, path) - } - } else avroSchema.getTypes.asScala.map(_.getType) match { - case Seq(_) => createConverter(avroSchema.getTypes.get(0), sqlType, path) - case Seq(a, b) if Set(a, b) == Set(INT, LONG) && sqlType == LongType => - (item: AnyRef) => { - item match { - case null => null - case l: java.lang.Long => l - case i: java.lang.Integer => new java.lang.Long(i.longValue()) - } - } - case Seq(a, b) if Set(a, b) == Set(FLOAT, DOUBLE) && sqlType == DoubleType => - (item: AnyRef) => { - item match { - case null => null - case d: java.lang.Double => d - case f: java.lang.Float => new java.lang.Double(f.doubleValue()) - } - } - case other => - sqlType match { - case t: StructType if t.fields.length == avroSchema.getTypes.size => - val fieldConverters = t.fields.zip(avroSchema.getTypes.asScala).map { - case (field, schema) => - createConverter(schema, field.dataType, path :+ field.name) - } - - (item: AnyRef) => - if (item == null) { - null - } else { - val i = GenericData.get().resolveUnion(avroSchema, item) - val converted = new Array[Any](fieldConverters.length) - converted(i) = fieldConverters(i)(item) - new GenericRow(converted) - } - case _ => throw new HoodieIncompatibleSchemaException( - s"Cannot convert Avro schema to catalyst type because schema at path " + - s"${path.mkString(".")} is not compatible " + - s"(avroType = $other, sqlType = $sqlType). \n" + - s"Source Avro schema: $sourceAvroSchema.\n" + - s"Target Catalyst type: $targetSqlType") - } - } - case (left, right) => - throw new HoodieIncompatibleSchemaException( - s"Cannot convert Avro schema to catalyst type because schema at path " + - s"${path.mkString(".")} is not compatible (avroType = $left, sqlType = $right). 
\n" + - s"Source Avro schema: $sourceAvroSchema.\n" + - s"Target Catalyst type: $targetSqlType") - } - } - - createConverter(sourceAvroSchema, targetSqlType, List.empty[String]) - } - - def createConverterToAvro(dataType: DataType, - structName: String, - recordNamespace: String): Any => Any = { - dataType match { - case BinaryType => (item: Any) => - item match { - case null => null - case bytes: Array[Byte] => ByteBuffer.wrap(bytes) - } - case IntegerType | LongType | - FloatType | DoubleType | StringType | BooleanType => identity - case ByteType => (item: Any) => - if (item == null) null else item.asInstanceOf[Byte].intValue - case ShortType => (item: Any) => - if (item == null) null else item.asInstanceOf[Short].intValue - case dec: DecimalType => - val schema = SchemaConverters.toAvroType(dec, nullable = false, structName, recordNamespace) - (item: Any) => { - Option(item).map { _ => - val bigDecimalValue = item.asInstanceOf[java.math.BigDecimal] - val decimalConversions = new DecimalConversion() - decimalConversions.toFixed(bigDecimalValue, schema, LogicalTypes.decimal(dec.precision, dec.scale)) - }.orNull - } - case TimestampType => (item: Any) => - if (item == null) { - null - } else { - val timestamp = item match { - case i: Instant => Timestamp.from(i) - case t: Timestamp => t - } - // Convert time to microseconds since spark-avro by default converts TimestampType to - // Avro Logical TimestampMicros - timestamp.getTime * 1000 - } - case DateType => (item: Any) => - Option(item).map(_.asInstanceOf[Date].toLocalDate.toEpochDay.toInt).orNull - case ArrayType(elementType, _) => - val elementConverter = createConverterToAvro( - elementType, - structName, - recordNamespace) - (item: Any) => { - if (item == null) { - null - } else { - val sourceArray = item.asInstanceOf[Seq[Any]] - val sourceArraySize = sourceArray.size - val targetList = new java.util.ArrayList[Any](sourceArraySize) - var idx = 0 - while (idx < sourceArraySize) { - targetList.add(elementConverter(sourceArray(idx))) - idx += 1 - } - targetList - } - } - case MapType(StringType, valueType, _) => - val valueConverter = createConverterToAvro( - valueType, - structName, - recordNamespace) - (item: Any) => { - if (item == null) { - null - } else { - val javaMap = new java.util.HashMap[String, Any]() - item.asInstanceOf[Map[String, Any]].foreach { case (key, value) => - javaMap.put(key, valueConverter(value)) - } - javaMap - } - } - case structType: StructType => - val schema: Schema = convertStructTypeToAvroSchema(structType, structName, recordNamespace) - val childNameSpace = if (recordNamespace != "") s"$recordNamespace.$structName" else structName - val fieldConverters = structType.fields.map(field => - createConverterToAvro( - field.dataType, - field.name, - childNameSpace)) - (item: Any) => { - if (item == null) { - null - } else { - val record = new Record(schema) - val convertersIterator = fieldConverters.iterator - val fieldNamesIterator = dataType.asInstanceOf[StructType].fieldNames.iterator - val rowIterator = item.asInstanceOf[Row].toSeq.iterator - - while (convertersIterator.hasNext && rowIterator.hasNext) { - val converter = convertersIterator.next() - record.put(fieldNamesIterator.next(), converter(rowIterator.next())) - } - record - } - } - } - } -} diff --git a/hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/AvroConversionUtils.scala b/hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/AvroConversionUtils.scala index 5b87fee14a1e2..62bcbf684b836 100644 --- 
a/hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/AvroConversionUtils.scala +++ b/hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/AvroConversionUtils.scala @@ -17,21 +17,105 @@ */ package org.apache.hudi - -import org.apache.avro.Schema -import org.apache.avro.JsonProperties +import org.apache.avro.Schema.Type import org.apache.avro.generic.{GenericRecord, GenericRecordBuilder, IndexedRecord} +import org.apache.avro.{AvroRuntimeException, JsonProperties, Schema} +import org.apache.hudi.HoodieSparkUtils.sparkAdapter import org.apache.hudi.avro.HoodieAvroUtils import org.apache.spark.rdd.RDD import org.apache.spark.sql.avro.SchemaConverters -import org.apache.spark.sql.types.StructType +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.encoders.RowEncoder +import org.apache.spark.sql.types.{DataType, StructType} import org.apache.spark.sql.{Dataset, Row, SparkSession} -import scala.collection.JavaConverters._ import scala.collection.JavaConversions._ +import scala.collection.JavaConverters._ object AvroConversionUtils { + /** + * Check the nullability of the input Avro type and resolve it when it is nullable. The first + * return value is a [[Boolean]] indicating if the input Avro type is nullable. The second + * return value is either provided Avro type if it's not nullable, or its resolved non-nullable part + * in case it is + */ + def resolveAvroTypeNullability(avroType: Schema): (Boolean, Schema) = { + if (avroType.getType == Type.UNION) { + val fields = avroType.getTypes.asScala + val actualType = fields.filter(_.getType != Type.NULL) + if (fields.length != 2 || actualType.length != 1) { + throw new AvroRuntimeException( + s"Unsupported Avro UNION type $avroType: Only UNION of a null type and a non-null " + + "type is supported") + } + (true, actualType.head) + } else { + (false, avroType) + } + } + + /** + * Creates converter to transform Avro payload into Spark's Catalyst one + * + * @param rootAvroType Avro [[Schema]] to be transformed from + * @param rootCatalystType Catalyst [[StructType]] to be transformed into + * @return converter accepting Avro payload and transforming it into a Catalyst one (in the form of [[InternalRow]]) + */ + def createAvroToInternalRowConverter(rootAvroType: Schema, rootCatalystType: StructType): GenericRecord => Option[InternalRow] = + record => sparkAdapter.createAvroDeserializer(rootAvroType, rootCatalystType) + .deserialize(record) + .map(_.asInstanceOf[InternalRow]) + + /** + * Creates converter to transform Catalyst payload into Avro one + * + * @param rootCatalystType Catalyst [[StructType]] to be transformed from + * @param rootAvroType Avro [[Schema]] to be transformed into + * @param nullable whether Avro record is nullable + * @return converter accepting Catalyst payload (in the form of [[InternalRow]]) and transforming it into an Avro one + */ + def createInternalRowToAvroConverter(rootCatalystType: StructType, rootAvroType: Schema, nullable: Boolean): InternalRow => GenericRecord = { + row => sparkAdapter.createAvroSerializer(rootCatalystType, rootAvroType, nullable) + .serialize(row) + .asInstanceOf[GenericRecord] + } + + /** + * @deprecated please use [[AvroConversionUtils.createAvroToInternalRowConverter]] + */ + @Deprecated + def createConverterToRow(sourceAvroSchema: Schema, + targetSqlType: StructType): GenericRecord => Row = { + val encoder = RowEncoder.apply(targetSqlType).resolveAndBind() + val serde = sparkAdapter.createSparkRowSerDe(encoder) + val converter = 
AvroConversionUtils.createAvroToInternalRowConverter(sourceAvroSchema, targetSqlType) + + avro => converter.apply(avro).map(serde.deserializeRow).get + } + + /** + * @deprecated please use [[AvroConversionUtils.createInternalRowToAvroConverter]] + */ + @Deprecated + def createConverterToAvro(sourceSqlType: StructType, + structName: String, + recordNamespace: String): Row => GenericRecord = { + val encoder = RowEncoder.apply(sourceSqlType).resolveAndBind() + val serde = sparkAdapter.createSparkRowSerDe(encoder) + val avroSchema = AvroConversionUtils.convertStructTypeToAvroSchema(sourceSqlType, structName, recordNamespace) + val (nullable, _) = resolveAvroTypeNullability(avroSchema) + + val converter = AvroConversionUtils.createInternalRowToAvroConverter(sourceSqlType, avroSchema, nullable) + + row => converter.apply(serde.serializeRow(row)) + } + + /** + * Creates [[org.apache.spark.sql.DataFrame]] from the provided [[RDD]] of [[GenericRecord]]s + * + * TODO convert directly from GenericRecord into InternalRow instead + */ def createDataFrame(rdd: RDD[GenericRecord], schemaStr: String, ss: SparkSession): Dataset[Row] = { if (rdd.isEmpty()) { ss.emptyDataFrame @@ -41,8 +125,8 @@ object AvroConversionUtils { else { val schema = new Schema.Parser().parse(schemaStr) val dataType = convertAvroSchemaToStructType(schema) - val convertor = AvroConversionHelper.createConverterToRow(schema, dataType) - records.map { x => convertor(x).asInstanceOf[Row] } + val converter = createConverterToRow(schema, dataType) + records.map { r => converter(r) } } }, convertAvroSchemaToStructType(new Schema.Parser().parse(schemaStr))) } @@ -57,7 +141,7 @@ object AvroConversionUtils { * @param recordNamespace Avro record namespace. * @return Avro schema corresponding to given struct type. 
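A short usage sketch of the resolveAvroTypeNullability helper introduced above, with an inline union schema for illustration; only two-branch ["null", T] unions are accepted:

    import java.util.Arrays
    import org.apache.avro.Schema

    val nullableString = Schema.createUnion(Arrays.asList(
      Schema.create(Schema.Type.NULL), Schema.create(Schema.Type.STRING)))

    val (nullable, resolved) = AvroConversionUtils.resolveAvroTypeNullability(nullableString)
    // nullable == true, resolved.getType == Schema.Type.STRING; any union other
    // than ["null", T] makes the helper throw AvroRuntimeException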
*/ - def convertStructTypeToAvroSchema(structType: StructType, + def convertStructTypeToAvroSchema(structType: DataType, structName: String, recordNamespace: String): Schema = { getAvroSchemaWithDefaults(SchemaConverters.toAvroType(structType, nullable = false, structName, recordNamespace)) @@ -117,7 +201,7 @@ object AvroConversionUtils { def buildAvroRecordBySchema(record: IndexedRecord, requiredSchema: Schema, - requiredPos: List[Int], + requiredPos: Seq[Int], recordBuilder: GenericRecordBuilder): GenericRecord = { val requiredFields = requiredSchema.getFields.asScala assert(requiredFields.length == requiredPos.length) diff --git a/hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/HoodieSparkUtils.scala b/hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/HoodieSparkUtils.scala index 3e5402565c151..b288289ac82ec 100644 --- a/hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/HoodieSparkUtils.scala +++ b/hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/HoodieSparkUtils.scala @@ -18,13 +18,10 @@ package org.apache.hudi -import java.util.Properties - import org.apache.avro.Schema import org.apache.avro.generic.GenericRecord - import org.apache.hadoop.fs.{FileSystem, Path} - +import org.apache.hudi.avro.HoodieAvroUtils.rewriteRecord import org.apache.hudi.client.utils.SparkRowSerDe import org.apache.hudi.common.config.TypedProperties import org.apache.hudi.common.model.HoodieRecord @@ -32,18 +29,17 @@ import org.apache.hudi.common.table.HoodieTableMetaClient import org.apache.hudi.keygen.constant.KeyGeneratorOptions import org.apache.hudi.keygen.factory.HoodieSparkKeyGeneratorFactory import org.apache.hudi.keygen.{BaseKeyGenerator, CustomAvroKeyGenerator, CustomKeyGenerator, KeyGenerator} - import org.apache.spark.SPARK_VERSION import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.encoders.RowEncoder import org.apache.spark.sql.catalyst.expressions.{AttributeReference, Expression, Literal} import org.apache.spark.sql.execution.datasources.{FileStatusCache, InMemoryFileIndex} -import org.apache.spark.sql.functions._ import org.apache.spark.sql.sources._ import org.apache.spark.sql.types.{StringType, StructField, StructType} import org.apache.spark.sql.{DataFrame, SparkSession} -import scala.collection.JavaConverters.asScalaBufferConverter +import java.util.Properties +import scala.collection.JavaConverters._ object HoodieSparkUtils extends SparkAdapterSupport { @@ -53,8 +49,12 @@ object HoodieSparkUtils extends SparkAdapterSupport { def isSpark3_0: Boolean = SPARK_VERSION.startsWith("3.0") + def isSpark3_1: Boolean = SPARK_VERSION.startsWith("3.1") + def isSpark3_2: Boolean = SPARK_VERSION.startsWith("3.2") + def gteqSpark3_2: Boolean = SPARK_VERSION > "3.2" + def getMetaSchema: StructType = { StructType(HoodieRecord.HOODIE_META_COLUMNS.asScala.map(col => { StructField(col, StringType, nullable = true) @@ -123,46 +123,49 @@ object HoodieSparkUtils extends SparkAdapterSupport { new InMemoryFileIndex(sparkSession, globbedPaths, Map(), Option.empty, fileStatusCache) } - def createRdd(df: DataFrame, structName: String, recordNamespace: String, reconcileToLatestSchema: Boolean, latestTableSchema: - org.apache.hudi.common.util.Option[Schema] = org.apache.hudi.common.util.Option.empty()): RDD[GenericRecord] = { - val dfWriteSchema = AvroConversionUtils.convertStructTypeToAvroSchema(df.schema, structName, recordNamespace) - var writeSchema : Schema = null; - var toReconcileSchema : Schema = null; - if (reconcileToLatestSchema && 
latestTableSchema.isPresent) { - // if reconcileToLatestSchema is set to true and latestSchema is present, then try to leverage latestTableSchema. - // this code path will handle situations where records are serialized in old schema, but callers wish to convert - // to Rdd[GenericRecord] using different schema (could be evolved schema or could be latest table schema) - writeSchema = dfWriteSchema - toReconcileSchema = latestTableSchema.get() - } else { - // there are paths where callers wish to use latestTableSchema to convert to Rdd[GenericRecords] and not use - // row's schema. So use latestTableSchema if present; if not available, fall back to using row's schema. - writeSchema = if (latestTableSchema.isPresent) { latestTableSchema.get()} else { dfWriteSchema} - } - createRddInternal(df, writeSchema, toReconcileSchema, structName, recordNamespace) + /** + * @deprecated please use other overload [[createRdd]] + */ + def createRdd(df: DataFrame, structName: String, recordNamespace: String, reconcileToLatestSchema: Boolean, + latestTableSchema: org.apache.hudi.common.util.Option[Schema] = org.apache.hudi.common.util.Option.empty()): RDD[GenericRecord] = { + val latestTableSchemaConverted = if (latestTableSchema.isPresent && reconcileToLatestSchema) Some(latestTableSchema.get()) else None + createRdd(df, structName, recordNamespace, latestTableSchemaConverted) } - def createRddInternal(df: DataFrame, writeSchema: Schema, latestTableSchema: Schema, structName: String, recordNamespace: String) : RDD[GenericRecord] = { - // Use the write avro schema to derive the StructType which has the correct nullability information - val writeDataType = AvroConversionUtils.convertAvroSchemaToStructType(writeSchema) - val encoder = RowEncoder.apply(writeDataType).resolveAndBind() - val deserializer = sparkAdapter.createSparkRowSerDe(encoder) - // if records were serialized with old schema, but an evolved schema was passed in with latestTableSchema, we need - // latestTableSchema equivalent datatype to be passed in to AvroConversionHelper.createConverterToAvro() - val reconciledDataType = - if (latestTableSchema != null) AvroConversionUtils.convertAvroSchemaToStructType(latestTableSchema) else writeDataType - // Note: deserializer.deserializeRow(row) is not capable of handling evolved schema. i.e. if Row was serialized in - // old schema, but deserializer was created with an encoder with evolved schema, deserialization fails. - // Hence we always need to deserialize in the same schema as serialized schema.
- df.queryExecution.toRdd.map(row => deserializer.deserializeRow(row)) - .mapPartitions { records => - if (records.isEmpty) Iterator.empty - else { - val convertor = AvroConversionHelper.createConverterToAvro(reconciledDataType, structName, recordNamespace) - records.map { x => convertor(x).asInstanceOf[GenericRecord] } - } + def createRdd(df: DataFrame, structName: String, recordNamespace: String, readerAvroSchemaOpt: Option[Schema]): RDD[GenericRecord] = { + val writerSchema = df.schema + val writerAvroSchema = AvroConversionUtils.convertStructTypeToAvroSchema(writerSchema, structName, recordNamespace) + val readerAvroSchema = readerAvroSchemaOpt.getOrElse(writerAvroSchema) + // We check whether passed in reader schema is identical to writer schema to avoid costly serde loop of + // making Spark deserialize its internal representation [[InternalRow]] into [[Row]] for subsequent conversion + // (and back) + val sameSchema = writerAvroSchema.equals(readerAvroSchema) + val (nullable, _) = AvroConversionUtils.resolveAvroTypeNullability(writerAvroSchema) + + // NOTE: We have to serialize Avro schema, and then subsequently parse it on the executor node, since Spark + // serializer is not able to digest it + val readerAvroSchemaStr = readerAvroSchema.toString + val writerAvroSchemaStr = writerAvroSchema.toString + // NOTE: We're accessing toRdd here directly to avoid [[InternalRow]] to [[Row]] conversion + df.queryExecution.toRdd.mapPartitions { rows => + if (rows.isEmpty) { + Iterator.empty + } else { + val transform: GenericRecord => GenericRecord = + if (sameSchema) identity + else { + val readerAvroSchema = new Schema.Parser().parse(readerAvroSchemaStr) + rewriteRecord(_, readerAvroSchema) + } + + // Since caller might request to get records in a different ("evolved") schema, we will be rewriting from + // existing Writer's schema into Reader's (avro) schema + val writerAvroSchema = new Schema.Parser().parse(writerAvroSchemaStr) + val convert = AvroConversionUtils.createInternalRowToAvroConverter(writerSchema, writerAvroSchema, nullable = nullable) + + rows.map { ir => transform(convert(ir)) } } + } } def getDeserializer(structType: StructType) : SparkRowSerDe = { @@ -293,4 +296,30 @@ object HoodieSparkUtils extends SparkAdapterSupport { s"${tableSchema.fieldNames.mkString(",")}") AttributeReference(columnName, field.get.dataType, field.get.nullable)() } + + def getRequiredSchema(tableAvroSchema: Schema, requiredColumns: Array[String]): (Schema, StructType) = { + // First get the required avro-schema, then convert the avro-schema to spark schema. + val name2Fields = tableAvroSchema.getFields.asScala.map(f => f.name() -> f).toMap + // Here have to create a new Schema.Field object + // to prevent throwing exceptions like "org.apache.avro.AvroRuntimeException: Field already used". 
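To make the rewritten createRdd above concrete, a hedged usage sketch (df and latestTableAvroSchema are assumed to be in scope; the struct name and namespace are illustrative). When the supplied reader schema differs from the DataFrame's writer schema, each record is rewritten on the executors via HoodieAvroUtils.rewriteRecord:

    import org.apache.avro.generic.GenericRecord
    import org.apache.spark.rdd.RDD

    val records: RDD[GenericRecord] =
      HoodieSparkUtils.createRdd(df, "hoodie_record", "hoodie.source",
        readerAvroSchemaOpt = Some(latestTableAvroSchema)) // assumed evolved schema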
+ val requiredFields = requiredColumns.map(c => name2Fields(c)) + .map(f => new Schema.Field(f.name(), f.schema(), f.doc(), f.defaultVal(), f.order())).toList + val requiredAvroSchema = Schema.createRecord(tableAvroSchema.getName, tableAvroSchema.getDoc, + tableAvroSchema.getNamespace, tableAvroSchema.isError, requiredFields.asJava) + val requiredStructSchema = AvroConversionUtils.convertAvroSchemaToStructType(requiredAvroSchema) + (requiredAvroSchema, requiredStructSchema) + } + + def toAttribute(tableSchema: StructType): Seq[AttributeReference] = { + tableSchema.map { field => + AttributeReference(field.name, field.dataType, field.nullable, field.metadata)() + } + } + + def collectFieldIndexes(projectedSchema: StructType, originalSchema: StructType): Seq[Int] = { + val nameToIndex = originalSchema.fields.zipWithIndex.map { case (field, index) => + field.name -> index + }.toMap + projectedSchema.map(field => nameToIndex(field.name)) + } } diff --git a/hudi-client/hudi-spark-client/src/main/scala/org/apache/spark/sql/avro/HoodieAvroDeserializerTrait.scala b/hudi-client/hudi-spark-client/src/main/scala/org/apache/spark/sql/avro/HoodieAvroDeserializerTrait.scala new file mode 100644 index 0000000000000..5c3035304cee7 --- /dev/null +++ b/hudi-client/hudi-spark-client/src/main/scala/org/apache/spark/sql/avro/HoodieAvroDeserializerTrait.scala @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.avro + +/** + * Deserializes Avro payload into Catalyst object + * + * NOTE: This is a low-level component operating on Spark internal data-types (comprising [[InternalRow]]). + * If you're looking to convert Avro into "deserialized" [[Row]] (comprised of Java native types), + * please check [[AvroConversionUtils]] + */ +trait HoodieAvroDeserializerTrait { + final def deserialize(data: Any): Option[Any] = + doDeserialize(data) match { + case opt: Option[_] => opt // As of Spark 3.1, this will return data wrapped with Option, so we fetch the data + case row => Some(row) // For other Spark versions, return the data as is + } + + protected def doDeserialize(data: Any): Any +} diff --git a/hudi-client/hudi-spark-client/src/main/scala/org/apache/spark/sql/avro/HoodieAvroSerializerTrait.scala b/hudi-client/hudi-spark-client/src/main/scala/org/apache/spark/sql/avro/HoodieAvroSerializerTrait.scala new file mode 100644 index 0000000000000..159d8da74d2db --- /dev/null +++ b/hudi-client/hudi-spark-client/src/main/scala/org/apache/spark/sql/avro/HoodieAvroSerializerTrait.scala @@ -0,0 +1,28 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.avro + +/** + * Serializes Catalyst payload into Avro object + * + * NOTE: This is a low-level component operating on Spark internal data-types (comprising [[InternalRow]]). + * If you're looking to convert "deserialized" [[Row]] into Avro, please check [[AvroConversionUtils]] + */ +trait HoodieAvroSerializerTrait { + def serialize(catalystData: Any): Any +} diff --git a/hudi-client/hudi-spark-client/src/main/scala/org/apache/spark/sql/hudi/SparkAdapter.scala b/hudi-client/hudi-spark-client/src/main/scala/org/apache/spark/sql/hudi/SparkAdapter.scala index 79c858e062519..32ed2b16ce639 100644 --- a/hudi-client/hudi-spark-client/src/main/scala/org/apache/spark/sql/hudi/SparkAdapter.scala +++ b/hudi-client/hudi-spark-client/src/main/scala/org/apache/spark/sql/hudi/SparkAdapter.scala @@ -18,24 +18,42 @@ package org.apache.spark.sql.hudi +import org.apache.avro.Schema import org.apache.hudi.client.utils.SparkRowSerDe +import org.apache.spark.sql.avro.{HoodieAvroDeserializerTrait, HoodieAvroSerializerTrait} import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation +import org.apache.spark.sql.catalyst.catalog.CatalogTable import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder import org.apache.spark.sql.catalyst.expressions.Expression import org.apache.spark.sql.catalyst.parser.ParserInterface import org.apache.spark.sql.catalyst.plans.JoinType -import org.apache.spark.sql.catalyst.plans.logical.{Join, LogicalPlan} +import org.apache.spark.sql.catalyst.plans.logical.{Join, LogicalPlan, SubqueryAlias} import org.apache.spark.sql.catalyst.{AliasIdentifier, TableIdentifier} -import org.apache.spark.sql.execution.datasources.SparkParsePartitionUtil +import org.apache.spark.sql.execution.datasources.{FilePartition, LogicalRelation, PartitionedFile, SparkParsePartitionUtil} import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.types.DataType import org.apache.spark.sql.{Row, SparkSession} +import java.util.Locale + /** * An interface to adapt the differences between spark2 and spark3 * in some spark-related classes. */ trait SparkAdapter extends Serializable { + /** + * Creates an instance of [[HoodieAvroSerializerTrait]], providing the ability to serialize + * Spark's [[InternalRow]] into Avro payloads + */ + def createAvroSerializer(rootCatalystType: DataType, rootAvroType: Schema, nullable: Boolean): HoodieAvroSerializerTrait + + /** + * Creates an instance of [[HoodieAvroDeserializerTrait]], providing the ability to deserialize + * Avro payloads into Spark's [[InternalRow]] + */ + def createAvroDeserializer(rootAvroType: Schema, rootCatalystType: DataType): HoodieAvroDeserializerTrait + /** * Create the SparkRowSerDe. */ @@ -92,4 +110,41 @@ trait SparkAdapter extends Serializable { * ParserInterface#parseMultipartIdentifier is supported since spark3; for spark2 this should not be called.
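Putting the two factory methods added to SparkAdapter together, a hedged round-trip sketch (structType, avroSchema and internalRow are assumed to be in scope and mutually consistent):

    import org.apache.avro.generic.GenericRecord
    import org.apache.spark.sql.catalyst.InternalRow

    val serializer = sparkAdapter.createAvroSerializer(structType, avroSchema, nullable = false)
    val deserializer = sparkAdapter.createAvroDeserializer(avroSchema, structType)

    val avroRecord = serializer.serialize(internalRow).asInstanceOf[GenericRecord]
    // deserialize always yields Option[Any]: HoodieAvroDeserializerTrait passes through
    // the Option returned by Spark 3.1 and wraps the bare value from other Spark versions
    val roundTripped = deserializer.deserialize(avroRecord).map(_.asInstanceOf[InternalRow])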
*/ def parseMultipartIdentifier(parser: ParserInterface, sqlText: String): Seq[String] + + /** + * Combine [[PartitionedFile]] to [[FilePartition]] according to `maxSplitBytes`. + */ + def getFilePartitions(sparkSession: SparkSession, partitionedFiles: Seq[PartitionedFile], + maxSplitBytes: Long): Seq[FilePartition] + + def isHoodieTable(table: LogicalPlan, spark: SparkSession): Boolean = { + tripAlias(table) match { + case LogicalRelation(_, _, Some(tbl), _) => isHoodieTable(tbl) + case relation: UnresolvedRelation => + isHoodieTable(toTableIdentifier(relation), spark) + case _=> false + } + } + + def isHoodieTable(map: java.util.Map[String, String]): Boolean = { + map.getOrDefault("provider", "").equals("hudi") + } + + def isHoodieTable(table: CatalogTable): Boolean = { + table.provider.map(_.toLowerCase(Locale.ROOT)).orNull == "hudi" + } + + def isHoodieTable(tableId: TableIdentifier, spark: SparkSession): Boolean = { + val table = spark.sessionState.catalog.getTableMetadata(tableId) + isHoodieTable(table) + } + + def tripAlias(plan: LogicalPlan): LogicalPlan = { + plan match { + case SubqueryAlias(_, relation: LogicalPlan) => + tripAlias(relation) + case other => + other + } + } } diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/TestClientRollback.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/TestClientRollback.java index 08960d97d8cb6..3b5393527fd79 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/TestClientRollback.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/TestClientRollback.java @@ -75,7 +75,7 @@ public void testSavepointAndRollback() throws Exception { HoodieWriteConfig cfg = getConfigBuilder().withCompactionConfig(HoodieCompactionConfig.newBuilder() .withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_COMMITS).retainCommits(1).build()).build(); try (SparkRDDWriteClient client = getHoodieWriteClient(cfg)) { - HoodieTestDataGenerator.writePartitionMetadata(fs, HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS, basePath); + HoodieTestDataGenerator.writePartitionMetadataDeprecated(fs, HoodieTestDataGenerator.DEFAULT_PARTITION_PATHS, basePath); /** * Write 1 (only inserts) @@ -171,7 +171,7 @@ public void testSavepointAndRollback() throws Exception { } /** - * Test Cases for effects of rollbacking completed/inflight commits. + * Test Cases for effects of rolling back completed/inflight commits. 
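The isHoodieTable overloads now provided by SparkAdapter can be exercised directly; a small sketch (the table name and session are illustrative):

    import java.util.Collections
    import org.apache.spark.sql.catalyst.TableIdentifier

    val byProvider = sparkAdapter.isHoodieTable(
      Collections.singletonMap("provider", "hudi")) // true: map-based provider check
    val byIdentifier = sparkAdapter.isHoodieTable(
      TableIdentifier("my_table"), spark)           // resolves through the session catalog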
*/ @Test public void testRollbackCommit() throws Exception { diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/TestHoodieClientMultiWriter.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/TestHoodieClientMultiWriter.java index e629a76654780..3aeca0f275891 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/TestHoodieClientMultiWriter.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/TestHoodieClientMultiWriter.java @@ -18,7 +18,6 @@ package org.apache.hudi.client; -import org.apache.hadoop.fs.Path; import org.apache.hudi.client.transaction.lock.InProcessLockProvider; import org.apache.hudi.common.config.LockConfiguration; import org.apache.hudi.common.model.HoodieFailedWritesCleaningPolicy; @@ -38,7 +37,10 @@ import org.apache.hudi.config.HoodieLockConfig; import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.exception.HoodieWriteConflictException; +import org.apache.hudi.table.action.HoodieWriteMetadata; import org.apache.hudi.testutils.HoodieClientTestBase; + +import org.apache.hadoop.fs.Path; import org.apache.spark.api.java.JavaRDD; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.Test; @@ -54,6 +56,7 @@ import java.util.Set; import java.util.concurrent.CountDownLatch; import java.util.concurrent.CyclicBarrier; +import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.Future; @@ -64,6 +67,7 @@ import static org.apache.hudi.common.config.LockConfiguration.FILESYSTEM_LOCK_PATH_PROP_KEY; import static org.apache.hudi.testutils.Assertions.assertNoWriteErrors; import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; +import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; import static org.junit.jupiter.api.Assertions.fail; @@ -94,10 +98,11 @@ public void testHoodieClientBasicMultiWriter(HoodieTableType tableType) throws E } Properties properties = new Properties(); properties.setProperty(FILESYSTEM_LOCK_PATH_PROP_KEY, basePath + "/.hoodie/.locks"); - properties.setProperty(LockConfiguration.LOCK_ACQUIRE_WAIT_TIMEOUT_MS_PROP_KEY,"3000"); + properties.setProperty(LockConfiguration.LOCK_ACQUIRE_WAIT_TIMEOUT_MS_PROP_KEY, "3000"); HoodieWriteConfig writeConfig = getConfigBuilder() .withCompactionConfig(HoodieCompactionConfig.newBuilder() - .withFailedWritesCleaningPolicy(HoodieFailedWritesCleaningPolicy.LAZY).withAutoClean(false).build()) + .withFailedWritesCleaningPolicy(HoodieFailedWritesCleaningPolicy.LAZY) + .withAutoArchive(false).withAutoClean(false).build()) .withWriteConcurrencyMode(WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL) // Timeline-server-based markers are not used for multi-writer tests .withMarkersType(MarkerType.DIRECT.name()) @@ -105,7 +110,7 @@ public void testHoodieClientBasicMultiWriter(HoodieTableType tableType) throws E .build()).withAutoCommit(false).withProperties(properties).build(); // Create the first commit - createCommitWithInserts(writeConfig, getHoodieWriteClient(writeConfig), "000", "001", 200); + createCommitWithInserts(writeConfig, getHoodieWriteClient(writeConfig), "000", "001", 200, true); final int threadCount = 2; final ExecutorService executors = Executors.newFixedThreadPool(2); @@ -182,9 +187,9 @@ public void 
testMultiWriterWithInsertsToDistinctPartitions(HoodieTableType table Properties properties = new Properties(); properties.setProperty(FILESYSTEM_LOCK_PATH_PROP_KEY, basePath + "/.hoodie/.locks"); - properties.setProperty(LockConfiguration.LOCK_ACQUIRE_WAIT_TIMEOUT_MS_PROP_KEY,"3000"); - properties.setProperty(LockConfiguration.LOCK_ACQUIRE_CLIENT_RETRY_WAIT_TIME_IN_MILLIS_PROP_KEY,"3000"); - properties.setProperty(LockConfiguration.LOCK_ACQUIRE_CLIENT_NUM_RETRIES_PROP_KEY,"20"); + properties.setProperty(LockConfiguration.LOCK_ACQUIRE_WAIT_TIMEOUT_MS_PROP_KEY, "3000"); + properties.setProperty(LockConfiguration.LOCK_ACQUIRE_CLIENT_RETRY_WAIT_TIME_IN_MILLIS_PROP_KEY, "3000"); + properties.setProperty(LockConfiguration.LOCK_ACQUIRE_CLIENT_NUM_RETRIES_PROP_KEY, "20"); HoodieWriteConfig cfg = getConfigBuilder() .withCompactionConfig(HoodieCompactionConfig.newBuilder() @@ -257,7 +262,7 @@ private void testMultiWriterWithAsyncTableServicesWithConflict(HoodieTableType t Properties properties = new Properties(); properties.setProperty(FILESYSTEM_LOCK_PATH_PROP_KEY, basePath + "/.hoodie/.locks"); properties.setProperty(FILESYSTEM_LOCK_PATH_PROP_KEY, basePath); - properties.setProperty(LockConfiguration.LOCK_ACQUIRE_WAIT_TIMEOUT_MS_PROP_KEY,"3000"); + properties.setProperty(LockConfiguration.LOCK_ACQUIRE_WAIT_TIMEOUT_MS_PROP_KEY, "3000"); // Disabling embedded timeline server, it doesn't work with multiwriter HoodieWriteConfig.Builder writeConfigBuilder = getConfigBuilder() .withCompactionConfig(HoodieCompactionConfig.newBuilder().withAutoClean(false) @@ -276,7 +281,7 @@ private void testMultiWriterWithAsyncTableServicesWithConflict(HoodieTableType t // Create the first commit with inserts HoodieWriteConfig cfg = writeConfigBuilder.build(); SparkRDDWriteClient client = getHoodieWriteClient(cfg); - createCommitWithInserts(cfg, client, "000", "001", 200); + createCommitWithInserts(cfg, client, "000", "001", 200, true); validInstants.add("001"); // Create 2 commits with upserts createCommitWithUpserts(cfg, client, "001", "000", "002", 100); @@ -351,7 +356,7 @@ private void testMultiWriterWithAsyncTableServicesWithConflict(HoodieTableType t final int numRecords = 100; latchCountDownAndWait(runCountDownLatch, 30000); assertDoesNotThrow(() -> { - createCommitWithInserts(cfg, client1, "003", newCommitTime, numRecords); + createCommitWithInserts(cfg, client1, "003", newCommitTime, numRecords, true); validInstants.add("007"); }); }); @@ -360,8 +365,8 @@ private void testMultiWriterWithAsyncTableServicesWithConflict(HoodieTableType t latchCountDownAndWait(runCountDownLatch, 30000); if (tableType == HoodieTableType.MERGE_ON_READ) { assertDoesNotThrow(() -> { - JavaRDD writeStatusJavaRDD = (JavaRDD) client2.compact("005"); - client2.commitCompaction("005", writeStatusJavaRDD, Option.empty()); + HoodieWriteMetadata> compactionMetadata = client2.compact("005"); + client2.commitCompaction("005", compactionMetadata.getCommitMetadata().get(), Option.empty()); validInstants.add("005"); }); } @@ -395,7 +400,7 @@ public void testHoodieClientMultiWriterWithClustering(HoodieTableType tableType) } Properties properties = new Properties(); properties.setProperty(FILESYSTEM_LOCK_PATH_PROP_KEY, basePath + "/.hoodie/.locks"); - properties.setProperty(LockConfiguration.LOCK_ACQUIRE_WAIT_TIMEOUT_MS_PROP_KEY,"3000"); + properties.setProperty(LockConfiguration.LOCK_ACQUIRE_WAIT_TIMEOUT_MS_PROP_KEY, "3000"); HoodieWriteConfig.Builder writeConfigBuilder = getConfigBuilder() 
.withCompactionConfig(HoodieCompactionConfig.newBuilder().withFailedWritesCleaningPolicy(HoodieFailedWritesCleaningPolicy.LAZY) .withAutoClean(false).build()) @@ -411,7 +416,7 @@ public void testHoodieClientMultiWriterWithClustering(HoodieTableType tableType) .build(); // Create the first commit - createCommitWithInserts(cfg, getHoodieWriteClient(cfg), "000", "001", 200); + createCommitWithInserts(cfg, getHoodieWriteClient(cfg), "000", "001", 200, true); // Start another inflight commit String newCommitTime = "003"; int numRecords = 100; @@ -441,6 +446,133 @@ public void testHoodieClientMultiWriterWithClustering(HoodieTableType tableType) } } + @Test + public void testHoodieClientMultiWriterAutoCommitForConflict() throws Exception { + Properties properties = new Properties(); + properties.setProperty(FILESYSTEM_LOCK_PATH_PROP_KEY, basePath + "/.hoodie/.locks"); + properties.setProperty(LockConfiguration.LOCK_ACQUIRE_WAIT_TIMEOUT_MS_PROP_KEY, "3000"); + properties.setProperty(LockConfiguration.LOCK_ACQUIRE_CLIENT_NUM_RETRIES_PROP_KEY, "100"); + HoodieWriteConfig.Builder writeConfigBuilder = getConfigBuilder() + .withCompactionConfig(HoodieCompactionConfig.newBuilder().withFailedWritesCleaningPolicy(HoodieFailedWritesCleaningPolicy.LAZY) + .withAutoClean(false).build()) + .withWriteConcurrencyMode(WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL) + // Timeline-server-based markers are not used for multi-writer tests + .withMarkersType(MarkerType.DIRECT.name()) + .withLockConfig(HoodieLockConfig.newBuilder().withLockProvider(InProcessLockProvider.class) + .build()).withAutoCommit(true).withProperties(properties); + HoodieWriteConfig cfg = writeConfigBuilder.build(); + HoodieWriteConfig cfg2 = writeConfigBuilder.build(); + + // Create the first commit + createCommitWithInserts(cfg, getHoodieWriteClient(cfg), "000", "001", 5000, false); + // Start another inflight commit + String newCommitTime1 = "003"; + String newCommitTime2 = "004"; + SparkRDDWriteClient client1 = getHoodieWriteClient(cfg); + SparkRDDWriteClient client2 = getHoodieWriteClient(cfg2); + + List updates1 = dataGen.generateUpdates(newCommitTime1, 5000); + List updates2 = dataGen.generateUpdates(newCommitTime2, 5000); + + JavaRDD writeRecords1 = jsc.parallelize(updates1, 4); + JavaRDD writeRecords2 = jsc.parallelize(updates2, 4); + + runConcurrentAndAssert(writeRecords1, writeRecords2, client1, client2, SparkRDDWriteClient::upsert, true); + } + + private void runConcurrentAndAssert(JavaRDD writeRecords1, JavaRDD writeRecords2, + SparkRDDWriteClient client1, SparkRDDWriteClient client2, + Function3, SparkRDDWriteClient, JavaRDD, String> writeFn, + boolean assertForConflict) throws ExecutionException, InterruptedException { + + CountDownLatch runCountDownLatch = new CountDownLatch(2); + final ExecutorService executors = Executors.newFixedThreadPool(2); + String newCommitTime1 = "003"; + String newCommitTime2 = "004"; + + AtomicBoolean client1Succeeded = new AtomicBoolean(true); + AtomicBoolean client2Succeeded = new AtomicBoolean(true); + + Future future1 = executors.submit(() -> { + try { + ingestBatch(writeFn, client1, newCommitTime1, writeRecords1, runCountDownLatch); + } catch (IOException e) { + LOG.error("IOException thrown " + e.getMessage()); + } catch (InterruptedException e) { + LOG.error("Interrupted Exception thrown " + e.getMessage()); + } catch (Exception e) { + client1Succeeded.set(false); + } + } + ); + + Future future2 = executors.submit(() -> { + try { + ingestBatch(writeFn, client2, newCommitTime2, 
writeRecords2, runCountDownLatch); + } catch (IOException e) { + LOG.error("IOException thrown " + e.getMessage()); + } catch (InterruptedException e) { + LOG.error("Interrupted Exception thrown " + e.getMessage()); + } catch (Exception e) { + client2Succeeded.set(false); + } + } + ); + + future1.get(); + future2.get(); + if (assertForConflict) { + assertFalse(client1Succeeded.get() && client2Succeeded.get()); + assertTrue(client1Succeeded.get() || client2Succeeded.get()); + } else { + assertTrue(client2Succeeded.get() && client1Succeeded.get()); + } + } + + @Test + public void testHoodieClientMultiWriterAutoCommitNonConflict() throws Exception { + Properties properties = new Properties(); + properties.setProperty(FILESYSTEM_LOCK_PATH_PROP_KEY, basePath + "/.hoodie/.locks"); + properties.setProperty(LockConfiguration.LOCK_ACQUIRE_WAIT_TIMEOUT_MS_PROP_KEY, "3000"); + properties.setProperty(LockConfiguration.LOCK_ACQUIRE_CLIENT_NUM_RETRIES_PROP_KEY, "100"); + HoodieWriteConfig.Builder writeConfigBuilder = getConfigBuilder() + .withCompactionConfig(HoodieCompactionConfig.newBuilder().withFailedWritesCleaningPolicy(HoodieFailedWritesCleaningPolicy.LAZY) + .withAutoClean(false).build()) + .withWriteConcurrencyMode(WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL) + // Timeline-server-based markers are not used for multi-writer tests + .withMarkersType(MarkerType.DIRECT.name()) + .withLockConfig(HoodieLockConfig.newBuilder().withLockProvider(InProcessLockProvider.class) + .build()).withAutoCommit(true).withProperties(properties); + HoodieWriteConfig cfg = writeConfigBuilder.build(); + HoodieWriteConfig cfg2 = writeConfigBuilder.build(); + + // Create the first commit + createCommitWithInserts(cfg, getHoodieWriteClient(cfg), "000", "001", 200, false); + // Start another inflight commit + String newCommitTime1 = "003"; + String newCommitTime2 = "004"; + SparkRDDWriteClient client1 = getHoodieWriteClient(cfg); + SparkRDDWriteClient client2 = getHoodieWriteClient(cfg2); + + List updates1 = dataGen.generateInserts(newCommitTime1, 200); + List updates2 = dataGen.generateInserts(newCommitTime2, 200); + + JavaRDD writeRecords1 = jsc.parallelize(updates1, 1); + JavaRDD writeRecords2 = jsc.parallelize(updates2, 1); + + runConcurrentAndAssert(writeRecords1, writeRecords2, client1, client2, SparkRDDWriteClient::bulkInsert, false); + } + + private void ingestBatch(Function3, SparkRDDWriteClient, JavaRDD, String> writeFn, + SparkRDDWriteClient writeClient, String commitTime, JavaRDD records, + CountDownLatch countDownLatch) throws IOException, InterruptedException { + writeClient.startCommitWithTime(commitTime); + countDownLatch.countDown(); + countDownLatch.await(); + JavaRDD statusJavaRDD = writeFn.apply(writeClient, records, commitTime); + statusJavaRDD.collect(); + } + private void createCommitWithInsertsForPartition(HoodieWriteConfig cfg, SparkRDDWriteClient client, String prevCommitTime, String newCommitTime, int numRecords, String partition) throws Exception { @@ -450,11 +582,14 @@ private void createCommitWithInsertsForPartition(HoodieWriteConfig cfg, SparkRDD } private void createCommitWithInserts(HoodieWriteConfig cfg, SparkRDDWriteClient client, - String prevCommitTime, String newCommitTime, int numRecords) throws Exception { - // Finish first base commmit + String prevCommitTime, String newCommitTime, int numRecords, + boolean doCommit) throws Exception { + // Finish first base commit JavaRDD result = insertFirstBatch(cfg, client, newCommitTime, prevCommitTime, numRecords, 
SparkRDDWriteClient::bulkInsert, false, false, numRecords); - assertTrue(client.commit(newCommitTime, result), "Commit should succeed"); + if (doCommit) { + assertTrue(client.commit(newCommitTime, result), "Commit should succeed"); + } } private void createCommitWithUpserts(HoodieWriteConfig cfg, SparkRDDWriteClient client, String prevCommit, diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/TestHoodieReadClient.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/TestHoodieReadClient.java index 1cd7d6ee9947d..872a4a4215ffc 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/TestHoodieReadClient.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/TestHoodieReadClient.java @@ -18,6 +18,7 @@ package org.apache.hudi.client; +import org.apache.hudi.common.model.HoodieAvroRecord; import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.util.Option; @@ -209,7 +210,7 @@ private void testTagLocation(HoodieWriteConfig hoodieWriteConfig, // since they have been modified in the DAG JavaRDD recordRDD = jsc.parallelize(result.collect().stream().map(WriteStatus::getWrittenRecords).flatMap(Collection::stream) - .map(record -> new HoodieRecord(record.getKey(), null)).collect(Collectors.toList())); + .map(record -> new HoodieAvroRecord(record.getKey(), null)).collect(Collectors.toList())); // Should have 100 records in table (check using Index), all in locations marked at commit HoodieReadClient readClient = getHoodieReadClient(hoodieWriteConfig.getBasePath()); List taggedRecords = readClient.tagLocation(recordRDD).collect(); @@ -225,7 +226,7 @@ private void testTagLocation(HoodieWriteConfig hoodieWriteConfig, numRecords, 200, 2); recordRDD = jsc.parallelize(result.collect().stream().map(WriteStatus::getWrittenRecords).flatMap(Collection::stream) - .map(record -> new HoodieRecord(record.getKey(), null)).collect(Collectors.toList())); + .map(record -> new HoodieAvroRecord(record.getKey(), null)).collect(Collectors.toList())); // Index should be able to locate all updates in correct locations. 
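As the TestHoodieReadClient and TestTableSchemaEvolution changes above show, tests now construct the concrete Avro-backed record class rather than HoodieRecord itself; a minimal sketch with an illustrative key:

    import org.apache.hudi.common.model.{HoodieAvroRecord, HoodieKey}

    // Key-only record (null payload), as used for the tagLocation lookups above
    val key = new HoodieKey("record-uuid-1", "2022/01/26")
    val record = new HoodieAvroRecord(key, null)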
readClient = getHoodieReadClient(hoodieWriteConfig.getBasePath()); taggedRecords = readClient.tagLocation(recordRDD).collect(); diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/TestMultiFS.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/TestMultiFS.java index 457b8b526aa04..df0fed027cec1 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/TestMultiFS.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/TestMultiFS.java @@ -18,7 +18,6 @@ package org.apache.hudi.client; -import org.apache.hudi.common.config.HoodieMetadataConfig; import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.model.HoodieAvroPayload; import org.apache.hudi.common.model.HoodieRecord; @@ -69,14 +68,9 @@ public void tearDown() throws Exception { } protected HoodieWriteConfig getHoodieWriteConfig(String basePath) { - return getHoodieWriteConfig(basePath, HoodieMetadataConfig.ENABLE.defaultValue()); - } - - protected HoodieWriteConfig getHoodieWriteConfig(String basePath, boolean enableMetadata) { return HoodieWriteConfig.newBuilder().withPath(basePath).withEmbeddedTimelineServerEnabled(true) .withSchema(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA).withParallelism(2, 2).forTable(tableName) .withIndexConfig(HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.BLOOM).build()) - .withMetadataConfig(HoodieMetadataConfig.newBuilder().enable(enableMetadata).build()) .build(); } @@ -84,21 +78,21 @@ protected HoodieWriteConfig getHoodieWriteConfig(String basePath, boolean enable public void readLocalWriteHDFS() throws Exception { // Initialize table and filesystem HoodieTableMetaClient.withPropertyBuilder() - .setTableType(tableType) - .setTableName(tableName) - .setPayloadClass(HoodieAvroPayload.class) - .initTable(hadoopConf, dfsBasePath); + .setTableType(tableType) + .setTableName(tableName) + .setPayloadClass(HoodieAvroPayload.class) + .initTable(hadoopConf, dfsBasePath); // Create write client to write some records in - HoodieWriteConfig cfg = getHoodieWriteConfig(dfsBasePath, false); - HoodieWriteConfig localConfig = getHoodieWriteConfig(tablePath, false); + HoodieWriteConfig cfg = getHoodieWriteConfig(dfsBasePath); + HoodieWriteConfig localConfig = getHoodieWriteConfig(tablePath); HoodieTableMetaClient.withPropertyBuilder() .setTableType(tableType) .setTableName(tableName) .setPayloadClass(HoodieAvroPayload.class) .setRecordKeyFields(localConfig.getProps().getProperty(KeyGeneratorOptions.RECORDKEY_FIELD_NAME.key())) - .setPartitionFields(localConfig.getProps().getProperty(KeyGeneratorOptions.PARTITIONPATH_FIELD_NAME.key())) + .setPartitionFields(localConfig.getProps().getProperty(KeyGeneratorOptions.PARTITIONPATH_FIELD_NAME.key())) .initTable(hadoopConf, tablePath); diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/TestTableSchemaEvolution.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/TestTableSchemaEvolution.java index dda396a135676..3fb454940bf5d 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/TestTableSchemaEvolution.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/TestTableSchemaEvolution.java @@ -19,6 +19,7 @@ package org.apache.hudi.client; import org.apache.hudi.avro.HoodieAvroUtils; +import org.apache.hudi.common.model.HoodieAvroRecord; import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.model.HoodieRecord; import 
org.apache.hudi.common.model.HoodieTableType; @@ -146,7 +147,7 @@ public void testSchemaCompatibilityBasic() throws Exception { + TIP_NESTED_SCHEMA + EXTRA_FIELD_SCHEMA + EXTRA_FIELD_SCHEMA.replace("new_field", "new_new_field") + TRIP_SCHEMA_SUFFIX; assertTrue(TableSchemaResolver.isSchemaCompatible(TRIP_EXAMPLE_SCHEMA, multipleAddedFieldSchema), - "Multiple added fields with defauls are compatible"); + "Multiple added fields with defaults are compatible"); assertFalse(TableSchemaResolver.isSchemaCompatible(TRIP_EXAMPLE_SCHEMA, TRIP_SCHEMA_PREFIX + EXTRA_TYPE_SCHEMA + MAP_TYPE_SCHEMA @@ -204,7 +205,7 @@ public void testMORTable() throws Exception { final List failedRecords = generateInsertsWithSchema("004", numRecords, TRIP_EXAMPLE_SCHEMA_DEVOLVED); try { // We cannot use insertBatch directly here because we want to insert records - // with a devolved schema and insertBatch inserts records using the TRIP_EXMPLE_SCHEMA. + // with a devolved schema and insertBatch inserts records using the TRIP_EXAMPLE_SCHEMA. writeBatch(client, "005", "004", Option.empty(), "003", numRecords, (String s, Integer a) -> failedRecords, SparkRDDWriteClient::insert, false, 0, 0, 0, false); fail("Insert with devolved scheme should fail"); @@ -232,7 +233,7 @@ public void testMORTable() throws Exception { client = getHoodieWriteClient(hoodieEvolvedWriteConfig); // We cannot use insertBatch directly here because we want to insert records - // with a evolved schemaand insertBatch inserts records using the TRIP_EXMPLE_SCHEMA. + // with an evolved schema and insertBatch inserts records using the TRIP_EXAMPLE_SCHEMA. final List evolvedRecords = generateInsertsWithSchema("005", numRecords, TRIP_EXAMPLE_SCHEMA_EVOLVED); writeBatch(client, "005", "004", Option.empty(), initCommitTime, numRecords, (String s, Integer a) -> evolvedRecords, SparkRDDWriteClient::insert, false, 0, 0, 0, false); @@ -497,9 +498,9 @@ private List convertToSchema(List records, String sc HoodieKey key = r.getKey(); GenericRecord payload; try { - payload = (GenericRecord)r.getData().getInsertValue(HoodieTestDataGenerator.AVRO_SCHEMA).get(); + payload = (GenericRecord) ((HoodieAvroRecord) r).getData().getInsertValue(HoodieTestDataGenerator.AVRO_SCHEMA).get(); GenericRecord newPayload = HoodieAvroUtils.rewriteRecord(payload, newSchema); - return new HoodieRecord(key, new RawTripTestPayload(newPayload.toString(), key.getRecordKey(), key.getPartitionPath(), schemaStr)); + return new HoodieAvroRecord(key, new RawTripTestPayload(newPayload.toString(), key.getRecordKey(), key.getPartitionPath(), schemaStr)); } catch (IOException e) { throw new RuntimeException("Conversion to new schema failed"); } diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/TestUpdateSchemaEvolution.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/TestUpdateSchemaEvolution.java index 00e65a67c08e7..70f5e9f3bfd1d 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/TestUpdateSchemaEvolution.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/TestUpdateSchemaEvolution.java @@ -19,6 +19,7 @@ package org.apache.hudi.client; import org.apache.hudi.common.fs.FSUtils; +import org.apache.hudi.common.model.HoodieAvroRecord; import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.model.HoodieRecordLocation; @@ -82,7 +83,7 @@ private WriteStatus prepareFirstRecordCommit(List recordsStrs) throws IO for (String recordStr : 
recordsStrs) { RawTripTestPayload rowChange = new RawTripTestPayload(recordStr); insertRecords - .add(new HoodieRecord(new HoodieKey(rowChange.getRowKey(), rowChange.getPartitionPath()), rowChange)); + .add(new HoodieAvroRecord(new HoodieKey(rowChange.getRowKey(), rowChange.getPartitionPath()), rowChange)); } Map insertRecordMap = insertRecords.stream() .collect(Collectors.toMap(r -> r.getRecordKey(), Function.identity())); @@ -147,7 +148,7 @@ private List buildUpdateRecords(String recordStr, String insertFil List updateRecords = new ArrayList<>(); RawTripTestPayload rowChange = new RawTripTestPayload(recordStr); HoodieRecord record = - new HoodieRecord(new HoodieKey(rowChange.getRowKey(), rowChange.getPartitionPath()), rowChange); + new HoodieAvroRecord(new HoodieKey(rowChange.getRowKey(), rowChange.getPartitionPath()), rowChange); record.setCurrentLocation(new HoodieRecordLocation("101", insertFileId)); record.seal(); updateRecords.add(record); diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieBackedMetadata.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieBackedMetadata.java index e3db3914ada77..223625fe7e469 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieBackedMetadata.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieBackedMetadata.java @@ -18,6 +18,7 @@ package org.apache.hudi.client.functional; +import org.apache.hudi.avro.HoodieAvroUtils; import org.apache.hudi.avro.model.HoodieMetadataRecord; import org.apache.hudi.client.SparkRDDWriteClient; import org.apache.hudi.client.WriteStatus; @@ -31,20 +32,24 @@ import org.apache.hudi.common.metrics.Registry; import org.apache.hudi.common.model.FileSlice; import org.apache.hudi.common.model.HoodieBaseFile; -import org.apache.hudi.common.model.HoodieCleaningPolicy; import org.apache.hudi.common.model.HoodieCommitMetadata; import org.apache.hudi.common.model.HoodieFailedWritesCleaningPolicy; import org.apache.hudi.common.model.HoodieFileFormat; import org.apache.hudi.common.model.HoodieFileGroup; import org.apache.hudi.common.model.HoodieFileGroupId; import org.apache.hudi.common.model.HoodieKey; +import org.apache.hudi.common.model.HoodieLogFile; import org.apache.hudi.common.model.HoodieRecord; +import org.apache.hudi.common.model.HoodieRecordPayload; import org.apache.hudi.common.model.HoodieTableType; import org.apache.hudi.common.model.WriteConcurrencyMode; import org.apache.hudi.common.table.HoodieTableConfig; import org.apache.hudi.common.table.HoodieTableMetaClient; import org.apache.hudi.common.table.HoodieTableVersion; -import org.apache.hudi.common.table.marker.MarkerType; +import org.apache.hudi.common.table.TableSchemaResolver; +import org.apache.hudi.common.table.log.HoodieLogFormat; +import org.apache.hudi.common.table.log.block.HoodieDataBlock; +import org.apache.hudi.common.table.log.block.HoodieLogBlock; import org.apache.hudi.common.table.timeline.HoodieActiveTimeline; import org.apache.hudi.common.table.timeline.HoodieInstant; import org.apache.hudi.common.table.timeline.HoodieTimeline; @@ -57,26 +62,25 @@ import org.apache.hudi.common.testutils.HoodieMetadataTestTable; import org.apache.hudi.common.testutils.HoodieTestDataGenerator; import org.apache.hudi.common.testutils.HoodieTestTable; +import org.apache.hudi.common.util.ClosableIterator; import org.apache.hudi.common.util.HoodieTimer; import 
org.apache.hudi.common.util.Option; +import org.apache.hudi.common.util.collection.ExternalSpillableMap; import org.apache.hudi.config.HoodieClusteringConfig; import org.apache.hudi.config.HoodieCompactionConfig; import org.apache.hudi.config.HoodieIndexConfig; import org.apache.hudi.config.HoodieLockConfig; import org.apache.hudi.config.HoodieStorageConfig; import org.apache.hudi.config.HoodieWriteConfig; -import org.apache.hudi.config.metrics.HoodieMetricsConfig; -import org.apache.hudi.config.metrics.HoodieMetricsGraphiteConfig; -import org.apache.hudi.config.metrics.HoodieMetricsJmxConfig; import org.apache.hudi.exception.HoodieMetadataException; import org.apache.hudi.index.HoodieIndex; import org.apache.hudi.io.storage.HoodieHFileReader; import org.apache.hudi.metadata.FileSystemBackedTableMetadata; import org.apache.hudi.metadata.HoodieBackedTableMetadataWriter; +import org.apache.hudi.metadata.HoodieMetadataMergedLogRecordReader; import org.apache.hudi.metadata.HoodieMetadataMetrics; import org.apache.hudi.metadata.HoodieMetadataPayload; import org.apache.hudi.metadata.HoodieTableMetadata; -import org.apache.hudi.metadata.HoodieTableMetadataKeyGenerator; import org.apache.hudi.metadata.MetadataPartitionType; import org.apache.hudi.metadata.SparkHoodieBackedTableMetadataWriter; import org.apache.hudi.table.HoodieSparkTable; @@ -86,6 +90,7 @@ import org.apache.hudi.table.upgrade.UpgradeDowngrade; import org.apache.hudi.testutils.MetadataMergeWriteStatus; +import org.apache.avro.Schema; import org.apache.avro.generic.GenericRecord; import org.apache.avro.generic.IndexedRecord; import org.apache.hadoop.fs.FSDataOutputStream; @@ -96,6 +101,8 @@ import org.apache.hadoop.util.Time; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; +import org.apache.parquet.avro.AvroSchemaConverter; +import org.apache.parquet.schema.MessageType; import org.apache.spark.api.java.JavaRDD; import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Tag; @@ -135,8 +142,8 @@ import static org.apache.hudi.common.model.WriteOperationType.INSERT; import static org.apache.hudi.common.model.WriteOperationType.UPSERT; import static org.apache.hudi.common.testutils.HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA; -import static org.apache.hudi.metadata.HoodieTableMetadata.METADATA_TABLE_NAME_SUFFIX; import static org.apache.hudi.testutils.Assertions.assertNoWriteErrors; +import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertNotNull; @@ -185,6 +192,8 @@ public void testMetadataTableBootstrap(HoodieTableType tableType, boolean addRol // trigger couple of upserts doWriteOperation(testTable, "0000005"); doWriteOperation(testTable, "0000006"); + doWriteOperation(testTable, "0000007"); + doCleanAndValidate(testTable, "0000008", Arrays.asList("0000007")); validateMetadata(testTable, true); } @@ -216,7 +225,7 @@ public void testOnlyValidPartitionsAdded(HoodieTableType tableType) throws Excep testTable.doWriteOperation("0000003", UPSERT, emptyList(), asList("p1", "p2"), 1, true); syncTableMetadata(writeConfig); - List partitions = metadataWriter(writeConfig).metadata().getAllPartitionPaths(); + List partitions = metadataWriter(writeConfig).getTableMetadata().getAllPartitionPaths(); assertFalse(partitions.contains(nonPartitionDirectory), "Must not contain the non-partition " + nonPartitionDirectory); 
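    // The expectation being pinned down here: only directories that Hudi itself
    // registered as partitions (via commits and their partition metafiles) should
    // come back from getAllPartitionPaths(); a stray directory created directly
    // on storage, like the one above, must never surface.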
assertTrue(partitions.contains("p1"), "Must contain partition p1"); @@ -339,6 +348,7 @@ public void testInsertUpsertCluster(HoodieTableType tableType) throws Exception if (tableType == MERGE_ON_READ) { doCompaction(testTable, "0000004"); } + doCleanAndValidate(testTable, "0000005", Arrays.asList("0000001")); validateMetadata(testTable, emptyList(), true); } @@ -374,6 +384,31 @@ public void testMetadataTableServices() throws Exception { assertEquals(tableMetadata.getLatestCompactionTime().get(), "0000003001"); } + @ParameterizedTest + @EnumSource(HoodieTableType.class) + public void testTableOperationsWithMetadataIndex(HoodieTableType tableType) throws Exception { + initPath(); + HoodieWriteConfig writeConfig = getWriteConfigBuilder(true, true, false) + .withIndexConfig(HoodieIndexConfig.newBuilder() + .bloomIndexBucketizedChecking(false) + .build()) + .withMetadataConfig(HoodieMetadataConfig.newBuilder() + .enable(true) + .withMetadataIndexBloomFilter(true) + .withMetadataIndexBloomFilterFileGroups(4) + .withMetadataIndexColumnStats(true) + .withMetadataIndexBloomFilterFileGroups(2) + .withMetadataIndexForAllColumns(true) + .build()) + .build(); + init(tableType, writeConfig); + testTableOperationsForMetaIndexImpl(writeConfig); + } + + private void testTableOperationsForMetaIndexImpl(final HoodieWriteConfig writeConfig) throws Exception { + HoodieSparkEngineContext engineContext = new HoodieSparkEngineContext(jsc); + testTableOperationsImpl(engineContext, writeConfig); + } /** * Tests that virtual key configs are honored in base files after compaction in metadata table. @@ -508,6 +543,257 @@ public void testMetadataTableWithPendingCompaction(boolean simulateFailedCompact } } + /** + * Test arguments - Table type, populate meta fields, exclude key from payload. + */ + public static List testMetadataRecordKeyExcludeFromPayloadArgs() { + return asList( + Arguments.of(COPY_ON_WRITE, true), + Arguments.of(COPY_ON_WRITE, false), + Arguments.of(MERGE_ON_READ, true), + Arguments.of(MERGE_ON_READ, false) + ); + } + + /** + * 1. Verify metadata table records key deduplication feature. When record key + * deduplication is enabled, verify the metadata record payload on disk has empty key. + * Otherwise, verify the valid key. + * 2. Verify populate meta fields work irrespective of record key deduplication config. + * 3. Verify table services like compaction benefit from record key deduplication feature. + */ + @ParameterizedTest + @MethodSource("testMetadataRecordKeyExcludeFromPayloadArgs") + public void testMetadataRecordKeyExcludeFromPayload(final HoodieTableType tableType, final boolean enableMetaFields) throws Exception { + initPath(); + writeConfig = getWriteConfigBuilder(true, true, false) + .withMetadataConfig(HoodieMetadataConfig.newBuilder() + .enable(true) + .withPopulateMetaFields(enableMetaFields) + .withMaxNumDeltaCommitsBeforeCompaction(3) + .build()) + .build(); + init(tableType, writeConfig); + + // 2nd commit + doWriteOperation(testTable, "0000001", INSERT); + + final HoodieTableMetaClient metadataMetaClient = HoodieTableMetaClient.builder() + .setConf(hadoopConf) + .setBasePath(metadataTableBasePath) + .build(); + HoodieWriteConfig metadataTableWriteConfig = getMetadataWriteConfig(writeConfig); + metadataMetaClient.reloadActiveTimeline(); + final HoodieTable table = HoodieSparkTable.create(metadataTableWriteConfig, context, metadataMetaClient); + + // Compaction has not yet kicked in. Verify all the log files + // for the metadata records persisted on disk as per the config. 
+    assertDoesNotThrow(() -> {
+      verifyMetadataRecordKeyExcludeFromPayloadLogFiles(table, metadataMetaClient, "0000001",
+          enableMetaFields);
+    }, "Metadata table should have valid log files!");
+
+    // Verify no base file created yet.
+    assertThrows(IllegalStateException.class, () -> {
+      verifyMetadataRecordKeyExcludeFromPayloadBaseFiles(table, enableMetaFields);
+    }, "Metadata table should not have a base file yet!");
+
+    // 2 more commits
+    doWriteOperation(testTable, "0000002", UPSERT);
+    doWriteOperation(testTable, "0000004", UPSERT);
+
+    // Compaction should be triggered by now. Let's verify the log files,
+    // if any, for the metadata records persisted on disk as per the config.
+    assertDoesNotThrow(() -> {
+      verifyMetadataRecordKeyExcludeFromPayloadLogFiles(table, metadataMetaClient, "0000002",
+          enableMetaFields);
+    }, "Metadata table should have valid log files!");
+
+    // Verify the base file created by the just completed compaction.
+    assertDoesNotThrow(() -> {
+      verifyMetadataRecordKeyExcludeFromPayloadBaseFiles(table, enableMetaFields);
+    }, "Metadata table should have a valid base file!");
+
+    // 2 more commits to trigger one more compaction, along with a clean
+    doWriteOperation(testTable, "0000005", UPSERT);
+    doClean(testTable, "0000006", Arrays.asList("0000004"));
+    doWriteOperation(testTable, "0000007", UPSERT);
+
+    assertDoesNotThrow(() -> {
+      verifyMetadataRecordKeyExcludeFromPayloadLogFiles(table, metadataMetaClient, "7", enableMetaFields);
+    }, "Metadata table should have valid log files!");
+
+    assertDoesNotThrow(() -> {
+      verifyMetadataRecordKeyExcludeFromPayloadBaseFiles(table, enableMetaFields);
+    }, "Metadata table should have a valid base file!");
+
+    validateMetadata(testTable);
+  }
+
+  /**
+   * Verify the metadata table log files for the record field correctness. The on-disk
+   * format should follow the meta fields and key deduplication configs, and the in-memory
+   * merged records should all be fully materialized irrespective of the config.
+   *
+   * @param table - Hoodie metadata test table
+   * @param metadataMetaClient - Metadata meta client
+   * @param latestCommitTimestamp - Latest commit timestamp
+   * @param enableMetaFields - Enable meta fields for the table records
+   * @throws IOException
+   */
+  private void verifyMetadataRecordKeyExcludeFromPayloadLogFiles(HoodieTable table, HoodieTableMetaClient metadataMetaClient,
+                                                                 String latestCommitTimestamp,
+                                                                 boolean enableMetaFields) throws IOException {
+    table.getHoodieView().sync();
+
+    // A file slice with log files must be available for verification.
+    List<FileSlice> fileSlices = table.getSliceView()
+        .getLatestFileSlices(MetadataPartitionType.FILES.getPartitionPath()).collect(Collectors.toList());
+    if (fileSlices.isEmpty()) {
+      throw new IllegalStateException("LogFile slices are not available!");
+    }
+
+    // Verify the log files honor the key deduplication and virtual keys config
+    List<HoodieLogFile> logFiles = fileSlices.get(0).getLogFiles().collect(Collectors.toList());
+    List<String> logFilePaths = logFiles.stream().map(logFile -> logFile.getPath().toString())
+        .collect(Collectors.toList());
+
+    // Verify the on-disk raw records before they get materialized
+    verifyMetadataRawRecords(table, logFiles, enableMetaFields);
+
+    // Verify the in-memory materialized and merged records
+    verifyMetadataMergedRecords(metadataMetaClient, logFilePaths, latestCommitTimestamp, enableMetaFields);
+  }
+
+  /**
+   * Verify the metadata table on-disk raw records.
When populate meta fields is enabled, + * these records should have additional meta fields in the payload. When key deduplication + * is enabled, these records on the disk should have key in the payload as empty string. + * + * @param table + * @param logFiles - Metadata table log files to be verified + * @param enableMetaFields - Enable meta fields for records + * @throws IOException + */ + private void verifyMetadataRawRecords(HoodieTable table, List logFiles, boolean enableMetaFields) throws IOException { + for (HoodieLogFile logFile : logFiles) { + FileStatus[] fsStatus = fs.listStatus(logFile.getPath()); + MessageType writerSchemaMsg = TableSchemaResolver.readSchemaFromLogFile(fs, logFile.getPath()); + if (writerSchemaMsg == null) { + // not a data block + continue; + } + + Schema writerSchema = new AvroSchemaConverter().convert(writerSchemaMsg); + HoodieLogFormat.Reader logFileReader = HoodieLogFormat.newReader(fs, new HoodieLogFile(fsStatus[0].getPath()), writerSchema); + + while (logFileReader.hasNext()) { + HoodieLogBlock logBlock = logFileReader.next(); + if (logBlock instanceof HoodieDataBlock) { + try (ClosableIterator recordItr = ((HoodieDataBlock) logBlock).getRecordItr()) { + recordItr.forEachRemaining(indexRecord -> { + final GenericRecord record = (GenericRecord) indexRecord; + if (enableMetaFields) { + // Metadata table records should have meta fields! + assertNotNull(record.get(HoodieRecord.RECORD_KEY_METADATA_FIELD)); + assertNotNull(record.get(HoodieRecord.COMMIT_TIME_METADATA_FIELD)); + } else { + // Metadata table records should not have meta fields! + assertNull(record.get(HoodieRecord.RECORD_KEY_METADATA_FIELD)); + assertNull(record.get(HoodieRecord.COMMIT_TIME_METADATA_FIELD)); + } + + final String key = String.valueOf(record.get(HoodieMetadataPayload.KEY_FIELD_NAME)); + assertFalse(key.isEmpty()); + if (enableMetaFields) { + assertTrue(key.equals(String.valueOf(record.get(HoodieRecord.RECORD_KEY_METADATA_FIELD)))); + } + }); + } + } + } + } + } + + /** + * Verify the metadata table in-memory merged records. Irrespective of key deduplication + * config, the in-memory merged records should always have the key field in the record + * payload fully materialized. 
+ * + * @param metadataMetaClient - Metadata table meta client + * @param logFilePaths - Metadata table log file paths + * @param latestCommitTimestamp + * @param enableMetaFields - Enable meta fields + */ + private void verifyMetadataMergedRecords(HoodieTableMetaClient metadataMetaClient, List logFilePaths, + String latestCommitTimestamp, boolean enableMetaFields) { + Schema schema = HoodieAvroUtils.addMetadataFields(HoodieMetadataRecord.getClassSchema()); + if (enableMetaFields) { + schema = HoodieAvroUtils.addMetadataFields(schema); + } + HoodieMetadataMergedLogRecordReader logRecordReader = HoodieMetadataMergedLogRecordReader.newBuilder() + .withFileSystem(metadataMetaClient.getFs()) + .withBasePath(metadataMetaClient.getBasePath()) + .withLogFilePaths(logFilePaths) + .withLatestInstantTime(latestCommitTimestamp) + .withPartition(MetadataPartitionType.FILES.getPartitionPath()) + .withReaderSchema(schema) + .withMaxMemorySizeInBytes(100000L) + .withBufferSize(4096) + .withSpillableMapBasePath(tempDir.toString()) + .withDiskMapType(ExternalSpillableMap.DiskMapType.BITCASK) + .build(); + + assertDoesNotThrow(() -> { + logRecordReader.scan(); + }, "Metadata log records materialization failed"); + + for (Map.Entry> entry : logRecordReader.getRecords().entrySet()) { + assertFalse(entry.getKey().isEmpty()); + assertFalse(entry.getValue().getRecordKey().isEmpty()); + assertEquals(entry.getKey(), entry.getValue().getRecordKey()); + } + } + + /** + * Verify metadata table base files for the records persisted based on the config. When + * the key deduplication is enabled, the records persisted on the disk in the base file + * should have key field in the payload as empty string. + * + * @param table - Metadata table + * @param enableMetaFields - Enable meta fields + */ + private void verifyMetadataRecordKeyExcludeFromPayloadBaseFiles(HoodieTable table, boolean enableMetaFields) throws IOException { + table.getHoodieView().sync(); + List fileSlices = table.getSliceView() + .getLatestFileSlices(MetadataPartitionType.FILES.getPartitionPath()).collect(Collectors.toList()); + if (!fileSlices.get(0).getBaseFile().isPresent()) { + throw new IllegalStateException("Base file not available!"); + } + final HoodieBaseFile baseFile = fileSlices.get(0).getBaseFile().get(); + + HoodieHFileReader hoodieHFileReader = new HoodieHFileReader(context.getHadoopConf().get(), + new Path(baseFile.getPath()), + new CacheConfig(context.getHadoopConf().get())); + List> records = hoodieHFileReader.readAllRecords(); + records.forEach(entry -> { + if (enableMetaFields) { + assertNotNull(((GenericRecord) entry.getSecond()).get(HoodieRecord.RECORD_KEY_METADATA_FIELD)); + } else { + assertNull(((GenericRecord) entry.getSecond()).get(HoodieRecord.RECORD_KEY_METADATA_FIELD)); + } + + final String keyInPayload = (String) ((GenericRecord) entry.getSecond()) + .get(HoodieMetadataPayload.KEY_FIELD_NAME); + assertFalse(keyInPayload.isEmpty()); + }); + } + /** * Test rollback of various table operations sync to Metadata Table correctly. 
*/ @@ -804,10 +1090,20 @@ public void testMetadataPayloadSpuriousDeletes(boolean ignoreSpuriousDeletes) th public void testTableOperationsWithRestore(HoodieTableType tableType) throws Exception { init(tableType); HoodieSparkEngineContext engineContext = new HoodieSparkEngineContext(jsc); + HoodieWriteConfig writeConfig = getWriteConfigBuilder(true, true, false) + .withRollbackUsingMarkers(false).build(); + testTableOperationsImpl(engineContext, writeConfig); + } - try (SparkRDDWriteClient client = new SparkRDDWriteClient(engineContext, - getWriteConfigBuilder(true, true, false).withRollbackUsingMarkers(false).build())) { - + /** + * Test all major table operations with the given table, config and context. + * + * @param engineContext - Engine context + * @param writeConfig - Write config + * @throws IOException + */ + private void testTableOperationsImpl(HoodieSparkEngineContext engineContext, HoodieWriteConfig writeConfig) throws IOException { + try (SparkRDDWriteClient client = new SparkRDDWriteClient(engineContext, writeConfig)) { // Write 1 (Bulk insert) String newCommitTime = "0000001"; List records = dataGen.generateInserts(newCommitTime, 20); @@ -892,7 +1188,8 @@ public void testMetadataMultiWriter() throws Exception { Properties properties = new Properties(); properties.setProperty(FILESYSTEM_LOCK_PATH_PROP_KEY, basePath + "/.hoodie/.locks"); - properties.setProperty(LockConfiguration.LOCK_ACQUIRE_WAIT_TIMEOUT_MS_PROP_KEY,"3000"); + properties.setProperty(LockConfiguration.LOCK_ACQUIRE_WAIT_TIMEOUT_MS_PROP_KEY,"1000"); + properties.setProperty(LockConfiguration.LOCK_ACQUIRE_CLIENT_NUM_RETRIES_PROP_KEY,"20"); HoodieWriteConfig writeConfig = getWriteConfigBuilder(true, true, false) .withCompactionConfig(HoodieCompactionConfig.newBuilder() .withFailedWritesCleaningPolicy(HoodieFailedWritesCleaningPolicy.LAZY).withAutoClean(false).build()) @@ -1254,7 +1551,7 @@ public void testUpgradeDowngrade() throws IOException { assertTrue(currentStatus.getModificationTime() > prevStatus.getModificationTime()); initMetaClient(); - assertEquals(metaClient.getTableConfig().getTableVersion().versionCode(), HoodieTableVersion.THREE.versionCode()); + assertEquals(metaClient.getTableConfig().getTableVersion().versionCode(), HoodieTableVersion.FOUR.versionCode()); assertTrue(fs.exists(new Path(metadataTableBasePath)), "Metadata table should exist"); FileStatus newStatus = fs.getFileStatus(new Path(metadataTableBasePath)); assertTrue(oldStatus.getModificationTime() < newStatus.getModificationTime()); @@ -1336,7 +1633,7 @@ public void testRollbackDuringUpgradeForDoubleLocking() throws IOException, Inte } initMetaClient(); - assertEquals(metaClient.getTableConfig().getTableVersion().versionCode(), HoodieTableVersion.THREE.versionCode()); + assertEquals(metaClient.getTableConfig().getTableVersion().versionCode(), HoodieTableVersion.FOUR.versionCode()); assertTrue(fs.exists(new Path(metadataTableBasePath)), "Metadata table should exist"); FileStatus newStatus = fs.getFileStatus(new Path(metadataTableBasePath)); assertTrue(oldStatus.getModificationTime() < newStatus.getModificationTime()); @@ -1483,7 +1780,7 @@ public void testMetadataMetrics() throws Exception { assertTrue(metricsRegistry.getAllCounts().containsKey(HoodieMetadataMetrics.INITIALIZE_STR + ".count")); assertTrue(metricsRegistry.getAllCounts().containsKey(HoodieMetadataMetrics.INITIALIZE_STR + ".totalDuration")); assertTrue(metricsRegistry.getAllCounts().get(HoodieMetadataMetrics.INITIALIZE_STR + ".count") >= 1L); - final String prefix = 
MetadataPartitionType.FILES.partitionPath() + "."; + final String prefix = MetadataPartitionType.FILES.getPartitionPath() + "."; assertTrue(metricsRegistry.getAllCounts().containsKey(prefix + HoodieMetadataMetrics.STAT_COUNT_BASE_FILES)); assertTrue(metricsRegistry.getAllCounts().containsKey(prefix + HoodieMetadataMetrics.STAT_COUNT_LOG_FILES)); assertTrue(metricsRegistry.getAllCounts().containsKey(prefix + HoodieMetadataMetrics.STAT_TOTAL_BASE_FILE_SIZE)); @@ -1491,95 +1788,6 @@ public void testMetadataMetrics() throws Exception { } } - /** - * Fetching WriteConfig for metadata table from Data table's writeConfig is not trivial and the method is not public in source code. so, for now, - * using this method which mimics source code. - * @param writeConfig - * @return - */ - private HoodieWriteConfig getMetadataWriteConfig(HoodieWriteConfig writeConfig) { - int parallelism = writeConfig.getMetadataInsertParallelism(); - - int minCommitsToKeep = Math.max(writeConfig.getMetadataMinCommitsToKeep(), writeConfig.getMinCommitsToKeep()); - int maxCommitsToKeep = Math.max(writeConfig.getMetadataMaxCommitsToKeep(), writeConfig.getMaxCommitsToKeep()); - - // Create the write config for the metadata table by borrowing options from the main write config. - HoodieWriteConfig.Builder builder = HoodieWriteConfig.newBuilder() - .withTimelineLayoutVersion(TimelineLayoutVersion.CURR_VERSION) - .withConsistencyGuardConfig(ConsistencyGuardConfig.newBuilder() - .withConsistencyCheckEnabled(writeConfig.getConsistencyGuardConfig().isConsistencyCheckEnabled()) - .withInitialConsistencyCheckIntervalMs(writeConfig.getConsistencyGuardConfig().getInitialConsistencyCheckIntervalMs()) - .withMaxConsistencyCheckIntervalMs(writeConfig.getConsistencyGuardConfig().getMaxConsistencyCheckIntervalMs()) - .withMaxConsistencyChecks(writeConfig.getConsistencyGuardConfig().getMaxConsistencyChecks()) - .build()) - .withWriteConcurrencyMode(WriteConcurrencyMode.SINGLE_WRITER) - .withMetadataConfig(HoodieMetadataConfig.newBuilder().enable(false).withFileListingParallelism(writeConfig.getFileListingParallelism()).build()) - .withAutoCommit(true) - .withAvroSchemaValidate(true) - .withEmbeddedTimelineServerEnabled(false) - .withMarkersType(MarkerType.DIRECT.name()) - .withRollbackUsingMarkers(false) - .withPath(HoodieTableMetadata.getMetadataTableBasePath(writeConfig.getBasePath())) - .withSchema(HoodieMetadataRecord.getClassSchema().toString()) - .forTable(writeConfig.getTableName() + METADATA_TABLE_NAME_SUFFIX) - .withCompactionConfig(HoodieCompactionConfig.newBuilder() - .withAsyncClean(writeConfig.isMetadataAsyncClean()) - // we will trigger cleaning manually, to control the instant times - .withAutoClean(false) - .withCleanerParallelism(parallelism) - .withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_COMMITS) - .withFailedWritesCleaningPolicy(HoodieFailedWritesCleaningPolicy.LAZY) - .retainCommits(writeConfig.getMetadataCleanerCommitsRetained()) - .archiveCommitsWith(minCommitsToKeep, maxCommitsToKeep) - // we will trigger compaction manually, to control the instant times - .withInlineCompaction(false) - .withMaxNumDeltaCommitsBeforeCompaction(writeConfig.getMetadataCompactDeltaCommitMax()).build()) - .withParallelism(parallelism, parallelism) - .withDeleteParallelism(parallelism) - .withRollbackParallelism(parallelism) - .withFinalizeWriteParallelism(parallelism) - .withAllowMultiWriteOnSameInstant(true) - .withKeyGenerator(HoodieTableMetadataKeyGenerator.class.getCanonicalName()) - 
.withPopulateMetaFields(writeConfig.getMetadataConfig().populateMetaFields()); - - // RecordKey properties are needed for the metadata table records - final Properties properties = new Properties(); - properties.put(HoodieTableConfig.RECORDKEY_FIELDS.key(), HoodieMetadataPayload.SCHEMA_FIELD_ID_KEY); - properties.put("hoodie.datasource.write.recordkey.field", HoodieMetadataPayload.SCHEMA_FIELD_ID_KEY); - builder.withProperties(properties); - - if (writeConfig.isMetricsOn()) { - builder.withMetricsConfig(HoodieMetricsConfig.newBuilder() - .withReporterType(writeConfig.getMetricsReporterType().toString()) - .withExecutorMetrics(writeConfig.isExecutorMetricsEnabled()) - .on(true).build()); - switch (writeConfig.getMetricsReporterType()) { - case GRAPHITE: - builder.withMetricsGraphiteConfig(HoodieMetricsGraphiteConfig.newBuilder() - .onGraphitePort(writeConfig.getGraphiteServerPort()) - .toGraphiteHost(writeConfig.getGraphiteServerHost()) - .usePrefix(writeConfig.getGraphiteMetricPrefix()).build()); - break; - case JMX: - builder.withMetricsJmxConfig(HoodieMetricsJmxConfig.newBuilder() - .onJmxPort(writeConfig.getJmxPort()) - .toJmxHost(writeConfig.getJmxHost()) - .build()); - break; - case DATADOG: - case PROMETHEUS: - case PROMETHEUS_PUSHGATEWAY: - case CONSOLE: - case INMEMORY: - case CLOUDWATCH: - break; - default: - throw new HoodieMetadataException("Unsupported Metrics Reporter type " + writeConfig.getMetricsReporterType()); - } - } - return builder.build(); - } - private void doPreBootstrapOperations(HoodieTestTable testTable) throws Exception { doPreBootstrapOperations(testTable, "0000001", "0000002"); } @@ -1765,7 +1973,10 @@ private void validateMetadata(SparkRDDWriteClient testClient) throws IOException // in the .hoodie folder. List metadataTablePartitions = FSUtils.getAllPartitionPaths(engineContext, HoodieTableMetadata.getMetadataTableBasePath(basePath), false, false); - assertEquals(MetadataPartitionType.values().length, metadataTablePartitions.size()); + assertEquals(metadataWriter.getEnabledPartitionTypes().size(), metadataTablePartitions.size()); + + final Map metadataEnabledPartitionTypes = new HashMap<>(); + metadataWriter.getEnabledPartitionTypes().forEach(e -> metadataEnabledPartitionTypes.put(e.getPartitionPath(), e)); // Metadata table should automatically compact and clean // versions are +1 as autoclean / compaction happens end of commits @@ -1773,10 +1984,13 @@ private void validateMetadata(SparkRDDWriteClient testClient) throws IOException HoodieTableFileSystemView fsView = new HoodieTableFileSystemView(metadataMetaClient, metadataMetaClient.getActiveTimeline()); metadataTablePartitions.forEach(partition -> { List latestSlices = fsView.getLatestFileSlices(partition).collect(Collectors.toList()); - assertTrue(latestSlices.stream().map(FileSlice::getBaseFile).count() <= 1, "Should have a single latest base file"); - assertTrue(latestSlices.size() <= 1, "Should have a single latest file slice"); - assertTrue(latestSlices.size() <= numFileVersions, "Should limit file slice to " - + numFileVersions + " but was " + latestSlices.size()); + assertTrue(latestSlices.stream().map(FileSlice::getBaseFile).count() + <= metadataEnabledPartitionTypes.get(partition).getFileGroupCount(), "Should have a single latest base file per file group"); + assertTrue(latestSlices.size() + <= metadataEnabledPartitionTypes.get(partition).getFileGroupCount(), "Should have a single latest file slice per file group"); + assertTrue(latestSlices.size() + <= (numFileVersions * 
metadataEnabledPartitionTypes.get(partition).getFileGroupCount()), "Should limit file slice to " + + numFileVersions + " per file group, but was " + latestSlices.size()); }); LOG.info("Validation time=" + timer.endTimer()); diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieBackedTableMetadata.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieBackedTableMetadata.java index d6f151e34255a..70f54b111980e 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieBackedTableMetadata.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieBackedTableMetadata.java @@ -18,30 +18,64 @@ package org.apache.hudi.client.functional; +import org.apache.avro.Schema; +import org.apache.avro.generic.GenericRecord; +import org.apache.avro.generic.IndexedRecord; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.io.hfile.CacheConfig; +import org.apache.hadoop.hbase.util.Pair; +import org.apache.hudi.avro.HoodieAvroUtils; +import org.apache.hudi.avro.model.HoodieMetadataRecord; +import org.apache.hudi.common.config.HoodieMetadataConfig; +import org.apache.hudi.common.model.FileSlice; +import org.apache.hudi.common.model.HoodieBaseFile; +import org.apache.hudi.common.model.HoodieLogFile; +import org.apache.hudi.common.model.HoodieRecord; +import org.apache.hudi.common.model.HoodieRecordPayload; import org.apache.hudi.common.model.HoodieTableType; +import org.apache.hudi.common.table.HoodieTableMetaClient; +import org.apache.hudi.common.table.TableSchemaResolver; +import org.apache.hudi.common.table.log.HoodieLogFormat; +import org.apache.hudi.common.table.log.block.HoodieDataBlock; +import org.apache.hudi.common.table.log.block.HoodieLogBlock; import org.apache.hudi.common.table.view.TableFileSystemView; import org.apache.hudi.common.testutils.HoodieTestTable; +import org.apache.hudi.common.util.ClosableIterator; +import org.apache.hudi.common.util.collection.ExternalSpillableMap; +import org.apache.hudi.config.HoodieWriteConfig; +import org.apache.hudi.io.storage.HoodieHFileReader; import org.apache.hudi.metadata.HoodieBackedTableMetadata; +import org.apache.hudi.metadata.HoodieMetadataMergedLogRecordReader; +import org.apache.hudi.metadata.HoodieMetadataPayload; import org.apache.hudi.metadata.HoodieTableMetadataKeyGenerator; +import org.apache.hudi.metadata.MetadataPartitionType; import org.apache.hudi.table.HoodieSparkTable; import org.apache.hudi.table.HoodieTable; import org.apache.hadoop.fs.FileStatus; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; +import org.apache.parquet.avro.AvroSchemaConverter; +import org.apache.parquet.schema.MessageType; import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.EnumSource; import java.io.IOException; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collections; import java.util.List; import java.util.Map; import java.util.stream.Collectors; +import static org.apache.hudi.common.model.WriteOperationType.INSERT; +import static org.apache.hudi.common.model.WriteOperationType.UPSERT; +import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNull; +import static 
org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.junit.jupiter.api.Assertions.fail;
@@ -123,4 +157,216 @@ public void testNotExistPartition(final HoodieTableType tableType) throws Except
    tableMetadata.getAllFilesInPartition(new Path(writeConfig.getBasePath() + "dummy"));
    assertEquals(allFilesInPartition.length, 0);
  }
+
+  /**
+   * 1. Verify metadata table records key deduplication feature. When record key
+   * deduplication is enabled, verify the metadata record payload on disk has an empty key.
+   * Otherwise, verify the valid key.
+   * 2. Verify populate meta fields works irrespective of the record key deduplication config.
+   * 3. Verify table services like compaction benefit from the record key deduplication feature.
+   */
+  @ParameterizedTest
+  @EnumSource(HoodieTableType.class)
+  public void testMetadataRecordKeyExcludeFromPayload(final HoodieTableType tableType) throws Exception {
+    initPath();
+    writeConfig = getWriteConfigBuilder(true, true, false)
+        .withMetadataConfig(HoodieMetadataConfig.newBuilder()
+            .enable(true)
+            .withPopulateMetaFields(false)
+            .withMaxNumDeltaCommitsBeforeCompaction(3)
+            .build())
+        .build();
+    init(tableType, writeConfig);
+
+    // 2nd commit
+    doWriteOperation(testTable, "0000001", INSERT);
+
+    final HoodieTableMetaClient metadataMetaClient = HoodieTableMetaClient.builder()
+        .setConf(hadoopConf)
+        .setBasePath(metadataTableBasePath)
+        .build();
+    HoodieWriteConfig metadataTableWriteConfig = getMetadataWriteConfig(writeConfig);
+    metadataMetaClient.reloadActiveTimeline();
+    final HoodieTable table = HoodieSparkTable.create(metadataTableWriteConfig, context, metadataMetaClient);
+
+    // Compaction has not yet kicked in. Verify all the log files
+    // for the metadata records persisted on disk as per the config.
+    assertDoesNotThrow(() -> {
+      verifyMetadataRecordKeyExcludeFromPayloadLogFiles(table, metadataMetaClient, "0000001");
+    }, "Metadata table should have valid log files!");
+
+    // Verify no base file created yet.
+    assertThrows(IllegalStateException.class, () -> {
+      verifyMetadataRecordKeyExcludeFromPayloadBaseFiles(table);
+    }, "Metadata table should not have a base file yet!");
+
+    // 2 more commits
+    doWriteOperation(testTable, "0000002", UPSERT);
+    doWriteOperation(testTable, "0000004", UPSERT);
+
+    // Compaction should be triggered by now. Let's verify the log files,
+    // if any, for the metadata records persisted on disk as per the config.
+    assertDoesNotThrow(() -> {
+      verifyMetadataRecordKeyExcludeFromPayloadLogFiles(table, metadataMetaClient, "0000002");
+    }, "Metadata table should have valid log files!");
+
+    // Verify the base file created by the just completed compaction.
+    assertDoesNotThrow(() -> {
+      verifyMetadataRecordKeyExcludeFromPayloadBaseFiles(table);
+    }, "Metadata table should have a valid base file!");
+
+    // 2 more commits to trigger one more compaction, along with a clean
+    doWriteOperation(testTable, "0000005", UPSERT);
+    doClean(testTable, "0000006", Arrays.asList("0000004"));
+    doWriteOperation(testTable, "0000007", UPSERT);
+
+    assertDoesNotThrow(() -> {
+      verifyMetadataRecordKeyExcludeFromPayloadLogFiles(table, metadataMetaClient, "7");
+    }, "Metadata table should have valid log files!");
+
+    assertDoesNotThrow(() -> {
+      verifyMetadataRecordKeyExcludeFromPayloadBaseFiles(table);
+    }, "Metadata table should have a valid base file!");
+
+    validateMetadata(testTable);
+  }
+
+  /**
+   * Verify the metadata table log files for the record field correctness.
+   * The on-disk format should follow the meta fields and key deduplication configs,
+   * and the in-memory merged records should all be fully materialized irrespective
+   * of the config.
+   *
+   * @param table - Hoodie metadata test table
+   * @param metadataMetaClient - Metadata meta client
+   * @param latestCommitTimestamp - Latest commit timestamp
+   * @throws IOException
+   */
+  private void verifyMetadataRecordKeyExcludeFromPayloadLogFiles(HoodieTable table, HoodieTableMetaClient metadataMetaClient,
+                                                                 String latestCommitTimestamp) throws IOException {
+    table.getHoodieView().sync();
+
+    // A file slice with log files must be available for verification.
+    List<FileSlice> fileSlices = table.getSliceView()
+        .getLatestFileSlices(MetadataPartitionType.FILES.getPartitionPath()).collect(Collectors.toList());
+    if (fileSlices.isEmpty()) {
+      throw new IllegalStateException("LogFile slices are not available!");
+    }
+
+    // Verify the log files honor the key deduplication and virtual keys config
+    List<HoodieLogFile> logFiles = fileSlices.get(0).getLogFiles().collect(Collectors.toList());
+    List<String> logFilePaths = logFiles.stream().map(logFile -> logFile.getPath().toString())
+        .collect(Collectors.toList());
+
+    // Verify the on-disk raw records before they get materialized
+    verifyMetadataRawRecords(table, logFiles);
+
+    // Verify the in-memory materialized and merged records
+    verifyMetadataMergedRecords(metadataMetaClient, logFilePaths, latestCommitTimestamp);
+  }
+
+  /**
+   * Verify the metadata table on-disk raw records. When populate meta fields is enabled,
+   * these records should have additional meta fields in the payload. When key deduplication
+   * is enabled, these records on disk should have an empty key in the payload.
+   *
+   * @param table - Hoodie table
+   * @param logFiles - Metadata table log files to be verified
+   * @throws IOException
+   */
+  private void verifyMetadataRawRecords(HoodieTable table, List<HoodieLogFile> logFiles) throws IOException {
+    for (HoodieLogFile logFile : logFiles) {
+      FileStatus[] fsStatus = fs.listStatus(logFile.getPath());
+      MessageType writerSchemaMsg = TableSchemaResolver.readSchemaFromLogFile(fs, logFile.getPath());
+      if (writerSchemaMsg == null) {
+        // not a data block
+        continue;
+      }
+
+      Schema writerSchema = new AvroSchemaConverter().convert(writerSchemaMsg);
+      HoodieLogFormat.Reader logFileReader = HoodieLogFormat.newReader(fs, new HoodieLogFile(fsStatus[0].getPath()), writerSchema);
+
+      while (logFileReader.hasNext()) {
+        HoodieLogBlock logBlock = logFileReader.next();
+        if (logBlock instanceof HoodieDataBlock) {
+          try (ClosableIterator<IndexedRecord> recordItr = ((HoodieDataBlock) logBlock).getRecordItr()) {
+            recordItr.forEachRemaining(indexRecord -> {
+              final GenericRecord record = (GenericRecord) indexRecord;
+              assertNull(record.get(HoodieRecord.RECORD_KEY_METADATA_FIELD));
+              assertNull(record.get(HoodieRecord.COMMIT_TIME_METADATA_FIELD));
+              final String key = String.valueOf(record.get(HoodieMetadataPayload.KEY_FIELD_NAME));
+              assertFalse(key.isEmpty());
+            });
+          }
+        }
+      }
+    }
+  }
+
+  /**
+   * Verify the metadata table in-memory merged records. Irrespective of the key deduplication
+   * config, the in-memory merged records should always have the key field in the record
+   * payload fully materialized.
+ * + * @param metadataMetaClient - Metadata table meta client + * @param logFilePaths - Metadata table log file paths + * @param latestCommitTimestamp - Latest commit timestamp + */ + private void verifyMetadataMergedRecords(HoodieTableMetaClient metadataMetaClient, List logFilePaths, String latestCommitTimestamp) { + Schema schema = HoodieAvroUtils.addMetadataFields(HoodieMetadataRecord.getClassSchema()); + HoodieMetadataMergedLogRecordReader logRecordReader = HoodieMetadataMergedLogRecordReader.newBuilder() + .withFileSystem(metadataMetaClient.getFs()) + .withBasePath(metadataMetaClient.getBasePath()) + .withLogFilePaths(logFilePaths) + .withLatestInstantTime(latestCommitTimestamp) + .withPartition(MetadataPartitionType.FILES.getPartitionPath()) + .withReaderSchema(schema) + .withMaxMemorySizeInBytes(100000L) + .withBufferSize(4096) + .withSpillableMapBasePath(tempDir.toString()) + .withDiskMapType(ExternalSpillableMap.DiskMapType.BITCASK) + .build(); + + assertDoesNotThrow(() -> { + logRecordReader.scan(); + }, "Metadata log records materialization failed"); + + for (Map.Entry> entry : logRecordReader.getRecords().entrySet()) { + assertFalse(entry.getKey().isEmpty()); + assertFalse(entry.getValue().getRecordKey().isEmpty()); + assertEquals(entry.getKey(), entry.getValue().getRecordKey()); + } + } + + /** + * Verify metadata table base files for the records persisted based on the config. When + * the key deduplication is enabled, the records persisted on the disk in the base file + * should have key field in the payload as empty string. + * + * @param table - Metadata table + */ + private void verifyMetadataRecordKeyExcludeFromPayloadBaseFiles(HoodieTable table) throws IOException { + table.getHoodieView().sync(); + List fileSlices = table.getSliceView() + .getLatestFileSlices(MetadataPartitionType.FILES.getPartitionPath()).collect(Collectors.toList()); + if (!fileSlices.get(0).getBaseFile().isPresent()) { + throw new IllegalStateException("Base file not available!"); + } + final HoodieBaseFile baseFile = fileSlices.get(0).getBaseFile().get(); + + HoodieHFileReader hoodieHFileReader = new HoodieHFileReader(context.getHadoopConf().get(), + new Path(baseFile.getPath()), + new CacheConfig(context.getHadoopConf().get())); + List> records = hoodieHFileReader.readAllRecords(); + records.forEach(entry -> { + assertNull(((GenericRecord) entry.getSecond()).get(HoodieRecord.RECORD_KEY_METADATA_FIELD)); + final String keyInPayload = (String) ((GenericRecord) entry.getSecond()) + .get(HoodieMetadataPayload.KEY_FIELD_NAME); + assertFalse(keyInPayload.isEmpty()); + }); + } } diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieClientOnCopyOnWriteStorage.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieClientOnCopyOnWriteStorage.java index 4466d4672bd3d..6ab33b422d0c1 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieClientOnCopyOnWriteStorage.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieClientOnCopyOnWriteStorage.java @@ -21,7 +21,7 @@ import org.apache.hudi.avro.model.HoodieCleanMetadata; import org.apache.hudi.avro.model.HoodieClusteringPlan; import org.apache.hudi.avro.model.HoodieRequestedReplaceMetadata; -import org.apache.hudi.client.AbstractHoodieWriteClient; +import org.apache.hudi.client.BaseHoodieWriteClient; import org.apache.hudi.client.HoodieWriteResult; import 
org.apache.hudi.client.SparkRDDWriteClient; import org.apache.hudi.client.SparkTaskContextSupplier; @@ -36,6 +36,7 @@ import org.apache.hudi.common.fs.ConsistencyGuardConfig; import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.model.FileSlice; +import org.apache.hudi.common.model.HoodieAvroRecord; import org.apache.hudi.common.model.HoodieBaseFile; import org.apache.hudi.common.model.HoodieCommitMetadata; import org.apache.hudi.common.model.HoodieFailedWritesCleaningPolicy; @@ -438,15 +439,15 @@ private void testDeduplication( String recordKey = UUID.randomUUID().toString(); HoodieKey keyOne = new HoodieKey(recordKey, "2018-01-01"); HoodieRecord recordOne = - new HoodieRecord(keyOne, dataGen.generateRandomValue(keyOne, newCommitTime)); + new HoodieAvroRecord(keyOne, dataGen.generateRandomValue(keyOne, newCommitTime)); HoodieKey keyTwo = new HoodieKey(recordKey, "2018-02-01"); HoodieRecord recordTwo = - new HoodieRecord(keyTwo, dataGen.generateRandomValue(keyTwo, newCommitTime)); + new HoodieAvroRecord(keyTwo, dataGen.generateRandomValue(keyTwo, newCommitTime)); // Same key and partition as keyTwo HoodieRecord recordThree = - new HoodieRecord(keyTwo, dataGen.generateRandomValue(keyTwo, newCommitTime)); + new HoodieAvroRecord(keyTwo, dataGen.generateRandomValue(keyTwo, newCommitTime)); JavaRDD> records = jsc.parallelize(Arrays.asList(recordOne, recordTwo, recordThree), 1); @@ -575,6 +576,9 @@ private void testUpsertsInternal(HoodieWriteConfig config, HoodieWriteConfig newConfig = getConfigBuilder().withProps(config.getProps()).withTimelineLayoutVersion( TimelineLayoutVersion.CURR_VERSION).build(); client = getHoodieWriteClient(newConfig); + + client.savepoint("004", "user1","comment1"); + client.restoreToInstant("004"); assertFalse(metaClient.reloadActiveTimeline().getRollbackTimeline().lastInstant().isPresent()); @@ -687,7 +691,7 @@ public void testInsertsPreppedWithHoodieConcatHandle(boolean populateMetaFields) } /** - * Test one of HoodieConcatHandle w/ {@link AbstractHoodieWriteClient#insert(Object, String)} API. + * Test one of HoodieConcatHandle w/ {@link BaseHoodieWriteClient#insert(Object, String)} API. * * @param config Write Config * @throws Exception in case of error @@ -973,8 +977,8 @@ private void testUpsertsUpdatePartitionPath(IndexType indexType, HoodieWriteConf throw new IllegalStateException("Unknown partition path " + rec.getPartitionPath()); } recordsToUpsert.add( - new HoodieRecord(new HoodieKey(rec.getRecordKey(), newPartitionPath), - rec.getData())); + new HoodieAvroRecord(new HoodieKey(rec.getRecordKey(), newPartitionPath), + (HoodieRecordPayload) rec.getData())); // populate expected partition path and record keys expectedPartitionPathRecKeyPairs.add(Pair.of(newPartitionPath, rec.getRecordKey())); } @@ -1375,6 +1379,40 @@ public void testSimpleClustering(boolean populateMetaFields, boolean preserveCom testInsertAndClustering(clusteringConfig, populateMetaFields, true, false, SqlQueryEqualityPreCommitValidator.class.getName(), COUNT_SQL_QUERY_FOR_VALIDATION, ""); } + @ParameterizedTest + @ValueSource(booleans = {true, false}) + public void testInlineScheduleClustering(boolean scheduleInlineClustering) throws IOException { + testInsertTwoBatches(true); + + // setup clustering config. 
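+    // What the two flags below do (per the assertions at the end of this test,
+    // and stated here as the intent rather than a spec): withInlineClustering(false)
+    // keeps the writer from executing clustering inline, while
+    // withScheduleInlineClustering(scheduleInlineClustering) only controls whether
+    // a clustering plan gets scheduled at commit time, which is why the test counts
+    // pending clustering plans rather than completed ones.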
+ HoodieClusteringConfig clusteringConfig = HoodieClusteringConfig.newBuilder().withClusteringMaxNumGroups(10) + .withClusteringTargetPartitions(0).withInlineClusteringNumCommits(1).withInlineClustering(false).withScheduleInlineClustering(scheduleInlineClustering) + .withPreserveHoodieCommitMetadata(true).build(); + + HoodieWriteConfig config = getConfigBuilder(HoodieFailedWritesCleaningPolicy.LAZY).withAutoCommit(false) + .withClusteringConfig(clusteringConfig) + .withProps(getPropertiesForKeyGen()).build(); + SparkRDDWriteClient client = getHoodieWriteClient(config); + dataGen = new HoodieTestDataGenerator(new String[] {"2015/03/16"}); + String commitTime1 = HoodieActiveTimeline.createNewInstantTime(); + List records1 = dataGen.generateInserts(commitTime1, 200); + client.startCommitWithTime(commitTime1); + JavaRDD insertRecordsRDD1 = jsc.parallelize(records1, 2); + JavaRDD statuses = client.upsert(insertRecordsRDD1, commitTime1); + List statusList = statuses.collect(); + assertNoWriteErrors(statusList); + client.commit(commitTime1, statuses); + + HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(basePath).build(); + List> pendingClusteringPlans = + ClusteringUtils.getAllPendingClusteringPlans(metaClient).collect(Collectors.toList()); + if (scheduleInlineClustering) { + assertEquals(1, pendingClusteringPlans.size()); + } else { + assertEquals(0, pendingClusteringPlans.size()); + } + } + @ParameterizedTest @MethodSource("populateMetaFieldsAndPreserveMetadataParams") public void testClusteringWithSortColumns(boolean populateMetaFields, boolean preserveCommitMetadata) throws Exception { @@ -1528,7 +1566,6 @@ private List testInsertAndClustering(HoodieClusteringConfig cluste Pair, List>, Set> allRecords = testInsertTwoBatches(populateMetaFields); testClustering(clusteringConfig, populateMetaFields, completeClustering, assertSameFileIds, validatorClasses, sqlQueryForEqualityValidation, sqlQueryForSingleResultValidation, allRecords); return allRecords.getLeft().getLeft(); - } /** diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieIndex.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieIndex.java index 6cd25f3992259..2ff67c3c9156d 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieIndex.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieIndex.java @@ -19,9 +19,9 @@ package org.apache.hudi.client.functional; import org.apache.hudi.client.WriteStatus; -import org.apache.hudi.common.config.HoodieMetadataConfig; import org.apache.hudi.common.fs.ConsistencyGuardConfig; import org.apache.hudi.common.model.EmptyHoodieRecordPayload; +import org.apache.hudi.common.model.HoodieAvroRecord; import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.model.HoodieTableType; @@ -104,10 +104,10 @@ private static Stream indexTypeParams() { private HoodieWriteConfig config; private void setUp(IndexType indexType, boolean populateMetaFields) throws Exception { - setUp(indexType, populateMetaFields, true, true); + setUp(indexType, populateMetaFields, true); } - private void setUp(IndexType indexType, boolean populateMetaFields, boolean enableMetadata, boolean rollbackUsingMarkers) throws Exception { + private void setUp(IndexType indexType, boolean populateMetaFields, boolean rollbackUsingMarkers) throws 
Exception { this.indexType = indexType; initPath(); initSparkContexts(); @@ -121,8 +121,8 @@ private void setUp(IndexType indexType, boolean populateMetaFields, boolean enab config = getConfigBuilder() .withProperties(populateMetaFields ? new Properties() : getPropertiesForKeyGen()) .withRollbackUsingMarkers(rollbackUsingMarkers) - .withIndexConfig(indexBuilder - .build()).withAutoCommit(false).withMetadataConfig(HoodieMetadataConfig.newBuilder().enable(enableMetadata).build()) + .withIndexConfig(indexBuilder.build()) + .withAutoCommit(false) .withLayoutConfig(HoodieLayoutConfig.newBuilder().fromProperties(indexBuilder.build().getProps()) .withLayoutPartitioner(SparkBucketIndexPartitioner.class.getName()).build()).build(); writeClient = getHoodieWriteClient(config); @@ -237,7 +237,7 @@ public void testTagLocationAndDuplicateUpdate(IndexType indexType, boolean popul @ParameterizedTest @MethodSource("indexTypeParams") public void testSimpleTagLocationAndUpdateWithRollback(IndexType indexType, boolean populateMetaFields) throws Exception { - setUp(indexType, populateMetaFields, true, false); + setUp(indexType, populateMetaFields, false); String newCommitTime = writeClient.startCommit(); int totalRecords = 20 + random.nextInt(20); List records = dataGen.generateInserts(newCommitTime, totalRecords); @@ -309,16 +309,16 @@ public void testTagLocationAndFetchRecordLocations(IndexType indexType, boolean String recordStr4 = "{\"_row_key\":\"" + rowKey1 + "\",\"time\":\"2015-01-31T03:16:41.415Z\",\"number\":32}"; RawTripTestPayload rowChange1 = new RawTripTestPayload(recordStr1); HoodieRecord record1 = - new HoodieRecord(new HoodieKey(rowChange1.getRowKey(), rowChange1.getPartitionPath()), rowChange1); + new HoodieAvroRecord(new HoodieKey(rowChange1.getRowKey(), rowChange1.getPartitionPath()), rowChange1); RawTripTestPayload rowChange2 = new RawTripTestPayload(recordStr2); HoodieRecord record2 = - new HoodieRecord(new HoodieKey(rowChange2.getRowKey(), rowChange2.getPartitionPath()), rowChange2); + new HoodieAvroRecord(new HoodieKey(rowChange2.getRowKey(), rowChange2.getPartitionPath()), rowChange2); RawTripTestPayload rowChange3 = new RawTripTestPayload(recordStr3); HoodieRecord record3 = - new HoodieRecord(new HoodieKey(rowChange3.getRowKey(), rowChange3.getPartitionPath()), rowChange3); + new HoodieAvroRecord(new HoodieKey(rowChange3.getRowKey(), rowChange3.getPartitionPath()), rowChange3); RawTripTestPayload rowChange4 = new RawTripTestPayload(recordStr4); HoodieRecord record4 = - new HoodieRecord(new HoodieKey(rowChange4.getRowKey(), rowChange4.getPartitionPath()), rowChange4); + new HoodieAvroRecord(new HoodieKey(rowChange4.getRowKey(), rowChange4.getPartitionPath()), rowChange4); JavaRDD recordRDD = jsc.parallelize(Arrays.asList(record1, record2, record3, record4)); HoodieTable hoodieTable = HoodieSparkTable.create(config, context, metaClient); @@ -384,8 +384,6 @@ public void testSimpleGlobalIndexTagLocationWhenShouldUpdatePartitionPath() thro .withGlobalSimpleIndexUpdatePartitionPath(true) .withBloomIndexUpdatePartitionPath(true) .build()) - .withMetadataConfig( - HoodieMetadataConfig.newBuilder().enable(true).build()) .build(); writeClient = getHoodieWriteClient(config); index = writeClient.getIndex(); @@ -405,7 +403,7 @@ public void testSimpleGlobalIndexTagLocationWhenShouldUpdatePartitionPath() thro RawTripTestPayload originalPayload = new RawTripTestPayload("{\"_row_key\":\"000\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12}"); HoodieRecord originalRecord = - new HoodieRecord(new 
HoodieKey(originalPayload.getRowKey(), originalPayload.getPartitionPath()), + new HoodieAvroRecord(new HoodieKey(originalPayload.getRowKey(), originalPayload.getPartitionPath()), originalPayload); /* @@ -418,7 +416,7 @@ public void testSimpleGlobalIndexTagLocationWhenShouldUpdatePartitionPath() thro RawTripTestPayload incomingPayload = new RawTripTestPayload("{\"_row_key\":\"000\",\"time\":\"2016-02-28T03:16:41.415Z\",\"number\":12}"); HoodieRecord incomingRecord = - new HoodieRecord(new HoodieKey(incomingPayload.getRowKey(), incomingPayload.getPartitionPath()), + new HoodieAvroRecord(new HoodieKey(incomingPayload.getRowKey(), incomingPayload.getPartitionPath()), incomingPayload); /* This record has the same record key as originalRecord and the same partition @@ -428,7 +426,7 @@ public void testSimpleGlobalIndexTagLocationWhenShouldUpdatePartitionPath() thro RawTripTestPayload incomingPayloadSamePartition = new RawTripTestPayload("{\"_row_key\":\"000\",\"time\":\"2016-01-31T04:16:41.415Z\",\"number\":15}"); HoodieRecord incomingRecordSamePartition = - new HoodieRecord( + new HoodieAvroRecord( new HoodieKey(incomingPayloadSamePartition.getRowKey(), incomingPayloadSamePartition.getPartitionPath()), incomingPayloadSamePartition); @@ -487,7 +485,7 @@ private HoodieWriteConfig.Builder getConfigBuilder() { private JavaPairRDD>> getRecordLocations(JavaRDD keyRDD, HoodieTable hoodieTable) { JavaRDD recordRDD = tagLocation( - index, keyRDD.map(k -> new HoodieRecord(k, new EmptyHoodieRecordPayload())), hoodieTable); + index, keyRDD.map(k -> new HoodieAvroRecord(k, new EmptyHoodieRecordPayload())), hoodieTable); return recordRDD.mapToPair(hr -> new Tuple2<>(hr.getKey(), hr.isCurrentLocationKnown() ? Option.of(Pair.of(hr.getPartitionPath(), hr.getCurrentLocation().getFileId())) : Option.empty()) diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieMetadataBase.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieMetadataBase.java index 56c9f016bcc6e..3141e1051ce5f 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieMetadataBase.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieMetadataBase.java @@ -18,32 +18,42 @@ package org.apache.hudi.client.functional; +import org.apache.hadoop.fs.Path; +import org.apache.hudi.avro.model.HoodieMetadataRecord; import org.apache.hudi.common.config.HoodieMetadataConfig; +import org.apache.hudi.common.fs.ConsistencyGuardConfig; +import org.apache.hudi.common.model.HoodieCleaningPolicy; import org.apache.hudi.common.model.HoodieFailedWritesCleaningPolicy; import org.apache.hudi.common.model.HoodieTableType; +import org.apache.hudi.common.model.WriteConcurrencyMode; import org.apache.hudi.common.model.WriteOperationType; import org.apache.hudi.common.table.HoodieTableConfig; import org.apache.hudi.common.table.HoodieTableMetaClient; +import org.apache.hudi.common.table.marker.MarkerType; +import org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion; import org.apache.hudi.common.table.view.FileSystemViewStorageConfig; import org.apache.hudi.common.testutils.HoodieMetadataTestTable; import org.apache.hudi.common.testutils.HoodieTestTable; +import org.apache.hudi.common.util.Option; import org.apache.hudi.config.HoodieCompactionConfig; import org.apache.hudi.config.HoodieIndexConfig; import org.apache.hudi.config.HoodieStorageConfig; import 
org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.config.metrics.HoodieMetricsConfig; import org.apache.hudi.config.metrics.HoodieMetricsGraphiteConfig; +import org.apache.hudi.config.metrics.HoodieMetricsJmxConfig; +import org.apache.hudi.exception.HoodieMetadataException; import org.apache.hudi.index.HoodieIndex; import org.apache.hudi.keygen.SimpleKeyGenerator; +import org.apache.hudi.metadata.HoodieMetadataPayload; import org.apache.hudi.metadata.HoodieTableMetadata; +import org.apache.hudi.metadata.HoodieTableMetadataKeyGenerator; import org.apache.hudi.metadata.HoodieTableMetadataWriter; import org.apache.hudi.metadata.SparkHoodieBackedTableMetadataWriter; import org.apache.hudi.table.HoodieSparkTable; import org.apache.hudi.table.HoodieTable; -import org.apache.hudi.table.HoodieTimelineArchiveLog; +import org.apache.hudi.client.HoodieTimelineArchiver; import org.apache.hudi.testutils.HoodieClientTestHarness; - -import org.apache.hadoop.fs.Path; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; import org.junit.jupiter.api.AfterEach; @@ -59,6 +69,7 @@ import static org.apache.hudi.common.model.WriteOperationType.INSERT; import static org.apache.hudi.common.model.WriteOperationType.UPSERT; import static org.apache.hudi.common.testutils.HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA; +import static org.apache.hudi.metadata.HoodieTableMetadata.METADATA_TABLE_NAME_SUFFIX; public class TestHoodieMetadataBase extends HoodieClientTestHarness { @@ -74,12 +85,22 @@ public void init(HoodieTableType tableType) throws IOException { init(tableType, true); } + public void init(HoodieTableType tableType, HoodieWriteConfig writeConfig) throws IOException { + init(tableType, Option.of(writeConfig), true, false, false, false); + } + public void init(HoodieTableType tableType, boolean enableMetadataTable) throws IOException { init(tableType, enableMetadataTable, true, false, false); } public void init(HoodieTableType tableType, boolean enableMetadataTable, boolean enableFullScan, boolean enableMetrics, boolean - validateMetadataPayloadStateConsistency) throws IOException { + validateMetadataPayloadStateConsistency) throws IOException { + init(tableType, Option.empty(), enableMetadataTable, enableFullScan, enableMetrics, + validateMetadataPayloadStateConsistency); + } + + public void init(HoodieTableType tableType, Option writeConfig, boolean enableMetadataTable, + boolean enableFullScan, boolean enableMetrics, boolean validateMetadataPayloadStateConsistency) throws IOException { this.tableType = tableType; initPath(); initSparkContexts("TestHoodieMetadata"); @@ -89,9 +110,12 @@ public void init(HoodieTableType tableType, boolean enableMetadataTable, boolean initMetaClient(tableType); initTestDataGenerator(); metadataTableBasePath = HoodieTableMetadata.getMetadataTableBasePath(basePath); - writeConfig = getWriteConfigBuilder(HoodieFailedWritesCleaningPolicy.EAGER, true, enableMetadataTable, enableMetrics, - enableFullScan, true, validateMetadataPayloadStateConsistency).build(); - initWriteConfigAndMetatableWriter(writeConfig, enableMetadataTable); + this.writeConfig = writeConfig.isPresent() + ? 
writeConfig.get() : getWriteConfigBuilder(HoodieFailedWritesCleaningPolicy.EAGER, true, + enableMetadataTable, enableMetrics, enableFullScan, true, + validateMetadataPayloadStateConsistency) + .build(); + initWriteConfigAndMetatableWriter(this.writeConfig, enableMetadataTable); } protected void initWriteConfigAndMetatableWriter(HoodieWriteConfig writeConfig, boolean enableMetadataTable) { @@ -262,8 +286,8 @@ protected void doPreBootstrapRestore(HoodieTestTable testTable, String restoreTi protected void archiveDataTable(HoodieWriteConfig writeConfig, HoodieTableMetaClient metaClient) throws IOException { HoodieTable table = HoodieSparkTable.create(writeConfig, context, metaClient); - HoodieTimelineArchiveLog archiveLog = new HoodieTimelineArchiveLog(writeConfig, table); - archiveLog.archiveIfRequired(context); + HoodieTimelineArchiver archiver = new HoodieTimelineArchiver(writeConfig, table); + archiver.archiveIfRequired(context); } protected void validateMetadata(HoodieTestTable testTable) throws IOException { @@ -327,4 +351,91 @@ protected HoodieWriteConfig.Builder getWriteConfigBuilder(HoodieFailedWritesClea .withProperties(properties); } + /** + * Fetching the WriteConfig for the metadata table from the data table's writeConfig is not trivial, and + * the corresponding method is not public in the source code. So, for now, this method mimics the source code. + */ + protected HoodieWriteConfig getMetadataWriteConfig(HoodieWriteConfig writeConfig) { + int parallelism = writeConfig.getMetadataInsertParallelism(); + + int minCommitsToKeep = Math.max(writeConfig.getMetadataMinCommitsToKeep(), writeConfig.getMinCommitsToKeep()); + int maxCommitsToKeep = Math.max(writeConfig.getMetadataMaxCommitsToKeep(), writeConfig.getMaxCommitsToKeep()); + + // Create the write config for the metadata table by borrowing options from the main write config.
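+    // Deliberate differences from the data table's config: single-writer
+    // concurrency, the metadata config itself disabled (a metadata table keeps
+    // no metadata table of its own), direct markers, and auto clean/compaction
+    // turned off so that tests control the instant times.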
+ HoodieWriteConfig.Builder builder = HoodieWriteConfig.newBuilder() + .withTimelineLayoutVersion(TimelineLayoutVersion.CURR_VERSION) + .withConsistencyGuardConfig(ConsistencyGuardConfig.newBuilder() + .withConsistencyCheckEnabled(writeConfig.getConsistencyGuardConfig().isConsistencyCheckEnabled()) + .withInitialConsistencyCheckIntervalMs(writeConfig.getConsistencyGuardConfig().getInitialConsistencyCheckIntervalMs()) + .withMaxConsistencyCheckIntervalMs(writeConfig.getConsistencyGuardConfig().getMaxConsistencyCheckIntervalMs()) + .withMaxConsistencyChecks(writeConfig.getConsistencyGuardConfig().getMaxConsistencyChecks()) + .build()) + .withWriteConcurrencyMode(WriteConcurrencyMode.SINGLE_WRITER) + .withMetadataConfig(HoodieMetadataConfig.newBuilder().enable(false).withFileListingParallelism(writeConfig.getFileListingParallelism()).build()) + .withAutoCommit(true) + .withAvroSchemaValidate(true) + .withEmbeddedTimelineServerEnabled(false) + .withMarkersType(MarkerType.DIRECT.name()) + .withRollbackUsingMarkers(false) + .withPath(HoodieTableMetadata.getMetadataTableBasePath(writeConfig.getBasePath())) + .withSchema(HoodieMetadataRecord.getClassSchema().toString()) + .forTable(writeConfig.getTableName() + METADATA_TABLE_NAME_SUFFIX) + .withCompactionConfig(HoodieCompactionConfig.newBuilder() + .withAsyncClean(writeConfig.isMetadataAsyncClean()) + // we will trigger cleaning manually, to control the instant times + .withAutoClean(false) + .withCleanerParallelism(parallelism) + .withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_COMMITS) + .withFailedWritesCleaningPolicy(HoodieFailedWritesCleaningPolicy.LAZY) + .retainCommits(writeConfig.getMetadataCleanerCommitsRetained()) + .archiveCommitsWith(minCommitsToKeep, maxCommitsToKeep) + // we will trigger compaction manually, to control the instant times + .withInlineCompaction(false) + .withMaxNumDeltaCommitsBeforeCompaction(writeConfig.getMetadataCompactDeltaCommitMax()).build()) + .withParallelism(parallelism, parallelism) + .withDeleteParallelism(parallelism) + .withRollbackParallelism(parallelism) + .withFinalizeWriteParallelism(parallelism) + .withAllowMultiWriteOnSameInstant(true) + .withKeyGenerator(HoodieTableMetadataKeyGenerator.class.getCanonicalName()) + .withPopulateMetaFields(writeConfig.getMetadataConfig().populateMetaFields()); + + // RecordKey properties are needed for the metadata table records + final Properties properties = new Properties(); + properties.put(HoodieTableConfig.RECORDKEY_FIELDS.key(), HoodieMetadataPayload.KEY_FIELD_NAME); + properties.put("hoodie.datasource.write.recordkey.field", HoodieMetadataPayload.KEY_FIELD_NAME); + builder.withProperties(properties); + + if (writeConfig.isMetricsOn()) { + builder.withMetricsConfig(HoodieMetricsConfig.newBuilder() + .withReporterType(writeConfig.getMetricsReporterType().toString()) + .withExecutorMetrics(writeConfig.isExecutorMetricsEnabled()) + .on(true).build()); + switch (writeConfig.getMetricsReporterType()) { + case GRAPHITE: + builder.withMetricsGraphiteConfig(HoodieMetricsGraphiteConfig.newBuilder() + .onGraphitePort(writeConfig.getGraphiteServerPort()) + .toGraphiteHost(writeConfig.getGraphiteServerHost()) + .usePrefix(writeConfig.getGraphiteMetricPrefix()).build()); + break; + case JMX: + builder.withMetricsJmxConfig(HoodieMetricsJmxConfig.newBuilder() + .onJmxPort(writeConfig.getJmxPort()) + .toJmxHost(writeConfig.getJmxHost()) + .build()); + break; + case DATADOG: + case PROMETHEUS: + case PROMETHEUS_PUSHGATEWAY: + case CONSOLE: + case INMEMORY: + case CLOUDWATCH: 
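+          // The remaining reporter types need no reporter-specific settings;
+          // the shared metrics config built above is sufficient.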
+ break; + default: + throw new HoodieMetadataException("Unsupported Metrics Reporter type " + writeConfig.getMetricsReporterType()); + } + } + return builder.build(); + } + } diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieMetadataBootstrap.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieMetadataBootstrap.java index 057968f6f7ca5..bdbc9e72d3f4a 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieMetadataBootstrap.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHoodieMetadataBootstrap.java @@ -278,7 +278,7 @@ private HoodieWriteConfig getWriteConfig(int minArchivalCommits, int maxArchival .withCompactionConfig(HoodieCompactionConfig.newBuilder().retainCommits(1).archiveCommitsWith(minArchivalCommits, maxArchivalCommits).build()) .withFileSystemViewConfig(FileSystemViewStorageConfig.newBuilder() .withRemoteServerPort(timelineServicePort).build()) - .withMetadataConfig(HoodieMetadataConfig.newBuilder().enable(false).build()) + .withMetadataConfig(HoodieMetadataConfig.newBuilder().enable(true).build()) .forTable("test-trip-table").build(); } diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/execution/TestSparkBoundedInMemoryExecutor.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/execution/TestBoundedInMemoryExecutorInSpark.java similarity index 82% rename from hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/execution/TestSparkBoundedInMemoryExecutor.java rename to hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/execution/TestBoundedInMemoryExecutorInSpark.java index ecb18c6bc2828..91f9cbc96e6ed 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/execution/TestSparkBoundedInMemoryExecutor.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/execution/TestBoundedInMemoryExecutorInSpark.java @@ -22,12 +22,15 @@ import org.apache.hudi.common.table.timeline.HoodieActiveTimeline; import org.apache.hudi.common.testutils.HoodieTestDataGenerator; import org.apache.hudi.common.util.Option; +import org.apache.hudi.common.util.queue.BoundedInMemoryExecutor; import org.apache.hudi.common.util.queue.BoundedInMemoryQueueConsumer; import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.exception.HoodieException; import org.apache.hudi.testutils.HoodieClientTestHarness; import org.apache.avro.generic.IndexedRecord; +import org.apache.spark.TaskContext; +import org.apache.spark.TaskContext$; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -44,7 +47,7 @@ import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; -public class TestSparkBoundedInMemoryExecutor extends HoodieClientTestHarness { +public class TestBoundedInMemoryExecutorInSpark extends HoodieClientTestHarness { private final String instantTime = HoodieActiveTimeline.createNewInstantTime(); @@ -58,6 +61,11 @@ public void tearDown() throws Exception { cleanupResources(); } + private Runnable getPreExecuteRunnable() { + final TaskContext taskContext = TaskContext.get(); + return () -> TaskContext$.MODULE$.setTaskContext(taskContext); + } + @Test public void testExecutor() { @@ -85,10 +93,10 @@ protected Integer getResult() { } }; - SparkBoundedInMemoryExecutor>, Integer> executor = null; + BoundedInMemoryExecutor>, Integer> executor = null; try { - 
executor = new SparkBoundedInMemoryExecutor(hoodieWriteConfig, hoodieRecords.iterator(), consumer, - getTransformFunction(HoodieTestDataGenerator.AVRO_SCHEMA)); + executor = new BoundedInMemoryExecutor(hoodieWriteConfig.getWriteBufferLimitBytes(), hoodieRecords.iterator(), consumer, + getTransformFunction(HoodieTestDataGenerator.AVRO_SCHEMA), getPreExecuteRunnable()); int result = executor.execute(); // It should buffer and write 100 records assertEquals(100, result); @@ -131,11 +139,11 @@ protected Integer getResult() { } }; - SparkBoundedInMemoryExecutor>, Integer> executor = null; + BoundedInMemoryExecutor>, Integer> executor = null; try { - executor = new SparkBoundedInMemoryExecutor(hoodieWriteConfig, hoodieRecords.iterator(), consumer, - getTransformFunction(HoodieTestDataGenerator.AVRO_SCHEMA)); - SparkBoundedInMemoryExecutor>, Integer> finalExecutor = executor; + executor = new BoundedInMemoryExecutor(hoodieWriteConfig.getWriteBufferLimitBytes(), hoodieRecords.iterator(), consumer, + getTransformFunction(HoodieTestDataGenerator.AVRO_SCHEMA), getPreExecuteRunnable()); + BoundedInMemoryExecutor>, Integer> finalExecutor = executor; Thread.currentThread().interrupt(); diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/execution/TestBoundedInMemoryQueue.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/execution/TestBoundedInMemoryQueue.java index c30635bb12f9e..4707a68072e9a 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/execution/TestBoundedInMemoryQueue.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/execution/TestBoundedInMemoryQueue.java @@ -18,6 +18,7 @@ package org.apache.hudi.execution; +import org.apache.hudi.common.model.HoodieAvroRecord; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.table.timeline.HoodieActiveTimeline; import org.apache.hudi.common.testutils.HoodieTestDataGenerator; @@ -82,7 +83,7 @@ public void tearDown() throws Exception { public void testRecordReading() throws Exception { final int numRecords = 128; final List hoodieRecords = dataGen.generateInserts(instantTime, numRecords); - final BoundedInMemoryQueue> queue = + final BoundedInMemoryQueue queue = new BoundedInMemoryQueue(FileIOUtils.KB, getTransformFunction(HoodieTestDataGenerator.AVRO_SCHEMA)); // Produce Future resFuture = executorService.submit(() -> { @@ -93,7 +94,7 @@ public void testRecordReading() throws Exception { final Iterator originalRecordIterator = hoodieRecords.iterator(); int recordsRead = 0; while (queue.iterator().hasNext()) { - final HoodieRecord originalRecord = originalRecordIterator.next(); + final HoodieAvroRecord originalRecord = (HoodieAvroRecord) originalRecordIterator.next(); final Option originalInsertValue = originalRecord.getData().getInsertValue(HoodieTestDataGenerator.AVRO_SCHEMA); final HoodieLazyInsertIterable.HoodieInsertValueGenResult payload = queue.iterator().next(); @@ -101,7 +102,7 @@ public void testRecordReading() throws Exception { assertEquals(originalRecord, payload.record); // cached insert value matches the expected insert value. 
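      // payload.record is now typed generically, so it is cast to HoodieAvroRecord
      // before reading the Avro insert value from its payload.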
assertEquals(originalInsertValue, - payload.record.getData().getInsertValue(HoodieTestDataGenerator.AVRO_SCHEMA)); + ((HoodieAvroRecord) payload.record).getData().getInsertValue(HoodieTestDataGenerator.AVRO_SCHEMA)); recordsRead++; } assertFalse(queue.iterator().hasNext() || originalRecordIterator.hasNext()); @@ -122,7 +123,7 @@ public void testCompositeProducerRecordReading() throws Exception { final int numProducers = 40; final List> recs = new ArrayList<>(); - final BoundedInMemoryQueue> queue = + final BoundedInMemoryQueue queue = new BoundedInMemoryQueue(FileIOUtils.KB, getTransformFunction(HoodieTestDataGenerator.AVRO_SCHEMA)); // Record Key to @@ -188,7 +189,7 @@ public void testCompositeProducerRecordReading() throws Exception { // Read recs and ensure we have covered all producer recs. while (queue.iterator().hasNext()) { - final HoodieLazyInsertIterable.HoodieInsertValueGenResult payload = queue.iterator().next(); + final HoodieLazyInsertIterable.HoodieInsertValueGenResult payload = queue.iterator().next(); final HoodieRecord rec = payload.record; Tuple2 producerPos = keyToProducerAndIndexMap.get(rec.getRecordKey()); Integer lastSeenPos = lastSeenMap.get(producerPos._1()); @@ -216,12 +217,12 @@ public void testMemoryLimitForBuffering() throws Exception { final List hoodieRecords = dataGen.generateInserts(instantTime, numRecords); // maximum number of records to keep in memory. final int recordLimit = 5; - final SizeEstimator> sizeEstimator = new DefaultSizeEstimator<>(); - HoodieLazyInsertIterable.HoodieInsertValueGenResult payload = - getTransformFunction(HoodieTestDataGenerator.AVRO_SCHEMA).apply(hoodieRecords.get(0)); + final SizeEstimator sizeEstimator = new DefaultSizeEstimator<>(); + HoodieLazyInsertIterable.HoodieInsertValueGenResult payload = + getTransformFunction(HoodieTestDataGenerator.AVRO_SCHEMA).apply((HoodieAvroRecord) hoodieRecords.get(0)); final long objSize = sizeEstimator.sizeEstimate(payload); final long memoryLimitInBytes = recordLimit * objSize; - final BoundedInMemoryQueue> queue = + final BoundedInMemoryQueue queue = new BoundedInMemoryQueue(memoryLimitInBytes, getTransformFunction(HoodieTestDataGenerator.AVRO_SCHEMA)); // Produce @@ -266,8 +267,8 @@ public void testException() throws Exception { final List hoodieRecords = dataGen.generateInserts(instantTime, numRecords); final SizeEstimator>> sizeEstimator = new DefaultSizeEstimator<>(); // queue memory limit - HoodieLazyInsertIterable.HoodieInsertValueGenResult payload = - getTransformFunction(HoodieTestDataGenerator.AVRO_SCHEMA).apply(hoodieRecords.get(0)); + HoodieLazyInsertIterable.HoodieInsertValueGenResult payload = + getTransformFunction(HoodieTestDataGenerator.AVRO_SCHEMA).apply((HoodieAvroRecord) hoodieRecords.get(0)); final long objSize = sizeEstimator.sizeEstimate(new Tuple2<>(payload.record, payload.insertValue)); final long memoryLimitInBytes = 4 * objSize; diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/functional/SparkClientFunctionalTestSuite.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/functional/SparkClientFunctionalTestSuite.java index ee7427866feb8..5b20a51f5a2ed 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/functional/SparkClientFunctionalTestSuite.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/functional/SparkClientFunctionalTestSuite.java @@ -25,7 +25,10 @@ import org.junit.runner.RunWith; @RunWith(JUnitPlatform.class) -@SelectPackages({"org.apache.hudi.client.functional", 
"org.apache.hudi.table.functional"}) +@SelectPackages({ + "org.apache.hudi.client.functional", + "org.apache.hudi.table.functional", + "org.apache.hudi.index.hbase"}) @IncludeTags("functional") public class SparkClientFunctionalTestSuite { diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/index/TestHoodieIndexConfigs.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/index/TestHoodieIndexConfigs.java index 665e3a6a8e4a9..171403eb03847 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/index/TestHoodieIndexConfigs.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/index/TestHoodieIndexConfigs.java @@ -19,16 +19,10 @@ package org.apache.hudi.index; -import org.apache.hudi.client.WriteStatus; -import org.apache.hudi.common.engine.HoodieEngineContext; -import org.apache.hudi.common.model.HoodieKey; -import org.apache.hudi.common.model.HoodieRecord; -import org.apache.hudi.common.model.HoodieRecordPayload; import org.apache.hudi.config.HoodieHBaseIndexConfig; import org.apache.hudi.config.HoodieIndexConfig; import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.exception.HoodieException; -import org.apache.hudi.exception.HoodieIndexException; import org.apache.hudi.index.HoodieIndex.IndexType; import org.apache.hudi.index.bloom.HoodieBloomIndex; import org.apache.hudi.index.bloom.HoodieGlobalBloomIndex; @@ -36,9 +30,7 @@ import org.apache.hudi.index.hbase.SparkHoodieHBaseIndex; import org.apache.hudi.index.inmemory.HoodieInMemoryHashIndex; import org.apache.hudi.index.simple.HoodieSimpleIndex; -import org.apache.hudi.table.HoodieTable; -import org.apache.spark.api.java.JavaRDD; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; @@ -62,7 +54,7 @@ public void setUp(@TempDir Path tempDir) { @ParameterizedTest @EnumSource(value = IndexType.class, names = {"BLOOM", "GLOBAL_BLOOM", "SIMPLE", "GLOBAL_SIMPLE", "HBASE", "BUCKET"}) - public void testCreateIndex(IndexType indexType) throws Exception { + public void testCreateIndex(IndexType indexType) { HoodieWriteConfig config; HoodieWriteConfig.Builder clientConfigBuilder = HoodieWriteConfig.newBuilder(); HoodieIndexConfig.Builder indexConfigBuilder = HoodieIndexConfig.newBuilder(); @@ -104,15 +96,6 @@ public void testCreateIndex(IndexType indexType) throws Exception { } } - @Test - public void testCreateDummyIndex() { - HoodieWriteConfig.Builder clientConfigBuilder = HoodieWriteConfig.newBuilder(); - HoodieIndexConfig.Builder indexConfigBuilder = HoodieIndexConfig.newBuilder(); - HoodieWriteConfig config = clientConfigBuilder.withPath(basePath) - .withIndexConfig(indexConfigBuilder.withIndexClass(DummyHoodieIndex.class.getName()).build()).build(); - assertTrue(SparkHoodieIndexFactory.createIndex(config) instanceof DummyHoodieIndex); - } - @Test public void testCreateIndexWithException() { HoodieWriteConfig.Builder clientConfigBuilder = HoodieWriteConfig.newBuilder(); @@ -132,47 +115,6 @@ public void testCreateIndexWithException() { assertTrue(thrown2.getMessage().contains("Unable to instantiate class")); } - public static class DummyHoodieIndex> extends SparkHoodieIndex { - - public DummyHoodieIndex(HoodieWriteConfig config) { - super(config); - } - - @Override - public JavaRDD updateLocation(JavaRDD writeStatusRDD, - HoodieEngineContext context, - HoodieTable>, JavaRDD, JavaRDD> hoodieTable) throws HoodieIndexException { - return null; - } - - @Override - public JavaRDD> 
tagLocation(JavaRDD> records, - HoodieEngineContext context, - HoodieTable>, JavaRDD, JavaRDD> hoodieTable) throws HoodieIndexException { - return null; - } - - @Override - public boolean rollbackCommit(String instantTime) { - return false; - } - - @Override - public boolean isGlobal() { - return false; - } - - @Override - public boolean canIndexLogFiles() { - return false; - } - - @Override - public boolean isImplicitWithStorage() { - return false; - } - } - public static class IndexWithConstructor { public IndexWithConstructor(HoodieWriteConfig config) { diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/index/bloom/TestHoodieBloomIndex.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/index/bloom/TestHoodieBloomIndex.java index 1334adb20d052..e61d6057cd80f 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/index/bloom/TestHoodieBloomIndex.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/index/bloom/TestHoodieBloomIndex.java @@ -18,9 +18,14 @@ package org.apache.hudi.index.bloom; +import org.apache.avro.Schema; +import org.apache.hadoop.fs.Path; +import org.apache.hudi.client.functional.TestHoodieMetadataBase; import org.apache.hudi.common.bloom.BloomFilter; import org.apache.hudi.common.bloom.BloomFilterFactory; import org.apache.hudi.common.bloom.BloomFilterTypeCode; +import org.apache.hudi.common.config.HoodieMetadataConfig; +import org.apache.hudi.common.model.HoodieAvroRecord; import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.table.HoodieTableMetaClient; @@ -32,14 +37,10 @@ import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.data.HoodieJavaPairRDD; import org.apache.hudi.data.HoodieJavaRDD; -import org.apache.hudi.io.HoodieKeyLookupHandle; +import org.apache.hudi.index.HoodieIndexUtils; import org.apache.hudi.table.HoodieSparkTable; import org.apache.hudi.table.HoodieTable; -import org.apache.hudi.testutils.HoodieClientTestHarness; import org.apache.hudi.testutils.HoodieSparkWriteableTestTable; - -import org.apache.avro.Schema; -import org.apache.hadoop.fs.Path; import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.JavaRDD; import org.junit.jupiter.api.AfterEach; @@ -48,6 +49,7 @@ import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.Arguments; import org.junit.jupiter.params.provider.MethodSource; +import scala.Tuple2; import java.nio.file.Paths; import java.util.Arrays; @@ -59,8 +61,6 @@ import java.util.stream.Collectors; import java.util.stream.Stream; -import scala.Tuple2; - import static org.apache.hudi.common.testutils.SchemaTestUtil.getSchemaFromResource; import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; import static org.junit.jupiter.api.Assertions.assertEquals; @@ -69,14 +69,14 @@ import static org.junit.jupiter.api.Assertions.assertNull; import static org.junit.jupiter.api.Assertions.assertTrue; -public class TestHoodieBloomIndex extends HoodieClientTestHarness { +public class TestHoodieBloomIndex extends TestHoodieMetadataBase { private static final Schema SCHEMA = getSchemaFromResource(TestHoodieBloomIndex.class, "/exampleSchema.avsc", true); private static final String TEST_NAME_WITH_PARAMS = "[{index}] Test with rangePruning={0}, treeFiltering={1}, bucketizedChecking={2}"; public static Stream configParams() { Object[][] data = - new Object[][] {{true, true, true}, {false, true, true}, {true, true, false}, 
{true, false, true}}; + new Object[][]{{true, true, true}, {false, true, true}, {true, true, false}, {true, false, true}}; return Stream.of(data).map(Arguments::of); } @@ -99,6 +99,10 @@ private HoodieWriteConfig makeConfig(boolean rangePruning, boolean treeFiltering .withIndexConfig(HoodieIndexConfig.newBuilder().bloomIndexPruneByRanges(rangePruning) .bloomIndexTreebasedFilter(treeFiltering).bloomIndexBucketizedChecking(bucketizedChecking) .bloomIndexKeysPerBucket(2).build()) + .withMetadataConfig(HoodieMetadataConfig.newBuilder() + .withMetadataIndexBloomFilter(false) + .withMetadataIndexColumnStats(false) + .build()) .build(); } @@ -119,22 +123,22 @@ public void testLoadInvolvedFiles(boolean rangePruning, boolean treeFiltering, b RawTripTestPayload rowChange1 = new RawTripTestPayload("{\"_row_key\":\"000\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12}"); HoodieRecord record1 = - new HoodieRecord(new HoodieKey(rowChange1.getRowKey(), rowChange1.getPartitionPath()), rowChange1); + new HoodieAvroRecord(new HoodieKey(rowChange1.getRowKey(), rowChange1.getPartitionPath()), rowChange1); RawTripTestPayload rowChange2 = new RawTripTestPayload("{\"_row_key\":\"001\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12}"); HoodieRecord record2 = - new HoodieRecord(new HoodieKey(rowChange2.getRowKey(), rowChange2.getPartitionPath()), rowChange2); + new HoodieAvroRecord(new HoodieKey(rowChange2.getRowKey(), rowChange2.getPartitionPath()), rowChange2); RawTripTestPayload rowChange3 = new RawTripTestPayload("{\"_row_key\":\"002\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12}"); HoodieRecord record3 = - new HoodieRecord(new HoodieKey(rowChange3.getRowKey(), rowChange3.getPartitionPath()), rowChange3); + new HoodieAvroRecord(new HoodieKey(rowChange3.getRowKey(), rowChange3.getPartitionPath()), rowChange3); RawTripTestPayload rowChange4 = new RawTripTestPayload("{\"_row_key\":\"003\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12}"); HoodieRecord record4 = - new HoodieRecord(new HoodieKey(rowChange4.getRowKey(), rowChange4.getPartitionPath()), rowChange4); + new HoodieAvroRecord(new HoodieKey(rowChange4.getRowKey(), rowChange4.getPartitionPath()), rowChange4); List partitions = Arrays.asList("2016/01/21", "2016/04/01", "2015/03/12"); - List> filesList = index.loadInvolvedFiles(partitions, context, hoodieTable); + List> filesList = index.loadColumnRangesFromFiles(partitions, context, hoodieTable); // Still 0, as no valid commit assertEquals(0, filesList.size()); @@ -143,7 +147,7 @@ public void testLoadInvolvedFiles(boolean rangePruning, boolean treeFiltering, b .withInserts("2015/03/12", "3", record1) .withInserts("2015/03/12", "4", record2, record3, record4); - filesList = index.loadInvolvedFiles(partitions, context, hoodieTable); + filesList = index.loadColumnRangesFromFiles(partitions, context, hoodieTable); assertEquals(4, filesList.size()); if (rangePruning) { @@ -210,16 +214,16 @@ public void testCheckUUIDsAgainstOneFile() throws Exception { + "\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":32}"; RawTripTestPayload rowChange1 = new RawTripTestPayload(recordStr1); HoodieRecord record1 = - new HoodieRecord(new HoodieKey(rowChange1.getRowKey(), rowChange1.getPartitionPath()), rowChange1); + new HoodieAvroRecord(new HoodieKey(rowChange1.getRowKey(), rowChange1.getPartitionPath()), rowChange1); RawTripTestPayload rowChange2 = new RawTripTestPayload(recordStr2); HoodieRecord record2 = - new HoodieRecord(new HoodieKey(rowChange2.getRowKey(), rowChange2.getPartitionPath()), rowChange2); 
+ new HoodieAvroRecord(new HoodieKey(rowChange2.getRowKey(), rowChange2.getPartitionPath()), rowChange2); RawTripTestPayload rowChange3 = new RawTripTestPayload(recordStr3); HoodieRecord record3 = - new HoodieRecord(new HoodieKey(rowChange3.getRowKey(), rowChange3.getPartitionPath()), rowChange3); + new HoodieAvroRecord(new HoodieKey(rowChange3.getRowKey(), rowChange3.getPartitionPath()), rowChange3); RawTripTestPayload rowChange4 = new RawTripTestPayload(recordStr4); HoodieRecord record4 = - new HoodieRecord(new HoodieKey(rowChange4.getRowKey(), rowChange4.getPartitionPath()), rowChange4); + new HoodieAvroRecord(new HoodieKey(rowChange4.getRowKey(), rowChange4.getPartitionPath()), rowChange4); // We write record1, record2 to a parquet file, but the bloom filter contains (record1, // record2, record3). @@ -241,9 +245,9 @@ public void testCheckUUIDsAgainstOneFile() throws Exception { HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath).build(); HoodieSparkTable table = HoodieSparkTable.create(config, context, metaClient); - HoodieKeyLookupHandle keyHandle = new HoodieKeyLookupHandle<>(config, table, Pair.of(partition, fileId)); - List results = keyHandle.checkCandidatesAgainstFile(hadoopConf, uuids, - new Path(Paths.get(basePath, partition, filename).toString())); + List results = HoodieIndexUtils.filterKeysFromFile( + new Path(Paths.get(basePath, partition, filename).toString()), uuids, hadoopConf); + assertEquals(results.size(), 2); assertTrue(results.get(0).equals("1eb5b87a-1feh-4edd-87b4-6ec96dc405a0") || results.get(1).equals("1eb5b87a-1feh-4edd-87b4-6ec96dc405a0")); @@ -286,16 +290,16 @@ public void testTagLocation(boolean rangePruning, boolean treeFiltering, boolean String recordStr4 = "{\"_row_key\":\"" + rowKey1 + "\",\"time\":\"2015-01-31T03:16:41.415Z\",\"number\":32}"; RawTripTestPayload rowChange1 = new RawTripTestPayload(recordStr1); HoodieRecord record1 = - new HoodieRecord(new HoodieKey(rowChange1.getRowKey(), rowChange1.getPartitionPath()), rowChange1); + new HoodieAvroRecord(new HoodieKey(rowChange1.getRowKey(), rowChange1.getPartitionPath()), rowChange1); RawTripTestPayload rowChange2 = new RawTripTestPayload(recordStr2); HoodieRecord record2 = - new HoodieRecord(new HoodieKey(rowChange2.getRowKey(), rowChange2.getPartitionPath()), rowChange2); + new HoodieAvroRecord(new HoodieKey(rowChange2.getRowKey(), rowChange2.getPartitionPath()), rowChange2); RawTripTestPayload rowChange3 = new RawTripTestPayload(recordStr3); HoodieRecord record3 = - new HoodieRecord(new HoodieKey(rowChange3.getRowKey(), rowChange3.getPartitionPath()), rowChange3); + new HoodieAvroRecord(new HoodieKey(rowChange3.getRowKey(), rowChange3.getPartitionPath()), rowChange3); RawTripTestPayload rowChange4 = new RawTripTestPayload(recordStr4); HoodieRecord record4 = - new HoodieRecord(new HoodieKey(rowChange4.getRowKey(), rowChange4.getPartitionPath()), rowChange4); + new HoodieAvroRecord(new HoodieKey(rowChange4.getRowKey(), rowChange4.getPartitionPath()), rowChange4); JavaRDD recordRDD = jsc.parallelize(Arrays.asList(record1, record2, record3, record4)); // Also create the metadata and config @@ -352,15 +356,15 @@ public void testCheckExists(boolean rangePruning, boolean treeFiltering, boolean + "\"time\":\"2015-01-31T03:16:41.415Z\",\"number\":32}"; RawTripTestPayload rowChange1 = new RawTripTestPayload(recordStr1); HoodieKey key1 = new HoodieKey(rowChange1.getRowKey(), rowChange1.getPartitionPath()); - HoodieRecord record1 = new HoodieRecord(key1, rowChange1); + HoodieRecord 
record1 = new HoodieAvroRecord(key1, rowChange1); RawTripTestPayload rowChange2 = new RawTripTestPayload(recordStr2); HoodieKey key2 = new HoodieKey(rowChange2.getRowKey(), rowChange2.getPartitionPath()); - HoodieRecord record2 = new HoodieRecord(key2, rowChange2); + HoodieRecord record2 = new HoodieAvroRecord(key2, rowChange2); RawTripTestPayload rowChange3 = new RawTripTestPayload(recordStr3); HoodieKey key3 = new HoodieKey(rowChange3.getRowKey(), rowChange3.getPartitionPath()); RawTripTestPayload rowChange4 = new RawTripTestPayload(recordStr4); HoodieKey key4 = new HoodieKey(rowChange4.getRowKey(), rowChange4.getPartitionPath()); - HoodieRecord record4 = new HoodieRecord(key4, rowChange4); + HoodieRecord record4 = new HoodieAvroRecord(key4, rowChange4); JavaRDD keysRDD = jsc.parallelize(Arrays.asList(key1, key2, key3, key4)); // Also create the metadata and config @@ -371,7 +375,7 @@ public void testCheckExists(boolean rangePruning, boolean treeFiltering, boolean // Let's tag HoodieBloomIndex bloomIndex = new HoodieBloomIndex(config, SparkHoodieBloomIndexHelper.getInstance()); JavaRDD taggedRecords = tagLocation( - bloomIndex, keysRDD.map(k -> new HoodieRecord(k, null)), hoodieTable); + bloomIndex, keysRDD.map(k -> new HoodieAvroRecord(k, null)), hoodieTable); JavaPairRDD>> recordLocationsRDD = taggedRecords .mapToPair(hr -> new Tuple2<>(hr.getKey(), hr.isCurrentLocationKnown() ? Option.of(Pair.of(hr.getPartitionPath(), hr.getCurrentLocation().getFileId())) @@ -391,7 +395,7 @@ public void testCheckExists(boolean rangePruning, boolean treeFiltering, boolean // We do the tag again metaClient = HoodieTableMetaClient.reload(metaClient); hoodieTable = HoodieSparkTable.create(config, context, metaClient); - taggedRecords = tagLocation(bloomIndex, keysRDD.map(k -> new HoodieRecord(k, null)), hoodieTable); + taggedRecords = tagLocation(bloomIndex, keysRDD.map(k -> new HoodieAvroRecord(k, null)), hoodieTable); recordLocationsRDD = taggedRecords .mapToPair(hr -> new Tuple2<>(hr.getKey(), hr.isCurrentLocationKnown() ? 
Option.of(Pair.of(hr.getPartitionPath(), hr.getCurrentLocation().getFileId())) @@ -428,10 +432,10 @@ public void testBloomFilterFalseError(boolean rangePruning, boolean treeFilterin // We write record1 to a parquet file, using a bloom filter having both records RawTripTestPayload rowChange1 = new RawTripTestPayload(recordStr1); HoodieRecord record1 = - new HoodieRecord(new HoodieKey(rowChange1.getRowKey(), rowChange1.getPartitionPath()), rowChange1); + new HoodieAvroRecord(new HoodieKey(rowChange1.getRowKey(), rowChange1.getPartitionPath()), rowChange1); RawTripTestPayload rowChange2 = new RawTripTestPayload(recordStr2); HoodieRecord record2 = - new HoodieRecord(new HoodieKey(rowChange2.getRowKey(), rowChange2.getPartitionPath()), rowChange2); + new HoodieAvroRecord(new HoodieKey(rowChange2.getRowKey(), rowChange2.getPartitionPath()), rowChange2); BloomFilter filter = BloomFilterFactory.createBloomFilter(10000, 0.0000001, -1, BloomFilterTypeCode.SIMPLE.name()); diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/index/bloom/TestHoodieGlobalBloomIndex.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/index/bloom/TestHoodieGlobalBloomIndex.java index fa7d586d2dc0a..9d25907b4bf9d 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/index/bloom/TestHoodieGlobalBloomIndex.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/index/bloom/TestHoodieGlobalBloomIndex.java @@ -19,10 +19,10 @@ package org.apache.hudi.index.bloom; import org.apache.hudi.common.model.EmptyHoodieRecordPayload; +import org.apache.hudi.common.model.HoodieAvroRecord; import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.testutils.RawTripTestPayload; -import org.apache.hudi.common.util.collection.ImmutablePair; import org.apache.hudi.common.util.collection.Pair; import org.apache.hudi.config.HoodieIndexConfig; import org.apache.hudi.config.HoodieWriteConfig; @@ -92,24 +92,24 @@ public void testLoadInvolvedFiles() throws Exception { RawTripTestPayload rowChange1 = new RawTripTestPayload("{\"_row_key\":\"000\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12}"); HoodieRecord record1 = - new HoodieRecord(new HoodieKey(rowChange1.getRowKey(), rowChange1.getPartitionPath()), rowChange1); + new HoodieAvroRecord(new HoodieKey(rowChange1.getRowKey(), rowChange1.getPartitionPath()), rowChange1); RawTripTestPayload rowChange2 = new RawTripTestPayload("{\"_row_key\":\"001\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12}"); HoodieRecord record2 = - new HoodieRecord(new HoodieKey(rowChange2.getRowKey(), rowChange2.getPartitionPath()), rowChange2); + new HoodieAvroRecord(new HoodieKey(rowChange2.getRowKey(), rowChange2.getPartitionPath()), rowChange2); RawTripTestPayload rowChange3 = new RawTripTestPayload("{\"_row_key\":\"002\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12}"); HoodieRecord record3 = - new HoodieRecord(new HoodieKey(rowChange3.getRowKey(), rowChange3.getPartitionPath()), rowChange3); + new HoodieAvroRecord(new HoodieKey(rowChange3.getRowKey(), rowChange3.getPartitionPath()), rowChange3); RawTripTestPayload rowChange4 = new RawTripTestPayload("{\"_row_key\":\"003\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12}"); HoodieRecord record4 = - new HoodieRecord(new HoodieKey(rowChange4.getRowKey(), rowChange4.getPartitionPath()), rowChange4); + new HoodieAvroRecord(new HoodieKey(rowChange4.getRowKey(), rowChange4.getPartitionPath()), rowChange4); // 
intentionally missed the partition "2015/03/12" to see if the GlobalBloomIndex can pick it up List partitions = Arrays.asList("2016/01/21", "2016/04/01"); // partitions will NOT be respected by this loadInvolvedFiles(...) call - List> filesList = index.loadInvolvedFiles(partitions, context, hoodieTable); + List> filesList = index.loadColumnRangesFromFiles(partitions, context, hoodieTable); // Still 0, as no valid commit assertEquals(0, filesList.size()); @@ -118,7 +118,7 @@ public void testLoadInvolvedFiles() throws Exception { .withInserts("2015/03/12", "3", record1) .withInserts("2015/03/12", "4", record2, record3, record4); - filesList = index.loadInvolvedFiles(partitions, context, hoodieTable); + filesList = index.loadColumnRangesFromFiles(partitions, context, hoodieTable); assertEquals(4, filesList.size()); Map filesMap = toFileMap(filesList); @@ -158,7 +158,7 @@ public void testExplodeRecordRDDWithFileComparisons() { jsc.parallelize(Arrays.asList(new Tuple2<>("2017/10/21", "003"), new Tuple2<>("2017/10/22", "002"), new Tuple2<>("2017/10/22", "005"), new Tuple2<>("2017/10/23", "004"))).mapToPair(t -> t); - List> comparisonKeyList = HoodieJavaRDD.getJavaRDD( + List> comparisonKeyList = HoodieJavaRDD.getJavaRDD( index.explodeRecordsWithFileComparisons(partitionToFileIndexInfo, HoodieJavaPairRDD.of(partitionRecordKeyPairRDD))).collect(); @@ -200,28 +200,28 @@ public void testTagLocation() throws Exception { RawTripTestPayload rowChange1 = new RawTripTestPayload("{\"_row_key\":\"000\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12}"); HoodieRecord record1 = - new HoodieRecord(new HoodieKey(rowChange1.getRowKey(), rowChange1.getPartitionPath()), rowChange1); + new HoodieAvroRecord(new HoodieKey(rowChange1.getRowKey(), rowChange1.getPartitionPath()), rowChange1); RawTripTestPayload rowChange2 = new RawTripTestPayload("{\"_row_key\":\"001\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12}"); HoodieRecord record2 = - new HoodieRecord(new HoodieKey(rowChange2.getRowKey(), rowChange2.getPartitionPath()), rowChange2); + new HoodieAvroRecord(new HoodieKey(rowChange2.getRowKey(), rowChange2.getPartitionPath()), rowChange2); RawTripTestPayload rowChange3 = new RawTripTestPayload("{\"_row_key\":\"002\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12}"); HoodieRecord record3 = - new HoodieRecord(new HoodieKey(rowChange3.getRowKey(), rowChange3.getPartitionPath()), rowChange3); + new HoodieAvroRecord(new HoodieKey(rowChange3.getRowKey(), rowChange3.getPartitionPath()), rowChange3); // this record will be saved in table and will be tagged to the incoming record5 RawTripTestPayload rowChange4 = new RawTripTestPayload("{\"_row_key\":\"003\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12}"); HoodieRecord record4 = - new HoodieRecord(new HoodieKey(rowChange4.getRowKey(), rowChange4.getPartitionPath()), rowChange4); + new HoodieAvroRecord(new HoodieKey(rowChange4.getRowKey(), rowChange4.getPartitionPath()), rowChange4); // this has the same record key as record4 but different time so different partition, but globalbloomIndex should // tag the original partition of the saved record4 RawTripTestPayload rowChange5 = new RawTripTestPayload("{\"_row_key\":\"003\",\"time\":\"2016-02-31T03:16:41.415Z\",\"number\":12}"); HoodieRecord record5 = - new HoodieRecord(new HoodieKey(rowChange5.getRowKey(), rowChange5.getPartitionPath()), rowChange5); + new HoodieAvroRecord(new HoodieKey(rowChange5.getRowKey(), rowChange5.getPartitionPath()), rowChange5); JavaRDD recordRDD = 
jsc.parallelize(Arrays.asList(record1, record2, record3, record5)); @@ -281,7 +281,7 @@ public void testTagLocationWhenShouldUpdatePartitionPath() throws Exception { RawTripTestPayload originalPayload = new RawTripTestPayload("{\"_row_key\":\"000\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":12}"); HoodieRecord originalRecord = - new HoodieRecord(new HoodieKey(originalPayload.getRowKey(), originalPayload.getPartitionPath()), + new HoodieAvroRecord(new HoodieKey(originalPayload.getRowKey(), originalPayload.getPartitionPath()), originalPayload); /* @@ -294,7 +294,7 @@ public void testTagLocationWhenShouldUpdatePartitionPath() throws Exception { RawTripTestPayload incomingPayload = new RawTripTestPayload("{\"_row_key\":\"000\",\"time\":\"2016-02-28T03:16:41.415Z\",\"number\":12}"); HoodieRecord incomingRecord = - new HoodieRecord(new HoodieKey(incomingPayload.getRowKey(), incomingPayload.getPartitionPath()), + new HoodieAvroRecord(new HoodieKey(incomingPayload.getRowKey(), incomingPayload.getPartitionPath()), incomingPayload); /* @@ -305,7 +305,7 @@ public void testTagLocationWhenShouldUpdatePartitionPath() throws Exception { RawTripTestPayload incomingPayloadSamePartition = new RawTripTestPayload("{\"_row_key\":\"000\",\"time\":\"2016-01-31T04:16:41.415Z\",\"number\":15}"); HoodieRecord incomingRecordSamePartition = - new HoodieRecord( + new HoodieAvroRecord( new HoodieKey(incomingPayloadSamePartition.getRowKey(), incomingPayloadSamePartition.getPartitionPath()), incomingPayloadSamePartition); diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/index/bloom/TestKeyRangeLookupTree.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/index/bloom/TestKeyRangeLookupTree.java index 012d0dfa35910..1c6973db746bc 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/index/bloom/TestKeyRangeLookupTree.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/index/bloom/TestKeyRangeLookupTree.java @@ -80,7 +80,7 @@ public void testFileGroupLookUpManyEntriesWithSameStartValue() { * Tests for many duplicate entries in the tree. 
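 * Each duplicate range is registered under a distinct file ID, and a lookup
 * is expected to return all of them.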
*/ @Test - public void testFileGroupLookUpManyDulicateEntries() { + public void testFileGroupLookUpManyDuplicateEntries() { KeyRangeNode toInsert = new KeyRangeNode(Long.toString(1200), Long.toString(2000), UUID.randomUUID().toString()); updateExpectedMatchesToTest(toInsert); keyRangeLookupTree.insert(toInsert); diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/index/bucket/TestBucketIdentifier.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/index/bucket/TestBucketIdentifier.java index 879d9933978a0..4491a74fa62ba 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/index/bucket/TestBucketIdentifier.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/index/bucket/TestBucketIdentifier.java @@ -18,16 +18,18 @@ package org.apache.hudi.index.bucket; -import java.util.Arrays; -import java.util.List; - -import org.apache.avro.generic.GenericRecord; +import org.apache.hudi.common.model.HoodieAvroRecord; import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.keygen.KeyGenUtils; import org.apache.hudi.testutils.KeyGeneratorTestUtilities; + +import org.apache.avro.generic.GenericRecord; import org.junit.jupiter.api.Test; +import java.util.Arrays; +import java.util.List; + public class TestBucketIdentifier { @Test @@ -44,7 +46,7 @@ public void testBucketIdWithSimpleRecordKey() { String recordKeyField = "_row_key"; String indexKeyField = "_row_key"; GenericRecord record = KeyGeneratorTestUtilities.getRecord(); - HoodieRecord hoodieRecord = new HoodieRecord( + HoodieRecord hoodieRecord = new HoodieAvroRecord( new HoodieKey(KeyGenUtils.getRecordKey(record, recordKeyField, false), ""), null); int bucketId = BucketIdentifier.getBucketId(hoodieRecord, indexKeyField, 8); assert bucketId == BucketIdentifier.getBucketId( @@ -56,7 +58,7 @@ public void testBucketIdWithComplexRecordKey() { List recordKeyField = Arrays.asList("_row_key","ts_ms"); String indexKeyField = "_row_key"; GenericRecord record = KeyGeneratorTestUtilities.getRecord(); - HoodieRecord hoodieRecord = new HoodieRecord( + HoodieRecord hoodieRecord = new HoodieAvroRecord( new HoodieKey(KeyGenUtils.getRecordKey(record, recordKeyField, false), ""), null); int bucketId = BucketIdentifier.getBucketId(hoodieRecord, indexKeyField, 8); assert bucketId == BucketIdentifier.getBucketId( diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/index/bucket/TestHoodieBucketIndex.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/index/bucket/TestHoodieBucketIndex.java index c79f9aec773ed..2b3765948bb63 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/index/bucket/TestHoodieBucketIndex.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/index/bucket/TestHoodieBucketIndex.java @@ -19,8 +19,8 @@ package org.apache.hudi.index.bucket; -import org.apache.avro.Schema; import org.apache.hudi.common.data.HoodieData; +import org.apache.hudi.common.model.HoodieAvroRecord; import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.testutils.RawTripTestPayload; @@ -34,6 +34,8 @@ import org.apache.hudi.table.HoodieTable; import org.apache.hudi.testutils.HoodieClientTestHarness; import org.apache.hudi.testutils.HoodieSparkWriteableTestTable; + +import org.apache.avro.Schema; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; import org.apache.spark.api.java.JavaRDD; @@ 
-46,8 +48,8 @@ import java.util.UUID; import static org.apache.hudi.common.testutils.SchemaTestUtil.getSchemaFromResource; -import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; public class TestHoodieBucketIndex extends HoodieClientTestHarness { @@ -93,23 +95,23 @@ public void testTagLocation() throws Exception { String recordStr3 = "{\"_row_key\":\"" + rowKey3 + "\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":15}"; String recordStr4 = "{\"_row_key\":\"" + rowKey1 + "\",\"time\":\"2015-01-31T03:16:41.415Z\",\"number\":32}"; RawTripTestPayload rowChange1 = new RawTripTestPayload(recordStr1); - HoodieRecord record1 = new HoodieRecord( + HoodieRecord record1 = new HoodieAvroRecord( new HoodieKey(rowChange1.getRowKey(), rowChange1.getPartitionPath()), rowChange1); RawTripTestPayload rowChange2 = new RawTripTestPayload(recordStr2); - HoodieRecord record2 = new HoodieRecord( + HoodieRecord record2 = new HoodieAvroRecord( new HoodieKey(rowChange2.getRowKey(), rowChange2.getPartitionPath()), rowChange2); RawTripTestPayload rowChange3 = new RawTripTestPayload(recordStr3); - HoodieRecord record3 = new HoodieRecord( + HoodieRecord record3 = new HoodieAvroRecord( new HoodieKey(rowChange3.getRowKey(), rowChange3.getPartitionPath()), rowChange3); RawTripTestPayload rowChange4 = new RawTripTestPayload(recordStr4); - HoodieRecord record4 = new HoodieRecord( + HoodieRecord record4 = new HoodieAvroRecord( new HoodieKey(rowChange4.getRowKey(), rowChange4.getPartitionPath()), rowChange4); - JavaRDD recordRDD = jsc.parallelize(Arrays.asList(record1, record2, record3, record4)); + JavaRDD> recordRDD = jsc.parallelize(Arrays.asList(record1, record2, record3, record4)); HoodieWriteConfig config = makeConfig(); HoodieTable table = HoodieSparkTable.create(config, context, metaClient); HoodieBucketIndex bucketIndex = new HoodieBucketIndex(config); - HoodieData taggedRecordRDD = bucketIndex.tagLocation(HoodieJavaRDD.of(recordRDD), context, table); + HoodieData> taggedRecordRDD = bucketIndex.tagLocation(HoodieJavaRDD.of(recordRDD), context, table); assertFalse(taggedRecordRDD.collectAsList().stream().anyMatch(r -> r.isCurrentLocationKnown())); HoodieSparkWriteableTestTable testTable = HoodieSparkWriteableTestTable.of(table, SCHEMA); diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHBaseIndex.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/index/hbase/TestSparkHoodieHBaseIndex.java similarity index 97% rename from hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHBaseIndex.java rename to hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/index/hbase/TestSparkHoodieHBaseIndex.java index b35fee0153103..87bcad04bc85e 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/client/functional/TestHBaseIndex.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/index/hbase/TestSparkHoodieHBaseIndex.java @@ -16,15 +16,16 @@ * limitations under the License. 
*/ -package org.apache.hudi.client.functional; +package org.apache.hudi.index.hbase; import org.apache.hudi.client.SparkRDDWriteClient; import org.apache.hudi.client.WriteStatus; import org.apache.hudi.client.common.HoodieSparkEngineContext; -import org.apache.hudi.common.config.HoodieMetadataConfig; import org.apache.hudi.common.model.EmptyHoodieRecordPayload; +import org.apache.hudi.common.model.HoodieAvroRecord; import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.model.HoodieRecord; +import org.apache.hudi.common.model.HoodieRecordPayload; import org.apache.hudi.common.model.HoodieTableType; import org.apache.hudi.common.model.HoodieWriteStat; import org.apache.hudi.common.table.HoodieTableMetaClient; @@ -37,7 +38,6 @@ import org.apache.hudi.config.HoodieStorageConfig; import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.index.HoodieIndex; -import org.apache.hudi.index.hbase.SparkHoodieHBaseIndex; import org.apache.hudi.table.HoodieSparkTable; import org.apache.hudi.table.HoodieTable; import org.apache.hudi.testutils.SparkClientFunctionalTestHarness; @@ -93,7 +93,7 @@ */ @TestMethodOrder(MethodOrderer.Alphanumeric.class) @Tag("functional") -public class TestHBaseIndex extends SparkClientFunctionalTestHarness { +public class TestSparkHoodieHBaseIndex extends SparkClientFunctionalTestHarness { private static final String TABLE_NAME = "test_table"; private static HBaseTestingUtility utility; @@ -190,13 +190,13 @@ public void testTagLocationAndPartitionPathUpdate() throws Exception { final String newCommitTime = "001"; final int numRecords = 10; final String oldPartitionPath = "1970/01/01"; - final String emptyHoodieRecordPayloadClasssName = EmptyHoodieRecordPayload.class.getName(); + final String emptyHoodieRecordPayloadClassName = EmptyHoodieRecordPayload.class.getName(); List newRecords = dataGen.generateInserts(newCommitTime, numRecords); List oldRecords = new LinkedList(); for (HoodieRecord newRecord: newRecords) { HoodieKey key = new HoodieKey(newRecord.getRecordKey(), oldPartitionPath); - HoodieRecord hoodieRecord = new HoodieRecord(key, newRecord.getData()); + HoodieRecord hoodieRecord = new HoodieAvroRecord(key, (HoodieRecordPayload) newRecord.getData()); oldRecords.add(hoodieRecord); } @@ -225,12 +225,12 @@ public void testTagLocationAndPartitionPathUpdate() throws Exception { assertEquals(numRecords * 2L, taggedRecords.stream().count()); // Verify the number of deleted records assertEquals(numRecords, taggedRecords.stream().filter(record -> record.getKey().getPartitionPath().equals(oldPartitionPath) - && record.getData().getClass().getName().equals(emptyHoodieRecordPayloadClasssName)).count()); + && record.getData().getClass().getName().equals(emptyHoodieRecordPayloadClassName)).count()); // Verify the number of inserted records assertEquals(numRecords, taggedRecords.stream().filter(record -> !record.getKey().getPartitionPath().equals(oldPartitionPath)).count()); // not allowed path change test - index = new SparkHoodieHBaseIndex<>(getConfig(false, false)); + index = new SparkHoodieHBaseIndex(getConfig(false, false)); List notAllowPathChangeRecords = tagLocation(index, newWriteRecords, hoodieTable).collect(); assertEquals(numRecords, notAllowPathChangeRecords.stream().count()); assertEquals(numRecords, taggedRecords.stream().filter(hoodieRecord -> hoodieRecord.isCurrentLocationKnown() @@ -291,7 +291,7 @@ public void testTagLocationAndPartitionPathUpdateWithExplicitRollback() throws E List oldRecords = new LinkedList(); for (HoodieRecord 
newRecord: newRecords) { HoodieKey key = new HoodieKey(newRecord.getRecordKey(), oldPartitionPath); - HoodieRecord hoodieRecord = new HoodieRecord(key, newRecord.getData()); + HoodieRecord hoodieRecord = new HoodieAvroRecord(key, (HoodieRecordPayload) newRecord.getData()); oldRecords.add(hoodieRecord); } JavaRDD newWriteRecords = jsc().parallelize(newRecords, 1); @@ -341,8 +341,7 @@ public void testTagLocationAndPartitionPathUpdateWithExplicitRollback() throws E public void testSimpleTagLocationAndUpdateWithRollback() throws Exception { // Load to memory HoodieWriteConfig config = getConfigBuilder(100, false, false) - .withRollbackUsingMarkers(false) - .withMetadataConfig(HoodieMetadataConfig.newBuilder().enable(true).build()).build(); + .withRollbackUsingMarkers(false).build(); SparkHoodieHBaseIndex index = new SparkHoodieHBaseIndex(config); SparkRDDWriteClient writeClient = getHoodieWriteClient(config); @@ -430,8 +429,7 @@ public void testSimpleTagLocationWithInvalidCommit() throws Exception { public void testEnsureTagLocationUsesCommitTimeline() throws Exception { // Load to memory HoodieWriteConfig config = getConfigBuilder(100, false, false) - .withRollbackUsingMarkers(false) - .withMetadataConfig(HoodieMetadataConfig.newBuilder().enable(true).build()).build(); + .withRollbackUsingMarkers(false).build(); SparkHoodieHBaseIndex index = new SparkHoodieHBaseIndex(config); SparkRDDWriteClient writeClient = getHoodieWriteClient(config); @@ -764,7 +762,7 @@ public void testDelete() throws Exception { // is not implemented via HoodieWriteClient JavaRDD deleteWriteStatues = writeStatues.map(w -> { WriteStatus newWriteStatus = new WriteStatus(true, 1.0); - w.getWrittenRecords().forEach(r -> newWriteStatus.markSuccess(new HoodieRecord(r.getKey(), null), Option.empty())); + w.getWrittenRecords().forEach(r -> newWriteStatus.markSuccess(new HoodieAvroRecord(r.getKey(), null), Option.empty())); assertEquals(w.getTotalRecords(), newWriteStatus.getTotalRecords()); newWriteStatus.setStat(new HoodieWriteStat()); return newWriteStatus; diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/io/TestHoodieTimelineArchiveLog.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/io/TestHoodieTimelineArchiver.java similarity index 89% rename from hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/io/TestHoodieTimelineArchiveLog.java rename to hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/io/TestHoodieTimelineArchiver.java index 4902d74264a09..652dbcb155b0e 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/io/TestHoodieTimelineArchiveLog.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/io/TestHoodieTimelineArchiver.java @@ -18,8 +18,8 @@ package org.apache.hudi.io; -import org.apache.hadoop.fs.FileStatus; import org.apache.hudi.avro.model.HoodieRollbackMetadata; +import org.apache.hudi.client.HoodieTimelineArchiver; import org.apache.hudi.client.utils.MetadataConversionUtils; import org.apache.hudi.common.config.HoodieMetadataConfig; import org.apache.hudi.common.fs.HoodieWrapperFileSystem; @@ -48,16 +48,17 @@ import org.apache.hudi.metadata.SparkHoodieBackedTableMetadataWriter; import org.apache.hudi.table.HoodieSparkTable; import org.apache.hudi.table.HoodieTable; -import org.apache.hudi.table.HoodieTimelineArchiveLog; import org.apache.hudi.testutils.HoodieClientTestHarness; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.Path; import 
org.apache.log4j.LogManager; import org.apache.log4j.Logger; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.CsvSource; import org.junit.jupiter.params.provider.ValueSource; import java.io.IOException; @@ -72,14 +73,15 @@ import java.util.stream.Collectors; import java.util.stream.Stream; +import static org.apache.hudi.common.testutils.HoodieTestUtils.createCompactionCommitInMetadataTable; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; -public class TestHoodieTimelineArchiveLog extends HoodieClientTestHarness { +public class TestHoodieTimelineArchiver extends HoodieClientTestHarness { - private static final Logger LOG = LogManager.getLogger(TestHoodieTimelineArchiveLog.class); + private static final Logger LOG = LogManager.getLogger(TestHoodieTimelineArchiver.class); private Configuration hadoopConf; private HoodieWrapperFileSystem wrapperFs; @@ -172,8 +174,8 @@ public void testArchiveEmptyTable() throws Exception { .withParallelism(2, 2).forTable("test-trip-table").build(); metaClient = HoodieTableMetaClient.reload(metaClient); HoodieTable table = HoodieSparkTable.create(cfg, context, metaClient); - HoodieTimelineArchiveLog archiveLog = new HoodieTimelineArchiveLog(cfg, table); - boolean result = archiveLog.archiveIfRequired(context); + HoodieTimelineArchiver archiver = new HoodieTimelineArchiver(cfg, table); + boolean result = archiver.archiveIfRequired(context); assertTrue(result); } @@ -213,7 +215,7 @@ public void testArchiveTableWithArchival(boolean enableMetadata) throws Exceptio @ParameterizedTest @ValueSource(booleans = {true, false}) public void testMergeSmallArchiveFilesRecoverFromBuildPlanFailed(boolean enableArchiveMerge) throws Exception { - HoodieWriteConfig writeConfig = initTestTableAndGetWriteConfig(false, 2, 3, 2, enableArchiveMerge, 3, 209715200); + HoodieWriteConfig writeConfig = initTestTableAndGetWriteConfig(true, 2, 3, 2, enableArchiveMerge, 3, 209715200); // do ingestion and trigger archive actions here. for (int i = 1; i < 8; i++) { @@ -224,14 +226,14 @@ public void testMergeSmallArchiveFilesRecoverFromBuildPlanFailed(boolean enableA // build a merge small archive plan with dummy content // this plan can not be deserialized. HoodieTable table = HoodieSparkTable.create(writeConfig, context, metaClient); - HoodieTimelineArchiveLog archiveLog = new HoodieTimelineArchiveLog(writeConfig, table); + HoodieTimelineArchiver archiver = new HoodieTimelineArchiver(writeConfig, table); FileStatus[] fsStatuses = metaClient.getFs().globStatus( new Path(metaClient.getArchivePath() + "/.commits_.archive*")); List candidateFiles = Arrays.stream(fsStatuses).map(fs -> fs.getPath().toString()).collect(Collectors.toList()); - archiveLog.reOpenWriter(); + archiver.reOpenWriter(); Path plan = new Path(metaClient.getArchivePath(), HoodieArchivedTimeline.MERGE_ARCHIVE_PLAN_NAME); - archiveLog.buildArchiveMergePlan(candidateFiles, plan, ".commits_.archive.3_1-0-1"); + archiver.buildArchiveMergePlan(candidateFiles, plan, ".commits_.archive.3_1-0-1"); String s = "Dummy Content"; // stain the current merge plan file. 
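// Illustrative sketch of why the dummy bytes force the recovery path (assumes the
// archiver reads the plan back via TimelineMetadataUtils.deserializeAvroMetadata into
// HoodieMergeArchiveFilePlan, as done elsewhere in this codebase):
//   try {
//     TimelineMetadataUtils.deserializeAvroMetadata(
//         FileIOUtils.readDataFromPath(metaClient.getFs(), plan).get(), HoodieMergeArchiveFilePlan.class);
//     fail("Dummy content must not deserialize into a merge-archive plan");
//   } catch (IOException e) {
//     // expected: an unreadable plan makes the next archive run discard the half-done merge
//   }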
FileIOUtils.createFileInPath(metaClient.getFs(), plan, Option.of(s.getBytes())); @@ -264,7 +266,7 @@ public void testMergeSmallArchiveFilesRecoverFromBuildPlanFailed(boolean enableA @ParameterizedTest @ValueSource(booleans = {true, false}) public void testMergeSmallArchiveFilesRecoverFromMergeFailed(boolean enableArchiveMerge) throws Exception { - HoodieWriteConfig writeConfig = initTestTableAndGetWriteConfig(false, 2, 3, 2, enableArchiveMerge, 3, 209715200); + HoodieWriteConfig writeConfig = initTestTableAndGetWriteConfig(true, 2, 3, 2, enableArchiveMerge, 3, 209715200); // do ingestion and trigger archive actions here. for (int i = 1; i < 8; i++) { @@ -274,15 +276,15 @@ public void testMergeSmallArchiveFilesRecoverFromMergeFailed(boolean enableArchi // do a single merge small archive files HoodieTable table = HoodieSparkTable.create(writeConfig, context, metaClient); - HoodieTimelineArchiveLog archiveLog = new HoodieTimelineArchiveLog(writeConfig, table); + HoodieTimelineArchiver archiver = new HoodieTimelineArchiver(writeConfig, table); FileStatus[] fsStatuses = metaClient.getFs().globStatus( new Path(metaClient.getArchivePath() + "/.commits_.archive*")); List candidateFiles = Arrays.stream(fsStatuses).map(fs -> fs.getPath().toString()).collect(Collectors.toList()); - archiveLog.reOpenWriter(); + archiver.reOpenWriter(); - archiveLog.buildArchiveMergePlan(candidateFiles, new Path(metaClient.getArchivePath(), HoodieArchivedTimeline.MERGE_ARCHIVE_PLAN_NAME), ".commits_.archive.3_1-0-1"); - archiveLog.mergeArchiveFiles(Arrays.stream(fsStatuses).collect(Collectors.toList())); - HoodieLogFormat.Writer writer = archiveLog.reOpenWriter(); + archiver.buildArchiveMergePlan(candidateFiles, new Path(metaClient.getArchivePath(), HoodieArchivedTimeline.MERGE_ARCHIVE_PLAN_NAME), ".commits_.archive.3_1-0-1"); + archiver.mergeArchiveFiles(Arrays.stream(fsStatuses).collect(Collectors.toList())); + HoodieLogFormat.Writer writer = archiver.reOpenWriter(); // check loading archived and active timeline success HoodieActiveTimeline rawActiveTimeline = new HoodieActiveTimeline(metaClient, false); @@ -317,7 +319,7 @@ public void testMergeSmallArchiveFilesRecoverFromMergeFailed(boolean enableArchi @ParameterizedTest @ValueSource(booleans = {true, false}) public void testMergeSmallArchiveFilesRecoverFromDeleteFailed(boolean enableArchiveMerge) throws Exception { - HoodieWriteConfig writeConfig = initTestTableAndGetWriteConfig(false, 2, 3, 2, enableArchiveMerge, 3, 209715200); + HoodieWriteConfig writeConfig = initTestTableAndGetWriteConfig(true, 2, 3, 2, enableArchiveMerge, 3, 209715200); // do ingestion and trigger archive actions here. 
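// Note on the magic number in the write config above: 209715200 = 200 * 1024 * 1024,
// i.e. a 200 MB size threshold for the archive-merge settings (assumed meaning; the
// constant below is hypothetical and only spells out the arithmetic):
//   long assumedArchiveMergeSizeLimitBytes = 200L * 1024 * 1024; // = 209715200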
for (int i = 1; i < 8; i++) { @@ -327,16 +329,16 @@ public void testMergeSmallArchiveFilesRecoverFromDeleteFailed(boolean enableArch // do a single merge small archive files HoodieTable table = HoodieSparkTable.create(writeConfig, context, metaClient); - HoodieTimelineArchiveLog archiveLog = new HoodieTimelineArchiveLog(writeConfig, table); + HoodieTimelineArchiver archiver = new HoodieTimelineArchiver(writeConfig, table); FileStatus[] fsStatuses = metaClient.getFs().globStatus( new Path(metaClient.getArchivePath() + "/.commits_.archive*")); List candidateFiles = Arrays.stream(fsStatuses).map(fs -> fs.getPath().toString()).collect(Collectors.toList()); - archiveLog.reOpenWriter(); + archiver.reOpenWriter(); - archiveLog.buildArchiveMergePlan(candidateFiles, new Path(metaClient.getArchivePath(), HoodieArchivedTimeline.MERGE_ARCHIVE_PLAN_NAME), ".commits_.archive.3_1-0-1"); - archiveLog.mergeArchiveFiles(Arrays.stream(fsStatuses).collect(Collectors.toList())); - archiveLog.reOpenWriter(); + archiver.buildArchiveMergePlan(candidateFiles, new Path(metaClient.getArchivePath(), HoodieArchivedTimeline.MERGE_ARCHIVE_PLAN_NAME), ".commits_.archive.3_1-0-1"); + archiver.mergeArchiveFiles(Arrays.stream(fsStatuses).collect(Collectors.toList())); + archiver.reOpenWriter(); // delete only one of the small archive file to simulate delete action failed. metaClient.getFs().delete(fsStatuses[0].getPath()); @@ -362,7 +364,7 @@ public void testMergeSmallArchiveFilesRecoverFromDeleteFailed(boolean enableArch @ParameterizedTest @ValueSource(booleans = {true, false}) public void testLoadArchiveTimelineWithDamagedPlanFile(boolean enableArchiveMerge) throws Exception { - HoodieWriteConfig writeConfig = initTestTableAndGetWriteConfig(false, 2, 3, 2, enableArchiveMerge, 3, 209715200); + HoodieWriteConfig writeConfig = initTestTableAndGetWriteConfig(true, 2, 3, 2, enableArchiveMerge, 3, 209715200); // do ingestion and trigger archive actions here. for (int i = 1; i < 8; i++) { @@ -390,23 +392,23 @@ public void testLoadArchiveTimelineWithDamagedPlanFile(boolean enableArchiveMerg @ParameterizedTest @ValueSource(booleans = {true, false}) public void testLoadArchiveTimelineWithUncompletedMergeArchiveFile(boolean enableArchiveMerge) throws Exception { - HoodieWriteConfig writeConfig = initTestTableAndGetWriteConfig(false, 2, 3, 2, enableArchiveMerge, 3, 209715200); + HoodieWriteConfig writeConfig = initTestTableAndGetWriteConfig(true, 2, 3, 2, enableArchiveMerge, 3, 209715200); for (int i = 1; i < 8; i++) { testTable.doWriteOperation("0000000" + i, WriteOperationType.UPSERT, i == 1 ? 
Arrays.asList("p1", "p2") : Collections.emptyList(), Arrays.asList("p1", "p2"), 2); archiveAndGetCommitsList(writeConfig); } HoodieTable table = HoodieSparkTable.create(writeConfig, context, metaClient); - HoodieTimelineArchiveLog archiveLog = new HoodieTimelineArchiveLog(writeConfig, table); + HoodieTimelineArchiver archiver = new HoodieTimelineArchiver(writeConfig, table); FileStatus[] fsStatuses = metaClient.getFs().globStatus( new Path(metaClient.getArchivePath() + "/.commits_.archive*")); List candidateFiles = Arrays.stream(fsStatuses).map(fs -> fs.getPath().toString()).collect(Collectors.toList()); - archiveLog.reOpenWriter(); + archiver.reOpenWriter(); - archiveLog.buildArchiveMergePlan(candidateFiles, new Path(metaClient.getArchivePath(), HoodieArchivedTimeline.MERGE_ARCHIVE_PLAN_NAME), ".commits_.archive.3_1-0-1"); - archiveLog.mergeArchiveFiles(Arrays.stream(fsStatuses).collect(Collectors.toList())); - HoodieLogFormat.Writer writer = archiveLog.reOpenWriter(); + archiver.buildArchiveMergePlan(candidateFiles, new Path(metaClient.getArchivePath(), HoodieArchivedTimeline.MERGE_ARCHIVE_PLAN_NAME), ".commits_.archive.3_1-0-1"); + archiver.mergeArchiveFiles(Arrays.stream(fsStatuses).collect(Collectors.toList())); + HoodieLogFormat.Writer writer = archiver.reOpenWriter(); String s = "Dummy Content"; // stain the current merged archive file. @@ -451,15 +453,16 @@ public void testNoArchivalUntilMaxArchiveConfigWithExtraInflightCommits(boolean assertEquals(originalCommits, commitsAfterArchival); } - @Test - public void testArchiveCommitSavepointNoHole() throws Exception { + @ParameterizedTest + @ValueSource(booleans = {true, false}) + public void testArchiveCommitSavepointNoHole(boolean enableMetadataTable) throws Exception { init(); HoodieWriteConfig cfg = HoodieWriteConfig.newBuilder().withPath(basePath) .withSchema(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA).withParallelism(2, 2).forTable("test-trip-table") .withCompactionConfig(HoodieCompactionConfig.newBuilder().retainCommits(1).archiveCommitsWith(2, 5).build()) .withFileSystemViewConfig(FileSystemViewStorageConfig.newBuilder() .withRemoteServerPort(timelineServicePort).build()) - .withMetadataConfig(HoodieMetadataConfig.newBuilder().enable(false).build()) + .withMetadataConfig(HoodieMetadataConfig.newBuilder().enable(enableMetadataTable).build()) .build(); HoodieTestDataGenerator.createCommitFile(basePath, "100", wrapperFs.getConf()); @@ -470,11 +473,17 @@ public void testArchiveCommitSavepointNoHole() throws Exception { HoodieTestDataGenerator.createCommitFile(basePath, "104", wrapperFs.getConf()); HoodieTestDataGenerator.createCommitFile(basePath, "105", wrapperFs.getConf()); HoodieTable table = HoodieSparkTable.create(cfg, context); - HoodieTimelineArchiveLog archiveLog = new HoodieTimelineArchiveLog(cfg, table); + HoodieTimelineArchiver archiver = new HoodieTimelineArchiver(cfg, table); + + if (enableMetadataTable) { + // Simulate a compaction commit in metadata table timeline + // so the archival in data table can happen + createCompactionCommitInMetadataTable(hadoopConf, wrapperFs, basePath, "105"); + } HoodieTimeline timeline = metaClient.getActiveTimeline().getCommitsTimeline().filterCompletedInstants(); assertEquals(6, timeline.countInstants(), "Loaded 6 commits and the count should match"); - assertTrue(archiveLog.archiveIfRequired(context)); + assertTrue(archiver.archiveIfRequired(context)); timeline = metaClient.getActiveTimeline().reload().getCommitsTimeline().filterCompletedInstants(); assertEquals(5, 
timeline.countInstants(), "Since we have a savepoint at 101, we should never archive any commit after 101 (we only archive 100)"); @@ -593,8 +602,9 @@ public void testNoArchivalWithInflightCompactionInMiddle(boolean enableMetadata) verifyArchival(archivedInstants, getActiveCommitInstants(Arrays.asList("00000007", "00000008"), HoodieTimeline.DELTA_COMMIT_ACTION), commitsAfterArchival); } - @Test - public void testArchiveCommitTimeline() throws Exception { + @ParameterizedTest + @ValueSource(booleans = {true, false}) + public void testArchiveCommitTimeline(boolean enableMetadataTable) throws Exception { init(); HoodieWriteConfig cfg = HoodieWriteConfig.newBuilder().withPath(basePath).withSchema(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA) @@ -602,7 +612,7 @@ public void testArchiveCommitTimeline() throws Exception { .withCompactionConfig(HoodieCompactionConfig.newBuilder().retainCommits(1).archiveCommitsWith(2, 3).build()) .withFileSystemViewConfig(FileSystemViewStorageConfig.newBuilder() .withRemoteServerPort(timelineServicePort).build()) - .withMetadataConfig(HoodieMetadataConfig.newBuilder().enable(false).build()) + .withMetadataConfig(HoodieMetadataConfig.newBuilder().enable(enableMetadataTable).build()) .build(); metaClient = HoodieTableMetaClient.reload(metaClient); @@ -619,9 +629,15 @@ public void testArchiveCommitTimeline() throws Exception { HoodieTestDataGenerator.createCommitFile(basePath, "4", wrapperFs.getConf()); HoodieTestDataGenerator.createCommitFile(basePath, "5", wrapperFs.getConf()); + if (enableMetadataTable) { + // Simulate a compaction commit in metadata table timeline + // so the archival in data table can happen + createCompactionCommitInMetadataTable(hadoopConf, wrapperFs, basePath, "5"); + } + HoodieTable table = HoodieSparkTable.create(cfg, context, metaClient); - HoodieTimelineArchiveLog archiveLog = new HoodieTimelineArchiveLog(cfg, table); - boolean result = archiveLog.archiveIfRequired(context); + HoodieTimelineArchiver archiver = new HoodieTimelineArchiver(cfg, table); + boolean result = archiver.archiveIfRequired(context); assertTrue(result); HoodieArchivedTimeline archivedTimeline = metaClient.getArchivedTimeline(); List archivedInstants = Arrays.asList(instant1, instant2, instant3); @@ -655,7 +671,8 @@ public void testConvertCommitMetadata() throws Exception { public void testArchiveTableWithCleanCommits(boolean enableMetadata) throws Exception { HoodieWriteConfig writeConfig = initTestTableAndGetWriteConfig(enableMetadata, 2, 4, 2); - // min archival commits is 2 and max archival commits is 4(either clean commits has to be > 4 or commits has to be greater than 4. + // min archival commits is 2 and max archival commits is 4 + // (either clean commits has to be > 4 or commits has to be greater than 4) // and so, after 5th commit, 3 commits will be archived. // 1,2,3,4,5,6 : after archival -> 1,5,6 (because, 2,3,4,5 and 6 are clean commits and are eligible for archival) // after 7th and 8th commit no-op wrt archival. 
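// Worked example of the thresholds above, assuming archiveCommitsWith(min, max) keeps at
// least `min` instants and triggers only once the active timeline grows past `max`: with
// (2, 4), the 5th commit pushes the count past 4, so 5 - 2 = 3 instants become archival
// candidates; clean commits then decide which candidates actually move, per the
// 1,2,3,4,5,6 -> 1,5,6 trace above.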
@@ -712,10 +729,9 @@ public void testArchiveTableWithCleanCommits(boolean enableMetadata) throws Exce @Test public void testArchiveRollbacksAndCleanTestTable() throws Exception { - boolean enableMetadata = false; int minArchiveCommits = 2; int maxArchiveCommits = 9; - HoodieWriteConfig writeConfig = initTestTableAndGetWriteConfig(enableMetadata, minArchiveCommits, maxArchiveCommits, 2); + HoodieWriteConfig writeConfig = initTestTableAndGetWriteConfig(true, minArchiveCommits, maxArchiveCommits, 2); // trigger 1 commit to add lot of files so that future cleans can clean them up testTable.doWriteOperation("00000001", WriteOperationType.UPSERT, Arrays.asList("p1", "p2"), Arrays.asList("p1", "p2"), 20); @@ -750,8 +766,8 @@ public void testArchiveRollbacksAndCleanTestTable() throws Exception { } @ParameterizedTest - @ValueSource(booleans = {true, false}) - public void testArchiveCompletedRollbackAndClean(boolean isEmpty) throws Exception { + @CsvSource({"true,true", "true,false", "false,true", "false,false"}) + public void testArchiveCompletedRollbackAndClean(boolean isEmpty, boolean enableMetadataTable) throws Exception { init(); int minInstantsToKeep = 2; int maxInstantsToKeep = 10; @@ -761,7 +777,7 @@ public void testArchiveCompletedRollbackAndClean(boolean isEmpty) throws Excepti .withCompactionConfig(HoodieCompactionConfig.newBuilder().retainCommits(1).archiveCommitsWith(minInstantsToKeep, maxInstantsToKeep).build()) .withFileSystemViewConfig(FileSystemViewStorageConfig.newBuilder() .withRemoteServerPort(timelineServicePort).build()) - .withMetadataConfig(HoodieMetadataConfig.newBuilder().enable(false).build()) + .withMetadataConfig(HoodieMetadataConfig.newBuilder().enable(enableMetadataTable).build()) .build(); metaClient = HoodieTableMetaClient.reload(metaClient); @@ -774,10 +790,16 @@ public void testArchiveCompletedRollbackAndClean(boolean isEmpty) throws Excepti createCommitAndRollbackFile(startInstant + 1 + "", startInstant + "", false, isEmpty || i % 2 == 0); } + if (enableMetadataTable) { + // Simulate a compaction commit in metadata table timeline + // so the archival in data table can happen + createCompactionCommitInMetadataTable(hadoopConf, wrapperFs, basePath, Integer.toString(99)); + } + HoodieTable table = HoodieSparkTable.create(cfg, context, metaClient); - HoodieTimelineArchiveLog archiveLog = new HoodieTimelineArchiveLog(cfg, table); + HoodieTimelineArchiver archiver = new HoodieTimelineArchiver(cfg, table); - archiveLog.archiveIfRequired(context); + archiver.archiveIfRequired(context); Stream currentInstants = metaClient.getActiveTimeline().reload().getInstants(); Map> actionInstantMap = currentInstants.collect(Collectors.groupingBy(HoodieInstant::getAction)); @@ -789,8 +811,9 @@ public void testArchiveCompletedRollbackAndClean(boolean isEmpty) throws Excepti assertEquals(minInstantsToKeep, actionInstantMap.get("rollback").size(), "Should have min instant"); } - @Test - public void testArchiveInflightClean() throws Exception { + @ParameterizedTest + @ValueSource(booleans = {true, false}) + public void testArchiveInflightClean(boolean enableMetadataTable) throws Exception { init(); HoodieWriteConfig cfg = HoodieWriteConfig.newBuilder().withPath(basePath).withSchema(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA) @@ -798,7 +821,7 @@ public void testArchiveInflightClean() throws Exception { .withCompactionConfig(HoodieCompactionConfig.newBuilder().retainCommits(1).archiveCommitsWith(2, 3).build()) .withFileSystemViewConfig(FileSystemViewStorageConfig.newBuilder() 
.withRemoteServerPort(timelineServicePort).build()) - .withMetadataConfig(HoodieMetadataConfig.newBuilder().enable(false).build()) + .withMetadataConfig(HoodieMetadataConfig.newBuilder().enable(enableMetadataTable).build()) .build(); metaClient = HoodieTableMetaClient.reload(metaClient); @@ -808,10 +831,16 @@ HoodieInstant notArchivedInstant2 = createCleanMetadata("13", false); HoodieInstant notArchivedInstant3 = createCleanMetadata("14", true); + if (enableMetadataTable) { + // Simulate a compaction commit in metadata table timeline + // so the archival in data table can happen + createCompactionCommitInMetadataTable(hadoopConf, wrapperFs, basePath, "14"); + } + HoodieTable table = HoodieSparkTable.create(cfg, context, metaClient); - HoodieTimelineArchiveLog archiveLog = new HoodieTimelineArchiveLog(cfg, table); + HoodieTimelineArchiver archiver = new HoodieTimelineArchiver(cfg, table); - archiveLog.archiveIfRequired(context); + archiver.archiveIfRequired(context); List<HoodieInstant> notArchivedInstants = metaClient.getActiveTimeline().reload().getInstants().collect(Collectors.toList()); assertEquals(3, notArchivedInstants.size(), "Not archived instants should be 3"); @@ -888,13 +917,42 @@ public void testArchiveTableWithMetadataTableCompaction() throws Exception { "00000009", "00000010", "00000011", "00000012")), getActiveCommitInstants(Arrays.asList("00000013", "00000014")), commitsAfterArchival); } + @Test + public void testArchiveCommitsWithCompactionCommitInMetadataTableTimeline() throws Exception { + HoodieWriteConfig writeConfig = initTestTableAndGetWriteConfig(true, 2, 4, 20); + int startInstantTime = 100; + int numCommits = 15; + int numExpectedArchived = 6; // "100" till "105" should be archived in this case + + for (int i = startInstantTime; i < startInstantTime + numCommits; i++) { + HoodieTestDataGenerator.createCommitFile(basePath, Integer.toString(i), wrapperFs.getConf()); + } + // Simulate a compaction commit in metadata table timeline + // so the archival in data table can happen + createCompactionCommitInMetadataTable(hadoopConf, wrapperFs, basePath, "105"); + + HoodieTable table = HoodieSparkTable.create(writeConfig, context); + HoodieTimelineArchiver archiveLog = new HoodieTimelineArchiver(writeConfig, table); + + HoodieTimeline timeline = metaClient.getActiveTimeline().getCommitsTimeline().filterCompletedInstants(); + assertEquals(numCommits, timeline.countInstants(), String.format("Loaded %d commits and the count should match", numCommits)); + assertTrue(archiveLog.archiveIfRequired(context)); + timeline = metaClient.getActiveTimeline().reload().getCommitsTimeline().filterCompletedInstants(); + assertEquals(numCommits - numExpectedArchived, timeline.countInstants(), + "Since we have a compaction commit of 105 in metadata table timeline, we should never archive any commit after that"); + for (int i = startInstantTime + numExpectedArchived; i < startInstantTime + numCommits; i++) { + assertTrue(timeline.containsInstant(new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, Integer.toString(i))), + String.format("Commit %d should not be archived", i)); + } + } + private Pair<List<HoodieInstant>, List<HoodieInstant>> archiveAndGetCommitsList(HoodieWriteConfig writeConfig) throws IOException { metaClient.reloadActiveTimeline(); HoodieTimeline timeline = metaClient.getActiveTimeline().reload().getAllCommitsTimeline().filterCompletedInstants(); List<HoodieInstant> originalCommits = timeline.getInstants().collect(Collectors.toList()); HoodieTable table = 
HoodieSparkTable.create(writeConfig, context, metaClient); - HoodieTimelineArchiveLog archiveLog = new HoodieTimelineArchiveLog(writeConfig, table); - archiveLog.archiveIfRequired(context); + HoodieTimelineArchiver archiver = new HoodieTimelineArchiver(writeConfig, table); + archiver.archiveIfRequired(context); timeline = metaClient.getActiveTimeline().reload().getAllCommitsTimeline().filterCompletedInstants(); List commitsAfterArchival = timeline.getInstants().collect(Collectors.toList()); return Pair.of(originalCommits, commitsAfterArchival); diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/io/storage/row/TestHoodieRowCreateHandle.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/io/storage/row/TestHoodieRowCreateHandle.java index 0f308425bc1c0..5a19f0afe9c65 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/io/storage/row/TestHoodieRowCreateHandle.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/io/storage/row/TestHoodieRowCreateHandle.java @@ -25,6 +25,7 @@ import org.apache.hudi.common.testutils.HoodieTestDataGenerator; import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.exception.HoodieInsertException; +import org.apache.hudi.exception.TableNotFoundException; import org.apache.hudi.table.HoodieSparkTable; import org.apache.hudi.table.HoodieTable; import org.apache.hudi.testutils.HoodieClientTestHarness; @@ -36,8 +37,9 @@ import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; -import java.io.IOException; import java.util.ArrayList; import java.util.List; import java.util.Random; @@ -170,19 +172,29 @@ public void testGlobalFailure() throws Exception { assertRows(inputRows, result, instantTime, fileNames); } - @Test - public void testInstantiationFailure() throws IOException { + @ParameterizedTest + @ValueSource(booleans = {true, false}) + public void testInstantiationFailure(boolean enableMetadataTable) { // init config and table HoodieWriteConfig cfg = SparkDatasetTestUtils.getConfigBuilder(basePath, timelineServicePort) - .withMetadataConfig(HoodieMetadataConfig.newBuilder().enable(false).build()) - .withPath("/dummypath/abc/").build(); - HoodieTable table = HoodieSparkTable.create(cfg, context, metaClient); + .withPath("/dummypath/abc/") + .withMetadataConfig(HoodieMetadataConfig.newBuilder().enable(enableMetadataTable).build()) + .build(); try { + HoodieTable table = HoodieSparkTable.create(cfg, context, metaClient); new HoodieRowCreateHandle(table, cfg, " def", UUID.randomUUID().toString(), "001", RANDOM.nextInt(100000), RANDOM.nextLong(), RANDOM.nextLong(), SparkDatasetTestUtils.STRUCT_TYPE); fail("Should have thrown exception"); } catch (HoodieInsertException ioe) { - // expected + // expected without metadata table + if (enableMetadataTable) { + fail("Should have thrown TableNotFoundException"); + } + } catch (TableNotFoundException e) { + // expected with metadata table + if (!enableMetadataTable) { + fail("Should have thrown HoodieInsertException"); + } } } diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/TestCleaner.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/TestCleaner.java index 8a1f4abd29cea..f51a169dd9b44 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/TestCleaner.java +++ 
b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/TestCleaner.java @@ -114,6 +114,7 @@ import scala.Tuple3; +import static org.apache.hudi.HoodieTestCommitGenerator.getBaseFilename; import static org.apache.hudi.common.testutils.HoodieTestTable.makeIncrementalCommitTimes; import static org.apache.hudi.common.testutils.HoodieTestTable.makeNewCommitTime; import static org.apache.hudi.common.testutils.HoodieTestUtils.DEFAULT_PARTITION_PATHS; @@ -121,6 +122,7 @@ import static org.awaitility.Awaitility.await; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertNull; import static org.junit.jupiter.api.Assertions.assertTrue; @@ -253,6 +255,73 @@ public void testBulkInsertPreppedAndCleanByVersions() throws Exception { SparkRDDWriteClient::upsertPreppedRecords, true); } + + /** + * Tests no more than 1 clean is scheduled/executed if HoodieCompactionConfig.allowMultipleCleanSchedule config is disabled. + */ + @Test + public void testMultiClean() { + HoodieWriteConfig writeConfig = getConfigBuilder() + .withFileSystemViewConfig(new FileSystemViewStorageConfig.Builder() + .withEnableBackupForRemoteFileSystemView(false).build()) + .withCompactionConfig(HoodieCompactionConfig.newBuilder().compactionSmallFileSize(1024 * 1024 * 1024) + .withInlineCompaction(false).withMaxNumDeltaCommitsBeforeCompaction(1) + .withFailedWritesCleaningPolicy(HoodieFailedWritesCleaningPolicy.EAGER) + .allowMultipleCleans(false) + .withAutoClean(false).retainCommits(1).retainFileVersions(1).build()) + .withEmbeddedTimelineServerEnabled(false).build(); + + int index = 0; + String cleanInstantTime; + final String partition = "2015/03/16"; + try (SparkRDDWriteClient client = new SparkRDDWriteClient(context, writeConfig)) { + // Three writes so we can initiate a clean + for (; index < 3; ++index) { + String newCommitTime = "00" + index; + List records = dataGen.generateInsertsForPartition(newCommitTime, 1, partition); + client.startCommitWithTime(newCommitTime); + client.insert(jsc.parallelize(records, 1), newCommitTime).collect(); + } + } + + // mimic failed/leftover clean by scheduling a clean but not performing it + cleanInstantTime = "00" + index++; + HoodieTable table = HoodieSparkTable.create(writeConfig, context); + Option cleanPlan = table.scheduleCleaning(context, cleanInstantTime, Option.empty()); + assertEquals(cleanPlan.get().getFilePathsToBeDeletedPerPartition().get(partition).size(), 1); + assertEquals(metaClient.reloadActiveTimeline().getCleanerTimeline().filterInflightsAndRequested().countInstants(), 1); + + try (SparkRDDWriteClient client = new SparkRDDWriteClient(context, writeConfig)) { + // Next commit. This is required so that there is an additional file version to clean. + String newCommitTime = "00" + index++; + List records = dataGen.generateInsertsForPartition(newCommitTime, 1, partition); + client.startCommitWithTime(newCommitTime); + client.insert(jsc.parallelize(records, 1), newCommitTime).collect(); + + // Initiate another clean. The previous leftover clean will be attempted first, followed by another clean + // due to the commit above. 
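// Summary of the sequencing this test pins down (assumed semantics of
// allowMultipleCleans(false), matching the assertions that follow; the instant times in
// this sketch are made up):
//   assertNull(client.clean("005"));      // a pending clean blocks scheduling a new one
//   table.clean(context, "003", false);   // complete the leftover clean first
//   assertNotNull(client.clean("006"));   // only now does a fresh clean go through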
+ String newCleanInstantTime = "00" + index++; + HoodieCleanMetadata cleanMetadata = client.clean(newCleanInstantTime); + // subsequent clean should not be triggered since allowMultipleCleanSchedules is set to false + assertNull(cleanMetadata); + + // let the old clean complete + table = HoodieSparkTable.create(writeConfig, context); + cleanMetadata = table.clean(context, cleanInstantTime, false); + assertNotNull(cleanMetadata); + + // any new clean should go ahead + cleanMetadata = client.clean(newCleanInstantTime); + // subsequent clean should not be triggered since allowMultipleCleanSchedules is set to false + assertNotNull(cleanMetadata); + + // 1 file cleaned + assertEquals(cleanMetadata.getPartitionMetadata().get(partition).getSuccessDeleteFiles().size(), 1); + assertEquals(cleanMetadata.getPartitionMetadata().get(partition).getFailedDeleteFiles().size(), 0); + assertEquals(cleanMetadata.getPartitionMetadata().get(partition).getDeletePathPatterns().size(), 1); + } + } + /** * Test Helper for Cleaning by versions logic from HoodieWriteClient API perspective. * @@ -272,7 +341,6 @@ private void testInsertAndCleanByVersions( .withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_FILE_VERSIONS).retainFileVersions(maxVersions).build()) .withParallelism(1, 1).withBulkInsertParallelism(1).withFinalizeWriteParallelism(1).withDeleteParallelism(1) .withConsistencyGuardConfig(ConsistencyGuardConfig.newBuilder().withConsistencyCheckEnabled(true).build()) - .withMetadataConfig(HoodieMetadataConfig.newBuilder().enable(true).build()) .build(); try (SparkRDDWriteClient client = getHoodieWriteClient(cfg)) { @@ -442,7 +510,6 @@ private void testInsertAndCleanByCommits( .withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_COMMITS).retainCommits(maxCommits).build()) .withParallelism(1, 1).withBulkInsertParallelism(1).withFinalizeWriteParallelism(1).withDeleteParallelism(1) .withConsistencyGuardConfig(ConsistencyGuardConfig.newBuilder().withConsistencyCheckEnabled(true).build()) - .withMetadataConfig(HoodieMetadataConfig.newBuilder().enable(true).build()) .build(); SparkRDDWriteClient client = getHoodieWriteClient(cfg); @@ -519,7 +586,6 @@ private void testFailedInsertAndCleanByCommits( .withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_COMMITS).retainCommits(maxCommits).build()) .withParallelism(1, 1).withBulkInsertParallelism(1).withFinalizeWriteParallelism(1).withDeleteParallelism(1) .withConsistencyGuardConfig(ConsistencyGuardConfig.newBuilder().withConsistencyCheckEnabled(true).build()) - .withMetadataConfig(HoodieMetadataConfig.newBuilder().enable(true).build()) .build(); SparkRDDWriteClient client = getHoodieWriteClient(cfg); @@ -571,7 +637,7 @@ private List runCleaner(HoodieWriteConfig config, int firstComm return runCleaner(config, false, firstCommitSequence); } - private List runCleaner(HoodieWriteConfig config, boolean simulateRetryFailure) throws IOException { + protected List runCleaner(HoodieWriteConfig config, boolean simulateRetryFailure) throws IOException { return runCleaner(config, simulateRetryFailure, 1); } @@ -648,7 +714,7 @@ private List runCleaner(HoodieWriteConfig config, boolean simul public void testKeepLatestFileVersions(Boolean enableBootstrapSourceClean) throws Exception { HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath) - .withMetadataConfig(HoodieMetadataConfig.newBuilder().withAssumeDatePartitioning(true).enable(true).build()) + .withMetadataConfig(HoodieMetadataConfig.newBuilder().withAssumeDatePartitioning(true).build()) 
.withCompactionConfig(HoodieCompactionConfig.newBuilder() .withCleanBootstrapBaseFileEnabled(enableBootstrapSourceClean) .withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_FILE_VERSIONS).retainFileVersions(1).build()) @@ -811,7 +877,7 @@ public void testKeepLatestFileVersionsMOR() throws Exception { HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath) - .withMetadataConfig(HoodieMetadataConfig.newBuilder().withAssumeDatePartitioning(true).enable(false).build()) + .withMetadataConfig(HoodieMetadataConfig.newBuilder().withAssumeDatePartitioning(true).build()) .withCompactionConfig(HoodieCompactionConfig.newBuilder() .withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_FILE_VERSIONS).retainFileVersions(1).build()) .build(); @@ -848,8 +914,8 @@ public void testKeepLatestFileVersionsMOR() throws Exception { public void testKeepLatestCommitsMOR() throws Exception { HoodieWriteConfig config = - HoodieWriteConfig.newBuilder().withPath(basePath) - .withMetadataConfig(HoodieMetadataConfig.newBuilder().withAssumeDatePartitioning(true).enable(false).build()) + HoodieWriteConfig.newBuilder().withPath(basePath) + .withMetadataConfig(HoodieMetadataConfig.newBuilder().withAssumeDatePartitioning(true).build()) .withCompactionConfig(HoodieCompactionConfig.newBuilder() .withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_COMMITS).retainCommits(1).build()) .build(); @@ -889,12 +955,15 @@ public void testKeepLatestCommitsMOR() throws Exception { @Test public void testCleanWithReplaceCommits() throws Exception { HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath) - .withMetadataConfig(HoodieMetadataConfig.newBuilder().withAssumeDatePartitioning(true).enable(false).build()) + .withMetadataConfig(HoodieMetadataConfig.newBuilder() + .withMaxNumDeltaCommitsBeforeCompaction(1) + .withAssumeDatePartitioning(true).build()) .withCompactionConfig(HoodieCompactionConfig.newBuilder() .withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_COMMITS).retainCommits(2).build()) .build(); - HoodieTestTable testTable = HoodieTestTable.of(metaClient); + HoodieTableMetadataWriter metadataWriter = SparkHoodieBackedTableMetadataWriter.create(hadoopConf, config, context); + HoodieTestTable testTable = HoodieMetadataTestTable.of(metaClient, metadataWriter); String p0 = "2020/01/01"; String p1 = "2020/01/02"; @@ -903,7 +972,7 @@ public void testCleanWithReplaceCommits() throws Exception { String file1P1C0 = UUID.randomUUID().toString(); testTable.addInflightCommit("00000000000001").withBaseFilesInPartition(p0, file1P0C0).withBaseFilesInPartition(p1, file1P1C0); - HoodieCommitMetadata commitMetadata = generateCommitMetadata( + HoodieCommitMetadata commitMetadata = generateCommitMetadata("00000000000001", Collections.unmodifiableMap(new HashMap>() { { put(p0, CollectionUtils.createImmutableList(file1P0C0)); @@ -911,6 +980,7 @@ public void testCleanWithReplaceCommits() throws Exception { } }) ); + metadataWriter.update(commitMetadata, "00000000000001", false); metaClient.getActiveTimeline().saveAsComplete( new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMMIT_ACTION, "00000000000001"), Option.of(commitMetadata.toJsonString().getBytes(StandardCharsets.UTF_8))); @@ -926,7 +996,8 @@ public void testCleanWithReplaceCommits() throws Exception { // notice that clustering generates empty inflight commit files Map partitionAndFileId002 = testTable.forReplaceCommit("00000000000002").getFileIdsWithBaseFilesInPartitions(p0); String file2P0C1 = partitionAndFileId002.get(p0); - Pair replaceMetadata = 
generateReplaceCommitMetadata(p0, file1P0C0, file2P0C1); + Pair replaceMetadata = + generateReplaceCommitMetadata("00000000000002", p0, file1P0C0, file2P0C1); testTable.addReplaceCommit("00000000000002", Option.of(replaceMetadata.getKey()), Option.empty(), replaceMetadata.getValue()); // run cleaner @@ -940,7 +1011,7 @@ public void testCleanWithReplaceCommits() throws Exception { // notice that clustering generates empty inflight commit files Map partitionAndFileId003 = testTable.forReplaceCommit("00000000000003").getFileIdsWithBaseFilesInPartitions(p1); String file3P1C2 = partitionAndFileId003.get(p1); - replaceMetadata = generateReplaceCommitMetadata(p1, file1P1C0, file3P1C2); + replaceMetadata = generateReplaceCommitMetadata("00000000000003", p1, file1P1C0, file3P1C2); testTable.addReplaceCommit("00000000000003", Option.of(replaceMetadata.getKey()), Option.empty(), replaceMetadata.getValue()); // run cleaner @@ -955,11 +1026,11 @@ public void testCleanWithReplaceCommits() throws Exception { // notice that clustering generates empty inflight commit files Map partitionAndFileId004 = testTable.forReplaceCommit("00000000000004").getFileIdsWithBaseFilesInPartitions(p0); String file4P0C3 = partitionAndFileId004.get(p0); - replaceMetadata = generateReplaceCommitMetadata(p0, file2P0C1, file4P0C3); + replaceMetadata = generateReplaceCommitMetadata("00000000000004", p0, file2P0C1, file4P0C3); testTable.addReplaceCommit("00000000000004", Option.of(replaceMetadata.getKey()), Option.empty(), replaceMetadata.getValue()); // run cleaner - List hoodieCleanStatsFour = runCleaner(config); + List hoodieCleanStatsFour = runCleaner(config, 5); assertTrue(testTable.baseFileExists(p0, "00000000000004", file4P0C3)); assertTrue(testTable.baseFileExists(p0, "00000000000002", file2P0C1)); assertTrue(testTable.baseFileExists(p1, "00000000000003", file3P1C2)); @@ -969,12 +1040,12 @@ public void testCleanWithReplaceCommits() throws Exception { // make next replacecommit, with 1 clustering operation. Replace all data in p1. 
no new files created // notice that clustering generates empty inflight commit files - Map partitionAndFileId005 = testTable.forReplaceCommit("00000000000005").getFileIdsWithBaseFilesInPartitions(p1); + Map partitionAndFileId005 = testTable.forReplaceCommit("00000000000006").getFileIdsWithBaseFilesInPartitions(p1); String file4P1C4 = partitionAndFileId005.get(p1); - replaceMetadata = generateReplaceCommitMetadata(p0, file3P1C2, file4P1C4); - testTable.addReplaceCommit("00000000000005", Option.of(replaceMetadata.getKey()), Option.empty(), replaceMetadata.getValue()); + replaceMetadata = generateReplaceCommitMetadata("00000000000006", p0, file3P1C2, file4P1C4); + testTable.addReplaceCommit("00000000000006", Option.of(replaceMetadata.getKey()), Option.empty(), replaceMetadata.getValue()); - List hoodieCleanStatsFive = runCleaner(config, 2); + List hoodieCleanStatsFive = runCleaner(config, 7); assertTrue(testTable.baseFileExists(p0, "00000000000004", file4P0C3)); assertTrue(testTable.baseFileExists(p0, "00000000000002", file2P0C1)); assertTrue(testTable.baseFileExists(p1, "00000000000003", file3P1C2)); @@ -982,9 +1053,8 @@ public void testCleanWithReplaceCommits() throws Exception { assertFalse(testTable.baseFileExists(p1, "00000000000001", file1P1C0)); } - private Pair generateReplaceCommitMetadata(String partition, - String replacedFileId, - String newFileId) { + private Pair generateReplaceCommitMetadata( + String instantTime, String partition, String replacedFileId, String newFileId) { HoodieRequestedReplaceMetadata requestedReplaceMetadata = new HoodieRequestedReplaceMetadata(); requestedReplaceMetadata.setOperationType(WriteOperationType.CLUSTER.toString()); requestedReplaceMetadata.setVersion(1); @@ -1005,7 +1075,7 @@ private Pair genera if (!StringUtils.isNullOrEmpty(newFileId)) { HoodieWriteStat writeStat = new HoodieWriteStat(); writeStat.setPartitionPath(partition); - writeStat.setPath(newFileId); + writeStat.setPath(partition + "/" + getBaseFilename(instantTime, newFileId)); writeStat.setFileId(newFileId); replaceMetadata.addWriteStat(partition, writeStat); } @@ -1180,7 +1250,7 @@ private static void assertCleanMetadataPathEquals(Map expected, } } - private static Stream argumentsForTestKeepLatestCommits() { + protected static Stream argumentsForTestKeepLatestCommits() { return Stream.of( Arguments.of(false, false, false), Arguments.of(true, false, false), @@ -1196,7 +1266,7 @@ private static Stream argumentsForTestKeepLatestCommits() { @MethodSource("argumentsForTestKeepLatestCommits") public void testKeepLatestCommits(boolean simulateFailureRetry, boolean enableIncrementalClean, boolean enableBootstrapSourceClean) throws Exception { HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath) - .withMetadataConfig(HoodieMetadataConfig.newBuilder().withAssumeDatePartitioning(true).enable(false).build()) + .withMetadataConfig(HoodieMetadataConfig.newBuilder().withAssumeDatePartitioning(true).build()) .withCompactionConfig(HoodieCompactionConfig.newBuilder() .withIncrementalCleaningMode(enableIncrementalClean) .withFailedWritesCleaningPolicy(HoodieFailedWritesCleaningPolicy.EAGER) @@ -1216,7 +1286,7 @@ public void testKeepLatestCommits(boolean simulateFailureRetry, boolean enableIn : UUID.randomUUID().toString(); testTable.addInflightCommit("00000000000001").withBaseFilesInPartition(p0, file1P0C0).withBaseFilesInPartition(p1, file1P1C0); - HoodieCommitMetadata commitMetadata = generateCommitMetadata( + HoodieCommitMetadata commitMetadata = 
generateCommitMetadata("00000000000001", Collections.unmodifiableMap(new HashMap>() { { put(p0, CollectionUtils.createImmutableList(file1P0C0)); @@ -1240,7 +1310,7 @@ public void testKeepLatestCommits(boolean simulateFailureRetry, boolean enableIn String file2P0C1 = partitionAndFileId002.get(p0); String file2P1C1 = partitionAndFileId002.get(p1); testTable.forCommit("00000000000002").withBaseFilesInPartition(p0, file1P0C0).withBaseFilesInPartition(p1, file1P1C0); - commitMetadata = generateCommitMetadata(new HashMap>() { + commitMetadata = generateCommitMetadata("00000000000002", new HashMap>() { { put(p0, CollectionUtils.createImmutableList(file1P0C0, file2P0C1)); put(p1, CollectionUtils.createImmutableList(file1P1C0, file2P1C1)); @@ -1261,9 +1331,9 @@ public void testKeepLatestCommits(boolean simulateFailureRetry, boolean enableIn .withBaseFilesInPartition(p0, file1P0C0) .withBaseFilesInPartition(p0, file2P0C1) .getFileIdsWithBaseFilesInPartitions(p0).get(p0); - commitMetadata = generateCommitMetadata(CollectionUtils - .createImmutableMap(p0, - CollectionUtils.createImmutableList(file1P0C0, file2P0C1, file3P0C2))); + commitMetadata = generateCommitMetadata("00000000000003", + CollectionUtils.createImmutableMap( + p0, CollectionUtils.createImmutableList(file1P0C0, file2P0C1, file3P0C2))); metaClient.getActiveTimeline().saveAsComplete( new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMMIT_ACTION, "00000000000003"), Option.of(commitMetadata.toJsonString().getBytes(StandardCharsets.UTF_8))); @@ -1278,8 +1348,9 @@ public void testKeepLatestCommits(boolean simulateFailureRetry, boolean enableIn .withBaseFilesInPartition(p0, file1P0C0) .withBaseFilesInPartition(p0, file2P0C1) .getFileIdsWithBaseFilesInPartitions(p0).get(p0); - commitMetadata = generateCommitMetadata(CollectionUtils.createImmutableMap( - p0, CollectionUtils.createImmutableList(file1P0C0, file2P0C1, file4P0C3))); + commitMetadata = generateCommitMetadata("00000000000004", + CollectionUtils.createImmutableMap( + p0, CollectionUtils.createImmutableList(file1P0C0, file2P0C1, file4P0C3))); metaClient.getActiveTimeline().saveAsComplete( new HoodieInstant(State.INFLIGHT, HoodieTimeline.COMMIT_ACTION, "00000000000004"), Option.of(commitMetadata.toJsonString().getBytes(StandardCharsets.UTF_8))); @@ -1305,8 +1376,8 @@ public void testKeepLatestCommits(boolean simulateFailureRetry, boolean enableIn // No cleaning on partially written file, with no commit. testTable.forCommit("00000000000005").withBaseFilesInPartition(p0, file3P0C2); - commitMetadata = generateCommitMetadata(CollectionUtils.createImmutableMap(p0, - CollectionUtils.createImmutableList(file3P0C2))); + commitMetadata = generateCommitMetadata("00000000000005", + CollectionUtils.createImmutableMap(p0, CollectionUtils.createImmutableList(file3P0C2))); metaClient.getActiveTimeline().createNewInstant( new HoodieInstant(State.REQUESTED, HoodieTimeline.COMMIT_ACTION, "00000000000005")); metaClient.getActiveTimeline().transitionRequestedToInflight( @@ -1325,7 +1396,7 @@ public void testKeepLatestCommits(boolean simulateFailureRetry, boolean enableIn * @return Partition to BootstrapFileMapping Map * @throws IOException */ - private Map> generateBootstrapIndexAndSourceData(String... partitions) throws IOException { + protected Map> generateBootstrapIndexAndSourceData(String... 
partitions) throws IOException { // create bootstrap source data path java.nio.file.Path sourcePath = tempDir.resolve("data"); java.nio.file.Files.createDirectories(sourcePath); @@ -1378,7 +1449,7 @@ public void testCleanMarkerDataFilesOnRollback() throws Exception { @Test public void testCleaningWithZeroPartitionPaths() throws Exception { HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath) - .withMetadataConfig(HoodieMetadataConfig.newBuilder().withAssumeDatePartitioning(true).enable(true).build()) + .withMetadataConfig(HoodieMetadataConfig.newBuilder().withAssumeDatePartitioning(true).build()) .withCompactionConfig(HoodieCompactionConfig.newBuilder() .withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_COMMITS).retainCommits(2).build()) .build(); @@ -1402,7 +1473,7 @@ public void testCleaningWithZeroPartitionPaths() throws Exception { @Test public void testKeepLatestCommitsWithPendingCompactions() throws Exception { HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath) - .withMetadataConfig(HoodieMetadataConfig.newBuilder().withAssumeDatePartitioning(true).enable(true).build()) + .withMetadataConfig(HoodieMetadataConfig.newBuilder().withAssumeDatePartitioning(true).build()) .withCompactionConfig(HoodieCompactionConfig.newBuilder() .withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_COMMITS).retainCommits(2).build()) .build(); @@ -1426,7 +1497,7 @@ public void testKeepLatestCommitsWithPendingCompactions() throws Exception { public void testKeepLatestVersionsWithPendingCompactions(boolean retryFailure) throws Exception { HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath) - .withMetadataConfig(HoodieMetadataConfig.newBuilder().withAssumeDatePartitioning(true).enable(true).build()) + .withMetadataConfig(HoodieMetadataConfig.newBuilder().withAssumeDatePartitioning(true).build()) .withCompactionConfig(HoodieCompactionConfig.newBuilder() .withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_FILE_VERSIONS).retainFileVersions(2).build()) .build(); @@ -1677,14 +1748,15 @@ private Stream> convertPathToFileIdWithCommitTime(final Hoo return Stream.concat(stream1, stream2); } - private static HoodieCommitMetadata generateCommitMetadata(Map> partitionToFilePaths) { + protected static HoodieCommitMetadata generateCommitMetadata( + String instantTime, Map> partitionToFilePaths) { HoodieCommitMetadata metadata = new HoodieCommitMetadata(); - partitionToFilePaths.forEach((key, value) -> value.forEach(f -> { + partitionToFilePaths.forEach((partitionPath, fileList) -> fileList.forEach(f -> { HoodieWriteStat writeStat = new HoodieWriteStat(); - writeStat.setPartitionPath(key); - writeStat.setPath(f); + writeStat.setPartitionPath(partitionPath); + writeStat.setPath(partitionPath + "/" + getBaseFilename(instantTime, f)); writeStat.setFileId(f); - metadata.addWriteStat(key, writeStat); + metadata.addWriteStat(partitionPath, writeStat); })); return metadata; } diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/TestConsistencyGuard.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/TestConsistencyGuard.java index afbe94937949f..22fafe4a58747 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/TestConsistencyGuard.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/TestConsistencyGuard.java @@ -169,9 +169,9 @@ private ConsistencyGuardConfig getConsistencyGuardConfig() { return getConsistencyGuardConfig(3, 10, 10); } - private 
ConsistencyGuardConfig getConsistencyGuardConfig(int maxChecks, int initalSleep, int maxSleep) { + private ConsistencyGuardConfig getConsistencyGuardConfig(int maxChecks, int initialSleep, int maxSleep) { return ConsistencyGuardConfig.newBuilder().withConsistencyCheckEnabled(true) - .withInitialConsistencyCheckIntervalMs(initalSleep).withMaxConsistencyCheckIntervalMs(maxSleep) + .withInitialConsistencyCheckIntervalMs(initialSleep).withMaxConsistencyCheckIntervalMs(maxSleep) .withMaxConsistencyChecks(maxChecks).build(); } } diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/TestHoodieMergeOnReadTable.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/TestHoodieMergeOnReadTable.java index 8b8df197ba78b..dcc41addc8f31 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/TestHoodieMergeOnReadTable.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/TestHoodieMergeOnReadTable.java @@ -21,7 +21,6 @@ import org.apache.hudi.client.HoodieReadClient; import org.apache.hudi.client.SparkRDDWriteClient; import org.apache.hudi.client.WriteStatus; -import org.apache.hudi.common.config.HoodieMetadataConfig; import org.apache.hudi.common.model.FileSlice; import org.apache.hudi.common.model.HoodieBaseFile; import org.apache.hudi.common.model.HoodieCommitMetadata; @@ -44,7 +43,8 @@ import org.apache.hudi.index.HoodieIndex.IndexType; import org.apache.hudi.metadata.HoodieTableMetadataWriter; import org.apache.hudi.metadata.SparkHoodieBackedTableMetadataWriter; -import org.apache.hudi.table.action.deltacommit.AbstractSparkDeltaCommitActionExecutor; +import org.apache.hudi.table.action.HoodieWriteMetadata; +import org.apache.hudi.table.action.deltacommit.BaseSparkDeltaCommitActionExecutor; import org.apache.hudi.table.action.deltacommit.SparkDeleteDeltaCommitActionExecutor; import org.apache.hudi.testutils.HoodieClientTestUtils; import org.apache.hudi.testutils.HoodieMergeOnReadTestUtils; @@ -54,10 +54,12 @@ import org.apache.avro.generic.GenericRecord; import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.Path; import org.apache.hadoop.mapred.JobConf; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; +import org.apache.spark.storage.StorageLevel; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; @@ -189,8 +191,10 @@ public void testUpsertPartitioner(boolean populateMetaFields) throws Exception { assertTrue(fileIdToNewSize.entrySet().stream().anyMatch(entry -> fileIdToSize.get(entry.getKey()) < entry.getValue())); - List dataFiles = roView.getLatestBaseFiles().map(HoodieBaseFile::getPath).collect(Collectors.toList()); - List recordsRead = HoodieMergeOnReadTestUtils.getRecordsUsingInputFormat(hadoopConf(), dataFiles, + List inputPaths = roView.getLatestBaseFiles() + .map(baseFile -> new Path(baseFile.getPath()).getParent().toString()) + .collect(Collectors.toList()); + List recordsRead = HoodieMergeOnReadTestUtils.getRecordsUsingInputFormat(hadoopConf(), inputPaths, basePath(), new JobConf(hadoopConf()), true, false); // Wrote 20 records in 2 batches assertEquals(40, recordsRead.size(), "Must contain 40 records"); @@ -204,8 +208,7 @@ public void testLogFileCountsAfterCompaction(boolean preserveCommitMeta) throws boolean populateMetaFields = true; // insert 100 records HoodieWriteConfig.Builder cfgBuilder = getConfigBuilder(true, false, 
HoodieIndex.IndexType.BLOOM, - 1024 * 1024 * 1024L, HoodieClusteringConfig.newBuilder().build(), preserveCommitMeta) - .withMetadataConfig(HoodieMetadataConfig.newBuilder().enable(true).build()); + 1024 * 1024 * 1024L, HoodieClusteringConfig.newBuilder().build(), preserveCommitMeta); addConfigsForPopulateMetaFields(cfgBuilder, populateMetaFields); HoodieWriteConfig config = cfgBuilder.build(); @@ -255,7 +258,7 @@ public void testLogFileCountsAfterCompaction(boolean preserveCommitMeta) throws // Do a compaction String compactionInstantTime = writeClient.scheduleCompaction(Option.empty()).get().toString(); - JavaRDD result = (JavaRDD) writeClient.compact(compactionInstantTime); + HoodieWriteMetadata<JavaRDD<WriteStatus>> result = writeClient.compact(compactionInstantTime); // Verify that recently written compacted data file has no log file metaClient = HoodieTableMetaClient.reload(metaClient); @@ -272,8 +275,7 @@ public void testLogFileCountsAfterCompaction(boolean preserveCommitMeta) throws for (FileSlice slice : groupedLogFiles) { assertEquals(0, slice.getLogFiles().count(), "After compaction there should be no log files visible on a full view"); } - List writeStatuses = result.collect(); - assertTrue(writeStatuses.stream().anyMatch(writeStatus -> writeStatus.getStat().getPartitionPath().contentEquals(partitionPath))); + assertTrue(result.getCommitMetadata().get().getWritePartitionPaths().stream().anyMatch(part -> part.contentEquals(partitionPath))); } // Check the entire dataset has all records still @@ -439,8 +441,9 @@ public void testRollingStatsWithSmallFileHandling() throws Exception { // Test small file handling after compaction instantTime = "002"; client.scheduleCompactionAtInstant(instantTime, Option.of(metadata.getExtraMetadata())); - statuses = (JavaRDD) client.compact(instantTime); - client.commitCompaction(instantTime, statuses, Option.empty()); + HoodieWriteMetadata<JavaRDD<WriteStatus>> compactionMetadata = client.compact(instantTime); + statuses = compactionMetadata.getWriteStatuses(); + client.commitCompaction(instantTime, compactionMetadata.getCommitMetadata().get(), Option.empty()); // Read from commit file table = HoodieSparkTable.create(cfg, context()); @@ -552,7 +555,7 @@ public void testHandleUpdateWithMultiplePartitions() throws Exception { // initialize partitioner hoodieTable.getHoodieView().sync(); - AbstractSparkDeltaCommitActionExecutor actionExecutor = new SparkDeleteDeltaCommitActionExecutor(context(), cfg, hoodieTable, + BaseSparkDeltaCommitActionExecutor actionExecutor = new SparkDeleteDeltaCommitActionExecutor(context(), cfg, hoodieTable, newDeleteTime, deleteRDD); actionExecutor.getUpsertPartitioner(new WorkloadProfile(buildProfile(deleteRDD))); final List<List<WriteStatus>> deleteStatus = jsc().parallelize(Arrays.asList(1)).map(x -> { @@ -564,9 +567,52 @@ public void testHandleUpdateWithMultiplePartitions() throws Exception { WriteStatus status = deleteStatus.get(0).get(0); assertTrue(status.hasErrors()); long numRecordsInPartition = fewRecordsForDelete.stream().filter(u -> - u.getPartitionPath().equals(partitionPath)).count(); + u.getPartitionPath().equals(partitionPath)).count(); assertEquals(fewRecordsForDelete.size() - numRecordsInPartition, status.getTotalErrorRecords()); } } + + @Test + public void testReleaseResource() throws Exception { + HoodieWriteConfig.Builder builder = getConfigBuilder(true); + builder.withReleaseResourceEnabled(true); + builder.withAutoCommit(false); + /** + * Write 1 (test when RELEASE_RESOURCE_ENABLE is true) + */ + try (SparkRDDWriteClient client = 
getHoodieWriteClient(builder.build())) { + + String newCommitTime = "001"; + client.startCommitWithTime(newCommitTime); + + List records = dataGen.generateInserts(newCommitTime, 20); + JavaRDD writeRecords = jsc().parallelize(records, 1); + writeRecords.persist(StorageLevel.MEMORY_AND_DISK()); + List statuses = client.upsert(writeRecords, newCommitTime).collect(); + assertNoWriteErrors(statuses); + client.commitStats(newCommitTime, statuses.stream().map(WriteStatus::getStat).collect(Collectors.toList()), Option.empty(), metaClient.getCommitActionType()); + assertEquals(spark().sparkContext().persistentRdds().size(), 0); + } + + builder.withReleaseResourceEnabled(false); + + /** + * Write 2 (test when RELEASE_RESOURCE_ENABLE is false) + */ + try (SparkRDDWriteClient client = getHoodieWriteClient(builder.build())) { + String newCommitTime = "002"; + client.startCommitWithTime(newCommitTime); + + List records = dataGen.generateInserts(newCommitTime, 20); + JavaRDD writeRecords = jsc().parallelize(records, 1); + + writeRecords.persist(StorageLevel.MEMORY_AND_DISK()); + List statuses = client.upsert(writeRecords, newCommitTime).collect(); + assertNoWriteErrors(statuses); + client.commitStats(newCommitTime, statuses.stream().map(WriteStatus::getStat).collect(Collectors.toList()), Option.empty(), metaClient.getCommitActionType()); + assertTrue(spark().sparkContext().persistentRdds().size() > 0); + } + + } } diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/commit/TestCopyOnWriteActionExecutor.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/commit/TestCopyOnWriteActionExecutor.java index cba77b0c7e55a..53cd6e5d1e749 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/commit/TestCopyOnWriteActionExecutor.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/commit/TestCopyOnWriteActionExecutor.java @@ -22,6 +22,7 @@ import org.apache.hudi.client.WriteStatus; import org.apache.hudi.common.bloom.BloomFilter; import org.apache.hudi.common.fs.FSUtils; +import org.apache.hudi.common.model.HoodieAvroRecord; import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.model.HoodieTableType; @@ -177,11 +178,11 @@ public void testUpdateRecords(HoodieIndex.IndexType indexType) throws Exception List records = new ArrayList<>(); RawTripTestPayload rowChange1 = new RawTripTestPayload(recordStr1); - records.add(new HoodieRecord(new HoodieKey(rowChange1.getRowKey(), rowChange1.getPartitionPath()), rowChange1)); + records.add(new HoodieAvroRecord(new HoodieKey(rowChange1.getRowKey(), rowChange1.getPartitionPath()), rowChange1)); RawTripTestPayload rowChange2 = new RawTripTestPayload(recordStr2); - records.add(new HoodieRecord(new HoodieKey(rowChange2.getRowKey(), rowChange2.getPartitionPath()), rowChange2)); + records.add(new HoodieAvroRecord(new HoodieKey(rowChange2.getRowKey(), rowChange2.getPartitionPath()), rowChange2)); RawTripTestPayload rowChange3 = new RawTripTestPayload(recordStr3); - records.add(new HoodieRecord(new HoodieKey(rowChange3.getRowKey(), rowChange3.getPartitionPath()), rowChange3)); + records.add(new HoodieAvroRecord(new HoodieKey(rowChange3.getRowKey(), rowChange3.getPartitionPath()), rowChange3)); // Insert new records final HoodieSparkCopyOnWriteTable cowTable = table; @@ -210,12 +211,12 @@ public void testUpdateRecords(HoodieIndex.IndexType indexType) throws Exception String updateRecordStr1 = 
"{\"_row_key\":\"8eb5b87a-1feh-4edd-87b4-6ec96dc405a0\"," + "\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":15}"; RawTripTestPayload updateRowChanges1 = new RawTripTestPayload(updateRecordStr1); - HoodieRecord updatedRecord1 = new HoodieRecord( + HoodieRecord updatedRecord1 = new HoodieAvroRecord( new HoodieKey(updateRowChanges1.getRowKey(), updateRowChanges1.getPartitionPath()), updateRowChanges1); RawTripTestPayload rowChange4 = new RawTripTestPayload(recordStr4); HoodieRecord insertedRecord1 = - new HoodieRecord(new HoodieKey(rowChange4.getRowKey(), rowChange4.getPartitionPath()), rowChange4); + new HoodieAvroRecord(new HoodieKey(rowChange4.getRowKey(), rowChange4.getPartitionPath()), rowChange4); List updatedRecords = Arrays.asList(updatedRecord1, insertedRecord1); @@ -290,7 +291,7 @@ private List newHoodieRecords(int n, String time) throws Exception String recordStr = String.format("{\"_row_key\":\"%s\",\"time\":\"%s\",\"number\":%d}", UUID.randomUUID().toString(), time, i); RawTripTestPayload rowChange = new RawTripTestPayload(recordStr); - records.add(new HoodieRecord(new HoodieKey(rowChange.getRowKey(), rowChange.getPartitionPath()), rowChange)); + records.add(new HoodieAvroRecord(new HoodieKey(rowChange.getRowKey(), rowChange.getPartitionPath()), rowChange)); } return records; } @@ -316,11 +317,11 @@ public void testMetadataAggregateFromWriteStatus() throws Exception { List records = new ArrayList<>(); RawTripTestPayload rowChange1 = new RawTripTestPayload(recordStr1); - records.add(new HoodieRecord(new HoodieKey(rowChange1.getRowKey(), rowChange1.getPartitionPath()), rowChange1)); + records.add(new HoodieAvroRecord(new HoodieKey(rowChange1.getRowKey(), rowChange1.getPartitionPath()), rowChange1)); RawTripTestPayload rowChange2 = new RawTripTestPayload(recordStr2); - records.add(new HoodieRecord(new HoodieKey(rowChange2.getRowKey(), rowChange2.getPartitionPath()), rowChange2)); + records.add(new HoodieAvroRecord(new HoodieKey(rowChange2.getRowKey(), rowChange2.getPartitionPath()), rowChange2)); RawTripTestPayload rowChange3 = new RawTripTestPayload(recordStr3); - records.add(new HoodieRecord(new HoodieKey(rowChange3.getRowKey(), rowChange3.getPartitionPath()), rowChange3)); + records.add(new HoodieAvroRecord(new HoodieKey(rowChange3.getRowKey(), rowChange3.getPartitionPath()), rowChange3)); // Insert new records BaseSparkCommitActionExecutor actionExecutor = new SparkInsertCommitActionExecutor(context, config, table, @@ -416,7 +417,7 @@ public void testFileSizeUpsertRecords() throws Exception { String recordStr = "{\"_row_key\":\"" + UUID.randomUUID().toString() + "\",\"time\":\"2016-01-31T03:16:41.415Z\",\"number\":" + i + "}"; RawTripTestPayload rowChange = new RawTripTestPayload(recordStr); - records.add(new HoodieRecord(new HoodieKey(rowChange.getRowKey(), rowChange.getPartitionPath()), rowChange)); + records.add(new HoodieAvroRecord(new HoodieKey(rowChange.getRowKey(), rowChange.getPartitionPath()), rowChange)); } // Insert new records diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/commit/TestDeleteHelper.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/commit/TestDeleteHelper.java index 8617c848729c2..2d852f8107ef0 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/commit/TestDeleteHelper.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/commit/TestDeleteHelper.java @@ -24,7 +24,7 @@ import org.apache.hudi.common.model.HoodieRecord; 
import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.data.HoodieJavaRDD; -import org.apache.hudi.index.bloom.HoodieBloomIndex; +import org.apache.hudi.index.HoodieIndex; import org.apache.hudi.table.HoodieTable; import org.apache.hudi.table.action.HoodieWriteMetadata; @@ -66,7 +66,7 @@ private enum CombineTestMode { private static final int DELETE_PARALLELISM = 200; @Mock - private HoodieBloomIndex index; + private HoodieIndex index; @Mock private HoodieTable, JavaRDD, JavaRDD> table; @Mock diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/commit/TestUpsertPartitioner.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/commit/TestUpsertPartitioner.java index 1e5f8029a7145..3039eb3bd9b5f 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/commit/TestUpsertPartitioner.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/commit/TestUpsertPartitioner.java @@ -21,7 +21,6 @@ import org.apache.hudi.avro.model.HoodieClusteringPlan; import org.apache.hudi.avro.model.HoodieCompactionPlan; import org.apache.hudi.avro.model.HoodieRequestedReplaceMetadata; -import org.apache.hudi.common.config.HoodieMetadataConfig; import org.apache.hudi.common.model.HoodieCommitMetadata; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.model.HoodieRecordLocation; @@ -219,7 +218,7 @@ public void testPartitionWeight() throws Exception { final String testPartitionPath = "2016/09/26"; int totalInsertNum = 2000; - HoodieWriteConfig config = makeHoodieClientConfigBuilder().withMetadataConfig(HoodieMetadataConfig.newBuilder().enable(false).build()) + HoodieWriteConfig config = makeHoodieClientConfigBuilder() .withCompactionConfig(HoodieCompactionConfig.newBuilder().compactionSmallFileSize(0) .insertSplitSize(totalInsertNum / 2).autoTuneInsertSplits(false).build()).build(); @@ -374,23 +373,23 @@ public void testUpsertPartitionerWithSmallFileHandlingAndClusteringPlan() throws .setClusteringPlan(clusteringPlan).setOperationType(WriteOperationType.CLUSTER.name()).build(); FileCreateUtils.createRequestedReplaceCommit(basePath,"002", Option.of(requestedReplaceMetadata)); - // create file slice 002 - FileCreateUtils.createBaseFile(basePath, testPartitionPath, "002", "2", 1); - FileCreateUtils.createCommit(basePath, "002"); + // create file slice 003 + FileCreateUtils.createBaseFile(basePath, testPartitionPath, "003", "3", 1); + FileCreateUtils.createCommit(basePath, "003"); metaClient = HoodieTableMetaClient.reload(metaClient); // generate new data to be ingested HoodieTestDataGenerator dataGenerator = new HoodieTestDataGenerator(new String[] {testPartitionPath}); - List insertRecords = dataGenerator.generateInserts("003", 100); + List insertRecords = dataGenerator.generateInserts("004", 100); WorkloadProfile profile = new WorkloadProfile(buildProfile(jsc.parallelize(insertRecords))); HoodieSparkTable table = HoodieSparkTable.create(config, context, metaClient); // create UpsertPartitioner UpsertPartitioner partitioner = new UpsertPartitioner(profile, context, table, config); - // for now we have file slice1 and file slice2 and file slice1 is contained in pending clustering plan - // So that only file slice2 can be used for ingestion. + // for now we have file slice1 and file slice3 and file slice1 is contained in pending clustering plan + // So that only file slice3 can be used for ingestion. 
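As context for the assertion that follows: the partitioner treats a base file as a small-file candidate only if its file group is not referenced by a pending clustering plan, which is why file slice1 (pending clustering) is skipped and only file slice3 counts. A minimal illustrative sketch of that filtering idea; the class, method, and variable names here are hypothetical stand-ins, not the actual UpsertPartitioner internals:

import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;

class SmallFileFilterSketch {
  // Keep only files below the small-file threshold whose file group is not
  // part of a pending clustering plan; only those stay eligible for ingestion.
  static List<String> smallFileCandidates(Map<String, Long> fileIdToSize,
                                          Set<String> pendingClusteringFileIds,
                                          long smallFileLimitBytes) {
    return fileIdToSize.entrySet().stream()
        .filter(e -> e.getValue() < smallFileLimitBytes)
        .filter(e -> !pendingClusteringFileIds.contains(e.getKey()))
        .map(Map.Entry::getKey)
        .collect(Collectors.toList());
  }
}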
assertEquals(1, partitioner.smallFiles.size(), "Should have 1 small file to be ingested."); } diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/compact/TestAsyncCompaction.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/compact/TestAsyncCompaction.java index c2879fb1aaf4c..87d8613303347 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/compact/TestAsyncCompaction.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/compact/TestAsyncCompaction.java @@ -18,11 +18,8 @@ package org.apache.hudi.table.action.compact; -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.Path; import org.apache.hudi.client.HoodieReadClient; import org.apache.hudi.client.SparkRDDWriteClient; -import org.apache.hudi.common.config.HoodieMetadataConfig; import org.apache.hudi.common.model.HoodieFileGroupId; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.table.HoodieTableMetaClient; @@ -32,6 +29,9 @@ import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.table.HoodieSparkTable; import org.apache.hudi.table.HoodieTable; + +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.Path; import org.apache.spark.api.java.JavaRDD; import org.junit.jupiter.api.Test; @@ -52,7 +52,6 @@ public class TestAsyncCompaction extends CompactionTestBase { private HoodieWriteConfig getConfig(Boolean autoCommit) { return getConfigBuilder(autoCommit) - .withMetadataConfig(HoodieMetadataConfig.newBuilder().enable(true).build()) .build(); } @@ -204,8 +203,8 @@ public void testScheduleIngestionBeforePendingCompaction() throws Exception { String compactionInstantTime = "006"; int numRecs = 2000; - final List initalRecords = dataGen.generateInserts(firstInstantTime, numRecs); - final List records = runNextDeltaCommits(client, readClient, Arrays.asList(firstInstantTime, secondInstantTime), initalRecords, cfg, true, + final List initialRecords = dataGen.generateInserts(firstInstantTime, numRecs); + final List records = runNextDeltaCommits(client, readClient, Arrays.asList(firstInstantTime, secondInstantTime), initialRecords, cfg, true, new ArrayList<>()); // Schedule compaction but do not run them diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/compact/TestHoodieCompactor.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/compact/TestHoodieCompactor.java index 454c289dbd6d8..9afe5f3533cac 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/compact/TestHoodieCompactor.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/compact/TestHoodieCompactor.java @@ -21,7 +21,6 @@ import org.apache.hudi.avro.model.HoodieCompactionPlan; import org.apache.hudi.client.SparkRDDWriteClient; import org.apache.hudi.client.WriteStatus; -import org.apache.hudi.common.config.HoodieMetadataConfig; import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.model.FileSlice; import org.apache.hudi.common.model.HoodieRecord; @@ -159,7 +158,6 @@ public void testWriteStatusContentsAfterCompaction() throws Exception { // insert 100 records HoodieWriteConfig config = getConfigBuilder() .withCompactionConfig(HoodieCompactionConfig.newBuilder().withMaxNumDeltaCommitsBeforeCompaction(1).build()) - .withMetadataConfig(HoodieMetadataConfig.newBuilder().enable(true).build()) .build(); try (SparkRDDWriteClient writeClient = 
getHoodieWriteClient(config)) { String newCommitTime = "100"; @@ -175,7 +173,7 @@ public void testWriteStatusContentsAfterCompaction() throws Exception { List updatedRecords = dataGen.generateUpdates(newCommitTime, records); JavaRDD updatedRecordsRDD = jsc.parallelize(updatedRecords, 1); - HoodieIndex index = new HoodieBloomIndex<>(config, SparkHoodieBloomIndexHelper.getInstance()); + HoodieIndex index = new HoodieBloomIndex(config, SparkHoodieBloomIndexHelper.getInstance()); JavaRDD updatedTaggedRecordsRDD = tagLocation(index, updatedRecordsRDD, table); writeClient.startCommitWithTime(newCommitTime); diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/compact/TestInlineCompaction.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/compact/TestInlineCompaction.java index ef52953a2f0c8..310ff4fe8aede 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/compact/TestInlineCompaction.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/compact/TestInlineCompaction.java @@ -28,6 +28,7 @@ import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.table.HoodieSparkTable; import org.apache.hudi.table.marker.WriteMarkersFactory; + import org.junit.jupiter.api.Test; import java.util.ArrayList; @@ -62,7 +63,7 @@ public void testCompactionIsNotScheduledEarly() throws Exception { runNextDeltaCommits(writeClient, readClient, instants, records, cfg, true, new ArrayList<>()); HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(cfg.getBasePath()).build(); - // Then: ensure no compaction is executedm since there are only 2 delta commits + // Then: ensure no compaction is executed since there are only 2 delta commits assertEquals(2, metaClient.getActiveTimeline().getWriteTimeline().countInstants()); } } @@ -152,7 +153,7 @@ public void testSuccessfulCompactionBasedOnNumAndTime() throws Exception { runNextDeltaCommits(writeClient, readClient, instants, records, cfg, true, new ArrayList<>()); HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(cfg.getBasePath()).build(); - // Then: ensure no compaction is executedm since there are only 3 delta commits + // Then: ensure no compaction is executed since there are only 3 delta commits assertEquals(3, metaClient.getActiveTimeline().getWriteTimeline().countInstants()); // 4th commit, that will trigger compaction metaClient = HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(cfg.getBasePath()).build(); diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/compact/strategy/TestHoodieCompactionStrategy.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/compact/strategy/TestHoodieCompactionStrategy.java index dee1fadd73d5f..0c7190092e730 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/compact/strategy/TestHoodieCompactionStrategy.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/compact/strategy/TestHoodieCompactionStrategy.java @@ -143,10 +143,10 @@ public void testDayBasedCompactionSimple() { "DayBasedCompactionStrategy should have resulted in fewer compactions"); assertEquals(2, returned.size(), "DayBasedCompactionStrategy should have resulted in fewer compactions"); - int comparision = strategy.getComparator().compare(returned.get(returned.size() - 1).getPartitionPath(), + int comparison = 
strategy.getComparator().compare(returned.get(returned.size() - 1).getPartitionPath(), returned.get(0).getPartitionPath()); // Either the partition paths are sorted in descending order or they are equal - assertTrue(comparision >= 0, "DayBasedCompactionStrategy should sort partitions in descending order"); + assertTrue(comparison >= 0, "DayBasedCompactionStrategy should sort partitions in descending order"); } @Test @@ -192,10 +192,10 @@ public void testBoundedPartitionAwareCompactionSimple() { assertEquals(5, returned.size(), "BoundedPartitionAwareCompactionStrategy should have resulted in fewer compactions"); - int comparision = strategy.getComparator().compare(returned.get(returned.size() - 1).getPartitionPath(), + int comparison = strategy.getComparator().compare(returned.get(returned.size() - 1).getPartitionPath(), returned.get(0).getPartitionPath()); // Either the partition paths are sorted in descending order or they are equal - assertTrue(comparision >= 0, "BoundedPartitionAwareCompactionStrategy should sort partitions in descending order"); + assertTrue(comparison >= 0, "BoundedPartitionAwareCompactionStrategy should sort partitions in descending order"); } @Test diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/rollback/HoodieClientRollbackTestBase.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/rollback/HoodieClientRollbackTestBase.java index 3b0829b1655cb..33a1c58a3a991 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/rollback/HoodieClientRollbackTestBase.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/rollback/HoodieClientRollbackTestBase.java @@ -33,6 +33,7 @@ import org.apache.hudi.table.HoodieTable; import org.apache.hudi.testutils.Assertions; import org.apache.hudi.testutils.HoodieClientTestBase; + import org.apache.spark.api.java.JavaRDD; import java.io.IOException; @@ -52,7 +53,7 @@ protected void twoUpsertCommitDataWithTwoPartitions(List firstPartiti //just generate two partitions dataGen = new HoodieTestDataGenerator(new String[]{DEFAULT_FIRST_PARTITION_PATH, DEFAULT_SECOND_PARTITION_PATH}); //1. prepare data - HoodieTestDataGenerator.writePartitionMetadata(fs, new String[]{DEFAULT_FIRST_PARTITION_PATH, DEFAULT_SECOND_PARTITION_PATH}, basePath); + HoodieTestDataGenerator.writePartitionMetadataDeprecated(fs, new String[]{DEFAULT_FIRST_PARTITION_PATH, DEFAULT_SECOND_PARTITION_PATH}, basePath); SparkRDDWriteClient client = getHoodieWriteClient(cfg); /** * Write 1 (only inserts) @@ -78,18 +79,18 @@ protected void twoUpsertCommitDataWithTwoPartitions(List firstPartiti } - //2. assert filegroup and get the first partition fileslice + //2. assert file group and get the first partition file slice HoodieTable table = this.getHoodieTable(metaClient, cfg); SyncableFileSystemView fsView = getFileSystemViewWithUnCommittedSlices(table.getMetaClient()); List firstPartitionCommit2FileGroups = fsView.getAllFileGroups(DEFAULT_FIRST_PARTITION_PATH).collect(Collectors.toList()); assertEquals(1, firstPartitionCommit2FileGroups.size()); firstPartitionCommit2FileSlices.addAll(firstPartitionCommit2FileGroups.get(0).getAllFileSlices().collect(Collectors.toList())); - //3. assert filegroup and get the second partition fileslice + //3. 
assert file group and get the second partition file slice List secondPartitionCommit2FileGroups = fsView.getAllFileGroups(DEFAULT_SECOND_PARTITION_PATH).collect(Collectors.toList()); assertEquals(1, secondPartitionCommit2FileGroups.size()); secondPartitionCommit2FileSlices.addAll(secondPartitionCommit2FileGroups.get(0).getAllFileSlices().collect(Collectors.toList())); - //4. assert fileslice + //4. assert file slice HoodieTableType tableType = this.getTableType(); if (tableType.equals(HoodieTableType.COPY_ON_WRITE)) { assertEquals(2, firstPartitionCommit2FileSlices.size()); @@ -106,7 +107,7 @@ protected void insertOverwriteCommitDataWithTwoPartitions(List firstP boolean commitSecondInsertOverwrite) throws IOException { //just generate two partitions dataGen = new HoodieTestDataGenerator(new String[]{DEFAULT_FIRST_PARTITION_PATH, DEFAULT_SECOND_PARTITION_PATH}); - HoodieTestDataGenerator.writePartitionMetadata(fs, new String[]{DEFAULT_FIRST_PARTITION_PATH, DEFAULT_SECOND_PARTITION_PATH}, basePath); + HoodieTestDataGenerator.writePartitionMetadataDeprecated(fs, new String[]{DEFAULT_FIRST_PARTITION_PATH, DEFAULT_SECOND_PARTITION_PATH}, basePath); SparkRDDWriteClient client = getHoodieWriteClient(cfg); /** * Write 1 (upsert) diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/rollback/TestMergeOnReadRollbackActionExecutor.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/rollback/TestMergeOnReadRollbackActionExecutor.java index 4e98b220f3613..c9e3fed871acf 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/rollback/TestMergeOnReadRollbackActionExecutor.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/rollback/TestMergeOnReadRollbackActionExecutor.java @@ -20,18 +20,33 @@ import org.apache.hudi.avro.model.HoodieRollbackPartitionMetadata; import org.apache.hudi.client.SparkRDDWriteClient; -import org.apache.hudi.common.config.HoodieMetadataConfig; +import org.apache.hudi.client.WriteStatus; +import org.apache.hudi.common.fs.ConsistencyGuardConfig; import org.apache.hudi.common.model.FileSlice; +import org.apache.hudi.common.model.HoodieCommitMetadata; import org.apache.hudi.common.model.HoodieFileGroup; import org.apache.hudi.common.model.HoodieLogFile; +import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.model.HoodieTableType; +import org.apache.hudi.common.model.HoodieWriteStat; import org.apache.hudi.common.table.timeline.HoodieInstant; import org.apache.hudi.common.table.timeline.HoodieTimeline; +import org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion; +import org.apache.hudi.common.table.view.FileSystemViewStorageConfig; +import org.apache.hudi.common.table.view.FileSystemViewStorageType; +import org.apache.hudi.common.table.view.SyncableFileSystemView; import org.apache.hudi.common.testutils.HoodieTestDataGenerator; +import org.apache.hudi.common.util.StringUtils; +import org.apache.hudi.config.HoodieCompactionConfig; +import org.apache.hudi.config.HoodieIndexConfig; +import org.apache.hudi.config.HoodieStorageConfig; import org.apache.hudi.config.HoodieWriteConfig; +import org.apache.hudi.index.HoodieIndex; import org.apache.hudi.table.HoodieTable; import org.apache.hudi.table.marker.WriteMarkersFactory; +import org.apache.hudi.testutils.MetadataMergeWriteStatus; +import org.apache.spark.api.java.JavaRDD; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.Assertions; import 
org.junit.jupiter.api.BeforeEach; @@ -41,9 +56,11 @@ import java.io.IOException; import java.util.ArrayList; +import java.util.Collections; import java.util.List; import java.util.Map; import java.util.stream.Collectors; +import java.util.stream.Stream; import static org.apache.hudi.common.testutils.HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH; import static org.apache.hudi.common.testutils.HoodieTestDataGenerator.DEFAULT_SECOND_PARTITION_PATH; @@ -112,7 +129,7 @@ public void testMergeOnReadRollbackActionExecutor(boolean isUsingMarkers) throws assertTrue(meta.getSuccessDeleteFiles() == null || meta.getSuccessDeleteFiles().size() == 0); } - //4. assert filegroup after rollback, and compare to the rollbackstat + //4. assert file group after rollback, and compare to the rollbackstat // assert the first partition data and log file size List firstPartitionRollBack1FileGroups = table.getFileSystemView().getAllFileGroups(DEFAULT_FIRST_PARTITION_PATH).collect(Collectors.toList()); assertEquals(1, firstPartitionRollBack1FileGroups.size()); @@ -140,6 +157,131 @@ public void testMergeOnReadRollbackActionExecutor(boolean isUsingMarkers) throws assertFalse(WriteMarkersFactory.get(cfg.getMarkersType(), table, "002").doesMarkerDirExist()); } + @Test + public void testRollbackForCanIndexLogFile() throws IOException { + cleanupResources(); + setUpDFS(); + //1. prepare data and assert data result + //just generate one partitions + dataGen = new HoodieTestDataGenerator(new String[]{DEFAULT_FIRST_PARTITION_PATH}); + HoodieWriteConfig cfg = HoodieWriteConfig.newBuilder().withPath(basePath).withSchema(HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA) + .withParallelism(2, 2).withBulkInsertParallelism(2).withFinalizeWriteParallelism(2).withDeleteParallelism(2) + .withTimelineLayoutVersion(TimelineLayoutVersion.CURR_VERSION) + .withWriteStatusClass(MetadataMergeWriteStatus.class) + .withConsistencyGuardConfig(ConsistencyGuardConfig.newBuilder().withConsistencyCheckEnabled(true).build()) + .withCompactionConfig(HoodieCompactionConfig.newBuilder().compactionSmallFileSize(1024 * 1024).build()) + .withStorageConfig(HoodieStorageConfig.newBuilder().hfileMaxFileSize(1024 * 1024).parquetMaxFileSize(1024 * 1024).build()) + .forTable("test-trip-table") + .withIndexConfig(HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.INMEMORY).build()) + .withEmbeddedTimelineServerEnabled(true).withFileSystemViewConfig(FileSystemViewStorageConfig.newBuilder() + .withEnableBackupForRemoteFileSystemView(false) // Fail test if problem connecting to timeline-server + .withStorageType(FileSystemViewStorageType.EMBEDDED_KV_STORE).build()).withRollbackUsingMarkers(false).withAutoCommit(false).build(); + + //1. 
prepare data + new HoodieTestDataGenerator().writePartitionMetadata(fs, new String[]{DEFAULT_FIRST_PARTITION_PATH}, basePath); + SparkRDDWriteClient client = getHoodieWriteClient(cfg); + // Write 1 (only inserts) + String newCommitTime = "001"; + client.startCommitWithTime(newCommitTime); + List records = dataGen.generateInsertsForPartition(newCommitTime, 2, DEFAULT_FIRST_PARTITION_PATH); + JavaRDD writeRecords = jsc.parallelize(records, 1); + JavaRDD statuses = client.upsert(writeRecords, newCommitTime); + org.apache.hudi.testutils.Assertions.assertNoWriteErrors(statuses.collect()); + client.commit(newCommitTime, statuses); + + // check fileSlice + HoodieTable table = this.getHoodieTable(metaClient, cfg); + SyncableFileSystemView fsView = getFileSystemViewWithUnCommittedSlices(table.getMetaClient()); + List firstPartitionCommit2FileGroups = fsView.getAllFileGroups(DEFAULT_FIRST_PARTITION_PATH).collect(Collectors.toList()); + assertEquals(1, firstPartitionCommit2FileGroups.size()); + assertEquals(1, (int) firstPartitionCommit2FileGroups.get(0).getAllFileSlices().count()); + assertFalse(firstPartitionCommit2FileGroups.get(0).getAllFileSlices().findFirst().get().getBaseFile().isPresent()); + assertEquals(1, firstPartitionCommit2FileGroups.get(0).getAllFileSlices().findFirst().get().getLogFiles().count()); + String generatedFileID = firstPartitionCommit2FileGroups.get(0).getFileGroupId().getFileId(); + + // check hoodieCommitMeta + HoodieCommitMetadata commitMetadata = HoodieCommitMetadata.fromBytes( + table.getMetaClient().getCommitTimeline() + .getInstantDetails(new HoodieInstant(true, HoodieTimeline.DELTA_COMMIT_ACTION, "001")) + .get(), + HoodieCommitMetadata.class); + List firstPartitionWriteStat = commitMetadata.getPartitionToWriteStats().get(DEFAULT_FIRST_PARTITION_PATH); + assertEquals(2, firstPartitionWriteStat.size()); + // we have an empty writeStat for each partition + assert firstPartitionWriteStat.stream().anyMatch(wStat -> StringUtils.isNullOrEmpty(wStat.getFileId())); + // we have one non-empty writeStat which must contain updates or inserts + assertEquals(1, firstPartitionWriteStat.stream().filter(wStat -> !StringUtils.isNullOrEmpty(wStat.getFileId())).count()); + firstPartitionWriteStat.stream().filter(wStat -> !StringUtils.isNullOrEmpty(wStat.getFileId())).forEach(wStat -> { + assert wStat.getNumInserts() > 0; + }); + + // Write 2 (one update + inserts in two partitions) + newCommitTime = "002"; + client.startCommitWithTime(newCommitTime); + List updateRecords = Collections.singletonList(dataGen.generateUpdateRecord(records.get(0).getKey(), newCommitTime)); + List insertRecordsInSamePartition = dataGen.generateInsertsForPartition(newCommitTime, 2, DEFAULT_FIRST_PARTITION_PATH); + List insertRecordsInOtherPartition = dataGen.generateInsertsForPartition(newCommitTime, 2, DEFAULT_SECOND_PARTITION_PATH); + List recordsToBeWrite = Stream.concat(Stream.concat(updateRecords.stream(), insertRecordsInSamePartition.stream()), insertRecordsInOtherPartition.stream()) + .collect(Collectors.toList()); + writeRecords = jsc.parallelize(recordsToBeWrite, 1); + statuses = client.upsert(writeRecords, newCommitTime); + client.commit(newCommitTime, statuses); + table = this.getHoodieTable(metaClient, cfg); + commitMetadata = HoodieCommitMetadata.fromBytes( + table.getMetaClient().getCommitTimeline() + .getInstantDetails(new HoodieInstant(false, HoodieTimeline.DELTA_COMMIT_ACTION, newCommitTime)) + .get(), + HoodieCommitMetadata.class); + assert 
commitMetadata.getPartitionToWriteStats().containsKey(DEFAULT_FIRST_PARTITION_PATH); + assert commitMetadata.getPartitionToWriteStats().containsKey(DEFAULT_SECOND_PARTITION_PATH); + List hoodieWriteStatOptionList = commitMetadata.getPartitionToWriteStats().get(DEFAULT_FIRST_PARTITION_PATH); + // Both the update and the insert records should enter the same existing file group due to small file handling + assertEquals(1, hoodieWriteStatOptionList.size()); + assertEquals(generatedFileID, hoodieWriteStatOptionList.get(0).getFileId()); + // check insert and update numbers + assertEquals(2, hoodieWriteStatOptionList.get(0).getNumInserts()); + assertEquals(1, hoodieWriteStatOptionList.get(0).getNumUpdateWrites()); + + List secondHoodieWriteStatOptionList = commitMetadata.getPartitionToWriteStats().get(DEFAULT_SECOND_PARTITION_PATH); + // All inserts should enter one file group + assertEquals(1, secondHoodieWriteStatOptionList.size()); + String fileIdInPartitionTwo = secondHoodieWriteStatOptionList.get(0).getFileId(); + assertEquals(2, secondHoodieWriteStatOptionList.get(0).getNumInserts()); + + // Rollback + HoodieInstant rollBackInstant = new HoodieInstant(HoodieInstant.State.INFLIGHT, HoodieTimeline.DELTA_COMMIT_ACTION, "002"); + BaseRollbackPlanActionExecutor mergeOnReadRollbackPlanActionExecutor = + new BaseRollbackPlanActionExecutor(context, cfg, table, "003", rollBackInstant, false, + cfg.shouldRollbackUsingMarkers()); + mergeOnReadRollbackPlanActionExecutor.execute().get(); + MergeOnReadRollbackActionExecutor mergeOnReadRollbackActionExecutor = new MergeOnReadRollbackActionExecutor( + context, + cfg, + table, + "003", + rollBackInstant, + true, + false); + + //3. assert the rollback stat + Map rollbackMetadata = mergeOnReadRollbackActionExecutor.execute().getPartitionMetadata(); + assertEquals(2, rollbackMetadata.size()); + + //4. 
assert file group after rollback, and compare to the rollback stat + // assert the first partition data and log file size + HoodieRollbackPartitionMetadata partitionMetadata = rollbackMetadata.get(DEFAULT_FIRST_PARTITION_PATH); + assertTrue(partitionMetadata.getSuccessDeleteFiles().isEmpty()); + assertTrue(partitionMetadata.getFailedDeleteFiles().isEmpty()); + assertEquals(1, partitionMetadata.getRollbackLogFiles().size()); + + // assert the second partition data and log file size + partitionMetadata = rollbackMetadata.get(DEFAULT_SECOND_PARTITION_PATH); + assertEquals(1, partitionMetadata.getSuccessDeleteFiles().size()); + assertTrue(partitionMetadata.getFailedDeleteFiles().isEmpty()); + assertTrue(partitionMetadata.getRollbackLogFiles().isEmpty()); + } + @Test public void testFailForCompletedInstants() { Assertions.assertThrows(IllegalArgumentException.class, () -> { @@ -163,11 +305,20 @@ public void testRollbackWhenFirstCommitFail() throws Exception { HoodieWriteConfig config = HoodieWriteConfig.newBuilder() .withRollbackUsingMarkers(false) - .withPath(basePath).withMetadataConfig(HoodieMetadataConfig.newBuilder().enable(true).build()).build(); + .withPath(basePath).build(); try (SparkRDDWriteClient client = getHoodieWriteClient(config)) { client.startCommitWithTime("001"); client.insert(jsc.emptyRDD(), "001"); client.rollback("001"); } } + + private void setUpDFS() throws IOException { + initDFS(); + initSparkContexts(); + //just generate two partitions + dataGen = new HoodieTestDataGenerator(new String[] {DEFAULT_FIRST_PARTITION_PATH, DEFAULT_SECOND_PARTITION_PATH}); + initFileSystem(); + initDFSMetaClient(); + } } diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/functional/TestCleanPlanExecutor.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/functional/TestCleanPlanExecutor.java new file mode 100644 index 0000000000000..961523eb6b993 --- /dev/null +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/functional/TestCleanPlanExecutor.java @@ -0,0 +1,131 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hudi.table.functional; + +import org.apache.hudi.common.HoodieCleanStat; +import org.apache.hudi.common.config.HoodieMetadataConfig; +import org.apache.hudi.common.model.BootstrapFileMapping; +import org.apache.hudi.common.model.HoodieCleaningPolicy; +import org.apache.hudi.common.model.HoodieCommitMetadata; +import org.apache.hudi.common.model.HoodieFailedWritesCleaningPolicy; +import org.apache.hudi.common.table.HoodieTableMetaClient; +import org.apache.hudi.common.table.timeline.HoodieActiveTimeline; +import org.apache.hudi.common.table.timeline.HoodieInstant; +import org.apache.hudi.common.table.timeline.HoodieTimeline; +import org.apache.hudi.common.testutils.HoodieTestTable; +import org.apache.hudi.common.util.CollectionUtils; +import org.apache.hudi.common.util.Option; +import org.apache.hudi.config.HoodieCompactionConfig; +import org.apache.hudi.config.HoodieWriteConfig; +import org.apache.hudi.table.TestCleaner; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.MethodSource; + +import java.nio.charset.StandardCharsets; +import java.time.Instant; +import java.time.ZoneId; +import java.time.ZonedDateTime; +import java.util.Collections; +import java.util.Date; +import java.util.HashMap; +import java.util.Map; +import java.util.List; +import java.util.UUID; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; + +public class TestCleanPlanExecutor extends TestCleaner { + + /** + * Tests cleaning service based on number of hours retained. + */ + @ParameterizedTest + @MethodSource("argumentsForTestKeepLatestCommits") + public void testKeepXHoursWithCleaning(boolean simulateFailureRetry, boolean enableIncrementalClean, boolean enableBootstrapSourceClean) throws Exception { + HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath) + .withMetadataConfig(HoodieMetadataConfig.newBuilder().withAssumeDatePartitioning(true).build()) + .withCompactionConfig(HoodieCompactionConfig.newBuilder() + .withIncrementalCleaningMode(enableIncrementalClean) + .withFailedWritesCleaningPolicy(HoodieFailedWritesCleaningPolicy.EAGER) + .withCleanBootstrapBaseFileEnabled(enableBootstrapSourceClean) + .withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_BY_HOURS).cleanerNumHoursRetained(2).build()) + .build(); + + HoodieTestTable testTable = HoodieTestTable.of(metaClient); + String p0 = "2020/01/01"; + String p1 = "2020/01/02"; + Map<String, List<BootstrapFileMapping>> bootstrapMapping = enableBootstrapSourceClean ? generateBootstrapIndexAndSourceData(p0, p1) : null; + + String file1P0C0 = enableBootstrapSourceClean ? bootstrapMapping.get(p0).get(0).getFileId() + : UUID.randomUUID().toString(); + String file1P1C0 = enableBootstrapSourceClean ? 
bootstrapMapping.get(p1).get(0).getFileId() + : UUID.randomUUID().toString(); + Instant instant = Instant.now(); + ZonedDateTime commitDateTime = ZonedDateTime.ofInstant(instant, ZoneId.systemDefault()); + int minutesForFirstCommit = 150; + String firstCommitTs = HoodieActiveTimeline.formatDate(Date.from(commitDateTime.minusMinutes(minutesForFirstCommit).toInstant())); + testTable.addInflightCommit(firstCommitTs).withBaseFilesInPartition(p0, file1P0C0).withBaseFilesInPartition(p1, file1P1C0); + + HoodieCommitMetadata commitMetadata = generateCommitMetadata(firstCommitTs, + Collections.unmodifiableMap(new HashMap<String, List<String>>() { + { + put(p0, CollectionUtils.createImmutableList(file1P0C0)); + put(p1, CollectionUtils.createImmutableList(file1P1C0)); + } + }) + ); + metaClient.getActiveTimeline().saveAsComplete( + new HoodieInstant(HoodieInstant.State.INFLIGHT, HoodieTimeline.COMMIT_ACTION, firstCommitTs), + Option.of(commitMetadata.toJsonString().getBytes(StandardCharsets.UTF_8))); + + metaClient = HoodieTableMetaClient.reload(metaClient); + + List hoodieCleanStatsOne = runCleaner(config, simulateFailureRetry); + assertEquals(0, hoodieCleanStatsOne.size(), "Must not scan any partitions and clean any files"); + assertTrue(testTable.baseFileExists(p0, firstCommitTs, file1P0C0)); + assertTrue(testTable.baseFileExists(p1, firstCommitTs, file1P1C0)); + + // make next commit, with 1 insert & 1 update per partition + int minutesForSecondCommit = 90; + String secondCommitTs = HoodieActiveTimeline.formatDate(Date.from(commitDateTime.minusMinutes(minutesForSecondCommit).toInstant())); + Map partitionAndFileId002 = testTable.addInflightCommit(secondCommitTs).getFileIdsWithBaseFilesInPartitions(p0, p1); + String file2P0C1 = partitionAndFileId002.get(p0); + String file2P1C1 = partitionAndFileId002.get(p1); + testTable.forCommit(secondCommitTs).withBaseFilesInPartition(p0, file1P0C0).withBaseFilesInPartition(p1, file1P1C0); + commitMetadata = generateCommitMetadata(secondCommitTs, new HashMap<String, List<String>>() { + { + put(p0, CollectionUtils.createImmutableList(file1P0C0, file2P0C1)); + put(p1, CollectionUtils.createImmutableList(file1P1C0, file2P1C1)); + } + }); + metaClient.getActiveTimeline().saveAsComplete( + new HoodieInstant(HoodieInstant.State.INFLIGHT, HoodieTimeline.COMMIT_ACTION, secondCommitTs), + Option.of(commitMetadata.toJsonString().getBytes(StandardCharsets.UTF_8))); + List hoodieCleanStatsTwo = runCleaner(config, simulateFailureRetry); + assertEquals(2, hoodieCleanStatsTwo.size(), "Should clean one file each from both the partitions"); + assertTrue(testTable.baseFileExists(p0, secondCommitTs, file2P0C1)); + assertTrue(testTable.baseFileExists(p1, secondCommitTs, file2P1C1)); + assertTrue(testTable.baseFileExists(p0, secondCommitTs, file1P0C0)); + assertTrue(testTable.baseFileExists(p1, secondCommitTs, file1P1C0)); + assertFalse(testTable.baseFileExists(p0, firstCommitTs, file1P0C0)); + assertFalse(testTable.baseFileExists(p1, firstCommitTs, file1P1C0)); + } +} diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/functional/TestHoodieSparkMergeOnReadTableClustering.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/functional/TestHoodieSparkMergeOnReadTableClustering.java index a0ec0de371478..5438fbcfc0d98 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/functional/TestHoodieSparkMergeOnReadTableClustering.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/functional/TestHoodieSparkMergeOnReadTableClustering.java
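For reference, the new TestCleanPlanExecutor above exercises the hours-based cleaning policy. A minimal config sketch of that policy, built only from builder calls that appear in the test itself; the table path is illustrative:

import org.apache.hudi.common.model.HoodieCleaningPolicy;
import org.apache.hudi.config.HoodieCompactionConfig;
import org.apache.hudi.config.HoodieWriteConfig;

// Retain file versions written within the last 2 hours; older versions
// become eligible for cleaning on the next clean run.
HoodieWriteConfig config = HoodieWriteConfig.newBuilder()
    .withPath("/tmp/hudi_table")   // illustrative path
    .withCompactionConfig(HoodieCompactionConfig.newBuilder()
        .withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_BY_HOURS)
        .cleanerNumHoursRetained(2)
        .build())
    .build();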
@@ -132,7 +132,7 @@ void testClustering(boolean doUpdates, boolean populateMetaFields, boolean prese newCommitTime = "003"; client.startCommitWithTime(newCommitTime); records = dataGen.generateUpdates(newCommitTime, 100); - updateRecordsInMORTable(metaClient, records, client, cfg, newCommitTime); + updateRecordsInMORTable(metaClient, records, client, cfg, newCommitTime, false); } HoodieTable hoodieTable = HoodieSparkTable.create(cfg, context(), metaClient); @@ -201,7 +201,7 @@ void testClusteringWithNoBaseFiles(boolean doUpdates) throws Exception { newCommitTime = "003"; client.startCommitWithTime(newCommitTime); records = dataGen.generateUpdates(newCommitTime, 100); - updateRecordsInMORTable(metaClient, records, client, cfg, newCommitTime); + updateRecordsInMORTable(metaClient, records, client, cfg, newCommitTime, false); } HoodieTable hoodieTable = HoodieSparkTable.create(cfg, context(), metaClient); diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/functional/TestHoodieSparkMergeOnReadTableCompaction.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/functional/TestHoodieSparkMergeOnReadTableCompaction.java index 13903bf54b70d..f4f47d375b22d 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/functional/TestHoodieSparkMergeOnReadTableCompaction.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/functional/TestHoodieSparkMergeOnReadTableCompaction.java @@ -22,6 +22,7 @@ import org.apache.hudi.client.SparkRDDWriteClient; import org.apache.hudi.client.WriteStatus; import org.apache.hudi.common.model.HoodieTableType; +import org.apache.hudi.common.model.HoodieWriteStat; import org.apache.hudi.common.table.HoodieTableMetaClient; import org.apache.hudi.common.table.timeline.HoodieActiveTimeline; import org.apache.hudi.common.testutils.HoodieTestDataGenerator; @@ -113,13 +114,14 @@ private List writeData(String instant, int numRecords, boolean doCo JavaRDD records = jsc().parallelize(dataGen.generateInserts(instant, numRecords), 2); metaClient = HoodieTableMetaClient.reload(metaClient); client.startCommitWithTime(instant); - List writeStatues = client.upsert(records, instant).collect(); - org.apache.hudi.testutils.Assertions.assertNoWriteErrors(writeStatues); + List writeStatuses = client.upsert(records, instant).collect(); + org.apache.hudi.testutils.Assertions.assertNoWriteErrors(writeStatuses); if (doCommit) { - Assertions.assertTrue(client.commitStats(instant, writeStatues.stream().map(WriteStatus::getStat).collect(Collectors.toList()), - Option.empty(), metaClient.getCommitActionType())); + List writeStats = writeStatuses.stream().map(WriteStatus::getStat).collect(Collectors.toList()); + boolean committed = client.commitStats(instant, writeStats, Option.empty(), metaClient.getCommitActionType()); + Assertions.assertTrue(committed); } metaClient = HoodieTableMetaClient.reload(metaClient); - return writeStatues; + return writeStatuses; } } \ No newline at end of file diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/functional/TestHoodieSparkMergeOnReadTableIncrementalRead.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/functional/TestHoodieSparkMergeOnReadTableIncrementalRead.java index c80374b64f4a2..5df7b4daecc72 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/functional/TestHoodieSparkMergeOnReadTableIncrementalRead.java +++ 
b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/functional/TestHoodieSparkMergeOnReadTableIncrementalRead.java @@ -20,7 +20,6 @@ package org.apache.hudi.table.functional; import org.apache.hudi.client.SparkRDDWriteClient; -import org.apache.hudi.common.config.HoodieMetadataConfig; import org.apache.hudi.common.model.HoodieBaseFile; import org.apache.hudi.common.model.HoodieFileFormat; import org.apache.hudi.common.model.HoodieRecord; @@ -86,7 +85,7 @@ public void testIncrementalReadsWithCompaction() throws Exception { Properties props = new Properties(); props.setProperty(HoodieTableConfig.BASE_FILE_FORMAT.key(), HoodieFileFormat.PARQUET.toString()); HoodieTableMetaClient metaClient = getHoodieMetaClient(HoodieTableType.MERGE_ON_READ, props); - HoodieWriteConfig cfg = getConfigBuilder(true).withMetadataConfig(HoodieMetadataConfig.newBuilder().enable(false).build()).build(); + HoodieWriteConfig cfg = getConfigBuilder(true).build(); try (SparkRDDWriteClient client = getHoodieWriteClient(cfg)) { /* @@ -118,7 +117,7 @@ public void testIncrementalReadsWithCompaction() throws Exception { String updateTime = "004"; client.startCommitWithTime(updateTime); List records004 = dataGen.generateUpdates(updateTime, 100); - updateRecordsInMORTable(metaClient, records004, client, cfg, updateTime); + updateRecordsInMORTable(metaClient, records004, client, cfg, updateTime, false); // verify RO incremental reads - only one base file shows up because updates to into log files incrementalROFiles = getROIncrementalFiles(partitionPath, false); diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/functional/TestHoodieSparkMergeOnReadTableInsertUpdateDelete.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/functional/TestHoodieSparkMergeOnReadTableInsertUpdateDelete.java index 62ce007496683..2955147b4053f 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/functional/TestHoodieSparkMergeOnReadTableInsertUpdateDelete.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/functional/TestHoodieSparkMergeOnReadTableInsertUpdateDelete.java @@ -19,6 +19,7 @@ package org.apache.hudi.table.functional; +import org.apache.hadoop.fs.Path; import org.apache.hudi.client.SparkRDDWriteClient; import org.apache.hudi.client.WriteStatus; import org.apache.hudi.common.model.FileSlice; @@ -27,6 +28,7 @@ import org.apache.hudi.common.model.HoodieLogFile; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.model.HoodieTableType; +import org.apache.hudi.common.model.HoodieWriteStat; import org.apache.hudi.common.table.HoodieTableConfig; import org.apache.hudi.common.table.HoodieTableMetaClient; import org.apache.hudi.common.table.timeline.HoodieInstant; @@ -35,10 +37,12 @@ import org.apache.hudi.common.table.view.TableFileSystemView; import org.apache.hudi.common.testutils.HoodieTestDataGenerator; import org.apache.hudi.common.util.Option; +import org.apache.hudi.config.HoodieCompactionConfig; import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.index.HoodieIndex; import org.apache.hudi.table.HoodieSparkTable; import org.apache.hudi.table.HoodieTable; +import org.apache.hudi.table.action.HoodieWriteMetadata; import org.apache.hudi.testutils.HoodieClientTestUtils; import org.apache.hudi.testutils.HoodieMergeOnReadTestUtils; import org.apache.hudi.testutils.SparkClientFunctionalTestHarness; @@ -54,6 +58,7 @@ import org.junit.jupiter.params.provider.MethodSource; import 
org.junit.jupiter.params.provider.ValueSource; +import java.util.Collection; import java.util.List; import java.util.Properties; import java.util.stream.Collectors; @@ -104,7 +109,7 @@ public void testSimpleInsertAndUpdate(HoodieFileFormat fileFormat, boolean popul newCommitTime = "004"; client.startCommitWithTime(newCommitTime); records = dataGen.generateUpdates(newCommitTime, 100); - updateRecordsInMORTable(metaClient, records, client, cfg, newCommitTime); + updateRecordsInMORTable(metaClient, records, client, cfg, newCommitTime, false); String compactionCommitTime = client.scheduleCompaction(Option.empty()).get().toString(); client.compact(compactionCommitTime); @@ -133,6 +138,48 @@ public void testSimpleInsertAndUpdate(HoodieFileFormat fileFormat, boolean popul } } + @ParameterizedTest + @ValueSource(booleans = {true, false}) + public void testInlineScheduleCompaction(boolean scheduleInlineCompaction) throws Exception { + HoodieFileFormat fileFormat = HoodieFileFormat.PARQUET; + Properties properties = new Properties(); + properties.setProperty(HoodieTableConfig.BASE_FILE_FORMAT.key(), fileFormat.toString()); + HoodieTableMetaClient metaClient = getHoodieMetaClient(HoodieTableType.MERGE_ON_READ, properties); + + HoodieWriteConfig cfg = getConfigBuilder(false) + .withCompactionConfig(HoodieCompactionConfig.newBuilder().compactionSmallFileSize(1024 * 1024 * 1024) + .withInlineCompaction(false).withMaxNumDeltaCommitsBeforeCompaction(2).withPreserveCommitMetadata(true).withScheduleInlineCompaction(scheduleInlineCompaction).build()) + .build(); + try (SparkRDDWriteClient client = getHoodieWriteClient(cfg)) { + + HoodieTestDataGenerator dataGen = new HoodieTestDataGenerator(); + /* + * Write 1 (only inserts) + */ + String newCommitTime = "001"; + client.startCommitWithTime(newCommitTime); + + List records = dataGen.generateInserts(newCommitTime, 200); + Stream dataFiles = insertRecordsToMORTable(metaClient, records, client, cfg, newCommitTime, true); + assertTrue(dataFiles.findAny().isPresent(), "should list the base files we wrote in the delta commit"); + + /* + * Write 2 (updates) + */ + newCommitTime = "004"; + client.startCommitWithTime(newCommitTime); + records = dataGen.generateUpdates(newCommitTime, 100); + updateRecordsInMORTable(metaClient, records, client, cfg, newCommitTime, true); + + // verify that there is a commit + if (scheduleInlineCompaction) { + assertEquals(metaClient.reloadActiveTimeline().getAllCommitsTimeline().filterPendingCompactionTimeline().countInstants(), 1); + } else { + assertEquals(metaClient.reloadActiveTimeline().getAllCommitsTimeline().filterPendingCompactionTimeline().countInstants(), 0); + } + } + } + @ParameterizedTest @ValueSource(booleans = {true, false}) public void testSimpleInsertUpdateAndDelete(boolean populateMetaFields) throws Exception { @@ -213,8 +260,11 @@ public void testSimpleInsertUpdateAndDelete(boolean populateMetaFields) throws E dataFilesToRead = tableView.getLatestBaseFiles(); assertTrue(dataFilesToRead.findAny().isPresent()); - List dataFiles = tableView.getLatestBaseFiles().map(HoodieBaseFile::getPath).collect(Collectors.toList()); - List recordsRead = HoodieMergeOnReadTestUtils.getRecordsUsingInputFormat(hadoopConf(), dataFiles, basePath(), new JobConf(hadoopConf()), true, false); + List inputPaths = tableView.getLatestBaseFiles() + .map(baseFile -> new Path(baseFile.getPath()).getParent().toString()) + .collect(Collectors.toList()); + List recordsRead = + HoodieMergeOnReadTestUtils.getRecordsUsingInputFormat(hadoopConf(), 
inputPaths, basePath(), new JobConf(hadoopConf()), true, false); // Wrote 20 records and deleted 20 records, so remaining 20-20 = 0 assertEquals(0, recordsRead.size(), "Must contain 0 records"); } @@ -260,11 +310,12 @@ public void testSimpleInsertsGeneratedIntoLogFiles() throws Exception { assertTrue(numLogFiles > 0); // Do a compaction String instantTime = writeClient.scheduleCompaction(Option.empty()).get().toString(); - statuses = (JavaRDD) writeClient.compact(instantTime); + HoodieWriteMetadata<JavaRDD<WriteStatus>> compactionMetadata = writeClient.compact(instantTime); String extension = table.getBaseFileExtension(); - assertEquals(numLogFiles, statuses.map(status -> status.getStat().getPath().contains(extension)).count()); - assertEquals(numLogFiles, statuses.count()); - writeClient.commitCompaction(instantTime, statuses, Option.empty()); + Collection<List<HoodieWriteStat>> stats = compactionMetadata.getCommitMetadata().get().getPartitionToWriteStats().values(); + assertEquals(numLogFiles, stats.stream().flatMap(Collection::stream).filter(state -> state.getPath().contains(extension)).count()); + assertEquals(numLogFiles, stats.stream().mapToLong(Collection::size).sum()); + writeClient.commitCompaction(instantTime, compactionMetadata.getCommitMetadata().get(), Option.empty()); } } } diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/functional/TestHoodieSparkMergeOnReadTableRollback.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/functional/TestHoodieSparkMergeOnReadTableRollback.java index 38becc92c65ff..d552955030baa 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/functional/TestHoodieSparkMergeOnReadTableRollback.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/functional/TestHoodieSparkMergeOnReadTableRollback.java @@ -24,10 +24,13 @@ import org.apache.hudi.common.config.HoodieMetadataConfig; import org.apache.hudi.common.model.FileSlice; import org.apache.hudi.common.model.HoodieBaseFile; +import org.apache.hudi.common.model.HoodieCommitMetadata; import org.apache.hudi.common.model.HoodieFailedWritesCleaningPolicy; import org.apache.hudi.common.model.HoodieFileGroup; import org.apache.hudi.common.model.HoodieRecord; +import org.apache.hudi.common.model.HoodieRecordPayload; import org.apache.hudi.common.model.HoodieTableType; +import org.apache.hudi.common.model.HoodieWriteStat; import org.apache.hudi.common.table.HoodieTableConfig; import org.apache.hudi.common.table.HoodieTableMetaClient; import org.apache.hudi.common.table.marker.MarkerType; @@ -40,6 +43,7 @@ import org.apache.hudi.common.testutils.HoodieTestDataGenerator; import org.apache.hudi.common.testutils.HoodieTestTable; import org.apache.hudi.common.util.Option; +import org.apache.hudi.common.util.collection.Pair; import org.apache.hudi.config.HoodieCompactionConfig; import org.apache.hudi.config.HoodieStorageConfig; import org.apache.hudi.config.HoodieWriteConfig; @@ -47,6 +51,7 @@ import org.apache.hudi.index.HoodieIndex; import org.apache.hudi.table.HoodieSparkTable; import org.apache.hudi.table.HoodieTable; +import org.apache.hudi.table.action.HoodieWriteMetadata; import org.apache.hudi.testutils.HoodieMergeOnReadTestUtils; import org.apache.hudi.testutils.SparkClientFunctionalTestHarness; @@ -64,6 +69,7 @@ import java.nio.file.Files; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collection; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -142,8 +148,14 @@ void 
testCOWToMORConvertedTableRollback(boolean rollbackUsingMarkers) throws Exc @ParameterizedTest @ValueSource(booleans = {true, false}) void testRollbackWithDeltaAndCompactionCommit(boolean rollbackUsingMarkers) throws Exception { - HoodieWriteConfig.Builder cfgBuilder = getConfigBuilder(false, rollbackUsingMarkers, HoodieIndex.IndexType.SIMPLE) - .withMetadataConfig(HoodieMetadataConfig.newBuilder().enable(false).build()); + // NOTE: First writer will have Metadata table DISABLED + HoodieWriteConfig.Builder cfgBuilder = + getConfigBuilder(false, rollbackUsingMarkers, HoodieIndex.IndexType.SIMPLE) + .withMetadataConfig( + HoodieMetadataConfig.newBuilder() + .enable(false) + .build()); + addConfigsForPopulateMetaFields(cfgBuilder, true); HoodieWriteConfig cfg = cfgBuilder.build(); @@ -166,10 +178,12 @@ void testRollbackWithDeltaAndCompactionCommit(boolean rollbackUsingMarkers) thro JavaRDD writeRecords = jsc().parallelize(records, 1); JavaRDD writeStatusJavaRDD = client.upsert(writeRecords, newCommitTime); - client.commit(newCommitTime, writeStatusJavaRDD); + List statuses = writeStatusJavaRDD.collect(); assertNoWriteErrors(statuses); + client.commit(newCommitTime, jsc().parallelize(statuses)); + HoodieTable hoodieTable = HoodieSparkTable.create(cfg, context(), metaClient); Option deltaCommit = metaClient.getActiveTimeline().getDeltaCommitTimeline().firstInstant(); @@ -194,6 +208,7 @@ void testRollbackWithDeltaAndCompactionCommit(boolean rollbackUsingMarkers) thro */ final String commitTime1 = "002"; // WriteClient with custom config (disable small file handling) + // NOTE: Second writer will have Metadata table ENABLED try (SparkRDDWriteClient secondClient = getHoodieWriteClient(getHoodieWriteConfigWithSmallFileHandlingOff(false));) { secondClient.startCommitWithTime(commitTime1); @@ -201,8 +216,10 @@ void testRollbackWithDeltaAndCompactionCommit(boolean rollbackUsingMarkers) thro copyOfRecords = dataGen.generateUpdates(commitTime1, copyOfRecords); copyOfRecords.addAll(dataGen.generateInserts(commitTime1, 200)); - List dataFiles = tableView.getLatestBaseFiles().map(HoodieBaseFile::getPath).collect(Collectors.toList()); - List recordsRead = HoodieMergeOnReadTestUtils.getRecordsUsingInputFormat(hadoopConf(), dataFiles, + List inputPaths = tableView.getLatestBaseFiles() + .map(baseFile -> new Path(baseFile.getPath()).getParent().toString()) + .collect(Collectors.toList()); + List recordsRead = HoodieMergeOnReadTestUtils.getRecordsUsingInputFormat(hadoopConf(), inputPaths, basePath()); assertEquals(200, recordsRead.size()); @@ -218,8 +235,10 @@ void testRollbackWithDeltaAndCompactionCommit(boolean rollbackUsingMarkers) thro .contains(commitTime1)).map(fileStatus -> fileStatus.getPath().toString()).collect(Collectors.toList()); assertEquals(0, remainingFiles.size(), "There files should have been rolled-back " + "when rolling back commit " + commitTime1 + " but are still remaining. 
Files: " + remainingFiles); - dataFiles = tableView.getLatestBaseFiles().map(HoodieBaseFile::getPath).collect(Collectors.toList()); - recordsRead = HoodieMergeOnReadTestUtils.getRecordsUsingInputFormat(hadoopConf(), dataFiles, basePath()); + inputPaths = tableView.getLatestBaseFiles() + .map(baseFile -> new Path(baseFile.getPath()).getParent().toString()) + .collect(Collectors.toList()); + recordsRead = HoodieMergeOnReadTestUtils.getRecordsUsingInputFormat(hadoopConf(), inputPaths, basePath()); assertEquals(200, recordsRead.size()); } @@ -234,8 +253,10 @@ void testRollbackWithDeltaAndCompactionCommit(boolean rollbackUsingMarkers) thro copyOfRecords = dataGen.generateUpdates(commitTime2, copyOfRecords); copyOfRecords.addAll(dataGen.generateInserts(commitTime2, 200)); - List dataFiles = tableView.getLatestBaseFiles().map(HoodieBaseFile::getPath).collect(Collectors.toList()); - List recordsRead = HoodieMergeOnReadTestUtils.getRecordsUsingInputFormat(hadoopConf(), dataFiles, + List inputPaths = tableView.getLatestBaseFiles() + .map(baseFile -> new Path(baseFile.getPath()).getParent().toString()) + .collect(Collectors.toList()); + List recordsRead = HoodieMergeOnReadTestUtils.getRecordsUsingInputFormat(hadoopConf(), inputPaths, basePath()); assertEquals(200, recordsRead.size()); @@ -255,8 +276,10 @@ void testRollbackWithDeltaAndCompactionCommit(boolean rollbackUsingMarkers) thro metaClient = HoodieTableMetaClient.reload(metaClient); hoodieTable = HoodieSparkTable.create(cfg, context(), metaClient); tableView = getHoodieTableFileSystemView(metaClient, hoodieTable.getCompletedCommitsTimeline(), allFiles); - dataFiles = tableView.getLatestBaseFiles().map(HoodieBaseFile::getPath).collect(Collectors.toList()); - recordsRead = HoodieMergeOnReadTestUtils.getRecordsUsingInputFormat(hadoopConf(), dataFiles, basePath()); + inputPaths = tableView.getLatestBaseFiles() + .map(baseFile -> new Path(baseFile.getPath()).getParent().toString()) + .collect(Collectors.toList()); + recordsRead = HoodieMergeOnReadTestUtils.getRecordsUsingInputFormat(hadoopConf(), inputPaths, basePath()); // check that the number of records read is still correct after rollback operation assertEquals(200, recordsRead.size()); @@ -268,11 +291,13 @@ void testRollbackWithDeltaAndCompactionCommit(boolean rollbackUsingMarkers) thro thirdClient.startCommitWithTime(newCommitTime); writeStatusJavaRDD = thirdClient.upsert(writeRecords, newCommitTime); + statuses = writeStatusJavaRDD.collect(); - thirdClient.commit(newCommitTime, writeStatusJavaRDD); // Verify there are no errors assertNoWriteErrors(statuses); + thirdClient.commit(newCommitTime, jsc().parallelize(statuses)); + metaClient = HoodieTableMetaClient.reload(metaClient); String compactionInstantTime = thirdClient.scheduleCompaction(Option.empty()).get().toString(); @@ -300,8 +325,7 @@ void testMultiRollbackWithDeltaAndCompactionCommit() throws Exception { boolean populateMetaFields = true; HoodieWriteConfig.Builder cfgBuilder = getConfigBuilder(false) // Timeline-server-based markers are not used for multi-rollback tests - .withMarkersType(MarkerType.DIRECT.name()) - .withMetadataConfig(HoodieMetadataConfig.newBuilder().enable(true).build()); + .withMarkersType(MarkerType.DIRECT.name()); addConfigsForPopulateMetaFields(cfgBuilder, populateMetaFields); HoodieWriteConfig cfg = cfgBuilder.build(); @@ -310,8 +334,8 @@ void testMultiRollbackWithDeltaAndCompactionCommit() throws Exception { HoodieTableMetaClient metaClient = getHoodieMetaClient(HoodieTableType.MERGE_ON_READ, properties); 
try (final SparkRDDWriteClient client = getHoodieWriteClient(cfg)) { - HoodieTestDataGenerator dataGen = new HoodieTestDataGenerator(); + /* * Write 1 (only inserts) */ @@ -322,20 +346,29 @@ void testMultiRollbackWithDeltaAndCompactionCommit() throws Exception { JavaRDD writeRecords = jsc().parallelize(records, 1); JavaRDD writeStatusJavaRDD = client.upsert(writeRecords, newCommitTime); - client.commit(newCommitTime, writeStatusJavaRDD); + List statuses = writeStatusJavaRDD.collect(); assertNoWriteErrors(statuses); + + client.commit(newCommitTime, jsc().parallelize(statuses)); client.close(); - HoodieTable hoodieTable = HoodieSparkTable.create(cfg, context(), metaClient); + Option> instantCommitMetadataPairOpt = + metaClient.getActiveTimeline().getLastCommitMetadataWithValidData(); - Option deltaCommit = metaClient.getActiveTimeline().getDeltaCommitTimeline().firstInstant(); - assertTrue(deltaCommit.isPresent()); - assertEquals("001", deltaCommit.get().getTimestamp(), "Delta commit should be 001"); + assertTrue(instantCommitMetadataPairOpt.isPresent()); + + HoodieInstant commitInstant = instantCommitMetadataPairOpt.get().getKey(); + + assertEquals("001", commitInstant.getTimestamp()); + assertEquals(HoodieTimeline.DELTA_COMMIT_ACTION, commitInstant.getAction()); + assertEquals(200, getTotalRecordsWritten(instantCommitMetadataPairOpt.get().getValue())); Option commit = metaClient.getActiveTimeline().getCommitTimeline().firstInstant(); assertFalse(commit.isPresent()); + HoodieTable hoodieTable = HoodieSparkTable.create(cfg, context(), metaClient); + FileStatus[] allFiles = listAllBaseFilesInPath(hoodieTable); HoodieTableFileSystemView tableView = getHoodieTableFileSystemView(metaClient, metaClient.getCommitTimeline().filterCompletedInstants(), allFiles); Stream dataFilesToRead = tableView.getLatestBaseFiles(); @@ -345,6 +378,7 @@ void testMultiRollbackWithDeltaAndCompactionCommit() throws Exception { dataFilesToRead = tableView.getLatestBaseFiles(); assertTrue(dataFilesToRead.findAny().isPresent(), "Should list the base files we wrote in the delta commit"); + /* * Write 2 (inserts + updates) */ @@ -352,8 +386,7 @@ void testMultiRollbackWithDeltaAndCompactionCommit() throws Exception { // WriteClient with custom config (disable small file handling) HoodieWriteConfig smallFileWriteConfig = getHoodieWriteConfigWithSmallFileHandlingOffBuilder(populateMetaFields) // Timeline-server-based markers are not used for multi-rollback tests - .withMarkersType(MarkerType.DIRECT.name()) - .withMetadataConfig(HoodieMetadataConfig.newBuilder().enable(true).build()).build(); + .withMarkersType(MarkerType.DIRECT.name()).build(); try (SparkRDDWriteClient nClient = getHoodieWriteClient(smallFileWriteConfig)) { nClient.startCommitWithTime(newCommitTime); @@ -361,7 +394,9 @@ void testMultiRollbackWithDeltaAndCompactionCommit() throws Exception { copyOfRecords = dataGen.generateUpdates(newCommitTime, copyOfRecords); copyOfRecords.addAll(dataGen.generateInserts(newCommitTime, 200)); - List dataFiles = tableView.getLatestBaseFiles().map(hf -> hf.getPath()).collect(Collectors.toList()); + List dataFiles = tableView.getLatestBaseFiles() + .map(baseFile -> new Path(baseFile.getPath()).getParent().toString()) + .collect(Collectors.toList()); List recordsRead = HoodieMergeOnReadTestUtils.getRecordsUsingInputFormat(hadoopConf(), dataFiles, basePath()); assertEquals(200, recordsRead.size()); @@ -369,7 +404,9 @@ void testMultiRollbackWithDeltaAndCompactionCommit() throws Exception { statuses = 
nClient.upsert(jsc().parallelize(copyOfRecords, 1), newCommitTime).collect(); // Verify there are no errors assertNoWriteErrors(statuses); - nClient.commit(newCommitTime, writeStatusJavaRDD); + + nClient.commit(newCommitTime, jsc().parallelize(statuses)); + copyOfRecords.clear(); } @@ -386,11 +423,12 @@ void testMultiRollbackWithDeltaAndCompactionCommit() throws Exception { writeRecords = jsc().parallelize(records, 1); writeStatusJavaRDD = client.upsert(writeRecords, newCommitTime); - client.commit(newCommitTime, writeStatusJavaRDD); statuses = writeStatusJavaRDD.collect(); // Verify there are no errors assertNoWriteErrors(statuses); + client.commit(newCommitTime, jsc().parallelize(statuses)); + metaClient = HoodieTableMetaClient.reload(metaClient); String compactionInstantTime = "004"; @@ -407,17 +445,18 @@ void testMultiRollbackWithDeltaAndCompactionCommit() throws Exception { writeRecords = jsc().parallelize(records, 1); writeStatusJavaRDD = client.upsert(writeRecords, newCommitTime); - client.commit(newCommitTime, writeStatusJavaRDD); statuses = writeStatusJavaRDD.collect(); // Verify there are no errors assertNoWriteErrors(statuses); + client.commit(newCommitTime, jsc().parallelize(statuses)); + metaClient = HoodieTableMetaClient.reload(metaClient); compactionInstantTime = "006"; client.scheduleCompactionAtInstant(compactionInstantTime, Option.empty()); - JavaRDD ws = (JavaRDD) client.compact(compactionInstantTime); - client.commitCompaction(compactionInstantTime, ws, Option.empty()); + HoodieWriteMetadata> compactionMetadata = client.compact(compactionInstantTime); + client.commitCompaction(compactionInstantTime, compactionMetadata.getCommitMetadata().get(), Option.empty()); allFiles = listAllBaseFilesInPath(hoodieTable); metaClient = HoodieTableMetaClient.reload(metaClient); @@ -440,7 +479,9 @@ void testMultiRollbackWithDeltaAndCompactionCommit() throws Exception { statuses = client.upsert(jsc().parallelize(copyOfRecords, 1), newCommitTime).collect(); // Verify there are no errors assertNoWriteErrors(statuses); - client.commit(newCommitTime, writeStatusJavaRDD); + + client.commit(newCommitTime, jsc().parallelize(statuses)); + copyOfRecords.clear(); // Rollback latest commit first @@ -464,13 +505,19 @@ void testMultiRollbackWithDeltaAndCompactionCommit() throws Exception { } } + private long getTotalRecordsWritten(HoodieCommitMetadata commitMetadata) { + return commitMetadata.getPartitionToWriteStats().values().stream() + .flatMap(Collection::stream) + .map(stat -> stat.getNumWrites() + stat.getNumUpdateWrites()) + .reduce(0L, Long::sum); + } + @ParameterizedTest @ValueSource(booleans = {true, false}) void testMORTableRestore(boolean restoreAfterCompaction) throws Exception { HoodieWriteConfig.Builder cfgBuilder = getConfigBuilder(false) // Timeline-server-based markers are not used for multi-rollback tests - .withMarkersType(MarkerType.DIRECT.name()) - .withMetadataConfig(HoodieMetadataConfig.newBuilder().enable(true).build()); + .withMarkersType(MarkerType.DIRECT.name()); HoodieWriteConfig cfg = cfgBuilder.build(); Properties properties = new Properties(); @@ -495,8 +542,8 @@ void testMORTableRestore(boolean restoreAfterCompaction) throws Exception { metaClient = HoodieTableMetaClient.reload(metaClient); String compactionInstantTime = "005"; client.scheduleCompactionAtInstant(compactionInstantTime, Option.empty()); - JavaRDD ws = (JavaRDD) client.compact(compactionInstantTime); - client.commitCompaction(compactionInstantTime, ws, Option.empty()); + HoodieWriteMetadata> 
compactionMetadata = client.compact(compactionInstantTime); + client.commitCompaction(compactionInstantTime, compactionMetadata.getCommitMetadata().get(), Option.empty()); validateRecords(cfg, metaClient, updates3); List updates4 = updateAndGetRecords("006", client, dataGen, records); @@ -516,8 +563,6 @@ private List insertAndGetRecords(String newCommitTime, SparkRDDWri JavaRDD writeRecords = jsc().parallelize(records, 1); JavaRDD writeStatusJavaRDD = client.upsert(writeRecords, newCommitTime); client.commit(newCommitTime, writeStatusJavaRDD); - List statuses = writeStatusJavaRDD.collect(); - assertNoWriteErrors(statuses); return records; } @@ -534,8 +579,10 @@ private void validateRecords(HoodieWriteConfig cfg, HoodieTableMetaClient metaCl HoodieTable hoodieTable = HoodieSparkTable.create(cfg, context(), metaClient); FileStatus[] allFiles = listAllBaseFilesInPath(hoodieTable); HoodieTableFileSystemView tableView = getHoodieTableFileSystemView(metaClient, hoodieTable.getCompletedCommitsTimeline(), allFiles); - List dataFiles = tableView.getLatestBaseFiles().map(hf -> hf.getPath()).collect(Collectors.toList()); - List recordsRead = HoodieMergeOnReadTestUtils.getRecordsUsingInputFormat(hadoopConf(), dataFiles, + List inputPaths = tableView.getLatestBaseFiles() + .map(hf -> new Path(hf.getPath()).getParent().toString()) + .collect(Collectors.toList()); + List recordsRead = HoodieMergeOnReadTestUtils.getRecordsUsingInputFormat(hadoopConf(), inputPaths, basePath()); assertRecords(expectedRecords, recordsRead); } @@ -545,7 +592,7 @@ private void assertRecords(List inputRecords, List Map expectedRecords = new HashMap<>(); inputRecords.forEach(entry -> { try { - expectedRecords.put(entry.getRecordKey(), ((GenericRecord) entry.getData().getInsertValue(HoodieTestDataGenerator.AVRO_SCHEMA).get())); + expectedRecords.put(entry.getRecordKey(), (GenericRecord) ((HoodieRecordPayload) entry.getData()).getInsertValue(HoodieTestDataGenerator.AVRO_SCHEMA).get()); } catch (IOException e) { e.printStackTrace(); } @@ -596,9 +643,8 @@ void testInsertsGeneratedIntoLogFilesRollback(boolean rollbackUsingMarkers) thro List records = dataGen.generateInserts(newCommitTime, 100); JavaRDD recordsRDD = jsc().parallelize(records, 1); - JavaRDD statuses = writeClient.insert(recordsRDD, newCommitTime); // trigger an action - List writeStatuses = statuses.collect(); + List writeStatuses = ((JavaRDD) writeClient.insert(recordsRDD, newCommitTime)).collect(); // Ensure that inserts are written to only log files assertEquals(0, @@ -708,11 +754,14 @@ void testInsertsGeneratedIntoLogFilesRollbackAfterCompaction(boolean rollbackUsi assertTrue(numLogFiles > 0); // Do a compaction newCommitTime = writeClient.scheduleCompaction(Option.empty()).get().toString(); - statuses = (JavaRDD) writeClient.compact(newCommitTime); + HoodieWriteMetadata> compactionMetadata = writeClient.compact(newCommitTime); + statuses = compactionMetadata.getWriteStatuses(); // Ensure all log files have been compacted into base files String extension = table.getBaseFileExtension(); - assertEquals(numLogFiles, statuses.map(status -> status.getStat().getPath().contains(extension)).count()); - assertEquals(numLogFiles, statuses.count()); + Collection> stats = compactionMetadata.getCommitMetadata().get().getPartitionToWriteStats().values(); + assertEquals(numLogFiles, stats.stream().flatMap(Collection::stream).filter(state -> state.getPath().contains(extension)).count()); + assertEquals(numLogFiles, stats.stream().mapToLong(Collection::size).sum()); + 
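+ // Sketch of the compaction flow these tests now follow (grounded in this patch; in this
+ // particular test the commit is intentionally left out so the pending compaction can be
+ // rolled back below):
+ //
+ //   HoodieWriteMetadata<JavaRDD<WriteStatus>> metadata = writeClient.compact(instantTime);
+ //   HoodieCommitMetadata commitMetadata = metadata.getCommitMetadata().get();
+ //   writeClient.commitCompaction(instantTime, commitMetadata, Option.empty());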
//writeClient.commitCompaction(newCommitTime, statuses, Option.empty()); // Trigger a rollback of compaction table.getActiveTimeline().reload(); @@ -815,14 +864,15 @@ private List updateRecords(SparkRDDWriteClient client, HoodieTestD private long doCompaction(SparkRDDWriteClient client, HoodieTableMetaClient metaClient, HoodieWriteConfig cfg, long numLogFiles) throws IOException { // Do a compaction String instantTime = client.scheduleCompaction(Option.empty()).get().toString(); - JavaRDD writeStatuses = (JavaRDD) client.compact(instantTime); + HoodieWriteMetadata> compactionMetadata = client.compact(instantTime); metaClient.reloadActiveTimeline(); HoodieTable table = HoodieSparkTable.create(cfg, context(), metaClient); String extension = table.getBaseFileExtension(); - assertEquals(numLogFiles, writeStatuses.map(status -> status.getStat().getPath().contains(extension)).count()); - assertEquals(numLogFiles, writeStatuses.count()); - client.commitCompaction(instantTime, writeStatuses, Option.empty()); + Collection> stats = compactionMetadata.getCommitMetadata().get().getPartitionToWriteStats().values(); + assertEquals(numLogFiles, stats.stream().flatMap(Collection::stream).filter(state -> state.getPath().contains(extension)).count()); + assertEquals(numLogFiles, stats.stream().mapToLong(Collection::size).sum()); + client.commitCompaction(instantTime, compactionMetadata.getCommitMetadata().get(), Option.empty()); return numLogFiles; } diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/functional/TestMarkerBasedRollbackStrategy.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/functional/TestMarkerBasedRollbackStrategy.java index 8b23cf25768e3..fd2af1cdca25a 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/functional/TestMarkerBasedRollbackStrategy.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/functional/TestMarkerBasedRollbackStrategy.java @@ -133,7 +133,6 @@ public void testCopyOnWriteRollback(boolean useFileListingMetadata) throws Excep assertEquals(1, stat.getSuccessDeleteFiles().size()); assertEquals(0, stat.getFailedDeleteFiles().size()); assertEquals(0, stat.getCommandBlocksCount().size()); - assertEquals(0, stat.getWrittenLogFileSizeMap().size()); } } } @@ -162,8 +161,6 @@ public void testMergeOnReadRollback(boolean useFileListingMetadata) throws Excep assertEquals(0, stat.getFailedDeleteFiles().size()); assertEquals(1, stat.getCommandBlocksCount().size()); stat.getCommandBlocksCount().forEach((fileStatus, len) -> assertTrue(fileStatus.getPath().getName().contains(HoodieFileFormat.HOODIE_LOG.getFileExtension()))); - assertEquals(1, stat.getWrittenLogFileSizeMap().size()); - stat.getWrittenLogFileSizeMap().forEach((fileStatus, len) -> assertTrue(fileStatus.getPath().getName().contains(HoodieFileFormat.HOODIE_LOG.getFileExtension()))); } } } diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/marker/TestWriteMarkersBase.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/marker/TestWriteMarkersBase.java index 5f96041b372d9..6ba783c749ffb 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/marker/TestWriteMarkersBase.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/marker/TestWriteMarkersBase.java @@ -103,7 +103,7 @@ public void testDeletionWhenMarkerDirNotExists() throws IOException { @ParameterizedTest @ValueSource(booleans = {true, false}) public void 
testDataPathsWhenCreatingOrMerging(boolean isTablePartitioned) throws IOException { - // add markfiles + // add marker files createSomeMarkers(isTablePartitioned); // add invalid file createInvalidFile(isTablePartitioned ? "2020/06/01" : "", "invalid_file3"); diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/upgrade/TestUpgradeDowngrade.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/upgrade/TestUpgradeDowngrade.java index 5f5dfdec5dce4..403b67e554d76 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/upgrade/TestUpgradeDowngrade.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/upgrade/TestUpgradeDowngrade.java @@ -232,6 +232,43 @@ public void testUpgradeOneToTwo(HoodieTableType tableType) throws IOException { assertTableProps(cfg); } + @Test + public void testUpgradeDowngradeBetweenThreeAndCurrentVersion() throws IOException { + // init config, table and client. + Map params = new HashMap<>(); + addNewTableParamsToProps(params); + HoodieWriteConfig cfg = getConfigBuilder().withAutoCommit(false).withRollbackUsingMarkers(false).withProps(params).build(); + + // write inserts + SparkRDDWriteClient client = getHoodieWriteClient(cfg); + doInsert(client); + + // current version should have TABLE_CHECKSUM key + assertEquals(HoodieTableVersion.current(), metaClient.getTableConfig().getTableVersion()); + assertTableVersionFromPropertyFile(HoodieTableVersion.current()); + assertTrue(metaClient.getTableConfig().getProps().containsKey(HoodieTableConfig.TABLE_CHECKSUM.key())); + String checksum = metaClient.getTableConfig().getProps().getString(HoodieTableConfig.TABLE_CHECKSUM.key()); + + // downgrade to version 3 and check TABLE_CHECKSUM is still present + new UpgradeDowngrade(metaClient, cfg, context, SparkUpgradeDowngradeHelper.getInstance()).run(HoodieTableVersion.THREE, null); + assertEquals(HoodieTableVersion.THREE.versionCode(), metaClient.getTableConfig().getTableVersion().versionCode()); + assertTableVersionFromPropertyFile(HoodieTableVersion.THREE); + assertTrue(metaClient.getTableConfig().getProps().containsKey(HoodieTableConfig.TABLE_CHECKSUM.key())); + assertEquals(checksum, metaClient.getTableConfig().getProps().getString(HoodieTableConfig.TABLE_CHECKSUM.key())); + + // remove TABLE_CHECKSUM and upgrade to current version + metaClient.getTableConfig().getProps().remove(HoodieTableConfig.TABLE_CHECKSUM.key()); + new UpgradeDowngrade(metaClient, cfg, context, SparkUpgradeDowngradeHelper.getInstance()).run(HoodieTableVersion.current(), null); + + // verify upgrade and TABLE_CHECKSUM + metaClient = HoodieTableMetaClient.builder().setConf(context.getHadoopConf().get()).setBasePath(cfg.getBasePath()) + .setLayoutVersion(Option.of(new TimelineLayoutVersion(cfg.getTimelineLayoutVersion()))).build(); + assertEquals(HoodieTableVersion.current().versionCode(), metaClient.getTableConfig().getTableVersion().versionCode()); + assertTableVersionFromPropertyFile(HoodieTableVersion.current()); + assertTrue(metaClient.getTableConfig().getProps().containsKey(HoodieTableConfig.TABLE_CHECKSUM.key())); + assertEquals(checksum, metaClient.getTableConfig().getProps().getString(HoodieTableConfig.TABLE_CHECKSUM.key())); + } + private void addNewTableParamsToProps(Map params) { params.put(KeyGeneratorOptions.RECORDKEY_FIELD_NAME.key(), "uuid"); params.put(KeyGeneratorOptions.PARTITIONPATH_FIELD_NAME.key(), "partition_path"); @@ -497,7 +534,7 @@ private Pair, List> twoUpsertCommitDataWithTwoP //just 
generate two partitions dataGen = new HoodieTestDataGenerator(new String[] {DEFAULT_FIRST_PARTITION_PATH, DEFAULT_SECOND_PARTITION_PATH}); //1. prepare data - HoodieTestDataGenerator.writePartitionMetadata(metaClient.getFs(), new String[] {DEFAULT_FIRST_PARTITION_PATH, DEFAULT_SECOND_PARTITION_PATH}, basePath); + HoodieTestDataGenerator.writePartitionMetadataDeprecated(metaClient.getFs(), new String[] {DEFAULT_FIRST_PARTITION_PATH, DEFAULT_SECOND_PARTITION_PATH}, basePath); /** * Write 1 (only inserts) */ diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieClientTestBase.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieClientTestBase.java index ee3c309b30f28..16fd48af6c014 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieClientTestBase.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieClientTestBase.java @@ -23,6 +23,7 @@ import org.apache.hudi.common.HoodieCleanStat; import org.apache.hudi.common.fs.ConsistencyGuardConfig; import org.apache.hudi.common.model.EmptyHoodieRecordPayload; +import org.apache.hudi.common.model.HoodieAvroRecord; import org.apache.hudi.common.model.HoodieFailedWritesCleaningPolicy; import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.model.HoodiePartitionMetadata; @@ -274,7 +275,7 @@ private Function> wrapDeleteKeysGenFunctionForPreppedCa final HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(basePath).setLoadActiveTimelineOnLoad(true).build(); HoodieSparkTable table = HoodieSparkTable.create(writeConfig, context, metaClient); JavaRDD recordsToDelete = jsc.parallelize(records, 1) - .map(key -> new HoodieRecord(key, new EmptyHoodieRecordPayload())); + .map(key -> new HoodieAvroRecord(key, new EmptyHoodieRecordPayload())); JavaRDD taggedRecords = tagLocation(index, recordsToDelete, table); return taggedRecords.map(record -> record.getKey()).collect(); }; diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieClientTestHarness.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieClientTestHarness.java index 906f13d7a63b8..f339f5ed910db 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieClientTestHarness.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieClientTestHarness.java @@ -59,7 +59,6 @@ import org.apache.hudi.metadata.HoodieBackedTableMetadataWriter; import org.apache.hudi.metadata.HoodieTableMetadata; import org.apache.hudi.metadata.HoodieTableMetadataWriter; -import org.apache.hudi.metadata.MetadataPartitionType; import org.apache.hudi.metadata.SparkHoodieBackedTableMetadataWriter; import org.apache.hudi.table.HoodieSparkTable; import org.apache.hudi.table.HoodieTable; @@ -680,7 +679,7 @@ private void runFullValidation(HoodieWriteConfig writeConfig, String metadataTab // in the .hoodie folder. 
List metadataTablePartitions = FSUtils.getAllPartitionPaths(engineContext, HoodieTableMetadata.getMetadataTableBasePath(basePath), false, false); - Assertions.assertEquals(MetadataPartitionType.values().length, metadataTablePartitions.size()); + Assertions.assertEquals(metadataWriter.getEnabledPartitionTypes().size(), metadataTablePartitions.size()); // Metadata table should automatically compact and clean // versions are +1 as autoClean / compaction happens end of commits diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieClientTestUtils.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieClientTestUtils.java index 6dffd535b9145..05d7f99446e94 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieClientTestUtils.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/HoodieClientTestUtils.java @@ -207,7 +207,7 @@ public static List getLatestBaseFiles(String basePath, FileSyste } /** - * Reads the paths under the a hoodie table out as a DataFrame. + * Reads the paths under the hoodie table out as a DataFrame. */ public static Dataset read(JavaSparkContext jsc, String basePath, SQLContext sqlContext, FileSystem fs, String... paths) { diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/KeyGeneratorTestUtilities.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/KeyGeneratorTestUtilities.java index 37a58fb3ecfb7..c2256f40c6b98 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/KeyGeneratorTestUtilities.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/KeyGeneratorTestUtilities.java @@ -18,12 +18,10 @@ package org.apache.hudi.testutils; -import org.apache.hudi.AvroConversionHelper; -import org.apache.hudi.AvroConversionUtils; - import org.apache.avro.Schema; import org.apache.avro.generic.GenericData; import org.apache.avro.generic.GenericRecord; +import org.apache.hudi.AvroConversionUtils; import org.apache.spark.package$; import org.apache.spark.sql.Row; import org.apache.spark.sql.catalyst.InternalRow; @@ -33,16 +31,15 @@ import org.apache.spark.sql.catalyst.expressions.Attribute; import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema; import org.apache.spark.sql.types.StructType; +import scala.Function1; +import scala.collection.JavaConversions; +import scala.collection.JavaConverters; import java.lang.reflect.InvocationTargetException; import java.lang.reflect.Method; import java.util.List; import java.util.stream.Collectors; -import scala.Function1; -import scala.collection.JavaConversions; -import scala.collection.JavaConverters; - public class KeyGeneratorTestUtilities { public static final String NESTED_COL_SCHEMA = "{\"type\":\"record\", \"name\":\"nested_col\",\"fields\": [" @@ -51,8 +48,7 @@ public class KeyGeneratorTestUtilities { + "{\"name\": \"timestamp\",\"type\": \"long\"},{\"name\": \"_row_key\", \"type\": \"string\"}," + "{\"name\": \"ts_ms\", \"type\": \"string\"}," + "{\"name\": \"pii_col\", \"type\": \"string\"}," - + "{\"name\": \"nested_col\",\"type\": " - + NESTED_COL_SCHEMA + "}" + + "{\"name\": \"nested_col\",\"type\": [\"null\", " + NESTED_COL_SCHEMA + "]}" + "]}"; public static final String TEST_STRUCTNAME = "test_struct_name"; @@ -86,8 +82,8 @@ public static Row getRow(GenericRecord record) { } public static Row getRow(GenericRecord record, Schema schema, StructType structType) { - Function1 converterFn 
= AvroConversionHelper.createConverterToRow(schema, structType); - Row row = (Row) converterFn.apply(record); + Function1 converterFn = AvroConversionUtils.createConverterToRow(schema, structType); + Row row = converterFn.apply(record); int fieldCount = structType.fieldNames().length; Object[] values = new Object[fieldCount]; for (int i = 0; i < fieldCount; i++) { diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/SparkClientFunctionalTestHarness.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/SparkClientFunctionalTestHarness.java index fb19a63259e19..94e080cae4804 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/SparkClientFunctionalTestHarness.java +++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/SparkClientFunctionalTestHarness.java @@ -214,12 +214,22 @@ protected JavaRDD updateLocation( } protected Stream insertRecordsToMORTable(HoodieTableMetaClient metaClient, List records, - SparkRDDWriteClient client, HoodieWriteConfig cfg, String commitTime) throws IOException { + SparkRDDWriteClient client, HoodieWriteConfig cfg, String commitTime) throws IOException { + return insertRecordsToMORTable(metaClient, records, client, cfg, commitTime, false); + } + + protected Stream insertRecordsToMORTable(HoodieTableMetaClient metaClient, List records, + SparkRDDWriteClient client, HoodieWriteConfig cfg, String commitTime, + boolean doExplicitCommit) throws IOException { HoodieTableMetaClient reloadedMetaClient = HoodieTableMetaClient.reload(metaClient); JavaRDD writeRecords = jsc().parallelize(records, 1); - List statuses = client.insert(writeRecords, commitTime).collect(); + JavaRDD statusesRdd = client.insert(writeRecords, commitTime); + List statuses = statusesRdd.collect(); assertNoWriteErrors(statuses); + if (doExplicitCommit) { + client.commit(commitTime, statusesRdd); + } assertFileSizesEqual(statuses, status -> FSUtils.getFileSize(reloadedMetaClient.getFs(), new Path(reloadedMetaClient.getBasePath(), status.getStat().getPath()))); HoodieTable hoodieTable = HoodieSparkTable.create(cfg, context(), reloadedMetaClient); @@ -243,6 +253,11 @@ protected Stream insertRecordsToMORTable(HoodieTableMetaClient m } protected void updateRecordsInMORTable(HoodieTableMetaClient metaClient, List records, SparkRDDWriteClient client, HoodieWriteConfig cfg, String commitTime) throws IOException { + updateRecordsInMORTable(metaClient, records, client, cfg, commitTime, true); + } + + protected void updateRecordsInMORTable(HoodieTableMetaClient metaClient, List records, SparkRDDWriteClient client, HoodieWriteConfig cfg, String commitTime, + boolean doExplicitCommit) throws IOException { HoodieTableMetaClient reloadedMetaClient = HoodieTableMetaClient.reload(metaClient); Map recordsMap = new HashMap<>(); @@ -252,9 +267,13 @@ protected void updateRecordsInMORTable(HoodieTableMetaClient metaClient, List statuses = client.upsert(jsc().parallelize(records, 1), commitTime).collect(); + JavaRDD statusesRdd = client.upsert(jsc().parallelize(records, 1), commitTime); + List statuses = statusesRdd.collect(); // Verify there are no errors assertNoWriteErrors(statuses); + if (doExplicitCommit) { + client.commit(commitTime, statusesRdd); + } assertFileSizesEqual(statuses, status -> FSUtils.getFileSize(reloadedMetaClient.getFs(), new Path(reloadedMetaClient.getBasePath(), status.getStat().getPath()))); Option deltaCommit = reloadedMetaClient.getActiveTimeline().getDeltaCommitTimeline().lastInstant(); diff 
--git a/hudi-common/pom.xml b/hudi-common/pom.xml index e19070a6f9afe..1a558aeae3326 100644 --- a/hudi-common/pom.xml +++ b/hudi-common/pom.xml @@ -36,61 +36,7 @@ - - - - net.alchim31.maven - scala-maven-plugin - ${scala-maven-plugin.version} - - - -nobootcp - -target:jvm-1.8 - - false - - - - org.apache.maven.plugins - maven-compiler-plugin - - - - - - net.alchim31.maven - scala-maven-plugin - - - scala-compile-first - process-resources - - add-source - compile - - - - scala-test-compile - process-test-resources - - testCompile - - - - - - org.apache.maven.plugins - maven-compiler-plugin - - - compile - - compile - - - - org.apache.maven.plugins maven-jar-plugin @@ -111,10 +57,6 @@ org.apache.rat apache-rat-plugin - - org.scalastyle - scalastyle-maven-plugin - org.jacoco jacoco-maven-plugin @@ -156,13 +98,6 @@ - - - org.scala-lang - scala-library - ${scala.version} - - com.fasterxml.jackson.core diff --git a/hudi-common/src/main/avro/HoodieClusteringGroup.avsc b/hudi-common/src/main/avro/HoodieClusteringGroup.avsc index b2444be84aa00..f2af6b68db873 100644 --- a/hudi-common/src/main/avro/HoodieClusteringGroup.avsc +++ b/hudi-common/src/main/avro/HoodieClusteringGroup.avsc @@ -19,7 +19,6 @@ "namespace":"org.apache.hudi.avro.model", "type":"record", "name":"HoodieClusteringGroup", - "type":"record", "fields":[ { /* Group of files that needs to merged. All the slices in a group will belong to same partition initially. diff --git a/hudi-common/src/main/avro/HoodieMetadata.avsc b/hudi-common/src/main/avro/HoodieMetadata.avsc index bf85587a3a7ac..4037dd0f1ab01 100644 --- a/hudi-common/src/main/avro/HoodieMetadata.avsc +++ b/hudi-common/src/main/avro/HoodieMetadata.avsc @@ -30,27 +30,142 @@ "doc": "Type of the metadata record", "type": "int" }, - { "name": "filesystemMetadata", + { "doc": "Contains information about partitions and files within the dataset", - "type": ["null", { - "type": "map", - "values": { + "name": "filesystemMetadata", + "type": [ + "null", + { + "type": "map", + "values": { + "type": "record", + "name": "HoodieMetadataFileInfo", + "fields": [ + { + "name": "size", + "type": "long", + "doc": "Size of the file" + }, + { + "name": "isDeleted", + "type": "boolean", + "doc": "True if this file has been deleted" + } + ] + } + } + ] + }, + { + "doc": "Metadata Index of bloom filters for all data files in the user table", + "name": "BloomFilterMetadata", + "type": [ + "null", + { + "doc": "Data file bloom filter details", + "name": "HoodieMetadataBloomFilter", "type": "record", - "name": "HoodieMetadataFileInfo", "fields": [ { - "name": "size", - "type": "long", - "doc": "Size of the file" + "doc": "Bloom filter type code", + "name": "type", + "type": "string" + }, + { + "doc": "Instant timestamp when this metadata was created/updated", + "name": "timestamp", + "type": "string" + }, + { + "doc": "Bloom filter binary byte array", + "name": "bloomFilter", + "type": "bytes" + }, + { + "doc": "Bloom filter entry valid/deleted flag", + "name": "isDeleted", + "type": "boolean" + } + ] + } + ], + "default" : null + }, + { + "doc": "Metadata Index of column statistics for all data files in the user table", + "name": "ColumnStatsMetadata", + "type": [ + "null", + { + "doc": "Data file column statistics", + "name": "HoodieMetadataColumnStats", + "type": "record", + "fields": [ + { + "doc": "File name for which this column statistics applies", + "name": "fileName", + "type": [ + "null", + "string" + ] + }, + { + "doc": "Minimum value in the range. 
Based on user data table schema, we can convert this to appropriate type", + "name": "minValue", + "type": [ + "null", + "string" + ] + }, + { + "doc": "Maximum value in the range. Based on user data table schema, we can convert it to appropriate type", + "name": "maxValue", + "type": [ + "null", + "string" + ] + }, + { + "doc": "Total count of values", + "name": "valueCount", + "type": [ + "null", + "long" + ] + }, + { + "doc": "Total count of null values", + "name": "nullCount", + "type": [ + "null", + "long" + ] + }, + { + "doc": "Total storage size on disk", + "name": "totalSize", + "type": [ + "null", + "long" + ] + }, + { + "doc": "Total uncompressed storage size on disk", + "name": "totalUncompressedSize", + "type": [ + "null", + "long" + ] }, { + "doc": "Column range entry valid/deleted flag", "name": "isDeleted", - "type": "boolean", - "doc": "True if this file has been deleted" + "type": "boolean" } ] } - }] + ], + "default" : null } ] } diff --git a/hudi-common/src/main/avro/HoodieRestorePlan.avsc b/hudi-common/src/main/avro/HoodieRestorePlan.avsc new file mode 100644 index 0000000000000..1ad9e6a4b9c80 --- /dev/null +++ b/hudi-common/src/main/avro/HoodieRestorePlan.avsc @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +{ + "namespace":"org.apache.hudi.avro.model", + "type":"record", + "name":"HoodieRestorePlan", + "fields":[ + { + "name": "instantsToRollback", + "default": [], + "type": { + "type": "array", + "default": null, + "items": "HoodieInstantInfo" + } + }, + { + "name":"version", + "type":["int", "null"], + "default": 1 + }] +} \ No newline at end of file diff --git a/hudi-common/src/main/avro/HoodieRollbackMetadata.avsc b/hudi-common/src/main/avro/HoodieRollbackMetadata.avsc index f342db8738d33..5a300cda9e638 100644 --- a/hudi-common/src/main/avro/HoodieRollbackMetadata.avsc +++ b/hudi-common/src/main/avro/HoodieRollbackMetadata.avsc @@ -38,14 +38,6 @@ "type": "long", "doc": "Size of this file in bytes" } - }], "default":null }, - {"name": "writtenLogFiles", "type": ["null", { - "type": "map", - "doc": "Log files written that were expected to be rolledback", - "values": { - "type": "long", - "doc": "Size of this file in bytes" - } }], "default":null } ] }}}, diff --git a/hudi-common/src/main/java/org/apache/hudi/BaseHoodieTableFileIndex.java b/hudi-common/src/main/java/org/apache/hudi/BaseHoodieTableFileIndex.java new file mode 100644 index 0000000000000..428da925c49ea --- /dev/null +++ b/hudi-common/src/main/java/org/apache/hudi/BaseHoodieTableFileIndex.java @@ -0,0 +1,373 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi; + +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.Path; +import org.apache.hudi.common.config.HoodieMetadataConfig; +import org.apache.hudi.common.config.TypedProperties; +import org.apache.hudi.common.engine.HoodieEngineContext; +import org.apache.hudi.common.fs.FSUtils; +import org.apache.hudi.common.model.BaseFile; +import org.apache.hudi.common.model.FileSlice; +import org.apache.hudi.common.model.HoodieLogFile; +import org.apache.hudi.common.model.HoodieTableQueryType; +import org.apache.hudi.common.model.HoodieTableType; +import org.apache.hudi.common.table.HoodieTableMetaClient; +import org.apache.hudi.common.table.timeline.HoodieInstant; +import org.apache.hudi.common.table.timeline.HoodieTimeline; +import org.apache.hudi.common.table.view.FileSystemViewStorageConfig; +import org.apache.hudi.common.table.view.HoodieTableFileSystemView; +import org.apache.hudi.common.util.CollectionUtils; +import org.apache.hudi.common.util.Option; +import org.apache.hudi.exception.HoodieIOException; +import org.apache.log4j.LogManager; +import org.apache.log4j.Logger; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.function.Function; +import java.util.stream.Collectors; + +/** + * Common (engine-agnostic) File Index implementation enabling individual query engines to + * list Hudi Table contents based on the + * + *

+ * <ul>
+ *   <li>Table type (MOR, COW)</li>
+ *   <li>Query type (snapshot, read_optimized, incremental)</li>
+ *   <li>Query instant/range</li>
+ * </ul>
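+ *
+ * <p>Engine-specific subclasses only have to supply partition-value parsing. A minimal
+ * sketch (the subclass and its parsing rule are illustrative, not part of this class;
+ * constructor wiring is elided):
+ * <pre>{@code
+ *   class SimpleFileIndex extends BaseHoodieTableFileIndex {
+ *     // treat every slash-separated path segment as a String partition value
+ *     protected Object[] parsePartitionColumnValues(String[] partitionColumns, String partitionPath) {
+ *       return partitionPath.isEmpty() ? new Object[0] : partitionPath.split("/");
+ *     }
+ *   }
+ * }</pre>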
+ */ +public abstract class BaseHoodieTableFileIndex { + + private static final Logger LOG = LogManager.getLogger(BaseHoodieTableFileIndex.class); + + private final String[] partitionColumns; + + private final FileSystemViewStorageConfig fileSystemStorageConfig; + private final HoodieMetadataConfig metadataConfig; + + private final HoodieTableQueryType queryType; + private final Option specifiedQueryInstant; + protected final List queryPaths; + + private final boolean shouldIncludePendingCommits; + private final boolean shouldValidateInstant; + + private final HoodieTableType tableType; + protected final String basePath; + + private final HoodieTableMetaClient metaClient; + private final HoodieEngineContext engineContext; + + private final transient FileStatusCache fileStatusCache; + + protected transient volatile long cachedFileSize = 0L; + protected transient volatile Map> cachedAllInputFileSlices; + + protected volatile boolean queryAsNonePartitionedTable = false; + + private transient volatile HoodieTableFileSystemView fileSystemView = null; + + /** + * @param engineContext Hudi engine-specific context + * @param metaClient Hudi table's meta-client + * @param configProperties unifying configuration (in the form of generic properties) + * @param queryType target query type + * @param queryPaths target DFS paths being queried + * @param specifiedQueryInstant instant as of which table is being queried + * @param shouldIncludePendingCommits flags whether file-index should exclude any pending operations + * @param shouldValidateInstant flags to validate whether query instant is present in the timeline + * @param fileStatusCache transient cache of fetched [[FileStatus]]es + */ + public BaseHoodieTableFileIndex(HoodieEngineContext engineContext, + HoodieTableMetaClient metaClient, + TypedProperties configProperties, + HoodieTableQueryType queryType, + List queryPaths, + Option specifiedQueryInstant, + boolean shouldIncludePendingCommits, + boolean shouldValidateInstant, + FileStatusCache fileStatusCache) { + this.partitionColumns = metaClient.getTableConfig().getPartitionFields() + .orElse(new String[0]); + + this.fileSystemStorageConfig = FileSystemViewStorageConfig.newBuilder() + .fromProperties(configProperties) + .build(); + this.metadataConfig = HoodieMetadataConfig.newBuilder() + .fromProperties(configProperties) + .build(); + + this.queryType = queryType; + this.queryPaths = queryPaths; + this.specifiedQueryInstant = specifiedQueryInstant; + this.shouldIncludePendingCommits = shouldIncludePendingCommits; + this.shouldValidateInstant = shouldValidateInstant; + + this.tableType = metaClient.getTableType(); + this.basePath = metaClient.getBasePath(); + + this.metaClient = metaClient; + this.engineContext = engineContext; + this.fileStatusCache = fileStatusCache; + + doRefresh(); + } + + protected abstract Object[] parsePartitionColumnValues(String[] partitionColumns, String partitionPath); + + /** + * Returns latest completed instant as seen by this instance of the file-index + */ + public Option getLatestCompletedInstant() { + return getActiveTimeline().filterCompletedInstants().lastInstant(); + } + + /** + * Returns table's base-path + */ + public String getBasePath() { + return metaClient.getBasePath(); + } + + /** + * Fetch list of latest base files and log files per partition. 
+ * + * @return mapping from string partition paths to its base/log files + */ + public Map> listFileSlices() { + return cachedAllInputFileSlices.entrySet() + .stream() + .collect(Collectors.toMap(e -> e.getKey().path, Map.Entry::getValue)); + } + + protected List getAllQueryPartitionPaths() { + List queryRelativePartitionPaths = queryPaths.stream() + .map(path -> FSUtils.getRelativePartitionPath(new Path(basePath), path)) + .collect(Collectors.toList()); + + // Load all the partition path from the basePath, and filter by the query partition path. + // TODO load files from the queryRelativePartitionPaths directly. + List matchedPartitionPaths = FSUtils.getAllPartitionPaths(engineContext, metadataConfig, basePath) + .stream() + .filter(path -> queryRelativePartitionPaths.stream().anyMatch(path::startsWith)) + .collect(Collectors.toList()); + + // Convert partition's path into partition descriptor + return matchedPartitionPaths.stream() + .map(partitionPath -> { + Object[] partitionColumnValues = parsePartitionColumnValues(partitionColumns, partitionPath); + return new PartitionPath(partitionPath, partitionColumnValues); + }) + .collect(Collectors.toList()); + } + + protected void refresh() { + fileStatusCache.invalidate(); + doRefresh(); + } + + protected HoodieTimeline getActiveTimeline() { + // NOTE: We have to use commits and compactions timeline, to make sure that we're properly + // handling the following case: when records are inserted into the new log-file w/in the file-group + // that is under the pending compaction process, new log-file will bear the compaction's instant (on the + // timeline) in its name, as opposed to the base-file's commit instant. To make sure we're not filtering + // such log-file we have to _always_ include pending compaction instants into consideration + // TODO(HUDI-3302) re-evaluate whether we should not filter any commits in here + HoodieTimeline timeline = metaClient.getCommitsAndCompactionTimeline(); + if (shouldIncludePendingCommits) { + return timeline; + } else { + return timeline.filterCompletedAndCompactionInstants(); + } + } + + /** + * Load all partition paths and it's files under the query table path. 
+ */ + private Map loadPartitionPathFiles() { + // List files in all partition paths + List pathToFetch = new ArrayList<>(); + Map cachedPartitionToFiles = new HashMap<>(); + + // Fetch from the FileStatusCache + List partitionPaths = getAllQueryPartitionPaths(); + partitionPaths.forEach(partitionPath -> { + Option filesInPartition = fileStatusCache.get(partitionPath.fullPartitionPath(basePath)); + if (filesInPartition.isPresent()) { + cachedPartitionToFiles.put(partitionPath, filesInPartition.get()); + } else { + pathToFetch.add(partitionPath); + } + }); + + Map fetchedPartitionToFiles; + + if (pathToFetch.isEmpty()) { + fetchedPartitionToFiles = Collections.emptyMap(); + } else { + Map fullPartitionPathsMapToFetch = pathToFetch.stream() + .collect(Collectors.toMap( + partitionPath -> partitionPath.fullPartitionPath(basePath).toString(), + Function.identity()) + ); + + fetchedPartitionToFiles = + FSUtils.getFilesInPartitions( + engineContext, + metadataConfig, + basePath, + fullPartitionPathsMapToFetch.keySet().toArray(new String[0]), + fileSystemStorageConfig.getSpillableDir()) + .entrySet() + .stream() + .collect(Collectors.toMap(e -> fullPartitionPathsMapToFetch.get(e.getKey()), e -> e.getValue())); + + } + + // Update the fileStatusCache + fetchedPartitionToFiles.forEach((partitionPath, filesInPartition) -> { + fileStatusCache.put(partitionPath.fullPartitionPath(basePath), filesInPartition); + }); + + return CollectionUtils.combine(cachedPartitionToFiles, fetchedPartitionToFiles); + } + + private void doRefresh() { + long startTime = System.currentTimeMillis(); + + Map partitionFiles = loadPartitionPathFiles(); + FileStatus[] allFiles = partitionFiles.values().stream().flatMap(Arrays::stream).toArray(FileStatus[]::new); + + metaClient.reloadActiveTimeline(); + + HoodieTimeline activeTimeline = getActiveTimeline(); + Option latestInstant = activeTimeline.lastInstant(); + + // TODO we can optimize the flow by: + // - First fetch list of files from instants of interest + // - Load FileStatus's + fileSystemView = new HoodieTableFileSystemView(metaClient, activeTimeline, allFiles); + + Option queryInstant = + specifiedQueryInstant.or(() -> latestInstant.map(HoodieInstant::getTimestamp)); + + validate(activeTimeline, queryInstant); + + if (tableType.equals(HoodieTableType.MERGE_ON_READ) && queryType.equals(HoodieTableQueryType.SNAPSHOT)) { + cachedAllInputFileSlices = partitionFiles.keySet().stream() + .collect(Collectors.toMap( + Function.identity(), + partitionPath -> + queryInstant.map(instant -> + fileSystemView.getLatestMergedFileSlicesBeforeOrOn(partitionPath.path, queryInstant.get()) + .collect(Collectors.toList()) + ) + .orElse(Collections.emptyList()) + ) + ); + } else { + cachedAllInputFileSlices = partitionFiles.keySet().stream() + .collect(Collectors.toMap( + Function.identity(), + partitionPath -> + queryInstant.map(instant -> + fileSystemView.getLatestFileSlicesBeforeOrOn(partitionPath.path, instant, true) + ) + .orElse(fileSystemView.getLatestFileSlices(partitionPath.path)) + .collect(Collectors.toList()) + ) + ); + } + + cachedFileSize = cachedAllInputFileSlices.values().stream() + .flatMap(Collection::stream) + .mapToLong(BaseHoodieTableFileIndex::fileSliceSize) + .sum(); + + // If the partition value contains InternalRow.empty, we query it as a non-partitioned table. 
+ queryAsNonePartitionedTable = partitionFiles.keySet().stream().anyMatch(p -> p.values.length == 0); + + long duration = System.currentTimeMillis() - startTime; + + LOG.info(String.format("Refresh table %s, spent: %d ms", metaClient.getTableConfig().getTableName(), duration)); + } + + private void validate(HoodieTimeline activeTimeline, Option queryInstant) { + if (shouldValidateInstant) { + if (queryInstant.isPresent() && !activeTimeline.containsInstant(queryInstant.get())) { + throw new HoodieIOException(String.format("Query instant (%s) not found in the timeline", queryInstant.get())); + } + } + } + + private static long fileSliceSize(FileSlice fileSlice) { + long logFileSize = fileSlice.getLogFiles().map(HoodieLogFile::getFileSize) + .filter(s -> s > 0) + .reduce(0L, Long::sum); + + return fileSlice.getBaseFile().map(BaseFile::getFileLen).orElse(0L) + logFileSize; + } + + protected static final class PartitionPath { + final String path; + final Object[] values; + + public PartitionPath(String path, Object[] values) { + this.path = path; + this.values = values; + } + + Path fullPartitionPath(String basePath) { + if (!path.isEmpty()) { + return new Path(basePath, path); + } + + return new Path(basePath); + } + + @Override + public boolean equals(Object other) { + return other instanceof PartitionPath + && Objects.equals(path, ((PartitionPath) other).path) + && Arrays.equals(values, ((PartitionPath) other).values); + } + + @Override + public int hashCode() { + return path.hashCode() * 1103 + Arrays.hashCode(values); + } + } + + protected interface FileStatusCache { + Option get(Path path); + + void put(Path path, FileStatus[] leafFiles); + + void invalidate(); + } +} diff --git a/hudi-common/src/main/java/org/apache/hudi/TypeUtils.java b/hudi-common/src/main/java/org/apache/hudi/TypeUtils.java new file mode 100644 index 0000000000000..6e7d2c87459b5 --- /dev/null +++ b/hudi-common/src/main/java/org/apache/hudi/TypeUtils.java @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi; + +public class TypeUtils { + + /** + * This utility abstracts unsafe type-casting in a way that allows to + *
+ * <ul>
+ *   <li>Search for such type-casts more easily (just searching for usages of this method)</li>
+ *   <li>Avoid type-cast warnings from the compiler</li>
+ * </ul>
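+ *
+ * <p>Minimal usage sketch (the surrounding variables are illustrative):
+ * <pre>{@code
+ *   Object raw = deserialize(bytes);        // statically typed as Object
+ *   List<String> names = unsafeCast(raw);   // single searchable cast, no unchecked warning
+ * }</pre>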
+ */ + @SuppressWarnings("unchecked") + public static T unsafeCast(Object o) { + return (T) o; + } + +} diff --git a/hudi-common/src/main/java/org/apache/hudi/avro/HoodieAvroUtils.java b/hudi-common/src/main/java/org/apache/hudi/avro/HoodieAvroUtils.java index 9fabc647d7773..209721e24a8d9 100644 --- a/hudi-common/src/main/java/org/apache/hudi/avro/HoodieAvroUtils.java +++ b/hudi-common/src/main/java/org/apache/hudi/avro/HoodieAvroUtils.java @@ -18,17 +18,7 @@ package org.apache.hudi.avro; -import org.apache.hudi.common.config.SerializableSchema; -import org.apache.hudi.common.model.HoodieOperation; -import org.apache.hudi.common.model.HoodieRecord; -import org.apache.hudi.common.model.HoodieRecordPayload; -import org.apache.hudi.common.util.Option; -import org.apache.hudi.common.util.StringUtils; -import org.apache.hudi.common.util.collection.Pair; -import org.apache.hudi.exception.HoodieException; -import org.apache.hudi.exception.HoodieIOException; -import org.apache.hudi.exception.SchemaCompatibilityException; - +import org.apache.avro.AvroRuntimeException; import org.apache.avro.Conversions.DecimalConversion; import org.apache.avro.JsonProperties; import org.apache.avro.LogicalTypes; @@ -50,15 +40,22 @@ import org.apache.avro.io.JsonDecoder; import org.apache.avro.io.JsonEncoder; import org.apache.avro.specific.SpecificRecordBase; +import org.apache.hudi.common.config.SerializableSchema; +import org.apache.hudi.common.model.HoodieOperation; +import org.apache.hudi.common.model.HoodieRecord; +import org.apache.hudi.common.model.HoodieRecordPayload; +import org.apache.hudi.common.util.Option; +import org.apache.hudi.common.util.StringUtils; +import org.apache.hudi.common.util.collection.Pair; +import org.apache.hudi.exception.HoodieException; +import org.apache.hudi.exception.HoodieIOException; +import org.apache.hudi.exception.SchemaCompatibilityException; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; import java.math.BigDecimal; import java.nio.ByteBuffer; -import java.nio.charset.StandardCharsets; import java.sql.Timestamp; import java.time.LocalDate; import java.util.ArrayList; @@ -67,8 +64,6 @@ import java.util.List; import java.util.Map; import java.util.stream.Collectors; -import java.util.zip.DeflaterOutputStream; -import java.util.zip.InflaterInputStream; /** * Helper class to do common stuff across Avro. @@ -283,7 +278,7 @@ public static Schema getSchemaForFields(Schema fileSchema, List fields) for (Schema.Field schemaField: fileSchema.getFields()) { if (fields.contains(schemaField.name())) { - toBeAddedFields.add(new Schema.Field(schemaField.name(), schemaField.schema(), schemaField.doc(), schemaField.defaultValue())); + toBeAddedFields.add(new Schema.Field(schemaField.name(), schemaField.schema(), schemaField.doc(), schemaField.defaultVal())); } } recordSchema.setFields(toBeAddedFields); @@ -343,16 +338,26 @@ public static GenericRecord stitchRecords(GenericRecord left, GenericRecord righ } /** - * Given a avro record with a given schema, rewrites it into the new schema while setting fields only from the new + * Given an Avro record with a given schema, rewrites it into the new schema while setting fields only from the new * schema. + * + * NOTE: This method is rewriting every record's field that is record itself recursively. 
It's + * caller's responsibility to make sure that no unnecessary re-writing occurs (by preemptively + * checking whether the record does require re-writing to adhere to the new schema) + * * NOTE: Here, the assumption is that you cannot go from an evolved schema (schema with (N) fields) - * to an older schema (schema with (N-1) fields). All fields present in the older record schema MUST be present in the - * new schema and the default/existing values are carried over. - * This particular method does the following things : - * a) Create a new empty GenericRecord with the new schema. - * b) For GenericRecord, copy over the data from the old schema to the new schema or set default values for all fields of this - * transformed schema - * c) For SpecificRecord, hoodie_metadata_fields have a special treatment. This is done because for code generated + * to an older schema (schema with (N-1) fields). All fields present in the older record schema MUST be present in the + * new schema and the default/existing values are carried over. + * + * This particular method does the following: + *
+ *   1. Create a new empty GenericRecord with the new schema.
+ *   2. For GenericRecord, copy over the data from the old schema to the new schema or set default values for all fields of this transformed schema
+ *   3. For SpecificRecord, hoodie_metadata_fields have a special treatment (see below, and the sketch after this list)
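+ *
+ * A minimal usage sketch (editorial illustration, not part of the original patch;
+ * oldRecord and evolvedSchema are hypothetical):
+ *   GenericRecord rewritten = HoodieAvroUtils.rewriteRecord(oldRecord, evolvedSchema);
+ *   // nested record fields are rewritten recursively against their resolved non-null schema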
+ * + * For SpecificRecord we ignore Hudi Metadata fields, because for code generated * avro classes (HoodieMetadataRecord), the avro record is a SpecificBaseRecord type instead of a GenericRecord. * SpecificBaseRecord throws null pointer exception for record.get(name) if name is not present in the schema of the * record (which happens when converting a SpecificBaseRecord without hoodie_metadata_fields to a new record with it). @@ -364,58 +369,43 @@ public static GenericRecord rewriteRecord(GenericRecord oldRecord, Schema newSch GenericRecord newRecord = new GenericData.Record(newSchema); boolean isSpecificRecord = oldRecord instanceof SpecificRecordBase; for (Schema.Field f : newSchema.getFields()) { - if (!isSpecificRecord) { - copyOldValueOrSetDefault(oldRecord, newRecord, f); - } else if (!isMetadataField(f.name())) { + if (!(isSpecificRecord && isMetadataField(f.name()))) { copyOldValueOrSetDefault(oldRecord, newRecord, f); } } + if (!GenericData.get().validate(newSchema, newRecord)) { throw new SchemaCompatibilityException( "Unable to validate the rewritten record " + oldRecord + " against schema " + newSchema); } - return newRecord; - } - private static void copyOldValueOrSetDefault(GenericRecord oldRecord, GenericRecord newRecord, Schema.Field f) { - // cache the result of oldRecord.get() to save CPU expensive hash lookup - Schema oldSchema = oldRecord.getSchema(); - Object fieldValue = oldSchema.getField(f.name()) == null ? null : oldRecord.get(f.name()); - if (fieldValue == null) { - if (f.defaultVal() instanceof JsonProperties.Null) { - newRecord.put(f.name(), null); - } else { - newRecord.put(f.name(), f.defaultVal()); - } - } else { - newRecord.put(f.name(), fieldValue); - } + return newRecord; } - public static byte[] compress(String text) { - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - try { - OutputStream out = new DeflaterOutputStream(baos); - out.write(text.getBytes(StandardCharsets.UTF_8)); - out.close(); - } catch (IOException e) { - throw new HoodieIOException("IOException while compressing text " + text, e); - } - return baos.toByteArray(); + /** + * Converts list of {@link GenericRecord} provided into the {@link GenericRecord} adhering to the + * provided {@code newSchema}. + * + * To better understand conversion rules please check {@link #rewriteRecord(GenericRecord, Schema)} + */ + public static List rewriteRecords(List records, Schema newSchema) { + return records.stream().map(r -> rewriteRecord(r, newSchema)).collect(Collectors.toList()); } - public static String decompress(byte[] bytes) { - InputStream in = new InflaterInputStream(new ByteArrayInputStream(bytes)); - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - try { - byte[] buffer = new byte[8192]; - int len; - while ((len = in.read(buffer)) > 0) { - baos.write(buffer, 0, len); - } - return new String(baos.toByteArray(), StandardCharsets.UTF_8); - } catch (IOException e) { - throw new HoodieIOException("IOException while decompressing text", e); + private static void copyOldValueOrSetDefault(GenericRecord oldRecord, GenericRecord newRecord, Schema.Field field) { + Schema oldSchema = oldRecord.getSchema(); + Object fieldValue = oldSchema.getField(field.name()) == null ? null : oldRecord.get(field.name()); + + if (fieldValue != null) { + // In case field's value is a nested record, we have to rewrite it as well + Object newFieldValue = fieldValue instanceof GenericRecord + ? 
rewriteRecord((GenericRecord) fieldValue, resolveNullableSchema(field.schema())) + : fieldValue; + newRecord.put(field.name(), newFieldValue); + } else if (field.defaultVal() instanceof JsonProperties.Null) { + newRecord.put(field.name(), null); + } else { + newRecord.put(field.name(), field.defaultVal()); } } @@ -457,23 +447,32 @@ public static Object getNestedFieldVal(GenericRecord record, String fieldName, b String[] parts = fieldName.split("\\."); GenericRecord valueNode = record; int i = 0; - for (; i < parts.length; i++) { - String part = parts[i]; - Object val = valueNode.get(part); - if (val == null) { - break; - } + try { + for (; i < parts.length; i++) { + String part = parts[i]; + Object val = valueNode.get(part); + if (val == null) { + break; + } - // return, if last part of name - if (i == parts.length - 1) { - Schema fieldSchema = valueNode.getSchema().getField(part).schema(); - return convertValueForSpecificDataTypes(fieldSchema, val, consistentLogicalTimestampEnabled); - } else { - // VC: Need a test here - if (!(val instanceof GenericRecord)) { - throw new HoodieException("Cannot find a record at part value :" + part); + // return, if last part of name + if (i == parts.length - 1) { + Schema fieldSchema = valueNode.getSchema().getField(part).schema(); + return convertValueForSpecificDataTypes(fieldSchema, val, consistentLogicalTimestampEnabled); + } else { + // VC: Need a test here + if (!(val instanceof GenericRecord)) { + throw new HoodieException("Cannot find a record at part value :" + part); + } + valueNode = (GenericRecord) val; } - valueNode = (GenericRecord) val; + } + } catch (AvroRuntimeException e) { + // Since Avro 1.10, Avro will throw AvroRuntimeException("Not a valid schema field: " + key) + // rather than return null like the previous version if the record doesn't contain this key. + // So when returnNullIfNotFound is true, catch this exception.
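+ // Editorial illustration of the intended contract (assumed, not part of the original patch):
+ //   getNestedFieldVal(rec, "a.b", true, false)  -> null when "b" is absent from the schema
+ //   getNestedFieldVal(rec, "a.b", false, false) -> rethrows the AvroRuntimeException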
+ if (!returnNullIfNotFound) { + throw e; } } @@ -622,4 +621,24 @@ public static Object getRecordColumnValues(HoodieRecord innerTypes = schema.getTypes(); + Schema nonNullType = + innerTypes.stream() + .filter(it -> it.getType() != Schema.Type.NULL) + .findFirst() + .orElse(null); + + if (innerTypes.size() != 2 || nonNullType == null) { + throw new AvroRuntimeException( + String.format("Unsupported Avro UNION type %s: Only UNION of a null type and a non-null type is supported", schema)); + } + + return nonNullType; + } } diff --git a/hudi-common/src/main/java/org/apache/hudi/avro/HoodieAvroWriteSupport.java b/hudi-common/src/main/java/org/apache/hudi/avro/HoodieAvroWriteSupport.java index 3207cfccd80c2..18827c66bf096 100644 --- a/hudi-common/src/main/java/org/apache/hudi/avro/HoodieAvroWriteSupport.java +++ b/hudi-common/src/main/java/org/apache/hudi/avro/HoodieAvroWriteSupport.java @@ -18,11 +18,10 @@ package org.apache.hudi.avro; +import org.apache.avro.Schema; import org.apache.hudi.common.bloom.BloomFilter; import org.apache.hudi.common.bloom.HoodieDynamicBoundedBloomFilter; import org.apache.hudi.common.util.Option; - -import org.apache.avro.Schema; import org.apache.parquet.avro.AvroWriteSupport; import org.apache.parquet.hadoop.api.WriteSupport; import org.apache.parquet.schema.MessageType; diff --git a/hudi-common/src/main/java/org/apache/hudi/avro/MercifulJsonConverter.java b/hudi-common/src/main/java/org/apache/hudi/avro/MercifulJsonConverter.java index d759a8debf602..15335193414ae 100644 --- a/hudi-common/src/main/java/org/apache/hudi/avro/MercifulJsonConverter.java +++ b/hudi-common/src/main/java/org/apache/hudi/avro/MercifulJsonConverter.java @@ -18,15 +18,14 @@ package org.apache.hudi.avro; -import org.apache.hudi.common.util.collection.Pair; -import org.apache.hudi.exception.HoodieException; -import org.apache.hudi.exception.HoodieIOException; - import com.fasterxml.jackson.databind.ObjectMapper; import org.apache.avro.Schema; import org.apache.avro.Schema.Type; import org.apache.avro.generic.GenericData; import org.apache.avro.generic.GenericRecord; +import org.apache.hudi.common.util.collection.Pair; +import org.apache.hudi.exception.HoodieException; +import org.apache.hudi.exception.HoodieIOException; import java.io.IOException; import java.io.Serializable; @@ -293,7 +292,7 @@ public Pair convert(Object value, String name, Schema schema) { for (Object v : (List) value) { listRes.add(convertJsonToAvroField(v, name, elementSchema)); } - return Pair.of(true, listRes); + return Pair.of(true, new GenericData.Array<>(schema, listRes)); } }; } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/HoodieRollbackStat.java b/hudi-common/src/main/java/org/apache/hudi/common/HoodieRollbackStat.java index 3e4ee34319c7c..a3191fa026c84 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/HoodieRollbackStat.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/HoodieRollbackStat.java @@ -38,16 +38,13 @@ public class HoodieRollbackStat implements Serializable { private final List failedDeleteFiles; // Count of HoodieLogFile to commandBlocks written for a particular rollback private final Map commandBlocksCount; - // all log files with same base instant as instant to be rolledback - private final Map writtenLogFileSizeMap; public HoodieRollbackStat(String partitionPath, List successDeleteFiles, List failedDeleteFiles, - Map commandBlocksCount, Map writtenLogFileSizeMap) { + Map commandBlocksCount) { this.partitionPath = partitionPath; this.successDeleteFiles = 
successDeleteFiles; this.failedDeleteFiles = failedDeleteFiles; this.commandBlocksCount = commandBlocksCount; - this.writtenLogFileSizeMap = writtenLogFileSizeMap; } public Map getCommandBlocksCount() { @@ -66,10 +63,6 @@ public List getFailedDeleteFiles() { return failedDeleteFiles; } - public Map getWrittenLogFileSizeMap() { - return writtenLogFileSizeMap; - } - public static HoodieRollbackStat.Builder newBuilder() { return new Builder(); } @@ -82,7 +75,6 @@ public static class Builder { private List successDeleteFiles; private List failedDeleteFiles; private Map commandBlocksCount; - private Map writtenLogFileSizeMap; private String partitionPath; public Builder withDeletedFileResults(Map deletedFiles) { @@ -108,11 +100,6 @@ public Builder withRollbackBlockAppendResults(Map commandBlock return this; } - public Builder withWrittenLogFileSizeMap(Map writtenLogFileSizeMap) { - this.writtenLogFileSizeMap = writtenLogFileSizeMap; - return this; - } - public Builder withPartitionPath(String partitionPath) { this.partitionPath = partitionPath; return this; @@ -128,10 +115,7 @@ public HoodieRollbackStat build() { if (commandBlocksCount == null) { commandBlocksCount = Collections.EMPTY_MAP; } - if (writtenLogFileSizeMap == null) { - writtenLogFileSizeMap = Collections.EMPTY_MAP; - } - return new HoodieRollbackStat(partitionPath, successDeleteFiles, failedDeleteFiles, commandBlocksCount, writtenLogFileSizeMap); + return new HoodieRollbackStat(partitionPath, successDeleteFiles, failedDeleteFiles, commandBlocksCount); } } } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/bloom/HoodieDynamicBoundedBloomFilter.java b/hudi-common/src/main/java/org/apache/hudi/common/bloom/HoodieDynamicBoundedBloomFilter.java index 343822b13adec..d4bc287c551c2 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/bloom/HoodieDynamicBoundedBloomFilter.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/bloom/HoodieDynamicBoundedBloomFilter.java @@ -63,7 +63,7 @@ public class HoodieDynamicBoundedBloomFilter implements BloomFilter { * @param serString the serialized string which represents the {@link HoodieDynamicBoundedBloomFilter} * @param typeCode type code of the bloom filter */ - HoodieDynamicBoundedBloomFilter(String serString, BloomFilterTypeCode typeCode) { + public HoodieDynamicBoundedBloomFilter(String serString, BloomFilterTypeCode typeCode) { // ignoring the type code for now, since we have just one version byte[] bytes = Base64CodecUtil.decode(serString); DataInputStream dis = new DataInputStream(new ByteArrayInputStream(bytes)); diff --git a/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieMetadataConfig.java b/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieMetadataConfig.java index 51791c945d589..86ff64177b73a 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieMetadataConfig.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/config/HoodieMetadataConfig.java @@ -83,20 +83,24 @@ public final class HoodieMetadataConfig extends HoodieConfig { .key(METADATA_PREFIX + ".keep.min.commits") .defaultValue(20) .sinceVersion("0.7.0") - .withDocumentation("Controls the archival of the metadata table’s timeline."); + .withDocumentation("Archiving service moves older entries from metadata table’s timeline " + + "into an archived log after each write, to keep the overhead constant, even as the " + + "metadata table size grows. 
This config controls the minimum number of instants " + + "to retain in the active timeline."); public static final ConfigProperty MAX_COMMITS_TO_KEEP = ConfigProperty .key(METADATA_PREFIX + ".keep.max.commits") .defaultValue(30) .sinceVersion("0.7.0") - .withDocumentation("Controls the archival of the metadata table’s timeline."); + .withDocumentation("Similar to " + MIN_COMMITS_TO_KEEP.key() + ", this config controls " + + "the maximum number of instants to retain in the active timeline."); // Cleaner commits retained public static final ConfigProperty CLEANER_COMMITS_RETAINED = ConfigProperty .key(METADATA_PREFIX + ".cleaner.commits.retained") .defaultValue(3) .sinceVersion("0.7.0") - .withDocumentation("Controls retention/history for metadata table."); + .withDocumentation("Number of commits to retain, without cleaning, on metadata table."); // Regex to filter out matching directories during bootstrap public static final ConfigProperty DIR_FILTER_REGEX = ConfigProperty @@ -122,20 +126,62 @@ public final class HoodieMetadataConfig extends HoodieConfig { .key(METADATA_PREFIX + ".enable.full.scan.log.files") .defaultValue(true) .sinceVersion("0.10.0") - .withDocumentation("Enable full scanning of log files while reading log records. If disabled, hudi does look up of only interested entries."); + .withDocumentation("Enable full scanning of log files while reading log records. If disabled, Hudi does look up of only interested entries."); + + public static final ConfigProperty ENABLE_METADATA_INDEX_BLOOM_FILTER = ConfigProperty + .key(METADATA_PREFIX + ".index.bloom.filter.enable") + .defaultValue(false) + .sinceVersion("0.11.0") + .withDocumentation("Enable indexing bloom filters of user data files under metadata table. When enabled, " + + "metadata table will have a partition to store the bloom filter index and will be " + + "used during the index lookups."); + + public static final ConfigProperty METADATA_INDEX_BLOOM_FILTER_FILE_GROUP_COUNT = ConfigProperty + .key(METADATA_PREFIX + ".index.bloom.filter.file.group.count") + .defaultValue(4) + .sinceVersion("0.11.0") + .withDocumentation("Metadata bloom filter index partition file group count. This controls the size of the base and " + + "log files and read parallelism in the bloom filter index partition. The recommendation is to size the " + + "file group count such that the base files are under 1GB."); + + public static final ConfigProperty ENABLE_METADATA_INDEX_COLUMN_STATS = ConfigProperty + .key(METADATA_PREFIX + ".index.column.stats.enable") + .defaultValue(false) + .sinceVersion("0.11.0") + .withDocumentation("Enable indexing column ranges of user data files under metadata table key lookups. When " + + "enabled, metadata table will have a partition to store the column ranges and will be " + + "used for pruning files during the index lookups."); + + public static final ConfigProperty METADATA_INDEX_COLUMN_STATS_FILE_GROUP_COUNT = ConfigProperty + .key(METADATA_PREFIX + ".index.column.stats.file.group.count") + .defaultValue(2) + .sinceVersion("0.11.0") + .withDocumentation("Metadata column stats partition file group count. This controls the size of the base and " + + "log files and read parallelism in the column stats index partition. 
The recommendation is to size the " + + "file group count such that the base files are under 1GB."); + + public static final ConfigProperty ENABLE_METADATA_INDEX_COLUMN_STATS_FOR_ALL_COLUMNS = ConfigProperty + .key(METADATA_PREFIX + ".index.column.stats.all_columns.enable") + .defaultValue(true) + .sinceVersion("0.11.0") + .withDocumentation("Enable indexing column ranges of user data files for all columns under " + + "metadata table key lookups. When enabled, metadata table will have a partition to " + + "store the column ranges and will be used for pruning files during the index lookups. " + + "Only applies if " + ENABLE_METADATA_INDEX_COLUMN_STATS.key() + " is enabled."); public static final ConfigProperty POPULATE_META_FIELDS = ConfigProperty .key(METADATA_PREFIX + ".populate.meta.fields") - .defaultValue(true) + .defaultValue(false) .sinceVersion("0.10.0") .withDocumentation("When enabled, populates all meta fields. When disabled, no meta fields are populated."); public static final ConfigProperty IGNORE_SPURIOUS_DELETES = ConfigProperty .key("_" + METADATA_PREFIX + ".ignore.spurious.deletes") .defaultValue(true) - .sinceVersion("0.10.10") - .withDocumentation("There are cases when extra files are requested to be deleted from metadata table which was never added before. This config" - + "determines how to handle such spurious deletes"); + .sinceVersion("0.10.0") + .withDocumentation("There are cases when extra files are requested to be deleted from " + + "metadata table which are never added before. This config determines how to handle " + + "such spurious deletes"); private HoodieMetadataConfig() { super(); @@ -157,6 +203,26 @@ public boolean enabled() { return getBoolean(ENABLE); } + public boolean isBloomFilterIndexEnabled() { + return getBooleanOrDefault(ENABLE_METADATA_INDEX_BLOOM_FILTER); + } + + public boolean isColumnStatsIndexEnabled() { + return getBooleanOrDefault(ENABLE_METADATA_INDEX_COLUMN_STATS); + } + + public boolean isMetadataColumnStatsIndexForAllColumnsEnabled() { + return getBooleanOrDefault(ENABLE_METADATA_INDEX_COLUMN_STATS_FOR_ALL_COLUMNS); + } + + public int getBloomFilterIndexFileGroupCount() { + return getIntOrDefault(METADATA_INDEX_BLOOM_FILTER_FILE_GROUP_COUNT); + } + + public int getColumnStatsIndexFileGroupCount() { + return getIntOrDefault(METADATA_INDEX_COLUMN_STATS_FILE_GROUP_COUNT); + } + public boolean enableMetrics() { return getBoolean(METRICS_ENABLE); } @@ -199,6 +265,31 @@ public Builder enable(boolean enable) { return this; } + public Builder withMetadataIndexBloomFilter(boolean enable) { + metadataConfig.setValue(ENABLE_METADATA_INDEX_BLOOM_FILTER, String.valueOf(enable)); + return this; + } + + public Builder withMetadataIndexBloomFilterFileGroups(int fileGroupCount) { + metadataConfig.setValue(METADATA_INDEX_BLOOM_FILTER_FILE_GROUP_COUNT, String.valueOf(fileGroupCount)); + return this; + } + + public Builder withMetadataIndexColumnStats(boolean enable) { + metadataConfig.setValue(ENABLE_METADATA_INDEX_COLUMN_STATS, String.valueOf(enable)); + return this; + } + + public Builder withMetadataIndexColumnStatsFileGroupCount(int fileGroupCount) { + metadataConfig.setValue(METADATA_INDEX_COLUMN_STATS_FILE_GROUP_COUNT, String.valueOf(fileGroupCount)); + return this; + } + + public Builder withMetadataIndexForAllColumns(boolean enable) { + metadataConfig.setValue(ENABLE_METADATA_INDEX_COLUMN_STATS_FOR_ALL_COLUMNS, String.valueOf(enable)); + return this; + } + public Builder enableMetrics(boolean enableMetrics) { 
metadataConfig.setValue(METRICS_ENABLE, String.valueOf(enableMetrics)); return this; diff --git a/hudi-common/src/main/java/org/apache/hudi/common/config/TypedProperties.java b/hudi-common/src/main/java/org/apache/hudi/common/config/TypedProperties.java index 49db9b23ae706..6639e88d56f3f 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/config/TypedProperties.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/config/TypedProperties.java @@ -20,7 +20,12 @@ import java.io.Serializable; import java.util.Arrays; +import java.util.Collections; +import java.util.Enumeration; +import java.util.HashSet; +import java.util.LinkedHashSet; import java.util.List; +import java.util.Map; import java.util.Objects; import java.util.Properties; import java.util.Set; @@ -31,6 +36,8 @@ */ public class TypedProperties extends Properties implements Serializable { + private final HashSet keys = new LinkedHashSet<>(); + public TypedProperties() { super(null); } @@ -43,15 +50,60 @@ public TypedProperties(Properties defaults) { } } - private void checkKey(String property) { - if (!keyExists(property)) { - throw new IllegalArgumentException("Property " + property + " not found"); + @Override + public Enumeration propertyNames() { + return Collections.enumeration(keys); + } + + @Override + public synchronized Enumeration keys() { + return Collections.enumeration(keys); + } + + @Override + public Set stringPropertyNames() { + Set set = new LinkedHashSet<>(); + for (Object key : this.keys) { + if (key instanceof String) { + set.add((String) key); + } + } + return set; + } + + public synchronized void putAll(Properties t) { + for (Map.Entry e : t.entrySet()) { + if (!containsKey(String.valueOf(e.getKey()))) { + keys.add(e.getKey()); + } + super.put(e.getKey(), e.getValue()); + } + } + + @Override + public synchronized Object put(Object key, Object value) { + keys.remove(key); + keys.add(key); + return super.put(key, value); + } + + public synchronized Object putIfAbsent(Object key, Object value) { + if (!containsKey(String.valueOf(key))) { + keys.add(key); } + return super.putIfAbsent(key, value); } - private boolean keyExists(String property) { - Set keys = super.stringPropertyNames(); - return keys.contains(property); + @Override + public Object remove(Object key) { + keys.remove(key); + return super.remove(key); + } + + private void checkKey(String property) { + if (!containsKey(property)) { + throw new IllegalArgumentException("Property " + property + " not found"); + } } public String getString(String property) { @@ -60,11 +112,11 @@ public String getString(String property) { } public String getString(String property, String defaultValue) { - return keyExists(property) ? getProperty(property) : defaultValue; + return containsKey(property) ? getProperty(property) : defaultValue; } public List getStringList(String property, String delimiter, List defaultVal) { - if (!keyExists(property)) { + if (!containsKey(property)) { return defaultVal; } return Arrays.stream(getProperty(property).split(delimiter)).map(String::trim).collect(Collectors.toList()); @@ -76,7 +128,7 @@ public int getInteger(String property) { } public int getInteger(String property, int defaultValue) { - return keyExists(property) ? Integer.parseInt(getProperty(property)) : defaultValue; + return containsKey(property) ? 
Integer.parseInt(getProperty(property)) : defaultValue; } public long getLong(String property) { @@ -85,7 +137,7 @@ public long getLong(String property) { } public long getLong(String property, long defaultValue) { - return keyExists(property) ? Long.parseLong(getProperty(property)) : defaultValue; + return containsKey(property) ? Long.parseLong(getProperty(property)) : defaultValue; } public boolean getBoolean(String property) { @@ -94,7 +146,7 @@ public boolean getBoolean(String property) { } public boolean getBoolean(String property, boolean defaultValue) { - return keyExists(property) ? Boolean.parseBoolean(getProperty(property)) : defaultValue; + return containsKey(property) ? Boolean.parseBoolean(getProperty(property)) : defaultValue; } public double getDouble(String property) { @@ -103,6 +155,6 @@ public double getDouble(String property) { } public double getDouble(String property, double defaultValue) { - return keyExists(property) ? Double.parseDouble(getProperty(property)) : defaultValue; + return containsKey(property) ? Double.parseDouble(getProperty(property)) : defaultValue; } } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/fs/FSUtils.java b/hudi-common/src/main/java/org/apache/hudi/common/fs/FSUtils.java index d7af8a7d46d8b..7c9b7cc806fa4 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/fs/FSUtils.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/fs/FSUtils.java @@ -135,6 +135,17 @@ public static Path addSchemeIfLocalPath(String path) { return providedPath; } + /** + * Makes path qualified w/ {@link FileSystem}'s URI + * + * @param fs instance of {@link FileSystem} path belongs to + * @param path path to be qualified + * @return qualified path, prefixed w/ the URI of the target FS object provided + */ + public static Path makeQualified(FileSystem fs, Path path) { + return path.makeQualified(fs.getUri(), fs.getWorkingDirectory()); + } + /** * A write token uniquely identifies an attempt at one of the IOHandle operations (Merge/Create/Append). */ @@ -484,24 +495,25 @@ public static FileStatus[] getAllDataFilesInPartition(FileSystem fs, Path partit } /** - * Get the latest log file written from the list of log files passed in. + * Get the latest log file for the passed in file-id in the partition path */ - public static Option getLatestLogFile(Stream logFiles) { - return Option.fromJavaOptional(logFiles.min(HoodieLogFile.getReverseLogFileComparator())); + public static Option getLatestLogFile(FileSystem fs, Path partitionPath, String fileId, + String logFileExtension, String baseCommitTime) throws IOException { + return getLatestLogFile(getAllLogFiles(fs, partitionPath, fileId, logFileExtension, baseCommitTime)); } /** - * Get all the log files for the passed in FileId in the partition path. + * Get all the log files for the passed in file-id in the partition path. */ public static Stream getAllLogFiles(FileSystem fs, Path partitionPath, final String fileId, final String logFileExtension, final String baseCommitTime) throws IOException { try { - return Arrays - .stream(fs.listStatus(partitionPath, - path -> path.getName().startsWith("." + fileId) && path.getName().contains(logFileExtension))) - .map(HoodieLogFile::new).filter(s -> s.getBaseCommitTime().equals(baseCommitTime)); + PathFilter pathFilter = path -> path.getName().startsWith("." 
+ fileId) && path.getName().contains(logFileExtension); + return Arrays.stream(fs.listStatus(partitionPath, pathFilter)) + .map(HoodieLogFile::new) + .filter(s -> s.getBaseCommitTime().equals(baseCommitTime)); } catch (FileNotFoundException e) { - return Stream.builder().build(); + return Stream.of(); } } @@ -776,4 +788,8 @@ public static List getFileStatusAtLevel( public interface SerializableFunction extends Function, Serializable { } + + private static Option getLatestLogFile(Stream logFiles) { + return Option.fromJavaOptional(logFiles.min(HoodieLogFile.getReverseLogFileComparator())); + } } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/fs/FileSystemRetryConfig.java b/hudi-common/src/main/java/org/apache/hudi/common/fs/FileSystemRetryConfig.java new file mode 100644 index 0000000000000..c7f99ece7e45d --- /dev/null +++ b/hudi-common/src/main/java/org/apache/hudi/common/fs/FileSystemRetryConfig.java @@ -0,0 +1,142 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.common.fs; + +import org.apache.hudi.common.config.ConfigClassProperty; +import org.apache.hudi.common.config.ConfigGroups; +import org.apache.hudi.common.config.ConfigProperty; +import org.apache.hudi.common.config.HoodieConfig; + +import java.io.File; +import java.io.FileReader; +import java.io.IOException; +import java.util.Properties; + +/** + * The file system retry relevant config options. 
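+ *
+ * Editorial note (a sketch of the defaults declared below, not part of the original patch):
+ * a failing filesystem call is retried up to 4 times with exponential backoff, waiting
+ * 100 ms initially and at most 2000 ms between attempts, e.g.:
+ *   hoodie.filesystem.operation.retry.enable=true
+ *   hoodie.filesystem.operation.retry.max_numbers=4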
+ */ +@ConfigClassProperty(name = "FileSystem Guard Configurations", + groupName = ConfigGroups.Names.WRITE_CLIENT, + description = "The filesystem retry related config options, to help deal with runtime exception like list/get/put/delete performance issues.") +public class FileSystemRetryConfig extends HoodieConfig { + + public static final ConfigProperty FILESYSTEM_RETRY_ENABLE = ConfigProperty + .key("hoodie.filesystem.operation.retry.enable") + .defaultValue("false") + .sinceVersion("0.11.0") + .withDocumentation("Enabled to handle list/get/delete etc file system performance issue."); + + public static final ConfigProperty INITIAL_RETRY_INTERVAL_MS = ConfigProperty + .key("hoodie.filesystem.operation.retry.initial_interval_ms") + .defaultValue(100L) + .sinceVersion("0.11.0") + .withDocumentation("Amount of time (in ms) to wait, before retry to do operations on storage."); + + public static final ConfigProperty MAX_RETRY_INTERVAL_MS = ConfigProperty + .key("hoodie.filesystem.operation.retry.max_interval_ms") + .defaultValue(2000L) + .sinceVersion("0.11.0") + .withDocumentation("Maximum amount of time (in ms), to wait for next retry."); + + public static final ConfigProperty MAX_RETRY_NUMBERS = ConfigProperty + .key("hoodie.filesystem.operation.retry.max_numbers") + .defaultValue(4) + .sinceVersion("0.11.0") + .withDocumentation("Maximum number of retry actions to perform, with exponential backoff."); + + public static final ConfigProperty RETRY_EXCEPTIONS = ConfigProperty + .key("hoodie.filesystem.operation.retry.exceptions") + .defaultValue("") + .sinceVersion("0.11.0") + .withDocumentation("The class name of the Exception that needs to be re-tryed, separated by commas. " + + "Default is empty which means retry all the IOException and RuntimeException from FileSystem"); + + private FileSystemRetryConfig() { + super(); + } + + public long getInitialRetryIntervalMs() { + return getLong(INITIAL_RETRY_INTERVAL_MS); + } + + public long getMaxRetryIntervalMs() { + return getLong(MAX_RETRY_INTERVAL_MS); + } + + public int getMaxRetryNumbers() { + return getInt(MAX_RETRY_NUMBERS); + } + + public boolean isFileSystemActionRetryEnable() { + return Boolean.parseBoolean(getStringOrDefault(FILESYSTEM_RETRY_ENABLE)); + } + + public static FileSystemRetryConfig.Builder newBuilder() { + return new Builder(); + } + + public String getRetryExceptions() { + return getString(RETRY_EXCEPTIONS); + } + + /** + * The builder used to build filesystem configurations. 
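+ *
+ * Assumed usage (editorial sketch, not part of the original patch):
+ *   FileSystemRetryConfig retryConfig = FileSystemRetryConfig.newBuilder()
+ *       .withFileSystemActionRetryEnabled(true)
+ *       .withMaxRetryNumbers(4)
+ *       .withInitialRetryIntervalMs(100L)
+ *       .build();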
+ */ + public static class Builder { + + private final FileSystemRetryConfig fileSystemRetryConfig = new FileSystemRetryConfig(); + + public Builder fromFile(File propertiesFile) throws IOException { + try (FileReader reader = new FileReader(propertiesFile)) { + fileSystemRetryConfig.getProps().load(reader); + return this; + } + } + + public Builder fromProperties(Properties props) { + this.fileSystemRetryConfig.getProps().putAll(props); + return this; + } + + public Builder withMaxRetryNumbers(int numbers) { + fileSystemRetryConfig.setValue(MAX_RETRY_NUMBERS, String.valueOf(numbers)); + return this; + } + + public Builder withInitialRetryIntervalMs(long intervalMs) { + fileSystemRetryConfig.setValue(INITIAL_RETRY_INTERVAL_MS, String.valueOf(intervalMs)); + return this; + } + + public Builder withMaxRetryIntervalMs(long intervalMs) { + fileSystemRetryConfig.setValue(MAX_RETRY_INTERVAL_MS, String.valueOf(intervalMs)); + return this; + } + + public Builder withFileSystemActionRetryEnabled(boolean enabled) { + fileSystemRetryConfig.setValue(FILESYSTEM_RETRY_ENABLE, String.valueOf(enabled)); + return this; + } + + public FileSystemRetryConfig build() { + fileSystemRetryConfig.setDefaults(FileSystemRetryConfig.class.getName()); + return fileSystemRetryConfig; + } + } +} diff --git a/hudi-common/src/main/java/org/apache/hudi/common/fs/HoodieRetryWrapperFileSystem.java b/hudi-common/src/main/java/org/apache/hudi/common/fs/HoodieRetryWrapperFileSystem.java new file mode 100644 index 0000000000000..075f811a42ea7 --- /dev/null +++ b/hudi-common/src/main/java/org/apache/hudi/common/fs/HoodieRetryWrapperFileSystem.java @@ -0,0 +1,257 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hudi.common.fs; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.CreateFlag; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.LocatedFileStatus; +import org.apache.hadoop.fs.Options; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.PathFilter; +import org.apache.hadoop.fs.RemoteIterator; +import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.util.Progressable; +import org.apache.hudi.common.util.RetryHelper; + +import java.io.FileNotFoundException; +import java.io.IOException; +import java.net.URI; +import java.util.EnumSet; + +public class HoodieRetryWrapperFileSystem extends FileSystem { + + private FileSystem fileSystem; + private long maxRetryIntervalMs; + private int maxRetryNumbers; + private long initialRetryIntervalMs; + private String retryExceptionsList; + + public HoodieRetryWrapperFileSystem(FileSystem fs, long maxRetryIntervalMs, int maxRetryNumbers, long initialRetryIntervalMs, String retryExceptions) { + this.fileSystem = fs; + this.maxRetryIntervalMs = maxRetryIntervalMs; + this.maxRetryNumbers = maxRetryNumbers; + this.initialRetryIntervalMs = initialRetryIntervalMs; + this.retryExceptionsList = retryExceptions; + + } + + @Override + public URI getUri() { + return fileSystem.getUri(); + } + + @Override + public FSDataInputStream open(Path f, int bufferSize) throws IOException { + return (FSDataInputStream) new RetryHelper(maxRetryIntervalMs, maxRetryNumbers, initialRetryIntervalMs, retryExceptionsList).tryWith(() -> fileSystem.open(f, bufferSize)).start(); + } + + @Override + public FSDataInputStream open(Path f) throws IOException { + return (FSDataInputStream) new RetryHelper(maxRetryIntervalMs, maxRetryNumbers, initialRetryIntervalMs, retryExceptionsList).tryWith(() -> fileSystem.open(f)).start(); + } + + @Override + public FSDataOutputStream create(Path f, + FsPermission permission, + boolean overwrite, + int bufferSize, + short replication, + long blockSize, + Progressable progress) throws IOException { + return (FSDataOutputStream) new RetryHelper(maxRetryIntervalMs, maxRetryNumbers, initialRetryIntervalMs, retryExceptionsList) + .tryWith(() -> fileSystem.create(f, permission, overwrite, bufferSize, replication, blockSize, progress)).start(); + } + + @Override + public FSDataOutputStream create(Path f, boolean overwrite) throws IOException { + return (FSDataOutputStream) new RetryHelper(maxRetryIntervalMs, maxRetryNumbers, initialRetryIntervalMs, retryExceptionsList).tryWith(() -> fileSystem.create(f, overwrite)).start(); + } + + @Override + public FSDataOutputStream create(Path f) throws IOException { + return (FSDataOutputStream) new RetryHelper(maxRetryIntervalMs, maxRetryNumbers, initialRetryIntervalMs, retryExceptionsList).tryWith(() -> fileSystem.create(f)).start(); + } + + @Override + public FSDataOutputStream create(Path f, Progressable progress) throws IOException { + return (FSDataOutputStream) new RetryHelper(maxRetryIntervalMs, maxRetryNumbers, initialRetryIntervalMs, retryExceptionsList).tryWith(() -> fileSystem.create(f, progress)).start(); + } + + @Override + public FSDataOutputStream create(Path f, short replication) throws IOException { + return (FSDataOutputStream) new RetryHelper(maxRetryIntervalMs, maxRetryNumbers, initialRetryIntervalMs, retryExceptionsList).tryWith(() -> fileSystem.create(f, 
replication)).start(); + } + + @Override + public FSDataOutputStream create(Path f, short replication, Progressable progress) throws IOException { + return (FSDataOutputStream) new RetryHelper(maxRetryIntervalMs, maxRetryNumbers, initialRetryIntervalMs, retryExceptionsList).tryWith(() -> fileSystem.create(f, replication, progress)).start(); + } + + @Override + public FSDataOutputStream create(Path f, boolean overwrite, int bufferSize) throws IOException { + return (FSDataOutputStream) new RetryHelper(maxRetryIntervalMs, maxRetryNumbers, initialRetryIntervalMs, retryExceptionsList) + .tryWith(() -> fileSystem.create(f, overwrite, bufferSize)).start(); + } + + @Override + public FSDataOutputStream create(Path f, boolean overwrite, int bufferSize, Progressable progress) + throws IOException { + return (FSDataOutputStream) new RetryHelper(maxRetryIntervalMs, maxRetryNumbers, initialRetryIntervalMs, retryExceptionsList) + .tryWith(() -> fileSystem.create(f, overwrite, bufferSize, progress)).start(); + } + + @Override + public FSDataOutputStream create(Path f, boolean overwrite, int bufferSize, short replication, long blockSize, + Progressable progress) throws IOException { + return (FSDataOutputStream) new RetryHelper(maxRetryIntervalMs, maxRetryNumbers, initialRetryIntervalMs, retryExceptionsList) + .tryWith(() -> fileSystem.create(f, overwrite, bufferSize, replication, blockSize, progress)).start(); + } + + @Override + public FSDataOutputStream create(Path f, FsPermission permission, EnumSet flags, int bufferSize, + short replication, long blockSize, Progressable progress) throws IOException { + return (FSDataOutputStream) new RetryHelper(maxRetryIntervalMs, maxRetryNumbers, initialRetryIntervalMs, retryExceptionsList) + .tryWith(() -> fileSystem.create(f, permission, flags, bufferSize, replication, blockSize, progress)).start(); + } + + @Override + public FSDataOutputStream create(Path f, FsPermission permission, EnumSet flags, int bufferSize, + short replication, long blockSize, Progressable progress, Options.ChecksumOpt checksumOpt) throws IOException { + return (FSDataOutputStream) new RetryHelper(maxRetryIntervalMs, maxRetryNumbers, initialRetryIntervalMs, retryExceptionsList) + .tryWith(() -> fileSystem.create(f, permission, flags, bufferSize, replication, + blockSize, progress, checksumOpt)).start(); + } + + @Override + public FSDataOutputStream create(Path f, boolean overwrite, int bufferSize, short replication, long blockSize) + throws IOException { + return (FSDataOutputStream) new RetryHelper(maxRetryIntervalMs, maxRetryNumbers, initialRetryIntervalMs, retryExceptionsList) + .tryWith(() -> fileSystem.create(f, overwrite, bufferSize, replication, blockSize)).start(); + } + + @Override + public boolean createNewFile(Path f) throws IOException { + return (boolean) new RetryHelper(maxRetryIntervalMs, maxRetryNumbers, initialRetryIntervalMs, retryExceptionsList).tryWith(() -> fileSystem.createNewFile(f)).start(); + } + + @Override + public FSDataOutputStream append(Path f, int bufferSize, Progressable progress) throws IOException { + return (FSDataOutputStream) new RetryHelper(maxRetryIntervalMs, maxRetryNumbers, initialRetryIntervalMs, retryExceptionsList).tryWith(() -> fileSystem.append(f, bufferSize, progress)).start(); + } + + @Override + public FSDataOutputStream append(Path f) throws IOException { + return (FSDataOutputStream) new RetryHelper(maxRetryIntervalMs, maxRetryNumbers, initialRetryIntervalMs, retryExceptionsList).tryWith(() -> fileSystem.append(f)).start(); + } + + 
@Override + public FSDataOutputStream append(Path f, int bufferSize) throws IOException { + return (FSDataOutputStream) new RetryHelper(maxRetryIntervalMs, maxRetryNumbers, initialRetryIntervalMs, retryExceptionsList).tryWith(() -> fileSystem.append(f, bufferSize)).start(); + } + + @Override + public boolean rename(Path src, Path dst) throws IOException { + return (boolean) new RetryHelper(maxRetryIntervalMs, maxRetryNumbers, initialRetryIntervalMs, retryExceptionsList).tryWith(() -> fileSystem.rename(src, dst)).start(); + } + + @Override + public boolean delete(Path f, boolean recursive) throws IOException { + return (boolean) new RetryHelper(maxRetryIntervalMs, maxRetryNumbers, initialRetryIntervalMs, retryExceptionsList).tryWith(() -> fileSystem.delete(f, recursive)).start(); + } + + @Override + public boolean delete(Path f) throws IOException { + return (boolean) new RetryHelper(maxRetryIntervalMs, maxRetryNumbers, initialRetryIntervalMs, retryExceptionsList).tryWith(() -> fileSystem.delete(f, true)).start(); + } + + @Override + public FileStatus[] listStatus(Path f) throws FileNotFoundException, IOException { + return (FileStatus[]) new RetryHelper(maxRetryIntervalMs, maxRetryNumbers, initialRetryIntervalMs, retryExceptionsList).tryWith(() -> fileSystem.listStatus(f)).start(); + } + + @Override + public FileStatus[] listStatus(Path f, PathFilter filter) throws IOException { + return (FileStatus[]) new RetryHelper(maxRetryIntervalMs, maxRetryNumbers, initialRetryIntervalMs, retryExceptionsList).tryWith(() -> fileSystem.listStatus(f, filter)).start(); + } + + @Override + public FileStatus[] listStatus(Path[] files) throws IOException { + return (FileStatus[]) new RetryHelper(maxRetryIntervalMs, maxRetryNumbers, initialRetryIntervalMs, retryExceptionsList).tryWith(() -> fileSystem.listStatus(files)).start(); + } + + @Override + public FileStatus[] listStatus(Path[] files, PathFilter filter) throws IOException { + return (FileStatus[]) new RetryHelper(maxRetryIntervalMs, maxRetryNumbers, initialRetryIntervalMs, retryExceptionsList).tryWith(() -> fileSystem.listStatus(files, filter)).start(); + } + + @Override + public FileStatus[] globStatus(Path pathPattern) throws IOException { + return (FileStatus[]) new RetryHelper(maxRetryIntervalMs, maxRetryNumbers, initialRetryIntervalMs, retryExceptionsList).tryWith(() -> fileSystem.globStatus(pathPattern)).start(); + } + + @Override + public FileStatus[] globStatus(Path pathPattern, PathFilter filter) throws IOException { + return (FileStatus[]) new RetryHelper(maxRetryIntervalMs, maxRetryNumbers, initialRetryIntervalMs, retryExceptionsList).tryWith(() -> fileSystem.globStatus(pathPattern, filter)).start(); + } + + @Override + public RemoteIterator listLocatedStatus(Path f) throws IOException { + return (RemoteIterator) new RetryHelper(maxRetryIntervalMs, maxRetryNumbers, initialRetryIntervalMs, retryExceptionsList).tryWith(() -> fileSystem.listLocatedStatus(f)).start(); + } + + @Override + public RemoteIterator listFiles(Path f, boolean recursive) throws IOException { + return (RemoteIterator) new RetryHelper(maxRetryIntervalMs, maxRetryNumbers, initialRetryIntervalMs, retryExceptionsList) + .tryWith(() -> fileSystem.listFiles(f, recursive)).start(); + } + + @Override + public void setWorkingDirectory(Path newDir) { + fileSystem.setWorkingDirectory(newDir); + } + + @Override + public Path getWorkingDirectory() { + return fileSystem.getWorkingDirectory(); + } + + @Override + public boolean mkdirs(Path f, FsPermission permission) throws 
IOException { + return (boolean) new RetryHelper(maxRetryIntervalMs, maxRetryNumbers, initialRetryIntervalMs, retryExceptionsList).tryWith(() -> fileSystem.mkdirs(f, permission)).start(); + } + + @Override + public FileStatus getFileStatus(Path f) throws IOException { + return (FileStatus) new RetryHelper(maxRetryIntervalMs, maxRetryNumbers, initialRetryIntervalMs, retryExceptionsList).tryWith(() -> fileSystem.getFileStatus(f)).start(); + } + + @Override + public boolean exists(Path f) throws IOException { + return (boolean) new RetryHelper(maxRetryIntervalMs, maxRetryNumbers, initialRetryIntervalMs, retryExceptionsList).tryWith(() -> fileSystem.exists(f)).start(); + } + + @Override + public Configuration getConf() { + return fileSystem.getConf(); + } +} diff --git a/hudi-common/src/main/java/org/apache/hudi/common/fs/HoodieWrapperFileSystem.java b/hudi-common/src/main/java/org/apache/hudi/common/fs/HoodieWrapperFileSystem.java index 8521fd8205808..4bbd94384420d 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/fs/HoodieWrapperFileSystem.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/fs/HoodieWrapperFileSystem.java @@ -136,7 +136,7 @@ public static Path convertToHoodiePath(Path file, Configuration conf) { } } - private static Path convertPathWithScheme(Path oldPath, String newScheme) { + public static Path convertPathWithScheme(Path oldPath, String newScheme) { URI oldURI = oldPath.toUri(); URI newURI; try { diff --git a/hudi-common/src/main/java/org/apache/hudi/common/fs/inline/InLineFSUtils.java b/hudi-common/src/main/java/org/apache/hudi/common/fs/inline/InLineFSUtils.java index a2c60bc318e4b..080f228f161e9 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/fs/inline/InLineFSUtils.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/fs/inline/InLineFSUtils.java @@ -19,10 +19,11 @@ package org.apache.hudi.common.fs.inline; import org.apache.hadoop.fs.Path; -import org.apache.hudi.common.util.ValidationUtils; import java.io.File; +import static org.apache.hudi.common.util.ValidationUtils.checkArgument; + /** * Utils to parse InLineFileSystem paths. * Inline FS format: @@ -61,10 +62,10 @@ public static Path getInlineFilePath(Path outerPath, String origScheme, long inL /** * InlineFS Path format: - * "inlinefs://path/to/outer/file/outer_file_schema/?start_offset=start_offset>&length=" + * "inlinefs://path/to/outer/file/outer_file_scheme/?start_offset=start_offset>&length=" *

* Outer File Path format: - * "outer_file_schema://path/to/outer/file" + * "outer_file_scheme://path/to/outer/file" *

* Example * Input: "inlinefs://file1/s3a/?start_offset=20&length=40". @@ -74,40 +75,48 @@ public static Path getInlineFilePath(Path outerPath, String origScheme, long inL * @return Outer file Path from the InLineFS Path */ public static Path getOuterFilePathFromInlinePath(Path inlineFSPath) { - final String scheme = inlineFSPath.getParent().getName(); + assertInlineFSPath(inlineFSPath); + + final String outerFileScheme = inlineFSPath.getParent().getName(); final Path basePath = inlineFSPath.getParent().getParent(); - ValidationUtils.checkArgument(basePath.toString().contains(SCHEME_SEPARATOR), - "Invalid InLineFSPath: " + inlineFSPath); + checkArgument(basePath.toString().contains(SCHEME_SEPARATOR), + "Invalid InLineFS path: " + inlineFSPath); final String pathExceptScheme = basePath.toString().substring(basePath.toString().indexOf(SCHEME_SEPARATOR) + 1); - final String fullPath = scheme + SCHEME_SEPARATOR - + (scheme.equals(LOCAL_FILESYSTEM_SCHEME) ? PATH_SEPARATOR : "") + final String fullPath = outerFileScheme + SCHEME_SEPARATOR + + (outerFileScheme.equals(LOCAL_FILESYSTEM_SCHEME) ? PATH_SEPARATOR : "") + pathExceptScheme; return new Path(fullPath); } /** - * Eg input : "inlinefs://file1/s3a/?start_offset=20&length=40". - * output: 20 + * Returns start offset w/in the base for the block identified by the given InlineFS path * - * @param inlinePath - * @return + * input: "inlinefs://file1/s3a/?start_offset=20&length=40". + * output: 20 */ - public static int startOffset(Path inlinePath) { - String[] slices = inlinePath.toString().split("[?&=]"); + public static int startOffset(Path inlineFSPath) { + assertInlineFSPath(inlineFSPath); + + String[] slices = inlineFSPath.toString().split("[?&=]"); return Integer.parseInt(slices[slices.length - 3]); } /** - * Eg input : "inlinefs:/file1/s3a/?start_offset=20&length=40". - * Output: 40 + * Returns length of the block (embedded w/in the base file) identified by the given InlineFS path * - * @param inlinePath - * @return + * input: "inlinefs:/file1/s3a/?start_offset=20&length=40". 
+ * output: 40 */ public static int length(Path inlinePath) { + assertInlineFSPath(inlinePath); + String[] slices = inlinePath.toString().split("[?&=]"); return Integer.parseInt(slices[slices.length - 1]); } + private static void assertInlineFSPath(Path inlinePath) { + String scheme = inlinePath.toUri().getScheme(); + checkArgument(InLineFileSystem.SCHEME.equals(scheme)); + } } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/fs/inline/InLineFileSystem.java b/hudi-common/src/main/java/org/apache/hudi/common/fs/inline/InLineFileSystem.java index 712b6c7ff4e32..1b2ea3cbedcf5 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/fs/inline/InLineFileSystem.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/fs/inline/InLineFileSystem.java @@ -57,6 +57,7 @@ public URI getUri() { return URI.create(getScheme()); } + @Override public String getScheme() { return SCHEME; } @@ -129,5 +130,4 @@ public Path getWorkingDirectory() { public boolean mkdirs(Path path, FsPermission fsPermission) throws IOException { throw new UnsupportedOperationException("Can't set working directory"); } - } \ No newline at end of file diff --git a/hudi-common/src/main/java/org/apache/hudi/common/model/DefaultHoodieRecordPayload.java b/hudi-common/src/main/java/org/apache/hudi/common/model/DefaultHoodieRecordPayload.java index 3e7971b1b26f1..5e4b445dfc85e 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/model/DefaultHoodieRecordPayload.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/model/DefaultHoodieRecordPayload.java @@ -18,6 +18,7 @@ package org.apache.hudi.common.model; +import org.apache.hudi.avro.HoodieAvroUtils; import org.apache.hudi.common.util.Option; import org.apache.hudi.keygen.constant.KeyGeneratorOptions; @@ -30,9 +31,6 @@ import java.util.Map; import java.util.Properties; -import static org.apache.hudi.avro.HoodieAvroUtils.bytesToAvro; -import static org.apache.hudi.avro.HoodieAvroUtils.getNestedFieldVal; - /** * {@link HoodieRecordPayload} impl that honors ordering field in both preCombine and combineAndGetUpdateValue. *

@@ -57,7 +55,7 @@ public Option combineAndGetUpdateValue(IndexedRecord currentValue return Option.empty(); } - GenericRecord incomingRecord = bytesToAvro(recordBytes, schema); + GenericRecord incomingRecord = HoodieAvroUtils.bytesToAvro(recordBytes, schema); // Null check is needed here to support schema evolution. The record in storage may be from old schema where // the new ordering column might not be present and hence returns null. @@ -81,7 +79,7 @@ public Option getInsertValue(Schema schema, Properties properties if (recordBytes.length == 0) { return Option.empty(); } - GenericRecord incomingRecord = bytesToAvro(recordBytes, schema); + GenericRecord incomingRecord = HoodieAvroUtils.bytesToAvro(recordBytes, schema); eventTime = updateEventTime(incomingRecord, properties); return isDeleteRecord(incomingRecord) ? Option.empty() : Option.of(incomingRecord); @@ -91,7 +89,13 @@ private static Option updateEventTime(GenericRecord record, Properties p boolean consistentLogicalTimestampEnabled = Boolean.parseBoolean(properties.getProperty( KeyGeneratorOptions.KEYGENERATOR_CONSISTENT_LOGICAL_TIMESTAMP_ENABLED.key(), KeyGeneratorOptions.KEYGENERATOR_CONSISTENT_LOGICAL_TIMESTAMP_ENABLED.defaultValue())); - return Option.ofNullable(getNestedFieldVal(record, properties.getProperty(HoodiePayloadProps.PAYLOAD_EVENT_TIME_FIELD_PROP_KEY), true, consistentLogicalTimestampEnabled)); + return Option.ofNullable( + HoodieAvroUtils.getNestedFieldVal( + record, + properties.getProperty(HoodiePayloadProps.PAYLOAD_EVENT_TIME_FIELD_PROP_KEY), + true, + consistentLogicalTimestampEnabled) + ); } @Override @@ -117,10 +121,13 @@ protected boolean needUpdatingPersistedRecord(IndexedRecord currentValue, boolean consistentLogicalTimestampEnabled = Boolean.parseBoolean(properties.getProperty( KeyGeneratorOptions.KEYGENERATOR_CONSISTENT_LOGICAL_TIMESTAMP_ENABLED.key(), KeyGeneratorOptions.KEYGENERATOR_CONSISTENT_LOGICAL_TIMESTAMP_ENABLED.defaultValue())); - Object persistedOrderingVal = getNestedFieldVal((GenericRecord) currentValue, - properties.getProperty(HoodiePayloadProps.PAYLOAD_ORDERING_FIELD_PROP_KEY), true, consistentLogicalTimestampEnabled); - Comparable incomingOrderingVal = (Comparable) getNestedFieldVal((GenericRecord) incomingRecord, - properties.getProperty(HoodiePayloadProps.PAYLOAD_ORDERING_FIELD_PROP_KEY), true, consistentLogicalTimestampEnabled); + Object persistedOrderingVal = HoodieAvroUtils.getNestedFieldVal((GenericRecord) currentValue, + properties.getProperty(HoodiePayloadProps.PAYLOAD_ORDERING_FIELD_PROP_KEY), + true, consistentLogicalTimestampEnabled); + Comparable incomingOrderingVal = (Comparable) HoodieAvroUtils.getNestedFieldVal((GenericRecord) incomingRecord, + properties.getProperty(HoodiePayloadProps.PAYLOAD_ORDERING_FIELD_PROP_KEY), + true, consistentLogicalTimestampEnabled); return persistedOrderingVal == null || ((Comparable) persistedOrderingVal).compareTo(incomingOrderingVal) <= 0; } + } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieAvroRecord.java b/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieAvroRecord.java new file mode 100644 index 0000000000000..9a9bbb2b7427f --- /dev/null +++ b/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieAvroRecord.java @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.common.model; + +public class HoodieAvroRecord extends HoodieRecord { + public HoodieAvroRecord(HoodieKey key, T data) { + super(key, data); + } + + public HoodieAvroRecord(HoodieKey key, T data, HoodieOperation operation) { + super(key, data, operation); + } + + public HoodieAvroRecord(HoodieRecord record) { + super(record); + } + + public HoodieAvroRecord() { + } + + @Override + public HoodieRecord newInstance() { + return new HoodieAvroRecord<>(this); + } + + @Override + public T getData() { + if (data == null) { + throw new IllegalStateException("Payload already deflated for record."); + } + return data; + } +} diff --git a/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieCleaningPolicy.java b/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieCleaningPolicy.java index 647232fb7e3a9..58b9f7475a35f 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieCleaningPolicy.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieCleaningPolicy.java @@ -22,5 +22,5 @@ * Hoodie cleaning policies. */ public enum HoodieCleaningPolicy { - KEEP_LATEST_FILE_VERSIONS, KEEP_LATEST_COMMITS; + KEEP_LATEST_FILE_VERSIONS, KEEP_LATEST_COMMITS, KEEP_LATEST_BY_HOURS; } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieColumnRangeMetadata.java b/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieColumnRangeMetadata.java index ca977ae53b5f9..acf5b2298987a 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieColumnRangeMetadata.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieColumnRangeMetadata.java @@ -18,8 +18,6 @@ package org.apache.hudi.common.model; -import org.apache.parquet.schema.PrimitiveStringifier; - import java.util.Objects; /** @@ -30,16 +28,21 @@ public class HoodieColumnRangeMetadata { private final String columnName; private final T minValue; private final T maxValue; - private final long numNulls; - private final PrimitiveStringifier stringifier; + private final long nullCount; + private final long valueCount; + private final long totalSize; + private final long totalUncompressedSize; - public HoodieColumnRangeMetadata(final String filePath, final String columnName, final T minValue, final T maxValue, final long numNulls, final PrimitiveStringifier stringifier) { + public HoodieColumnRangeMetadata(final String filePath, final String columnName, final T minValue, final T maxValue, + final long nullCount, long valueCount, long totalSize, long totalUncompressedSize) { this.filePath = filePath; this.columnName = columnName; this.minValue = minValue; this.maxValue = maxValue; - this.numNulls = numNulls; - this.stringifier = stringifier; + this.nullCount = nullCount; + this.valueCount = valueCount; + this.totalSize = totalSize; + this.totalUncompressedSize = totalUncompressedSize; } public String getFilePath() { @@ -58,12 +61,20 @@ public T getMaxValue() { 
return this.maxValue; } - public PrimitiveStringifier getStringifier() { - return stringifier; + public long getNullCount() { + return nullCount; + } + + public long getValueCount() { + return valueCount; + } + + public long getTotalSize() { + return totalSize; } - public long getNumNulls() { - return numNulls; + public long getTotalUncompressedSize() { + return totalUncompressedSize; } @Override @@ -79,21 +90,28 @@ public boolean equals(final Object o) { && Objects.equals(getColumnName(), that.getColumnName()) && Objects.equals(getMinValue(), that.getMinValue()) && Objects.equals(getMaxValue(), that.getMaxValue()) - && Objects.equals(getNumNulls(), that.getNumNulls()); + && Objects.equals(getNullCount(), that.getNullCount()) + && Objects.equals(getValueCount(), that.getValueCount()) + && Objects.equals(getTotalSize(), that.getTotalSize()) + && Objects.equals(getTotalUncompressedSize(), that.getTotalUncompressedSize()); } @Override public int hashCode() { - return Objects.hash(getColumnName(), getMinValue(), getMaxValue(), getNumNulls()); + return Objects.hash(getColumnName(), getMinValue(), getMaxValue(), getNullCount()); } @Override public String toString() { return "HoodieColumnRangeMetadata{" + "filePath ='" + filePath + '\'' - + "columnName='" + columnName + '\'' + + ", columnName='" + columnName + '\'' + ", minValue=" + minValue + ", maxValue=" + maxValue - + ", numNulls=" + numNulls + '}'; + + ", nullCount=" + nullCount + + ", valueCount=" + valueCount + + ", totalSize=" + totalSize + + ", totalUncompressedSize=" + totalUncompressedSize + + '}'; } } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieCommitMetadata.java b/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieCommitMetadata.java index c1e8cbf08b11c..c57965d727210 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieCommitMetadata.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieCommitMetadata.java @@ -18,17 +18,17 @@ package org.apache.hudi.common.model; -import org.apache.hudi.common.fs.FSUtils; -import org.apache.hudi.common.util.Option; -import org.apache.hudi.common.util.collection.Pair; - import com.fasterxml.jackson.annotation.JsonAutoDetect; import com.fasterxml.jackson.annotation.JsonIgnoreProperties; import com.fasterxml.jackson.annotation.PropertyAccessor; import com.fasterxml.jackson.databind.DeserializationFeature; import com.fasterxml.jackson.databind.ObjectMapper; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.Path; +import org.apache.hudi.common.fs.FSUtils; +import org.apache.hudi.common.util.Option; +import org.apache.hudi.common.util.collection.Pair; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; @@ -36,10 +36,12 @@ import java.io.Serializable; import java.nio.charset.StandardCharsets; import java.util.ArrayList; +import java.util.Collection; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.stream.Collectors; /** * All the metadata that gets stored along with a commit. 
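To make the reworked column statistics concrete before the commit-metadata changes continue: a minimal sketch of populating the expanded HoodieColumnRangeMetadata shown above. The file path, column name, and all statistic values are made-up illustrations, not values from this patch.

import org.apache.hudi.common.model.HoodieColumnRangeMetadata;

public class ColumnRangeExample {
  public static void main(String[] args) {
    // All argument values below are hypothetical; the constructor shape matches the class above.
    HoodieColumnRangeMetadata<Integer> rideCountRange = new HoodieColumnRangeMetadata<>(
        "2022/01/26/bf1c2bb3-0-1_0-28-26_20220126152723.parquet", // file the statistics describe
        "ride_count", // column the statistics describe
        0,            // minValue seen in the file
        4200,         // maxValue seen in the file
        17L,          // nullCount: number of null values
        10000L,       // valueCount: total number of values
        8192L,        // totalSize: on-disk (compressed) size
        65536L);      // totalUncompressedSize: size once decompressed
    System.out.println(rideCountRange); // uses the corrected toString() above
  }
}

The four counters replace the removed PrimitiveStringifier and carry the per-file statistics that min/max alone could not express.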
@@ -90,6 +92,10 @@ public Map> getPartitionToWriteStats() { return partitionToWriteStats; } + public List getWriteStats() { + return partitionToWriteStats.values().stream().flatMap(Collection::stream).collect(Collectors.toList()); + } + public String getMetadata(String metaKey) { return extraMetadata.get(metaKey); } @@ -148,10 +154,12 @@ public Map getFileGroupIdAndFullPaths(String basePath * been touched multiple times in the given commits, the return value will keep the one * from the latest commit. * + * + * @param hadoopConf * @param basePath The base path * @return the file full path to file status mapping */ - public Map getFullPathToFileStatus(String basePath) { + public Map getFullPathToFileStatus(Configuration hadoopConf, String basePath) { Map fullPathToFileStatus = new HashMap<>(); for (List stats : getPartitionToWriteStats().values()) { // Iterate through all the written files. @@ -159,7 +167,8 @@ public Map getFullPathToFileStatus(String basePath) { String relativeFilePath = stat.getPath(); Path fullPath = relativeFilePath != null ? FSUtils.getPartitionPath(basePath, relativeFilePath) : null; if (fullPath != null) { - FileStatus fileStatus = new FileStatus(stat.getFileSizeInBytes(), false, 0, 0, + long blockSize = FSUtils.getFs(fullPath.toString(), hadoopConf).getDefaultBlockSize(fullPath); + FileStatus fileStatus = new FileStatus(stat.getFileSizeInBytes(), false, 0, blockSize, 0, fullPath); fullPathToFileStatus.put(fullPath.getName(), fileStatus); } @@ -173,14 +182,16 @@ public Map getFullPathToFileStatus(String basePath) { * been touched multiple times in the given commits, the return value will keep the one * from the latest commit by file group ID. * - *

<p>Note: different with {@link #getFullPathToFileStatus(String)}, + * <p>
Note: different with {@link #getFullPathToFileStatus(Configuration, String)}, * only the latest commit file for a file group is returned, * this is an optimization for COPY_ON_WRITE table to eliminate legacy files for filesystem view. * + * + * @param hadoopConf * @param basePath The base path * @return the file ID to file status mapping */ - public Map getFileIdToFileStatus(String basePath) { + public Map getFileIdToFileStatus(Configuration hadoopConf, String basePath) { Map fileIdToFileStatus = new HashMap<>(); for (List stats : getPartitionToWriteStats().values()) { // Iterate through all the written files. diff --git a/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieLogFile.java b/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieLogFile.java index 2515659c7b5fd..5b5a6432e633c 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieLogFile.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieLogFile.java @@ -18,11 +18,10 @@ package org.apache.hudi.common.model; -import org.apache.hudi.common.fs.FSUtils; - import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hudi.common.fs.FSUtils; import java.io.IOException; import java.io.Serializable; @@ -60,7 +59,7 @@ public HoodieLogFile(FileStatus fileStatus) { public HoodieLogFile(Path logPath) { this.fileStatus = null; this.pathStr = logPath.toString(); - this.fileLen = 0; + this.fileLen = -1; } public HoodieLogFile(Path logPath, Long fileLen) { diff --git a/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieRecord.java b/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieRecord.java index 17427781eabb3..ac30766dd2f03 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieRecord.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieRecord.java @@ -18,21 +18,21 @@ package org.apache.hudi.common.model; -import java.util.Map; -import java.util.stream.Collectors; -import java.util.stream.IntStream; import org.apache.hudi.common.util.CollectionUtils; import org.apache.hudi.common.util.Option; +import org.apache.hudi.common.util.collection.Pair; import java.io.Serializable; import java.util.List; +import java.util.Map; import java.util.Objects; -import org.apache.hudi.common.util.collection.Pair; +import java.util.stream.Collectors; +import java.util.stream.IntStream; /** * A Single Record managed by Hoodie. */ -public class HoodieRecord implements Serializable { +public abstract class HoodieRecord implements Serializable { public static final String COMMIT_TIME_METADATA_FIELD = "_hoodie_commit_time"; public static final String COMMIT_SEQNO_METADATA_FIELD = "_hoodie_commit_seqno"; @@ -40,6 +40,7 @@ public class HoodieRecord implements Serializable public static final String PARTITION_PATH_METADATA_FIELD = "_hoodie_partition_path"; public static final String FILENAME_METADATA_FIELD = "_hoodie_file_name"; public static final String OPERATION_METADATA_FIELD = "_hoodie_operation"; + public static final String HOODIE_IS_DELETED = "_hoodie_is_deleted"; public static final List HOODIE_META_COLUMNS = CollectionUtils.createImmutableList(COMMIT_TIME_METADATA_FIELD, COMMIT_SEQNO_METADATA_FIELD, @@ -64,7 +65,7 @@ public class HoodieRecord implements Serializable /** * Actual payload of the record. */ - private T data; + protected T data; /** * Current location of record on storage. 
Filled in by looking up index @@ -110,6 +111,8 @@ public HoodieRecord(HoodieRecord record) { public HoodieRecord() { } + public abstract HoodieRecord newInstance(); + public HoodieKey getKey() { return key; } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieTableQueryType.java b/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieTableQueryType.java index 15449b32959e3..f1d7557ae22f8 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieTableQueryType.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/model/HoodieTableQueryType.java @@ -30,7 +30,7 @@ * */ public enum HoodieTableQueryType { - QUERY_TYPE_SNAPSHOT, - QUERY_TYPE_INCREMENTAL, - QUERY_TYPE_READ_OPTIMIZED + SNAPSHOT, + INCREMENTAL, + READ_OPTIMIZED } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/model/OverwriteWithLatestAvroPayload.java b/hudi-common/src/main/java/org/apache/hudi/common/model/OverwriteWithLatestAvroPayload.java index 4be2e3e093e90..7b7bd6c6b2e5e 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/model/OverwriteWithLatestAvroPayload.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/model/OverwriteWithLatestAvroPayload.java @@ -85,7 +85,7 @@ public Option getInsertValue(Schema schema) throws IOException { * @returns {@code true} if record represents a delete record. {@code false} otherwise. */ protected boolean isDeleteRecord(GenericRecord genericRecord) { - final String isDeleteKey = "_hoodie_is_deleted"; + final String isDeleteKey = HoodieRecord.HOODIE_IS_DELETED; // Modify to be compatible with new version Avro. // The new version Avro throws for GenericRecord.get if the field name // does not exist in the schema. diff --git a/hudi-common/src/main/java/org/apache/hudi/common/model/TableServiceType.java b/hudi-common/src/main/java/org/apache/hudi/common/model/TableServiceType.java index 90444a3d61aa2..69dd30782ff77 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/model/TableServiceType.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/model/TableServiceType.java @@ -24,10 +24,13 @@ * Supported runtime table services. */ public enum TableServiceType { - COMPACT, CLUSTER, CLEAN; + ARCHIVE, COMPACT, CLUSTER, CLEAN; public String getAction() { switch (this) { + case ARCHIVE: + // for table service type completeness; there is no timeline action associated with archive + return "NONE"; case COMPACT: return HoodieTimeline.COMPACTION_ACTION; case CLEAN: diff --git a/hudi-common/src/main/java/org/apache/hudi/common/model/debezium/PostgresDebeziumAvroPayload.java b/hudi-common/src/main/java/org/apache/hudi/common/model/debezium/PostgresDebeziumAvroPayload.java index 448627d97cbf7..d4be1899a1c96 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/model/debezium/PostgresDebeziumAvroPayload.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/model/debezium/PostgresDebeziumAvroPayload.java @@ -31,6 +31,7 @@ import java.nio.ByteBuffer; import java.nio.charset.StandardCharsets; import java.util.List; +import java.util.Properties; /** * Provides support for seamlessly applying changes captured via Debezium for PostgresDB. 
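Since the delete-marker field name is now exposed as HoodieRecord.HOODIE_IS_DELETED (used by OverwriteWithLatestAvroPayload#isDeleteRecord above), here is a small self-contained sketch of how a producer could flag a record for deletion. The schema and values are illustrative only.

import org.apache.avro.Schema;
import org.apache.avro.SchemaBuilder;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericRecord;
import org.apache.hudi.common.model.HoodieRecord;

public class DeleteFlagExample {
  public static void main(String[] args) {
    // Toy schema carrying the shared delete-marker field (hypothetical record type).
    Schema schema = SchemaBuilder.record("trip").fields()
        .requiredString("uuid")
        .name(HoodieRecord.HOODIE_IS_DELETED).type().booleanType().booleanDefault(false)
        .endRecord();

    GenericRecord record = new GenericData.Record(schema);
    record.put("uuid", "rider-42");
    // A true value makes isDeleteRecord treat this record as a delete.
    record.put(HoodieRecord.HOODIE_IS_DELETED, true);

    Object flag = record.get(HoodieRecord.HOODIE_IS_DELETED);
    System.out.println(flag != null && (Boolean) flag); // prints: true
  }
}

Declaring the field in the schema matters: as the comment above notes, newer Avro versions throw on GenericRecord.get for field names absent from the schema.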
@@ -71,6 +72,19 @@ protected boolean shouldPickCurrentRecord(IndexedRecord currentRecord, IndexedRe return insertSourceLSN < currentSourceLSN; } + @Override + public Option combineAndGetUpdateValue(IndexedRecord currentValue, Schema schema, Properties properties) throws IOException { + // Specific to Postgres: If the updated record has TOASTED columns, + // we will need to keep the previous value for those columns + // see https://debezium.io/documentation/reference/connectors/postgresql.html#postgresql-toasted-values + Option insertOrDeleteRecord = super.combineAndGetUpdateValue(currentValue, schema, properties); + + if (insertOrDeleteRecord.isPresent()) { + mergeToastedValuesIfPresent(insertOrDeleteRecord.get(), currentValue); + } + return insertOrDeleteRecord; + } + @Override public Option combineAndGetUpdateValue(IndexedRecord currentValue, Schema schema) throws IOException { // Specific to Postgres: If the updated record has TOASTED columns, diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/HoodieTableConfig.java b/hudi-common/src/main/java/org/apache/hudi/common/table/HoodieTableConfig.java index 624c02726c528..dc010366cd3b5 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/HoodieTableConfig.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/HoodieTableConfig.java @@ -18,12 +18,20 @@ package org.apache.hudi.common.table; +import org.apache.avro.Schema; + +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; + import org.apache.hudi.common.bootstrap.index.HFileBootstrapIndex; import org.apache.hudi.common.bootstrap.index.NoOpBootstrapIndex; import org.apache.hudi.common.config.ConfigClassProperty; import org.apache.hudi.common.config.ConfigGroups; import org.apache.hudi.common.config.ConfigProperty; import org.apache.hudi.common.config.HoodieConfig; +import org.apache.hudi.common.config.TypedProperties; import org.apache.hudi.common.model.HoodieFileFormat; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.model.HoodieTableType; @@ -31,29 +39,29 @@ import org.apache.hudi.common.model.OverwriteWithLatestAvroPayload; import org.apache.hudi.common.table.timeline.HoodieInstantTimeGenerator; import org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion; +import org.apache.hudi.common.util.BinaryUtil; import org.apache.hudi.common.util.FileIOUtils; import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.ValidationUtils; import org.apache.hudi.exception.HoodieIOException; import org.apache.hudi.keygen.constant.KeyGeneratorOptions; +import org.apache.hudi.keygen.constant.KeyGeneratorOptions.Config; -import org.apache.avro.Schema; -import org.apache.hadoop.fs.FSDataInputStream; -import org.apache.hadoop.fs.FSDataOutputStream; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; import java.io.IOException; import java.util.Arrays; import java.util.Date; +import java.util.List; import java.util.Map; import java.util.Properties; import java.util.Set; import java.util.function.BiConsumer; import java.util.stream.Collectors; +import static java.nio.charset.StandardCharsets.UTF_8; + /** * Configurations on the Hoodie Table like type of ingestion, storage formats, hive table name etc Configurations are loaded from hoodie.properties, these properties are usually set during * 
initializing a path as hoodie base path and never changes during the lifetime of a hoodie table. @@ -183,8 +191,24 @@ public class HoodieTableConfig extends HoodieConfig { public static final ConfigProperty URL_ENCODE_PARTITIONING = KeyGeneratorOptions.URL_ENCODE_PARTITIONING; public static final ConfigProperty HIVE_STYLE_PARTITIONING_ENABLE = KeyGeneratorOptions.HIVE_STYLE_PARTITIONING_ENABLE; + public static final List PERSISTED_CONFIG_LIST = Arrays.asList( + Config.DATE_TIME_PARSER_PROP, + Config.INPUT_TIME_UNIT, Config.TIMESTAMP_INPUT_DATE_FORMAT_LIST_DELIMITER_REGEX_PROP, + Config.TIMESTAMP_INPUT_DATE_FORMAT_PROP, Config.TIMESTAMP_INPUT_TIMEZONE_FORMAT_PROP, + Config.TIMESTAMP_OUTPUT_DATE_FORMAT_PROP, Config.TIMESTAMP_OUTPUT_TIMEZONE_FORMAT_PROP, + Config.TIMESTAMP_TIMEZONE_FORMAT_PROP, Config.DATE_TIME_PARSER_PROP + ); + public static final String NO_OP_BOOTSTRAP_INDEX_CLASS = NoOpBootstrapIndex.class.getName(); + public static final ConfigProperty TABLE_CHECKSUM = ConfigProperty + .key("hoodie.table.checksum") + .noDefaultValue() + .sinceVersion("0.11.0") + .withDocumentation("Table checksum is used to guard against partial writes in HDFS. It is added as the last entry in hoodie.properties and then used to validate while reading table config."); + + private static final String TABLE_CHECKSUM_FORMAT = "%s.%s"; // . + public HoodieTableConfig(FileSystem fs, String metaPath, String payloadClassName) { super(); Path propertyPath = new Path(metaPath, HOODIE_PROPERTIES_FILE); @@ -196,6 +220,9 @@ public HoodieTableConfig(FileSystem fs, String metaPath, String payloadClassName setValue(PAYLOAD_CLASS_NAME, payloadClassName); // FIXME(vc): wonder if this can be removed. Need to look into history. try (FSDataOutputStream outputStream = fs.create(propertyPath)) { + if (!isValidChecksum()) { + setValue(TABLE_CHECKSUM, String.valueOf(generateChecksum(props))); + } props.store(outputStream, "Properties saved on " + new Date(System.currentTimeMillis())); } } @@ -206,6 +233,10 @@ public HoodieTableConfig(FileSystem fs, String metaPath, String payloadClassName "hoodie.properties file seems invalid. Please check for left over `.updated` files if any, manually copy it to hoodie.properties and retry"); } + private boolean isValidChecksum() { + return contains(TABLE_CHECKSUM) && validateChecksum(props); + } + /** * For serializing and de-serializing. */ @@ -215,13 +246,20 @@ public HoodieTableConfig() { private void fetchConfigs(FileSystem fs, String metaPath) throws IOException { Path cfgPath = new Path(metaPath, HOODIE_PROPERTIES_FILE); + Path backupCfgPath = new Path(metaPath, HOODIE_PROPERTIES_FILE_BACKUP); try (FSDataInputStream is = fs.open(cfgPath)) { props.load(is); + // validate checksum for latest table version + if (getTableVersion().versionCode() >= HoodieTableVersion.FOUR.versionCode() && !isValidChecksum()) { + LOG.warn("Checksum validation failed. Falling back to backed up configs."); + try (FSDataInputStream fsDataInputStream = fs.open(backupCfgPath)) { + props.load(fsDataInputStream); + } + } } catch (IOException ioe) { if (!fs.exists(cfgPath)) { LOG.warn("Run `table recover-configs` if config update/delete failed midway. Falling back to backed up configs."); // try the backup. this way no query ever fails if update fails midway. - Path backupCfgPath = new Path(metaPath, HOODIE_PROPERTIES_FILE_BACKUP); try (FSDataInputStream is = fs.open(backupCfgPath)) { props.load(is); } @@ -272,15 +310,31 @@ private static void modify(FileSystem fs, Path metadataFolder, Properties modify /// 2. 
delete the properties file, reads will go to the backup, until we are done. fs.delete(cfgPath, false); // 3. read current props, upsert and save back. + String checksum; try (FSDataInputStream in = fs.open(backupCfgPath); FSDataOutputStream out = fs.create(cfgPath, true)) { - Properties props = new Properties(); + Properties props = new TypedProperties(); props.load(in); modifyFn.accept(props, modifyProps); + if (props.containsKey(TABLE_CHECKSUM.key()) && validateChecksum(props)) { + checksum = props.getProperty(TABLE_CHECKSUM.key()); + } else { + checksum = String.valueOf(generateChecksum(props)); + props.setProperty(TABLE_CHECKSUM.key(), checksum); + } props.store(out, "Updated at " + System.currentTimeMillis()); } // 4. verify and remove backup. - // FIXME(vc): generate a hash for verification. + try (FSDataInputStream in = fs.open(cfgPath)) { + Properties props = new TypedProperties(); + props.load(in); + if (!props.containsKey(TABLE_CHECKSUM.key()) || !props.getProperty(TABLE_CHECKSUM.key()).equals(checksum)) { + // delete the properties file and throw exception indicating update failure + // subsequent writes will recover and update, reads will go to the backup until then + fs.delete(cfgPath, false); + throw new HoodieIOException("Checksum property missing or does not match."); + } + } fs.delete(backupCfgPath, false); } catch (IOException e) { throw new HoodieIOException("Error updating table configs.", e); @@ -331,10 +385,28 @@ public static void create(FileSystem fs, Path metadataFolder, Properties propert if (hoodieConfig.contains(TIMELINE_TIMEZONE)) { HoodieInstantTimeGenerator.setCommitTimeZone(HoodieTimelineTimeZone.valueOf(hoodieConfig.getString(TIMELINE_TIMEZONE))); } + if (hoodieConfig.contains(TABLE_CHECKSUM)) { + hoodieConfig.setValue(TABLE_CHECKSUM, hoodieConfig.getString(TABLE_CHECKSUM)); + } else { + hoodieConfig.setValue(TABLE_CHECKSUM, String.valueOf(generateChecksum(hoodieConfig.getProps()))); + } hoodieConfig.getProps().store(outputStream, "Properties saved on " + new Date(System.currentTimeMillis())); } } + public static long generateChecksum(Properties props) { + if (!props.containsKey(NAME.key())) { + throw new IllegalArgumentException(NAME.key() + " property needs to be specified"); + } + String table = props.getProperty(NAME.key()); + String database = props.getProperty(DATABASE_NAME.key(), ""); + return BinaryUtil.generateChecksum(String.format(TABLE_CHECKSUM_FORMAT, database, table).getBytes(UTF_8)); + } + + public static boolean validateChecksum(Properties props) { + return Long.parseLong(props.getProperty(TABLE_CHECKSUM.key())) == generateChecksum(props); + } + /** * Read the table type from the table properties and if not found, return the default. */ @@ -493,6 +565,13 @@ public String getUrlEncodePartitioning() { return getString(URL_ENCODE_PARTITIONING); } + /** + * Read the table checksum. 
+ */ + private Long getTableChecksum() { + return getLong(TABLE_CHECKSUM); + } + public Map propsMap() { return props.entrySet().stream() .collect(Collectors.toMap(e -> String.valueOf(e.getKey()), e -> String.valueOf(e.getValue()))); diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/HoodieTableMetaClient.java b/hudi-common/src/main/java/org/apache/hudi/common/table/HoodieTableMetaClient.java index b9a3673960fb3..4c1eac79dc413 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/HoodieTableMetaClient.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/HoodieTableMetaClient.java @@ -23,6 +23,8 @@ import org.apache.hudi.common.fs.ConsistencyGuardConfig; import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.fs.FailSafeConsistencyGuard; +import org.apache.hudi.common.fs.FileSystemRetryConfig; +import org.apache.hudi.common.fs.HoodieRetryWrapperFileSystem; import org.apache.hudi.common.fs.HoodieWrapperFileSystem; import org.apache.hudi.common.fs.NoOpConsistencyGuard; import org.apache.hudi.common.model.HoodieRecordPayload; @@ -54,6 +56,7 @@ import java.io.Serializable; import java.util.Arrays; import java.util.List; +import java.util.Map; import java.util.Objects; import java.util.Properties; import java.util.Set; @@ -99,12 +102,14 @@ public class HoodieTableMetaClient implements Serializable { private HoodieActiveTimeline activeTimeline; private HoodieArchivedTimeline archivedTimeline; private ConsistencyGuardConfig consistencyGuardConfig = ConsistencyGuardConfig.newBuilder().build(); + private FileSystemRetryConfig fileSystemRetryConfig = FileSystemRetryConfig.newBuilder().build(); private HoodieTableMetaClient(Configuration conf, String basePath, boolean loadActiveTimelineOnLoad, ConsistencyGuardConfig consistencyGuardConfig, Option layoutVersion, - String payloadClassName) { + String payloadClassName, FileSystemRetryConfig fileSystemRetryConfig) { LOG.info("Loading HoodieTableMetaClient from " + basePath); this.consistencyGuardConfig = consistencyGuardConfig; + this.fileSystemRetryConfig = fileSystemRetryConfig; this.hadoopConf = new SerializableConfiguration(conf); Path basePathDir = new Path(basePath); this.basePath = basePathDir.toString(); @@ -140,7 +145,8 @@ public HoodieTableMetaClient() {} public static HoodieTableMetaClient reload(HoodieTableMetaClient oldMetaClient) { return HoodieTableMetaClient.builder().setConf(oldMetaClient.hadoopConf.get()).setBasePath(oldMetaClient.basePath).setLoadActiveTimelineOnLoad(oldMetaClient.loadActiveTimelineOnLoad) - .setConsistencyGuardConfig(oldMetaClient.consistencyGuardConfig).setLayoutVersion(Option.of(oldMetaClient.timelineLayoutVersion)).setPayloadClassName(null).build(); + .setConsistencyGuardConfig(oldMetaClient.consistencyGuardConfig).setLayoutVersion(Option.of(oldMetaClient.timelineLayoutVersion)).setPayloadClassName(null) + .setFileSystemRetryConfig(oldMetaClient.fileSystemRetryConfig).build(); } /** @@ -150,7 +156,7 @@ public static HoodieTableMetaClient reload(HoodieTableMetaClient oldMetaClient) */ private void readObject(java.io.ObjectInputStream in) throws IOException, ClassNotFoundException { in.defaultReadObject(); - fs = null; // will be lazily inited + fs = null; // will be lazily initialized } private void writeObject(java.io.ObjectOutputStream out) throws IOException { @@ -255,6 +261,14 @@ public TimelineLayoutVersion getTimelineLayoutVersion() { public HoodieWrapperFileSystem getFs() { if (fs == null) { FileSystem fileSystem = FSUtils.getFs(metaPath, 
hadoopConf.newCopy()); + + if (fileSystemRetryConfig.isFileSystemActionRetryEnable()) { + fileSystem = new HoodieRetryWrapperFileSystem(fileSystem, + fileSystemRetryConfig.getMaxRetryIntervalMs(), + fileSystemRetryConfig.getMaxRetryNumbers(), + fileSystemRetryConfig.getInitialRetryIntervalMs(), + fileSystemRetryConfig.getRetryExceptions()); + } ValidationUtils.checkArgument(!(fileSystem instanceof HoodieWrapperFileSystem), "File System not expected to be that of HoodieWrapperFileSystem"); fs = new HoodieWrapperFileSystem(fileSystem, @@ -265,6 +279,10 @@ public HoodieWrapperFileSystem getFs() { return fs; } + public void setFs(HoodieWrapperFileSystem fs) { + this.fs = fs; + } + /** * Return raw file-system. * @@ -304,11 +322,15 @@ public ConsistencyGuardConfig getConsistencyGuardConfig() { return consistencyGuardConfig; } + public FileSystemRetryConfig getFileSystemRetryConfig() { + return fileSystemRetryConfig; + } + /** * Get the archived commits as a timeline. This is costly operation, as all data from the archived files are read. * This should not be used, unless for historical debugging purposes. * - * @return Active commit timeline + * @return Archived commit timeline */ public synchronized HoodieArchivedTimeline getArchivedTimeline() { if (archivedTimeline == null) { @@ -317,6 +339,20 @@ public synchronized HoodieArchivedTimeline getArchivedTimeline() { return archivedTimeline; } + /** + * Returns fresh new archived commits as a timeline from startTs (inclusive). + * + *

<p>This is a costly operation if a really early startTs is specified. + * Be cautious and use this only when the time range is short. + * + * <p>
This method is not thread-safe. + * + * @return Archived commit timeline + */ + public HoodieArchivedTimeline getArchivedTimeline(String startTs) { + return new HoodieArchivedTimeline(this, startTs); + } + /** * Validate table properties. * @param properties Properties from writeConfig. @@ -577,6 +613,7 @@ public static class Builder { private boolean loadActiveTimelineOnLoad = false; private String payloadClassName = null; private ConsistencyGuardConfig consistencyGuardConfig = ConsistencyGuardConfig.newBuilder().build(); + private FileSystemRetryConfig fileSystemRetryConfig = FileSystemRetryConfig.newBuilder().build(); private Option layoutVersion = Option.of(TimelineLayoutVersion.CURR_LAYOUT_VERSION); public Builder setConf(Configuration conf) { @@ -604,6 +641,11 @@ public Builder setConsistencyGuardConfig(ConsistencyGuardConfig consistencyGuard return this; } + public Builder setFileSystemRetryConfig(FileSystemRetryConfig fileSystemRetryConfig) { + this.fileSystemRetryConfig = fileSystemRetryConfig; + return this; + } + public Builder setLayoutVersion(Option layoutVersion) { this.layoutVersion = layoutVersion; return this; @@ -613,7 +655,7 @@ public HoodieTableMetaClient build() { ValidationUtils.checkArgument(conf != null, "Configuration needs to be set to init HoodieTableMetaClient"); ValidationUtils.checkArgument(basePath != null, "basePath needs to be set to init HoodieTableMetaClient"); return new HoodieTableMetaClient(conf, basePath, - loadActiveTimelineOnLoad, consistencyGuardConfig, layoutVersion, payloadClassName); + loadActiveTimelineOnLoad, consistencyGuardConfig, layoutVersion, payloadClassName, fileSystemRetryConfig); } } @@ -643,6 +685,12 @@ public static class PropertyBuilder { private Boolean urlEncodePartitioning; private HoodieTimelineTimeZone commitTimeZone; + /** + * Persist the configs that are written the first time and should not be changed afterwards, + * like KeyGenerator's configs.
+ */ + private Properties others = new Properties(); + private PropertyBuilder() { } @@ -750,6 +798,23 @@ public PropertyBuilder setCommitTimezone(HoodieTimelineTimeZone timelineTimeZone return this; } + public PropertyBuilder set(String key, Object value) { + if (HoodieTableConfig.PERSISTED_CONFIG_LIST.contains(key)) { + this.others.put(key, value); + } + return this; + } + + public PropertyBuilder set(Map props) { + for (String key: HoodieTableConfig.PERSISTED_CONFIG_LIST) { + Object value = props.get(key); + if (value != null) { + set(key, value); + } + } + return this; + } + public PropertyBuilder fromMetaClient(HoodieTableMetaClient metaClient) { return setTableType(metaClient.getTableType()) .setTableName(metaClient.getTableConfig().getTableName()) @@ -759,6 +824,14 @@ public PropertyBuilder fromMetaClient(HoodieTableMetaClient metaClient) { public PropertyBuilder fromProperties(Properties properties) { HoodieConfig hoodieConfig = new HoodieConfig(properties); + + for (String key: HoodieTableConfig.PERSISTED_CONFIG_LIST) { + Object value = hoodieConfig.getString(key); + if (value != null) { + set(key, value); + } + } + if (hoodieConfig.contains(HoodieTableConfig.DATABASE_NAME)) { setDatabaseName(hoodieConfig.getString(HoodieTableConfig.DATABASE_NAME)); } @@ -828,6 +901,9 @@ public Properties build() { ValidationUtils.checkArgument(tableName != null, "tableName is null"); HoodieTableConfig tableConfig = new HoodieTableConfig(); + + tableConfig.setAll(others); + if (databaseName != null) { tableConfig.setValue(HoodieTableConfig.DATABASE_NAME, databaseName); } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/HoodieTableVersion.java b/hudi-common/src/main/java/org/apache/hudi/common/table/HoodieTableVersion.java index 122c387756e88..3a249689ad2b3 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/HoodieTableVersion.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/HoodieTableVersion.java @@ -34,7 +34,9 @@ public enum HoodieTableVersion { // 0.9.0 onwards TWO(2), // 0.10.0 onwards - THREE(3); + THREE(3), + // 0.11.0 onwards + FOUR(4); private final int versionCode; @@ -47,7 +49,7 @@ public int versionCode() { } public static HoodieTableVersion current() { - return THREE; + return FOUR; } public static HoodieTableVersion versionFromCode(int versionCode) { diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/TableSchemaResolver.java b/hudi-common/src/main/java/org/apache/hudi/common/table/TableSchemaResolver.java index a70774896ceb7..a84a9482a6707 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/TableSchemaResolver.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/TableSchemaResolver.java @@ -21,14 +21,15 @@ import org.apache.avro.Schema; import org.apache.avro.Schema.Field; import org.apache.avro.SchemaCompatibility; - +import org.apache.avro.generic.IndexedRecord; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; - +import org.apache.hadoop.hbase.io.hfile.CacheConfig; import org.apache.hudi.avro.HoodieAvroUtils; import org.apache.hudi.common.model.HoodieCommitMetadata; import org.apache.hudi.common.model.HoodieFileFormat; import org.apache.hudi.common.model.HoodieLogFile; +import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.table.log.HoodieLogFormat; import org.apache.hudi.common.table.log.HoodieLogFormat.Reader; import org.apache.hudi.common.table.log.block.HoodieDataBlock; @@ -42,10 +43,11 @@ import 
org.apache.hudi.common.util.collection.Pair; import org.apache.hudi.exception.HoodieException; import org.apache.hudi.exception.InvalidTableException; +import org.apache.hudi.io.storage.HoodieHFileReader; +import org.apache.hudi.io.storage.HoodieOrcReader; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; - import org.apache.parquet.avro.AvroSchemaConverter; import org.apache.parquet.format.converter.ParquetMetadataConverter; import org.apache.parquet.hadoop.ParquetFileReader; @@ -61,15 +63,11 @@ public class TableSchemaResolver { private static final Logger LOG = LogManager.getLogger(TableSchemaResolver.class); private final HoodieTableMetaClient metaClient; - private final boolean withOperationField; + private final boolean hasOperationField; public TableSchemaResolver(HoodieTableMetaClient metaClient) { - this(metaClient, false); - } - - public TableSchemaResolver(HoodieTableMetaClient metaClient, boolean withOperationField) { this.metaClient = metaClient; - this.withOperationField = withOperationField; + this.hasOperationField = hasOperationField(); } /** @@ -85,29 +83,26 @@ private MessageType getTableParquetSchemaFromDataFile() { try { switch (metaClient.getTableType()) { case COPY_ON_WRITE: - // For COW table, the file has data written must be in parquet format currently. + // For COW table, the file has data written must be in parquet or orc format currently. if (instantAndCommitMetadata.isPresent()) { HoodieCommitMetadata commitMetadata = instantAndCommitMetadata.get().getRight(); String filePath = commitMetadata.getFileIdAndFullPaths(metaClient.getBasePath()).values().stream().findAny().get(); - return readSchemaFromBaseFile(new Path(filePath)); + return readSchemaFromBaseFile(filePath); } else { throw new IllegalArgumentException("Could not find any data file written for commit, " + "so could not get schema for table " + metaClient.getBasePath()); } case MERGE_ON_READ: - // For MOR table, the file has data written may be a parquet file or .log file. + // For MOR table, the file has data written may be a parquet file, .log file, orc file or hfile. // Determine the file format based on the file name, and then extract schema from it. 
if (instantAndCommitMetadata.isPresent()) { HoodieCommitMetadata commitMetadata = instantAndCommitMetadata.get().getRight(); String filePath = commitMetadata.getFileIdAndFullPaths(metaClient.getBasePath()).values().stream().findAny().get(); - if (filePath.contains(HoodieLogFile.DELTA_EXTENSION)) { + if (filePath.contains(HoodieFileFormat.HOODIE_LOG.getFileExtension())) { // this is a log file return readSchemaFromLogFile(new Path(filePath)); - } else if (filePath.contains(HoodieFileFormat.PARQUET.getFileExtension())) { - // this is a parquet file - return readSchemaFromBaseFile(new Path(filePath)); } else { - throw new IllegalArgumentException("Unknown file format :" + filePath); + return readSchemaFromBaseFile(filePath); } } else { throw new IllegalArgumentException("Could not find any data file written for commit, " @@ -122,7 +117,22 @@ private MessageType getTableParquetSchemaFromDataFile() { } } - public Schema getTableAvroSchemaFromDataFile() throws Exception { + private MessageType readSchemaFromBaseFile(String filePath) throws IOException { + if (filePath.contains(HoodieFileFormat.PARQUET.getFileExtension())) { + // this is a parquet file + return readSchemaFromParquetBaseFile(new Path(filePath)); + } else if (filePath.contains(HoodieFileFormat.HFILE.getFileExtension())) { + // this is a HFile + return readSchemaFromHFileBaseFile(new Path(filePath)); + } else if (filePath.contains(HoodieFileFormat.ORC.getFileExtension())) { + // this is a ORC file + return readSchemaFromORCBaseFile(new Path(filePath)); + } else { + throw new IllegalArgumentException("Unknown base file format :" + filePath); + } + } + + public Schema getTableAvroSchemaFromDataFile() { return convertParquetSchemaToAvro(getTableParquetSchemaFromDataFile()); } @@ -133,7 +143,7 @@ public Schema getTableAvroSchemaFromDataFile() throws Exception { * @throws Exception */ public Schema getTableAvroSchema() throws Exception { - return getTableAvroSchema(true); + return getTableAvroSchema(metaClient.getTableConfig().populateMetaFields()); } /** @@ -151,7 +161,7 @@ public Schema getTableAvroSchema(boolean includeMetadataFields) throws Exception Option schemaFromTableConfig = metaClient.getTableConfig().getTableCreateSchema(); if (schemaFromTableConfig.isPresent()) { if (includeMetadataFields) { - return HoodieAvroUtils.addMetadataFields(schemaFromTableConfig.get(), withOperationField); + return HoodieAvroUtils.addMetadataFields(schemaFromTableConfig.get(), hasOperationField); } else { return schemaFromTableConfig.get(); } @@ -176,7 +186,7 @@ public MessageType getTableParquetSchema() throws Exception { } Option schemaFromTableConfig = metaClient.getTableConfig().getTableCreateSchema(); if (schemaFromTableConfig.isPresent()) { - Schema schema = HoodieAvroUtils.addMetadataFields(schemaFromTableConfig.get(), withOperationField); + Schema schema = HoodieAvroUtils.addMetadataFields(schemaFromTableConfig.get(), hasOperationField); return convertAvroSchemaToParquet(schema); } return getTableParquetSchemaFromDataFile(); @@ -212,14 +222,21 @@ public Schema getTableAvroSchemaWithoutMetadataFields(HoodieInstant instant) thr } /** - * Gets the schema for a hoodie table in Avro format from the HoodieCommitMetadata of the last commit. + * Gets the schema for a hoodie table in Avro format from the HoodieCommitMetadata of the last commit with valid schema. 
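As a usage sketch for the reworked resolver: reading a table's Avro schema now goes through the simplified constructor, with the operation field detected from the data files rather than passed in. The base path below is a placeholder and error handling is elided.

import org.apache.avro.Schema;
import org.apache.hadoop.conf.Configuration;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.TableSchemaResolver;

public class SchemaResolverExample {
  public static void main(String[] args) throws Exception {
    HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder()
        .setConf(new Configuration())
        .setBasePath("/tmp/hudi_trips_cow") // placeholder path
        .build();

    // The resolver now infers the presence of the _hoodie_operation field itself,
    // instead of taking a withOperationField constructor flag.
    TableSchemaResolver resolver = new TableSchemaResolver(metaClient);
    // Meta fields are included only when the table populates them (see getTableAvroSchema() above).
    Schema schema = resolver.getTableAvroSchema();
    System.out.println(schema.toString(true));
  }
}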
* * @return Avro schema for this table */ private Option getTableSchemaFromCommitMetadata(boolean includeMetadataFields) { - HoodieTimeline timeline = metaClient.getActiveTimeline().getCommitsTimeline().filterCompletedInstants(); - if (timeline.lastInstant().isPresent()) { - return getTableSchemaFromCommitMetadata(timeline.lastInstant().get(), includeMetadataFields); + Option> instantAndCommitMetadata = + metaClient.getActiveTimeline().getLastCommitMetadataWithValidSchema(); + if (instantAndCommitMetadata.isPresent()) { + HoodieCommitMetadata commitMetadata = instantAndCommitMetadata.get().getRight(); + String schemaStr = commitMetadata.getMetadata(HoodieCommitMetadata.SCHEMA_KEY); + Schema schema = new Schema.Parser().parse(schemaStr); + if (includeMetadataFields) { + schema = HoodieAvroUtils.addMetadataFields(schema, hasOperationField); + } + return Option.of(schema); } else { return Option.empty(); } @@ -244,7 +261,7 @@ private Option getTableSchemaFromCommitMetadata(HoodieInstant instant, b Schema schema = new Schema.Parser().parse(existingSchemaStr); if (includeMetadataFields) { - schema = HoodieAvroUtils.addMetadataFields(schema, withOperationField); + schema = HoodieAvroUtils.addMetadataFields(schema, hasOperationField); } return Option.of(schema); } catch (Exception e) { @@ -416,19 +433,41 @@ public Option getLatestCommitMetadata() { /** * Read the parquet schema from a parquet File. */ - public MessageType readSchemaFromBaseFile(Path parquetFilePath) throws IOException { + public MessageType readSchemaFromParquetBaseFile(Path parquetFilePath) throws IOException { LOG.info("Reading schema from " + parquetFilePath); FileSystem fs = metaClient.getRawFs(); - if (!fs.exists(parquetFilePath)) { - throw new IllegalArgumentException( - "Failed to read schema from data file " + parquetFilePath + ". File does not exist."); - } ParquetMetadata fileFooter = ParquetFileReader.readFooter(fs.getConf(), parquetFilePath, ParquetMetadataConverter.NO_FILTER); return fileFooter.getFileMetaData().getSchema(); } + /** + * Read the parquet schema from a HFile. + */ + public MessageType readSchemaFromHFileBaseFile(Path hFilePath) throws IOException { + LOG.info("Reading schema from " + hFilePath); + + FileSystem fs = metaClient.getRawFs(); + CacheConfig cacheConfig = new CacheConfig(fs.getConf()); + HoodieHFileReader hFileReader = new HoodieHFileReader<>(fs.getConf(), hFilePath, cacheConfig); + + return convertAvroSchemaToParquet(hFileReader.getSchema()); + } + + + /** + * Read the parquet schema from a ORC file. + */ + public MessageType readSchemaFromORCBaseFile(Path orcFilePath) throws IOException { + LOG.info("Reading schema from " + orcFilePath); + + FileSystem fs = metaClient.getRawFs(); + HoodieOrcReader orcReader = new HoodieOrcReader<>(fs.getConf(), orcFilePath); + + return convertAvroSchemaToParquet(orcReader.getSchema()); + } + /** * Read schema from a data file from the last compaction commit done. 
* @throws Exception @@ -445,7 +484,7 @@ public MessageType readSchemaFromLastCompaction(Option lastCompac String filePath = compactionMetadata.getFileIdAndFullPaths(metaClient.getBasePath()).values().stream().findAny() .orElseThrow(() -> new IllegalArgumentException("Could not find any data file written for compaction " + lastCompactionCommit + ", could not get schema for table " + metaClient.getBasePath())); - return readSchemaFromBaseFile(new Path(filePath)); + return readSchemaFromBaseFile(filePath); } /** @@ -477,4 +516,18 @@ public static MessageType readSchemaFromLogFile(FileSystem fs, Path path) throws } return null; } + + public boolean isHasOperationField() { + return hasOperationField; + } + + private boolean hasOperationField() { + try { + Schema tableAvroSchema = getTableAvroSchemaFromDataFile(); + return tableAvroSchema.getField(HoodieRecord.OPERATION_METADATA_FIELD) != null; + } catch (Exception e) { + LOG.info(String.format("Failed to read operation field from avro schema (%s)", e.getMessage())); + return false; + } + } } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/log/AbstractHoodieLogRecordReader.java b/hudi-common/src/main/java/org/apache/hudi/common/table/log/AbstractHoodieLogRecordReader.java index d495badeca4eb..fa5117e41fa76 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/log/AbstractHoodieLogRecordReader.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/log/AbstractHoodieLogRecordReader.java @@ -18,6 +18,7 @@ package org.apache.hudi.common.table.log; +import org.apache.hudi.common.model.HoodieAvroRecord; import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.model.HoodieLogFile; import org.apache.hudi.common.model.HoodieRecord; @@ -30,7 +31,9 @@ import org.apache.hudi.common.table.log.block.HoodieDeleteBlock; import org.apache.hudi.common.table.log.block.HoodieHFileDataBlock; import org.apache.hudi.common.table.log.block.HoodieLogBlock; +import org.apache.hudi.common.table.log.block.HoodieParquetDataBlock; import org.apache.hudi.common.table.timeline.HoodieTimeline; +import org.apache.hudi.common.util.ClosableIterator; import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.SpillableMapUtils; import org.apache.hudi.common.util.ValidationUtils; @@ -48,8 +51,8 @@ import java.io.IOException; import java.util.ArrayDeque; -import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; import java.util.Deque; import java.util.HashSet; import java.util.List; @@ -174,11 +177,11 @@ protected String getKeyField() { return this.simpleKeyGenFields.get().getKey(); } - public void scan() { + public synchronized void scan() { scan(Option.empty()); } - public void scan(Option> keys) { + public synchronized void scan(Option> keys) { currentInstantLogBlocks = new ArrayDeque<>(); progress = 0.0f; totalLogFiles = new AtomicLong(0); @@ -230,6 +233,7 @@ public void scan(Option> keys) { switch (logBlock.getBlockType()) { case HFILE_DATA_BLOCK: case AVRO_DATA_BLOCK: + case PARQUET_DATA_BLOCK: LOG.info("Reading a data block from file " + logFile.getPath() + " at instant " + logBlock.getLogBlockHeader().get(INSTANT_TIME)); if (isNewInstantBlock(logBlock) && !readBlocksLazily) { @@ -356,17 +360,13 @@ private boolean isNewInstantBlock(HoodieLogBlock logBlock) { * handle it. 
*/ private void processDataBlock(HoodieDataBlock dataBlock, Option> keys) throws Exception { - // TODO (NA) - Implement getRecordItr() in HoodieAvroDataBlock and use that here - List recs = new ArrayList<>(); - if (!keys.isPresent()) { - recs = dataBlock.getRecords(); - } else { - recs = dataBlock.getRecords(keys.get()); - } - totalLogRecords.addAndGet(recs.size()); - for (IndexedRecord rec : recs) { - processNextRecord(createHoodieRecord(rec, this.hoodieTableMetaClient.getTableConfig(), this.payloadClassFQN, - this.preCombineField, this.withOperationField, this.simpleKeyGenFields, this.partitionName)); + try (ClosableIterator recordItr = dataBlock.getRecordItr(keys.orElse(Collections.emptyList()))) { + while (recordItr.hasNext()) { + IndexedRecord record = recordItr.next(); + processNextRecord(createHoodieRecord(record, this.hoodieTableMetaClient.getTableConfig(), this.payloadClassFQN, + this.preCombineField, this.withOperationField, this.simpleKeyGenFields, this.partitionName)); + totalLogRecords.incrementAndGet(); + } } } @@ -382,7 +382,7 @@ private void processDataBlock(HoodieDataBlock dataBlock, Option> ke * @param partitionName - Partition name * @return HoodieRecord created from the IndexedRecord */ - protected HoodieRecord createHoodieRecord(final IndexedRecord rec, final HoodieTableConfig hoodieTableConfig, + protected HoodieAvroRecord createHoodieRecord(final IndexedRecord rec, final HoodieTableConfig hoodieTableConfig, final String payloadClassFQN, final String preCombineField, final boolean withOperationField, final Option> simpleKeyGenFields, @@ -426,6 +426,9 @@ private void processQueuedBlocksForInstant(Deque logBlocks, int case HFILE_DATA_BLOCK: processDataBlock((HoodieHFileDataBlock) lastBlock, keys); break; + case PARQUET_DATA_BLOCK: + processDataBlock((HoodieParquetDataBlock) lastBlock, keys); + break; case DELETE_BLOCK: Arrays.stream(((HoodieDeleteBlock) lastBlock).getKeysToDelete()).forEach(this::processNextDeletedKey); break; diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/log/HoodieLogFileReader.java b/hudi-common/src/main/java/org/apache/hudi/common/table/log/HoodieLogFileReader.java index e6ead54a48d77..07cb36bb169bb 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/log/HoodieLogFileReader.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/log/HoodieLogFileReader.java @@ -31,13 +31,14 @@ import org.apache.hudi.common.table.log.block.HoodieLogBlock; import org.apache.hudi.common.table.log.block.HoodieLogBlock.HeaderMetadataType; import org.apache.hudi.common.table.log.block.HoodieLogBlock.HoodieLogBlockType; +import org.apache.hudi.common.table.log.block.HoodieParquetDataBlock; import org.apache.hudi.common.util.Option; -import org.apache.hudi.common.util.ValidationUtils; import org.apache.hudi.exception.CorruptedLogFileException; import org.apache.hudi.exception.HoodieIOException; import org.apache.hudi.exception.HoodieNotSupportedException; import org.apache.avro.Schema; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.BufferedFSInputStream; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSInputStream; @@ -46,6 +47,8 @@ import org.apache.log4j.LogManager; import org.apache.log4j.Logger; +import javax.annotation.Nullable; + import java.io.EOFException; import java.io.IOException; import java.util.Arrays; @@ -53,6 +56,9 @@ import java.util.Map; import java.util.Objects; +import static org.apache.hudi.common.util.ValidationUtils.checkArgument; +import static 
org.apache.hudi.common.util.ValidationUtils.checkState; + /** * Scans a log file and provides block level iterator on the log file Loads the entire block contents in memory Can emit * either a DataBlock, CommandBlock, DeleteBlock or CorruptBlock (if one is found). @@ -63,6 +69,7 @@ public class HoodieLogFileReader implements HoodieLogFormat.Reader { private static final int BLOCK_SCAN_READ_BUFFER_SIZE = 1024 * 1024; // 1 MB private static final Logger LOG = LogManager.getLogger(HoodieLogFileReader.class); + private final Configuration hadoopConf; private final FSDataInputStream inputStream; private final HoodieLogFile logFile; private final byte[] magicBuffer = new byte[6]; @@ -72,7 +79,7 @@ public class HoodieLogFileReader implements HoodieLogFormat.Reader { private long reverseLogFilePosition; private long lastReverseLogFilePosition; private boolean reverseReader; - private boolean enableInlineReading; + private boolean enableRecordLookups; private boolean closed = false; private transient Thread shutdownThread = null; @@ -88,74 +95,24 @@ public HoodieLogFileReader(FileSystem fs, HoodieLogFile logFile, Schema readerSc } public HoodieLogFileReader(FileSystem fs, HoodieLogFile logFile, Schema readerSchema, int bufferSize, - boolean readBlockLazily, boolean reverseReader, boolean enableInlineReading, + boolean readBlockLazily, boolean reverseReader, boolean enableRecordLookups, String keyField) throws IOException { - FSDataInputStream fsDataInputStream = fs.open(logFile.getPath(), bufferSize); - this.logFile = logFile; - this.inputStream = getFSDataInputStream(fsDataInputStream, fs, bufferSize); + this.hadoopConf = fs.getConf(); + // NOTE: We repackage {@code HoodieLogFile} here to make sure that the provided path + // is prefixed with an appropriate scheme given that we're not propagating the FS + // further + this.logFile = new HoodieLogFile(FSUtils.makeQualified(fs, logFile.getPath()), logFile.getFileSize()); + this.inputStream = getFSDataInputStream(fs, this.logFile, bufferSize); this.readerSchema = readerSchema; this.readBlockLazily = readBlockLazily; this.reverseReader = reverseReader; - this.enableInlineReading = enableInlineReading; + this.enableRecordLookups = enableRecordLookups; this.keyField = keyField; if (this.reverseReader) { - this.reverseLogFilePosition = this.lastReverseLogFilePosition = logFile.getFileSize(); + this.reverseLogFilePosition = this.lastReverseLogFilePosition = this.logFile.getFileSize(); } - addShutDownHook(); - } - - public HoodieLogFileReader(FileSystem fs, HoodieLogFile logFile, Schema readerSchema) throws IOException { - this(fs, logFile, readerSchema, DEFAULT_BUFFER_SIZE, false, false); - } - /** - * Fetch the right {@link FSDataInputStream} to be used by wrapping with required input streams. - * @param fsDataInputStream original instance of {@link FSDataInputStream}. - * @param fs instance of {@link FileSystem} in use. - * @param bufferSize buffer size to be used. - * @return the right {@link FSDataInputStream} as required. 
- */ - private FSDataInputStream getFSDataInputStream(FSDataInputStream fsDataInputStream, FileSystem fs, int bufferSize) { - if (FSUtils.isGCSFileSystem(fs)) { - // in GCS FS, we might need to interceptor seek offsets as we might get EOF exception - return new SchemeAwareFSDataInputStream(getFSDataInputStreamForGCS(fsDataInputStream, bufferSize), true); - } - - if (fsDataInputStream.getWrappedStream() instanceof FSInputStream) { - return new TimedFSDataInputStream(logFile.getPath(), new FSDataInputStream( - new BufferedFSInputStream((FSInputStream) fsDataInputStream.getWrappedStream(), bufferSize))); - } - - // fsDataInputStream.getWrappedStream() maybe a BufferedFSInputStream - // need to wrap in another BufferedFSInputStream the make bufferSize work? - return fsDataInputStream; - } - - /** - * GCS FileSystem needs some special handling for seek and hence this method assists to fetch the right {@link FSDataInputStream} to be - * used by wrapping with required input streams. - * @param fsDataInputStream original instance of {@link FSDataInputStream}. - * @param bufferSize buffer size to be used. - * @return the right {@link FSDataInputStream} as required. - */ - private FSDataInputStream getFSDataInputStreamForGCS(FSDataInputStream fsDataInputStream, int bufferSize) { - // incase of GCS FS, there are two flows. - // a. fsDataInputStream.getWrappedStream() instanceof FSInputStream - // b. fsDataInputStream.getWrappedStream() not an instanceof FSInputStream, but an instance of FSDataInputStream. - // (a) is handled in the first if block and (b) is handled in the second if block. If not, we fallback to original fsDataInputStream - if (fsDataInputStream.getWrappedStream() instanceof FSInputStream) { - return new TimedFSDataInputStream(logFile.getPath(), new FSDataInputStream( - new BufferedFSInputStream((FSInputStream) fsDataInputStream.getWrappedStream(), bufferSize))); - } - - if (fsDataInputStream.getWrappedStream() instanceof FSDataInputStream - && ((FSDataInputStream) fsDataInputStream.getWrappedStream()).getWrappedStream() instanceof FSInputStream) { - FSInputStream inputStream = (FSInputStream)((FSDataInputStream) fsDataInputStream.getWrappedStream()).getWrappedStream(); - return new TimedFSDataInputStream(logFile.getPath(), - new FSDataInputStream(new BufferedFSInputStream(inputStream, bufferSize))); - } - - return fsDataInputStream; + addShutDownHook(); } @Override @@ -181,15 +138,10 @@ private void addShutDownHook() { // TODO : convert content and block length to long by using ByteBuffer, raw byte [] allows // for max of Integer size private HoodieLogBlock readBlock() throws IOException { - - int blocksize; - int type; - HoodieLogBlockType blockType = null; - Map header = null; - + int blockSize; try { // 1 Read the total size of the block - blocksize = (int) inputStream.readLong(); + blockSize = (int) inputStream.readLong(); } catch (EOFException | CorruptedLogFileException e) { // An exception reading any of the above indicates a corrupt block // Create a corrupt block by finding the next MAGIC marker or EOF @@ -197,9 +149,9 @@ private HoodieLogBlock readBlock() throws IOException { } // We may have had a crash which could have written this block partially - // Skip blocksize in the stream and we should either find a sync marker (start of the next + // Skip blockSize in the stream and we should either find a sync marker (start of the next // block) or EOF. If we did not find either of it, then this block is a corrupted block. 
- boolean isCorrupted = isBlockCorrupt(blocksize); + boolean isCorrupted = isBlockCorrupted(blockSize); if (isCorrupted) { return createCorruptBlock(); } @@ -208,71 +160,85 @@ private HoodieLogBlock readBlock() throws IOException { HoodieLogFormat.LogFormatVersion nextBlockVersion = readVersion(); // 3. Read the block type for a log block - if (nextBlockVersion.getVersion() != HoodieLogFormatVersion.DEFAULT_VERSION) { - type = inputStream.readInt(); - - ValidationUtils.checkArgument(type < HoodieLogBlockType.values().length, "Invalid block byte type found " + type); - blockType = HoodieLogBlockType.values()[type]; - } + HoodieLogBlockType blockType = tryReadBlockType(nextBlockVersion); // 4. Read the header for a log block, if present - if (nextBlockVersion.hasHeader()) { - header = HoodieLogBlock.getLogMetadata(inputStream); - } - int contentLength = blocksize; + Map header = + nextBlockVersion.hasHeader() ? HoodieLogBlock.getLogMetadata(inputStream) : null; + // 5. Read the content length for the content - if (nextBlockVersion.getVersion() != HoodieLogFormatVersion.DEFAULT_VERSION) { - contentLength = (int) inputStream.readLong(); - } + // Fallback to full-block size if no content-length + // TODO replace w/ hasContentLength + int contentLength = + nextBlockVersion.getVersion() != HoodieLogFormatVersion.DEFAULT_VERSION ? (int) inputStream.readLong() : blockSize; // 6. Read the content or skip content based on IO vs Memory trade-off by client - // TODO - have a max block size and reuse this buffer in the ByteBuffer - // (hard to guess max block size for now) long contentPosition = inputStream.getPos(); - byte[] content = HoodieLogBlock.readOrSkipContent(inputStream, contentLength, readBlockLazily); + boolean shouldReadLazily = readBlockLazily && nextBlockVersion.getVersion() != HoodieLogFormatVersion.DEFAULT_VERSION; + Option content = HoodieLogBlock.tryReadContent(inputStream, contentLength, shouldReadLazily); // 7. Read footer if any - Map footer = null; - if (nextBlockVersion.hasFooter()) { - footer = HoodieLogBlock.getLogMetadata(inputStream); - } + Map footer = + nextBlockVersion.hasFooter() ? HoodieLogBlock.getLogMetadata(inputStream) : null; // 8. Read log block length, if present. This acts as a reverse pointer when traversing a // log file in reverse - @SuppressWarnings("unused") - long logBlockLength = 0; if (nextBlockVersion.hasLogBlockLength()) { - logBlockLength = inputStream.readLong(); + inputStream.readLong(); } // 9. 
Read the log block end position in the log file long blockEndPos = inputStream.getPos(); + HoodieLogBlock.HoodieLogBlockContentLocation logBlockContentLoc = + new HoodieLogBlock.HoodieLogBlockContentLocation(hadoopConf, logFile, contentPosition, contentLength, blockEndPos); + switch (Objects.requireNonNull(blockType)) { - // based on type read the block case AVRO_DATA_BLOCK: if (nextBlockVersion.getVersion() == HoodieLogFormatVersion.DEFAULT_VERSION) { - return HoodieAvroDataBlock.getBlock(content, readerSchema); + return HoodieAvroDataBlock.getBlock(content.get(), readerSchema); } else { - return new HoodieAvroDataBlock(logFile, inputStream, Option.ofNullable(content), readBlockLazily, - contentPosition, contentLength, blockEndPos, readerSchema, header, footer, keyField); + return new HoodieAvroDataBlock(inputStream, content, readBlockLazily, logBlockContentLoc, + Option.ofNullable(readerSchema), header, footer, keyField); } + case HFILE_DATA_BLOCK: - return new HoodieHFileDataBlock(logFile, inputStream, Option.ofNullable(content), readBlockLazily, - contentPosition, contentLength, blockEndPos, readerSchema, - header, footer, enableInlineReading, keyField); + checkState(nextBlockVersion.getVersion() != HoodieLogFormatVersion.DEFAULT_VERSION, + String.format("HFile block could not be of version (%d)", HoodieLogFormatVersion.DEFAULT_VERSION)); + + return new HoodieHFileDataBlock(inputStream, content, readBlockLazily, logBlockContentLoc, + Option.ofNullable(readerSchema), header, footer, enableRecordLookups); + + case PARQUET_DATA_BLOCK: + checkState(nextBlockVersion.getVersion() != HoodieLogFormatVersion.DEFAULT_VERSION, + String.format("Parquet block could not be of version (%d)", HoodieLogFormatVersion.DEFAULT_VERSION)); + + return new HoodieParquetDataBlock(inputStream, content, readBlockLazily, logBlockContentLoc, + Option.ofNullable(readerSchema), header, footer, keyField); + case DELETE_BLOCK: - return HoodieDeleteBlock.getBlock(logFile, inputStream, Option.ofNullable(content), readBlockLazily, - contentPosition, contentLength, blockEndPos, header, footer); + return new HoodieDeleteBlock(content, inputStream, readBlockLazily, Option.of(logBlockContentLoc), header, footer); + case COMMAND_BLOCK: - return HoodieCommandBlock.getBlock(logFile, inputStream, Option.ofNullable(content), readBlockLazily, - contentPosition, contentLength, blockEndPos, header, footer); + return new HoodieCommandBlock(content, inputStream, readBlockLazily, Option.of(logBlockContentLoc), header, footer); + default: throw new HoodieNotSupportedException("Unsupported Block " + blockType); } } + @Nullable + private HoodieLogBlockType tryReadBlockType(HoodieLogFormat.LogFormatVersion blockVersion) throws IOException { + if (blockVersion.getVersion() == HoodieLogFormatVersion.DEFAULT_VERSION) { + return null; + } + + int type = inputStream.readInt(); + checkArgument(type < HoodieLogBlockType.values().length, "Invalid block byte type found " + type); + return HoodieLogBlockType.values()[type]; + } + private HoodieLogBlock createCorruptBlock() throws IOException { LOG.info("Log " + logFile + " has a corrupted block at " + inputStream.getPos()); long currentPos = inputStream.getPos(); @@ -282,15 +248,25 @@ private HoodieLogBlock createCorruptBlock() throws IOException { LOG.info("Next available block in " + logFile + " starts at " + nextBlockOffset); int corruptedBlockSize = (int) (nextBlockOffset - currentPos); long contentPosition = inputStream.getPos(); - byte[] corruptedBytes = 
HoodieLogBlock.readOrSkipContent(inputStream, corruptedBlockSize, readBlockLazily); - return HoodieCorruptBlock.getBlock(logFile, inputStream, Option.ofNullable(corruptedBytes), readBlockLazily, - contentPosition, corruptedBlockSize, nextBlockOffset, new HashMap<>(), new HashMap<>()); + Option<byte[]> corruptedBytes = HoodieLogBlock.tryReadContent(inputStream, corruptedBlockSize, readBlockLazily); + HoodieLogBlock.HoodieLogBlockContentLocation logBlockContentLoc = + new HoodieLogBlock.HoodieLogBlockContentLocation(hadoopConf, logFile, contentPosition, corruptedBlockSize, nextBlockOffset); + return new HoodieCorruptBlock(corruptedBytes, inputStream, readBlockLazily, Option.of(logBlockContentLoc), new HashMap<>(), new HashMap<>()); } - private boolean isBlockCorrupt(int blocksize) throws IOException { + private boolean isBlockCorrupted(int blocksize) throws IOException { long currentPos = inputStream.getPos(); + long blockSizeFromFooter; + try { - inputStream.seek(currentPos + blocksize); + // Check whether the block size recorded in the footer matches the one in the header, + // by seeking to the trailing long of the block and reading it. We do not seek to `currentPos + blocksize`, + // which can be the file size for the last block in the file, causing an EOFException + // for some FSDataInputStream implementations + inputStream.seek(currentPos + blocksize - Long.BYTES); + // Block size in the footer includes the magic header, which the header does not include. + // So we have to shorten the footer block size by the size of the magic header + blockSizeFromFooter = inputStream.readLong() - magicBuffer.length; } catch (EOFException e) { LOG.info("Found corrupted block in file " + logFile + " with block size(" + blocksize + ") running past EOF"); // this is corrupt @@ -301,19 +277,13 @@ private boolean isBlockCorrupt(int blocksize) throws IOException { return true; } - // check if the blocksize mentioned in the footer is the same as the header; by seeking back the length of a long - // the backward seek does not incur additional IO as {@link org.apache.hadoop.hdfs.DFSInputStream#seek()} - // only moves the index. actual IO happens on the next read operation - inputStream.seek(inputStream.getPos() - Long.BYTES); - // Block size in the footer includes the magic header, which the header does not include. - // So we have to shorten the footer block size by the size of magic hash - long blockSizeFromFooter = inputStream.readLong() - magicBuffer.length; if (blocksize != blockSizeFromFooter) { LOG.info("Found corrupted block in file " + logFile + ". Header block size(" + blocksize - + ") did not match the footer block size(" + blockSizeFromFooter + ")"); + + ") did not match the footer block size(" + blockSizeFromFooter + ")"); inputStream.seek(currentPos); return true; } + try { readMagic(); // all good - either we found the sync marker or EOF. Reset position and continue @@ -481,4 +451,59 @@ public long moveToPrev() throws IOException { public void remove() { throw new UnsupportedOperationException("Remove not supported for HoodieLogFileReader"); }
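The rewritten isBlockCorrupted above cross-checks the block length that is written twice per block: once in the header, and once as the trailing long of the block, where it also counts the magic bytes. A minimal standalone sketch of that cross-check, assuming a seekable file and a hypothetical MAGIC constant standing in for the reader's magicBuffer:

    import java.io.EOFException;
    import java.io.IOException;
    import java.io.RandomAccessFile;

    public class BlockSizeCrossCheck {
      // Hypothetical magic marker; stands in for the reader's magicBuffer
      private static final byte[] MAGIC = {'#', 'H', 'U', 'D', 'I', '#'};

      /** Returns true if the header-recorded block size disagrees with the footer-recorded one. */
      static boolean isBlockCorrupted(RandomAccessFile file, int headerBlockSize) throws IOException {
        long currentPos = file.getFilePointer();
        try {
          // Seek straight to the trailing long rather than to currentPos + headerBlockSize,
          // which for the last block equals the file size and can trip an EOFException
          file.seek(currentPos + headerBlockSize - Long.BYTES);
          // The footer length counts the magic bytes, the header length does not
          long blockSizeFromFooter = file.readLong() - MAGIC.length;
          return headerBlockSize != blockSizeFromFooter;
        } catch (EOFException e) {
          return true; // block runs past EOF: corrupted
        } finally {
          file.seek(currentPos); // restore the read position either way
        }
      }
    }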
+ + /** + * Fetch the right {@link FSDataInputStream} to be used, wrapping it with the required input streams. + * @param fs instance of {@link FileSystem} in use. + * @param bufferSize buffer size to be used. + * @return the right {@link FSDataInputStream} as required. + */ + private static FSDataInputStream getFSDataInputStream(FileSystem fs, + HoodieLogFile logFile, + int bufferSize) throws IOException { + FSDataInputStream fsDataInputStream = fs.open(logFile.getPath(), bufferSize); + + if (FSUtils.isGCSFileSystem(fs)) { + // in GCS FS, we might need to intercept seek offsets, as we might otherwise get an EOF exception + return new SchemeAwareFSDataInputStream(getFSDataInputStreamForGCS(fsDataInputStream, logFile, bufferSize), true); + } + + if (fsDataInputStream.getWrappedStream() instanceof FSInputStream) { + return new TimedFSDataInputStream(logFile.getPath(), new FSDataInputStream( + new BufferedFSInputStream((FSInputStream) fsDataInputStream.getWrappedStream(), bufferSize))); + } + + // fsDataInputStream.getWrappedStream() may be a BufferedFSInputStream; + // do we need to wrap it in another BufferedFSInputStream to make bufferSize take effect? + return fsDataInputStream; + } + + /** + * GCS FileSystem needs special handling for seeks, hence this method assists in fetching the right {@link FSDataInputStream} to be + * used, wrapping it with the required input streams. + * @param fsDataInputStream original instance of {@link FSDataInputStream}. + * @param bufferSize buffer size to be used. + * @return the right {@link FSDataInputStream} as required. + */ + private static FSDataInputStream getFSDataInputStreamForGCS(FSDataInputStream fsDataInputStream, + HoodieLogFile logFile, + int bufferSize) { + // In case of GCS FS, there are two flows: + // a. fsDataInputStream.getWrappedStream() is an instance of FSInputStream + // b. fsDataInputStream.getWrappedStream() is not an instance of FSInputStream, but an instance of FSDataInputStream. + // (a) is handled in the first if block and (b) is handled in the second if block. Otherwise, we fall back to the original fsDataInputStream + if (fsDataInputStream.getWrappedStream() instanceof FSInputStream) { + return new TimedFSDataInputStream(logFile.getPath(), new FSDataInputStream( + new BufferedFSInputStream((FSInputStream) fsDataInputStream.getWrappedStream(), bufferSize))); + } + + if (fsDataInputStream.getWrappedStream() instanceof FSDataInputStream + && ((FSDataInputStream) fsDataInputStream.getWrappedStream()).getWrappedStream() instanceof FSInputStream) { + FSInputStream inputStream = (FSInputStream)((FSDataInputStream) fsDataInputStream.getWrappedStream()).getWrappedStream(); + return new TimedFSDataInputStream(logFile.getPath(), + new FSDataInputStream(new BufferedFSInputStream(inputStream, bufferSize))); + } + + return fsDataInputStream; + } } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/log/HoodieLogFormatWriter.java b/hudi-common/src/main/java/org/apache/hudi/common/table/log/HoodieLogFormatWriter.java index 1c33b81246c58..8dbe85efd1164 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/log/HoodieLogFormatWriter.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/log/HoodieLogFormatWriter.java @@ -60,13 +60,6 @@ public class HoodieLogFormatWriter implements HoodieLogFormat.Writer { private static final String APPEND_UNAVAILABLE_EXCEPTION_MESSAGE = "not sufficiently replicated yet"; - /** - * @param fs - * @param logFile - * @param bufferSize - * @param replication - * @param sizeThreshold - */ HoodieLogFormatWriter(FileSystem fs, HoodieLogFile logFile, Integer bufferSize, Short replication, Long sizeThreshold, String rolloverLogWriteToken) { this.fs = fs; this.logFile = logFile; diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/log/HoodieMergedLogRecordScanner.java
b/hudi-common/src/main/java/org/apache/hudi/common/table/log/HoodieMergedLogRecordScanner.java index 2e47e695d3144..d0ab73ab01552 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/log/HoodieMergedLogRecordScanner.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/log/HoodieMergedLogRecordScanner.java @@ -19,6 +19,7 @@ package org.apache.hudi.common.table.log; import org.apache.hudi.common.config.HoodieCommonConfig; +import org.apache.hudi.common.model.HoodieAvroRecord; import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.model.HoodieOperation; import org.apache.hudi.common.model.HoodieRecord; @@ -144,7 +145,7 @@ protected void processNextRecord(HoodieRecord hoo HoodieRecordPayload combinedValue = hoodieRecord.getData().preCombine(oldValue); boolean choosePrev = combinedValue.equals(oldValue); HoodieOperation operation = choosePrev ? oldRecord.getOperation() : hoodieRecord.getOperation(); - records.put(key, new HoodieRecord<>(new HoodieKey(key, hoodieRecord.getPartitionPath()), combinedValue, operation)); + records.put(key, new HoodieAvroRecord<>(new HoodieKey(key, hoodieRecord.getPartitionPath()), combinedValue, operation)); } else { // Put the record as is records.put(key, hoodieRecord); diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieAvroDataBlock.java b/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieAvroDataBlock.java index 1d3f5f3b01c56..e7f183fafcdd4 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieAvroDataBlock.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieAvroDataBlock.java @@ -18,13 +18,6 @@ package org.apache.hudi.common.table.log.block; -import org.apache.hudi.avro.HoodieAvroUtils; -import org.apache.hudi.common.fs.SizeAwareDataInputStream; -import org.apache.hudi.common.model.HoodieLogFile; -import org.apache.hudi.common.model.HoodieRecord; -import org.apache.hudi.common.util.Option; -import org.apache.hudi.exception.HoodieIOException; - import org.apache.avro.Schema; import org.apache.avro.generic.GenericDatumReader; import org.apache.avro.generic.GenericDatumWriter; @@ -36,59 +29,63 @@ import org.apache.avro.io.Encoder; import org.apache.avro.io.EncoderFactory; import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hudi.common.fs.SizeAwareDataInputStream; +import org.apache.hudi.common.model.HoodieRecord; +import org.apache.hudi.common.util.ClosableIterator; +import org.apache.hudi.common.util.Option; +import org.apache.hudi.exception.HoodieIOException; +import javax.annotation.Nonnull; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.DataInputStream; import java.io.DataOutputStream; import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; +import java.util.Collections; import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; +import java.util.zip.DeflaterOutputStream; +import java.util.zip.InflaterInputStream; -import javax.annotation.Nonnull; +import static org.apache.hudi.common.util.ValidationUtils.checkState; /** * HoodieAvroDataBlock contains a list of records serialized using Avro. It is used with the Parquet base file format. 
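The HoodieAvroDataBlock below serializes its payload as a small length-prefixed layout: a format-version int, a record-count int, then one (size, Avro-binary bytes) pair per record. A byte-level sketch of walking that layout, assuming a block version that records the count (older DEFAULT_VERSION blocks do not, hence the hasRecordCount() guard in the code) and leaving the Avro decoding itself aside:

    import java.io.ByteArrayInputStream;
    import java.io.DataInputStream;
    import java.io.IOException;
    import java.util.ArrayList;
    import java.util.List;

    public class AvroBlockLayout {
      /** Splits a serialized Avro data block into the raw per-record byte arrays. */
      static List<byte[]> splitRecords(byte[] content) throws IOException {
        DataInputStream dis = new DataInputStream(new ByteArrayInputStream(content));
        int version = dis.readInt();       // 1. data block format version (unused in this sketch)
        int totalRecords = dis.readInt();  // 2. number of records that follow
        List<byte[]> records = new ArrayList<>(totalRecords);
        for (int i = 0; i < totalRecords; i++) {
          int size = dis.readInt();        // 3. length prefix of the next record,
          byte[] payload = new byte[size]; //    followed by its Avro-binary bytes,
          dis.readFully(payload);          //    decodable with a BinaryDecoder
          records.add(payload);
        }
        return records;
      }
    }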
*/ public class HoodieAvroDataBlock extends HoodieDataBlock { - private ThreadLocal encoderCache = new ThreadLocal<>(); - private ThreadLocal decoderCache = new ThreadLocal<>(); - - public HoodieAvroDataBlock(@Nonnull Map logBlockHeader, - @Nonnull Map logBlockFooter, - @Nonnull Option blockContentLocation, @Nonnull Option content, - FSDataInputStream inputStream, boolean readBlockLazily) { - super(logBlockHeader, logBlockFooter, blockContentLocation, content, inputStream, readBlockLazily); + private final ThreadLocal encoderCache = new ThreadLocal<>(); + + public HoodieAvroDataBlock(FSDataInputStream inputStream, + Option content, + boolean readBlockLazily, + HoodieLogBlockContentLocation logBlockContentLocation, + Option readerSchema, + Map header, + Map footer, + String keyField) { + super(content, inputStream, readBlockLazily, Option.of(logBlockContentLocation), readerSchema, header, footer, keyField, false); } - public HoodieAvroDataBlock(HoodieLogFile logFile, FSDataInputStream inputStream, Option content, - boolean readBlockLazily, long position, long blockSize, long blockEndpos, Schema readerSchema, - Map header, Map footer, String keyField) { - super(content, inputStream, readBlockLazily, - Option.of(new HoodieLogBlockContentLocation(logFile, position, blockSize, blockEndpos)), readerSchema, header, - footer, keyField); - } - - public HoodieAvroDataBlock(@Nonnull List records, @Nonnull Map header, String keyField) { + public HoodieAvroDataBlock(@Nonnull List records, + @Nonnull Map header, + @Nonnull String keyField) { super(records, header, new HashMap<>(), keyField); } - public HoodieAvroDataBlock(@Nonnull List records, @Nonnull Map header) { - super(records, header, new HashMap<>(), HoodieRecord.RECORD_KEY_METADATA_FIELD); - } - @Override public HoodieLogBlockType getBlockType() { return HoodieLogBlockType.AVRO_DATA_BLOCK; } @Override - protected byte[] serializeRecords() throws IOException { + protected byte[] serializeRecords(List records) throws IOException { Schema schema = new Schema.Parser().parse(super.getLogBlockHeader().get(HeaderMetadataType.SCHEMA)); GenericDatumWriter writer = new GenericDatumWriter<>(schema); ByteArrayOutputStream baos = new ByteArrayOutputStream(); @@ -101,9 +98,7 @@ protected byte[] serializeRecords() throws IOException { output.writeInt(records.size()); // 3. 
Write the records - Iterator itr = records.iterator(); - while (itr.hasNext()) { - IndexedRecord s = itr.next(); + for (IndexedRecord s : records) { ByteArrayOutputStream temp = new ByteArrayOutputStream(); BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(temp, encoderCache.get()); encoderCache.set(encoder); @@ -118,56 +113,84 @@ protected byte[] serializeRecords() throws IOException { output.writeInt(size); // Write the content output.write(temp.toByteArray()); - itr.remove(); } catch (IOException e) { throw new HoodieIOException("IOException converting HoodieAvroDataBlock to bytes", e); } } + encoderCache.remove(); output.close(); return baos.toByteArray(); } // TODO (na) - Break down content into smaller chunks of byte [] to be GC as they are used - // TODO (na) - Implement a recordItr instead of recordList @Override - protected void deserializeRecords() throws IOException { - SizeAwareDataInputStream dis = - new SizeAwareDataInputStream(new DataInputStream(new ByteArrayInputStream(getContent().get()))); + protected ClosableIterator deserializeRecords(byte[] content) throws IOException { + checkState(this.readerSchema != null, "Reader's schema has to be non-null"); + return RecordIterator.getInstance(this, content); + } + + private static class RecordIterator implements ClosableIterator { + private byte[] content; + private final SizeAwareDataInputStream dis; + private final GenericDatumReader reader; + private final ThreadLocal decoderCache = new ThreadLocal<>(); - // 1. Read version for this data block - int version = dis.readInt(); - HoodieAvroDataBlockVersion logBlockVersion = new HoodieAvroDataBlockVersion(version); + private int totalRecords = 0; + private int readRecords = 0; - // Get schema from the header - Schema writerSchema = new Schema.Parser().parse(super.getLogBlockHeader().get(HeaderMetadataType.SCHEMA)); + private RecordIterator(Schema readerSchema, Schema writerSchema, byte[] content) throws IOException { + this.content = content; - // If readerSchema was not present, use writerSchema - if (schema == null) { - schema = writerSchema; + this.dis = new SizeAwareDataInputStream(new DataInputStream(new ByteArrayInputStream(this.content))); + + // 1. Read version for this data block + int version = this.dis.readInt(); + HoodieAvroDataBlockVersion logBlockVersion = new HoodieAvroDataBlockVersion(version); + + this.reader = new GenericDatumReader<>(writerSchema, readerSchema); + + if (logBlockVersion.hasRecordCount()) { + this.totalRecords = this.dis.readInt(); + } } - GenericDatumReader reader = new GenericDatumReader<>(writerSchema, schema); - // 2. Get the total records - int totalRecords = 0; - if (logBlockVersion.hasRecordCount()) { - totalRecords = dis.readInt(); + public static RecordIterator getInstance(HoodieAvroDataBlock dataBlock, byte[] content) throws IOException { + // Get schema from the header + Schema writerSchema = new Schema.Parser().parse(dataBlock.getLogBlockHeader().get(HeaderMetadataType.SCHEMA)); + return new RecordIterator(dataBlock.readerSchema, writerSchema, content); } - List records = new ArrayList<>(totalRecords); - // 3. 
Read the content - for (int i = 0; i < totalRecords; i++) { - int recordLength = dis.readInt(); - BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(getContent().get(), dis.getNumberOfBytesRead(), - recordLength, decoderCache.get()); - decoderCache.set(decoder); - IndexedRecord record = reader.read(null, decoder); - records.add(record); - dis.skipBytes(recordLength); + @Override + public void close() { + try { + this.dis.close(); + this.decoderCache.remove(); + this.content = null; + } catch (IOException e) { + // ignore + } + } + + @Override + public boolean hasNext() { + return readRecords < totalRecords; + } + + @Override + public IndexedRecord next() { + try { + int recordLength = this.dis.readInt(); + BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(this.content, this.dis.getNumberOfBytesRead(), + recordLength, this.decoderCache.get()); + this.decoderCache.set(decoder); + IndexedRecord record = this.reader.read(null, decoder); + this.dis.skipBytes(recordLength); + this.readRecords++; + return record; + } catch (IOException e) { + throw new HoodieIOException("Unable to convert bytes to record.", e); + } } - dis.close(); - this.records = records; - // Free up content to be GC'd, deflate - deflate(); } //---------------------------------------------------------------------------------------- @@ -183,9 +206,7 @@ protected void deserializeRecords() throws IOException { */ @Deprecated public HoodieAvroDataBlock(List records, Schema schema) { - super(new HashMap<>(), new HashMap<>(), Option.empty(), Option.empty(), null, false); - this.records = records; - this.schema = schema; + super(records, Collections.singletonMap(HeaderMetadataType.SCHEMA, schema.toString()), new HashMap<>(), HoodieRecord.RECORD_KEY_METADATA_FIELD); } /** @@ -201,7 +222,7 @@ public static HoodieAvroDataBlock getBlock(byte[] content, Schema readerSchema) int schemaLength = dis.readInt(); byte[] compressedSchema = new byte[schemaLength]; dis.readFully(compressedSchema, 0, schemaLength); - Schema writerSchema = new Schema.Parser().parse(HoodieAvroUtils.decompress(compressedSchema)); + Schema writerSchema = new Schema.Parser().parse(decompress(compressedSchema)); if (readerSchema == null) { readerSchema = writerSchema; @@ -224,6 +245,33 @@ public static HoodieAvroDataBlock getBlock(byte[] content, Schema readerSchema) return new HoodieAvroDataBlock(records, readerSchema); } + private static byte[] compress(String text) { + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + try { + OutputStream out = new DeflaterOutputStream(baos); + out.write(text.getBytes(StandardCharsets.UTF_8)); + out.close(); + } catch (IOException e) { + throw new HoodieIOException("IOException while compressing text " + text, e); + } + return baos.toByteArray(); + } + + private static String decompress(byte[] bytes) { + InputStream in = new InflaterInputStream(new ByteArrayInputStream(bytes)); + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + try { + byte[] buffer = new byte[8192]; + int len; + while ((len = in.read(buffer)) > 0) { + baos.write(buffer, 0, len); + } + return new String(baos.toByteArray(), StandardCharsets.UTF_8); + } catch (IOException e) { + throw new HoodieIOException("IOException while decompressing text", e); + } + } + @Deprecated public byte[] getBytes(Schema schema) throws IOException { @@ -232,10 +280,15 @@ public byte[] getBytes(Schema schema) throws IOException { DataOutputStream output = new DataOutputStream(baos); // 2. 
Compress and Write schema out - byte[] schemaContent = HoodieAvroUtils.compress(schema.toString()); + byte[] schemaContent = compress(schema.toString()); output.writeInt(schemaContent.length); output.write(schemaContent); + List records = new ArrayList<>(); + try (ClosableIterator recordItr = getRecordItr()) { + recordItr.forEachRemaining(records::add); + } + // 3. Write total number of records output.writeInt(records.size()); diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieCommandBlock.java b/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieCommandBlock.java index 08909233a576b..0ff3a77b5007b 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieCommandBlock.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieCommandBlock.java @@ -18,7 +18,6 @@ package org.apache.hudi.common.table.log.block; -import org.apache.hudi.common.model.HoodieLogFile; import org.apache.hudi.common.util.Option; import org.apache.hadoop.fs.FSDataInputStream; @@ -44,9 +43,9 @@ public HoodieCommandBlock(Map header) { this(Option.empty(), null, false, Option.empty(), header, new HashMap<>()); } - private HoodieCommandBlock(Option content, FSDataInputStream inputStream, boolean readBlockLazily, - Option blockContentLocation, Map header, - Map footer) { + public HoodieCommandBlock(Option content, FSDataInputStream inputStream, boolean readBlockLazily, + Option blockContentLocation, Map header, + Map footer) { super(header, footer, blockContentLocation, content, inputStream, readBlockLazily); this.type = HoodieCommandBlockTypeEnum.values()[Integer.parseInt(header.get(HeaderMetadataType.COMMAND_BLOCK_TYPE))]; @@ -65,12 +64,4 @@ public HoodieLogBlockType getBlockType() { public byte[] getContentBytes() { return new byte[0]; } - - public static HoodieLogBlock getBlock(HoodieLogFile logFile, FSDataInputStream inputStream, Option content, - boolean readBlockLazily, long position, long blockSize, long blockEndPos, Map header, - Map footer) { - - return new HoodieCommandBlock(content, inputStream, readBlockLazily, - Option.of(new HoodieLogBlockContentLocation(logFile, position, blockSize, blockEndPos)), header, footer); - } } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieCorruptBlock.java b/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieCorruptBlock.java index 873be1315e50b..3e4f571588684 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieCorruptBlock.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieCorruptBlock.java @@ -18,7 +18,6 @@ package org.apache.hudi.common.table.log.block; -import org.apache.hudi.common.model.HoodieLogFile; import org.apache.hudi.common.util.Option; import org.apache.hadoop.fs.FSDataInputStream; @@ -32,15 +31,14 @@ */ public class HoodieCorruptBlock extends HoodieLogBlock { - private HoodieCorruptBlock(Option corruptedBytes, FSDataInputStream inputStream, boolean readBlockLazily, - Option blockContentLocation, Map header, - Map footer) { + public HoodieCorruptBlock(Option corruptedBytes, FSDataInputStream inputStream, boolean readBlockLazily, + Option blockContentLocation, Map header, + Map footer) { super(header, footer, blockContentLocation, corruptedBytes, inputStream, readBlockLazily); } @Override public byte[] getContentBytes() throws IOException { - if (!getContent().isPresent() && readBlockLazily) { // read content from disk inflate(); @@ -53,11 +51,4 @@ 
public HoodieLogBlockType getBlockType() { return HoodieLogBlockType.CORRUPT_BLOCK; } - public static HoodieLogBlock getBlock(HoodieLogFile logFile, FSDataInputStream inputStream, - Option corruptedBytes, boolean readBlockLazily, long position, long blockSize, long blockEndPos, - Map header, Map footer) { - - return new HoodieCorruptBlock(corruptedBytes, inputStream, readBlockLazily, - Option.of(new HoodieLogBlockContentLocation(logFile, position, blockSize, blockEndPos)), header, footer); - } } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieDataBlock.java b/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieDataBlock.java index 66c9571487dff..846b8d36a5091 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieDataBlock.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieDataBlock.java @@ -18,25 +18,28 @@ package org.apache.hudi.common.table.log.block; -import org.apache.hudi.common.model.HoodieRecord; +import org.apache.hudi.common.util.ClosableIterator; import org.apache.hudi.common.util.Option; -import org.apache.hudi.exception.HoodieException; import org.apache.hudi.exception.HoodieIOException; import org.apache.avro.Schema; import org.apache.avro.generic.IndexedRecord; import org.apache.hadoop.fs.FSDataInputStream; -import javax.annotation.Nonnull; - import java.io.IOException; +import java.util.HashSet; +import java.util.Iterator; import java.util.List; import java.util.Map; +import java.util.Set; +import java.util.function.Function; + +import static org.apache.hudi.common.util.ValidationUtils.checkState; /** * DataBlock contains a list of records serialized using formats compatible with the base file format. * For each base file format there is a corresponding DataBlock format. - * + *

* The Datablock contains: * 1. Data Block version * 2. Total number of records in the block @@ -44,125 +47,225 @@ */ public abstract class HoodieDataBlock extends HoodieLogBlock { - protected List records; - protected Schema schema; - protected String keyField; + // TODO rebase records/content to leverage Either to warrant + // that they are mutex (used by read/write flows respectively) + private final Option> records; - public HoodieDataBlock(@Nonnull Map logBlockHeader, - @Nonnull Map logBlockFooter, - @Nonnull Option blockContentLocation, @Nonnull Option content, - FSDataInputStream inputStream, boolean readBlockLazily) { - super(logBlockHeader, logBlockFooter, blockContentLocation, content, inputStream, readBlockLazily); - this.keyField = HoodieRecord.RECORD_KEY_METADATA_FIELD; - } + /** + * Key field's name w/in the record's schema + */ + private final String keyFieldName; - public HoodieDataBlock(@Nonnull List records, @Nonnull Map header, - @Nonnull Map footer, String keyField) { - this(header, footer, Option.empty(), Option.empty(), null, false); - this.records = records; - this.schema = new Schema.Parser().parse(super.getLogBlockHeader().get(HeaderMetadataType.SCHEMA)); - this.keyField = keyField; - } + private final boolean enablePointLookups; - protected HoodieDataBlock(Option content, @Nonnull FSDataInputStream inputStream, boolean readBlockLazily, - Option blockContentLocation, Schema readerSchema, - @Nonnull Map headers, @Nonnull Map footer, String keyField) { - this(headers, footer, blockContentLocation, content, inputStream, readBlockLazily); - this.schema = readerSchema; - this.keyField = keyField; - } + protected final Schema readerSchema; /** - * Util method to get a data block for the requested type. - * - * @param logDataBlockFormat - Data block type - * @param recordList - List of records that goes in the data block - * @param header - data block header - * @return Data block of the requested type. + * NOTE: This ctor is used on the write-path (ie when records ought to be written into the log) */ - public static HoodieLogBlock getBlock(HoodieLogBlockType logDataBlockFormat, List recordList, - Map header) { - return getBlock(logDataBlockFormat, recordList, header, HoodieRecord.RECORD_KEY_METADATA_FIELD); + public HoodieDataBlock(List records, + Map header, + Map footer, + String keyFieldName) { + super(header, footer, Option.empty(), Option.empty(), null, false); + this.records = Option.of(records); + this.keyFieldName = keyFieldName; + // If no reader-schema has been provided assume writer-schema as one + this.readerSchema = getWriterSchema(super.getLogBlockHeader()); + this.enablePointLookups = false; } /** - * Util method to get a data block for the requested type. - * - * @param logDataBlockFormat - Data block type - * @param recordList - List of records that goes in the data block - * @param header - data block header - * @param keyField - FieldId to get the key from the records - * @return Data block of the requested type. 
+ * NOTE: This ctor is used on the write-path (ie when records ought to be written into the log) */ - public static HoodieLogBlock getBlock(HoodieLogBlockType logDataBlockFormat, List recordList, - Map header, String keyField) { - switch (logDataBlockFormat) { - case AVRO_DATA_BLOCK: - return new HoodieAvroDataBlock(recordList, header, keyField); - case HFILE_DATA_BLOCK: - return new HoodieHFileDataBlock(recordList, header, keyField); - default: - throw new HoodieException("Data block format " + logDataBlockFormat + " not implemented"); - } + protected HoodieDataBlock(Option content, + FSDataInputStream inputStream, + boolean readBlockLazily, + Option blockContentLocation, + Option readerSchema, + Map headers, + Map footer, + String keyFieldName, + boolean enablePointLookups) { + super(headers, footer, blockContentLocation, content, inputStream, readBlockLazily); + this.records = Option.empty(); + this.keyFieldName = keyFieldName; + // If no reader-schema has been provided assume writer-schema as one + this.readerSchema = readerSchema.orElseGet(() -> getWriterSchema(super.getLogBlockHeader())); + this.enablePointLookups = enablePointLookups; } @Override public byte[] getContentBytes() throws IOException { // In case this method is called before realizing records from content - if (getContent().isPresent()) { - return getContent().get(); - } else if (readBlockLazily && !getContent().isPresent() && records == null) { - // read block lazily - createRecordsFromContentBytes(); + Option content = getContent(); + + checkState(content.isPresent() || records.isPresent(), "Block is in invalid state"); + + if (content.isPresent()) { + return content.get(); } - return serializeRecords(); + return serializeRecords(records.get()); } - public abstract HoodieLogBlockType getBlockType(); + protected static Schema getWriterSchema(Map logBlockHeader) { + return new Schema.Parser().parse(logBlockHeader.get(HeaderMetadataType.SCHEMA)); + } - public List getRecords() { - if (records == null) { - try { - // in case records are absent, read content lazily and then convert to IndexedRecords - createRecordsFromContentBytes(); - } catch (IOException io) { - throw new HoodieIOException("Unable to convert content bytes to records", io); - } + /** + * Returns all the records iterator contained w/in this block. + */ + public final ClosableIterator getRecordItr() { + if (records.isPresent()) { + return list2Iterator(records.get()); } - return records; + try { + // in case records are absent, read content lazily and then convert to IndexedRecords + return readRecordsFromBlockPayload(); + } catch (IOException io) { + throw new HoodieIOException("Unable to convert content bytes to records", io); + } + } + + public Schema getSchema() { + return readerSchema; } /** * Batch get of keys of interest. Implementation can choose to either do full scan and return matched entries or * do a seek based parsing and return matched entries. + * * @param keys keys of interest. * @return List of IndexedRecords for the keys of interest. 
- * @throws IOException + * @throws IOException in case of failures encountered when reading/parsing records */ - public List getRecords(List keys) throws IOException { - throw new UnsupportedOperationException("On demand batch get based on interested keys not supported"); - } + public final ClosableIterator getRecordItr(List keys) throws IOException { + boolean fullScan = keys.isEmpty(); + if (enablePointLookups && !fullScan) { + return lookupRecords(keys); + } - public Schema getSchema() { - // if getSchema was invoked before converting byte [] to records - if (records == null) { - getRecords(); + // Otherwise, we fetch all the records and filter out all the records, but the + // ones requested + ClosableIterator allRecords = getRecordItr(); + if (fullScan) { + return allRecords; } - return schema; + + HashSet keySet = new HashSet<>(keys); + return FilteringIterator.getInstance(allRecords, keySet, this::getRecordKey); } - protected void createRecordsFromContentBytes() throws IOException { + protected ClosableIterator readRecordsFromBlockPayload() throws IOException { if (readBlockLazily && !getContent().isPresent()) { // read log block contents from disk inflate(); } - deserializeRecords(); + try { + return deserializeRecords(getContent().get()); + } finally { + // Free up content to be GC'd by deflating the block + deflate(); + } + } + + protected ClosableIterator lookupRecords(List keys) throws IOException { + throw new UnsupportedOperationException( + String.format("Point lookups are not supported by this Data block type (%s)", getBlockType()) + ); + } + + protected abstract byte[] serializeRecords(List records) throws IOException; + + protected abstract ClosableIterator deserializeRecords(byte[] content) throws IOException; + + public abstract HoodieLogBlockType getBlockType(); + + protected Option getKeyField(Schema schema) { + return Option.ofNullable(schema.getField(keyFieldName)); + } + + protected Option getRecordKey(IndexedRecord record) { + return getKeyField(record.getSchema()) + .map(keyField -> record.get(keyField.pos())) + .map(Object::toString); + } + + /** + * Converts the given list to closable iterator. + */ + static ClosableIterator list2Iterator(List list) { + Iterator iterator = list.iterator(); + return new ClosableIterator() { + @Override + public void close() { + // ignored + } + + @Override + public boolean hasNext() { + return iterator.hasNext(); + } + + @Override + public T next() { + return iterator.next(); + } + }; } - protected abstract byte[] serializeRecords() throws IOException; + // ------------------------------------------------------------------------- + // Inner Class + // ------------------------------------------------------------------------- + + /** + * A {@link ClosableIterator} that supports filtering strategy with given keys. + * User should supply the key extraction function for fetching string format keys. 
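The FilteringIterator below is a plain look-ahead iterator: hasNext() advances the inner iterator until an element's extracted key is in the wanted set, and next() hands back the stashed match. A generic standalone sketch of the same pattern (without the ClosableIterator plumbing, and with the same contract that hasNext() must be called before each next()):

    import java.util.Arrays;
    import java.util.HashSet;
    import java.util.Iterator;
    import java.util.Set;
    import java.util.function.Function;

    public class KeyFilteringIterator<T> implements Iterator<T> {
      private final Iterator<T> inner;
      private final Set<String> keys;
      private final Function<T, String> keyExtract;
      private T next; // look-ahead slot filled by hasNext()

      KeyFilteringIterator(Iterator<T> inner, Set<String> keys, Function<T, String> keyExtract) {
        this.inner = inner;
        this.keys = keys;
        this.keyExtract = keyExtract;
      }

      @Override
      public boolean hasNext() {
        while (inner.hasNext()) {
          T candidate = inner.next();
          if (keys.contains(keyExtract.apply(candidate))) {
            next = candidate; // stash the match for the following next() call
            return true;
          }
        }
        return false;
      }

      @Override
      public T next() {
        return next;
      }

      public static void main(String[] args) {
        Iterator<String> it = new KeyFilteringIterator<>(
            Arrays.asList("a:1", "b:2", "c:3").iterator(),
            new HashSet<>(Arrays.asList("a", "c")),
            s -> s.split(":")[0]);
        while (it.hasNext()) {
          System.out.println(it.next()); // prints "a:1" then "c:3"
        }
      }
    }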
+ * + * @param the element type + */ + private static class FilteringIterator implements ClosableIterator { + private final ClosableIterator nested; // nested iterator + + private final Set keys; // the filtering keys + private final Function> keyExtract; // function to extract the key + + private T next; + + private FilteringIterator(ClosableIterator nested, Set keys, Function> keyExtract) { + this.nested = nested; + this.keys = keys; + this.keyExtract = keyExtract; + } + + public static FilteringIterator getInstance( + ClosableIterator nested, + Set keys, + Function> keyExtract) { + return new FilteringIterator<>(nested, keys, keyExtract); + } + + @Override + public void close() { + this.nested.close(); + } + + @Override + public boolean hasNext() { + while (this.nested.hasNext()) { + this.next = this.nested.next(); + if (keys.contains(keyExtract.apply(this.next).orElse(null))) { + return true; + } + } + return false; + } - protected abstract void deserializeRecords() throws IOException; + @Override + public T next() { + return this.next; + } + } } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieDeleteBlock.java b/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieDeleteBlock.java index 45534f7b51013..01159ab72dffe 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieDeleteBlock.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieDeleteBlock.java @@ -20,7 +20,6 @@ import org.apache.hudi.common.fs.SizeAwareDataInputStream; import org.apache.hudi.common.model.HoodieKey; -import org.apache.hudi.common.model.HoodieLogFile; import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.SerializationUtils; import org.apache.hudi.exception.HoodieIOException; @@ -47,7 +46,7 @@ public HoodieDeleteBlock(HoodieKey[] keysToDelete, Map content, FSDataInputStream inputStream, boolean readBlockLazily, + public HoodieDeleteBlock(Option content, FSDataInputStream inputStream, boolean readBlockLazily, Option blockContentLocation, Map header, Map footer) { super(header, footer, blockContentLocation, content, inputStream, readBlockLazily); @@ -55,11 +54,12 @@ private HoodieDeleteBlock(Option content, FSDataInputStream inputStream, @Override public byte[] getContentBytes() throws IOException { + Option content = getContent(); // In case this method is called before realizing keys from content - if (getContent().isPresent()) { - return getContent().get(); - } else if (readBlockLazily && !getContent().isPresent() && keysToDelete == null) { + if (content.isPresent()) { + return content.get(); + } else if (readBlockLazily && keysToDelete == null) { // read block lazily getKeysToDelete(); } @@ -100,11 +100,4 @@ public HoodieLogBlockType getBlockType() { return HoodieLogBlockType.DELETE_BLOCK; } - public static HoodieLogBlock getBlock(HoodieLogFile logFile, FSDataInputStream inputStream, Option content, - boolean readBlockLazily, long position, long blockSize, long blockEndPos, Map header, - Map footer) throws IOException { - - return new HoodieDeleteBlock(content, inputStream, readBlockLazily, - Option.of(new HoodieLogBlockContentLocation(logFile, position, blockSize, blockEndPos)), header, footer); - } } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieHFileDataBlock.java b/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieHFileDataBlock.java index 7f1fa2aa1d64a..557a0db7cbfad 100644 --- 
a/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieHFileDataBlock.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieHFileDataBlock.java @@ -18,19 +18,7 @@ package org.apache.hudi.common.table.log.block; -import org.apache.hudi.avro.HoodieAvroUtils; -import org.apache.hudi.common.fs.inline.InLineFSUtils; -import org.apache.hudi.common.fs.inline.InLineFileSystem; -import org.apache.hudi.common.model.HoodieLogFile; -import org.apache.hudi.common.model.HoodieRecord; -import org.apache.hudi.common.util.Option; -import org.apache.hudi.common.util.ValidationUtils; -import org.apache.hudi.exception.HoodieIOException; -import org.apache.hudi.io.storage.HoodieHBaseKVComparator; -import org.apache.hudi.io.storage.HoodieHFileReader; - import org.apache.avro.Schema; -import org.apache.avro.Schema.Field; import org.apache.avro.generic.IndexedRecord; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; @@ -42,12 +30,20 @@ import org.apache.hadoop.hbase.io.hfile.HFile; import org.apache.hadoop.hbase.io.hfile.HFileContext; import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder; -import org.apache.hadoop.hbase.util.Pair; +import org.apache.hudi.avro.HoodieAvroUtils; +import org.apache.hudi.common.fs.inline.InLineFSUtils; +import org.apache.hudi.common.fs.inline.InLineFileSystem; +import org.apache.hudi.common.util.ClosableIterator; +import org.apache.hudi.common.util.Option; +import org.apache.hudi.common.util.StringUtils; +import org.apache.hudi.common.util.ValidationUtils; +import org.apache.hudi.exception.HoodieIOException; +import org.apache.hudi.io.storage.HoodieHBaseKVComparator; +import org.apache.hudi.io.storage.HoodieHFileReader; + import org.apache.log4j.LogManager; import org.apache.log4j.Logger; -import javax.annotation.Nonnull; - import java.io.ByteArrayOutputStream; import java.io.IOException; import java.util.Collections; @@ -56,7 +52,8 @@ import java.util.List; import java.util.Map; import java.util.TreeMap; -import java.util.stream.Collectors; + +import static org.apache.hudi.common.util.ValidationUtils.checkState; /** * HoodieHFileDataBlock contains a list of records stored inside an HFile format. 
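One detail of the serialization below: HFile requires its entries in sorted key order, so records are staged in a TreeMap, and records without a key field fall back to a fixed-width, space-padded counter whose lexicographic order matches numeric order (space sorts before every digit in ASCII). A small demonstration of that keying scheme; note this sketch computes the width with Math.log10, while the patch itself uses Math.log, which merely over-allocates padding:

    import java.util.Map;
    import java.util.TreeMap;

    public class PaddedKeyDemo {
      public static void main(String[] args) {
        int total = 1000;
        // Width of the widest decimal key, plus one column of headroom
        int keyWidth = (int) Math.ceil(Math.log10(total)) + 1;
        Map<String, String> sorted = new TreeMap<>();
        for (int id = 0; id < total; id++) {
          // e.g. "   0", "   1", ..., " 999" for keyWidth = 4
          String key = String.format("%" + keyWidth + "s", id);
          sorted.put(key, "record-" + id);
        }
        // TreeMap iteration order now matches the numeric insertion order
        System.out.println(sorted.keySet().iterator().next().trim()); // prints "0"
      }
    }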
It is used with the HFile @@ -64,27 +61,28 @@ */ public class HoodieHFileDataBlock extends HoodieDataBlock { private static final Logger LOG = LogManager.getLogger(HoodieHFileDataBlock.class); - private static Compression.Algorithm compressionAlgorithm = Compression.Algorithm.GZ; - private static int blockSize = 1 * 1024 * 1024; - private boolean enableInlineReading = false; - - public HoodieHFileDataBlock(HoodieLogFile logFile, FSDataInputStream inputStream, Option content, - boolean readBlockLazily, long position, long blockSize, long blockEndpos, - Schema readerSchema, Map header, - Map footer, boolean enableInlineReading, String keyField) { - super(content, inputStream, readBlockLazily, - Option.of(new HoodieLogBlockContentLocation(logFile, position, blockSize, blockEndpos)), - readerSchema, header, footer, keyField); - this.enableInlineReading = enableInlineReading; - } - public HoodieHFileDataBlock(@Nonnull List records, @Nonnull Map header, - String keyField) { - super(records, header, new HashMap<>(), keyField); + private static final int DEFAULT_BLOCK_SIZE = 1024 * 1024; + + private final Option compressionAlgorithm; + + public HoodieHFileDataBlock(FSDataInputStream inputStream, + Option content, + boolean readBlockLazily, + HoodieLogBlockContentLocation logBlockContentLocation, + Option readerSchema, + Map header, + Map footer, + boolean enablePointLookups) { + super(content, inputStream, readBlockLazily, Option.of(logBlockContentLocation), readerSchema, header, footer, HoodieHFileReader.KEY_FIELD_NAME, enablePointLookups); + this.compressionAlgorithm = Option.empty(); } - public HoodieHFileDataBlock(@Nonnull List records, @Nonnull Map header) { - this(records, header, HoodieRecord.RECORD_KEY_METADATA_FIELD); + public HoodieHFileDataBlock(List records, + Map header, + Compression.Algorithm compressionAlgorithm) { + super(records, header, new HashMap<>(), HoodieHFileReader.KEY_FIELD_NAME); + this.compressionAlgorithm = Option.of(compressionAlgorithm); } @Override @@ -93,43 +91,45 @@ public HoodieLogBlockType getBlockType() { } @Override - protected byte[] serializeRecords() throws IOException { - HFileContext context = new HFileContextBuilder().withBlockSize(blockSize).withCompression(compressionAlgorithm) + protected byte[] serializeRecords(List records) throws IOException { + HFileContext context = new HFileContextBuilder() + .withBlockSize(DEFAULT_BLOCK_SIZE) + .withCompression(compressionAlgorithm.get()) .build(); + Configuration conf = new Configuration(); CacheConfig cacheConfig = new CacheConfig(conf); ByteArrayOutputStream baos = new ByteArrayOutputStream(); FSDataOutputStream ostream = new FSDataOutputStream(baos, null); - HFile.Writer writer = HFile.getWriterFactory(conf, cacheConfig) - .withOutputStream(ostream).withFileContext(context).withComparator(new HoodieHBaseKVComparator()).create(); + // Use simple incrementing counter as a key + boolean useIntegerKey = !getRecordKey(records.get(0)).isPresent(); + // This is set here to avoid re-computing this in the loop + int keyWidth = useIntegerKey ? 
(int) Math.ceil(Math.log(records.size())) + 1 : -1; // Serialize records into bytes Map sortedRecordsMap = new TreeMap<>(); Iterator itr = records.iterator(); - boolean useIntegerKey = false; - int key = 0; - int keySize = 0; - Field keyField = records.get(0).getSchema().getField(this.keyField); - if (keyField == null) { - // Missing key metadata field so we should use an integer sequence key - useIntegerKey = true; - keySize = (int) Math.ceil(Math.log(records.size())) + 1; - } + + int id = 0; while (itr.hasNext()) { IndexedRecord record = itr.next(); String recordKey; if (useIntegerKey) { - recordKey = String.format("%" + keySize + "s", key++); + recordKey = String.format("%" + keyWidth + "s", id++); } else { - recordKey = record.get(keyField.pos()).toString(); + recordKey = getRecordKey(record).get(); } - byte[] recordBytes = HoodieAvroUtils.indexedRecordToBytes(record); + + final byte[] recordBytes = serializeRecord(record); ValidationUtils.checkState(!sortedRecordsMap.containsKey(recordKey), "Writing multiple records with same key not supported for " + this.getClass().getName()); sortedRecordsMap.put(recordKey, recordBytes); } + HFile.Writer writer = HFile.getWriterFactory(conf, cacheConfig) + .withOutputStream(ostream).withFileContext(context).withComparator(new HoodieHBaseKVComparator()).create(); + // Write the records sortedRecordsMap.forEach((recordKey, recordBytes) -> { try { @@ -148,65 +148,83 @@ protected byte[] serializeRecords() throws IOException { } @Override - protected void createRecordsFromContentBytes() throws IOException { - if (enableInlineReading) { - getRecords(Collections.emptyList()); - } else { - super.createRecordsFromContentBytes(); - } - } - - @Override - public List getRecords(List keys) throws IOException { - readWithInlineFS(keys); - return records; - } + protected ClosableIterator deserializeRecords(byte[] content) throws IOException { + checkState(readerSchema != null, "Reader's schema has to be non-null"); - private void readWithInlineFS(List keys) throws IOException { - boolean enableFullScan = keys.isEmpty(); // Get schema from the header Schema writerSchema = new Schema.Parser().parse(super.getLogBlockHeader().get(HeaderMetadataType.SCHEMA)); - // If readerSchema was not present, use writerSchema - if (schema == null) { - schema = writerSchema; - } - Configuration conf = new Configuration(); - CacheConfig cacheConf = new CacheConfig(conf); - Configuration inlineConf = new Configuration(); - inlineConf.set("fs." + InLineFileSystem.SCHEME + ".impl", InLineFileSystem.class.getName()); - Path inlinePath = InLineFSUtils.getInlineFilePath( - getBlockContentLocation().get().getLogFile().getPath(), - getBlockContentLocation().get().getLogFile().getPath().getFileSystem(conf).getScheme(), - getBlockContentLocation().get().getContentPositionInLogFile(), - getBlockContentLocation().get().getBlockSize()); - if (!enableFullScan) { - // HFile read will be efficient if keys are sorted, since on storage, records are sorted by key. This will avoid unnecessary seeks. - Collections.sort(keys); - } - HoodieHFileReader reader = new HoodieHFileReader(inlineConf, inlinePath, cacheConf, inlinePath.getFileSystem(inlineConf)); - List> logRecords = enableFullScan ? 
reader.readAllRecords(writerSchema, schema) : - reader.readRecords(keys, schema); - reader.close(); - this.records = logRecords.stream().map(t -> t.getSecond()).collect(Collectors.toList()); + // Read the content + HoodieHFileReader reader = new HoodieHFileReader<>(content); + // Sets up the writer schema + reader.withSchema(writerSchema); + Iterator recordIterator = reader.getRecordIterator(readerSchema); + return new ClosableIterator() { + @Override + public void close() { + reader.close(); + } + + @Override + public boolean hasNext() { + return recordIterator.hasNext(); + } + + @Override + public IndexedRecord next() { + return recordIterator.next(); + } + }; } + // TODO abstract this w/in HoodieDataBlock @Override - protected void deserializeRecords() throws IOException { - // Get schema from the header - Schema writerSchema = new Schema.Parser().parse(super.getLogBlockHeader().get(HeaderMetadataType.SCHEMA)); + protected ClosableIterator lookupRecords(List keys) throws IOException { + HoodieLogBlockContentLocation blockContentLoc = getBlockContentLocation().get(); - // If readerSchema was not present, use writerSchema - if (schema == null) { - schema = writerSchema; - } + // NOTE: It's important to extend Hadoop configuration here to make sure configuration + // is appropriately carried over + Configuration inlineConf = new Configuration(blockContentLoc.getHadoopConf()); + inlineConf.set("fs." + InLineFileSystem.SCHEME + ".impl", InLineFileSystem.class.getName()); - // Read the content - HoodieHFileReader reader = new HoodieHFileReader<>(getContent().get()); - List> records = reader.readAllRecords(writerSchema, schema); - this.records = records.stream().map(t -> t.getSecond()).collect(Collectors.toList()); + Path inlinePath = InLineFSUtils.getInlineFilePath( + blockContentLoc.getLogFile().getPath(), + blockContentLoc.getLogFile().getPath().getFileSystem(inlineConf).getScheme(), + blockContentLoc.getContentPositionInLogFile(), + blockContentLoc.getBlockSize()); + + // HFile read will be efficient if keys are sorted, since on storage, records are sorted by key. This will avoid unnecessary seeks. 
+ Collections.sort(keys); + + final HoodieHFileReader reader = + new HoodieHFileReader<>(inlineConf, inlinePath, new CacheConfig(inlineConf), inlinePath.getFileSystem(inlineConf)); + // Get writer's schema from the header + final ClosableIterator recordIterator = reader.getRecordIterator(keys, readerSchema); + return new ClosableIterator() { + @Override + public boolean hasNext() { + return recordIterator.hasNext(); + } + + @Override + public IndexedRecord next() { + return recordIterator.next(); + } - // Free up content to be GC'd, deflate - deflate(); + @Override + public void close() { + recordIterator.close(); + reader.close(); + } + }; + } + + private byte[] serializeRecord(IndexedRecord record) { + Option keyField = getKeyField(record.getSchema()); + // Reset key value w/in the record to avoid duplicating the key w/in payload + if (keyField.isPresent()) { + record.put(keyField.get().pos(), StringUtils.EMPTY_STRING); + } + return HoodieAvroUtils.indexedRecordToBytes(record); } } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieLogBlock.java b/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieLogBlock.java index 2fbcd992087e2..d514f28ce1c4a 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieLogBlock.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieLogBlock.java @@ -18,15 +18,18 @@ package org.apache.hudi.common.table.log.block; +import org.apache.hadoop.conf.Configuration; import org.apache.hudi.common.model.HoodieLogFile; import org.apache.hudi.common.table.log.HoodieMergedLogRecordScanner; import org.apache.hudi.common.util.Option; +import org.apache.hudi.common.util.TypeUtils; import org.apache.hudi.exception.HoodieException; import org.apache.hudi.exception.HoodieIOException; import org.apache.hadoop.fs.FSDataInputStream; import javax.annotation.Nonnull; +import javax.annotation.Nullable; import java.io.ByteArrayOutputStream; import java.io.DataInputStream; @@ -36,6 +39,8 @@ import java.util.HashMap; import java.util.Map; +import static org.apache.hudi.common.util.ValidationUtils.checkState; + /** * Abstract class defining a block in HoodieLogFile. */ @@ -58,14 +63,17 @@ public abstract class HoodieLogBlock { // TODO : change this to just InputStream so this works for any FileSystem // create handlers to return specific type of inputstream based on FS // input stream corresponding to the log file where this logBlock belongs - protected FSDataInputStream inputStream; + private final FSDataInputStream inputStream; // Toggle flag, whether to read blocks lazily (I/O intensive) or not (Memory intensive) protected boolean readBlockLazily; - public HoodieLogBlock(@Nonnull Map logBlockHeader, + public HoodieLogBlock( + @Nonnull Map logBlockHeader, @Nonnull Map logBlockFooter, - @Nonnull Option blockContentLocation, @Nonnull Option content, - FSDataInputStream inputStream, boolean readBlockLazily) { + @Nonnull Option blockContentLocation, + @Nonnull Option content, + @Nullable FSDataInputStream inputStream, + boolean readBlockLazily) { this.logBlockHeader = logBlockHeader; this.logBlockFooter = logBlockFooter; this.blockContentLocation = blockContentLocation; @@ -109,7 +117,25 @@ public Option getContent() { * Type of the log block WARNING: This enum is serialized as the ordinal. Only add new enums at the end. 
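Since the block type is persisted as the enum ordinal (the reader resolves it via HoodieLogBlockType.values()[type]), inserting or reordering constants would silently re-map blocks in existing files; the string ids introduced below give each type a stable handle. A generic sketch of the kind of id-to-enum lookup that TypeUtils.getValueToEnumMap builds, written here with plain streams and a hypothetical enum:

    import java.util.Map;
    import java.util.function.Function;
    import java.util.stream.Collectors;
    import java.util.stream.Stream;

    public class EnumIdLookup {
      enum BlockType {
        AVRO("avro"), HFILE("hfile"), PARQUET("parquet");

        final String id;

        BlockType(String id) {
          this.id = id;
        }
      }

      // Built once: avoids scanning values() on every lookup
      private static final Map<String, BlockType> ID_TO_ENUM =
          Stream.of(BlockType.values()).collect(Collectors.toMap(e -> e.id, Function.identity()));

      static BlockType fromId(String id) {
        return ID_TO_ENUM.get(id);
      }

      public static void main(String[] args) {
        System.out.println(fromId("hfile")); // prints HFILE
      }
    }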
*/ public enum HoodieLogBlockType { - COMMAND_BLOCK, DELETE_BLOCK, CORRUPT_BLOCK, AVRO_DATA_BLOCK, HFILE_DATA_BLOCK + COMMAND_BLOCK(":command"), + DELETE_BLOCK(":delete"), + CORRUPT_BLOCK(":corrupted"), + AVRO_DATA_BLOCK("avro"), + HFILE_DATA_BLOCK("hfile"), + PARQUET_DATA_BLOCK("parquet"); + + private static final Map ID_TO_ENUM_MAP = + TypeUtils.getValueToEnumMap(HoodieLogBlockType.class, e -> e.id); + + private final String id; + + HoodieLogBlockType(String id) { + this.id = id; + } + + public static HoodieLogBlockType fromId(String id) { + return ID_TO_ENUM_MAP.get(id); + } } /** @@ -132,7 +158,8 @@ public enum FooterMetadataType { * intensive CompactedScanner, the location helps to lazily read contents from the log file */ public static final class HoodieLogBlockContentLocation { - + // Hadoop Config required to access the file + private final Configuration hadoopConf; // The logFile that contains this block private final HoodieLogFile logFile; // The filePosition in the logFile for the contents of this block @@ -142,14 +169,22 @@ public static final class HoodieLogBlockContentLocation { // The final position where the complete block ends private final long blockEndPos; - HoodieLogBlockContentLocation(HoodieLogFile logFile, long contentPositionInLogFile, long blockSize, - long blockEndPos) { + public HoodieLogBlockContentLocation(Configuration hadoopConf, + HoodieLogFile logFile, + long contentPositionInLogFile, + long blockSize, + long blockEndPos) { + this.hadoopConf = hadoopConf; this.logFile = logFile; this.contentPositionInLogFile = contentPositionInLogFile; this.blockSize = blockSize; this.blockEndPos = blockEndPos; } + public Configuration getHadoopConf() { + return hadoopConf; + } + public HoodieLogFile getLogFile() { return logFile; } @@ -210,24 +245,27 @@ public static Map getLogMetadata(DataInputStream dis * Read or Skip block content of a log block in the log file. Depends on lazy reading enabled in * {@link HoodieMergedLogRecordScanner} */ - public static byte[] readOrSkipContent(FSDataInputStream inputStream, Integer contentLength, boolean readBlockLazily) + public static Option tryReadContent(FSDataInputStream inputStream, Integer contentLength, boolean readLazily) throws IOException { - byte[] content = null; - if (!readBlockLazily) { - // Read the contents in memory - content = new byte[contentLength]; - inputStream.readFully(content, 0, contentLength); - } else { + if (readLazily) { // Seek to the end of the content block inputStream.seek(inputStream.getPos() + contentLength); + return Option.empty(); } - return content; + + // TODO re-use buffer if stream is backed by buffer + // Read the contents in memory + byte[] content = new byte[contentLength]; + inputStream.readFully(content, 0, contentLength); + return Option.of(content); } /** * When lazyReading of blocks is turned on, inflate the content of a log block from disk. 
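tryReadContent above makes the memory-vs-I/O trade-off explicit: eager mode materializes the block content immediately with readFully, while lazy mode only seeks past it and leaves the bytes to be fetched later by inflate() from the recorded block location. A stripped-down sketch of the same read-or-skip decision, with java.util.Optional standing in for Hudi's Option:

    import java.io.IOException;
    import java.io.RandomAccessFile;
    import java.util.Optional;

    public class ReadOrSkip {
      static Optional<byte[]> tryReadContent(RandomAccessFile in, int contentLength, boolean readLazily)
          throws IOException {
        if (readLazily) {
          // Lazy: keep nothing in memory, just position past the content;
          // a later inflate() re-reads it from the stored (position, length)
          in.seek(in.getFilePointer() + contentLength);
          return Optional.empty();
        }
        // Eager: pull the whole block content into memory now
        byte[] content = new byte[contentLength];
        in.readFully(content);
        return Optional.of(content);
      }
    }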
*/ protected void inflate() throws HoodieIOException { + checkState(!content.isPresent(), "Block has already been inflated"); + checkState(inputStream != null, "Block should have input-stream provided"); try { content = Option.of(new byte[(int) this.getBlockContentLocation().get().getBlockSize()]); diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieParquetDataBlock.java b/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieParquetDataBlock.java new file mode 100644 index 0000000000000..5e7bef90a08ba --- /dev/null +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieParquetDataBlock.java @@ -0,0 +1,166 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.common.table.log.block; + +import org.apache.hudi.avro.HoodieAvroWriteSupport; +import org.apache.hudi.common.fs.inline.InLineFSUtils; +import org.apache.hudi.common.fs.inline.InLineFileSystem; +import org.apache.hudi.common.util.ClosableIterator; +import org.apache.hudi.common.util.Option; +import org.apache.hudi.common.util.ParquetReaderIterator; +import org.apache.hudi.io.storage.HoodieAvroParquetConfig; +import org.apache.hudi.io.storage.HoodieParquetStreamWriter; + +import org.apache.avro.Schema; +import org.apache.avro.generic.IndexedRecord; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.Path; +import org.apache.parquet.avro.AvroParquetReader; +import org.apache.parquet.avro.AvroReadSupport; +import org.apache.parquet.avro.AvroSchemaConverter; +import org.apache.parquet.hadoop.ParquetReader; +import org.apache.parquet.hadoop.ParquetWriter; +import org.apache.parquet.hadoop.metadata.CompressionCodecName; +import org.apache.parquet.hadoop.util.HadoopInputFile; +import org.apache.parquet.io.InputFile; + +import javax.annotation.Nonnull; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * HoodieParquetDataBlock contains a list of records serialized using Parquet. 
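Unlike the Avro and HFile blocks, the Parquet block below does not deserialize from an in-memory byte[]; it re-reads its own byte range out of the log file through Hudi's InLineFileSystem and a projected Avro Parquet reader. A usage-style sketch of that read path, reusing only calls that appear in this patch (the path, offset, and length arguments are placeholders supplied by the caller):

    import org.apache.avro.Schema;
    import org.apache.avro.generic.GenericRecord;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hudi.common.fs.inline.InLineFSUtils;
    import org.apache.hudi.common.fs.inline.InLineFileSystem;
    import org.apache.parquet.avro.AvroParquetReader;
    import org.apache.parquet.avro.AvroReadSupport;
    import org.apache.parquet.hadoop.ParquetReader;
    import org.apache.parquet.hadoop.util.HadoopInputFile;

    public class InlineParquetReadSketch {
      static void readBlock(Path logFilePath, long contentPos, long blockSize, Schema readerSchema)
          throws Exception {
        Configuration conf = new Configuration();
        // Register the inline filesystem so a (file, offset, length) triple is addressable as a Path
        conf.set("fs." + InLineFileSystem.SCHEME + ".impl", InLineFileSystem.class.getName());

        Path inlinePath = InLineFSUtils.getInlineFilePath(
            logFilePath,
            logFilePath.getFileSystem(conf).getScheme(),
            contentPos,
            blockSize);

        // Projection: only the columns of readerSchema are actually fetched
        AvroReadSupport.setAvroReadSchema(conf, readerSchema);
        AvroReadSupport.setRequestedProjection(conf, readerSchema);

        try (ParquetReader<GenericRecord> reader =
                 AvroParquetReader.<GenericRecord>builder(HadoopInputFile.fromPath(inlinePath, conf))
                     .withConf(conf)
                     .build()) {
          GenericRecord record;
          while ((record = reader.read()) != null) {
            System.out.println(record);
          }
        }
      }
    }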
+ */ +public class HoodieParquetDataBlock extends HoodieDataBlock { + + private final Option compressionCodecName; + + public HoodieParquetDataBlock(FSDataInputStream inputStream, + Option content, + boolean readBlockLazily, + HoodieLogBlockContentLocation logBlockContentLocation, + Option readerSchema, + Map header, + Map footer, + String keyField) { + super(content, inputStream, readBlockLazily, Option.of(logBlockContentLocation), readerSchema, header, footer, keyField, false); + + this.compressionCodecName = Option.empty(); + } + + public HoodieParquetDataBlock( + @Nonnull List records, + @Nonnull Map header, + @Nonnull String keyField, + @Nonnull CompressionCodecName compressionCodecName + ) { + super(records, header, new HashMap<>(), keyField); + + this.compressionCodecName = Option.of(compressionCodecName); + } + + @Override + public HoodieLogBlockType getBlockType() { + return HoodieLogBlockType.PARQUET_DATA_BLOCK; + } + + @Override + protected byte[] serializeRecords(List records) throws IOException { + if (records.size() == 0) { + return new byte[0]; + } + + Schema writerSchema = new Schema.Parser().parse(super.getLogBlockHeader().get(HeaderMetadataType.SCHEMA)); + + HoodieAvroWriteSupport writeSupport = new HoodieAvroWriteSupport( + new AvroSchemaConverter().convert(writerSchema), writerSchema, Option.empty()); + + HoodieAvroParquetConfig avroParquetConfig = + new HoodieAvroParquetConfig( + writeSupport, + compressionCodecName.get(), + ParquetWriter.DEFAULT_BLOCK_SIZE, + ParquetWriter.DEFAULT_PAGE_SIZE, + 1024 * 1024 * 1024, + new Configuration(), + Double.parseDouble(String.valueOf(0.1)));//HoodieStorageConfig.PARQUET_COMPRESSION_RATIO.defaultValue())); + + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + + try (FSDataOutputStream outputStream = new FSDataOutputStream(baos)) { + try (HoodieParquetStreamWriter parquetWriter = new HoodieParquetStreamWriter<>(outputStream, avroParquetConfig)) { + for (IndexedRecord record : records) { + String recordKey = getRecordKey(record).orElse(null); + parquetWriter.writeAvro(recordKey, record); + } + outputStream.flush(); + } + } + + return baos.toByteArray(); + } + + public static ClosableIterator getProjectedParquetRecordsIterator(Configuration conf, + Schema readerSchema, + InputFile inputFile) throws IOException { + AvroReadSupport.setAvroReadSchema(conf, readerSchema); + AvroReadSupport.setRequestedProjection(conf, readerSchema); + + ParquetReader reader = + AvroParquetReader.builder(inputFile).withConf(conf).build(); + return new ParquetReaderIterator<>(reader); + } + + /** + * NOTE: We're overriding the whole reading sequence to make sure we properly respect + * the requested Reader's schema and only fetch the columns that have been explicitly + * requested by the caller (providing projected Reader's schema) + */ + @Override + protected ClosableIterator readRecordsFromBlockPayload() throws IOException { + HoodieLogBlockContentLocation blockContentLoc = getBlockContentLocation().get(); + + // NOTE: It's important to extend Hadoop configuration here to make sure configuration + // is appropriately carried over + Configuration inlineConf = new Configuration(blockContentLoc.getHadoopConf()); + inlineConf.set("fs." 
+ InLineFileSystem.SCHEME + ".impl", InLineFileSystem.class.getName()); + + Path inlineLogFilePath = InLineFSUtils.getInlineFilePath( + blockContentLoc.getLogFile().getPath(), + blockContentLoc.getLogFile().getPath().getFileSystem(inlineConf).getScheme(), + blockContentLoc.getContentPositionInLogFile(), + blockContentLoc.getBlockSize()); + + return getProjectedParquetRecordsIterator( + inlineConf, + readerSchema, + HadoopInputFile.fromPath(inlineLogFilePath, inlineConf)); + } + + @Override + protected ClosableIterator deserializeRecords(byte[] content) { + throw new UnsupportedOperationException("Should not be invoked"); + } +} \ No newline at end of file diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieActiveTimeline.java b/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieActiveTimeline.java index c7473bd7d59d5..36dd5368d4a63 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieActiveTimeline.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieActiveTimeline.java @@ -24,6 +24,7 @@ import org.apache.hudi.common.table.timeline.HoodieInstant.State; import org.apache.hudi.common.util.FileIOUtils; import org.apache.hudi.common.util.Option; +import org.apache.hudi.common.util.StringUtils; import org.apache.hudi.common.util.ValidationUtils; import org.apache.hudi.common.util.collection.Pair; import org.apache.hudi.exception.HoodieIOException; @@ -70,7 +71,7 @@ public class HoodieActiveTimeline extends HoodieDefaultTimeline { SAVEPOINT_EXTENSION, INFLIGHT_SAVEPOINT_EXTENSION, CLEAN_EXTENSION, REQUESTED_CLEAN_EXTENSION, INFLIGHT_CLEAN_EXTENSION, INFLIGHT_COMPACTION_EXTENSION, REQUESTED_COMPACTION_EXTENSION, - INFLIGHT_RESTORE_EXTENSION, RESTORE_EXTENSION, + REQUESTED_RESTORE_EXTENSION, INFLIGHT_RESTORE_EXTENSION, RESTORE_EXTENSION, ROLLBACK_EXTENSION, REQUESTED_ROLLBACK_EXTENSION, INFLIGHT_ROLLBACK_EXTENSION, REQUESTED_REPLACE_COMMIT_EXTENSION, INFLIGHT_REPLACE_COMMIT_EXTENSION, REPLACE_COMMIT_EXTENSION)); private static final Logger LOG = LogManager.getLogger(HoodieActiveTimeline.class); @@ -259,6 +260,26 @@ public Option getInstantDetails(HoodieInstant instant) { return readDataFromPath(detailPath); } + /** + * Get the last instant with valid schema, and convert this to HoodieCommitMetadata + */ + public Option> getLastCommitMetadataWithValidSchema() { + List completed = getCommitsTimeline().filterCompletedInstants().getInstants() + .sorted(Comparator.comparing(HoodieInstant::getTimestamp).reversed()).collect(Collectors.toList()); + for (HoodieInstant instant : completed) { + try { + HoodieCommitMetadata commitMetadata = HoodieCommitMetadata.fromBytes( + getInstantDetails(instant).get(), HoodieCommitMetadata.class); + if (!StringUtils.isNullOrEmpty(commitMetadata.getMetadata(HoodieCommitMetadata.SCHEMA_KEY))) { + return Option.of(Pair.of(instant, commitMetadata)); + } + } catch (IOException e) { + LOG.warn("Failed to convert instant to HoodieCommitMetadata: " + instant.toString()); + } + } + return Option.empty(); + } + /** * Get the last instant with valid data, and convert this to HoodieCommitMetadata */ @@ -289,6 +310,11 @@ public Option readRollbackInfoAsBytes(HoodieInstant instant) { return readDataFromPath(new Path(metaClient.getMetaPath(), instant.getFileName())); } + public Option readRestoreInfoAsBytes(HoodieInstant instant) { + // Restore metadata is always stored only in timeline .hoodie + return readDataFromPath(new Path(metaClient.getMetaPath(),
instant.getFileName())); + } + //----------------------------------------------------------------- // BEGIN - COMPACTION RELATED META-DATA MANAGEMENT. //----------------------------------------------------------------- @@ -429,6 +455,21 @@ public HoodieInstant transitionRollbackRequestedToInflight(HoodieInstant request return inflight; } + /** + * Transition Restore State from requested to inflight. + * + * @param requestedInstant requested instant + * @return commit instant + */ + public HoodieInstant transitionRestoreRequestedToInflight(HoodieInstant requestedInstant) { + ValidationUtils.checkArgument(requestedInstant.getAction().equals(HoodieTimeline.RESTORE_ACTION), "Transition to inflight requested for a restore instant with diff action " + + requestedInstant.toString()); + ValidationUtils.checkArgument(requestedInstant.isRequested(), "Transition to inflight requested for an instant not in requested state " + requestedInstant.toString()); + HoodieInstant inflight = new HoodieInstant(State.INFLIGHT, RESTORE_ACTION, requestedInstant.getTimestamp()); + transitionState(requestedInstant, inflight, Option.empty()); + return inflight; + } + /** * Transition replace requested file to replace inflight. * @@ -599,6 +640,13 @@ public void saveToRollbackRequested(HoodieInstant instant, Option conten createFileInMetaPath(instant.getFileName(), content, false); } + public void saveToRestoreRequested(HoodieInstant instant, Option content) { + ValidationUtils.checkArgument(instant.getAction().equals(HoodieTimeline.RESTORE_ACTION)); + ValidationUtils.checkArgument(instant.getState().equals(State.REQUESTED)); + // Plan is stored in meta path + createFileInMetaPath(instant.getFileName(), content, false); + } + private void createFileInMetaPath(String filename, Option content, boolean allowOverwrite) { Path fullPath = new Path(metaClient.getMetaPath(), filename); if (allowOverwrite || metaClient.getTimelineLayoutVersion().isNullVersion()) { diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieArchivedTimeline.java b/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieArchivedTimeline.java index 5ad3fa7a9f215..ddfe22ac9e02e 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieArchivedTimeline.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieArchivedTimeline.java @@ -27,6 +27,7 @@ import org.apache.hudi.common.table.HoodieTableMetaClient; import org.apache.hudi.common.table.log.HoodieLogFormat; import org.apache.hudi.common.table.log.block.HoodieAvroDataBlock; +import org.apache.hudi.common.util.ClosableIterator; import org.apache.hudi.common.util.CollectionUtils; import org.apache.hudi.common.util.FileIOUtils; import org.apache.hudi.common.util.Option; @@ -54,10 +55,12 @@ import java.util.List; import java.util.Map; import java.util.Set; +import java.util.Spliterator; +import java.util.Spliterators; import java.util.function.Function; import java.util.regex.Matcher; import java.util.regex.Pattern; -import java.util.stream.Collectors; +import java.util.stream.StreamSupport; /** * Represents the Archived Timeline for the Hoodie table. 
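Taken together, REQUESTED_RESTORE_EXTENSION, saveToRestoreRequested, and transitionRestoreRequestedToInflight give restore the same requested -> inflight -> completed lifecycle the other timeline actions already have. A sketch of the meta-file names an instant passes through, assuming RESTORE_ACTION resolves to the literal "restore":

// Illustrative only; the real names come from makeRequestedRestoreFileName and friends.
final class RestoreFileNames {
  static String requested(String ts) { return ts + ".restore.requested"; }
  static String inflight(String ts)  { return ts + ".restore.inflight"; }
  static String completed(String ts) { return ts + ".restore"; }

  public static void main(String[] args) {
    String ts = "20220126120000";
    System.out.println(requested(ts)); // written by saveToRestoreRequested (carries the plan)
    System.out.println(inflight(ts));  // written by transitionRestoreRequestedToInflight
    System.out.println(completed(ts)); // written when the restore finishes
  }
}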
Instants for the last 12 hours (configurable) is in the @@ -79,13 +82,13 @@ public class HoodieArchivedTimeline extends HoodieDefaultTimeline { private static final String ACTION_TYPE_KEY = "actionType"; private static final String ACTION_STATE = "actionState"; private HoodieTableMetaClient metaClient; - private Map readCommits = new HashMap<>(); + private final Map readCommits = new HashMap<>(); private static final Logger LOG = LogManager.getLogger(HoodieArchivedTimeline.class); /** - * Loads instants between (startTs, endTs]. - * Note that there is no lazy loading, so this may not work if really long time range (endTs-startTs) is specified. + * Loads all the archived instants. + * Note that there is no lazy loading, so this may not work if the archived timeline range is really long. * TBD: Should we enforce maximum time range? */ public HoodieArchivedTimeline(HoodieTableMetaClient metaClient) { @@ -96,6 +99,19 @@ public HoodieArchivedTimeline(HoodieTableMetaClient metaClient) { this.details = (Function> & Serializable) this::getInstantDetails; } + /** + * Loads completed instants from startTs(inclusive). + * Note that there is no lazy loading, so this may not work if really early startTs is specified. + */ + public HoodieArchivedTimeline(HoodieTableMetaClient metaClient, String startTs) { + this.metaClient = metaClient; + setInstants(loadInstants(new StartTsFilter(startTs), true, + record -> HoodieInstant.State.COMPLETED.toString().equals(record.get(ACTION_STATE).toString()))); + // multiple casts will make this lambda serializable - + // http://docs.oracle.com/javase/specs/jls/se8/html/jls-15.html#jls-15.16 + this.details = (Function> & Serializable) this::getInstantDetails; + } + /** * For serialization and de-serialization only. * @@ -235,15 +251,14 @@ private List loadInstants(TimeRangeFilter filter, boolean loadIns HoodieAvroDataBlock blk = (HoodieAvroDataBlock) reader.next(); // TODO If we can store additional metadata in datablock, we can skip parsing records // (such as startTime, endTime of records in the block) - List records = blk.getRecords(); - // Filter blocks in desired time window - instantsInRange.addAll( - records.stream() - .filter(r -> commitsFilter.apply((GenericRecord) r)) - .map(r -> readCommit((GenericRecord) r, loadInstantDetails)) - .filter(c -> filter == null || filter.isInRange(c)) - .collect(Collectors.toList()) - ); + try (ClosableIterator itr = blk.getRecordItr()) { + StreamSupport.stream(Spliterators.spliteratorUnknownSize(itr, Spliterator.IMMUTABLE), true) + // Filter blocks in desired time window + .filter(r -> commitsFilter.apply((GenericRecord) r)) + .map(r -> readCommit((GenericRecord) r, loadInstantDetails)) + .filter(c -> filter == null || filter.isInRange(c)) + .forEach(instantsInRange::add); + } } if (filter != null) { @@ -300,6 +315,19 @@ public boolean isInRange(HoodieInstant instant) { } } + private static class StartTsFilter extends TimeRangeFilter { + private final String startTs; + + public StartTsFilter(String startTs) { + super(startTs, null); // endTs is never used + this.startTs = startTs; + } + + public boolean isInRange(HoodieInstant instant) { + return HoodieTimeline.compareTimestamps(instant.getTimestamp(), GREATER_THAN_OR_EQUALS, startTs); + } + } + /** * Sort files by reverse order of version suffix in file name. 
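The new constructor and StartTsFilter load only completed archived instants at or after a start timestamp. Because Hudi instant timestamps are fixed-width numeric strings, plain lexicographic comparison is enough, as this small demo of the filter's predicate shows:

import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;

final class StartTsFilterDemo {
  public static void main(String[] args) {
    String startTs = "20220115000000";
    List<String> instants = Arrays.asList("20220101000000", "20220115000000", "20220201000000");
    List<String> inRange = instants.stream()
        .filter(ts -> ts.compareTo(startTs) >= 0) // GREATER_THAN_OR_EQUALS, startTs inclusive
        .collect(Collectors.toList());
    System.out.println(inRange); // [20220115000000, 20220201000000]
  }
}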
*/ @@ -330,7 +358,7 @@ public HoodieDefaultTimeline getWriteTimeline() { // filter in-memory instants Set validActions = CollectionUtils.createSet(COMMIT_ACTION, DELTA_COMMIT_ACTION, COMPACTION_ACTION, REPLACE_COMMIT_ACTION); return new HoodieDefaultTimeline(getInstants().filter(i -> - readCommits.keySet().contains(i.getTimestamp())) + readCommits.containsKey(i.getTimestamp())) .filter(s -> validActions.contains(s.getAction())), details); } } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieInstant.java b/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieInstant.java index a8df62c6496ae..9cd0883126495 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieInstant.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieInstant.java @@ -166,6 +166,7 @@ public String getFileName() { } } else if (HoodieTimeline.RESTORE_ACTION.equals(action)) { return isInflight() ? HoodieTimeline.makeInflightRestoreFileName(timestamp) + : isRequested() ? HoodieTimeline.makeRequestedRestoreFileName(timestamp) : HoodieTimeline.makeRestoreFileName(timestamp); } else if (HoodieTimeline.REPLACE_COMMIT_ACTION.equals(action)) { return isInflight() ? HoodieTimeline.makeInflightReplaceFileName(timestamp) diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieTimeline.java b/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieTimeline.java index 6ea44a83007d1..25b9c2ec6f2e4 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieTimeline.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieTimeline.java @@ -78,6 +78,7 @@ public interface HoodieTimeline extends Serializable { String REQUESTED_COMPACTION_SUFFIX = StringUtils.join(COMPACTION_ACTION, REQUESTED_EXTENSION); String REQUESTED_COMPACTION_EXTENSION = StringUtils.join(".", REQUESTED_COMPACTION_SUFFIX); String INFLIGHT_COMPACTION_EXTENSION = StringUtils.join(".", COMPACTION_ACTION, INFLIGHT_EXTENSION); + String REQUESTED_RESTORE_EXTENSION = "." + RESTORE_ACTION + REQUESTED_EXTENSION; String INFLIGHT_RESTORE_EXTENSION = "." + RESTORE_ACTION + INFLIGHT_EXTENSION; String RESTORE_EXTENSION = "." + RESTORE_ACTION; String INFLIGHT_REPLACE_COMMIT_EXTENSION = "." 
+ REPLACE_COMMIT_ACTION + INFLIGHT_EXTENSION; @@ -386,6 +387,10 @@ static String makeRequestedRollbackFileName(String instant) { return StringUtils.join(instant, HoodieTimeline.REQUESTED_ROLLBACK_EXTENSION); } + static String makeRequestedRestoreFileName(String instant) { + return StringUtils.join(instant, HoodieTimeline.REQUESTED_RESTORE_EXTENSION); + } + static String makeInflightRollbackFileName(String instant) { return StringUtils.join(instant, HoodieTimeline.INFLIGHT_ROLLBACK_EXTENSION); } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/TimelineMetadataUtils.java b/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/TimelineMetadataUtils.java index 32e42ee58ac27..70a23f1b4c0fb 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/TimelineMetadataUtils.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/timeline/TimelineMetadataUtils.java @@ -26,6 +26,7 @@ import org.apache.hudi.avro.model.HoodieReplaceCommitMetadata; import org.apache.hudi.avro.model.HoodieRequestedReplaceMetadata; import org.apache.hudi.avro.model.HoodieRestoreMetadata; +import org.apache.hudi.avro.model.HoodieRestorePlan; import org.apache.hudi.avro.model.HoodieRollbackMetadata; import org.apache.hudi.avro.model.HoodieRollbackPartitionMetadata; import org.apache.hudi.avro.model.HoodieRollbackPlan; @@ -77,10 +78,8 @@ public static HoodieRollbackMetadata convertRollbackMetadata(String startRollbac for (HoodieRollbackStat stat : rollbackStats) { Map rollbackLogFiles = stat.getCommandBlocksCount().keySet().stream() .collect(Collectors.toMap(f -> f.getPath().toString(), FileStatus::getLen)); - Map probableLogFiles = stat.getWrittenLogFileSizeMap().keySet().stream() - .collect(Collectors.toMap(f -> f.getPath().toString(), FileStatus::getLen)); HoodieRollbackPartitionMetadata metadata = new HoodieRollbackPartitionMetadata(stat.getPartitionPath(), - stat.getSuccessDeleteFiles(), stat.getFailedDeleteFiles(), rollbackLogFiles, probableLogFiles); + stat.getSuccessDeleteFiles(), stat.getFailedDeleteFiles(), rollbackLogFiles); partitionMetadataBuilder.put(stat.getPartitionPath(), metadata); totalDeleted += stat.getSuccessDeleteFiles().size(); } @@ -114,6 +113,10 @@ public static Option serializeRollbackPlan(HoodieRollbackPlan rollbackPl return serializeAvroMetadata(rollbackPlan, HoodieRollbackPlan.class); } + public static Option serializeRestorePlan(HoodieRestorePlan restorePlan) throws IOException { + return serializeAvroMetadata(restorePlan, HoodieRestorePlan.class); + } + public static Option serializeCleanMetadata(HoodieCleanMetadata metadata) throws IOException { return serializeAvroMetadata(metadata, HoodieCleanMetadata.class); } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/view/AbstractTableFileSystemView.java b/hudi-common/src/main/java/org/apache/hudi/common/table/view/AbstractTableFileSystemView.java index 92e6171b68327..208d7ef2ba456 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/view/AbstractTableFileSystemView.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/view/AbstractTableFileSystemView.java @@ -380,6 +380,19 @@ protected boolean isBaseFileDueToPendingCompaction(HoodieBaseFile baseFile) { && baseFile.getCommitTime().equals(compactionWithInstantTime.get().getKey()); } + /** + * With async clustering, it is possible to see partial/complete base-files due to inflight-clustering; ignore those + base-files.
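The guard introduced here (its @param and body follow) boils down to a membership check of the base file's commit time against the pending replacecommit instants; a stripped-down sketch:

import java.util.Arrays;
import java.util.List;

final class PendingClusteringGuard {
  // A base file is ignored when its commit time matches any pending replacecommit instant.
  static boolean isDueToPendingClustering(String baseFileCommitTime, List<String> pendingReplaceInstants) {
    return !pendingReplaceInstants.isEmpty() && pendingReplaceInstants.contains(baseFileCommitTime);
  }

  public static void main(String[] args) {
    List<String> pending = Arrays.asList("20220126103000");
    System.out.println(isDueToPendingClustering("20220126103000", pending)); // true -> filter out
    System.out.println(isDueToPendingClustering("20220126090000", pending)); // false -> keep
  }
}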
+ * + * @param baseFile base File + */ + protected boolean isBaseFileDueToPendingClustering(HoodieBaseFile baseFile) { + List pendingReplaceInstants = + metaClient.getActiveTimeline().filterPendingReplaceTimeline().getInstants().map(HoodieInstant::getTimestamp).collect(Collectors.toList()); + + return !pendingReplaceInstants.isEmpty() && pendingReplaceInstants.contains(baseFile.getCommitTime()); + } + /** * Returns true if the file-group is under pending-compaction and the file-slice' baseInstant matches compaction * Instant. @@ -401,7 +414,7 @@ protected boolean isFileSliceAfterPendingCompaction(FileSlice fileSlice) { */ protected FileSlice filterBaseFileAfterPendingCompaction(FileSlice fileSlice) { if (isFileSliceAfterPendingCompaction(fileSlice)) { - LOG.info("File Slice (" + fileSlice + ") is in pending compaction"); + LOG.debug("File Slice (" + fileSlice + ") is in pending compaction"); // Base file is filtered out of the file-slice as the corresponding compaction // instant not completed yet. FileSlice transformed = @@ -492,7 +505,7 @@ public final Stream getLatestBaseFilesBeforeOrOn(String partitio .map(fileGroup -> Option.fromJavaOptional(fileGroup.getAllBaseFiles() .filter(baseFile -> HoodieTimeline.compareTimestamps(baseFile.getCommitTime(), HoodieTimeline.LESSER_THAN_OR_EQUALS, maxCommitTime )) - .filter(df -> !isBaseFileDueToPendingCompaction(df)).findFirst())) + .filter(df -> !isBaseFileDueToPendingCompaction(df) && !isBaseFileDueToPendingClustering(df)).findFirst())) .filter(Option::isPresent).map(Option::get) .map(df -> addBootstrapBaseFileIfPresent(new HoodieFileGroupId(partitionPath, df.getFileId()), df)); } finally { @@ -511,7 +524,7 @@ public final Option getBaseFileOn(String partitionStr, String in } else { return fetchHoodieFileGroup(partitionPath, fileId).map(fileGroup -> fileGroup.getAllBaseFiles() .filter(baseFile -> HoodieTimeline.compareTimestamps(baseFile.getCommitTime(), HoodieTimeline.EQUALS, - instantTime)).filter(df -> !isBaseFileDueToPendingCompaction(df)).findFirst().orElse(null)) + instantTime)).filter(df -> !isBaseFileDueToPendingCompaction(df) && !isBaseFileDueToPendingClustering(df)).findFirst().orElse(null)) .map(df -> addBootstrapBaseFileIfPresent(new HoodieFileGroupId(partitionPath, fileId), df)); } } finally { @@ -547,7 +560,7 @@ public final Stream getLatestBaseFilesInRange(List commi .filter(fileGroup -> !isFileGroupReplacedBeforeAny(fileGroup.getFileGroupId(), commitsToReturn)) .map(fileGroup -> Pair.of(fileGroup.getFileGroupId(), Option.fromJavaOptional( fileGroup.getAllBaseFiles().filter(baseFile -> commitsToReturn.contains(baseFile.getCommitTime()) - && !isBaseFileDueToPendingCompaction(baseFile)).findFirst()))).filter(p -> p.getValue().isPresent()) + && !isBaseFileDueToPendingCompaction(baseFile) && !isBaseFileDueToPendingClustering(baseFile)).findFirst()))).filter(p -> p.getValue().isPresent()) .map(p -> addBootstrapBaseFileIfPresent(p.getKey(), p.getValue().get())); } finally { readLock.unlock(); @@ -563,7 +576,7 @@ public final Stream getAllBaseFiles(String partitionStr) { return fetchAllBaseFiles(partitionPath) .filter(df -> !isFileGroupReplaced(partitionPath, df.getFileId())) .filter(df -> visibleCommitsAndCompactionTimeline.containsOrBeforeTimelineStarts(df.getCommitTime())) - .filter(df -> !isBaseFileDueToPendingCompaction(df)) + .filter(df -> !isBaseFileDueToPendingCompaction(df) && !isBaseFileDueToPendingClustering(df)) .map(df -> addBootstrapBaseFileIfPresent(new HoodieFileGroupId(partitionPath, df.getFileId()), df)); } finally { 
readLock.unlock(); @@ -953,7 +966,7 @@ public Stream fetchLatestBaseFiles(final String partitionPath) { protected Option getLatestBaseFile(HoodieFileGroup fileGroup) { return Option - .fromJavaOptional(fileGroup.getAllBaseFiles().filter(df -> !isBaseFileDueToPendingCompaction(df)).findFirst()); + .fromJavaOptional(fileGroup.getAllBaseFiles().filter(df -> !isBaseFileDueToPendingCompaction(df) && !isBaseFileDueToPendingClustering(df)).findFirst()); } /** diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/view/FileSystemViewStorageConfig.java b/hudi-common/src/main/java/org/apache/hudi/common/table/view/FileSystemViewStorageConfig.java index b77b0d3a82521..e2342edc3a351 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/view/FileSystemViewStorageConfig.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/view/FileSystemViewStorageConfig.java @@ -240,7 +240,7 @@ public Builder withMaxMemoryForView(Long maxMemoryForView) { return this; } - public Builder withRemoteTimelineClientTimeoutSecs(Long timelineClientTimeoutSecs) { + public Builder withRemoteTimelineClientTimeoutSecs(Integer timelineClientTimeoutSecs) { fileSystemViewStorageConfig.setValue(REMOTE_TIMEOUT_SECS, timelineClientTimeoutSecs.toString()); return this; } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/table/view/HoodieTablePreCommitFileSystemView.java b/hudi-common/src/main/java/org/apache/hudi/common/table/view/HoodieTablePreCommitFileSystemView.java index 76f7e3ca5e388..7401617a6abb6 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/table/view/HoodieTablePreCommitFileSystemView.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/table/view/HoodieTablePreCommitFileSystemView.java @@ -22,7 +22,6 @@ import org.apache.hudi.common.model.HoodieBaseFile; import org.apache.hudi.common.model.HoodieWriteStat; import org.apache.hudi.common.table.HoodieTableMetaClient; -import org.apache.hudi.common.util.Option; import java.util.Collections; import java.util.List; @@ -75,16 +74,12 @@ public final Stream getLatestBaseFiles(String partitionStr) { new HoodieBaseFile(new Path(tableMetaClient.getBasePath(), writeStat.getPath()).toString()))); Stream committedBaseFiles = this.completedCommitsFileSystemView.getLatestBaseFiles(partitionStr); - Stream baseFilesForCommittedFileIds = committedBaseFiles - // Remove files replaced by current inflight commit - .filter(baseFile -> !replacedFileIdsForPartition.contains(baseFile.getFileId())) - // if there is new version of file created by inflight commit, use that file instead of committed version - .map(baseFile -> { - HoodieBaseFile fileIdNewVersionExists = newFilesWrittenForPartition.remove(baseFile.getFileId()); - return Option.ofNullable(fileIdNewVersionExists).orElse(baseFile); - }); - - Stream baseFilesWithNewFileIds = newFilesWrittenForPartition.values().stream(); - return Stream.concat(baseFilesForCommittedFileIds, baseFilesWithNewFileIds); + Map allFileIds = committedBaseFiles + // Remove files replaced by current inflight commit + .filter(baseFile -> !replacedFileIdsForPartition.contains(baseFile.getFileId())) + .collect(Collectors.toMap(HoodieBaseFile::getFileId, baseFile -> baseFile)); + + allFileIds.putAll(newFilesWrittenForPartition); + return allFileIds.values().stream(); } } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/util/BaseFileUtils.java b/hudi-common/src/main/java/org/apache/hudi/common/util/BaseFileUtils.java index 25a2bec5baff2..7ec6110d723ab 100644 --- 
a/hudi-common/src/main/java/org/apache/hudi/common/util/BaseFileUtils.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/util/BaseFileUtils.java @@ -167,18 +167,35 @@ public abstract Map readFooter(Configuration configuration, bool * Fetch {@link HoodieKey}s from the given data file. * @param configuration configuration to build fs object * @param filePath The data file path - * @return {@link List} of {@link HoodieKey}s fetched from the parquet file + * @return {@link List} of {@link HoodieKey}s fetched from the data file */ - public abstract List fetchRecordKeyPartitionPath(Configuration configuration, Path filePath); + public abstract List fetchHoodieKeys(Configuration configuration, Path filePath); + + /** + * Provides a closable iterator for reading the given data file. + * @param configuration configuration to build fs object + * @param filePath The data file path + * @param keyGeneratorOpt instance of KeyGenerator. + * @return {@link ClosableIterator} of {@link HoodieKey}s for reading the file + */ + public abstract ClosableIterator getHoodieKeyIterator(Configuration configuration, Path filePath, Option keyGeneratorOpt); + + /** + * Provides a closable iterator for reading the given data file. + * @param configuration configuration to build fs object + * @param filePath The data file path + * @return {@link ClosableIterator} of {@link HoodieKey}s for reading the file + */ + public abstract ClosableIterator getHoodieKeyIterator(Configuration configuration, Path filePath); /** * Fetch {@link HoodieKey}s from the given data file. * @param configuration configuration to build fs object * @param filePath The data file path * @param keyGeneratorOpt instance of KeyGenerator. - * @return {@link List} of {@link HoodieKey}s fetched from the parquet file + * @return {@link List} of {@link HoodieKey}s fetched from the data file */ - public abstract List fetchRecordKeyPartitionPath(Configuration configuration, Path filePath, Option keyGeneratorOpt); + public abstract List fetchHoodieKeys(Configuration configuration, Path filePath, Option keyGeneratorOpt); /** * Read the Avro schema of the data file. diff --git a/hudi-common/src/main/java/org/apache/hudi/common/util/BinaryUtil.java b/hudi-common/src/main/java/org/apache/hudi/common/util/BinaryUtil.java index 0c7e898957670..9fec2c8cf5924 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/util/BinaryUtil.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/util/BinaryUtil.java @@ -19,6 +19,7 @@ package org.apache.hudi.common.util; import java.nio.charset.Charset; +import java.util.zip.CRC32; public class BinaryUtil { @@ -187,5 +188,14 @@ public static long convertBytesToLong(byte[] bytes) { } return temp; } + + /** + * Generate a checksum for a given set of bytes. 
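The fetchRecordKeyPartitionPath-to-fetchHoodieKeys rename comes with iterator-returning variants so callers can stream keys instead of buffering whole files. A hedged usage sketch, with a stand-in interface mirroring Hudi's ClosableIterator (Iterator plus AutoCloseable with an unchecked close):

import java.util.Iterator;

// Minimal stand-in for org.apache.hudi.common.util.ClosableIterator.
interface KeyIterator<T> extends Iterator<T>, AutoCloseable {
  @Override
  void close(); // unchecked, so it composes with try-with-resources without a catch block
}

final class KeyScan {
  static <T> long countKeys(KeyIterator<T> keys) {
    try (KeyIterator<T> itr = keys) { // closes the underlying file reader on exit
      long count = 0;
      while (itr.hasNext()) {
        itr.next();
        count++;
      }
      return count;
    }
  }
}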
+ */ + public static long generateChecksum(byte[] data) { + CRC32 crc = new CRC32(); + crc.update(data); + return crc.getValue(); + } } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/util/CleanerUtils.java b/hudi-common/src/main/java/org/apache/hudi/common/util/CleanerUtils.java index 9f63bfa3da4c4..a3a1305667f6a 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/util/CleanerUtils.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/util/CleanerUtils.java @@ -35,6 +35,9 @@ import org.apache.hudi.common.table.timeline.versioning.clean.CleanMetadataV2MigrationHandler; import org.apache.hudi.common.table.timeline.versioning.clean.CleanPlanMigrator; +import org.apache.log4j.LogManager; +import org.apache.log4j.Logger; + import java.io.IOException; import java.util.HashMap; import java.util.List; @@ -43,6 +46,9 @@ import static org.apache.hudi.common.table.timeline.HoodieTimeline.COMMIT_ACTION; public class CleanerUtils { + + private static final Logger LOG = LogManager.getLogger(CleanerUtils.class); + public static final Integer CLEAN_METADATA_VERSION_1 = CleanMetadataV1MigrationHandler.VERSION; public static final Integer CLEAN_METADATA_VERSION_2 = CleanMetadataV2MigrationHandler.VERSION; public static final Integer LATEST_CLEAN_METADATA_VERSION = CLEAN_METADATA_VERSION_2; @@ -131,6 +137,7 @@ public static void rollbackFailedWrites(HoodieFailedWritesCleaningPolicy cleanin // No need to do any special cleanup for failed operations during clean return; } else if (cleaningPolicy.isLazy()) { + LOG.info("Cleaned failed attempts if any"); // Perform rollback of failed operations for all types of actions during clean rollbackFailedWritesFunc.apply(); return; @@ -140,6 +147,7 @@ public static void rollbackFailedWrites(HoodieFailedWritesCleaningPolicy cleanin case COMMIT_ACTION: // For any other actions, perform rollback of failed writes if (cleaningPolicy.isEager()) { + LOG.info("Cleaned failed attempts if any"); rollbackFailedWritesFunc.apply(); return; } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/util/ClusteringUtils.java b/hudi-common/src/main/java/org/apache/hudi/common/util/ClusteringUtils.java index 15e53705b0d0c..9d741a03f82ec 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/util/ClusteringUtils.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/util/ClusteringUtils.java @@ -215,6 +215,12 @@ private static Map buildMetrics(List fileSlices) { } public static List getPendingClusteringInstantTimes(HoodieTableMetaClient metaClient) { - return metaClient.getActiveTimeline().filterPendingReplaceTimeline().getInstants().collect(Collectors.toList()); + return metaClient.getActiveTimeline().filterPendingReplaceTimeline().getInstants() + .filter(instant -> isPendingClusteringInstant(metaClient, instant)) + .collect(Collectors.toList()); + } + + public static boolean isPendingClusteringInstant(HoodieTableMetaClient metaClient, HoodieInstant instant) { + return getClusteringPlan(metaClient, instant).isPresent(); } } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/util/CollectionUtils.java b/hudi-common/src/main/java/org/apache/hudi/common/util/CollectionUtils.java index 6a4efca295efe..1a3d053e23acd 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/util/CollectionUtils.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/util/CollectionUtils.java @@ -62,7 +62,7 @@ public static T[] append(T[] array, T elem) { /** - * Combines provided {@link List}s into one + * Combines provided {@link List}s into 
one, returning new instance of {@link ArrayList} */ public static List combine(List one, List another) { ArrayList combined = new ArrayList<>(one.size() + another.size()); @@ -71,6 +71,19 @@ public static List combine(List one, List another) { return combined; } + /** + * Combines provided {@link Map}s into one, returning new instance of {@link HashMap}. + * + * NOTE: Values associated with overlapping keys from the second map will override + values from the first one + */ + public static Map combine(Map one, Map another) { + Map combined = new HashMap<>(one.size() + another.size()); + combined.putAll(one); + combined.putAll(another); + return combined; + } + /** * Returns difference b/w {@code one} {@link Set} of elements and {@code another} */ diff --git a/hudi-common/src/main/java/org/apache/hudi/common/util/Functions.java b/hudi-common/src/main/java/org/apache/hudi/common/util/Functions.java index 3ec96be207330..0b82f091402a0 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/util/Functions.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/util/Functions.java @@ -25,6 +25,11 @@ */ public interface Functions { + static Runnable noop() { + return () -> { + }; + } + /** * A function which has not any parameter. */ diff --git a/hudi-common/src/main/java/org/apache/hudi/common/util/Option.java b/hudi-common/src/main/java/org/apache/hudi/common/util/Option.java index 42d6057968f97..193bf5315fd01 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/util/Option.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/util/Option.java @@ -108,14 +108,31 @@ public Option map(Function mapper) { } } + /** + * Returns this {@link Option} if not empty, otherwise evaluates the provided supplier + * and returns the alternative + */ + public Option or(Supplier> other) { + return val != null ? this : other.get(); + } + + /** + * Identical to {@code Optional.orElse} + */ public T orElse(T other) { return val != null ? val : other; } + /** + * Identical to {@code Optional.orElseGet} + */ public T orElseGet(Supplier other) { return val != null ? val : other.get(); } + /** + * Identical to {@code Optional.orElseThrow} + */ public T orElseThrow(Supplier exceptionSupplier) throws X { if (val != null) { return val; diff --git a/hudi-common/src/main/java/org/apache/hudi/common/util/OrcReaderIterator.java b/hudi-common/src/main/java/org/apache/hudi/common/util/OrcReaderIterator.java index 4b3caa756a65f..d9ceeeee40f63 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/util/OrcReaderIterator.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/util/OrcReaderIterator.java @@ -29,19 +29,18 @@ import org.apache.orc.TypeDescription; import java.io.IOException; -import java.util.Iterator; /** * This class wraps an ORC reader and provides an iterator based api to read from an ORC file.
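The combine(Map, Map) helper added above has last-write-wins semantics because putAll(another) runs second; a self-contained demo:

import java.util.HashMap;
import java.util.Map;

final class CombineDemo {
  static <K, V> Map<K, V> combine(Map<K, V> one, Map<K, V> another) {
    Map<K, V> combined = new HashMap<>(one.size() + another.size());
    combined.putAll(one);
    combined.putAll(another); // overlapping keys take the second map's value
    return combined;
  }

  public static void main(String[] args) {
    Map<String, Integer> a = new HashMap<>();
    a.put("x", 1);
    Map<String, Integer> b = new HashMap<>();
    b.put("x", 2);
    System.out.println(combine(a, b)); // {x=2}
  }
}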
*/ -public class OrcReaderIterator implements Iterator { +public class OrcReaderIterator implements ClosableIterator { private final RecordReader recordReader; private final Schema avroSchema; - List fieldNames; - List orcFieldTypes; - Schema[] avroFieldSchemas; - private VectorizedRowBatch batch; + private final List fieldNames; + private final List orcFieldTypes; + private final Schema[] avroFieldSchemas; + private final VectorizedRowBatch batch; private int rowInBatch; private T next; @@ -52,7 +51,7 @@ public OrcReaderIterator(RecordReader recordReader, Schema schema, TypeDescripti this.orcFieldTypes = orcSchema.getChildren(); this.avroFieldSchemas = fieldNames.stream() .map(fieldName -> avroSchema.getField(fieldName).schema()) - .toArray(size -> new Schema[size]); + .toArray(Schema[]::new); this.batch = orcSchema.createRowBatch(); this.rowInBatch = 0; } @@ -115,4 +114,9 @@ private GenericData.Record readRecordFromBatch() throws IOException { rowInBatch++; return record; } + + @Override + public void close() { + FileIOUtils.closeQuietly(this.recordReader); + } } \ No newline at end of file diff --git a/hudi-common/src/main/java/org/apache/hudi/common/util/OrcUtils.java b/hudi-common/src/main/java/org/apache/hudi/common/util/OrcUtils.java index e418043fe0ecd..88c28d75204a7 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/util/OrcUtils.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/util/OrcUtils.java @@ -19,7 +19,9 @@ package org.apache.hudi.common.util; import java.io.IOException; +import java.nio.ByteBuffer; import java.util.ArrayList; +import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.List; @@ -54,29 +56,23 @@ public class OrcUtils extends BaseFileUtils { /** - * Fetch {@link HoodieKey}s from the given ORC file. + * Provides a closable iterator for reading the given ORC file. * - * @param filePath The ORC file path. 
* @param configuration configuration to build fs object - * @return {@link List} of {@link HoodieKey}s fetched from the ORC file + * @param filePath The ORC file path + * @return {@link ClosableIterator} of {@link HoodieKey}s for reading the ORC file */ @Override - public List fetchRecordKeyPartitionPath(Configuration configuration, Path filePath) { - List hoodieKeys = new ArrayList<>(); + public ClosableIterator getHoodieKeyIterator(Configuration configuration, Path filePath) { try { - if (!filePath.getFileSystem(configuration).exists(filePath)) { - return new ArrayList<>(); - } - Configuration conf = new Configuration(configuration); conf.addResource(FSUtils.getFs(filePath.toString(), conf).getConf()); Reader reader = OrcFile.createReader(filePath, OrcFile.readerOptions(conf)); Schema readSchema = HoodieAvroUtils.getRecordKeyPartitionPathSchema(); TypeDescription orcSchema = AvroOrcUtils.createOrcSchema(readSchema); - List fieldNames = orcSchema.getFieldNames(); - VectorizedRowBatch batch = orcSchema.createRowBatch(); RecordReader recordReader = reader.rows(new Options(conf).schema(orcSchema)); + List fieldNames = orcSchema.getFieldNames(); // column indices for the RECORD_KEY_METADATA_FIELD, PARTITION_PATH_METADATA_FIELD fields int keyCol = -1; @@ -92,24 +88,43 @@ public List fetchRecordKeyPartitionPath(Configuration configuration, if (keyCol == -1 || partitionCol == -1) { throw new HoodieException(String.format("Couldn't find row keys or partition path in %s.", filePath)); } - while (recordReader.nextBatch(batch)) { - BytesColumnVector rowKeys = (BytesColumnVector) batch.cols[keyCol]; - BytesColumnVector partitionPaths = (BytesColumnVector) batch.cols[partitionCol]; - for (int i = 0; i < batch.size; i++) { - String rowKey = rowKeys.toString(i); - String partitionPath = partitionPaths.toString(i); - hoodieKeys.add(new HoodieKey(rowKey, partitionPath)); - } + return new OrcReaderIterator<>(recordReader, readSchema, orcSchema); + } catch (IOException e) { + throw new HoodieIOException("Failed to open reader from ORC file:" + filePath, e); + } + } + + /** + * Fetch {@link HoodieKey}s from the given ORC file. + * + * @param filePath The ORC file path. 
+ * @param configuration configuration to build fs object + * @return {@link List} of {@link HoodieKey}s fetched from the ORC file + */ + @Override + public List fetchHoodieKeys(Configuration configuration, Path filePath) { + try { + if (!filePath.getFileSystem(configuration).exists(filePath)) { + return Collections.emptyList(); } } catch (IOException e) { throw new HoodieIOException("Failed to read from ORC file:" + filePath, e); } + List hoodieKeys = new ArrayList<>(); + try (ClosableIterator iterator = getHoodieKeyIterator(configuration, filePath, Option.empty())) { + iterator.forEachRemaining(hoodieKeys::add); + } return hoodieKeys; } @Override - public List fetchRecordKeyPartitionPath(Configuration configuration, Path filePath, Option keyGeneratorOpt) { - throw new HoodieIOException("UnsupportedOperation : Disabling meta fields not yet supported for Orc"); + public List fetchHoodieKeys(Configuration configuration, Path filePath, Option keyGeneratorOpt) { + throw new UnsupportedOperationException("Custom key generator is not supported yet"); + } + + @Override + public ClosableIterator getHoodieKeyIterator(Configuration configuration, Path filePath, Option keyGeneratorOpt) { + throw new UnsupportedOperationException("Custom key generator is not supported yet"); } /** @@ -118,8 +133,7 @@ public List fetchRecordKeyPartitionPath(Configuration configuration, @Override public List readAvroRecords(Configuration configuration, Path filePath) { Schema avroSchema; - try { - Reader reader = OrcFile.createReader(filePath, OrcFile.readerOptions(configuration)); + try (Reader reader = OrcFile.createReader(filePath, OrcFile.readerOptions(configuration))) { avroSchema = AvroOrcUtils.createAvroSchema(reader.getSchema()); } catch (IOException io) { throw new HoodieIOException("Unable to read Avro records from an ORC file:" + filePath, io); @@ -133,14 +147,14 @@ public List readAvroRecords(Configuration configuration, Path fil @Override public List readAvroRecords(Configuration configuration, Path filePath, Schema avroSchema) { List records = new ArrayList<>(); - try { - Reader reader = OrcFile.createReader(filePath, OrcFile.readerOptions(configuration)); + try (Reader reader = OrcFile.createReader(filePath, OrcFile.readerOptions(configuration))) { TypeDescription orcSchema = reader.getSchema(); - RecordReader recordReader = reader.rows(new Options(configuration).schema(orcSchema)); - OrcReaderIterator iterator = new OrcReaderIterator<>(recordReader, avroSchema, orcSchema); - while (iterator.hasNext()) { - GenericRecord record = iterator.next(); - records.add(record); + try (RecordReader recordReader = reader.rows(new Options(configuration).schema(orcSchema))) { + OrcReaderIterator iterator = new OrcReaderIterator<>(recordReader, avroSchema, orcSchema); + while (iterator.hasNext()) { + GenericRecord record = iterator.next(); + records.add(record); + } } } catch (IOException io) { throw new HoodieIOException("Unable to create an ORC reader for ORC file:" + filePath, io); @@ -160,35 +174,35 @@ public List readAvroRecords(Configuration configuration, Path fil @Override public Set filterRowKeys(Configuration conf, Path filePath, Set filter) throws HoodieIOException { - try { - Reader reader = OrcFile.createReader(filePath, OrcFile.readerOptions(conf)); - Set filteredRowKeys = new HashSet<>(); + try (Reader reader = OrcFile.createReader(filePath, OrcFile.readerOptions(conf));) { TypeDescription schema = reader.getSchema(); - List fieldNames = schema.getFieldNames(); - VectorizedRowBatch batch = 
schema.createRowBatch(); - RecordReader recordReader = reader.rows(new Options(conf).schema(schema)); + try (RecordReader recordReader = reader.rows(new Options(conf).schema(schema))) { + Set filteredRowKeys = new HashSet<>(); + List fieldNames = schema.getFieldNames(); + VectorizedRowBatch batch = schema.createRowBatch(); - // column index for the RECORD_KEY_METADATA_FIELD field - int colIndex = -1; - for (int i = 0; i < fieldNames.size(); i++) { - if (fieldNames.get(i).equals(HoodieRecord.RECORD_KEY_METADATA_FIELD)) { - colIndex = i; - break; + // column index for the RECORD_KEY_METADATA_FIELD field + int colIndex = -1; + for (int i = 0; i < fieldNames.size(); i++) { + if (fieldNames.get(i).equals(HoodieRecord.RECORD_KEY_METADATA_FIELD)) { + colIndex = i; + break; + } } - } - if (colIndex == -1) { - throw new HoodieException(String.format("Couldn't find row keys in %s.", filePath)); - } - while (recordReader.nextBatch(batch)) { - BytesColumnVector rowKeys = (BytesColumnVector) batch.cols[colIndex]; - for (int i = 0; i < batch.size; i++) { - String rowKey = rowKeys.toString(i); - if (filter.isEmpty() || filter.contains(rowKey)) { - filteredRowKeys.add(rowKey); + if (colIndex == -1) { + throw new HoodieException(String.format("Couldn't find row keys in %s.", filePath)); + } + while (recordReader.nextBatch(batch)) { + BytesColumnVector rowKeys = (BytesColumnVector) batch.cols[colIndex]; + for (int i = 0; i < batch.size; i++) { + String rowKey = rowKeys.toString(i); + if (filter.isEmpty() || filter.contains(rowKey)) { + filteredRowKeys.add(rowKey); + } } } + return filteredRowKeys; } - return filteredRowKeys; } catch (IOException io) { throw new HoodieIOException("Unable to read row keys for ORC file:" + filePath, io); } @@ -197,8 +211,7 @@ public Set filterRowKeys(Configuration conf, Path filePath, Set @Override public Map readFooter(Configuration conf, boolean required, Path orcFilePath, String... 
footerNames) { - try { - Reader reader = OrcFile.createReader(orcFilePath, OrcFile.readerOptions(conf)); + try (Reader reader = OrcFile.createReader(orcFilePath, OrcFile.readerOptions(conf))) { Map footerVals = new HashMap<>(); List metadataItemList = reader.getFileTail().getFooter().getMetadataList(); Map metadata = metadataItemList.stream().collect(Collectors.toMap( @@ -220,10 +233,16 @@ public Map readFooter(Configuration conf, boolean required, @Override public Schema readAvroSchema(Configuration conf, Path orcFilePath) { - try { - Reader reader = OrcFile.createReader(orcFilePath, OrcFile.readerOptions(conf)); - TypeDescription orcSchema = reader.getSchema(); - return AvroOrcUtils.createAvroSchema(orcSchema); + try (Reader reader = OrcFile.createReader(orcFilePath, OrcFile.readerOptions(conf))) { + if (reader.hasMetadataValue("orc.avro.schema")) { + ByteBuffer metadataValue = reader.getMetadataValue("orc.avro.schema"); + byte[] bytes = new byte[metadataValue.remaining()]; + metadataValue.get(bytes); + return new Schema.Parser().parse(new String(bytes)); + } else { + TypeDescription orcSchema = reader.getSchema(); + return AvroOrcUtils.createAvroSchema(orcSchema); + } } catch (IOException io) { throw new HoodieIOException("Unable to get Avro schema for ORC file:" + orcFilePath, io); } @@ -231,8 +250,7 @@ public Schema readAvroSchema(Configuration conf, Path orcFilePath) { @Override public long getRowCount(Configuration conf, Path orcFilePath) { - try { - Reader reader = OrcFile.createReader(orcFilePath, OrcFile.readerOptions(conf)); + try (Reader reader = OrcFile.createReader(orcFilePath, OrcFile.readerOptions(conf))) { return reader.getNumberOfRows(); } catch (IOException io) { throw new HoodieIOException("Unable to get row count for ORC file:" + orcFilePath, io); diff --git a/hudi-common/src/main/java/org/apache/hudi/common/util/ParquetReaderIterator.java b/hudi-common/src/main/java/org/apache/hudi/common/util/ParquetReaderIterator.java index 5970e02d6799a..03bd471b606f1 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/util/ParquetReaderIterator.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/util/ParquetReaderIterator.java @@ -24,13 +24,12 @@ import org.apache.parquet.hadoop.ParquetReader; import java.io.IOException; -import java.util.Iterator; /** * This class wraps a parquet reader and provides an iterator based api to read from a parquet file. 
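Among the try-with-resources conversions above, readAvroSchema also changes behavior: it now prefers the exact writer schema stored under the ORC file's "orc.avro.schema" metadata key and only falls back to converting the ORC type description. A hedged sketch of that resolution order, with a plain Map standing in for the file's key/value metadata:

import java.nio.charset.StandardCharsets;
import java.util.Map;
import java.util.Optional;

final class OrcSchemaResolution {
  static String resolveAvroSchemaJson(Map<String, byte[]> fileMetadata, String convertedFromOrcSchema) {
    return Optional.ofNullable(fileMetadata.get("orc.avro.schema"))
        .map(bytes -> new String(bytes, StandardCharsets.UTF_8)) // lossless: exact writer schema
        .orElse(convertedFromOrcSchema); // lossy: reconstructed from ORC types
  }
}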
This is used in * {@link BoundedInMemoryQueue} */ -public class ParquetReaderIterator implements Iterator { +public class ParquetReaderIterator implements ClosableIterator { // Parquet reader for an existing parquet file private final ParquetReader parquetReader; @@ -73,7 +72,11 @@ public T next() { } } - public void close() throws IOException { - parquetReader.close(); + public void close() { + try { + parquetReader.close(); + } catch (IOException e) { + throw new HoodieException("Exception while closing the parquet reader", e); + } } } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/util/ParquetUtils.java b/hudi-common/src/main/java/org/apache/hudi/common/util/ParquetUtils.java index 136206150cbb7..e74f4f77703d0 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/util/ParquetUtils.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/util/ParquetUtils.java @@ -18,10 +18,6 @@ package org.apache.hudi.common.util; -import org.apache.avro.Schema; -import org.apache.avro.generic.GenericRecord; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.Path; import org.apache.hudi.avro.HoodieAvroUtils; import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.model.HoodieColumnRangeMetadata; @@ -30,6 +26,13 @@ import org.apache.hudi.exception.HoodieIOException; import org.apache.hudi.exception.MetadataNotFoundException; import org.apache.hudi.keygen.BaseKeyGenerator; + +import org.apache.avro.Schema; +import org.apache.avro.generic.GenericRecord; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.log4j.LogManager; +import org.apache.log4j.Logger; import org.apache.parquet.avro.AvroParquetReader; import org.apache.parquet.avro.AvroReadSupport; import org.apache.parquet.avro.AvroSchemaConverter; @@ -37,14 +40,17 @@ import org.apache.parquet.hadoop.ParquetReader; import org.apache.parquet.hadoop.metadata.BlockMetaData; import org.apache.parquet.hadoop.metadata.ParquetMetadata; +import org.apache.parquet.io.api.Binary; import org.apache.parquet.schema.DecimalMetadata; import org.apache.parquet.schema.MessageType; import org.apache.parquet.schema.OriginalType; import org.apache.parquet.schema.PrimitiveType; import javax.annotation.Nonnull; + import java.io.IOException; import java.math.BigDecimal; +import java.math.BigInteger; import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; @@ -60,6 +66,8 @@ */ public class ParquetUtils extends BaseFileUtils { + private static final Logger LOG = LogManager.getLogger(ParquetUtils.class); + /** * Read the rowKey list matching the given filter, from the given parquet file. If the filter is empty, then this will * return all the rowkeys. @@ -74,6 +82,17 @@ public Set filterRowKeys(Configuration configuration, Path filePath, Set return filterParquetRowKeys(configuration, filePath, filter, HoodieAvroUtils.getRecordKeySchema()); } + public static ParquetMetadata readMetadata(Configuration conf, Path parquetFilePath) { + ParquetMetadata footer; + try { + // TODO(vc): Should we use the parallel reading version here? + footer = ParquetFileReader.readFooter(FSUtils.getFs(parquetFilePath.toString(), conf).getConf(), parquetFilePath); + } catch (IOException e) { + throw new HoodieIOException("Failed to read footer for parquet " + parquetFilePath, e); + } + return footer; + } + /** * Read the rowKey list matching the given filter, from the given parquet file. If the filter is empty, then this will * return all the rowkeys. 
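ParquetReaderIterator's close() no longer throws a checked IOException, which is what lets it satisfy the ClosableIterator contract and be used in try-with-resources without a catch block. The adaptation pattern, sketched JDK-only (the patch wraps in HoodieException rather than UncheckedIOException):

import java.io.Closeable;
import java.io.IOException;
import java.io.UncheckedIOException;

final class QuietClose {
  static void closeUnchecked(Closeable reader) {
    try {
      reader.close();
    } catch (IOException e) {
      throw new UncheckedIOException("Exception while closing the reader", e);
    }
  }
}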
@@ -122,12 +141,26 @@ private static Set filterParquetRowKeys(Configuration configuration, Pat * @return {@link List} of {@link HoodieKey}s fetched from the parquet file */ @Override - public List fetchRecordKeyPartitionPath(Configuration configuration, Path filePath) { - return fetchRecordKeyPartitionPathInternal(configuration, filePath, Option.empty()); + public List fetchHoodieKeys(Configuration configuration, Path filePath) { + return fetchHoodieKeys(configuration, filePath, Option.empty()); } - private List fetchRecordKeyPartitionPathInternal(Configuration configuration, Path filePath, Option keyGeneratorOpt) { - List hoodieKeys = new ArrayList<>(); + @Override + public ClosableIterator getHoodieKeyIterator(Configuration configuration, Path filePath) { + return getHoodieKeyIterator(configuration, filePath, Option.empty()); + } + + /** + * Returns a closable iterator for reading the given parquet file. + * + * @param configuration configuration to build fs object + * @param filePath The parquet file path + * @param keyGeneratorOpt instance of KeyGenerator + * + * @return {@link ClosableIterator} of {@link HoodieKey}s for reading the parquet file + */ + @Override + public ClosableIterator getHoodieKeyIterator(Configuration configuration, Path filePath, Option keyGeneratorOpt) { try { Configuration conf = new Configuration(configuration); conf.addResource(FSUtils.getFs(filePath.toString(), conf).getConf()); @@ -140,27 +173,11 @@ private List fetchRecordKeyPartitionPathInternal(Configuration config .orElse(HoodieAvroUtils.getRecordKeyPartitionPathSchema()); AvroReadSupport.setAvroReadSchema(conf, readSchema); AvroReadSupport.setRequestedProjection(conf, readSchema); - ParquetReader reader = AvroParquetReader.builder(filePath).withConf(conf).build(); - Object obj = reader.read(); - while (obj != null) { - if (obj instanceof GenericRecord) { - String recordKey = null; - String partitionPath = null; - if (keyGeneratorOpt.isPresent()) { - recordKey = keyGeneratorOpt.get().getRecordKey((GenericRecord) obj); - partitionPath = keyGeneratorOpt.get().getPartitionPath((GenericRecord) obj); - } else { - recordKey = ((GenericRecord) obj).get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString(); - partitionPath = ((GenericRecord) obj).get(HoodieRecord.PARTITION_PATH_METADATA_FIELD).toString(); - } - hoodieKeys.add(new HoodieKey(recordKey, partitionPath)); - obj = reader.read(); - } - } + ParquetReader reader = AvroParquetReader.builder(filePath).withConf(conf).build(); + return HoodieKeyIterator.getInstance(new ParquetReaderIterator<>(reader), keyGeneratorOpt); } catch (IOException e) { throw new HoodieIOException("Failed to read from Parquet file " + filePath, e); } - return hoodieKeys; } /** @@ -168,23 +185,16 @@ private List fetchRecordKeyPartitionPathInternal(Configuration config * * @param configuration configuration to build fs object * @param filePath The parquet file path. - * @param keyGeneratorOpt + * @param keyGeneratorOpt instance of KeyGenerator. * @return {@link List} of {@link HoodieKey}s fetched from the parquet file */ @Override - public List fetchRecordKeyPartitionPath(Configuration configuration, Path filePath, Option keyGeneratorOpt) { - return fetchRecordKeyPartitionPathInternal(configuration, filePath, keyGeneratorOpt); - } - - public ParquetMetadata readMetadata(Configuration conf, Path parquetFilePath) { - ParquetMetadata footer; - try { - // TODO(vc): Should we use the parallel reading version here? 
- footer = ParquetFileReader.readFooter(FSUtils.getFs(parquetFilePath.toString(), conf).getConf(), parquetFilePath); - } catch (IOException e) { - throw new HoodieIOException("Failed to read footer for parquet " + parquetFilePath, e); + public List fetchHoodieKeys(Configuration configuration, Path filePath, Option keyGeneratorOpt) { + List hoodieKeys = new ArrayList<>(); + try (ClosableIterator iterator = getHoodieKeyIterator(configuration, filePath, keyGeneratorOpt)) { + iterator.forEachRemaining(hoodieKeys::add); + return hoodieKeys; } - return footer; } /** @@ -222,10 +232,8 @@ public Schema readAvroSchema(Configuration configuration, Path parquetFilePath) */ @Override public List readAvroRecords(Configuration configuration, Path filePath) { - ParquetReader reader = null; List records = new ArrayList<>(); - try { - reader = AvroParquetReader.builder(filePath).withConf(configuration).build(); + try (ParquetReader reader = AvroParquetReader.builder(filePath).withConf(configuration).build()) { Object obj = reader.read(); while (obj != null) { if (obj instanceof GenericRecord) { @@ -236,14 +244,6 @@ public List readAvroRecords(Configuration configuration, Path fil } catch (IOException e) { throw new HoodieIOException("Failed to read avro records from Parquet " + filePath, e); - } finally { - if (reader != null) { - try { - reader.close(); - } catch (IOException e) { - // ignore - } - } } return records; } @@ -298,18 +298,20 @@ public List> readRangeFromParquetMetadata( Map>> columnToStatsListMap = metadata.getBlocks().stream().sequential() .flatMap(blockMetaData -> blockMetaData.getColumns().stream() .filter(f -> cols.contains(f.getPath().toDotString())) - .map(columnChunkMetaData -> - new HoodieColumnRangeMetadata( - parquetFilePath.getName(), - columnChunkMetaData.getPath().toDotString(), - convertToNativeJavaType( - columnChunkMetaData.getPrimitiveType(), - columnChunkMetaData.getStatistics().genericGetMin()), - convertToNativeJavaType( - columnChunkMetaData.getPrimitiveType(), - columnChunkMetaData.getStatistics().genericGetMax()), - columnChunkMetaData.getStatistics().getNumNulls(), - columnChunkMetaData.getPrimitiveType().stringifier())) + .map(columnChunkMetaData -> + new HoodieColumnRangeMetadata( + parquetFilePath.getName(), + columnChunkMetaData.getPath().toDotString(), + convertToNativeJavaType( + columnChunkMetaData.getPrimitiveType(), + columnChunkMetaData.getStatistics().genericGetMin()), + convertToNativeJavaType( + columnChunkMetaData.getPrimitiveType(), + columnChunkMetaData.getStatistics().genericGetMax()), + columnChunkMetaData.getStatistics().getNumNulls(), + columnChunkMetaData.getValueCount(), + columnChunkMetaData.getTotalSize(), + columnChunkMetaData.getTotalUncompressedSize())) ).collect(Collectors.groupingBy(HoodieColumnRangeMetadata::getColumnName)); // Combine those into file-level statistics @@ -354,30 +356,117 @@ private > HoodieColumnRangeMetadata combineRanges( maxValue = one.getMaxValue().compareTo(another.getMaxValue()) < 0 ? 
another.getMaxValue() : one.getMaxValue(); } else if (one.getMaxValue() == null) { maxValue = another.getMaxValue(); - } else { + } else { maxValue = one.getMaxValue(); } return new HoodieColumnRangeMetadata( one.getFilePath(), - one.getColumnName(), minValue, maxValue, one.getNumNulls() + another.getNumNulls(), one.getStringifier()); + one.getColumnName(), minValue, maxValue, + one.getNullCount() + another.getNullCount(), + one.getValueCount() + another.getValueCount(), + one.getTotalSize() + another.getTotalSize(), + one.getTotalUncompressedSize() + another.getTotalUncompressedSize()); } private static Comparable convertToNativeJavaType(PrimitiveType primitiveType, Comparable val) { if (primitiveType.getOriginalType() == OriginalType.DECIMAL) { - DecimalMetadata decimalMetadata = primitiveType.getDecimalMetadata(); - return BigDecimal.valueOf((Integer) val, decimalMetadata.getScale()); + return extractDecimal(val, primitiveType.getDecimalMetadata()); } else if (primitiveType.getOriginalType() == OriginalType.DATE) { // NOTE: This is a workaround to address race-condition in using // {@code SimpleDataFormat} concurrently (w/in {@code DateStringifier}) // TODO cleanup after Parquet upgrade to 1.12 synchronized (primitiveType.stringifier()) { + // Date logical type is implemented as a signed INT32 + // REF: https://github.com/apache/parquet-format/blob/master/LogicalTypes.md return java.sql.Date.valueOf( primitiveType.stringifier().stringify((Integer) val) ); } + } else if (primitiveType.getOriginalType() == OriginalType.UTF8) { + // NOTE: UTF8 type designates a byte array that should be interpreted as a + // UTF-8 encoded character string + // REF: https://github.com/apache/parquet-format/blob/master/LogicalTypes.md + return ((Binary) val).toStringUsingUTF8(); + } else if (primitiveType.getPrimitiveTypeName() == PrimitiveType.PrimitiveTypeName.BINARY) { + // NOTE: `getBytes` access makes a copy of the underlying byte buffer + return ((Binary) val).toByteBuffer(); } return val; } + + @Nonnull + private static BigDecimal extractDecimal(Object val, DecimalMetadata decimalMetadata) { + // In Parquet, Decimal could be represented as either of + // 1. INT32 (for 1 <= precision <= 9) + // 2. INT64 (for 1 <= precision <= 18) + // 3. FIXED_LEN_BYTE_ARRAY (precision is limited by the array size. Length n can store <= floor(log_10(2^(8*n - 1) - 1)) base-10 digits) + // 4. BINARY (precision is not limited) + // REF: https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#DECIMAL + int scale = decimalMetadata.getScale(); + if (val == null) { + return null; + } else if (val instanceof Integer) { + return BigDecimal.valueOf((Integer) val, scale); + } else if (val instanceof Long) { + return BigDecimal.valueOf((Long) val, scale); + } else if (val instanceof Binary) { + // NOTE: Unscaled number is stored in BE format (most significant byte is 0th) + return new BigDecimal(new BigInteger(((Binary) val).getBytesUnsafe()), scale); + } else { + throw new UnsupportedOperationException(String.format("Unsupported value type (%s)", val.getClass().getName())); + } + } + + // ------------------------------------------------------------------------- + // Inner Class + // ------------------------------------------------------------------------- + + /** + * An iterator that can apply the given function {@code func} to transform records + * from the underneath record iterator to hoodie keys. 
+ */ + private static class HoodieKeyIterator implements ClosableIterator { + private final ClosableIterator nestedItr; + private final Function func; + + public static HoodieKeyIterator getInstance(ClosableIterator nestedItr, Option keyGenerator) { + return new HoodieKeyIterator(nestedItr, keyGenerator); + } + + private HoodieKeyIterator(ClosableIterator nestedItr, Option keyGenerator) { + this.nestedItr = nestedItr; + if (keyGenerator.isPresent()) { + this.func = retVal -> { + String recordKey = keyGenerator.get().getRecordKey(retVal); + String partitionPath = keyGenerator.get().getPartitionPath(retVal); + return new HoodieKey(recordKey, partitionPath); + }; + } else { + this.func = retVal -> { + String recordKey = retVal.get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString(); + String partitionPath = retVal.get(HoodieRecord.PARTITION_PATH_METADATA_FIELD).toString(); + return new HoodieKey(recordKey, partitionPath); + }; + } + } + + @Override + public void close() { + if (this.nestedItr != null) { + this.nestedItr.close(); + } + } + + @Override + public boolean hasNext() { + return this.nestedItr.hasNext(); + } + + @Override + public HoodieKey next() { + return this.func.apply(this.nestedItr.next()); + } + } } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/util/RetryHelper.java b/hudi-common/src/main/java/org/apache/hudi/common/util/RetryHelper.java new file mode 100644 index 0000000000000..067c5ee40dad7 --- /dev/null +++ b/hudi-common/src/main/java/org/apache/hudi/common/util/RetryHelper.java @@ -0,0 +1,129 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hudi.common.util; + +import org.apache.log4j.LogManager; +import org.apache.log4j.Logger; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Random; +import java.util.stream.Collectors; + +public class RetryHelper { + private static final Logger LOG = LogManager.getLogger(RetryHelper.class); + private CheckedFunction func; + private int num; + private long maxIntervalTime; + private long initialIntervalTime = 100L; + private String taskInfo = "N/A"; + private List> retryExceptionsClasses; + + public RetryHelper() { + } + + public RetryHelper(long maxRetryIntervalMs, int maxRetryNumbers, long initialRetryIntervalMs, String retryExceptions) { + this.num = maxRetryNumbers; + this.initialIntervalTime = initialRetryIntervalMs; + this.maxIntervalTime = maxRetryIntervalMs; + if (StringUtils.isNullOrEmpty(retryExceptions)) { + this.retryExceptionsClasses = new ArrayList<>(); + } else { + this.retryExceptionsClasses = Arrays.stream(retryExceptions.split(",")) + .map(exception -> (Exception) ReflectionUtils.loadClass(exception, "")) + .map(Exception::getClass) + .collect(Collectors.toList()); + } + } + + public RetryHelper(String taskInfo) { + this.taskInfo = taskInfo; + } + + public RetryHelper tryWith(CheckedFunction func) { + this.func = func; + return this; + } + + public T start() throws IOException { + int retries = 0; + T functionResult = null; + + while (true) { + long waitTime = Math.min(getWaitTimeExp(retries), maxIntervalTime); + try { + functionResult = func.get(); + break; + } catch (IOException | RuntimeException e) { + if (!checkIfExceptionInRetryList(e)) { + throw e; + } + if (retries++ >= num) { + LOG.error("Still failed to " + taskInfo + " after retried " + num + " times.", e); + throw e; + } + LOG.warn("Catch Exception " + taskInfo + ", will retry after " + waitTime + " ms.", e); + try { + Thread.sleep(waitTime); + } catch (InterruptedException ex) { + // ignore InterruptedException here + } + } + } + + if (retries > 0) { + LOG.info("Success to " + taskInfo + " after retried " + retries + " times."); + } + return functionResult; + } + + private boolean checkIfExceptionInRetryList(Exception e) { + boolean inRetryList = false; + + // if users didn't set hoodie.filesystem.operation.retry.exceptions + // we will retry all the IOException and RuntimeException + if (retryExceptionsClasses.isEmpty()) { + return true; + } + + for (Class clazz : retryExceptionsClasses) { + if (clazz.isInstance(e)) { + inRetryList = true; + break; + } + } + return inRetryList; + } + + private long getWaitTimeExp(int retryCount) { + Random random = new Random(); + if (0 == retryCount) { + return initialIntervalTime; + } + + return (long) Math.pow(2, retryCount) * initialIntervalTime + random.nextInt(100); + } + + @FunctionalInterface + public interface CheckedFunction { + T get() throws IOException; + } +} \ No newline at end of file diff --git a/hudi-common/src/main/java/org/apache/hudi/common/util/SpillableMapUtils.java b/hudi-common/src/main/java/org/apache/hudi/common/util/SpillableMapUtils.java index 934b5b5f616c6..9ded415438a86 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/util/SpillableMapUtils.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/util/SpillableMapUtils.java @@ -19,6 +19,7 @@ package org.apache.hudi.common.util; import org.apache.hudi.common.fs.SizeAwareDataOutputStream; +import org.apache.hudi.common.model.HoodieAvroRecord; import 
org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.model.HoodieOperation; import org.apache.hudi.common.model.HoodieRecord; @@ -32,9 +33,9 @@ import java.io.IOException; import java.io.RandomAccessFile; -import java.util.zip.CRC32; import static org.apache.hudi.avro.HoodieAvroUtils.getNullableValAsString; +import static org.apache.hudi.common.util.BinaryUtil.generateChecksum; /** * A utility class supports spillable map. @@ -94,15 +95,6 @@ private static long spill(SizeAwareDataOutputStream outputStream, FileEntry file return outputStream.getSize(); } - /** - * Generate a checksum for a given set of bytes. - */ - public static long generateChecksum(byte[] data) { - CRC32 crc = new CRC32(); - crc.update(data); - return crc.getValue(); - } - /** * Compute a bytes representation of the payload by serializing the contents This is used to estimate the size of the * payload (either in memory or when written to disk). @@ -144,7 +136,7 @@ public static R convertToHoodieRecordPayload(GenericRecord record, String pa Object preCombineVal = getPreCombineVal(record, preCombineField); HoodieOperation operation = withOperationField ? HoodieOperation.fromName(getNullableValAsString(record, HoodieRecord.OPERATION_METADATA_FIELD)) : null; - HoodieRecord hoodieRecord = new HoodieRecord<>(new HoodieKey(recKey, partitionPath), + HoodieRecord hoodieRecord = new HoodieAvroRecord<>(new HoodieKey(recKey, partitionPath), ReflectionUtils.loadPayload(payloadClazz, new Object[]{record, preCombineVal}, GenericRecord.class, Comparable.class), operation); @@ -170,7 +162,7 @@ private static Object getPreCombineVal(GenericRecord rec, String preCombineField * Utility method to convert bytes to HoodieRecord using schema and payload class. */ public static R generateEmptyPayload(String recKey, String partitionPath, String payloadClazz) { - HoodieRecord hoodieRecord = new HoodieRecord<>(new HoodieKey(recKey, partitionPath), + HoodieRecord hoodieRecord = new HoodieAvroRecord<>(new HoodieKey(recKey, partitionPath), ReflectionUtils.loadPayload(payloadClazz, new Object[] {Option.empty()}, Option.class)); return (R) hoodieRecord; } diff --git a/hudi-common/src/main/java/org/apache/hudi/common/util/collection/BitCaskDiskMap.java b/hudi-common/src/main/java/org/apache/hudi/common/util/collection/BitCaskDiskMap.java index 289901df81861..9fb0b20e74f2c 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/util/collection/BitCaskDiskMap.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/util/collection/BitCaskDiskMap.java @@ -56,6 +56,8 @@ import java.util.zip.DeflaterOutputStream; import java.util.zip.InflaterInputStream; +import static org.apache.hudi.common.util.BinaryUtil.generateChecksum; + /** * This class provides a disk spillable only map implementation. All of the data is currenly written to one file, * without any rollover support. It uses the following : 1) An in-memory map that tracks the key-> latest ValueMetadata. 
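Editor's note: the new `RetryHelper` above wraps an IO-bound call with capped exponential backoff plus jitter: waits start at `initialRetryIntervalMs`, grow as powers of two via `getWaitTimeExp`, and are capped at `maxRetryIntervalMs`; an empty `hoodie.filesystem.operation.retry.exceptions` list retries every `IOException` and `RuntimeException`. A hedged usage sketch (the generic parameter is restored here, since type parameters are stripped in the flattened hunks; the wrapped call is a stand-in):

```java
import java.io.IOException;

import org.apache.hudi.common.util.RetryHelper;

public class RetryHelperSketch {
  public static String openWithRetry() throws IOException {
    // Cap waits at 2000 ms, allow 3 retries, start at 100 ms; the empty
    // exception list means any IOException/RuntimeException is retried.
    return new RetryHelper<String>(2000L, 3, 100L, "")
        .tryWith(() -> {
          // stand-in for a transiently failing call, e.g. fs.open(...).read()
          return "contents";
        })
        .start();
  }
}
```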
@@ -223,7 +225,7 @@ private synchronized R put(T key, R value, boolean flush) { new BitCaskDiskMap.ValueMetadata(this.filePath, valueSize, filePosition.get(), timestamp)); byte[] serializedKey = SerializationUtils.serialize(key); filePosition - .set(SpillableMapUtils.spillToDisk(writeOnlyFileHandle, new FileEntry(SpillableMapUtils.generateChecksum(val), + .set(SpillableMapUtils.spillToDisk(writeOnlyFileHandle, new FileEntry(generateChecksum(val), serializedKey.length, valueSize, serializedKey, val, timestamp))); if (flush) { flushToDisk(); diff --git a/hudi-common/src/main/java/org/apache/hudi/common/util/hash/ColumnID.java b/hudi-common/src/main/java/org/apache/hudi/common/util/hash/ColumnIndexID.java similarity index 80% rename from hudi-common/src/main/java/org/apache/hudi/common/util/hash/ColumnID.java rename to hudi-common/src/main/java/org/apache/hudi/common/util/hash/ColumnIndexID.java index be4db44ecd961..92e60b30a311f 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/util/hash/ColumnID.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/util/hash/ColumnIndexID.java @@ -24,14 +24,21 @@ /** * A stateful Hoodie object ID representing any table column. */ -public class ColumnID extends HoodieID { +public class ColumnIndexID extends HoodieIndexID { private static final Type TYPE = Type.COLUMN; - private static final HashID.Size ID_COLUMN_HASH_SIZE = HashID.Size.BITS_64; + public static final HashID.Size ID_COLUMN_HASH_SIZE = HashID.Size.BITS_64; + private final String column; private final byte[] hash; - public ColumnID(final String message) { - this.hash = HashID.hash(message, ID_COLUMN_HASH_SIZE); + public ColumnIndexID(final String column) { + this.column = column; + this.hash = HashID.hash(column, ID_COLUMN_HASH_SIZE); + } + + @Override + public String getName() { + return column; } @Override diff --git a/hudi-common/src/main/java/org/apache/hudi/common/util/hash/FileID.java b/hudi-common/src/main/java/org/apache/hudi/common/util/hash/FileIndexID.java similarity index 84% rename from hudi-common/src/main/java/org/apache/hudi/common/util/hash/FileID.java rename to hudi-common/src/main/java/org/apache/hudi/common/util/hash/FileIndexID.java index 0cb73c5abf9a8..3f9616908bb39 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/util/hash/FileID.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/util/hash/FileIndexID.java @@ -24,14 +24,21 @@ /** * Hoodie object ID representing any file. 
*/ -public class FileID extends HoodieID { +public class FileIndexID extends HoodieIndexID { private static final Type TYPE = Type.FILE; private static final HashID.Size ID_FILE_HASH_SIZE = HashID.Size.BITS_128; + private final String fileName; private final byte[] hash; - public FileID(final String message) { - this.hash = HashID.hash(message, ID_FILE_HASH_SIZE); + public FileIndexID(final String fileName) { + this.fileName = fileName; + this.hash = HashID.hash(fileName, ID_FILE_HASH_SIZE); + } + + @Override + public String getName() { + return fileName; } @Override diff --git a/hudi-common/src/main/java/org/apache/hudi/common/util/hash/HoodieID.java b/hudi-common/src/main/java/org/apache/hudi/common/util/hash/HoodieIndexID.java similarity index 89% rename from hudi-common/src/main/java/org/apache/hudi/common/util/hash/HoodieID.java rename to hudi-common/src/main/java/org/apache/hudi/common/util/hash/HoodieIndexID.java index e08e254b0a215..139efd17ed0ae 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/util/hash/HoodieID.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/util/hash/HoodieIndexID.java @@ -24,9 +24,10 @@ import java.io.Serializable; /** - * A serializable ID that can be used to identify any Hoodie table fields and resources. + * A serializable ID that can be used to identify any Hoodie table fields and + * resources in the on-disk index. */ -public abstract class HoodieID implements Serializable { +public abstract class HoodieIndexID implements Serializable { private static final long serialVersionUID = 1L; @@ -50,6 +51,13 @@ public String toString() { } } + /** + * Get the resource name for which this index id is generated. + * + * @return The resource name + */ + public abstract String getName(); + /** * Get the number of bits representing this ID in memory. *

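Editor's note: the renames above turn the hash-only `ColumnID`/`FileID`/`HoodieID` into `ColumnIndexID`/`FileIndexID`/`HoodieIndexID` (with `PartitionIndexID` below), each now retaining the source resource name behind the new `getName()`. A small sketch with made-up resource names:

```java
import org.apache.hudi.common.util.hash.ColumnIndexID;
import org.apache.hudi.common.util.hash.FileIndexID;
import org.apache.hudi.common.util.hash.PartitionIndexID;

public class IndexIdSketch {
  public static void main(String[] args) {
    // Each ID hashes its resource name at construction time: 64 bits for
    // columns and partitions, 128 bits for files (per the sizes above).
    ColumnIndexID columnId = new ColumnIndexID("rider");
    PartitionIndexID partitionId = new PartitionIndexID("2022/01/26");
    FileIndexID fileId = new FileIndexID("fg1-0_1-2-3_20220126.parquet");

    System.out.println(columnId.getName());    // rider
    System.out.println(partitionId.getName()); // 2022/01/26
    System.out.println(fileId.getName());      // fg1-0_1-2-3_20220126.parquet
  }
}
```

These IDs feed the bloom filter and column stats index keys built in `BaseTableMetadata` later in this diff.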
@@ -74,7 +82,7 @@ public String toString() { public abstract String toString(); /** - * + * Get the Base64 encoded version of the ID. */ public String asBase64EncodedString() { throw new HoodieNotSupportedException("Unsupported hash for " + getType()); diff --git a/hudi-common/src/main/java/org/apache/hudi/common/util/hash/PartitionID.java b/hudi-common/src/main/java/org/apache/hudi/common/util/hash/PartitionIndexID.java similarity index 83% rename from hudi-common/src/main/java/org/apache/hudi/common/util/hash/PartitionID.java rename to hudi-common/src/main/java/org/apache/hudi/common/util/hash/PartitionIndexID.java index f31159faa2a2f..0fbae27b80de8 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/util/hash/PartitionID.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/util/hash/PartitionIndexID.java @@ -24,14 +24,21 @@ /** * Hoodie object ID representing any partition. */ -public class PartitionID extends HoodieID { +public class PartitionIndexID extends HoodieIndexID { private static final Type TYPE = Type.PARTITION; private static final HashID.Size ID_PARTITION_HASH_SIZE = HashID.Size.BITS_64; + private final String partition; private final byte[] hash; - public PartitionID(final String message) { - this.hash = HashID.hash(message, ID_PARTITION_HASH_SIZE); + public PartitionIndexID(final String partition) { + this.partition = partition; + this.hash = HashID.hash(partition, ID_PARTITION_HASH_SIZE); + } + + @Override + public String getName() { + return partition; } @Override diff --git a/hudi-common/src/main/java/org/apache/hudi/common/util/io/ByteBufferBackedInputStream.java b/hudi-common/src/main/java/org/apache/hudi/common/util/io/ByteBufferBackedInputStream.java new file mode 100644 index 0000000000000..0f96d1011a3f0 --- /dev/null +++ b/hudi-common/src/main/java/org/apache/hudi/common/util/io/ByteBufferBackedInputStream.java @@ -0,0 +1,138 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.common.util.io; + +import javax.annotation.Nonnull; +import java.io.InputStream; +import java.nio.ByteBuffer; + +/** + * Instance of {@link InputStream} backed by {@link ByteBuffer}, implementing following + * functionality (on top of what's required by {@link InputStream}) + * + *

+ * <ol>
+ *   <li>Seeking: enables random access by allowing to seek to an arbitrary position w/in the stream</li>
+ *   <li>(Thread-safe) Copying: enables to copy from the underlying buffer not modifying the state of the stream</li>
+ * </ol>
+ * + * NOTE: Generally methods of this class are NOT thread-safe, unless specified otherwise + */ +public class ByteBufferBackedInputStream extends InputStream { + + private final ByteBuffer buffer; + private final int bufferOffset; + + public ByteBufferBackedInputStream(ByteBuffer buf) { + this.buffer = buf.duplicate(); + // We're marking current buffer position, so that we will be able + // to reset it later on appropriately (to support seek operations) + this.buffer.mark(); + this.bufferOffset = buffer.position(); + } + + public ByteBufferBackedInputStream(byte[] array) { + this(array, 0, array.length); + } + + public ByteBufferBackedInputStream(byte[] array, int offset, int length) { + this(ByteBuffer.wrap(array, offset, length)); + } + + @Override + public int read() { + if (!buffer.hasRemaining()) { + throw new IllegalArgumentException("Reading past backed buffer boundary"); + } + return buffer.get() & 0xFF; + } + + @Override + public int read(@Nonnull byte[] bytes, int offset, int length) { + if (!buffer.hasRemaining()) { + throw new IllegalArgumentException("Reading past backed buffer boundary"); + } + // Determine total number of bytes available to read + int available = Math.min(length, buffer.remaining()); + // Copy bytes into the target buffer + buffer.get(bytes, offset, available); + return available; + } + + /** + * Returns current position of the stream + */ + public int getPosition() { + return buffer.position() - bufferOffset; + } + + /** + * Seeks to a position w/in the stream + * + * NOTE: Position is relative to the start of the stream (ie its absolute w/in this stream), + * with following invariant being assumed: + *

+ * <pre>0 <= pos <= length (of the stream)</pre>
+ * + * This method is NOT thread-safe + * + * @param pos target position to seek to w/in the holding buffer + */ + public void seek(long pos) { + buffer.reset(); // to mark + int offset = buffer.position(); + // NOTE: That the new pos is still relative to buffer's offset + int newPos = offset + (int) pos; + if (newPos > buffer.limit() || newPos < offset) { + throw new IllegalArgumentException( + String.format("Can't seek past the backing buffer (limit %d, offset %d, new %d)", buffer.limit(), offset, newPos) + ); + } + + buffer.position(newPos); + } + + /** + * Copies at most {@code length} bytes starting from position {@code pos} into the target + * buffer with provided {@code offset}. Returns number of bytes copied from the backing buffer + * + * NOTE: This does not change the current position of the stream and is thread-safe + * + * @param pos absolute position w/in stream to read from + * @param targetBuffer target buffer to copy into + * @param offset target buffer offset to copy at + * @param length length of the sequence to copy + * @return number of bytes copied + */ + public int copyFrom(long pos, byte[] targetBuffer, int offset, int length) { + int bufferPos = bufferOffset + (int) pos; + if (bufferPos > buffer.limit()) { + throw new IllegalArgumentException( + String.format("Can't read past the backing buffer boundary (offset %d, length %d)", pos, buffer.limit() - bufferOffset) + ); + } else if (length > targetBuffer.length) { + throw new IllegalArgumentException( + String.format("Target buffer is too small (length %d, buffer size %d)", length, targetBuffer.length) + ); + } + // Determine total number of bytes available to read + int available = Math.min(length, buffer.limit() - bufferPos); + // Get current buffer position in the backing array + System.arraycopy(buffer.array(), bufferPos, targetBuffer, offset, available); + return available; + } +} diff --git a/hudi-common/src/main/java/org/apache/hudi/common/util/queue/BoundedInMemoryExecutor.java b/hudi-common/src/main/java/org/apache/hudi/common/util/queue/BoundedInMemoryExecutor.java index 872837913b054..68b840a4794d6 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/util/queue/BoundedInMemoryExecutor.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/util/queue/BoundedInMemoryExecutor.java @@ -19,6 +19,7 @@ package org.apache.hudi.common.util.queue; import org.apache.hudi.common.util.DefaultSizeEstimator; +import org.apache.hudi.common.util.Functions; import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.SizeEstimator; import org.apache.hudi.exception.HoodieException; @@ -26,7 +27,8 @@ import org.apache.log4j.LogManager; import org.apache.log4j.Logger; -import java.util.Arrays; +import java.util.Collections; +import java.util.Iterator; import java.util.List; import java.util.concurrent.CompletableFuture; import java.util.concurrent.CountDownLatch; @@ -54,29 +56,35 @@ public class BoundedInMemoryExecutor { private final List> producers; // Consumer private final Option> consumer; + // pre-execute function to implement environment specific behavior before executors (producers/consumer) run + private final Runnable preExecuteRunnable; + + public BoundedInMemoryExecutor(final long bufferLimitInBytes, final Iterator inputItr, + BoundedInMemoryQueueConsumer consumer, Function transformFunction, Runnable preExecuteRunnable) { + this(bufferLimitInBytes, new IteratorBasedQueueProducer<>(inputItr), Option.of(consumer), transformFunction, preExecuteRunnable); + } public 
BoundedInMemoryExecutor(final long bufferLimitInBytes, BoundedInMemoryQueueProducer producer, Option> consumer, final Function transformFunction) { - this(bufferLimitInBytes, Arrays.asList(producer), consumer, transformFunction, new DefaultSizeEstimator<>()); + this(bufferLimitInBytes, producer, consumer, transformFunction, Functions.noop()); + } + + public BoundedInMemoryExecutor(final long bufferLimitInBytes, BoundedInMemoryQueueProducer producer, + Option> consumer, final Function transformFunction, Runnable preExecuteRunnable) { + this(bufferLimitInBytes, Collections.singletonList(producer), consumer, transformFunction, new DefaultSizeEstimator<>(), preExecuteRunnable); } public BoundedInMemoryExecutor(final long bufferLimitInBytes, List> producers, Option> consumer, final Function transformFunction, - final SizeEstimator sizeEstimator) { + final SizeEstimator sizeEstimator, Runnable preExecuteRunnable) { this.producers = producers; this.consumer = consumer; + this.preExecuteRunnable = preExecuteRunnable; // Ensure single thread for each producer thread and one for consumer this.executorService = Executors.newFixedThreadPool(producers.size() + 1); this.queue = new BoundedInMemoryQueue<>(bufferLimitInBytes, transformFunction, sizeEstimator); } - /** - * Callback to implement environment specific behavior before executors (producers/consumer) run. - */ - public void preExecute() { - // Do Nothing in general context - } - /** * Start all Producers. */ @@ -88,7 +96,7 @@ public ExecutorCompletionService startProducers() { producers.stream().map(producer -> { return completionService.submit(() -> { try { - preExecute(); + preExecuteRunnable.run(); producer.produce(queue); } catch (Throwable e) { LOG.error("error producing records", e); @@ -116,7 +124,7 @@ private Future startConsumer() { return consumer.map(consumer -> { return executorService.submit(() -> { LOG.info("starting consumer thread"); - preExecute(); + preExecuteRunnable.run(); try { E result = consumer.consume(queue); LOG.info("Queue Consumption is done; notifying producer threads"); diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroParquetConfig.java b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroParquetConfig.java similarity index 100% rename from hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroParquetConfig.java rename to hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieAvroParquetConfig.java diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieBaseParquetConfig.java b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieBaseParquetConfig.java similarity index 100% rename from hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/storage/HoodieBaseParquetConfig.java rename to hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieBaseParquetConfig.java diff --git a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieFileReader.java b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieFileReader.java index fefe7eb7e5cc6..cb330b81432bf 100644 --- a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieFileReader.java +++ b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieFileReader.java @@ -20,6 +20,8 @@ import java.io.IOException; import java.util.Iterator; +import java.util.List; +import java.util.Map; import java.util.Set; import org.apache.avro.Schema; @@ -27,7 +29,7 @@ import org.apache.hudi.common.bloom.BloomFilter; import 
org.apache.hudi.common.util.Option; -public interface HoodieFileReader { +public interface HoodieFileReader extends AutoCloseable { public String[] readMinMaxRecordKeys(); @@ -35,6 +37,10 @@ public interface HoodieFileReader { public Set filterRowKeys(Set candidateRowKeys); + default Map getRecordsByKeys(List rowKeys) throws IOException { + throw new UnsupportedOperationException(); + } + public Iterator getRecordIterator(Schema readerSchema) throws IOException; default Iterator getRecordIterator() throws IOException { diff --git a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileReader.java b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileReader.java index e3e38eca86ca9..371da7675e992 100644 --- a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileReader.java +++ b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileReader.java @@ -18,17 +18,18 @@ package org.apache.hudi.io.storage; -import java.io.ByteArrayInputStream; import java.io.IOException; import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.Arrays; -import java.util.HashSet; +import java.util.HashMap; import java.util.Iterator; import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.Set; +import java.util.TreeSet; +import java.util.stream.Collectors; import org.apache.avro.Schema; import org.apache.avro.generic.IndexedRecord; @@ -49,11 +50,17 @@ import org.apache.hudi.common.bloom.BloomFilter; import org.apache.hudi.common.bloom.BloomFilterFactory; import org.apache.hudi.common.fs.FSUtils; +import org.apache.hudi.common.util.ClosableIterator; import org.apache.hudi.common.util.Option; +import org.apache.hudi.common.util.ValidationUtils; +import org.apache.hudi.common.util.io.ByteBufferBackedInputStream; import org.apache.hudi.exception.HoodieException; import org.apache.hudi.exception.HoodieIOException; +import org.apache.log4j.LogManager; +import org.apache.log4j.Logger; -public class HoodieHFileReader implements HoodieFileReader { +public class HoodieHFileReader implements HoodieFileReader { + private static final Logger LOG = LogManager.getLogger(HoodieHFileReader.class); private Path path; private Configuration conf; private HFile.Reader reader; @@ -63,6 +70,7 @@ public class HoodieHFileReader implements HoodieFileRea // key retrieval. 
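Editor's note: `HoodieFileReader` now extends `AutoCloseable` and gains a default `getRecordsByKeys` that throws `UnsupportedOperationException`; `HoodieHFileReader` below overrides it with sorted, seek-based point lookups. A hedged caller-side sketch (type parameters, stripped in the flattened hunks, are restored; the reader instance is assumed to come from the usual reader factory):

```java
import java.util.Arrays;
import java.util.Map;

import org.apache.avro.generic.GenericRecord;
import org.apache.hudi.io.storage.HoodieFileReader;

public class PointLookupSketch {
  // With the AutoCloseable bound, any HoodieFileReader can be managed by
  // try-with-resources; close() is declared to throw Exception.
  public static Map<String, GenericRecord> lookup(HoodieFileReader<GenericRecord> reader)
      throws Exception {
    try (HoodieFileReader<GenericRecord> r = reader) {
      // HFile-backed readers sort these keys and seek each one; readers that
      // keep the default (e.g. Parquet) throw UnsupportedOperationException.
      return r.getRecordsByKeys(Arrays.asList("key-001", "key-042"));
    }
  }
}
```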
private HFileScanner keyScanner; + public static final String KEY_FIELD_NAME = "key"; public static final String KEY_SCHEMA = "schema"; public static final String KEY_BLOOM_FILTER_META_BLOCK = "bloomFilter"; public static final String KEY_BLOOM_FILTER_TYPE_CODE = "bloomFilterTypeCode"; @@ -75,11 +83,11 @@ public HoodieHFileReader(Configuration configuration, Path path, CacheConfig cac this.reader = HFile.createReader(FSUtils.getFs(path.toString(), configuration), path, cacheConfig, conf); } - public HoodieHFileReader(Configuration configuration, Path path, CacheConfig cacheConfig, FileSystem inlineFs) throws IOException { + public HoodieHFileReader(Configuration configuration, Path path, CacheConfig cacheConfig, FileSystem fs) throws IOException { this.conf = configuration; this.path = path; - this.fsDataInputStream = inlineFs.open(path); - this.reader = HFile.createReader(inlineFs, path, cacheConfig, configuration); + this.fsDataInputStream = fs.open(path); + this.reader = HFile.createReader(fs, path, cacheConfig, configuration); } public HoodieHFileReader(byte[] content) throws IOException { @@ -116,6 +124,13 @@ public Schema getSchema() { return schema; } + /** + * Sets up the writer schema explicitly. + */ + public void withSchema(Schema schema) { + this.schema = schema; + } + @Override public BloomFilter readBloomFilter() { Map fileInfo; @@ -131,35 +146,69 @@ public BloomFilter readBloomFilter() { } } + /** + * Filter keys by availability. + *

+ * Note: This method is performant when the caller passes in a sorted candidate keys. + * + * @param candidateRowKeys - Keys to check for the availability + * @return Subset of candidate keys that are available + */ @Override - public Set filterRowKeys(Set candidateRowKeys) { - // Current implementation reads all records and filters them. In certain cases, it many be better to: - // 1. Scan a limited subset of keys (min/max range of candidateRowKeys) - // 2. Lookup keys individually (if the size of candidateRowKeys is much less than the total keys in file) - try { - List> allRecords = readAllRecords(); - Set rowKeys = new HashSet<>(); - allRecords.forEach(t -> { - if (candidateRowKeys.contains(t.getFirst())) { - rowKeys.add(t.getFirst()); - } - }); - return rowKeys; - } catch (IOException e) { - throw new HoodieIOException("Failed to read row keys from " + path, e); + public Set filterRowKeys(Set candidateRowKeys) { + return candidateRowKeys.stream().filter(k -> { + try { + return isKeyAvailable(k); + } catch (IOException e) { + LOG.error("Failed to check key availability: " + k); + return false; + } + }).collect(Collectors.toSet()); + } + + @Override + public Map getRecordsByKeys(List rowKeys) throws IOException { + return filterRecordsImpl(new TreeSet<>(rowKeys)); + } + + /** + * Filter records by sorted keys. + *

+ * TODO: Implement single seek and sequential scan till the last candidate key + * instead of repeated seeks. + * + * @param sortedCandidateRowKeys - Sorted set of keys to fetch records for + * @return Map of keys to fetched records + * @throws IOException When the deserialization of records fail + */ + private synchronized Map filterRecordsImpl(TreeSet sortedCandidateRowKeys) throws IOException { + HashMap filteredRecords = new HashMap<>(); + for (String key : sortedCandidateRowKeys) { + Option record = getRecordByKey(key); + if (record.isPresent()) { + filteredRecords.put(key, record.get()); + } } + return filteredRecords; } - public List> readAllRecords(Schema writerSchema, Schema readerSchema) throws IOException { + /** + * Reads all the records with given schema. + * + *

NOTE: This should only be used for testing, + * the records are materialized eagerly into a list and returned, + * use {@code getRecordIterator} where possible. + */ + private List> readAllRecords(Schema writerSchema, Schema readerSchema) { + final Option keyFieldSchema = Option.ofNullable(readerSchema.getField(KEY_FIELD_NAME)); List> recordList = new LinkedList<>(); try { final HFileScanner scanner = reader.getScanner(false, false); if (scanner.seekTo()) { do { Cell c = scanner.getKeyValue(); - byte[] keyBytes = Arrays.copyOfRange(c.getRowArray(), c.getRowOffset(), c.getRowOffset() + c.getRowLength()); - R record = getRecordFromCell(c, writerSchema, readerSchema); - recordList.add(new Pair<>(new String(keyBytes), record)); + final Pair keyAndRecordPair = getRecordFromCell(c, writerSchema, readerSchema, keyFieldSchema); + recordList.add(keyAndRecordPair); } while (scanner.next()); } @@ -169,17 +218,36 @@ public List> readAllRecords(Schema writerSchema, Schema readerSc } } - public List> readAllRecords() throws IOException { - Schema schema = new Schema.Parser().parse(new String(reader.loadFileInfo().get(KEY_SCHEMA.getBytes()))); + /** + * Reads all the records with current schema. + * + *

NOTE: This should only be used for testing, + * the records are materialized eagerly into a list and returned, + * use {@code getRecordIterator} where possible. + */ + public List> readAllRecords() { + Schema schema = getSchema(); return readAllRecords(schema, schema); } + /** + * Reads all the records with current schema and filtering keys. + * + *

NOTE: This should only be used for testing, + * the records are materialized eagerly into a list and returned, + * use {@code getRecordIterator} where possible. + */ public List> readRecords(List keys) throws IOException { - reader.loadFileInfo(); - Schema schema = new Schema.Parser().parse(new String(reader.loadFileInfo().get(KEY_SCHEMA.getBytes()))); - return readRecords(keys, schema); + return readRecords(keys, getSchema()); } + /** + * Reads all the records with given schema and filtering keys. + * + *

NOTE: This should only be used for testing, + * the records are materialized eagerly into a list and returned, + * use {@code getRecordIterator} where possible. + */ public List> readRecords(List keys, Schema schema) throws IOException { this.schema = schema; reader.loadFileInfo(); @@ -193,9 +261,45 @@ public List> readRecords(List keys, Schema schema) throw return records; } + public ClosableIterator getRecordIterator(List keys, Schema schema) throws IOException { + this.schema = schema; + reader.loadFileInfo(); + Iterator iterator = keys.iterator(); + return new ClosableIterator() { + private R next; + @Override + public void close() { + } + + @Override + public boolean hasNext() { + try { + while (iterator.hasNext()) { + Option value = getRecordByKey(iterator.next(), schema); + if (value.isPresent()) { + next = value.get(); + return true; + } + } + return false; + } catch (IOException e) { + throw new HoodieIOException("unable to read next record from hfile ", e); + } + } + + @Override + public R next() { + return next; + } + }; + } + @Override public Iterator getRecordIterator(Schema readerSchema) throws IOException { final HFileScanner scanner = reader.getScanner(false, false); + final Option keyFieldSchema = Option.ofNullable(readerSchema.getField(KEY_FIELD_NAME)); + ValidationUtils.checkState(keyFieldSchema != null, + "Missing key field '" + KEY_FIELD_NAME + "' in the schema!"); return new Iterator() { private R next = null; private boolean eof = false; @@ -206,7 +310,8 @@ public boolean hasNext() { // To handle when hasNext() is called multiple times for idempotency and/or the first time if (this.next == null && !this.eof) { if (!scanner.isSeeked() && scanner.seekTo()) { - this.next = getRecordFromCell(scanner.getKeyValue(), getSchema(), readerSchema); + final Pair keyAndRecordPair = getRecordFromCell(scanner.getKeyValue(), getSchema(), readerSchema, keyFieldSchema); + this.next = keyAndRecordPair.getSecond(); } } return this.next != null; @@ -226,7 +331,8 @@ public R next() { } R retVal = this.next; if (scanner.next()) { - this.next = getRecordFromCell(scanner.getKeyValue(), getSchema(), readerSchema); + final Pair keyAndRecordPair = getRecordFromCell(scanner.getKeyValue(), getSchema(), readerSchema, keyFieldSchema); + this.next = keyAndRecordPair.getSecond(); } else { this.next = null; this.eof = true; @@ -239,9 +345,24 @@ public R next() { }; } + private boolean isKeyAvailable(String key) throws IOException { + final KeyValue kv = new KeyValue(key.getBytes(), null, null, null); + synchronized (this) { + if (keyScanner == null) { + keyScanner = reader.getScanner(false, false); + } + if (keyScanner.seekTo(kv) == 0) { + return true; + } + } + return false; + } + @Override public Option getRecordByKey(String key, Schema readerSchema) throws IOException { byte[] value = null; + final Option keyFieldSchema = Option.ofNullable(readerSchema.getField(KEY_FIELD_NAME)); + ValidationUtils.checkState(keyFieldSchema != null); KeyValue kv = new KeyValue(key.getBytes(), null, null, null); synchronized (this) { @@ -257,16 +378,51 @@ public Option getRecordByKey(String key, Schema readerSchema) throws IOException } if (value != null) { - R record = (R)HoodieAvroUtils.bytesToAvro(value, getSchema(), readerSchema); + R record = deserialize(key.getBytes(), value, getSchema(), readerSchema, keyFieldSchema); return Option.of(record); } return Option.empty(); } - private R getRecordFromCell(Cell c, Schema writerSchema, Schema readerSchema) throws IOException { - byte[] value = 
Arrays.copyOfRange(c.getValueArray(), c.getValueOffset(), c.getValueOffset() + c.getValueLength()); - return (R)HoodieAvroUtils.bytesToAvro(value, writerSchema, readerSchema); + private Pair getRecordFromCell(Cell cell, Schema writerSchema, Schema readerSchema, Option keyFieldSchema) throws IOException { + final byte[] keyBytes = Arrays.copyOfRange(cell.getRowArray(), cell.getRowOffset(), cell.getRowOffset() + cell.getRowLength()); + final byte[] valueBytes = Arrays.copyOfRange(cell.getValueArray(), cell.getValueOffset(), cell.getValueOffset() + cell.getValueLength()); + R record = deserialize(keyBytes, valueBytes, writerSchema, readerSchema, keyFieldSchema); + return new Pair<>(new String(keyBytes), record); + } + + /** + * Deserialize the record byte array contents to record object. + * + * @param keyBytes - Record key as byte array + * @param valueBytes - Record content as byte array + * @param writerSchema - Writer schema + * @param readerSchema - Reader schema + * @param keyFieldSchema - Key field id in the schema + * @return Deserialized record object + */ + private R deserialize(final byte[] keyBytes, final byte[] valueBytes, Schema writerSchema, Schema readerSchema, + Option keyFieldSchema) throws IOException { + R record = (R) HoodieAvroUtils.bytesToAvro(valueBytes, writerSchema, readerSchema); + materializeRecordIfNeeded(keyBytes, record, keyFieldSchema); + return record; + } + + /** + * Materialize the record for any missing fields, if needed. + * + * @param keyBytes - Key byte array + * @param record - Record object to materialize + * @param keyFieldSchema - Key field id in the schema + */ + private void materializeRecordIfNeeded(final byte[] keyBytes, R record, Option keyFieldSchema) { + if (keyFieldSchema.isPresent()) { + final Object keyObject = record.get(keyFieldSchema.get().pos()); + if (keyObject != null && keyObject.toString().isEmpty()) { + record.put(keyFieldSchema.get().pos(), new String(keyBytes)); + } + } } @Override @@ -288,28 +444,14 @@ public synchronized void close() { } } - static class SeekableByteArrayInputStream extends ByteArrayInputStream implements Seekable, PositionedReadable { + static class SeekableByteArrayInputStream extends ByteBufferBackedInputStream implements Seekable, PositionedReadable { public SeekableByteArrayInputStream(byte[] buf) { super(buf); } @Override public long getPos() throws IOException { - return pos; - } - - @Override - public void seek(long pos) throws IOException { - if (mark != 0) { - throw new IllegalStateException(); - } - - reset(); - long skipped = skip(pos); - - if (skipped != pos) { - throw new IOException(); - } + return getPosition(); } @Override @@ -319,19 +461,7 @@ public boolean seekToNewSource(long targetPos) throws IOException { @Override public int read(long position, byte[] buffer, int offset, int length) throws IOException { - - if (position >= buf.length) { - throw new IllegalArgumentException(); - } - if (position + length > buf.length) { - throw new IllegalArgumentException(); - } - if (length > buffer.length) { - throw new IllegalArgumentException(); - } - - System.arraycopy(buf, (int) position, buffer, offset, length); - return length; + return copyFrom(position, buffer, offset, length); } @Override diff --git a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieParquetReader.java b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieParquetReader.java index 9ead1ac87ba50..9ad07dfafbf60 100644 --- a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieParquetReader.java +++ 
b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieParquetReader.java @@ -34,9 +34,9 @@ import org.apache.parquet.avro.AvroReadSupport; import org.apache.parquet.hadoop.ParquetReader; -public class HoodieParquetReader implements HoodieFileReader { - private Path path; - private Configuration conf; +public class HoodieParquetReader implements HoodieFileReader { + private final Path path; + private final Configuration conf; private final BaseFileUtils parquetUtils; public HoodieParquetReader(Configuration configuration, Path path) { @@ -45,6 +45,7 @@ public HoodieParquetReader(Configuration configuration, Path path) { this.parquetUtils = BaseFileUtils.getInstance(HoodieFileFormat.PARQUET); } + @Override public String[] readMinMaxRecordKeys() { return parquetUtils.readMinMaxRecordKeys(conf, path); } @@ -55,15 +56,15 @@ public BloomFilter readBloomFilter() { } @Override - public Set filterRowKeys(Set candidateRowKeys) { + public Set filterRowKeys(Set candidateRowKeys) { return parquetUtils.filterRowKeys(conf, path, candidateRowKeys); } @Override public Iterator getRecordIterator(Schema schema) throws IOException { AvroReadSupport.setAvroReadSchema(conf, schema); - ParquetReader reader = AvroParquetReader.builder(path).withConf(conf).build(); - return new ParquetReaderIterator(reader); + ParquetReader reader = AvroParquetReader.builder(path).withConf(conf).build(); + return new ParquetReaderIterator<>(reader); } @Override diff --git a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieParquetStreamWriter.java b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieParquetStreamWriter.java new file mode 100644 index 0000000000000..a2736018242b6 --- /dev/null +++ b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieParquetStreamWriter.java @@ -0,0 +1,88 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hudi.io.storage; + +import org.apache.avro.generic.IndexedRecord; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.Path; +import org.apache.hudi.avro.HoodieAvroWriteSupport; +import org.apache.hudi.parquet.io.OutputStreamBackedOutputFile; +import org.apache.parquet.hadoop.ParquetFileWriter; +import org.apache.parquet.hadoop.ParquetWriter; +import org.apache.parquet.hadoop.api.WriteSupport; +import org.apache.parquet.io.OutputFile; + +import java.io.IOException; + +// TODO(HUDI-3035) unify w/ HoodieParquetWriter +public class HoodieParquetStreamWriter implements AutoCloseable { + + private final ParquetWriter writer; + private final HoodieAvroWriteSupport writeSupport; + + public HoodieParquetStreamWriter(FSDataOutputStream outputStream, + HoodieAvroParquetConfig parquetConfig) throws IOException { + this.writeSupport = parquetConfig.getWriteSupport(); + this.writer = new Builder(new OutputStreamBackedOutputFile(outputStream), writeSupport) + .withWriteMode(ParquetFileWriter.Mode.CREATE) + .withCompressionCodec(parquetConfig.getCompressionCodecName()) + .withRowGroupSize(parquetConfig.getBlockSize()) + .withPageSize(parquetConfig.getPageSize()) + .withDictionaryPageSize(parquetConfig.getPageSize()) + .withDictionaryEncoding(parquetConfig.dictionaryEnabled()) + .withWriterVersion(ParquetWriter.DEFAULT_WRITER_VERSION) + .withConf(parquetConfig.getHadoopConf()) + .build(); + } + + public void writeAvro(String key, R object) throws IOException { + writer.write(object); + writeSupport.add(key); + } + + @Override + public void close() throws IOException { + writer.close(); + } + + private static class Builder extends ParquetWriter.Builder> { + private final WriteSupport writeSupport; + + private Builder(Path file, WriteSupport writeSupport) { + super(file); + this.writeSupport = writeSupport; + } + + private Builder(OutputFile file, WriteSupport writeSupport) { + super(file); + this.writeSupport = writeSupport; + } + + @Override + protected Builder self() { + return this; + } + + @Override + protected WriteSupport getWriteSupport(Configuration conf) { + return writeSupport; + } + } +} \ No newline at end of file diff --git a/hudi-common/src/main/java/org/apache/hudi/keygen/constant/KeyGeneratorOptions.java b/hudi-common/src/main/java/org/apache/hudi/keygen/constant/KeyGeneratorOptions.java index 6a1f761219221..ff182c4c1661f 100644 --- a/hudi-common/src/main/java/org/apache/hudi/keygen/constant/KeyGeneratorOptions.java +++ b/hudi-common/src/main/java/org/apache/hudi/keygen/constant/KeyGeneratorOptions.java @@ -96,5 +96,28 @@ public class KeyGeneratorOptions extends HoodieConfig { */ @Deprecated public static final String PARTITIONPATH_FIELD_OPT_KEY = PARTITIONPATH_FIELD_NAME.key(); + + /** + * Supported configs. + */ + public static class Config { + + // One value from TimestampType above + public static final String TIMESTAMP_TYPE_FIELD_PROP = "hoodie.deltastreamer.keygen.timebased.timestamp.type"; + public static final String INPUT_TIME_UNIT = + "hoodie.deltastreamer.keygen.timebased.timestamp.scalar.time.unit"; + //This prop can now accept list of input date formats. 
+ public static final String TIMESTAMP_INPUT_DATE_FORMAT_PROP = + "hoodie.deltastreamer.keygen.timebased.input.dateformat"; + public static final String TIMESTAMP_INPUT_DATE_FORMAT_LIST_DELIMITER_REGEX_PROP = "hoodie.deltastreamer.keygen.timebased.input.dateformat.list.delimiter.regex"; + public static final String TIMESTAMP_INPUT_TIMEZONE_FORMAT_PROP = "hoodie.deltastreamer.keygen.timebased.input.timezone"; + public static final String TIMESTAMP_OUTPUT_DATE_FORMAT_PROP = + "hoodie.deltastreamer.keygen.timebased.output.dateformat"; + //still keeping this prop for backward compatibility so that functionality for existing users does not break. + public static final String TIMESTAMP_TIMEZONE_FORMAT_PROP = + "hoodie.deltastreamer.keygen.timebased.timezone"; + public static final String TIMESTAMP_OUTPUT_TIMEZONE_FORMAT_PROP = "hoodie.deltastreamer.keygen.timebased.output.timezone"; + public static final String DATE_TIME_PARSER_PROP = "hoodie.deltastreamer.keygen.datetime.parser.class"; + } } diff --git a/hudi-common/src/main/java/org/apache/hudi/metadata/BaseTableMetadata.java b/hudi-common/src/main/java/org/apache/hudi/metadata/BaseTableMetadata.java index ccd421e677651..3c648f38defc6 100644 --- a/hudi-common/src/main/java/org/apache/hudi/metadata/BaseTableMetadata.java +++ b/hudi-common/src/main/java/org/apache/hudi/metadata/BaseTableMetadata.java @@ -19,6 +19,10 @@ package org.apache.hudi.metadata; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.Path; +import org.apache.hudi.avro.model.HoodieMetadataBloomFilter; +import org.apache.hudi.avro.model.HoodieMetadataColumnStats; import org.apache.hudi.common.config.HoodieMetadataConfig; import org.apache.hudi.common.config.SerializableConfiguration; import org.apache.hudi.common.engine.HoodieEngineContext; @@ -30,29 +34,33 @@ import org.apache.hudi.common.table.timeline.HoodieInstant; import org.apache.hudi.common.util.HoodieTimer; import org.apache.hudi.common.util.Option; +import org.apache.hudi.common.util.ValidationUtils; import org.apache.hudi.common.util.collection.Pair; +import org.apache.hudi.common.util.hash.ColumnIndexID; +import org.apache.hudi.common.util.hash.FileIndexID; +import org.apache.hudi.common.util.hash.PartitionIndexID; import org.apache.hudi.exception.HoodieMetadataException; - -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.Path; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; import java.io.IOException; +import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Set; +import java.util.TreeSet; import java.util.stream.Collectors; public abstract class BaseTableMetadata implements HoodieTableMetadata { private static final Logger LOG = LogManager.getLogger(BaseTableMetadata.class); - static final long MAX_MEMORY_SIZE_IN_BYTES = 1024 * 1024 * 1024; - static final int BUFFER_SIZE = 10 * 1024 * 1024; + public static final long MAX_MEMORY_SIZE_IN_BYTES = 1024 * 1024 * 1024; + public static final int BUFFER_SIZE = 10 * 1024 * 1024; protected final transient HoodieEngineContext engineContext; protected final SerializableConfiguration hadoopConf; @@ -63,7 +71,9 @@ public abstract class BaseTableMetadata implements HoodieTableMetadata { // Directory used for Spillable Map when merging records protected final String spillableMapDirectory; - protected boolean enabled; + protected boolean isMetadataTableEnabled; + protected boolean 
isBloomFilterIndexEnabled = false; + protected boolean isColumnStatsIndexEnabled = false; protected BaseTableMetadata(HoodieEngineContext engineContext, HoodieMetadataConfig metadataConfig, String dataBasePath, String spillableMapDirectory) { @@ -74,7 +84,7 @@ protected BaseTableMetadata(HoodieEngineContext engineContext, HoodieMetadataCon this.spillableMapDirectory = spillableMapDirectory; this.metadataConfig = metadataConfig; - this.enabled = metadataConfig.enabled(); + this.isMetadataTableEnabled = metadataConfig.enabled(); if (metadataConfig.enableMetrics()) { this.metrics = Option.of(new HoodieMetadataMetrics(Registry.getRegistry("HoodieMetadata"))); } else { @@ -84,16 +94,15 @@ protected BaseTableMetadata(HoodieEngineContext engineContext, HoodieMetadataCon /** * Return the list of partitions in the dataset. - * + *

* If the Metadata Table is enabled, the listing is retrieved from the stored metadata. Otherwise, the list of * partitions is retrieved directly from the underlying {@code FileSystem}. - * + *

* On any errors retrieving the listing from the metadata, defaults to using the file system listings. - * */ @Override public List getAllPartitionPaths() throws IOException { - if (enabled) { + if (isMetadataTableEnabled) { try { return fetchAllPartitionPaths(); } catch (Exception e) { @@ -106,10 +115,10 @@ public List getAllPartitionPaths() throws IOException { /** * Return the list of files in a partition. - * + *

* If the Metadata Table is enabled, the listing is retrieved from the stored metadata. Otherwise, the list of * partitions is retrieved directly from the underlying {@code FileSystem}. - * + *

* On any errors retrieving the listing from the metadata, defaults to using the file system listings. * * @param partitionPath The absolute path of the partition to list @@ -117,7 +126,7 @@ public List getAllPartitionPaths() throws IOException { @Override public FileStatus[] getAllFilesInPartition(Path partitionPath) throws IOException { - if (enabled) { + if (isMetadataTableEnabled) { try { return fetchAllFilesInPartition(partitionPath); } catch (Exception e) { @@ -132,7 +141,7 @@ public FileStatus[] getAllFilesInPartition(Path partitionPath) @Override public Map getAllFilesInPartitions(List partitions) throws IOException { - if (enabled) { + if (isMetadataTableEnabled) { try { List partitionPaths = partitions.stream().map(entry -> new Path(entry)).collect(Collectors.toList()); Map partitionsFilesMap = fetchAllFilesInPartitionPaths(partitionPaths); @@ -146,12 +155,124 @@ public Map getAllFilesInPartitions(List partitions .getAllFilesInPartitions(partitions); } + @Override + public Option getBloomFilter(final String partitionName, final String fileName) + throws HoodieMetadataException { + if (!isBloomFilterIndexEnabled) { + LOG.error("Metadata bloom filter index is disabled!"); + return Option.empty(); + } + + final Pair partitionFileName = Pair.of(partitionName, fileName); + Map, ByteBuffer> bloomFilters = getBloomFilters(Collections.singletonList(partitionFileName)); + if (bloomFilters.isEmpty()) { + LOG.error("Meta index: missing bloom filter for partition: " + partitionName + ", file: " + fileName); + return Option.empty(); + } + + ValidationUtils.checkState(bloomFilters.containsKey(partitionFileName)); + return Option.of(bloomFilters.get(partitionFileName)); + } + + @Override + public Map, ByteBuffer> getBloomFilters(final List> partitionNameFileNameList) + throws HoodieMetadataException { + if (!isBloomFilterIndexEnabled) { + LOG.error("Metadata bloom filter index is disabled!"); + return Collections.emptyMap(); + } + if (partitionNameFileNameList.isEmpty()) { + return Collections.emptyMap(); + } + + HoodieTimer timer = new HoodieTimer().startTimer(); + Set partitionIDFileIDSortedStrings = new TreeSet<>(); + Map> fileToKeyMap = new HashMap<>(); + partitionNameFileNameList.forEach(partitionNameFileNamePair -> { + final String bloomFilterIndexKey = HoodieMetadataPayload.getBloomFilterIndexKey( + new PartitionIndexID(partitionNameFileNamePair.getLeft()), new FileIndexID(partitionNameFileNamePair.getRight())); + partitionIDFileIDSortedStrings.add(bloomFilterIndexKey); + fileToKeyMap.put(bloomFilterIndexKey, partitionNameFileNamePair); + } + ); + + List partitionIDFileIDStrings = new ArrayList<>(partitionIDFileIDSortedStrings); + List>>> hoodieRecordList = + getRecordsByKeys(partitionIDFileIDStrings, MetadataPartitionType.BLOOM_FILTERS.getPartitionPath()); + metrics.ifPresent(m -> m.updateMetrics(HoodieMetadataMetrics.LOOKUP_BLOOM_FILTERS_METADATA_STR, + (timer.endTimer() / partitionIDFileIDStrings.size()))); + + Map, ByteBuffer> partitionFileToBloomFilterMap = new HashMap<>(); + for (final Pair>> entry : hoodieRecordList) { + if (entry.getRight().isPresent()) { + final Option bloomFilterMetadata = + entry.getRight().get().getData().getBloomFilterMetadata(); + if (bloomFilterMetadata.isPresent()) { + if (!bloomFilterMetadata.get().getIsDeleted()) { + ValidationUtils.checkState(fileToKeyMap.containsKey(entry.getLeft())); + partitionFileToBloomFilterMap.put(fileToKeyMap.get(entry.getLeft()), bloomFilterMetadata.get().getBloomFilter()); + } + } else { + LOG.error("Meta index bloom filter 
missing for: " + fileToKeyMap.get(entry.getLeft())); + } + } + } + return partitionFileToBloomFilterMap; + } + + @Override + public Map, HoodieMetadataColumnStats> getColumnStats(final List> partitionNameFileNameList, final String columnName) + throws HoodieMetadataException { + if (!isColumnStatsIndexEnabled) { + LOG.error("Metadata column stats index is disabled!"); + return Collections.emptyMap(); + } + + Map> columnStatKeyToFileNameMap = new HashMap<>(); + TreeSet sortedKeys = new TreeSet<>(); + final ColumnIndexID columnIndexID = new ColumnIndexID(columnName); + for (Pair partitionNameFileNamePair : partitionNameFileNameList) { + final String columnStatsIndexKey = HoodieMetadataPayload.getColumnStatsIndexKey( + new PartitionIndexID(partitionNameFileNamePair.getLeft()), + new FileIndexID(partitionNameFileNamePair.getRight()), + columnIndexID); + sortedKeys.add(columnStatsIndexKey); + columnStatKeyToFileNameMap.put(columnStatsIndexKey, partitionNameFileNamePair); + } + + List columnStatKeys = new ArrayList<>(sortedKeys); + HoodieTimer timer = new HoodieTimer().startTimer(); + List>>> hoodieRecordList = + getRecordsByKeys(columnStatKeys, MetadataPartitionType.COLUMN_STATS.getPartitionPath()); + metrics.ifPresent(m -> m.updateMetrics(HoodieMetadataMetrics.LOOKUP_COLUMN_STATS_METADATA_STR, timer.endTimer())); + + Map, HoodieMetadataColumnStats> fileToColumnStatMap = new HashMap<>(); + for (final Pair>> entry : hoodieRecordList) { + if (entry.getRight().isPresent()) { + final Option columnStatMetadata = + entry.getRight().get().getData().getColumnStatMetadata(); + if (columnStatMetadata.isPresent()) { + if (!columnStatMetadata.get().getIsDeleted()) { + ValidationUtils.checkState(columnStatKeyToFileNameMap.containsKey(entry.getLeft())); + final Pair partitionFileNamePair = columnStatKeyToFileNameMap.get(entry.getLeft()); + ValidationUtils.checkState(!fileToColumnStatMap.containsKey(partitionFileNamePair)); + fileToColumnStatMap.put(partitionFileNamePair, columnStatMetadata.get()); + } + } else { + LOG.error("Meta index column stats missing for: " + entry.getLeft()); + } + } + } + return fileToColumnStatMap; + } + /** * Returns a list of all partitions. 
*/ protected List fetchAllPartitionPaths() throws IOException { HoodieTimer timer = new HoodieTimer().startTimer(); - Option> hoodieRecord = getRecordByKey(RECORDKEY_PARTITION_LIST, MetadataPartitionType.FILES.partitionPath()); + Option> hoodieRecord = getRecordByKey(RECORDKEY_PARTITION_LIST, + MetadataPartitionType.FILES.getPartitionPath()); metrics.ifPresent(m -> m.updateMetrics(HoodieMetadataMetrics.LOOKUP_PARTITIONS_STR, timer.endTimer())); List partitions = Collections.emptyList(); @@ -181,7 +302,8 @@ FileStatus[] fetchAllFilesInPartition(Path partitionPath) throws IOException { } HoodieTimer timer = new HoodieTimer().startTimer(); - Option> hoodieRecord = getRecordByKey(partitionName, MetadataPartitionType.FILES.partitionPath()); + Option> hoodieRecord = getRecordByKey(partitionName, + MetadataPartitionType.FILES.getPartitionPath()); metrics.ifPresent(m -> m.updateMetrics(HoodieMetadataMetrics.LOOKUP_FILES_STR, timer.endTimer())); FileStatus[] statuses = {}; @@ -215,7 +337,7 @@ Map fetchAllFilesInPartitionPaths(List partitionPath HoodieTimer timer = new HoodieTimer().startTimer(); List>>> partitionsFileStatus = - getRecordsByKeys(new ArrayList<>(partitionInfo.keySet()), MetadataPartitionType.FILES.partitionPath()); + getRecordsByKeys(new ArrayList<>(partitionInfo.keySet()), MetadataPartitionType.FILES.getPartitionPath()); metrics.ifPresent(m -> m.updateMetrics(HoodieMetadataMetrics.LOOKUP_FILES_STR, timer.endTimer())); Map result = new HashMap<>(); @@ -231,18 +353,18 @@ Map fetchAllFilesInPartitionPaths(List partitionPath } /** - * May be handle spurious deletes. Depending on config, throw an exception or log a warn msg. + * Maybe handle spurious deletes. Depending on config, throw an exception or log a warn msg. * @param hoodieRecord instance of {@link HoodieRecord} of interest. * @param partitionName partition name of interest. */ private void mayBeHandleSpuriousDeletes(Option> hoodieRecord, String partitionName) { if (!hoodieRecord.get().getData().getDeletions().isEmpty()) { - if (!metadataConfig.ignoreSpuriousDeletes()) { + if (metadataConfig.ignoreSpuriousDeletes()) { + LOG.warn("Metadata record for " + partitionName + " encountered some files to be deleted which was not added before. " + + "Ignoring the spurious deletes as the `" + HoodieMetadataConfig.IGNORE_SPURIOUS_DELETES.key() + "` config is set to true"); + } else { throw new HoodieMetadataException("Metadata record for " + partitionName + " is inconsistent: " + hoodieRecord.get().getData()); - } else { - LOG.warn("Metadata record for " + partitionName + " encountered some files to be deleted which was not added before. 
" - + "Ignoring the spurious deletes as the `" + HoodieMetadataConfig.IGNORE_SPURIOUS_DELETES.key() + "` config is set to false"); } } } diff --git a/hudi-common/src/main/java/org/apache/hudi/metadata/FileSystemBackedTableMetadata.java b/hudi-common/src/main/java/org/apache/hudi/metadata/FileSystemBackedTableMetadata.java index d05b95dfdb495..a4e5ea3539f17 100644 --- a/hudi-common/src/main/java/org/apache/hudi/metadata/FileSystemBackedTableMetadata.java +++ b/hudi-common/src/main/java/org/apache/hudi/metadata/FileSystemBackedTableMetadata.java @@ -18,6 +18,7 @@ package org.apache.hudi.metadata; +import org.apache.hudi.avro.model.HoodieMetadataColumnStats; import org.apache.hudi.common.config.SerializableConfiguration; import org.apache.hudi.common.engine.HoodieEngineContext; import org.apache.hudi.common.fs.FSUtils; @@ -29,8 +30,10 @@ import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hudi.exception.HoodieMetadataException; import java.io.IOException; +import java.nio.ByteBuffer; import java.util.Arrays; import java.util.Collections; import java.util.List; @@ -139,4 +142,21 @@ public void close() throws Exception { public void reset() { // no-op } + + public Option getBloomFilter(final String partitionName, final String fileName) + throws HoodieMetadataException { + throw new HoodieMetadataException("Unsupported operation: getBloomFilter for " + fileName); + } + + @Override + public Map, ByteBuffer> getBloomFilters(final List> partitionNameFileNameList) + throws HoodieMetadataException { + throw new HoodieMetadataException("Unsupported operation: getBloomFilters!"); + } + + @Override + public Map, HoodieMetadataColumnStats> getColumnStats(final List> partitionNameFileNameList, final String columnName) + throws HoodieMetadataException { + throw new HoodieMetadataException("Unsupported operation: getColumnsStats!"); + } } diff --git a/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadata.java b/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadata.java index c9e538f72eaa0..7b4dbd9a0b935 100644 --- a/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadata.java +++ b/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadata.java @@ -18,6 +18,9 @@ package org.apache.hudi.metadata; +import org.apache.avro.Schema; +import org.apache.avro.generic.GenericRecord; +import org.apache.hadoop.fs.Path; import org.apache.hudi.avro.HoodieAvroUtils; import org.apache.hudi.avro.model.HoodieMetadataRecord; import org.apache.hudi.avro.model.HoodieRestoreMetadata; @@ -27,6 +30,7 @@ import org.apache.hudi.common.config.SerializableConfiguration; import org.apache.hudi.common.engine.HoodieEngineContext; import org.apache.hudi.common.model.FileSlice; +import org.apache.hudi.common.model.HoodieAvroRecord; import org.apache.hudi.common.model.HoodieBaseFile; import org.apache.hudi.common.model.HoodieLogFile; import org.apache.hudi.common.model.HoodieRecord; @@ -48,10 +52,6 @@ import org.apache.hudi.exception.TableNotFoundException; import org.apache.hudi.io.storage.HoodieFileReader; import org.apache.hudi.io.storage.HoodieFileReaderFactory; - -import org.apache.avro.Schema; -import org.apache.avro.generic.GenericRecord; -import org.apache.hadoop.fs.Path; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; @@ -64,6 +64,7 @@ import java.util.Map; import java.util.Set; import java.util.concurrent.ConcurrentHashMap; +import 
java.util.concurrent.atomic.AtomicInteger; import java.util.stream.Collectors; /** @@ -80,8 +81,9 @@ public class HoodieBackedTableMetadata extends BaseTableMetadata { // should we reuse the open file handles, across calls private final boolean reuse; - // Readers for latest file slice corresponding to file groups in the metadata partition of interest - private Map> partitionReaders = new ConcurrentHashMap<>(); + // Readers for the latest file slice corresponding to file groups in the metadata partition + private Map, Pair> partitionReaders = + new ConcurrentHashMap<>(); public HoodieBackedTableMetadata(HoodieEngineContext engineContext, HoodieMetadataConfig metadataConfig, String datasetBasePath, String spillableMapDirectory) { @@ -97,7 +99,7 @@ public HoodieBackedTableMetadata(HoodieEngineContext engineContext, HoodieMetada private void initIfNeeded() { this.metadataBasePath = HoodieTableMetadata.getMetadataTableBasePath(dataBasePath); - if (!enabled) { + if (!isMetadataTableEnabled) { if (!HoodieTableMetadata.isMetadataTable(metadataBasePath)) { LOG.info("Metadata table is disabled."); } @@ -105,14 +107,16 @@ private void initIfNeeded() { try { this.metadataMetaClient = HoodieTableMetaClient.builder().setConf(hadoopConf.get()).setBasePath(metadataBasePath).build(); this.metadataTableConfig = metadataMetaClient.getTableConfig(); + this.isBloomFilterIndexEnabled = metadataConfig.isBloomFilterIndexEnabled(); + this.isColumnStatsIndexEnabled = metadataConfig.isColumnStatsIndexEnabled(); } catch (TableNotFoundException e) { LOG.warn("Metadata table was not found at path " + metadataBasePath); - this.enabled = false; + this.isMetadataTableEnabled = false; this.metadataMetaClient = null; this.metadataTableConfig = null; } catch (Exception e) { LOG.error("Failed to initialize metadata table at path " + metadataBasePath, e); - this.enabled = false; + this.isMetadataTableEnabled = false; this.metadataMetaClient = null; this.metadataTableConfig = null; } @@ -125,30 +129,43 @@ protected Option> getRecordByKey(String key, return recordsByKeys.size() == 0 ? 
Option.empty() : recordsByKeys.get(0).getValue(); } - protected List>>> getRecordsByKeys(List keys, String partitionName) { - Pair readers = openReadersIfNeeded(keys.get(0), partitionName); - try { - List timings = new ArrayList<>(); - HoodieFileReader baseFileReader = readers.getKey(); - HoodieMetadataMergedLogRecordReader logRecordScanner = readers.getRight(); + @Override + protected List>>> getRecordsByKeys(List keys, + String partitionName) { + Map, List> partitionFileSliceToKeysMap = getPartitionFileSliceToKeysMapping(partitionName, keys); + List>>> result = new ArrayList<>(); + AtomicInteger fileSlicesKeysCount = new AtomicInteger(); + partitionFileSliceToKeysMap.forEach((partitionFileSlicePair, fileSliceKeys) -> { + Pair readers = openReadersIfNeeded(partitionName, + partitionFileSlicePair.getRight()); + try { + List timings = new ArrayList<>(); + HoodieFileReader baseFileReader = readers.getKey(); + HoodieMetadataMergedLogRecordReader logRecordScanner = readers.getRight(); - if (baseFileReader == null && logRecordScanner == null) { - return Collections.emptyList(); - } + if (baseFileReader == null && logRecordScanner == null) { + return; + } - // local map to assist in merging with base file records - Map>> logRecords = readLogRecords(logRecordScanner, keys, timings); - List>>> result = readFromBaseAndMergeWithLogRecords( - baseFileReader, keys, logRecords, timings, partitionName); - LOG.info(String.format("Metadata read for %s keys took [baseFileRead, logMerge] %s ms", keys.size(), timings)); - return result; - } catch (IOException ioe) { - throw new HoodieIOException("Error merging records from metadata table for " + keys.size() + " key : ", ioe); - } finally { - if (!reuse) { - close(partitionName); + // local map to assist in merging with base file records + Map>> logRecords = readLogRecords(logRecordScanner, + fileSliceKeys, timings); + result.addAll(readFromBaseAndMergeWithLogRecords(baseFileReader, fileSliceKeys, logRecords, + timings, partitionName)); + LOG.debug(String.format("Metadata read for %s keys took [baseFileRead, logMerge] %s ms", + fileSliceKeys.size(), timings)); + fileSlicesKeysCount.addAndGet(fileSliceKeys.size()); + } catch (IOException ioe) { + throw new HoodieIOException("Error merging records from metadata table for " + keys.size() + " key : ", ioe); + } finally { + if (!reuse) { + close(Pair.of(partitionFileSlicePair.getLeft(), partitionFileSlicePair.getRight().getFileId())); + } } - } + }); + + ValidationUtils.checkState(keys.size() == fileSlicesKeysCount.get()); + return result; } private Map>> readLogRecords(HoodieMetadataMergedLogRecordReader logRecordScanner, @@ -190,16 +207,16 @@ private List>>> readFrom // Retrieve record from base file if (baseFileReader != null) { HoodieTimer readTimer = new HoodieTimer(); + Map baseFileRecords = baseFileReader.getRecordsByKeys(keys); for (String key : keys) { readTimer.startTimer(); - Option baseRecord = baseFileReader.getRecordByKey(key); - if (baseRecord.isPresent()) { - hoodieRecord = getRecord(baseRecord, partitionName); + if (baseFileRecords.containsKey(key)) { + hoodieRecord = getRecord(Option.of(baseFileRecords.get(key)), partitionName); metrics.ifPresent(m -> m.updateMetrics(HoodieMetadataMetrics.BASEFILE_READ_STR, readTimer.endTimer())); // merge base file record w/ log record if present if (logRecords.containsKey(key) && logRecords.get(key).isPresent()) { HoodieRecordPayload mergedPayload = logRecords.get(key).get().getData().preCombine(hoodieRecord.getData()); - result.add(Pair.of(key, Option.of(new 
HoodieRecord(hoodieRecord.getKey(), mergedPayload)))); + result.add(Pair.of(key, Option.of(new HoodieAvroRecord(hoodieRecord.getKey(), mergedPayload)))); } else { // only base record result.add(Pair.of(key, Option.of(hoodieRecord))); @@ -233,38 +250,54 @@ private HoodieRecord getRecord(Option base } /** - * Returns a new pair of readers to the base and log files. + * Get the latest file slices for the interested keys in a given partition. + * + * @param partitionName - Partition to get the file slices from + * @param keys - Interested keys + * @return FileSlices for the keys */ - private Pair openReadersIfNeeded(String key, String partitionName) { - return partitionReaders.computeIfAbsent(partitionName, k -> { - try { - final long baseFileOpenMs; - final long logScannerOpenMs; - HoodieFileReader baseFileReader = null; - HoodieMetadataMergedLogRecordReader logRecordScanner = null; + private Map, List> getPartitionFileSliceToKeysMapping(final String partitionName, final List keys) { + // Metadata is in sync till the latest completed instant on the dataset + List latestFileSlices = + HoodieTableMetadataUtil.getPartitionLatestMergedFileSlices(metadataMetaClient, partitionName); + + Map, List> partitionFileSliceToKeysMap = new HashMap<>(); + for (String key : keys) { + final FileSlice slice = latestFileSlices.get(HoodieTableMetadataUtil.mapRecordKeyToFileGroupIndex(key, + latestFileSlices.size())); + final Pair partitionNameFileSlicePair = Pair.of(partitionName, slice); + partitionFileSliceToKeysMap.computeIfAbsent(partitionNameFileSlicePair, k -> new ArrayList<>()).add(key); + } + return partitionFileSliceToKeysMap; + } - // Metadata is in sync till the latest completed instant on the dataset + /** + * Create a file reader and the record scanner for a given partition and file slice + * if readers are not already available. 
+ * + * @param partitionName - Partition name + * @param slice - The file slice to open readers for + * @return File reader and the record scanner pair for the requested file slice + */ + private Pair openReadersIfNeeded(String partitionName, FileSlice slice) { + return partitionReaders.computeIfAbsent(Pair.of(partitionName, slice.getFileId()), k -> { + try { HoodieTimer timer = new HoodieTimer().startTimer(); - List latestFileSlices = HoodieTableMetadataUtil.getPartitionLatestMergedFileSlices(metadataMetaClient, partitionName); - if (latestFileSlices.size() == 0) { - // empty partition - return Pair.of(null, null); - } - ValidationUtils.checkArgument(latestFileSlices.size() == 1, String.format("Invalid number of file slices: found=%d, required=%d", latestFileSlices.size(), 1)); - final FileSlice slice = latestFileSlices.get(HoodieTableMetadataUtil.mapRecordKeyToFileGroupIndex(key, latestFileSlices.size())); // Open base file reader Pair baseFileReaderOpenTimePair = getBaseFileReader(slice, timer); - baseFileReader = baseFileReaderOpenTimePair.getKey(); - baseFileOpenMs = baseFileReaderOpenTimePair.getValue(); + HoodieFileReader baseFileReader = baseFileReaderOpenTimePair.getKey(); + final long baseFileOpenMs = baseFileReaderOpenTimePair.getValue(); // Open the log record scanner using the log files from the latest file slice - Pair logRecordScannerOpenTimePair = getLogRecordScanner(slice, - partitionName); - logRecordScanner = logRecordScannerOpenTimePair.getKey(); - logScannerOpenMs = logRecordScannerOpenTimePair.getValue(); - - metrics.ifPresent(metrics -> metrics.updateMetrics(HoodieMetadataMetrics.SCAN_STR, baseFileOpenMs + logScannerOpenMs)); + List logFiles = slice.getLogFiles().collect(Collectors.toList()); + Pair logRecordScannerOpenTimePair = + getLogRecordScanner(logFiles, partitionName); + HoodieMetadataMergedLogRecordReader logRecordScanner = logRecordScannerOpenTimePair.getKey(); + final long logScannerOpenMs = logRecordScannerOpenTimePair.getValue(); + + metrics.ifPresent(metrics -> metrics.updateMetrics(HoodieMetadataMetrics.SCAN_STR, + +baseFileOpenMs + logScannerOpenMs)); return Pair.of(baseFileReader, logRecordScanner); } catch (IOException e) { throw new HoodieIOException("Error opening readers for metadata table partition " + partitionName, e); @@ -312,9 +345,9 @@ private Set getValidInstantTimestamps() { return validInstantTimestamps; } - private Pair getLogRecordScanner(FileSlice slice, String partitionName) { + public Pair getLogRecordScanner(List logFiles, String partitionName) { HoodieTimer timer = new HoodieTimer().startTimer(); - List logFilePaths = slice.getLogFiles() + List sortedLogFilePaths = logFiles.stream() .sorted(HoodieLogFile.getLogFileComparator()) .map(o -> o.getPath().toString()) .collect(Collectors.toList()); @@ -332,7 +365,7 @@ private Pair getLogRecordScanner(File HoodieMetadataMergedLogRecordReader logRecordScanner = HoodieMetadataMergedLogRecordReader.newBuilder() .withFileSystem(metadataMetaClient.getFs()) .withBasePath(metadataBasePath) - .withLogFilePaths(logFilePaths) + .withLogFilePaths(sortedLogFilePaths) .withReaderSchema(schema) .withLatestInstantTime(latestMetadataInstantTime) .withMaxMemorySizeInBytes(MAX_MEMORY_SIZE_IN_BYTES) @@ -347,7 +380,7 @@ private Pair getLogRecordScanner(File Long logScannerOpenMs = timer.endTimer(); LOG.info(String.format("Opened %d metadata log files (dataset instant=%s, metadata instant=%s) in %d ms", - logFilePaths.size(), getLatestDataInstantTime(), latestMetadataInstantTime, logScannerOpenMs)); + 
sortedLogFilePaths.size(), getLatestDataInstantTime(), latestMetadataInstantTime, logScannerOpenMs)); return Pair.of(logRecordScanner, logScannerOpenMs); } @@ -382,14 +415,20 @@ private List getRollbackedCommits(HoodieInstant instant, HoodieActiveTim @Override public void close() { - for (String partitionName : partitionReaders.keySet()) { - close(partitionName); + for (Pair partitionFileSlicePair : partitionReaders.keySet()) { + close(partitionFileSlicePair); } partitionReaders.clear(); } - private synchronized void close(String partitionName) { - Pair readers = partitionReaders.remove(partitionName); + /** + * Close the file reader and the record scanner for the given file slice. + * + * @param partitionFileSlicePair - Partition and FileSlice + */ + private synchronized void close(Pair partitionFileSlicePair) { + Pair readers = + partitionReaders.remove(partitionFileSlicePair); if (readers != null) { try { if (readers.getKey() != null) { @@ -405,7 +444,7 @@ private synchronized void close(String partitionName) { } public boolean enabled() { - return enabled; + return isMetadataTableEnabled; } public SerializableConfiguration getHadoopConf() { diff --git a/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataMergedLogRecordReader.java b/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataMergedLogRecordReader.java index 01c8d05e9b220..4f616c362fbf6 100644 --- a/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataMergedLogRecordReader.java +++ b/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataMergedLogRecordReader.java @@ -18,32 +18,32 @@ package org.apache.hudi.metadata; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; -import java.util.Set; - -import org.apache.avro.Schema; -import org.apache.avro.generic.GenericRecord; -import org.apache.avro.generic.IndexedRecord; -import org.apache.hadoop.fs.FileSystem; - import org.apache.hudi.common.config.HoodieMetadataConfig; -import org.apache.hudi.common.table.HoodieTableConfig; -import org.apache.hudi.common.util.SpillableMapUtils; -import org.apache.log4j.LogManager; -import org.apache.log4j.Logger; - +import org.apache.hudi.common.model.HoodieAvroRecord; import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.model.HoodieRecordPayload; +import org.apache.hudi.common.table.HoodieTableConfig; import org.apache.hudi.common.table.log.HoodieMergedLogRecordScanner; import org.apache.hudi.common.table.log.InstantRange; import org.apache.hudi.common.util.Option; +import org.apache.hudi.common.util.SpillableMapUtils; import org.apache.hudi.common.util.collection.ExternalSpillableMap; import org.apache.hudi.common.util.collection.Pair; +import org.apache.avro.Schema; +import org.apache.avro.generic.GenericRecord; +import org.apache.avro.generic.IndexedRecord; +import org.apache.hadoop.fs.FileSystem; +import org.apache.log4j.LogManager; +import org.apache.log4j.Logger; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Set; + /** * A {@code HoodieMergedLogRecordScanner} implementation which only merged records matching providing keys. This is * useful in limiting memory usage when only a small subset of updates records are to be read. 
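The per-key routing in `getRecordsByKeys` above relies on `HoodieTableMetadataUtil.mapRecordKeyToFileGroupIndex` to deterministically pick a file group for each key. A simplified stand-in of that idea (the actual hash used by Hudi may differ) showing how keys are bucketed per file group before the readers are opened, mirroring `getPartitionFileSliceToKeysMapping`:

```java
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class KeyToFileGroupShardingSketch {

  // Simplified stand-in for HoodieTableMetadataUtil.mapRecordKeyToFileGroupIndex:
  // any deterministic hash works, as long as the writer and the reader agree on it.
  static int mapRecordKeyToFileGroupIndex(String recordKey, int numFileGroups) {
    int h = 0;
    for (int i = 0; i < recordKey.length(); i++) {
      h = 31 * h + recordKey.charAt(i);
    }
    return Math.abs(h % numFileGroups);
  }

  // Bucket the lookup keys by their target file group so each file slice's
  // readers are opened once and handed the whole batch of keys.
  static Map<Integer, List<String>> groupKeysByFileGroup(List<String> keys, int numFileGroups) {
    Map<Integer, List<String>> grouped = new HashMap<>();
    for (String key : keys) {
      int fileGroupIndex = mapRecordKeyToFileGroupIndex(key, numFileGroups);
      grouped.computeIfAbsent(fileGroupIndex, k -> new ArrayList<>()).add(key);
    }
    return grouped;
  }
}
```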
@@ -87,7 +87,7 @@ protected void processNextDeletedKey(HoodieKey hoodieKey) { } @Override - protected HoodieRecord createHoodieRecord(final IndexedRecord rec, final HoodieTableConfig hoodieTableConfig, + protected HoodieAvroRecord createHoodieRecord(final IndexedRecord rec, final HoodieTableConfig hoodieTableConfig, final String payloadClassFQN, final String preCombineField, final boolean withOperationField, final Option> simpleKeyGenFields, @@ -116,7 +116,7 @@ public static HoodieMetadataMergedLogRecordReader.Builder newBuilder() { * @param key Key of the record to retrieve * @return {@code HoodieRecord} if key was found else {@code Option.empty()} */ - public List>>> getRecordByKey(String key) { + public synchronized List>>> getRecordByKey(String key) { return Collections.singletonList(Pair.of(key, Option.ofNullable((HoodieRecord) records.get(key)))); } @@ -139,7 +139,7 @@ public synchronized List @Override protected String getKeyField() { - return HoodieMetadataPayload.SCHEMA_FIELD_ID_KEY; + return HoodieMetadataPayload.KEY_FIELD_NAME; } /** diff --git a/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataMetrics.java b/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataMetrics.java index 2efc96c6f3dee..fe8612c42e802 100644 --- a/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataMetrics.java +++ b/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataMetrics.java @@ -41,6 +41,8 @@ public class HoodieMetadataMetrics implements Serializable { // Metric names public static final String LOOKUP_PARTITIONS_STR = "lookup_partitions"; public static final String LOOKUP_FILES_STR = "lookup_files"; + public static final String LOOKUP_BLOOM_FILTERS_METADATA_STR = "lookup_meta_index_bloom_filters"; + public static final String LOOKUP_COLUMN_STATS_METADATA_STR = "lookup_meta_index_column_ranges"; public static final String SCAN_STR = "scan"; public static final String BASEFILE_READ_STR = "basefile_read"; public static final String INITIALIZE_STR = "initialize"; @@ -77,7 +79,7 @@ private Map getStats(HoodieTableFileSystemView fsView, boolean d Map stats = new HashMap<>(); // Total size of the metadata and count of base/log files - for (String metadataPartition : MetadataPartitionType.all()) { + for (String metadataPartition : MetadataPartitionType.allPaths()) { List latestSlices = fsView.getLatestFileSlices(metadataPartition).collect(Collectors.toList()); // Total size of the metadata and count of base/log files diff --git a/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataPayload.java b/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataPayload.java index 0b0d144a6e7e9..221b52e77e674 100644 --- a/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataPayload.java +++ b/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataPayload.java @@ -18,88 +18,199 @@ package org.apache.hudi.metadata; +import org.apache.avro.Schema; +import org.apache.avro.generic.GenericRecord; +import org.apache.avro.generic.IndexedRecord; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hudi.avro.model.HoodieMetadataBloomFilter; +import org.apache.hudi.avro.model.HoodieMetadataColumnStats; import org.apache.hudi.avro.model.HoodieMetadataFileInfo; import org.apache.hudi.avro.model.HoodieMetadataRecord; +import org.apache.hudi.common.bloom.BloomFilterTypeCode; +import 
org.apache.hudi.common.fs.FSUtils; +import org.apache.hudi.common.model.HoodieAvroRecord; +import org.apache.hudi.common.model.HoodieColumnRangeMetadata; import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.model.HoodieRecordPayload; import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.ValidationUtils; +import org.apache.hudi.common.util.hash.ColumnIndexID; +import org.apache.hudi.common.util.hash.FileIndexID; +import org.apache.hudi.common.util.hash.PartitionIndexID; import org.apache.hudi.exception.HoodieMetadataException; -import org.apache.avro.Schema; -import org.apache.avro.generic.GenericRecord; -import org.apache.avro.generic.IndexedRecord; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; +import org.apache.hudi.io.storage.HoodieHFileReader; import java.io.IOException; +import java.nio.ByteBuffer; import java.util.Arrays; +import java.util.Collection; import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Properties; import java.util.stream.Collectors; import java.util.stream.Stream; +import static org.apache.hudi.TypeUtils.unsafeCast; +import static org.apache.hudi.common.util.ValidationUtils.checkArgument; +import static org.apache.hudi.common.util.ValidationUtils.checkState; import static org.apache.hudi.metadata.HoodieTableMetadata.RECORDKEY_PARTITION_LIST; /** - * This is a payload which saves information about a single entry in the Metadata Table. - * - * The type of the entry is determined by the "type" saved within the record. The following types of entries are saved: - * - * 1. List of partitions: There is a single such record - * key="__all_partitions__" - * - * 2. List of files in a Partition: There is one such record for each partition - * key=Partition name - * - * During compaction on the table, the deletions are merged with additions and hence pruned. - * - * Metadata Table records are saved with the schema defined in HoodieMetadata.avsc. This class encapsulates the - * HoodieMetadataRecord for ease of operations. + * MetadataTable records are persisted with the schema defined in HoodieMetadata.avsc. + * This class represents the payload for the MetadataTable. + *

+ * This single metadata payload is shared by all the partitions under the metadata table. + * The partition-specific records are determined by the field "type" saved within the record. + * The following types are supported: + *

+ * METADATA_TYPE_PARTITION_LIST (1): + * -- List of all partitions. There is a single such record + * -- key = {@link HoodieTableMetadata#RECORDKEY_PARTITION_LIST} + *

+ * METADATA_TYPE_FILE_LIST (2): + * -- List of all files in a partition. There is one such record for each partition + * -- key = partition name + *

+ * METADATA_TYPE_COLUMN_STATS (3): + * -- This is an index for column stats in the table + *

+ * METADATA_TYPE_BLOOM_FILTER (4): + * -- This is an index for base file bloom filters. This is a map of FileID to its BloomFilter byte[]. + *

+ * During compaction on the table, the deletions are merged with additions and hence records are pruned. */ public class HoodieMetadataPayload implements HoodieRecordPayload { + // Type of the record. This can be an enum in the schema but Avro 1.8 + // has a bug - https://issues.apache.org/jira/browse/AVRO-1810 + protected static final int METADATA_TYPE_PARTITION_LIST = 1; + protected static final int METADATA_TYPE_FILE_LIST = 2; + protected static final int METADATA_TYPE_COLUMN_STATS = 3; + protected static final int METADATA_TYPE_BLOOM_FILTER = 4; + // HoodieMetadata schema field ids - public static final String SCHEMA_FIELD_ID_KEY = "key"; - public static final String SCHEMA_FIELD_ID_TYPE = "type"; - public static final String SCHEMA_FIELD_ID_METADATA = "filesystemMetadata"; + public static final String KEY_FIELD_NAME = HoodieHFileReader.KEY_FIELD_NAME; + public static final String SCHEMA_FIELD_NAME_TYPE = "type"; + public static final String SCHEMA_FIELD_NAME_METADATA = "filesystemMetadata"; + public static final String SCHEMA_FIELD_ID_COLUMN_STATS = "ColumnStatsMetadata"; + public static final String SCHEMA_FIELD_ID_BLOOM_FILTER = "BloomFilterMetadata"; + + // HoodieMetadata bloom filter payload field ids + private static final String FIELD_IS_DELETED = "isDeleted"; + private static final String BLOOM_FILTER_FIELD_TYPE = "type"; + private static final String BLOOM_FILTER_FIELD_TIMESTAMP = "timestamp"; + private static final String BLOOM_FILTER_FIELD_BLOOM_FILTER = "bloomFilter"; + private static final String BLOOM_FILTER_FIELD_IS_DELETED = FIELD_IS_DELETED; - // Type of the record - // This can be an enum in the schema but Avro 1.8 has a bug - https://issues.apache.org/jira/browse/AVRO-1810 - private static final int PARTITION_LIST = 1; - private static final int FILE_LIST = 2; + // HoodieMetadata column stats payload field ids + private static final String COLUMN_STATS_FIELD_MIN_VALUE = "minValue"; + private static final String COLUMN_STATS_FIELD_MAX_VALUE = "maxValue"; + private static final String COLUMN_STATS_FIELD_NULL_COUNT = "nullCount"; + private static final String COLUMN_STATS_FIELD_VALUE_COUNT = "valueCount"; + private static final String COLUMN_STATS_FIELD_TOTAL_SIZE = "totalSize"; + private static final String COLUMN_STATS_FIELD_RESOURCE_NAME = "fileName"; + private static final String COLUMN_STATS_FIELD_TOTAL_UNCOMPRESSED_SIZE = "totalUncompressedSize"; + private static final String COLUMN_STATS_FIELD_IS_DELETED = FIELD_IS_DELETED; private String key = null; private int type = 0; private Map filesystemMetadata = null; + private HoodieMetadataBloomFilter bloomFilterMetadata = null; + private HoodieMetadataColumnStats columnStatMetadata = null; public HoodieMetadataPayload(GenericRecord record, Comparable orderingVal) { this(Option.of(record)); } - public HoodieMetadataPayload(Option record) { - if (record.isPresent()) { + public HoodieMetadataPayload(Option recordOpt) { + if (recordOpt.isPresent()) { + GenericRecord record = recordOpt.get(); // This can be simplified using SpecificData.deepcopy once this bug is fixed // https://issues.apache.org/jira/browse/AVRO-1811 - key = record.get().get(SCHEMA_FIELD_ID_KEY).toString(); - type = (int) record.get().get(SCHEMA_FIELD_ID_TYPE); - if (record.get().get(SCHEMA_FIELD_ID_METADATA) != null) { - filesystemMetadata = (Map) record.get().get("filesystemMetadata"); + // + // NOTE: {@code HoodieMetadataRecord} has to always carry both "key" and "type" fields + // for it to be handled appropriately, therefore these fields have to be reflected + // in any (read-)projected schema + key = record.get(KEY_FIELD_NAME).toString(); + type = (int) record.get(SCHEMA_FIELD_NAME_TYPE); + + Map metadata = getNestedFieldValue(record, SCHEMA_FIELD_NAME_METADATA); + if (metadata != null) { + filesystemMetadata = metadata; filesystemMetadata.keySet().forEach(k -> { GenericRecord v = filesystemMetadata.get(k); - filesystemMetadata.put(k.toString(), new HoodieMetadataFileInfo((Long) v.get("size"), (Boolean) v.get("isDeleted"))); + filesystemMetadata.put(k, new HoodieMetadataFileInfo((Long) v.get("size"), (Boolean) v.get("isDeleted"))); }); } + + if (type == METADATA_TYPE_BLOOM_FILTER) { + GenericRecord bloomFilterRecord = getNestedFieldValue(record, SCHEMA_FIELD_ID_BLOOM_FILTER); + // NOTE: The only legitimate reason for {@code BloomFilterMetadata} to not be present is when + // it's not been read from the storage (i.e. it's not been a part of projected schema). + // Otherwise, it has to be present or the record would be considered invalid + if (bloomFilterRecord == null) { + checkArgument(record.getSchema().getField(SCHEMA_FIELD_ID_BLOOM_FILTER) == null, + String.format("Valid %s record expected for type: %s", SCHEMA_FIELD_ID_BLOOM_FILTER, METADATA_TYPE_BLOOM_FILTER)); + } else { + bloomFilterMetadata = new HoodieMetadataBloomFilter( + (String) bloomFilterRecord.get(BLOOM_FILTER_FIELD_TYPE), + (String) bloomFilterRecord.get(BLOOM_FILTER_FIELD_TIMESTAMP), + (ByteBuffer) bloomFilterRecord.get(BLOOM_FILTER_FIELD_BLOOM_FILTER), + (Boolean) bloomFilterRecord.get(BLOOM_FILTER_FIELD_IS_DELETED) + ); + } + } + + if (type == METADATA_TYPE_COLUMN_STATS) { + GenericRecord columnStatsRecord = getNestedFieldValue(record, SCHEMA_FIELD_ID_COLUMN_STATS); + // NOTE: The only legitimate reason for {@code ColumnStatsMetadata} to not be present is when + // it's not been read from the storage (i.e. it's not been a part of projected schema).
+ // Otherwise, it has to be present or the record would be considered invalid + if (columnStatsRecord == null) { + checkArgument(record.getSchema().getField(SCHEMA_FIELD_ID_COLUMN_STATS) == null, + String.format("Valid %s record expected for type: %s", SCHEMA_FIELD_ID_COLUMN_STATS, METADATA_TYPE_COLUMN_STATS)); + } else { + columnStatMetadata = HoodieMetadataColumnStats.newBuilder() + .setFileName((String) columnStatsRecord.get(COLUMN_STATS_FIELD_RESOURCE_NAME)) + .setMinValue((String) columnStatsRecord.get(COLUMN_STATS_FIELD_MIN_VALUE)) + .setMaxValue((String) columnStatsRecord.get(COLUMN_STATS_FIELD_MAX_VALUE)) + .setValueCount((Long) columnStatsRecord.get(COLUMN_STATS_FIELD_VALUE_COUNT)) + .setNullCount((Long) columnStatsRecord.get(COLUMN_STATS_FIELD_NULL_COUNT)) + .setTotalSize((Long) columnStatsRecord.get(COLUMN_STATS_FIELD_TOTAL_SIZE)) + .setTotalUncompressedSize((Long) columnStatsRecord.get(COLUMN_STATS_FIELD_TOTAL_UNCOMPRESSED_SIZE)) + .setIsDeleted((Boolean) columnStatsRecord.get(COLUMN_STATS_FIELD_IS_DELETED)) + .build(); + } + } } } private HoodieMetadataPayload(String key, int type, Map filesystemMetadata) { + this(key, type, filesystemMetadata, null, null); + } + + private HoodieMetadataPayload(String key, HoodieMetadataBloomFilter metadataBloomFilter) { + this(key, METADATA_TYPE_BLOOM_FILTER, null, metadataBloomFilter, null); + } + + private HoodieMetadataPayload(String key, HoodieMetadataColumnStats columnStats) { + this(key, METADATA_TYPE_COLUMN_STATS, null, null, columnStats); + } + + protected HoodieMetadataPayload(String key, int type, + Map filesystemMetadata, + HoodieMetadataBloomFilter metadataBloomFilter, + HoodieMetadataColumnStats columnStats) { this.key = key; this.type = type; this.filesystemMetadata = filesystemMetadata; + this.bloomFilterMetadata = metadataBloomFilter; + this.columnStatMetadata = columnStats; } /** @@ -109,69 +220,122 @@ private HoodieMetadataPayload(String key, int type, Map createPartitionListRecord(List partitions) { Map fileInfo = new HashMap<>(); - partitions.forEach(partition -> fileInfo.put(partition, new HoodieMetadataFileInfo(0L, false))); + partitions.forEach(partition -> fileInfo.put(partition, new HoodieMetadataFileInfo(0L, false))); - HoodieKey key = new HoodieKey(RECORDKEY_PARTITION_LIST, MetadataPartitionType.FILES.partitionPath()); - HoodieMetadataPayload payload = new HoodieMetadataPayload(key.getRecordKey(), PARTITION_LIST, fileInfo); - return new HoodieRecord<>(key, payload); + HoodieKey key = new HoodieKey(RECORDKEY_PARTITION_LIST, MetadataPartitionType.FILES.getPartitionPath()); + HoodieMetadataPayload payload = new HoodieMetadataPayload(key.getRecordKey(), METADATA_TYPE_PARTITION_LIST, + fileInfo); + return new HoodieAvroRecord<>(key, payload); } /** * Create and return a {@code HoodieMetadataPayload} to save list of files within a partition. 
* - * @param partition The name of the partition - * @param filesAdded Mapping of files to their sizes for files which have been added to this partition + * @param partition The name of the partition + * @param filesAdded Mapping of files to their sizes for files which have been added to this partition * @param filesDeleted List of files which have been deleted from this partition */ public static HoodieRecord createPartitionFilesRecord(String partition, - Option> filesAdded, Option> filesDeleted) { + Option> filesAdded, + Option> filesDeleted) { Map fileInfo = new HashMap<>(); filesAdded.ifPresent( m -> m.forEach((filename, size) -> fileInfo.put(filename, new HoodieMetadataFileInfo(size, false)))); filesDeleted.ifPresent( - m -> m.forEach(filename -> fileInfo.put(filename, new HoodieMetadataFileInfo(0L, true)))); + m -> m.forEach(filename -> fileInfo.put(filename, new HoodieMetadataFileInfo(0L, true)))); - HoodieKey key = new HoodieKey(partition, MetadataPartitionType.FILES.partitionPath()); - HoodieMetadataPayload payload = new HoodieMetadataPayload(key.getRecordKey(), FILE_LIST, fileInfo); - return new HoodieRecord<>(key, payload); + HoodieKey key = new HoodieKey(partition, MetadataPartitionType.FILES.getPartitionPath()); + HoodieMetadataPayload payload = new HoodieMetadataPayload(key.getRecordKey(), METADATA_TYPE_FILE_LIST, fileInfo); + return new HoodieAvroRecord<>(key, payload); + } + + /** + * Create bloom filter metadata record. + * + * @param partitionName - Partition name + * @param baseFileName - Base file name for which the bloom filter needs to persisted + * @param timestamp - Instant timestamp responsible for this record + * @param bloomFilter - Bloom filter for the File + * @param isDeleted - Is the bloom filter no more valid + * @return Metadata payload containing the fileID and its bloom filter record + */ + public static HoodieRecord createBloomFilterMetadataRecord(final String partitionName, + final String baseFileName, + final String timestamp, + final ByteBuffer bloomFilter, + final boolean isDeleted) { + ValidationUtils.checkArgument(!baseFileName.contains(Path.SEPARATOR) + && FSUtils.isBaseFile(new Path(baseFileName)), + "Invalid base file '" + baseFileName + "' for MetaIndexBloomFilter!"); + final String bloomFilterIndexKey = new PartitionIndexID(partitionName).asBase64EncodedString() + .concat(new FileIndexID(baseFileName).asBase64EncodedString()); + HoodieKey key = new HoodieKey(bloomFilterIndexKey, MetadataPartitionType.BLOOM_FILTERS.getPartitionPath()); + + // TODO: HUDI-3203 Get the bloom filter type from the file + HoodieMetadataBloomFilter metadataBloomFilter = + new HoodieMetadataBloomFilter(BloomFilterTypeCode.DYNAMIC_V0.name(), + timestamp, bloomFilter, isDeleted); + HoodieMetadataPayload metadataPayload = new HoodieMetadataPayload(key.getRecordKey(), + metadataBloomFilter); + return new HoodieAvroRecord<>(key, metadataPayload); } @Override public HoodieMetadataPayload preCombine(HoodieMetadataPayload previousRecord) { ValidationUtils.checkArgument(previousRecord.type == type, - "Cannot combine " + previousRecord.type + " with " + type); - - Map combinedFileInfo = null; + "Cannot combine " + previousRecord.type + " with " + type); switch (type) { - case PARTITION_LIST: - case FILE_LIST: - combinedFileInfo = combineFilesystemMetadata(previousRecord); - break; + case METADATA_TYPE_PARTITION_LIST: + case METADATA_TYPE_FILE_LIST: + Map combinedFileInfo = combineFilesystemMetadata(previousRecord); + return new HoodieMetadataPayload(key, type, combinedFileInfo); 
+ case METADATA_TYPE_BLOOM_FILTER: + HoodieMetadataBloomFilter combineBloomFilterMetadata = combineBloomFilterMetadata(previousRecord); + return new HoodieMetadataPayload(key, combineBloomFilterMetadata); + case METADATA_TYPE_COLUMN_STATS: + return new HoodieMetadataPayload(key, combineColumnStatsMetadata(previousRecord)); default: throw new HoodieMetadataException("Unknown type of HoodieMetadataPayload: " + type); } + } + + private HoodieMetadataBloomFilter combineBloomFilterMetadata(HoodieMetadataPayload previousRecord) { + return this.bloomFilterMetadata; + } - return new HoodieMetadataPayload(key, type, combinedFileInfo); + private HoodieMetadataColumnStats combineColumnStatsMetadata(HoodieMetadataPayload previousRecord) { + return this.columnStatMetadata; } @Override - public Option combineAndGetUpdateValue(IndexedRecord oldRecord, Schema schema) throws IOException { - HoodieMetadataPayload anotherPayload = new HoodieMetadataPayload(Option.of((GenericRecord)oldRecord)); + public Option combineAndGetUpdateValue(IndexedRecord oldRecord, Schema schema, Properties properties) throws IOException { + HoodieMetadataPayload anotherPayload = new HoodieMetadataPayload(Option.of((GenericRecord) oldRecord)); HoodieRecordPayload combinedPayload = preCombine(anotherPayload); - return combinedPayload.getInsertValue(schema); + return combinedPayload.getInsertValue(schema, properties); } @Override - public Option getInsertValue(Schema schema) throws IOException { + public Option combineAndGetUpdateValue(IndexedRecord oldRecord, Schema schema) throws IOException { + return combineAndGetUpdateValue(oldRecord, schema, new Properties()); + } + + @Override + public Option getInsertValue(Schema schema, Properties properties) throws IOException { if (key == null) { return Option.empty(); } - HoodieMetadataRecord record = new HoodieMetadataRecord(key, type, filesystemMetadata); + HoodieMetadataRecord record = new HoodieMetadataRecord(key, type, filesystemMetadata, bloomFilterMetadata, + columnStatMetadata); return Option.of(record); } + @Override + public Option getInsertValue(Schema schema) throws IOException { + return getInsertValue(schema, new Properties()); + } + /** * Returns the list of filenames added as part of this record. */ @@ -186,6 +350,28 @@ public List getDeletions() { return filterFileInfoEntries(true).map(Map.Entry::getKey).sorted().collect(Collectors.toList()); } + /** + * Get the bloom filter metadata from this payload. + */ + public Option getBloomFilterMetadata() { + if (bloomFilterMetadata == null) { + return Option.empty(); + } + + return Option.of(bloomFilterMetadata); + } + + /** + * Get the bloom filter metadata from this payload. + */ + public Option getColumnStatMetadata() { + if (columnStatMetadata == null) { + return Option.empty(); + } + + return Option.of(columnStatMetadata); + } + /** * Returns the files added as part of this record. */ @@ -234,14 +420,101 @@ private Map combineFilesystemMetadata(HoodieMeta return combinedFileInfo; } + /** + * Get bloom filter index key. + * + * @param partitionIndexID - Partition index id + * @param fileIndexID - File index id + * @return Bloom filter index key + */ + public static String getBloomFilterIndexKey(PartitionIndexID partitionIndexID, FileIndexID fileIndexID) { + return partitionIndexID.asBase64EncodedString() + .concat(fileIndexID.asBase64EncodedString()); + } + + /** + * Get column stats index key. 
+ * + * @param partitionIndexID - Partition index id + * @param fileIndexID - File index id + * @param columnIndexID - Column index id + * @return Column stats index key + */ + public static String getColumnStatsIndexKey(PartitionIndexID partitionIndexID, FileIndexID fileIndexID, ColumnIndexID columnIndexID) { + return columnIndexID.asBase64EncodedString() + .concat(partitionIndexID.asBase64EncodedString()) + .concat(fileIndexID.asBase64EncodedString()); + } + + /** + * Get column stats index key from the column range metadata. + * + * @param partitionName - Partition name + * @param columnRangeMetadata - Column range metadata + * @return Column stats index key + */ + public static String getColumnStatsIndexKey(String partitionName, HoodieColumnRangeMetadata columnRangeMetadata) { + final PartitionIndexID partitionIndexID = new PartitionIndexID(partitionName); + final FileIndexID fileIndexID = new FileIndexID(new Path(columnRangeMetadata.getFilePath()).getName()); + final ColumnIndexID columnIndexID = new ColumnIndexID(columnRangeMetadata.getColumnName()); + return getColumnStatsIndexKey(partitionIndexID, fileIndexID, columnIndexID); + } + + public static Stream createColumnStatsRecords( + String partitionName, Collection> columnRangeMetadataList, boolean isDeleted) { + return columnRangeMetadataList.stream().map(columnRangeMetadata -> { + HoodieKey key = new HoodieKey(getColumnStatsIndexKey(partitionName, columnRangeMetadata), + MetadataPartitionType.COLUMN_STATS.getPartitionPath()); + HoodieMetadataPayload payload = new HoodieMetadataPayload(key.getRecordKey(), + HoodieMetadataColumnStats.newBuilder() + .setFileName(new Path(columnRangeMetadata.getFilePath()).getName()) + .setMinValue(columnRangeMetadata.getMinValue() == null ? null : + columnRangeMetadata.getMinValue().toString()) + .setMaxValue(columnRangeMetadata.getMaxValue() == null ? 
null : + columnRangeMetadata.getMaxValue().toString()) + .setNullCount(columnRangeMetadata.getNullCount()) + .setValueCount(columnRangeMetadata.getValueCount()) + .setTotalSize(columnRangeMetadata.getTotalSize()) + .setTotalUncompressedSize(columnRangeMetadata.getTotalUncompressedSize()) + .setIsDeleted(isDeleted) + .build()); + return new HoodieAvroRecord<>(key, payload); + }); + + + } + @Override public String toString() { final StringBuilder sb = new StringBuilder("HoodieMetadataPayload {"); - sb.append(SCHEMA_FIELD_ID_KEY + "=").append(key).append(", "); - sb.append(SCHEMA_FIELD_ID_TYPE + "=").append(type).append(", "); + sb.append(KEY_FIELD_NAME + "=").append(key).append(", "); + sb.append(SCHEMA_FIELD_NAME_TYPE + "=").append(type).append(", "); sb.append("creations=").append(Arrays.toString(getFilenames().toArray())).append(", "); sb.append("deletions=").append(Arrays.toString(getDeletions().toArray())).append(", "); + if (type == METADATA_TYPE_BLOOM_FILTER) { + checkState(getBloomFilterMetadata().isPresent()); + sb.append("BloomFilter: {"); + sb.append("bloom size: " + getBloomFilterMetadata().get().getBloomFilter().array().length).append(", "); + sb.append("timestamp: " + getBloomFilterMetadata().get().getTimestamp()).append(", "); + sb.append("deleted: " + getBloomFilterMetadata().get().getIsDeleted()); + sb.append("}"); + } + if (type == METADATA_TYPE_COLUMN_STATS) { + checkState(getColumnStatMetadata().isPresent()); + sb.append("ColStats: {"); + sb.append(getColumnStatMetadata().get()); + sb.append("}"); + } sb.append('}'); return sb.toString(); } + + private static T getNestedFieldValue(GenericRecord record, String fieldName) { + // NOTE: This routine is more lightweight than {@code HoodieAvroUtils.getNestedFieldVal} + if (record.getSchema().getField(fieldName) == null) { + return null; + } + + return unsafeCast(record.get(fieldName)); + } } diff --git a/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadata.java b/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadata.java index d981b7085195b..52fdbd993627f 100644 --- a/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadata.java +++ b/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadata.java @@ -18,19 +18,25 @@ package org.apache.hudi.metadata; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.Path; +import org.apache.hudi.avro.model.HoodieMetadataColumnStats; import org.apache.hudi.common.config.HoodieMetadataConfig; import org.apache.hudi.common.config.SerializableConfiguration; import org.apache.hudi.common.engine.HoodieEngineContext; import org.apache.hudi.common.table.HoodieTableMetaClient; import org.apache.hudi.common.util.Option; -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.Path; +import org.apache.hudi.common.util.collection.Pair; +import org.apache.hudi.exception.HoodieMetadataException; import java.io.IOException; import java.io.Serializable; +import java.nio.ByteBuffer; import java.util.List; import java.util.Map; +import static org.apache.hudi.common.util.ValidationUtils.checkArgument; + /** * Interface that supports querying various pieces of metadata about a hudi table. */ @@ -54,12 +60,19 @@ public interface HoodieTableMetadata extends Serializable, AutoCloseable { static final String METADATA_TABLE_REL_PATH = HoodieTableMetaClient.METAFOLDER_NAME + Path.SEPARATOR + "metadata"; /** - * Return the base path of the Metadata Table. 
- * - * @param tableBasePath The base path of the dataset + * Return the base path of the Metadata Table for the Dataset identified by the given base path */ - static String getMetadataTableBasePath(String tableBasePath) { - return tableBasePath + Path.SEPARATOR + METADATA_TABLE_REL_PATH; + static String getMetadataTableBasePath(String dataTableBasePath) { + return dataTableBasePath + Path.SEPARATOR + METADATA_TABLE_REL_PATH; + } + + /** + * Returns the base path of the Dataset, given the base path of its + * Metadata Table + */ + static String getDataTableBasePathFromMetadataTable(String metadataTableBasePath) { + checkArgument(isMetadataTable(metadataTableBasePath)); + return metadataTableBasePath.substring(0, metadataTableBasePath.lastIndexOf(METADATA_TABLE_REL_PATH) - 1); } /** @@ -104,6 +117,38 @@ static HoodieTableMetadata create(HoodieEngineContext engineContext, HoodieMetad */ Map getAllFilesInPartitions(List partitionPaths) throws IOException; + /** + * Get the bloom filter for the FileID from the metadata table. + * + * @param partitionName - Partition name + * @param fileName - File name for which the bloom filter needs to be retrieved + * @return BloomFilter byte buffer if available, otherwise empty + * @throws HoodieMetadataException + */ + Option getBloomFilter(final String partitionName, final String fileName) + throws HoodieMetadataException; + + /** + * Get bloom filters for files from the metadata table index. + * + * @param partitionNameFileNameList - List of partition and file name pairs for which bloom filters need to be retrieved + * @return Map of partition file name pair to its bloom filter byte buffer + * @throws HoodieMetadataException + */ + Map, ByteBuffer> getBloomFilters(final List> partitionNameFileNameList) + throws HoodieMetadataException; + + /** + * Get column stats for files from the metadata table index. + * + * @param partitionNameFileNameList - List of partition and file name pairs for which column stats need to be retrieved + * @param columnName - Column name for which stats are needed + * @return Map of partition and file name pair to its column stats + * @throws HoodieMetadataException + */ + Map, HoodieMetadataColumnStats> getColumnStats(final List> partitionNameFileNameList, final String columnName) + throws HoodieMetadataException; + /** * Get the instant time to which the metadata is synced w.r.t data timeline.
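The two base-path helpers above are intended to be inverses of each other. A quick round-trip sketch against the static methods in this interface; the local path is illustrative:

```java
import org.apache.hudi.metadata.HoodieTableMetadata;

public class BasePathRoundTripSketch {
  public static void main(String[] args) {
    String dataTableBasePath = "/tmp/hudi/trips";  // illustrative path
    String metadataTableBasePath =
        HoodieTableMetadata.getMetadataTableBasePath(dataTableBasePath);
    // => /tmp/hudi/trips/.hoodie/metadata

    // The inverse helper added in this patch recovers the data table base path.
    String recovered =
        HoodieTableMetadata.getDataTableBasePathFromMetadataTable(metadataTableBasePath);
    System.out.println(recovered.equals(dataTableBasePath));  // true
  }
}
```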
*/ diff --git a/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java b/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java index 58d63a194e81d..e569baefb6f06 100644 --- a/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java +++ b/hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java @@ -21,26 +21,42 @@ import org.apache.hudi.avro.model.HoodieCleanMetadata; import org.apache.hudi.avro.model.HoodieRestoreMetadata; import org.apache.hudi.avro.model.HoodieRollbackMetadata; +import org.apache.hudi.common.bloom.BloomFilter; +import org.apache.hudi.common.config.HoodieMetadataConfig; +import org.apache.hudi.common.data.HoodieData; import org.apache.hudi.common.engine.HoodieEngineContext; import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.model.FileSlice; +import org.apache.hudi.common.model.HoodieColumnRangeMetadata; import org.apache.hudi.common.model.HoodieCommitMetadata; +import org.apache.hudi.common.model.HoodieDeltaWriteStat; +import org.apache.hudi.common.model.HoodieFileFormat; import org.apache.hudi.common.model.HoodieRecord; +import org.apache.hudi.common.model.HoodieWriteStat; import org.apache.hudi.common.table.HoodieTableMetaClient; +import org.apache.hudi.common.table.TableSchemaResolver; import org.apache.hudi.common.table.timeline.HoodieActiveTimeline; import org.apache.hudi.common.table.timeline.HoodieDefaultTimeline; import org.apache.hudi.common.table.timeline.HoodieInstant; import org.apache.hudi.common.table.timeline.HoodieTimeline; import org.apache.hudi.common.table.view.HoodieTableFileSystemView; import org.apache.hudi.common.util.Option; +import org.apache.hudi.common.util.ParquetUtils; import org.apache.hudi.common.util.ValidationUtils; +import org.apache.hudi.common.util.collection.Pair; +import org.apache.hudi.exception.HoodieException; import org.apache.hudi.exception.HoodieMetadataException; +import org.apache.hudi.io.storage.HoodieFileReader; +import org.apache.hudi.io.storage.HoodieFileReaderFactory; +import org.apache.avro.generic.IndexedRecord; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; +import java.io.IOException; +import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; @@ -62,12 +78,17 @@ public class HoodieTableMetadataUtil { private static final Logger LOG = LogManager.getLogger(HoodieTableMetadataUtil.class); + protected static final String PARTITION_NAME_FILES = "files"; + protected static final String PARTITION_NAME_COLUMN_STATS = "column_stats"; + protected static final String PARTITION_NAME_BLOOM_FILTERS = "bloom_filters"; + /** - * Delete the metadata table for the dataset. This will be invoked during upgrade/downgrade operation during which no other + * Delete the metadata table for the dataset. This will be invoked during upgrade/downgrade operation during which + * no other * process should be running. * * @param basePath base path of the dataset - * @param context instance of {@link HoodieEngineContext}. + * @param context instance of {@link HoodieEngineContext}. 
*/ public static void deleteMetadataTable(String basePath, HoodieEngineContext context) { final String metadataTablePath = HoodieTableMetadata.getMetadataTableBasePath(basePath); @@ -79,14 +100,53 @@ public static void deleteMetadataTable(String basePath, HoodieEngineContext cont } } + /** + * Convert commit action to metadata records for the enabled partition types. + * + * @param commitMetadata - Commit action metadata + * @param dataMetaClient - Meta client for the data table + * @param isMetaIndexColumnStatsForAllColumns - Do all columns need meta indexing? + * @param instantTime - Action instant time + * @return Map of partition to metadata records for the commit action + */ + public static Map> convertMetadataToRecords( + HoodieEngineContext context, List enabledPartitionTypes, + HoodieCommitMetadata commitMetadata, HoodieTableMetaClient dataMetaClient, + boolean isMetaIndexColumnStatsForAllColumns, String instantTime) { + final Map> partitionToRecordsMap = new HashMap<>(); + final HoodieData filesPartitionRecordsRDD = context.parallelize( + convertMetadataToFilesPartitionRecords(commitMetadata, instantTime), 1); + partitionToRecordsMap.put(MetadataPartitionType.FILES, filesPartitionRecordsRDD); + + if (enabledPartitionTypes.contains(MetadataPartitionType.BLOOM_FILTERS)) { + final List metadataBloomFilterRecords = convertMetadataToBloomFilterRecords(commitMetadata, + dataMetaClient, instantTime); + if (!metadataBloomFilterRecords.isEmpty()) { + final HoodieData metadataBloomFilterRecordsRDD = context.parallelize(metadataBloomFilterRecords, 1); + partitionToRecordsMap.put(MetadataPartitionType.BLOOM_FILTERS, metadataBloomFilterRecordsRDD); + } + } + + if (enabledPartitionTypes.contains(MetadataPartitionType.COLUMN_STATS)) { + final List metadataColumnStats = convertMetadataToColumnStatsRecords(commitMetadata, context, + dataMetaClient, isMetaIndexColumnStatsForAllColumns, instantTime); + if (!metadataColumnStats.isEmpty()) { + final HoodieData metadataColumnStatsRDD = context.parallelize(metadataColumnStats, 1); + partitionToRecordsMap.put(MetadataPartitionType.COLUMN_STATS, metadataColumnStatsRDD); + } + } + return partitionToRecordsMap; + } + /** * Finds all new files/partitions created as part of commit and creates metadata table records for them. * - * @param commitMetadata - * @param instantTime - * @return a list of metadata table records + * @param commitMetadata - Commit action metadata + * @param instantTime - Commit action instant time + * @return List of metadata table records */ - public static List convertMetadataToRecords(HoodieCommitMetadata commitMetadata, String instantTime) { + public static List convertMetadataToFilesPartitionRecords(HoodieCommitMetadata commitMetadata, + String instantTime) { List records = new LinkedList<>(); List allPartitions = new LinkedList<>(); commitMetadata.getPartitionToWriteStats().forEach((partitionStatName, writeStats) -> { @@ -124,6 +184,102 @@ public static List convertMetadataToRecords(HoodieCommitMetadata c return records; } + /** + * Convert commit action metadata to bloom filter records. 
+ * + * @param commitMetadata - Commit action metadata + * @param dataMetaClient - Meta client for the data table + * @param instantTime - Action instant time + * @return List of metadata table records + */ + public static List convertMetadataToBloomFilterRecords(HoodieCommitMetadata commitMetadata, + HoodieTableMetaClient dataMetaClient, + String instantTime) { + List records = new LinkedList<>(); + commitMetadata.getPartitionToWriteStats().forEach((partitionStatName, writeStats) -> { + final String partition = partitionStatName.equals(EMPTY_PARTITION_NAME) ? NON_PARTITIONED_NAME : partitionStatName; + Map newFiles = new HashMap<>(writeStats.size()); + writeStats.forEach(hoodieWriteStat -> { + // No action for delta logs + if (hoodieWriteStat instanceof HoodieDeltaWriteStat) { + return; + } + + String pathWithPartition = hoodieWriteStat.getPath(); + if (pathWithPartition == null) { + // Empty partition + LOG.error("Failed to find path in write stat to update metadata table " + hoodieWriteStat); + return; + } + int offset = partition.equals(NON_PARTITIONED_NAME) ? (pathWithPartition.startsWith("/") ? 1 : 0) : + partition.length() + 1; + + final String fileName = pathWithPartition.substring(offset); + if (!FSUtils.isBaseFile(new Path(fileName))) { + return; + } + ValidationUtils.checkState(!newFiles.containsKey(fileName), "Duplicate files in HoodieCommitMetadata"); + + final Path writeFilePath = new Path(dataMetaClient.getBasePath(), pathWithPartition); + try { + HoodieFileReader fileReader = + HoodieFileReaderFactory.getFileReader(dataMetaClient.getHadoopConf(), writeFilePath); + try { + final BloomFilter fileBloomFilter = fileReader.readBloomFilter(); + if (fileBloomFilter == null) { + LOG.error("Failed to read bloom filter for " + writeFilePath); + return; + } + ByteBuffer bloomByteBuffer = ByteBuffer.wrap(fileBloomFilter.serializeToString().getBytes()); + HoodieRecord record = HoodieMetadataPayload.createBloomFilterMetadataRecord( + partition, fileName, instantTime, bloomByteBuffer, false); + records.add(record); + } catch (Exception e) { + LOG.error("Failed to read bloom filter for " + writeFilePath); + return; + } + fileReader.close(); + } catch (IOException e) { + LOG.error("Failed to get bloom filter for file: " + writeFilePath + ", write stat: " + hoodieWriteStat); + } + }); + }); + + return records; + } + + /** + * Convert the clean action to metadata records. 
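One hazard in the bloom filter extraction above: `fileReader.close()` is only reached on the success path, so a failed `readBloomFilter()` leaves the reader open. A leak-free sketch of the same read, reusing the locals of the surrounding loop and assuming only `close()` is available on the reader (the enclosing `catch (IOException e)` still applies):

```java
// Sketch: same bloom filter read as above, but the reader is closed on every path.
HoodieFileReader fileReader =
    HoodieFileReaderFactory.getFileReader(dataMetaClient.getHadoopConf(), writeFilePath);
try {
  final BloomFilter fileBloomFilter = fileReader.readBloomFilter();
  if (fileBloomFilter == null) {
    LOG.error("Failed to read bloom filter for " + writeFilePath);
  } else {
    ByteBuffer bloomByteBuffer = ByteBuffer.wrap(fileBloomFilter.serializeToString().getBytes());
    records.add(HoodieMetadataPayload.createBloomFilterMetadataRecord(
        partition, fileName, instantTime, bloomByteBuffer, false));
  }
} finally {
  fileReader.close(); // closed on success and failure alike
}
```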
+ */ + public static Map> convertMetadataToRecords( + HoodieEngineContext engineContext, List enabledPartitionTypes, + HoodieCleanMetadata cleanMetadata, HoodieTableMetaClient dataMetaClient, String instantTime) { + final Map> partitionToRecordsMap = new HashMap<>(); + final HoodieData filesPartitionRecordsRDD = engineContext.parallelize( + convertMetadataToFilesPartitionRecords(cleanMetadata, instantTime), 1); + partitionToRecordsMap.put(MetadataPartitionType.FILES, filesPartitionRecordsRDD); + + if (enabledPartitionTypes.contains(MetadataPartitionType.BLOOM_FILTERS)) { + final List metadataBloomFilterRecords = convertMetadataToBloomFilterRecords(cleanMetadata, + engineContext, instantTime); + if (!metadataBloomFilterRecords.isEmpty()) { + final HoodieData metadataBloomFilterRecordsRDD = engineContext.parallelize(metadataBloomFilterRecords, 1); + partitionToRecordsMap.put(MetadataPartitionType.BLOOM_FILTERS, metadataBloomFilterRecordsRDD); + } + } + + if (enabledPartitionTypes.contains(MetadataPartitionType.COLUMN_STATS)) { + final List metadataColumnStats = convertMetadataToColumnStatsRecords(cleanMetadata, engineContext, + dataMetaClient); + if (!metadataColumnStats.isEmpty()) { + final HoodieData metadataColumnStatsRDD = engineContext.parallelize(metadataColumnStats, 1); + partitionToRecordsMap.put(MetadataPartitionType.COLUMN_STATS, metadataColumnStatsRDD); + } + } + + return partitionToRecordsMap; + } + /** * Finds all files that were deleted as part of a clean and creates metadata table records for them. * @@ -131,7 +287,8 @@ public static List convertMetadataToRecords(HoodieCommitMetadata c * @param instantTime * @return a list of metadata table records */ - public static List convertMetadataToRecords(HoodieCleanMetadata cleanMetadata, String instantTime) { + public static List convertMetadataToFilesPartitionRecords(HoodieCleanMetadata cleanMetadata, + String instantTime) { List records = new LinkedList<>(); int[] fileDeleteCount = {0}; cleanMetadata.getPartitionMetadata().forEach((partitionName, partitionMetadata) -> { @@ -150,51 +307,191 @@ public static List convertMetadataToRecords(HoodieCleanMetadata cl return records; } + /** + * Convert clean metadata to bloom filter index records. + * + * @param cleanMetadata - Clean action metadata + * @param engineContext - Engine context + * @param instantTime - Clean action instant time + * @return List of bloom filter index records for the clean metadata + */ + public static List convertMetadataToBloomFilterRecords(HoodieCleanMetadata cleanMetadata, + HoodieEngineContext engineContext, + String instantTime) { + List> deleteFileList = new ArrayList<>(); + cleanMetadata.getPartitionMetadata().forEach((partition, partitionMetadata) -> { + // Files deleted from a partition + List deletedFiles = partitionMetadata.getDeletePathPatterns(); + deletedFiles.forEach(entry -> { + final Path deletedFilePath = new Path(entry); + if (FSUtils.isBaseFile(deletedFilePath)) { + deleteFileList.add(Pair.of(partition, deletedFilePath.getName())); + } + }); + }); + + return engineContext.map(deleteFileList, deleteFileInfo -> { + return HoodieMetadataPayload.createBloomFilterMetadataRecord( + deleteFileInfo.getLeft(), deleteFileInfo.getRight(), instantTime, ByteBuffer.allocate(0), true); + }, 1).stream().collect(Collectors.toList()); + } + + /** + * Convert clean metadata to column stats index records. 
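Deletions are modeled as tombstones: for a base file removed by clean, the converter above emits a bloom filter record carrying an empty buffer and `isDeleted = true`. A distilled sketch of that convention, with illustrative values:

```java
// Tombstone for a base file removed by clean: empty payload, isDeleted = true.
// All literal values below are illustrative only.
HoodieRecord tombstone = HoodieMetadataPayload.createBloomFilterMetadataRecord(
    "2021/02/01",                     // partition the file lived in
    "abc123_1-0-1_20220101.parquet",  // deleted base file name
    "20220102093000",                 // clean action instant time
    ByteBuffer.allocate(0),           // no bloom filter content for a delete
    true);                            // marks the entry as deleted
```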
+ * + * @param cleanMetadata - Clean action metadata + * @param engineContext - Engine context + * @param datasetMetaClient - data table meta client + * @return List of column stats index records for the clean metadata + */ + public static List convertMetadataToColumnStatsRecords(HoodieCleanMetadata cleanMetadata, + HoodieEngineContext engineContext, + HoodieTableMetaClient datasetMetaClient) { + List> deleteFileList = new ArrayList<>(); + cleanMetadata.getPartitionMetadata().forEach((partition, partitionMetadata) -> { + // Files deleted from a partition + List deletedFiles = partitionMetadata.getDeletePathPatterns(); + deletedFiles.forEach(entry -> deleteFileList.add(Pair.of(partition, entry))); + }); + + List latestColumns = getLatestColumns(datasetMetaClient); + return engineContext.flatMap(deleteFileList, + deleteFileInfo -> { + if (deleteFileInfo.getRight().endsWith(HoodieFileFormat.PARQUET.getFileExtension())) { + return getColumnStats(deleteFileInfo.getKey(), deleteFileInfo.getValue(), datasetMetaClient, + latestColumns, true); + } + return Stream.empty(); + }, 1).stream().collect(Collectors.toList()); + } + + /** + * Convert restore action metadata to metadata table records. + */ + public static Map> convertMetadataToRecords( + HoodieEngineContext engineContext, List enabledPartitionTypes, + HoodieActiveTimeline metadataTableTimeline, HoodieRestoreMetadata restoreMetadata, + HoodieTableMetaClient dataMetaClient, String instantTime, Option lastSyncTs) { + final Map> partitionToRecordsMap = new HashMap<>(); + final Map> partitionToAppendedFiles = new HashMap<>(); + final Map> partitionToDeletedFiles = new HashMap<>(); + + processRestoreMetadata(metadataTableTimeline, restoreMetadata, + partitionToAppendedFiles, partitionToDeletedFiles, lastSyncTs); + + final HoodieData filesPartitionRecordsRDD = engineContext.parallelize( + convertFilesToFilesPartitionRecords(partitionToDeletedFiles, + partitionToAppendedFiles, instantTime, "Restore"), 1); + partitionToRecordsMap.put(MetadataPartitionType.FILES, filesPartitionRecordsRDD); + + if (enabledPartitionTypes.contains(MetadataPartitionType.BLOOM_FILTERS)) { + final List metadataBloomFilterRecords = convertFilesToBloomFilterRecords( + engineContext, dataMetaClient, partitionToDeletedFiles, partitionToAppendedFiles, instantTime); + if (!metadataBloomFilterRecords.isEmpty()) { + final HoodieData metadataBloomFilterRecordsRDD = engineContext.parallelize(metadataBloomFilterRecords, 1); + partitionToRecordsMap.put(MetadataPartitionType.BLOOM_FILTERS, metadataBloomFilterRecordsRDD); + } + } + + if (enabledPartitionTypes.contains(MetadataPartitionType.COLUMN_STATS)) { + final List metadataColumnStats = convertFilesToColumnStatsRecords( + engineContext, dataMetaClient, partitionToDeletedFiles, partitionToAppendedFiles, instantTime); + if (!metadataColumnStats.isEmpty()) { + final HoodieData metadataColumnStatsRDD = engineContext.parallelize(metadataColumnStats, 1); + partitionToRecordsMap.put(MetadataPartitionType.COLUMN_STATS, metadataColumnStatsRDD); + } + } + + return partitionToRecordsMap; + } + /** * Aggregates all files deleted and appended to from all rollbacks associated with a restore operation then * creates metadata table records for them. 
* - * @param restoreMetadata - * @param instantTime + * @param restoreMetadata - Restore action metadata * @return a list of metadata table records */ - public static List convertMetadataToRecords(HoodieActiveTimeline metadataTableTimeline, - HoodieRestoreMetadata restoreMetadata, String instantTime, Option lastSyncTs) { - Map> partitionToAppendedFiles = new HashMap<>(); - Map> partitionToDeletedFiles = new HashMap<>(); + private static void processRestoreMetadata(HoodieActiveTimeline metadataTableTimeline, + HoodieRestoreMetadata restoreMetadata, + Map> partitionToAppendedFiles, + Map> partitionToDeletedFiles, + Option lastSyncTs) { restoreMetadata.getHoodieRestoreMetadata().values().forEach(rms -> { - rms.forEach(rm -> processRollbackMetadata(metadataTableTimeline, rm, partitionToDeletedFiles, partitionToAppendedFiles, lastSyncTs)); + rms.forEach(rm -> processRollbackMetadata(metadataTableTimeline, rm, + partitionToDeletedFiles, partitionToAppendedFiles, lastSyncTs)); }); - - return convertFilesToRecords(partitionToDeletedFiles, partitionToAppendedFiles, instantTime, "Restore"); } - public static List convertMetadataToRecords(HoodieActiveTimeline metadataTableTimeline, - HoodieRollbackMetadata rollbackMetadata, String instantTime, - Option lastSyncTs, boolean wasSynced) { + /** + * Convert rollback action metadata to metadata table records. + */ + public static Map> convertMetadataToRecords( + HoodieEngineContext engineContext, List enabledPartitionTypes, + HoodieActiveTimeline metadataTableTimeline, HoodieRollbackMetadata rollbackMetadata, + HoodieTableMetaClient dataMetaClient, String instantTime, Option lastSyncTs, boolean wasSynced) { + final Map> partitionToRecordsMap = new HashMap<>(); - Map> partitionToAppendedFiles = new HashMap<>(); Map> partitionToDeletedFiles = new HashMap<>(); - processRollbackMetadata(metadataTableTimeline, rollbackMetadata, partitionToDeletedFiles, partitionToAppendedFiles, lastSyncTs); + Map> partitionToAppendedFiles = new HashMap<>(); + List filesPartitionRecords = convertMetadataToRollbackRecords(metadataTableTimeline, rollbackMetadata, + partitionToDeletedFiles, partitionToAppendedFiles, instantTime, lastSyncTs, wasSynced); + final HoodieData rollbackRecordsRDD = engineContext.parallelize(filesPartitionRecords, 1); + partitionToRecordsMap.put(MetadataPartitionType.FILES, rollbackRecordsRDD); + + if (enabledPartitionTypes.contains(MetadataPartitionType.BLOOM_FILTERS)) { + final List metadataBloomFilterRecords = convertFilesToBloomFilterRecords( + engineContext, dataMetaClient, partitionToDeletedFiles, partitionToAppendedFiles, instantTime); + if (!metadataBloomFilterRecords.isEmpty()) { + final HoodieData metadataBloomFilterRecordsRDD = engineContext.parallelize(metadataBloomFilterRecords, 1); + partitionToRecordsMap.put(MetadataPartitionType.BLOOM_FILTERS, metadataBloomFilterRecordsRDD); + } + } + + if (enabledPartitionTypes.contains(MetadataPartitionType.COLUMN_STATS)) { + final List metadataColumnStats = convertFilesToColumnStatsRecords( + engineContext, dataMetaClient, partitionToDeletedFiles, partitionToAppendedFiles, instantTime); + if (!metadataColumnStats.isEmpty()) { + final HoodieData metadataColumnStatsRDD = engineContext.parallelize(metadataColumnStats, 1); + partitionToRecordsMap.put(MetadataPartitionType.COLUMN_STATS, metadataColumnStatsRDD); + } + } + + return partitionToRecordsMap; + } + + /** + * Convert rollback action metadata to files partition records. 
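The commit, clean, restore, and rollback converters all repeat the same per-partition steps: build the record list, skip it if empty, then parallelize it into a `HoodieData` with parallelism 1. A hypothetical helper that would consolidate the repetition (not part of this patch, shown only to make the shared pattern explicit):

```java
// Hypothetical consolidation of the repeated "skip if empty, then parallelize" step.
private static void putIfNonEmpty(HoodieEngineContext engineContext,
                                  Map<MetadataPartitionType, HoodieData<HoodieRecord>> target,
                                  MetadataPartitionType partitionType,
                                  List<HoodieRecord> records) {
  if (!records.isEmpty()) {
    // same parallelism of 1 that the converters above use
    target.put(partitionType, engineContext.parallelize(records, 1));
  }
}
```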
+ */ + private static List convertMetadataToRollbackRecords(HoodieActiveTimeline metadataTableTimeline, + HoodieRollbackMetadata rollbackMetadata, + Map> partitionToDeletedFiles, + Map> partitionToAppendedFiles, + String instantTime, + Option lastSyncTs, boolean wasSynced) { + processRollbackMetadata(metadataTableTimeline, rollbackMetadata, partitionToDeletedFiles, + partitionToAppendedFiles, lastSyncTs); if (!wasSynced) { // Since the instant-being-rolled-back was never committed to the metadata table, the files added there // need not be deleted. For MOR Table, the rollback appends logBlocks so we need to keep the appended files. partitionToDeletedFiles.clear(); } - return convertFilesToRecords(partitionToDeletedFiles, partitionToAppendedFiles, instantTime, "Rollback"); + return convertFilesToFilesPartitionRecords(partitionToDeletedFiles, partitionToAppendedFiles, instantTime, "Rollback"); } /** * Extracts information about the deleted and append files from the {@code HoodieRollbackMetadata}. - * + *

* During a rollback files may be deleted (COW, MOR) or rollback blocks be appended (MOR only) to files. This * function will extract this change file for each partition. - * @param metadataTableTimeline Current timeline of the Metdata Table - * @param rollbackMetadata {@code HoodieRollbackMetadata} - * @param partitionToDeletedFiles The {@code Map} to fill with files deleted per partition. + * + * @param metadataTableTimeline Current timeline of the Metadata Table + * @param rollbackMetadata {@code HoodieRollbackMetadata} + * @param partitionToDeletedFiles The {@code Map} to fill with files deleted per partition. * @param partitionToAppendedFiles The {@code Map} to fill with files appended per partition and their sizes. */ - private static void processRollbackMetadata(HoodieActiveTimeline metadataTableTimeline, HoodieRollbackMetadata rollbackMetadata, + private static void processRollbackMetadata(HoodieActiveTimeline metadataTableTimeline, + HoodieRollbackMetadata rollbackMetadata, Map> partitionToDeletedFiles, Map> partitionToAppendedFiles, Option lastSyncTs) { @@ -264,23 +561,15 @@ private static void processRollbackMetadata(HoodieActiveTimeline metadataTableTi partitionToAppendedFiles.get(partition).merge(new Path(path).getName(), size, fileMergeFn); }); } - - if (pm.getWrittenLogFiles() != null && !pm.getWrittenLogFiles().isEmpty()) { - if (!partitionToAppendedFiles.containsKey(partition)) { - partitionToAppendedFiles.put(partition, new HashMap<>()); - } - - // Extract appended file name from the absolute paths saved in getWrittenLogFiles() - pm.getWrittenLogFiles().forEach((path, size) -> { - partitionToAppendedFiles.get(partition).merge(new Path(path).getName(), size, fileMergeFn); - }); - } }); } - private static List convertFilesToRecords(Map> partitionToDeletedFiles, - Map> partitionToAppendedFiles, String instantTime, - String operation) { + /** + * Convert rollback action metadata to files partition records. + */ + private static List convertFilesToFilesPartitionRecords(Map> partitionToDeletedFiles, + Map> partitionToAppendedFiles, + String instantTime, String operation) { List records = new LinkedList<>(); int[] fileChangeCount = {0, 0}; // deletes, appends @@ -319,9 +608,88 @@ private static List convertFilesToRecords(Map return records; } + /** + * Convert rollback action metadata to bloom filter index records. + */ + private static List convertFilesToBloomFilterRecords(HoodieEngineContext engineContext, + HoodieTableMetaClient dataMetaClient, + Map> partitionToDeletedFiles, + Map> partitionToAppendedFiles, + String instantTime) { + List records = new LinkedList<>(); + partitionToDeletedFiles.forEach((partitionName, deletedFileList) -> deletedFileList.forEach(deletedFile -> { + if (!FSUtils.isBaseFile(new Path(deletedFile))) { + return; + } + + final String partition = partitionName.equals(EMPTY_PARTITION_NAME) ? NON_PARTITIONED_NAME : partitionName; + records.add(HoodieMetadataPayload.createBloomFilterMetadataRecord( + partition, deletedFile, instantTime, ByteBuffer.allocate(0), true)); + })); + + partitionToAppendedFiles.forEach((partitionName, appendedFileMap) -> { + final String partition = partitionName.equals(EMPTY_PARTITION_NAME) ? 
NON_PARTITIONED_NAME : partitionName; + appendedFileMap.forEach((appendedFile, length) -> { + if (!FSUtils.isBaseFile(new Path(appendedFile))) { + return; + } + final String pathWithPartition = partitionName + "/" + appendedFile; + final Path appendedFilePath = new Path(dataMetaClient.getBasePath(), pathWithPartition); + try { + HoodieFileReader fileReader = + HoodieFileReaderFactory.getFileReader(dataMetaClient.getHadoopConf(), appendedFilePath); + final BloomFilter fileBloomFilter = fileReader.readBloomFilter(); + if (fileBloomFilter == null) { + LOG.error("Failed to read bloom filter for " + appendedFilePath); + return; + } + ByteBuffer bloomByteBuffer = ByteBuffer.wrap(fileBloomFilter.serializeToString().getBytes()); + HoodieRecord record = HoodieMetadataPayload.createBloomFilterMetadataRecord( + partition, appendedFile, instantTime, bloomByteBuffer, false); + records.add(record); + fileReader.close(); + } catch (IOException e) { + LOG.error("Failed to get bloom filter for file: " + appendedFilePath); + } + }); + }); + return records; + } + + /** + * Convert rollback action metadata to column stats index records. + */ + private static List convertFilesToColumnStatsRecords(HoodieEngineContext engineContext, + HoodieTableMetaClient datasetMetaClient, + Map> partitionToDeletedFiles, + Map> partitionToAppendedFiles, + String instantTime) { + List records = new LinkedList<>(); + List latestColumns = getLatestColumns(datasetMetaClient); + partitionToDeletedFiles.forEach((partitionName, deletedFileList) -> deletedFileList.forEach(deletedFile -> { + final String partition = partitionName.equals(EMPTY_PARTITION_NAME) ? NON_PARTITIONED_NAME : partitionName; + if (deletedFile.endsWith(HoodieFileFormat.PARQUET.getFileExtension())) { + final String filePathWithPartition = partitionName + "/" + deletedFile; + records.addAll(getColumnStats(partition, filePathWithPartition, datasetMetaClient, + latestColumns, true).collect(Collectors.toList())); + } + })); + + partitionToAppendedFiles.forEach((partitionName, appendedFileMap) -> appendedFileMap.forEach( + (appendedFile, size) -> { + final String partition = partitionName.equals(EMPTY_PARTITION_NAME) ? NON_PARTITIONED_NAME : partitionName; + if (appendedFile.endsWith(HoodieFileFormat.PARQUET.getFileExtension())) { + final String filePathWithPartition = partitionName + "/" + appendedFile; + records.addAll(getColumnStats(partition, filePathWithPartition, datasetMetaClient, + latestColumns, false).collect(Collectors.toList())); + } + })); + return records; + } + /** * Map a record key to a file group in partition of interest. - * + *

* Note: For hashing, the algorithm is same as String.hashCode() but is being defined here as hashCode() * implementation is not guaranteed by the JVM to be consistent across JVM versions and implementations. * @@ -349,7 +717,7 @@ public static int mapRecordKeyToFileGroupIndex(String recordKey, int numFileGrou */ public static List getPartitionLatestMergedFileSlices(HoodieTableMetaClient metaClient, String partition) { LOG.info("Loading latest merged file slices for metadata table partition " + partition); - return getPartitionFileSlices(metaClient, partition, true); + return getPartitionFileSlices(metaClient, Option.empty(), partition, true); } /** @@ -357,27 +725,23 @@ public static List getPartitionLatestMergedFileSlices(HoodieTableMeta * returned is sorted in the correct order of file group name. * * @param metaClient - Instance of {@link HoodieTableMetaClient}. + * @param fsView - Metadata table filesystem view * @param partition - The name of the partition whose file groups are to be loaded. * @return List of latest file slices for all file groups in a given partition. */ - public static List getPartitionLatestFileSlices(HoodieTableMetaClient metaClient, String partition) { + public static List getPartitionLatestFileSlices(HoodieTableMetaClient metaClient, + Option fsView, String partition) { LOG.info("Loading latest file slices for metadata table partition " + partition); - return getPartitionFileSlices(metaClient, partition, false); + return getPartitionFileSlices(metaClient, fsView, partition, false); } /** - * Get the latest file slices for a given partition. + * Get metadata table file system view. * - * @param metaClient - Instance of {@link HoodieTableMetaClient}. - * @param partition - The name of the partition whose file groups are to be loaded. - * @param mergeFileSlices - When enabled, will merge the latest file slices with the last known - * completed instant. This is useful for readers when there are pending - * compactions. MergeFileSlices when disabled, will return the latest file - * slices without any merging, and this is needed for the writers. - * @return List of latest file slices for all file groups in a given partition. + * @param metaClient - Metadata table meta client + * @return Filesystem view for the metadata table */ - private static List getPartitionFileSlices(HoodieTableMetaClient metaClient, String partition, - boolean mergeFileSlices) { + public static HoodieTableFileSystemView getFileSystemView(HoodieTableMetaClient metaClient) { // If there are no commits on the metadata table then the table's // default FileSystemView will not return any file slices even // though we may have initialized them. @@ -387,16 +751,175 @@ private static List getPartitionFileSlices(HoodieTableMetaClient meta HoodieActiveTimeline.createNewInstantTime()); timeline = new HoodieDefaultTimeline(Arrays.asList(instant).stream(), metaClient.getActiveTimeline()::getInstantDetails); } + return new HoodieTableFileSystemView(metaClient, timeline); + } - HoodieTableFileSystemView fsView = new HoodieTableFileSystemView(metaClient, timeline); + /** + * Get the latest file slices for a given partition. + * + * @param metaClient - Instance of {@link HoodieTableMetaClient}. + * @param partition - The name of the partition whose file groups are to be loaded. + * @param mergeFileSlices - When enabled, will merge the latest file slices with the last known + * completed instant. This is useful for readers when there are pending + * compactions. 
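Given the description above, the key-to-file-group mapping plausibly amounts to the following sketch: a fixed `String.hashCode()`-style recurrence, folded into the file group count. This is an illustration of the stated semantics, not a verbatim copy of the method body:

```java
// Sketch of mapping a record key to one of N metadata file groups, using a
// pinned String.hashCode()-style hash as the javadoc above describes.
static int mapRecordKeyToFileGroupIndex(String recordKey, int numFileGroups) {
  int hash = 0;
  for (int i = 0; i < recordKey.length(); i++) {
    hash = 31 * hash + recordKey.charAt(i); // same recurrence as String.hashCode()
  }
  return Math.abs(Math.abs(hash) % numFileGroups); // fold into [0, numFileGroups)
}
```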
MergeFileSlices when disabled, will return the latest file + * slices without any merging, and this is needed for the writers. + * @return List of latest file slices for all file groups in a given partition. + */ + private static List getPartitionFileSlices(HoodieTableMetaClient metaClient, + Option fileSystemView, + String partition, + boolean mergeFileSlices) { + HoodieTableFileSystemView fsView = fileSystemView.orElse(getFileSystemView(metaClient)); Stream fileSliceStream; if (mergeFileSlices) { fileSliceStream = fsView.getLatestMergedFileSlicesBeforeOrOn( - partition, timeline.filterCompletedInstants().lastInstant().get().getTimestamp()); + partition, metaClient.getActiveTimeline().filterCompletedInstants().lastInstant().get().getTimestamp()); } else { fileSliceStream = fsView.getLatestFileSlices(partition); } return fileSliceStream.sorted((s1, s2) -> s1.getFileId().compareTo(s2.getFileId())).collect(Collectors.toList()); } + public static List convertMetadataToColumnStatsRecords(HoodieCommitMetadata commitMetadata, + HoodieEngineContext engineContext, + HoodieTableMetaClient dataMetaClient, + boolean isMetaIndexColumnStatsForAllColumns, + String instantTime) { + + try { + List allWriteStats = commitMetadata.getPartitionToWriteStats().values().stream() + .flatMap(entry -> entry.stream()).collect(Collectors.toList()); + return HoodieTableMetadataUtil.createColumnStatsFromWriteStats(engineContext, dataMetaClient, allWriteStats, + isMetaIndexColumnStatsForAllColumns); + } catch (Exception e) { + throw new HoodieException("Failed to generate column stats records for metadata table ", e); + } + } + + /** + * Create column stats from write status. + * + * @param engineContext - Engine context + * @param datasetMetaClient - Dataset meta client + * @param allWriteStats - Write status to convert + * @param isMetaIndexColumnStatsForAllColumns - Are all columns enabled for indexing + */ + public static List createColumnStatsFromWriteStats(HoodieEngineContext engineContext, + HoodieTableMetaClient datasetMetaClient, + List allWriteStats, + boolean isMetaIndexColumnStatsForAllColumns) throws Exception { + if (allWriteStats.isEmpty()) { + return Collections.emptyList(); + } + + List prunedWriteStats = allWriteStats.stream().filter(writeStat -> { + return !(writeStat instanceof HoodieDeltaWriteStat); + }).collect(Collectors.toList()); + if (prunedWriteStats.isEmpty()) { + return Collections.emptyList(); + } + + return engineContext.flatMap(prunedWriteStats, + writeStat -> translateWriteStatToColumnStats(writeStat, datasetMetaClient, + getLatestColumns(datasetMetaClient, isMetaIndexColumnStatsForAllColumns)), + prunedWriteStats.size()); + } + + /** + * Get the latest columns for the table for column stats indexing. + * + * @param datasetMetaClient - Data table meta client + * @param isMetaIndexColumnStatsForAllColumns - Is column stats indexing enabled for all columns + */ + private static List getLatestColumns(HoodieTableMetaClient datasetMetaClient, boolean isMetaIndexColumnStatsForAllColumns) { + if (!isMetaIndexColumnStatsForAllColumns + || datasetMetaClient.getCommitsTimeline().filterCompletedInstants().countInstants() < 1) { + return Collections.singletonList(datasetMetaClient.getTableConfig().getRecordKeyFieldProp()); + } + + TableSchemaResolver schemaResolver = new TableSchemaResolver(datasetMetaClient); + // consider nested fields as well. 
if column stats is enabled only for a subset of columns, + // directly use them instead of all columns from the latest table schema + try { + return schemaResolver.getTableAvroSchema().getFields().stream() + .map(entry -> entry.name()).collect(Collectors.toList()); + } catch (Exception e) { + throw new HoodieException("Failed to get latest columns for " + datasetMetaClient.getBasePath()); + } + } + + private static List getLatestColumns(HoodieTableMetaClient datasetMetaClient) { + return getLatestColumns(datasetMetaClient, false); + } + + public static Stream translateWriteStatToColumnStats(HoodieWriteStat writeStat, + HoodieTableMetaClient datasetMetaClient, + List latestColumns) { + return getColumnStats(writeStat.getPartitionPath(), writeStat.getPath(), datasetMetaClient, latestColumns, false); + + } + + private static Stream getColumnStats(final String partitionPath, final String filePathWithPartition, + HoodieTableMetaClient datasetMetaClient, + List columns, boolean isDeleted) { + final String partition = partitionPath.equals(EMPTY_PARTITION_NAME) ? NON_PARTITIONED_NAME : partitionPath; + final int offset = partition.equals(NON_PARTITIONED_NAME) ? (filePathWithPartition.startsWith("/") ? 1 : 0) + : partition.length() + 1; + final String fileName = filePathWithPartition.substring(offset); + if (!FSUtils.isBaseFile(new Path(fileName))) { + return Stream.empty(); + } + + if (filePathWithPartition.endsWith(HoodieFileFormat.PARQUET.getFileExtension())) { + List> columnRangeMetadataList = new ArrayList<>(); + final Path fullFilePath = new Path(datasetMetaClient.getBasePath(), filePathWithPartition); + if (!isDeleted) { + try { + columnRangeMetadataList = new ParquetUtils().readRangeFromParquetMetadata( + datasetMetaClient.getHadoopConf(), fullFilePath, columns); + } catch (Exception e) { + LOG.error("Failed to read column stats for " + fullFilePath, e); + } + } else { + columnRangeMetadataList = + columns.stream().map(entry -> new HoodieColumnRangeMetadata(fileName, + entry, null, null, 0, 0, 0, 0)) + .collect(Collectors.toList()); + } + return HoodieMetadataPayload.createColumnStatsRecords(partitionPath, columnRangeMetadataList, isDeleted); + } else { + throw new HoodieException("Column range index not supported for filePathWithPartition " + fileName); + } + } + + /** + * Get file group count for a metadata table partition. 
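Each metadata partition is statically sized into a number of file groups, and combined with the per-partition `fileIdPrefix` introduced below, file group IDs can be derived deterministically. A sketch of the naming this implies; the zero-padded width is an assumption not spelled out in this hunk, and `metaClientOpt`, `fsViewOpt`, `metadataConfig`, and `isBootstrapCompleted` stand in for caller-supplied values:

```java
// Assumed naming: fileIdPrefix + zero-padded index, e.g. "bloom-filters-0000".
int count = HoodieTableMetadataUtil.getPartitionFileGroupCount(
    MetadataPartitionType.BLOOM_FILTERS, metaClientOpt, fsViewOpt,
    metadataConfig, isBootstrapCompleted);
for (int i = 0; i < count; i++) {
  String fileGroupId =
      String.format("%s%04d", MetadataPartitionType.BLOOM_FILTERS.getFileIdPrefix(), i);
  // record keys hash into one of these file groups (see mapRecordKeyToFileGroupIndex)
}
```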
+ * + * @param partitionType - Metadata table partition type + * @param metaClient - Metadata table meta client + * @param fsView - Filesystem view + * @param metadataConfig - Metadata config + * @param isBootstrapCompleted - Is bootstrap completed for the metadata table + * @return File group count for the requested metadata partition type + */ + public static int getPartitionFileGroupCount(final MetadataPartitionType partitionType, + final Option metaClient, + final Option fsView, + final HoodieMetadataConfig metadataConfig, boolean isBootstrapCompleted) { + if (isBootstrapCompleted) { + final List latestFileSlices = HoodieTableMetadataUtil + .getPartitionLatestFileSlices(metaClient.get(), fsView, partitionType.getPartitionPath()); + return Math.max(latestFileSlices.size(), 1); + } + + switch (partitionType) { + case BLOOM_FILTERS: + return metadataConfig.getBloomFilterIndexFileGroupCount(); + case COLUMN_STATS: + return metadataConfig.getColumnStatsIndexFileGroupCount(); + default: + return 1; + } + } + } diff --git a/hudi-common/src/main/java/org/apache/hudi/metadata/MetadataPartitionType.java b/hudi-common/src/main/java/org/apache/hudi/metadata/MetadataPartitionType.java index 380f4d04d34a6..9fb268e7de1b0 100644 --- a/hudi-common/src/main/java/org/apache/hudi/metadata/MetadataPartitionType.java +++ b/hudi-common/src/main/java/org/apache/hudi/metadata/MetadataPartitionType.java @@ -22,19 +22,23 @@ import java.util.List; public enum MetadataPartitionType { - FILES("files", "files-"); + FILES(HoodieTableMetadataUtil.PARTITION_NAME_FILES, "files-"), + COLUMN_STATS(HoodieTableMetadataUtil.PARTITION_NAME_COLUMN_STATS, "col-stats-"), + BLOOM_FILTERS(HoodieTableMetadataUtil.PARTITION_NAME_BLOOM_FILTERS, "bloom-filters-"); - // refers to partition path in metadata table. + // Partition path in metadata table. private final String partitionPath; - // refers to fileId prefix used for all file groups in this partition. + // FileId prefix used for all file groups in this partition. 
private final String fileIdPrefix; + // Total file groups + private int fileGroupCount = 1; - MetadataPartitionType(String partitionPath, String fileIdPrefix) { + MetadataPartitionType(final String partitionPath, final String fileIdPrefix) { this.partitionPath = partitionPath; this.fileIdPrefix = fileIdPrefix; } - public String partitionPath() { + public String getPartitionPath() { return partitionPath; } @@ -42,7 +46,28 @@ public String getFileIdPrefix() { return fileIdPrefix; } - public static List all() { - return Arrays.asList(MetadataPartitionType.FILES.partitionPath()); + void setFileGroupCount(final int fileGroupCount) { + this.fileGroupCount = fileGroupCount; + } + + public int getFileGroupCount() { + return this.fileGroupCount; + } + + public static List allPaths() { + return Arrays.asList( + FILES.getPartitionPath(), + COLUMN_STATS.getPartitionPath(), + BLOOM_FILTERS.getPartitionPath() + ); + } + + @Override + public String toString() { + return "Metadata partition {" + + "name: " + getPartitionPath() + + ", prefix: " + getFileIdPrefix() + + ", groups: " + getFileGroupCount() + + "}"; } } diff --git a/hudi-common/src/main/java/org/apache/hudi/parquet/io/ByteBufferBackedInputFile.java b/hudi-common/src/main/java/org/apache/hudi/parquet/io/ByteBufferBackedInputFile.java new file mode 100644 index 0000000000000..40454d306ac78 --- /dev/null +++ b/hudi-common/src/main/java/org/apache/hudi/parquet/io/ByteBufferBackedInputFile.java @@ -0,0 +1,63 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hudi.parquet.io; + +import org.apache.hudi.common.util.io.ByteBufferBackedInputStream; +import org.apache.parquet.io.DelegatingSeekableInputStream; +import org.apache.parquet.io.InputFile; +import org.apache.parquet.io.SeekableInputStream; + +/** + * Implementation of {@link InputFile} backed by {@code byte[]} buffer + */ +public class ByteBufferBackedInputFile implements InputFile { + private final byte[] buffer; + private final int offset; + private final int length; + + public ByteBufferBackedInputFile(byte[] buffer, int offset, int length) { + this.buffer = buffer; + this.offset = offset; + this.length = length; + } + + public ByteBufferBackedInputFile(byte[] buffer) { + this(buffer, 0, buffer.length); + } + + @Override + public long getLength() { + return length; + } + + @Override + public SeekableInputStream newStream() { + return new DelegatingSeekableInputStream(new ByteBufferBackedInputStream(buffer, offset, length)) { + @Override + public long getPos() { + return ((ByteBufferBackedInputStream) getStream()).getPosition(); + } + + @Override + public void seek(long newPos) { + ((ByteBufferBackedInputStream) getStream()).seek(newPos); + } + }; + } +} diff --git a/hudi-common/src/main/java/org/apache/hudi/parquet/io/OutputStreamBackedOutputFile.java b/hudi-common/src/main/java/org/apache/hudi/parquet/io/OutputStreamBackedOutputFile.java new file mode 100644 index 0000000000000..48c2c82e7b422 --- /dev/null +++ b/hudi-common/src/main/java/org/apache/hudi/parquet/io/OutputStreamBackedOutputFile.java @@ -0,0 +1,94 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
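`ByteBufferBackedInputFile` lets Parquet's reader stack operate on an in-memory byte range instead of a filesystem path, which is useful for parquet content embedded in log files. A hedged usage sketch, assuming a parquet-mr version whose `AvroParquetReader.builder` accepts an `InputFile`:

```java
import org.apache.avro.generic.GenericRecord;
import org.apache.hudi.parquet.io.ByteBufferBackedInputFile;
import org.apache.parquet.avro.AvroParquetReader;
import org.apache.parquet.hadoop.ParquetReader;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

// Sketch: reading parquet content that already sits in memory, e.g. the bytes
// of a parquet data block pulled out of a Hudi log file.
static List<GenericRecord> readInMemoryParquet(byte[] parquetBytes) throws IOException {
  List<GenericRecord> result = new ArrayList<>();
  try (ParquetReader<GenericRecord> reader =
           AvroParquetReader.<GenericRecord>builder(new ByteBufferBackedInputFile(parquetBytes)).build()) {
    GenericRecord record;
    while ((record = reader.read()) != null) {
      result.add(record);
    }
  }
  return result;
}
```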
+ */ + +package org.apache.hudi.parquet.io; + +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.parquet.io.OutputFile; +import org.apache.parquet.io.PositionOutputStream; + +import javax.annotation.Nonnull; +import java.io.IOException; + +/** + * Implementation of the {@link OutputFile} backed by {@link java.io.OutputStream} + */ +public class OutputStreamBackedOutputFile implements OutputFile { + + private static final long DEFAULT_BLOCK_SIZE = 1024L * 1024L; + + private final FSDataOutputStream outputStream; + + public OutputStreamBackedOutputFile(FSDataOutputStream outputStream) { + this.outputStream = outputStream; + } + + @Override + public PositionOutputStream create(long blockSizeHint) { + return new PositionOutputStreamAdapter(outputStream); + } + + @Override + public PositionOutputStream createOrOverwrite(long blockSizeHint) { + return create(blockSizeHint); + } + + @Override + public boolean supportsBlockSize() { + return false; + } + + @Override + public long defaultBlockSize() { + return DEFAULT_BLOCK_SIZE; + } + + private static class PositionOutputStreamAdapter extends PositionOutputStream { + private final FSDataOutputStream delegate; + + PositionOutputStreamAdapter(FSDataOutputStream delegate) { + this.delegate = delegate; + } + + @Override + public long getPos() throws IOException { + return delegate.getPos(); + } + + @Override + public void write(int b) throws IOException { + delegate.write(b); + } + + @Override + public void write(@Nonnull byte[] buffer, int off, int len) throws IOException { + delegate.write(buffer, off, len); + } + + @Override + public void flush() throws IOException { + delegate.flush(); + } + + @Override + public void close() { + // We're deliberately not closing the delegate stream here to allow caller + // to explicitly manage its lifecycle + } + } +} diff --git a/hudi-common/src/main/scala/org/apache/hudi/HoodieTableFileIndexBase.scala b/hudi-common/src/main/scala/org/apache/hudi/HoodieTableFileIndexBase.scala deleted file mode 100644 index f25c7d99d5a5e..0000000000000 --- a/hudi-common/src/main/scala/org/apache/hudi/HoodieTableFileIndexBase.scala +++ /dev/null @@ -1,300 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
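The mirror-image `OutputStreamBackedOutputFile` above lets a Parquet writer target an already-open `FSDataOutputStream`, with the adapter deliberately leaving the stream open for the caller. A hedged usage sketch, assuming `AvroParquetWriter.builder(OutputFile)` is available in the bundled parquet-mr:

```java
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericRecord;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hudi.parquet.io.OutputStreamBackedOutputFile;
import org.apache.parquet.avro.AvroParquetWriter;
import org.apache.parquet.hadoop.ParquetWriter;

import java.io.IOException;
import java.util.List;

// Sketch: writing parquet into a caller-managed stream; the adapter's close()
// intentionally leaves `out` open, so the caller closes it afterwards.
static void writeParquetToStream(FSDataOutputStream out, Schema schema,
                                 List<GenericRecord> records) throws IOException {
  try (ParquetWriter<GenericRecord> writer =
           AvroParquetWriter.<GenericRecord>builder(new OutputStreamBackedOutputFile(out))
               .withSchema(schema)
               .build()) {
    for (GenericRecord record : records) {
      writer.write(record);
    }
  }
  // `out` is still open here; the caller flushes/closes it explicitly
}
```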
- */ - -package org.apache.hudi - -import org.apache.hadoop.fs.{FileStatus, Path} -import org.apache.hudi.common.config.{HoodieMetadataConfig, TypedProperties} -import org.apache.hudi.common.engine.HoodieEngineContext -import org.apache.hudi.common.fs.FSUtils -import org.apache.hudi.common.model.HoodieTableType.MERGE_ON_READ -import org.apache.hudi.common.model.{FileSlice, HoodieTableQueryType} -import org.apache.hudi.common.table.HoodieTableMetaClient -import org.apache.hudi.common.table.view.{FileSystemViewStorageConfig, HoodieTableFileSystemView} - -import scala.collection.JavaConverters._ -import scala.collection.JavaConversions._ -import scala.collection.mutable - -/** - * Common (engine-agnostic) File Index implementation enabling individual query engines to - * list Hudi Table contents based on the - * - *

- * <ul>
- *   <li>Table type (MOR, COW)</li>
- *   <li>Query type (snapshot, read_optimized, incremental)</li>
- *   <li>Query instant/range</li>
- * </ul>
- * - * @param engineContext Hudi engine-specific context - * @param metaClient Hudi table's meta-client - * @param configProperties unifying configuration (in the form of generic properties) - * @param queryType target query type - * @param queryPaths target DFS paths being queried - * @param specifiedQueryInstant instant as of which table is being queried - * @param shouldIncludePendingCommits flags whether file-index should exclude any pending operations - * @param fileStatusCache transient cache of fetched [[FileStatus]]es - */ -abstract class HoodieTableFileIndexBase(engineContext: HoodieEngineContext, - metaClient: HoodieTableMetaClient, - configProperties: TypedProperties, - queryType: HoodieTableQueryType, - protected val queryPaths: Seq[Path], - specifiedQueryInstant: Option[String] = None, - shouldIncludePendingCommits: Boolean = false, - @transient fileStatusCache: FileStatusCacheTrait) { - /** - * Get all completeCommits. - */ - lazy val completedCommits = metaClient.getCommitsTimeline - .filterCompletedInstants().getInstants.iterator().toList.map(_.getTimestamp) - - private lazy val _partitionColumns: Array[String] = - metaClient.getTableConfig.getPartitionFields.orElse(Array[String]()) - - private lazy val fileSystemStorageConfig = FileSystemViewStorageConfig.newBuilder() - .fromProperties(configProperties) - .build() - private lazy val metadataConfig = HoodieMetadataConfig.newBuilder - .fromProperties(configProperties) - .build() - - private val tableType = metaClient.getTableType - - protected val basePath: String = metaClient.getBasePath - - @transient - @volatile protected var cachedFileSize: Long = 0L - @transient - @volatile protected var cachedAllInputFileSlices: Map[PartitionPath, Seq[FileSlice]] = _ - @volatile protected var queryAsNonePartitionedTable: Boolean = _ - @transient - @volatile private var fileSystemView: HoodieTableFileSystemView = _ - - refresh0() - - /** - * Fetch list of latest base files and log files per partition. - * - * @return mapping from string partition paths to its base/log files - */ - def listFileSlices(): Map[String, Seq[FileSlice]] = { - if (queryAsNonePartitionedTable) { - // Read as Non-Partitioned table. 
- cachedAllInputFileSlices.map(entry => (entry._1.path, entry._2)) - } else { - cachedAllInputFileSlices.keys.toSeq.map(partition => { - (partition.path, cachedAllInputFileSlices(partition)) - }).toMap - } - } - - private def refresh0(): Unit = { - val startTime = System.currentTimeMillis() - val partitionFiles = loadPartitionPathFiles() - val allFiles = partitionFiles.values.reduceOption(_ ++ _) - .getOrElse(Array.empty[FileStatus]) - - metaClient.reloadActiveTimeline() - - val activeTimeline = getActiveTimeline - val latestInstant = activeTimeline.lastInstant() - // TODO we can optimize the flow by: - // - First fetch list of files from instants of interest - // - Load FileStatus's - fileSystemView = new HoodieTableFileSystemView(metaClient, activeTimeline, allFiles) - val queryInstant = if (specifiedQueryInstant.isDefined) { - specifiedQueryInstant - } else if (latestInstant.isPresent) { - Some(latestInstant.get.getTimestamp) - } else { - None - } - - (tableType, queryType) match { - case (MERGE_ON_READ, HoodieTableQueryType.QUERY_TYPE_SNAPSHOT) => - // Fetch and store latest base and log files, and their sizes - cachedAllInputFileSlices = partitionFiles.map(p => { - val latestSlices = if (queryInstant.isDefined) { - fileSystemView.getLatestMergedFileSlicesBeforeOrOn(p._1.path, queryInstant.get) - .iterator().asScala.toSeq - } else { - Seq() - } - (p._1, latestSlices) - }) - cachedFileSize = cachedAllInputFileSlices.values.flatten.map(fileSlice => { - if (fileSlice.getBaseFile.isPresent) { - fileSlice.getBaseFile.get().getFileLen + fileSlice.getLogFiles.iterator().asScala.map(_.getFileSize).sum - } else { - fileSlice.getLogFiles.iterator().asScala.map(_.getFileSize).sum - } - }).sum - case (_, _) => - // Fetch and store latest base files and its sizes - cachedAllInputFileSlices = partitionFiles.map(p => { - val fileSlices = specifiedQueryInstant - .map(instant => - fileSystemView.getLatestFileSlicesBeforeOrOn(p._1.path, instant, true)) - .getOrElse(fileSystemView.getLatestFileSlices(p._1.path)) - .iterator().asScala.toSeq - (p._1, fileSlices) - }) - cachedFileSize = cachedAllInputFileSlices.values.flatten.map(fileSliceSize).sum - } - - // If the partition value contains InternalRow.empty, we query it as a non-partitioned table. - queryAsNonePartitionedTable = partitionFiles.keys.exists(p => p.values.isEmpty) - val flushSpend = System.currentTimeMillis() - startTime - - logInfo(s"Refresh table ${metaClient.getTableConfig.getTableName}," + - s" spend: $flushSpend ms") - } - - protected def refresh(): Unit = { - fileStatusCache.invalidate() - refresh0() - } - - private def getActiveTimeline = { - val timeline = metaClient.getActiveTimeline.getCommitsTimeline - if (shouldIncludePendingCommits) { - timeline - } else { - timeline.filterCompletedInstants() - } - } - - private def fileSliceSize(fileSlice: FileSlice): Long = { - val logFileSize = fileSlice.getLogFiles.iterator().asScala.map(_.getFileSize).filter(_ > 0).sum - if (fileSlice.getBaseFile.isPresent) { - fileSlice.getBaseFile.get().getFileLen + logFileSize - } else { - logFileSize - } - } - - /** - * Load all partition paths and it's files under the query table path. - */ - private def loadPartitionPathFiles(): Map[PartitionPath, Array[FileStatus]] = { - val partitionPaths = getAllQueryPartitionPaths - // List files in all of the partition path. 
- val pathToFetch = mutable.ArrayBuffer[PartitionPath]() - val cachePartitionToFiles = mutable.Map[PartitionPath, Array[FileStatus]]() - // Fetch from the FileStatusCache - partitionPaths.foreach { partitionPath => - fileStatusCache.get(partitionPath.fullPartitionPath(basePath)) match { - case Some(filesInPartition) => - cachePartitionToFiles.put(partitionPath, filesInPartition) - - case None => pathToFetch.append(partitionPath) - } - } - - val fetchedPartitionToFiles = - if (pathToFetch.nonEmpty) { - val fullPartitionPathsToFetch = pathToFetch.map(p => (p, p.fullPartitionPath(basePath).toString)).toMap - val partitionToFilesMap = FSUtils.getFilesInPartitions(engineContext, metadataConfig, basePath, - fullPartitionPathsToFetch.values.toArray, fileSystemStorageConfig.getSpillableDir) - fullPartitionPathsToFetch.map(p => { - (p._1, partitionToFilesMap.get(p._2)) - }) - } else { - Map.empty[PartitionPath, Array[FileStatus]] - } - - // Update the fileStatusCache - fetchedPartitionToFiles.foreach { - case (partitionRowPath, filesInPartition) => - fileStatusCache.put(partitionRowPath.fullPartitionPath(basePath), filesInPartition) - } - cachePartitionToFiles.toMap ++ fetchedPartitionToFiles - } - - def getAllQueryPartitionPaths: Seq[PartitionPath] = { - val queryRelativePartitionPaths = queryPaths.map(FSUtils.getRelativePartitionPath(new Path(basePath), _)) - // Load all the partition path from the basePath, and filter by the query partition path. - // TODO load files from the queryRelativePartitionPaths directly. - val partitionPaths = FSUtils.getAllPartitionPaths(engineContext, metadataConfig, basePath).asScala - .filter(path => queryRelativePartitionPaths.exists(path.startsWith)) - - val partitionSchema = _partitionColumns - - // Convert partition's path into partition descriptor - partitionPaths.map { partitionPath => - val partitionColumnValues = parsePartitionColumnValues(partitionSchema, partitionPath) - PartitionPath(partitionPath, partitionColumnValues) - } - } - - /** - * Parses partition columns' values from the provided partition's path, returning list of - * values (that might have engine-specific representation) - * - * @param partitionColumns partitioning columns identifying the partition - * @param partitionPath partition's path to parse partitioning columns' values from - */ - protected def parsePartitionColumnValues(partitionColumns: Array[String], partitionPath: String): Array[Any] - - // TODO eval whether we should just use logger directly - protected def logWarning(str: => String): Unit - protected def logInfo(str: => String): Unit - - /** - * Represents a partition as a tuple of - *
- * <ul>
- *   <li>Actual partition path (relative to the table's base path)</li>
- *   <li>Values of the corresponding columns table is being partitioned by (partitioning columns)</li>
- * </ul>
- * - * E.g. PartitionPath("2021/02/01", Array("2021","02","01")) - * - * NOTE: Partitioning column values might have engine specific representation (for ex, - * {@code UTF8String} for Spark, etc) and are solely used in partition pruning in a very - * engine-specific way - * - * @param values values of the corresponding partitioning columns - * @param path partition's path - * - * TODO expose as a trait and make impls engine-specific (current impl is tailored for Spark) - */ - case class PartitionPath(path: String, values: Array[Any]) { - override def equals(other: Any): Boolean = other match { - case PartitionPath(otherPath, _) => path == otherPath - case _ => false - } - - override def hashCode(): Int = { - path.hashCode - } - - def fullPartitionPath(basePath: String): Path = { - if (path.isEmpty) { - new Path(basePath) // This is a non-partition path - } else { - new Path(basePath, path) - } - } - } -} - -trait FileStatusCacheTrait { - def get(path: Path): Option[Array[FileStatus]] - def put(path: Path, leafFiles: Array[FileStatus]): Unit - def invalidate(): Unit -} diff --git a/hudi-common/src/test/java/org/apache/hudi/common/fs/TestFSUtils.java b/hudi-common/src/test/java/org/apache/hudi/common/fs/TestFSUtils.java index e4460ce629f40..f51702a447258 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/fs/TestFSUtils.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/fs/TestFSUtils.java @@ -68,8 +68,8 @@ public class TestFSUtils extends HoodieCommonTestHarness { private final long minRollbackToKeep = 10; private final long minCleanToKeep = 10; - private static final String TEST_WRITE_TOKEN = "1-0-1"; - private static final String BASE_FILE_EXTENSION = HoodieTableConfig.BASE_FILE_FORMAT.defaultValue().getFileExtension(); + private static String TEST_WRITE_TOKEN = "1-0-1"; + public static final String BASE_FILE_EXTENSION = HoodieTableConfig.BASE_FILE_FORMAT.defaultValue().getFileExtension(); @Rule public final EnvironmentVariables environmentVariables = new EnvironmentVariables(); diff --git a/hudi-common/src/test/java/org/apache/hudi/common/fs/TestFSUtilsWithRetryWrapperEnable.java b/hudi-common/src/test/java/org/apache/hudi/common/fs/TestFSUtilsWithRetryWrapperEnable.java new file mode 100644 index 0000000000000..0b849ebec8185 --- /dev/null +++ b/hudi-common/src/test/java/org/apache/hudi/common/fs/TestFSUtilsWithRetryWrapperEnable.java @@ -0,0 +1,210 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hudi.common.fs; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.LocatedFileStatus; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.RemoteIterator; +import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.util.Progressable; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import java.io.FileNotFoundException; +import java.io.IOException; +import java.net.URI; +import java.util.Arrays; +import java.util.List; + +import static org.junit.jupiter.api.Assertions.assertThrows; + +/** + * Tests file system utils with the retry wrapper enabled. + * P.S. This extends TestFSUtils and sets up a HoodieWrapperFileSystem for the metaClient, so all the TestFSUtils UTs run with the retry wrapper enabled. + */ +public class TestFSUtilsWithRetryWrapperEnable extends TestFSUtils { + + private static final String EXCEPTION_MESSAGE = "Fake runtime exception here."; + private long maxRetryIntervalMs; + private int maxRetryNumbers; + private long initialRetryIntervalMs; + + @Override + @BeforeEach + public void setUp() throws IOException { + initMetaClient(); + basePath = "file:" + basePath; + FileSystemRetryConfig fileSystemRetryConfig = FileSystemRetryConfig.newBuilder().withFileSystemActionRetryEnabled(true).build(); + maxRetryIntervalMs = fileSystemRetryConfig.getMaxRetryIntervalMs(); + maxRetryNumbers = fileSystemRetryConfig.getMaxRetryNumbers(); + initialRetryIntervalMs = fileSystemRetryConfig.getInitialRetryIntervalMs(); + + FakeRemoteFileSystem fakeFs = new FakeRemoteFileSystem(FSUtils.getFs(metaClient.getMetaPath(), metaClient.getHadoopConf()), 2); + FileSystem fileSystem = new HoodieRetryWrapperFileSystem(fakeFs, maxRetryIntervalMs, maxRetryNumbers, initialRetryIntervalMs, ""); + + HoodieWrapperFileSystem fs = new HoodieWrapperFileSystem(fileSystem, new NoOpConsistencyGuard()); + metaClient.setFs(fs); + } + + // Test the scenario that fs keeps retrying until it fails. + @Test + public void testProcessFilesWithExceptions() throws Exception { + FakeRemoteFileSystem fakeFs = new FakeRemoteFileSystem(FSUtils.getFs(metaClient.getMetaPath(), metaClient.getHadoopConf()), 100); + FileSystem fileSystem = new HoodieRetryWrapperFileSystem(fakeFs, maxRetryIntervalMs, maxRetryNumbers, initialRetryIntervalMs, ""); + HoodieWrapperFileSystem fs = new HoodieWrapperFileSystem(fileSystem, new NoOpConsistencyGuard()); + metaClient.setFs(fs); + List folders = + Arrays.asList("2016/04/15", ".hoodie/.temp/2/2016/04/15"); + folders.forEach(f -> assertThrows(RuntimeException.class, () -> metaClient.getFs().mkdirs(new Path(new Path(basePath), f)))); + } + + /** + * Fake remote FileSystem which throws a RuntimeException, similar to an AmazonS3Exception 503. 
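The test wires this flaky filesystem (every call fails except each `loop`-th one) underneath `HoodieRetryWrapperFileSystem`. The wrapper's behavior, as configured by `FileSystemRetryConfig`, plausibly amounts to the backoff loop below; this is a sketch of the assumed semantics, not the wrapper's actual code:

```java
import java.util.concurrent.Callable;

// Assumed retry-with-exponential-backoff semantics behind HoodieRetryWrapperFileSystem.
static <T> T runWithRetry(Callable<T> action, int maxRetryNumbers,
                          long initialRetryIntervalMs, long maxRetryIntervalMs) throws Exception {
  long intervalMs = initialRetryIntervalMs;
  for (int attempt = 0; ; attempt++) {
    try {
      return action.call();
    } catch (RuntimeException e) {
      if (attempt >= maxRetryNumbers) {
        throw e; // retries exhausted, surface the failure (as the test asserts)
      }
      Thread.sleep(intervalMs);
      intervalMs = Math.min(intervalMs * 2, maxRetryIntervalMs); // back off, capped
    }
  }
}
```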
+ */ + class FakeRemoteFileSystem extends FileSystem { + + private FileSystem fs; + private int count = 1; + private int loop; + + public FakeRemoteFileSystem(FileSystem fs, int retryLoop) { + this.fs = fs; + this.loop = retryLoop; + } + + @Override + public URI getUri() { + return fs.getUri(); + } + + @Override + public FSDataInputStream open(Path f, int bufferSize) throws IOException { + if (count % loop == 0) { + count++; + return fs.open(f, bufferSize); + } else { + count++; + throw new RuntimeException(EXCEPTION_MESSAGE); + } + } + + @Override + public FSDataOutputStream create(Path f, FsPermission permission, boolean overwrite, int bufferSize, short replication, long blockSize, Progressable progress) throws IOException { + if (count % loop == 0) { + count++; + return fs.create(f, permission, overwrite, bufferSize, replication, blockSize, progress); + } else { + count++; + throw new RuntimeException(EXCEPTION_MESSAGE); + } + } + + @Override + public FSDataOutputStream append(Path f, int bufferSize, Progressable progress) throws IOException { + if (count % loop == 0) { + count++; + return fs.append(f, bufferSize, progress); + } else { + count++; + throw new RuntimeException(EXCEPTION_MESSAGE); + } + } + + @Override + public boolean rename(Path src, Path dst) throws IOException { + if (count % loop == 0) { + count++; + return fs.rename(src, dst); + } else { + count++; + throw new RuntimeException(EXCEPTION_MESSAGE); + } + } + + @Override + public boolean delete(Path f, boolean recursive) throws IOException { + if (count % loop == 0) { + count++; + return fs.delete(f, recursive); + } else { + count++; + throw new RuntimeException(EXCEPTION_MESSAGE); + } + } + + @Override + public FileStatus[] listStatus(Path f) throws FileNotFoundException, IOException { + if (count % loop == 0) { + count++; + return fs.listStatus(f); + } else { + count++; + throw new RuntimeException(EXCEPTION_MESSAGE); + } + } + + @Override + public void setWorkingDirectory(Path newDir) { + fs.setWorkingDirectory(newDir); + } + + @Override + public Path getWorkingDirectory() { + return fs.getWorkingDirectory(); + } + + @Override + public boolean mkdirs(Path f, FsPermission permission) throws IOException { + if (count % loop == 0) { + count++; + return fs.mkdirs(f, permission); + } else { + count++; + throw new RuntimeException(EXCEPTION_MESSAGE); + } + } + + @Override + public FileStatus getFileStatus(Path f) throws IOException { + if (count % loop == 0) { + count++; + return fs.getFileStatus(f); + } else { + count++; + throw new RuntimeException(EXCEPTION_MESSAGE); + } + } + + @Override + public RemoteIterator listLocatedStatus(Path f) throws IOException { + return fs.listLocatedStatus(f); + } + + @Override + public Configuration getConf() { + return fs.getConf(); + } + } +} diff --git a/hudi-common/src/test/java/org/apache/hudi/common/fs/inline/TestParquetInLining.java b/hudi-common/src/test/java/org/apache/hudi/common/fs/inline/TestParquetInLining.java index c4e728dc24909..9ed27c4b2d63c 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/fs/inline/TestParquetInLining.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/fs/inline/TestParquetInLining.java @@ -19,6 +19,7 @@ package org.apache.hudi.common.fs.inline; +import org.apache.hudi.common.model.HoodieAvroRecord; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.testutils.FileSystemTestUtils; import org.apache.hudi.common.testutils.HoodieTestDataGenerator; @@ -144,7 +145,8 @@ static List getParquetHoodieRecords() 
throws IOException { List hoodieRecords = dataGenerator.generateInsertsWithHoodieAvroPayload(commitTime, 10); List toReturn = new ArrayList<>(); for (HoodieRecord record : hoodieRecords) { - toReturn.add((GenericRecord) record.getData().getInsertValue(HoodieTestDataGenerator.AVRO_SCHEMA).get()); + toReturn.add((GenericRecord) ((HoodieAvroRecord) record).getData() + .getInsertValue(HoodieTestDataGenerator.AVRO_SCHEMA).get()); } return toReturn; } diff --git a/hudi-common/src/test/java/org/apache/hudi/common/functional/TestHoodieLogFormat.java b/hudi-common/src/test/java/org/apache/hudi/common/functional/TestHoodieLogFormat.java index bbfd8cf4ad39b..e9b06e6d6397d 100755 --- a/hudi-common/src/test/java/org/apache/hudi/common/functional/TestHoodieLogFormat.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/functional/TestHoodieLogFormat.java @@ -18,16 +18,10 @@ package org.apache.hudi.common.functional; -import org.apache.avro.Schema; -import org.apache.avro.generic.GenericRecord; -import org.apache.avro.generic.IndexedRecord; -import org.apache.hadoop.fs.FSDataOutputStream; -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; import org.apache.hudi.avro.HoodieAvroUtils; import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.model.HoodieArchivedLogFile; +import org.apache.hudi.common.model.HoodieAvroRecord; import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.model.HoodieLogFile; import org.apache.hudi.common.model.HoodieRecord; @@ -46,16 +40,30 @@ import org.apache.hudi.common.table.log.block.HoodieLogBlock; import org.apache.hudi.common.table.log.block.HoodieLogBlock.HeaderMetadataType; import org.apache.hudi.common.table.log.block.HoodieLogBlock.HoodieLogBlockType; +import org.apache.hudi.common.table.log.block.HoodieParquetDataBlock; import org.apache.hudi.common.testutils.FileCreateUtils; +import org.apache.hudi.common.testutils.HadoopMapRedUtils; import org.apache.hudi.common.testutils.HoodieCommonTestHarness; import org.apache.hudi.common.testutils.HoodieTestUtils; import org.apache.hudi.common.testutils.SchemaTestUtil; import org.apache.hudi.common.testutils.minicluster.MiniClusterUtil; +import org.apache.hudi.common.util.ClosableIterator; import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.collection.ExternalSpillableMap; import org.apache.hudi.exception.CorruptedLogFileException; - import org.apache.hudi.exception.HoodieIOException; + +import org.apache.avro.Schema; +import org.apache.avro.generic.GenericRecord; +import org.apache.avro.generic.IndexedRecord; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.io.compress.Compression; +import org.apache.parquet.hadoop.metadata.CompressionCodecName; +import org.apache.parquet.hadoop.util.counters.BenchmarkCounter; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeAll; @@ -96,11 +104,12 @@ @SuppressWarnings("Duplicates") public class TestHoodieLogFormat extends HoodieCommonTestHarness { + private static final HoodieLogBlockType DEFAULT_DATA_BLOCK_TYPE = HoodieLogBlockType.AVRO_DATA_BLOCK; + private static String BASE_OUTPUT_PATH = "/tmp/"; private FileSystem fs; private Path partitionPath; private int bufferSize = 4096; - private HoodieLogBlockType 
dataBlockType = HoodieLogBlockType.AVRO_DATA_BLOCK; @BeforeAll public static void setUpClass() throws IOException, InterruptedException { @@ -139,7 +148,7 @@ public void testEmptyLog() throws IOException { } @ParameterizedTest - @EnumSource(names = { "AVRO_DATA_BLOCK", "HFILE_DATA_BLOCK" }) + @EnumSource(names = {"AVRO_DATA_BLOCK", "HFILE_DATA_BLOCK", "PARQUET_DATA_BLOCK"}) public void testBasicAppend(HoodieLogBlockType dataBlockType) throws IOException, InterruptedException, URISyntaxException { Writer writer = HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath).withFileExtension(HoodieLogFile.DELTA_EXTENSION) @@ -171,7 +180,7 @@ public void testRollover() throws IOException, InterruptedException, URISyntaxEx Map header = new HashMap<>(); header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "100"); header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, getSimpleSchema().toString()); - HoodieDataBlock dataBlock = getDataBlock(records, header); + HoodieDataBlock dataBlock = getDataBlock(DEFAULT_DATA_BLOCK_TYPE, records, header); // Write out a block AppendResult firstAppend = writer.appendBlock(dataBlock); // Get the size of the block @@ -186,7 +195,7 @@ public void testRollover() throws IOException, InterruptedException, URISyntaxEx HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath).withFileExtension(HoodieLogFile.DELTA_EXTENSION) .withFileId("test-fileid1").overBaseCommit("100").withFs(fs).withSizeThreshold(size - 1).build(); records = SchemaTestUtil.generateTestRecords(0, 100); - dataBlock = getDataBlock(records, header); + dataBlock = getDataBlock(DEFAULT_DATA_BLOCK_TYPE, records, header); AppendResult secondAppend = writer.appendBlock(dataBlock); assertEquals(firstAppend.logFile(), secondAppend.logFile()); @@ -198,7 +207,7 @@ public void testRollover() throws IOException, InterruptedException, URISyntaxEx // Write one more block, which should not go to the new log file. 
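// (Rollover expected here: the size threshold was exceeded by the previous append, so this block goes to a fresh log file, as the assertNotEquals below verifies.)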
records = SchemaTestUtil.generateTestRecords(0, 100); - dataBlock = getDataBlock(records, header); + dataBlock = getDataBlock(DEFAULT_DATA_BLOCK_TYPE, records, header); AppendResult rolloverAppend = writer.appendBlock(dataBlock); assertNotEquals(secondAppend.logFile(), rolloverAppend.logFile()); @@ -245,7 +254,7 @@ private void testConcurrentAppend(boolean logFileExists, boolean newLogFileForma Map header = new HashMap<>(); header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "100"); header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, getSimpleSchema().toString()); - HoodieDataBlock dataBlock = getDataBlock(records, header); + HoodieDataBlock dataBlock = getDataBlock(DEFAULT_DATA_BLOCK_TYPE, records, header); writer.appendBlock(dataBlock); Writer writer2 = builder2.build(); writer2.appendBlock(dataBlock); @@ -257,8 +266,9 @@ private void testConcurrentAppend(boolean logFileExists, boolean newLogFileForma assertEquals(logFile1.getLogVersion(), logFile2.getLogVersion() - 1, "Log Files must have different versions"); } - @Test - public void testMultipleAppend() throws IOException, URISyntaxException, InterruptedException { + @ParameterizedTest + @EnumSource(names = {"AVRO_DATA_BLOCK", "HFILE_DATA_BLOCK", "PARQUET_DATA_BLOCK"}) + public void testMultipleAppend(HoodieLogBlockType dataBlockType) throws IOException, URISyntaxException, InterruptedException { Writer writer = HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath).withFileExtension(HoodieLogFile.DELTA_EXTENSION) .withFileId("test-fileid1").overBaseCommit("100").withFs(fs).build(); @@ -266,7 +276,7 @@ public void testMultipleAppend() throws IOException, URISyntaxException, Interru Map header = new HashMap<>(); header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "100"); header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, getSimpleSchema().toString()); - HoodieDataBlock dataBlock = getDataBlock(records, header); + HoodieDataBlock dataBlock = getDataBlock(dataBlockType, records, header); writer.appendBlock(dataBlock); long size1 = writer.getCurrentSize(); writer.close(); @@ -276,7 +286,7 @@ public void testMultipleAppend() throws IOException, URISyntaxException, Interru .withFileId("test-fileid1").overBaseCommit("100").withFs(fs).build(); records = SchemaTestUtil.generateTestRecords(0, 100); header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, getSimpleSchema().toString()); - dataBlock = getDataBlock(records, header); + dataBlock = getDataBlock(dataBlockType, records, header); writer.appendBlock(dataBlock); long size2 = writer.getCurrentSize(); assertTrue(size2 > size1, "We just wrote a new block - size2 should be > size1"); @@ -290,7 +300,7 @@ public void testMultipleAppend() throws IOException, URISyntaxException, Interru .withFileId("test-fileid1").overBaseCommit("100").withFs(fs).build(); records = SchemaTestUtil.generateTestRecords(0, 100); header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, getSimpleSchema().toString()); - dataBlock = getDataBlock(records, header); + dataBlock = getDataBlock(dataBlockType, records, header); writer.appendBlock(dataBlock); long size3 = writer.getCurrentSize(); assertTrue(size3 > size2, "We just wrote a new block - size3 should be > size2"); @@ -309,26 +319,27 @@ public void testMultipleAppend() throws IOException, URISyntaxException, Interru * This is actually a test on concurrent append and not recovery lease. Commenting this out. 
* https://issues.apache.org/jira/browse/HUDI-117 */ + /** * @Test public void testLeaseRecovery() throws IOException, URISyntaxException, InterruptedException { Writer writer - * = HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath) - * .withFileExtension(HoodieLogFile.DELTA_EXTENSION).withFileId("test-fileid1") - * .overBaseCommit("100").withFs(fs).build(); List records = - * SchemaTestUtil.generateTestRecords(0, 100); Map header = - * Maps.newHashMap(); header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "100"); - * header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, getSimpleSchema().toString()); HoodieAvroDataBlock - * dataBlock = new HoodieAvroDataBlock(records, header); writer = writer.appendBlock(dataBlock); long size1 = - * writer.getCurrentSize(); // do not close this writer - this simulates a data note appending to a log dying - * without closing the file // writer.close(); - * - * writer = HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath) - * .withFileExtension(HoodieLogFile.DELTA_EXTENSION).withFileId("test-fileid1").overBaseCommit("100") - * .withFs(fs).build(); records = SchemaTestUtil.generateTestRecords(0, 100); - * header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, getSimpleSchema().toString()); dataBlock = new - * HoodieAvroDataBlock(records, header); writer = writer.appendBlock(dataBlock); long size2 = - * writer.getCurrentSize(); assertTrue("We just wrote a new block - size2 should be > size1", size2 > size1); - * assertEquals("Write should be auto-flushed. The size reported by FileStatus and the writer should match", - * size2, fs.getFileStatus(writer.getLogFile().getPath()).getLen()); writer.close(); } + * = HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath) + * .withFileExtension(HoodieLogFile.DELTA_EXTENSION).withFileId("test-fileid1") + * .overBaseCommit("100").withFs(fs).build(); List records = + * SchemaTestUtil.generateTestRecords(0, 100); Map header = + * Maps.newHashMap(); header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "100"); + * header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, getSimpleSchema().toString()); HoodieAvroDataBlock + * dataBlock = new HoodieAvroDataBlock(records, header); writer = writer.appendBlock(dataBlock); long size1 = + * writer.getCurrentSize(); // do not close this writer - this simulates a data note appending to a log dying + * without closing the file // writer.close(); + *

+ * writer = HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath) + * .withFileExtension(HoodieLogFile.DELTA_EXTENSION).withFileId("test-fileid1").overBaseCommit("100") + * .withFs(fs).build(); records = SchemaTestUtil.generateTestRecords(0, 100); + * header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, getSimpleSchema().toString()); dataBlock = new + * HoodieAvroDataBlock(records, header); writer = writer.appendBlock(dataBlock); long size2 = + * writer.getCurrentSize(); assertTrue("We just wrote a new block - size2 should be > size1", size2 > size1); + * assertEquals("Write should be auto-flushed. The size reported by FileStatus and the writer should match", + * size2, fs.getFileStatus(writer.getLogFile().getPath()).getLen()); writer.close(); } */ @Test @@ -344,7 +355,7 @@ public void testAppendNotSupported() throws IOException, URISyntaxException, Int Map header = new HashMap<>(); header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "100"); header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, getSimpleSchema().toString()); - HoodieDataBlock dataBlock = getDataBlock(records, header); + HoodieDataBlock dataBlock = getDataBlock(DEFAULT_DATA_BLOCK_TYPE, records, header); for (int i = 0; i < 2; i++) { Writer writer = HoodieLogFormat.newWriterBuilder().onParentPath(testPath) @@ -371,18 +382,19 @@ public void testBasicWriteAndScan() throws IOException, URISyntaxException, Inte Map header = new HashMap<>(); header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "100"); header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, getSimpleSchema().toString()); - HoodieDataBlock dataBlock = getDataBlock(records, header); + HoodieDataBlock dataBlock = getDataBlock(DEFAULT_DATA_BLOCK_TYPE, records, header); writer.appendBlock(dataBlock); writer.close(); Reader reader = HoodieLogFormat.newReader(fs, writer.getLogFile(), SchemaTestUtil.getSimpleSchema()); assertTrue(reader.hasNext(), "We wrote a block, we should be able to read it"); HoodieLogBlock nextBlock = reader.next(); - assertEquals(dataBlockType, nextBlock.getBlockType(), "The next block should be a data block"); + assertEquals(DEFAULT_DATA_BLOCK_TYPE, nextBlock.getBlockType(), "The next block should be a data block"); HoodieDataBlock dataBlockRead = (HoodieDataBlock) nextBlock; - assertEquals(copyOfRecords.size(), dataBlockRead.getRecords().size(), + List recordsRead = getRecords(dataBlockRead); + assertEquals(copyOfRecords.size(), recordsRead.size(), "Read records size should be equal to the written records size"); - assertEquals(copyOfRecords, dataBlockRead.getRecords(), + assertEquals(copyOfRecords, recordsRead, "Both records lists should be the same. 
(ordering guaranteed)"); reader.close(); } @@ -400,10 +412,10 @@ public void testHugeLogFileWrite() throws IOException, URISyntaxException, Inter Map header = new HashMap<>(); header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "100"); header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, getSimpleSchema().toString()); - byte[] dataBlockContentBytes = getDataBlock(records, header).getContentBytes(); - HoodieDataBlock reusableDataBlock = new HoodieAvroDataBlock(null, null, - Option.ofNullable(dataBlockContentBytes), false, 0, dataBlockContentBytes.length, - 0, getSimpleSchema(), header, new HashMap<>(), HoodieRecord.RECORD_KEY_METADATA_FIELD); + byte[] dataBlockContentBytes = getDataBlock(DEFAULT_DATA_BLOCK_TYPE, records, header).getContentBytes(); + HoodieLogBlock.HoodieLogBlockContentLocation logBlockContentLoc = new HoodieLogBlock.HoodieLogBlockContentLocation(new Configuration(), null, 0, dataBlockContentBytes.length, 0); + HoodieDataBlock reusableDataBlock = new HoodieAvroDataBlock(null, Option.ofNullable(dataBlockContentBytes), false, + logBlockContentLoc, Option.ofNullable(getSimpleSchema()), header, new HashMap<>(), HoodieRecord.RECORD_KEY_METADATA_FIELD); long writtenSize = 0; int logBlockWrittenNum = 0; while (writtenSize < Integer.MAX_VALUE) { @@ -418,11 +430,12 @@ public void testHugeLogFileWrite() throws IOException, URISyntaxException, Inter true, true); assertTrue(reader.hasNext(), "We wrote a block, we should be able to read it"); HoodieLogBlock nextBlock = reader.next(); - assertEquals(dataBlockType, nextBlock.getBlockType(), "The next block should be a data block"); + assertEquals(DEFAULT_DATA_BLOCK_TYPE, nextBlock.getBlockType(), "The next block should be a data block"); HoodieDataBlock dataBlockRead = (HoodieDataBlock) nextBlock; - assertEquals(copyOfRecords.size(), dataBlockRead.getRecords().size(), + List recordsRead = getRecords(dataBlockRead); + assertEquals(copyOfRecords.size(), recordsRead.size(), "Read records size should be equal to the written records size"); - assertEquals(copyOfRecords, dataBlockRead.getRecords(), + assertEquals(copyOfRecords, recordsRead, "Both records lists should be the same. 
(ordering guaranteed)"); int logBlockReadNum = 1; while (reader.hasNext()) { @@ -447,11 +460,16 @@ public void testHugeLogFileWrite() throws IOException, URISyntaxException, Inter oversizeWriter.close(); } - @Test - public void testBasicAppendAndRead() throws IOException, URISyntaxException, InterruptedException { - Writer writer = - HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath).withFileExtension(HoodieLogFile.DELTA_EXTENSION) - .withFileId("test-fileid1").overBaseCommit("100").withFs(fs).build(); + @ParameterizedTest + @EnumSource(names = {"AVRO_DATA_BLOCK", "HFILE_DATA_BLOCK", "PARQUET_DATA_BLOCK"}) + public void testBasicAppendAndRead(HoodieLogBlockType dataBlockType) throws IOException, URISyntaxException, InterruptedException { + Writer writer = HoodieLogFormat.newWriterBuilder() + .onParentPath(partitionPath) + .withFileExtension(HoodieLogFile.DELTA_EXTENSION) + .withFileId("test-fileid1") + .overBaseCommit("100") + .withFs(fs) + .build(); List records1 = SchemaTestUtil.generateTestRecords(0, 100); Schema schema = getSimpleSchema(); List copyOfRecords1 = records1.stream() @@ -459,30 +477,39 @@ public void testBasicAppendAndRead() throws IOException, URISyntaxException, Int Map header = new HashMap<>(); header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "100"); header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, getSimpleSchema().toString()); - HoodieDataBlock dataBlock = getDataBlock(records1, header); + HoodieDataBlock dataBlock = getDataBlock(dataBlockType, records1, header); writer.appendBlock(dataBlock); writer.close(); - writer = - HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath).withFileExtension(HoodieLogFile.DELTA_EXTENSION) - .withFileId("test-fileid1").overBaseCommit("100").withFs(fs).build(); + writer = HoodieLogFormat.newWriterBuilder() + .onParentPath(partitionPath) + .withFileExtension(HoodieLogFile.DELTA_EXTENSION) + .withFileId("test-fileid1") + .overBaseCommit("100") + .withFs(fs) + .build(); List records2 = SchemaTestUtil.generateTestRecords(0, 100); List copyOfRecords2 = records2.stream() .map(record -> HoodieAvroUtils.rewriteRecord((GenericRecord) record, schema)).collect(Collectors.toList()); header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, getSimpleSchema().toString()); - dataBlock = getDataBlock(records2, header); + dataBlock = getDataBlock(dataBlockType, records2, header); writer.appendBlock(dataBlock); writer.close(); // Close and Open again and append 100 more records - writer = - HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath).withFileExtension(HoodieLogFile.DELTA_EXTENSION) - .withFileId("test-fileid1").overBaseCommit("100").withFs(fs).build(); + writer = HoodieLogFormat.newWriterBuilder() + .onParentPath(partitionPath) + .withFileExtension(HoodieLogFile.DELTA_EXTENSION) + .withFileId("test-fileid1") + .overBaseCommit("100") + .withFs(fs) + .build(); + List records3 = SchemaTestUtil.generateTestRecords(0, 100); List copyOfRecords3 = records3.stream() .map(record -> HoodieAvroUtils.rewriteRecord((GenericRecord) record, schema)).collect(Collectors.toList()); header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, getSimpleSchema().toString()); - dataBlock = getDataBlock(records3, header); + dataBlock = getDataBlock(dataBlockType, records3, header); writer.appendBlock(dataBlock); writer.close(); @@ -490,26 +517,29 @@ public void testBasicAppendAndRead() throws IOException, URISyntaxException, Int assertTrue(reader.hasNext(), "First block should be available"); HoodieLogBlock nextBlock = reader.next(); 
HoodieDataBlock dataBlockRead = (HoodieDataBlock) nextBlock; - assertEquals(copyOfRecords1.size(), dataBlockRead.getRecords().size(), + List recordsRead1 = getRecords(dataBlockRead); + assertEquals(copyOfRecords1.size(),recordsRead1.size(), "Read records size should be equal to the written records size"); - assertEquals(copyOfRecords1, dataBlockRead.getRecords(), + assertEquals(copyOfRecords1, recordsRead1, "Both records lists should be the same. (ordering guaranteed)"); assertEquals(dataBlockRead.getSchema(), getSimpleSchema()); reader.hasNext(); nextBlock = reader.next(); dataBlockRead = (HoodieDataBlock) nextBlock; - assertEquals(copyOfRecords2.size(), dataBlockRead.getRecords().size(), + List recordsRead2 = getRecords(dataBlockRead); + assertEquals(copyOfRecords2.size(), recordsRead2.size(), "Read records size should be equal to the written records size"); - assertEquals(copyOfRecords2, dataBlockRead.getRecords(), + assertEquals(copyOfRecords2, recordsRead2, "Both records lists should be the same. (ordering guaranteed)"); reader.hasNext(); nextBlock = reader.next(); dataBlockRead = (HoodieDataBlock) nextBlock; - assertEquals(copyOfRecords3.size(), dataBlockRead.getRecords().size(), + List recordsRead3 = getRecords(dataBlockRead); + assertEquals(copyOfRecords3.size(), recordsRead3.size(), "Read records size should be equal to the written records size"); - assertEquals(copyOfRecords3, dataBlockRead.getRecords(), + assertEquals(copyOfRecords3, recordsRead3, "Both records lists should be the same. (ordering guaranteed)"); reader.close(); } @@ -538,7 +568,7 @@ public void testBasicAppendAndScanMultipleFiles(ExternalSpillableMap.DiskMapType .map(record -> HoodieAvroUtils.rewriteRecord((GenericRecord) record, schema)).collect(Collectors.toList()); allRecords.add(copyOfRecords1); header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, schema.toString()); - HoodieDataBlock dataBlock = getDataBlock(records1, header); + HoodieDataBlock dataBlock = getDataBlock(DEFAULT_DATA_BLOCK_TYPE, records1, header); writer.appendBlock(dataBlock); } writer.close(); @@ -563,7 +593,8 @@ public void testBasicAppendAndScanMultipleFiles(ExternalSpillableMap.DiskMapType List scannedRecords = new ArrayList<>(); for (HoodieRecord record : scanner) { - scannedRecords.add((IndexedRecord) record.getData().getInsertValue(schema).get()); + scannedRecords.add((IndexedRecord) + ((HoodieAvroRecord) record).getData().getInsertValue(schema).get()); } assertEquals(scannedRecords.size(), allRecords.stream().mapToLong(Collection::size).sum(), @@ -580,7 +611,7 @@ public void testAppendAndReadOnCorruptedLog() throws IOException, URISyntaxExcep Map header = new HashMap<>(); header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "100"); header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, getSimpleSchema().toString()); - HoodieDataBlock dataBlock = getDataBlock(records, header); + HoodieDataBlock dataBlock = getDataBlock(DEFAULT_DATA_BLOCK_TYPE, records, header); writer.appendBlock(dataBlock); writer.close(); @@ -602,11 +633,11 @@ public void testAppendAndReadOnCorruptedLog() throws IOException, URISyntaxExcep // Append a proper block that is of the missing length of the corrupted block writer = - HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath).withFileExtension(HoodieLogFile.DELTA_EXTENSION) - .withFileId("test-fileid1").overBaseCommit("100").withFs(fs).build(); + HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath).withFileExtension(HoodieLogFile.DELTA_EXTENSION) + 
.withFileId("test-fileid1").overBaseCommit("100").withFs(fs).build(); records = SchemaTestUtil.generateTestRecords(0, 10); header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, getSimpleSchema().toString()); - dataBlock = getDataBlock(records, header); + dataBlock = getDataBlock(DEFAULT_DATA_BLOCK_TYPE, records, header); writer.appendBlock(dataBlock); writer.close(); @@ -644,7 +675,7 @@ public void testAppendAndReadOnCorruptedLog() throws IOException, URISyntaxExcep .withFileId("test-fileid1").overBaseCommit("100").withFs(fs).build(); records = SchemaTestUtil.generateTestRecords(0, 100); header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, getSimpleSchema().toString()); - dataBlock = getDataBlock(records, header); + dataBlock = getDataBlock(DEFAULT_DATA_BLOCK_TYPE, records, header); writer.appendBlock(dataBlock); writer.close(); @@ -674,7 +705,7 @@ public void testValidateCorruptBlockEndPosition() throws IOException, URISyntaxE Map header = new HashMap<>(); header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "100"); header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, getSimpleSchema().toString()); - HoodieDataBlock dataBlock = getDataBlock(records, header); + HoodieDataBlock dataBlock = getDataBlock(DEFAULT_DATA_BLOCK_TYPE, records, header); writer.appendBlock(dataBlock); writer.close(); @@ -702,7 +733,7 @@ public void testValidateCorruptBlockEndPosition() throws IOException, URISyntaxE .withFileId("test-fileid1").overBaseCommit("100").withFs(fs).build(); records = SchemaTestUtil.generateTestRecords(0, 10); header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, getSimpleSchema().toString()); - dataBlock = getDataBlock(records, header); + dataBlock = getDataBlock(DEFAULT_DATA_BLOCK_TYPE, records, header); writer.appendBlock(dataBlock); writer.close(); @@ -741,7 +772,7 @@ public void testAvroLogRecordReaderBasic(ExternalSpillableMap.DiskMapType diskMa Map header = new HashMap<>(); header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "100"); header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, schema.toString()); - HoodieDataBlock dataBlock = getDataBlock(records1, header); + HoodieDataBlock dataBlock = getDataBlock(DEFAULT_DATA_BLOCK_TYPE, records1, header); writer.appendBlock(dataBlock); // Write 2 @@ -749,7 +780,7 @@ public void testAvroLogRecordReaderBasic(ExternalSpillableMap.DiskMapType diskMa List copyOfRecords2 = records2.stream() .map(record -> HoodieAvroUtils.rewriteRecord((GenericRecord) record, schema)).collect(Collectors.toList()); header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, schema.toString()); - dataBlock = getDataBlock(records2, header); + dataBlock = getDataBlock(DEFAULT_DATA_BLOCK_TYPE, records2, header); writer.appendBlock(dataBlock); writer.close(); @@ -804,14 +835,14 @@ public void testAvroLogRecordReaderWithRollbackTombstone(ExternalSpillableMap.Di header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "100"); header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, schema.toString()); - HoodieDataBlock dataBlock = getDataBlock(records1, header); + HoodieDataBlock dataBlock = getDataBlock(DEFAULT_DATA_BLOCK_TYPE, records1, header); writer.appendBlock(dataBlock); // Write 2 header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "101"); List records2 = SchemaTestUtil.generateHoodieTestRecords(0, 100); header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, schema.toString()); - dataBlock = getDataBlock(records2, header); + dataBlock = getDataBlock(DEFAULT_DATA_BLOCK_TYPE, records2, header); writer.appendBlock(dataBlock); // Rollback the last write @@ 
-827,7 +858,7 @@ public void testAvroLogRecordReaderWithRollbackTombstone(ExternalSpillableMap.Di List copyOfRecords3 = records3.stream() .map(record -> HoodieAvroUtils.rewriteRecord((GenericRecord) record, schema)).collect(Collectors.toList()); header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, schema.toString()); - dataBlock = getDataBlock(records3, header); + dataBlock = getDataBlock(DEFAULT_DATA_BLOCK_TYPE, records3, header); writer.appendBlock(dataBlock); writer.close(); @@ -880,7 +911,7 @@ public void testAvroLogRecordReaderWithFailedPartialBlock(ExternalSpillableMap.D Map header = new HashMap<>(); header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "100"); header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, schema.toString()); - HoodieDataBlock dataBlock = getDataBlock(records1, header); + HoodieDataBlock dataBlock = getDataBlock(DEFAULT_DATA_BLOCK_TYPE, records1, header); writer.appendBlock(dataBlock); writer.close(); @@ -914,7 +945,7 @@ public void testAvroLogRecordReaderWithFailedPartialBlock(ExternalSpillableMap.D .map(record -> HoodieAvroUtils.rewriteRecord((GenericRecord) record, schema)).collect(Collectors.toList()); header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, schema.toString()); - dataBlock = getDataBlock(records3, header); + dataBlock = getDataBlock(DEFAULT_DATA_BLOCK_TYPE, records3, header); writer.appendBlock(dataBlock); writer.close(); @@ -968,7 +999,7 @@ public void testAvroLogRecordReaderWithDeleteAndRollback(ExternalSpillableMap.Di Map header = new HashMap<>(); header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "100"); header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, schema.toString()); - HoodieDataBlock dataBlock = getDataBlock(records1, header); + HoodieDataBlock dataBlock = getDataBlock(DEFAULT_DATA_BLOCK_TYPE, records1, header); writer.appendBlock(dataBlock); // Write 2 @@ -976,7 +1007,7 @@ public void testAvroLogRecordReaderWithDeleteAndRollback(ExternalSpillableMap.Di List records2 = SchemaTestUtil.generateHoodieTestRecords(0, 100); List copyOfRecords2 = records2.stream() .map(record -> HoodieAvroUtils.rewriteRecord((GenericRecord) record, schema)).collect(Collectors.toList()); - dataBlock = getDataBlock(records2, header); + dataBlock = getDataBlock(DEFAULT_DATA_BLOCK_TYPE, records2, header); writer.appendBlock(dataBlock); copyOfRecords1.addAll(copyOfRecords2); @@ -1089,13 +1120,13 @@ public void testAvroLogRecordReaderWithFailedRollbacks(ExternalSpillableMap.Disk header.put(HoodieLogBlock.HeaderMetadataType.TARGET_INSTANT_TIME, "100"); header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, schema.toString()); - HoodieDataBlock dataBlock = getDataBlock(records1, header); + HoodieDataBlock dataBlock = getDataBlock(DEFAULT_DATA_BLOCK_TYPE, records1, header); writer.appendBlock(dataBlock); // Write 2 List records2 = SchemaTestUtil.generateHoodieTestRecords(0, 100); header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, schema.toString()); - dataBlock = getDataBlock(records2, header); + dataBlock = getDataBlock(DEFAULT_DATA_BLOCK_TYPE, records2, header); writer.appendBlock(dataBlock); // Delete 50 keys @@ -1173,7 +1204,7 @@ public void testAvroLogRecordReaderWithInsertDeleteAndRollback(ExternalSpillable header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "100"); header.put(HoodieLogBlock.HeaderMetadataType.TARGET_INSTANT_TIME, "100"); header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, schema.toString()); - HoodieDataBlock dataBlock = getDataBlock(records1, header); + HoodieDataBlock dataBlock = 
getDataBlock(DEFAULT_DATA_BLOCK_TYPE, records1, header); writer.appendBlock(dataBlock); // Delete 50 keys @@ -1232,7 +1263,7 @@ public void testAvroLogRecordReaderWithInvalidRollback(ExternalSpillableMap.Disk Map header = new HashMap<>(); header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "100"); header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, schema.toString()); - HoodieDataBlock dataBlock = getDataBlock(records1, header); + HoodieDataBlock dataBlock = getDataBlock(DEFAULT_DATA_BLOCK_TYPE, records1, header); writer.appendBlock(dataBlock); FileCreateUtils.createDeltaCommit(basePath, "100", fs); @@ -1290,7 +1321,7 @@ public void testAvroLogRecordReaderWithInsertsDeleteAndRollback(ExternalSpillabl header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "100"); header.put(HoodieLogBlock.HeaderMetadataType.TARGET_INSTANT_TIME, "100"); header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, schema.toString()); - HoodieDataBlock dataBlock = getDataBlock(records1, header); + HoodieDataBlock dataBlock = getDataBlock(DEFAULT_DATA_BLOCK_TYPE, records1, header); writer.appendBlock(dataBlock); writer.appendBlock(dataBlock); writer.appendBlock(dataBlock); @@ -1354,7 +1385,7 @@ public void testAvroLogRecordReaderWithMixedInsertsCorruptsAndRollback(ExternalS Map header = new HashMap<>(); header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "100"); header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, schema.toString()); - HoodieDataBlock dataBlock = getDataBlock(records1, header); + HoodieDataBlock dataBlock = getDataBlock(DEFAULT_DATA_BLOCK_TYPE, records1, header); writer.appendBlock(dataBlock); writer.appendBlock(dataBlock); writer.appendBlock(dataBlock); @@ -1473,7 +1504,7 @@ private void testAvroLogRecordReaderMergingMultipleLogFiles(int numRecordsInLog1 Map header = new HashMap<>(); header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "100"); header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, schema.toString()); - HoodieDataBlock dataBlock = getDataBlock(records.subList(0, numRecordsInLog1), header); + HoodieDataBlock dataBlock = getDataBlock(DEFAULT_DATA_BLOCK_TYPE, records.subList(0, numRecordsInLog1), header); writer.appendBlock(dataBlock); // Get the size of the block long size = writer.getCurrentSize(); @@ -1487,7 +1518,7 @@ private void testAvroLogRecordReaderMergingMultipleLogFiles(int numRecordsInLog1 Map header2 = new HashMap<>(); header2.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "100"); header2.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, schema.toString()); - HoodieDataBlock dataBlock2 = getDataBlock(records2.subList(0, numRecordsInLog2), header2); + HoodieDataBlock dataBlock2 = getDataBlock(DEFAULT_DATA_BLOCK_TYPE, records2.subList(0, numRecordsInLog2), header2); writer2.appendBlock(dataBlock2); // Get the size of the block writer2.close(); @@ -1574,7 +1605,7 @@ public void testBasicAppendAndReadInReverse(boolean readBlocksLazily) Map header = new HashMap<>(); header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "100"); header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, schema.toString()); - HoodieDataBlock dataBlock = getDataBlock(records1, header); + HoodieDataBlock dataBlock = getDataBlock(DEFAULT_DATA_BLOCK_TYPE, records1, header); writer.appendBlock(dataBlock); writer.close(); @@ -1584,7 +1615,7 @@ public void testBasicAppendAndReadInReverse(boolean readBlocksLazily) List records2 = SchemaTestUtil.generateTestRecords(0, 100); List copyOfRecords2 = records2.stream() .map(record -> HoodieAvroUtils.rewriteRecord((GenericRecord) record, 
schema)).collect(Collectors.toList()); - dataBlock = getDataBlock(records2, header); + dataBlock = getDataBlock(DEFAULT_DATA_BLOCK_TYPE, records2, header); writer.appendBlock(dataBlock); writer.close(); @@ -1595,7 +1626,7 @@ public void testBasicAppendAndReadInReverse(boolean readBlocksLazily) List records3 = SchemaTestUtil.generateTestRecords(0, 100); List copyOfRecords3 = records3.stream() .map(record -> HoodieAvroUtils.rewriteRecord((GenericRecord) record, schema)).collect(Collectors.toList()); - dataBlock = getDataBlock(records3, header); + dataBlock = getDataBlock(DEFAULT_DATA_BLOCK_TYPE, records3, header); writer.appendBlock(dataBlock); writer.close(); @@ -1609,25 +1640,28 @@ public void testBasicAppendAndReadInReverse(boolean readBlocksLazily) HoodieLogBlock prevBlock = reader.prev(); HoodieDataBlock dataBlockRead = (HoodieDataBlock) prevBlock; - assertEquals(copyOfRecords3.size(), dataBlockRead.getRecords().size(), + List recordsRead1 = getRecords(dataBlockRead); + assertEquals(copyOfRecords3.size(), recordsRead1.size(), "Third records size should be equal to the written records size"); - assertEquals(copyOfRecords3, dataBlockRead.getRecords(), + assertEquals(copyOfRecords3, recordsRead1, "Both records lists should be the same. (ordering guaranteed)"); assertTrue(reader.hasPrev(), "Second block should be available"); prevBlock = reader.prev(); dataBlockRead = (HoodieDataBlock) prevBlock; - assertEquals(copyOfRecords2.size(), dataBlockRead.getRecords().size(), + List recordsRead2 = getRecords(dataBlockRead); + assertEquals(copyOfRecords2.size(), recordsRead2.size(), "Read records size should be equal to the written records size"); - assertEquals(copyOfRecords2, dataBlockRead.getRecords(), + assertEquals(copyOfRecords2, recordsRead2, "Both records lists should be the same. (ordering guaranteed)"); assertTrue(reader.hasPrev(), "First block should be available"); prevBlock = reader.prev(); dataBlockRead = (HoodieDataBlock) prevBlock; - assertEquals(copyOfRecords1.size(), dataBlockRead.getRecords().size(), + List recordsRead3 = getRecords(dataBlockRead); + assertEquals(copyOfRecords1.size(), recordsRead3.size(), "Read records size should be equal to the written records size"); - assertEquals(copyOfRecords1, dataBlockRead.getRecords(), + assertEquals(copyOfRecords1, recordsRead3, "Both records lists should be the same. 
(ordering guaranteed)"); assertFalse(reader.hasPrev()); @@ -1646,7 +1680,7 @@ public void testAppendAndReadOnCorruptedLogInReverse(boolean readBlocksLazily) Map header = new HashMap<>(); header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "100"); header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, schema.toString()); - HoodieDataBlock dataBlock = getDataBlock(records, header); + HoodieDataBlock dataBlock = getDataBlock(DEFAULT_DATA_BLOCK_TYPE, records, header); writer.appendBlock(dataBlock); writer.close(); @@ -1674,7 +1708,7 @@ public void testAppendAndReadOnCorruptedLogInReverse(boolean readBlocksLazily) HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath).withFileExtension(HoodieLogFile.DELTA_EXTENSION) .withFileId("test-fileid1").overBaseCommit("100").withFs(fs).build(); records = SchemaTestUtil.generateTestRecords(0, 100); - dataBlock = getDataBlock(records, header); + dataBlock = getDataBlock(DEFAULT_DATA_BLOCK_TYPE, records, header); writer.appendBlock(dataBlock); writer.close(); @@ -1708,7 +1742,7 @@ public void testBasicAppendAndTraverseInReverse(boolean readBlocksLazily) Map header = new HashMap<>(); header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "100"); header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, schema.toString()); - HoodieDataBlock dataBlock = getDataBlock(records1, header); + HoodieDataBlock dataBlock = getDataBlock(DEFAULT_DATA_BLOCK_TYPE, records1, header); writer.appendBlock(dataBlock); writer.close(); @@ -1716,7 +1750,7 @@ public void testBasicAppendAndTraverseInReverse(boolean readBlocksLazily) HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath).withFileExtension(HoodieLogFile.DELTA_EXTENSION) .withFileId("test-fileid1").overBaseCommit("100").withFs(fs).build(); List records2 = SchemaTestUtil.generateTestRecords(0, 100); - dataBlock = getDataBlock(records2, header); + dataBlock = getDataBlock(DEFAULT_DATA_BLOCK_TYPE, records2, header); writer.appendBlock(dataBlock); writer.close(); @@ -1725,7 +1759,7 @@ public void testBasicAppendAndTraverseInReverse(boolean readBlocksLazily) HoodieLogFormat.newWriterBuilder().onParentPath(partitionPath).withFileExtension(HoodieLogFile.DELTA_EXTENSION) .withFileId("test-fileid1").overBaseCommit("100").withFs(fs).build(); List records3 = SchemaTestUtil.generateTestRecords(0, 100); - dataBlock = getDataBlock(records3, header); + dataBlock = getDataBlock(DEFAULT_DATA_BLOCK_TYPE, records3, header); writer.appendBlock(dataBlock); writer.close(); @@ -1745,9 +1779,10 @@ public void testBasicAppendAndTraverseInReverse(boolean readBlocksLazily) assertTrue(reader.hasPrev(), "First block should be available"); HoodieLogBlock prevBlock = reader.prev(); HoodieDataBlock dataBlockRead = (HoodieDataBlock) prevBlock; - assertEquals(copyOfRecords1.size(), dataBlockRead.getRecords().size(), + List recordsRead = getRecords(dataBlockRead); + assertEquals(copyOfRecords1.size(), recordsRead.size(), "Read records size should be equal to the written records size"); - assertEquals(copyOfRecords1, dataBlockRead.getRecords(), + assertEquals(copyOfRecords1, recordsRead, "Both records lists should be the same. 
(ordering guaranteed)"); assertFalse(reader.hasPrev()); @@ -1770,7 +1805,7 @@ public void testV0Format() throws IOException, URISyntaxException { HoodieLogBlock logBlock = HoodieAvroDataBlock.getBlock(content, schema); assertEquals(HoodieLogBlockType.AVRO_DATA_BLOCK, logBlock.getBlockType()); - List readRecords = ((HoodieAvroDataBlock) logBlock).getRecords(); + List readRecords = getRecords((HoodieAvroDataBlock) logBlock); assertEquals(readRecords.size(), recordsCopy.size()); for (int i = 0; i < recordsCopy.size(); ++i) { assertEquals(recordsCopy.get(i), readRecords.get(i)); @@ -1779,15 +1814,74 @@ public void testV0Format() throws IOException, URISyntaxException { // Reader schema is optional if it is same as write schema logBlock = HoodieAvroDataBlock.getBlock(content, null); assertEquals(HoodieLogBlockType.AVRO_DATA_BLOCK, logBlock.getBlockType()); - readRecords = ((HoodieAvroDataBlock) logBlock).getRecords(); + readRecords = getRecords((HoodieAvroDataBlock) logBlock); assertEquals(readRecords.size(), recordsCopy.size()); for (int i = 0; i < recordsCopy.size(); ++i) { assertEquals(recordsCopy.get(i), readRecords.get(i)); } } - private HoodieDataBlock getDataBlock(List records, Map header) { - return getDataBlock(dataBlockType, records, header); + @ParameterizedTest + @EnumSource(names = {"AVRO_DATA_BLOCK", "HFILE_DATA_BLOCK", "PARQUET_DATA_BLOCK"}) + public void testDataBlockFormatAppendAndReadWithProjectedSchema( + HoodieLogBlockType dataBlockType + ) throws IOException, URISyntaxException, InterruptedException { + Writer writer = HoodieLogFormat.newWriterBuilder() + .onParentPath(partitionPath) + .withFileExtension(HoodieLogFile.DELTA_EXTENSION) + .withFileId("test-fileid1") + .overBaseCommit("100") + .withFs(fs) + .build(); + + List records = SchemaTestUtil.generateTestGenericRecords(0, 1000); + + Schema schema = getSimpleSchema(); + + Map header = + new HashMap() {{ + put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "100"); + put(HoodieLogBlock.HeaderMetadataType.SCHEMA, schema.toString()); + }}; + + // Init Benchmark to report number of bytes actually read from the Block + BenchmarkCounter.initCounterFromReporter(HadoopMapRedUtils.createTestReporter(), fs.getConf()); + + // NOTE: Have to use this ugly hack since List generic is not covariant in its type param + HoodieDataBlock dataBlock = getDataBlock(dataBlockType, (List)(List) records, header); + + writer.appendBlock(dataBlock); + writer.close(); + + Schema projectedSchema = HoodieAvroUtils.generateProjectionSchema(schema, Collections.singletonList("name")); + + List projectedRecords = HoodieAvroUtils.rewriteRecords(records, projectedSchema); + + try (Reader reader = HoodieLogFormat.newReader(fs, writer.getLogFile(), projectedSchema, true, false)) { + assertTrue(reader.hasNext(), "First block should be available"); + + HoodieLogBlock nextBlock = reader.next(); + + HoodieDataBlock dataBlockRead = (HoodieDataBlock) nextBlock; + + Map expectedReadBytes = + new HashMap() {{ + put(HoodieLogBlockType.AVRO_DATA_BLOCK, 0); // not supported + put(HoodieLogBlockType.HFILE_DATA_BLOCK, 0); // not supported + put(HoodieLogBlockType.PARQUET_DATA_BLOCK, 2605); + }}; + + List recordsRead = getRecords(dataBlockRead); + assertEquals(projectedRecords.size(), recordsRead.size(), + "Read records size should be equal to the written records size"); + assertEquals(projectedRecords, recordsRead, + "Both records lists should be the same. 
(ordering guaranteed)"); + assertEquals(dataBlockRead.getSchema(), projectedSchema); + + int bytesRead = (int) BenchmarkCounter.getBytesRead(); + + assertEquals(expectedReadBytes.get(dataBlockType), bytesRead, "Read bytes have to match"); + } } private HoodieDataBlock getDataBlock(HoodieLogBlockType dataBlockType, List records, @@ -1796,7 +1890,9 @@ private HoodieDataBlock getDataBlock(HoodieLogBlockType dataBlockType, List testArguments() { arguments(ExternalSpillableMap.DiskMapType.ROCKS_DB, true, true) ); } + + /** + * Utility to convert the given iterator to a List. + */ + private static List getRecords(HoodieDataBlock dataBlock) { + ClosableIterator itr = dataBlock.getRecordItr(); + + List elements = new ArrayList<>(); + itr.forEachRemaining(elements::add); + return elements; + } } diff --git a/hudi-common/src/test/java/org/apache/hudi/common/functional/TestHoodieLogFormatAppendFailure.java b/hudi-common/src/test/java/org/apache/hudi/common/functional/TestHoodieLogFormatAppendFailure.java index e313bb4a6ca0f..6c4d69a05b296 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/functional/TestHoodieLogFormatAppendFailure.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/functional/TestHoodieLogFormatAppendFailure.java @@ -19,6 +19,7 @@ package org.apache.hudi.common.functional; import org.apache.hudi.common.model.HoodieArchivedLogFile; +import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.table.log.HoodieLogFormat; import org.apache.hudi.common.table.log.HoodieLogFormat.Writer; import org.apache.hudi.common.table.log.block.HoodieAvroDataBlock; @@ -104,7 +105,7 @@ public void testFailedToGetAppendStreamFromHDFSNameNode() Map header = new HashMap<>(2); header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, "100"); header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, getSimpleSchema().toString()); - HoodieAvroDataBlock dataBlock = new HoodieAvroDataBlock(records, header); + HoodieAvroDataBlock dataBlock = new HoodieAvroDataBlock(records, header, HoodieRecord.RECORD_KEY_METADATA_FIELD); Writer writer = HoodieLogFormat.newWriterBuilder().onParentPath(testPath) .withFileExtension(HoodieArchivedLogFile.ARCHIVE_EXTENSION).withFileId("commits.archive") diff --git a/hudi-common/src/test/java/org/apache/hudi/common/model/TestHoodieRecord.java b/hudi-common/src/test/java/org/apache/hudi/common/model/TestHoodieRecord.java index e31286d10c2cf..b6bbc34cc3de9 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/model/TestHoodieRecord.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/model/TestHoodieRecord.java @@ -44,7 +44,7 @@ public class TestHoodieRecord { public void setUp() throws Exception { final List indexedRecords = SchemaTestUtil.generateHoodieTestRecords(0, 1); final List hoodieRecords = - indexedRecords.stream().map(r -> new HoodieRecord(new HoodieKey(UUID.randomUUID().toString(), "0000/00/00"), + indexedRecords.stream().map(r -> new HoodieAvroRecord(new HoodieKey(UUID.randomUUID().toString(), "0000/00/00"), new AvroBinaryTestPayload(Option.of((GenericRecord) r)))).collect(Collectors.toList()); hoodieRecord = hoodieRecords.get(0); } diff --git a/hudi-common/src/test/java/org/apache/hudi/common/properties/TestTypedProperties.java b/hudi-common/src/test/java/org/apache/hudi/common/properties/TestTypedProperties.java index 95955d4d72a27..a3ba13ec14a2a 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/properties/TestTypedProperties.java +++ 
b/hudi-common/src/test/java/org/apache/hudi/common/properties/TestTypedProperties.java @@ -19,8 +19,10 @@ package org.apache.hudi.common.properties; import org.apache.hudi.common.config.TypedProperties; + import org.junit.jupiter.api.Test; +import java.io.IOException; import java.util.Properties; import static org.junit.jupiter.api.Assertions.assertEquals; @@ -81,4 +83,54 @@ public void testGetBoolean() { assertEquals(true, typedProperties.getBoolean("key1", false)); assertEquals(false, typedProperties.getBoolean("key2", false)); } + + @Test + public void testPropertiesOrder() throws IOException { + Properties properties = new TypedProperties(); + properties.put("key0", "true"); + properties.put("key1", "false"); + properties.put("key2", "true"); + properties.put("key3", "false"); + properties.put("key4", "true"); + properties.put("key5", "true"); + properties.put("key6", "false"); + properties.put("key7", "true"); + properties.put("key8", "false"); + properties.put("key9", "true"); + + TypedProperties typedProperties = new TypedProperties(properties); + assertTypeProperties(typedProperties, 0); + } + + @Test + void testPutAllProperties() { + Properties firstProp = new TypedProperties(); + firstProp.put("key0", "true"); + firstProp.put("key1", "false"); + firstProp.put("key2", "true"); + + TypedProperties firstProperties = new TypedProperties(firstProp); + assertTypeProperties(firstProperties, 0); + + TypedProperties secondProperties = new TypedProperties(); + secondProperties.put("key3", "true"); + secondProperties.put("key4", "false"); + secondProperties.put("key5", "true"); + assertTypeProperties(secondProperties, 3); + + TypedProperties thirdProperties = new TypedProperties(); + thirdProperties.putAll(firstProp); + thirdProperties.putAll(secondProperties); + + assertEquals(3, firstProp.stringPropertyNames().size()); + assertEquals(3, secondProperties.stringPropertyNames().size()); + assertEquals(6, thirdProperties.stringPropertyNames().size()); + } + + private void assertTypeProperties(TypedProperties typedProperties, int start) { + String[] props = typedProperties.stringPropertyNames().toArray(new String[0]); + for (int i = start; i < props.length; i++) { + assertEquals(String.format("key%d", i), props[i]); + } + } } diff --git a/hudi-common/src/test/java/org/apache/hudi/common/table/TestHoodieTableConfig.java b/hudi-common/src/test/java/org/apache/hudi/common/table/TestHoodieTableConfig.java index 73d101cf2c71f..f21d8e6dc37e5 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/table/TestHoodieTableConfig.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/table/TestHoodieTableConfig.java @@ -64,7 +64,7 @@ public void setUp() throws Exception { public void testCreate() throws IOException { assertTrue(fs.exists(new Path(metaPath, HoodieTableConfig.HOODIE_PROPERTIES_FILE))); HoodieTableConfig config = new HoodieTableConfig(fs, metaPath.toString(), null); - assertEquals(4, config.getProps().size()); + assertEquals(5, config.getProps().size()); } @Test @@ -77,7 +77,7 @@ public void testUpdate() throws IOException { assertTrue(fs.exists(cfgPath)); assertFalse(fs.exists(backupCfgPath)); HoodieTableConfig config = new HoodieTableConfig(fs, metaPath.toString(), null); - assertEquals(5, config.getProps().size()); + assertEquals(6, config.getProps().size()); assertEquals("test-table2", config.getTableName()); assertEquals("new_field", config.getPreCombineField()); } @@ -90,7 +90,7 @@ public void testDelete() throws IOException { assertTrue(fs.exists(cfgPath)); 
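// (Note: the expected property counts in these assertions are each one higher than before, presumably because hoodie.properties now also carries the HoodieTableConfig.TABLE_CHECKSUM entry that TestHoodieTableMetaClient.checkMetadata validates below.)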
assertFalse(fs.exists(backupCfgPath)); HoodieTableConfig config = new HoodieTableConfig(fs, metaPath.toString(), null); - assertEquals(3, config.getProps().size()); + assertEquals(4, config.getProps().size()); assertNull(config.getProps().getProperty("hoodie.invalid.config")); assertFalse(config.getProps().contains(HoodieTableConfig.ARCHIVELOG_FOLDER.key())); } @@ -114,7 +114,7 @@ public void testReadsWithUpdateFailures() throws IOException { assertFalse(fs.exists(cfgPath)); assertTrue(fs.exists(backupCfgPath)); config = new HoodieTableConfig(fs, metaPath.toString(), null); - assertEquals(4, config.getProps().size()); + assertEquals(5, config.getProps().size()); } @ParameterizedTest @@ -132,6 +132,6 @@ public void testUpdateRecovery(boolean shouldPropsFileExist) throws IOException assertTrue(fs.exists(cfgPath)); assertFalse(fs.exists(backupCfgPath)); config = new HoodieTableConfig(fs, metaPath.toString(), null); - assertEquals(4, config.getProps().size()); + assertEquals(5, config.getProps().size()); } } diff --git a/hudi-common/src/test/java/org/apache/hudi/common/table/TestHoodieTableMetaClient.java b/hudi-common/src/test/java/org/apache/hudi/common/table/TestHoodieTableMetaClient.java index 586a451065823..840e6ddf4ad3f 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/table/TestHoodieTableMetaClient.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/table/TestHoodieTableMetaClient.java @@ -54,6 +54,8 @@ public void checkMetadata() { assertEquals(basePath, metaClient.getBasePath(), "Basepath should be the one assigned"); assertEquals(basePath + "/.hoodie", metaClient.getMetaPath(), "Metapath should be ${basepath}/.hoodie"); + assertTrue(metaClient.getTableConfig().getProps().containsKey(HoodieTableConfig.TABLE_CHECKSUM.key())); + assertTrue(HoodieTableConfig.validateChecksum(metaClient.getTableConfig().getProps())); } @Test diff --git a/hudi-common/src/test/java/org/apache/hudi/common/table/TestTimelineUtils.java b/hudi-common/src/test/java/org/apache/hudi/common/table/TestTimelineUtils.java index 9397295013ea1..22ceb5bfef373 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/table/TestTimelineUtils.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/table/TestTimelineUtils.java @@ -270,8 +270,7 @@ private HoodieRollbackMetadata getRollbackMetadataInstance(String basePath, Stri List rollbacks = new ArrayList<>(); rollbacks.add(new HoodieInstant(false, actionType, commitTs)); - HoodieRollbackStat rollbackStat = new HoodieRollbackStat(partition, deletedFiles, Collections.emptyList(), Collections.emptyMap(), - Collections.EMPTY_MAP); + HoodieRollbackStat rollbackStat = new HoodieRollbackStat(partition, deletedFiles, Collections.emptyList(), Collections.emptyMap()); List rollbackStats = new ArrayList<>(); rollbackStats.add(rollbackStat); return TimelineMetadataUtils.convertRollbackMetadata(commitTs, Option.empty(), rollbacks, rollbackStats); diff --git a/hudi-common/src/test/java/org/apache/hudi/common/table/timeline/TestHoodieActiveTimeline.java b/hudi-common/src/test/java/org/apache/hudi/common/table/timeline/TestHoodieActiveTimeline.java index 9d89c2a6b5feb..576cfd7cb0f3f 100755 --- a/hudi-common/src/test/java/org/apache/hudi/common/table/timeline/TestHoodieActiveTimeline.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/table/timeline/TestHoodieActiveTimeline.java @@ -126,6 +126,7 @@ public void testLoadingInstantsFromFiles() throws IOException { HoodieActiveTimeline oldTimeline = new HoodieActiveTimeline( 
HoodieTableMetaClient.builder().setConf(metaClient.getHadoopConf()).setBasePath(metaClient.getBasePath()) .setLoadActiveTimelineOnLoad(true).setConsistencyGuardConfig(metaClient.getConsistencyGuardConfig()) + .setFileSystemRetryConfig(metaClient.getFileSystemRetryConfig()) .setLayoutVersion(Option.of(new TimelineLayoutVersion(VERSION_0))).build()); // Old Timeline writes both to aux and timeline folder oldTimeline.saveToCompactionRequested(instant6, Option.of(dummy)); diff --git a/hudi-common/src/test/java/org/apache/hudi/common/table/view/TestHoodieTableFileSystemView.java b/hudi-common/src/test/java/org/apache/hudi/common/table/view/TestHoodieTableFileSystemView.java index 924c6724e7b22..54bc138fc8f84 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/table/view/TestHoodieTableFileSystemView.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/table/view/TestHoodieTableFileSystemView.java @@ -31,6 +31,7 @@ import org.apache.hudi.common.bootstrap.index.BootstrapIndex.IndexWriter; import org.apache.hudi.common.bootstrap.index.HFileBootstrapIndex; import org.apache.hudi.common.fs.FSUtils; +import org.apache.hudi.common.fs.HoodieWrapperFileSystem; import org.apache.hudi.common.model.BaseFile; import org.apache.hudi.common.model.BootstrapFileMapping; import org.apache.hudi.common.model.CompactionOperation; @@ -41,6 +42,7 @@ import org.apache.hudi.common.model.HoodieFileGroupId; import org.apache.hudi.common.model.HoodieLogFile; import org.apache.hudi.common.model.HoodieTableType; +import org.apache.hudi.common.model.HoodieWriteStat; import org.apache.hudi.common.model.WriteOperationType; import org.apache.hudi.common.table.timeline.HoodieActiveTimeline; import org.apache.hudi.common.table.timeline.HoodieInstant; @@ -50,12 +52,15 @@ import org.apache.hudi.common.table.view.TableFileSystemView.BaseFileOnlyView; import org.apache.hudi.common.table.view.TableFileSystemView.SliceView; import org.apache.hudi.common.testutils.HoodieCommonTestHarness; +import org.apache.hudi.common.testutils.HoodieTestTable; import org.apache.hudi.common.testutils.HoodieTestUtils; import org.apache.hudi.common.util.ClusteringUtils; import org.apache.hudi.common.util.CommitUtils; import org.apache.hudi.common.util.CompactionUtils; import org.apache.hudi.common.util.Option; +import org.apache.hudi.common.util.collection.ImmutablePair; import org.apache.hudi.common.util.collection.Pair; + import org.apache.log4j.LogManager; import org.apache.log4j.Logger; import org.junit.jupiter.api.BeforeEach; @@ -1537,6 +1542,234 @@ public void testPendingClusteringOperations() throws IOException { assertFalse(fileIds.contains(fileId3)); } + /** + * + * create hoodie table like + * . + * ├── .hoodie + * │   ├── .aux + * │   │   └── .bootstrap + * │   │   ├── .fileids + * │   │   └── .partitions + * │   ├── .temp + * │   ├── 1.commit + * │   ├── 1.commit.requested + * │   ├── 1.inflight + * │   ├── 2.replacecommit + * │   ├── 2.replacecommit.inflight + * │   ├── 2.replacecommit.requested + * │   ├── 3.commit + * │   ├── 3.commit.requested + * │   ├── 3.inflight + * │   ├── archived + * │   └── hoodie.properties + * └── 2020 + * └── 06 + * └── 27 + * ├── 5fe477d2-0150-46d4-833c-1e9cc8da9948_1-0-1_3.parquet + * ├── 7e3208c8-fdec-4254-9682-8fff1e51ee8d_1-0-1_2.parquet + * ├── e04b0e2d-1467-46b2-8ea6-f4fe950965a5_1-0-1_1.parquet + * └── f3936b66-b3db-4fc8-a6d0-b1a7559016e6_1-0-1_1.parquet + * + * First test fsView API with finished clustering: + * 1. getLatestBaseFilesBeforeOrOn + * 2. getBaseFileOn + * 3. 
getLatestBaseFilesInRange + * 4. getAllBaseFiles + * 5. getLatestBaseFiles + * + * Then remove 2.replacecommit, 1.commit, 1.commit.requested, 1.inflight to simulate + * pending clustering at the earliest position in the active timeline and test these APIs again. + * + * @throws IOException + */ + @Test + public void testHoodieTableFileSystemViewWithPendingClustering() throws IOException { + List latestBaseFilesBeforeOrOn; + Option baseFileOn; + List latestBaseFilesInRange; + List allBaseFiles; + List latestBaseFiles; + List latestBaseFilesPerPartition; + String partitionPath = "2020/06/27"; + new File(basePath + "/" + partitionPath).mkdirs(); + HoodieActiveTimeline commitTimeline = metaClient.getActiveTimeline(); + + // will create 5 fileId in partition. + // fileId1 and fileId2 will be replaced by fileID3 + // fileId4 and fileId5 will be committed after clustering finished. + String fileId1 = UUID.randomUUID().toString(); + String fileId2 = UUID.randomUUID().toString(); + String fileId3 = UUID.randomUUID().toString(); + String fileId4 = UUID.randomUUID().toString(); + String fileId5 = UUID.randomUUID().toString(); + + assertFalse(roView.getLatestBaseFiles(partitionPath) + .anyMatch(dfile -> dfile.getFileId().equals(fileId1) + || dfile.getFileId().equals(fileId2) + || dfile.getFileId().equals(fileId3) + || dfile.getFileId().equals(fileId4) + || dfile.getFileId().equals(fileId5)), + "No commit, should not find any data file"); + + // first insert commit + String commitTime1 = "1"; + String fileName1 = FSUtils.makeDataFileName(commitTime1, TEST_WRITE_TOKEN, fileId1); + String fileName2 = FSUtils.makeDataFileName(commitTime1, TEST_WRITE_TOKEN, fileId2); + new File(basePath + "/" + partitionPath + "/" + fileName1).createNewFile(); + new File(basePath + "/" + partitionPath + "/" + fileName2).createNewFile(); + + HoodieInstant instant1 = new HoodieInstant(true, HoodieTimeline.COMMIT_ACTION, commitTime1); + + // build writeStats + HashMap> partitionToFile1 = new HashMap<>(); + ArrayList files1 = new ArrayList<>(); + files1.add(fileId1); + files1.add(fileId2); + partitionToFile1.put(partitionPath, files1); + List writeStats1 = buildWriteStats(partitionToFile1, commitTime1); + + HoodieCommitMetadata commitMetadata1 = + CommitUtils.buildMetadata(writeStats1, new HashMap<>(), Option.empty(), WriteOperationType.INSERT, "", HoodieTimeline.COMMIT_ACTION); + saveAsComplete(commitTimeline, instant1, Option.of(commitMetadata1.toJsonString().getBytes(StandardCharsets.UTF_8))); + commitTimeline.reload(); + + // replace commit + String commitTime2 = "2"; + String fileName3 = FSUtils.makeDataFileName(commitTime2, TEST_WRITE_TOKEN, fileId3); + new File(basePath + "/" + partitionPath + "/" + fileName3).createNewFile(); + + HoodieInstant instant2 = new HoodieInstant(true, HoodieTimeline.REPLACE_COMMIT_ACTION, commitTime2); + Map> partitionToReplaceFileIds = new HashMap<>(); + List replacedFileIds = new ArrayList<>(); + replacedFileIds.add(fileId1); + replacedFileIds.add(fileId2); + partitionToReplaceFileIds.put(partitionPath, replacedFileIds); + + HashMap> partitionToFile2 = new HashMap<>(); + ArrayList files2 = new ArrayList<>(); + files2.add(fileId3); + partitionToFile2.put(partitionPath, files2); + List writeStats2 = buildWriteStats(partitionToFile2, commitTime2); + + HoodieCommitMetadata commitMetadata2 = + CommitUtils.buildMetadata(writeStats2, partitionToReplaceFileIds, Option.empty(), WriteOperationType.INSERT_OVERWRITE, "", HoodieTimeline.REPLACE_COMMIT_ACTION); + saveAsComplete(commitTimeline, instant2, 
+
+    // another insert commit
+    String commitTime3 = "3";
+    String fileName4 = FSUtils.makeDataFileName(commitTime3, TEST_WRITE_TOKEN, fileId4);
+    new File(basePath + "/" + partitionPath + "/" + fileName4).createNewFile();
+    HoodieInstant instant3 = new HoodieInstant(true, HoodieTimeline.COMMIT_ACTION, commitTime3);
+
+    // build writeStats
+    HashMap<String, List<String>> partitionToFile3 = new HashMap<>();
+    ArrayList<String> files3 = new ArrayList<>();
+    files3.add(fileId4);
+    partitionToFile3.put(partitionPath, files3);
+    List<HoodieWriteStat> writeStats3 = buildWriteStats(partitionToFile3, commitTime3);
+    HoodieCommitMetadata commitMetadata3 =
+        CommitUtils.buildMetadata(writeStats3, new HashMap<>(), Option.empty(), WriteOperationType.INSERT, "", HoodieTimeline.COMMIT_ACTION);
+    saveAsComplete(commitTimeline, instant3, Option.of(commitMetadata3.toJsonString().getBytes(StandardCharsets.UTF_8)));
+
+    metaClient.reloadActiveTimeline();
+    refreshFsView();
+
+    ArrayList<String> commits = new ArrayList<>();
+    commits.add(commitTime1);
+    commits.add(commitTime2);
+    commits.add(commitTime3);
+
+    // do check
+    latestBaseFilesBeforeOrOn = fsView.getLatestBaseFilesBeforeOrOn(partitionPath, commitTime3).map(HoodieBaseFile::getFileId).collect(Collectors.toList());
+    assertEquals(2, latestBaseFilesBeforeOrOn.size());
+    assertTrue(latestBaseFilesBeforeOrOn.contains(fileId3));
+    assertTrue(latestBaseFilesBeforeOrOn.contains(fileId4));
+
+    // could see fileId3 because clustering is committed.
+    baseFileOn = fsView.getBaseFileOn(partitionPath, commitTime2, fileId3);
+    assertTrue(baseFileOn.isPresent());
+    assertEquals(baseFileOn.get().getFileId(), fileId3);
+
+    latestBaseFilesInRange = fsView.getLatestBaseFilesInRange(commits).map(HoodieBaseFile::getFileId).collect(Collectors.toList());
+    assertEquals(2, latestBaseFilesInRange.size());
+    assertTrue(latestBaseFilesInRange.contains(fileId3));
+    assertTrue(latestBaseFilesInRange.contains(fileId4));
+
+    allBaseFiles = fsView.getAllBaseFiles(partitionPath).map(HoodieBaseFile::getFileId).collect(Collectors.toList());
+    assertEquals(2, allBaseFiles.size());
+    assertTrue(allBaseFiles.contains(fileId3));
+    assertTrue(allBaseFiles.contains(fileId4));
+
+    // could see fileId3 because clustering is committed.
+    latestBaseFiles = fsView.getLatestBaseFiles().map(HoodieBaseFile::getFileId).collect(Collectors.toList());
+    assertEquals(2, latestBaseFiles.size());
+    assertTrue(latestBaseFiles.contains(fileId3));
+    assertTrue(latestBaseFiles.contains(fileId4));
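A quick trace of the timeline state these assertions encode (editor's summary; note that the visible code never actually writes fileId5, despite the comment at the top of the test):

```
"1".commit         -> writes fileId1, fileId2
"2".replacecommit  -> writes fileId3, replaces {fileId1, fileId2}   (clustering committed)
"3".commit         -> writes fileId4
latest file-system view => {fileId3, fileId4}
```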
+    // could see fileId3 because clustering is committed.
+    latestBaseFilesPerPartition = fsView.getLatestBaseFiles(partitionPath).map(HoodieBaseFile::getFileId).collect(Collectors.toList());
+    assertEquals(2, latestBaseFilesPerPartition.size());
+    assertTrue(latestBaseFilesPerPartition.contains(fileId3));
+    assertTrue(latestBaseFilesPerPartition.contains(fileId4));
+
+    HoodieWrapperFileSystem fs = metaClient.getFs();
+    fs.delete(new Path(basePath + "/.hoodie", "1.commit"), false);
+    fs.delete(new Path(basePath + "/.hoodie", "1.inflight"), false);
+    fs.delete(new Path(basePath + "/.hoodie", "1.commit.requested"), false);
+    fs.delete(new Path(basePath + "/.hoodie", "2.replacecommit"), false);
+
+    metaClient.reloadActiveTimeline();
+    refreshFsView();
+    // do check after deleting some commit files
+    latestBaseFilesBeforeOrOn = fsView.getLatestBaseFilesBeforeOrOn(partitionPath, commitTime3).map(HoodieBaseFile::getFileId).collect(Collectors.toList());
+    assertEquals(3, latestBaseFilesBeforeOrOn.size());
+    assertTrue(latestBaseFilesBeforeOrOn.contains(fileId1));
+    assertTrue(latestBaseFilesBeforeOrOn.contains(fileId2));
+    assertTrue(latestBaseFilesBeforeOrOn.contains(fileId4));
+
+    // couldn't see fileId3 because clustering is not committed.
+    baseFileOn = fsView.getBaseFileOn(partitionPath, commitTime2, fileId3);
+    assertFalse(baseFileOn.isPresent());
+
+    latestBaseFilesInRange = fsView.getLatestBaseFilesInRange(commits).map(HoodieBaseFile::getFileId).collect(Collectors.toList());
+    assertEquals(3, latestBaseFilesInRange.size());
+    assertTrue(latestBaseFilesInRange.contains(fileId1));
+    assertTrue(latestBaseFilesInRange.contains(fileId2));
+    assertTrue(latestBaseFilesInRange.contains(fileId4));
+
+    allBaseFiles = fsView.getAllBaseFiles(partitionPath).map(HoodieBaseFile::getFileId).collect(Collectors.toList());
+    assertEquals(3, allBaseFiles.size());
+    assertTrue(allBaseFiles.contains(fileId1));
+    assertTrue(allBaseFiles.contains(fileId2));
+    assertTrue(allBaseFiles.contains(fileId4));
+
+    // couldn't see fileId3 because clustering is not committed.
+    latestBaseFiles = fsView.getLatestBaseFiles().map(HoodieBaseFile::getFileId).collect(Collectors.toList());
+    assertEquals(3, latestBaseFiles.size());
+    assertTrue(latestBaseFiles.contains(fileId1));
+    assertTrue(latestBaseFiles.contains(fileId2));
+    assertTrue(latestBaseFiles.contains(fileId4));
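How the fs.delete calls above simulate pending clustering (editor's note): removing all three meta files of instant 1 erases that commit from the active timeline entirely, while removing only the completed 2.replacecommit leaves its requested/inflight files behind, so the clustering becomes a pending operation at the head of the timeline. The view then hides fileId3 (written by the now-pending clustering) and surfaces fileId1/fileId2 again; the base files of the erased commit 1 stay visible, presumably because their instant time now falls before the start of the active timeline. In `.hoodie` terms:

```
before                          after the four deletes
  1.commit.requested              (gone)
  1.inflight                      (gone)
  1.commit                        (gone)
  2.replacecommit.requested       2.replacecommit.requested
  2.replacecommit.inflight        2.replacecommit.inflight
  2.replacecommit                 (gone -> clustering pending)
  3.* (commit files)              3.* (commit files)
```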
+    // couldn't see fileId3 because clustering is not committed.
+    latestBaseFilesPerPartition = fsView.getLatestBaseFiles(partitionPath).map(HoodieBaseFile::getFileId).collect(Collectors.toList());
+    assertEquals(3, latestBaseFilesPerPartition.size());
+    assertTrue(latestBaseFilesPerPartition.contains(fileId1));
+    assertTrue(latestBaseFilesPerPartition.contains(fileId2));
+    assertTrue(latestBaseFilesPerPartition.contains(fileId4));
+  }
+
+  // Generates a HoodieWriteStat for every file written in the given partitions.
+  private List<HoodieWriteStat> buildWriteStats(HashMap<String, List<String>> partitionToFileIds, String commitTime) {
+    HashMap<String, List<Pair<String, Integer>>> maps = new HashMap<>();
+    for (String partition : partitionToFileIds.keySet()) {
+      List<Pair<String, Integer>> list = partitionToFileIds.get(partition).stream().map(fileId -> new ImmutablePair<>(fileId, 0)).collect(Collectors.toList());
+      maps.put(partition, list);
+    }
+    return HoodieTestTable.generateHoodieWriteStatForPartition(maps, commitTime, false);
+  }
+
   @Override
   protected HoodieTableType getTableType() {
     return HoodieTableType.MERGE_ON_READ;
diff --git a/hudi-common/src/test/java/org/apache/hudi/common/table/view/TestIncrementalFSViewSync.java b/hudi-common/src/test/java/org/apache/hudi/common/table/view/TestIncrementalFSViewSync.java
index 0bcebaf71e9ff..a9c9db303f328 100644
--- a/hudi-common/src/test/java/org/apache/hudi/common/table/view/TestIncrementalFSViewSync.java
+++ b/hudi-common/src/test/java/org/apache/hudi/common/table/view/TestIncrementalFSViewSync.java
@@ -556,7 +556,7 @@ private void performRestore(HoodieInstant instant, List files, String ro
                               boolean isRestore) throws IOException {
     Map<String, List<String>> partititonToFiles = deleteFiles(files);
     List<HoodieRollbackStat> rollbackStats = partititonToFiles.entrySet().stream().map(e ->
-        new HoodieRollbackStat(e.getKey(), e.getValue(), new ArrayList<>(), new HashMap<>(), new HashMap<>())
+        new HoodieRollbackStat(e.getKey(), e.getValue(), new ArrayList<>(), new HashMap<>())
     ).collect(Collectors.toList());
 
     List rollbacks = new ArrayList<>();
diff --git a/hudi-common/src/test/java/org/apache/hudi/common/testutils/FixtureUtils.java b/hudi-common/src/test/java/org/apache/hudi/common/testutils/FixtureUtils.java
deleted file mode 100644
index 6dfe0da797f8e..0000000000000
--- a/hudi-common/src/test/java/org/apache/hudi/common/testutils/FixtureUtils.java
+++ /dev/null
@@ -1,81 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */ - -package org.apache.hudi.common.testutils; - -import java.io.File; -import java.io.FileInputStream; -import java.io.FileOutputStream; -import java.io.IOException; -import java.net.URL; -import java.nio.file.Path; -import java.util.Objects; -import java.util.zip.ZipEntry; -import java.util.zip.ZipInputStream; - -public final class FixtureUtils { - - public static Path prepareFixtureTable(URL fixtureResource, Path basePath) throws IOException { - File zippedFixtureTable = new File(fixtureResource.getFile()); - try (ZipInputStream zis = new ZipInputStream(new FileInputStream(zippedFixtureTable))) { - byte[] buffer = new byte[1024]; - ZipEntry zipEntry = zis.getNextEntry(); - Path tableBasePath = basePath.resolve(Objects.requireNonNull(zipEntry).getName() - .replaceAll(File.separator + "$", "")); - while (zipEntry != null) { - File newFile = newFile(basePath.toFile(), zipEntry); - if (zipEntry.isDirectory()) { - if (!newFile.isDirectory() && !newFile.mkdirs()) { - throw new IOException("Failed to create directory " + newFile); - } - } else { - // fix for Windows-created archives - File parent = newFile.getParentFile(); - if (!parent.isDirectory() && !parent.mkdirs()) { - throw new IOException("Failed to create directory " + parent); - } - - // write file content - try (FileOutputStream fos = new FileOutputStream(newFile)) { - int len; - while ((len = zis.read(buffer)) > 0) { - fos.write(buffer, 0, len); - } - } - } - zipEntry = zis.getNextEntry(); - } - zis.closeEntry(); - return tableBasePath; - } - } - - public static File newFile(File destinationDir, ZipEntry zipEntry) throws IOException { - File destFile = new File(destinationDir, zipEntry.getName()); - - String destDirPath = destinationDir.getCanonicalPath(); - String destFilePath = destFile.getCanonicalPath(); - - if (!destFilePath.startsWith(destDirPath + File.separator)) { - throw new IOException("Entry is outside of the target dir: " + zipEntry.getName()); - } - - return destFile; - } -} diff --git a/hudi-common/src/test/java/org/apache/hudi/common/testutils/HadoopMapRedUtils.java b/hudi-common/src/test/java/org/apache/hudi/common/testutils/HadoopMapRedUtils.java new file mode 100644 index 0000000000000..a06039b5fba35 --- /dev/null +++ b/hudi-common/src/test/java/org/apache/hudi/common/testutils/HadoopMapRedUtils.java @@ -0,0 +1,87 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package org.apache.hudi.common.testutils;
+
+import org.apache.hadoop.mapred.Counters;
+import org.apache.hadoop.mapred.InputSplit;
+import org.apache.hadoop.mapred.Reporter;
+import org.apache.hudi.common.util.Option;
+
+import java.util.concurrent.ConcurrentHashMap;
+
+public class HadoopMapRedUtils {
+
+  /**
+   * Creates an instance of {@link Reporter} to collect reported counters.
+   */
+  public static Reporter createTestReporter() {
+    class TestReporter implements Reporter {
+      private final ConcurrentHashMap<String, Counters.Counter> counters =
+          new ConcurrentHashMap<>();
+
+      @Override
+      public void setStatus(String status) {
+        // not-supported
+      }
+
+      @Override
+      public Counters.Counter getCounter(Enum<?> name) {
+        return counters.computeIfAbsent(name.name(), (ignored) -> new Counters.Counter());
+      }
+
+      @Override
+      public Counters.Counter getCounter(String group, String name) {
+        return counters.computeIfAbsent(getKey(group, name), (ignored) -> new Counters.Counter());
+      }
+
+      @Override
+      public void incrCounter(Enum<?> key, long amount) {
+        Option.ofNullable(counters.get(key.name()))
+            .ifPresent(c -> c.increment(amount));
+      }
+
+      @Override
+      public void incrCounter(String group, String counter, long amount) {
+        Option.ofNullable(counters.get(getKey(group, counter)))
+            .ifPresent(c -> c.increment(amount));
+      }
+
+      @Override
+      public InputSplit getInputSplit() throws UnsupportedOperationException {
+        throw new UnsupportedOperationException("not supported");
+      }
+
+      @Override
+      public float getProgress() {
+        return -1;
+      }
+
+      @Override
+      public void progress() {
+        // not-supported
+      }
+
+      private String getKey(String group, String name) {
+        return String.format("%s:%s", group, name);
+      }
+    }
+
+    return new TestReporter();
+  }
+}
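For orientation (editor's note, not part of the patch): the reporter only creates counters through the two getCounter overloads, while the incrCounter overloads silently drop updates for counters that were never fetched. A hypothetical usage from a test would look like this:

```java
import org.apache.hadoop.mapred.Reporter;

Reporter reporter = HadoopMapRedUtils.createTestReporter();

// incrCounter() on a counter that was never created is a silent no-op:
reporter.incrCounter("parquet", "bytes.read", 512);

// getCounter() lazily creates the counter, after which increments stick:
reporter.getCounter("parquet", "bytes.read").increment(1024);
long bytesRead = reporter.getCounter("parquet", "bytes.read").getValue(); // 1024
```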
diff --git a/hudi-common/src/test/java/org/apache/hudi/common/testutils/HoodieTestDataGenerator.java b/hudi-common/src/test/java/org/apache/hudi/common/testutils/HoodieTestDataGenerator.java
index 21816a56c2db2..3e147b7fdd47c 100644
--- a/hudi-common/src/test/java/org/apache/hudi/common/testutils/HoodieTestDataGenerator.java
+++ b/hudi-common/src/test/java/org/apache/hudi/common/testutils/HoodieTestDataGenerator.java
@@ -19,10 +19,22 @@
 package org.apache.hudi.common.testutils;
 
+import org.apache.avro.Conversions;
+import org.apache.avro.LogicalTypes;
+import org.apache.avro.Schema;
+import org.apache.avro.generic.GenericArray;
+import org.apache.avro.generic.GenericData;
+import org.apache.avro.generic.GenericFixed;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
 import org.apache.hudi.avro.HoodieAvroUtils;
 import org.apache.hudi.avro.model.HoodieCompactionPlan;
 import org.apache.hudi.common.fs.FSUtils;
 import org.apache.hudi.common.model.HoodieAvroPayload;
+import org.apache.hudi.common.model.HoodieAvroRecord;
 import org.apache.hudi.common.model.HoodieCommitMetadata;
 import org.apache.hudi.common.model.HoodieKey;
 import org.apache.hudi.common.model.HoodiePartitionMetadata;
@@ -33,29 +45,22 @@
 import org.apache.hudi.common.table.timeline.TimelineMetadataUtils;
 import org.apache.hudi.common.util.AvroOrcUtils;
 import org.apache.hudi.common.util.Option;
+import org.apache.hudi.exception.HoodieException;
 import org.apache.hudi.exception.HoodieIOException;
-
-import org.apache.avro.Conversions;
-import org.apache.avro.LogicalTypes;
-import org.apache.avro.Schema;
-import org.apache.avro.generic.GenericArray;
-import org.apache.avro.generic.GenericData;
-import org.apache.avro.generic.GenericFixed;
-import org.apache.avro.generic.GenericRecord;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FSDataOutputStream;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
 import org.apache.log4j.LogManager;
 import org.apache.log4j.Logger;
 import org.apache.orc.TypeDescription;
 
 import java.io.IOException;
 import java.io.Serializable;
+import java.lang.reflect.Constructor;
+import java.lang.reflect.InvocationTargetException;
 import java.math.BigDecimal;
 import java.nio.ByteBuffer;
 import java.nio.charset.StandardCharsets;
-import java.sql.Date;
+import java.time.Instant;
+import java.time.LocalDateTime;
+import java.time.ZoneOffset;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
@@ -139,7 +144,7 @@ public class HoodieTestDataGenerator implements AutoCloseable {
   public static final TypeDescription ORC_TRIP_SCHEMA = AvroOrcUtils.createOrcSchema(new Schema.Parser().parse(TRIP_SCHEMA));
   public static final Schema FLATTENED_AVRO_SCHEMA = new Schema.Parser().parse(TRIP_FLATTENED_SCHEMA);
 
-  private static final Random RAND = new Random(46474747);
+  private final Random rand;
 
   //Maintains all the existing keys schema wise
   private final Map<String, Map<Integer, KeyPartition>> existingKeysBySchema;
@@ -147,27 +152,58 @@
   //maintains the count of existing keys schema wise
   private Map<String, Integer> numKeysBySchema;
 
+  public HoodieTestDataGenerator(long seed) {
+    this(seed, DEFAULT_PARTITION_PATHS, new HashMap<>());
+  }
+
+  public HoodieTestDataGenerator(long seed, String[] partitionPaths, Map<Integer, KeyPartition> keyPartitionMap) {
+    this.rand = new Random(seed);
+    this.partitionPaths = Arrays.copyOf(partitionPaths, partitionPaths.length);
+    this.existingKeysBySchema = new HashMap<>();
+    this.existingKeysBySchema.put(TRIP_EXAMPLE_SCHEMA, keyPartitionMap);
+    this.numKeysBySchema = new HashMap<>();
+    this.numKeysBySchema.put(TRIP_EXAMPLE_SCHEMA, keyPartitionMap.size());
+
+    logger.info(String.format("Test DataGenerator's seed (%s)", seed));
+  }
+
+  //////////////////////////////////////////////////////////////////////////////////
+  // DEPRECATED API
+  //////////////////////////////////////////////////////////////////////////////////
+
+  @Deprecated
   public HoodieTestDataGenerator(String[] partitionPaths) {
     this(partitionPaths, new HashMap<>());
   }
 
+  @Deprecated
   public HoodieTestDataGenerator() {
     this(DEFAULT_PARTITION_PATHS);
   }
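Editor's note on the new constructors: seeding the generator makes test data reproducible across runs, while the deprecated constructors that follow keep the old "always different" behavior via System.nanoTime(). A hypothetical snippet pinning the seed (the literal seed and counts are arbitrary):

```java
// Two generators built with the same seed produce identical pseudo-random records.
HoodieTestDataGenerator dataGen = new HoodieTestDataGenerator(0xDEEDL);
List<HoodieRecord> inserts = dataGen.generateInserts("000", 10);
List<HoodieRecord> updates = dataGen.generateUniqueUpdates("001", 5);
```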
 
+  @Deprecated
   public HoodieTestDataGenerator(String[] partitionPaths, Map<Integer, KeyPartition> keyPartitionMap) {
-    this.partitionPaths = Arrays.copyOf(partitionPaths, partitionPaths.length);
-    this.existingKeysBySchema = new HashMap<>();
-    existingKeysBySchema.put(TRIP_EXAMPLE_SCHEMA, keyPartitionMap);
-    numKeysBySchema = new HashMap<>();
-    numKeysBySchema.put(TRIP_EXAMPLE_SCHEMA, keyPartitionMap.size());
+    // NOTE: This is used as a workaround to make sure that new instantiations of the generator
+    // always return "new" random values.
+    // Caveat is that if 2 successive invocations are made w/in the timespan that is smaller
+    // than the resolution of {@code nanoTime}, then this will produce identical results.
+    this(System.nanoTime(), partitionPaths, keyPartitionMap);
+  }
+
+  /**
+   * @deprecated please use the non-static version
+   */
+  public static void writePartitionMetadataDeprecated(FileSystem fs, String[] partitionPaths, String basePath) {
+    new HoodieTestDataGenerator().writePartitionMetadata(fs, partitionPaths, basePath);
+  }
+
+  //////////////////////////////////////////////////////////////////////////////////
+
   /**
    * @implNote {@link HoodieTestDataGenerator} is supposed to just generate records with schemas. Leave HoodieTable files
    *     (metafile, basefile, logfile, etc) to {@link HoodieTestTable}.
    * @deprecated Use {@link HoodieTestTable#withPartitionMetaFiles(java.lang.String...)} instead.
    */
-  public static void writePartitionMetadata(FileSystem fs, String[] partitionPaths, String basePath) {
+  public void writePartitionMetadata(FileSystem fs, String[] partitionPaths, String basePath) {
     for (String partitionPath : partitionPaths) {
       new HoodiePartitionMetadata(fs, "000", new Path(basePath), new Path(basePath, partitionPath)).trySave(0);
     }
@@ -197,7 +233,7 @@ public RawTripTestPayload generateRandomValueAsPerSchema(String schemaStr, Hoodi
    * @param instantTime Instant time to use.
    * @return Raw payload of a test record.
    */
-  public static RawTripTestPayload generateRandomValue(HoodieKey key, String instantTime) throws IOException {
+  public RawTripTestPayload generateRandomValue(HoodieKey key, String instantTime) throws IOException {
     return generateRandomValue(key, instantTime, false);
   }
 
@@ -211,12 +247,12 @@ public static RawTripTestPayload generateRandomValue(HoodieKey key, String insta
    * @return Raw payload of a test record.
    * @throws IOException
    */
-  public static RawTripTestPayload generateRandomValue(
+  private RawTripTestPayload generateRandomValue(
       HoodieKey key, String instantTime, boolean isFlattened) throws IOException {
     return generateRandomValue(key, instantTime, isFlattened, 0);
   }
 
-  public static RawTripTestPayload generateRandomValue(
+  private RawTripTestPayload generateRandomValue(
       HoodieKey key, String instantTime, boolean isFlattened, int ts) throws IOException {
     GenericRecord rec = generateGenericRecord(
         key.getRecordKey(), key.getPartitionPath(), "rider-" + instantTime, "driver-" + instantTime, ts,
@@ -240,7 +276,7 @@ public RawTripTestPayload generatePayloadForShortTripSchema(HoodieKey key, Strin
   /**
    * Generates a new avro record of the above schema format for a delete.
    */
-  public static RawTripTestPayload generateRandomDeleteValue(HoodieKey key, String instantTime) throws IOException {
+  private RawTripTestPayload generateRandomDeleteValue(HoodieKey key, String instantTime) throws IOException {
     GenericRecord rec = generateGenericRecord(key.getRecordKey(), key.getPartitionPath(), "rider-" + instantTime,
         "driver-" + instantTime, 0, true, false);
     return new RawTripTestPayload(Option.of(rec.toString()), key.getRecordKey(), key.getPartitionPath(),
         TRIP_EXAMPLE_SCHEMA, true, 0L);
@@ -249,17 +285,17 @@ public static RawTripTestPayload generateRandomDeleteValue(HoodieKey key, String
   /**
    * Generates a new avro record of the above schema format, retaining the key if optionally provided.
*/ - public static HoodieAvroPayload generateAvroPayload(HoodieKey key, String instantTime) { + private HoodieAvroPayload generateAvroPayload(HoodieKey key, String instantTime) { GenericRecord rec = generateGenericRecord(key.getRecordKey(), key.getPartitionPath(), "rider-" + instantTime, "driver-" + instantTime, 0); return new HoodieAvroPayload(Option.of(rec)); } - public static GenericRecord generateGenericRecord(String rowKey, String partitionPath, String riderName, String driverName, - long timestamp) { + public GenericRecord generateGenericRecord(String rowKey, String partitionPath, String riderName, String driverName, + long timestamp) { return generateGenericRecord(rowKey, partitionPath, riderName, driverName, timestamp, false, false); } - public static GenericRecord generateGenericRecord(String rowKey, String partitionPath, String riderName, String driverName, + public GenericRecord generateGenericRecord(String rowKey, String partitionPath, String riderName, String driverName, long timestamp, boolean isDeleteRecord, boolean isFlattened) { GenericRecord rec = new GenericData.Record(isFlattened ? FLATTENED_AVRO_SCHEMA : AVRO_SCHEMA); @@ -268,25 +304,25 @@ public static GenericRecord generateGenericRecord(String rowKey, String partitio rec.put("partition_path", partitionPath); rec.put("rider", riderName); rec.put("driver", driverName); - rec.put("begin_lat", RAND.nextDouble()); - rec.put("begin_lon", RAND.nextDouble()); - rec.put("end_lat", RAND.nextDouble()); - rec.put("end_lon", RAND.nextDouble()); + rec.put("begin_lat", rand.nextDouble()); + rec.put("begin_lon", rand.nextDouble()); + rec.put("end_lat", rand.nextDouble()); + rec.put("end_lon", rand.nextDouble()); if (isFlattened) { - rec.put("fare", RAND.nextDouble() * 100); + rec.put("fare", rand.nextDouble() * 100); rec.put("currency", "USD"); } else { - rec.put("distance_in_meters", RAND.nextInt()); - rec.put("seconds_since_epoch", RAND.nextLong()); - rec.put("weight", RAND.nextFloat()); + rec.put("distance_in_meters", rand.nextInt()); + rec.put("seconds_since_epoch", rand.nextLong()); + rec.put("weight", rand.nextFloat()); byte[] bytes = "Canada".getBytes(); rec.put("nation", ByteBuffer.wrap(bytes)); - long currentTimeMillis = System.currentTimeMillis(); - Date date = new Date(currentTimeMillis); - rec.put("current_date", (int) date.toLocalDate().toEpochDay()); - rec.put("current_ts", currentTimeMillis); + long randomMillis = genRandomTimeMillis(rand); + Instant instant = Instant.ofEpochMilli(randomMillis); + rec.put("current_date", (int) LocalDateTime.ofInstant(instant, ZoneOffset.UTC).toLocalDate().toEpochDay()); + rec.put("current_ts", randomMillis); - BigDecimal bigDecimal = new BigDecimal(String.format("%5f", RAND.nextFloat())); + BigDecimal bigDecimal = new BigDecimal(String.format("%5f", rand.nextFloat())); Schema decimalSchema = AVRO_SCHEMA.getField("height").schema(); Conversions.DecimalConversion decimalConversions = new Conversions.DecimalConversion(); GenericFixed genericFixed = decimalConversions.toFixed(bigDecimal, decimalSchema, LogicalTypes.decimal(10, 6)); @@ -295,14 +331,14 @@ public static GenericRecord generateGenericRecord(String rowKey, String partitio rec.put("city_to_state", Collections.singletonMap("LA", "CA")); GenericRecord fareRecord = new GenericData.Record(AVRO_SCHEMA.getField("fare").schema()); - fareRecord.put("amount", RAND.nextDouble() * 100); + fareRecord.put("amount", rand.nextDouble() * 100); fareRecord.put("currency", "USD"); rec.put("fare", fareRecord); GenericArray tipHistoryArray = new 
GenericData.Array<>(1, AVRO_SCHEMA.getField("tip_history").schema()); Schema tipSchema = new Schema.Parser().parse(AVRO_SCHEMA.getField("tip_history").schema().toString()).getElementType(); GenericRecord tipRecord = new GenericData.Record(tipSchema); - tipRecord.put("amount", RAND.nextDouble() * 100); + tipRecord.put("amount", rand.nextDouble() * 100); tipRecord.put("currency", "USD"); tipHistoryArray.add(tipRecord); rec.put("tip_history", tipHistoryArray); @@ -325,7 +361,7 @@ public GenericRecord generateRecordForTripSchema(String rowKey, String riderName rec.put("timestamp", timestamp); rec.put("rider", riderName); rec.put("driver", driverName); - rec.put("fare", RAND.nextDouble() * 100); + rec.put("fare", rand.nextDouble() * 100); rec.put("_hoodie_is_deleted", false); return rec; } @@ -336,7 +372,7 @@ public GenericRecord generateRecordForShortTripSchema(String rowKey, String ride rec.put("timestamp", timestamp); rec.put("rider", riderName); rec.put("driver", driverName); - rec.put("fare", RAND.nextDouble() * 100); + rec.put("fare", rand.nextDouble() * 100); rec.put("_hoodie_is_deleted", false); return rec; } @@ -346,7 +382,7 @@ public static void createCommitFile(String basePath, String instantTime, Configu createCommitFile(basePath, instantTime, configuration, commitMetadata); } - public static void createCommitFile(String basePath, String instantTime, Configuration configuration, HoodieCommitMetadata commitMetadata) { + private static void createCommitFile(String basePath, String instantTime, Configuration configuration, HoodieCommitMetadata commitMetadata) { Arrays.asList(HoodieTimeline.makeCommitFileName(instantTime), HoodieTimeline.makeInflightCommitFileName(instantTime), HoodieTimeline.makeRequestedCommitFileName(instantTime)) .forEach(f -> createMetadataFile(f, basePath, configuration, commitMetadata)); @@ -382,13 +418,7 @@ private static void createMetadataFile(String f, String basePath, Configuration } } - public static void createReplaceFile(String basePath, String instantTime, Configuration configuration, HoodieCommitMetadata commitMetadata) { - Arrays.asList(HoodieTimeline.makeReplaceFileName(instantTime), HoodieTimeline.makeInflightReplaceFileName(instantTime), - HoodieTimeline.makeRequestedReplaceFileName(instantTime)) - .forEach(f -> createMetadataFile(f, basePath, configuration, commitMetadata)); - } - - public static void createPendingReplaceFile(String basePath, String instantTime, Configuration configuration, HoodieCommitMetadata commitMetadata) { + private static void createPendingReplaceFile(String basePath, String instantTime, Configuration configuration, HoodieCommitMetadata commitMetadata) { Arrays.asList(HoodieTimeline.makeInflightReplaceFileName(instantTime), HoodieTimeline.makeRequestedReplaceFileName(instantTime)) .forEach(f -> createMetadataFile(f, basePath, configuration, commitMetadata)); @@ -406,13 +436,6 @@ public static void createEmptyCleanRequestedFile(String basePath, String instant createEmptyFile(basePath, commitFile, configuration); } - public static void createCompactionRequestedFile(String basePath, String instantTime, Configuration configuration) - throws IOException { - Path commitFile = new Path(basePath + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/" - + HoodieTimeline.makeRequestedCompactionFileName(instantTime)); - createEmptyFile(basePath, commitFile, configuration); - } - private static void createEmptyFile(String basePath, Path filePath, Configuration configuration) throws IOException { FileSystem fs = FSUtils.getFs(basePath, 
configuration); FSDataOutputStream os = fs.create(filePath, true); @@ -483,13 +506,13 @@ public List generateInsertsContainsAllPartitions(String instantTim } public List generateInsertsForPartition(String instantTime, Integer n, String partition) { - return generateInsertsStream(instantTime, n, false, TRIP_EXAMPLE_SCHEMA, false, () -> partition, () -> UUID.randomUUID().toString()).collect(Collectors.toList()); + return generateInsertsStream(instantTime, n, false, TRIP_EXAMPLE_SCHEMA, false, () -> partition, () -> genPseudoRandomUUID(rand).toString()).collect(Collectors.toList()); } public Stream generateInsertsStream(String commitTime, Integer n, boolean isFlattened, String schemaStr, boolean containsAllPartitions) { return generateInsertsStream(commitTime, n, isFlattened, schemaStr, containsAllPartitions, - () -> partitionPaths[RAND.nextInt(partitionPaths.length)], - () -> UUID.randomUUID().toString()); + () -> partitionPaths[rand.nextInt(partitionPaths.length)], + () -> genPseudoRandomUUID(rand).toString()); } /** @@ -510,7 +533,7 @@ public Stream generateInsertsStream(String instantTime, Integer n, populateKeysBySchema(schemaStr, currSize + i, kp); incrementNumExistingKeysBySchema(schemaStr); try { - return new HoodieRecord(key, generateRandomValueAsPerSchema(schemaStr, key, instantTime, isFlattened)); + return new HoodieAvroRecord(key, generateRandomValueAsPerSchema(schemaStr, key, instantTime, isFlattened)); } catch (IOException e) { throw new HoodieIOException(e.getMessage(), e); } @@ -541,7 +564,7 @@ public List generateSameKeyInserts(String instantTime, List copy = new ArrayList<>(); for (HoodieRecord r : origin) { HoodieKey key = r.getKey(); - HoodieRecord record = new HoodieRecord(key, generateRandomValue(key, instantTime)); + HoodieRecord record = new HoodieAvroRecord(key, generateRandomValue(key, instantTime)); copy.add(record); } return copy; @@ -551,9 +574,9 @@ public List generateInsertsWithHoodieAvroPayload(String instantTim List inserts = new ArrayList<>(); int currSize = getNumExistingKeys(TRIP_EXAMPLE_SCHEMA); for (int i = 0; i < limit; i++) { - String partitionPath = partitionPaths[RAND.nextInt(partitionPaths.length)]; - HoodieKey key = new HoodieKey(UUID.randomUUID().toString(), partitionPath); - HoodieRecord record = new HoodieRecord(key, generateAvroPayload(key, instantTime)); + String partitionPath = partitionPaths[rand.nextInt(partitionPaths.length)]; + HoodieKey key = new HoodieKey(genPseudoRandomUUID(rand).toString(), partitionPath); + HoodieRecord record = new HoodieAvroRecord(key, generateAvroPayload(key, instantTime)); inserts.add(record); KeyPartition kp = new KeyPartition(); @@ -568,7 +591,7 @@ public List generateInsertsWithHoodieAvroPayload(String instantTim public List generateUpdatesWithHoodieAvroPayload(String instantTime, List baseRecords) { List updates = new ArrayList<>(); for (HoodieRecord baseRecord : baseRecords) { - HoodieRecord record = new HoodieRecord(baseRecord.getKey(), generateAvroPayload(baseRecord.getKey(), instantTime)); + HoodieRecord record = new HoodieAvroRecord(baseRecord.getKey(), generateAvroPayload(baseRecord.getKey(), instantTime)); updates.add(record); } return updates; @@ -596,11 +619,11 @@ public HoodieRecord generateDeleteRecord(HoodieRecord existingRecord) throws IOE public HoodieRecord generateDeleteRecord(HoodieKey key) throws IOException { RawTripTestPayload payload = new RawTripTestPayload(Option.empty(), key.getRecordKey(), key.getPartitionPath(), null, true, 0L); - return new HoodieRecord(key, payload); + return new 
HoodieAvroRecord(key, payload); } public HoodieRecord generateUpdateRecord(HoodieKey key, String instantTime) throws IOException { - return new HoodieRecord(key, generateRandomValue(key, instantTime)); + return new HoodieAvroRecord(key, generateRandomValue(key, instantTime)); } public List generateUpdates(String instantTime, List baseRecords) throws IOException { @@ -615,7 +638,7 @@ public List generateUpdates(String instantTime, List public List generateUpdatesWithTS(String instantTime, List baseRecords, int ts) throws IOException { List updates = new ArrayList<>(); for (HoodieRecord baseRecord : baseRecords) { - HoodieRecord record = new HoodieRecord(baseRecord.getKey(), + HoodieRecord record = new HoodieAvroRecord(baseRecord.getKey(), generateRandomValue(baseRecord.getKey(), instantTime, false, ts)); updates.add(record); } @@ -653,7 +676,7 @@ public List generateUpdates(String instantTime, Integer n) throws for (int i = 0; i < n; i++) { Map existingKeys = existingKeysBySchema.get(TRIP_EXAMPLE_SCHEMA); Integer numExistingKeys = numKeysBySchema.get(TRIP_EXAMPLE_SCHEMA); - KeyPartition kp = existingKeys.get(RAND.nextInt(numExistingKeys - 1)); + KeyPartition kp = existingKeys.get(rand.nextInt(numExistingKeys - 1)); HoodieRecord record = generateUpdateRecord(kp.key, instantTime); updates.add(record); } @@ -725,7 +748,7 @@ public Stream generateUniqueUpdatesStream(String instantTime, Inte } return IntStream.range(0, n).boxed().map(i -> { - int index = numExistingKeys == 1 ? 0 : RAND.nextInt(numExistingKeys - 1); + int index = numExistingKeys == 1 ? 0 : rand.nextInt(numExistingKeys - 1); KeyPartition kp = existingKeys.get(index); // Find the available keyPartition starting from randomly chosen one. while (used.contains(kp)) { @@ -735,7 +758,7 @@ public Stream generateUniqueUpdatesStream(String instantTime, Inte logger.debug("key getting updated: " + kp.key.getRecordKey()); used.add(kp); try { - return new HoodieRecord(kp.key, generateRandomValueAsPerSchema(schemaStr, kp.key, instantTime, false)); + return new HoodieAvroRecord(kp.key, generateRandomValueAsPerSchema(schemaStr, kp.key, instantTime, false)); } catch (IOException e) { throw new HoodieIOException(e.getMessage(), e); } @@ -758,7 +781,7 @@ public Stream generateUniqueDeleteStream(Integer n) { List result = new ArrayList<>(); for (int i = 0; i < n; i++) { - int index = RAND.nextInt(numExistingKeys); + int index = rand.nextInt(numExistingKeys); while (!existingKeys.containsKey(index)) { index = (index + 1) % numExistingKeys; } @@ -790,7 +813,7 @@ public Stream generateUniqueDeleteRecordStream(String instantTime, List result = new ArrayList<>(); for (int i = 0; i < n; i++) { - int index = RAND.nextInt(numExistingKeys); + int index = rand.nextInt(numExistingKeys); while (!existingKeys.containsKey(index)) { index = (index + 1) % numExistingKeys; } @@ -801,7 +824,7 @@ public Stream generateUniqueDeleteRecordStream(String instantTime, numExistingKeys--; used.add(kp); try { - result.add(new HoodieRecord(kp.key, generateRandomDeleteValue(kp.key, instantTime))); + result.add(new HoodieAvroRecord(kp.key, generateRandomDeleteValue(kp.key, instantTime))); } catch (IOException e) { throw new HoodieIOException(e.getMessage(), e); } @@ -840,8 +863,8 @@ public boolean deleteExistingKeyIfPresent(HoodieKey key) { public List generateGenericRecords(int numRecords) { List list = new ArrayList<>(); IntStream.range(0, numRecords).forEach(i -> { - list.add(generateGenericRecord(UUID.randomUUID().toString(), "0", UUID.randomUUID().toString(), 
UUID.randomUUID() - .toString(), RAND.nextLong())); + list.add(generateGenericRecord(genPseudoRandomUUID(rand).toString(), "0", + genPseudoRandomUUID(rand).toString(), genPseudoRandomUUID(rand).toString(), rand.nextLong())); }); return list; } @@ -864,4 +887,31 @@ public static class KeyPartition implements Serializable { public void close() { existingKeysBySchema.clear(); } + + private static long genRandomTimeMillis(Random r) { + // Fri Feb 13 15:31:30 PST 2009 + long anchorTs = 1234567890L; + // NOTE: To provide for certainty and not generate overly random dates, we will limit + // dispersion to be w/in +/- 3 days from the anchor date + return anchorTs + r.nextLong() % 259200000L; + } + + private static UUID genPseudoRandomUUID(Random r) { + byte[] bytes = new byte[16]; + r.nextBytes(bytes); + + bytes[6] &= 0x0f; + bytes[6] |= 0x40; + bytes[8] &= 0x3f; + bytes[8] |= 0x80; + + try { + Constructor ctor = UUID.class.getDeclaredConstructor(byte[].class); + ctor.setAccessible(true); + return ctor.newInstance((Object) bytes); + } catch (InvocationTargetException | InstantiationException | IllegalAccessException | NoSuchMethodException e) { + logger.info("Failed to generate pseudo-random UUID!"); + throw new HoodieException(e); + } + } } diff --git a/hudi-common/src/test/java/org/apache/hudi/common/testutils/HoodieTestTable.java b/hudi-common/src/test/java/org/apache/hudi/common/testutils/HoodieTestTable.java index 7b8148a612a8b..f78312217eec2 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/testutils/HoodieTestTable.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/testutils/HoodieTestTable.java @@ -354,7 +354,6 @@ public HoodieRollbackMetadata getRollbackMetadata(String instantTimeToDelete, Ma rollbackPartitionMetadata.setPartitionPath(entry.getKey()); rollbackPartitionMetadata.setSuccessDeleteFiles(entry.getValue()); rollbackPartitionMetadata.setFailedDeleteFiles(new ArrayList<>()); - rollbackPartitionMetadata.setWrittenLogFiles(getWrittenLogFiles(instantTimeToDelete, entry)); long rollbackLogFileSize = 50 + RANDOM.nextInt(500); String fileId = UUID.randomUUID().toString(); String logFileName = logFileName(instantTimeToDelete, fileId, 0); @@ -1045,7 +1044,7 @@ private static HoodieTestTableState getTestTableStateWithPartitionFileInfo(Write return testTableState; } - private static List generateHoodieWriteStatForPartition(Map>> partitionToFileIdMap, + public static List generateHoodieWriteStatForPartition(Map>> partitionToFileIdMap, String commitTime, boolean bootstrap) { List writeStats = new ArrayList<>(); for (Map.Entry>> entry : partitionToFileIdMap.entrySet()) { diff --git a/hudi-common/src/test/java/org/apache/hudi/common/testutils/HoodieTestUtils.java b/hudi-common/src/test/java/org/apache/hudi/common/testutils/HoodieTestUtils.java index c623c2f5df590..f9c9898f20192 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/testutils/HoodieTestUtils.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/testutils/HoodieTestUtils.java @@ -18,6 +18,7 @@ package org.apache.hudi.common.testutils; +import org.apache.hudi.common.fs.HoodieWrapperFileSystem; import org.apache.hudi.common.model.HoodieAvroPayload; import org.apache.hudi.common.model.HoodieFileFormat; import org.apache.hudi.common.model.HoodieTableType; @@ -25,6 +26,7 @@ import org.apache.hudi.common.model.HoodieWriteStat.RuntimeStats; import org.apache.hudi.common.table.HoodieTableConfig; import org.apache.hudi.common.table.HoodieTableMetaClient; +import 
org.apache.hudi.metadata.HoodieTableMetadata; import com.esotericsoftware.kryo.Kryo; import com.esotericsoftware.kryo.io.Input; @@ -176,4 +178,17 @@ public static List generateFakeHoodieWriteStat(int limit) { } return writeStatList; } + + public static void createCompactionCommitInMetadataTable( + Configuration hadoopConf, HoodieWrapperFileSystem wrapperFs, String basePath, + String instantTime) throws IOException { + // This is to simulate a completed compaction commit in metadata table timeline, + // so that the commits on data table timeline can be archived + // Note that, if metadata table is enabled, instants in data table timeline, + // which are more recent than the last compaction on the metadata table, + // are not archived (HoodieTimelineArchiveLog::getInstantsToArchive) + String metadataTableBasePath = HoodieTableMetadata.getMetadataTableBasePath(basePath); + HoodieTestUtils.init(hadoopConf, metadataTableBasePath, HoodieTableType.MERGE_ON_READ); + HoodieTestDataGenerator.createCommitFile(metadataTableBasePath, instantTime + "001", wrapperFs.getConf()); + } } diff --git a/hudi-common/src/test/java/org/apache/hudi/common/testutils/RawTripTestPayload.java b/hudi-common/src/test/java/org/apache/hudi/common/testutils/RawTripTestPayload.java index 8bd10823dacf2..c052b63ab544b 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/testutils/RawTripTestPayload.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/testutils/RawTripTestPayload.java @@ -80,6 +80,23 @@ public RawTripTestPayload(String jsonData) throws IOException { this.isDeleted = false; } + /** + * @deprecated PLEASE READ THIS CAREFULLY + * + * Converting properly typed schemas into JSON leads to inevitable information loss, since JSON + * encodes only representation of the record (with no schema accompanying it), therefore occasionally + * losing nuances of the original data-types provided by the schema (for ex, with 1.23 literal it's + * impossible to tell whether original type was Double or Decimal). + * + * Multiplied by the fact that Spark 2 JSON schema inference has substantial gaps in it (see below), + * it's **NOT RECOMMENDED** to use this method. Instead please consider using {@link AvroConversionUtils#createDataframe()} + * method accepting list of {@link HoodieRecord} (as produced by the {@link HoodieTestDataGenerator} + * to create Spark's {@code Dataframe}s directly. 
+ * + * REFs + * https://medium.com/swlh/notes-about-json-schema-handling-in-spark-sql-be1e7f13839d + */ + @Deprecated public static List recordsToStrings(List records) { return records.stream().map(RawTripTestPayload::recordToString).filter(Option::isPresent).map(Option::get) .collect(Collectors.toList()); diff --git a/hudi-common/src/test/java/org/apache/hudi/common/testutils/SchemaTestUtil.java b/hudi-common/src/test/java/org/apache/hudi/common/testutils/SchemaTestUtil.java index bd1e3b764e1bc..ab77caa1bcb83 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/testutils/SchemaTestUtil.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/testutils/SchemaTestUtil.java @@ -21,6 +21,7 @@ import org.apache.hudi.avro.HoodieAvroUtils; import org.apache.hudi.avro.MercifulJsonConverter; import org.apache.hudi.common.model.HoodieAvroPayload; +import org.apache.hudi.common.model.HoodieAvroRecord; import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.table.timeline.HoodieActiveTimeline; @@ -71,6 +72,10 @@ public static List generateTestRecords(int from, int limit) throw return toRecords(getSimpleSchema(), getSimpleSchema(), from, limit); } + public static List generateTestGenericRecords(int from, int limit) throws IOException, URISyntaxException { + return toRecords(getSimpleSchema(), getSimpleSchema(), from, limit); + } + public static List generateTestJsonRecords(int from, int limit) throws IOException, URISyntaxException { Path dataPath = initializeSampleDataPath(); @@ -81,9 +86,9 @@ public static List generateTestJsonRecords(int from, int limit) throws I } } - private static List toRecords(Schema writerSchema, Schema readerSchema, int from, int limit) + private static List toRecords(Schema writerSchema, Schema readerSchema, int from, int limit) throws IOException, URISyntaxException { - GenericDatumReader reader = new GenericDatumReader<>(writerSchema, readerSchema); + GenericDatumReader reader = new GenericDatumReader<>(writerSchema, readerSchema); Path dataPath = initializeSampleDataPath(); try (Stream stream = Files.lines(dataPath)) { @@ -148,7 +153,7 @@ public static List generateHoodieTestRecords(int from, int limit, } private static HoodieRecord convertToHoodieRecords(IndexedRecord iRecord, String key, String partitionPath) { - return new HoodieRecord<>(new HoodieKey(key, partitionPath), + return new HoodieAvroRecord<>(new HoodieKey(key, partitionPath), new HoodieAvroPayload(Option.of((GenericRecord) iRecord))); } @@ -168,7 +173,7 @@ public static List generateHoodieTestRecordsWithoutHoodieMetadata( throws IOException, URISyntaxException { List iRecords = generateTestRecords(from, limit); - return iRecords.stream().map(r -> new HoodieRecord<>(new HoodieKey(UUID.randomUUID().toString(), "0000/00/00"), + return iRecords.stream().map(r -> new HoodieAvroRecord<>(new HoodieKey(UUID.randomUUID().toString(), "0000/00/00"), new HoodieAvroPayload(Option.of((GenericRecord) r)))).collect(Collectors.toList()); } @@ -176,9 +181,9 @@ public static List updateHoodieTestRecordsWithoutHoodieMetadata(Li Schema schema, String fieldNameToUpdate, String newValue) { return oldRecords.stream().map(r -> { try { - GenericRecord rec = (GenericRecord) r.getData().getInsertValue(schema).get(); + GenericRecord rec = (GenericRecord) ((HoodieAvroRecord) r).getData().getInsertValue(schema).get(); rec.put(fieldNameToUpdate, newValue); - return new HoodieRecord<>(r.getKey(), new HoodieAvroPayload(Option.of(rec))); + return new 
HoodieAvroRecord<>(r.getKey(), new HoodieAvroPayload(Option.of(rec))); } catch (IOException io) { throw new HoodieIOException("unable to get data from hoodie record", io); } diff --git a/hudi-common/src/test/java/org/apache/hudi/common/testutils/SpillableMapTestUtils.java b/hudi-common/src/test/java/org/apache/hudi/common/testutils/SpillableMapTestUtils.java index 89155904ec605..2e450660b5a4c 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/testutils/SpillableMapTestUtils.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/testutils/SpillableMapTestUtils.java @@ -19,6 +19,7 @@ package org.apache.hudi.common.testutils; import org.apache.hudi.common.model.HoodieAvroPayload; +import org.apache.hudi.common.model.HoodieAvroRecord; import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.model.HoodieRecordLocation; @@ -48,7 +49,7 @@ public static List upsertRecords(List iRecords, String partitionPath = ((GenericRecord) r).get(HoodieRecord.PARTITION_PATH_METADATA_FIELD).toString(); recordKeys.add(key); HoodieRecord record = - new HoodieRecord<>(new HoodieKey(key, partitionPath), new HoodieAvroPayload(Option.of((GenericRecord) r))); + new HoodieAvroRecord<>(new HoodieKey(key, partitionPath), new HoodieAvroPayload(Option.of((GenericRecord) r))); record.unseal(); record.setCurrentLocation(new HoodieRecordLocation("DUMMY_COMMIT_TIME", "DUMMY_FILE_ID")); record.seal(); diff --git a/hudi-common/src/test/java/org/apache/hudi/common/util/TestParquetUtils.java b/hudi-common/src/test/java/org/apache/hudi/common/util/TestParquetUtils.java index e07c0fad3d24e..7bef8477125c2 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/util/TestParquetUtils.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/util/TestParquetUtils.java @@ -147,7 +147,7 @@ public void testFetchRecordKeyPartitionPathFromParquet(String typeCode) throws E // Read and verify List fetchedRows = - parquetUtils.fetchRecordKeyPartitionPath(HoodieTestUtils.getDefaultHadoopConf(), new Path(filePath)); + parquetUtils.fetchHoodieKeys(HoodieTestUtils.getDefaultHadoopConf(), new Path(filePath)); assertEquals(rowKeys.size(), fetchedRows.size(), "Total count does not match"); for (HoodieKey entry : fetchedRows) { @@ -173,7 +173,7 @@ public void testFetchRecordKeyPartitionPathVirtualKeysFromParquet() throws Excep // Read and verify List fetchedRows = - parquetUtils.fetchRecordKeyPartitionPath(HoodieTestUtils.getDefaultHadoopConf(), new Path(filePath), + parquetUtils.fetchHoodieKeys(HoodieTestUtils.getDefaultHadoopConf(), new Path(filePath), Option.of(new TestBaseKeyGen("abc","def"))); assertEquals(rowKeys.size(), fetchedRows.size(), "Total count does not match"); diff --git a/hudi-common/src/test/java/org/apache/hudi/common/util/collection/TestBitCaskDiskMap.java b/hudi-common/src/test/java/org/apache/hudi/common/util/collection/TestBitCaskDiskMap.java index 208c6d96995b8..9bbe4277162e0 100755 --- a/hudi-common/src/test/java/org/apache/hudi/common/util/collection/TestBitCaskDiskMap.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/util/collection/TestBitCaskDiskMap.java @@ -20,6 +20,7 @@ import org.apache.hudi.avro.HoodieAvroUtils; import org.apache.hudi.common.model.HoodieAvroPayload; +import org.apache.hudi.common.model.HoodieAvroRecord; import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.model.HoodieRecordPayload; @@ -186,7 +187,7 @@ public void 
testSizeEstimator() throws IOException, URISyntaxException { schema = SchemaTestUtil.getSimpleSchema(); List indexedRecords = SchemaTestUtil.generateHoodieTestRecords(0, 1); hoodieRecords = - indexedRecords.stream().map(r -> new HoodieRecord<>(new HoodieKey(UUID.randomUUID().toString(), "0000/00/00"), + indexedRecords.stream().map(r -> new HoodieAvroRecord<>(new HoodieKey(UUID.randomUUID().toString(), "0000/00/00"), new AvroBinaryTestPayload(Option.of((GenericRecord) r)))).collect(Collectors.toList()); payloadSize = SpillableMapUtils.computePayloadSize(hoodieRecords.remove(0), new HoodieRecordSizeEstimator(schema)); assertTrue(payloadSize > 0); @@ -195,7 +196,7 @@ public void testSizeEstimator() throws IOException, URISyntaxException { final Schema simpleSchemaWithMetadata = HoodieAvroUtils.addMetadataFields(SchemaTestUtil.getSimpleSchema()); indexedRecords = SchemaTestUtil.generateHoodieTestRecords(0, 1); hoodieRecords = indexedRecords.stream() - .map(r -> new HoodieRecord<>(new HoodieKey(UUID.randomUUID().toString(), "0000/00/00"), + .map(r -> new HoodieAvroRecord<>(new HoodieKey(UUID.randomUUID().toString(), "0000/00/00"), new AvroBinaryTestPayload( Option.of(HoodieAvroUtils.rewriteRecord((GenericRecord) r, simpleSchemaWithMetadata))))) .collect(Collectors.toList()); @@ -212,7 +213,7 @@ public void testPutAll(boolean isCompressionEnabled) throws IOException, URISynt iRecords.forEach(r -> { String key = ((GenericRecord) r).get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString(); String partitionPath = ((GenericRecord) r).get(HoodieRecord.PARTITION_PATH_METADATA_FIELD).toString(); - HoodieRecord value = new HoodieRecord<>(new HoodieKey(key, partitionPath), new HoodieAvroPayload(Option.of((GenericRecord) r))); + HoodieRecord value = new HoodieAvroRecord<>(new HoodieKey(key, partitionPath), new HoodieAvroPayload(Option.of((GenericRecord) r))); recordMap.put(key, value); }); diff --git a/hudi-common/src/test/java/org/apache/hudi/common/util/collection/TestExternalSpillableMap.java b/hudi-common/src/test/java/org/apache/hudi/common/util/collection/TestExternalSpillableMap.java index f7b45e9d839b6..e33baf1493a93 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/util/collection/TestExternalSpillableMap.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/util/collection/TestExternalSpillableMap.java @@ -20,6 +20,7 @@ import org.apache.hudi.avro.HoodieAvroUtils; import org.apache.hudi.common.model.HoodieAvroPayload; +import org.apache.hudi.common.model.HoodieAvroRecord; import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.model.HoodieRecordPayload; @@ -135,7 +136,7 @@ public void testSimpleUpsert(ExternalSpillableMap.DiskMapType diskMapType, boole updatedRecords.forEach(record -> { HoodieRecord rec = records.get(((GenericRecord) record).get(HoodieRecord.RECORD_KEY_METADATA_FIELD)); try { - assertEquals(rec.getData().getInsertValue(schema).get(), record); + assertEquals(((HoodieAvroRecord) rec).getData().getInsertValue(schema).get(), record); } catch (IOException io) { throw new UncheckedIOException(io); } @@ -159,13 +160,13 @@ public void testAllMapOperations(ExternalSpillableMap.DiskMapType diskMapType, b IndexedRecord inMemoryRecord = iRecords.get(0); String ikey = ((GenericRecord) inMemoryRecord).get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString(); String iPartitionPath = ((GenericRecord) inMemoryRecord).get(HoodieRecord.PARTITION_PATH_METADATA_FIELD).toString(); - HoodieRecord inMemoryHoodieRecord 
= new HoodieRecord<>(new HoodieKey(ikey, iPartitionPath), + HoodieRecord inMemoryHoodieRecord = new HoodieAvroRecord<>(new HoodieKey(ikey, iPartitionPath), new HoodieAvroPayload(Option.of((GenericRecord) inMemoryRecord))); IndexedRecord onDiskRecord = iRecords.get(99); String dkey = ((GenericRecord) onDiskRecord).get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString(); String dPartitionPath = ((GenericRecord) onDiskRecord).get(HoodieRecord.PARTITION_PATH_METADATA_FIELD).toString(); - HoodieRecord onDiskHoodieRecord = new HoodieRecord<>(new HoodieKey(dkey, dPartitionPath), + HoodieRecord onDiskHoodieRecord = new HoodieAvroRecord<>(new HoodieKey(dkey, dPartitionPath), new HoodieAvroPayload(Option.of((GenericRecord) onDiskRecord))); // assert size assert records.size() == 100; @@ -241,7 +242,7 @@ public void testDataCorrectnessWithUpsertsToDataInMapAndOnDisk(ExternalSpillable // Get a record from the in-Memory map String key = recordKeys.get(0); - HoodieRecord record = records.get(key); + HoodieAvroRecord record = (HoodieAvroRecord) records.get(key); List recordsToUpdate = new ArrayList<>(); recordsToUpdate.add((IndexedRecord) record.getData().getInsertValue(schema).get()); @@ -259,7 +260,7 @@ public void testDataCorrectnessWithUpsertsToDataInMapAndOnDisk(ExternalSpillable // Get a record from the disk based map key = recordKeys.get(recordKeys.size() - 1); - record = records.get(key); + record = (HoodieAvroRecord) records.get(key); recordsToUpdate = new ArrayList<>(); recordsToUpdate.add((IndexedRecord) record.getData().getInsertValue(schema).get()); diff --git a/hudi-common/src/test/java/org/apache/hudi/common/util/collection/TestRocksDbDiskMap.java b/hudi-common/src/test/java/org/apache/hudi/common/util/collection/TestRocksDbDiskMap.java index 2ae521fc8c217..31daaab213604 100644 --- a/hudi-common/src/test/java/org/apache/hudi/common/util/collection/TestRocksDbDiskMap.java +++ b/hudi-common/src/test/java/org/apache/hudi/common/util/collection/TestRocksDbDiskMap.java @@ -18,12 +18,9 @@ package org.apache.hudi.common.util.collection; -import org.apache.avro.Schema; -import org.apache.avro.generic.GenericRecord; -import org.apache.avro.generic.IndexedRecord; - import org.apache.hudi.avro.HoodieAvroUtils; import org.apache.hudi.common.model.HoodieAvroPayload; +import org.apache.hudi.common.model.HoodieAvroRecord; import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.model.HoodieRecordPayload; @@ -33,6 +30,9 @@ import org.apache.hudi.common.testutils.SpillableMapTestUtils; import org.apache.hudi.common.util.Option; +import org.apache.avro.Schema; +import org.apache.avro.generic.GenericRecord; +import org.apache.avro.generic.IndexedRecord; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -166,7 +166,7 @@ public void testPutAll() throws IOException, URISyntaxException { iRecords.forEach(r -> { String key = ((GenericRecord) r).get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString(); String partitionPath = ((GenericRecord) r).get(HoodieRecord.PARTITION_PATH_METADATA_FIELD).toString(); - HoodieRecord value = new HoodieRecord<>(new HoodieKey(key, partitionPath), new HoodieAvroPayload(Option.of((GenericRecord) r))); + HoodieRecord value = new HoodieAvroRecord<>(new HoodieKey(key, partitionPath), new HoodieAvroPayload(Option.of((GenericRecord) r))); recordMap.put(key, value); }); diff --git a/hudi-common/src/test/java/org/apache/hudi/common/util/io/TestByteBufferBackedInputStream.java 
b/hudi-common/src/test/java/org/apache/hudi/common/util/io/TestByteBufferBackedInputStream.java
new file mode 100644
index 0000000000000..87bd2eea2ebe5
--- /dev/null
+++ b/hudi-common/src/test/java/org/apache/hudi/common/util/io/TestByteBufferBackedInputStream.java
@@ -0,0 +1,113 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.common.util.io;
+
+import org.junit.jupiter.api.Test;
+
+import java.nio.ByteBuffer;
+
+import static org.junit.jupiter.api.Assertions.assertArrayEquals;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+
+public class TestByteBufferBackedInputStream {
+
+  @Test
+  public void testConstructor() {
+    byte[] bytes = { 0xD, 0xE, 0xA, 0xD, 0xD, 0xE, 0xE, 0xD };
+    ByteBuffer byteBuf = ByteBuffer.wrap(bytes, 0, 1);
+    ByteBuffer byteBufClone = byteBuf.duplicate();
+
+    // ByteBuffer ctor
+    ByteBufferBackedInputStream first = new ByteBufferBackedInputStream(byteBuf);
+
+    assertEquals(0xD, first.read());
+    assertThrows(IllegalArgumentException.class, first::read);
+    // Make sure that the original buffer stays intact
+    assertEquals(byteBufClone, byteBuf);
+
+    // byte[] ctor
+    ByteBufferBackedInputStream second = new ByteBufferBackedInputStream(bytes);
+
+    assertEquals(0xD, second.read());
+
+    // byte[] ctor (w/ offset)
+    ByteBufferBackedInputStream third = new ByteBufferBackedInputStream(bytes, 1, 1);
+
+    assertEquals(0xE, third.read());
+    assertThrows(IllegalArgumentException.class, third::read);
+  }
+
+  @Test
+  public void testRead() {
+    byte[] sourceBytes = { 0xD, 0xE, 0xA, 0xD, 0xD, 0xE, 0xE, 0xD };
+
+    ByteBufferBackedInputStream stream = new ByteBufferBackedInputStream(sourceBytes);
+
+    int firstByte = stream.read();
+    assertEquals(0xD, firstByte);
+
+    byte[] readBytes = new byte[4];
+    int read = stream.read(readBytes, 1, 3);
+
+    assertEquals(3, read);
+    assertArrayEquals(new byte[]{0, 0xE, 0xA, 0xD}, readBytes);
+    assertEquals(4, stream.getPosition());
+  }
+
+  @Test
+  public void testSeek() {
+    byte[] sourceBytes = { 0xD, 0xE, 0xA, 0xD, 0xD, 0xA, 0xE, 0xD };
+
+    ByteBufferBackedInputStream stream = new ByteBufferBackedInputStream(sourceBytes, 1, 7);
+
+    // Seek to position 1 in the stream (offset 2 in the original buffer)
+    stream.seek(1);
+    int firstRead = stream.read();
+    assertEquals(0xA, firstRead);
+
+    // Seek to position 5 in the stream (offset 6 in the original buffer)
+    stream.seek(5);
+    int secondRead = stream.read();
+    assertEquals(0xE, secondRead);
+
+    // Try to seek past the stream boundary
+    assertThrows(IllegalArgumentException.class, () -> stream.seek(8));
+  }
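Editor's sketch of the class under test, inferred purely from the behavior these tests assert (the real org.apache.hudi.common.util.io.ByteBufferBackedInputStream may well differ in details such as copyFrom and the ByteBuffer constructor):

```java
import java.io.InputStream;
import java.nio.ByteBuffer;

// Hypothetical skeleton: an InputStream view over a byte[] slice.
public class ByteBufferBackedInputStreamSketch extends InputStream {
  private final ByteBuffer buffer;

  public ByteBufferBackedInputStreamSketch(byte[] bytes, int offset, int length) {
    // slice() so position 0 of the stream maps to `offset` in the source array
    this.buffer = ByteBuffer.wrap(bytes, offset, length).slice();
  }

  @Override
  public int read() {
    if (!buffer.hasRemaining()) {
      // the tests expect IllegalArgumentException on over-read, not -1
      throw new IllegalArgumentException("Buffer exhausted");
    }
    return buffer.get() & 0xFF;
  }

  public void seek(long pos) {
    buffer.position(Math.toIntExact(pos)); // throws IllegalArgumentException past the limit
  }

  public long getPosition() {
    return buffer.position();
  }
}
```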
ByteBufferBackedInputStream(sourceBytes); + + int firstByte = stream.read(); + assertEquals(firstByte, 0xD); + + // Copy 5 bytes from the stream (while keeping the stream's state intact) + byte[] targetBytes = new byte[5]; + stream.copyFrom(2, targetBytes, 0, targetBytes.length); + + assertArrayEquals(new byte[] { 0xA, 0xD, 0xD, 0xA, 0xE }, targetBytes); + + // Continue reading the stream from where we left off (before copying) + int secondByte = stream.read(); + assertEquals(secondByte, 0xE); + } +} diff --git a/hudi-common/src/test/resources/timestamp-test-evolved.avsc b/hudi-common/src/test/resources/timestamp-test-evolved.avsc index beb36329eabac..7a52ca6f245e1 100644 --- a/hudi-common/src/test/resources/timestamp-test-evolved.avsc +++ b/hudi-common/src/test/resources/timestamp-test-evolved.avsc @@ -20,7 +20,43 @@ "type": "record", "name": "User", "fields": [ - {"name": "field1", "type": ["null", "string"], "default": null}, - {"name": "createTime", "type": ["null", "long"], "default": null} + { + "name": "field1", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "createTime", + "type": [ + "null", + "long" + ], + "default": null + }, + { + "name": "createTimeString", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "createTimeDecimal", + "type": [ + "null", + { + "name": "decimalFixed", + "type": "fixed", + "logicalType": "decimal", + "precision": 20, + "scale": 4, + "size": 10 + } + ] + } ] } \ No newline at end of file diff --git a/hudi-examples/src/main/java/org/apache/hudi/examples/common/HoodieExampleDataGenerator.java b/hudi-examples/src/main/java/org/apache/hudi/examples/common/HoodieExampleDataGenerator.java index 71c6408ccb2cd..78df2e78e7081 100644 --- a/hudi-examples/src/main/java/org/apache/hudi/examples/common/HoodieExampleDataGenerator.java +++ b/hudi-examples/src/main/java/org/apache/hudi/examples/common/HoodieExampleDataGenerator.java @@ -20,6 +20,7 @@ import org.apache.hudi.avro.HoodieAvroUtils; import org.apache.hudi.common.model.HoodieAvroPayload; +import org.apache.hudi.common.model.HoodieAvroRecord; import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.model.HoodieRecordPayload; @@ -126,7 +127,7 @@ public Stream> generateInsertsStream(String commitTime, Integer kp.partitionPath = partitionPath; existingKeys.put(currSize + i, kp); numExistingKeys++; - return new HoodieRecord<>(key, generateRandomValue(key, commitTime)); + return new HoodieAvroRecord<>(key, generateRandomValue(key, commitTime)); }); } @@ -149,7 +150,7 @@ public List> generateUpdates(String commitTime, Integer n) { } public HoodieRecord generateUpdateRecord(HoodieKey key, String commitTime) { - return new HoodieRecord<>(key, generateRandomValue(key, commitTime)); + return new HoodieAvroRecord<>(key, generateRandomValue(key, commitTime)); } private Option convertToString(HoodieRecord record) { diff --git a/hudi-examples/src/main/java/org/apache/hudi/examples/java/HoodieJavaWriteClientExample.java b/hudi-examples/src/main/java/org/apache/hudi/examples/java/HoodieJavaWriteClientExample.java index 587f73b0f7fd4..4890a6529a52c 100644 --- a/hudi-examples/src/main/java/org/apache/hudi/examples/java/HoodieJavaWriteClientExample.java +++ b/hudi-examples/src/main/java/org/apache/hudi/examples/java/HoodieJavaWriteClientExample.java @@ -18,11 +18,11 @@ package org.apache.hudi.examples.java; -import org.apache.hadoop.conf.Configuration; import org.apache.hudi.client.HoodieJavaWriteClient; import 
org.apache.hudi.client.common.HoodieJavaEngineContext; import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.model.HoodieAvroPayload; +import org.apache.hudi.common.model.HoodieAvroRecord; import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.model.HoodieTableType; @@ -33,6 +33,7 @@ import org.apache.hudi.examples.common.HoodieExampleDataGenerator; import org.apache.hudi.index.HoodieIndex; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.log4j.LogManager; @@ -95,7 +96,7 @@ public static void main(String[] args) throws Exception { List> records = dataGen.generateInserts(newCommitTime, 10); List> recordsSoFar = new ArrayList<>(records); List> writeRecords = - recordsSoFar.stream().map(r -> new HoodieRecord(r)).collect(Collectors.toList()); + recordsSoFar.stream().map(r -> new HoodieAvroRecord(r)).collect(Collectors.toList()); client.insert(writeRecords, newCommitTime); // updates @@ -105,7 +106,7 @@ public static void main(String[] args) throws Exception { records.addAll(toBeUpdated); recordsSoFar.addAll(toBeUpdated); writeRecords = - recordsSoFar.stream().map(r -> new HoodieRecord(r)).collect(Collectors.toList()); + recordsSoFar.stream().map(r -> new HoodieAvroRecord(r)).collect(Collectors.toList()); client.upsert(writeRecords, newCommitTime); // Delete diff --git a/hudi-examples/src/main/java/org/apache/hudi/examples/spark/HoodieWriteClientExample.java b/hudi-examples/src/main/java/org/apache/hudi/examples/spark/HoodieWriteClientExample.java index 35e46605f17b2..1afc180531a16 100644 --- a/hudi-examples/src/main/java/org/apache/hudi/examples/spark/HoodieWriteClientExample.java +++ b/hudi-examples/src/main/java/org/apache/hudi/examples/spark/HoodieWriteClientExample.java @@ -38,6 +38,8 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hudi.table.action.HoodieWriteMetadata; + import org.apache.log4j.LogManager; import org.apache.log4j.Logger; import org.apache.spark.SparkConf; @@ -140,8 +142,8 @@ public static void main(String[] args) throws Exception { // compaction if (HoodieTableType.valueOf(tableType) == HoodieTableType.MERGE_ON_READ) { Option instant = client.scheduleCompaction(Option.empty()); - JavaRDD writeStatues = client.compact(instant.get()); - client.commitCompaction(instant.get(), writeStatues, Option.empty()); + HoodieWriteMetadata> compactionMetadata = client.compact(instant.get()); + client.commitCompaction(instant.get(), compactionMetadata.getCommitMetadata().get(), Option.empty()); } } diff --git a/hudi-examples/src/main/scala/org/apache/hudi/examples/spark/HoodieDataSourceExample.scala b/hudi-examples/src/main/scala/org/apache/hudi/examples/spark/HoodieDataSourceExample.scala index cb221fcef3b2a..77b3885e3cf7a 100644 --- a/hudi-examples/src/main/scala/org/apache/hudi/examples/spark/HoodieDataSourceExample.scala +++ b/hudi-examples/src/main/scala/org/apache/hudi/examples/spark/HoodieDataSourceExample.scala @@ -82,7 +82,7 @@ object HoodieDataSourceExample { option(PARTITIONPATH_FIELD.key, "partitionpath"). option(TBL_NAME.key, tableName). mode(Overwrite). - save(tablePath) + save(tablePath) } /** @@ -127,7 +127,7 @@ object HoodieDataSourceExample { option(PARTITIONPATH_FIELD.key, "partitionpath"). option(TBL_NAME.key, tableName). mode(Append). 
- save(tablePath) + save(tablePath) } /** diff --git a/hudi-flink/pom.xml b/hudi-flink/pom.xml index c8fac38be5b18..27a4a0b453cb7 100644 --- a/hudi-flink/pom.xml +++ b/hudi-flink/pom.xml @@ -164,13 +164,13 @@ org.apache.flink - flink-table-runtime-blink_${scala.binary.version} + flink-table-runtime_${scala.binary.version} ${flink.version} provided org.apache.flink - flink-table-planner-blink_${scala.binary.version} + flink-table-planner_${scala.binary.version} ${flink.version} provided @@ -307,7 +307,7 @@ org.apache.flink - flink-runtime_${scala.binary.version} + flink-runtime ${flink.version} test test-jar @@ -321,7 +321,7 @@ org.apache.flink - flink-table-runtime-blink_${scala.binary.version} + flink-table-runtime_${scala.binary.version} ${flink.version} test test-jar diff --git a/hudi-flink/src/main/java/org/apache/hudi/configuration/FlinkOptions.java b/hudi-flink/src/main/java/org/apache/hudi/configuration/FlinkOptions.java index 77c3f15e54c45..1be90603605cd 100644 --- a/hudi-flink/src/main/java/org/apache/hudi/configuration/FlinkOptions.java +++ b/hudi-flink/src/main/java/org/apache/hudi/configuration/FlinkOptions.java @@ -23,9 +23,11 @@ import org.apache.hudi.common.config.HoodieConfig; import org.apache.hudi.common.model.HoodieTableType; import org.apache.hudi.common.model.OverwriteWithLatestAvroPayload; +import org.apache.hudi.config.HoodieIndexConfig; import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.exception.HoodieException; import org.apache.hudi.hive.SlashEncodedDayPartitionValueExtractor; +import org.apache.hudi.index.HoodieIndex; import org.apache.hudi.keygen.constant.KeyGeneratorOptions; import org.apache.hudi.keygen.constant.KeyGeneratorType; @@ -106,6 +108,12 @@ private FlinkOptions() { // ------------------------------------------------------------------------ // Index Options // ------------------------------------------------------------------------ + public static final ConfigOption INDEX_TYPE = ConfigOptions + .key("index.type") + .stringType() + .defaultValue(HoodieIndex.IndexType.FLINK_STATE.name()) + .withDescription("Index type of Flink write job, default is using state backed index."); + public static final ConfigOption INDEX_BOOTSTRAP_ENABLED = ConfigOptions .key("index.bootstrap.enabled") .booleanType() @@ -310,6 +318,20 @@ private FlinkOptions() { + "Actual value will be obtained by invoking .toString() on the field value. Nested fields can be specified using " + "the dot notation eg: `a.b.c`"); + public static final ConfigOption INDEX_KEY_FIELD = ConfigOptions + .key(HoodieIndexConfig.BUCKET_INDEX_HASH_FIELD.key()) + .stringType() + .defaultValue("") + .withDescription("Index key field. Value to be used as hashing to find the bucket ID. Should be a subset of or equal to the recordKey fields.\n" + + "Actual value will be obtained by invoking .toString() on the field value. Nested fields can be specified using " + + "the dot notation eg: `a.b.c`"); + + public static final ConfigOption BUCKET_INDEX_NUM_BUCKETS = ConfigOptions + .key(HoodieIndexConfig.BUCKET_INDEX_NUM_BUCKETS.key()) + .intType() + .defaultValue(4) // default 4 buckets per partition + .withDescription("Hudi bucket number per partition. 
Only takes effect when using the Hudi bucket index."); + public static final ConfigOption PARTITION_PATH_FIELD = ConfigOptions .key(KeyGeneratorOptions.PARTITIONPATH_FIELD_NAME.key()) .stringType() diff --git a/hudi-flink/src/main/java/org/apache/hudi/configuration/OptionsResolver.java b/hudi-flink/src/main/java/org/apache/hudi/configuration/OptionsResolver.java index acb4af61110fa..6ebf09069be60 100644 --- a/hudi-flink/src/main/java/org/apache/hudi/configuration/OptionsResolver.java +++ b/hudi-flink/src/main/java/org/apache/hudi/configuration/OptionsResolver.java @@ -20,6 +20,7 @@ import org.apache.hudi.common.model.DefaultHoodieRecordPayload; import org.apache.hudi.common.model.WriteOperationType; +import org.apache.hudi.index.HoodieIndex; import org.apache.hudi.table.format.FilePathUtils; import org.apache.flink.configuration.Configuration; @@ -101,6 +102,10 @@ public static boolean isPartitionedTable(Configuration conf) { return FilePathUtils.extractPartitionKeys(conf).length > 0; } + public static boolean isBucketIndexType(Configuration conf) { + return conf.getString(FlinkOptions.INDEX_TYPE).equals(HoodieIndex.IndexType.BUCKET.name()); + } + /** * Returns whether the source should emit changelog. * diff --git a/hudi-flink/src/main/java/org/apache/hudi/sink/BucketStreamWriteFunction.java b/hudi-flink/src/main/java/org/apache/hudi/sink/BucketStreamWriteFunction.java new file mode 100644 index 0000000000000..128358096cde6 --- /dev/null +++ b/hudi-flink/src/main/java/org/apache/hudi/sink/BucketStreamWriteFunction.java @@ -0,0 +1,167 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.sink; + +import org.apache.flink.configuration.Configuration; +import org.apache.flink.runtime.state.FunctionInitializationContext; +import org.apache.flink.streaming.api.functions.ProcessFunction; +import org.apache.flink.util.Collector; +import org.apache.hudi.common.model.FileSlice; +import org.apache.hudi.common.model.HoodieKey; +import org.apache.hudi.common.model.HoodieRecord; +import org.apache.hudi.common.model.HoodieRecordLocation; +import org.apache.hudi.common.table.timeline.HoodieInstant; +import org.apache.hudi.common.util.Option; +import org.apache.hudi.configuration.FlinkOptions; +import org.apache.hudi.index.bucket.BucketIdentifier; +import org.apache.hudi.table.HoodieFlinkTable; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; + +import static java.util.stream.Collectors.toList; + +/** + * A stream write function with bucket hash index. + * + *
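A minimal sketch, not part of the patch, of enabling this index type from a Flink job. It uses only the options and helper added above; everything else is the standard Flink Configuration API, and the bucket count of 8 is an arbitrary illustration:

    import org.apache.flink.configuration.Configuration;
    import org.apache.hudi.configuration.FlinkOptions;
    import org.apache.hudi.index.HoodieIndex;

    public class BucketIndexConfigSketch {
      public static Configuration bucketIndexConf() {
        Configuration conf = new Configuration();
        conf.setString(FlinkOptions.INDEX_TYPE, HoodieIndex.IndexType.BUCKET.name());
        conf.setInteger(FlinkOptions.BUCKET_INDEX_NUM_BUCKETS, 8); // defaults to 4 per partition
        // if left empty, HoodieTableFactory (changed further below) falls back
        // to the record key fields
        conf.setString(FlinkOptions.INDEX_KEY_FIELD, "uuid");
        return conf; // OptionsResolver.isBucketIndexType(conf) now returns true
      }
    }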

The task holds a fresh local index: {(partition + bucket number) &rarr; fileId} mapping; this index + * is used to decide whether an incoming record is an UPDATE or an INSERT. + * The index is local because different partition paths have separate items in the index. + * + * @param the input type + */ +public class BucketStreamWriteFunction extends StreamWriteFunction { + + private static final Logger LOG = LoggerFactory.getLogger(BucketStreamWriteFunction.class); + + private int maxParallelism; + + private int parallelism; + + private int bucketNum; + + private transient HoodieFlinkTable table; + + private String indexKeyFields; + + private final HashMap bucketToFileIDMap; + + /** + * Constructs a BucketStreamWriteFunction. + * + * @param config The config options + */ + public BucketStreamWriteFunction(Configuration config) { + super(config); + this.bucketToFileIDMap = new HashMap<>(); + } + + @Override + public void open(Configuration parameters) throws IOException { + super.open(parameters); + this.bucketNum = config.getInteger(FlinkOptions.BUCKET_INDEX_NUM_BUCKETS); + this.indexKeyFields = config.getString(FlinkOptions.INDEX_KEY_FIELD); + this.taskID = getRuntimeContext().getIndexOfThisSubtask(); + this.parallelism = getRuntimeContext().getNumberOfParallelSubtasks(); + this.maxParallelism = getRuntimeContext().getMaxNumberOfParallelSubtasks(); + bootstrapIndex(); + } + + @Override + public void initializeState(FunctionInitializationContext context) throws Exception { + super.initializeState(context); + this.table = this.writeClient.getHoodieTable(); + } + + @Override + public void processElement(I i, ProcessFunction.Context context, Collector collector) throws Exception { + HoodieRecord record = (HoodieRecord) i; + final HoodieKey hoodieKey = record.getKey(); + final HoodieRecordLocation location; + + final int bucketNum = BucketIdentifier.getBucketId(hoodieKey, indexKeyFields, this.bucketNum); + final String partitionBucketId = BucketIdentifier.partitionBucketIdStr(hoodieKey.getPartitionPath(), bucketNum); + + if (bucketToFileIDMap.containsKey(partitionBucketId)) { + location = new HoodieRecordLocation("U", bucketToFileIDMap.get(partitionBucketId)); + } else { + String newFileId = BucketIdentifier.newBucketFileIdPrefix(bucketNum); + location = new HoodieRecordLocation("I", newFileId); + bucketToFileIDMap.put(partitionBucketId, newFileId); + } + record.unseal(); + record.setCurrentLocation(location); + record.seal(); + bufferRecord(record); + } + + /** + * Get the partition_bucket -> fileID mapping from the existing hudi table. + * This is a required operation for each restart to avoid having duplicate file ids for one bucket. + */ + private void bootstrapIndex() throws IOException { + Option latestCommitTime = table.getFileSystemView().getTimeline().filterCompletedInstants().lastInstant(); + if (!latestCommitTime.isPresent()) { + return; + } + // bootstrap bucket info from existing file system + // a bucket belongs to this task when bucketNum % parallelism == taskID + HashSet bucketToLoad = new HashSet<>(); + for (int i = 0; i < bucketNum; i++) { + int partitionOfBucket = BucketIdentifier.mod(i, parallelism); + if (partitionOfBucket == taskID) { + LOG.info(String.format("Bootstrapping index. 
Adding bucket %s , " + + "Current parallelism: %s , Max parallelism: %s , Current task id: %s", + i, parallelism, maxParallelism, taskID)); + bucketToLoad.add(i); + } + } + bucketToLoad.forEach(bucket -> LOG.info(String.format("bucketToLoad contains %s", bucket))); + + LOG.info(String.format("Loading Hoodie Table %s, with path %s", table.getMetaClient().getTableConfig().getTableName(), + table.getMetaClient().getBasePath())); + + // Iterate through all existing partitions to load the existing fileIDs that belong to this task + List partitions = table.getMetadata().getAllPartitionPaths(); + for (String partitionPath : partitions) { + List latestFileSlices = table.getSliceView() + .getLatestFileSlices(partitionPath) + .collect(toList()); + for (FileSlice fileslice : latestFileSlices) { + String fileID = fileslice.getFileId(); + int bucketNumber = BucketIdentifier.bucketIdFromFileId(fileID); + if (bucketToLoad.contains(bucketNumber)) { + String partitionBucketId = BucketIdentifier.partitionBucketIdStr(partitionPath, bucketNumber); + LOG.info(String.format("Should load this partition bucket %s with fileID %s", partitionBucketId, fileID)); + if (bucketToFileIDMap.containsKey(partitionBucketId)) { + throw new RuntimeException(String.format("Duplicate fileID %s from partitionBucket %s found " + + "during the BucketStreamWriteFunction index bootstrap.", fileID, partitionBucketId)); + } else { + LOG.info(String.format("Adding fileID %s to the partition bucket %s.", fileID, partitionBucketId)); + bucketToFileIDMap.put(partitionBucketId, fileID); + } + } + } + } + } +} diff --git a/hudi-flink/src/main/java/org/apache/hudi/sink/BucketStreamWriteOperator.java b/hudi-flink/src/main/java/org/apache/hudi/sink/BucketStreamWriteOperator.java new file mode 100644 index 0000000000000..209fe59e4b8ca --- /dev/null +++ b/hudi-flink/src/main/java/org/apache/hudi/sink/BucketStreamWriteOperator.java @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.sink; + +import org.apache.flink.configuration.Configuration; +import org.apache.hudi.sink.common.AbstractWriteOperator; +import org.apache.hudi.sink.common.WriteOperatorFactory; + +/** + * Operator for {@link BucketStreamWriteFunction}. 
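Condensed to its essentials, the routing in processElement above is a lookup-or-create on the local map; a minimal sketch, where the BucketIdentifier helper signatures are taken from the calls in this file and the map stands in for bucketToFileIDMap:

    import org.apache.hudi.common.model.HoodieKey;
    import org.apache.hudi.index.bucket.BucketIdentifier;

    import java.util.HashMap;
    import java.util.Map;

    public class BucketRoutingSketch {
      private final Map<String, String> bucketToFileId = new HashMap<>();

      /** Returns "U/fileId" for a known bucket, "I/fileId" for a fresh one. */
      public String route(HoodieKey key, String indexKeyFields, int numBuckets) {
        int bucketId = BucketIdentifier.getBucketId(key, indexKeyFields, numBuckets);
        String partitionBucketId = BucketIdentifier.partitionBucketIdStr(key.getPartitionPath(), bucketId);
        String fileId = bucketToFileId.get(partitionBucketId);
        if (fileId != null) {
          return "U/" + fileId; // the bucket already owns a file group: UPDATE
        }
        fileId = BucketIdentifier.newBucketFileIdPrefix(bucketId);
        bucketToFileId.put(partitionBucketId, fileId);
        return "I/" + fileId; // first record of this bucket: INSERT into a new file group
      }
    }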
+ * + * @param The input type + */ +public class BucketStreamWriteOperator extends AbstractWriteOperator { + + public BucketStreamWriteOperator(Configuration conf) { + super(new BucketStreamWriteFunction<>(conf)); + } + + public static WriteOperatorFactory getFactory(Configuration conf) { + return WriteOperatorFactory.instance(conf, new BucketStreamWriteOperator<>(conf)); + } +} diff --git a/hudi-flink/src/main/java/org/apache/hudi/sink/StreamWriteFunction.java b/hudi-flink/src/main/java/org/apache/hudi/sink/StreamWriteFunction.java index 0e7e35e7ea328..c2f54dd8aaffe 100644 --- a/hudi-flink/src/main/java/org/apache/hudi/sink/StreamWriteFunction.java +++ b/hudi-flink/src/main/java/org/apache/hudi/sink/StreamWriteFunction.java @@ -19,6 +19,7 @@ package org.apache.hudi.sink; import org.apache.hudi.client.WriteStatus; +import org.apache.hudi.common.model.HoodieAvroRecord; import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.model.HoodieOperation; import org.apache.hudi.common.model.HoodieRecord; @@ -217,13 +218,13 @@ public static DataItem fromHoodieRecord(HoodieRecord record) { return new DataItem( record.getRecordKey(), record.getCurrentLocation().getInstantTime(), - record.getData(), + ((HoodieAvroRecord) record).getData(), record.getOperation()); } public HoodieRecord toHoodieRecord(String partitionPath) { HoodieKey hoodieKey = new HoodieKey(this.key, partitionPath); - HoodieRecord record = new HoodieRecord<>(hoodieKey, data, operation); + HoodieRecord record = new HoodieAvroRecord<>(hoodieKey, data, operation); HoodieRecordLocation loc = new HoodieRecordLocation(instant, null); record.setCurrentLocation(loc); return record; @@ -264,7 +265,7 @@ public List writeBuffer() { public void preWrite(List records) { // rewrite the first record with expected fileID HoodieRecord first = records.get(0); - HoodieRecord record = new HoodieRecord<>(first.getKey(), first.getData(), first.getOperation()); + HoodieRecord record = new HoodieAvroRecord<>(first.getKey(), (HoodieRecordPayload) first.getData(), first.getOperation()); HoodieRecordLocation newLoc = new HoodieRecordLocation(first.getCurrentLocation().getInstantTime(), fileID); record.setCurrentLocation(newLoc); @@ -370,7 +371,7 @@ private String getBucketID(HoodieRecord record) { * * @param value HoodieRecord */ - private void bufferRecord(HoodieRecord value) { + protected void bufferRecord(HoodieRecord value) { final String bucketID = getBucketID(value); DataBucket bucket = this.buckets.computeIfAbsent(bucketID, diff --git a/hudi-flink/src/main/java/org/apache/hudi/sink/StreamWriteOperatorCoordinator.java b/hudi-flink/src/main/java/org/apache/hudi/sink/StreamWriteOperatorCoordinator.java index 447cfa420a3f4..4782070e33760 100644 --- a/hudi-flink/src/main/java/org/apache/hudi/sink/StreamWriteOperatorCoordinator.java +++ b/hudi-flink/src/main/java/org/apache/hudi/sink/StreamWriteOperatorCoordinator.java @@ -415,13 +415,11 @@ private void sendCommitAckEvents(long checkpointId) { CompletableFuture[] futures = Arrays.stream(this.gateways).filter(Objects::nonNull) .map(gw -> gw.sendEvent(CommitAckEvent.getInstance(checkpointId))) .toArray(CompletableFuture[]::new); - try { - CompletableFuture.allOf(futures).get(); - } catch (Throwable throwable) { - if (!sendToFinishedTasks(throwable)) { - throw new HoodieException("Error while waiting for the commit ack events to finish sending", throwable); + CompletableFuture.allOf(futures).whenComplete((resp, error) -> { + if (!sendToFinishedTasks(error)) { + throw new HoodieException("Error 
while waiting for the commit ack events to finish sending", error); } - } + }); } /** diff --git a/hudi-flink/src/main/java/org/apache/hudi/sink/bootstrap/BootstrapOperator.java b/hudi-flink/src/main/java/org/apache/hudi/sink/bootstrap/BootstrapOperator.java index 4832f18bf7f04..0f944c56577e2 100644 --- a/hudi-flink/src/main/java/org/apache/hudi/sink/bootstrap/BootstrapOperator.java +++ b/hudi-flink/src/main/java/org/apache/hudi/sink/bootstrap/BootstrapOperator.java @@ -20,6 +20,7 @@ import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.model.FileSlice; +import org.apache.hudi.common.model.HoodieAvroRecord; import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.model.HoodieRecordGlobalLocation; @@ -29,6 +30,7 @@ import org.apache.hudi.common.table.timeline.HoodieInstant; import org.apache.hudi.common.table.timeline.HoodieTimeline; import org.apache.hudi.common.util.BaseFileUtils; +import org.apache.hudi.common.util.ClosableIterator; import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.StringUtils; import org.apache.hudi.config.HoodieWriteConfig; @@ -209,17 +211,10 @@ protected void loadRecords(String partitionPath) throws Exception { if (!isValidFile(baseFile.getFileStatus())) { return; } - - final List hoodieKeys; - try { - hoodieKeys = - fileUtils.fetchRecordKeyPartitionPath(this.hadoopConf, new Path(baseFile.getPath())); - } catch (Exception e) { - throw new HoodieException(String.format("Error when loading record keys from file: %s", baseFile), e); - } - - for (HoodieKey hoodieKey : hoodieKeys) { - output.collect(new StreamRecord(new IndexRecord(generateHoodieRecord(hoodieKey, fileSlice)))); + try (ClosableIterator iterator = fileUtils.getHoodieKeyIterator(this.hadoopConf, new Path(baseFile.getPath()))) { + iterator.forEachRemaining(hoodieKey -> { + output.collect(new StreamRecord(new IndexRecord(generateHoodieRecord(hoodieKey, fileSlice)))); + }); } }); @@ -251,7 +246,7 @@ protected void loadRecords(String partitionPath) throws Exception { @SuppressWarnings("unchecked") public static HoodieRecord generateHoodieRecord(HoodieKey hoodieKey, FileSlice fileSlice) { - HoodieRecord hoodieRecord = new HoodieRecord(hoodieKey, null); + HoodieRecord hoodieRecord = new HoodieAvroRecord(hoodieKey, null); hoodieRecord.setCurrentLocation(new HoodieRecordGlobalLocation(hoodieKey.getPartitionPath(), fileSlice.getBaseInstantTime(), fileSlice.getFileId())); hoodieRecord.seal(); diff --git a/hudi-flink/src/main/java/org/apache/hudi/sink/bootstrap/IndexRecord.java b/hudi-flink/src/main/java/org/apache/hudi/sink/bootstrap/IndexRecord.java index 2fe83b71ca98f..edae0389b8aca 100644 --- a/hudi-flink/src/main/java/org/apache/hudi/sink/bootstrap/IndexRecord.java +++ b/hudi-flink/src/main/java/org/apache/hudi/sink/bootstrap/IndexRecord.java @@ -18,16 +18,22 @@ package org.apache.hudi.sink.bootstrap; +import org.apache.hudi.common.model.HoodieAvroRecord; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.model.HoodieRecordPayload; /** * The index record. 
*/ -public class IndexRecord extends HoodieRecord { +public class IndexRecord extends HoodieAvroRecord { private static final long serialVersionUID = 1L; public IndexRecord(HoodieRecord record) { super(record); } + + @Override + public HoodieRecord newInstance() { + return new IndexRecord<>(this); + } } \ No newline at end of file diff --git a/hudi-flink/src/main/java/org/apache/hudi/sink/compact/CompactFunction.java b/hudi-flink/src/main/java/org/apache/hudi/sink/compact/CompactFunction.java index 560b5ffbad305..a43fcd5ad4bf9 100644 --- a/hudi-flink/src/main/java/org/apache/hudi/sink/compact/CompactFunction.java +++ b/hudi-flink/src/main/java/org/apache/hudi/sink/compact/CompactFunction.java @@ -21,9 +21,11 @@ import org.apache.hudi.client.HoodieFlinkWriteClient; import org.apache.hudi.client.WriteStatus; import org.apache.hudi.common.model.CompactionOperation; +import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.sink.utils.NonThrownExecutor; import org.apache.hudi.table.HoodieFlinkCopyOnWriteTable; import org.apache.hudi.table.action.compact.HoodieFlinkMergeOnReadTableCompactor; +import org.apache.hudi.util.CompactionUtil; import org.apache.hudi.util.StreamerUtil; import org.apache.flink.annotation.VisibleForTesting; @@ -51,7 +53,7 @@ public class CompactFunction extends ProcessFunction writeClient; /** * Whether to execute compaction asynchronously. @@ -89,21 +91,24 @@ public void processElement(CompactionPlanEvent event, Context context, Collector if (asyncCompaction) { // executes the compaction task asynchronously to not block the checkpoint barrier propagate. executor.execute( - () -> doCompaction(instantTime, compactionOperation, collector), + () -> doCompaction(instantTime, compactionOperation, collector, reloadWriteConfig()), (errMsg, t) -> collector.collect(new CompactionCommitEvent(instantTime, compactionOperation.getFileId(), taskID)), "Execute compaction for instant %s from task %d", instantTime, taskID); } else { // executes the compaction task synchronously for batch mode. 
LOG.info("Execute compaction for instant {} from task {}", instantTime, taskID); - doCompaction(instantTime, compactionOperation, collector); + doCompaction(instantTime, compactionOperation, collector, writeClient.getConfig()); } } - private void doCompaction(String instantTime, CompactionOperation compactionOperation, Collector collector) throws IOException { - HoodieFlinkMergeOnReadTableCompactor compactor = new HoodieFlinkMergeOnReadTableCompactor(); + private void doCompaction(String instantTime, + CompactionOperation compactionOperation, + Collector collector, + HoodieWriteConfig writeConfig) throws IOException { + HoodieFlinkMergeOnReadTableCompactor compactor = new HoodieFlinkMergeOnReadTableCompactor<>(); List writeStatuses = compactor.compact( new HoodieFlinkCopyOnWriteTable<>( - writeClient.getConfig(), + writeConfig, writeClient.getEngineContext(), writeClient.getHoodieTable().getMetaClient()), writeClient.getHoodieTable().getMetaClient(), @@ -114,6 +119,12 @@ private void doCompaction(String instantTime, CompactionOperation compactionOper collector.collect(new CompactionCommitEvent(instantTime, compactionOperation.getFileId(), writeStatuses, taskID)); } + private HoodieWriteConfig reloadWriteConfig() throws Exception { + HoodieWriteConfig writeConfig = writeClient.getConfig(); + CompactionUtil.setAvroSchema(writeConfig, writeClient.getHoodieTable().getMetaClient()); + return writeConfig; + } + @VisibleForTesting public void setExecutor(NonThrownExecutor executor) { this.executor = executor; diff --git a/hudi-flink/src/main/java/org/apache/hudi/sink/compact/CompactionCommitSink.java b/hudi-flink/src/main/java/org/apache/hudi/sink/compact/CompactionCommitSink.java index 53127359cb986..ecd66936e88c3 100644 --- a/hudi-flink/src/main/java/org/apache/hudi/sink/compact/CompactionCommitSink.java +++ b/hudi-flink/src/main/java/org/apache/hudi/sink/compact/CompactionCommitSink.java @@ -20,12 +20,15 @@ import org.apache.hudi.avro.model.HoodieCompactionPlan; import org.apache.hudi.client.WriteStatus; +import org.apache.hudi.common.data.HoodieList; +import org.apache.hudi.common.model.HoodieCommitMetadata; import org.apache.hudi.common.util.CompactionUtils; import org.apache.hudi.common.util.Option; import org.apache.hudi.configuration.FlinkOptions; import org.apache.hudi.exception.HoodieException; import org.apache.hudi.sink.CleanFunction; import org.apache.hudi.table.HoodieFlinkTable; +import org.apache.hudi.table.action.compact.CompactHelpers; import org.apache.hudi.util.CompactionUtil; import org.apache.hudi.util.StreamerUtil; @@ -147,8 +150,11 @@ private void doCommit(String instant, Collection events) .flatMap(Collection::stream) .collect(Collectors.toList()); + HoodieCommitMetadata metadata = CompactHelpers.getInstance().createCompactionMetadata( + table, instant, HoodieList.of(statuses), writeClient.getConfig().getSchema()); + // commit the compaction - this.writeClient.commitCompaction(instant, statuses, Option.empty()); + this.writeClient.commitCompaction(instant, metadata, Option.empty()); // Whether to clean up the old log file when compaction if (!conf.getBoolean(FlinkOptions.CLEAN_ASYNC_ENABLED)) { diff --git a/hudi-flink/src/main/java/org/apache/hudi/sink/compact/HoodieFlinkCompactor.java b/hudi-flink/src/main/java/org/apache/hudi/sink/compact/HoodieFlinkCompactor.java index a6161f2c88cf8..546136e416b7f 100644 --- a/hudi-flink/src/main/java/org/apache/hudi/sink/compact/HoodieFlinkCompactor.java +++ 
b/hudi-flink/src/main/java/org/apache/hudi/sink/compact/HoodieFlinkCompactor.java @@ -18,7 +18,7 @@ package org.apache.hudi.sink.compact; -import org.apache.hudi.async.HoodieAsyncService; +import org.apache.hudi.async.HoodieAsyncTableService; import org.apache.hudi.avro.model.HoodieCompactionPlan; import org.apache.hudi.client.HoodieFlinkWriteClient; import org.apache.hudi.common.table.HoodieTableMetaClient; @@ -116,7 +116,7 @@ public static FlinkCompactionConfig getFlinkCompactionConfig(String[] args) { /** * Schedules compaction in service. */ - public static class AsyncCompactionService extends HoodieAsyncService { + public static class AsyncCompactionService extends HoodieAsyncTableService { private static final long serialVersionUID = 1L; /** @@ -173,6 +173,7 @@ public AsyncCompactionService(FlinkCompactionConfig cfg, Configuration conf, Str CompactionUtil.inferChangelogMode(conf, metaClient); this.writeClient = StreamerUtil.createWriteClient(conf); + this.writeConfig = writeClient.getConfig(); this.table = writeClient.getHoodieTable(); } diff --git a/hudi-flink/src/main/java/org/apache/hudi/sink/partitioner/BucketAssignFunction.java b/hudi-flink/src/main/java/org/apache/hudi/sink/partitioner/BucketAssignFunction.java index 73fd6685539fb..d01db962c9ba4 100644 --- a/hudi-flink/src/main/java/org/apache/hudi/sink/partitioner/BucketAssignFunction.java +++ b/hudi-flink/src/main/java/org/apache/hudi/sink/partitioner/BucketAssignFunction.java @@ -22,6 +22,7 @@ import org.apache.hudi.client.common.HoodieFlinkEngineContext; import org.apache.hudi.common.config.SerializableConfiguration; import org.apache.hudi.common.model.BaseAvroPayload; +import org.apache.hudi.common.model.HoodieAvroRecord; import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.model.HoodieRecordGlobalLocation; @@ -180,7 +181,7 @@ private void processRecord(HoodieRecord record, Collector out) throws Exce if (globalIndex) { // if partition path changes, emit a delete record for old partition path, // then update the index state using location with new partition path. - HoodieRecord deleteRecord = new HoodieRecord<>(new HoodieKey(recordKey, oldLoc.getPartitionPath()), + HoodieRecord deleteRecord = new HoodieAvroRecord<>(new HoodieKey(recordKey, oldLoc.getPartitionPath()), payloadCreation.createDeletePayload((BaseAvroPayload) record.getData())); deleteRecord.setCurrentLocation(oldLoc.toLocal("U")); deleteRecord.seal(); diff --git a/hudi-flink/src/main/java/org/apache/hudi/sink/partitioner/BucketIndexPartitioner.java b/hudi-flink/src/main/java/org/apache/hudi/sink/partitioner/BucketIndexPartitioner.java new file mode 100644 index 0000000000000..ab46b0317e477 --- /dev/null +++ b/hudi-flink/src/main/java/org/apache/hudi/sink/partitioner/BucketIndexPartitioner.java @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.sink.partitioner; + +import org.apache.flink.api.common.functions.Partitioner; +import org.apache.hudi.common.model.HoodieKey; +import org.apache.hudi.index.bucket.BucketIdentifier; + +/** + * Bucket index input partitioner. + * The fields to hash can be a subset of the primary key fields. + * + * @param The type of obj to hash + */ +public class BucketIndexPartitioner implements Partitioner { + + private final int bucketNum; + private final String indexKeyFields; + + public BucketIndexPartitioner(int bucketNum, String indexKeyFields) { + this.bucketNum = bucketNum; + this.indexKeyFields = indexKeyFields; + } + + @Override + public int partition(HoodieKey key, int numPartitions) { + int curBucket = BucketIdentifier.getBucketId(key, indexKeyFields, bucketNum); + return BucketIdentifier.mod(curBucket, numPartitions); + } +} diff --git a/hudi-flink/src/main/java/org/apache/hudi/sink/partitioner/profile/DeltaWriteProfile.java b/hudi-flink/src/main/java/org/apache/hudi/sink/partitioner/profile/DeltaWriteProfile.java index 97b6b238814c7..aad775a356423 100644 --- a/hudi-flink/src/main/java/org/apache/hudi/sink/partitioner/profile/DeltaWriteProfile.java +++ b/hudi-flink/src/main/java/org/apache/hudi/sink/partitioner/profile/DeltaWriteProfile.java @@ -59,7 +59,7 @@ protected List smallFilesProfile(String partitionPath) { List allSmallFileSlices = new ArrayList<>(); // If we can index log files, we can add more inserts to log files for fileIds including those under // pending compaction. 
- List allFileSlices = fsView.getLatestFileSlicesBeforeOrOn(partitionPath, latestCommitTime.getTimestamp(), true) + List allFileSlices = fsView.getLatestFileSlicesBeforeOrOn(partitionPath, latestCommitTime.getTimestamp(), false) .collect(Collectors.toList()); for (FileSlice fileSlice : allFileSlices) { if (isSmallFile(fileSlice)) { diff --git a/hudi-flink/src/main/java/org/apache/hudi/sink/partitioner/profile/WriteProfile.java b/hudi-flink/src/main/java/org/apache/hudi/sink/partitioner/profile/WriteProfile.java index 84fcd03f0833b..fdb8152b00577 100644 --- a/hudi-flink/src/main/java/org/apache/hudi/sink/partitioner/profile/WriteProfile.java +++ b/hudi-flink/src/main/java/org/apache/hudi/sink/partitioner/profile/WriteProfile.java @@ -180,7 +180,14 @@ public synchronized List getSmallFiles(String partitionPath) { if (smallFilesMap.containsKey(partitionPath)) { return smallFilesMap.get(partitionPath); } - List smallFiles = smallFilesProfile(partitionPath); + + List smallFiles = new ArrayList<>(); + if (config.getParquetSmallFileLimit() <= 0) { + this.smallFilesMap.put(partitionPath, smallFiles); + return smallFiles; + } + + smallFiles = smallFilesProfile(partitionPath); this.smallFilesMap.put(partitionPath, smallFiles); return smallFiles; } diff --git a/hudi-flink/src/main/java/org/apache/hudi/sink/partitioner/profile/WriteProfiles.java b/hudi-flink/src/main/java/org/apache/hudi/sink/partitioner/profile/WriteProfiles.java index e8aafd830f10f..405522802c368 100644 --- a/hudi-flink/src/main/java/org/apache/hudi/sink/partitioner/profile/WriteProfiles.java +++ b/hudi-flink/src/main/java/org/apache/hudi/sink/partitioner/profile/WriteProfiles.java @@ -18,6 +18,10 @@ package org.apache.hudi.sink.partitioner.profile; +import org.apache.flink.core.fs.Path; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; import org.apache.hudi.client.common.HoodieFlinkEngineContext; import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.model.HoodieCommitMetadata; @@ -29,11 +33,6 @@ import org.apache.hudi.exception.HoodieException; import org.apache.hudi.hadoop.utils.HoodieInputFormatUtils; import org.apache.hudi.util.StreamerUtil; - -import org.apache.flink.core.fs.Path; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.FileSystem; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -117,7 +116,7 @@ private static Map getFilesToReadOfInstant( HoodieCommitMetadata metadata, FileSystem fs, HoodieTableType tableType) { - return getFilesToRead(metadata, basePath.toString(), tableType).entrySet().stream() + return getFilesToRead(fs.getConf(), metadata, basePath.toString(), tableType).entrySet().stream() // filter out the file paths that does not exist, some files may be cleaned by // the cleaner. 
.filter(entry -> { @@ -133,14 +132,16 @@ private static Map getFilesToReadOfInstant( } private static Map getFilesToRead( + Configuration hadoopConf, HoodieCommitMetadata metadata, String basePath, - HoodieTableType tableType) { + HoodieTableType tableType + ) { switch (tableType) { case COPY_ON_WRITE: - return metadata.getFileIdToFileStatus(basePath); + return metadata.getFileIdToFileStatus(hadoopConf, basePath); case MERGE_ON_READ: - return metadata.getFullPathToFileStatus(basePath); + return metadata.getFullPathToFileStatus(hadoopConf, basePath); default: throw new AssertionError(); } diff --git a/hudi-flink/src/main/java/org/apache/hudi/sink/transform/RowDataToHoodieFunction.java b/hudi-flink/src/main/java/org/apache/hudi/sink/transform/RowDataToHoodieFunction.java index b600a5d2f50e4..bfc7d7d62ad45 100644 --- a/hudi-flink/src/main/java/org/apache/hudi/sink/transform/RowDataToHoodieFunction.java +++ b/hudi-flink/src/main/java/org/apache/hudi/sink/transform/RowDataToHoodieFunction.java @@ -18,11 +18,11 @@ package org.apache.hudi.sink.transform; +import org.apache.hudi.common.model.HoodieAvroRecord; import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.model.HoodieOperation; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.model.HoodieRecordPayload; -import org.apache.hudi.configuration.FlinkOptions; import org.apache.hudi.keygen.KeyGenerator; import org.apache.hudi.keygen.factory.HoodieAvroKeyGeneratorFactory; import org.apache.hudi.sink.utils.PayloadCreation; @@ -87,7 +87,7 @@ public void open(Configuration parameters) throws Exception { this.converter = RowDataToAvroConverters.createConverter(this.rowType); this.keyGenerator = HoodieAvroKeyGeneratorFactory - .createKeyGenerator(flinkConf2TypedProperties(FlinkOptions.flatOptions(this.config))); + .createKeyGenerator(flinkConf2TypedProperties(this.config)); this.payloadCreation = PayloadCreation.instance(config); } @@ -111,6 +111,6 @@ private HoodieRecord toHoodieRecord(I record) throws Exception { HoodieRecordPayload payload = payloadCreation.createPayload(gr); HoodieOperation operation = HoodieOperation.fromValue(record.getRowKind().toByteValue()); - return new HoodieRecord<>(hoodieKey, payload, operation); + return new HoodieAvroRecord<>(hoodieKey, payload, operation); } } diff --git a/hudi-flink/src/main/java/org/apache/hudi/sink/transform/RowDataToHoodieFunctionWithRateLimit.java b/hudi-flink/src/main/java/org/apache/hudi/sink/transform/RowDataToHoodieFunctionWithRateLimit.java index 4526c6ff9ea98..3d42ad87d908e 100644 --- a/hudi-flink/src/main/java/org/apache/hudi/sink/transform/RowDataToHoodieFunctionWithRateLimit.java +++ b/hudi-flink/src/main/java/org/apache/hudi/sink/transform/RowDataToHoodieFunctionWithRateLimit.java @@ -22,7 +22,7 @@ import org.apache.hudi.configuration.FlinkOptions; import org.apache.flink.configuration.Configuration; -import org.apache.flink.shaded.guava18.com.google.common.util.concurrent.RateLimiter; +import org.apache.flink.shaded.guava30.com.google.common.util.concurrent.RateLimiter; import org.apache.flink.table.data.RowData; import org.apache.flink.table.types.logical.RowType; diff --git a/hudi-flink/src/main/java/org/apache/hudi/sink/utils/Pipelines.java b/hudi-flink/src/main/java/org/apache/hudi/sink/utils/Pipelines.java index 5f156e839f1e3..ae8b4f21300a2 100644 --- a/hudi-flink/src/main/java/org/apache/hudi/sink/utils/Pipelines.java +++ b/hudi-flink/src/main/java/org/apache/hudi/sink/utils/Pipelines.java @@ -18,9 +18,11 @@ package 
org.apache.hudi.sink.utils; +import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.configuration.FlinkOptions; import org.apache.hudi.configuration.OptionsResolver; +import org.apache.hudi.sink.BucketStreamWriteOperator; import org.apache.hudi.sink.CleanFunction; import org.apache.hudi.sink.StreamWriteOperator; import org.apache.hudi.sink.append.AppendWriteOperator; @@ -36,14 +38,18 @@ import org.apache.hudi.sink.compact.CompactionPlanEvent; import org.apache.hudi.sink.compact.CompactionPlanOperator; import org.apache.hudi.sink.partitioner.BucketAssignFunction; +import org.apache.hudi.sink.partitioner.BucketIndexPartitioner; import org.apache.hudi.sink.transform.RowDataToHoodieFunctions; import org.apache.hudi.table.format.FilePathUtils; +import org.apache.flink.api.common.functions.Partitioner; import org.apache.flink.api.common.typeinfo.TypeInformation; import org.apache.flink.configuration.Configuration; +import org.apache.flink.runtime.state.KeyGroupRangeAssignment; import org.apache.flink.streaming.api.datastream.DataStream; import org.apache.flink.streaming.api.datastream.DataStreamSink; import org.apache.flink.streaming.api.functions.sink.SinkFunction; +import org.apache.flink.streaming.api.graph.StreamGraphGenerator; import org.apache.flink.streaming.api.operators.KeyedProcessOperator; import org.apache.flink.streaming.api.operators.ProcessOperator; import org.apache.flink.table.data.RowData; @@ -55,6 +61,31 @@ */ public class Pipelines { + /** + * Bulk insert the input dataset at once. + * + *

By default, the input dataset is shuffled by the partition path first and then + * sorted by the partition path before being passed to the write function. + * The whole pipeline looks like the following: + * + *

+   *      | input1 | ===\     /=== |sorter| === | task1 | (p1, p2)
+   *                   shuffle
+   *      | input2 | ===/     \=== |sorter| === | task2 | (p3, p4)
+   *
+   *      Note: Both input1's and input2's datasets come from partitions: p1, p2, p3, p4
+   * 
+ * + *

The write task switches to a new file handle each time it receives a record + * from a different partition path; the shuffle and sort reduce the number of small files. + * + *
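The shuffle itself is just a custom channel selector; a sketch in isolation, mirroring the partitionCustom change in the hunk below, where it is wired as dataStream.partitionCustom(byPartitionPath(), rowDataKeyGen::getPartitionPath):

    import org.apache.flink.api.common.functions.Partitioner;
    import org.apache.flink.runtime.state.KeyGroupRangeAssignment;
    import org.apache.flink.streaming.api.graph.StreamGraphGenerator;

    public class PartitionPathShuffleSketch {
      // routes a partition path to the same channel a keyBy would pick, but
      // without triggering the batch runtime's implicit keyBy-induced sort
      public static Partitioner<String> byPartitionPath() {
        return (key, channels) ->
            KeyGroupRangeAssignment.assignKeyToParallelOperator(
                key, StreamGraphGenerator.DEFAULT_LOWER_BOUND_MAX_PARALLELISM, channels);
      }
    }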

The bulk insert should be run in batch execution mode. + * + * @param conf The configuration + * @param rowType The input row type + * @param dataStream The input data stream + * @return the bulk insert data stream sink + */ public static DataStreamSink bulkInsert(Configuration conf, RowType rowType, DataStream dataStream) { WriteOperatorFactory operatorFactory = BulkInsertWriteOperator.getFactory(conf, rowType); @@ -64,7 +95,11 @@ public static DataStreamSink bulkInsert(Configuration conf, RowType rowT if (conf.getBoolean(FlinkOptions.WRITE_BULK_INSERT_SHUFFLE_BY_PARTITION)) { // shuffle by partition keys - dataStream = dataStream.keyBy(rowDataKeyGen::getPartitionPath); + // use #partitionCustom instead of #keyBy to avoid duplicate sort operations, + // see BatchExecutionUtils#applyBatchExecutionSettings for details. + Partitioner partitioner = (key, channels) -> + KeyGroupRangeAssignment.assignKeyToParallelOperator(key, StreamGraphGenerator.DEFAULT_LOWER_BOUND_MAX_PARALLELISM, channels); + dataStream = dataStream.partitionCustom(partitioner, rowDataKeyGen::getPartitionPath); } if (conf.getBoolean(FlinkOptions.WRITE_BULK_INSERT_SORT_BY_PARTITION)) { SortOperatorGen sortOperatorGen = new SortOperatorGen(rowType, partitionFields); @@ -88,6 +123,27 @@ public static DataStreamSink bulkInsert(Configuration conf, RowType rowT .name("dummy"); } + /** + * Insert the dataset in append mode (no upsert or deduplication). + * + *

The input dataset would be rebalanced among the write tasks: + * + *

+   *      | input1 | ===\     /=== | task1 | (p1, p2, p3, p4)
+   *                   shuffle
+   *      | input2 | ===/     \=== | task2 | (p1, p2, p3, p4)
+   *
+   *      Note: Both input1's and input2's datasets come from partitions: p1, p2, p3, p4
+   * 
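This append path is what a plain INSERT job gets; a hedged sketch of requesting it, using Hudi's pre-existing FlinkOptions.OPERATION option rather than anything added in this patch (the exact routing between the append and upsert pipelines lives in the sink builder, which this patch does not show):

    import org.apache.flink.configuration.Configuration;
    import org.apache.hudi.common.model.WriteOperationType;
    import org.apache.hudi.configuration.FlinkOptions;

    public class AppendModeSketch {
      public static Configuration appendConf() {
        Configuration conf = new Configuration();
        // "insert" disables upsert and deduplication, steering the job toward append
        conf.setString(FlinkOptions.OPERATION, WriteOperationType.INSERT.value());
        return conf;
      }
    }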
+ * + *

The write task switches to a new file handle each time it receives a record + * from a different partition path, so there may be many small files. + * + * @param conf The configuration + * @param rowType The input row type + * @param dataStream The input data stream + * @return the appending data stream sink + */ public static DataStreamSink append(Configuration conf, RowType rowType, DataStream dataStream) { WriteOperatorFactory operatorFactory = AppendWriteOperator.getFactory(conf, rowType); @@ -101,6 +157,8 @@ public static DataStreamSink append(Configuration conf, RowType rowType, /** * Constructs bootstrap pipeline as streaming. + * The bootstrap operator loads the existing data index (primary key to file id mapping), + * then sends the indexing data set to the subsequent operator (usually the bucket assign operator). + */ public static DataStream bootstrap( Configuration conf, @@ -112,6 +170,8 @@ public static DataStream bootstrap( /** * Constructs bootstrap pipeline. + * The bootstrap operator loads the existing data index (primary key to file id mapping), + * then sends the indexing data set to the subsequent operator (usually the bucket assign operator). * * @param conf The configuration * @param rowType The row type @@ -128,7 +188,7 @@ public static DataStream bootstrap( boolean bounded, boolean overwrite) { final boolean globalIndex = conf.getBoolean(FlinkOptions.INDEX_GLOBAL_ENABLED); - if (overwrite) { + if (overwrite || OptionsResolver.isBucketIndexType(conf)) { return rowDataToHoodieRecord(conf, rowType, dataStream); } else if (bounded && !globalIndex && OptionsResolver.isPartitionedTable(conf)) { return boundedBootstrap(conf, rowType, defaultParallelism, dataStream); @@ -158,6 +218,11 @@ private static DataStream streamBootstrap( return dataStream1; } + /** + * Constructs bootstrap pipeline for batch execution mode. + * The indexing data set is loaded before the actual data write + * in order to support batch UPSERT. + */ private static DataStream boundedBootstrap( Configuration conf, RowType rowType, @@ -177,28 +242,85 @@ private static DataStream boundedBootstrap( .uid("uid_batch_index_bootstrap_" + conf.getString(FlinkOptions.TABLE_NAME)); } + /** + * Transforms the row data to hoodie records. + */ public static DataStream rowDataToHoodieRecord(Configuration conf, RowType rowType, DataStream dataStream) { return dataStream.map(RowDataToHoodieFunctions.create(rowType, conf), TypeInformation.of(HoodieRecord.class)); } + /** + * The streaming write pipeline. + * + *

The input dataset is shuffled by the primary key first and then + * shuffled by the file group ID before being passed to the write function. + * The whole pipeline looks like the following: + * + *

+   *      | input1 | ===\     /=== | bucket assigner | ===\     /=== | task1 |
+   *                   shuffle(by PK)                    shuffle(by bucket ID)
+   *      | input2 | ===/     \=== | bucket assigner | ===/     \=== | task2 |
+   *
+   *      Note: a file group must be handled by one write task to avoid write conflicts.
+   * 
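With the bucket index enabled, hoodieStreamWrite below replaces both keyBy stages with a single partitionCustom over BucketIndexPartitioner; routing then becomes a deterministic function of the hashed key, which is what pins a file group to one task. A small demo of that property (the generic parameter, stripped in this rendering, is assumed to be HoodieKey):

    import org.apache.hudi.common.model.HoodieKey;
    import org.apache.hudi.sink.partitioner.BucketIndexPartitioner;

    public class BucketPinningDemo {
      public static void main(String[] args) {
        BucketIndexPartitioner<HoodieKey> partitioner = new BucketIndexPartitioner<>(4, "uuid");
        HoodieKey key = new HoodieKey("uuid-0001", "par1");
        // prints the same channel twice: every record of a bucket takes the same route
        System.out.println(partitioner.partition(key, 2));
        System.out.println(partitioner.partition(key, 2));
      }
    }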
+ * + *

The bucket assigner assigns the inputs to suitable file groups; the write task caches + * and flushes the data set to disk. + * + * @param conf The configuration + * @param defaultParallelism The default parallelism + * @param dataStream The input data stream + * @return the stream write data stream pipeline + */ public static DataStream hoodieStreamWrite(Configuration conf, int defaultParallelism, DataStream dataStream) { - WriteOperatorFactory operatorFactory = StreamWriteOperator.getFactory(conf); - return dataStream + if (OptionsResolver.isBucketIndexType(conf)) { + WriteOperatorFactory operatorFactory = BucketStreamWriteOperator.getFactory(conf); + int bucketNum = conf.getInteger(FlinkOptions.BUCKET_INDEX_NUM_BUCKETS); + String indexKeyFields = conf.getString(FlinkOptions.INDEX_KEY_FIELD); + BucketIndexPartitioner partitioner = new BucketIndexPartitioner<>(bucketNum, indexKeyFields); + return dataStream.partitionCustom(partitioner, HoodieRecord::getKey) + .transform("bucket_write", TypeInformation.of(Object.class), operatorFactory) + .uid("uid_bucket_write" + conf.getString(FlinkOptions.TABLE_NAME)) + .setParallelism(conf.getInteger(FlinkOptions.WRITE_TASKS)); + } else { + WriteOperatorFactory operatorFactory = StreamWriteOperator.getFactory(conf); + return dataStream // Key-by record key, to avoid multiple subtasks write to a bucket at the same time .keyBy(HoodieRecord::getRecordKey) .transform( - "bucket_assigner", - TypeInformation.of(HoodieRecord.class), - new KeyedProcessOperator<>(new BucketAssignFunction<>(conf))) + "bucket_assigner", + TypeInformation.of(HoodieRecord.class), + new KeyedProcessOperator<>(new BucketAssignFunction<>(conf))) .uid("uid_bucket_assigner_" + conf.getString(FlinkOptions.TABLE_NAME)) .setParallelism(conf.getOptional(FlinkOptions.BUCKET_ASSIGN_TASKS).orElse(defaultParallelism)) // shuffle by fileId(bucket id) .keyBy(record -> record.getCurrentLocation().getFileId()) - .transform("hoodie_stream_write", TypeInformation.of(Object.class), operatorFactory) - .uid("uid_hoodie_stream_write" + conf.getString(FlinkOptions.TABLE_NAME)) + .transform("stream_write", TypeInformation.of(Object.class), operatorFactory) + .uid("uid_stream_write" + conf.getString(FlinkOptions.TABLE_NAME)) .setParallelism(conf.getInteger(FlinkOptions.WRITE_TASKS)); + } } + /** + * The compaction tasks pipeline. + * + *

The compaction plan operator monitors the timeline for new compaction plans, + * then distributes the sub-plans to the compaction tasks. Each compaction task then + * hands the metadata over to the commit task, which commits the compaction transaction. + * The whole pipeline looks like the following: + * + *

+   *                                           /=== | task1 | ===\
+   *      | plan generation | ===> re-balance                      | commit |
+   *                                           \=== | task2 | ===/
+   *
+   *      Note: both the compaction plan generation task and the commit task are singletons.
+   * 
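The commit box above maps onto the reworked client API used throughout this patch: compact() now returns a HoodieWriteMetadata and commitCompaction() takes the extracted commit metadata, matching the HoodieWriteClientExample and CompactionCommitSink changes earlier. A minimal sketch, assuming the Spark client type from that example:

    import org.apache.hudi.client.SparkRDDWriteClient;
    import org.apache.hudi.client.WriteStatus;
    import org.apache.hudi.common.util.Option;
    import org.apache.hudi.table.action.HoodieWriteMetadata;
    import org.apache.spark.api.java.JavaRDD;

    public class CompactOnceSketch {
      public static void compactOnce(SparkRDDWriteClient<?> client) {
        // schedule an inline compaction, then run and commit it in one pass
        Option<String> instant = client.scheduleCompaction(Option.empty());
        if (instant.isPresent()) {
          HoodieWriteMetadata<JavaRDD<WriteStatus>> result = client.compact(instant.get());
          client.commitCompaction(instant.get(), result.getCommitMetadata().get(), Option.empty());
        }
      }
    }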
+ * + * @param conf The configuration + * @param dataStream The input data stream + * @return the compaction pipeline + */ public static DataStreamSink compact(Configuration conf, DataStream dataStream) { return dataStream.transform("compact_plan_generate", TypeInformation.of(CompactionPlanEvent.class), diff --git a/hudi-flink/src/main/java/org/apache/hudi/source/IncrementalInputSplits.java b/hudi-flink/src/main/java/org/apache/hudi/source/IncrementalInputSplits.java index 58c38ef56744e..02e0e253cf577 100644 --- a/hudi-flink/src/main/java/org/apache/hudi/source/IncrementalInputSplits.java +++ b/hudi-flink/src/main/java/org/apache/hudi/source/IncrementalInputSplits.java @@ -250,22 +250,12 @@ private List getArchivedMetadata( InstantRange instantRange, HoodieTimeline commitTimeline, String tableName) { - if (instantRange == null || commitTimeline.isBeforeTimelineStarts(instantRange.getStartInstant())) { - // read the archived metadata if: - // 1. the start commit is 'earliest'; - // 2. the start instant is archived. - HoodieArchivedTimeline archivedTimeline = metaClient.getArchivedTimeline(); + if (commitTimeline.isBeforeTimelineStarts(instantRange.getStartInstant())) { + // read the archived metadata if the start instant is archived. + HoodieArchivedTimeline archivedTimeline = metaClient.getArchivedTimeline(instantRange.getStartInstant()); HoodieTimeline archivedCompleteTimeline = archivedTimeline.getCommitsTimeline().filterCompletedInstants(); if (!archivedCompleteTimeline.empty()) { - final String endTs = archivedCompleteTimeline.lastInstant().get().getTimestamp(); Stream instantStream = archivedCompleteTimeline.getInstants(); - if (instantRange != null) { - archivedTimeline.loadInstantDetailsInMemory(instantRange.getStartInstant(), endTs); - instantStream = instantStream.filter(s -> HoodieTimeline.compareTimestamps(s.getTimestamp(), GREATER_THAN_OR_EQUALS, instantRange.getStartInstant())); - } else { - final String startTs = archivedCompleteTimeline.firstInstant().get().getTimestamp(); - archivedTimeline.loadInstantDetailsInMemory(startTs, endTs); - } return maySkipCompaction(instantStream) .map(instant -> WriteProfiles.getCommitMetadata(tableName, path, instant, archivedTimeline)).collect(Collectors.toList()); } diff --git a/hudi-flink/src/main/java/org/apache/hudi/source/StreamReadOperator.java b/hudi-flink/src/main/java/org/apache/hudi/source/StreamReadOperator.java index 013043384d3b5..c3f43422f1d1e 100644 --- a/hudi-flink/src/main/java/org/apache/hudi/source/StreamReadOperator.java +++ b/hudi-flink/src/main/java/org/apache/hudi/source/StreamReadOperator.java @@ -21,6 +21,7 @@ import org.apache.hudi.table.format.mor.MergeOnReadInputFormat; import org.apache.hudi.table.format.mor.MergeOnReadInputSplit; +import org.apache.flink.api.common.operators.MailboxExecutor; import org.apache.flink.api.common.state.ListState; import org.apache.flink.api.common.state.ListStateDescriptor; import org.apache.flink.runtime.state.JavaSerializer; @@ -29,7 +30,6 @@ import org.apache.flink.streaming.api.functions.source.SourceFunction; import org.apache.flink.streaming.api.operators.AbstractStreamOperator; import org.apache.flink.streaming.api.operators.AbstractStreamOperatorFactory; -import org.apache.flink.streaming.api.operators.MailboxExecutor; import org.apache.flink.streaming.api.operators.OneInputStreamOperator; import org.apache.flink.streaming.api.operators.OneInputStreamOperatorFactory; import org.apache.flink.streaming.api.operators.StreamOperator; @@ -54,7 +54,7 @@ * 
StreamReadMonitoringFunction}. Contrary to the {@link StreamReadMonitoringFunction} which has a parallelism of 1,
 * this operator can run with a parallelism greater than 1.
 *
- * <p>As soon as an input split {@link MergeOnReadInputSplit} is received, it is put in a queue,
+ * <p>As soon as an input split {@link MergeOnReadInputSplit} is received, it is put into a queue, * the {@link MailboxExecutor} reads the actual data of the split. * This architecture allows the separation of split reading from processing the checkpoint barriers, * thus removing any potential back-pressure. @@ -64,7 +64,7 @@ public class StreamReadOperator extends AbstractStreamOperator private static final Logger LOG = LoggerFactory.getLogger(StreamReadOperator.class); - private static final int MINI_BATCH_SIZE = 1000; + private static final int MINI_BATCH_SIZE = 2048; // It's the same thread that runs this operator and checkpoint actions. Use this executor to schedule only // splits for subsequent reading, so that a new checkpoint could be triggered without blocking for a long time @@ -118,10 +118,10 @@ public void initializeState(StateInitializationContext context) throws Exception getOperatorConfig().getTimeCharacteristic(), getProcessingTimeService(), new Object(), // no actual locking needed - getContainingTask().getStreamStatusMaintainer(), output, getRuntimeContext().getExecutionConfig().getAutoWatermarkInterval(), - -1); + -1, + true); // Enqueue to process the recovered input splits. enqueueProcessSplits(); @@ -205,8 +205,8 @@ public void processWatermark(Watermark mark) { } @Override - public void dispose() throws Exception { - super.dispose(); + public void close() throws Exception { + super.close(); if (format != null) { format.close(); @@ -218,8 +218,8 @@ public void dispose() throws Exception { } @Override - public void close() throws Exception { - super.close(); + public void finish() throws Exception { + super.finish(); output.close(); if (sourceContext != null) { sourceContext.emitWatermark(Watermark.MAX_WATERMARK); diff --git a/hudi-flink/src/main/java/org/apache/hudi/table/HoodieTableFactory.java b/hudi-flink/src/main/java/org/apache/hudi/table/HoodieTableFactory.java index 5299551fccd38..7543382e19df4 100644 --- a/hudi-flink/src/main/java/org/apache/hudi/table/HoodieTableFactory.java +++ b/hudi-flink/src/main/java/org/apache/hudi/table/HoodieTableFactory.java @@ -24,9 +24,11 @@ import org.apache.hudi.configuration.OptionsResolver; import org.apache.hudi.exception.HoodieValidationException; import org.apache.hudi.hive.MultiPartKeysValueExtractor; +import org.apache.hudi.index.HoodieIndex; import org.apache.hudi.keygen.ComplexAvroKeyGenerator; import org.apache.hudi.keygen.NonpartitionedAvroKeyGenerator; import org.apache.hudi.keygen.TimestampBasedAvroKeyGenerator; +import org.apache.hudi.keygen.constant.KeyGeneratorOptions; import org.apache.hudi.util.AvroSchemaConverter; import org.apache.hudi.util.DataTypeUtils; @@ -198,6 +200,11 @@ private static void setupHoodieKeyOptions(Configuration conf, CatalogTable table // the PARTITIONED BY syntax always has higher priority than option FlinkOptions#PARTITION_PATH_FIELD conf.setString(FlinkOptions.PARTITION_PATH_FIELD, String.join(",", partitionKeys)); } + // set index key for bucket index if not defined + if (conf.getString(FlinkOptions.INDEX_TYPE).equals(HoodieIndex.IndexType.BUCKET.name()) + && conf.getString(FlinkOptions.INDEX_KEY_FIELD).isEmpty()) { + conf.setString(FlinkOptions.INDEX_KEY_FIELD, conf.getString(FlinkOptions.RECORD_KEY_FIELD)); + } // tweak the key gen class if possible final String[] partitions = conf.getString(FlinkOptions.PARTITION_PATH_FIELD).split(","); final String[] pks = conf.getString(FlinkOptions.RECORD_KEY_FIELD).split(","); @@ -238,22 +245,22 @@ public static void 
setupTimestampKeygenOptions(Configuration conf, DataType fiel int precision = DataTypeUtils.precision(fieldType.getLogicalType()); if (precision == 0) { // seconds - conf.setString(TimestampBasedAvroKeyGenerator.Config.TIMESTAMP_TYPE_FIELD_PROP, + conf.setString(KeyGeneratorOptions.Config.TIMESTAMP_TYPE_FIELD_PROP, TimestampBasedAvroKeyGenerator.TimestampType.UNIX_TIMESTAMP.name()); } else if (precision == 3) { // milliseconds - conf.setString(TimestampBasedAvroKeyGenerator.Config.TIMESTAMP_TYPE_FIELD_PROP, + conf.setString(KeyGeneratorOptions.Config.TIMESTAMP_TYPE_FIELD_PROP, TimestampBasedAvroKeyGenerator.TimestampType.EPOCHMILLISECONDS.name()); } String partitionFormat = conf.getOptional(FlinkOptions.PARTITION_FORMAT).orElse(FlinkOptions.PARTITION_FORMAT_HOUR); - conf.setString(TimestampBasedAvroKeyGenerator.Config.TIMESTAMP_OUTPUT_DATE_FORMAT_PROP, partitionFormat); + conf.setString(KeyGeneratorOptions.Config.TIMESTAMP_OUTPUT_DATE_FORMAT_PROP, partitionFormat); } else { - conf.setString(TimestampBasedAvroKeyGenerator.Config.TIMESTAMP_TYPE_FIELD_PROP, + conf.setString(KeyGeneratorOptions.Config.TIMESTAMP_TYPE_FIELD_PROP, TimestampBasedAvroKeyGenerator.TimestampType.DATE_STRING.name()); String partitionFormat = conf.getOptional(FlinkOptions.PARTITION_FORMAT).orElse(FlinkOptions.PARTITION_FORMAT_DAY); - conf.setString(TimestampBasedAvroKeyGenerator.Config.TIMESTAMP_OUTPUT_DATE_FORMAT_PROP, partitionFormat); + conf.setString(KeyGeneratorOptions.Config.TIMESTAMP_OUTPUT_DATE_FORMAT_PROP, partitionFormat); } - conf.setString(TimestampBasedAvroKeyGenerator.Config.TIMESTAMP_OUTPUT_TIMEZONE_FORMAT_PROP, "UTC"); + conf.setString(KeyGeneratorOptions.Config.TIMESTAMP_OUTPUT_TIMEZONE_FORMAT_PROP, "UTC"); } /** diff --git a/hudi-flink/src/main/java/org/apache/hudi/table/HoodieTableSink.java b/hudi-flink/src/main/java/org/apache/hudi/table/HoodieTableSink.java index c1e6d0c28aa06..bbbc67985c8af 100644 --- a/hudi-flink/src/main/java/org/apache/hudi/table/HoodieTableSink.java +++ b/hudi-flink/src/main/java/org/apache/hudi/table/HoodieTableSink.java @@ -84,7 +84,6 @@ public SinkRuntimeProvider getSinkRuntimeProvider(Context context) { // default parallelism int parallelism = dataStream.getExecutionConfig().getParallelism(); DataStream pipeline; - // bootstrap final DataStream hoodieRecordDataStream = Pipelines.bootstrap(conf, rowType, parallelism, dataStream, context.isBounded(), overwrite); diff --git a/hudi-flink/src/main/java/org/apache/hudi/table/HoodieTableSource.java b/hudi-flink/src/main/java/org/apache/hudi/table/HoodieTableSource.java index 259c2e40cd477..3efd1d5612f15 100644 --- a/hudi-flink/src/main/java/org/apache/hudi/table/HoodieTableSource.java +++ b/hudi-flink/src/main/java/org/apache/hudi/table/HoodieTableSource.java @@ -452,8 +452,8 @@ private Schema inferSchemaFromDdl() { @VisibleForTesting public Schema getTableAvroSchema() { try { - TableSchemaResolver schemaUtil = new TableSchemaResolver(metaClient, conf.getBoolean(FlinkOptions.CHANGELOG_ENABLED)); - return schemaUtil.getTableAvroSchema(); + TableSchemaResolver schemaResolver = new TableSchemaResolver(metaClient); + return schemaResolver.getTableAvroSchema(); } catch (Throwable e) { // table exists but has no written data LOG.warn("Get table avro schema error, use schema from the DDL instead", e); diff --git a/hudi-flink/src/main/java/org/apache/hudi/table/catalog/HoodieCatalog.java b/hudi-flink/src/main/java/org/apache/hudi/table/catalog/HoodieCatalog.java index fddaaba66e291..3317967006101 100644 --- 
a/hudi-flink/src/main/java/org/apache/hudi/table/catalog/HoodieCatalog.java +++ b/hudi-flink/src/main/java/org/apache/hudi/table/catalog/HoodieCatalog.java @@ -376,7 +376,7 @@ public List listPartitionsByFilter(ObjectPath tablePath, L @Override public CatalogPartition getPartition(ObjectPath tablePath, CatalogPartitionSpec catalogPartitionSpec) throws PartitionNotExistException, CatalogException { - return null; + throw new PartitionNotExistException(getName(), tablePath, catalogPartitionSpec); } @Override @@ -409,7 +409,7 @@ public List listFunctions(String databaseName) throws DatabaseNotExistEx @Override public CatalogFunction getFunction(ObjectPath functionPath) throws FunctionNotExistException, CatalogException { - return null; + throw new FunctionNotExistException(getName(), functionPath); } @Override diff --git a/hudi-flink/src/main/java/org/apache/hudi/table/format/FormatUtils.java b/hudi-flink/src/main/java/org/apache/hudi/table/format/FormatUtils.java index 2c3318362b053..fa404cc2163ec 100644 --- a/hudi-flink/src/main/java/org/apache/hudi/table/format/FormatUtils.java +++ b/hudi-flink/src/main/java/org/apache/hudi/table/format/FormatUtils.java @@ -24,6 +24,7 @@ import org.apache.hudi.common.table.log.HoodieMergedLogRecordScanner; import org.apache.hudi.common.table.log.HoodieUnMergedLogRecordScanner; import org.apache.hudi.common.util.DefaultSizeEstimator; +import org.apache.hudi.common.util.Functions; import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.queue.BoundedInMemoryExecutor; import org.apache.hudi.common.util.queue.BoundedInMemoryQueueProducer; @@ -50,6 +51,7 @@ import java.util.List; import java.util.Locale; import java.util.Map; +import java.util.function.Function; /** * Utilities for format. @@ -193,8 +195,9 @@ public BoundedMemoryRecords( HoodieRealtimeRecordReaderUtils.getMaxCompactionMemoryInBytes(new JobConf(hadoopConf)), getParallelProducers(), Option.empty(), - x -> x, - new DefaultSizeEstimator<>()); + Function.identity(), + new DefaultSizeEstimator<>(), + Functions.noop()); // Consumer of this record reader this.iterator = this.executor.getQueue().iterator(); this.scanner = FormatUtils.unMergedLogScanner(split, logSchema, hadoopConf, diff --git a/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/ParquetColumnarRowSplitReader.java b/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/ParquetColumnarRowSplitReader.java index c615283c7c5ad..3cb491cfaf575 100644 --- a/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/ParquetColumnarRowSplitReader.java +++ b/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/ParquetColumnarRowSplitReader.java @@ -18,11 +18,10 @@ package org.apache.hudi.table.format.cow; -import org.apache.hudi.table.format.cow.data.ColumnarRowData; -import org.apache.hudi.table.format.cow.vector.VectorizedColumnBatch; - import org.apache.flink.formats.parquet.vector.reader.ColumnReader; +import org.apache.flink.table.data.ColumnarRowData; import org.apache.flink.table.data.vector.ColumnVector; +import org.apache.flink.table.data.vector.VectorizedColumnBatch; import org.apache.flink.table.data.vector.writable.WritableColumnVector; import org.apache.flink.table.types.logical.LogicalType; import org.apache.flink.table.types.logical.LogicalTypeRoot; diff --git a/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/ParquetSplitReaderUtil.java b/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/ParquetSplitReaderUtil.java index 12d63aa974a5d..ca1408dcb7a5c 100644 --- 
a/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/ParquetSplitReaderUtil.java +++ b/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/ParquetSplitReaderUtil.java @@ -22,7 +22,6 @@ import org.apache.hudi.table.format.cow.vector.HeapArrayVector; import org.apache.hudi.table.format.cow.vector.HeapMapColumnVector; import org.apache.hudi.table.format.cow.vector.HeapRowColumnVector; -import org.apache.hudi.table.format.cow.vector.VectorizedColumnBatch; import org.apache.hudi.table.format.cow.vector.reader.ArrayColumnReader; import org.apache.hudi.table.format.cow.vector.reader.MapColumnReader; import org.apache.hudi.table.format.cow.vector.reader.RowColumnReader; @@ -41,6 +40,7 @@ import org.apache.flink.table.data.DecimalData; import org.apache.flink.table.data.TimestampData; import org.apache.flink.table.data.vector.ColumnVector; +import org.apache.flink.table.data.vector.VectorizedColumnBatch; import org.apache.flink.table.data.vector.heap.HeapBooleanVector; import org.apache.flink.table.data.vector.heap.HeapByteVector; import org.apache.flink.table.data.vector.heap.HeapBytesVector; diff --git a/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/data/ColumnarArrayData.java b/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/data/ColumnarArrayData.java deleted file mode 100644 index a16a4dd8d0142..0000000000000 --- a/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/data/ColumnarArrayData.java +++ /dev/null @@ -1,272 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hudi.table.format.cow.data; - -import org.apache.hudi.table.format.cow.vector.MapColumnVector; - -import org.apache.flink.table.data.ArrayData; -import org.apache.flink.table.data.DecimalData; -import org.apache.flink.table.data.MapData; -import org.apache.flink.table.data.RawValueData; -import org.apache.flink.table.data.RowData; -import org.apache.flink.table.data.StringData; -import org.apache.flink.table.data.TimestampData; -import org.apache.flink.table.data.binary.TypedSetters; -import org.apache.flink.table.data.vector.ArrayColumnVector; -import org.apache.flink.table.data.vector.BooleanColumnVector; -import org.apache.flink.table.data.vector.ByteColumnVector; -import org.apache.flink.table.data.vector.BytesColumnVector; -import org.apache.flink.table.data.vector.ColumnVector; -import org.apache.flink.table.data.vector.DecimalColumnVector; -import org.apache.flink.table.data.vector.DoubleColumnVector; -import org.apache.flink.table.data.vector.FloatColumnVector; -import org.apache.flink.table.data.vector.IntColumnVector; -import org.apache.flink.table.data.vector.LongColumnVector; -import org.apache.flink.table.data.vector.RowColumnVector; -import org.apache.flink.table.data.vector.ShortColumnVector; -import org.apache.flink.table.data.vector.TimestampColumnVector; - -import java.util.Arrays; - -/** - * Columnar array to support access to vector column data. - * - *

References {@code org.apache.flink.table.data.ColumnarArrayData} to include FLINK-15390. - */ -public final class ColumnarArrayData implements ArrayData, TypedSetters { - - private final ColumnVector data; - private final int offset; - private final int numElements; - - public ColumnarArrayData(ColumnVector data, int offset, int numElements) { - this.data = data; - this.offset = offset; - this.numElements = numElements; - } - - @Override - public int size() { - return numElements; - } - - @Override - public boolean isNullAt(int pos) { - return data.isNullAt(offset + pos); - } - - @Override - public void setNullAt(int pos) { - throw new UnsupportedOperationException("Not support the operation!"); - } - - @Override - public boolean getBoolean(int pos) { - return ((BooleanColumnVector) data).getBoolean(offset + pos); - } - - @Override - public byte getByte(int pos) { - return ((ByteColumnVector) data).getByte(offset + pos); - } - - @Override - public short getShort(int pos) { - return ((ShortColumnVector) data).getShort(offset + pos); - } - - @Override - public int getInt(int pos) { - return ((IntColumnVector) data).getInt(offset + pos); - } - - @Override - public long getLong(int pos) { - return ((LongColumnVector) data).getLong(offset + pos); - } - - @Override - public float getFloat(int pos) { - return ((FloatColumnVector) data).getFloat(offset + pos); - } - - @Override - public double getDouble(int pos) { - return ((DoubleColumnVector) data).getDouble(offset + pos); - } - - @Override - public StringData getString(int pos) { - BytesColumnVector.Bytes byteArray = getByteArray(pos); - return StringData.fromBytes(byteArray.data, byteArray.offset, byteArray.len); - } - - @Override - public DecimalData getDecimal(int pos, int precision, int scale) { - return ((DecimalColumnVector) data).getDecimal(offset + pos, precision, scale); - } - - @Override - public TimestampData getTimestamp(int pos, int precision) { - return ((TimestampColumnVector) data).getTimestamp(offset + pos, precision); - } - - @Override - public RawValueData getRawValue(int pos) { - throw new UnsupportedOperationException("RawValueData is not supported."); - } - - @Override - public byte[] getBinary(int pos) { - BytesColumnVector.Bytes byteArray = getByteArray(pos); - if (byteArray.len == byteArray.data.length) { - return byteArray.data; - } else { - return Arrays.copyOfRange(byteArray.data, byteArray.offset, byteArray.len); - } - } - - @Override - public ArrayData getArray(int pos) { - return ((ArrayColumnVector) data).getArray(offset + pos); - } - - @Override - public MapData getMap(int pos) { - return ((MapColumnVector) data).getMap(offset + pos); - } - - @Override - public RowData getRow(int pos, int numFields) { - return ((RowColumnVector) data).getRow(offset + pos); - } - - @Override - public void setBoolean(int pos, boolean value) { - throw new UnsupportedOperationException("Not support the operation!"); - } - - @Override - public void setByte(int pos, byte value) { - throw new UnsupportedOperationException("Not support the operation!"); - } - - @Override - public void setShort(int pos, short value) { - throw new UnsupportedOperationException("Not support the operation!"); - } - - @Override - public void setInt(int pos, int value) { - throw new UnsupportedOperationException("Not support the operation!"); - } - - @Override - public void setLong(int pos, long value) { - throw new UnsupportedOperationException("Not support the operation!"); - } - - @Override - public void setFloat(int pos, float value) { - throw new 
UnsupportedOperationException("Not support the operation!"); - } - - @Override - public void setDouble(int pos, double value) { - throw new UnsupportedOperationException("Not support the operation!"); - } - - @Override - public void setDecimal(int pos, DecimalData value, int precision) { - throw new UnsupportedOperationException("Not support the operation!"); - } - - @Override - public void setTimestamp(int pos, TimestampData value, int precision) { - throw new UnsupportedOperationException("Not support the operation!"); - } - - @Override - public boolean[] toBooleanArray() { - boolean[] res = new boolean[numElements]; - for (int i = 0; i < numElements; i++) { - res[i] = getBoolean(i); - } - return res; - } - - @Override - public byte[] toByteArray() { - byte[] res = new byte[numElements]; - for (int i = 0; i < numElements; i++) { - res[i] = getByte(i); - } - return res; - } - - @Override - public short[] toShortArray() { - short[] res = new short[numElements]; - for (int i = 0; i < numElements; i++) { - res[i] = getShort(i); - } - return res; - } - - @Override - public int[] toIntArray() { - int[] res = new int[numElements]; - for (int i = 0; i < numElements; i++) { - res[i] = getInt(i); - } - return res; - } - - @Override - public long[] toLongArray() { - long[] res = new long[numElements]; - for (int i = 0; i < numElements; i++) { - res[i] = getLong(i); - } - return res; - } - - @Override - public float[] toFloatArray() { - float[] res = new float[numElements]; - for (int i = 0; i < numElements; i++) { - res[i] = getFloat(i); - } - return res; - } - - @Override - public double[] toDoubleArray() { - double[] res = new double[numElements]; - for (int i = 0; i < numElements; i++) { - res[i] = getDouble(i); - } - return res; - } - - private BytesColumnVector.Bytes getByteArray(int pos) { - return ((BytesColumnVector) data).getBytes(offset + pos); - } -} - diff --git a/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/data/ColumnarMapData.java b/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/data/ColumnarMapData.java deleted file mode 100644 index 9792e87ec9365..0000000000000 --- a/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/data/ColumnarMapData.java +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hudi.table.format.cow.data; - -import org.apache.flink.table.data.ArrayData; -import org.apache.flink.table.data.ColumnarArrayData; -import org.apache.flink.table.data.MapData; -import org.apache.flink.table.data.vector.ColumnVector; - -/** - * Columnar map to support access to vector column data. - * - *

Referenced from flink 1.14.0 {@code org.apache.flink.table.data.ColumnarMapData}. - */ -public final class ColumnarMapData implements MapData { - - private final ColumnVector keyColumnVector; - private final ColumnVector valueColumnVector; - private final int offset; - private final int size; - - public ColumnarMapData( - ColumnVector keyColumnVector, - ColumnVector valueColumnVector, - int offset, - int size) { - this.keyColumnVector = keyColumnVector; - this.valueColumnVector = valueColumnVector; - this.offset = offset; - this.size = size; - } - - @Override - public int size() { - return size; - } - - @Override - public ArrayData keyArray() { - return new ColumnarArrayData(keyColumnVector, offset, size); - } - - @Override - public ArrayData valueArray() { - return new ColumnarArrayData(valueColumnVector, offset, size); - } - - @Override - public boolean equals(Object o) { - throw new UnsupportedOperationException( - "ColumnarMapData do not support equals, please compare fields one by one!"); - } - - @Override - public int hashCode() { - throw new UnsupportedOperationException( - "ColumnarMapData do not support hashCode, please hash fields one by one!"); - } -} diff --git a/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/data/ColumnarRowData.java b/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/data/ColumnarRowData.java deleted file mode 100644 index ebb4ca26fa87d..0000000000000 --- a/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/data/ColumnarRowData.java +++ /dev/null @@ -1,232 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hudi.table.format.cow.data; - -import org.apache.hudi.table.format.cow.vector.VectorizedColumnBatch; - -import org.apache.flink.table.data.ArrayData; -import org.apache.flink.table.data.DecimalData; -import org.apache.flink.table.data.MapData; -import org.apache.flink.table.data.RawValueData; -import org.apache.flink.table.data.RowData; -import org.apache.flink.table.data.StringData; -import org.apache.flink.table.data.TimestampData; -import org.apache.flink.table.data.binary.TypedSetters; -import org.apache.flink.table.data.vector.BytesColumnVector; -import org.apache.flink.types.RowKind; - -/** - * Columnar row to support access to vector column data. - * It is a row view in {@link VectorizedColumnBatch}. - * - *

References {@code org.apache.flink.table.data.ColumnarRowData} to include FLINK-15390. - */ -public final class ColumnarRowData implements RowData, TypedSetters { - - private RowKind rowKind = RowKind.INSERT; - private VectorizedColumnBatch vectorizedColumnBatch; - private int rowId; - - public ColumnarRowData() { - } - - public ColumnarRowData(VectorizedColumnBatch vectorizedColumnBatch) { - this(vectorizedColumnBatch, 0); - } - - public ColumnarRowData(VectorizedColumnBatch vectorizedColumnBatch, int rowId) { - this.vectorizedColumnBatch = vectorizedColumnBatch; - this.rowId = rowId; - } - - public void setVectorizedColumnBatch(VectorizedColumnBatch vectorizedColumnBatch) { - this.vectorizedColumnBatch = vectorizedColumnBatch; - this.rowId = 0; - } - - public void setRowId(int rowId) { - this.rowId = rowId; - } - - @Override - public RowKind getRowKind() { - return rowKind; - } - - @Override - public void setRowKind(RowKind kind) { - this.rowKind = kind; - } - - @Override - public int getArity() { - return vectorizedColumnBatch.getArity(); - } - - @Override - public boolean isNullAt(int pos) { - return vectorizedColumnBatch.isNullAt(rowId, pos); - } - - @Override - public boolean getBoolean(int pos) { - return vectorizedColumnBatch.getBoolean(rowId, pos); - } - - @Override - public byte getByte(int pos) { - return vectorizedColumnBatch.getByte(rowId, pos); - } - - @Override - public short getShort(int pos) { - return vectorizedColumnBatch.getShort(rowId, pos); - } - - @Override - public int getInt(int pos) { - return vectorizedColumnBatch.getInt(rowId, pos); - } - - @Override - public long getLong(int pos) { - return vectorizedColumnBatch.getLong(rowId, pos); - } - - @Override - public float getFloat(int pos) { - return vectorizedColumnBatch.getFloat(rowId, pos); - } - - @Override - public double getDouble(int pos) { - return vectorizedColumnBatch.getDouble(rowId, pos); - } - - @Override - public StringData getString(int pos) { - BytesColumnVector.Bytes byteArray = vectorizedColumnBatch.getByteArray(rowId, pos); - return StringData.fromBytes(byteArray.data, byteArray.offset, byteArray.len); - } - - @Override - public DecimalData getDecimal(int pos, int precision, int scale) { - return vectorizedColumnBatch.getDecimal(rowId, pos, precision, scale); - } - - @Override - public TimestampData getTimestamp(int pos, int precision) { - return vectorizedColumnBatch.getTimestamp(rowId, pos, precision); - } - - @Override - public RawValueData getRawValue(int pos) { - throw new UnsupportedOperationException("RawValueData is not supported."); - } - - @Override - public byte[] getBinary(int pos) { - BytesColumnVector.Bytes byteArray = vectorizedColumnBatch.getByteArray(rowId, pos); - if (byteArray.len == byteArray.data.length) { - return byteArray.data; - } else { - byte[] ret = new byte[byteArray.len]; - System.arraycopy(byteArray.data, byteArray.offset, ret, 0, byteArray.len); - return ret; - } - } - - @Override - public RowData getRow(int pos, int numFields) { - return vectorizedColumnBatch.getRow(rowId, pos); - } - - @Override - public ArrayData getArray(int pos) { - return vectorizedColumnBatch.getArray(rowId, pos); - } - - @Override - public MapData getMap(int pos) { - return vectorizedColumnBatch.getMap(rowId, pos); - } - - @Override - public void setNullAt(int pos) { - throw new UnsupportedOperationException("Not support the operation!"); - } - - @Override - public void setBoolean(int pos, boolean value) { - throw new UnsupportedOperationException("Not support the operation!"); - } - - 
@Override - public void setByte(int pos, byte value) { - throw new UnsupportedOperationException("Not support the operation!"); - } - - @Override - public void setShort(int pos, short value) { - throw new UnsupportedOperationException("Not support the operation!"); - } - - @Override - public void setInt(int pos, int value) { - throw new UnsupportedOperationException("Not support the operation!"); - } - - @Override - public void setLong(int pos, long value) { - throw new UnsupportedOperationException("Not support the operation!"); - } - - @Override - public void setFloat(int pos, float value) { - throw new UnsupportedOperationException("Not support the operation!"); - } - - @Override - public void setDouble(int pos, double value) { - throw new UnsupportedOperationException("Not support the operation!"); - } - - @Override - public void setDecimal(int pos, DecimalData value, int precision) { - throw new UnsupportedOperationException("Not support the operation!"); - } - - @Override - public void setTimestamp(int pos, TimestampData value, int precision) { - throw new UnsupportedOperationException("Not support the operation!"); - } - - @Override - public boolean equals(Object o) { - throw new UnsupportedOperationException( - "ColumnarRowData do not support equals, please compare fields one by one!"); - } - - @Override - public int hashCode() { - throw new UnsupportedOperationException( - "ColumnarRowData do not support hashCode, please hash fields one by one!"); - } -} - diff --git a/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/vector/HeapArrayVector.java b/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/vector/HeapArrayVector.java index f4c15b6a9b366..edd90714c87a7 100644 --- a/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/vector/HeapArrayVector.java +++ b/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/vector/HeapArrayVector.java @@ -18,9 +18,8 @@ package org.apache.hudi.table.format.cow.vector; -import org.apache.hudi.table.format.cow.data.ColumnarArrayData; - import org.apache.flink.table.data.ArrayData; +import org.apache.flink.table.data.ColumnarArrayData; import org.apache.flink.table.data.vector.ArrayColumnVector; import org.apache.flink.table.data.vector.ColumnVector; import org.apache.flink.table.data.vector.heap.AbstractHeapVector; diff --git a/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/vector/HeapMapColumnVector.java b/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/vector/HeapMapColumnVector.java index f05a2e73431d0..2b34a02f116b3 100644 --- a/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/vector/HeapMapColumnVector.java +++ b/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/vector/HeapMapColumnVector.java @@ -18,10 +18,10 @@ package org.apache.hudi.table.format.cow.vector; -import org.apache.hudi.table.format.cow.data.ColumnarMapData; - +import org.apache.flink.table.data.ColumnarMapData; import org.apache.flink.table.data.MapData; import org.apache.flink.table.data.vector.ColumnVector; +import org.apache.flink.table.data.vector.MapColumnVector; import org.apache.flink.table.data.vector.heap.AbstractHeapVector; import org.apache.flink.table.data.vector.writable.WritableColumnVector; diff --git a/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/vector/HeapRowColumnVector.java b/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/vector/HeapRowColumnVector.java index ad05a612c7bde..0193e6cbb1d22 100644 --- 
a/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/vector/HeapRowColumnVector.java +++ b/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/vector/HeapRowColumnVector.java @@ -18,8 +18,9 @@ package org.apache.hudi.table.format.cow.vector; -import org.apache.hudi.table.format.cow.data.ColumnarRowData; - +import org.apache.flink.table.data.ColumnarRowData; +import org.apache.flink.table.data.vector.RowColumnVector; +import org.apache.flink.table.data.vector.VectorizedColumnBatch; import org.apache.flink.table.data.vector.heap.AbstractHeapVector; import org.apache.flink.table.data.vector.writable.WritableColumnVector; diff --git a/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/vector/VectorizedColumnBatch.java b/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/vector/VectorizedColumnBatch.java deleted file mode 100644 index 9eee55d1eeae6..0000000000000 --- a/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/vector/VectorizedColumnBatch.java +++ /dev/null @@ -1,148 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hudi.table.format.cow.vector; - -import org.apache.flink.table.data.ArrayData; -import org.apache.flink.table.data.DecimalData; -import org.apache.flink.table.data.MapData; -import org.apache.flink.table.data.RowData; -import org.apache.flink.table.data.TimestampData; -import org.apache.flink.table.data.vector.ArrayColumnVector; -import org.apache.flink.table.data.vector.BooleanColumnVector; -import org.apache.flink.table.data.vector.ByteColumnVector; -import org.apache.flink.table.data.vector.BytesColumnVector; -import org.apache.flink.table.data.vector.ColumnVector; -import org.apache.flink.table.data.vector.DecimalColumnVector; -import org.apache.flink.table.data.vector.DoubleColumnVector; -import org.apache.flink.table.data.vector.FloatColumnVector; -import org.apache.flink.table.data.vector.IntColumnVector; -import org.apache.flink.table.data.vector.LongColumnVector; -import org.apache.flink.table.data.vector.ShortColumnVector; -import org.apache.flink.table.data.vector.TimestampColumnVector; - -import java.io.Serializable; -import java.nio.charset.StandardCharsets; - -/** - * A VectorizedColumnBatch is a set of rows, organized with each column as a vector. It is the unit - * of query execution, organized to minimize the cost per row. - * - *

{@code VectorizedColumnBatch}s are influenced by Apache Hive VectorizedRowBatch. - * - *

References {@code org.apache.flink.table.data.vector.VectorizedColumnBatch} to include FLINK-15390. - */ -public class VectorizedColumnBatch implements Serializable { - private static final long serialVersionUID = 1L; - - /** - * This number is carefully chosen to minimize overhead and typically allows one - * VectorizedColumnBatch to fit in cache. - */ - public static final int DEFAULT_SIZE = 2048; - - private int numRows; - public final ColumnVector[] columns; - - public VectorizedColumnBatch(ColumnVector[] vectors) { - this.columns = vectors; - } - - public void setNumRows(int numRows) { - this.numRows = numRows; - } - - public int getNumRows() { - return numRows; - } - - public int getArity() { - return columns.length; - } - - public boolean isNullAt(int rowId, int colId) { - return columns[colId].isNullAt(rowId); - } - - public boolean getBoolean(int rowId, int colId) { - return ((BooleanColumnVector) columns[colId]).getBoolean(rowId); - } - - public byte getByte(int rowId, int colId) { - return ((ByteColumnVector) columns[colId]).getByte(rowId); - } - - public short getShort(int rowId, int colId) { - return ((ShortColumnVector) columns[colId]).getShort(rowId); - } - - public int getInt(int rowId, int colId) { - return ((IntColumnVector) columns[colId]).getInt(rowId); - } - - public long getLong(int rowId, int colId) { - return ((LongColumnVector) columns[colId]).getLong(rowId); - } - - public float getFloat(int rowId, int colId) { - return ((FloatColumnVector) columns[colId]).getFloat(rowId); - } - - public double getDouble(int rowId, int colId) { - return ((DoubleColumnVector) columns[colId]).getDouble(rowId); - } - - public BytesColumnVector.Bytes getByteArray(int rowId, int colId) { - return ((BytesColumnVector) columns[colId]).getBytes(rowId); - } - - private byte[] getBytes(int rowId, int colId) { - BytesColumnVector.Bytes byteArray = getByteArray(rowId, colId); - if (byteArray.len == byteArray.data.length) { - return byteArray.data; - } else { - return byteArray.getBytes(); - } - } - - public String getString(int rowId, int colId) { - BytesColumnVector.Bytes byteArray = getByteArray(rowId, colId); - return new String(byteArray.data, byteArray.offset, byteArray.len, StandardCharsets.UTF_8); - } - - public DecimalData getDecimal(int rowId, int colId, int precision, int scale) { - return ((DecimalColumnVector) (columns[colId])).getDecimal(rowId, precision, scale); - } - - public TimestampData getTimestamp(int rowId, int colId, int precision) { - return ((TimestampColumnVector) (columns[colId])).getTimestamp(rowId, precision); - } - - public ArrayData getArray(int rowId, int colId) { - return ((ArrayColumnVector) columns[colId]).getArray(rowId); - } - - public RowData getRow(int rowId, int colId) { - return ((RowColumnVector) columns[colId]).getRow(rowId); - } - - public MapData getMap(int rowId, int colId) { - return ((MapColumnVector) columns[colId]).getMap(rowId); - } -} - diff --git a/hudi-flink/src/main/java/org/apache/hudi/table/format/mor/MergeOnReadInputFormat.java b/hudi-flink/src/main/java/org/apache/hudi/table/format/mor/MergeOnReadInputFormat.java index 7a72bca0582fd..4404e15eaaccf 100644 --- a/hudi-flink/src/main/java/org/apache/hudi/table/format/mor/MergeOnReadInputFormat.java +++ b/hudi-flink/src/main/java/org/apache/hudi/table/format/mor/MergeOnReadInputFormat.java @@ -18,6 +18,7 @@ package org.apache.hudi.table.format.mor; +import org.apache.hudi.common.model.HoodieAvroRecord; import org.apache.hudi.common.model.HoodieOperation; import 
org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.table.log.HoodieMergedLogRecordScanner; @@ -340,7 +341,7 @@ public boolean hasNext() { while (logRecordsKeyIterator.hasNext()) { String curAvroKey = logRecordsKeyIterator.next(); Option curAvroRecord = null; - final HoodieRecord hoodieRecord = scanner.getRecords().get(curAvroKey); + final HoodieAvroRecord hoodieRecord = (HoodieAvroRecord) scanner.getRecords().get(curAvroKey); try { curAvroRecord = hoodieRecord.getData().getInsertValue(tableSchema); } catch (IOException e) { @@ -412,7 +413,7 @@ private ClosableIterator getUnMergedLogFileIterator(MergeOnReadInputSpl public boolean hasNext() { while (recordsIterator.hasNext()) { Option curAvroRecord = null; - final HoodieRecord hoodieRecord = recordsIterator.next(); + final HoodieAvroRecord hoodieRecord = (HoodieAvroRecord) recordsIterator.next(); try { curAvroRecord = hoodieRecord.getData().getInsertValue(tableSchema); } catch (IOException e) { @@ -725,7 +726,7 @@ public boolean reachedEnd() throws IOException { } private Option getInsertValue(String curKey) throws IOException { - final HoodieRecord record = scanner.getRecords().get(curKey); + final HoodieAvroRecord record = (HoodieAvroRecord) scanner.getRecords().get(curKey); if (!emitDelete && HoodieOperation.isDelete(record.getOperation())) { return Option.empty(); } @@ -750,7 +751,7 @@ public void close() throws IOException { private Option mergeRowWithLog( RowData curRow, String curKey) throws IOException { - final HoodieRecord record = scanner.getRecords().get(curKey); + final HoodieAvroRecord record = (HoodieAvroRecord) scanner.getRecords().get(curKey); GenericRecord historyAvroRecord = (GenericRecord) rowDataToAvroConverter.convert(tableSchema, curRow); return record.getData().combineAndGetUpdateValue(historyAvroRecord, tableSchema); } diff --git a/hudi-flink/src/main/java/org/apache/hudi/util/CompactionUtil.java b/hudi-flink/src/main/java/org/apache/hudi/util/CompactionUtil.java index d04937bf7d66f..74629f9b0942f 100644 --- a/hudi-flink/src/main/java/org/apache/hudi/util/CompactionUtil.java +++ b/hudi-flink/src/main/java/org/apache/hudi/util/CompactionUtil.java @@ -26,6 +26,7 @@ import org.apache.hudi.common.table.timeline.HoodieInstant; import org.apache.hudi.common.table.timeline.HoodieTimeline; import org.apache.hudi.common.util.Option; +import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.configuration.FlinkOptions; import org.apache.hudi.exception.HoodieIOException; import org.apache.hudi.sink.compact.FlinkCompactionConfig; @@ -106,6 +107,18 @@ public static void setAvroSchema(Configuration conf, HoodieTableMetaClient metaC conf.setString(FlinkOptions.SOURCE_AVRO_SCHEMA, tableAvroSchema.toString()); } + /** + * Sets up the avro schema string into the given write config {@code HoodieWriteConfig} + * through reading from the hoodie table metadata. + * + * @param writeConfig The HoodieWriteConfig + */ + public static void setAvroSchema(HoodieWriteConfig writeConfig, HoodieTableMetaClient metaClient) throws Exception { + TableSchemaResolver tableSchemaResolver = new TableSchemaResolver(metaClient); + Schema tableAvroSchema = tableSchemaResolver.getTableAvroSchema(false); + writeConfig.setSchema(tableAvroSchema.toString()); + } + /** * Infers the changelog mode based on the data file schema (including metadata fields). 
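
A minimal usage sketch for the new {@code setAvroSchema(HoodieWriteConfig, HoodieTableMetaClient)} overload above (names are placeholders; a write config and a meta client for the table are assumed to already exist):

    // Copies the table's Avro schema (read without the Hudi metadata fields,
    // via getTableAvroSchema(false) inside the helper) onto the write config,
    // e.g. before scheduling a compaction.
    try {
      CompactionUtil.setAvroSchema(writeConfig, metaClient);
    } catch (Exception e) {
      throw new HoodieException("Get table avro schema error", e);
    }
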
* diff --git a/hudi-flink/src/main/java/org/apache/hudi/util/StreamerUtil.java b/hudi-flink/src/main/java/org/apache/hudi/util/StreamerUtil.java index 98df0bbcfd868..45d23f2ff4ea1 100644 --- a/hudi-flink/src/main/java/org/apache/hudi/util/StreamerUtil.java +++ b/hudi-flink/src/main/java/org/apache/hudi/util/StreamerUtil.java @@ -406,7 +406,9 @@ public static HoodieFlinkWriteClient createWriteClient(Configuration conf) throw FileSystemViewStorageConfig rebuilt = FileSystemViewStorageConfig.newBuilder() .withStorageType(viewStorageConfig.getStorageType()) .withRemoteServerHost(viewStorageConfig.getRemoteViewServerHost()) - .withRemoteServerPort(viewStorageConfig.getRemoteViewServerPort()).build(); + .withRemoteServerPort(viewStorageConfig.getRemoteViewServerPort()) + .withRemoteTimelineClientTimeoutSecs(viewStorageConfig.getRemoteTimelineClientTimeoutSecs()) + .build(); ViewStorageProperties.createProperties(conf.getString(FlinkOptions.PATH), rebuilt); return writeClient; } diff --git a/hudi-flink/src/test/java/org/apache/hudi/sink/StreamWriteITCase.java b/hudi-flink/src/test/java/org/apache/hudi/sink/StreamWriteITCase.java index 028c058eedafc..eaa2d6ced67d9 100644 --- a/hudi-flink/src/test/java/org/apache/hudi/sink/StreamWriteITCase.java +++ b/hudi-flink/src/test/java/org/apache/hudi/sink/StreamWriteITCase.java @@ -51,6 +51,8 @@ import org.apache.flink.util.TestLogger; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; import java.io.File; import java.nio.charset.StandardCharsets; @@ -129,10 +131,14 @@ public void testWriteToHoodieWithoutTransformer() throws Exception { testWriteToHoodie(null, EXPECTED); } - @Test - public void testMergeOnReadWriteWithCompaction() throws Exception { + @ParameterizedTest + @ValueSource(strings = {"BUCKET", "FLINK_STATE"}) + public void testMergeOnReadWriteWithCompaction(String indexType) throws Exception { int parallelism = 4; Configuration conf = TestConfigurations.getDefaultConf(tempFile.getAbsolutePath()); + conf.setString(FlinkOptions.INDEX_TYPE, indexType); + conf.setInteger(FlinkOptions.BUCKET_INDEX_NUM_BUCKETS, 4); + conf.setString(FlinkOptions.INDEX_KEY_FIELD, "id"); conf.setInteger(FlinkOptions.COMPACTION_DELTA_COMMITS, 1); conf.setString(FlinkOptions.TABLE_TYPE, HoodieTableType.MERGE_ON_READ.name()); StreamExecutionEnvironment execEnv = StreamExecutionEnvironment.getExecutionEnvironment(); @@ -172,7 +178,7 @@ public void testMergeOnReadWriteWithCompaction() throws Exception { DataStream pipeline = Pipelines.hoodieStreamWrite(conf, parallelism, hoodieRecordDataStream); Pipelines.clean(conf, pipeline); Pipelines.compact(conf, pipeline); - JobClient client = execEnv.executeAsync(execEnv.getStreamGraph(conf.getString(FlinkOptions.TABLE_NAME))); + JobClient client = execEnv.executeAsync(execEnv.getStreamGraph()); if (client.getJobStatus().get() != JobStatus.FAILED) { try { TimeUnit.SECONDS.sleep(20); // wait long enough for the compaction to finish @@ -229,7 +235,7 @@ private void testWriteToHoodie( DataStream pipeline = Pipelines.hoodieStreamWrite(conf, parallelism, hoodieRecordDataStream); execEnv.addOperator(pipeline.getTransformation()); - JobClient client = execEnv.executeAsync(execEnv.getStreamGraph(conf.getString(FlinkOptions.TABLE_NAME))); + JobClient client = execEnv.executeAsync(conf.getString(FlinkOptions.TABLE_NAME)); // wait for the streaming job to finish client.getJobExecutionResult().get(); diff --git 
a/hudi-flink/src/test/java/org/apache/hudi/sink/TestWriteCopyOnWrite.java b/hudi-flink/src/test/java/org/apache/hudi/sink/TestWriteCopyOnWrite.java index a91f45263ff25..35523a8fb426c 100644 --- a/hudi-flink/src/test/java/org/apache/hudi/sink/TestWriteCopyOnWrite.java +++ b/hudi-flink/src/test/java/org/apache/hudi/sink/TestWriteCopyOnWrite.java @@ -397,6 +397,7 @@ public void testWriteExactlyOnce() throws Exception { @Test public void testReuseEmbeddedServer() throws IOException { + conf.setInteger("hoodie.filesystem.view.remote.timeout.secs", 500); HoodieFlinkWriteClient writeClient = StreamerUtil.createWriteClient(conf); FileSystemViewStorageConfig viewStorageConfig = writeClient.getConfig().getViewStorageConfig(); @@ -406,6 +407,7 @@ public void testReuseEmbeddedServer() throws IOException { writeClient = StreamerUtil.createWriteClient(conf); assertSame(writeClient.getConfig().getViewStorageConfig().getStorageType(), FileSystemViewStorageType.REMOTE_FIRST); assertEquals(viewStorageConfig.getRemoteViewServerPort(), writeClient.getConfig().getViewStorageConfig().getRemoteViewServerPort()); + assertEquals(viewStorageConfig.getRemoteTimelineClientTimeoutSecs(), 500); } // ------------------------------------------------------------------------- diff --git a/hudi-flink/src/test/java/org/apache/hudi/sink/utils/CollectorOutput.java b/hudi-flink/src/test/java/org/apache/hudi/sink/utils/CollectorOutput.java index 3da21e6eb9887..c386e6287b8cd 100644 --- a/hudi-flink/src/test/java/org/apache/hudi/sink/utils/CollectorOutput.java +++ b/hudi-flink/src/test/java/org/apache/hudi/sink/utils/CollectorOutput.java @@ -23,6 +23,7 @@ import org.apache.flink.streaming.runtime.streamrecord.LatencyMarker; import org.apache.flink.streaming.runtime.streamrecord.StreamElement; import org.apache.flink.streaming.runtime.streamrecord.StreamRecord; +import org.apache.flink.streaming.runtime.watermarkstatus.WatermarkStatus; import org.apache.flink.util.InstantiationUtil; import org.apache.flink.util.OutputTag; @@ -49,6 +50,11 @@ public void emitWatermark(Watermark mark) { list.add(mark); } + @Override + public void emitWatermarkStatus(WatermarkStatus watermarkStatus) { + + } + @Override public void emitLatencyMarker(LatencyMarker latencyMarker) { list.add(latencyMarker); diff --git a/hudi-flink/src/test/java/org/apache/hudi/sink/utils/CompactFunctionWrapper.java b/hudi-flink/src/test/java/org/apache/hudi/sink/utils/CompactFunctionWrapper.java index fe2ddad18955f..e703515de3b7f 100644 --- a/hudi-flink/src/test/java/org/apache/hudi/sink/utils/CompactFunctionWrapper.java +++ b/hudi-flink/src/test/java/org/apache/hudi/sink/utils/CompactFunctionWrapper.java @@ -38,6 +38,7 @@ import org.apache.flink.streaming.api.watermark.Watermark; import org.apache.flink.streaming.runtime.streamrecord.LatencyMarker; import org.apache.flink.streaming.runtime.streamrecord.StreamRecord; +import org.apache.flink.streaming.runtime.watermarkstatus.WatermarkStatus; import org.apache.flink.util.Collector; import org.apache.flink.util.OutputTag; @@ -102,6 +103,11 @@ public void emitWatermark(Watermark watermark) { } + @Override + public void emitWatermarkStatus(WatermarkStatus watermarkStatus) { + + } + @Override public void collect(OutputTag outputTag, StreamRecord streamRecord) { diff --git a/hudi-flink/src/test/java/org/apache/hudi/sink/utils/MockStateInitializationContext.java b/hudi-flink/src/test/java/org/apache/hudi/sink/utils/MockStateInitializationContext.java index dd89f71110e82..c582e9553b30e 100644 --- 
a/hudi-flink/src/test/java/org/apache/hudi/sink/utils/MockStateInitializationContext.java +++ b/hudi-flink/src/test/java/org/apache/hudi/sink/utils/MockStateInitializationContext.java @@ -23,6 +23,8 @@ import org.apache.flink.runtime.state.StateInitializationContext; import org.apache.flink.runtime.state.StatePartitionStreamProvider; +import java.util.OptionalLong; + /** * A {@link FunctionInitializationContext} for testing purposes. */ @@ -39,6 +41,11 @@ public boolean isRestored() { return false; } + @Override + public OptionalLong getRestoredCheckpointId() { + return OptionalLong.empty(); + } + @Override public MockOperatorStateStore getOperatorStateStore() { return operatorStateStore; diff --git a/hudi-flink/src/test/java/org/apache/hudi/sink/utils/MockStreamingRuntimeContext.java b/hudi-flink/src/test/java/org/apache/hudi/sink/utils/MockStreamingRuntimeContext.java index 14305da3db781..8a66f1dce011a 100644 --- a/hudi-flink/src/test/java/org/apache/hudi/sink/utils/MockStreamingRuntimeContext.java +++ b/hudi-flink/src/test/java/org/apache/hudi/sink/utils/MockStreamingRuntimeContext.java @@ -19,7 +19,7 @@ import org.apache.flink.api.common.ExecutionConfig; import org.apache.flink.api.common.state.KeyedStateStore; -import org.apache.flink.metrics.MetricGroup; +import org.apache.flink.metrics.groups.OperatorMetricGroup; import org.apache.flink.metrics.groups.UnregisteredMetricsGroup; import org.apache.flink.runtime.jobgraph.OperatorID; import org.apache.flink.runtime.memory.MemoryManager; @@ -69,8 +69,8 @@ public MockStreamingRuntimeContext( } @Override - public MetricGroup getMetricGroup() { - return new UnregisteredMetricsGroup(); + public OperatorMetricGroup getMetricGroup() { + return UnregisteredMetricsGroup.createOperatorMetricGroup(); } @Override diff --git a/hudi-flink/src/test/java/org/apache/hudi/source/TestStreamReadOperator.java b/hudi-flink/src/test/java/org/apache/hudi/source/TestStreamReadOperator.java index 911c68511ccee..db45a75977f5e 100644 --- a/hudi-flink/src/test/java/org/apache/hudi/source/TestStreamReadOperator.java +++ b/hudi-flink/src/test/java/org/apache/hudi/source/TestStreamReadOperator.java @@ -245,10 +245,10 @@ private OneInputStreamOperatorTestHarness create final List partitionKeys = Collections.singletonList("partition"); // This input format is used to open the emitted split. 
- TableSchemaResolver schemaUtil = new TableSchemaResolver(metaClient); + TableSchemaResolver schemaResolver = new TableSchemaResolver(metaClient); final Schema tableAvroSchema; try { - tableAvroSchema = schemaUtil.getTableAvroSchema(); + tableAvroSchema = schemaResolver.getTableAvroSchema(); } catch (Exception e) { throw new HoodieException("Get table avro schema error", e); } diff --git a/hudi-flink/src/test/java/org/apache/hudi/table/HoodieDataSourceITCase.java b/hudi-flink/src/test/java/org/apache/hudi/table/HoodieDataSourceITCase.java index f1ca68e632771..7c9b0bb6a3cc8 100644 --- a/hudi-flink/src/test/java/org/apache/hudi/table/HoodieDataSourceITCase.java +++ b/hudi-flink/src/test/java/org/apache/hudi/table/HoodieDataSourceITCase.java @@ -31,10 +31,12 @@ import org.apache.flink.configuration.Configuration; import org.apache.flink.core.execution.JobClient; +import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; import org.apache.flink.table.api.EnvironmentSettings; import org.apache.flink.table.api.TableEnvironment; import org.apache.flink.table.api.TableResult; import org.apache.flink.table.api.TableSchema; +import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; import org.apache.flink.table.api.config.ExecutionConfigOptions; import org.apache.flink.table.api.internal.TableEnvironmentImpl; import org.apache.flink.table.catalog.ObjectPath; @@ -62,6 +64,7 @@ import java.util.stream.Collectors; import java.util.stream.Stream; +import static org.apache.hudi.utils.TestConfigurations.catalog; import static org.apache.hudi.utils.TestConfigurations.sql; import static org.apache.hudi.utils.TestData.assertRowsEquals; import static org.junit.jupiter.api.Assertions.assertThrows; @@ -86,8 +89,24 @@ void beforeEach() { execConf.setString("restart-strategy", "fixed-delay"); execConf.setString("restart-strategy.fixed-delay.attempts", "0"); + Configuration conf = new Configuration(); + // For batch upsert use cases the current suggestion is to disable these 2 options. + // Since 1.14, the Flink runtime execution mode has switched from streaming + // to batch for batch execution mode (before that, both streaming and batch jobs used the streaming execution mode), + // and the current batch execution mode has these limitations: + // + // 1. the keyed stream sorts the inputs by key by default; + // 2. the batch state-backend requires the inputs to be sorted by state key. + // + // The hudi batch pipeline upsert case relies on the consuming sequence of index records and data records: + // the index records must be loaded before the data records so that BucketAssignFunction keeps the upsert semantics correct. + // We therefore disable these 2 options to use the streaming state-backend for batch execution mode, + // keeping the strategy from before 1.14. 
+ conf.setBoolean("execution.sorted-inputs.enabled", false); + conf.setBoolean("execution.batch-state-backend.enabled", false); + StreamExecutionEnvironment execEnv = StreamExecutionEnvironment.getExecutionEnvironment(conf); settings = EnvironmentSettings.newInstance().inBatchMode().build(); - batchTableEnv = TableEnvironmentImpl.create(settings); + batchTableEnv = StreamTableEnvironment.create(execEnv, settings); batchTableEnv.getConfig().getConfiguration() .setInteger(ExecutionConfigOptions.TABLE_EXEC_RESOURCE_DEFAULT_PARALLELISM, 1); } @@ -861,7 +880,7 @@ void testWriteAndReadDebeziumJson(ExecMode execMode) throws Exception { .getContextClassLoader().getResource("debezium_json.data")).toString(); String sourceDDL = "" + "CREATE TABLE debezium_source(\n" - + " id INT NOT NULL,\n" + + " id INT NOT NULL PRIMARY KEY NOT ENFORCED,\n" + " ts BIGINT,\n" + " name STRING,\n" + " description STRING,\n" @@ -1153,6 +1172,7 @@ void testParquetComplexNestedRowTypes(String operation) { String hoodieTableDDL = sql("t1") .field("f_int int") .field("f_array array") + .field("int_array array") .field("f_map map") .field("f_row row(f_nested_array array, f_nested_row row(f_row_f0 int, f_row_f1 varchar(10)))") .pkField("f_int") @@ -1167,12 +1187,53 @@ void testParquetComplexNestedRowTypes(String operation) { List result = CollectionUtil.iterableToList( () -> tableEnv.sqlQuery("select * from t1").execute().collect()); final String expected = "[" - + "+I[1, [abc1, def1], {abc1=1, def1=3}, +I[[abc1, def1], +I[1, abc1]]], " - + "+I[2, [abc2, def2], {def2=3, abc2=1}, +I[[abc2, def2], +I[2, abc2]]], " - + "+I[3, [abc3, def3], {def3=3, abc3=1}, +I[[abc3, def3], +I[3, abc3]]]]"; + + "+I[1, [abc1, def1], [1, 1], {abc1=1, def1=3}, +I[[abc1, def1], +I[1, abc1]]], " + + "+I[2, [abc2, def2], [2, 2], {def2=3, abc2=1}, +I[[abc2, def2], +I[2, abc2]]], " + + "+I[3, [abc3, def3], [3, 3], {def3=3, abc3=1}, +I[[abc3, def3], +I[3, abc3]]]]"; assertRowsEquals(result, expected); } + @ParameterizedTest + @ValueSource(strings = {"insert", "upsert", "bulk_insert"}) + void testBuiltinFunctionWithCatalog(String operation) { + TableEnvironment tableEnv = streamTableEnv; + + String hudiCatalogDDL = catalog("hudi_" + operation) + .catalogPath(tempFile.getAbsolutePath()) + .end(); + + tableEnv.executeSql(hudiCatalogDDL); + tableEnv.executeSql("use catalog " + ("hudi_" + operation)); + + String dbName = "hudi"; + tableEnv.executeSql("create database " + dbName); + tableEnv.executeSql("use " + dbName); + + String hoodieTableDDL = sql("t1") + .field("f_int int") + .field("f_date DATE") + .pkField("f_int") + .partitionField("f_int") + .option(FlinkOptions.PATH, tempFile.getAbsolutePath() + "/" + dbName + "/" + operation) + .option(FlinkOptions.OPERATION, operation) + .end(); + tableEnv.executeSql(hoodieTableDDL); + + String insertSql = "insert into t1 values (1, TO_DATE('2022-02-02')), (2, DATE '2022-02-02')"; + execInsertSql(tableEnv, insertSql); + + List result = CollectionUtil.iterableToList( + () -> tableEnv.sqlQuery("select * from t1").execute().collect()); + final String expected = "[" + + "+I[1, 2022-02-02], " + + "+I[2, 2022-02-02]]"; + assertRowsEquals(result, expected); + + List partitionResult = CollectionUtil.iterableToList( + () -> tableEnv.sqlQuery("select * from t1 where f_int = 1").execute().collect()); + assertRowsEquals(partitionResult, "[+I[1, 2022-02-02]]"); + } + // ------------------------------------------------------------------------- // Utilities // 
------------------------------------------------------------------------- diff --git a/hudi-flink/src/test/java/org/apache/hudi/table/TestHoodieTableFactory.java b/hudi-flink/src/test/java/org/apache/hudi/table/TestHoodieTableFactory.java index cbdffe360fd2b..a76e00816189a 100644 --- a/hudi-flink/src/test/java/org/apache/hudi/table/TestHoodieTableFactory.java +++ b/hudi-flink/src/test/java/org/apache/hudi/table/TestHoodieTableFactory.java @@ -24,9 +24,11 @@ import org.apache.hudi.exception.HoodieValidationException; import org.apache.hudi.hive.MultiPartKeysValueExtractor; import org.apache.hudi.hive.SlashEncodedDayPartitionValueExtractor; +import org.apache.hudi.index.HoodieIndex; import org.apache.hudi.keygen.ComplexAvroKeyGenerator; import org.apache.hudi.keygen.NonpartitionedAvroKeyGenerator; import org.apache.hudi.keygen.TimestampBasedAvroKeyGenerator; +import org.apache.hudi.keygen.constant.KeyGeneratorOptions; import org.apache.hudi.util.StreamerUtil; import org.apache.hudi.utils.SchemaBuilder; import org.apache.hudi.utils.TestConfigurations; @@ -346,6 +348,16 @@ void testSetupHoodieKeyOptionsForSink() { final Configuration conf3 = tableSink3.getConf(); assertThat(conf3.get(FlinkOptions.RECORD_KEY_FIELD), is("f0,f1")); assertThat(conf3.get(FlinkOptions.KEYGEN_CLASS_NAME), is(NonpartitionedAvroKeyGenerator.class.getName())); + + // definition of bucket index + this.conf.setString(FlinkOptions.INDEX_TYPE, HoodieIndex.IndexType.BUCKET.name()); + final MockContext sinkContext4 = MockContext.getInstance(this.conf, schema2, ""); + final HoodieTableSink tableSink4 = (HoodieTableSink) new HoodieTableFactory().createDynamicTableSink(sinkContext4); + final Configuration conf4 = tableSink4.getConf(); + assertThat(conf4.get(FlinkOptions.RECORD_KEY_FIELD), is("f0,f1")); + assertThat(conf4.get(FlinkOptions.INDEX_KEY_FIELD), is("f0,f1")); + assertThat(conf4.get(FlinkOptions.INDEX_TYPE), is(HoodieIndex.IndexType.BUCKET.name())); + assertThat(conf4.get(FlinkOptions.KEYGEN_CLASS_NAME), is(NonpartitionedAvroKeyGenerator.class.getName())); } @Test @@ -419,11 +431,11 @@ void testSetupTimestampBasedKeyGenForSink() { final Configuration conf1 = tableSource1.getConf(); assertThat(conf1.get(FlinkOptions.RECORD_KEY_FIELD), is("f0")); assertThat(conf1.get(FlinkOptions.KEYGEN_CLASS_NAME), is(TimestampBasedAvroKeyGenerator.class.getName())); - assertThat(conf1.getString(TimestampBasedAvroKeyGenerator.Config.TIMESTAMP_TYPE_FIELD_PROP, "dummy"), + assertThat(conf1.getString(KeyGeneratorOptions.Config.TIMESTAMP_TYPE_FIELD_PROP, "dummy"), is("EPOCHMILLISECONDS")); - assertThat(conf1.getString(TimestampBasedAvroKeyGenerator.Config.TIMESTAMP_OUTPUT_DATE_FORMAT_PROP, "dummy"), + assertThat(conf1.getString(KeyGeneratorOptions.Config.TIMESTAMP_OUTPUT_DATE_FORMAT_PROP, "dummy"), is(FlinkOptions.PARTITION_FORMAT_HOUR)); - assertThat(conf1.getString(TimestampBasedAvroKeyGenerator.Config.TIMESTAMP_OUTPUT_TIMEZONE_FORMAT_PROP, "dummy"), + assertThat(conf1.getString(KeyGeneratorOptions.Config.TIMESTAMP_OUTPUT_TIMEZONE_FORMAT_PROP, "dummy"), is("UTC")); } diff --git a/hudi-flink/src/test/java/org/apache/hudi/utils/TestConfigurations.java b/hudi-flink/src/test/java/org/apache/hudi/utils/TestConfigurations.java index 46cad3e826d3e..d1b6e04a1835d 100644 --- a/hudi-flink/src/test/java/org/apache/hudi/utils/TestConfigurations.java +++ b/hudi-flink/src/test/java/org/apache/hudi/utils/TestConfigurations.java @@ -64,12 +64,12 @@ private TestConfigurations() { .map(RowType.RowField::asSummaryString).collect(Collectors.toList()); public 
static final DataType ROW_DATA_TYPE_WIDER = DataTypes.ROW( - DataTypes.FIELD("uuid", DataTypes.VARCHAR(20)),// record key - DataTypes.FIELD("name", DataTypes.VARCHAR(10)), - DataTypes.FIELD("age", DataTypes.INT()), - DataTypes.FIELD("salary", DataTypes.DOUBLE()), - DataTypes.FIELD("ts", DataTypes.TIMESTAMP(3)), // precombine field - DataTypes.FIELD("partition", DataTypes.VARCHAR(10))) + DataTypes.FIELD("uuid", DataTypes.VARCHAR(20)),// record key + DataTypes.FIELD("name", DataTypes.VARCHAR(10)), + DataTypes.FIELD("age", DataTypes.INT()), + DataTypes.FIELD("salary", DataTypes.DOUBLE()), + DataTypes.FIELD("ts", DataTypes.TIMESTAMP(3)), // precombine field + DataTypes.FIELD("partition", DataTypes.VARCHAR(10))) .notNull(); public static final RowType ROW_TYPE_WIDER = (RowType) ROW_DATA_TYPE_WIDER.getLogicalType(); @@ -112,6 +112,15 @@ public static String getCreateHoodieTableDDL( return builder.toString(); } + public static String getCreateHudiCatalogDDL(final String catalogName, final String catalogPath) { + StringBuilder builder = new StringBuilder(); + builder.append("create catalog ").append(catalogName).append(" with (\n"); + builder.append(" 'type' = 'hudi',\n" + + " 'catalog.path' = '").append(catalogPath).append("'"); + builder.append("\n)"); + return builder.toString(); + } + public static String getFileSourceDDL(String tableName) { return getFileSourceDDL(tableName, "test_source.data"); } @@ -222,6 +231,10 @@ public static Sql sql(String tableName) { return new Sql(tableName); } + public static Catalog catalog(String catalogName) { + return new Catalog(catalogName); + } + // ------------------------------------------------------------------------- // Utilities // ------------------------------------------------------------------------- @@ -285,4 +298,22 @@ public String end() { this.withPartition, this.pkField, this.partitionField); } } + + public static class Catalog { + private final String catalogName; + private String catalogPath = "."; + + public Catalog(String catalogName) { + this.catalogName = catalogName; + } + + public Catalog catalogPath(String catalogPath) { + this.catalogPath = catalogPath; + return this; + } + + public String end() { + return TestConfigurations.getCreateHudiCatalogDDL(catalogName, catalogPath); + } + } } diff --git a/hudi-flink/src/test/java/org/apache/hudi/utils/TestSQL.java b/hudi-flink/src/test/java/org/apache/hudi/utils/TestSQL.java index 595d142b7cc0d..1695e4e7149a9 100644 --- a/hudi-flink/src/test/java/org/apache/hudi/utils/TestSQL.java +++ b/hudi-flink/src/test/java/org/apache/hudi/utils/TestSQL.java @@ -58,7 +58,7 @@ private TestSQL() { + "(3, array['abc3', 'def3'], map['abc3', 1, 'def3', 3], row(3, 'abc3'))"; public static final String COMPLEX_NESTED_ROW_TYPE_INSERT_T1 = "insert into t1 values\n" - + "(1, array['abc1', 'def1'], map['abc1', 1, 'def1', 3], row(array['abc1', 'def1'], row(1, 'abc1'))),\n" - + "(2, array['abc2', 'def2'], map['abc2', 1, 'def2', 3], row(array['abc2', 'def2'], row(2, 'abc2'))),\n" - + "(3, array['abc3', 'def3'], map['abc3', 1, 'def3', 3], row(array['abc3', 'def3'], row(3, 'abc3')))"; + + "(1, array['abc1', 'def1'], array[1, 1], map['abc1', 1, 'def1', 3], row(array['abc1', 'def1'], row(1, 'abc1'))),\n" + + "(2, array['abc2', 'def2'], array[2, 2], map['abc2', 1, 'def2', 3], row(array['abc2', 'def2'], row(2, 'abc2'))),\n" + + "(3, array['abc3', 'def3'], array[3, 3], map['abc3', 1, 'def3', 3], row(array['abc3', 'def3'], row(3, 'abc3')))"; } diff --git 
a/hudi-flink/src/test/java/org/apache/hudi/utils/factory/ContinuousFileSourceFactory.java b/hudi-flink/src/test/java/org/apache/hudi/utils/factory/ContinuousFileSourceFactory.java index 92d9c55723518..31b3ad5c7669d 100644 --- a/hudi-flink/src/test/java/org/apache/hudi/utils/factory/ContinuousFileSourceFactory.java +++ b/hudi-flink/src/test/java/org/apache/hudi/utils/factory/ContinuousFileSourceFactory.java @@ -53,7 +53,7 @@ public DynamicTableSource createDynamicTableSource(Context context) { Configuration conf = (Configuration) helper.getOptions(); Path path = new Path(conf.getOptional(FlinkOptions.PATH).orElseThrow(() -> new ValidationException("Option [path] should be not empty."))); - return new ContinuousFileSource(context.getCatalogTable().getSchema(), path, conf); + return new ContinuousFileSource(context.getCatalogTable().getResolvedSchema(), path, conf); } @Override diff --git a/hudi-flink/src/test/java/org/apache/hudi/utils/source/ContinuousFileSource.java b/hudi-flink/src/test/java/org/apache/hudi/utils/source/ContinuousFileSource.java index a44061076f581..d38aad60c3452 100644 --- a/hudi-flink/src/test/java/org/apache/hudi/utils/source/ContinuousFileSource.java +++ b/hudi-flink/src/test/java/org/apache/hudi/utils/source/ContinuousFileSource.java @@ -18,15 +18,15 @@ package org.apache.hudi.utils.source; +import org.apache.flink.api.common.state.CheckpointListener; import org.apache.flink.configuration.Configuration; import org.apache.flink.core.fs.Path; import org.apache.flink.formats.common.TimestampFormat; import org.apache.flink.formats.json.JsonRowDataDeserializationSchema; -import org.apache.flink.runtime.state.CheckpointListener; import org.apache.flink.streaming.api.datastream.DataStream; import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; import org.apache.flink.streaming.api.functions.source.SourceFunction; -import org.apache.flink.table.api.TableSchema; +import org.apache.flink.table.catalog.ResolvedSchema; import org.apache.flink.table.connector.ChangelogMode; import org.apache.flink.table.connector.source.DataStreamScanProvider; import org.apache.flink.table.connector.source.DynamicTableSource; @@ -59,12 +59,12 @@ */ public class ContinuousFileSource implements ScanTableSource { - private final TableSchema tableSchema; + private final ResolvedSchema tableSchema; private final Path path; private final Configuration conf; public ContinuousFileSource( - TableSchema tableSchema, + ResolvedSchema tableSchema, Path path, Configuration conf) { this.tableSchema = tableSchema; @@ -83,7 +83,7 @@ public boolean isBounded() { @Override public DataStream produceDataStream(StreamExecutionEnvironment execEnv) { - final RowType rowType = (RowType) tableSchema.toRowDataType().getLogicalType(); + final RowType rowType = (RowType) tableSchema.toSourceRowDataType().getLogicalType(); JsonRowDataDeserializationSchema deserializationSchema = new JsonRowDataDeserializationSchema( rowType, InternalTypeInfo.of(rowType), @@ -178,7 +178,7 @@ private void loadDataBuffer() { } @Override - public void notifyCheckpointComplete(long l) throws Exception { + public void notifyCheckpointComplete(long l) { this.currentCP.incrementAndGet(); } } diff --git a/hudi-hadoop-mr/pom.xml b/hudi-hadoop-mr/pom.xml index 57fbdb7b8e267..bf87bfaa36a81 100644 --- a/hudi-hadoop-mr/pom.xml +++ b/hudi-hadoop-mr/pom.xml @@ -30,13 +30,6 @@ - - - org.scala-lang - scala-library - ${scala.version} - - org.apache.hudi diff --git 
a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/BaseFileWithLogsSplit.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/BaseFileWithLogsSplit.java deleted file mode 100644 index c9afa9119c0c5..0000000000000 --- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/BaseFileWithLogsSplit.java +++ /dev/null @@ -1,123 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hudi.hadoop; - -import org.apache.hudi.common.model.HoodieLogFile; - -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.io.Text; -import org.apache.hadoop.mapred.FileSplit; - -import java.io.DataInput; -import java.io.DataOutput; -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; - -/** - * Encode additional information in split to track matching log file and base files. - * Hence, this class tracks a log/base file split. - */ -public class BaseFileWithLogsSplit extends FileSplit { - // a flag to mark this split is produced by incremental query or not. - private boolean belongToIncrementalSplit = false; - // the log file paths of this split. - private List deltaLogFiles = new ArrayList<>(); - // max commit time of current split. - private String maxCommitTime = ""; - // the basePath of current hoodie table. - private String basePath = ""; - // the base file belong to this split. 
- private String baseFilePath = ""; - - public BaseFileWithLogsSplit(Path file, long start, long length, String[] hosts) { - super(file, start, length, hosts); - } - - @Override - public void write(DataOutput out) throws IOException { - super.write(out); - out.writeBoolean(belongToIncrementalSplit); - Text.writeString(out, maxCommitTime); - Text.writeString(out, basePath); - Text.writeString(out, baseFilePath); - out.writeInt(deltaLogFiles.size()); - for (HoodieLogFile logFile : deltaLogFiles) { - Text.writeString(out, logFile.getPath().toString()); - out.writeLong(logFile.getFileSize()); - } - } - - @Override - public void readFields(DataInput in) throws IOException { - super.readFields(in); - belongToIncrementalSplit = in.readBoolean(); - maxCommitTime = Text.readString(in); - basePath = Text.readString(in); - baseFilePath = Text.readString(in); - int deltaLogSize = in.readInt(); - List tempDeltaLogs = new ArrayList<>(); - for (int i = 0; i < deltaLogSize; i++) { - String logPath = Text.readString(in); - long logFileSize = in.readLong(); - tempDeltaLogs.add(new HoodieLogFile(new Path(logPath), logFileSize)); - } - deltaLogFiles = tempDeltaLogs; - } - - public boolean getBelongToIncrementalSplit() { - return belongToIncrementalSplit; - } - - public void setBelongToIncrementalSplit(boolean belongToIncrementalSplit) { - this.belongToIncrementalSplit = belongToIncrementalSplit; - } - - public List getDeltaLogFiles() { - return deltaLogFiles; - } - - public void setDeltaLogFiles(List deltaLogFiles) { - this.deltaLogFiles = deltaLogFiles; - } - - public String getMaxCommitTime() { - return maxCommitTime; - } - - public void setMaxCommitTime(String maxCommitTime) { - this.maxCommitTime = maxCommitTime; - } - - public String getBasePath() { - return basePath; - } - - public void setBasePath(String basePath) { - this.basePath = basePath; - } - - public String getBaseFilePath() { - return baseFilePath; - } - - public void setBaseFilePath(String baseFilePath) { - this.baseFilePath = baseFilePath; - } -} diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/BootstrapBaseFileSplit.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/BootstrapBaseFileSplit.java index 437304fb043d0..6db1751771904 100644 --- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/BootstrapBaseFileSplit.java +++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/BootstrapBaseFileSplit.java @@ -32,9 +32,11 @@ public class BootstrapBaseFileSplit extends FileSplit { private FileSplit bootstrapFileSplit; - public BootstrapBaseFileSplit() { - super(); - } + /** + * NOTE: This ctor is necessary for Hive to be able to serialize and + * then instantiate it when deserializing back + */ + public BootstrapBaseFileSplit() {} public BootstrapBaseFileSplit(FileSplit baseSplit, FileSplit bootstrapFileSplit) throws IOException { diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HiveHoodieTableFileIndex.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HiveHoodieTableFileIndex.java index 585728d1e72b7..000fce5e8fbff 100644 --- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HiveHoodieTableFileIndex.java +++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HiveHoodieTableFileIndex.java @@ -20,8 +20,7 @@ import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.Path; -import org.apache.hudi.HoodieTableFileIndexBase; -import org.apache.hudi.FileStatusCacheTrait; +import org.apache.hudi.BaseHoodieTableFileIndex; import org.apache.hudi.common.config.TypedProperties; import 
org.apache.hudi.common.engine.HoodieEngineContext; import org.apache.hudi.common.model.HoodieTableQueryType; @@ -29,15 +28,13 @@ import org.apache.hudi.common.util.Option; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import scala.Function0; -import scala.collection.JavaConverters; import java.util.List; /** - * Implementation of {@link HoodieTableFileIndexBase} for Hive-based query engines + * Implementation of {@link BaseHoodieTableFileIndex} for Hive-based query engines */ -public class HiveHoodieTableFileIndex extends HoodieTableFileIndexBase { +public class HiveHoodieTableFileIndex extends BaseHoodieTableFileIndex { public static final Logger LOG = LoggerFactory.getLogger(HiveHoodieTableFileIndex.class); @@ -53,16 +50,13 @@ public HiveHoodieTableFileIndex(HoodieEngineContext engineContext, metaClient, configProperties, queryType, - JavaConverters.asScalaBufferConverter(queryPaths).asScala(), - toScalaOption(specifiedQueryInstant), + queryPaths, + specifiedQueryInstant, shouldIncludePendingCommits, + true, new NoopCache()); } - private static scala.Option toScalaOption(Option opt) { - return scala.Option.apply(opt.orElse(null)); - } - @Override public Object[] parsePartitionColumnValues(String[] partitionColumns, String partitionPath) { // NOTE: Parsing partition path into partition column values isn't required on Hive, @@ -71,20 +65,10 @@ public Object[] parsePartitionColumnValues(String[] partitionColumns, String par return new Object[0]; } - @Override - public void logInfo(Function0 lazyStr) { - LOG.info(lazyStr.apply()); - } - - @Override - public void logWarning(Function0 lazyStr) { - LOG.info(lazyStr.apply()); - } - - static class NoopCache implements FileStatusCacheTrait { + static class NoopCache implements FileStatusCache { @Override - public scala.Option get(Path path) { - return scala.Option.empty(); + public Option get(Path path) { + return Option.empty(); } @Override diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieFileInputFormatBase.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieCopyOnWriteTableInputFormat.java similarity index 65% rename from hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieFileInputFormatBase.java rename to hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieCopyOnWriteTableInputFormat.java index a35eb50945285..2b8dae255e3c4 100644 --- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieFileInputFormatBase.java +++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieCopyOnWriteTableInputFormat.java @@ -18,40 +18,47 @@ package org.apache.hudi.hadoop; -import org.apache.hadoop.conf.Configurable; -import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.ArrayWritable; import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.mapred.FileInputFormat; +import org.apache.hadoop.mapred.FileSplit; +import org.apache.hadoop.mapred.InputSplit; import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapred.RecordReader; +import org.apache.hadoop.mapred.Reporter; import org.apache.hadoop.mapreduce.Job; import org.apache.hudi.common.config.TypedProperties; import org.apache.hudi.common.engine.HoodieLocalEngineContext; import org.apache.hudi.common.model.FileSlice; import org.apache.hudi.common.model.HoodieBaseFile; -import org.apache.hudi.common.model.HoodieLogFile; import org.apache.hudi.common.model.HoodieTableQueryType; +import 
org.apache.hudi.common.table.HoodieTableConfig; import org.apache.hudi.common.table.HoodieTableMetaClient; +import org.apache.hudi.common.table.TableSchemaResolver; import org.apache.hudi.common.table.timeline.HoodieInstant; import org.apache.hudi.common.table.timeline.HoodieTimeline; import org.apache.hudi.common.util.CollectionUtils; import org.apache.hudi.common.util.Option; +import org.apache.hudi.exception.HoodieException; +import org.apache.hudi.exception.HoodieIOException; +import org.apache.hudi.hadoop.realtime.HoodieVirtualKeyInfo; import org.apache.hudi.hadoop.utils.HoodieHiveUtils; import org.apache.hudi.hadoop.utils.HoodieInputFormatUtils; -import scala.collection.JavaConverters; -import scala.collection.Seq; +import org.apache.parquet.schema.MessageType; import javax.annotation.Nonnull; import java.io.IOException; +import java.io.UnsupportedEncodingException; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collection; import java.util.List; import java.util.Map; import java.util.Properties; import java.util.stream.Collectors; -import java.util.stream.Stream; import static org.apache.hudi.common.util.ValidationUtils.checkState; @@ -64,43 +71,35 @@ *
  • Incremental mode: reading table's state as of particular timestamp (or instant, in Hudi's terms)
  • External mode: reading non-Hudi partitions
* + * + * NOTE: This class is invariant of the underlying file-format of the files being read */ -public abstract class HoodieFileInputFormatBase extends FileInputFormat - implements Configurable { - - protected Configuration conf; - - protected abstract boolean includeLogFilesForSnapShotView(); +public class HoodieCopyOnWriteTableInputFormat extends HoodieTableInputFormat { @Override - public final Configuration getConf() { - return conf; + protected boolean isSplitable(FileSystem fs, Path filename) { + return !(filename instanceof PathWithBootstrapFileStatus); } @Override - public final void setConf(Configuration conf) { - this.conf = conf; - } + protected FileSplit makeSplit(Path file, long start, long length, + String[] hosts) { + FileSplit split = new FileSplit(file, start, length, hosts); - @Nonnull - private static RealtimeFileStatus createRealtimeFileStatusUnchecked(HoodieLogFile latestLogFile, Stream logFiles) { - List sortedLogFiles = logFiles.sorted(HoodieLogFile.getLogFileComparator()).collect(Collectors.toList()); - try { - RealtimeFileStatus rtFileStatus = new RealtimeFileStatus(latestLogFile.getFileStatus()); - rtFileStatus.setDeltaLogFiles(sortedLogFiles); - return rtFileStatus; - } catch (IOException e) { - throw new RuntimeException(e); + if (file instanceof PathWithBootstrapFileStatus) { + return makeExternalFileSplit((PathWithBootstrapFileStatus)file, split); } + return split; } - @Nonnull - private static FileStatus getFileStatusUnchecked(Option baseFileOpt) { - try { - return HoodieInputFormatUtils.getFileStatus(baseFileOpt.get()); - } catch (IOException ioe) { - throw new RuntimeException(ioe); + @Override + protected FileSplit makeSplit(Path file, long start, long length, + String[] hosts, String[] inMemoryHosts) { + FileSplit split = new FileSplit(file, start, length, hosts, inMemoryHosts); + if (file instanceof PathWithBootstrapFileStatus) { + return makeExternalFileSplit((PathWithBootstrapFileStatus)file, split); } + return split; } @Override @@ -143,6 +142,70 @@ public FileStatus[] listStatus(JobConf job) throws IOException { return returns.toArray(new FileStatus[0]); } + @Override + public RecordReader getRecordReader(InputSplit split, JobConf job, Reporter reporter) throws IOException { + throw new UnsupportedEncodingException("not implemented"); + } + + /** + * Abstracts and exposes {@link FileInputFormat#listStatus(JobConf)} operation to subclasses that + * lists files (returning an array of {@link FileStatus}) corresponding to the input paths specified + * as part of provided {@link JobConf} + */ + protected final FileStatus[] doListStatus(JobConf job) throws IOException { + return super.listStatus(job); + } + + /** + * Achieves listStatus functionality for an incrementally queried table. Instead of listing all + * partitions and then filtering based on the commits of interest, this logic first extracts the + * partitions touched by the desired commits and then lists only those partitions. 
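+ * + * For example (with hypothetical partition paths): if the commits being queried only touched + * partitions 2022/01/01 and 2022/01/02, only those two paths are set as input paths on the + * {@link JobConf} before the listing is performed. 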
+ */ + protected List listStatusForIncrementalMode(JobConf job, + HoodieTableMetaClient tableMetaClient, + List inputPaths, + String incrementalTable) throws IOException { + Job jobContext = Job.getInstance(job); + Option timeline = HoodieInputFormatUtils.getFilteredCommitsTimeline(jobContext, tableMetaClient); + if (!timeline.isPresent()) { + return null; + } + Option> commitsToCheck = HoodieInputFormatUtils.getCommitsForIncrementalQuery(jobContext, incrementalTable, timeline.get()); + if (!commitsToCheck.isPresent()) { + return null; + } + Option incrementalInputPaths = HoodieInputFormatUtils.getAffectedPartitions(commitsToCheck.get(), tableMetaClient, timeline.get(), inputPaths); + // Mutate the JobConf to set the input paths to only partitions touched by incremental pull. + if (!incrementalInputPaths.isPresent()) { + return null; + } + setInputPaths(job, incrementalInputPaths.get()); + FileStatus[] fileStatuses = doListStatus(job); + return HoodieInputFormatUtils.filterIncrementalFileStatus(jobContext, tableMetaClient, timeline.get(), fileStatuses, commitsToCheck.get()); + } + + protected FileStatus createFileStatusUnchecked(FileSlice fileSlice, HiveHoodieTableFileIndex fileIndex, Option virtualKeyInfoOpt) { + Option baseFileOpt = fileSlice.getBaseFile(); + + if (baseFileOpt.isPresent()) { + return getFileStatusUnchecked(baseFileOpt.get()); + } else { + throw new IllegalStateException("Invalid state: base-file has to be present"); + } + } + + private BootstrapBaseFileSplit makeExternalFileSplit(PathWithBootstrapFileStatus file, FileSplit split) { + try { + LOG.info("Making external data split for " + file); + FileStatus externalFileStatus = file.getBootstrapFileStatus(); + FileSplit externalFileSplit = makeSplit(externalFileStatus.getPath(), 0, externalFileStatus.getLen(), + new String[0], new String[0]); + return new BootstrapBaseFileSplit(split, externalFileSplit); + } catch (IOException e) { + throw new HoodieIOException(e.getMessage(), e); + } + } + @Nonnull private List listStatusForSnapshotMode(JobConf job, Map tableMetaClientMap, @@ -172,36 +235,24 @@ private List listStatusForSnapshotMode(JobConf job, engineContext, tableMetaClient, props, - HoodieTableQueryType.QUERY_TYPE_SNAPSHOT, + HoodieTableQueryType.SNAPSHOT, partitionPaths, queryCommitInstant, shouldIncludePendingCommits); - Map> partitionedFileSlices = - JavaConverters.mapAsJavaMapConverter(fileIndex.listFileSlices()).asJava(); + Map> partitionedFileSlices = fileIndex.listFileSlices(); + + Option virtualKeyInfoOpt = getHoodieVirtualKeyInfo(tableMetaClient); targetFiles.addAll( partitionedFileSlices.values() .stream() - .flatMap(seq -> JavaConverters.seqAsJavaListConverter(seq).asJava().stream()) - .map(fileSlice -> { - Option baseFileOpt = fileSlice.getBaseFile(); - Option latestLogFileOpt = fileSlice.getLatestLogFile(); - if (baseFileOpt.isPresent()) { - return getFileStatusUnchecked(baseFileOpt); - } else if (includeLogFilesForSnapShotView() && latestLogFileOpt.isPresent()) { - return createRealtimeFileStatusUnchecked(latestLogFileOpt.get(), fileSlice.getLogFiles()); - } else { - throw new IllegalStateException("Invalid state: either base-file or log-file should be present"); - } - }) + .flatMap(Collection::stream) + .map(fileSlice -> createFileStatusUnchecked(fileSlice, fileIndex, virtualKeyInfoOpt)) .collect(Collectors.toList()) ); } - // TODO cleanup - validate(targetFiles, listStatusForSnapshotModeLegacy(job, tableMetaClientMap, snapshotPaths)); - return targetFiles; } @@ -211,42 +262,28 @@ private void 
validate(List targetFiles, List legacyFileS } @Nonnull - private List listStatusForSnapshotModeLegacy(JobConf job, Map tableMetaClientMap, List snapshotPaths) throws IOException { - return HoodieInputFormatUtils.filterFileStatusForSnapshotMode(job, tableMetaClientMap, snapshotPaths, includeLogFilesForSnapShotView()); - } - - /** - * Abstracts and exposes {@link FileInputFormat#listStatus(JobConf)} operation to subclasses that - * lists files (returning an array of {@link FileStatus}) corresponding to the input paths specified - * as part of provided {@link JobConf} - */ - protected final FileStatus[] doListStatus(JobConf job) throws IOException { - return super.listStatus(job); + protected static FileStatus getFileStatusUnchecked(HoodieBaseFile baseFile) { + try { + return HoodieInputFormatUtils.getFileStatus(baseFile); + } catch (IOException ioe) { + throw new HoodieIOException("Failed to get file-status", ioe); + } } - /** - * Achieves listStatus functionality for an incrementally queried table. Instead of listing all - * partitions and then filtering based on the commits of interest, this logic first extracts the - * partitions touched by the desired commits and then lists only those partitions. - */ - protected List listStatusForIncrementalMode(JobConf job, HoodieTableMetaClient tableMetaClient, - List inputPaths, String incrementalTable) throws IOException { - Job jobContext = Job.getInstance(job); - Option timeline = HoodieInputFormatUtils.getFilteredCommitsTimeline(jobContext, tableMetaClient); - if (!timeline.isPresent()) { - return null; + protected static Option getHoodieVirtualKeyInfo(HoodieTableMetaClient metaClient) { + HoodieTableConfig tableConfig = metaClient.getTableConfig(); + if (tableConfig.populateMetaFields()) { + return Option.empty(); } - Option> commitsToCheck = HoodieInputFormatUtils.getCommitsForIncrementalQuery(jobContext, incrementalTable, timeline.get()); - if (!commitsToCheck.isPresent()) { - return null; - } - Option incrementalInputPaths = HoodieInputFormatUtils.getAffectedPartitions(commitsToCheck.get(), tableMetaClient, timeline.get(), inputPaths); - // Mutate the JobConf to set the input paths to only partitions touched by incremental pull. 
- if (!incrementalInputPaths.isPresent()) { - return null; + + TableSchemaResolver tableSchemaResolver = new TableSchemaResolver(metaClient); + try { + MessageType parquetSchema = tableSchemaResolver.getTableParquetSchema(); + return Option.of(new HoodieVirtualKeyInfo(tableConfig.getRecordKeyFieldProp(), + tableConfig.getPartitionFieldProp(), parquetSchema.getFieldIndex(tableConfig.getRecordKeyFieldProp()), + parquetSchema.getFieldIndex(tableConfig.getPartitionFieldProp()))); + } catch (Exception exception) { + throw new HoodieException("Fetching table schema failed with exception ", exception); } - setInputPaths(job, incrementalInputPaths.get()); - FileStatus[] fileStatuses = doListStatus(job); - return HoodieInputFormatUtils.filterIncrementalFileStatus(jobContext, tableMetaClient, timeline.get(), fileStatuses, commitsToCheck.get()); } } diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieHFileInputFormat.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieHFileInputFormat.java index 2baf140e21138..6eb1663a0d12c 100644 --- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieHFileInputFormat.java +++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieHFileInputFormat.java @@ -35,17 +35,12 @@ * HoodieInputFormat for HUDI datasets which store data in HFile base file format. */ @UseFileSplitsFromInputFormat -public class HoodieHFileInputFormat extends HoodieFileInputFormatBase { +public class HoodieHFileInputFormat extends HoodieCopyOnWriteTableInputFormat { protected HoodieDefaultTimeline filterInstantsTimeline(HoodieDefaultTimeline timeline) { return HoodieInputFormatUtils.filterInstantsTimeline(timeline); } - @Override - protected boolean includeLogFilesForSnapShotView() { - return false; - } - @Override public RecordReader getRecordReader(final InputSplit split, final JobConf job, final Reporter reporter) throws IOException { diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieParquetInputFormat.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieParquetInputFormat.java index f63352829faf9..7b79f61e49bcf 100644 --- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieParquetInputFormat.java +++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieParquetInputFormat.java @@ -18,18 +18,6 @@ package org.apache.hudi.hadoop; -import org.apache.hudi.common.model.HoodieRecord; -import org.apache.hudi.common.table.timeline.HoodieDefaultTimeline; -import org.apache.hudi.common.util.collection.Pair; -import org.apache.hudi.exception.HoodieIOException; -import org.apache.hudi.hadoop.utils.HoodieHiveUtils; -import org.apache.hudi.hadoop.utils.HoodieInputFormatUtils; - -import org.apache.hadoop.conf.Configurable; -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat; import org.apache.hadoop.hive.ql.io.sarg.ConvertAstToSearchArg; import org.apache.hadoop.hive.ql.plan.TableScanDesc; import org.apache.hadoop.io.ArrayWritable; @@ -39,6 +27,9 @@ import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.RecordReader; import org.apache.hadoop.mapred.Reporter; +import org.apache.hudi.common.model.HoodieRecord; +import org.apache.hudi.common.util.collection.Pair; +import org.apache.hudi.hadoop.utils.HoodieHiveUtils; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; @@ -54,20 +45,16 @@ */ @UseRecordReaderFromInputFormat @UseFileSplitsFromInputFormat 
-public class HoodieParquetInputFormat extends HoodieFileInputFormatBase implements Configurable { +public class HoodieParquetInputFormat extends HoodieParquetInputFormatBase { private static final Logger LOG = LogManager.getLogger(HoodieParquetInputFormat.class); - // NOTE: We're only using {@code MapredParquetInputFormat} to compose vectorized - // {@code RecordReader} - private final MapredParquetInputFormat mapredParquetInputFormat = new MapredParquetInputFormat(); - - protected HoodieDefaultTimeline filterInstantsTimeline(HoodieDefaultTimeline timeline) { - return HoodieInputFormatUtils.filterInstantsTimeline(timeline); + public HoodieParquetInputFormat() { + super(new HoodieCopyOnWriteTableInputFormat()); } - protected boolean includeLogFilesForSnapShotView() { - return false; + protected HoodieParquetInputFormat(HoodieCopyOnWriteTableInputFormat delegate) { + super(delegate); } @Override @@ -96,36 +83,10 @@ public RecordReader getRecordReader(final InputSpli return getRecordReaderInternal(split, job, reporter); } - @Override - protected boolean isSplitable(FileSystem fs, Path filename) { - return !(filename instanceof PathWithBootstrapFileStatus); - } - - @Override - protected FileSplit makeSplit(Path file, long start, long length, - String[] hosts) { - FileSplit split = new FileSplit(file, start, length, hosts); - - if (file instanceof PathWithBootstrapFileStatus) { - return makeExternalFileSplit((PathWithBootstrapFileStatus)file, split); - } - return split; - } - - @Override - protected FileSplit makeSplit(Path file, long start, long length, - String[] hosts, String[] inMemoryHosts) { - FileSplit split = new FileSplit(file, start, length, hosts, inMemoryHosts); - if (file instanceof PathWithBootstrapFileStatus) { - return makeExternalFileSplit((PathWithBootstrapFileStatus)file, split); - } - return split; - } - private RecordReader getRecordReaderInternal(InputSplit split, JobConf job, Reporter reporter) throws IOException { - return mapredParquetInputFormat.getRecordReader(split, job, reporter); + return super.getRecordReader(split, job, reporter); } private RecordReader createBootstrappingRecordReader(InputSplit split, @@ -176,16 +137,4 @@ private RecordReader createBootstrappingRecordReade true); } } - - private BootstrapBaseFileSplit makeExternalFileSplit(PathWithBootstrapFileStatus file, FileSplit split) { - try { - LOG.info("Making external data split for " + file); - FileStatus externalFileStatus = file.getBootstrapFileStatus(); - FileSplit externalFileSplit = makeSplit(externalFileStatus.getPath(), 0, externalFileStatus.getLen(), - new String[0], new String[0]); - return new BootstrapBaseFileSplit(split, externalFileSplit); - } catch (IOException e) { - throw new HoodieIOException(e.getMessage(), e); - } - } } \ No newline at end of file diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieParquetInputFormatBase.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieParquetInputFormatBase.java new file mode 100644 index 0000000000000..ed88acacb4d2f --- /dev/null +++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieParquetInputFormatBase.java @@ -0,0 +1,92 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.hadoop; + +import org.apache.hadoop.conf.Configurable; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat; +import org.apache.hadoop.mapred.FileInputFormat; +import org.apache.hadoop.mapred.FileSplit; +import org.apache.hadoop.mapred.InputSplit; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hudi.hadoop.realtime.HoodieMergeOnReadTableInputFormat; + +import java.io.IOException; + +/** + * !!! PLEASE READ CAREFULLY !!! + * + * NOTE: Hive applies optimizations that are gated on whether a {@link FileInputFormat} + * implementation inherits from {@link MapredParquetInputFormat}. + * + * To make sure that Hudi implementations leverage these optimizations to the fullest, this class + * serves as the base-class for every {@link FileInputFormat} implementation working with the Parquet file-format. + * + * However, this class is only a thin facade over the actual implementation hierarchy: it expects + * either {@link HoodieCopyOnWriteTableInputFormat} or {@link HoodieMergeOnReadTableInputFormat} to be supplied, + * to which it delegates all of its necessary methods. 
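+ * + * For example, the vectorized Parquet {@code RecordReader} is only composed through + * {@link MapredParquetInputFormat}, which is why Hudi's Parquet-based input formats extend + * this facade rather than a plain {@link FileInputFormat}. 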
+ */ +public abstract class HoodieParquetInputFormatBase extends MapredParquetInputFormat implements Configurable { + + private final HoodieTableInputFormat inputFormatDelegate; + + protected HoodieParquetInputFormatBase(HoodieCopyOnWriteTableInputFormat inputFormatDelegate) { + this.inputFormatDelegate = inputFormatDelegate; + } + + @Override + public final void setConf(Configuration conf) { + inputFormatDelegate.setConf(conf); + } + + @Override + public final Configuration getConf() { + return inputFormatDelegate.getConf(); + } + + @Override + public final InputSplit[] getSplits(JobConf job, int numSplits) throws IOException { + return inputFormatDelegate.getSplits(job, numSplits); + } + + @Override + protected final boolean isSplitable(FileSystem fs, Path filename) { + return inputFormatDelegate.isSplitable(fs, filename); + } + + @Override + protected final FileSplit makeSplit(Path file, long start, long length, + String[] hosts) { + return inputFormatDelegate.makeSplit(file, start, length, hosts); + } + + @Override + protected final FileSplit makeSplit(Path file, long start, long length, + String[] hosts, String[] inMemoryHosts) { + return inputFormatDelegate.makeSplit(file, start, length, hosts, inMemoryHosts); + } + + @Override + public final FileStatus[] listStatus(JobConf job) throws IOException { + return inputFormatDelegate.listStatus(job); + } +} diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieTableInputFormat.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieTableInputFormat.java new file mode 100644 index 0000000000000..d18cb7895ad00 --- /dev/null +++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/HoodieTableInputFormat.java @@ -0,0 +1,72 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hudi.hadoop; + +import org.apache.hadoop.conf.Configurable; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.ArrayWritable; +import org.apache.hadoop.io.NullWritable; +import org.apache.hadoop.mapred.FileInputFormat; +import org.apache.hadoop.mapred.FileSplit; +import org.apache.hadoop.mapred.JobConf; + +import java.io.IOException; + +/** + * Abstract base class of the Hive's {@link FileInputFormat} implementations allowing for reading of Hudi's + * Copy-on-Write (COW) and Merge-on-Read (MOR) tables + */ +public abstract class HoodieTableInputFormat extends FileInputFormat + implements Configurable { + + protected Configuration conf; + + @Override + public final Configuration getConf() { + return conf; + } + + @Override + public final void setConf(Configuration conf) { + this.conf = conf; + } + + @Override + protected boolean isSplitable(FileSystem fs, Path filename) { + return super.isSplitable(fs, filename); + } + + @Override + protected FileSplit makeSplit(Path file, long start, long length, String[] hosts) { + return super.makeSplit(file, start, length, hosts); + } + + @Override + protected FileSplit makeSplit(Path file, long start, long length, String[] hosts, String[] inMemoryHosts) { + return super.makeSplit(file, start, length, hosts, inMemoryHosts); + } + + @Override + protected FileStatus[] listStatus(JobConf job) throws IOException { + return super.listStatus(job); + } +} diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/InputPathHandler.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/InputPathHandler.java index 07bd82afa9e9e..24d190700fea3 100644 --- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/InputPathHandler.java +++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/InputPathHandler.java @@ -35,7 +35,7 @@ import java.util.List; import java.util.Map; -import static org.apache.hudi.hadoop.utils.HoodieInputFormatUtils.getTableMetaClientForBasePath; +import static org.apache.hudi.hadoop.utils.HoodieInputFormatUtils.getTableMetaClientForBasePathUnchecked; /** * InputPathHandler takes in a set of input paths and incremental tables list. Then, classifies the @@ -107,7 +107,7 @@ private void parseInputPaths(Path[] inputPaths, List incrementalTables) // This path is for a table that we don't know about yet. HoodieTableMetaClient metaClient; try { - metaClient = getTableMetaClientForBasePath(inputPath.getFileSystem(conf), inputPath); + metaClient = getTableMetaClientForBasePathUnchecked(conf, inputPath); tableMetaClientMap.put(getIncrementalTable(metaClient), metaClient); tagAsIncrementalOrSnapshot(inputPath, metaClient, incrementalTables); } catch (TableNotFoundException | InvalidTableException e) { diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/PathWithLogFilePath.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/PathWithLogFilePath.java deleted file mode 100644 index 8f9ac8b03d575..0000000000000 --- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/PathWithLogFilePath.java +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hudi.hadoop; - -import org.apache.hudi.common.model.HoodieLogFile; - -import org.apache.hadoop.fs.Path; - -import java.util.ArrayList; -import java.util.List; - -/** - * Encode additional information in Path to track matching log file and base files. - * Hence, this class tracks a log/base file status. - */ -public class PathWithLogFilePath extends Path { - // a flag to mark this split is produced by incremental query or not. - private boolean belongToIncrementalPath = false; - // the log files belong this path. - private List deltaLogFiles = new ArrayList<>(); - // max commit time of current path. - private String maxCommitTime = ""; - // the basePath of current hoodie table. - private String basePath = ""; - // the base file belong to this path; - private String baseFilePath = ""; - // the bootstrap file belong to this path. - // only if current query table is bootstrap table, this field is used. - private PathWithBootstrapFileStatus pathWithBootstrapFileStatus; - - public PathWithLogFilePath(Path parent, String child) { - super(parent, child); - } - - public void setBelongToIncrementalPath(boolean belongToIncrementalPath) { - this.belongToIncrementalPath = belongToIncrementalPath; - } - - public List getDeltaLogFiles() { - return deltaLogFiles; - } - - public void setDeltaLogFiles(List deltaLogFiles) { - this.deltaLogFiles = deltaLogFiles; - } - - public String getMaxCommitTime() { - return maxCommitTime; - } - - public void setMaxCommitTime(String maxCommitTime) { - this.maxCommitTime = maxCommitTime; - } - - public String getBasePath() { - return basePath; - } - - public void setBasePath(String basePath) { - this.basePath = basePath; - } - - public void setBaseFilePath(String baseFilePath) { - this.baseFilePath = baseFilePath; - } - - public boolean splitable() { - return !baseFilePath.isEmpty(); - } - - public PathWithBootstrapFileStatus getPathWithBootstrapFileStatus() { - return pathWithBootstrapFileStatus; - } - - public void setPathWithBootstrapFileStatus(PathWithBootstrapFileStatus pathWithBootstrapFileStatus) { - this.pathWithBootstrapFileStatus = pathWithBootstrapFileStatus; - } - - public boolean includeBootstrapFilePath() { - return pathWithBootstrapFileStatus != null; - } - - public BaseFileWithLogsSplit buildSplit(Path file, long start, long length, String[] hosts) { - BaseFileWithLogsSplit bs = new BaseFileWithLogsSplit(file, start, length, hosts); - bs.setBelongToIncrementalSplit(belongToIncrementalPath); - bs.setDeltaLogFiles(deltaLogFiles); - bs.setMaxCommitTime(maxCommitTime); - bs.setBasePath(basePath); - bs.setBaseFilePath(baseFilePath); - return bs; - } -} diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/RealtimeFileStatus.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/RealtimeFileStatus.java index e8e1a28987c56..641aa2759ff20 100644 --- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/RealtimeFileStatus.java +++ 
b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/RealtimeFileStatus.java @@ -18,13 +18,14 @@ package org.apache.hudi.hadoop; -import org.apache.hudi.common.model.HoodieLogFile; - import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.Path; +import org.apache.hudi.common.model.HoodieLogFile; +import org.apache.hudi.common.util.Option; +import org.apache.hudi.hadoop.realtime.HoodieRealtimePath; +import org.apache.hudi.hadoop.realtime.HoodieVirtualKeyInfo; import java.io.IOException; -import java.util.ArrayList; import java.util.List; /** @@ -34,51 +35,62 @@ * in Path. */ public class RealtimeFileStatus extends FileStatus { - // a flag to mark this split is produced by incremental query or not. - private boolean belongToIncrementalFileStatus = false; - // the log files belong this fileStatus. - private List deltaLogFiles = new ArrayList<>(); - // max commit time of current fileStatus. + /** + * Base path of the table this path belongs to + */ + private final String basePath; + /** + * List of delta log-files holding updated records for this base-file + */ + private final List deltaLogFiles; + /** + * Marks whether this path was produced as part of an incremental query + */ + private final boolean belongsToIncrementalQuery; + /** + * Latest commit instant available at the time of the query in which all of the files + * pertaining to this split are represented + */ private String maxCommitTime = ""; - // the basePath of current hoodie table. - private String basePath = ""; - // the base file belong to this status; - private String baseFilePath = ""; - // the bootstrap file belong to this status. - // only if current query table is bootstrap table, this field is used. + /** + * File status for the Bootstrap file (only relevant if this table is a bootstrapped table) + */ private FileStatus bootStrapFileStatus; - - public RealtimeFileStatus(FileStatus fileStatus) throws IOException { + /** + * Virtual key configuration of the table this split belongs to + */ + private final Option virtualKeyInfo; + + public RealtimeFileStatus(FileStatus fileStatus, + String basePath, + List deltaLogFiles, + boolean belongsToIncrementalQuery, + Option virtualKeyInfo) throws IOException { super(fileStatus); + this.basePath = basePath; + this.deltaLogFiles = deltaLogFiles; + this.belongsToIncrementalQuery = belongsToIncrementalQuery; + this.virtualKeyInfo = virtualKeyInfo; } @Override public Path getPath() { Path path = super.getPath(); - PathWithLogFilePath pathWithLogFilePath = new PathWithLogFilePath(path.getParent(), path.getName()); - pathWithLogFilePath.setBelongToIncrementalPath(belongToIncrementalFileStatus); - pathWithLogFilePath.setDeltaLogFiles(deltaLogFiles); - pathWithLogFilePath.setMaxCommitTime(maxCommitTime); - pathWithLogFilePath.setBasePath(basePath); - pathWithLogFilePath.setBaseFilePath(baseFilePath); + + HoodieRealtimePath realtimePath = new HoodieRealtimePath(path.getParent(), path.getName(), basePath, + deltaLogFiles, maxCommitTime, belongsToIncrementalQuery, virtualKeyInfo); + if (bootStrapFileStatus != null) { - pathWithLogFilePath.setPathWithBootstrapFileStatus((PathWithBootstrapFileStatus)bootStrapFileStatus.getPath()); + realtimePath.setPathWithBootstrapFileStatus((PathWithBootstrapFileStatus)bootStrapFileStatus.getPath()); } - return pathWithLogFilePath; - } - public void setBelongToIncrementalFileStatus(boolean belongToIncrementalFileStatus) { - this.belongToIncrementalFileStatus = belongToIncrementalFileStatus; + return realtimePath; } public List getDeltaLogFiles() { return 
deltaLogFiles; } - public void setDeltaLogFiles(List deltaLogFiles) { - this.deltaLogFiles = deltaLogFiles; - } - public String getMaxCommitTime() { return maxCommitTime; } @@ -87,18 +99,6 @@ public void setMaxCommitTime(String maxCommitTime) { this.maxCommitTime = maxCommitTime; } - public String getBasePath() { - return basePath; - } - - public void setBasePath(String basePath) { - this.basePath = basePath; - } - - public void setBaseFilePath(String baseFilePath) { - this.baseFilePath = baseFilePath; - } - public void setBootStrapFileStatus(FileStatus bootStrapFileStatus) { this.bootStrapFileStatus = bootStrapFileStatus; } diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/hive/HoodieCombineHiveInputFormat.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/hive/HoodieCombineHiveInputFormat.java index c24c75359f588..8736883cea72c 100644 --- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/hive/HoodieCombineHiveInputFormat.java +++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/hive/HoodieCombineHiveInputFormat.java @@ -18,12 +18,6 @@ package org.apache.hudi.hadoop.hive; -import org.apache.hudi.common.util.ReflectionUtils; -import org.apache.hudi.common.util.ValidationUtils; -import org.apache.hudi.hadoop.HoodieParquetInputFormat; -import org.apache.hudi.hadoop.realtime.HoodieCombineRealtimeRecordReader; -import org.apache.hudi.hadoop.realtime.HoodieParquetRealtimeInputFormat; -import org.apache.hudi.hadoop.utils.HoodieInputFormatUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.Path; @@ -62,6 +56,13 @@ import org.apache.hadoop.mapred.lib.CombineFileInputFormat; import org.apache.hadoop.mapred.lib.CombineFileSplit; import org.apache.hadoop.mapreduce.JobContext; +import org.apache.hudi.common.util.ReflectionUtils; +import org.apache.hudi.common.util.ValidationUtils; +import org.apache.hudi.hadoop.HoodieParquetInputFormat; +import org.apache.hudi.hadoop.HoodieParquetInputFormatBase; +import org.apache.hudi.hadoop.realtime.HoodieCombineRealtimeRecordReader; +import org.apache.hudi.hadoop.realtime.HoodieParquetRealtimeInputFormat; +import org.apache.hudi.hadoop.utils.HoodieInputFormatUtils; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; @@ -876,7 +877,7 @@ protected List listStatus(JobContext job) throws IOException { LOG.info("Listing status in HoodieCombineHiveInputFormat.HoodieCombineFileInputFormatShim"); List result; if (hoodieFilter) { - HoodieParquetInputFormat input; + HoodieParquetInputFormatBase input; if (isRealTime) { LOG.info("Using HoodieRealtimeInputFormat"); input = createParquetRealtimeInputFormat(); @@ -916,7 +917,7 @@ public CombineFileSplit[] getSplits(JobConf job, int numSplits) throws IOExcepti job.set("hudi.hive.realtime", "true"); InputSplit[] splits; if (hoodieFilter) { - HoodieParquetInputFormat input = createParquetRealtimeInputFormat(); + HoodieParquetRealtimeInputFormat input = createParquetRealtimeInputFormat(); input.setConf(job); splits = input.getSplits(job, numSplits); } else { diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/AbstractRealtimeRecordReader.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/AbstractRealtimeRecordReader.java index 78ac8805d8aaf..030e20f2278b4 100644 --- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/AbstractRealtimeRecordReader.java +++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/AbstractRealtimeRecordReader.java @@ -19,6 +19,7 @@ 
package org.apache.hudi.hadoop.realtime; import org.apache.hudi.common.model.HoodieAvroPayload; +import org.apache.hudi.common.model.HoodiePayloadProps; import org.apache.hudi.common.table.HoodieTableMetaClient; import org.apache.hudi.common.table.log.LogReaderUtils; import org.apache.hudi.exception.HoodieIOException; @@ -39,6 +40,7 @@ import java.util.Arrays; import java.util.List; import java.util.Map; +import java.util.Properties; import java.util.stream.Collectors; /** @@ -50,6 +52,7 @@ public abstract class AbstractRealtimeRecordReader { protected final RealtimeSplit split; protected final JobConf jobConf; protected final boolean usesCustomPayload; + protected Properties payloadProps = new Properties(); // Schema handles private Schema readerSchema; private Schema writerSchema; @@ -62,7 +65,11 @@ public AbstractRealtimeRecordReader(RealtimeSplit split, JobConf job) { LOG.info("columnIds ==> " + job.get(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR)); LOG.info("partitioningColumns ==> " + job.get(hive_metastoreConstants.META_TABLE_PARTITION_COLUMNS, "")); try { - this.usesCustomPayload = usesCustomPayload(); + HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder().setConf(jobConf).setBasePath(split.getBasePath()).build(); + if (metaClient.getTableConfig().getPreCombineField() != null) { + this.payloadProps.setProperty(HoodiePayloadProps.PAYLOAD_ORDERING_FIELD_PROP_KEY, metaClient.getTableConfig().getPreCombineField()); + } + this.usesCustomPayload = usesCustomPayload(metaClient); LOG.info("usesCustomPayload ==> " + this.usesCustomPayload); init(); } catch (IOException e) { @@ -70,8 +77,7 @@ public AbstractRealtimeRecordReader(RealtimeSplit split, JobConf job) { } } - private boolean usesCustomPayload() { - HoodieTableMetaClient metaClient = HoodieTableMetaClient.builder().setConf(jobConf).setBasePath(split.getBasePath()).build(); + private boolean usesCustomPayload(HoodieTableMetaClient metaClient) { return !(metaClient.getTableConfig().getPayloadClass().contains(HoodieAvroPayload.class.getName()) || metaClient.getTableConfig().getPayloadClass().contains("org.apache.hudi.OverwriteWithLatestAvroPayload")); } diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieHFileRealtimeInputFormat.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieHFileRealtimeInputFormat.java index 525bec61333e4..799d90bce5df4 100644 --- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieHFileRealtimeInputFormat.java +++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieHFileRealtimeInputFormat.java @@ -18,15 +18,15 @@ package org.apache.hudi.hadoop.realtime; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.serde2.ColumnProjectionUtils; import org.apache.hadoop.io.ArrayWritable; import org.apache.hadoop.io.NullWritable; -import org.apache.hadoop.mapred.FileSplit; import org.apache.hadoop.mapred.InputSplit; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.RecordReader; import org.apache.hadoop.mapred.Reporter; -import org.apache.hudi.common.table.timeline.HoodieDefaultTimeline; import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.ValidationUtils; import org.apache.hudi.hadoop.HoodieHFileInputFormat; @@ -38,29 +38,18 @@ import org.apache.log4j.Logger; import java.io.IOException; -import java.util.Arrays; -import java.util.stream.Stream; /** * HoodieRealtimeInputFormat for HUDI datasets which store data in HFile 
base file format. */ @UseRecordReaderFromInputFormat @UseFileSplitsFromInputFormat -public class HoodieHFileRealtimeInputFormat extends HoodieHFileInputFormat { +public class HoodieHFileRealtimeInputFormat extends HoodieMergeOnReadTableInputFormat { private static final Logger LOG = LogManager.getLogger(HoodieHFileRealtimeInputFormat.class); - @Override - public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException { - Stream fileSplits = Arrays.stream(super.getSplits(job, numSplits)).map(is -> (FileSplit) is); - return HoodieRealtimeInputFormatUtils.getRealtimeSplits(job, fileSplits); - } - - @Override - protected HoodieDefaultTimeline filterInstantsTimeline(HoodieDefaultTimeline timeline) { - // no specific filtering for Realtime format - return timeline; - } + // NOTE: We're only using {@code HoodieHFileInputFormat} to compose {@code RecordReader} + private final HoodieHFileInputFormat hFileInputFormat = new HoodieHFileInputFormat(); @Override public RecordReader getRecordReader(final InputSplit split, final JobConf jobConf, @@ -99,6 +88,12 @@ public RecordReader getRecordReader(final InputSpli "HoodieRealtimeRecordReader can only work on HoodieRealtimeFileSplit and not with " + split); return new HoodieRealtimeRecordReader((HoodieRealtimeFileSplit) split, jobConf, - super.getRecordReader(split, jobConf, reporter)); + hFileInputFormat.getRecordReader(split, jobConf, reporter)); + } + + @Override + protected boolean isSplitable(FileSystem fs, Path filename) { + // This file isn't splittable. + return false; } } \ No newline at end of file diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieMergeOnReadTableInputFormat.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieMergeOnReadTableInputFormat.java new file mode 100644 index 0000000000000..982d52b0d4807 --- /dev/null +++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieMergeOnReadTableInputFormat.java @@ -0,0 +1,375 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hudi.hadoop.realtime; + +import org.apache.hadoop.conf.Configurable; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.mapred.FileInputFormat; +import org.apache.hadoop.mapred.FileSplit; +import org.apache.hadoop.mapred.InputSplit; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapred.SplitLocationInfo; +import org.apache.hadoop.mapreduce.Job; +import org.apache.hudi.common.model.FileSlice; +import org.apache.hudi.common.model.HoodieBaseFile; +import org.apache.hudi.common.model.HoodieCommitMetadata; +import org.apache.hudi.common.model.HoodieFileGroup; +import org.apache.hudi.common.model.HoodieLogFile; +import org.apache.hudi.common.table.HoodieTableMetaClient; +import org.apache.hudi.common.table.timeline.HoodieInstant; +import org.apache.hudi.common.table.timeline.HoodieTimeline; +import org.apache.hudi.common.table.view.HoodieTableFileSystemView; +import org.apache.hudi.common.util.Option; +import org.apache.hudi.exception.HoodieException; +import org.apache.hudi.exception.HoodieIOException; +import org.apache.hudi.hadoop.BootstrapBaseFileSplit; +import org.apache.hudi.hadoop.FileStatusWithBootstrapBaseFile; +import org.apache.hudi.hadoop.HiveHoodieTableFileIndex; +import org.apache.hudi.hadoop.HoodieCopyOnWriteTableInputFormat; +import org.apache.hudi.hadoop.LocatedFileStatusWithBootstrapBaseFile; +import org.apache.hudi.hadoop.RealtimeFileStatus; +import org.apache.hudi.hadoop.utils.HoodieInputFormatUtils; +import org.apache.hudi.hadoop.utils.HoodieRealtimeInputFormatUtils; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +import static org.apache.hudi.common.util.ValidationUtils.checkState; + +/** + * Base implementation of Hive's {@link FileInputFormat} allowing for reading of Hudi's + * Merge-on-Read (MOR) tables in various configurations: + * + * <ul>
+ *   <li>Snapshot mode: reading table's state as of a particular timestamp (or instant, in Hudi's terms)</li> + *   <li>Incremental mode: reading records added or updated after a particular timestamp (or instant, in Hudi's terms)</li> + *   <li>External mode: reading non-Hudi partitions</li> + * </ul> + *
    + * NOTE: This class is invariant of the underlying file-format of the files being read + */ +public class HoodieMergeOnReadTableInputFormat extends HoodieCopyOnWriteTableInputFormat implements Configurable { + + @Override + public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException { + List fileSplits = Arrays.stream(super.getSplits(job, numSplits)) + .map(is -> (FileSplit) is) + .collect(Collectors.toList()); + + return (containsIncrementalQuerySplits(fileSplits) ? filterIncrementalQueryFileSplits(fileSplits) : fileSplits) + .toArray(new FileSplit[0]); + } + + @Override + protected FileStatus createFileStatusUnchecked(FileSlice fileSlice, HiveHoodieTableFileIndex fileIndex, Option virtualKeyInfoOpt) { + Option baseFileOpt = fileSlice.getBaseFile(); + Option latestLogFileOpt = fileSlice.getLatestLogFile(); + Stream logFiles = fileSlice.getLogFiles(); + + Option latestCompletedInstantOpt = fileIndex.getLatestCompletedInstant(); + String tableBasePath = fileIndex.getBasePath(); + + // Check if we're reading a MOR table + if (baseFileOpt.isPresent()) { + return createRealtimeFileStatusUnchecked(baseFileOpt.get(), logFiles, tableBasePath, latestCompletedInstantOpt, virtualKeyInfoOpt); + } else if (latestLogFileOpt.isPresent()) { + return createRealtimeFileStatusUnchecked(latestLogFileOpt.get(), logFiles, tableBasePath, latestCompletedInstantOpt, virtualKeyInfoOpt); + } else { + throw new IllegalStateException("Invalid state: either base-file or log-file has to be present"); + } + } + + /** + * Keep the logic of mor_incr_view as same as spark datasource. + * Step1: Get list of commits to be fetched based on start commit and max commits(for snapshot max commits is -1). + * Step2: Get list of affected files status for these affected file status. + * Step3: Construct HoodieTableFileSystemView based on those affected file status. + * a. Filter affected partitions based on inputPaths. + * b. Get list of fileGroups based on affected partitions by fsView.getAllFileGroups. + * Step4: Set input paths based on filtered affected partition paths. changes that amony original input paths passed to + * this method. some partitions did not have commits as part of the trimmed down list of commits and hence we need this step. + * Step5: Find candidate fileStatus, since when we get baseFileStatus from HoodieTableFileSystemView, + * the BaseFileStatus will missing file size information. + * We should use candidate fileStatus to update the size information for BaseFileStatus. + * Step6: For every file group from step3(b) + * Get 1st available base file from all file slices. then we use candidate file status to update the baseFileStatus, + * and construct RealTimeFileStatus and add it to result along with log files. + * If file group just has log files, construct RealTimeFileStatus and add it to result. 
+ * TODO: unify the incremental view code between hive/spark-sql and spark datasource + */ + @Override + protected List listStatusForIncrementalMode(JobConf job, + HoodieTableMetaClient tableMetaClient, + List inputPaths, + String incrementalTableName) throws IOException { + List result = new ArrayList<>(); + Job jobContext = Job.getInstance(job); + + // step1 + Option timeline = HoodieInputFormatUtils.getFilteredCommitsTimeline(jobContext, tableMetaClient); + if (!timeline.isPresent()) { + return result; + } + HoodieTimeline commitsTimelineToReturn = HoodieInputFormatUtils.getHoodieTimelineForIncrementalQuery(jobContext, incrementalTableName, timeline.get()); + Option> commitsToCheck = Option.of(commitsTimelineToReturn.getInstants().collect(Collectors.toList())); + if (!commitsToCheck.isPresent()) { + return result; + } + // step2 + commitsToCheck.get().sort(HoodieInstant::compareTo); + List metadataList = commitsToCheck + .get().stream().map(instant -> { + try { + return HoodieInputFormatUtils.getCommitMetadata(instant, commitsTimelineToReturn); + } catch (IOException e) { + throw new HoodieException(String.format("cannot get metadata for instant: %s", instant)); + } + }).collect(Collectors.toList()); + + // build fileGroup from fsView + List affectedFileStatus = Arrays.asList(HoodieInputFormatUtils + .listAffectedFilesForCommits(job, new Path(tableMetaClient.getBasePath()), metadataList)); + // step3 + HoodieTableFileSystemView fsView = new HoodieTableFileSystemView(tableMetaClient, commitsTimelineToReturn, affectedFileStatus.toArray(new FileStatus[0])); + // build fileGroup from fsView + Path basePath = new Path(tableMetaClient.getBasePath()); + // filter affectedPartition by inputPaths + List affectedPartition = HoodieInputFormatUtils.getWritePartitionPaths(metadataList).stream() + .filter(k -> k.isEmpty() ? inputPaths.contains(basePath) : inputPaths.contains(new Path(basePath, k))).collect(Collectors.toList()); + if (affectedPartition.isEmpty()) { + return result; + } + List fileGroups = affectedPartition.stream() + .flatMap(partitionPath -> fsView.getAllFileGroups(partitionPath)).collect(Collectors.toList()); + // step4 + setInputPaths(job, affectedPartition.stream() + .map(p -> p.isEmpty() ? basePath.toString() : new Path(basePath, p).toString()).collect(Collectors.joining(","))); + + // step5 + // find all file status in partitionPaths. + FileStatus[] fileStatuses = doListStatus(job); + Map candidateFileStatus = new HashMap<>(); + for (int i = 0; i < fileStatuses.length; i++) { + String key = fileStatuses[i].getPath().toString(); + candidateFileStatus.put(key, fileStatuses[i]); + } + + Option virtualKeyInfoOpt = getHoodieVirtualKeyInfo(tableMetaClient); + String maxCommitTime = fsView.getLastInstant().get().getTimestamp(); + // step6 + result.addAll(collectAllIncrementalFiles(fileGroups, maxCommitTime, basePath.toString(), candidateFileStatus, virtualKeyInfoOpt)); + return result; + } + + @Override + protected boolean isSplitable(FileSystem fs, Path filename) { + if (filename instanceof HoodieRealtimePath) { + return ((HoodieRealtimePath) filename).isSplitable(); + } + + return super.isSplitable(fs, filename); + } + + // make split for path. + // When query the incremental view, the read files may be bootstrap files, we wrap those bootstrap files into + // PathWithLogFilePath, so those bootstrap files should be processed int this function. 
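// --------------------------------------------------------------------------------------------
// Editorial sketch (not part of the patch): the incremental listing above (steps 1 to 6) is
// driven by per-table Hive session properties that Hudi reads through HoodieHiveUtils. Below
// is a minimal, self-contained illustration of the consumer side; the table name "trips" and
// the instant value are placeholders, while the "hoodie.<table>.consume.*" keys follow the
// pattern Hudi documents for Hive incremental queries.
import org.apache.hadoop.mapred.JobConf;

class IncrementalQueryConfExample {
  public static void main(String[] args) {
    JobConf job = new JobConf();
    // Switch this table from the default SNAPSHOT scan to an INCREMENTAL scan
    job.set("hoodie.trips.consume.mode", "INCREMENTAL");
    // Step 1 above filters the commit timeline to instants after this start instant...
    job.set("hoodie.trips.consume.start.timestamp", "20220101000000");
    // ...and caps how many commits are pulled (-1 would mean "all", as in the snapshot case)
    job.set("hoodie.trips.consume.max.commits", "3");
    System.out.println(job.get("hoodie.trips.consume.mode"));
  }
}
// --------------------------------------------------------------------------------------------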
+ @Override + protected FileSplit makeSplit(Path file, long start, long length, String[] hosts) { + if (file instanceof HoodieRealtimePath) { + return doMakeSplitForRealtimePath((HoodieRealtimePath) file, start, length, hosts, null); + } + return super.makeSplit(file, start, length, hosts); + } + + @Override + protected FileSplit makeSplit(Path file, long start, long length, String[] hosts, String[] inMemoryHosts) { + if (file instanceof HoodieRealtimePath) { + return doMakeSplitForRealtimePath((HoodieRealtimePath) file, start, length, hosts, inMemoryHosts); + } + return super.makeSplit(file, start, length, hosts, inMemoryHosts); + } + + private static List collectAllIncrementalFiles(List fileGroups, + String maxCommitTime, + String basePath, + Map candidateFileStatus, + Option virtualKeyInfoOpt) { + + List result = new ArrayList<>(); + fileGroups.stream().forEach(f -> { + try { + List baseFiles = f.getAllFileSlices().filter(slice -> slice.getBaseFile().isPresent()).collect(Collectors.toList()); + if (!baseFiles.isEmpty()) { + FileStatus baseFileStatus = HoodieInputFormatUtils.getFileStatus(baseFiles.get(0).getBaseFile().get()); + String baseFilePath = baseFileStatus.getPath().toUri().toString(); + if (!candidateFileStatus.containsKey(baseFilePath)) { + throw new HoodieException("Error obtaining fileStatus for file: " + baseFilePath); + } + List deltaLogFiles = f.getLatestFileSlice().get().getLogFiles().collect(Collectors.toList()); + // We cannot use baseFileStatus.getPath() here, since baseFileStatus.getPath() missing file size information. + // So we use candidateFileStatus.get(baseFileStatus.getPath()) to get a correct path. + RealtimeFileStatus fileStatus = new RealtimeFileStatus(candidateFileStatus.get(baseFilePath), + basePath, deltaLogFiles, true, virtualKeyInfoOpt); + fileStatus.setMaxCommitTime(maxCommitTime); + if (baseFileStatus instanceof LocatedFileStatusWithBootstrapBaseFile || baseFileStatus instanceof FileStatusWithBootstrapBaseFile) { + fileStatus.setBootStrapFileStatus(baseFileStatus); + } + result.add(fileStatus); + } + // add file group which has only logs. + if (f.getLatestFileSlice().isPresent() && baseFiles.isEmpty()) { + List logFileStatus = f.getLatestFileSlice().get().getLogFiles().map(logFile -> logFile.getFileStatus()).collect(Collectors.toList()); + if (logFileStatus.size() > 0) { + List deltaLogFiles = logFileStatus.stream().map(l -> new HoodieLogFile(l.getPath(), l.getLen())).collect(Collectors.toList()); + RealtimeFileStatus fileStatus = new RealtimeFileStatus(logFileStatus.get(0), basePath, + deltaLogFiles, true, virtualKeyInfoOpt); + fileStatus.setMaxCommitTime(maxCommitTime); + result.add(fileStatus); + } + } + } catch (IOException e) { + throw new HoodieException("Error obtaining data file/log file grouping ", e); + } + }); + return result; + } + + private FileSplit doMakeSplitForRealtimePath(HoodieRealtimePath path, long start, long length, String[] hosts, String[] inMemoryHosts) { + if (path.includeBootstrapFilePath()) { + FileSplit bf = + inMemoryHosts == null + ? 
super.makeSplit(path.getPathWithBootstrapFileStatus(), start, length, hosts) + : super.makeSplit(path.getPathWithBootstrapFileStatus(), start, length, hosts, inMemoryHosts); + return createRealtimeBoostrapBaseFileSplit( + (BootstrapBaseFileSplit) bf, + path.getBasePath(), + path.getDeltaLogFiles(), + path.getMaxCommitTime(), + path.getBelongsToIncrementalQuery(), + path.getVirtualKeyInfo() + ); + } + + return createRealtimeFileSplit(path, start, length, hosts); + } + + private static boolean containsIncrementalQuerySplits(List fileSplits) { + return fileSplits.stream().anyMatch(HoodieRealtimeInputFormatUtils::doesBelongToIncrementalQuery); + } + + private static List filterIncrementalQueryFileSplits(List fileSplits) { + return fileSplits.stream().filter(HoodieRealtimeInputFormatUtils::doesBelongToIncrementalQuery) + .collect(Collectors.toList()); + } + + private static HoodieRealtimeFileSplit createRealtimeFileSplit(HoodieRealtimePath path, long start, long length, String[] hosts) { + try { + return new HoodieRealtimeFileSplit(new FileSplit(path, start, length, hosts), path); + } catch (IOException e) { + throw new HoodieIOException(String.format("Failed to create instance of %s", HoodieRealtimeFileSplit.class.getName()), e); + } + } + + private static HoodieRealtimeBootstrapBaseFileSplit createRealtimeBoostrapBaseFileSplit(BootstrapBaseFileSplit split, + String basePath, + List logFiles, + String maxInstantTime, + boolean belongsToIncrementalQuery, + Option virtualKeyInfoOpt) { + try { + String[] hosts = split.getLocationInfo() != null ? Arrays.stream(split.getLocationInfo()) + .filter(x -> !x.isInMemory()).toArray(String[]::new) : new String[0]; + String[] inMemoryHosts = split.getLocationInfo() != null ? Arrays.stream(split.getLocationInfo()) + .filter(SplitLocationInfo::isInMemory).toArray(String[]::new) : new String[0]; + FileSplit baseSplit = new FileSplit(split.getPath(), split.getStart(), split.getLength(), + hosts, inMemoryHosts); + return new HoodieRealtimeBootstrapBaseFileSplit(baseSplit, basePath, logFiles, maxInstantTime, split.getBootstrapFileSplit(), + belongsToIncrementalQuery, virtualKeyInfoOpt); + } catch (IOException e) { + throw new HoodieIOException("Error creating hoodie real time split ", e); + } + } + + /** + * Creates {@link RealtimeFileStatus} for the file-slice where base file is present + */ + private static RealtimeFileStatus createRealtimeFileStatusUnchecked(HoodieBaseFile baseFile, + Stream logFiles, + String basePath, + Option latestCompletedInstantOpt, + Option virtualKeyInfoOpt) { + FileStatus baseFileStatus = getFileStatusUnchecked(baseFile); + List sortedLogFiles = logFiles.sorted(HoodieLogFile.getLogFileComparator()).collect(Collectors.toList()); + + try { + RealtimeFileStatus rtFileStatus = new RealtimeFileStatus(baseFileStatus, basePath, sortedLogFiles, + false, virtualKeyInfoOpt); + + if (latestCompletedInstantOpt.isPresent()) { + HoodieInstant latestCompletedInstant = latestCompletedInstantOpt.get(); + checkState(latestCompletedInstant.isCompleted()); + + rtFileStatus.setMaxCommitTime(latestCompletedInstant.getTimestamp()); + } + + if (baseFileStatus instanceof LocatedFileStatusWithBootstrapBaseFile || baseFileStatus instanceof FileStatusWithBootstrapBaseFile) { + rtFileStatus.setBootStrapFileStatus(baseFileStatus); + } + + return rtFileStatus; + } catch (IOException e) { + throw new HoodieIOException(String.format("Failed to init %s", RealtimeFileStatus.class.getSimpleName()), e); + } + } + + /** + * Creates {@link RealtimeFileStatus} for the 
file-slice where base file is NOT present + */ + private static RealtimeFileStatus createRealtimeFileStatusUnchecked(HoodieLogFile latestLogFile, + Stream logFiles, + String basePath, + Option latestCompletedInstantOpt, + Option virtualKeyInfoOpt) { + List sortedLogFiles = logFiles.sorted(HoodieLogFile.getLogFileComparator()).collect(Collectors.toList()); + try { + RealtimeFileStatus rtFileStatus = new RealtimeFileStatus(latestLogFile.getFileStatus(), basePath, + sortedLogFiles, false, virtualKeyInfoOpt); + + if (latestCompletedInstantOpt.isPresent()) { + HoodieInstant latestCompletedInstant = latestCompletedInstantOpt.get(); + checkState(latestCompletedInstant.isCompleted()); + + rtFileStatus.setMaxCommitTime(latestCompletedInstant.getTimestamp()); + } + + return rtFileStatus; + } catch (IOException e) { + throw new HoodieIOException(String.format("Failed to init %s", RealtimeFileStatus.class.getSimpleName()), e); + } + } +} + diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieParquetRealtimeInputFormat.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieParquetRealtimeInputFormat.java index f3cf4ffa86578..e8c806ed2cf67 100644 --- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieParquetRealtimeInputFormat.java +++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieParquetRealtimeInputFormat.java @@ -18,253 +18,60 @@ package org.apache.hudi.hadoop.realtime; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hudi.common.fs.FSUtils; -import org.apache.hudi.common.model.FileSlice; -import org.apache.hudi.common.model.HoodieCommitMetadata; -import org.apache.hudi.common.model.HoodieFileGroup; -import org.apache.hudi.common.model.HoodieLogFile; -import org.apache.hudi.common.table.HoodieTableMetaClient; -import org.apache.hudi.common.table.timeline.HoodieDefaultTimeline; -import org.apache.hudi.common.table.timeline.HoodieInstant; -import org.apache.hudi.common.table.timeline.HoodieTimeline; -import org.apache.hudi.common.table.view.HoodieTableFileSystemView; -import org.apache.hudi.common.util.Option; -import org.apache.hudi.common.util.ValidationUtils; -import org.apache.hudi.exception.HoodieException; -import org.apache.hudi.hadoop.BootstrapBaseFileSplit; -import org.apache.hudi.hadoop.FileStatusWithBootstrapBaseFile; -import org.apache.hudi.hadoop.LocatedFileStatusWithBootstrapBaseFile; -import org.apache.hudi.hadoop.RealtimeFileStatus; -import org.apache.hudi.hadoop.PathWithLogFilePath; -import org.apache.hudi.hadoop.HoodieParquetInputFormat; -import org.apache.hudi.hadoop.UseFileSplitsFromInputFormat; -import org.apache.hudi.hadoop.UseRecordReaderFromInputFormat; -import org.apache.hudi.hadoop.utils.HoodieInputFormatUtils; -import org.apache.hudi.hadoop.utils.HoodieRealtimeInputFormatUtils; - -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.mapreduce.Job; -import org.apache.hadoop.conf.Configurable; -import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.hive.serde2.ColumnProjectionUtils; import org.apache.hadoop.io.ArrayWritable; import org.apache.hadoop.io.NullWritable; -import org.apache.hadoop.mapred.FileSplit; import org.apache.hadoop.mapred.InputSplit; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.RecordReader; import org.apache.hadoop.mapred.Reporter; +import org.apache.hudi.common.fs.FSUtils; +import org.apache.hudi.common.util.ValidationUtils; +import org.apache.hudi.hadoop.HoodieParquetInputFormat; +import 
org.apache.hudi.hadoop.UseFileSplitsFromInputFormat; +import org.apache.hudi.hadoop.UseRecordReaderFromInputFormat; +import org.apache.hudi.hadoop.utils.HoodieInputFormatUtils; +import org.apache.hudi.hadoop.utils.HoodieRealtimeInputFormatUtils; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import java.util.Map; -import java.util.HashMap; -import java.util.stream.Collectors; - /** * Input Format, that provides a real-time view of data in a Hoodie table. */ @UseRecordReaderFromInputFormat @UseFileSplitsFromInputFormat -public class HoodieParquetRealtimeInputFormat extends HoodieParquetInputFormat implements Configurable { +public class HoodieParquetRealtimeInputFormat extends HoodieParquetInputFormat { private static final Logger LOG = LogManager.getLogger(HoodieParquetRealtimeInputFormat.class); + public HoodieParquetRealtimeInputFormat() { + super(new HoodieMergeOnReadTableInputFormat()); + } + // To make Hive on Spark queries work with RT tables. Our theory is that due to // {@link org.apache.hadoop.hive.ql.io.parquet.ProjectionPusher} // not handling empty list correctly, the ParquetRecordReaderWrapper ends up adding the same column ids multiple // times which ultimately breaks the query. - - @Override - public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException { - - List fileSplits = Arrays.stream(super.getSplits(job, numSplits)).map(is -> (FileSplit) is).collect(Collectors.toList()); - - boolean isIncrementalSplits = HoodieRealtimeInputFormatUtils.isIncrementalQuerySplits(fileSplits); - - return isIncrementalSplits - ? HoodieRealtimeInputFormatUtils.getIncrementalRealtimeSplits(job, fileSplits.stream()) - : HoodieRealtimeInputFormatUtils.getRealtimeSplits(job, fileSplits.stream()); - } - - /** - * Keep the logic of mor_incr_view as same as spark datasource. - * Step1: Get list of commits to be fetched based on start commit and max commits(for snapshot max commits is -1). - * Step2: Get list of affected files status for these affected file status. - * Step3: Construct HoodieTableFileSystemView based on those affected file status. - * a. Filter affected partitions based on inputPaths. - * b. Get list of fileGroups based on affected partitions by fsView.getAllFileGroups. - * Step4: Set input paths based on filtered affected partition paths. changes that amony original input paths passed to - * this method. some partitions did not have commits as part of the trimmed down list of commits and hence we need this step. - * Step5: Find candidate fileStatus, since when we get baseFileStatus from HoodieTableFileSystemView, - * the BaseFileStatus will missing file size information. - * We should use candidate fileStatus to update the size information for BaseFileStatus. - * Step6: For every file group from step3(b) - * Get 1st available base file from all file slices. then we use candidate file status to update the baseFileStatus, - * and construct RealTimeFileStatus and add it to result along with log files. - * If file group just has log files, construct RealTimeFileStatus and add it to result. 
- * TODO: unify the incremental view code between hive/spark-sql and spark datasource - */ - @Override - protected List listStatusForIncrementalMode( - JobConf job, HoodieTableMetaClient tableMetaClient, List inputPaths, String incrementalTable) throws IOException { - List result = new ArrayList<>(); - Job jobContext = Job.getInstance(job); - - // step1 - Option timeline = HoodieInputFormatUtils.getFilteredCommitsTimeline(jobContext, tableMetaClient); - if (!timeline.isPresent()) { - return result; - } - HoodieTimeline commitsTimelineToReturn = HoodieInputFormatUtils.getHoodieTimelineForIncrementalQuery(jobContext, incrementalTable, timeline.get()); - Option> commitsToCheck = Option.of(commitsTimelineToReturn.getInstants().collect(Collectors.toList())); - if (!commitsToCheck.isPresent()) { - return result; - } - // step2 - commitsToCheck.get().sort(HoodieInstant::compareTo); - List metadataList = commitsToCheck - .get().stream().map(instant -> { - try { - return HoodieInputFormatUtils.getCommitMetadata(instant, commitsTimelineToReturn); - } catch (IOException e) { - throw new HoodieException(String.format("cannot get metadata for instant: %s", instant)); - } - }).collect(Collectors.toList()); - - // build fileGroup from fsView - List affectedFileStatus = Arrays.asList(HoodieInputFormatUtils - .listAffectedFilesForCommits(new Path(tableMetaClient.getBasePath()), metadataList)); - // step3 - HoodieTableFileSystemView fsView = new HoodieTableFileSystemView(tableMetaClient, commitsTimelineToReturn, affectedFileStatus.toArray(new FileStatus[0])); - // build fileGroup from fsView - Path basePath = new Path(tableMetaClient.getBasePath()); - // filter affectedPartition by inputPaths - List affectedPartition = HoodieInputFormatUtils.getWritePartitionPaths(metadataList).stream() - .filter(k -> k.isEmpty() ? inputPaths.contains(basePath) : inputPaths.contains(new Path(basePath, k))).collect(Collectors.toList()); - if (affectedPartition.isEmpty()) { - return result; - } - List fileGroups = affectedPartition.stream() - .flatMap(partitionPath -> fsView.getAllFileGroups(partitionPath)).collect(Collectors.toList()); - // step4 - setInputPaths(job, affectedPartition.stream() - .map(p -> p.isEmpty() ? basePath.toString() : new Path(basePath, p).toString()).collect(Collectors.joining(","))); - - // step5 - // find all file status in partitionPaths. 
- FileStatus[] fileStatuses = doListStatus(job); - Map candidateFileStatus = new HashMap<>(); - for (int i = 0; i < fileStatuses.length; i++) { - String key = fileStatuses[i].getPath().toString(); - candidateFileStatus.put(key, fileStatuses[i]); - } - - String maxCommitTime = fsView.getLastInstant().get().getTimestamp(); - // step6 - result.addAll(collectAllIncrementalFiles(fileGroups, maxCommitTime, basePath.toString(), candidateFileStatus)); - return result; - } - - private List collectAllIncrementalFiles(List fileGroups, String maxCommitTime, String basePath, Map candidateFileStatus) { - List result = new ArrayList<>(); - fileGroups.stream().forEach(f -> { - try { - List baseFiles = f.getAllFileSlices().filter(slice -> slice.getBaseFile().isPresent()).collect(Collectors.toList()); - if (!baseFiles.isEmpty()) { - FileStatus baseFileStatus = HoodieInputFormatUtils.getFileStatus(baseFiles.get(0).getBaseFile().get()); - String baseFilePath = baseFileStatus.getPath().toUri().toString(); - if (!candidateFileStatus.containsKey(baseFilePath)) { - throw new HoodieException("Error obtaining fileStatus for file: " + baseFilePath); - } - // We cannot use baseFileStatus.getPath() here, since baseFileStatus.getPath() missing file size information. - // So we use candidateFileStatus.get(baseFileStatus.getPath()) to get a correct path. - RealtimeFileStatus fileStatus = new RealtimeFileStatus(candidateFileStatus.get(baseFilePath)); - fileStatus.setMaxCommitTime(maxCommitTime); - fileStatus.setBelongToIncrementalFileStatus(true); - fileStatus.setBasePath(basePath); - fileStatus.setBaseFilePath(baseFilePath); - fileStatus.setDeltaLogFiles(f.getLatestFileSlice().get().getLogFiles().collect(Collectors.toList())); - // try to set bootstrapfileStatus - if (baseFileStatus instanceof LocatedFileStatusWithBootstrapBaseFile || baseFileStatus instanceof FileStatusWithBootstrapBaseFile) { - fileStatus.setBootStrapFileStatus(baseFileStatus); - } - result.add(fileStatus); - } - // add file group which has only logs. - if (f.getLatestFileSlice().isPresent() && baseFiles.isEmpty()) { - List logFileStatus = f.getLatestFileSlice().get().getLogFiles().map(logFile -> logFile.getFileStatus()).collect(Collectors.toList()); - if (logFileStatus.size() > 0) { - RealtimeFileStatus fileStatus = new RealtimeFileStatus(logFileStatus.get(0)); - fileStatus.setBelongToIncrementalFileStatus(true); - fileStatus.setDeltaLogFiles(logFileStatus.stream().map(l -> new HoodieLogFile(l.getPath(), l.getLen())).collect(Collectors.toList())); - fileStatus.setMaxCommitTime(maxCommitTime); - fileStatus.setBasePath(basePath); - result.add(fileStatus); - } - } - } catch (IOException e) { - throw new HoodieException("Error obtaining data file/log file grouping ", e); - } - }); - return result; - } - - @Override - protected boolean includeLogFilesForSnapShotView() { - return true; - } - - @Override - protected boolean isSplitable(FileSystem fs, Path filename) { - if (filename instanceof PathWithLogFilePath) { - return ((PathWithLogFilePath)filename).splitable(); - } - return super.isSplitable(fs, filename); - } - - // make split for path. - // When query the incremental view, the read files may be bootstrap files, we wrap those bootstrap files into - // PathWithLogFilePath, so those bootstrap files should be processed int this function. 
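// --------------------------------------------------------------------------------------------
// Editorial sketch (not part of the patch): the logic removed in this hunk now lives in
// HoodieMergeOnReadTableInputFormat, and HoodieParquetRealtimeInputFormat simply hands that
// planner to its parent via super(new HoodieMergeOnReadTableInputFormat()). A simplified,
// self-contained model of that constructor delegation follows; the types below are stand-ins
// for illustration, not the actual Hudi classes.
import java.util.Collections;
import java.util.List;

class DelegationExample {
  interface SplitPlanner {
    List<String> planSplits(String tablePath);
  }

  static class MergeOnReadPlanner implements SplitPlanner {
    @Override
    public List<String> planSplits(String tablePath) {
      // One entry per file-slice: the base file plus its delta log files
      return Collections.singletonList(tablePath + "/fg1.parquet + [fg1.log.1, fg1.log.2]");
    }
  }

  static class RealtimeFormat {
    private final SplitPlanner planner;

    RealtimeFormat(SplitPlanner planner) {
      this.planner = planner; // mirrors super(new HoodieMergeOnReadTableInputFormat())
    }

    List<String> getSplits(String tablePath) {
      return planner.planSplits(tablePath); // no split planning re-implemented here
    }
  }

  public static void main(String[] args) {
    System.out.println(new RealtimeFormat(new MergeOnReadPlanner()).getSplits("/tmp/hudi/trips"));
  }
}
// --------------------------------------------------------------------------------------------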
- @Override - protected FileSplit makeSplit(Path file, long start, long length, String[] hosts) { - if (file instanceof PathWithLogFilePath) { - return doMakeSplitForPathWithLogFilePath((PathWithLogFilePath) file, start, length, hosts, null); - } - return super.makeSplit(file, start, length, hosts); - } - @Override - protected FileSplit makeSplit(Path file, long start, long length, String[] hosts, String[] inMemoryHosts) { - if (file instanceof PathWithLogFilePath) { - return doMakeSplitForPathWithLogFilePath((PathWithLogFilePath) file, start, length, hosts, inMemoryHosts); - } - return super.makeSplit(file, start, length, hosts, inMemoryHosts); - } + public RecordReader getRecordReader(final InputSplit split, final JobConf jobConf, + final Reporter reporter) throws IOException { + // sanity check + ValidationUtils.checkArgument(split instanceof RealtimeSplit, + "HoodieRealtimeRecordReader can only work on RealtimeSplit and not with " + split); + RealtimeSplit realtimeSplit = (RealtimeSplit) split; + addProjectionToJobConf(realtimeSplit, jobConf); + LOG.info("Creating record reader with readCols :" + jobConf.get(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR) + + ", Ids :" + jobConf.get(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR)); - private FileSplit doMakeSplitForPathWithLogFilePath(PathWithLogFilePath path, long start, long length, String[] hosts, String[] inMemoryHosts) { - if (!path.includeBootstrapFilePath()) { - return path.buildSplit(path, start, length, hosts); - } else { - FileSplit bf = - inMemoryHosts == null - ? super.makeSplit(path.getPathWithBootstrapFileStatus(), start, length, hosts) - : super.makeSplit(path.getPathWithBootstrapFileStatus(), start, length, hosts, inMemoryHosts); - return HoodieRealtimeInputFormatUtils - .createRealtimeBoostrapBaseFileSplit((BootstrapBaseFileSplit) bf, path.getBasePath(), path.getDeltaLogFiles(), path.getMaxCommitTime()); + // for log only split, set the parquet reader as empty. + if (FSUtils.isLogFile(realtimeSplit.getPath())) { + return new HoodieRealtimeRecordReader(realtimeSplit, jobConf, new HoodieEmptyRecordReader(realtimeSplit, jobConf)); } - } - @Override - protected HoodieDefaultTimeline filterInstantsTimeline(HoodieDefaultTimeline timeline) { - // no specific filtering for Realtime format - return timeline; + return new HoodieRealtimeRecordReader(realtimeSplit, jobConf, + super.getRecordReader(split, jobConf, reporter)); } void addProjectionToJobConf(final RealtimeSplit realtimeSplit, final JobConf jobConf) { @@ -287,32 +94,14 @@ void addProjectionToJobConf(final RealtimeSplit realtimeSplit, final JobConf job // TO fix this, hoodie columns are appended late at the time record-reader gets built instead of construction // time. 
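// --------------------------------------------------------------------------------------------
// Editorial sketch (not part of the patch): per the comment above, when a split carries delta
// log files the Hudi meta columns must end up in the projection even if the query never asked
// for them, because record keys drive the base-file/log merge. The helper below is a stand-in
// illustrating that idea, not the actual Hudi API; the "_hoodie_*" field names are Hudi's
// standard meta columns.
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

class ProjectionSketch {
  static List<String> withRequiredMetaColumns(List<String> requested) {
    List<String> result = new ArrayList<>(requested);
    for (String meta : Arrays.asList("_hoodie_record_key", "_hoodie_commit_time", "_hoodie_partition_path")) {
      if (!result.contains(meta)) {
        result.add(meta); // appended late, at record-reader construction time
      }
    }
    return result;
  }

  public static void main(String[] args) {
    // -> [rider, fare, _hoodie_record_key, _hoodie_commit_time, _hoodie_partition_path]
    System.out.println(withRequiredMetaColumns(Arrays.asList("rider", "fare")));
  }
}
// --------------------------------------------------------------------------------------------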
if (!realtimeSplit.getDeltaLogPaths().isEmpty()) { - HoodieRealtimeInputFormatUtils.addRequiredProjectionFields(jobConf, realtimeSplit.getHoodieVirtualKeyInfo()); + HoodieRealtimeInputFormatUtils.addRequiredProjectionFields(jobConf, realtimeSplit.getVirtualKeyInfo()); } - this.conf = jobConf; - this.conf.set(HoodieInputFormatUtils.HOODIE_READ_COLUMNS_PROP, "true"); + jobConf.set(HoodieInputFormatUtils.HOODIE_READ_COLUMNS_PROP, "true"); + setConf(jobConf); } } } - HoodieRealtimeInputFormatUtils.cleanProjectionColumnIds(jobConf); - } - @Override - public RecordReader getRecordReader(final InputSplit split, final JobConf jobConf, - final Reporter reporter) throws IOException { - // sanity check - ValidationUtils.checkArgument(split instanceof RealtimeSplit, - "HoodieRealtimeRecordReader can only work on RealtimeSplit and not with " + split); - RealtimeSplit realtimeSplit = (RealtimeSplit) split; - addProjectionToJobConf(realtimeSplit, jobConf); - LOG.info("Creating record reader with readCols :" + jobConf.get(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR) - + ", Ids :" + jobConf.get(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR)); - - // for log only split, set the parquet reader as empty. - if (FSUtils.isLogFile(realtimeSplit.getPath())) { - return new HoodieRealtimeRecordReader(realtimeSplit, jobConf, new HoodieEmptyRecordReader(realtimeSplit, jobConf)); - } - return new HoodieRealtimeRecordReader(realtimeSplit, jobConf, - super.getRecordReader(split, jobConf, reporter)); + HoodieRealtimeInputFormatUtils.cleanProjectionColumnIds(jobConf); } } diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieRealtimeBootstrapBaseFileSplit.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieRealtimeBootstrapBaseFileSplit.java new file mode 100644 index 0000000000000..c7022c98ad3cd --- /dev/null +++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieRealtimeBootstrapBaseFileSplit.java @@ -0,0 +1,143 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hudi.hadoop.realtime; + +import org.apache.hadoop.mapred.FileSplit; +import org.apache.hudi.common.model.HoodieLogFile; +import org.apache.hudi.common.util.Option; +import org.apache.hudi.hadoop.BootstrapBaseFileSplit; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +/** + * Realtime {@link FileSplit} with external base file + * + * NOTE: If you're adding fields here you need to make sure that you appropriately de-/serialize them + * in {@link #readFromInput(DataInput)} and {@link #writeToOutput(DataOutput)} + */ +public class HoodieRealtimeBootstrapBaseFileSplit extends BootstrapBaseFileSplit implements RealtimeSplit { + /** + * Marks whether this path produced as part of Incremental Query + */ + private boolean belongsToIncrementalQuery = false; + /** + * List of delta log-files holding updated records for this base-file + */ + private List deltaLogFiles = new ArrayList<>(); + /** + * Latest commit instant available at the time of the query in which all of the files + * pertaining to this split are represented + */ + private String maxCommitTime; + /** + * Base path of the table this path belongs to + */ + private String basePath; + /** + * Virtual key configuration of the table this split belongs to + */ + private Option virtualKeyInfo = Option.empty(); + + /** + * NOTE: This ctor is necessary for Hive to be able to serialize and + * then instantiate it when deserializing back + */ + public HoodieRealtimeBootstrapBaseFileSplit() {} + + public HoodieRealtimeBootstrapBaseFileSplit(FileSplit baseSplit, + String basePath, + List deltaLogFiles, + String maxInstantTime, + FileSplit externalFileSplit, + boolean belongsToIncrementalQuery, + Option virtualKeyInfoOpt) throws IOException { + super(baseSplit, externalFileSplit); + this.maxCommitTime = maxInstantTime; + this.deltaLogFiles = deltaLogFiles; + this.basePath = basePath; + this.belongsToIncrementalQuery = belongsToIncrementalQuery; + this.virtualKeyInfo = virtualKeyInfoOpt; + } + + @Override + public void write(DataOutput out) throws IOException { + super.write(out); + writeToOutput(out); + } + + @Override + public void readFields(DataInput in) throws IOException { + super.readFields(in); + readFromInput(in); + } + + @Override + public List getDeltaLogFiles() { + return deltaLogFiles; + } + + @Override + public void setDeltaLogFiles(List deltaLogFiles) { + this.deltaLogFiles = deltaLogFiles; + } + + @Override + public String getMaxCommitTime() { + return maxCommitTime; + } + + @Override + public String getBasePath() { + return basePath; + } + + @Override + public Option getVirtualKeyInfo() { + return virtualKeyInfo; + } + + @Override + public boolean getBelongsToIncrementalQuery() { + return belongsToIncrementalQuery; + } + + @Override + public void setBelongsToIncrementalQuery(boolean belongsToIncrementalPath) { + this.belongsToIncrementalQuery = belongsToIncrementalPath; + } + + @Override + public void setMaxCommitTime(String maxInstantTime) { + this.maxCommitTime = maxInstantTime; + } + + @Override + public void setBasePath(String basePath) { + this.basePath = basePath; + } + + @Override + public void setVirtualKeyInfo(Option virtualKeyInfo) { + this.virtualKeyInfo = virtualKeyInfo; + } +} diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieRealtimeFileSplit.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieRealtimeFileSplit.java index a39ec35507a77..a424f021c2d20 
100644 --- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieRealtimeFileSplit.java +++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieRealtimeFileSplit.java @@ -18,83 +18,125 @@ package org.apache.hudi.hadoop.realtime; +import org.apache.hadoop.mapred.FileSplit; import org.apache.hudi.common.model.HoodieLogFile; import org.apache.hudi.common.util.Option; -import org.apache.hadoop.mapred.FileSplit; - import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; import java.util.ArrayList; import java.util.List; -import java.util.stream.Collectors; /** - * Filesplit that wraps the base split and a list of log files to merge deltas from. + * {@link FileSplit} implementation that holds + *
+ * <ol> + *   <li>Split corresponding to the base file</li> + *   <li>List of {@link HoodieLogFile} that holds the delta to be merged (upon reading)</li> + * </ol>
    + * + * This split is correspondent to a single file-slice in the Hudi terminology. + * + * NOTE: If you're adding fields here you need to make sure that you appropriately de-/serialize them + * in {@link #readFromInput(DataInput)} and {@link #writeToOutput(DataOutput)} */ public class HoodieRealtimeFileSplit extends FileSplit implements RealtimeSplit { - - private List deltaLogPaths; + /** + * List of delta log-files holding updated records for this base-file + */ private List deltaLogFiles = new ArrayList<>(); - - private String maxCommitTime; - + /** + * Base path of the table this path belongs to + */ private String basePath; - - private Option hoodieVirtualKeyInfo = Option.empty(); - - public HoodieRealtimeFileSplit() { - super(); + /** + * Latest commit instant available at the time of the query in which all of the files + * pertaining to this split are represented + */ + private String maxCommitTime; + /** + * Marks whether this path produced as part of Incremental Query + */ + private boolean belongsToIncrementalQuery = false; + /** + * Virtual key configuration of the table this split belongs to + */ + private Option virtualKeyInfo = Option.empty(); + + public HoodieRealtimeFileSplit() {} + + public HoodieRealtimeFileSplit(FileSplit baseSplit, + HoodieRealtimePath path) + throws IOException { + this(baseSplit, + path.getBasePath(), + path.getDeltaLogFiles(), + path.getMaxCommitTime(), + path.getBelongsToIncrementalQuery(), + path.getVirtualKeyInfo()); } - public HoodieRealtimeFileSplit(FileSplit baseSplit, String basePath, List deltaLogFiles, String maxCommitTime, - Option hoodieVirtualKeyInfo) + /** + * @VisibleInTesting + */ + public HoodieRealtimeFileSplit(FileSplit baseSplit, + String basePath, + List deltaLogFiles, + String maxCommitTime, + boolean belongsToIncrementalQuery, + Option virtualKeyInfo) throws IOException { super(baseSplit.getPath(), baseSplit.getStart(), baseSplit.getLength(), baseSplit.getLocations()); this.deltaLogFiles = deltaLogFiles; - this.deltaLogPaths = deltaLogFiles.stream().map(entry -> entry.getPath().toString()).collect(Collectors.toList()); - this.maxCommitTime = maxCommitTime; this.basePath = basePath; - this.hoodieVirtualKeyInfo = hoodieVirtualKeyInfo; - } - - public List getDeltaLogPaths() { - return deltaLogPaths; + this.maxCommitTime = maxCommitTime; + this.belongsToIncrementalQuery = belongsToIncrementalQuery; + this.virtualKeyInfo = virtualKeyInfo; } public List getDeltaLogFiles() { return deltaLogFiles; } + @Override + public void setDeltaLogFiles(List deltaLogFiles) { + this.deltaLogFiles = deltaLogFiles; + } + public String getMaxCommitTime() { return maxCommitTime; } + public void setMaxCommitTime(String maxCommitTime) { + this.maxCommitTime = maxCommitTime; + } + public String getBasePath() { return basePath; } - @Override - public void setHoodieVirtualKeyInfo(Option hoodieVirtualKeyInfo) { - this.hoodieVirtualKeyInfo = hoodieVirtualKeyInfo; + public void setBasePath(String basePath) { + this.basePath = basePath; } @Override - public Option getHoodieVirtualKeyInfo() { - return hoodieVirtualKeyInfo; + public void setVirtualKeyInfo(Option virtualKeyInfo) { + this.virtualKeyInfo = virtualKeyInfo; } - public void setDeltaLogPaths(List deltaLogPaths) { - this.deltaLogPaths = deltaLogPaths; + @Override + public Option getVirtualKeyInfo() { + return virtualKeyInfo; } - public void setMaxCommitTime(String maxCommitTime) { - this.maxCommitTime = maxCommitTime; + @Override + public boolean getBelongsToIncrementalQuery() { + return 
belongsToIncrementalQuery; } - public void setBasePath(String basePath) { - this.basePath = basePath; + @Override + public void setBelongsToIncrementalQuery(boolean belongsToIncrementalPath) { + this.belongsToIncrementalQuery = belongsToIncrementalPath; } @Override @@ -111,7 +153,7 @@ public void readFields(DataInput in) throws IOException { @Override public String toString() { - return "HoodieRealtimeFileSplit{DataPath=" + getPath() + ", deltaLogPaths=" + deltaLogPaths + return "HoodieRealtimeFileSplit{DataPath=" + getPath() + ", deltaLogPaths=" + getDeltaLogPaths() + ", maxCommitTime='" + maxCommitTime + '\'' + ", basePath='" + basePath + '\'' + '}'; } } diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieRealtimePath.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieRealtimePath.java new file mode 100644 index 0000000000000..bba44d5c6632c --- /dev/null +++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/HoodieRealtimePath.java @@ -0,0 +1,110 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.hadoop.realtime; + +import org.apache.hadoop.fs.Path; +import org.apache.hudi.common.model.HoodieLogFile; +import org.apache.hudi.common.util.Option; +import org.apache.hudi.hadoop.PathWithBootstrapFileStatus; + +import java.util.List; + +/** + * {@link Path} implementation encoding additional information necessary to appropriately read + * base files of the MOR tables, such as list of delta log files (holding updated records) associated + * w/ the base file, etc. 
+ */ +public class HoodieRealtimePath extends Path { + /** + * Marks whether this path produced as part of Incremental Query + */ + private final boolean belongsToIncrementalQuery; + /** + * List of delta log-files holding updated records for this base-file + */ + private final List deltaLogFiles; + /** + * Latest commit instant available at the time of the query in which all of the files + * pertaining to this split are represented + */ + private final String maxCommitTime; + /** + * Base path of the table this path belongs to + */ + private final String basePath; + /** + * Virtual key configuration of the table this split belongs to + */ + private final Option virtualKeyInfo; + /** + * File status for the Bootstrap file (only relevant if this table is a bootstrapped table + */ + private PathWithBootstrapFileStatus pathWithBootstrapFileStatus; + + public HoodieRealtimePath(Path parent, + String child, + String basePath, + List deltaLogFiles, + String maxCommitTime, + boolean belongsToIncrementalQuery, + Option virtualKeyInfo) { + super(parent, child); + this.basePath = basePath; + this.deltaLogFiles = deltaLogFiles; + this.maxCommitTime = maxCommitTime; + this.belongsToIncrementalQuery = belongsToIncrementalQuery; + this.virtualKeyInfo = virtualKeyInfo; + } + + public List getDeltaLogFiles() { + return deltaLogFiles; + } + + public String getMaxCommitTime() { + return maxCommitTime; + } + + public String getBasePath() { + return basePath; + } + + public boolean getBelongsToIncrementalQuery() { + return belongsToIncrementalQuery; + } + + public boolean isSplitable() { + return !toString().isEmpty(); + } + + public PathWithBootstrapFileStatus getPathWithBootstrapFileStatus() { + return pathWithBootstrapFileStatus; + } + + public void setPathWithBootstrapFileStatus(PathWithBootstrapFileStatus pathWithBootstrapFileStatus) { + this.pathWithBootstrapFileStatus = pathWithBootstrapFileStatus; + } + + public boolean includeBootstrapFilePath() { + return pathWithBootstrapFileStatus != null; + } + + public Option getVirtualKeyInfo() { + return virtualKeyInfo; + } +} diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/RealtimeBootstrapBaseFileSplit.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/RealtimeBootstrapBaseFileSplit.java deleted file mode 100644 index 79d2d815ee809..0000000000000 --- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/RealtimeBootstrapBaseFileSplit.java +++ /dev/null @@ -1,114 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hudi.hadoop.realtime; - -import org.apache.hudi.common.model.HoodieLogFile; -import org.apache.hudi.common.util.Option; -import org.apache.hudi.hadoop.BootstrapBaseFileSplit; - -import org.apache.hadoop.mapred.FileSplit; - -import java.io.DataInput; -import java.io.DataOutput; -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; -import java.util.stream.Collectors; - -/** - * Realtime File Split with external base file. - */ -public class RealtimeBootstrapBaseFileSplit extends BootstrapBaseFileSplit implements RealtimeSplit { - - private List deltaLogPaths; - private List deltaLogFiles = new ArrayList<>(); - - private String maxInstantTime; - - private String basePath; - - public RealtimeBootstrapBaseFileSplit() { - super(); - } - - public RealtimeBootstrapBaseFileSplit(FileSplit baseSplit, String basePath, List deltaLogFiles, - String maxInstantTime, FileSplit externalFileSplit) throws IOException { - super(baseSplit, externalFileSplit); - this.maxInstantTime = maxInstantTime; - this.deltaLogFiles = deltaLogFiles; - this.deltaLogPaths = deltaLogFiles.stream().map(entry -> entry.getPath().toString()).collect(Collectors.toList()); - this.basePath = basePath; - } - - @Override - public void write(DataOutput out) throws IOException { - super.write(out); - writeToOutput(out); - } - - @Override - public void readFields(DataInput in) throws IOException { - super.readFields(in); - readFromInput(in); - } - - @Override - public List getDeltaLogPaths() { - return deltaLogPaths; - } - - @Override - public List getDeltaLogFiles() { - return deltaLogFiles; - } - - @Override - public String getMaxCommitTime() { - return maxInstantTime; - } - - @Override - public String getBasePath() { - return basePath; - } - - @Override - public Option getHoodieVirtualKeyInfo() { - return Option.empty(); - } - - @Override - public void setDeltaLogPaths(List deltaLogPaths) { - this.deltaLogPaths = deltaLogPaths; - } - - @Override - public void setMaxCommitTime(String maxInstantTime) { - this.maxInstantTime = maxInstantTime; - } - - @Override - public void setBasePath(String basePath) { - this.basePath = basePath; - } - - @Override - public void setHoodieVirtualKeyInfo(Option hoodieVirtualKeyInfo) {} - -} diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/RealtimeCompactedRecordReader.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/RealtimeCompactedRecordReader.java index f00efa5efaaa6..b917f004bcd06 100644 --- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/RealtimeCompactedRecordReader.java +++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/RealtimeCompactedRecordReader.java @@ -18,9 +18,16 @@ package org.apache.hudi.hadoop.realtime; +import org.apache.avro.generic.GenericRecord; +import org.apache.hadoop.io.ArrayWritable; +import org.apache.hadoop.io.NullWritable; +import org.apache.hadoop.io.Writable; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapred.RecordReader; import org.apache.hudi.avro.HoodieAvroUtils; import org.apache.hudi.common.config.HoodieCommonConfig; import org.apache.hudi.common.fs.FSUtils; +import org.apache.hudi.common.model.HoodieAvroRecord; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.model.HoodieRecordPayload; import org.apache.hudi.common.table.log.HoodieMergedLogRecordScanner; @@ -28,13 +35,6 @@ import org.apache.hudi.hadoop.config.HoodieRealtimeConfig; import 
org.apache.hudi.hadoop.utils.HoodieInputFormatUtils; import org.apache.hudi.hadoop.utils.HoodieRealtimeRecordReaderUtils; - -import org.apache.avro.generic.GenericRecord; -import org.apache.hadoop.io.ArrayWritable; -import org.apache.hadoop.io.NullWritable; -import org.apache.hadoop.io.Writable; -import org.apache.hadoop.mapred.JobConf; -import org.apache.hadoop.mapred.RecordReader; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; @@ -54,7 +54,7 @@ class RealtimeCompactedRecordReader extends AbstractRealtimeRecordReader private final Set deltaRecordKeys; private final HoodieMergedLogRecordScanner mergedLogRecordScanner; - private int recordKeyIndex = HoodieInputFormatUtils.HOODIE_RECORD_KEY_COL_POS; + private final int recordKeyIndex; private Iterator deltaItr; public RealtimeCompactedRecordReader(RealtimeSplit split, JobConf job, @@ -64,9 +64,9 @@ public RealtimeCompactedRecordReader(RealtimeSplit split, JobConf job, this.mergedLogRecordScanner = getMergedLogRecordScanner(); this.deltaRecordMap = mergedLogRecordScanner.getRecords(); this.deltaRecordKeys = new HashSet<>(this.deltaRecordMap.keySet()); - if (split.getHoodieVirtualKeyInfo().isPresent()) { - this.recordKeyIndex = split.getHoodieVirtualKeyInfo().get().getRecordKeyFieldIndex(); - } + this.recordKeyIndex = split.getVirtualKeyInfo() + .map(HoodieVirtualKeyInfo::getRecordKeyFieldIndex) + .orElse(HoodieInputFormatUtils.HOODIE_RECORD_KEY_COL_POS); } /** @@ -96,9 +96,9 @@ private HoodieMergedLogRecordScanner getMergedLogRecordScanner() throws IOExcept private Option buildGenericRecordwithCustomPayload(HoodieRecord record) throws IOException { if (usesCustomPayload) { - return record.getData().getInsertValue(getWriterSchema()); + return ((HoodieAvroRecord) record).getData().getInsertValue(getWriterSchema(), payloadProps); } else { - return record.getData().getInsertValue(getReaderSchema()); + return ((HoodieAvroRecord) record).getData().getInsertValue(getReaderSchema(), payloadProps); } } diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/RealtimeSplit.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/RealtimeSplit.java index a7f0d2cc2f5e7..d9b1923c60f80 100644 --- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/RealtimeSplit.java +++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/RealtimeSplit.java @@ -18,18 +18,18 @@ package org.apache.hudi.hadoop.realtime; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.mapred.InputSplitWithLocationInfo; import org.apache.hudi.common.model.HoodieLogFile; import org.apache.hudi.common.util.Option; import org.apache.hudi.hadoop.InputSplitUtils; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.mapred.InputSplitWithLocationInfo; - import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; import java.util.ArrayList; import java.util.List; +import java.util.stream.Collectors; /** * Realtime Input Split Interface. @@ -40,10 +40,14 @@ public interface RealtimeSplit extends InputSplitWithLocationInfo { * Return Log File Paths. * @return */ - List getDeltaLogPaths(); + default List getDeltaLogPaths() { + return getDeltaLogFiles().stream().map(entry -> entry.getPath().toString()).collect(Collectors.toList()); + } List getDeltaLogFiles(); + void setDeltaLogFiles(List deltaLogFiles); + /** * Return Max Instant Time. * @return @@ -60,14 +64,12 @@ public interface RealtimeSplit extends InputSplitWithLocationInfo { * Returns Virtual key info if meta fields are disabled. 
* @return */ - Option getHoodieVirtualKeyInfo(); + Option getVirtualKeyInfo(); /** - * Update Log File Paths. - * - * @param deltaLogPaths + * Returns the flag whether this split belongs to an Incremental Query */ - void setDeltaLogPaths(List deltaLogPaths); + boolean getBelongsToIncrementalQuery(); /** * Update Maximum valid instant time. @@ -81,57 +83,72 @@ public interface RealtimeSplit extends InputSplitWithLocationInfo { */ void setBasePath(String basePath); - void setHoodieVirtualKeyInfo(Option hoodieVirtualKeyInfo); + /** + * Sets the flag whether this split belongs to an Incremental Query + */ + void setBelongsToIncrementalQuery(boolean belongsToIncrementalQuery); + + void setVirtualKeyInfo(Option virtualKeyInfo); default void writeToOutput(DataOutput out) throws IOException { InputSplitUtils.writeString(getBasePath(), out); InputSplitUtils.writeString(getMaxCommitTime(), out); - out.writeInt(getDeltaLogPaths().size()); - for (String logFilePath : getDeltaLogPaths()) { - InputSplitUtils.writeString(logFilePath, out); + InputSplitUtils.writeBoolean(getBelongsToIncrementalQuery(), out); + + out.writeInt(getDeltaLogFiles().size()); + for (HoodieLogFile logFile : getDeltaLogFiles()) { + InputSplitUtils.writeString(logFile.getPath().toString(), out); + out.writeLong(logFile.getFileSize()); } - if (!getHoodieVirtualKeyInfo().isPresent()) { + + Option virtualKeyInfoOpt = getVirtualKeyInfo(); + if (!virtualKeyInfoOpt.isPresent()) { InputSplitUtils.writeBoolean(false, out); } else { InputSplitUtils.writeBoolean(true, out); - InputSplitUtils.writeString(getHoodieVirtualKeyInfo().get().getRecordKeyField(), out); - InputSplitUtils.writeString(getHoodieVirtualKeyInfo().get().getPartitionPathField(), out); - InputSplitUtils.writeString(String.valueOf(getHoodieVirtualKeyInfo().get().getRecordKeyFieldIndex()), out); - InputSplitUtils.writeString(String.valueOf(getHoodieVirtualKeyInfo().get().getPartitionPathFieldIndex()), out); + InputSplitUtils.writeString(virtualKeyInfoOpt.get().getRecordKeyField(), out); + InputSplitUtils.writeString(virtualKeyInfoOpt.get().getPartitionPathField(), out); + InputSplitUtils.writeString(String.valueOf(virtualKeyInfoOpt.get().getRecordKeyFieldIndex()), out); + InputSplitUtils.writeString(String.valueOf(virtualKeyInfoOpt.get().getPartitionPathFieldIndex()), out); } } default void readFromInput(DataInput in) throws IOException { setBasePath(InputSplitUtils.readString(in)); setMaxCommitTime(InputSplitUtils.readString(in)); + setBelongsToIncrementalQuery(InputSplitUtils.readBoolean(in)); + int totalLogFiles = in.readInt(); - List deltaLogPaths = new ArrayList<>(totalLogFiles); + List deltaLogPaths = new ArrayList<>(totalLogFiles); for (int i = 0; i < totalLogFiles; i++) { - deltaLogPaths.add(InputSplitUtils.readString(in)); + String logFilePath = InputSplitUtils.readString(in); + long logFileSize = in.readLong(); + deltaLogPaths.add(new HoodieLogFile(new Path(logFilePath), logFileSize)); } - setDeltaLogPaths(deltaLogPaths); + setDeltaLogFiles(deltaLogPaths); + boolean hoodieVirtualKeyPresent = InputSplitUtils.readBoolean(in); if (hoodieVirtualKeyPresent) { String recordKeyField = InputSplitUtils.readString(in); String partitionPathField = InputSplitUtils.readString(in); int recordFieldIndex = Integer.parseInt(InputSplitUtils.readString(in)); int partitionPathIndex = Integer.parseInt(InputSplitUtils.readString(in)); - setHoodieVirtualKeyInfo(Option.of(new HoodieVirtualKeyInfo(recordKeyField, partitionPathField, recordFieldIndex, partitionPathIndex))); + 
setVirtualKeyInfo(Option.of(new HoodieVirtualKeyInfo(recordKeyField, partitionPathField, recordFieldIndex, partitionPathIndex))); } } /** * The file containing this split's data. */ - public Path getPath(); + Path getPath(); /** * The position of the first byte in the file to process. */ - public long getStart(); + long getStart(); /** * The number of bytes in the file to process. */ - public long getLength(); + long getLength(); } diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/RealtimeUnmergedRecordReader.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/RealtimeUnmergedRecordReader.java index 9f51e7f16137e..84c808865072a 100644 --- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/RealtimeUnmergedRecordReader.java +++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/realtime/RealtimeUnmergedRecordReader.java @@ -18,18 +18,10 @@ package org.apache.hudi.hadoop.realtime; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Iterator; -import java.util.List; -import org.apache.avro.generic.GenericRecord; -import org.apache.hadoop.io.ArrayWritable; -import org.apache.hadoop.io.NullWritable; -import org.apache.hadoop.mapred.JobConf; -import org.apache.hadoop.mapred.RecordReader; import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.table.log.HoodieUnMergedLogRecordScanner; import org.apache.hudi.common.util.DefaultSizeEstimator; +import org.apache.hudi.common.util.Functions; import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.queue.BoundedInMemoryExecutor; import org.apache.hudi.common.util.queue.BoundedInMemoryQueueProducer; @@ -40,6 +32,18 @@ import org.apache.hudi.hadoop.config.HoodieRealtimeConfig; import org.apache.hudi.hadoop.utils.HoodieRealtimeRecordReaderUtils; +import org.apache.avro.generic.GenericRecord; +import org.apache.hadoop.io.ArrayWritable; +import org.apache.hadoop.io.NullWritable; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapred.RecordReader; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; +import java.util.function.Function; + class RealtimeUnmergedRecordReader extends AbstractRealtimeRecordReader implements RecordReader { @@ -74,7 +78,7 @@ public RealtimeUnmergedRecordReader(RealtimeSplit split, JobConf job, this.parquetRecordsIterator = new RecordReaderValueIterator<>(this.parquetReader); this.executor = new BoundedInMemoryExecutor<>( HoodieRealtimeRecordReaderUtils.getMaxCompactionMemoryInBytes(jobConf), getParallelProducers(), - Option.empty(), x -> x, new DefaultSizeEstimator<>()); + Option.empty(), Function.identity(), new DefaultSizeEstimator<>(), Functions.noop()); // Consumer of this record reader this.iterator = this.executor.getQueue().iterator(); this.logRecordScanner = HoodieUnMergedLogRecordScanner.newBuilder() @@ -88,7 +92,7 @@ public RealtimeUnmergedRecordReader(RealtimeSplit split, JobConf job, .withBufferSize(this.jobConf.getInt(HoodieRealtimeConfig.MAX_DFS_STREAM_BUFFER_SIZE_PROP, HoodieRealtimeConfig.DEFAULT_MAX_DFS_STREAM_BUFFER_SIZE)) .withLogRecordScannerCallback(record -> { // convert Hoodie log record to Hadoop AvroWritable and buffer - GenericRecord rec = (GenericRecord) record.getData().getInsertValue(getReaderSchema()).get(); + GenericRecord rec = (GenericRecord) record.getData().getInsertValue(getReaderSchema(), payloadProps).get(); ArrayWritable aWritable = (ArrayWritable) 
HoodieRealtimeRecordReaderUtils.avroToArrayWritable(rec, getHiveSchema()); this.executor.getQueue().insertRecord(aWritable); }) diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/utils/HoodieHiveUtils.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/utils/HoodieHiveUtils.java index b4f7e336335d4..fa2bce4875379 100644 --- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/utils/HoodieHiveUtils.java +++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/utils/HoodieHiveUtils.java @@ -19,15 +19,11 @@ package org.apache.hudi.hadoop.utils; import org.apache.hadoop.conf.Configuration; -import org.apache.hudi.common.table.HoodieTableMetaClient; -import org.apache.hudi.common.table.timeline.HoodieTimeline; -import org.apache.hudi.common.util.CollectionUtils; -import org.apache.hudi.common.util.Option; -import org.apache.hudi.exception.HoodieIOException; - import org.apache.hadoop.fs.Path; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapreduce.JobContext; +import org.apache.hudi.common.util.CollectionUtils; +import org.apache.hudi.common.util.Option; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; @@ -148,39 +144,6 @@ public static List getIncrementalTableNames(JobContext job) { return result; } - /** - * Depending on the configs hoodie.%s.consume.pending.commits and hoodie.%s.consume.commit of job - * - * (hoodie..consume.pending.commits, hoodie..consume.commit) -> - * (true, validCommit) -> returns activeTimeline filtered until validCommit - * (true, InValidCommit) -> Raises HoodieIOException - * (true, notSet) -> Raises HoodieIOException - * (false, validCommit) -> returns completedTimeline filtered until validCommit - * (false, InValidCommit) -> Raises HoodieIOException - * (false or notSet, notSet) -> returns completedTimeline unfiltered - * - * validCommit is one which exists in the timeline being checked and vice versa - */ - public static HoodieTimeline getTableTimeline(final String tableName, final JobConf job, final HoodieTableMetaClient metaClient) { - HoodieTimeline timeline = metaClient.getActiveTimeline().getCommitsTimeline(); - - boolean includePendingCommits = shouldIncludePendingCommits(job, tableName); - Option maxCommit = getMaxCommit(job, tableName); - - HoodieTimeline finalizedTimeline = includePendingCommits ? timeline : timeline.filterCompletedInstants(); - - return !maxCommit.isPresent() ? 
finalizedTimeline : filterIfInstantExists(tableName, finalizedTimeline, maxCommit.get()); - - } - - private static HoodieTimeline filterIfInstantExists(String tableName, HoodieTimeline timeline, String maxCommit) { - if (maxCommit == null || !timeline.containsInstant(maxCommit)) { - LOG.info("Timestamp " + maxCommit + " doesn't exist in the commits timeline:" + timeline + " table: " + tableName); - throw new HoodieIOException("Valid timestamp is required for " + HOODIE_CONSUME_COMMIT + " in snapshot mode"); - } - return timeline.findInstantsBeforeOrEquals(maxCommit); - } - public static boolean isIncrementalUseDatabase(Configuration conf) { return conf.getBoolean(HOODIE_INCREMENTAL_USE_DATABASE, false); } diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/utils/HoodieInputFormatUtils.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/utils/HoodieInputFormatUtils.java index a5a3f7e215073..7fec1fb63f6fa 100644 --- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/utils/HoodieInputFormatUtils.java +++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/utils/HoodieInputFormatUtils.java @@ -18,47 +18,40 @@ package org.apache.hudi.hadoop.utils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.LocatedFileStatus; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat; +import org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat; +import org.apache.hadoop.hive.ql.io.orc.OrcSerde; +import org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat; +import org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe; +import org.apache.hadoop.mapred.FileInputFormat; +import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.JobContext; import org.apache.hudi.common.config.HoodieMetadataConfig; -import org.apache.hudi.common.engine.HoodieLocalEngineContext; import org.apache.hudi.common.fs.FSUtils; -import org.apache.hudi.common.model.FileSlice; +import org.apache.hudi.common.model.HoodieBaseFile; import org.apache.hudi.common.model.HoodieCommitMetadata; import org.apache.hudi.common.model.HoodieFileFormat; import org.apache.hudi.common.model.HoodiePartitionMetadata; -import org.apache.hudi.common.model.HoodieBaseFile; -import org.apache.hudi.common.model.HoodieLogFile; import org.apache.hudi.common.table.HoodieTableMetaClient; import org.apache.hudi.common.table.timeline.HoodieDefaultTimeline; import org.apache.hudi.common.table.timeline.HoodieInstant; import org.apache.hudi.common.table.timeline.HoodieTimeline; -import org.apache.hudi.common.table.view.FileSystemViewManager; import org.apache.hudi.common.table.view.HoodieTableFileSystemView; import org.apache.hudi.common.table.view.TableFileSystemView; import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.StringUtils; import org.apache.hudi.exception.HoodieIOException; +import org.apache.hudi.hadoop.FileStatusWithBootstrapBaseFile; import org.apache.hudi.hadoop.HoodieHFileInputFormat; import org.apache.hudi.hadoop.HoodieParquetInputFormat; -import org.apache.hudi.hadoop.RealtimeFileStatus; import org.apache.hudi.hadoop.LocatedFileStatusWithBootstrapBaseFile; -import org.apache.hudi.hadoop.FileStatusWithBootstrapBaseFile; import org.apache.hudi.hadoop.realtime.HoodieHFileRealtimeInputFormat; import org.apache.hudi.hadoop.realtime.HoodieParquetRealtimeInputFormat; - -import org.apache.hadoop.conf.Configuration; -import 
org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.LocatedFileStatus; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat; -import org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat; -import org.apache.hadoop.hive.ql.io.orc.OrcSerde; -import org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat; -import org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe; -import org.apache.hadoop.mapred.FileInputFormat; -import org.apache.hadoop.mapred.JobConf; -import org.apache.hadoop.mapreduce.Job; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; @@ -318,7 +311,7 @@ public static Map getTableMetaClientByPartitionPath Map metaClientMap = new HashMap<>(); return partitions.stream().collect(Collectors.toMap(Function.identity(), p -> { try { - HoodieTableMetaClient metaClient = getTableMetaClientForBasePath(p.getFileSystem(conf), p); + HoodieTableMetaClient metaClient = getTableMetaClientForBasePathUnchecked(conf, p); metaClientMap.put(p, metaClient); return metaClient; } catch (IOException e) { @@ -328,20 +321,17 @@ public static Map getTableMetaClientByPartitionPath } /** - * Extract HoodieTableMetaClient from a partition path(not base path). - * @param fs - * @param dataPath - * @return - * @throws IOException + * Extract HoodieTableMetaClient from a partition path (not base path) */ - public static HoodieTableMetaClient getTableMetaClientForBasePath(FileSystem fs, Path dataPath) throws IOException { + public static HoodieTableMetaClient getTableMetaClientForBasePathUnchecked(Configuration conf, Path partitionPath) throws IOException { + FileSystem fs = partitionPath.getFileSystem(conf); int levels = HoodieHiveUtils.DEFAULT_LEVELS_TO_BASEPATH; - if (HoodiePartitionMetadata.hasPartitionMetadata(fs, dataPath)) { - HoodiePartitionMetadata metadata = new HoodiePartitionMetadata(fs, dataPath); + if (HoodiePartitionMetadata.hasPartitionMetadata(fs, partitionPath)) { + HoodiePartitionMetadata metadata = new HoodiePartitionMetadata(fs, partitionPath); metadata.readFromFS(); levels = metadata.getPartitionDepth(); } - Path baseDir = HoodieHiveUtils.getNthParent(dataPath, levels); + Path baseDir = HoodieHiveUtils.getNthParent(partitionPath, levels); LOG.info("Reading hoodie metadata from path " + baseDir.toString()); return HoodieTableMetaClient.builder().setConf(fs.getConf()).setBasePath(baseDir.toString()).build(); } @@ -440,67 +430,6 @@ public static HoodieMetadataConfig buildMetadataConfig(Configuration conf) { .build(); } - public static List filterFileStatusForSnapshotMode(JobConf job, Map tableMetaClientMap, - List snapshotPaths, boolean includeLogFiles) throws IOException { - HoodieLocalEngineContext engineContext = new HoodieLocalEngineContext(job); - List returns = new ArrayList<>(); - - Map> groupedPaths = - HoodieInputFormatUtils.groupSnapshotPathsByMetaClient(tableMetaClientMap.values(), snapshotPaths); - - Map fsViewCache = new HashMap<>(); - - LOG.info("Found a total of " + groupedPaths.size() + " groups"); - - try { - for (Map.Entry> entry : groupedPaths.entrySet()) { - HoodieTableMetaClient metaClient = entry.getKey(); - if (LOG.isDebugEnabled()) { - LOG.debug("Hoodie Metadata initialized with completed commit instant as :" + metaClient); - } - - HoodieTimeline timeline = HoodieHiveUtils.getTableTimeline(metaClient.getTableConfig().getTableName(), job, metaClient); - - HoodieTableFileSystemView fsView = fsViewCache.computeIfAbsent(metaClient, tableMetaClient -> - 
FileSystemViewManager.createInMemoryFileSystemViewWithTimeline(engineContext, tableMetaClient, buildMetadataConfig(job), timeline)); - List filteredBaseFiles = new ArrayList<>(); - Map> filteredLogs = new HashMap<>(); - for (Path p : entry.getValue()) { - String relativePartitionPath = FSUtils.getRelativePartitionPath(new Path(metaClient.getBasePath()), p); - List matched = fsView.getLatestBaseFiles(relativePartitionPath).collect(Collectors.toList()); - filteredBaseFiles.addAll(matched); - if (includeLogFiles) { - List logMatched = fsView.getLatestFileSlices(relativePartitionPath) - .filter(f -> !f.getBaseFile().isPresent() && f.getLatestLogFile().isPresent()) - .collect(Collectors.toList()); - logMatched.forEach(f -> { - List logPathSizePairs = f.getLogFiles().sorted(HoodieLogFile.getLogFileComparator()).collect(Collectors.toList()); - filteredLogs.put(f.getLatestLogFile().get().getFileStatus(), logPathSizePairs); - }); - } - } - - LOG.info("Total paths to process after hoodie filter " + filteredBaseFiles.size()); - for (HoodieBaseFile filteredFile : filteredBaseFiles) { - if (LOG.isDebugEnabled()) { - LOG.debug("Processing latest hoodie file - " + filteredFile.getPath()); - } - filteredFile = refreshFileStatus(job, filteredFile); - returns.add(getFileStatus(filteredFile)); - } - - for (Map.Entry> filterLogEntry : filteredLogs.entrySet()) { - RealtimeFileStatus rs = new RealtimeFileStatus(filterLogEntry.getKey()); - rs.setDeltaLogFiles(filterLogEntry.getValue()); - returns.add(rs); - } - } - } finally { - fsViewCache.forEach(((metaClient, fsView) -> fsView.close())); - } - return returns; - } - /** * Checks the file status for a race condition which can set the file size to 0. 1. HiveInputFormat does * super.listStatus() and gets back a FileStatus[] 2. Then it creates the HoodieTableMetaClient for the paths listed. @@ -534,12 +463,12 @@ private static HoodieBaseFile refreshFileStatus(Configuration conf, HoodieBaseFi * * @return the affected file status array */ - public static FileStatus[] listAffectedFilesForCommits(Path basePath, List metadataList) { + public static FileStatus[] listAffectedFilesForCommits(Configuration hadoopConf, Path basePath, List metadataList) { // TODO: Use HoodieMetaTable to extract affected file directly. HashMap fullPathToFileStatus = new HashMap<>(); // Iterate through the given commits. 
for (HoodieCommitMetadata metadata: metadataList) { - fullPathToFileStatus.putAll(metadata.getFullPathToFileStatus(basePath.toString())); + fullPathToFileStatus.putAll(metadata.getFullPathToFileStatus(hadoopConf, basePath.toString())); } return fullPathToFileStatus.values().toArray(new FileStatus[0]); } diff --git a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/utils/HoodieRealtimeInputFormatUtils.java b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/utils/HoodieRealtimeInputFormatUtils.java index 6718642d22728..d2501ee8dc15e 100644 --- a/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/utils/HoodieRealtimeInputFormatUtils.java +++ b/hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/utils/HoodieRealtimeInputFormatUtils.java @@ -18,46 +18,30 @@ package org.apache.hudi.hadoop.utils; -import org.apache.hudi.common.engine.HoodieLocalEngineContext; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.serde2.ColumnProjectionUtils; +import org.apache.hadoop.mapred.FileSplit; +import org.apache.hadoop.mapred.JobConf; import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.model.FileSlice; import org.apache.hudi.common.model.HoodieBaseFile; import org.apache.hudi.common.model.HoodieLogFile; import org.apache.hudi.common.model.HoodieRecord; -import org.apache.hudi.common.table.HoodieTableConfig; import org.apache.hudi.common.table.HoodieTableMetaClient; -import org.apache.hudi.common.table.TableSchemaResolver; import org.apache.hudi.common.table.timeline.HoodieInstant; -import org.apache.hudi.common.table.timeline.HoodieTimeline; -import org.apache.hudi.common.table.view.FileSystemViewManager; import org.apache.hudi.common.table.view.HoodieTableFileSystemView; -import org.apache.hudi.common.util.CollectionUtils; import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.collection.Pair; import org.apache.hudi.exception.HoodieException; -import org.apache.hudi.exception.HoodieIOException; -import org.apache.hudi.hadoop.BaseFileWithLogsSplit; -import org.apache.hudi.hadoop.BootstrapBaseFileSplit; +import org.apache.hudi.hadoop.realtime.HoodieRealtimeBootstrapBaseFileSplit; import org.apache.hudi.hadoop.realtime.HoodieRealtimeFileSplit; import org.apache.hudi.hadoop.realtime.HoodieVirtualKeyInfo; -import org.apache.hudi.hadoop.realtime.RealtimeBootstrapBaseFileSplit; import org.apache.hudi.hadoop.realtime.RealtimeSplit; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.serde2.ColumnProjectionUtils; -import org.apache.hadoop.mapred.FileSplit; -import org.apache.hadoop.mapred.InputSplit; -import org.apache.hadoop.mapred.JobConf; -import org.apache.hadoop.mapred.SplitLocationInfo; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; -import org.apache.parquet.schema.MessageType; -import java.io.IOException; import java.util.ArrayList; -import java.util.Arrays; -import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; @@ -65,169 +49,32 @@ import java.util.stream.Collectors; import java.util.stream.Stream; +import static org.apache.hudi.TypeUtils.unsafeCast; + public class HoodieRealtimeInputFormatUtils extends HoodieInputFormatUtils { private static final Logger LOG = LogManager.getLogger(HoodieRealtimeInputFormatUtils.class); - public static InputSplit[] getRealtimeSplits(Configuration conf, Stream fileSplits) { - Map> partitionsToParquetSplits = - 
fileSplits.collect(Collectors.groupingBy(split -> split.getPath().getParent())); - // TODO(vc): Should we handle also non-hoodie splits here? - Map partitionsToMetaClient = getTableMetaClientByPartitionPath(conf, partitionsToParquetSplits.keySet()); - - // Create file system cache so metadata table is only instantiated once. Also can benefit normal file listing if - // partition path is listed twice so file groups will already be loaded in file system - Map fsCache = new HashMap<>(); - // for all unique split parents, obtain all delta files based on delta commit timeline, - // grouped on file id - List rtSplits = new ArrayList<>(); - try { - // Pre process tableConfig from first partition to fetch virtual key info - Option hoodieVirtualKeyInfo = Option.empty(); - if (partitionsToParquetSplits.size() > 0) { - HoodieTableMetaClient metaClient = partitionsToMetaClient.get(partitionsToParquetSplits.keySet().iterator().next()); - hoodieVirtualKeyInfo = getHoodieVirtualKeyInfo(metaClient); - } - Option finalHoodieVirtualKeyInfo = hoodieVirtualKeyInfo; - partitionsToParquetSplits.keySet().forEach(partitionPath -> { - // for each partition path obtain the data & log file groupings, then map back to inputsplits - HoodieTableMetaClient metaClient = partitionsToMetaClient.get(partitionPath); - if (!fsCache.containsKey(metaClient)) { - HoodieLocalEngineContext engineContext = new HoodieLocalEngineContext(conf); - HoodieTableFileSystemView fsView = FileSystemViewManager.createInMemoryFileSystemViewWithTimeline(engineContext, - metaClient, HoodieInputFormatUtils.buildMetadataConfig(conf), metaClient.getActiveTimeline()); - fsCache.put(metaClient, fsView); - } - HoodieTableFileSystemView fsView = fsCache.get(metaClient); - - String relPartitionPath = FSUtils.getRelativePartitionPath(new Path(metaClient.getBasePath()), partitionPath); - // Both commit and delta-commits are included - pick the latest completed one - Option latestCompletedInstant = - metaClient.getActiveTimeline().getWriteTimeline().filterCompletedInstants().lastInstant(); - - Stream latestFileSlices = latestCompletedInstant - .map(instant -> fsView.getLatestMergedFileSlicesBeforeOrOn(relPartitionPath, instant.getTimestamp())) - .orElse(Stream.empty()); - - // subgroup splits again by file id & match with log files. 
- Map> groupedInputSplits = partitionsToParquetSplits.get(partitionPath).stream() - .collect(Collectors.groupingBy(split -> FSUtils.getFileIdFromFilePath(split.getPath()))); - // Get the maxCommit from the last delta or compaction or commit - when bootstrapped from COW table - String maxCommitTime = metaClient.getActiveTimeline().getTimelineOfActions(CollectionUtils.createSet(HoodieTimeline.COMMIT_ACTION, - HoodieTimeline.ROLLBACK_ACTION, HoodieTimeline.DELTA_COMMIT_ACTION, HoodieTimeline.REPLACE_COMMIT_ACTION)) - .filterCompletedInstants().lastInstant().get().getTimestamp(); - latestFileSlices.forEach(fileSlice -> { - List dataFileSplits = groupedInputSplits.getOrDefault(fileSlice.getFileId(), new ArrayList<>()); - dataFileSplits.forEach(split -> { - try { - List logFiles = fileSlice.getLogFiles().sorted(HoodieLogFile.getLogFileComparator()) - .collect(Collectors.toList()); - if (split instanceof BootstrapBaseFileSplit) { - BootstrapBaseFileSplit eSplit = (BootstrapBaseFileSplit) split; - rtSplits.add(createRealtimeBoostrapBaseFileSplit(eSplit, metaClient.getBasePath(), logFiles, maxCommitTime)); - } else { - rtSplits.add(new HoodieRealtimeFileSplit(split, metaClient.getBasePath(), logFiles, maxCommitTime, finalHoodieVirtualKeyInfo)); - } - } catch (IOException e) { - throw new HoodieIOException("Error creating hoodie real time split ", e); - } - }); - }); - }); - } catch (Exception e) { - throw new HoodieException("Error obtaining data file/log file grouping ", e); - } finally { - // close all the open fs views. - fsCache.forEach((k, view) -> view.close()); - } - LOG.info("Returning a total splits of " + rtSplits.size()); - return rtSplits.toArray(new InputSplit[0]); - } - - // get IncrementalRealtimeSplits - public static InputSplit[] getIncrementalRealtimeSplits(Configuration conf, Stream fileSplits) throws IOException { - List rtSplits = new ArrayList<>(); - List fileSplitList = fileSplits.collect(Collectors.toList()); - Set partitionSet = fileSplitList.stream().map(f -> f.getPath().getParent()).collect(Collectors.toSet()); - Map partitionsToMetaClient = getTableMetaClientByPartitionPath(conf, partitionSet); - // Pre process tableConfig from first partition to fetch virtual key info - Option hoodieVirtualKeyInfo = Option.empty(); - if (partitionSet.size() > 0) { - hoodieVirtualKeyInfo = getHoodieVirtualKeyInfo(partitionsToMetaClient.get(partitionSet.iterator().next())); - } - Option finalHoodieVirtualKeyInfo = hoodieVirtualKeyInfo; - fileSplitList.stream().forEach(s -> { - // deal with incremental query. 
- try { - if (s instanceof BaseFileWithLogsSplit) { - BaseFileWithLogsSplit bs = (BaseFileWithLogsSplit)s; - if (bs.getBelongToIncrementalSplit()) { - rtSplits.add(new HoodieRealtimeFileSplit(bs, bs.getBasePath(), bs.getDeltaLogFiles(), bs.getMaxCommitTime(), finalHoodieVirtualKeyInfo)); - } - } else if (s instanceof RealtimeBootstrapBaseFileSplit) { - rtSplits.add(s); - } - } catch (IOException e) { - throw new HoodieIOException("Error creating hoodie real time split ", e); - } - }); - LOG.info("Returning a total splits of " + rtSplits.size()); - return rtSplits.toArray(new InputSplit[0]); - } - - public static Option getHoodieVirtualKeyInfo(HoodieTableMetaClient metaClient) { - HoodieTableConfig tableConfig = metaClient.getTableConfig(); - if (!tableConfig.populateMetaFields()) { - TableSchemaResolver tableSchemaResolver = new TableSchemaResolver(metaClient); - try { - MessageType parquetSchema = tableSchemaResolver.getTableParquetSchema(); - return Option.of(new HoodieVirtualKeyInfo(tableConfig.getRecordKeyFieldProp(), - tableConfig.getPartitionFieldProp(), parquetSchema.getFieldIndex(tableConfig.getRecordKeyFieldProp()), - parquetSchema.getFieldIndex(tableConfig.getPartitionFieldProp()))); - } catch (Exception exception) { - throw new HoodieException("Fetching table schema failed with exception ", exception); - } + public static boolean doesBelongToIncrementalQuery(FileSplit s) { + if (s instanceof HoodieRealtimeFileSplit) { + HoodieRealtimeFileSplit bs = unsafeCast(s); + return bs.getBelongsToIncrementalQuery(); + } else if (s instanceof HoodieRealtimeBootstrapBaseFileSplit) { + HoodieRealtimeBootstrapBaseFileSplit bs = unsafeCast(s); + return bs.getBelongsToIncrementalQuery(); } - return Option.empty(); - } - public static boolean isIncrementalQuerySplits(List fileSplits) { - if (fileSplits == null || fileSplits.size() == 0) { - return false; - } - return fileSplits.stream().anyMatch(s -> { - if (s instanceof BaseFileWithLogsSplit) { - BaseFileWithLogsSplit bs = (BaseFileWithLogsSplit)s; - return bs.getBelongToIncrementalSplit(); - } else { - return s instanceof RealtimeBootstrapBaseFileSplit; - } - }); - } - - public static RealtimeBootstrapBaseFileSplit createRealtimeBoostrapBaseFileSplit( - BootstrapBaseFileSplit split, String basePath, List logFiles, String maxInstantTime) { - try { - String[] hosts = split.getLocationInfo() != null ? Arrays.stream(split.getLocationInfo()) - .filter(x -> !x.isInMemory()).toArray(String[]::new) : new String[0]; - String[] inMemoryHosts = split.getLocationInfo() != null ? Arrays.stream(split.getLocationInfo()) - .filter(SplitLocationInfo::isInMemory).toArray(String[]::new) : new String[0]; - FileSplit baseSplit = new FileSplit(split.getPath(), split.getStart(), split.getLength(), - hosts, inMemoryHosts); - return new RealtimeBootstrapBaseFileSplit(baseSplit, basePath, logFiles, maxInstantTime, split.getBootstrapFileSplit()); - } catch (IOException e) { - throw new HoodieIOException("Error creating hoodie real time split ", e); - } + return false; } // Return parquet file with a list of log files in the same file group. - public static List, List>> groupLogsByBaseFile(Configuration conf, List partitionPaths) { + public static List, List>> groupLogsByBaseFile(Configuration conf, List partitionPaths) { Set partitionSet = new HashSet<>(partitionPaths); // TODO(vc): Should we handle also non-hoodie splits here? 
Map partitionsToMetaClient = getTableMetaClientByPartitionPath(conf, partitionSet); // Get all the base file and its log file pairs in required partition paths. - List, List>> baseAndLogsList = new ArrayList<>(); + List, List>> baseAndLogsList = new ArrayList<>(); partitionSet.forEach(partitionPath -> { // for each partition path obtain the data & log file groupings, then map back to inputsplits HoodieTableMetaClient metaClient = partitionsToMetaClient.get(partitionPath); @@ -244,8 +91,7 @@ public static List, List>> groupLogsByBaseFi .orElse(Stream.empty()); latestFileSlices.forEach(fileSlice -> { - List logFilePaths = fileSlice.getLogFiles().sorted(HoodieLogFile.getLogFileComparator()) - .map(logFile -> logFile.getPath().toString()).collect(Collectors.toList()); + List logFilePaths = fileSlice.getLogFiles().sorted(HoodieLogFile.getLogFileComparator()).collect(Collectors.toList()); baseAndLogsList.add(Pair.of(fileSlice.getBaseFile(), logFilePaths)); }); } catch (Exception e) { @@ -312,7 +158,7 @@ public static boolean requiredProjectionFieldsExistInConf(Configuration configur public static boolean canAddProjectionToJobConf(final RealtimeSplit realtimeSplit, final JobConf jobConf) { return jobConf.get(HoodieInputFormatUtils.HOODIE_READ_COLUMNS_PROP) == null - || (!realtimeSplit.getDeltaLogPaths().isEmpty() && !HoodieRealtimeInputFormatUtils.requiredProjectionFieldsExistInConf(jobConf, realtimeSplit.getHoodieVirtualKeyInfo())); + || (!realtimeSplit.getDeltaLogPaths().isEmpty() && !HoodieRealtimeInputFormatUtils.requiredProjectionFieldsExistInConf(jobConf, realtimeSplit.getVirtualKeyInfo())); } /** diff --git a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestHoodieParquetInputFormat.java b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestHoodieParquetInputFormat.java index 5c7a1fdf2f84f..2ae7c36d98e7e 100644 --- a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestHoodieParquetInputFormat.java +++ b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/TestHoodieParquetInputFormat.java @@ -18,6 +18,15 @@ package org.apache.hudi.hadoop; +import org.apache.avro.Schema; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.io.ArrayWritable; +import org.apache.hadoop.io.NullWritable; +import org.apache.hadoop.mapred.FileInputFormat; +import org.apache.hadoop.mapred.InputSplit; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapred.RecordReader; +import org.apache.hadoop.mapreduce.Job; import org.apache.hudi.avro.model.HoodieCompactionPlan; import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.model.HoodieCommitMetadata; @@ -34,16 +43,7 @@ import org.apache.hudi.exception.HoodieIOException; import org.apache.hudi.hadoop.testutils.InputFormatTestUtil; import org.apache.hudi.hadoop.utils.HoodieHiveUtils; - -import org.apache.avro.Schema; -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.io.ArrayWritable; -import org.apache.hadoop.io.NullWritable; -import org.apache.hadoop.mapred.FileInputFormat; -import org.apache.hadoop.mapred.InputSplit; -import org.apache.hadoop.mapred.JobConf; -import org.apache.hadoop.mapred.RecordReader; -import org.apache.hadoop.mapreduce.Job; +import org.apache.hudi.hadoop.utils.HoodieInputFormatUtils; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; @@ -115,7 +115,7 @@ public void testPendingCompactionWithActiveCommits() throws IOException { timeline.setInstants(instants); // Verify
getCommitsTimelineBeforePendingCompaction does not return instants after first compaction instant - HoodieTimeline filteredTimeline = inputFormat.filterInstantsTimeline(timeline); + HoodieTimeline filteredTimeline = HoodieInputFormatUtils.filterInstantsTimeline(timeline); assertTrue(filteredTimeline.containsInstant(t1)); assertTrue(filteredTimeline.containsInstant(t2)); assertFalse(filteredTimeline.containsInstant(t3)); @@ -126,7 +126,7 @@ public void testPendingCompactionWithActiveCommits() throws IOException { instants.remove(t3); timeline = new HoodieActiveTimeline(metaClient); timeline.setInstants(instants); - filteredTimeline = inputFormat.filterInstantsTimeline(timeline); + filteredTimeline = HoodieInputFormatUtils.filterInstantsTimeline(timeline); // verify all remaining instants are returned. assertTrue(filteredTimeline.containsInstant(t1)); @@ -140,7 +140,7 @@ public void testPendingCompactionWithActiveCommits() throws IOException { instants.remove(t5); timeline = new HoodieActiveTimeline(metaClient); timeline.setInstants(instants); - filteredTimeline = inputFormat.filterInstantsTimeline(timeline); + filteredTimeline = HoodieInputFormatUtils.filterInstantsTimeline(timeline); // verify all remaining instants are returned. assertTrue(filteredTimeline.containsInstant(t1)); @@ -202,11 +202,11 @@ public void testSnapshotWithInvalidCommitShouldThrowException() throws IOExcepti FileInputFormat.setInputPaths(jobConf, partitionDir.getPath()); InputFormatTestUtil.setupSnapshotIncludePendingCommits(jobConf, "1"); Exception exception = assertThrows(HoodieIOException.class, () -> inputFormat.listStatus(jobConf)); - assertEquals("Valid timestamp is required for hoodie.%s.consume.commit in snapshot mode", exception.getMessage()); + assertEquals("Query instant (1) not found in the timeline", exception.getMessage()); InputFormatTestUtil.setupSnapshotMaxCommitTimeQueryMode(jobConf, "1"); exception = assertThrows(HoodieIOException.class, () -> inputFormat.listStatus(jobConf)); - assertEquals("Valid timestamp is required for hoodie.%s.consume.commit in snapshot mode", exception.getMessage()); + assertEquals("Query instant (1) not found in the timeline", exception.getMessage()); } @Test diff --git a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/functional/TestHoodieCombineHiveInputFormat.java b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/functional/TestHoodieCombineHiveInputFormat.java index 50c3f2e1c4e88..ec6ea0a8b3ec3 100644 --- a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/functional/TestHoodieCombineHiveInputFormat.java +++ b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/functional/TestHoodieCombineHiveInputFormat.java @@ -104,7 +104,7 @@ public void multiPartitionReadersRealtimeCombineHoodieInputFormat() throws Excep final int numRecords = 1000; // Create 3 partitions, each partition holds one parquet file and 1000 records List partitionDirs = InputFormatTestUtil - .prepareMultiPartitionedParquetTable(tempDir, schema, 3, numRecords, commitTime); + .prepareMultiPartitionedParquetTable(tempDir, schema, 3, numRecords, commitTime, HoodieTableType.MERGE_ON_READ); InputFormatTestUtil.commit(tempDir, commitTime); TableDesc tblDesc = Utilities.defaultTd; @@ -245,7 +245,7 @@ public void multiLevelPartitionReadersRealtimeCombineHoodieInputFormat() throws } @Test - public void testMutilReaderRealtimeComineHoodieInputFormat() throws Exception { + public void testMultiReaderRealtimeCombineHoodieInputFormat() throws Exception { // test for hudi-1722 Configuration conf = new 
Configuration(); // initial commit diff --git a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/realtime/TestHoodieRealtimeFileSplit.java b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/realtime/TestHoodieRealtimeFileSplit.java index 9d3855c47d663..a6ca32769cf8d 100644 --- a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/realtime/TestHoodieRealtimeFileSplit.java +++ b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/realtime/TestHoodieRealtimeFileSplit.java @@ -18,12 +18,11 @@ package org.apache.hudi.hadoop.realtime; -import org.apache.hudi.common.model.HoodieLogFile; -import org.apache.hudi.common.util.Option; - import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapred.FileSplit; +import org.apache.hudi.common.model.HoodieLogFile; +import org.apache.hudi.common.util.Option; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; @@ -72,7 +71,7 @@ public void setUp(@TempDir java.nio.file.Path tempDir) throws Exception { baseFileSplit = new FileSplit(new Path(fileSplitName), 0, 100, new String[] {}); maxCommitTime = "10001"; - split = new HoodieRealtimeFileSplit(baseFileSplit, basePath, deltaLogFiles, maxCommitTime, Option.empty()); + split = new HoodieRealtimeFileSplit(baseFileSplit, basePath, deltaLogFiles, maxCommitTime, false, Option.empty()); } @Test diff --git a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/realtime/TestHoodieRealtimeRecordReader.java b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/realtime/TestHoodieRealtimeRecordReader.java index ede76dc3490fa..fc4eb7ce2c042 100644 --- a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/realtime/TestHoodieRealtimeRecordReader.java +++ b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/realtime/TestHoodieRealtimeRecordReader.java @@ -18,8 +18,28 @@ package org.apache.hudi.hadoop.realtime; +import org.apache.avro.Schema; +import org.apache.avro.Schema.Field; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants; +import org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat; +import org.apache.hadoop.hive.serde2.ColumnProjectionUtils; +import org.apache.hadoop.io.ArrayWritable; +import org.apache.hadoop.io.BooleanWritable; +import org.apache.hadoop.io.DoubleWritable; +import org.apache.hadoop.io.FloatWritable; +import org.apache.hadoop.io.IntWritable; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.NullWritable; +import org.apache.hadoop.io.Writable; +import org.apache.hadoop.mapred.FileInputFormat; +import org.apache.hadoop.mapred.FileSplit; import org.apache.hadoop.mapred.InputSplit; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapred.RecordReader; import org.apache.hadoop.mapred.Reporter; import org.apache.hudi.avro.HoodieAvroUtils; import org.apache.hudi.avro.model.HoodieCompactionPlan; @@ -30,8 +50,8 @@ import org.apache.hudi.common.model.HoodieLogFile; import org.apache.hudi.common.model.HoodieReplaceCommitMetadata; import org.apache.hudi.common.model.HoodieTableType; -import org.apache.hudi.common.table.HoodieTableConfig; import org.apache.hudi.common.model.HoodieWriteStat; +import org.apache.hudi.common.table.HoodieTableConfig; import org.apache.hudi.common.table.log.HoodieLogFormat; import org.apache.hudi.common.table.log.HoodieLogFormat.Writer; import 
org.apache.hudi.common.table.log.block.HoodieLogBlock; @@ -44,32 +64,9 @@ import org.apache.hudi.common.util.collection.ExternalSpillableMap; import org.apache.hudi.common.util.collection.Pair; import org.apache.hudi.exception.HoodieException; -import org.apache.hudi.hadoop.BaseFileWithLogsSplit; -import org.apache.hudi.hadoop.PathWithLogFilePath; import org.apache.hudi.hadoop.RealtimeFileStatus; -import org.apache.hudi.hadoop.testutils.InputFormatTestUtil; import org.apache.hudi.hadoop.config.HoodieRealtimeConfig; - -import org.apache.avro.Schema; -import org.apache.avro.Schema.Field; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants; -import org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat; -import org.apache.hadoop.hive.serde2.ColumnProjectionUtils; -import org.apache.hadoop.io.ArrayWritable; -import org.apache.hadoop.io.BooleanWritable; -import org.apache.hadoop.io.DoubleWritable; -import org.apache.hadoop.io.FloatWritable; -import org.apache.hadoop.io.IntWritable; -import org.apache.hadoop.io.LongWritable; -import org.apache.hadoop.io.NullWritable; -import org.apache.hadoop.io.Writable; -import org.apache.hadoop.mapred.FileInputFormat; -import org.apache.hadoop.mapred.FileSplit; -import org.apache.hadoop.mapred.JobConf; -import org.apache.hadoop.mapred.RecordReader; +import org.apache.hudi.hadoop.testutils.InputFormatTestUtil; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; @@ -84,12 +81,12 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; +import java.util.HashMap; import java.util.HashSet; import java.util.List; +import java.util.Map; import java.util.Properties; import java.util.Set; -import java.util.Map; -import java.util.HashMap; import java.util.stream.Collectors; import java.util.stream.Stream; @@ -163,6 +160,12 @@ public void testHFileInlineReader() throws Exception { HoodieLogBlock.HoodieLogBlockType.HFILE_DATA_BLOCK); } + @Test + public void testParquetInlineReader() throws Exception { + testReaderInternal(ExternalSpillableMap.DiskMapType.BITCASK, false, false, + HoodieLogBlock.HoodieLogBlockType.PARQUET_DATA_BLOCK); + } + private void testReaderInternal(ExternalSpillableMap.DiskMapType diskMapType, boolean isCompressionEnabled, boolean partitioned) throws Exception { @@ -223,7 +226,9 @@ private void testReaderInternal(ExternalSpillableMap.DiskMapType diskMapType, new FileSplit(new Path(partitionDir + "/fileid0_1-0-1_" + baseInstant + ".parquet"), 0, 1, baseJobConf), basePath.toUri().toString(), fileSlice.getLogFiles().sorted(HoodieLogFile.getLogFileComparator()) .collect(Collectors.toList()), - instantTime, Option.empty()); + instantTime, + false, + Option.empty()); // create a RecordReader to be used by HoodieRealtimeRecordReader RecordReader reader = new MapredParquetInputFormat().getRecordReader( @@ -303,7 +308,7 @@ public void testUnMergedReader() throws Exception { // create a split with baseFile (parquet file written earlier) and new log file(s) HoodieRealtimeFileSplit split = new HoodieRealtimeFileSplit( new FileSplit(new Path(partitionDir + "/fileid0_1-0-1_" + instantTime + ".parquet"), 0, 1, baseJobConf), - basePath.toUri().toString(), Collections.singletonList(writer.getLogFile()), newCommitTime, Option.empty()); + basePath.toUri().toString(), Collections.singletonList(writer.getLogFile()), newCommitTime, 
false, Option.empty()); // create a RecordReader to be used by HoodieRealtimeRecordReader RecordReader reader = new MapredParquetInputFormat().getRecordReader( @@ -382,7 +387,7 @@ public void testReaderWithNestedAndComplexSchema(ExternalSpillableMap.DiskMapTyp // create a split with baseFile (parquet file written earlier) and new log file(s) HoodieRealtimeFileSplit split = new HoodieRealtimeFileSplit( new FileSplit(new Path(partitionDir + "/fileid0_1-0-1_" + instantTime + ".parquet"), 0, 1, baseJobConf), - basePath.toUri().toString(), Collections.singletonList(writer.getLogFile()), newCommitTime, Option.empty()); + basePath.toUri().toString(), Collections.singletonList(writer.getLogFile()), newCommitTime, false, Option.empty()); // create a RecordReader to be used by HoodieRealtimeRecordReader RecordReader reader = new MapredParquetInputFormat().getRecordReader( @@ -529,7 +534,7 @@ public void testSchemaEvolutionAndRollbackBlockInLastLogFile(ExternalSpillableMa // create a split with baseFile (parquet file written earlier) and new log file(s) HoodieRealtimeFileSplit split = new HoodieRealtimeFileSplit( new FileSplit(new Path(partitionDir + "/fileid0_1_" + instantTime + ".parquet"), 0, 1, baseJobConf), - basePath.toUri().toString(), logFiles, newCommitTime, Option.empty()); + basePath.toUri().toString(), logFiles, newCommitTime, false, Option.empty()); // create a RecordReader to be used by HoodieRealtimeRecordReader RecordReader reader = new MapredParquetInputFormat().getRecordReader( @@ -609,7 +614,7 @@ public void testIncrementalWithOnlylog() throws Exception { HoodieParquetRealtimeInputFormat inputFormat = new HoodieParquetRealtimeInputFormat(); inputFormat.setConf(baseJobConf); InputSplit[] splits = inputFormat.getSplits(baseJobConf, 1); - assertTrue(splits.length == 1); + assertEquals(1, splits.length); JobConf newJobConf = new JobConf(baseJobConf); List fields = schema.getFields(); setHiveColumnNameProps(fields, newJobConf, false); @@ -763,13 +768,16 @@ public void testLogOnlyReader() throws Exception { FileCreateUtils.createDeltaCommit(basePath.toString(), instantTime); // create a split with new log file(s) fileSlice.addLogFile(new HoodieLogFile(writer.getLogFile().getPath(), size)); - RealtimeFileStatus realtimeFileStatus = new RealtimeFileStatus(new FileStatus(writer.getLogFile().getFileSize(), false, 1, 1, 0, writer.getLogFile().getPath())); + RealtimeFileStatus realtimeFileStatus = new RealtimeFileStatus( + new FileStatus(writer.getLogFile().getFileSize(), false, 1, 1, 0, writer.getLogFile().getPath()), + basePath.toString(), + fileSlice.getLogFiles().collect(Collectors.toList()), + false, + Option.empty()); realtimeFileStatus.setMaxCommitTime(instantTime); - realtimeFileStatus.setBasePath(basePath.toString()); - realtimeFileStatus.setDeltaLogFiles(fileSlice.getLogFiles().collect(Collectors.toList())); - PathWithLogFilePath pathWithLogFileStatus = (PathWithLogFilePath) realtimeFileStatus.getPath(); - BaseFileWithLogsSplit bs = pathWithLogFileStatus.buildSplit(pathWithLogFileStatus, 0, 0, new String[] {""}); - HoodieRealtimeFileSplit split = new HoodieRealtimeFileSplit(bs, bs.getBasePath(), bs.getDeltaLogFiles(), bs.getMaxCommitTime(), Option.empty()); + HoodieRealtimePath realtimePath = (HoodieRealtimePath) realtimeFileStatus.getPath(); + HoodieRealtimeFileSplit split = + new HoodieRealtimeFileSplit(new FileSplit(realtimePath, 0, 0, new String[] {""}), realtimePath); JobConf newJobConf = new JobConf(baseJobConf); List fields = schema.getFields(); diff --git 
a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/testutils/InputFormatTestUtil.java b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/testutils/InputFormatTestUtil.java index 8c19524a2d651..836ad57121bd5 100644 --- a/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/testutils/InputFormatTestUtil.java +++ b/hudi-hadoop-mr/src/test/java/org/apache/hudi/hadoop/testutils/InputFormatTestUtil.java @@ -20,6 +20,7 @@ import org.apache.hadoop.fs.LocalFileSystem; import org.apache.hadoop.fs.RawLocalFileSystem; +import org.apache.hadoop.hbase.io.compress.Compression; import org.apache.hudi.avro.HoodieAvroUtils; import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.model.HoodieFileFormat; @@ -33,6 +34,7 @@ import org.apache.hudi.common.table.log.block.HoodieDataBlock; import org.apache.hudi.common.table.log.block.HoodieHFileDataBlock; import org.apache.hudi.common.table.log.block.HoodieLogBlock; +import org.apache.hudi.common.table.log.block.HoodieParquetDataBlock; import org.apache.hudi.common.testutils.HoodieTestUtils; import org.apache.hudi.common.testutils.SchemaTestUtil; import org.apache.hudi.hadoop.utils.HoodieHiveUtils; @@ -47,6 +49,7 @@ import org.apache.hadoop.hive.serde2.ColumnProjectionUtils; import org.apache.hadoop.mapred.JobConf; import org.apache.parquet.avro.AvroParquetWriter; +import org.apache.parquet.hadoop.metadata.CompressionCodecName; import java.io.File; import java.io.IOException; @@ -243,9 +246,9 @@ public static File prepareNonPartitionedParquetTable(java.nio.file.Path basePath } public static List prepareMultiPartitionedParquetTable(java.nio.file.Path basePath, Schema schema, - int numberPartitions, int numberOfRecordsPerPartition, String commitNumber) throws IOException { + int numberPartitions, int numberOfRecordsPerPartition, String commitNumber, HoodieTableType tableType) throws IOException { List result = new ArrayList<>(); - HoodieTestUtils.init(HoodieTestUtils.getDefaultHadoopConf(), basePath.toString()); + HoodieTestUtils.init(HoodieTestUtils.getDefaultHadoopConf(), basePath.toString(), tableType, HoodieFileFormat.PARQUET); for (int i = 0; i < numberPartitions; i++) { java.nio.file.Path partitionPath = basePath.resolve(Paths.get(2016 + i + "", "05", "01")); setupPartition(basePath, partitionPath); @@ -363,8 +366,14 @@ public static HoodieLogFormat.Writer writeDataBlockToLogFile(File partitionDir, Map header = new HashMap<>(); header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, newCommit); header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, writeSchema.toString()); - HoodieDataBlock dataBlock = (logBlockType == HoodieLogBlock.HoodieLogBlockType.HFILE_DATA_BLOCK) ? 
new HoodieHFileDataBlock(records, header) : - new HoodieAvroDataBlock(records, header); + HoodieDataBlock dataBlock = null; + if (logBlockType == HoodieLogBlock.HoodieLogBlockType.HFILE_DATA_BLOCK) { + dataBlock = new HoodieHFileDataBlock(records, header, Compression.Algorithm.GZ); + } else if (logBlockType == HoodieLogBlock.HoodieLogBlockType.PARQUET_DATA_BLOCK) { + dataBlock = new HoodieParquetDataBlock(records, header, HoodieRecord.RECORD_KEY_METADATA_FIELD, CompressionCodecName.GZIP); + } else { + dataBlock = new HoodieAvroDataBlock(records, header, HoodieRecord.RECORD_KEY_METADATA_FIELD); + } writer.appendBlock(dataBlock); return writer; } diff --git a/hudi-integ-test/README.md b/hudi-integ-test/README.md index ffdedf849298e..7ee4598ba3bcb 100644 --- a/hudi-integ-test/README.md +++ b/hudi-integ-test/README.md @@ -82,8 +82,8 @@ spark-submit 2. YAML file -Choose to write up the entire DAG of operations in YAML, take a look at `complex-dag-cow.yaml` or -`complex-dag-mor.yaml`. +Choose to write up the entire DAG of operations in YAML; take a look at +`simple-deltastreamer.yaml`. Once you're ready with the DAG you want to execute, simply pass the yaml file path as follows: ``` @@ -177,7 +177,7 @@ cd /opt Copy the integration tests jar into the docker container ``` -docker cp packaging/hudi-integ-test-bundle/target/hudi-integ-test-bundle-0.10.0-SNAPSHOT.jar adhoc-2:/opt +docker cp packaging/hudi-integ-test-bundle/target/hudi-integ-test-bundle-0.11.0-SNAPSHOT.jar adhoc-2:/opt ``` ``` @@ -217,7 +217,7 @@ spark-submit \ --conf spark.driver.extraClassPath=/var/demo/jars/* \ --conf spark.executor.extraClassPath=/var/demo/jars/* \ --class org.apache.hudi.integ.testsuite.HoodieTestSuiteJob \ -/opt/hudi-integ-test-bundle-0.10.0-SNAPSHOT.jar \ +/opt/hudi-integ-test-bundle-0.11.0-SNAPSHOT.jar \ --source-ordering-field test_suite_source_ordering_field \ --use-deltastreamer \ --target-base-path /user/hive/warehouse/hudi-integ-test-suite/output \ @@ -227,7 +227,7 @@ spark-submit \ --schemaprovider-class org.apache.hudi.integ.testsuite.schema.TestSuiteFileBasedSchemaProvider \ --source-class org.apache.hudi.utilities.sources.AvroDFSSource \ --input-file-size 125829120 \ ---workload-yaml-path file:/var/hoodie/ws/docker/demo/config/test-suite/complex-dag-cow.yaml \ +--workload-yaml-path file:/var/hoodie/ws/docker/demo/config/test-suite/simple-deltastreamer.yaml \ --workload-generator-classname org.apache.hudi.integ.testsuite.dag.WorkflowDagGenerator \ --table-type COPY_ON_WRITE \ --compact-scheduling-minshare 1 \ @@ -264,7 +264,7 @@ spark-submit \ --conf spark.driver.extraClassPath=/var/demo/jars/* \ --conf spark.executor.extraClassPath=/var/demo/jars/* \ --class org.apache.hudi.integ.testsuite.HoodieTestSuiteJob \ -/opt/hudi-integ-test-bundle-0.10.0-SNAPSHOT.jar \ +/opt/hudi-integ-test-bundle-0.11.0-SNAPSHOT.jar \ --source-ordering-field test_suite_source_ordering_field \ --use-deltastreamer \ --target-base-path /user/hive/warehouse/hudi-integ-test-suite/output \ @@ -274,7 +274,7 @@ spark-submit \ --schemaprovider-class org.apache.hudi.integ.testsuite.schema.TestSuiteFileBasedSchemaProvider \ --source-class org.apache.hudi.utilities.sources.AvroDFSSource \ --input-file-size 125829120 \ ---workload-yaml-path file:/var/hoodie/ws/docker/demo/config/test-suite/complex-dag-mor.yaml \ +--workload-yaml-path file:/var/hoodie/ws/docker/demo/config/test-suite/simple-deltastreamer.yaml \ --workload-generator-classname org.apache.hudi.integ.testsuite.dag.WorkflowDagGenerator \
--table-type MERGE_ON_READ \ --compact-scheduling-minshare 1 \ @@ -308,16 +308,16 @@ contents both via spark datasource and hive table via spark sql engine. Hive val If you have "ValidateDatasetNode" in your dag, do not replace hive jars as instructed above. Spark sql engine does not work well w/ hive2* jars. So, after running docker setup, follow the steps below. ``` -docker cp packaging/hudi-integ-test-bundle/target/hudi-integ-test-bundle-0.10.0-SNAPSHOT.jar adhoc-2:/opt/ +docker cp packaging/hudi-integ-test-bundle/target/hudi-integ-test-bundle-0.11.0-SNAPSHOT.jar adhoc-2:/opt/ docker cp docker/demo/config/test-suite/test.properties adhoc-2:/opt/ ``` Also copy your dag of interest to adhoc-2:/opt/ ``` -docker cp docker/demo/config/test-suite/complex-dag-cow.yaml adhoc-2:/opt/ +docker cp docker/demo/config/test-suite/simple-deltastreamer.yaml adhoc-2:/opt/ ``` For repeated runs, two additional configs need to be set: "dag_rounds" and "dag_intermittent_delay_mins". -This means that your dag will be repeated for N times w/ a delay of Y mins between each round. Note: complex-dag-cow.yaml +This means that your dag will be repeated N times w/ a delay of Y mins between each round. Note: simple-deltastreamer.yaml already has all these configs set, so no changes are required just to try it out. Also, ValidateDatasetNode can be configured in two ways. Either with "delete_input_data" set to true or without @@ -457,7 +457,7 @@ spark-submit \ --conf spark.driver.extraClassPath=/var/demo/jars/* \ --conf spark.executor.extraClassPath=/var/demo/jars/* \ --class org.apache.hudi.integ.testsuite.HoodieTestSuiteJob \ -/opt/hudi-integ-test-bundle-0.10.0-SNAPSHOT.jar \ +/opt/hudi-integ-test-bundle-0.11.0-SNAPSHOT.jar \ --source-ordering-field test_suite_source_ordering_field \ --use-deltastreamer \ --target-base-path /user/hive/warehouse/hudi-integ-test-suite/output \ @@ -467,7 +467,7 @@ spark-submit \ --schemaprovider-class org.apache.hudi.integ.testsuite.schema.TestSuiteFileBasedSchemaProvider \ --source-class org.apache.hudi.utilities.sources.AvroDFSSource \ --input-file-size 125829120 \ ---workload-yaml-path file:/opt/complex-dag-cow.yaml \ +--workload-yaml-path file:/opt/simple-deltastreamer.yaml \ --workload-generator-classname org.apache.hudi.integ.testsuite.dag.WorkflowDagGenerator \ --table-type COPY_ON_WRITE \ --compact-scheduling-minshare 1 \ @@ -486,8 +486,8 @@ If you wish to enable metrics add below properties as well A few ready-to-use dags are available under docker/demo/config/test-suite/ that can give you an idea for long-running dags. ``` -complex-dag-cow.yaml: simple 1 round dag for COW table. -complex-dag-mor.yaml: simple 1 round dag for MOR table. +simple-deltastreamer.yaml: simple 1 round dag that works for both COW and MOR tables. cow-clustering-example.yaml : dag with 3 rounds, in which inline clustering will trigger during 2nd iteration. cow-long-running-example.yaml : long running dag with 50 iterations. only 1 partition is used. cow-long-running-multi-partitions.yaml: long running dag with 50 iterations with multiple partitions.
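To make the two repeated-run knobs concrete, here is a minimal sketch of how they sit at the top of a dag YAML. The structure mirrors the shipped test-suite configs under docker/demo/config/test-suite/, but the file name, node names and values below are illustrative, not a specific shipped dag:

```
dag_name: repeated-run-example.yaml   # illustrative name, not a shipped file
dag_rounds: 3                         # N: repeat the whole DAG 3 times
dag_intermittent_delay_mins: 1        # Y: wait 1 min between rounds
dag_content:
  first_insert:
    config:
      record_size: 1000
      num_partitions_insert: 1
      repeat_count: 1
      num_records_insert: 1000
    type: InsertNode
    deps: none
  last_validate:
    config:
      # keep this aligned with dag_rounds so validation fires on the final round
      execute_itr_count: 3
      validate_clean: true
    type: ValidateAsyncOperations
    deps: first_insert
```

The same dag can then double as a one-shot smoke test (`dag_rounds: 1`) or a long-running soak test just by changing these two values, which is the pattern the long-running dags listed above follow.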
diff --git a/hudi-integ-test/pom.xml b/hudi-integ-test/pom.xml index 7ca976f9f80a6..08affb5e48dee 100644 --- a/hudi-integ-test/pom.xml +++ b/hudi-integ-test/pom.xml @@ -363,7 +363,6 @@ org.awaitility awaitility - 3.1.2 test diff --git a/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/HoodieTestSuiteWriter.java b/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/HoodieTestSuiteWriter.java index 41ef3f4ab968c..a98c7f2aec3f0 100644 --- a/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/HoodieTestSuiteWriter.java +++ b/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/HoodieTestSuiteWriter.java @@ -18,16 +18,15 @@ package org.apache.hudi.integ.testsuite; -import java.io.IOException; -import java.io.Serializable; -import org.apache.avro.Schema; -import org.apache.avro.generic.GenericRecord; import org.apache.hudi.avro.model.HoodieCompactionPlan; import org.apache.hudi.client.HoodieReadClient; import org.apache.hudi.client.SparkRDDWriteClient; import org.apache.hudi.client.WriteStatus; import org.apache.hudi.client.common.HoodieSparkEngineContext; +import org.apache.hudi.common.model.HoodieAvroRecord; +import org.apache.hudi.common.model.HoodieCommitMetadata; import org.apache.hudi.common.model.HoodieRecord; +import org.apache.hudi.common.model.HoodieRecordPayload; import org.apache.hudi.common.model.WriteOperationType; import org.apache.hudi.common.table.timeline.HoodieActiveTimeline; import org.apache.hudi.common.util.Option; @@ -36,6 +35,7 @@ import org.apache.hudi.config.HoodieIndexConfig; import org.apache.hudi.config.HoodiePayloadConfig; import org.apache.hudi.config.HoodieWriteConfig; +import org.apache.hudi.data.HoodieJavaRDD; import org.apache.hudi.index.HoodieIndex; import org.apache.hudi.integ.testsuite.HoodieTestSuiteJob.HoodieTestSuiteConfig; import org.apache.hudi.integ.testsuite.dag.nodes.CleanNode; @@ -43,8 +43,13 @@ import org.apache.hudi.integ.testsuite.dag.nodes.RollbackNode; import org.apache.hudi.integ.testsuite.dag.nodes.ScheduleCompactNode; import org.apache.hudi.integ.testsuite.writer.DeltaWriteStats; +import org.apache.hudi.table.HoodieSparkTable; +import org.apache.hudi.table.action.HoodieWriteMetadata; +import org.apache.hudi.table.action.compact.CompactHelpers; import org.apache.hudi.utilities.schema.SchemaProvider; +import org.apache.avro.Schema; +import org.apache.avro.generic.GenericRecord; import org.apache.hadoop.conf.Configuration; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; @@ -52,6 +57,8 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.io.IOException; +import java.io.Serializable; import java.util.Arrays; import java.util.HashMap; import java.util.HashSet; @@ -128,7 +135,7 @@ public RDD getNextBatch() throws Exception { Pair>> nextBatch = fetchSource(); lastCheckpoint = Option.of(nextBatch.getValue().getLeft()); JavaRDD inputRDD = nextBatch.getRight().getRight(); - return inputRDD.map(r -> (GenericRecord) r.getData() + return inputRDD.map(r -> (GenericRecord) ((HoodieAvroRecord) r).getData() .getInsertValue(new Schema.Parser().parse(schema)).get()).rdd(); } @@ -214,7 +221,8 @@ public JavaRDD compact(Option instantTime) throws Exception } } if (instantTime.isPresent()) { - return (JavaRDD) writeClient.compact(instantTime.get()); + HoodieWriteMetadata> compactionMetadata = writeClient.compact(instantTime.get()); + return compactionMetadata.getWriteStatuses(); } else { return null; } @@ -271,7 +279,9 @@ public void commitCompaction(JavaRDD 
records, JavaRDD s.getFilePath()).collect().get(0)); } - writeClient.commitCompaction(instantTime.get(), records, Option.of(extraMetadata)); + HoodieSparkTable table = HoodieSparkTable.create(writeClient.getConfig(), writeClient.getEngineContext()); + HoodieCommitMetadata metadata = CompactHelpers.getInstance().createCompactionMetadata(table, instantTime.get(), HoodieJavaRDD.of(records), writeClient.getConfig().getSchema()); + writeClient.commitCompaction(instantTime.get(), metadata, Option.of(extraMetadata)); } } diff --git a/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/configuration/DeltaConfig.java b/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/configuration/DeltaConfig.java index b0ae06b6039d4..2c39f5f93a52c 100644 --- a/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/configuration/DeltaConfig.java +++ b/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/configuration/DeltaConfig.java @@ -95,6 +95,9 @@ public static class Config { private static String SCHEMA_VERSION = "schema_version"; private static String NUM_ROLLBACKS = "num_rollbacks"; private static String ENABLE_ROW_WRITING = "enable_row_writing"; + private static String ENABLE_METADATA_VALIDATE = "enable_metadata_validate"; + private static String VALIDATE_FULL_DATA = "validate_full_data"; + private static String DELETE_INPUT_DATA_EXCEPT_LATEST = "delete_input_data_except_latest"; // Spark SQL Create Table private static String TABLE_TYPE = "table_type"; @@ -149,6 +152,10 @@ public int getRecordSize() { return Integer.valueOf(configsMap.getOrDefault(RECORD_SIZE, 1024).toString()); } + public boolean isEnableMetadataValidate() { + return Boolean.valueOf(configsMap.getOrDefault(ENABLE_METADATA_VALIDATE, false).toString()); + } + public int getNumInsertPartitions() { return Integer.valueOf(configsMap.getOrDefault(NUM_PARTITIONS_INSERT, 1).toString()); } @@ -201,10 +208,18 @@ public boolean isDeleteInputData() { return Boolean.valueOf(configsMap.getOrDefault(DELETE_INPUT_DATA, false).toString()); } + public boolean isDeleteInputDataExceptLatest() { + return Boolean.valueOf(configsMap.getOrDefault(DELETE_INPUT_DATA_EXCEPT_LATEST, false).toString()); + } + public boolean isValidateHive() { return Boolean.valueOf(configsMap.getOrDefault(VALIDATE_HIVE, false).toString()); } + public boolean isValidateFullData() { + return Boolean.valueOf(configsMap.getOrDefault(VALIDATE_FULL_DATA, false).toString()); + } + public int getIterationCountToExecute() { return Integer.valueOf(configsMap.getOrDefault(EXECUTE_ITR_COUNT, -1).toString()); } diff --git a/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/dag/nodes/BaseValidateDatasetNode.java b/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/dag/nodes/BaseValidateDatasetNode.java index 1ae6d948f3e43..09d44d986e183 100644 --- a/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/dag/nodes/BaseValidateDatasetNode.java +++ b/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/dag/nodes/BaseValidateDatasetNode.java @@ -19,16 +19,14 @@ package org.apache.hudi.integ.testsuite.dag.nodes; -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; - import org.apache.hudi.DataSourceWriteOptions; -import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.integ.testsuite.configuration.DeltaConfig; import org.apache.hudi.integ.testsuite.dag.ExecutionContext; import org.apache.hudi.integ.testsuite.schema.SchemaUtils; +import 
org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.api.java.function.ReduceFunction; import org.apache.spark.sql.Dataset; @@ -42,13 +40,13 @@ import org.apache.spark.sql.types.StructType; import org.slf4j.Logger; +import java.util.List; +import java.util.stream.Collectors; + import scala.Tuple2; import scala.collection.JavaConversions; import scala.collection.JavaConverters; -import java.util.List; -import java.util.stream.Collectors; - /** * This node validates that the contents of the input path are intact in Hudi. By default no configs are required for this node, but there is an * optional config "delete_input_data" that you can set for this node. If set, once validation completes, contents from inputPath are deleted. This will come in handy for long running test suites. */ @@ -78,6 +76,7 @@ public abstract Dataset getDatasetToValidate(SparkSession session, Executio public void execute(ExecutionContext context, int curItrCount) throws Exception { SparkSession session = SparkSession.builder().sparkContext(context.getJsc().sc()).getOrCreate(); + // todo: Fix partitioning schemes. For now, assumes data based partitioning. String inputPath = context.getHoodieTestSuiteWriter().getCfg().inputBasePath + "/*/*"; log.warn("Validation using data from input path " + inputPath); @@ -97,43 +96,60 @@ public void execute(ExecutionContext context, int curItrCount) throws Exception // read from hudi and remove meta columns. Dataset trimmedHudiDf = getDatasetToValidate(session, context, inputSnapshotDf.schema()); - Dataset intersectionDf = inputSnapshotDf.intersect(trimmedHudiDf); - long inputCount = inputSnapshotDf.count(); - long outputCount = trimmedHudiDf.count(); - log.debug("Input count: " + inputCount + "; output count: " + outputCount); - // the intersected df should be same as inputDf. if not, there is some mismatch. - if (outputCount == 0 || inputCount == 0 || inputSnapshotDf.except(intersectionDf).count() != 0) { - log.error("Data set validation failed. Total count in hudi " + outputCount + ", input df count " + inputCount); - throw new AssertionError("Hudi contents does not match contents input data. "); - } - - if (config.isValidateHive()) { - String database = context.getWriterContext().getProps().getString(DataSourceWriteOptions.HIVE_DATABASE().key()); - String tableName = context.getWriterContext().getProps().getString(DataSourceWriteOptions.HIVE_TABLE().key()); - log.warn("Validating hive table with db : " + database + " and table : " + tableName); - Dataset cowDf = session.sql("SELECT * FROM " + database + "."
+ tableName); - Dataset trimmedCowDf = cowDf.drop(HoodieRecord.COMMIT_TIME_METADATA_FIELD).drop(HoodieRecord.COMMIT_SEQNO_METADATA_FIELD).drop(HoodieRecord.RECORD_KEY_METADATA_FIELD) - .drop(HoodieRecord.PARTITION_PATH_METADATA_FIELD).drop(HoodieRecord.FILENAME_METADATA_FIELD); - intersectionDf = inputSnapshotDf.intersect(trimmedCowDf); - outputCount = trimmedHudiDf.count(); - log.warn("Input count: " + inputCount + "; output count: " + outputCount); + if (config.isValidateFullData()) { + log.debug("Validating full dataset"); + Dataset exceptInputDf = inputSnapshotDf.except(trimmedHudiDf); + Dataset exceptHudiDf = trimmedHudiDf.except(inputSnapshotDf); + long exceptInputCount = exceptInputDf.count(); + long exceptHudiCount = exceptHudiDf.count(); + log.debug("Except input df count " + exceptInputCount + ", except hudi count " + exceptHudiCount); + if (exceptInputCount != 0 || exceptHudiCount != 0) { + log.error("Data set validation failed. Total count in hudi " + trimmedHudiDf.count() + ", input df count " + inputSnapshotDf.count() + + ". InputDf except hudi df = " + exceptInputCount + ", Hudi df except Input df " + exceptHudiCount); + throw new AssertionError("Hudi contents do not match the input data. "); + } + } else { + Dataset intersectionDf = inputSnapshotDf.intersect(trimmedHudiDf); + long inputCount = inputSnapshotDf.count(); + long outputCount = trimmedHudiDf.count(); + log.debug("Input count: " + inputCount + "; output count: " + outputCount); // the intersected df should be same as inputDf. if not, there is some mismatch. - if (outputCount == 0 || inputCount == 0 || inputSnapshotDf.except(intersectionDf).count() != 0) { - log.error("Data set validation failed. Total count in hudi " + outputCount + ", input df count " + inputCount); - throw new AssertionError("Hudi contents does not match contents input data. "); + if (outputCount == 0 || inputCount == 0 || inputSnapshotDf.except(intersectionDf).count() != 0) { + log.error("Data set validation failed. Total count in hudi " + outputCount + ", input df count " + inputCount); + throw new AssertionError("Hudi contents do not match the input data. "); } - } - // if delete input data is enabled, erase input data. - if (config.isDeleteInputData()) { - // clean up input data for current group of writes. - inputPathStr = context.getHoodieTestSuiteWriter().getCfg().inputBasePath; - FileSystem fs = new Path(inputPathStr) - .getFileSystem(context.getHoodieTestSuiteWriter().getConfiguration()); - FileStatus[] fileStatuses = fs.listStatus(new Path(inputPathStr)); - for (FileStatus fileStatus : fileStatuses) { - log.debug("Micro batch to be deleted " + fileStatus.getPath().toString()); - fs.delete(fileStatus.getPath(), true); + if (config.isValidateHive()) { + String database = context.getWriterContext().getProps().getString(DataSourceWriteOptions.HIVE_DATABASE().key()); + String tableName = context.getWriterContext().getProps().getString(DataSourceWriteOptions.HIVE_TABLE().key()); + log.warn("Validating hive table with db : " + database + " and table : " + tableName); + session.sql("REFRESH TABLE " + database + "." + tableName); + Dataset cowDf = session.sql("SELECT _row_key, rider, driver, begin_lat, begin_lon, end_lat, end_lon, fare, _hoodie_is_deleted, " + + "test_suite_source_ordering_field FROM " + database + "."
+ tableName); + Dataset reorderedInputDf = inputSnapshotDf.select("_row_key", "rider", "driver", "begin_lat", "begin_lon", "end_lat", "end_lon", "fare", + "_hoodie_is_deleted", "test_suite_source_ordering_field"); + + Dataset intersectedHiveDf = reorderedInputDf.intersect(cowDf); + outputCount = trimmedHudiDf.count(); + log.warn("Input count: " + inputCount + "; output count: " + outputCount); + // the intersected df should be same as inputDf. if not, there is some mismatch. + if (outputCount == 0 || reorderedInputDf.except(intersectedHiveDf).count() != 0) { + log.error("Data set validation failed for COW hive table. Total count in hudi " + outputCount + ", input df count " + inputCount); + throw new AssertionError("Hudi hive table contents do not match the input data. "); + } + } + + // if delete input data is enabled, erase input data. + if (config.isDeleteInputData()) { + // clean up input data for current group of writes. + inputPathStr = context.getHoodieTestSuiteWriter().getCfg().inputBasePath; + FileSystem fs = new Path(inputPathStr) + .getFileSystem(context.getHoodieTestSuiteWriter().getConfiguration()); + FileStatus[] fileStatuses = fs.listStatus(new Path(inputPathStr)); + for (FileStatus fileStatus : fileStatuses) { + log.debug("Micro batch to be deleted " + fileStatus.getPath().toString()); + fs.delete(fileStatus.getPath(), true); + } } } } @@ -146,8 +162,8 @@ private Dataset getInputDf(ExecutionContext context, SparkSession session, Dataset inputDf = session.read().format("avro").load(inputPath); ExpressionEncoder encoder = getEncoder(inputDf.schema()); return inputDf.groupByKey( - (MapFunction) value -> - value.getAs(partitionPathField) + "+" + value.getAs(recordKeyField), Encoders.STRING()) + (MapFunction) value -> + value.getAs(partitionPathField) + "+" + value.getAs(recordKeyField), Encoders.STRING()) .reduceGroups((ReduceFunction) (v1, v2) -> { int ts1 = v1.getAs(SchemaUtils.SOURCE_ORDERING_FIELD); int ts2 = v2.getAs(SchemaUtils.SOURCE_ORDERING_FIELD); diff --git a/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/dag/nodes/DeleteInputDatasetNode.java b/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/dag/nodes/DeleteInputDatasetNode.java new file mode 100644 index 0000000000000..2836f240ead3c --- /dev/null +++ b/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/dag/nodes/DeleteInputDatasetNode.java @@ -0,0 +1,56 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License.
+ */ + +package org.apache.hudi.integ.testsuite.dag.nodes; + +import org.apache.hudi.integ.testsuite.configuration.DeltaConfig; +import org.apache.hudi.integ.testsuite.dag.ExecutionContext; + +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; + +/** + * Deletes all input except latest batch. Mostly used in insert_overwrite operations. + */ +public class DeleteInputDatasetNode extends DagNode { + + public DeleteInputDatasetNode(DeltaConfig.Config config) { + this.config = config; + } + + @Override + public void execute(ExecutionContext context, int curItrCount) throws Exception { + + String latestBatch = String.valueOf(context.getWriterContext().getDeltaGenerator().getBatchId()); + + if (config.isDeleteInputDataExceptLatest()) { + String inputPathStr = context.getHoodieTestSuiteWriter().getCfg().inputBasePath; + FileSystem fs = new Path(inputPathStr) + .getFileSystem(context.getHoodieTestSuiteWriter().getConfiguration()); + FileStatus[] fileStatuses = fs.listStatus(new Path(inputPathStr)); + for (FileStatus fileStatus : fileStatuses) { + if (!fileStatus.getPath().getName().equals(latestBatch)) { + log.debug("Micro batch to be deleted " + fileStatus.getPath().toString()); + fs.delete(fileStatus.getPath(), true); + } + } + } + } +} diff --git a/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/dag/nodes/ValidateDatasetNode.java b/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/dag/nodes/ValidateDatasetNode.java index 03b37a9fc2b39..cc293ea470164 100644 --- a/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/dag/nodes/ValidateDatasetNode.java +++ b/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/dag/nodes/ValidateDatasetNode.java @@ -18,6 +18,7 @@ package org.apache.hudi.integ.testsuite.dag.nodes; +import org.apache.hudi.common.config.HoodieMetadataConfig; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.integ.testsuite.configuration.DeltaConfig.Config; import org.apache.hudi.integ.testsuite.dag.ExecutionContext; @@ -49,7 +50,8 @@ public Dataset getDatasetToValidate(SparkSession session, ExecutionContext StructType inputSchema) { String hudiPath = context.getHoodieTestSuiteWriter().getCfg().targetBasePath + "/*/*/*"; log.info("Validate data in target hudi path " + hudiPath); - Dataset hudiDf = session.read().format("hudi").load(hudiPath); + Dataset hudiDf = session.read().option(HoodieMetadataConfig.ENABLE.key(), String.valueOf(config.isEnableMetadataValidate())) + .format("hudi").load(hudiPath); return hudiDf.drop(HoodieRecord.COMMIT_TIME_METADATA_FIELD).drop(HoodieRecord.COMMIT_SEQNO_METADATA_FIELD).drop(HoodieRecord.RECORD_KEY_METADATA_FIELD) .drop(HoodieRecord.PARTITION_PATH_METADATA_FIELD).drop(HoodieRecord.FILENAME_METADATA_FIELD); } diff --git a/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/generator/DeltaGenerator.java b/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/generator/DeltaGenerator.java index 6d5bc4ffedeca..69e32dfbc1182 100644 --- a/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/generator/DeltaGenerator.java +++ b/hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/generator/DeltaGenerator.java @@ -110,6 +110,10 @@ public JavaRDD writeRecords(JavaRDD records) { return ws; } + public int getBatchId() { + return batchId; + } + public JavaRDD generateInserts(Config operation) { int numPartitions = operation.getNumInsertPartitions(); long recordsPerPartition = 
operation.getNumRecordsInsert(); diff --git a/hudi-integ-test/src/main/scala/org/apache/hudi/integ/testsuite/dag/nodes/SparkInsertNode.scala b/hudi-integ-test/src/main/scala/org/apache/hudi/integ/testsuite/dag/nodes/SparkInsertNode.scala index 1b69cf8faf494..b8c46cad3fd69 100644 --- a/hudi-integ-test/src/main/scala/org/apache/hudi/integ/testsuite/dag/nodes/SparkInsertNode.scala +++ b/hudi-integ-test/src/main/scala/org/apache/hudi/integ/testsuite/dag/nodes/SparkInsertNode.scala @@ -54,13 +54,18 @@ class SparkInsertNode(dagNodeConfig: Config) extends DagNode[RDD[WriteStatus]] { context.getWriterContext.getSparkSession) inputDF.write.format("hudi") .options(DataSourceWriteOptions.translateSqlOptions(context.getWriterContext.getProps.asScala.toMap)) + .option(DataSourceWriteOptions.PRECOMBINE_FIELD.key(), "test_suite_source_ordering_field") .option(DataSourceWriteOptions.TABLE_NAME.key, context.getHoodieTestSuiteWriter.getCfg.targetTableName) .option(DataSourceWriteOptions.TABLE_TYPE.key, context.getHoodieTestSuiteWriter.getCfg.tableType) - .option(DataSourceWriteOptions.OPERATION.key, DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL) + .option(DataSourceWriteOptions.OPERATION.key, getOperation()) .option(DataSourceWriteOptions.COMMIT_METADATA_KEYPREFIX.key, "deltastreamer.checkpoint.key") .option("deltastreamer.checkpoint.key", context.getWriterContext.getHoodieTestSuiteWriter.getLastCheckpoint.orElse("")) .option(HoodieWriteConfig.TBL_NAME.key, context.getHoodieTestSuiteWriter.getCfg.targetTableName) - .mode(SaveMode.Overwrite) + .mode(SaveMode.Append) .save(context.getHoodieTestSuiteWriter.getWriteConfig.getBasePath) } + + def getOperation(): String = { + DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL + } } diff --git a/hudi-integ-test/src/main/scala/org/apache/hudi/integ/testsuite/dag/nodes/SparkInsertOverwriteNode.scala b/hudi-integ-test/src/main/scala/org/apache/hudi/integ/testsuite/dag/nodes/SparkInsertOverwriteNode.scala new file mode 100644 index 0000000000000..6dd2eac522974 --- /dev/null +++ b/hudi-integ-test/src/main/scala/org/apache/hudi/integ/testsuite/dag/nodes/SparkInsertOverwriteNode.scala @@ -0,0 +1,31 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hudi.integ.testsuite.dag.nodes + +import org.apache.hudi.DataSourceWriteOptions +import org.apache.hudi.integ.testsuite.configuration.DeltaConfig.Config + +class SparkInsertOverwriteNode(dagNodeConfig: Config) extends SparkInsertNode(dagNodeConfig) { + + override def getOperation(): String = { + DataSourceWriteOptions.INSERT_OVERWRITE_OPERATION_OPT_VAL + } + +} diff --git a/hudi-integ-test/src/main/scala/org/apache/hudi/integ/testsuite/dag/nodes/SparkInsertOverwriteTableNode.scala b/hudi-integ-test/src/main/scala/org/apache/hudi/integ/testsuite/dag/nodes/SparkInsertOverwriteTableNode.scala new file mode 100644 index 0000000000000..a6b80b3a90cc1 --- /dev/null +++ b/hudi-integ-test/src/main/scala/org/apache/hudi/integ/testsuite/dag/nodes/SparkInsertOverwriteTableNode.scala @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.integ.testsuite.dag.nodes + +import org.apache.hudi.DataSourceWriteOptions +import org.apache.hudi.integ.testsuite.configuration.DeltaConfig.Config + +class SparkInsertOverwriteTableNode(dagNodeConfig: Config) extends SparkInsertNode(dagNodeConfig) { + + override def getOperation(): String = { + DataSourceWriteOptions.INSERT_OVERWRITE_TABLE_OPERATION_OPT_VAL + } +} diff --git a/hudi-integ-test/src/main/scala/org/apache/hudi/integ/testsuite/dag/nodes/SparkUpsertNode.scala b/hudi-integ-test/src/main/scala/org/apache/hudi/integ/testsuite/dag/nodes/SparkUpsertNode.scala index 858827a7b2c47..113de93adbb3a 100644 --- a/hudi-integ-test/src/main/scala/org/apache/hudi/integ/testsuite/dag/nodes/SparkUpsertNode.scala +++ b/hudi-integ-test/src/main/scala/org/apache/hudi/integ/testsuite/dag/nodes/SparkUpsertNode.scala @@ -18,49 +18,17 @@ package org.apache.hudi.integ.testsuite.dag.nodes -import org.apache.hudi.client.WriteStatus -import org.apache.hudi.config.HoodieWriteConfig +import org.apache.hudi.DataSourceWriteOptions import org.apache.hudi.integ.testsuite.configuration.DeltaConfig.Config -import org.apache.hudi.integ.testsuite.dag.ExecutionContext -import org.apache.hudi.{AvroConversionUtils, DataSourceWriteOptions} -import org.apache.spark.rdd.RDD -import org.apache.spark.sql.SaveMode - -import scala.collection.JavaConverters._ /** * Spark datasource based upsert node * * @param dagNodeConfig DAG node configurations. */ -class SparkUpsertNode(dagNodeConfig: Config) extends DagNode[RDD[WriteStatus]] { - - config = dagNodeConfig +class SparkUpsertNode(dagNodeConfig: Config) extends SparkInsertNode(dagNodeConfig) { - /** - * Execute the {@link DagNode}. - * - * @param context The context needed for an execution of a node. - * @param curItrCount iteration count for executing the node. - * @throws Exception Thrown if the execution failed. 
- */ - override def execute(context: ExecutionContext, curItrCount: Int): Unit = { - if (!config.isDisableGenerate) { - println("Generating input data for node {}", this.getName) - context.getDeltaGenerator().writeRecords(context.getDeltaGenerator().generateInserts(config)).count() - } - val inputDF = AvroConversionUtils.createDataFrame(context.getWriterContext.getHoodieTestSuiteWriter.getNextBatch, - context.getWriterContext.getHoodieTestSuiteWriter.getSchema, - context.getWriterContext.getSparkSession) - inputDF.write.format("hudi") - .options(DataSourceWriteOptions.translateSqlOptions(context.getWriterContext.getProps.asScala.toMap)) - .option(DataSourceWriteOptions.TABLE_NAME.key, context.getHoodieTestSuiteWriter.getCfg.targetTableName) - .option(DataSourceWriteOptions.TABLE_TYPE.key, context.getHoodieTestSuiteWriter.getCfg.tableType) - .option(DataSourceWriteOptions.OPERATION.key, DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL) - .option(DataSourceWriteOptions.COMMIT_METADATA_KEYPREFIX.key, "deltastreamer.checkpoint.key") - .option("deltastreamer.checkpoint.key", context.getWriterContext.getHoodieTestSuiteWriter.getLastCheckpoint.orElse("")) - .option(HoodieWriteConfig.TBL_NAME.key, context.getHoodieTestSuiteWriter.getCfg.targetTableName) - .mode(SaveMode.Append) - .save(context.getHoodieTestSuiteWriter.getWriteConfig.getBasePath) + override def getOperation(): String = { + DataSourceWriteOptions.UPSERT_OPERATION_OPT_VAL } } diff --git a/hudi-kafka-connect/pom.xml b/hudi-kafka-connect/pom.xml index b12468579161e..8845bfb801ae3 100644 --- a/hudi-kafka-connect/pom.xml +++ b/hudi-kafka-connect/pom.xml @@ -66,7 +66,7 @@ com.github.os72 protoc-jar-maven-plugin - 3.1.0.1 + 3.11.4 generate-sources diff --git a/hudi-kafka-connect/src/main/java/org/apache/hudi/connect/writers/AbstractConnectWriter.java b/hudi-kafka-connect/src/main/java/org/apache/hudi/connect/writers/AbstractConnectWriter.java index a579484f67369..649150d16c828 100644 --- a/hudi-kafka-connect/src/main/java/org/apache/hudi/connect/writers/AbstractConnectWriter.java +++ b/hudi-kafka-connect/src/main/java/org/apache/hudi/connect/writers/AbstractConnectWriter.java @@ -20,6 +20,7 @@ import org.apache.hudi.client.WriteStatus; import org.apache.hudi.common.model.HoodieAvroPayload; +import org.apache.hudi.common.model.HoodieAvroRecord; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.model.HoodieRecordLocation; import org.apache.hudi.common.util.Option; @@ -81,7 +82,7 @@ public void writeRecord(SinkRecord record) throws IOException { } // Tag records with a file ID based on kafka partition and hudi partition. 
- HoodieRecord hoodieRecord = new HoodieRecord<>(keyGenerator.getKey(avroRecord.get()), new HoodieAvroPayload(avroRecord)); + HoodieRecord hoodieRecord = new HoodieAvroRecord<>(keyGenerator.getKey(avroRecord.get()), new HoodieAvroPayload(avroRecord)); String fileId = KafkaConnectUtils.hashDigest(String.format("%s-%s", record.kafkaPartition(), hoodieRecord.getPartitionPath())); hoodieRecord.unseal(); hoodieRecord.setCurrentLocation(new HoodieRecordLocation(instantTime, fileId)); diff --git a/hudi-kafka-connect/src/test/java/org/apache/hudi/writers/TestAbstractConnectWriter.java b/hudi-kafka-connect/src/test/java/org/apache/hudi/writers/TestAbstractConnectWriter.java index c8a3ad6ffd92e..7a286e565ea34 100644 --- a/hudi-kafka-connect/src/test/java/org/apache/hudi/writers/TestAbstractConnectWriter.java +++ b/hudi-kafka-connect/src/test/java/org/apache/hudi/writers/TestAbstractConnectWriter.java @@ -22,6 +22,7 @@ import org.apache.hudi.client.WriteStatus; import org.apache.hudi.common.config.TypedProperties; import org.apache.hudi.common.model.HoodieAvroPayload; +import org.apache.hudi.common.model.HoodieAvroRecord; import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.testutils.SchemaTestUtil; @@ -168,7 +169,7 @@ protected List flushRecords() { } private static HoodieRecord convertToHoodieRecords(IndexedRecord iRecord, String key, String partitionPath) { - return new HoodieRecord<>(new HoodieKey(key, partitionPath), + return new HoodieAvroRecord<>(new HoodieKey(key, partitionPath), new HoodieAvroPayload(Option.of((GenericRecord) iRecord))); } diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/java/org/apache/hudi/DataSourceUtils.java b/hudi-spark-datasource/hudi-spark-common/src/main/java/org/apache/hudi/DataSourceUtils.java index a632b5a4e9096..bad6c2d7219e1 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/java/org/apache/hudi/DataSourceUtils.java +++ b/hudi-spark-datasource/hudi-spark-common/src/main/java/org/apache/hudi/DataSourceUtils.java @@ -18,14 +18,12 @@ package org.apache.hudi; -import org.apache.avro.generic.GenericRecord; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; import org.apache.hudi.client.HoodieReadClient; import org.apache.hudi.client.HoodieWriteResult; import org.apache.hudi.client.SparkRDDWriteClient; import org.apache.hudi.client.common.HoodieSparkEngineContext; import org.apache.hudi.common.config.TypedProperties; +import org.apache.hudi.common.model.HoodieAvroRecord; import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.model.HoodieRecordPayload; @@ -47,12 +45,17 @@ import org.apache.hudi.hive.SlashEncodedDayPartitionValueExtractor; import org.apache.hudi.table.BulkInsertPartitioner; import org.apache.hudi.util.DataTypeUtils; + +import org.apache.avro.generic.GenericRecord; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; +import org.apache.spark.sql.hive.HiveExternalCatalog; import org.apache.spark.sql.types.StructType; import java.io.IOException; @@ -235,13 +238,13 @@ public static HoodieWriteResult doDeletePartitionsOperation(SparkRDDWriteClient public static HoodieRecord createHoodieRecord(GenericRecord gr, 
Comparable orderingVal, HoodieKey hKey, String payloadClass) throws IOException { HoodieRecordPayload payload = DataSourceUtils.createPayload(payloadClass, gr, orderingVal); - return new HoodieRecord<>(hKey, payload); + return new HoodieAvroRecord<>(hKey, payload); } public static HoodieRecord createHoodieRecord(GenericRecord gr, HoodieKey hKey, String payloadClass) throws IOException { HoodieRecordPayload payload = DataSourceUtils.createPayload(payloadClass, gr); - return new HoodieRecord<>(hKey, payload); + return new HoodieAvroRecord<>(hKey, payload); } /** @@ -290,6 +293,8 @@ public static HiveSyncConfig buildHiveSyncConfig(TypedProperties props, String b props.getString(DataSourceWriteOptions.HIVE_PASS().key(), DataSourceWriteOptions.HIVE_PASS().defaultValue()); hiveSyncConfig.jdbcUrl = props.getString(DataSourceWriteOptions.HIVE_URL().key(), DataSourceWriteOptions.HIVE_URL().defaultValue()); + hiveSyncConfig.metastoreUris = + props.getString(DataSourceWriteOptions.METASTORE_URIS().key(), DataSourceWriteOptions.METASTORE_URIS().defaultValue()); hiveSyncConfig.partitionFields = props.getStringList(DataSourceWriteOptions.HIVE_PARTITION_FIELDS().key(), ",", new ArrayList<>()); hiveSyncConfig.partitionValueExtractorClass = @@ -314,6 +319,9 @@ public static HiveSyncConfig buildHiveSyncConfig(TypedProperties props, String b (boolean) DataSourceWriteOptions.HIVE_SYNC_BUCKET_SYNC().defaultValue()) ? HiveSyncConfig.getBucketSpec(props.getString(HoodieIndexConfig.BUCKET_INDEX_HASH_FIELD.key()), props.getInteger(HoodieIndexConfig.BUCKET_INDEX_NUM_BUCKETS.key())) : null; + if (props.containsKey(HiveExternalCatalog.CREATED_SPARK_VERSION())) { + hiveSyncConfig.sparkVersion = props.getString(HiveExternalCatalog.CREATED_SPARK_VERSION()); + } return hiveSyncConfig; } diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/java/org/apache/hudi/SparkRowWriteHelper.java b/hudi-spark-datasource/hudi-spark-common/src/main/java/org/apache/hudi/SparkRowWriteHelper.java index 6f5dd3713d74f..ea9c9b2c03d93 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/java/org/apache/hudi/SparkRowWriteHelper.java +++ b/hudi-spark-datasource/hudi-spark-common/src/main/java/org/apache/hudi/SparkRowWriteHelper.java @@ -19,7 +19,6 @@ package org.apache.hudi; import org.apache.hudi.common.model.HoodieRecord; - import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.api.java.function.ReduceFunction; import org.apache.spark.sql.Dataset; @@ -30,14 +29,13 @@ import org.apache.spark.sql.catalyst.encoders.RowEncoder; import org.apache.spark.sql.catalyst.expressions.Attribute; import org.apache.spark.sql.types.StructType; - -import java.util.List; -import java.util.stream.Collectors; - import scala.Tuple2; import scala.collection.JavaConversions; import scala.collection.JavaConverters; +import java.util.List; +import java.util.stream.Collectors; + /** * Helper class to assist in deduplicating Rows for BulkInsert with Rows. */ @@ -55,20 +53,13 @@ public static SparkRowWriteHelper newInstance() { } public Dataset deduplicateRows(Dataset inputDf, String preCombineField, boolean isGlobalIndex) { - ExpressionEncoder encoder = getEncoder(inputDf.schema()); - - return inputDf.groupByKey( - (MapFunction) value -> - isGlobalIndex ? 
(value.getAs(HoodieRecord.RECORD_KEY_METADATA_FIELD)) : - (value.getAs(HoodieRecord.PARTITION_PATH_METADATA_FIELD) + "+" + value.getAs(HoodieRecord.RECORD_KEY_METADATA_FIELD)), Encoders.STRING()) - .reduceGroups((ReduceFunction) (v1, v2) -> { - if (((Comparable) v1.getAs(preCombineField)).compareTo(((Comparable) v2.getAs(preCombineField))) >= 0) { - return v1; - } else { - return v2; - } - } - ).map((MapFunction, Row>) value -> value._2, encoder); + return inputDf.groupByKey((MapFunction) value -> + isGlobalIndex + ? (value.getAs(HoodieRecord.RECORD_KEY_METADATA_FIELD)) + : (value.getAs(HoodieRecord.PARTITION_PATH_METADATA_FIELD) + "+" + value.getAs(HoodieRecord.RECORD_KEY_METADATA_FIELD)), Encoders.STRING()) + .reduceGroups((ReduceFunction) (v1, v2) -> + ((Comparable) v1.getAs(preCombineField)).compareTo(v2.getAs(preCombineField)) >= 0 ? v1 : v2) + .map((MapFunction, Row>) value -> value._2, getEncoder(inputDf.schema())); } private ExpressionEncoder getEncoder(StructType schema) { diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/java/org/apache/hudi/async/SparkStreamingAsyncClusteringService.java b/hudi-spark-datasource/hudi-spark-common/src/main/java/org/apache/hudi/async/SparkStreamingAsyncClusteringService.java index 81d880ee974df..f87e16a652900 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/java/org/apache/hudi/async/SparkStreamingAsyncClusteringService.java +++ b/hudi-spark-datasource/hudi-spark-common/src/main/java/org/apache/hudi/async/SparkStreamingAsyncClusteringService.java @@ -19,8 +19,8 @@ package org.apache.hudi.async; -import org.apache.hudi.client.AbstractClusteringClient; -import org.apache.hudi.client.AbstractHoodieWriteClient; +import org.apache.hudi.client.BaseClusterer; +import org.apache.hudi.client.BaseHoodieWriteClient; import org.apache.hudi.client.HoodieSparkClusteringClient; /** @@ -31,12 +31,12 @@ public class SparkStreamingAsyncClusteringService extends AsyncClusteringService private static final long serialVersionUID = 1L; - public SparkStreamingAsyncClusteringService(AbstractHoodieWriteClient writeClient) { + public SparkStreamingAsyncClusteringService(BaseHoodieWriteClient writeClient) { super(writeClient, true); } @Override - protected AbstractClusteringClient createClusteringClient(AbstractHoodieWriteClient client) { + protected BaseClusterer createClusteringClient(BaseHoodieWriteClient client) { return new HoodieSparkClusteringClient(client); } } diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/java/org/apache/hudi/async/SparkStreamingAsyncCompactService.java b/hudi-spark-datasource/hudi-spark-common/src/main/java/org/apache/hudi/async/SparkStreamingAsyncCompactService.java index 130ea7c27595a..2ff7b46c02018 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/java/org/apache/hudi/async/SparkStreamingAsyncCompactService.java +++ b/hudi-spark-datasource/hudi-spark-common/src/main/java/org/apache/hudi/async/SparkStreamingAsyncCompactService.java @@ -18,8 +18,8 @@ package org.apache.hudi.async; -import org.apache.hudi.client.AbstractCompactor; -import org.apache.hudi.client.AbstractHoodieWriteClient; +import org.apache.hudi.client.BaseCompactor; +import org.apache.hudi.client.BaseHoodieWriteClient; import org.apache.hudi.client.HoodieSparkCompactor; import org.apache.hudi.common.engine.HoodieEngineContext; @@ -31,12 +31,12 @@ public class SparkStreamingAsyncCompactService extends AsyncCompactService { private static final long serialVersionUID = 1L; - public 
SparkStreamingAsyncCompactService(HoodieEngineContext context, AbstractHoodieWriteClient client) { + public SparkStreamingAsyncCompactService(HoodieEngineContext context, BaseHoodieWriteClient client) { super(context, client, true); } @Override - protected AbstractCompactor createCompactor(AbstractHoodieWriteClient client) { + protected BaseCompactor createCompactor(BaseHoodieWriteClient client) { return new HoodieSparkCompactor(client, this.context); } } diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/BaseFileOnlyViewRelation.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/BaseFileOnlyViewRelation.scala new file mode 100644 index 0000000000000..8e94805328c69 --- /dev/null +++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/BaseFileOnlyViewRelation.scala @@ -0,0 +1,106 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi + +import org.apache.hadoop.fs.Path + +import org.apache.hudi.common.table.HoodieTableMetaClient +import org.apache.hudi.hadoop.HoodieROTablePathFilter + +import org.apache.spark.rdd.RDD +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions.{Expression, Literal} +import org.apache.spark.sql.execution.datasources.{FileStatusCache, PartitionedFile} +import org.apache.spark.sql.{Row, SQLContext} +import org.apache.spark.sql.sources.{BaseRelation, Filter} +import org.apache.spark.sql.types.{BooleanType, StructType} + +/** + * The implementation of [[BaseRelation]], used to respond to queries that only touch the base files (Parquet), + * such as querying COW tables in Snapshot-Query and Read_Optimized mode and MOR tables in Read_Optimized mode.
+ */ +class BaseFileOnlyViewRelation( + sqlContext: SQLContext, + metaClient: HoodieTableMetaClient, + optParams: Map[String, String], + userSchema: Option[StructType], + globPaths: Seq[Path] + ) extends HoodieBaseRelation(sqlContext, metaClient, optParams, userSchema) with SparkAdapterSupport { + + override def buildScan(requiredColumns: Array[String], filters: Array[Filter]): RDD[Row] = { + sqlContext.sparkSession.sessionState.conf.setConfString("spark.sql.parquet.enableVectorizedReader", "false") + + val filterExpressions = HoodieSparkUtils.convertToCatalystExpressions(filters, tableStructSchema) .getOrElse(Literal(true, BooleanType)) + // split the combined predicate once into partition predicates and data predicates + val (partitionFilters, dataFilters) = + HoodieDataSourceHelper.splitPartitionAndDataPredicates(sparkSession, filterExpressions, partitionColumns) + val partitionFiles = getPartitionFiles(partitionFilters, dataFilters) + + val maxSplitBytes = sparkSession.sessionState.conf.filesMaxPartitionBytes + val filePartitions = sparkAdapter.getFilePartitions(sparkSession, partitionFiles, maxSplitBytes) + + val requiredSchemaParquetReader = HoodieDataSourceHelper.buildHoodieParquetReader( + sparkSession = sparkSession, + dataSchema = tableStructSchema, + partitionSchema = StructType(Nil), + requiredSchema = tableStructSchema, + filters = filters, + options = optParams, + hadoopConf = sparkSession.sessionState.newHadoopConf() + ) + + new HoodieFileScanRDD(sparkSession, requiredColumns, tableStructSchema, + requiredSchemaParquetReader, filePartitions) + } + + private def getPartitionFiles(partitionFilters: Seq[Expression], dataFilters: Seq[Expression]): Seq[PartitionedFile] = { + val partitionDirectories = if (globPaths.isEmpty) { + val hoodieFileIndex = HoodieFileIndex(sparkSession, metaClient, userSchema, optParams, + FileStatusCache.getOrCreate(sqlContext.sparkSession)) + hoodieFileIndex.listFiles(partitionFilters, dataFilters) + } else { + sqlContext.sparkContext.hadoopConfiguration.setClass( + "mapreduce.input.pathFilter.class", + classOf[HoodieROTablePathFilter], + classOf[org.apache.hadoop.fs.PathFilter]) + + val inMemoryFileIndex = HoodieSparkUtils.createInMemoryFileIndex(sparkSession, globPaths) + inMemoryFileIndex.listFiles(partitionFilters, dataFilters) + } + + val partitionFiles = partitionDirectories.flatMap { partition => + partition.files.flatMap { file => + HoodieDataSourceHelper.splitFiles( + sparkSession = sparkSession, + file = file, + partitionValues = partition.values + ) + } + } + + partitionFiles.map { f => + PartitionedFile(InternalRow.empty, f.filePath, f.start, f.length) + } + } +} diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/DataSourceOptions.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/DataSourceOptions.scala index 1e1d887906c99..8a98657f242e2 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/DataSourceOptions.scala +++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/DataSourceOptions.scala @@ -23,7 +23,7 @@ import org.apache.hudi.common.fs.ConsistencyGuardConfig import org.apache.hudi.common.model.{HoodieTableType, WriteOperationType} import org.apache.hudi.common.table.HoodieTableConfig import org.apache.hudi.common.util.Option -import org.apache.hudi.config.HoodieWriteConfig +import org.apache.hudi.config.{HoodieClusteringConfig, HoodieWriteConfig} import org.apache.hudi.hive.util.ConfigUtils import
org.apache.hudi.hive.{HiveStylePartitionValueExtractor, HiveSyncTool, MultiPartKeysValueExtractor, NonPartitionedExtractor, SlashEncodedDayPartitionValueExtractor} import org.apache.hudi.keygen.constant.KeyGeneratorOptions @@ -75,6 +75,7 @@ object DataSourceReadOptions { val ENABLE_HOODIE_FILE_INDEX: ConfigProperty[Boolean] = ConfigProperty .key("hoodie.file.index.enable") .defaultValue(true) + .deprecatedAfter("0.11.0") .withDocumentation("Enables use of the spark file index implementation for Hudi, " + "that speeds up listing of large tables.") @@ -119,8 +120,13 @@ object DataSourceReadOptions { .key("hoodie.enable.data.skipping") .defaultValue(true) .sinceVersion("0.10.0") - .withDocumentation("enable data skipping to boost query after doing z-order optimize for current table") + .withDocumentation("Enables data-skipping, allowing queries to leverage indexes to reduce the search space by " + + "skipping over files") + val INCREMENTAL_FALLBACK_TO_FULL_TABLE_SCAN_FOR_NON_EXISTING_FILES: ConfigProperty[String] = ConfigProperty + .key("hoodie.datasource.read.incr.fallback.fulltablescan.enable") + .defaultValue("false") + .withDocumentation("When doing an incremental query, whether we should fall back to full table scans if a file does not exist.") /** @deprecated Use {@link QUERY_TYPE} and its methods instead */ @Deprecated val QUERY_TYPE_OPT_KEY = QUERY_TYPE.key() @@ -422,6 +428,11 @@ object DataSourceWriteOptions { val HIVE_URL: ConfigProperty[String] = ConfigProperty .key("hoodie.datasource.hive_sync.jdbcurl") .defaultValue("jdbc:hive2://localhost:10000") + .withDocumentation("Hive jdbc url") + + val METASTORE_URIS: ConfigProperty[String] = ConfigProperty + .key("hoodie.datasource.hive_sync.metastore.uris") + .defaultValue("thrift://localhost:9083") .withDocumentation("Hive metastore url") val hivePartitionFieldsInferFunc = DataSourceOptionsHelper.scalaFunctionToJavaFunction((p: HoodieConfig) => { @@ -550,17 +561,9 @@ object DataSourceWriteOptions { .defaultValue("true") .withDocumentation("Controls whether async compaction should be turned on for MOR table writing.") - val INLINE_CLUSTERING_ENABLE: ConfigProperty[String] = ConfigProperty - .key("hoodie.datasource.clustering.inline.enable") - .defaultValue("false") - .sinceVersion("0.9.0") - .withDocumentation("Enable inline clustering. Disabled by default.") + val INLINE_CLUSTERING_ENABLE = HoodieClusteringConfig.INLINE_CLUSTERING - val ASYNC_CLUSTERING_ENABLE: ConfigProperty[String] = ConfigProperty - .key("hoodie.datasource.clustering.async.enable") - .defaultValue("false") - .sinceVersion("0.9.0") - .withDocumentation("Enable asynchronous clustering.
Disabled by default.") + val ASYNC_CLUSTERING_ENABLE = HoodieClusteringConfig.ASYNC_CLUSTERING_ENABLE val KAFKA_AVRO_VALUE_DESERIALIZER_CLASS: ConfigProperty[String] = ConfigProperty .key("hoodie.deltastreamer.source.kafka.value.deserializer.class") @@ -632,10 +635,10 @@ object DataSourceWriteOptions { @Deprecated val HIVE_PARTITION_EXTRACTOR_CLASS_OPT_KEY = HIVE_PARTITION_EXTRACTOR_CLASS.key() - /** @deprecated Use {@link KEYGENERATOR_CLASS} and its methods instead */ + /** @deprecated Use {@link KEYGENERATOR_CLASS_NAME} and its methods instead */ @Deprecated val DEFAULT_KEYGENERATOR_CLASS_OPT_VAL = KEYGENERATOR_CLASS_NAME.defaultValue() - /** @deprecated Use {@link KEYGENERATOR_CLASS} and its methods instead */ + /** @deprecated Use {@link KEYGENERATOR_CLASS_NAME} and its methods instead */ @Deprecated val KEYGENERATOR_CLASS_OPT_KEY = HoodieWriteConfig.KEYGENERATOR_CLASS_NAME.key() /** @deprecated Use {@link ENABLE_ROW_WRITER} and its methods instead */ diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/DefaultSource.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/DefaultSource.scala index 4c9e585c363e5..1508babcbba97 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/DefaultSource.scala +++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/DefaultSource.scala @@ -85,20 +85,15 @@ class DefaultSource extends RelationProvider val allPaths = path.map(p => Seq(p)).getOrElse(Seq()) ++ readPaths val fs = FSUtils.getFs(allPaths.head, sqlContext.sparkContext.hadoopConfiguration) - // Use the HoodieFileIndex only if the 'path' is not globbed. - // Or else we use the original way to read hoodie table. - val enableFileIndex = optParams.get(ENABLE_HOODIE_FILE_INDEX.key) - .map(_.toBoolean).getOrElse(ENABLE_HOODIE_FILE_INDEX.defaultValue) - val useHoodieFileIndex = enableFileIndex && path.isDefined && !path.get.contains("*") && - !parameters.contains(DataSourceReadOptions.READ_PATHS.key) - val globPaths = if (useHoodieFileIndex) { - None + + val globPaths = if (path.exists(_.contains("*")) || readPaths.nonEmpty) { + HoodieSparkUtils.checkAndGlobPathIfNecessary(allPaths, fs) } else { - Some(HoodieSparkUtils.checkAndGlobPathIfNecessary(allPaths, fs)) + Seq.empty } // Get the table base path - val tablePath = if (globPaths.isDefined) { - DataSourceUtils.getTablePath(fs, globPaths.get.toArray) + val tablePath = if (globPaths.nonEmpty) { + DataSourceUtils.getTablePath(fs, globPaths.toArray) } else { DataSourceUtils.getTablePath(fs, Array(new Path(path.get))) } @@ -108,6 +103,7 @@ class DefaultSource extends RelationProvider val isBootstrappedTable = metaClient.getTableConfig.getBootstrapBasePath.isPresent val tableType = metaClient.getTableType val queryType = parameters(QUERY_TYPE.key) + val userSchema = if (schema == null) Option.empty[StructType] else Some(schema) log.info(s"Is bootstrapped table => $isBootstrappedTable, tableType is: $tableType, queryType is: $queryType") if (metaClient.getCommitsTimeline.filterCompletedInstants.countInstants() == 0) { @@ -117,20 +113,19 @@ class DefaultSource extends RelationProvider case (COPY_ON_WRITE, QUERY_TYPE_SNAPSHOT_OPT_VAL, false) | (COPY_ON_WRITE, QUERY_TYPE_READ_OPTIMIZED_OPT_VAL, false) | (MERGE_ON_READ, QUERY_TYPE_READ_OPTIMIZED_OPT_VAL, false) => - getBaseFileOnlyView(useHoodieFileIndex, sqlContext, parameters, schema, tablePath, - readPaths, metaClient) + new BaseFileOnlyViewRelation(sqlContext, metaClient, parameters, userSchema, 
globPaths) case (COPY_ON_WRITE, QUERY_TYPE_INCREMENTAL_OPT_VAL, _) => - new IncrementalRelation(sqlContext, parameters, schema, metaClient) + new IncrementalRelation(sqlContext, parameters, userSchema, metaClient) case (MERGE_ON_READ, QUERY_TYPE_SNAPSHOT_OPT_VAL, false) => - new MergeOnReadSnapshotRelation(sqlContext, parameters, schema, globPaths, metaClient) + new MergeOnReadSnapshotRelation(sqlContext, parameters, userSchema, globPaths, metaClient) case (MERGE_ON_READ, QUERY_TYPE_INCREMENTAL_OPT_VAL, _) => - new MergeOnReadIncrementalRelation(sqlContext, parameters, schema, metaClient) + new MergeOnReadIncrementalRelation(sqlContext, parameters, userSchema, metaClient) case (_, _, true) => - new HoodieBootstrapRelation(sqlContext, schema, globPaths, metaClient, parameters) + new HoodieBootstrapRelation(sqlContext, userSchema, globPaths, metaClient, parameters) case (_, _, _) => throw new HoodieException(s"Invalid query type : $queryType for tableType: $tableType," + @@ -182,65 +177,6 @@ class DefaultSource extends RelationProvider override def shortName(): String = "hudi_v1" - private def getBaseFileOnlyView(useHoodieFileIndex: Boolean, - sqlContext: SQLContext, - optParams: Map[String, String], - schema: StructType, - tablePath: String, - extraReadPaths: Seq[String], - metaClient: HoodieTableMetaClient): BaseRelation = { - log.info("Loading Base File Only View with options :" + optParams) - val (tableFileFormat, formatClassName) = metaClient.getTableConfig.getBaseFileFormat match { - case HoodieFileFormat.PARQUET => (new ParquetFileFormat, "parquet") - case HoodieFileFormat.ORC => (new OrcFileFormat, "orc") - } - - if (useHoodieFileIndex) { - val fileIndex = HoodieFileIndex(sqlContext.sparkSession, metaClient, - if (schema == null) Option.empty[StructType] else Some(schema), - optParams, FileStatusCache.getOrCreate(sqlContext.sparkSession)) - - HadoopFsRelation( - fileIndex, - fileIndex.partitionSchema, - fileIndex.dataSchema, - bucketSpec = None, - fileFormat = tableFileFormat, - optParams)(sqlContext.sparkSession) - } else { - // this is just effectively RO view only, where `path` can contain a mix of - // non-hoodie/hoodie path files. set the path filter up - sqlContext.sparkContext.hadoopConfiguration.setClass( - "mapreduce.input.pathFilter.class", - classOf[HoodieROTablePathFilter], - classOf[org.apache.hadoop.fs.PathFilter]) - - val specifySchema = if (schema == null) { - // Load the schema from the commit meta data. - // Here we should specify the schema to the latest commit schema since - // the table schema evolution. - val tableSchemaResolver = new TableSchemaResolver(metaClient) - try { - Some(AvroConversionUtils.convertAvroSchemaToStructType(tableSchemaResolver.getTableAvroSchema)) - } catch { - case _: Throwable => - None // If there is no commit in the table, we can not get the schema - // with tableSchemaResolver, return None here. 
- } - } else { - Some(schema) - } - // simply return as a regular relation - DataSource.apply( - sparkSession = sqlContext.sparkSession, - paths = extraReadPaths, - userSpecifiedSchema = specifySchema, - className = formatClassName, - options = optParams) - .resolveRelation() - } - } - override def sourceSchema(sqlContext: SQLContext, schema: Option[StructType], providerName: String, diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieBaseRelation.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieBaseRelation.scala new file mode 100644 index 0000000000000..1e2946dd26e88 --- /dev/null +++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieBaseRelation.scala @@ -0,0 +1,158 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi + +import org.apache.avro.Schema +import org.apache.avro.generic.GenericRecord +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.Path +import org.apache.hadoop.hbase.io.hfile.CacheConfig +import org.apache.hudi.common.config.SerializableConfiguration +import org.apache.hudi.common.fs.FSUtils +import org.apache.hudi.common.model.HoodieFileFormat +import org.apache.hudi.common.table.{HoodieTableMetaClient, TableSchemaResolver} +import org.apache.hudi.common.util.StringUtils +import org.apache.hudi.io.storage.HoodieHFileReader +import org.apache.hudi.metadata.HoodieTableMetadata +import org.apache.spark.internal.Logging +import org.apache.spark.sql.avro.SchemaConverters +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.execution.datasources.PartitionedFile +import org.apache.spark.sql.sources.{BaseRelation, Filter, PrunedFilteredScan} +import org.apache.spark.sql.types.StructType +import org.apache.spark.sql.{SQLContext, SparkSession} + +import scala.collection.JavaConverters._ +import scala.util.Try + +case class HoodieTableSchema(structTypeSchema: StructType, avroSchemaStr: String) + +/** + * Hoodie BaseRelation which extends [[PrunedFilteredScan]]. + */ +abstract class HoodieBaseRelation( + val sqlContext: SQLContext, + metaClient: HoodieTableMetaClient, + optParams: Map[String, String], + userSchema: Option[StructType]) + extends BaseRelation with PrunedFilteredScan with Logging { + + protected val sparkSession: SparkSession = sqlContext.sparkSession + + protected lazy val tableAvroSchema: Schema = { + val schemaUtil = new TableSchemaResolver(metaClient) + Try(schemaUtil.getTableAvroSchema).getOrElse( + // If there is no commit in the table, we can't get the schema + // through [[TableSchemaResolver]]; fall back to the provided [[userSchema]] instead.
+ userSchema match { + case Some(s) => SchemaConverters.toAvroType(s) + case _ => throw new IllegalArgumentException("User-provided schema is required in case the table is empty") + } + ) + } + + protected val tableStructSchema: StructType = AvroConversionUtils.convertAvroSchemaToStructType(tableAvroSchema) + + protected val partitionColumns: Array[String] = metaClient.getTableConfig.getPartitionFields.orElse(Array.empty) + + protected def getPrecombineFieldProperty: Option[String] = + Option(metaClient.getTableConfig.getPreCombineField) + .orElse(optParams.get(DataSourceWriteOptions.PRECOMBINE_FIELD.key)) match { + // NOTE: This is required to compensate for cases when empty string is used to stub + // property value to avoid it being set with the default value + // TODO(HUDI-3456) cleanup + case Some(f) if !StringUtils.isNullOrEmpty(f) => Some(f) + case _ => None + } + + override def schema: StructType = tableStructSchema +} + +object HoodieBaseRelation { + + def isMetadataTable(metaClient: HoodieTableMetaClient) = + HoodieTableMetadata.isMetadataTable(metaClient.getBasePath) + + /** + * Returns file-reader routine accepting [[PartitionedFile]] and returning an [[Iterator]] + * over [[InternalRow]] + */ + def createBaseFileReader(spark: SparkSession, + partitionSchema: StructType, + tableSchema: HoodieTableSchema, + requiredSchema: HoodieTableSchema, + filters: Seq[Filter], + options: Map[String, String], + hadoopConf: Configuration): PartitionedFile => Iterator[InternalRow] = { + val hfileReader = createHFileReader( + spark = spark, + tableSchema = tableSchema, + requiredSchema = requiredSchema, + filters = filters, + options = options, + hadoopConf = hadoopConf + ) + val parquetReader = HoodieDataSourceHelper.buildHoodieParquetReader( + sparkSession = spark, + dataSchema = tableSchema.structTypeSchema, + partitionSchema = partitionSchema, + requiredSchema = requiredSchema.structTypeSchema, + filters = filters, + options = options, + hadoopConf = hadoopConf + ) + + partitionedFile => { + val extension = FSUtils.getFileExtension(partitionedFile.filePath) + if (HoodieFileFormat.PARQUET.getFileExtension.equals(extension)) { + parquetReader.apply(partitionedFile) + } else if (HoodieFileFormat.HFILE.getFileExtension.equals(extension)) { + hfileReader.apply(partitionedFile) + } else { + throw new UnsupportedOperationException(s"Base file format not supported by Spark DataSource ($partitionedFile)") + } + } + } + + private def createHFileReader(spark: SparkSession, + tableSchema: HoodieTableSchema, + requiredSchema: HoodieTableSchema, + filters: Seq[Filter], + options: Map[String, String], + hadoopConf: Configuration): PartitionedFile => Iterator[InternalRow] = { + val hadoopConfBroadcast = + spark.sparkContext.broadcast(new SerializableConfiguration(hadoopConf)) + + partitionedFile => { + val hadoopConf = hadoopConfBroadcast.value.get() + val reader = new HoodieHFileReader[GenericRecord](hadoopConf, new Path(partitionedFile.filePath), + new CacheConfig(hadoopConf)) + + val requiredRowSchema = requiredSchema.structTypeSchema + // NOTE: Schema has to be parsed at this point, since Avro's [[Schema]] aren't serializable + // to be passed from driver to executor + val requiredAvroSchema = new Schema.Parser().parse(requiredSchema.avroSchemaStr) + val avroToRowConverter = AvroConversionUtils.createAvroToInternalRowConverter(requiredAvroSchema, requiredRowSchema) + + reader.getRecordIterator(requiredAvroSchema).asScala + .map(record => { + 
avroToRowConverter.apply(record.asInstanceOf[GenericRecord]).get + }) + } + } +} diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieBootstrapRDD.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieBootstrapRDD.scala index a522db6afc6f1..ea997c86acb39 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieBootstrapRDD.scala +++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieBootstrapRDD.scala @@ -24,12 +24,13 @@ import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.execution.datasources.PartitionedFile import org.apache.spark.sql.types.StructType -import org.apache.spark.sql.vectorized.ColumnarBatch + +import org.apache.hudi.HoodieDataSourceHelper._ class HoodieBootstrapRDD(@transient spark: SparkSession, - dataReadFunction: PartitionedFile => Iterator[Any], - skeletonReadFunction: PartitionedFile => Iterator[Any], - regularReadFunction: PartitionedFile => Iterator[Any], + dataReadFunction: PartitionedFile => Iterator[InternalRow], + skeletonReadFunction: PartitionedFile => Iterator[InternalRow], + regularReadFunction: PartitionedFile => Iterator[InternalRow], dataSchema: StructType, skeletonSchema: StructType, requiredColumns: Array[String], @@ -56,18 +57,18 @@ class HoodieBootstrapRDD(@transient spark: SparkSession, // It is a bootstrap split. Check both skeleton and data files. if (dataSchema.isEmpty) { // No data column to fetch, hence fetch only from skeleton file - partitionedFileIterator = read(bootstrapPartition.split.skeletonFile.get, skeletonReadFunction) + partitionedFileIterator = skeletonReadFunction(bootstrapPartition.split.skeletonFile.get) } else if (skeletonSchema.isEmpty) { // No metadata column to fetch, hence fetch only from data file - partitionedFileIterator = read(bootstrapPartition.split.dataFile, dataReadFunction) + partitionedFileIterator = dataReadFunction(bootstrapPartition.split.dataFile) } else { // Fetch from both data and skeleton file, and merge - val dataFileIterator = read(bootstrapPartition.split.dataFile, dataReadFunction) - val skeletonFileIterator = read(bootstrapPartition.split.skeletonFile.get, skeletonReadFunction) + val dataFileIterator = dataReadFunction(bootstrapPartition.split.dataFile) + val skeletonFileIterator = skeletonReadFunction(bootstrapPartition.split.skeletonFile.get) partitionedFileIterator = merge(skeletonFileIterator, dataFileIterator) } } else { - partitionedFileIterator = read(bootstrapPartition.split.dataFile, regularReadFunction) + partitionedFileIterator = regularReadFunction(bootstrapPartition.split.dataFile) } partitionedFileIterator } @@ -101,19 +102,6 @@ class HoodieBootstrapRDD(@transient spark: SparkSession, mergedRow } - def read(partitionedFile: PartitionedFile, readFileFunction: PartitionedFile => Iterator[Any]) - : Iterator[InternalRow] = { - val fileIterator = readFileFunction(partitionedFile) - - import scala.collection.JavaConverters._ - - val rows = fileIterator.flatMap(_ match { - case r: InternalRow => Seq(r) - case b: ColumnarBatch => b.rowIterator().asScala - }) - rows - } - override protected def getPartitions: Array[Partition] = { tableState.files.zipWithIndex.map(file => { if (file._1.skeletonFile.isDefined) { diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieBootstrapRelation.scala 
b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieBootstrapRelation.scala index b1ab83a94cc9d..dd90d724c6b61 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieBootstrapRelation.scala +++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieBootstrapRelation.scala @@ -52,8 +52,8 @@ import scala.collection.JavaConverters._ * @param optParams DataSource options passed by the user */ class HoodieBootstrapRelation(@transient val _sqlContext: SQLContext, - val userSchema: StructType, - val globPaths: Option[Seq[Path]], + val userSchema: Option[StructType], + val globPaths: Seq[Path], val metaClient: HoodieTableMetaClient, val optParams: Map[String, String]) extends BaseRelation with PrunedFilteredScan with Logging { @@ -107,37 +107,35 @@ class HoodieBootstrapRelation(@transient val _sqlContext: SQLContext, }) // Prepare readers for reading data file and skeleton files - val dataReadFunction = new ParquetFileFormat() - .buildReaderWithPartitionValues( - sparkSession = _sqlContext.sparkSession, - dataSchema = dataSchema, - partitionSchema = StructType(Seq.empty), - requiredSchema = requiredDataSchema, - filters = if (requiredSkeletonSchema.isEmpty) filters else Seq() , - options = Map.empty, - hadoopConf = _sqlContext.sparkSession.sessionState.newHadoopConf() - ) - - val skeletonReadFunction = new ParquetFileFormat() - .buildReaderWithPartitionValues( - sparkSession = _sqlContext.sparkSession, - dataSchema = skeletonSchema, - partitionSchema = StructType(Seq.empty), - requiredSchema = requiredSkeletonSchema, - filters = if (requiredDataSchema.isEmpty) filters else Seq(), - options = Map.empty, - hadoopConf = _sqlContext.sparkSession.sessionState.newHadoopConf() - ) - - val regularReadFunction = new ParquetFileFormat() - .buildReaderWithPartitionValues( - sparkSession = _sqlContext.sparkSession, - dataSchema = fullSchema, - partitionSchema = StructType(Seq.empty), - requiredSchema = requiredColsSchema, - filters = filters, - options = Map.empty, - hadoopConf = _sqlContext.sparkSession.sessionState.newHadoopConf()) + val dataReadFunction = HoodieDataSourceHelper.buildHoodieParquetReader( + sparkSession = _sqlContext.sparkSession, + dataSchema = dataSchema, + partitionSchema = StructType(Seq.empty), + requiredSchema = requiredDataSchema, + filters = if (requiredSkeletonSchema.isEmpty) filters else Seq() , + options = optParams, + hadoopConf = _sqlContext.sparkSession.sessionState.newHadoopConf() + ) + + val skeletonReadFunction = HoodieDataSourceHelper.buildHoodieParquetReader( + sparkSession = _sqlContext.sparkSession, + dataSchema = skeletonSchema, + partitionSchema = StructType(Seq.empty), + requiredSchema = requiredSkeletonSchema, + filters = if (requiredDataSchema.isEmpty) filters else Seq(), + options = optParams, + hadoopConf = _sqlContext.sparkSession.sessionState.newHadoopConf() + ) + + val regularReadFunction = HoodieDataSourceHelper.buildHoodieParquetReader( + sparkSession = _sqlContext.sparkSession, + dataSchema = fullSchema, + partitionSchema = StructType(Seq.empty), + requiredSchema = requiredColsSchema, + filters = filters, + options = optParams, + hadoopConf = _sqlContext.sparkSession.sessionState.newHadoopConf() + ) val rdd = new HoodieBootstrapRDD(_sqlContext.sparkSession, dataReadFunction, skeletonReadFunction, regularReadFunction, requiredDataSchema, requiredSkeletonSchema, requiredColumns, tableState) @@ -157,9 +155,9 @@ class HoodieBootstrapRelation(@transient val _sqlContext: 
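// Why the filter arguments above alternate between `filters` and Seq(): skeleton and
// data files of a bootstrapped file slice store rows in the same order and are merged
// positionally (a zip of the two iterators). Pushing a predicate into only one side
// would drop rows there and misalign the zip, so filters are pushed down only when a
// single file is being read, conceptually:
//
//   val dataFilters = if (requiredSkeletonSchema.isEmpty) filters else Seq.empty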
SQLContext,
 
   def buildFileIndex(): HoodieBootstrapFileIndex = {
     logInfo("Building file index..")
-    val fileStatuses = if (globPaths.isDefined) {
+    val fileStatuses = if (globPaths.nonEmpty) {
       // Load files from the global paths if it has defined to be compatible with the original mode
-      val inMemoryFileIndex = HoodieSparkUtils.createInMemoryFileIndex(_sqlContext.sparkSession, globPaths.get)
+      val inMemoryFileIndex = HoodieSparkUtils.createInMemoryFileIndex(_sqlContext.sparkSession, globPaths)
       inMemoryFileIndex.allFiles()
     } else {
       // Load files by the HoodieFileIndex.
       HoodieFileIndex(sqlContext.sparkSession, metaClient, Some(schema), optParams,
diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieDataSourceHelper.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieDataSourceHelper.scala
new file mode 100644
index 0000000000000..fb12549f620bd
--- /dev/null
+++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieDataSourceHelper.scala
@@ -0,0 +1,154 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi
+
+import org.apache.hadoop.conf.Configuration
+import org.apache.hadoop.fs.FileStatus
+import org.apache.spark.sql.SparkSession
+import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.catalyst.expressions.{Expression, PredicateHelper, SpecificInternalRow, SubqueryExpression, UnsafeProjection}
+import org.apache.spark.sql.execution.datasources.PartitionedFile
+import org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat
+import org.apache.spark.sql.sources.Filter
+import org.apache.spark.sql.types.StructType
+import org.apache.spark.sql.vectorized.ColumnarBatch
+
+import scala.collection.JavaConverters._
+
+object HoodieDataSourceHelper extends PredicateHelper {
+
+  /**
+   * Partition the given condition into two sequences of conjunctive predicates:
+   * - predicates that can be evaluated using metadata only.
+   * - other predicates.
+   */
+  def splitPartitionAndDataPredicates(
+      spark: SparkSession,
+      condition: Expression,
+      partitionColumns: Seq[String]): (Seq[Expression], Seq[Expression]) = {
+    splitConjunctivePredicates(condition).partition(
+      isPredicateMetadataOnly(spark, _, partitionColumns))
+  }
+
+  /**
+   * Check if condition can be evaluated using only metadata. This means the condition
+   * only references partition columns and involves no subquery.
+   */
+  def isPredicateMetadataOnly(
+      spark: SparkSession,
+      condition: Expression,
+      partitionColumns: Seq[String]): Boolean = {
+    isPredicatePartitionColumnsOnly(spark, condition, partitionColumns) &&
+      !SubqueryExpression.hasSubquery(condition)
+  }
+
+  /**
+   * Does the predicate only contain partition columns?
+ */ + def isPredicatePartitionColumnsOnly( + spark: SparkSession, + condition: Expression, + partitionColumns: Seq[String]): Boolean = { + val nameEquality = spark.sessionState.analyzer.resolver + condition.references.forall { r => + partitionColumns.exists(nameEquality(r.name, _)) + } + } + + /** + * Wrapper `buildReaderWithPartitionValues` of [[ParquetFileFormat]] + * to deal with [[ColumnarBatch]] when enable parquet vectorized reader if necessary. + */ + def buildHoodieParquetReader(sparkSession: SparkSession, + dataSchema: StructType, + partitionSchema: StructType, + requiredSchema: StructType, + filters: Seq[Filter], + options: Map[String, String], + hadoopConf: Configuration): PartitionedFile => Iterator[InternalRow] = { + + val readParquetFile: PartitionedFile => Iterator[Any] = new ParquetFileFormat().buildReaderWithPartitionValues( + sparkSession = sparkSession, + dataSchema = dataSchema, + partitionSchema = partitionSchema, + requiredSchema = requiredSchema, + filters = filters, + options = options, + hadoopConf = hadoopConf + ) + + file: PartitionedFile => { + val iter = readParquetFile(file) + iter.flatMap { + case r: InternalRow => Seq(r) + case b: ColumnarBatch => b.rowIterator().asScala + } + } + } + + /** + * Extract the required schema from [[InternalRow]] + */ + def extractRequiredSchema( + iter: Iterator[InternalRow], + requiredSchema: StructType, + requiredFieldPos: Seq[Int]): Iterator[InternalRow] = { + val unsafeProjection = UnsafeProjection.create(requiredSchema) + val rows = iter.map { row => + unsafeProjection(createInternalRowWithSchema(row, requiredSchema, requiredFieldPos)) + } + rows + } + + /** + * Convert [[InternalRow]] to [[SpecificInternalRow]]. + */ + def createInternalRowWithSchema( + row: InternalRow, + schema: StructType, + positions: Seq[Int]): InternalRow = { + val rowToReturn = new SpecificInternalRow(schema) + var curIndex = 0 + schema.zip(positions).foreach { case (field, pos) => + val curField = if (row.isNullAt(pos)) { + null + } else { + row.get(pos, field.dataType) + } + rowToReturn.update(curIndex, curField) + curIndex += 1 + } + rowToReturn + } + + + def splitFiles( + sparkSession: SparkSession, + file: FileStatus, + partitionValues: InternalRow): Seq[PartitionedFile] = { + val filePath = file.getPath + val maxSplitBytes = sparkSession.sessionState.conf.filesMaxPartitionBytes + (0L until file.getLen by maxSplitBytes).map { offset => + val remaining = file.getLen - offset + val size = if (remaining > maxSplitBytes) maxSplitBytes else remaining + PartitionedFile(partitionValues, filePath.toUri.toString, offset, size) + } + } + +} diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieFileIndex.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieFileIndex.scala index f9a7620b9ffe9..9cdf5cc634ff9 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieFileIndex.scala +++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieFileIndex.scala @@ -18,20 +18,30 @@ package org.apache.hudi import org.apache.hadoop.fs.{FileStatus, Path} + import org.apache.hudi.HoodieFileIndex.getConfigProperties import org.apache.hudi.common.config.{HoodieMetadataConfig, TypedProperties} import org.apache.hudi.common.table.HoodieTableMetaClient +import org.apache.hudi.common.util.StringUtils +import org.apache.hudi.keygen.constant.KeyGeneratorOptions +import org.apache.hudi.keygen.{TimestampBasedAvroKeyGenerator, TimestampBasedKeyGenerator} + 
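// Worked example for convertFilterForTimestampKeyGenerator, defined further below.
// Assuming the table was written with a TimestampBasedKeyGenerator configured with
// input format "yyyy-MM-dd" and output format "yyyy/MM/dd", a pushed-down partition
// literal has to be rewritten to match the materialized partition path before pruning:
//
//   val inFmt  = new SimpleDateFormat("yyyy-MM-dd")
//   val outFmt = new SimpleDateFormat("yyyy/MM/dd")
//   outFmt.format(inFmt.parse("2022-01-26"))  // "2022/01/26", comparable to stored paths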
+import org.apache.spark.internal.Logging import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.expressions.{And, Expression} +import org.apache.spark.sql.catalyst.expressions.{And, Expression, Literal} import org.apache.spark.sql.execution.datasources.{FileIndex, FileStatusCache, NoopCache, PartitionDirectory} import org.apache.spark.sql.hudi.DataSkippingUtils.createColumnStatsIndexFilterExpr import org.apache.spark.sql.hudi.HoodieSqlCommonUtils import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.types.StructType +import org.apache.spark.sql.types.{StringType, StructType} import org.apache.spark.sql.{AnalysisException, Column, SparkSession} +import org.apache.spark.unsafe.types.UTF8String import scala.collection.JavaConverters._ import scala.util.{Failure, Success, Try} +import scala.util.control.NonFatal + +import java.text.SimpleDateFormat /** * A file index which support partition prune for hoodie snapshot and read-optimized query. @@ -72,7 +82,7 @@ case class HoodieFileIndex(spark: SparkSession, ) with FileIndex { - override def rootPaths: Seq[Path] = queryPaths + override def rootPaths: Seq[Path] = queryPaths.asScala def enableDataSkipping(): Boolean = { options.getOrElse(DataSourceReadOptions.ENABLE_DATA_SKIPPING.key(), @@ -88,7 +98,7 @@ case class HoodieFileIndex(spark: SparkSession, * @return List of FileStatus for base files */ def allFiles: Seq[FileStatus] = { - cachedAllInputFileSlices.values.flatten + cachedAllInputFileSlices.values.asScala.flatMap(_.asScala) .filter(_.getBaseFile.isPresent) .map(_.getBaseFile.get().getFileStatus) .toSeq @@ -101,31 +111,33 @@ case class HoodieFileIndex(spark: SparkSession, * @param dataFilters data columns filters * @return list of PartitionDirectory containing partition to base files mapping */ - override def listFiles(partitionFilters: Seq[Expression], - dataFilters: Seq[Expression]): Seq[PartitionDirectory] = { + override def listFiles(partitionFilters: Seq[Expression], dataFilters: Seq[Expression]): Seq[PartitionDirectory] = { + + val convertedPartitionFilters = + HoodieFileIndex.convertFilterForTimestampKeyGenerator(metaClient, partitionFilters) + // Look up candidate files names in the col-stats index, if all of the following conditions are true // - Data-skipping is enabled // - Col-Stats Index is present // - List of predicates (filters) is present val candidateFilesNamesOpt: Option[Set[String]] = - lookupCandidateFilesInColStatsIndex(dataFilters) match { - case Success(opt) => opt - case Failure(e) => - if (e.isInstanceOf[AnalysisException]) { - logDebug("Failed to relay provided data filters to Z-index lookup", e) - } else { - logError("Failed to lookup candidate files in Z-index", e) - } - Option.empty - } + lookupCandidateFilesInColStatsIndex(dataFilters) match { + case Success(opt) => opt + case Failure(e) => + if (e.isInstanceOf[AnalysisException]) { + logDebug("Failed to relay provided data filters to Z-index lookup", e) + } else { + logError("Failed to lookup candidate files in Z-index", e) + } + Option.empty + } logDebug(s"Overlapping candidate files (from Z-index): ${candidateFilesNamesOpt.getOrElse(Set.empty)}") if (queryAsNonePartitionedTable) { // Read as Non-Partitioned table // Filter in candidate files based on the col-stats index lookup - val candidateFiles = - allFiles.filter(fileStatus => + val candidateFiles = allFiles.filter(fileStatus => // NOTE: This predicate is true when {@code Option} is empty 
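// The Option semantics the NOTE below leans on, spelled out: `forall` over an empty
// Option is vacuously true, so an absent col-stats index prunes nothing.
//
//   Option.empty[Set[String]].forall(_.contains("f1"))  // true  -> file kept
//   Some(Set("f1")).forall(_.contains("f1"))            // true  -> file kept
//   Some(Set("f1")).forall(_.contains("f2"))            // false -> file pruned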
          candidateFilesNamesOpt.forall(_.contains(fileStatus.getPath.getName))
        )
@@ -137,22 +149,21 @@ case class HoodieFileIndex(spark: SparkSession,
       Seq(PartitionDirectory(InternalRow.empty, candidateFiles))
     } else {
       // Prune the partition path by the partition filters
-      val prunedPartitions = prunePartition(cachedAllInputFileSlices.keys.toSeq, partitionFilters)
+      val prunedPartitions = prunePartition(cachedAllInputFileSlices.keySet.asScala.toSeq, convertedPartitionFilters)
       var totalFileSize = 0
       var candidateFileSize = 0
 
       val result = prunedPartitions.map { partition =>
         val baseFileStatuses: Seq[FileStatus] =
-          cachedAllInputFileSlices(partition)
+          cachedAllInputFileSlices.get(partition).asScala
            .map(fs => fs.getBaseFile.orElse(null))
            .filter(_ != null)
            .map(_.getFileStatus)
 
         // Filter in candidate files based on the col-stats index lookup
-        val candidateFiles =
-          baseFileStatuses.filter(fs =>
-            // NOTE: This predicate is true when {@code Option} is empty
-            candidateFilesNamesOpt.forall(_.contains(fs.getPath.getName)))
+        val candidateFiles = baseFileStatuses.filter(fs =>
+          // NOTE: This predicate is true when {@code Option} is empty
+          candidateFilesNamesOpt.forall(_.contains(fs.getPath.getName)))
 
         totalFileSize += baseFileStatuses.size
         candidateFileSize += candidateFiles.size
@@ -194,12 +205,14 @@ case class HoodieFileIndex(spark: SparkSession,
       // scalastyle:on return
     }
 
+    val completedCommits = getActiveTimeline.filterCompletedInstants().getInstants.iterator.asScala.toList.map(_.getTimestamp)
+
     // Collect all index tables present in `.zindex` folder
     val candidateIndexTables = fs.listStatus(new Path(indexPath))
       .filter(_.isDirectory)
       .map(_.getPath.getName)
-      .filter(f => completedCommits.contains(f))
+      .filter(completedCommits.contains(_))
      .sortBy(x => x)
 
     if (candidateIndexTables.isEmpty) {
@@ -267,7 +280,7 @@ case class HoodieFileIndex(spark: SparkSession,
   }
 }
 
-object HoodieFileIndex {
+object HoodieFileIndex extends Logging {
 
   def getConfigProperties(spark: SparkSession, options: Map[String, String]) = {
     val sqlConf: SQLConf = spark.sessionState.conf
@@ -282,6 +295,41 @@ object HoodieFileIndex {
     properties
   }
 
+  def convertFilterForTimestampKeyGenerator(metaClient: HoodieTableMetaClient,
+      partitionFilters: Seq[Expression]): Seq[Expression] = {
+
+    val tableConfig = metaClient.getTableConfig
+    val keyGenerator = tableConfig.getKeyGeneratorClassName
+
+    if (keyGenerator != null && (keyGenerator.equals(classOf[TimestampBasedKeyGenerator].getCanonicalName) ||
+        keyGenerator.equals(classOf[TimestampBasedAvroKeyGenerator].getCanonicalName))) {
+      val inputFormat = tableConfig.getString(KeyGeneratorOptions.Config.TIMESTAMP_INPUT_DATE_FORMAT_PROP)
+      val outputFormat = tableConfig.getString(KeyGeneratorOptions.Config.TIMESTAMP_OUTPUT_DATE_FORMAT_PROP)
+      if (StringUtils.isNullOrEmpty(inputFormat) || StringUtils.isNullOrEmpty(outputFormat) ||
+          inputFormat.equals(outputFormat)) {
+        partitionFilters
+      } else {
+        try {
+          val inDateFormat = new SimpleDateFormat(inputFormat)
+          val outDateFormat = new SimpleDateFormat(outputFormat)
+          partitionFilters.toArray.map {
+            _.transformDown {
+              case Literal(value, dataType) if dataType.isInstanceOf[StringType] =>
+                val converted = outDateFormat.format(inDateFormat.parse(value.toString))
+                Literal(UTF8String.fromString(converted), StringType)
+            }
+          }
+        } catch {
+          case NonFatal(e) =>
+            logWarning("Failed to convert filters for TimestampBasedAvroKeyGenerator", e)
+            partitionFilters
+        }
+      }
+    } else {
+      partitionFilters
+    }
+  }
+
  private def getQueryPath(options: Map[String,
String]) = { new Path(options.getOrElse("path", "'path' option required")) } diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieFileScanRDD.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieFileScanRDD.scala new file mode 100644 index 0000000000000..9f2d7d9e0380a --- /dev/null +++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieFileScanRDD.scala @@ -0,0 +1,109 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi + +import org.apache.spark.{Partition, TaskContext} +import org.apache.spark.rdd.RDD +import org.apache.spark.sql.catalyst.{CatalystTypeConverters, InternalRow} +import org.apache.spark.sql.execution.QueryExecutionException +import org.apache.spark.sql.{Row, SparkSession} +import org.apache.spark.sql.execution.datasources.{FilePartition, PartitionedFile, SchemaColumnConvertNotSupportedException} +import org.apache.spark.sql.types.StructType + +/** + * Similar to [[org.apache.spark.sql.execution.datasources.FileScanRDD]]. + * + * This class will extract the fields needed according to [[requiredColumns]] and + * return iterator of [[org.apache.spark.sql.Row]] directly. + */ +class HoodieFileScanRDD( + @transient private val sparkSession: SparkSession, + requiredColumns: Array[String], + schema: StructType, + readFunction: PartitionedFile => Iterator[InternalRow], + @transient val filePartitions: Seq[FilePartition]) + extends RDD[Row](sparkSession.sparkContext, Nil) { + + private val requiredSchema = { + val nameToStructField = schema.map(field => (field.name, field)).toMap + StructType(requiredColumns.map(nameToStructField)) + } + + private val requiredFieldPos = HoodieSparkUtils.collectFieldIndexes(requiredSchema, schema) + + override def compute(split: Partition, context: TaskContext): Iterator[Row] = { + val iterator = new Iterator[Object] with AutoCloseable { + + private[this] val files = split.asInstanceOf[FilePartition].files.toIterator + private[this] var currentFile: PartitionedFile = null + private[this] var currentIterator: Iterator[Object] = null + + override def hasNext: Boolean = { + (currentIterator != null && currentIterator.hasNext) || nextIterator() + } + + def next(): Object = { + currentIterator.next() + } + + /** Advances to the next file. Returns true if a new non-empty iterator is available. */ + private def nextIterator(): Boolean = { + if (files.hasNext) { + currentFile = files.next() + + logInfo(s"Reading File $currentFile") + currentIterator = readFunction(currentFile) + + try { + hasNext + } catch { + case e: SchemaColumnConvertNotSupportedException => + val message = "Parquet column cannot be converted in " + + s"file ${currentFile.filePath}. 
Column: ${e.getColumn}, " + + s"Expected: ${e.getLogicalType}, Found: ${e.getPhysicalType}" + throw new QueryExecutionException(message, e) + + case e => throw e + } + } else { + currentFile = null + false + } + } + + override def close(): Unit = {} + } + + // Register an on-task-completion callback to close the input stream. + context.addTaskCompletionListener[Unit](_ => iterator.close()) + + // extract required columns from row + val iterAfterExtract = HoodieDataSourceHelper.extractRequiredSchema( + iterator.asInstanceOf[Iterator[InternalRow]], + requiredSchema, + requiredFieldPos) + + // convert InternalRow to Row and return + val converter = CatalystTypeConverters.createToScalaConverter(requiredSchema) + iterAfterExtract.map(converter(_).asInstanceOf[Row]) + } + + override protected def getPartitions: Array[Partition] = filePartitions.toArray + +} diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieMergeOnReadRDD.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieMergeOnReadRDD.scala index 226fb01f43f90..96fe47e0219d4 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieMergeOnReadRDD.scala +++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieMergeOnReadRDD.scala @@ -21,22 +21,28 @@ package org.apache.hudi import org.apache.avro.Schema import org.apache.avro.generic.{GenericRecord, GenericRecordBuilder} import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.Path +import org.apache.hudi.HoodieDataSourceHelper._ +import org.apache.hudi.HoodieMergeOnReadRDD.resolveAvroSchemaNullability +import org.apache.hudi.MergeOnReadSnapshotRelation.getFilePath +import org.apache.hudi.common.config.HoodieMetadataConfig +import org.apache.hudi.common.engine.HoodieLocalEngineContext import org.apache.hudi.common.fs.FSUtils +import org.apache.hudi.common.fs.FSUtils.getRelativePartitionPath import org.apache.hudi.common.table.log.HoodieMergedLogRecordScanner import org.apache.hudi.config.HoodiePayloadConfig import org.apache.hudi.exception.HoodieException import org.apache.hudi.hadoop.config.HoodieRealtimeConfig -import org.apache.hudi.hadoop.utils.HoodieInputFormatUtils.HOODIE_RECORD_KEY_COL_POS +import org.apache.hudi.metadata.HoodieTableMetadata.getDataTableBasePathFromMetadataTable +import org.apache.hudi.metadata.{HoodieBackedTableMetadata, HoodieTableMetadata} import org.apache.spark.rdd.RDD -import org.apache.spark.sql.avro.{HoodieAvroSerializer, HoodieAvroDeserializer} import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.expressions.{SpecificInternalRow, UnsafeProjection} +import org.apache.spark.sql.catalyst.expressions.UnsafeProjection import org.apache.spark.sql.execution.datasources.PartitionedFile -import org.apache.spark.sql.vectorized.ColumnarBatch import org.apache.spark.{Partition, SerializableWritable, SparkContext, TaskContext} import java.io.Closeable - +import java.util.Properties import scala.collection.JavaConverters._ import scala.collection.mutable import scala.util.Try @@ -45,43 +51,40 @@ case class HoodieMergeOnReadPartition(index: Int, split: HoodieMergeOnReadFileSp class HoodieMergeOnReadRDD(@transient sc: SparkContext, @transient config: Configuration, - fullSchemaFileReader: PartitionedFile => Iterator[Any], - requiredSchemaFileReader: PartitionedFile => Iterator[Any], - tableState: HoodieMergeOnReadTableState) + fullSchemaFileReader: PartitionedFile => Iterator[InternalRow], + 
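// Recap of how HoodieFileScanRDD (above) projects rows, on a toy schema with
// hypothetical column names: requiredColumns select and reorder StructFields, and
// requiredFieldPos records where each projected field sits in the full schema.
//
//   schema           = StructType(Seq(uuid: String, fare: Double, ts: Long))
//   requiredColumns  = Array("fare", "uuid")
//   requiredSchema   = StructType(Seq(fare: Double, uuid: String))
//   requiredFieldPos = Seq(1, 0)   // positions of fare and uuid in the full schema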
requiredSchemaFileReader: PartitionedFile => Iterator[InternalRow], + tableState: HoodieMergeOnReadTableState, + tableSchema: HoodieTableSchema, + requiredSchema: HoodieTableSchema) extends RDD[InternalRow](sc, Nil) { private val confBroadcast = sc.broadcast(new SerializableWritable(config)) - private val preCombineField = tableState.preCombineField - private val recordKeyFieldOpt = tableState.recordKeyFieldOpt - private val payloadProps = if (preCombineField.isDefined) { - Some(HoodiePayloadConfig.newBuilder.withPayloadOrderingField(preCombineField.get).build.getProps) - } else { - None - } + private val recordKeyField = tableState.recordKeyField + private val payloadProps = tableState.preCombineFieldOpt + .map(preCombineField => + HoodiePayloadConfig.newBuilder + .withPayloadOrderingField(preCombineField) + .build + .getProps + ) + .getOrElse(new Properties()) + override def compute(split: Partition, context: TaskContext): Iterator[InternalRow] = { val mergeOnReadPartition = split.asInstanceOf[HoodieMergeOnReadPartition] val iter = mergeOnReadPartition.split match { - case dataFileOnlySplit if dataFileOnlySplit.logPaths.isEmpty => - read(dataFileOnlySplit.dataFile.get, requiredSchemaFileReader) + case dataFileOnlySplit if dataFileOnlySplit.logFiles.isEmpty => + requiredSchemaFileReader(dataFileOnlySplit.dataFile.get) case logFileOnlySplit if logFileOnlySplit.dataFile.isEmpty => logFileIterator(logFileOnlySplit, getConfig) - case skipMergeSplit if skipMergeSplit.mergeType - .equals(DataSourceReadOptions.REALTIME_SKIP_MERGE_OPT_VAL) => - skipMergeFileIterator( - skipMergeSplit, - read(skipMergeSplit.dataFile.get, requiredSchemaFileReader), - getConfig - ) - case payloadCombineSplit if payloadCombineSplit.mergeType - .equals(DataSourceReadOptions.REALTIME_PAYLOAD_COMBINE_OPT_VAL) => - payloadCombineFileIterator( - payloadCombineSplit, - read(payloadCombineSplit.dataFile.get, fullSchemaFileReader), - getConfig - ) + case skipMergeSplit if skipMergeSplit.mergeType.equals(DataSourceReadOptions.REALTIME_SKIP_MERGE_OPT_VAL) => + skipMergeFileIterator(skipMergeSplit, requiredSchemaFileReader(skipMergeSplit.dataFile.get), getConfig) + case payloadCombineSplit + if payloadCombineSplit.mergeType.equals(DataSourceReadOptions.REALTIME_PAYLOAD_COMBINE_OPT_VAL) => + payloadCombineFileIterator(payloadCombineSplit, fullSchemaFileReader(payloadCombineSplit.dataFile.get), + getConfig) case _ => throw new HoodieException(s"Unable to select an Iterator to read the Hoodie MOR File Split for " + s"file path: ${mergeOnReadPartition.split.dataFile.get.filePath}" + - s"log paths: ${mergeOnReadPartition.split.logPaths.toString}" + + s"log paths: ${mergeOnReadPartition.split.logFiles.toString}" + s"hoodie table path: ${mergeOnReadPartition.split.tablePath}" + s"spark partition Index: ${mergeOnReadPartition.index}" + s"merge type: ${mergeOnReadPartition.split.mergeType}") @@ -108,43 +111,35 @@ class HoodieMergeOnReadRDD(@transient sc: SparkContext, } } - private def read(partitionedFile: PartitionedFile, - readFileFunction: PartitionedFile => Iterator[Any]): Iterator[InternalRow] = { - val fileIterator = readFileFunction(partitionedFile) - val rows = fileIterator.flatMap(_ match { - case r: InternalRow => Seq(r) - case b: ColumnarBatch => b.rowIterator().asScala - }) - rows - } - private def logFileIterator(split: HoodieMergeOnReadFileSplit, config: Configuration): Iterator[InternalRow] = - new Iterator[InternalRow] with Closeable { - private val tableAvroSchema = new Schema.Parser().parse(tableState.tableAvroSchema) 
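// Reader selection in compute() above, summarized per split; the snippet assumes the
// REALTIME_MERGE read option that the two OPT_VAL constants belong to:
//
//   - log files only      -> logFileIterator (nothing to merge against)
//   - base file only      -> plain base-file read with the pruned, required schema
//   - "skip_merge"        -> base rows concatenated with log rows, no key lookup
//   - "payload_combine"   -> base read with the FULL schema, then a per-key payload merge
//
//   spark.read.format("hudi")
//     .option(DataSourceReadOptions.REALTIME_MERGE.key, DataSourceReadOptions.REALTIME_SKIP_MERGE_OPT_VAL)
//     .load(basePath)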
- private val requiredAvroSchema = new Schema.Parser().parse(tableState.requiredAvroSchema) + new Iterator[InternalRow] with Closeable with SparkAdapterSupport { + private val tableAvroSchema = new Schema.Parser().parse(tableSchema.avroSchemaStr) + private val requiredAvroSchema = new Schema.Parser().parse(requiredSchema.avroSchemaStr) private val requiredFieldPosition = - tableState.requiredStructSchema + requiredSchema.structTypeSchema .map(f => tableAvroSchema.getField(f.name).pos()).toList private val recordBuilder = new GenericRecordBuilder(requiredAvroSchema) - private val deserializer = HoodieAvroDeserializer(requiredAvroSchema, tableState.requiredStructSchema) - private val unsafeProjection = UnsafeProjection.create(tableState.requiredStructSchema) + private val deserializer = sparkAdapter.createAvroDeserializer(requiredAvroSchema, requiredSchema.structTypeSchema) + private val unsafeProjection = UnsafeProjection.create(requiredSchema.structTypeSchema) private var logScanner = HoodieMergeOnReadRDD.scanLog(split, tableAvroSchema, config) private val logRecords = logScanner.getRecords private val logRecordsKeyIterator = logRecords.keySet().iterator().asScala private var recordToLoad: InternalRow = _ + override def hasNext: Boolean = { if (logRecordsKeyIterator.hasNext) { val curAvrokey = logRecordsKeyIterator.next() - val curAvroRecord = logRecords.get(curAvrokey).getData.getInsertValue(tableAvroSchema) + val curAvroRecord = logRecords.get(curAvrokey).getData.getInsertValue(tableAvroSchema, payloadProps) if (!curAvroRecord.isPresent) { // delete record found, skipping this.hasNext } else { - val requiredAvroRecord = AvroConversionUtils - .buildAvroRecordBySchema(curAvroRecord.get(), requiredAvroSchema, requiredFieldPosition, recordBuilder) - recordToLoad = unsafeProjection(deserializer.deserializeData(requiredAvroRecord).asInstanceOf[InternalRow]) + val requiredAvroRecord = AvroConversionUtils.buildAvroRecordBySchema(curAvroRecord.get(), requiredAvroSchema, + requiredFieldPosition, recordBuilder) + val rowOpt = deserializer.deserialize(requiredAvroRecord) + recordToLoad = unsafeProjection(rowOpt.get.asInstanceOf[InternalRow]) true } } else { @@ -170,15 +165,15 @@ class HoodieMergeOnReadRDD(@transient sc: SparkContext, private def skipMergeFileIterator(split: HoodieMergeOnReadFileSplit, baseFileIterator: Iterator[InternalRow], config: Configuration): Iterator[InternalRow] = - new Iterator[InternalRow] with Closeable { - private val tableAvroSchema = new Schema.Parser().parse(tableState.tableAvroSchema) - private val requiredAvroSchema = new Schema.Parser().parse(tableState.requiredAvroSchema) + new Iterator[InternalRow] with Closeable with SparkAdapterSupport { + private val tableAvroSchema = new Schema.Parser().parse(tableSchema.avroSchemaStr) + private val requiredAvroSchema = new Schema.Parser().parse(requiredSchema.avroSchemaStr) private val requiredFieldPosition = - tableState.requiredStructSchema + requiredSchema.structTypeSchema .map(f => tableAvroSchema.getField(f.name).pos()).toList private val recordBuilder = new GenericRecordBuilder(requiredAvroSchema) - private val deserializer = HoodieAvroDeserializer(requiredAvroSchema, tableState.requiredStructSchema) - private val unsafeProjection = UnsafeProjection.create(tableState.requiredStructSchema) + private val deserializer = sparkAdapter.createAvroDeserializer(requiredAvroSchema, requiredSchema.structTypeSchema) + private val unsafeProjection = UnsafeProjection.create(requiredSchema.structTypeSchema) private var logScanner = 
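// Worked example for `requiredFieldPosition` above, with a hypothetical table schema
// (uuid, fare, ts) and required schema (ts, uuid):
//
//   tableAvroSchema.getField("ts").pos()    == 2
//   tableAvroSchema.getField("uuid").pos()  == 0
//   requiredFieldPosition                   == List(2, 0)
//
// buildAvroRecordBySchema then copies values by these positions into the pruned record.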
HoodieMergeOnReadRDD.scanLog(split, tableAvroSchema, config) private val logRecords = logScanner.getRecords private val logRecordsKeyIterator = logRecords.keySet().iterator().asScala @@ -188,19 +183,21 @@ class HoodieMergeOnReadRDD(@transient sc: SparkContext, @scala.annotation.tailrec override def hasNext: Boolean = { if (baseFileIterator.hasNext) { - recordToLoad = baseFileIterator.next() + val curRow = baseFileIterator.next() + recordToLoad = unsafeProjection(curRow) true } else { if (logRecordsKeyIterator.hasNext) { val curAvrokey = logRecordsKeyIterator.next() - val curAvroRecord = logRecords.get(curAvrokey).getData.getInsertValue(tableAvroSchema) + val curAvroRecord = logRecords.get(curAvrokey).getData.getInsertValue(tableAvroSchema, payloadProps) if (!curAvroRecord.isPresent) { // delete record found, skipping this.hasNext } else { - val requiredAvroRecord = AvroConversionUtils - .buildAvroRecordBySchema(curAvroRecord.get(), requiredAvroSchema, requiredFieldPosition, recordBuilder) - recordToLoad = unsafeProjection(deserializer.deserializeData(requiredAvroRecord).asInstanceOf[InternalRow]) + val requiredAvroRecord = AvroConversionUtils.buildAvroRecordBySchema(curAvroRecord.get(), requiredAvroSchema, + requiredFieldPosition, recordBuilder) + val rowOpt = deserializer.deserialize(requiredAvroRecord) + recordToLoad = unsafeProjection(rowOpt.get.asInstanceOf[InternalRow]) true } } else { @@ -227,21 +224,22 @@ class HoodieMergeOnReadRDD(@transient sc: SparkContext, private def payloadCombineFileIterator(split: HoodieMergeOnReadFileSplit, baseFileIterator: Iterator[InternalRow], config: Configuration): Iterator[InternalRow] = - new Iterator[InternalRow] with Closeable { - private val tableAvroSchema = new Schema.Parser().parse(tableState.tableAvroSchema) - private val requiredAvroSchema = new Schema.Parser().parse(tableState.requiredAvroSchema) + new Iterator[InternalRow] with Closeable with SparkAdapterSupport { + private val tableAvroSchema = new Schema.Parser().parse(tableSchema.avroSchemaStr) + private val requiredAvroSchema = new Schema.Parser().parse(requiredSchema.avroSchemaStr) private val requiredFieldPosition = - tableState.requiredStructSchema + requiredSchema.structTypeSchema .map(f => tableAvroSchema.getField(f.name).pos()).toList - private val serializer = HoodieAvroSerializer(tableState.tableStructSchema, tableAvroSchema, false) - private val requiredDeserializer = HoodieAvroDeserializer(requiredAvroSchema, tableState.requiredStructSchema) + private val serializer = sparkAdapter.createAvroSerializer(tableSchema.structTypeSchema, tableAvroSchema, + resolveAvroSchemaNullability(tableAvroSchema)) + private val requiredDeserializer = sparkAdapter.createAvroDeserializer(requiredAvroSchema, requiredSchema.structTypeSchema) private val recordBuilder = new GenericRecordBuilder(requiredAvroSchema) - private val unsafeProjection = UnsafeProjection.create(tableState.requiredStructSchema) + private val unsafeProjection = UnsafeProjection.create(requiredSchema.structTypeSchema) private var logScanner = HoodieMergeOnReadRDD.scanLog(split, tableAvroSchema, config) private val logRecords = logScanner.getRecords private val logRecordsKeyIterator = logRecords.keySet().iterator().asScala private val keyToSkip = mutable.Set.empty[String] - private val recordKeyPosition = if (recordKeyFieldOpt.isEmpty) HOODIE_RECORD_KEY_COL_POS else tableState.tableStructSchema.fieldIndex(recordKeyFieldOpt.get) + private val recordKeyPosition = tableSchema.structTypeSchema.fieldIndex(recordKeyField) private var 
recordToLoad: InternalRow = _ @@ -259,20 +257,15 @@ class HoodieMergeOnReadRDD(@transient sc: SparkContext, this.hasNext } else { // load merged record as InternalRow with required schema - val requiredAvroRecord = AvroConversionUtils - .buildAvroRecordBySchema( - mergedAvroRecord.get(), - requiredAvroSchema, - requiredFieldPosition, - recordBuilder - ) - recordToLoad = unsafeProjection(requiredDeserializer - .deserializeData(requiredAvroRecord).asInstanceOf[InternalRow]) + val requiredAvroRecord = AvroConversionUtils.buildAvroRecordBySchema(mergedAvroRecord.get(), requiredAvroSchema, + requiredFieldPosition, recordBuilder) + val rowOpt = requiredDeserializer.deserialize(requiredAvroRecord) + recordToLoad = unsafeProjection(rowOpt.get.asInstanceOf[InternalRow]) true } } else { // No merge needed, load current row with required schema - recordToLoad = unsafeProjection(createRowWithRequiredSchema(curRow)) + recordToLoad = unsafeProjection(createInternalRowWithSchema(curRow, requiredSchema.structTypeSchema, requiredFieldPosition)) true } } else { @@ -281,8 +274,7 @@ class HoodieMergeOnReadRDD(@transient sc: SparkContext, if (keyToSkip.contains(curKey)) { this.hasNext } else { - val insertAvroRecord = - logRecords.get(curKey).getData.getInsertValue(tableAvroSchema) + val insertAvroRecord = logRecords.get(curKey).getData.getInsertValue(tableAvroSchema, payloadProps) if (!insertAvroRecord.isPresent) { // stand alone delete record, skipping this.hasNext @@ -294,8 +286,8 @@ class HoodieMergeOnReadRDD(@transient sc: SparkContext, requiredFieldPosition, recordBuilder ) - recordToLoad = unsafeProjection(requiredDeserializer - .deserializeData(requiredAvroRecord).asInstanceOf[InternalRow]) + val rowOpt = requiredDeserializer.deserialize(requiredAvroRecord) + recordToLoad = unsafeProjection(rowOpt.get.asInstanceOf[InternalRow]) true } } @@ -317,29 +309,10 @@ class HoodieMergeOnReadRDD(@transient sc: SparkContext, } } - private def createRowWithRequiredSchema(row: InternalRow): InternalRow = { - val rowToReturn = new SpecificInternalRow(tableState.requiredStructSchema) - val posIterator = requiredFieldPosition.iterator - var curIndex = 0 - tableState.requiredStructSchema.foreach( - f => { - val curPos = posIterator.next() - val curField = if (row.isNullAt(curPos)) null else row.get(curPos, f.dataType) - rowToReturn.update(curIndex, curField) - curIndex = curIndex + 1 - } - ) - rowToReturn - } - private def mergeRowWithLog(curRow: InternalRow, curKey: String) = { val historyAvroRecord = serializer.serialize(curRow).asInstanceOf[GenericRecord] - if (payloadProps.isDefined) { - logRecords.get(curKey).getData.combineAndGetUpdateValue(historyAvroRecord, - tableAvroSchema, payloadProps.get) - } else { - logRecords.get(curKey).getData.combineAndGetUpdateValue(historyAvroRecord, tableAvroSchema) - } + logRecords.get(curKey).getData + .combineAndGetUpdateValue(historyAvroRecord, tableAvroSchema, payloadProps) } } } @@ -349,24 +322,60 @@ private object HoodieMergeOnReadRDD { def scanLog(split: HoodieMergeOnReadFileSplit, logSchema: Schema, config: Configuration): HoodieMergedLogRecordScanner = { val fs = FSUtils.getFs(split.tablePath, config) - HoodieMergedLogRecordScanner.newBuilder() - .withFileSystem(fs) - .withBasePath(split.tablePath) - .withLogFilePaths(split.logPaths.get.asJava) - .withReaderSchema(logSchema) - .withLatestInstantTime(split.latestCommit) - .withReadBlocksLazily( - Try(config.get(HoodieRealtimeConfig.COMPACTION_LAZY_BLOCK_READ_ENABLED_PROP, - 
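// The per-key merge contract driving payloadCombineFileIterator above, in isolation:
//
//   val history = serializer.serialize(baseRow).asInstanceOf[GenericRecord]
//   val merged  = logRecord.getData.combineAndGetUpdateValue(history, tableAvroSchema, payloadProps)
//
// An absent `merged` means the log side deletes the base row; `payloadProps` carries the
// precombine (ordering) field so the payload can prefer the newer of the two values.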
HoodieRealtimeConfig.DEFAULT_COMPACTION_LAZY_BLOCK_READ_ENABLED).toBoolean) - .getOrElse(false)) - .withReverseReader(false) - .withBufferSize( - config.getInt(HoodieRealtimeConfig.MAX_DFS_STREAM_BUFFER_SIZE_PROP, - HoodieRealtimeConfig.DEFAULT_MAX_DFS_STREAM_BUFFER_SIZE)) - .withMaxMemorySizeInBytes(split.maxCompactionMemoryInBytes) - .withSpillableMapBasePath( - config.get(HoodieRealtimeConfig.SPILLABLE_MAP_BASE_PATH_PROP, - HoodieRealtimeConfig.DEFAULT_SPILLABLE_MAP_BASE_PATH)) - .build() + val logFiles = split.logFiles.get + + if (HoodieTableMetadata.isMetadataTable(split.tablePath)) { + val metadataConfig = HoodieMetadataConfig.newBuilder().enable(true).build() + val dataTableBasePath = getDataTableBasePathFromMetadataTable(split.tablePath) + val metadataTable = new HoodieBackedTableMetadata( + new HoodieLocalEngineContext(config), metadataConfig, + dataTableBasePath, + config.get(HoodieRealtimeConfig.SPILLABLE_MAP_BASE_PATH_PROP, HoodieRealtimeConfig.DEFAULT_SPILLABLE_MAP_BASE_PATH)) + + // NOTE: In case of Metadata Table partition path equates to partition name (since there's just one level + // of indirection among MT partitions) + val relativePartitionPath = getRelativePartitionPath(new Path(split.tablePath), getPartitionPath(split)) + metadataTable.getLogRecordScanner(logFiles.asJava, relativePartitionPath).getLeft + } else { + val logRecordScannerBuilder = HoodieMergedLogRecordScanner.newBuilder() + .withFileSystem(fs) + .withBasePath(split.tablePath) + .withLogFilePaths(split.logFiles.get.map(logFile => getFilePath(logFile.getPath)).asJava) + .withReaderSchema(logSchema) + .withLatestInstantTime(split.latestCommit) + .withReadBlocksLazily( + Try(config.get(HoodieRealtimeConfig.COMPACTION_LAZY_BLOCK_READ_ENABLED_PROP, + HoodieRealtimeConfig.DEFAULT_COMPACTION_LAZY_BLOCK_READ_ENABLED).toBoolean) + .getOrElse(false)) + .withReverseReader(false) + .withBufferSize( + config.getInt(HoodieRealtimeConfig.MAX_DFS_STREAM_BUFFER_SIZE_PROP, + HoodieRealtimeConfig.DEFAULT_MAX_DFS_STREAM_BUFFER_SIZE)) + .withMaxMemorySizeInBytes(split.maxCompactionMemoryInBytes) + .withSpillableMapBasePath( + config.get(HoodieRealtimeConfig.SPILLABLE_MAP_BASE_PATH_PROP, + HoodieRealtimeConfig.DEFAULT_SPILLABLE_MAP_BASE_PATH)) + + if (logFiles.nonEmpty) { + logRecordScannerBuilder.withPartition(getRelativePartitionPath(new Path(split.tablePath), logFiles.head.getPath.getParent)) + } + + logRecordScannerBuilder.build() + } + } + + private def getPartitionPath(split: HoodieMergeOnReadFileSplit): Path = { + // Determine partition path as an immediate parent folder of either + // - The base file + // - Some log file + split.dataFile.map(baseFile => new Path(baseFile.filePath)) + .getOrElse(split.logFiles.get.head.getPath) + .getParent + } + + private def resolveAvroSchemaNullability(schema: Schema) = { + AvroConversionUtils.resolveAvroTypeNullability(schema) match { + case (nullable, _) => nullable + } } } diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieSparkSqlWriter.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieSparkSqlWriter.scala index 1f2aae4119c55..6b6ddc38e3039 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieSparkSqlWriter.scala +++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieSparkSqlWriter.scala @@ -19,6 +19,7 @@ package org.apache.hudi import org.apache.avro.Schema import org.apache.avro.generic.GenericRecord +import 
org.apache.avro.reflect.AvroSchema import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileSystem, Path} import org.apache.hadoop.hive.conf.HiveConf @@ -28,7 +29,7 @@ import org.apache.hudi.avro.HoodieAvroUtils import org.apache.hudi.client.{HoodieWriteResult, SparkRDDWriteClient} import org.apache.hudi.common.config.{HoodieConfig, HoodieMetadataConfig, TypedProperties} import org.apache.hudi.common.fs.FSUtils -import org.apache.hudi.common.model.{HoodieRecordPayload, HoodieTableType, HoodieTimelineTimeZone, WriteOperationType} +import org.apache.hudi.common.model._ import org.apache.hudi.common.table.timeline.HoodieActiveTimeline import org.apache.hudi.common.table.{HoodieTableConfig, HoodieTableMetaClient, TableSchemaResolver} import org.apache.hudi.common.util.{CommitUtils, ReflectionUtils, StringUtils} @@ -39,19 +40,21 @@ import org.apache.hudi.execution.bulkinsert.{BulkInsertInternalPartitionerWithRo import org.apache.hudi.hive.{HiveSyncConfig, HiveSyncTool} import org.apache.hudi.index.SparkHoodieIndexFactory import org.apache.hudi.internal.DataSourceInternalWriterHelper +import org.apache.hudi.keygen.{TimestampBasedAvroKeyGenerator, TimestampBasedKeyGenerator} import org.apache.hudi.keygen.factory.HoodieSparkKeyGeneratorFactory import org.apache.hudi.sync.common.AbstractSyncTool import org.apache.hudi.table.BulkInsertPartitioner import org.apache.log4j.LogManager +import org.apache.spark.SPARK_VERSION import org.apache.spark.api.java.JavaSparkContext +import org.apache.spark.sql.hive.HiveExternalCatalog import org.apache.spark.rdd.RDD import org.apache.spark.sql.internal.{SQLConf, StaticSQLConf} import org.apache.spark.sql.types.StructType -import org.apache.spark.sql.{DataFrame, Dataset, Row, SQLContext, SaveMode, SparkSession} +import org.apache.spark.sql._ import org.apache.spark.SparkContext import java.util.Properties - import scala.collection.JavaConversions._ import scala.collection.mutable import scala.collection.mutable.ListBuffer @@ -85,6 +88,9 @@ object HoodieSparkSqlWriter { validateTableConfig(sqlContext.sparkSession, optParams, tableConfig) val (parameters, hoodieConfig) = mergeParamsAndGetHoodieConfig(optParams, tableConfig) + val originKeyGeneratorClassName = HoodieWriterUtils.getOriginKeyGenerator(parameters) + val timestampKeyGeneratorConfigs = extractConfigsRelatedToTimestmapBasedKeyGenerator( + originKeyGeneratorClassName, parameters) val databaseName = hoodieConfig.getStringOrDefault(HoodieTableConfig.DATABASE_NAME, "") val tblName = hoodieConfig.getStringOrThrow(HoodieWriteConfig.TBL_NAME, s"'${HoodieWriteConfig.TBL_NAME.key}' must be set.").trim @@ -113,6 +119,11 @@ object HoodieSparkSqlWriter { } val jsc = new JavaSparkContext(sparkContext) + if (asyncCompactionTriggerFn.isDefined) { + if (jsc.getConf.getOption(SparkConfigs.SPARK_SCHEDULER_ALLOCATION_FILE_KEY).isDefined) { + jsc.setLocalProperty("spark.scheduler.pool", SparkConfigs.SPARK_DATASOURCE_WRITER_POOL_NAME) + } + } val instantTime = HoodieActiveTimeline.createNewInstantTime() val keyGenerator = HoodieSparkKeyGeneratorFactory.createKeyGenerator(new TypedProperties(hoodieConfig.getProps)) @@ -142,7 +153,8 @@ object HoodieSparkSqlWriter { .setPartitionFields(partitionColumns) .setPopulateMetaFields(populateMetaFields) .setRecordKeyFields(hoodieConfig.getString(RECORDKEY_FIELD)) - .setKeyGeneratorClassProp(HoodieWriterUtils.getOriginKeyGenerator(parameters)) + .setKeyGeneratorClassProp(originKeyGeneratorClassName) + .set(timestampKeyGeneratorConfigs) 
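// Context for the scheduler-pool hook above: async compaction is submitted to its own
// fair-scheduler pool, so the writer pins itself to a dedicated pool to keep ingestion
// from being starved. The local property only takes effect when an allocation file is
// configured (the path below is hypothetical):
//
//   --conf spark.scheduler.allocation.file=/etc/spark/fairscheduler.xml
//   jsc.setLocalProperty("spark.scheduler.pool", SparkConfigs.SPARK_DATASOURCE_WRITER_POOL_NAME)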
.setHiveStylePartitioningEnable(hoodieConfig.getBoolean(HIVE_STYLE_PARTITIONING)) .setUrlEncodePartitioning(hoodieConfig.getBoolean(URL_ENCODE_PARTITIONING)) .setCommitTimezone(HoodieTimelineTimeZone.valueOf(hoodieConfig.getStringOrDefault(HoodieTableConfig.TIMELINE_TIMEZONE))) @@ -226,6 +238,7 @@ object HoodieSparkSqlWriter { if (reconcileSchema) { schema = getLatestTableSchema(fs, basePath, sparkContext, schema) } + validateSchemaForHoodieIsDeleted(schema) sparkContext.getConf.registerAvroSchemas(schema) log.info(s"Registered avro schema : ${schema.toString(true)}") @@ -244,7 +257,8 @@ object HoodieSparkSqlWriter { DataSourceWriteOptions.KEYGENERATOR_CONSISTENT_LOGICAL_TIMESTAMP_ENABLED.defaultValue()).toBoolean) .asInstanceOf[Comparable[_]] DataSourceUtils.createHoodieRecord(processedRecord, - orderingVal, keyGenerator.getKey(gr), + orderingVal, + keyGenerator.getKey(gr), hoodieConfig.getString(PAYLOAD_CLASS_NAME)) } else { DataSourceUtils.createHoodieRecord(processedRecord, keyGenerator.getKey(gr), hoodieConfig.getString(PAYLOAD_CLASS_NAME)) @@ -366,51 +380,62 @@ object HoodieSparkSqlWriter { schema = HoodieAvroUtils.getNullSchema.toString } - // Handle various save modes if (mode == SaveMode.Ignore && tableExists) { log.warn(s"hoodie table at $basePath already exists. Ignoring & not performing actual writes.") + if (!hoodieWriteClient.isEmpty) { + hoodieWriteClient.get.close() + } false } else { + // Handle various save modes handleSaveModes(sqlContext.sparkSession, mode, basePath, tableConfig, tableName, WriteOperationType.BOOTSTRAP, fs) - } - if (!tableExists) { - val archiveLogFolder = hoodieConfig.getStringOrDefault(HoodieTableConfig.ARCHIVELOG_FOLDER) - val partitionColumns = HoodieWriterUtils.getPartitionColumns(parameters) - val recordKeyFields = hoodieConfig.getString(DataSourceWriteOptions.RECORDKEY_FIELD) - val keyGenProp = hoodieConfig.getString(HoodieTableConfig.KEY_GENERATOR_CLASS_NAME) - val populateMetaFields = parameters.getOrElse(HoodieTableConfig.POPULATE_META_FIELDS.key(), HoodieTableConfig.POPULATE_META_FIELDS.defaultValue()).toBoolean - val baseFileFormat = hoodieConfig.getStringOrDefault(HoodieTableConfig.BASE_FILE_FORMAT) - - HoodieTableMetaClient.withPropertyBuilder() - .setTableType(HoodieTableType.valueOf(tableType)) - .setTableName(tableName) - .setRecordKeyFields(recordKeyFields) - .setArchiveLogFolder(archiveLogFolder) - .setPayloadClassName(hoodieConfig.getStringOrDefault(PAYLOAD_CLASS_NAME)) - .setPreCombineField(hoodieConfig.getStringOrDefault(PRECOMBINE_FIELD, null)) - .setBootstrapIndexClass(bootstrapIndexClass) - .setBaseFileFormat(baseFileFormat) - .setBootstrapBasePath(bootstrapBasePath) - .setPartitionFields(partitionColumns) - .setPopulateMetaFields(populateMetaFields) - .setKeyGeneratorClassProp(keyGenProp) - .setHiveStylePartitioningEnable(hoodieConfig.getBoolean(HIVE_STYLE_PARTITIONING)) - .setUrlEncodePartitioning(hoodieConfig.getBoolean(URL_ENCODE_PARTITIONING)) - .setCommitTimezone(HoodieTimelineTimeZone.valueOf(hoodieConfig.getStringOrDefault(HoodieTableConfig.TIMELINE_TIMEZONE))) - .initTable(sparkContext.hadoopConfiguration, path) + if (!tableExists) { + val archiveLogFolder = hoodieConfig.getStringOrDefault(HoodieTableConfig.ARCHIVELOG_FOLDER) + val partitionColumns = HoodieWriterUtils.getPartitionColumns(parameters) + val recordKeyFields = hoodieConfig.getString(DataSourceWriteOptions.RECORDKEY_FIELD) + val keyGenProp = hoodieConfig.getString(HoodieTableConfig.KEY_GENERATOR_CLASS_NAME) + val populateMetaFields = 
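// What validateSchemaForHoodieIsDeleted (invoked above) accepts and rejects, expressed
// as Avro field declarations; a nullable boolean passes because nullability is resolved
// before the type check:
//
//   {"name": "_hoodie_is_deleted", "type": ["null", "boolean"], "default": null}  // ok
//   {"name": "_hoodie_is_deleted", "type": "boolean"}                             // ok
//   {"name": "_hoodie_is_deleted", "type": "string"}           // throws HoodieException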
parameters.getOrElse(HoodieTableConfig.POPULATE_META_FIELDS.key(), HoodieTableConfig.POPULATE_META_FIELDS.defaultValue()).toBoolean + val baseFileFormat = hoodieConfig.getStringOrDefault(HoodieTableConfig.BASE_FILE_FORMAT) + + HoodieTableMetaClient.withPropertyBuilder() + .setTableType(HoodieTableType.valueOf(tableType)) + .setTableName(tableName) + .setRecordKeyFields(recordKeyFields) + .setArchiveLogFolder(archiveLogFolder) + .setPayloadClassName(hoodieConfig.getStringOrDefault(PAYLOAD_CLASS_NAME)) + .setPreCombineField(hoodieConfig.getStringOrDefault(PRECOMBINE_FIELD, null)) + .setBootstrapIndexClass(bootstrapIndexClass) + .setBaseFileFormat(baseFileFormat) + .setBootstrapBasePath(bootstrapBasePath) + .setPartitionFields(partitionColumns) + .setPopulateMetaFields(populateMetaFields) + .setKeyGeneratorClassProp(keyGenProp) + .setHiveStylePartitioningEnable(hoodieConfig.getBoolean(HIVE_STYLE_PARTITIONING)) + .setUrlEncodePartitioning(hoodieConfig.getBoolean(URL_ENCODE_PARTITIONING)) + .setCommitTimezone(HoodieTimelineTimeZone.valueOf(hoodieConfig.getStringOrDefault(HoodieTableConfig.TIMELINE_TIMEZONE))) + .initTable(sparkContext.hadoopConfiguration, path) + } + + val jsc = new JavaSparkContext(sqlContext.sparkContext) + val writeClient = hoodieWriteClient.getOrElse(DataSourceUtils.createHoodieClient(jsc, + schema, path, tableName, mapAsJavaMap(parameters))) + try { + writeClient.bootstrap(org.apache.hudi.common.util.Option.empty()) + } finally { + writeClient.close() + } + val metaSyncSuccess = metaSync(sqlContext.sparkSession, hoodieConfig, basePath, df.schema) + metaSyncSuccess } + } - val jsc = new JavaSparkContext(sqlContext.sparkContext) - val writeClient = hoodieWriteClient.getOrElse(DataSourceUtils.createHoodieClient(jsc, - schema, path, tableName, mapAsJavaMap(parameters))) - try { - writeClient.bootstrap(org.apache.hudi.common.util.Option.empty()) - } finally { - writeClient.close() + def validateSchemaForHoodieIsDeleted(schema: Schema): Unit = { + if (schema.getField(HoodieRecord.HOODIE_IS_DELETED) != null && + AvroConversionUtils.resolveAvroTypeNullability(schema.getField(HoodieRecord.HOODIE_IS_DELETED).schema())._2.getType != Schema.Type.BOOLEAN) { + throw new HoodieException(HoodieRecord.HOODIE_IS_DELETED + " has to be BOOLEAN type. 
Passed in dataframe's schema has type " + + schema.getField(HoodieRecord.HOODIE_IS_DELETED).schema().getType) } - val metaSyncSuccess = metaSync(sqlContext.sparkSession, hoodieConfig, basePath, df.schema) - metaSyncSuccess } def bulkInsertAsRow(sqlContext: SQLContext, @@ -435,6 +460,7 @@ object HoodieSparkSqlWriter { if (dropPartitionColumns) { schema = generateSchemaWithoutPartitionColumns(partitionColumns, schema) } + validateSchemaForHoodieIsDeleted(schema) sparkContext.getConf.registerAvroSchemas(schema) log.info(s"Registered avro schema : ${schema.toString(true)}") if (parameters(INSERT_DROP_DUPS.key).toBoolean) { @@ -535,6 +561,9 @@ object HoodieSparkSqlWriter { val hiveSyncConfig: HiveSyncConfig = buildSyncConfig(basePath, hoodieConfig, sqlConf) val hiveConf: HiveConf = new HiveConf() hiveConf.addResource(fs.getConf) + if (StringUtils.isNullOrEmpty(hiveConf.get(HiveConf.ConfVars.METASTOREURIS.varname))) { + hiveConf.set(HiveConf.ConfVars.METASTOREURIS.varname, hiveSyncConfig.metastoreUris) + } new HiveSyncTool(hiveSyncConfig, hiveConf, fs).syncHoodieTable() true } @@ -550,6 +579,7 @@ object HoodieSparkSqlWriter { hiveSyncConfig.hiveUser = hoodieConfig.getString(HIVE_USER) hiveSyncConfig.hivePass = hoodieConfig.getString(HIVE_PASS) hiveSyncConfig.jdbcUrl = hoodieConfig.getString(HIVE_URL) + hiveSyncConfig.metastoreUris = hoodieConfig.getStringOrDefault(METASTORE_URIS) hiveSyncConfig.skipROSuffix = hoodieConfig.getStringOrDefault(HIVE_SKIP_RO_SUFFIX_FOR_READ_OPTIMIZED_TABLE, DataSourceWriteOptions.HIVE_SKIP_RO_SUFFIX_FOR_READ_OPTIMIZED_TABLE.defaultValue).toBoolean hiveSyncConfig.partitionFields = @@ -569,6 +599,7 @@ object HoodieSparkSqlWriter { hiveSyncConfig.syncMode = hoodieConfig.getString(HIVE_SYNC_MODE) hiveSyncConfig.serdeProperties = hoodieConfig.getString(HIVE_TABLE_SERDE_PROPERTIES) hiveSyncConfig.tableProperties = hoodieConfig.getString(HIVE_TABLE_PROPERTIES) + hiveSyncConfig.sparkVersion = SPARK_VERSION hiveSyncConfig } @@ -730,7 +761,22 @@ object HoodieSparkSqlWriter { mergedParams(key) = value } } + + // use preCombineField to fill in PAYLOAD_ORDERING_FIELD_PROP_KEY + if (mergedParams.contains(PRECOMBINE_FIELD.key())) { + mergedParams.put(HoodiePayloadProps.PAYLOAD_ORDERING_FIELD_PROP_KEY, mergedParams(PRECOMBINE_FIELD.key())) + } val params = mergedParams.toMap (params, HoodieWriterUtils.convertMapToHoodieConfig(params)) } + + private def extractConfigsRelatedToTimestmapBasedKeyGenerator(keyGenerator: String, + params: Map[String, String]): Map[String, String] = { + if (keyGenerator.equals(classOf[TimestampBasedKeyGenerator].getCanonicalName) || + keyGenerator.equals(classOf[TimestampBasedAvroKeyGenerator].getCanonicalName)) { + params.filterKeys(HoodieTableConfig.PERSISTED_CONFIG_LIST.contains) + } else { + Map.empty + } + } } diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieWriterUtils.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieWriterUtils.scala index 9a940ebcebf02..8a4ad9d85d72d 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieWriterUtils.scala +++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieWriterUtils.scala @@ -72,6 +72,7 @@ object HoodieWriterUtils { hoodieConfig.setDefaultValue(HIVE_USER) hoodieConfig.setDefaultValue(HIVE_PASS) hoodieConfig.setDefaultValue(HIVE_URL) + hoodieConfig.setDefaultValue(METASTORE_URIS) hoodieConfig.setDefaultValue(HIVE_PARTITION_FIELDS) 
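// Effect of the METASTORE_URIS default registered above (the URI is hypothetical): when
// hive-site.xml leaves hive.metastore.uris unset, HiveSyncTool now falls back to the
// datasource option instead of failing to connect, e.g.
//
//   df.write.format("hudi")
//     .option(DataSourceWriteOptions.METASTORE_URIS.key, "thrift://hive-metastore:9083")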
hoodieConfig.setDefaultValue(HIVE_PARTITION_EXTRACTOR_CLASS) hoodieConfig.setDefaultValue(HIVE_STYLE_PARTITIONING) diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/IncrementalRelation.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/IncrementalRelation.scala index 19071080312bc..9247973e78fc0 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/IncrementalRelation.scala +++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/IncrementalRelation.scala @@ -18,16 +18,17 @@ package org.apache.hudi import org.apache.avro.Schema +import org.apache.hudi.common.model.{HoodieCommitMetadata, HoodieRecord, HoodieReplaceCommitMetadata} +import org.apache.hudi.common.table.{HoodieTableMetaClient, TableSchemaResolver} import java.util.stream.Collectors -import org.apache.hudi.common.model.{HoodieCommitMetadata, HoodieRecord, HoodieReplaceCommitMetadata, HoodieTableType} -import org.apache.hudi.common.table.{HoodieTableMetaClient, TableSchemaResolver} +import org.apache.hadoop.fs.{GlobPattern, Path} +import org.apache.hudi.client.common.HoodieSparkEngineContext +import org.apache.hudi.common.fs.FSUtils import org.apache.hudi.common.table.timeline.{HoodieInstant, HoodieTimeline} +import org.apache.hudi.common.util.HoodieTimer import org.apache.hudi.config.HoodieWriteConfig import org.apache.hudi.exception.HoodieException -import org.apache.hadoop.fs.GlobPattern -import org.apache.hudi.client.common.HoodieSparkEngineContext -import org.apache.hudi.common.fs.FSUtils import org.apache.hudi.table.HoodieSparkTable import org.apache.log4j.LogManager import org.apache.spark.api.java.JavaSparkContext @@ -40,14 +41,14 @@ import scala.collection.JavaConversions._ import scala.collection.mutable /** - * Relation, that implements the Hoodie incremental view. - * - * Implemented for Copy_on_write storage. - * - */ + * Relation, that implements the Hoodie incremental view. + * + * Implemented for Copy_on_write storage. 
+ * + */ class IncrementalRelation(val sqlContext: SQLContext, val optParams: Map[String, String], - val userSchema: StructType, + val userSchema: Option[StructType], val metaClient: HoodieTableMetaClient) extends BaseRelation with TableScan { private val log = LogManager.getLogger(classOf[IncrementalRelation]) @@ -85,7 +86,7 @@ class IncrementalRelation(val sqlContext: SQLContext, log.info("Inferring schema..") val schemaResolver = new TableSchemaResolver(metaClient) val tableSchema = if (useEndInstantSchema) { - if (commitsToReturn.isEmpty) schemaResolver.getTableAvroSchemaWithoutMetadataFields() else + if (commitsToReturn.isEmpty) schemaResolver.getTableAvroSchemaWithoutMetadataFields() else schemaResolver.getTableAvroSchemaWithoutMetadataFields(commitsToReturn.last) } else { schemaResolver.getTableAvroSchemaWithoutMetadataFields() @@ -165,26 +166,63 @@ class IncrementalRelation(val sqlContext: SQLContext, if (filteredRegularFullPaths.isEmpty && filteredMetaBootstrapFullPaths.isEmpty) { sqlContext.sparkContext.emptyRDD[Row] } else { - log.info("Additional Filters to be applied to incremental source are :" + filters) + log.info("Additional Filters to be applied to incremental source are :" + filters.mkString("Array(", ", ", ")")) var df: DataFrame = sqlContext.createDataFrame(sqlContext.sparkContext.emptyRDD[Row], usedSchema) - if (metaBootstrapFileIdToFullPath.nonEmpty) { - df = sqlContext.sparkSession.read - .format("hudi") - .schema(usedSchema) - .option(DataSourceReadOptions.READ_PATHS.key, filteredMetaBootstrapFullPaths.mkString(",")) - .load() + val fallbackToFullTableScan = optParams.getOrElse(DataSourceReadOptions.INCREMENTAL_FALLBACK_TO_FULL_TABLE_SCAN_FOR_NON_EXISTING_FILES.key, + DataSourceReadOptions.INCREMENTAL_FALLBACK_TO_FULL_TABLE_SCAN_FOR_NON_EXISTING_FILES.defaultValue).toBoolean + + var doFullTableScan = false + + if (fallbackToFullTableScan) { + val fs = new Path(basePath).getFileSystem(sqlContext.sparkContext.hadoopConfiguration); + val timer = new HoodieTimer().startTimer(); + + val allFilesToCheck = filteredMetaBootstrapFullPaths ++ filteredRegularFullPaths + val firstNotFoundPath = allFilesToCheck.find(path => !fs.exists(new Path(path))) + val timeTaken = timer.endTimer() + log.info("Checking if paths exists took " + timeTaken + "ms") + + val optStartTs = optParams(DataSourceReadOptions.BEGIN_INSTANTTIME.key) + val isInstantArchived = optStartTs.compareTo(commitTimeline.firstInstant().get().getTimestamp) < 0 // True if optStartTs < activeTimeline.first + + if (isInstantArchived || firstNotFoundPath.isDefined) { + doFullTableScan = true + log.info("Falling back to full table scan") + } } - if (regularFileIdToFullPath.nonEmpty) { - df = df.union(sqlContext.read.options(sOpts) + if (doFullTableScan) { + val hudiDF = sqlContext.read + .format("hudi_v1") .schema(usedSchema) - .parquet(filteredRegularFullPaths.toList: _*) - .filter(String.format("%s >= '%s'", HoodieRecord.COMMIT_TIME_METADATA_FIELD, - commitsToReturn.head.getTimestamp)) + .load(basePath) + .filter(String.format("%s > '%s'", HoodieRecord.COMMIT_TIME_METADATA_FIELD, //Notice the > in place of >= because we are working with optParam instead of first commit > optParam + optParams(DataSourceReadOptions.BEGIN_INSTANTTIME.key))) .filter(String.format("%s <= '%s'", HoodieRecord.COMMIT_TIME_METADATA_FIELD, - commitsToReturn.last.getTimestamp))) + commitsToReturn.last.getTimestamp)) + // schema enforcement does not happen in above spark.read with hudi. 
hence selecting explicitly w/ right column order + val fieldNames : Array[String] = df.schema.fields.map(field => field.name) + df = df.union(hudiDF.select(fieldNames.head, fieldNames.tail: _*)) + } else { + if (metaBootstrapFileIdToFullPath.nonEmpty) { + df = sqlContext.sparkSession.read + .format("hudi_v1") + .schema(usedSchema) + .option(DataSourceReadOptions.READ_PATHS.key, filteredMetaBootstrapFullPaths.mkString(",")) + .load() + } + + if (regularFileIdToFullPath.nonEmpty) { + df = df.union(sqlContext.read.options(sOpts) + .schema(usedSchema) + .parquet(filteredRegularFullPaths.toList: _*) + .filter(String.format("%s >= '%s'", HoodieRecord.COMMIT_TIME_METADATA_FIELD, + commitsToReturn.head.getTimestamp)) + .filter(String.format("%s <= '%s'", HoodieRecord.COMMIT_TIME_METADATA_FIELD, + commitsToReturn.last.getTimestamp))) + } } filters.foldLeft(df)((e, f) => e.filter(f)).rdd diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/MergeOnReadIncrementalRelation.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/MergeOnReadIncrementalRelation.scala index bc83a85415de2..b9d18c68d3d60 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/MergeOnReadIncrementalRelation.scala +++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/MergeOnReadIncrementalRelation.scala @@ -17,21 +17,19 @@ package org.apache.hudi +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.{GlobPattern, Path} +import org.apache.hadoop.mapred.JobConf +import org.apache.hudi.HoodieBaseRelation.createBaseFileReader import org.apache.hudi.common.model.HoodieRecord +import org.apache.hudi.common.table.HoodieTableMetaClient import org.apache.hudi.common.table.view.HoodieTableFileSystemView -import org.apache.hudi.common.table.{HoodieTableMetaClient, TableSchemaResolver} import org.apache.hudi.exception.HoodieException -import org.apache.hudi.hadoop.utils.HoodieInputFormatUtils.listAffectedFilesForCommits -import org.apache.hudi.hadoop.utils.HoodieInputFormatUtils.getCommitMetadata -import org.apache.hudi.hadoop.utils.HoodieInputFormatUtils.getWritePartitionPaths +import org.apache.hudi.hadoop.utils.HoodieInputFormatUtils.{getCommitMetadata, getWritePartitionPaths, listAffectedFilesForCommits} import org.apache.hudi.hadoop.utils.HoodieRealtimeRecordReaderUtils.getMaxCompactionMemoryInBytes -import org.apache.hadoop.fs.{GlobPattern, Path} -import org.apache.hadoop.mapred.JobConf -import org.apache.log4j.LogManager import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.execution.datasources.PartitionedFile -import org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat import org.apache.spark.sql.sources._ import org.apache.spark.sql.types.StructType import org.apache.spark.sql.{Row, SQLContext} @@ -39,19 +37,19 @@ import org.apache.spark.sql.{Row, SQLContext} import scala.collection.JavaConversions._ /** - * Experimental. - * Relation, that implements the Hoodie incremental view for Merge On Read table. - * - */ -class MergeOnReadIncrementalRelation(val sqlContext: SQLContext, + * Experimental. + * Relation, that implements the Hoodie incremental view for Merge On Read table. 
+ * + */ +class MergeOnReadIncrementalRelation(sqlContext: SQLContext, val optParams: Map[String, String], - val userSchema: StructType, + val userSchema: Option[StructType], val metaClient: HoodieTableMetaClient) - extends BaseRelation with PrunedFilteredScan { + extends HoodieBaseRelation(sqlContext, metaClient, optParams, userSchema) { - private val log = LogManager.getLogger(classOf[MergeOnReadIncrementalRelation]) - private val conf = sqlContext.sparkContext.hadoopConfiguration + private val conf = new Configuration(sqlContext.sparkContext.hadoopConfiguration) private val jobConf = new JobConf(conf) + private val commitTimeline = metaClient.getCommitsAndCompactionTimeline.filterCompletedInstants() if (commitTimeline.empty()) { throw new HoodieException("No instants to incrementally pull") } @@ -72,91 +70,96 @@ class MergeOnReadIncrementalRelation(val sqlContext: SQLContext, private val commitsTimelineToReturn = commitTimeline.findInstantsInRange( optParams(DataSourceReadOptions.BEGIN_INSTANTTIME.key), optParams.getOrElse(DataSourceReadOptions.END_INSTANTTIME.key, lastInstant.getTimestamp)) - log.debug(s"${commitsTimelineToReturn.getInstants.iterator().toList.map(f => f.toString).mkString(",")}") + logDebug(s"${commitsTimelineToReturn.getInstants.iterator().toList.map(f => f.toString).mkString(",")}") private val commitsToReturn = commitsTimelineToReturn.getInstants.iterator().toList - private val schemaUtil = new TableSchemaResolver(metaClient) - private val tableAvroSchema = schemaUtil.getTableAvroSchema - private val tableStructSchema = AvroConversionUtils.convertAvroSchemaToStructType(tableAvroSchema) + private val maxCompactionMemoryInBytes = getMaxCompactionMemoryInBytes(jobConf) + private val fileIndex = if (commitsToReturn.isEmpty) List() else buildFileIndex() - private val preCombineField = { - val preCombineFieldFromTableConfig = metaClient.getTableConfig.getPreCombineField - if (preCombineFieldFromTableConfig != null) { - Some(preCombineFieldFromTableConfig) - } else { - // get preCombineFiled from the options if this is a old table which have not store - // the field to hoodie.properties - optParams.get(DataSourceReadOptions.READ_PRE_COMBINE_FIELD.key) - } - } - override def schema: StructType = tableStructSchema - override def needConversion: Boolean = false + private val preCombineFieldOpt = getPrecombineFieldProperty - override def unhandledFilters(filters: Array[Filter]): Array[Filter] = { - if (fileIndex.isEmpty) { - filters - } else { - val isNotNullFilter = IsNotNull(HoodieRecord.COMMIT_TIME_METADATA_FIELD) - val largerThanFilter = GreaterThanOrEqual(HoodieRecord.COMMIT_TIME_METADATA_FIELD, commitsToReturn.head.getTimestamp) - val lessThanFilter = LessThanOrEqual(HoodieRecord.COMMIT_TIME_METADATA_FIELD, commitsToReturn.last.getTimestamp) - filters :+ isNotNullFilter :+ largerThanFilter :+ lessThanFilter - } + // Record filters making sure that only records w/in the requested bounds are being fetched as part of the + // scan collected by this relation + private lazy val incrementalSpanRecordsFilters: Seq[Filter] = { + val isNotNullFilter = IsNotNull(HoodieRecord.COMMIT_TIME_METADATA_FIELD) + val largerThanFilter = GreaterThanOrEqual(HoodieRecord.COMMIT_TIME_METADATA_FIELD, commitsToReturn.head.getTimestamp) + val lessThanFilter = LessThanOrEqual(HoodieRecord.COMMIT_TIME_METADATA_FIELD, commitsToReturn.last.getTimestamp) + Seq(isNotNullFilter, largerThanFilter, lessThanFilter) + } + + private lazy val mandatoryColumns = { + // NOTE: These columns are required for
Incremental flow to be able to handle the rows properly, even in + // cases when no columns are requested to be fetched (for ex, when using {@code count()} API) + Seq(HoodieRecord.RECORD_KEY_METADATA_FIELD, HoodieRecord.COMMIT_TIME_METADATA_FIELD) ++ + preCombineFieldOpt.map(Seq(_)).getOrElse(Seq()) } + override def needConversion: Boolean = false + override def buildScan(requiredColumns: Array[String], filters: Array[Filter]): RDD[Row] = { if (fileIndex.isEmpty) { sqlContext.sparkContext.emptyRDD[Row] } else { - log.debug(s"buildScan requiredColumns = ${requiredColumns.mkString(",")}") - log.debug(s"buildScan filters = ${filters.mkString(",")}") + logDebug(s"buildScan requiredColumns = ${requiredColumns.mkString(",")}") + logDebug(s"buildScan filters = ${filters.mkString(",")}") + // config to ensure the push down filter for parquet will be applied. sqlContext.sparkSession.sessionState.conf.setConfString("spark.sql.parquet.filterPushdown", "true") sqlContext.sparkSession.sessionState.conf.setConfString("spark.sql.parquet.recordLevelFilter.enabled", "true") sqlContext.sparkSession.sessionState.conf.setConfString("spark.sql.parquet.enableVectorizedReader", "false") - val pushDownFilter = { - val isNotNullFilter = IsNotNull(HoodieRecord.COMMIT_TIME_METADATA_FIELD) - val largerThanFilter = GreaterThanOrEqual(HoodieRecord.COMMIT_TIME_METADATA_FIELD, commitsToReturn.head.getTimestamp) - val lessThanFilter = LessThanOrEqual(HoodieRecord.COMMIT_TIME_METADATA_FIELD, commitsToReturn.last.getTimestamp) - filters :+ isNotNullFilter :+ largerThanFilter :+ lessThanFilter - } + + val fetchedColumns: Array[String] = appendMandatoryColumns(requiredColumns) + val (requiredAvroSchema, requiredStructSchema) = - MergeOnReadSnapshotRelation.getRequiredSchema(tableAvroSchema, requiredColumns) - - val hoodieTableState = HoodieMergeOnReadTableState( - tableStructSchema, - requiredStructSchema, - tableAvroSchema.toString, - requiredAvroSchema.toString, - fileIndex, - preCombineField, - Option.empty - ) - val fullSchemaParquetReader = new ParquetFileFormat().buildReaderWithPartitionValues( - sparkSession = sqlContext.sparkSession, - dataSchema = tableStructSchema, - partitionSchema = StructType(Nil), - requiredSchema = tableStructSchema, - filters = pushDownFilter, + HoodieSparkUtils.getRequiredSchema(tableAvroSchema, fetchedColumns) + + val partitionSchema = StructType(Nil) + val tableSchema = HoodieTableSchema(tableStructSchema, tableAvroSchema.toString) + val requiredSchema = HoodieTableSchema(requiredStructSchema, requiredAvroSchema.toString) + + val fullSchemaParquetReader = createBaseFileReader( + spark = sqlContext.sparkSession, + partitionSchema = partitionSchema, + tableSchema = tableSchema, + requiredSchema = tableSchema, + // This file-reader is used to read base file records, subsequently merging them with the records + // stored in delta-log files. 
As such, we have to read _all_ records from the base file, while avoiding + // applying any user-defined filtering _before_ we complete combining them w/ delta-log records (to make sure that + // we combine them correctly) + // + // The only filtering applicable here is the filtering to make sure we're only fetching records that + // fall into incremental span of the timeline being queried + filters = incrementalSpanRecordsFilters, options = optParams, - hadoopConf = sqlContext.sparkSession.sessionState.newHadoopConf() + // NOTE: We have to fork the Hadoop Config here as Spark will be modifying it + // to configure Parquet reader appropriately + hadoopConf = new Configuration(conf) ) - val requiredSchemaParquetReader = new ParquetFileFormat().buildReaderWithPartitionValues( - sparkSession = sqlContext.sparkSession, - dataSchema = tableStructSchema, - partitionSchema = StructType(Nil), - requiredSchema = requiredStructSchema, - filters = pushDownFilter, + val requiredSchemaParquetReader = createBaseFileReader( + spark = sqlContext.sparkSession, + partitionSchema = partitionSchema, + tableSchema = tableSchema, + requiredSchema = requiredSchema, + filters = filters ++ incrementalSpanRecordsFilters, options = optParams, - hadoopConf = sqlContext.sparkSession.sessionState.newHadoopConf() + // NOTE: We have to fork the Hadoop Config here as Spark will be modifying it + // to configure Parquet reader appropriately + hadoopConf = new Configuration(conf) ) + val hoodieTableState = HoodieMergeOnReadTableState(fileIndex, HoodieRecord.RECORD_KEY_METADATA_FIELD, preCombineFieldOpt) + + // TODO implement incremental span record filtering w/in RDD to make sure returned iterator is appropriately + // filtered, since file-reader might not be capable to perform filtering val rdd = new HoodieMergeOnReadRDD( sqlContext.sparkContext, jobConf, fullSchemaParquetReader, requiredSchemaParquetReader, - hoodieTableState + hoodieTableState, + tableSchema, + requiredSchema ) rdd.asInstanceOf[RDD[Row]] } @@ -164,7 +167,7 @@ class MergeOnReadIncrementalRelation(val sqlContext: SQLContext, def buildFileIndex(): List[HoodieMergeOnReadFileSplit] = { val metadataList = commitsToReturn.map(instant => getCommitMetadata(instant, commitsTimelineToReturn)) - val affectedFileStatus = listAffectedFilesForCommits(new Path(metaClient.getBasePath), metadataList) + val affectedFileStatus = listAffectedFilesForCommits(conf, new Path(metaClient.getBasePath), metadataList) val fsView = new HoodieTableFileSystemView(metaClient, commitsTimelineToReturn, affectedFileStatus) // Iterate partitions to create splits @@ -173,7 +176,7 @@ class MergeOnReadIncrementalRelation(val sqlContext: SQLContext, ).toList val latestCommit = fsView.getLastInstant.get.getTimestamp if (log.isDebugEnabled) { - fileGroups.foreach(f => log.debug(s"current file group id: " + + fileGroups.foreach(f => logDebug(s"current file group id: " + s"${f.getFileGroupId} and file slices ${f.getLatestFileSlice.get.toString}")) } @@ -210,10 +213,9 @@ class MergeOnReadIncrementalRelation(val sqlContext: SQLContext, } val logPath = if (f.getLatestFileSlice.isPresent) { - //If log path doesn't exist, we still include an empty path to avoid using + // If log path doesn't exist, we still include an empty path to avoid using // the default parquet reader to ensure the push down filter will be applied. 
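The fallback-to-full-table-scan behavior added to IncrementalRelation above is driven by a read option. A hedged usage sketch follows (table path and begin instant are placeholders; the option strings are meant to mirror DataSourceReadOptions, including the new INCREMENTAL_FALLBACK_TO_FULL_TABLE_SCAN_FOR_NON_EXISTING_FILES key, and should be verified against that class):

import org.apache.spark.sql.SparkSession

object IncrementalReadSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().appName("hudi-incremental-sketch").getOrCreate()
    val basePath = "/tmp/hudi_table"   // placeholder table path
    val beginTime = "20220101000000"   // placeholder begin commit instant

    val incrementalDF = spark.read.format("hudi")
      .option("hoodie.datasource.query.type", "incremental")
      .option("hoodie.datasource.read.begin.instanttime", beginTime)
      // When the begin instant is already archived, or some touched files no longer exist,
      // fall back to a snapshot scan filtered on _hoodie_commit_time instead of failing
      .option("hoodie.datasource.read.incr.fallback.fulltablescan.enable", "true")
      .load(basePath)

    incrementalDF.show(false)
  }
}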
- Option(f.getLatestFileSlice.get().getLogFiles.iterator().toList - .map(logfile => logfile.getPath.toString)) + Option(f.getLatestFileSlice.get().getLogFiles.iterator().toList) } else { Option.empty @@ -223,4 +225,9 @@ class MergeOnReadIncrementalRelation(val sqlContext: SQLContext, latestCommit, metaClient.getBasePath, maxCompactionMemoryInBytes, mergeType) }) } + + private def appendMandatoryColumns(requestedColumns: Array[String]): Array[String] = { + val missing = mandatoryColumns.filter(col => !requestedColumns.contains(col)) + requestedColumns ++ missing + } } diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/MergeOnReadSnapshotRelation.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/MergeOnReadSnapshotRelation.scala index c4d670bb62f8a..7c1a3540c814e 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/MergeOnReadSnapshotRelation.scala +++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/MergeOnReadSnapshotRelation.scala @@ -18,84 +18,70 @@ package org.apache.hudi -import org.apache.avro.Schema -import org.apache.hudi.common.model.HoodieLogFile -import org.apache.hudi.common.table.{HoodieTableMetaClient, TableSchemaResolver} +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.Path +import org.apache.hadoop.mapred.JobConf +import org.apache.hudi.HoodieBaseRelation.{createBaseFileReader, isMetadataTable} +import org.apache.hudi.common.model.{HoodieLogFile, HoodieRecord} +import org.apache.hudi.common.table.HoodieTableMetaClient import org.apache.hudi.common.table.view.HoodieTableFileSystemView import org.apache.hudi.hadoop.utils.HoodieRealtimeInputFormatUtils import org.apache.hudi.hadoop.utils.HoodieRealtimeRecordReaderUtils.getMaxCompactionMemoryInBytes -import org.apache.hadoop.fs.Path -import org.apache.hadoop.mapred.JobConf -import org.apache.spark.internal.Logging +import org.apache.hudi.metadata.HoodieMetadataPayload import org.apache.spark.rdd.RDD -import org.apache.spark.sql.avro.SchemaConverters import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.Expression import org.apache.spark.sql.execution.datasources.{FileStatusCache, PartitionedFile} -import org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat import org.apache.spark.sql.hudi.HoodieSqlCommonUtils -import org.apache.spark.sql.{Row, SQLContext} -import org.apache.spark.sql.sources.{BaseRelation, Filter, PrunedFilteredScan} +import org.apache.spark.sql.sources.Filter import org.apache.spark.sql.types.StructType +import org.apache.spark.sql.{Row, SQLContext} import scala.collection.JavaConverters._ case class HoodieMergeOnReadFileSplit(dataFile: Option[PartitionedFile], - logPaths: Option[List[String]], + logFiles: Option[List[HoodieLogFile]], latestCommit: String, tablePath: String, maxCompactionMemoryInBytes: Long, mergeType: String) -case class HoodieMergeOnReadTableState(tableStructSchema: StructType, - requiredStructSchema: StructType, - tableAvroSchema: String, - requiredAvroSchema: String, - hoodieRealtimeFileSplits: List[HoodieMergeOnReadFileSplit], - preCombineField: Option[String], - recordKeyFieldOpt: Option[String]) - -class MergeOnReadSnapshotRelation(val sqlContext: SQLContext, - val optParams: Map[String, String], - val userSchema: StructType, - val globPaths: Option[Seq[Path]], +case class HoodieMergeOnReadTableState(hoodieRealtimeFileSplits: List[HoodieMergeOnReadFileSplit], + recordKeyField: String, + 
preCombineFieldOpt: Option[String]) + +class MergeOnReadSnapshotRelation(sqlContext: SQLContext, + optParams: Map[String, String], + val userSchema: Option[StructType], + val globPaths: Seq[Path], val metaClient: HoodieTableMetaClient) - extends BaseRelation with PrunedFilteredScan with Logging { + extends HoodieBaseRelation(sqlContext, metaClient, optParams, userSchema) { - private val conf = sqlContext.sparkContext.hadoopConfiguration + private val conf = new Configuration(sqlContext.sparkContext.hadoopConfiguration) private val jobConf = new JobConf(conf) - // use schema from latest metadata, if not present, read schema from the data file - private val schemaUtil = new TableSchemaResolver(metaClient) - private lazy val tableAvroSchema = { - try { - schemaUtil.getTableAvroSchema - } catch { - case _: Throwable => // If there is no commit in the table, we cann't get the schema - // with schemaUtil, use the userSchema instead. - SchemaConverters.toAvroType(userSchema) - } - } - private lazy val tableStructSchema = AvroConversionUtils.convertAvroSchemaToStructType(tableAvroSchema) private val mergeType = optParams.getOrElse( DataSourceReadOptions.REALTIME_MERGE.key, DataSourceReadOptions.REALTIME_MERGE.defaultValue) + private val maxCompactionMemoryInBytes = getMaxCompactionMemoryInBytes(jobConf) - private val preCombineField = { - val preCombineFieldFromTableConfig = metaClient.getTableConfig.getPreCombineField - if (preCombineFieldFromTableConfig != null) { - Some(preCombineFieldFromTableConfig) + + // If meta fields are enabled, always prefer key from the meta field as opposed to user-specified one + // NOTE: This is historical behavior which is preserved as is + private val recordKeyField = { + if (metaClient.getTableConfig.populateMetaFields()) HoodieRecord.RECORD_KEY_METADATA_FIELD + else metaClient.getTableConfig.getRecordKeyFieldProp + } + + private val preCombineFieldOpt = getPrecombineFieldProperty + + private lazy val mandatoryColumns = { + if (isMetadataTable(metaClient)) { + Seq(HoodieMetadataPayload.KEY_FIELD_NAME, HoodieMetadataPayload.SCHEMA_FIELD_NAME_TYPE) } else { - // get preCombineFiled from the options if this is a old table which have not store - // the field to hoodie.properties - optParams.get(DataSourceReadOptions.READ_PRE_COMBINE_FIELD.key) + Seq(recordKeyField) ++ preCombineFieldOpt.map(Seq(_)).getOrElse(Seq()) } } - private var recordKeyFieldOpt = Option.empty[String] - if (!metaClient.getTableConfig.populateMetaFields()) { - recordKeyFieldOpt = Option(metaClient.getTableConfig.getRecordKeyFieldProp) - } - override def schema: StructType = tableStructSchema override def needConversion: Boolean = false @@ -106,51 +92,63 @@ class MergeOnReadSnapshotRelation(val sqlContext: SQLContext, log.debug(s" buildScan requiredColumns = ${requiredColumns.mkString(",")}") log.debug(s" buildScan filters = ${filters.mkString(",")}") + // NOTE: In case list of requested columns doesn't contain the Primary Key one, we + // have to add it explicitly so that + // - Merging could be performed correctly + // - In case 0 columns are to be fetched (for ex, when doing {@code count()} on Spark's [[Dataset]], + // Spark still fetches all the rows to execute the query correctly + // + // It's okay to return columns that have not been requested by the caller, as those nevertheless will be + // filtered out upstream + val fetchedColumns: Array[String] = appendMandatoryColumns(requiredColumns) + val (requiredAvroSchema, requiredStructSchema) = - 
MergeOnReadSnapshotRelation.getRequiredSchema(tableAvroSchema, requiredColumns) + HoodieSparkUtils.getRequiredSchema(tableAvroSchema, fetchedColumns) val fileIndex = buildFileIndex(filters) - val hoodieTableState = HoodieMergeOnReadTableState( - tableStructSchema, - requiredStructSchema, - tableAvroSchema.toString, - requiredAvroSchema.toString, - fileIndex, - preCombineField, - recordKeyFieldOpt - ) - val fullSchemaParquetReader = new ParquetFileFormat().buildReaderWithPartitionValues( - sparkSession = sqlContext.sparkSession, - dataSchema = tableStructSchema, - partitionSchema = StructType(Nil), - requiredSchema = tableStructSchema, + + val partitionSchema = StructType(Nil) + val tableSchema = HoodieTableSchema(tableStructSchema, tableAvroSchema.toString) + val requiredSchema = HoodieTableSchema(requiredStructSchema, requiredAvroSchema.toString) + + val fullSchemaParquetReader = createBaseFileReader( + spark = sqlContext.sparkSession, + partitionSchema = partitionSchema, + tableSchema = tableSchema, + requiredSchema = tableSchema, + // This file-reader is used to read base file records, subsequently merging them with the records + // stored in delta-log files. As such, we have to read _all_ records from the base file, while avoiding + // applying any filtering _before_ we complete combining them w/ delta-log records (to make sure that + // we combine them correctly) filters = Seq.empty, options = optParams, - hadoopConf = sqlContext.sparkSession.sessionState.newHadoopConf() + // NOTE: We have to fork the Hadoop Config here as Spark will be modifying it + // to configure Parquet reader appropriately + hadoopConf = new Configuration(conf) ) - val requiredSchemaParquetReader = new ParquetFileFormat().buildReaderWithPartitionValues( - sparkSession = sqlContext.sparkSession, - dataSchema = tableStructSchema, - partitionSchema = StructType(Nil), - requiredSchema = requiredStructSchema, + val requiredSchemaParquetReader = createBaseFileReader( + spark = sqlContext.sparkSession, + partitionSchema = partitionSchema, + tableSchema = tableSchema, + requiredSchema = requiredSchema, filters = filters, options = optParams, - hadoopConf = sqlContext.sparkSession.sessionState.newHadoopConf() + // NOTE: We have to fork the Hadoop Config here as Spark will be modifying it + // to configure Parquet reader appropriately + hadoopConf = new Configuration(conf) ) - val rdd = new HoodieMergeOnReadRDD( - sqlContext.sparkContext, - jobConf, - fullSchemaParquetReader, - requiredSchemaParquetReader, - hoodieTableState - ) + val tableState = HoodieMergeOnReadTableState(fileIndex, recordKeyField, preCombineFieldOpt) + + val rdd = new HoodieMergeOnReadRDD(sqlContext.sparkContext, jobConf, fullSchemaParquetReader, + requiredSchemaParquetReader, tableState, tableSchema, requiredSchema) + rdd.asInstanceOf[RDD[Row]] } def buildFileIndex(filters: Array[Filter]): List[HoodieMergeOnReadFileSplit] = { - if (globPaths.isDefined) { + if (globPaths.nonEmpty) { // Load files from the global paths if it has defined to be compatible with the original mode - val inMemoryFileIndex = HoodieSparkUtils.createInMemoryFileIndex(sqlContext.sparkSession, globPaths.get) + val inMemoryFileIndex = HoodieSparkUtils.createInMemoryFileIndex(sqlContext.sparkSession, globPaths) val fsView = new HoodieTableFileSystemView(metaClient, // file-slice after pending compaction-requested instant-time is also considered valid metaClient.getCommitsAndCompactionTimeline.filterCompletedAndCompactionInstants, @@ -196,10 +194,12 @@ class 
MergeOnReadSnapshotRelation(val sqlContext: SQLContext, val partitionFilters = filters.filter(f => f.references.forall(p => partitionColumns.contains(p))) val partitionFilterExpression = HoodieSparkUtils.convertToCatalystExpressions(partitionFilters, tableStructSchema) + val convertedPartitionFilterExpression = + HoodieFileIndex.convertFilterForTimestampKeyGenerator(metaClient, partitionFilterExpression.toSeq) // If convert success to catalyst expression, use the partition prune - val fileSlices = if (partitionFilterExpression.isDefined) { - hoodieFileIndex.listFileSlices(Seq(partitionFilterExpression.get)) + val fileSlices = if (convertedPartitionFilterExpression.nonEmpty) { + hoodieFileIndex.listFileSlices(convertedPartitionFilterExpression) } else { hoodieFileIndex.listFileSlices(Seq.empty[Expression]) } @@ -221,8 +221,7 @@ class MergeOnReadSnapshotRelation(val sqlContext: SQLContext, Option.empty } - val logPaths = fileSlice.getLogFiles.sorted(HoodieLogFile.getLogFileComparator).iterator().asScala - .map(logFile => MergeOnReadSnapshotRelation.getFilePath(logFile.getPath)).toList + val logPaths = fileSlice.getLogFiles.sorted(HoodieLogFile.getLogFileComparator).iterator().asScala.toList val logPathsOptional = if (logPaths.isEmpty) Option.empty else Option(logPaths) HoodieMergeOnReadFileSplit(partitionedFile, logPathsOptional, queryInstant, metaClient.getBasePath, @@ -232,6 +231,11 @@ class MergeOnReadSnapshotRelation(val sqlContext: SQLContext, } } } + + private def appendMandatoryColumns(requestedColumns: Array[String]): Array[String] = { + val missing = mandatoryColumns.filter(col => !requestedColumns.contains(col)) + requestedColumns ++ missing + } } object MergeOnReadSnapshotRelation { @@ -252,14 +256,4 @@ object MergeOnReadSnapshotRelation { path.toUri.toString } - def getRequiredSchema(tableAvroSchema: Schema, requiredColumns: Array[String]): (Schema, StructType) = { - // First get the required avro-schema, then convert the avro-schema to spark schema. - val name2Fields = tableAvroSchema.getFields.asScala.map(f => f.name() -> f).toMap - val requiredFields = requiredColumns.map(c => name2Fields(c)) - .map(f => new Schema.Field(f.name(), f.schema(), f.doc(), f.defaultVal(), f.order())).toList - val requiredAvroSchema = Schema.createRecord(tableAvroSchema.getName, tableAvroSchema.getDoc, - tableAvroSchema.getNamespace, tableAvroSchema.isError, requiredFields.asJava) - val requiredStructSchema = AvroConversionUtils.convertAvroSchemaToStructType(requiredAvroSchema) - (requiredAvroSchema, requiredStructSchema) - } } diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/SparkConfigs.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/SparkConfigs.scala new file mode 100644 index 0000000000000..75dee2108914f --- /dev/null +++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/SparkConfigs.scala @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi + +object SparkConfigs { + + // Spark data source write pool name. In case of a streaming sink, users might be interested in setting custom scheduling configs + // for regular writes and async compaction. In such cases, this pool name will be used for spark datasource writes. + val SPARK_DATASOURCE_WRITER_POOL_NAME = "sparkdatasourcewrite" + + /* + When async compaction is enabled (deltastreamer or streaming sink), users might be interested in setting custom + scheduling configs for regular writes and async compaction. This is the property used to set a custom scheduler config + file with Spark. In Deltastreamer, the file is generated within Hudi and set if necessary, whereas in the case of a streaming + sink, users have to set this property when they invoke the Spark shell. + Sample format of the file contents: + + <?xml version="1.0"?> + <allocations> + <pool name="sparkdatasourcewrite"> + <schedulingMode>FAIR</schedulingMode> + <weight>4</weight> + <minShare>2</minShare> + </pool> + <pool name="hoodiecompact"> + <schedulingMode>FAIR</schedulingMode> + <weight>3</weight> + <minShare>1</minShare> + </pool> + </allocations> + */ + val SPARK_SCHEDULER_ALLOCATION_FILE_KEY = "spark.scheduler.allocation.file" + +} diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/SparkHoodieTableFileIndex.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/SparkHoodieTableFileIndex.scala index a79ac6f1db73b..46201c4132078 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/SparkHoodieTableFileIndex.scala +++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/SparkHoodieTableFileIndex.scala @@ -18,8 +18,9 @@ package org.apache.hudi import org.apache.hadoop.fs.{FileStatus, Path} +import org.apache.hudi.BaseHoodieTableFileIndex.PartitionPath import org.apache.hudi.DataSourceReadOptions.{QUERY_TYPE, QUERY_TYPE_INCREMENTAL_OPT_VAL, QUERY_TYPE_READ_OPTIMIZED_OPT_VAL, QUERY_TYPE_SNAPSHOT_OPT_VAL} -import org.apache.hudi.SparkHoodieTableFileIndex.{deduceQueryType, generateFieldMap} +import org.apache.hudi.SparkHoodieTableFileIndex.{deduceQueryType, generateFieldMap, toJavaOption} import org.apache.hudi.client.common.HoodieSparkEngineContext import org.apache.hudi.common.config.TypedProperties import org.apache.hudi.common.model.{FileSlice, HoodieTableQueryType} @@ -36,10 +37,11 @@ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.{StringType, StructField, StructType} import org.apache.spark.unsafe.types.UTF8String -import scala.collection.JavaConversions._ +import scala.collection.JavaConverters._ +import scala.language.implicitConversions /** - * Implementation of the [[HoodieTableFileIndexBase]] for Spark + * Implementation of the [[BaseHoodieTableFileIndex]] for Spark * * @param spark spark session * @param metaClient Hudi table's meta-client @@ -55,14 +57,16 @@ class SparkHoodieTableFileIndex(spark: SparkSession, queryPaths: Seq[Path], specifiedQueryInstant: Option[String] = None, @transient fileStatusCache: FileStatusCache = NoopCache) - extends HoodieTableFileIndexBase( - engineContext = new HoodieSparkEngineContext(new JavaSparkContext(spark.sparkContext)), + extends BaseHoodieTableFileIndex( + new HoodieSparkEngineContext(new JavaSparkContext(spark.sparkContext)), metaClient, configProperties, - queryType = 
deduceQueryType(configProperties), - queryPaths, - specifiedQueryInstant, - fileStatusCache = SparkHoodieTableFileIndex.adapt(fileStatusCache) + deduceQueryType(configProperties), + queryPaths.asJava, + toJavaOption(specifiedQueryInstant), + false, + false, + SparkHoodieTableFileIndex.adapt(fileStatusCache) ) with SparkAdapterSupport with Logging { @@ -136,9 +140,9 @@ class SparkHoodieTableFileIndex(spark: SparkSession, */ def listFileSlices(partitionFilters: Seq[Expression]): Map[String, Seq[FileSlice]] = { // Prune the partition path by the partition filters - val prunedPartitions = prunePartition(cachedAllInputFileSlices.keys.toSeq, partitionFilters) + val prunedPartitions = prunePartition(cachedAllInputFileSlices.asScala.keys.toSeq, partitionFilters) prunedPartitions.map(partition => { - (partition.path, cachedAllInputFileSlices(partition)) + (partition.path, cachedAllInputFileSlices.get(partition).asScala) }).toMap } @@ -150,9 +154,7 @@ class SparkHoodieTableFileIndex(spark: SparkSession, * @param predicates The filter condition. * @return The Pruned partition paths. */ - def prunePartition(partitionPaths: Seq[PartitionPath], - predicates: Seq[Expression]): Seq[PartitionPath] = { - + def prunePartition(partitionPaths: Seq[PartitionPath], predicates: Seq[Expression]): Seq[PartitionPath] = { val partitionColumnNames = partitionSchema.fields.map(_.name).toSet val partitionPruningPredicates = predicates.filter { _.references.map(_.name).toSet.subsetOf(partitionColumnNames) @@ -167,8 +169,9 @@ class SparkHoodieTableFileIndex(spark: SparkSession, }) val prunedPartitionPaths = partitionPaths.filter { - case PartitionPath(_, values) => boundPredicate.eval(InternalRow.fromSeq(values)) + partitionPath => boundPredicate.eval(InternalRow.fromSeq(partitionPath.values)) } + logInfo(s"Total partition size is: ${partitionPaths.size}," + s" after partition prune size is: ${prunedPartitionPaths.size}") prunedPartitionPaths @@ -177,7 +180,7 @@ class SparkHoodieTableFileIndex(spark: SparkSession, } } - protected def parsePartitionColumnValues(partitionColumns: Array[String], partitionPath: String): Array[Any] = { + protected def parsePartitionColumnValues(partitionColumns: Array[String], partitionPath: String): Array[Object] = { if (partitionColumns.length == 0) { // This is a non-partitioned table Array.empty @@ -225,7 +228,7 @@ class SparkHoodieTableFileIndex(spark: SparkSession, val pathWithPartitionName = new Path(basePath, partitionWithName) val partitionValues = parsePartitionPath(pathWithPartitionName, partitionSchema) - partitionValues.toArray + partitionValues.map(_.asInstanceOf[Object]).toArray } } } @@ -247,6 +250,13 @@ class SparkHoodieTableFileIndex(spark: SparkSession, object SparkHoodieTableFileIndex { + implicit def toJavaOption[T](opt: Option[T]): org.apache.hudi.common.util.Option[T] = + if (opt.isDefined) { + org.apache.hudi.common.util.Option.of(opt.get) + } else { + org.apache.hudi.common.util.Option.empty() + } + /** * This method unravels [[StructType]] into a [[Map]] of pairs of dot-path notation with corresponding * [[StructField]] object for every field of the provided [[StructType]], recursively. 
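As a rough sketch of the dot-path unraveling described in the scaladoc above (not the actual generateFieldMap body; the names here are illustrative):

import org.apache.spark.sql.types.{StructField, StructType}

object FieldPathSketch {
  // Recursively flattens a StructType into dot-path -> StructField pairs,
  // e.g. a field "b" nested under struct field "a" yields the keys "a" and "a.b"
  def fieldPaths(schema: StructType, prefix: String = ""): Map[String, StructField] =
    schema.fields.flatMap { field =>
      val path = if (prefix.isEmpty) field.name else s"$prefix.${field.name}"
      field.dataType match {
        case nested: StructType => Map(path -> field) ++ fieldPaths(nested, path)
        case _ => Map(path -> field)
      }
    }.toMap
}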
@@ -287,17 +297,17 @@ object SparkHoodieTableFileIndex { } private def deduceQueryType(configProperties: TypedProperties): HoodieTableQueryType = { - configProperties(QUERY_TYPE.key()) match { - case QUERY_TYPE_SNAPSHOT_OPT_VAL => HoodieTableQueryType.QUERY_TYPE_SNAPSHOT - case QUERY_TYPE_INCREMENTAL_OPT_VAL => HoodieTableQueryType.QUERY_TYPE_INCREMENTAL - case QUERY_TYPE_READ_OPTIMIZED_OPT_VAL => HoodieTableQueryType.QUERY_TYPE_READ_OPTIMIZED + configProperties.asScala(QUERY_TYPE.key()) match { + case QUERY_TYPE_SNAPSHOT_OPT_VAL => HoodieTableQueryType.SNAPSHOT + case QUERY_TYPE_INCREMENTAL_OPT_VAL => HoodieTableQueryType.INCREMENTAL + case QUERY_TYPE_READ_OPTIMIZED_OPT_VAL => HoodieTableQueryType.READ_OPTIMIZED case _ @ qt => throw new IllegalArgumentException(s"query-type ($qt) not supported") } } - private def adapt(cache: FileStatusCache): FileStatusCacheTrait = { - new FileStatusCacheTrait { - override def get(path: Path): Option[Array[FileStatus]] = cache.getLeafFiles(path) + private def adapt(cache: FileStatusCache): BaseHoodieTableFileIndex.FileStatusCache = { + new BaseHoodieTableFileIndex.FileStatusCache { + override def get(path: Path): org.apache.hudi.common.util.Option[Array[FileStatus]] = toJavaOption(cache.getLeafFiles(path)) override def put(path: Path, leafFiles: Array[FileStatus]): Unit = cache.putLeafFiles(path, leafFiles) override def invalidate(): Unit = cache.invalidateAll() } diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/avro/HoodieAvroSerializer.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/avro/HoodieAvroSerializer.scala index b464c2dc5d611..050efbd3d22c2 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/avro/HoodieAvroSerializer.scala +++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/avro/HoodieAvroSerializer.scala @@ -20,9 +20,10 @@ package org.apache.spark.sql.avro import org.apache.avro.Schema import org.apache.spark.sql.types.DataType -/** - * As AvroSerializer cannot be access out of the spark.sql.avro package since spark 3.1, we define - * this class to be accessed by other class. 
- */ -case class HoodieAvroSerializer(rootCatalystType: DataType, rootAvroType: Schema, nullable: Boolean) - extends AvroSerializer(rootCatalystType, rootAvroType, nullable) +class HoodieAvroSerializer(rootCatalystType: DataType, rootAvroType: Schema, nullable: Boolean) + extends HoodieAvroSerializerTrait { + + val avroSerializer = new AvroSerializer(rootCatalystType, rootAvroType, nullable) + + override def serialize(catalystData: Any): Any = avroSerializer.serialize(catalystData) +} diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/catalyst/catalog/HoodieCatalogTable.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/catalyst/catalog/HoodieCatalogTable.scala index f14ccbe6066c0..98823d14222d9 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/catalyst/catalog/HoodieCatalogTable.scala +++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/catalyst/catalog/HoodieCatalogTable.scala @@ -25,17 +25,15 @@ import org.apache.hudi.common.table.{HoodieTableConfig, HoodieTableMetaClient} import org.apache.hudi.common.util.ValidationUtils import org.apache.hudi.keygen.ComplexKeyGenerator import org.apache.hudi.keygen.factory.HoodieSparkKeyGeneratorFactory - import org.apache.spark.internal.Logging -import org.apache.spark.sql.{AnalysisException, SparkSession} import org.apache.spark.sql.avro.SchemaConverters import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.hudi.HoodieOptionConfig import org.apache.spark.sql.hudi.HoodieSqlCommonUtils._ import org.apache.spark.sql.types.{StructField, StructType} +import org.apache.spark.sql.{AnalysisException, SparkSession} import java.util.{Locale, Properties} - import scala.collection.JavaConverters._ import scala.collection.mutable diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/HoodieSqlCommonUtils.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/HoodieSqlCommonUtils.scala index 4901c0d39117d..1e1e9c663e54f 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/HoodieSqlCommonUtils.scala +++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/HoodieSqlCommonUtils.scala @@ -32,11 +32,11 @@ import org.apache.spark.api.java.JavaSparkContext import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.analysis.{Resolver, UnresolvedRelation} import org.apache.spark.sql.catalyst.catalog.{CatalogTable, CatalogTableType} -import org.apache.spark.sql.catalyst.expressions.{And, Attribute, Expression} +import org.apache.spark.sql.catalyst.expressions.{And, Attribute, Cast, Expression, Literal} import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, SubqueryAlias} import org.apache.spark.sql.execution.datasources.LogicalRelation -import org.apache.spark.sql.internal.StaticSQLConf -import org.apache.spark.sql.types.{StringType, StructField, StructType} +import org.apache.spark.sql.internal.{SQLConf, StaticSQLConf} +import org.apache.spark.sql.types.{DataType, NullType, StringType, StructField, StructType} import org.apache.spark.sql.{Column, DataFrame, SparkSession} import java.net.URI @@ -54,24 +54,6 @@ object HoodieSqlCommonUtils extends SparkAdapterSupport { override def get() = new SimpleDateFormat("yyyy-MM-dd") }) - def isHoodieTable(table: CatalogTable): Boolean = { - 
table.provider.map(_.toLowerCase(Locale.ROOT)).orNull == "hudi" - } - - def isHoodieTable(tableId: TableIdentifier, spark: SparkSession): Boolean = { - val table = spark.sessionState.catalog.getTableMetadata(tableId) - isHoodieTable(table) - } - - def isHoodieTable(table: LogicalPlan, spark: SparkSession): Boolean = { - tripAlias(table) match { - case LogicalRelation(_, _, Some(tbl), _) => isHoodieTable(tbl) - case relation: UnresolvedRelation => - isHoodieTable(sparkAdapter.toTableIdentifier(relation), spark) - case _=> false - } - } - def getTableIdentifier(table: LogicalPlan): TableIdentifier = { table match { case SubqueryAlias(name, _) => sparkAdapter.toTableIdentifier(name) @@ -200,16 +182,29 @@ object HoodieSqlCommonUtils extends SparkAdapterSupport { getTableLocation(table, spark) } + def getTableLocation(properties: Map[String, String], identifier: TableIdentifier, sparkSession: SparkSession): String = { + val location: Option[String] = Some(properties.getOrElse("location", "")) + val isManaged = location.isEmpty || location.get.isEmpty + val uri = if (isManaged) { + Some(sparkSession.sessionState.catalog.defaultTablePath(identifier)) + } else { + Some(new Path(location.get).toUri) + } + getTableLocation(uri, identifier, sparkSession) + } + def getTableLocation(table: CatalogTable, sparkSession: SparkSession): String = { - val uri = if (table.tableType == CatalogTableType.MANAGED && isHoodieTable(table)) { + val uri = table.storage.locationUri.orElse { Some(sparkSession.sessionState.catalog.defaultTablePath(table.identifier)) - } else { - table.storage.locationUri } + getTableLocation(uri, table.identifier, sparkSession) + } + + def getTableLocation(uri: Option[URI], identifier: TableIdentifier, sparkSession: SparkSession): String = { val conf = sparkSession.sessionState.newHadoopConf() uri.map(makePathQualified(_, conf)) .map(removePlaceHolder) - .getOrElse(throw new IllegalArgumentException(s"Missing location for ${table.identifier}")) + .getOrElse(throw new IllegalArgumentException(s"Missing location for ${identifier}")) } private def removePlaceHolder(path: String): String = { @@ -312,4 +307,18 @@ object HoodieSqlCommonUtils extends SparkAdapterSupport { case field if resolver(field.name, name) => field } } + + // Compare a [[StructField]] to another, return true if they have the same column + // name(by resolver) and dataType. + def columnEqual(field: StructField, other: StructField, resolver: Resolver): Boolean = { + resolver(field.name, other.name) && field.dataType == other.dataType + } + + def castIfNeeded(child: Expression, dataType: DataType, conf: SQLConf): Expression = { + child match { + case Literal(nul, NullType) => Literal(nul, dataType) + case _ => if (child.dataType != dataType) + Cast(child, dataType, Option(conf.sessionLocalTimeZone)) else child + } + } } diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/ProvidesHoodieConfig.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/ProvidesHoodieConfig.scala new file mode 100644 index 0000000000000..d6745b6795032 --- /dev/null +++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/ProvidesHoodieConfig.scala @@ -0,0 +1,196 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.hudi + +import org.apache.hudi.DataSourceWriteOptions +import org.apache.hudi.DataSourceWriteOptions._ +import org.apache.hudi.common.model.OverwriteWithLatestAvroPayload +import org.apache.hudi.config.HoodieWriteConfig +import org.apache.hudi.config.HoodieWriteConfig.TBL_NAME +import org.apache.hudi.hive.MultiPartKeysValueExtractor +import org.apache.hudi.hive.ddl.HiveSyncMode +import org.apache.hudi.keygen.ComplexKeyGenerator +import org.apache.hudi.sql.InsertMode +import org.apache.spark.internal.Logging +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.catalyst.catalog.HoodieCatalogTable +import org.apache.spark.sql.hudi.HoodieSqlCommonUtils.{isEnableHive, withSparkConf} +import org.apache.spark.sql.hudi.command.{SqlKeyGenerator, ValidateDuplicateKeyPayload} + +import scala.collection.JavaConverters.propertiesAsScalaMapConverter + +trait ProvidesHoodieConfig extends Logging { + + def buildHoodieConfig(hoodieCatalogTable: HoodieCatalogTable): Map[String, String] = { + val sparkSession: SparkSession = hoodieCatalogTable.spark + val catalogProperties = hoodieCatalogTable.catalogProperties + val tableConfig = hoodieCatalogTable.tableConfig + val tableId = hoodieCatalogTable.table.identifier + + // NOTE: Here we fallback to "" to make sure that null value is not overridden with + // default value ("ts") + // TODO(HUDI-3456) clean up + val preCombineField = Option(tableConfig.getPreCombineField).getOrElse("") + + require(hoodieCatalogTable.primaryKeys.nonEmpty, + s"There are no primary key in table ${hoodieCatalogTable.table.identifier}, cannot execute update operator") + val enableHive = isEnableHive(sparkSession) + + withSparkConf(sparkSession, catalogProperties) { + Map.apply( + "path" -> hoodieCatalogTable.tableLocation, + RECORDKEY_FIELD.key -> hoodieCatalogTable.primaryKeys.mkString(","), + TBL_NAME.key -> hoodieCatalogTable.tableName, + PRECOMBINE_FIELD.key -> preCombineField, + HIVE_STYLE_PARTITIONING.key -> tableConfig.getHiveStylePartitioningEnable, + URL_ENCODE_PARTITIONING.key -> tableConfig.getUrlEncodePartitioning, + KEYGENERATOR_CLASS_NAME.key -> classOf[SqlKeyGenerator].getCanonicalName, + SqlKeyGenerator.ORIGIN_KEYGEN_CLASS_NAME -> tableConfig.getKeyGeneratorClassName, + OPERATION.key -> UPSERT_OPERATION_OPT_VAL, + PARTITIONPATH_FIELD.key -> tableConfig.getPartitionFieldProp, + META_SYNC_ENABLED.key -> enableHive.toString, + HIVE_SYNC_MODE.key -> HiveSyncMode.HMS.name(), + HIVE_USE_JDBC.key -> "false", + HIVE_DATABASE.key -> tableId.database.getOrElse("default"), + HIVE_TABLE.key -> tableId.table, + HIVE_PARTITION_FIELDS.key -> tableConfig.getPartitionFieldProp, + HIVE_PARTITION_EXTRACTOR_CLASS.key -> classOf[MultiPartKeysValueExtractor].getCanonicalName, + HIVE_SUPPORT_TIMESTAMP_TYPE.key -> "true", + HoodieWriteConfig.UPSERT_PARALLELISM_VALUE.key -> "200", + SqlKeyGenerator.PARTITION_SCHEMA -> hoodieCatalogTable.partitionSchema.toDDL + ) + 
.filter { case(_, v) => v != null } + } + } + + /** + * Build the default config for insert. + * + * @return + */ + def buildHoodieInsertConfig(hoodieCatalogTable: HoodieCatalogTable, + sparkSession: SparkSession, + isOverwrite: Boolean, + insertPartitions: Map[String, Option[String]] = Map.empty, + extraOptions: Map[String, String]): Map[String, String] = { + + if (insertPartitions.nonEmpty && + (insertPartitions.keys.toSet != hoodieCatalogTable.partitionFields.toSet)) { + throw new IllegalArgumentException(s"Insert partition fields" + + s"[${insertPartitions.keys.mkString(" ")}]" + + s" not equal to the defined partition in table[${hoodieCatalogTable.partitionFields.mkString(",")}]") + } + val path = hoodieCatalogTable.tableLocation + val tableType = hoodieCatalogTable.tableTypeName + val tableConfig = hoodieCatalogTable.tableConfig + val tableSchema = hoodieCatalogTable.tableSchema + + val options = hoodieCatalogTable.catalogProperties ++ tableConfig.getProps.asScala.toMap ++ extraOptions + val parameters = withSparkConf(sparkSession, options)() + + val partitionFieldsStr = hoodieCatalogTable.partitionFields.mkString(",") + + // NOTE: Here we fallback to "" to make sure that null value is not overridden with + // default value ("ts") + // TODO(HUDI-3456) clean up + val preCombineField = hoodieCatalogTable.preCombineKey.getOrElse("") + + val hiveStylePartitioningEnable = Option(tableConfig.getHiveStylePartitioningEnable).getOrElse("true") + val urlEncodePartitioning = Option(tableConfig.getUrlEncodePartitioning).getOrElse("false") + val keyGeneratorClassName = Option(tableConfig.getKeyGeneratorClassName) + .getOrElse(classOf[ComplexKeyGenerator].getCanonicalName) + + val enableBulkInsert = parameters.getOrElse(DataSourceWriteOptions.SQL_ENABLE_BULK_INSERT.key, + DataSourceWriteOptions.SQL_ENABLE_BULK_INSERT.defaultValue()).toBoolean + val dropDuplicate = sparkSession.conf + .getOption(INSERT_DROP_DUPS.key).getOrElse(INSERT_DROP_DUPS.defaultValue).toBoolean + + val insertMode = InsertMode.of(parameters.getOrElse(DataSourceWriteOptions.SQL_INSERT_MODE.key, + DataSourceWriteOptions.SQL_INSERT_MODE.defaultValue())) + val isNonStrictMode = insertMode == InsertMode.NON_STRICT + val isPartitionedTable = hoodieCatalogTable.partitionFields.nonEmpty + val hasPrecombineColumn = hoodieCatalogTable.preCombineKey.nonEmpty + val operation = + (enableBulkInsert, isOverwrite, dropDuplicate, isNonStrictMode, isPartitionedTable) match { + case (true, _, _, false, _) => + throw new IllegalArgumentException(s"Table with primaryKey can not use bulk insert in ${insertMode.value()} mode.") + case (true, true, _, _, true) => + throw new IllegalArgumentException(s"Insert Overwrite Partition can not use bulk insert.") + case (true, _, true, _, _) => + throw new IllegalArgumentException(s"Bulk insert cannot support drop duplication." + + s" Please disable $INSERT_DROP_DUPS and try again.") + // if enableBulkInsert is true, use bulk insert for the insert overwrite non-partitioned table. + case (true, true, _, _, false) => BULK_INSERT_OPERATION_OPT_VAL + // insert overwrite table + case (false, true, _, _, false) => INSERT_OVERWRITE_TABLE_OPERATION_OPT_VAL + // insert overwrite partition + case (_, true, _, _, true) => INSERT_OVERWRITE_OPERATION_OPT_VAL + // disable dropDuplicate, and provide preCombineKey, use the upsert operation for strict and upsert mode. 
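+ // (Illustrative walk-through of this match, with hypothetical flag values:
+ //  enableBulkInsert=false, isOverwrite=false, dropDuplicate=false, isNonStrictMode=false
+ //  with a precombine column selects UPSERT below, while enableBulkInsert=true together with
+ //  isNonStrictMode=true, no overwrite and no drop-duplicates selects BULK_INSERT further down.)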
+ case (false, false, false, false, _) if hasPrecombineColumn => UPSERT_OPERATION_OPT_VAL + // if table is pk table and has enableBulkInsert use bulk insert for non-strict mode. + case (true, _, _, true, _) => BULK_INSERT_OPERATION_OPT_VAL + // for the rest case, use the insert operation + case _ => INSERT_OPERATION_OPT_VAL + } + + val payloadClassName = if (operation == UPSERT_OPERATION_OPT_VAL && + tableType == COW_TABLE_TYPE_OPT_VAL && insertMode == InsertMode.STRICT) { + // Only validate duplicate key for COW, for MOR it will do the merge with the DefaultHoodieRecordPayload + // on reading. + classOf[ValidateDuplicateKeyPayload].getCanonicalName + } else { + classOf[OverwriteWithLatestAvroPayload].getCanonicalName + } + + logInfo(s"Insert statement use write operation type: $operation, payloadClass: $payloadClassName") + + val enableHive = isEnableHive(sparkSession) + + withSparkConf(sparkSession, options) { + Map( + "path" -> path, + TABLE_TYPE.key -> tableType, + TBL_NAME.key -> hoodieCatalogTable.tableName, + OPERATION.key -> operation, + HIVE_STYLE_PARTITIONING.key -> hiveStylePartitioningEnable, + URL_ENCODE_PARTITIONING.key -> urlEncodePartitioning, + KEYGENERATOR_CLASS_NAME.key -> classOf[SqlKeyGenerator].getCanonicalName, + SqlKeyGenerator.ORIGIN_KEYGEN_CLASS_NAME -> keyGeneratorClassName, + RECORDKEY_FIELD.key -> hoodieCatalogTable.primaryKeys.mkString(","), + PRECOMBINE_FIELD.key -> preCombineField, + PARTITIONPATH_FIELD.key -> partitionFieldsStr, + PAYLOAD_CLASS_NAME.key -> payloadClassName, + ENABLE_ROW_WRITER.key -> enableBulkInsert.toString, + HoodieWriteConfig.COMBINE_BEFORE_INSERT.key -> String.valueOf(hasPrecombineColumn), + HIVE_PARTITION_FIELDS.key -> partitionFieldsStr, + META_SYNC_ENABLED.key -> enableHive.toString, + HIVE_SYNC_MODE.key -> HiveSyncMode.HMS.name(), + HIVE_USE_JDBC.key -> "false", + HIVE_DATABASE.key -> hoodieCatalogTable.table.identifier.database.getOrElse("default"), + HIVE_TABLE.key -> hoodieCatalogTable.table.identifier.table, + HIVE_SUPPORT_TIMESTAMP_TYPE.key -> "true", + HIVE_PARTITION_EXTRACTOR_CLASS.key -> classOf[MultiPartKeysValueExtractor].getCanonicalName, + HoodieWriteConfig.INSERT_PARALLELISM_VALUE.key -> "200", + HoodieWriteConfig.UPSERT_PARALLELISM_VALUE.key -> "200", + SqlKeyGenerator.PARTITION_SCHEMA -> hoodieCatalogTable.partitionSchema.toDDL + ) + .filter { case (_, v) => v != null } + } + } + +} diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/command/AlterHoodieTableAddColumnsCommand.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/command/AlterHoodieTableAddColumnsCommand.scala index c6c79f431337e..c4f5cd39f6073 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/command/AlterHoodieTableAddColumnsCommand.scala +++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/command/AlterHoodieTableAddColumnsCommand.scala @@ -57,7 +57,8 @@ case class AlterHoodieTableAddColumnsCommand( s" table columns is: [${hoodieCatalogTable.tableSchemaWithoutMetaFields.fieldNames.mkString(",")}]") } // Get the new schema - val newSqlSchema = StructType(tableSchema.fields ++ colsToAdd) + val rearrangedSchema = hoodieCatalogTable.dataSchema ++ colsToAdd ++ hoodieCatalogTable.partitionSchema + val newSqlSchema = StructType(rearrangedSchema) val (structName, nameSpace) = AvroConversionUtils.getAvroRecordNameAndNamespace(tableId.table) val newSchema = 
AvroConversionUtils.convertStructTypeToAvroSchema(newSqlSchema, structName, nameSpace) diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/command/AlterHoodieTableChangeColumnCommand.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/command/AlterHoodieTableChangeColumnCommand.scala index befda70680f85..3aa5ca945486e 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/command/AlterHoodieTableChangeColumnCommand.scala +++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/command/AlterHoodieTableChangeColumnCommand.scala @@ -49,6 +49,13 @@ case class AlterHoodieTableChangeColumnCommand( throw new AnalysisException(s"Can't find column `$columnName` given table data columns " + s"${hoodieCatalogTable.dataSchema.fieldNames.mkString("[`", "`, `", "`]")}") ) + // Throw an AnalysisException if the column name/dataType is changed. + if (!columnEqual(originColumn, newColumn, resolver)) { + throw new AnalysisException( + "ALTER TABLE CHANGE COLUMN is not supported for changing column " + + s"'${originColumn.name}' with type '${originColumn.dataType}' to " + + s"'${newColumn.name}' with type '${newColumn.dataType}'") + } // Get the new schema val newTableSchema = StructType( diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/command/SqlKeyGenerator.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/command/SqlKeyGenerator.scala index e4392380465ee..9d139389fd235 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/command/SqlKeyGenerator.scala +++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/command/SqlKeyGenerator.scala @@ -47,7 +47,7 @@ class SqlKeyGenerator(props: TypedProperties) extends ComplexKeyGenerator(props) // The origin key generator class for this table. 
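  // NOTE: SQL writes store the user's key generator under ORIGIN_KEYGEN_CLASS_NAME, and the
  // property may arrive as an empty string rather than null; the nonEmpty guard below (and in
  // getRealKeyGenClassName) skips instantiation in that case, so the ComplexKeyGenerator
  // default applies instead.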
private lazy val originKeyGen = { val beforeKeyGenClassName = props.getString(SqlKeyGenerator.ORIGIN_KEYGEN_CLASS_NAME, null) - if (beforeKeyGenClassName != null) { + if (beforeKeyGenClassName != null && beforeKeyGenClassName.nonEmpty) { val keyGenProps = new TypedProperties() keyGenProps.putAll(props) keyGenProps.remove(SqlKeyGenerator.ORIGIN_KEYGEN_CLASS_NAME) @@ -132,7 +132,7 @@ object SqlKeyGenerator { def getRealKeyGenClassName(props: TypedProperties): String = { val beforeKeyGenClassName = props.getString(SqlKeyGenerator.ORIGIN_KEYGEN_CLASS_NAME, null) - if (beforeKeyGenClassName != null) { + if (beforeKeyGenClassName != null && beforeKeyGenClassName.nonEmpty) { HoodieSparkKeyGeneratorFactory.convertToSparkKeyGenerator(beforeKeyGenClassName) } else { classOf[ComplexKeyGenerator].getCanonicalName diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/command/payload/ExpressionCodeGen.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/command/payload/ExpressionCodeGen.scala index 509746bae160f..947291d10373b 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/command/payload/ExpressionCodeGen.scala +++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/command/payload/ExpressionCodeGen.scala @@ -17,7 +17,6 @@ package org.apache.spark.sql.hudi.command.payload -import java.util.UUID import org.apache.avro.generic.{GenericRecord, IndexedRecord} import org.apache.hudi.sql.IExpressionEvaluator import org.apache.spark.executor.InputMetrics @@ -37,6 +36,8 @@ import org.apache.spark.{TaskContext, TaskKilledException} import org.codehaus.commons.compiler.CompileException import org.codehaus.janino.{ClassBodyEvaluator, InternalCompilerException} +import java.util.UUID + /** * Do CodeGen for expression based on IndexedRecord. 
* The mainly difference with the spark's CodeGen for expression is that diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/command/payload/ExpressionPayload.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/command/payload/ExpressionPayload.scala index 0800d1712d978..e59a609321549 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/command/payload/ExpressionPayload.scala +++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/command/payload/ExpressionPayload.scala @@ -17,14 +17,9 @@ package org.apache.spark.sql.hudi.command.payload -import java.util.{Base64, Properties} -import java.util.concurrent.Callable - import com.google.common.cache.CacheBuilder - import org.apache.avro.Schema import org.apache.avro.generic.{GenericData, GenericRecord, IndexedRecord} - import org.apache.hudi.AvroConversionUtils import org.apache.hudi.DataSourceWriteOptions._ import org.apache.hudi.avro.HoodieAvroUtils @@ -34,13 +29,14 @@ import org.apache.hudi.common.util.{ValidationUtils, Option => HOption} import org.apache.hudi.config.HoodieWriteConfig import org.apache.hudi.io.HoodieWriteHandle import org.apache.hudi.sql.IExpressionEvaluator - -import org.apache.spark.sql.avro.{AvroSerializer, HoodieAvroSerializer, SchemaConverters} +import org.apache.spark.sql.avro.{AvroSerializer, SchemaConverters} import org.apache.spark.sql.catalyst.expressions.Expression import org.apache.spark.sql.hudi.SerDeUtils import org.apache.spark.sql.hudi.command.payload.ExpressionPayload.getEvaluator import org.apache.spark.sql.types.{StructField, StructType} +import java.util.concurrent.Callable +import java.util.{Base64, Properties} import scala.collection.JavaConverters._ import scala.collection.mutable.ArrayBuffer @@ -315,7 +311,7 @@ object ExpressionPayload { val conditionEvaluator = ExpressionCodeGen.doCodeGen(Seq(condition), conditionSerializer) val assignSqlType = AvroConversionUtils.convertAvroSchemaToStructType(writeSchema) - val assignSerializer = new HoodieAvroSerializer(assignSqlType, writeSchema, false) + val assignSerializer = new AvroSerializer(assignSqlType, writeSchema, false) val assignmentEvaluator = ExpressionCodeGen.doCodeGen(assignments, assignSerializer) conditionEvaluator -> assignmentEvaluator } diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/command/payload/SqlTypedRecord.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/command/payload/SqlTypedRecord.scala index 749761443547a..29025877b48c9 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/command/payload/SqlTypedRecord.scala +++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/command/payload/SqlTypedRecord.scala @@ -17,22 +17,19 @@ package org.apache.spark.sql.hudi.command.payload -import org.apache.avro.generic.IndexedRecord import org.apache.avro.Schema - -import org.apache.hudi.AvroConversionUtils - -import org.apache.spark.sql.avro.HoodieAvroDeserializer +import org.apache.avro.generic.IndexedRecord +import org.apache.hudi.{AvroConversionUtils, SparkAdapterSupport} import org.apache.spark.sql.catalyst.InternalRow /** * A sql typed record which will convert the avro field to sql typed value. 
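 * A rough usage sketch (the wrapped record and its field layout are hypothetical):
 * {{{
 *   val typed = new SqlTypedRecord(avroRecord)
 *   val firstField = typed.get(0) // returned as a Spark SQL internal value, e.g. UTF8String
 * }}}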
*/ -class SqlTypedRecord(val record: IndexedRecord) extends IndexedRecord { +class SqlTypedRecord(val record: IndexedRecord) extends IndexedRecord with SparkAdapterSupport { private lazy val sqlType = AvroConversionUtils.convertAvroSchemaToStructType(getSchema) - private lazy val avroDeserializer = HoodieAvroDeserializer(record.getSchema, sqlType) - private lazy val sqlRow = avroDeserializer.deserializeData(record).asInstanceOf[InternalRow] + private lazy val avroDeserializer = sparkAdapter.createAvroDeserializer(record.getSchema, sqlType) + private lazy val sqlRow = avroDeserializer.deserialize(record).get.asInstanceOf[InternalRow] override def put(i: Int, v: Any): Unit = { record.put(i, v) diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/streaming/HoodieStreamSource.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/streaming/HoodieStreamSource.scala index ffe9b64984027..4e46233c3596e 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/streaming/HoodieStreamSource.scala +++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/streaming/HoodieStreamSource.scala @@ -161,12 +161,12 @@ class HoodieStreamSource( val rdd = tableType match { case HoodieTableType.COPY_ON_WRITE => val serDe = sparkAdapter.createSparkRowSerDe(RowEncoder(schema)) - new IncrementalRelation(sqlContext, incParams, schema, metaClient) + new IncrementalRelation(sqlContext, incParams, Some(schema), metaClient) .buildScan() .map(serDe.serializeRow) case HoodieTableType.MERGE_ON_READ => val requiredColumns = schema.fields.map(_.name) - new MergeOnReadIncrementalRelation(sqlContext, incParams, schema, metaClient) + new MergeOnReadIncrementalRelation(sqlContext, incParams, Some(schema), metaClient) .buildScan(requiredColumns, Array.empty[Filter]) .asInstanceOf[RDD[InternalRow]] case _ => throw new IllegalArgumentException(s"UnSupport tableType: $tableType") diff --git a/hudi-spark-datasource/hudi-spark/src/main/antlr4/org/apache/hudi/spark/sql/parser/HoodieSqlCommon.g4 b/hudi-spark-datasource/hudi-spark/src/main/antlr4/org/apache/hudi/spark/sql/parser/HoodieSqlCommon.g4 index 74f83438f659c..0cde14a4e4a0e 100644 --- a/hudi-spark-datasource/hudi-spark/src/main/antlr4/org/apache/hudi/spark/sql/parser/HoodieSqlCommon.g4 +++ b/hudi-spark-datasource/hudi-spark/src/main/antlr4/org/apache/hudi/spark/sql/parser/HoodieSqlCommon.g4 @@ -14,59 +14,197 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -grammar HoodieSqlCommon; + + grammar HoodieSqlCommon; + + @lexer::members { + /** + * Verify whether current token is a valid decimal token (which contains dot). + * Returns true if the character that follows the token is not a digit or letter or underscore. + * + * For example: + * For char stream "2.3", "2." is not a valid decimal token, because it is followed by digit '3'. + * For char stream "2.3_", "2.3" is not a valid decimal token, because it is followed by '_'. + * For char stream "2.3W", "2.3" is not a valid decimal token, because it is followed by 'W'. + * For char stream "12.0D 34.E2+0.12 " 12.0D is a valid decimal token because it is followed + * by a space. 34.E2 is a valid decimal token because it is followed by symbol '+' + * which is not a digit or letter or underscore. 
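+   * For char stream "2.3,", "2.3" is a valid decimal token because it is followed by ',',
+   * which is likewise not a digit, letter or underscore.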
+ */ + public boolean isValidDecimal() { + int nextChar = _input.LA(1); + if (nextChar >= 'A' && nextChar <= 'Z' || nextChar >= '0' && nextChar <= '9' || + nextChar == '_') { + return false; + } else { + return true; + } + } +} singleStatement : statement EOF ; -statement - : compactionStatement #compactionCommand - | .*? #passThrough + statement + : compactionStatement #compactionCommand + | CALL multipartIdentifier '(' (callArgument (',' callArgument)*)? ')' #call + | .*? #passThrough ; compactionStatement - : operation = (RUN | SCHEDULE) COMPACTION ON tableIdentifier (AT instantTimestamp = NUMBER)? #compactionOnTable - | operation = (RUN | SCHEDULE) COMPACTION ON path = STRING (AT instantTimestamp = NUMBER)? #compactionOnPath - | SHOW COMPACTION ON tableIdentifier (LIMIT limit = NUMBER)? #showCompactionOnTable - | SHOW COMPACTION ON path = STRING (LIMIT limit = NUMBER)? #showCompactionOnPath + : operation = (RUN | SCHEDULE) COMPACTION ON tableIdentifier (AT instantTimestamp = INTEGER_VALUE)? #compactionOnTable + | operation = (RUN | SCHEDULE) COMPACTION ON path = STRING (AT instantTimestamp = INTEGER_VALUE)? #compactionOnPath + | SHOW COMPACTION ON tableIdentifier (LIMIT limit = INTEGER_VALUE)? #showCompactionOnTable + | SHOW COMPACTION ON path = STRING (LIMIT limit = INTEGER_VALUE)? #showCompactionOnPath ; tableIdentifier : (db=IDENTIFIER '.')? table=IDENTIFIER ; + callArgument + : expression #positionalArgument + | identifier '=>' expression #namedArgument + ; + + expression + : constant + | stringMap + ; + + constant + : number #numericLiteral + | booleanValue #booleanLiteral + | STRING+ #stringLiteral + | identifier STRING #typeConstructor + ; + + stringMap + : MAP '(' constant (',' constant)* ')' + ; + + booleanValue + : TRUE | FALSE + ; + + number + : MINUS? EXPONENT_VALUE #exponentLiteral + | MINUS? DECIMAL_VALUE #decimalLiteral + | MINUS? INTEGER_VALUE #integerLiteral + | MINUS? BIGINT_LITERAL #bigIntLiteral + | MINUS? SMALLINT_LITERAL #smallIntLiteral + | MINUS? TINYINT_LITERAL #tinyIntLiteral + | MINUS? DOUBLE_LITERAL #doubleLiteral + | MINUS? FLOAT_LITERAL #floatLiteral + | MINUS? BIGDECIMAL_LITERAL #bigDecimalLiteral + ; + + multipartIdentifier + : parts+=identifier ('.' parts+=identifier)* + ; + + identifier + : IDENTIFIER #unquotedIdentifier + | quotedIdentifier #quotedIdentifierAlternative + | nonReserved #unquotedIdentifier + ; + + quotedIdentifier + : BACKQUOTED_IDENTIFIER + ; + + nonReserved + : CALL | COMPACTION | RUN | SCHEDULE | ON | SHOW | LIMIT + ; + ALL: 'ALL'; AT: 'AT'; + CALL: 'CALL'; COMPACTION: 'COMPACTION'; RUN: 'RUN'; SCHEDULE: 'SCHEDULE'; ON: 'ON'; SHOW: 'SHOW'; LIMIT: 'LIMIT'; + MAP: 'MAP'; + NULL: 'NULL'; + TRUE: 'TRUE'; + FALSE: 'FALSE'; + INTERVAL: 'INTERVAL'; + TO: 'TO'; + + PLUS: '+'; + MINUS: '-'; + + STRING + : '\'' ( ~('\''|'\\') | ('\\' .) )* '\'' + | '"' ( ~('"'|'\\') | ('\\' .) )* '"' + ; - NUMBER + BIGINT_LITERAL + : DIGIT+ 'L' + ; + + SMALLINT_LITERAL + : DIGIT+ 'S' + ; + + TINYINT_LITERAL + : DIGIT+ 'Y' + ; + + INTEGER_VALUE : DIGIT+ ; + EXPONENT_VALUE + : DIGIT+ EXPONENT + | DECIMAL_DIGITS EXPONENT {isValidDecimal()}? + ; + + DECIMAL_VALUE + : DECIMAL_DIGITS {isValidDecimal()}? + ; + + FLOAT_LITERAL + : DIGIT+ EXPONENT? 'F' + | DECIMAL_DIGITS EXPONENT? 'F' {isValidDecimal()}? + ; + + DOUBLE_LITERAL + : DIGIT+ EXPONENT? 'D' + | DECIMAL_DIGITS EXPONENT? 'D' {isValidDecimal()}? + ; + + BIGDECIMAL_LITERAL + : DIGIT+ EXPONENT? 'BD' + | DECIMAL_DIGITS EXPONENT? 'BD' {isValidDecimal()}? 
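+    // e.g. matches 10BD, 3E2BD and 12.5BD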
+ ; + IDENTIFIER - : (LETTER | DIGIT | '_')+ - ; + : (LETTER | DIGIT | '_')+ + ; -STRING - : '\'' ( ~('\''|'\\') | ('\\' .) )* '\'' - | '"' ( ~('"'|'\\') | ('\\' .) )* '"' + BACKQUOTED_IDENTIFIER + : '`' ( ~'`' | '``' )* '`' ; + fragment DECIMAL_DIGITS + : DIGIT+ '.' DIGIT* + | '.' DIGIT+ + ; + fragment EXPONENT + : 'E' [+-]? DIGIT+ + ; fragment DIGIT - : [0-9] - ; + : [0-9] + ; fragment LETTER - : [A-Z] - ; + : [A-Z] + ; SIMPLE_COMMENT : '--' ~[\r\n]* '\r'? '\n'? -> channel(HIDDEN) diff --git a/hudi-spark-datasource/hudi-spark/src/main/java/org/apache/hudi/QuickstartUtils.java b/hudi-spark-datasource/hudi-spark/src/main/java/org/apache/hudi/QuickstartUtils.java index e0929efed1f87..9aa7ac1a664cd 100644 --- a/hudi-spark-datasource/hudi-spark/src/main/java/org/apache/hudi/QuickstartUtils.java +++ b/hudi-spark-datasource/hudi-spark/src/main/java/org/apache/hudi/QuickstartUtils.java @@ -19,6 +19,7 @@ package org.apache.hudi; import org.apache.hudi.avro.HoodieAvroUtils; +import org.apache.hudi.common.model.HoodieAvroRecord; import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.model.OverwriteWithLatestAvroPayload; @@ -149,7 +150,7 @@ public Stream generateInsertsStream(String randomString, Integer n existingKeys.put(currSize + i, key); numExistingKeys++; try { - return new HoodieRecord(key, generateRandomValue(key, randomString)); + return new HoodieAvroRecord(key, generateRandomValue(key, randomString)); } catch (IOException e) { throw new HoodieIOException(e.getMessage(), e); } @@ -165,7 +166,7 @@ public List generateInserts(Integer n) throws IOException { } public HoodieRecord generateUpdateRecord(HoodieKey key, String randomString) throws IOException { - return new HoodieRecord(key, generateRandomValue(key, randomString)); + return new HoodieAvroRecord(key, generateRandomValue(key, randomString)); } /** diff --git a/hudi-spark-datasource/hudi-spark/src/main/java/org/apache/hudi/payload/AWSDmsAvroPayload.java b/hudi-spark-datasource/hudi-spark/src/main/java/org/apache/hudi/payload/AWSDmsAvroPayload.java index d0e1326761076..0eba1d9a6a4bc 100644 --- a/hudi-spark-datasource/hudi-spark/src/main/java/org/apache/hudi/payload/AWSDmsAvroPayload.java +++ b/hudi-spark-datasource/hudi-spark/src/main/java/org/apache/hudi/payload/AWSDmsAvroPayload.java @@ -26,6 +26,7 @@ import org.apache.avro.generic.IndexedRecord; import java.io.IOException; +import java.util.Properties; /** * Provides support for seamlessly applying changes captured via Amazon Database Migration Service onto S3. @@ -68,12 +69,25 @@ private Option handleDeleteOperation(IndexedRecord insertValue) t return delete ? 
Option.empty() : Option.of(insertValue); } + @Override + public Option getInsertValue(Schema schema, Properties properties) throws IOException { + IndexedRecord insertValue = super.getInsertValue(schema, properties).get(); + return handleDeleteOperation(insertValue); + } + @Override public Option getInsertValue(Schema schema) throws IOException { IndexedRecord insertValue = super.getInsertValue(schema).get(); return handleDeleteOperation(insertValue); } + @Override + public Option combineAndGetUpdateValue(IndexedRecord currentValue, Schema schema, Properties properties) + throws IOException { + IndexedRecord insertValue = super.getInsertValue(schema, properties).get(); + return handleDeleteOperation(insertValue); + } + @Override public Option combineAndGetUpdateValue(IndexedRecord currentValue, Schema schema) throws IOException { diff --git a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/Call.scala b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/Call.scala new file mode 100644 index 0000000000000..df2a953752fa8 --- /dev/null +++ b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/Call.scala @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.spark.sql.catalyst.plans.logical + +import org.apache.spark.sql.catalyst.expressions.Expression + +case class CallCommand(name: Seq[String], args: Seq[CallArgument]) extends Command { + override def children: Seq[LogicalPlan] = Seq.empty + + def withNewChildrenInternal(newChildren: IndexedSeq[LogicalPlan]): CallCommand = { + this + } +} + +/** + * An argument in a CALL statement. + */ +sealed trait CallArgument { + def expr: Expression +} + +/** + * An argument in a CALL statement identified by name. + */ +case class NamedArgument(name: String, expr: Expression) extends CallArgument + +/** + * An argument in a CALL statement identified by position. 
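+ *
+ * For example (procedure name hypothetical), `CALL show_commits('h1', 10)` produces two
+ * positional arguments, whereas `CALL show_commits(table => 'h1', limit => 10)` produces
+ * named arguments instead.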
+ */ +case class PositionalArgument(expr: Expression) extends CallArgument diff --git a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/HoodieSqlUtils.scala b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/HoodieSqlUtils.scala index a198d0e009af2..048ca4ec6e758 100644 --- a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/HoodieSqlUtils.scala +++ b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/HoodieSqlUtils.scala @@ -19,10 +19,8 @@ package org.apache.spark.sql.hudi import org.apache.hudi.SparkAdapterSupport import org.apache.spark.sql.catalyst.TableIdentifier -import org.apache.spark.sql.catalyst.expressions.{And, Cast, Expression, Literal} +import org.apache.spark.sql.catalyst.expressions.{And, Expression} import org.apache.spark.sql.catalyst.plans.logical.{MergeIntoTable, SubqueryAlias} -import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.types.{DataType, NullType} object HoodieSqlUtils extends SparkAdapterSupport { @@ -50,12 +48,4 @@ object HoodieSqlUtils extends SparkAdapterSupport { case exp => Seq(exp) } } - - def castIfNeeded(child: Expression, dataType: DataType, conf: SQLConf): Expression = { - child match { - case Literal(nul, NullType) => Literal(nul, dataType) - case _ => if (child.dataType != dataType) - Cast(child, dataType, Option(conf.sessionLocalTimeZone)) else child - } - } } diff --git a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/analysis/HoodieAnalysis.scala b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/analysis/HoodieAnalysis.scala index c8fa32891e0f9..28f8a92e94405 100644 --- a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/analysis/HoodieAnalysis.scala +++ b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/analysis/HoodieAnalysis.scala @@ -17,24 +17,26 @@ package org.apache.spark.sql.hudi.analysis -import org.apache.hudi.{HoodieSparkUtils, SparkAdapterSupport} import org.apache.hudi.DataSourceWriteOptions.MOR_TABLE_TYPE_OPT_VAL import org.apache.hudi.common.model.HoodieRecord -import org.apache.hudi.common.table.HoodieTableMetaClient +import org.apache.hudi.common.util.ReflectionUtils +import org.apache.hudi.{HoodieSparkUtils, SparkAdapterSupport} import org.apache.spark.sql.catalyst.analysis.{UnresolvedAttribute, UnresolvedStar} -import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, AttributeReference, Expression, Literal, NamedExpression} +import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, AttributeReference, Expression, GenericInternalRow, Literal, NamedExpression} import org.apache.spark.sql.catalyst.plans.Inner import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.execution.command._ import org.apache.spark.sql.execution.datasources.{CreateTable, LogicalRelation} -import org.apache.spark.sql.hudi.HoodieSqlCommonUtils.{getTableIdentifier, getTableLocation, isHoodieTable, removeMetaFields, tableExistsInPath} +import org.apache.spark.sql.hudi.HoodieSqlCommonUtils.{getTableIdentifier, removeMetaFields} import org.apache.spark.sql.hudi.HoodieSqlUtils._ import org.apache.spark.sql.hudi.command._ -import org.apache.spark.sql.hudi.{HoodieOptionConfig, HoodieSqlCommonUtils, HoodieSqlUtils} +import org.apache.spark.sql.hudi.command.procedures.{HoodieProcedures, Procedure, ProcedureArgs} +import 
org.apache.spark.sql.hudi.{HoodieOptionConfig, HoodieSqlCommonUtils} import org.apache.spark.sql.types.StringType import org.apache.spark.sql.{AnalysisException, SparkSession} +import java.util import scala.collection.JavaConverters._ object HoodieAnalysis { @@ -42,16 +44,44 @@ object HoodieAnalysis { Seq( session => HoodieResolveReferences(session), session => HoodieAnalysis(session) - ) + ) ++ extraResolutionRules() def customPostHocResolutionRules(): Seq[SparkSession => Rule[LogicalPlan]] = Seq( session => HoodiePostAnalysisRule(session) - ) + ) ++ extraPostHocResolutionRules() + + def extraResolutionRules(): Seq[SparkSession => Rule[LogicalPlan]] = { + if (HoodieSparkUtils.gteqSpark3_2) { + val spark3AnalysisClass = "org.apache.spark.sql.hudi.analysis.HoodieSpark3Analysis" + val spark3Analysis: SparkSession => Rule[LogicalPlan] = + session => ReflectionUtils.loadClass(spark3AnalysisClass, session).asInstanceOf[Rule[LogicalPlan]] + + val spark3ResolveReferences = "org.apache.spark.sql.hudi.analysis.HoodieSpark3ResolveReferences" + val spark3References: SparkSession => Rule[LogicalPlan] = + session => ReflectionUtils.loadClass(spark3ResolveReferences, session).asInstanceOf[Rule[LogicalPlan]] + + Seq(spark3Analysis, spark3References) + } else { + Seq.empty + } + } + + def extraPostHocResolutionRules(): Seq[SparkSession => Rule[LogicalPlan]] = + if (HoodieSparkUtils.gteqSpark3_2) { + val spark3PostHocResolutionClass = "org.apache.spark.sql.hudi.analysis.HoodieSpark3PostAnalysisRule" + val spark3PostHocResolution: SparkSession => Rule[LogicalPlan] = + session => ReflectionUtils.loadClass(spark3PostHocResolutionClass, session).asInstanceOf[Rule[LogicalPlan]] + + Seq(spark3PostHocResolution) + } else { + Seq.empty + } } /** * Rule for convert the logical plan to command. 
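 * (e.g. a resolved MergeIntoTable is rewritten to MergeIntoHoodieTableCommand, and a parsed
 * CallCommand to CallProcedureHoodieCommand; see the cases below.)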
+ *
  * @param sparkSession
  */
case class HoodieAnalysis(sparkSession: SparkSession) extends Rule[LogicalPlan]
@@ -61,36 +91,36 @@ case class HoodieAnalysis(sparkSession: SparkSession) extends Rule[LogicalPlan]
     plan match {
       // Convert to MergeIntoHoodieTableCommand
       case m @ MergeIntoTable(target, _, _, _, _)
-        if m.resolved && isHoodieTable(target, sparkSession) =>
+        if m.resolved && sparkAdapter.isHoodieTable(target, sparkSession) =>
         MergeIntoHoodieTableCommand(m)

       // Convert to UpdateHoodieTableCommand
       case u @ UpdateTable(table, _, _)
-        if u.resolved && isHoodieTable(table, sparkSession) =>
+        if u.resolved && sparkAdapter.isHoodieTable(table, sparkSession) =>
         UpdateHoodieTableCommand(u)

       // Convert to DeleteHoodieTableCommand
       case d @ DeleteFromTable(table, _)
-        if d.resolved && isHoodieTable(table, sparkSession) =>
+        if d.resolved && sparkAdapter.isHoodieTable(table, sparkSession) =>
         DeleteHoodieTableCommand(d)

       // Convert to InsertIntoHoodieTableCommand
       case l if sparkAdapter.isInsertInto(l) =>
         val (table, partition, query, overwrite, _) = sparkAdapter.getInsertIntoChildren(l).get
         table match {
-          case relation: LogicalRelation if isHoodieTable(relation, sparkSession) =>
+          case relation: LogicalRelation if sparkAdapter.isHoodieTable(relation, sparkSession) =>
             new InsertIntoHoodieTableCommand(relation, query, partition, overwrite)
           case _ => l
         }

       // Convert to CreateHoodieTableAsSelectCommand
       case CreateTable(table, mode, Some(query))
-        if query.resolved && isHoodieTable(table) =>
+        if query.resolved && sparkAdapter.isHoodieTable(table) =>
         CreateHoodieTableAsSelectCommand(table, mode, query)

       // Convert to CompactionHoodieTableCommand
       case CompactionTable(table, operation, options)
-        if table.resolved && isHoodieTable(table, sparkSession) =>
+        if table.resolved && sparkAdapter.isHoodieTable(table, sparkSession) =>
         val tableId = getTableIdentifier(table)
         val catalogTable = sparkSession.sessionState.catalog.getTableMetadata(tableId)
         CompactionHoodieTableCommand(catalogTable, operation, options)
@@ -99,33 +129,76 @@ case class HoodieAnalysis(sparkSession: SparkSession) extends Rule[LogicalPlan]
         CompactionHoodiePathCommand(path, operation, options)
       // Convert to CompactionShowOnTable
       case CompactionShowOnTable(table, limit)
-        if isHoodieTable(table, sparkSession) =>
+        if sparkAdapter.isHoodieTable(table, sparkSession) =>
         val tableId = getTableIdentifier(table)
         val catalogTable = sparkSession.sessionState.catalog.getTableMetadata(tableId)
         CompactionShowHoodieTableCommand(catalogTable, limit)
       // Convert to CompactionShowHoodiePathCommand
       case CompactionShowOnPath(path, limit) =>
         CompactionShowHoodiePathCommand(path, limit)
-      case _=> plan
+      // Convert to HoodieCallProcedureCommand
+      case c@CallCommand(_, _) =>
+        val procedure: Option[Procedure] = loadProcedure(c.name)
+        val input = buildProcedureArgs(c.args)
+        if (procedure.nonEmpty) {
+          CallProcedureHoodieCommand(procedure.get, input)
+        } else {
+          c
+        }
+      case _ => plan
+    }
+  }
+
+  private def loadProcedure(name: Seq[String]): Option[Procedure] = {
+    val procedure: Option[Procedure] = if (name.nonEmpty) {
+      val builder = HoodieProcedures.newBuilder(name.last)
+      if (builder != null) {
+        Option(builder.build)
+      } else {
+        throw new AnalysisException(s"procedure: ${name.last} does not exist")
+      }
+    } else {
+      None
+    }
+    procedure
+  }
+
+  private def buildProcedureArgs(exprs: Seq[CallArgument]): ProcedureArgs = {
+    val values = new Array[Any](exprs.size)
+    var isNamedArgs: Boolean = false
+    val map = new util.LinkedHashMap[String, Int]()
+    for (index <-
exprs.indices) { + exprs(index) match { + case expr: NamedArgument => + map.put(expr.name, index) + values(index) = expr.expr.eval() + isNamedArgs = true + case _ => + map.put(index.toString, index) + values(index) = exprs(index).expr.eval() + isNamedArgs = false + } + } + ProcedureArgs(isNamedArgs, map, new GenericInternalRow(values)) } } /** * Rule for resolve hoodie's extended syntax or rewrite some logical plan. + * * @param sparkSession */ case class HoodieResolveReferences(sparkSession: SparkSession) extends Rule[LogicalPlan] with SparkAdapterSupport { private lazy val analyzer = sparkSession.sessionState.analyzer - def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperatorsUp { + def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperatorsUp { // Resolve merge into case mergeInto @ MergeIntoTable(target, source, mergeCondition, matchedActions, notMatchedActions) - if isHoodieTable(target, sparkSession) && target.resolved => - + if sparkAdapter.isHoodieTable(target, sparkSession) && target.resolved => val resolver = sparkSession.sessionState.conf.resolver val resolvedSource = analyzer.execute(source) + def isInsertOrUpdateStar(assignments: Seq[Assignment]): Boolean = { if (assignments.isEmpty) { true @@ -277,7 +350,7 @@ case class HoodieResolveReferences(sparkSession: SparkSession) extends Rule[Logi // Resolve update table case UpdateTable(table, assignments, condition) - if isHoodieTable(table, sparkSession) && table.resolved => + if sparkAdapter.isHoodieTable(table, sparkSession) && table.resolved => // Resolve condition val resolvedCondition = condition.map(resolveExpressionFrom(table)(_)) // Resolve assignments @@ -291,7 +364,7 @@ case class HoodieResolveReferences(sparkSession: SparkSession) extends Rule[Logi // Resolve Delete Table case DeleteFromTable(table, condition) - if isHoodieTable(table, sparkSession) && table.resolved => + if sparkAdapter.isHoodieTable(table, sparkSession) && table.resolved => // Resolve condition val resolvedCondition = condition.map(resolveExpressionFrom(table)(_)) // Return the resolved DeleteTable @@ -303,7 +376,7 @@ case class HoodieResolveReferences(sparkSession: SparkSession) extends Rule[Logi val (table, partition, query, overwrite, ifPartitionNotExists) = sparkAdapter.getInsertIntoChildren(l).get - if (isHoodieTable(table, sparkSession) && query.resolved && + if (sparkAdapter.isHoodieTable(table, sparkSession) && query.resolved && !containUnResolvedStar(query) && !checkAlreadyAppendMetaField(query)) { val metaFields = HoodieRecord.HOODIE_META_COLUMNS.asScala.map( @@ -336,6 +409,7 @@ case class HoodieResolveReferences(sparkSession: SparkSession) extends Rule[Logi /** * Check if the the query of insert statement has already append the meta fields to avoid * duplicate append. 
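 * (The meta fields are Hudi's standard columns such as _hoodie_commit_time and
 * _hoodie_record_key, which the rule above prepends to the query's projection.)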
+ * * @param query * @return */ @@ -401,37 +475,37 @@ case class HoodiePostAnalysisRule(sparkSession: SparkSession) extends Rule[Logic plan match { // Rewrite the CreateDataSourceTableCommand to CreateHoodieTableCommand case CreateDataSourceTableCommand(table, ignoreIfExists) - if isHoodieTable(table) => + if sparkAdapter.isHoodieTable(table) => CreateHoodieTableCommand(table, ignoreIfExists) // Rewrite the DropTableCommand to DropHoodieTableCommand case DropTableCommand(tableName, ifExists, isView, purge) - if isHoodieTable(tableName, sparkSession) => + if sparkAdapter.isHoodieTable(tableName, sparkSession) => DropHoodieTableCommand(tableName, ifExists, isView, purge) // Rewrite the AlterTableDropPartitionCommand to AlterHoodieTableDropPartitionCommand case AlterTableDropPartitionCommand(tableName, specs, ifExists, purge, retainData) - if isHoodieTable(tableName, sparkSession) => + if sparkAdapter.isHoodieTable(tableName, sparkSession) => AlterHoodieTableDropPartitionCommand(tableName, specs, ifExists, purge, retainData) // Rewrite the AlterTableRenameCommand to AlterHoodieTableRenameCommand // Rewrite the AlterTableAddColumnsCommand to AlterHoodieTableAddColumnsCommand case AlterTableAddColumnsCommand(tableId, colsToAdd) - if isHoodieTable(tableId, sparkSession) => + if sparkAdapter.isHoodieTable(tableId, sparkSession) => AlterHoodieTableAddColumnsCommand(tableId, colsToAdd) // Rewrite the AlterTableRenameCommand to AlterHoodieTableRenameCommand case AlterTableRenameCommand(oldName, newName, isView) - if !isView && isHoodieTable(oldName, sparkSession) => + if !isView && sparkAdapter.isHoodieTable(oldName, sparkSession) => new AlterHoodieTableRenameCommand(oldName, newName, isView) // Rewrite the AlterTableChangeColumnCommand to AlterHoodieTableChangeColumnCommand case AlterTableChangeColumnCommand(tableName, columnName, newColumn) - if isHoodieTable(tableName, sparkSession) => + if sparkAdapter.isHoodieTable(tableName, sparkSession) => AlterHoodieTableChangeColumnCommand(tableName, columnName, newColumn) // SPARK-34238: the definition of ShowPartitionsCommand has been changed in Spark3.2. // Match the class type instead of call the `unapply` method. case s: ShowPartitionsCommand - if isHoodieTable(s.tableName, sparkSession) => + if sparkAdapter.isHoodieTable(s.tableName, sparkSession) => ShowHoodieTablePartitionsCommand(s.tableName, s.spec) // Rewrite TruncateTableCommand to TruncateHoodieTableCommand case TruncateTableCommand(tableName, partitionSpec) - if isHoodieTable(tableName, sparkSession) => + if sparkAdapter.isHoodieTable(tableName, sparkSession) => new TruncateHoodieTableCommand(tableName, partitionSpec) case _ => plan } diff --git a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/CallProcedureHoodieCommand.scala b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/CallProcedureHoodieCommand.scala new file mode 100644 index 0000000000000..f63f4115e9195 --- /dev/null +++ b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/CallProcedureHoodieCommand.scala @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.hudi.command + +import org.apache.spark.sql.catalyst.expressions.Attribute +import org.apache.spark.sql.hudi.command.procedures.{Procedure, ProcedureArgs} +import org.apache.spark.sql.{Row, SparkSession} + +import scala.collection.Seq + +case class CallProcedureHoodieCommand( + procedure: Procedure, + args: ProcedureArgs) extends HoodieLeafRunnableCommand { + + override def output: Seq[Attribute] = procedure.outputType.toAttributes + + override def run(sparkSession: SparkSession): Seq[Row] = { + procedure.call(args) + } +} diff --git a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/CompactionHoodiePathCommand.scala b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/CompactionHoodiePathCommand.scala index 1363fb939b4e3..2f5c4d004f58f 100644 --- a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/CompactionHoodiePathCommand.scala +++ b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/CompactionHoodiePathCommand.scala @@ -17,14 +17,13 @@ package org.apache.spark.sql.hudi.command -import org.apache.hudi.client.WriteStatus -import org.apache.hudi.common.model.HoodieTableType +import org.apache.hudi.common.model.{HoodieCommitMetadata, HoodieTableType} import org.apache.hudi.common.table.timeline.{HoodieActiveTimeline, HoodieTimeline} import org.apache.hudi.common.table.{HoodieTableMetaClient, TableSchemaResolver} import org.apache.hudi.common.util.{HoodieTimer, Option => HOption} import org.apache.hudi.exception.HoodieException import org.apache.hudi.{DataSourceUtils, DataSourceWriteOptions, HoodieWriterUtils} -import org.apache.spark.api.java.{JavaRDD, JavaSparkContext} +import org.apache.spark.api.java.JavaSparkContext import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference} import org.apache.spark.sql.catalyst.plans.logical.CompactionOperation import org.apache.spark.sql.catalyst.plans.logical.CompactionOperation.{CompactionOperation, RUN, SCHEDULE} @@ -100,8 +99,8 @@ case class CompactionHoodiePathCommand(path: String, timer.startTimer() willCompactionInstants.foreach {compactionInstant => val writeResponse = client.compact(compactionInstant) - handlerResponse(writeResponse) - client.commitCompaction(compactionInstant, writeResponse, HOption.empty()) + handleResponse(writeResponse.getCommitMetadata.get()) + client.commitCompaction(compactionInstant, writeResponse.getCommitMetadata.get(), HOption.empty()) } logInfo(s"Finish Run compaction at instants: [${willCompactionInstants.mkString(",")}]," + s" spend: ${timer.endTimer()}ms") @@ -111,17 +110,13 @@ case class CompactionHoodiePathCommand(path: String, } } - private def handlerResponse(writeResponse: JavaRDD[WriteStatus]): Unit = { + private def handleResponse(metadata: HoodieCommitMetadata): Unit = { + // Handle error - val error = writeResponse.rdd.filter(f => f.hasErrors).take(1).headOption - if (error.isDefined) { - if (error.get.hasGlobalError) { - throw error.get.getGlobalError - } else if (!error.get.getErrors.isEmpty) { - val key 
= error.get.getErrors.asScala.head._1
-        val exception = error.get.getErrors.asScala.head._2
-        throw new HoodieException(s"Error in write record: $key", exception)
-      }
+    val writeStats = metadata.getPartitionToWriteStats.entrySet().flatMap(e => e.getValue).toList
+    val errorsCount = writeStats.map(state => state.getTotalWriteErrors).sum
+    if (errorsCount > 0) {
+      throw new HoodieException(s"Found $errorsCount write errors when writing records")
     }
   }
diff --git a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/CreateHoodieTableAsSelectCommand.scala b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/CreateHoodieTableAsSelectCommand.scala
index 572013981d698..2877dd8d9ee94 100644
--- a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/CreateHoodieTableAsSelectCommand.scala
+++ b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/CreateHoodieTableAsSelectCommand.scala
@@ -19,17 +19,14 @@ package org.apache.spark.sql.hudi.command

 import org.apache.hadoop.conf.Configuration
 import org.apache.hadoop.fs.Path
-
 import org.apache.hudi.DataSourceWriteOptions
 import org.apache.hudi.hive.util.ConfigUtils
 import org.apache.hudi.sql.InsertMode
-
-import org.apache.spark.sql.{Row, SaveMode, SparkSession}
 import org.apache.spark.sql.catalyst.catalog.{CatalogTable, CatalogTableType, HoodieCatalogTable}
+import org.apache.spark.sql.catalyst.plans.QueryPlan
 import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project}
-import org.apache.spark.sql.execution.SparkPlan
-import org.apache.spark.sql.execution.command.DataWritingCommand
 import org.apache.spark.sql.hudi.HoodieSqlCommonUtils
+import org.apache.spark.sql.{Row, SaveMode, SparkSession}

 import scala.collection.JavaConverters._

@@ -40,6 +37,7 @@ case class CreateHoodieTableAsSelectCommand(
     table: CatalogTable,
     mode: SaveMode,
     query: LogicalPlan) extends HoodieLeafRunnableCommand {
+  override def innerChildren: Seq[QueryPlan[_]] = Seq(query)

   override def run(sparkSession: SparkSession): Seq[Row] = {
     assert(table.tableType != CatalogTableType.VIEW)
diff --git a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/InsertIntoHoodieTableCommand.scala b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/InsertIntoHoodieTableCommand.scala
index 030d3e3c623ca..f058b47d782d5 100644
--- a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/InsertIntoHoodieTableCommand.scala
+++ b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/InsertIntoHoodieTableCommand.scala
@@ -17,27 +17,18 @@ package org.apache.spark.sql.hudi.command

-import org.apache.hudi.DataSourceWriteOptions._
-import org.apache.hudi.common.model.OverwriteWithLatestAvroPayload
-import org.apache.hudi.config.HoodieWriteConfig
-import org.apache.hudi.config.HoodieWriteConfig.TBL_NAME
-import org.apache.hudi.hive.MultiPartKeysValueExtractor
-import org.apache.hudi.hive.ddl.HiveSyncMode
-import org.apache.hudi.keygen.ComplexKeyGenerator
-import org.apache.hudi.sql.InsertMode
-import org.apache.hudi.{DataSourceWriteOptions, HoodieSparkSqlWriter}
+import org.apache.hudi.HoodieSparkSqlWriter
 import org.apache.spark.internal.Logging
 import org.apache.spark.sql.catalyst.catalog.{CatalogTable, HoodieCatalogTable}
 import org.apache.spark.sql.catalyst.expressions.{Alias, Literal}
+import org.apache.spark.sql.catalyst.plans.QueryPlan
 import
org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project} import org.apache.spark.sql.execution.datasources.LogicalRelation import org.apache.spark.sql.hudi.HoodieSqlCommonUtils._ -import org.apache.spark.sql.hudi.HoodieSqlUtils.castIfNeeded +import org.apache.spark.sql.hudi.ProvidesHoodieConfig import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.{Dataset, Row, SaveMode, SparkSession} -import scala.collection.JavaConverters._ - /** * Command for insert into hoodie table. */ @@ -47,6 +38,7 @@ case class InsertIntoHoodieTableCommand( partition: Map[String, Option[String]], overwrite: Boolean) extends HoodieLeafRunnableCommand { + override def innerChildren: Seq[QueryPlan[_]] = Seq(query) override def run(sparkSession: SparkSession): Seq[Row] = { assert(logicalRelation.catalogTable.isDefined, "Missing catalog table") @@ -57,7 +49,7 @@ case class InsertIntoHoodieTableCommand( } } -object InsertIntoHoodieTableCommand extends Logging { +object InsertIntoHoodieTableCommand extends Logging with ProvidesHoodieConfig { /** * Run the insert query. We support both dynamic partition insert and static partition insert. * @param sparkSession The spark session. @@ -174,112 +166,4 @@ object InsertIntoHoodieTableCommand extends Logging { val alignedProjects = dataProjectsWithoutMetaFields ++ partitionProjects Project(alignedProjects, query) } - - /** - * Build the default config for insert. - * @return - */ - private def buildHoodieInsertConfig( - hoodieCatalogTable: HoodieCatalogTable, - sparkSession: SparkSession, - isOverwrite: Boolean, - insertPartitions: Map[String, Option[String]] = Map.empty, - extraOptions: Map[String, String]): Map[String, String] = { - - if (insertPartitions.nonEmpty && - (insertPartitions.keys.toSet != hoodieCatalogTable.partitionFields.toSet)) { - throw new IllegalArgumentException(s"Insert partition fields" + - s"[${insertPartitions.keys.mkString(" " )}]" + - s" not equal to the defined partition in table[${hoodieCatalogTable.partitionFields.mkString(",")}]") - } - val path = hoodieCatalogTable.tableLocation - val tableType = hoodieCatalogTable.tableTypeName - val tableConfig = hoodieCatalogTable.tableConfig - val tableSchema = hoodieCatalogTable.tableSchema - - val options = hoodieCatalogTable.catalogProperties ++ tableConfig.getProps.asScala.toMap ++ extraOptions - val parameters = withSparkConf(sparkSession, options)() - - val preCombineColumn = hoodieCatalogTable.preCombineKey.getOrElse("") - val partitionFields = hoodieCatalogTable.partitionFields.mkString(",") - - val hiveStylePartitioningEnable = Option(tableConfig.getHiveStylePartitioningEnable).getOrElse("true") - val urlEncodePartitioning = Option(tableConfig.getUrlEncodePartitioning).getOrElse("false") - val keyGeneratorClassName = Option(tableConfig.getKeyGeneratorClassName) - .getOrElse(classOf[ComplexKeyGenerator].getCanonicalName) - - val enableBulkInsert = parameters.getOrElse(DataSourceWriteOptions.SQL_ENABLE_BULK_INSERT.key, - DataSourceWriteOptions.SQL_ENABLE_BULK_INSERT.defaultValue()).toBoolean - val dropDuplicate = sparkSession.conf - .getOption(INSERT_DROP_DUPS.key).getOrElse(INSERT_DROP_DUPS.defaultValue).toBoolean - - val insertMode = InsertMode.of(parameters.getOrElse(DataSourceWriteOptions.SQL_INSERT_MODE.key, - DataSourceWriteOptions.SQL_INSERT_MODE.defaultValue())) - val isNonStrictMode = insertMode == InsertMode.NON_STRICT - val isPartitionedTable = hoodieCatalogTable.partitionFields.nonEmpty - val hasPreCombineColumn = preCombineColumn.nonEmpty - val operation = - 
(enableBulkInsert, isOverwrite, dropDuplicate, isNonStrictMode, isPartitionedTable) match { - case (true, _, _, false, _) => - throw new IllegalArgumentException(s"Table with primaryKey can not use bulk insert in ${insertMode.value()} mode.") - case (true, true, _, _, true) => - throw new IllegalArgumentException(s"Insert Overwrite Partition can not use bulk insert.") - case (true, _, true, _, _) => - throw new IllegalArgumentException(s"Bulk insert cannot support drop duplication." + - s" Please disable $INSERT_DROP_DUPS and try again.") - // if enableBulkInsert is true, use bulk insert for the insert overwrite non-partitioned table. - case (true, true, _, _, false) => BULK_INSERT_OPERATION_OPT_VAL - // insert overwrite table - case (false, true, _, _, false) => INSERT_OVERWRITE_TABLE_OPERATION_OPT_VAL - // insert overwrite partition - case (_, true, _, _, true) => INSERT_OVERWRITE_OPERATION_OPT_VAL - // disable dropDuplicate, and provide preCombineKey, use the upsert operation for strict and upsert mode. - case (false, false, false, false, _) if hasPreCombineColumn => UPSERT_OPERATION_OPT_VAL - // if table is pk table and has enableBulkInsert use bulk insert for non-strict mode. - case (true, _, _, true, _) => BULK_INSERT_OPERATION_OPT_VAL - // for the rest case, use the insert operation - case _ => INSERT_OPERATION_OPT_VAL - } - - val payloadClassName = if (operation == UPSERT_OPERATION_OPT_VAL && - tableType == COW_TABLE_TYPE_OPT_VAL && insertMode == InsertMode.STRICT) { - // Only validate duplicate key for COW, for MOR it will do the merge with the DefaultHoodieRecordPayload - // on reading. - classOf[ValidateDuplicateKeyPayload].getCanonicalName - } else { - classOf[OverwriteWithLatestAvroPayload].getCanonicalName - } - logInfo(s"insert statement use write operation type: $operation, payloadClass: $payloadClassName") - - val enableHive = isEnableHive(sparkSession) - withSparkConf(sparkSession, options) { - Map( - "path" -> path, - TABLE_TYPE.key -> tableType, - TBL_NAME.key -> hoodieCatalogTable.tableName, - PRECOMBINE_FIELD.key -> preCombineColumn, - OPERATION.key -> operation, - HIVE_STYLE_PARTITIONING.key -> hiveStylePartitioningEnable, - URL_ENCODE_PARTITIONING.key -> urlEncodePartitioning, - KEYGENERATOR_CLASS_NAME.key -> classOf[SqlKeyGenerator].getCanonicalName, - SqlKeyGenerator.ORIGIN_KEYGEN_CLASS_NAME -> keyGeneratorClassName, - RECORDKEY_FIELD.key -> hoodieCatalogTable.primaryKeys.mkString(","), - PARTITIONPATH_FIELD.key -> partitionFields, - PAYLOAD_CLASS_NAME.key -> payloadClassName, - ENABLE_ROW_WRITER.key -> enableBulkInsert.toString, - HoodieWriteConfig.COMBINE_BEFORE_INSERT.key -> String.valueOf(hasPreCombineColumn), - META_SYNC_ENABLED.key -> enableHive.toString, - HIVE_SYNC_MODE.key -> HiveSyncMode.HMS.name(), - HIVE_USE_JDBC.key -> "false", - HIVE_DATABASE.key -> hoodieCatalogTable.table.identifier.database.getOrElse("default"), - HIVE_TABLE.key -> hoodieCatalogTable.table.identifier.table, - HIVE_SUPPORT_TIMESTAMP_TYPE.key -> "true", - HIVE_PARTITION_FIELDS.key -> partitionFields, - HIVE_PARTITION_EXTRACTOR_CLASS.key -> classOf[MultiPartKeysValueExtractor].getCanonicalName, - HoodieWriteConfig.INSERT_PARALLELISM_VALUE.key -> "200", - HoodieWriteConfig.UPSERT_PARALLELISM_VALUE.key -> "200", - SqlKeyGenerator.PARTITION_SCHEMA -> hoodieCatalogTable.partitionSchema.toDDL - ) - } - } } diff --git a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/MergeIntoHoodieTableCommand.scala 
b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/MergeIntoHoodieTableCommand.scala index 2c76ad567f58b..1d9aedd2af6fa 100644 --- a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/MergeIntoHoodieTableCommand.scala +++ b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/MergeIntoHoodieTableCommand.scala @@ -32,7 +32,7 @@ import org.apache.spark.sql.catalyst.catalog.HoodieCatalogTable import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, AttributeReference, BoundReference, Cast, EqualTo, Expression, Literal} import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.hudi.HoodieSqlCommonUtils._ -import org.apache.spark.sql.hudi.HoodieSqlUtils.{castIfNeeded, getMergeIntoTargetTableId} +import org.apache.spark.sql.hudi.HoodieSqlUtils.getMergeIntoTargetTableId import org.apache.spark.sql.hudi.SerDeUtils import org.apache.spark.sql.hudi.command.payload.ExpressionPayload import org.apache.spark.sql.hudi.command.payload.ExpressionPayload._ @@ -443,13 +443,18 @@ case class MergeIntoHoodieTableCommand(mergeInto: MergeIntoTable) extends Hoodie val partitionColumns = tableConfig.getPartitionFieldProp.split(",").map(_.toLowerCase) val partitionSchema = StructType(tableSchema.filter(f => partitionColumns.contains(f.name))) + // NOTE: Here we fallback to "" to make sure that null value is not overridden with + // default value ("ts") + // TODO(HUDI-3456) clean up + val preCombineField = hoodieCatalogTable.preCombineKey.getOrElse("") + // Enable the hive sync by default if spark have enable the hive metastore. val enableHive = isEnableHive(sparkSession) withSparkConf(sparkSession, hoodieCatalogTable.catalogProperties) { Map( "path" -> path, RECORDKEY_FIELD.key -> tableConfig.getRecordKeyFieldProp, - PRECOMBINE_FIELD.key -> hoodieCatalogTable.preCombineKey.getOrElse(""), + PRECOMBINE_FIELD.key -> preCombineField, TBL_NAME.key -> hoodieCatalogTable.tableName, PARTITIONPATH_FIELD.key -> tableConfig.getPartitionFieldProp, PAYLOAD_CLASS_NAME.key -> classOf[ExpressionPayload].getCanonicalName, @@ -470,6 +475,7 @@ case class MergeIntoHoodieTableCommand(mergeInto: MergeIntoTable) extends Hoodie HoodieWriteConfig.DELETE_PARALLELISM_VALUE.key -> "200", SqlKeyGenerator.PARTITION_SCHEMA -> partitionSchema.toDDL ) + .filter { case (_, v) => v != null } } } } diff --git a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/UpdateHoodieTableCommand.scala b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/UpdateHoodieTableCommand.scala index 512e9a18bd560..277f2643423dd 100644 --- a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/UpdateHoodieTableCommand.scala +++ b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/UpdateHoodieTableCommand.scala @@ -17,26 +17,21 @@ package org.apache.spark.sql.hudi.command -import org.apache.hudi.DataSourceWriteOptions._ import org.apache.hudi.SparkAdapterSupport import org.apache.hudi.common.model.HoodieRecord -import org.apache.hudi.config.HoodieWriteConfig -import org.apache.hudi.config.HoodieWriteConfig.TBL_NAME -import org.apache.hudi.hive.MultiPartKeysValueExtractor -import org.apache.hudi.hive.ddl.HiveSyncMode import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.catalog.HoodieCatalogTable import org.apache.spark.sql.catalyst.expressions.{Alias, AttributeReference, Expression} import 
org.apache.spark.sql.catalyst.plans.logical.{Assignment, UpdateTable} import org.apache.spark.sql.hudi.HoodieSqlCommonUtils._ -import org.apache.spark.sql.hudi.HoodieSqlUtils.castIfNeeded +import org.apache.spark.sql.hudi.ProvidesHoodieConfig import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.StructField import scala.collection.JavaConverters._ case class UpdateHoodieTableCommand(updateTable: UpdateTable) extends HoodieLeafRunnableCommand - with SparkAdapterSupport { + with SparkAdapterSupport with ProvidesHoodieConfig { private val table = updateTable.table private val tableId = getTableIdentifier(table) @@ -72,7 +67,7 @@ case class UpdateHoodieTableCommand(updateTable: UpdateTable) extends HoodieLeaf df = df.filter(Column(updateTable.condition.get)) } df = df.select(projects: _*) - val config = buildHoodieConfig(sparkSession) + val config = buildHoodieConfig(HoodieCatalogTable(sparkSession, tableId)) df.write .format("hudi") .mode(SaveMode.Append) @@ -83,42 +78,6 @@ case class UpdateHoodieTableCommand(updateTable: UpdateTable) extends HoodieLeaf Seq.empty[Row] } - private def buildHoodieConfig(sparkSession: SparkSession): Map[String, String] = { - val hoodieCatalogTable = HoodieCatalogTable(sparkSession, tableId) - val catalogProperties = hoodieCatalogTable.catalogProperties - val tableConfig = hoodieCatalogTable.tableConfig - - val preCombineColumn = Option(tableConfig.getPreCombineField).getOrElse("") - assert(hoodieCatalogTable.primaryKeys.nonEmpty, - s"There are no primary key in table $tableId, cannot execute update operator") - val enableHive = isEnableHive(sparkSession) - - withSparkConf(sparkSession, catalogProperties) { - Map( - "path" -> hoodieCatalogTable.tableLocation, - RECORDKEY_FIELD.key -> hoodieCatalogTable.primaryKeys.mkString(","), - PRECOMBINE_FIELD.key -> preCombineColumn, - TBL_NAME.key -> hoodieCatalogTable.tableName, - HIVE_STYLE_PARTITIONING.key -> tableConfig.getHiveStylePartitioningEnable, - URL_ENCODE_PARTITIONING.key -> tableConfig.getUrlEncodePartitioning, - KEYGENERATOR_CLASS_NAME.key -> classOf[SqlKeyGenerator].getCanonicalName, - SqlKeyGenerator.ORIGIN_KEYGEN_CLASS_NAME -> tableConfig.getKeyGeneratorClassName, - OPERATION.key -> UPSERT_OPERATION_OPT_VAL, - PARTITIONPATH_FIELD.key -> tableConfig.getPartitionFieldProp, - META_SYNC_ENABLED.key -> enableHive.toString, - HIVE_SYNC_MODE.key -> HiveSyncMode.HMS.name(), - HIVE_USE_JDBC.key -> "false", - HIVE_DATABASE.key -> tableId.database.getOrElse("default"), - HIVE_TABLE.key -> tableId.table, - HIVE_PARTITION_FIELDS.key -> tableConfig.getPartitionFieldProp, - HIVE_PARTITION_EXTRACTOR_CLASS.key -> classOf[MultiPartKeysValueExtractor].getCanonicalName, - HIVE_SUPPORT_TIMESTAMP_TYPE.key -> "true", - HoodieWriteConfig.UPSERT_PARALLELISM_VALUE.key -> "200", - SqlKeyGenerator.PARTITION_SCHEMA -> hoodieCatalogTable.partitionSchema.toDDL - ) - } - } - def cast(exp:Expression, field: StructField, sqlConf: SQLConf): Expression = { castIfNeeded(exp, field.dataType, sqlConf) } diff --git a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/BaseProcedure.scala b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/BaseProcedure.scala new file mode 100644 index 0000000000000..e64df997da2ff --- /dev/null +++ b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/BaseProcedure.scala @@ -0,0 +1,99 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * 
contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.hudi.command.procedures + +import org.apache.hudi.client.SparkRDDWriteClient +import org.apache.hudi.client.common.HoodieSparkEngineContext +import org.apache.hudi.common.model.HoodieRecordPayload +import org.apache.hudi.config.{HoodieIndexConfig, HoodieWriteConfig} +import org.apache.hudi.index.HoodieIndex.IndexType +import org.apache.spark.api.java.JavaSparkContext +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.types._ + +abstract class BaseProcedure extends Procedure { + val INVALID_ARG_INDEX: Int = -1 + + val spark: SparkSession = SparkSession.active + val jsc = new JavaSparkContext(spark.sparkContext) + + protected def sparkSession: SparkSession = spark + + protected def createHoodieClient(jsc: JavaSparkContext, basePath: String): SparkRDDWriteClient[_ <: HoodieRecordPayload[_ <: AnyRef]] = { + val config = getWriteConfig(basePath) + new SparkRDDWriteClient(new HoodieSparkEngineContext(jsc), config) + } + + protected def getWriteConfig(basePath: String): HoodieWriteConfig = { + HoodieWriteConfig.newBuilder + .withPath(basePath) + .withIndexConfig(HoodieIndexConfig.newBuilder.withIndexType(IndexType.BLOOM).build) + .withRollbackUsingMarkers(false) + .build + } + + protected def checkArgs(target: Array[ProcedureParameter], args: ProcedureArgs): Unit = { + val internalRow = args.internalRow + for (i <- target.indices) { + if (target(i).required) { + var argsIndex: Integer = null + if (args.isNamedArgs) { + argsIndex = getArgsIndex(target(i).name, args) + } else { + argsIndex = getArgsIndex(i.toString, args) + } + assert(-1 != argsIndex && internalRow.get(argsIndex, target(i).dataType) != null, + s"Argument: ${target(i).name} is required") + } + } + } + + protected def getArgsIndex(key: String, args: ProcedureArgs): Integer = { + args.map.getOrDefault(key, INVALID_ARG_INDEX) + } + + protected def getArgValueOrDefault(args: ProcedureArgs, parameter: ProcedureParameter): Any = { + var argsIndex: Int = INVALID_ARG_INDEX + if (args.isNamedArgs) { + argsIndex = getArgsIndex(parameter.name, args) + } else { + argsIndex = getArgsIndex(parameter.index.toString, args) + } + if (argsIndex.equals(INVALID_ARG_INDEX)) parameter.default else getInternalRowValue(args.internalRow, argsIndex, parameter.dataType) + } + + protected def getInternalRowValue(row: InternalRow, index: Int, dataType: DataType): Any = { + dataType match { + case StringType => row.getString(index) + case BinaryType => row.getBinary(index) + case BooleanType => row.getBoolean(index) + case CalendarIntervalType => row.getInterval(index) + case DoubleType => row.getDouble(index) + case d: DecimalType => row.getDecimal(index, d.precision, d.scale) + case FloatType => row.getFloat(index) + case ByteType => 
row.getByte(index) + case IntegerType => row.getInt(index) + case LongType => row.getLong(index) + case ShortType => row.getShort(index) + case NullType => null + case _ => + throw new UnsupportedOperationException(s"type: ${dataType.typeName} not supported") + } + } +} diff --git a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/HoodieProcedures.scala b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/HoodieProcedures.scala new file mode 100644 index 0000000000000..7b919fcef08b5 --- /dev/null +++ b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/HoodieProcedures.scala @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.hudi.command.procedures + +import com.google.common.collect.ImmutableMap + +import java.util +import java.util.Locale +import java.util.function.Supplier + +object HoodieProcedures { + private val BUILDERS: util.Map[String, Supplier[ProcedureBuilder]] = initProcedureBuilders + + def newBuilder(name: String): ProcedureBuilder = { + val builderSupplier: Supplier[ProcedureBuilder] = BUILDERS.get(name.toLowerCase(Locale.ROOT)) + if (builderSupplier != null) builderSupplier.get else null + } + + private def initProcedureBuilders: util.Map[String, Supplier[ProcedureBuilder]] = { + val mapBuilder: ImmutableMap.Builder[String, Supplier[ProcedureBuilder]] = ImmutableMap.builder() + mapBuilder.put(ShowCommitsProcedure.NAME, ShowCommitsProcedure.builder) + mapBuilder.put(ShowCommitsMetadataProcedure.NAME, ShowCommitsMetadataProcedure.builder) + mapBuilder.put(RollbackToInstantTimeProcedure.NAME, RollbackToInstantTimeProcedure.builder) + mapBuilder.build + } +} diff --git a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/Procedure.scala b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/Procedure.scala new file mode 100644 index 0000000000000..f34e306159827 --- /dev/null +++ b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/Procedure.scala @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.hudi.command.procedures + +import org.apache.spark.sql.Row +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.types.StructType + +import java.util +import scala.collection.mutable + +/** + * An interface representing a stored procedure available for execution. + */ +trait Procedure { + /** + * Returns the input parameters of this procedure. + */ + def parameters: Array[ProcedureParameter] + + /** + * Returns the type of rows produced by this procedure. + */ + def outputType: StructType + + /** + * Executes this procedure. + *

+ * Spark will align the provided arguments according to the input parameters + * defined in {@link #parameters()} either by position or by name before execution. + *

+ * Implementations may provide a summary of execution by returning one or many rows + * as a result. The schema of output rows must match the defined output type + * in {@link #outputType()}. + * + * @param args input arguments + * @return the result of executing this procedure with the given arguments + */ + def call(args: ProcedureArgs): Seq[Row] + + /** + * Returns the description of this procedure. + */ + def description: String = this.getClass.toString +} diff --git a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/ProcedureArgs.scala b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/ProcedureArgs.scala new file mode 100644 index 0000000000000..5c462c1b892a0 --- /dev/null +++ b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/ProcedureArgs.scala @@ -0,0 +1,27 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.hudi.command.procedures + +import org.apache.spark.sql.catalyst.InternalRow + +import java.util + +case class ProcedureArgs(isNamedArgs: Boolean, + map: util.LinkedHashMap[String, Int], + internalRow: InternalRow) { +} diff --git a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/ProcedureBuilder.scala b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/ProcedureBuilder.scala new file mode 100644 index 0000000000000..b2ecd0a3089c4 --- /dev/null +++ b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/ProcedureBuilder.scala @@ -0,0 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +package org.apache.spark.sql.hudi.command.procedures + +trait ProcedureBuilder { + def build: Procedure +} diff --git a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/ProcedureParameter.scala b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/ProcedureParameter.scala new file mode 100644 index 0000000000000..a9ad252bd7a05 --- /dev/null +++ b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/ProcedureParameter.scala @@ -0,0 +1,71 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.hudi.command.procedures + +import org.apache.spark.sql.types.DataType + +/** + * An input parameter of a {@link Procedure stored procedure}. + */ +abstract class ProcedureParameter { + def index: Int + + /** + * Returns the name of this parameter. + */ + def name: String + + /** + * Returns the type of this parameter. + */ + def dataType: DataType + + /** + * Returns true if this parameter is required. + */ + def required: Boolean + + /** + * Returns this parameter's default value. + */ + def default: Any +} + +object ProcedureParameter { + /** + * Creates a required input parameter. + * + * @param index the positional index of the parameter + * @param name the name of the parameter + * @param dataType the type of the parameter + * @param default the default value of the parameter + * @return the constructed stored procedure parameter + */ + def required(index: Int, name: String, dataType: DataType, default: Any): ProcedureParameterImpl = { + ProcedureParameterImpl(index, name, dataType, default, required = true) + } + + /** + * Creates an optional input parameter. + * + * @param index the positional index of the parameter + * @param name the name of the parameter + * @param dataType the type of the parameter + * @param default the default value of the parameter + * @return the constructed optional stored procedure parameter + */ + def optional(index: Int, name: String, dataType: DataType, default: Any): ProcedureParameterImpl = { + ProcedureParameterImpl(index, name, dataType, default, required = false) + } +} diff --git a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/ProcedureParameterImpl.scala b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/ProcedureParameterImpl.scala new file mode 100644 index 0000000000000..a7f4117047457 --- /dev/null +++ b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/ProcedureParameterImpl.scala @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.hudi.command.procedures + +import org.apache.spark.sql.types.DataType + +import java.util.Objects + +case class ProcedureParameterImpl(index: Int, name: String, dataType: DataType, default: Any, required: Boolean) + extends ProcedureParameter { + + override def equals(other: Any): Boolean = { + // Check reference equality with eq (== would re-enter this overridden equals), + // and only cast once the runtime class has been verified. + if (this.eq(other.asInstanceOf[AnyRef])) { + true + } else if (other == null || (getClass ne other.getClass)) { + false + } else { + val that = other.asInstanceOf[ProcedureParameterImpl] + index == that.index && required == that.required && default == that.default && Objects.equals(name, that.name) && Objects.equals(dataType, that.dataType) + } + } + + override def hashCode: Int = Seq(index, name, dataType, required, default).hashCode() + + override def toString: String = s"ProcedureParameter(index='$index', name='$name', type=$dataType, required=$required, default=$default)" +} diff --git a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/RollbackToInstantTimeProcedure.scala b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/RollbackToInstantTimeProcedure.scala new file mode 100644 index 0000000000000..5414e8db6b37d --- /dev/null +++ b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/RollbackToInstantTimeProcedure.scala @@ -0,0 +1,85 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +package org.apache.spark.sql.hudi.command.procedures + +import org.apache.hudi.common.table.HoodieTableMetaClient +import org.apache.hudi.common.table.timeline.HoodieTimeline +import org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion +import org.apache.hudi.common.util.Option +import org.apache.hudi.exception.HoodieException +import org.apache.spark.sql.Row +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.catalyst.catalog.HoodieCatalogTable +import org.apache.spark.sql.types.{DataTypes, Metadata, StructField, StructType} + +import java.util.function.Supplier + +class RollbackToInstantTimeProcedure extends BaseProcedure with ProcedureBuilder { + private val PARAMETERS = Array[ProcedureParameter]( + ProcedureParameter.required(0, "table", DataTypes.StringType, None), + ProcedureParameter.required(1, "instant_time", DataTypes.StringType, None)) + + private val OUTPUT_TYPE = new StructType(Array[StructField]( + StructField("rollback_result", DataTypes.BooleanType, nullable = true, Metadata.empty)) + ) + + def parameters: Array[ProcedureParameter] = PARAMETERS + + def outputType: StructType = OUTPUT_TYPE + + override def call(args: ProcedureArgs): Seq[Row] = { + super.checkArgs(PARAMETERS, args) + + val table = getArgValueOrDefault(args, PARAMETERS(0)).asInstanceOf[String] + val instantTime = getArgValueOrDefault(args, PARAMETERS(1)).asInstanceOf[String] + + val hoodieCatalogTable = HoodieCatalogTable(sparkSession, new TableIdentifier(table)) + val basePath = hoodieCatalogTable.tableLocation + val client = createHoodieClient(jsc, basePath) + val config = getWriteConfig(basePath) + val metaClient = HoodieTableMetaClient.builder + .setConf(jsc.hadoopConfiguration) + .setBasePath(config.getBasePath) + .setLoadActiveTimelineOnLoad(false) + .setConsistencyGuardConfig(config.getConsistencyGuardConfig) + .setLayoutVersion(Option.of(new TimelineLayoutVersion(config.getTimelineLayoutVersion))) + .build + + val activeTimeline = metaClient.getActiveTimeline + val completedTimeline: HoodieTimeline = activeTimeline.getCommitsTimeline.filterCompletedInstants + val filteredTimeline = completedTimeline.containsInstant(instantTime) + if (!filteredTimeline) { + throw new HoodieException(s"Commit $instantTime not found in Commits $completedTimeline") + } + + val result = if (client.rollback(instantTime)) true else false + val outputRow = Row(result) + + Seq(outputRow) + } + + override def build: Procedure = new RollbackToInstantTimeProcedure() +} + +object RollbackToInstantTimeProcedure { + val NAME: String = "rollback_to_instant" + + def builder: Supplier[ProcedureBuilder] = new Supplier[ProcedureBuilder] { + override def get(): RollbackToInstantTimeProcedure = new RollbackToInstantTimeProcedure() + } +} diff --git a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/ShowCommitsProcedure.scala b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/ShowCommitsProcedure.scala new file mode 100644 index 0000000000000..da089baba9cb6 --- /dev/null +++ b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/ShowCommitsProcedure.scala @@ -0,0 +1,163 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.hudi.command.procedures + +import org.apache.hudi.common.model.HoodieCommitMetadata +import org.apache.hudi.common.table.HoodieTableMetaClient +import org.apache.hudi.common.table.timeline.{HoodieDefaultTimeline, HoodieInstant} +import org.apache.spark.sql.Row +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.catalyst.catalog.HoodieCatalogTable +import org.apache.spark.sql.types.{DataTypes, Metadata, StructField, StructType} + +import java.util +import java.util.Collections +import java.util.function.Supplier +import scala.collection.JavaConverters._ + +class ShowCommitsProcedure(includeExtraMetadata: Boolean) extends BaseProcedure with ProcedureBuilder { + var sortByFieldParameter: ProcedureParameter = _ + + private val PARAMETERS = Array[ProcedureParameter]( + ProcedureParameter.required(0, "table", DataTypes.StringType, None), + ProcedureParameter.optional(1, "limit", DataTypes.IntegerType, 10) + ) + + private val OUTPUT_TYPE = new StructType(Array[StructField]( + StructField("commit_time", DataTypes.StringType, nullable = true, Metadata.empty), + StructField("total_bytes_written", DataTypes.LongType, nullable = true, Metadata.empty), + StructField("total_files_added", DataTypes.LongType, nullable = true, Metadata.empty), + StructField("total_files_updated", DataTypes.LongType, nullable = true, Metadata.empty), + StructField("total_partitions_written", DataTypes.LongType, nullable = true, Metadata.empty), + StructField("total_records_written", DataTypes.LongType, nullable = true, Metadata.empty), + StructField("total_update_records_written", DataTypes.LongType, nullable = true, Metadata.empty), + StructField("total_errors", DataTypes.LongType, nullable = true, Metadata.empty) + )) + + private val METADATA_OUTPUT_TYPE = new StructType(Array[StructField]( + StructField("commit_time", DataTypes.StringType, nullable = true, Metadata.empty), + StructField("action", DataTypes.StringType, nullable = true, Metadata.empty), + StructField("partition", DataTypes.StringType, nullable = true, Metadata.empty), + StructField("file_id", DataTypes.StringType, nullable = true, Metadata.empty), + StructField("previous_commit", DataTypes.StringType, nullable = true, Metadata.empty), + StructField("num_writes", DataTypes.LongType, nullable = true, Metadata.empty), + StructField("num_inserts", DataTypes.LongType, nullable = true, Metadata.empty), + StructField("num_deletes", DataTypes.LongType, nullable = true, Metadata.empty), + StructField("num_update_writes", DataTypes.LongType, nullable = true, Metadata.empty), + StructField("total_errors", DataTypes.LongType, nullable = true, Metadata.empty), + StructField("total_log_blocks", DataTypes.LongType, nullable = true, Metadata.empty), + StructField("total_corrupt_logblocks", DataTypes.LongType, nullable = true, Metadata.empty), + StructField("total_rollback_blocks", DataTypes.LongType, nullable = true, 
Metadata.empty), + StructField("total_log_records", DataTypes.LongType, nullable = true, Metadata.empty), + StructField("total_updated_records_compacted", DataTypes.LongType, nullable = true, Metadata.empty), + StructField("total_bytes_written", DataTypes.LongType, nullable = true, Metadata.empty) + )) + + def parameters: Array[ProcedureParameter] = PARAMETERS + + def outputType: StructType = if (includeExtraMetadata) METADATA_OUTPUT_TYPE else OUTPUT_TYPE + + override def call(args: ProcedureArgs): Seq[Row] = { + super.checkArgs(PARAMETERS, args) + + val table = getArgValueOrDefault(args, PARAMETERS(0)).asInstanceOf[String] + val limit = getArgValueOrDefault(args, PARAMETERS(1)).asInstanceOf[Int] + + val hoodieCatalogTable = HoodieCatalogTable(sparkSession, new TableIdentifier(table)) + val basePath = hoodieCatalogTable.tableLocation + val metaClient = HoodieTableMetaClient.builder.setConf(jsc.hadoopConfiguration()).setBasePath(basePath).build + + val activeTimeline = metaClient.getActiveTimeline + if (includeExtraMetadata) { + getCommitsWithMetadata(activeTimeline, limit) + } else { + getCommits(activeTimeline, limit) + } + } + + override def build: Procedure = new ShowCommitsProcedure(includeExtraMetadata) + + private def getCommitsWithMetadata(timeline: HoodieDefaultTimeline, + limit: Int): Seq[Row] = { + import scala.collection.JavaConversions._ + + val (rows: util.ArrayList[Row], newCommits: util.ArrayList[HoodieInstant]) = getSortCommits(timeline) + + for (i <- 0 until newCommits.size) { + val commit = newCommits.get(i) + val commitMetadata = HoodieCommitMetadata.fromBytes(timeline.getInstantDetails(commit).get, classOf[HoodieCommitMetadata]) + for (partitionWriteStat <- commitMetadata.getPartitionToWriteStats.entrySet) { + for (hoodieWriteStat <- partitionWriteStat.getValue) { + rows.add(Row( + commit.getTimestamp, commit.getAction, hoodieWriteStat.getPartitionPath, + hoodieWriteStat.getFileId, hoodieWriteStat.getPrevCommit, hoodieWriteStat.getNumWrites, + hoodieWriteStat.getNumInserts, hoodieWriteStat.getNumDeletes, hoodieWriteStat.getNumUpdateWrites, + hoodieWriteStat.getTotalWriteErrors, hoodieWriteStat.getTotalLogBlocks, hoodieWriteStat.getTotalCorruptLogBlock, + hoodieWriteStat.getTotalRollbackBlocks, hoodieWriteStat.getTotalLogRecords, + hoodieWriteStat.getTotalUpdatedRecordsCompacted, hoodieWriteStat.getTotalWriteBytes)) + } + } + } + + rows.stream().limit(limit).toArray().map(r => r.asInstanceOf[Row]).toList + } + + private def getSortCommits(timeline: HoodieDefaultTimeline): (util.ArrayList[Row], util.ArrayList[HoodieInstant]) = { + val rows = new util.ArrayList[Row] + // timeline can be read from multiple files. 
So sort is needed instead of reversing the collection + val commits: util.List[HoodieInstant] = timeline.getCommitsTimeline.filterCompletedInstants + .getInstants.toArray().map(instant => instant.asInstanceOf[HoodieInstant]).toList.asJava + val newCommits = new util.ArrayList[HoodieInstant](commits) + Collections.sort(newCommits, HoodieInstant.COMPARATOR.reversed) + (rows, newCommits) + } + + def getCommits(timeline: HoodieDefaultTimeline, + limit: Int): Seq[Row] = { + val (rows: util.ArrayList[Row], newCommits: util.ArrayList[HoodieInstant]) = getSortCommits(timeline) + + for (i <- 0 until newCommits.size) { + val commit = newCommits.get(i) + val commitMetadata = HoodieCommitMetadata.fromBytes(timeline.getInstantDetails(commit).get, classOf[HoodieCommitMetadata]) + rows.add(Row(commit.getTimestamp, commitMetadata.fetchTotalBytesWritten, commitMetadata.fetchTotalFilesInsert, + commitMetadata.fetchTotalFilesUpdated, commitMetadata.fetchTotalPartitionsWritten, + commitMetadata.fetchTotalRecordsWritten, commitMetadata.fetchTotalUpdateRecordsWritten, + commitMetadata.fetchTotalWriteErrors)) + } + + rows.stream().limit(limit).toArray().map(r => r.asInstanceOf[Row]).toList + } +} + +object ShowCommitsProcedure { + val NAME = "show_commits" + + def builder: Supplier[ProcedureBuilder] = new Supplier[ProcedureBuilder] { + override def get() = new ShowCommitsProcedure(false) + } +} + +object ShowCommitsMetadataProcedure { + val NAME = "show_commits_metadata" + + def builder: Supplier[ProcedureBuilder] = new Supplier[ProcedureBuilder] { + override def get() = new ShowCommitsProcedure(true) + } +} + + diff --git a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/parser/HoodieSqlCommonAstBuilder.scala b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/parser/HoodieSqlCommonAstBuilder.scala index b1f5a32fe1e19..3146740b1f3f5 100644 --- a/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/parser/HoodieSqlCommonAstBuilder.scala +++ b/hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/parser/HoodieSqlCommonAstBuilder.scala @@ -17,22 +17,39 @@ package org.apache.spark.sql.parser +import org.antlr.v4.runtime.ParserRuleContext +import org.antlr.v4.runtime.tree.{ParseTree, RuleNode, TerminalNode} import org.apache.hudi.SparkAdapterSupport -import org.apache.hudi.spark.sql.parser.{HoodieSqlCommonBaseVisitor, HoodieSqlCommonParser} -import org.apache.hudi.spark.sql.parser.HoodieSqlCommonParser.{CompactionOnPathContext, CompactionOnTableContext, ShowCompactionOnPathContext, ShowCompactionOnTableContext, SingleStatementContext, TableIdentifierContext} +import org.apache.hudi.spark.sql.parser.HoodieSqlCommonBaseVisitor +import org.apache.hudi.spark.sql.parser.HoodieSqlCommonParser._ import org.apache.spark.internal.Logging import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation -import org.apache.spark.sql.catalyst.parser.ParserUtils.withOrigin -import org.apache.spark.sql.catalyst.parser.{ParserInterface, ParserUtils} -import org.apache.spark.sql.catalyst.plans.logical.{CompactionOperation, CompactionPath, CompactionShowOnPath, CompactionShowOnTable, CompactionTable, LogicalPlan} +import org.apache.spark.sql.catalyst.expressions.{Expression, Literal} +import org.apache.spark.sql.catalyst.parser.{ParseException, ParserInterface, ParserUtils} +import org.apache.spark.sql.catalyst.plans.logical._ + +import scala.collection.JavaConverters._ 
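For orientation before the parser changes below: `visitCall` lowers a SQL `CALL` statement into a `CallCommand(name, args)`, where each argument is captured as a `PositionalArgument` or a `NamedArgument`; `BaseProcedure.checkArgs` and `getArgValueOrDefault` then align those arguments against the declared `ProcedureParameter`s by position or by name. A minimal usage sketch, assuming the accompanying grammar accepts the usual `name => value` form for named arguments; the table name and instant time are illustrative, not from this patch:

```scala
import org.apache.spark.sql.SparkSession

// Assumes a session with the Hudi SQL extension enabled, which installs the
// parser that routes CALL statements through HoodieSqlCommonAstBuilder.
val spark = SparkSession.builder()
  .appName("hudi-call-procedure-sketch")
  .master("local[2]")
  .config("spark.sql.extensions", "org.apache.spark.sql.hudi.HoodieSparkSessionExtension")
  .getOrCreate()

// Positional arguments become PositionalArgument(expr) and are matched by index:
// parameter 0 is the table name, parameter 1 the row limit.
spark.sql("CALL show_commits('hudi_trips', 5)").show()

// Named arguments become NamedArgument(name, expr) and are matched by parameter name.
spark.sql("CALL rollback_to_instant(table => 'hudi_trips', instant_time => '20220126183541123')").show()
```

The procedure names resolve through the `HoodieProcedures` registry added above, which currently maps `show_commits`, `show_commits_metadata`, and `rollback_to_instant` to their builders.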
class HoodieSqlCommonAstBuilder(session: SparkSession, delegate: ParserInterface) extends HoodieSqlCommonBaseVisitor[AnyRef] with Logging with SparkAdapterSupport { import ParserUtils._ + /** + * Override the default behavior for all visit methods. This will only return a non-null result + * when the context has only one child. This is done because there is no generic method to + * combine the results of the context children. In all other cases null is returned. + */ + override def visitChildren(node: RuleNode): AnyRef = { + if (node.getChildCount == 1) { + node.getChild(0).accept(this) + } else { + null + } + } + override def visitSingleStatement(ctx: SingleStatementContext): LogicalPlan = withOrigin(ctx) { ctx.statement().accept(this).asInstanceOf[LogicalPlan] } @@ -72,4 +89,62 @@ class HoodieSqlCommonAstBuilder(session: SparkSession, delegate: ParserInterface override def visitTableIdentifier(ctx: TableIdentifierContext): LogicalPlan = withOrigin(ctx) { UnresolvedRelation(TableIdentifier(ctx.table.getText, Option(ctx.db).map(_.getText))) } + + override def visitCall(ctx: CallContext): LogicalPlan = withOrigin(ctx) { + if (ctx.callArgument().isEmpty) { + throw new ParseException("Procedure arguments are empty", ctx) + } + + val name: Seq[String] = ctx.multipartIdentifier().parts.asScala.map(_.getText) + val args: Seq[CallArgument] = ctx.callArgument().asScala.map(typedVisit[CallArgument]) + CallCommand(name, args) + } + + /** + * Return a multi-part identifier as Seq[String]. + */ + override def visitMultipartIdentifier(ctx: MultipartIdentifierContext): Seq[String] = withOrigin(ctx) { + ctx.parts.asScala.map(_.getText) + } + + /** + * Create a positional argument in a stored procedure call. + */ + override def visitPositionalArgument(ctx: PositionalArgumentContext): CallArgument = withOrigin(ctx) { + val expr = typedVisit[Expression](ctx.expression) + PositionalArgument(expr) + } + + /** + * Create a named argument in a stored procedure call.
+ */ + override def visitNamedArgument(ctx: NamedArgumentContext): CallArgument = withOrigin(ctx) { + val name = ctx.identifier.getText + val expr = typedVisit[Expression](ctx.expression) + NamedArgument(name, expr) + } + + def visitConstant(ctx: ConstantContext): Literal = { + delegate.parseExpression(ctx.getText).asInstanceOf[Literal] + } + + override def visitExpression(ctx: ExpressionContext): Expression = { + // reconstruct the SQL string and parse it using the main Spark parser + // while we can avoid the logic to build Spark expressions, we still have to parse them + // we cannot call ctx.getText directly since it will not render spaces correctly + // that's why we need to recurse down the tree in reconstructSqlString + val sqlString = reconstructSqlString(ctx) + delegate.parseExpression(sqlString) + } + + private def reconstructSqlString(ctx: ParserRuleContext): String = { + ctx.children.asScala.map { + case c: ParserRuleContext => reconstructSqlString(c) + case t: TerminalNode => t.getText + }.mkString(" ") + } + + private def typedVisit[T](ctx: ParseTree): T = { + ctx.accept(this).asInstanceOf[T] + } } diff --git a/hudi-spark-datasource/hudi-spark/src/test/java/HoodieJavaStreamingApp.java b/hudi-spark-datasource/hudi-spark/src/test/java/HoodieJavaStreamingApp.java index f1e6b45b292b7..5baaffab0cf7c 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/java/HoodieJavaStreamingApp.java +++ b/hudi-spark-datasource/hudi-spark/src/test/java/HoodieJavaStreamingApp.java @@ -164,7 +164,7 @@ public void run() throws Exception { ExecutorService executor = Executors.newFixedThreadPool(2); int numInitialCommits = 0; - // thread for spark strucutured streaming + // thread for spark structured streaming try { Future streamFuture = executor.submit(() -> { LOG.info("===== Streaming Starting ====="); @@ -211,7 +211,7 @@ public void run() throws Exception { Dataset inputDF3 = newSpark.read().json(jssc.parallelize(deletes, 2)); executor = Executors.newFixedThreadPool(2); - // thread for spark strucutured streaming + // thread for spark structured streaming try { Future streamFuture = executor.submit(() -> { LOG.info("===== Streaming Starting ====="); diff --git a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/TestDataSourceUtils.java b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/TestDataSourceUtils.java index 0c5a2122d509a..bf3520f0956d7 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/TestDataSourceUtils.java +++ b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/TestDataSourceUtils.java @@ -191,7 +191,7 @@ public void testDoWriteOperationWithUserDefinedBulkInsertPartitioner() throws Ho @Test public void testCreateUserDefinedBulkInsertPartitionerRowsWithInValidPartitioner() throws HoodieException { - config = HoodieWriteConfig.newBuilder().withPath("/").withUserDefinedBulkInsertPartitionerClass("NonExistantUserDefinedClass").build(); + config = HoodieWriteConfig.newBuilder().withPath("/").withUserDefinedBulkInsertPartitionerClass("NonExistentUserDefinedClass").build(); Exception exception = assertThrows(HoodieException.class, () -> { DataSourceUtils.createUserDefinedBulkInsertPartitionerWithRows(config); diff --git a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestBootstrap.java b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestBootstrap.java index 2e89baa70b8bf..d2257f58d0e80 100644 --- 
a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestBootstrap.java +++ b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestBootstrap.java @@ -32,6 +32,7 @@ import org.apache.hudi.common.config.HoodieMetadataConfig; import org.apache.hudi.common.config.TypedProperties; import org.apache.hudi.common.fs.FSUtils; +import org.apache.hudi.common.model.HoodieAvroRecord; import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.model.HoodieTableType; @@ -103,7 +104,6 @@ import static java.util.stream.Collectors.mapping; import static java.util.stream.Collectors.toList; -import static org.apache.hudi.common.testutils.HoodieTestDataGenerator.generateGenericRecord; import static org.apache.spark.sql.functions.callUDF; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; @@ -178,7 +178,7 @@ public Schema generateNewDataSetAndReturnSchema(long timestamp, int numRecords, } @Test - public void testMetadataBootstrapUnpartitionedCOW() throws Exception { + public void testMetadataBootstrapNonpartitionedCOW() throws Exception { testBootstrapCommon(false, false, EffectiveMode.METADATA_BOOTSTRAP_MODE); } @@ -228,7 +228,7 @@ private void testBootstrapCommon(boolean partitioned, boolean deltaCommit, Effec bootstrapInstants = Arrays.asList(bootstrapCommitInstantTs); break; default: - bootstrapModeSelectorClass = TestRandomBootstapModeSelector.class.getName(); + bootstrapModeSelectorClass = TestRandomBootstrapModeSelector.class.getName(); bootstrapCommitInstantTs = HoodieTimeline.FULL_BOOTSTRAP_INSTANT_TS; checkNumRawFiles = false; isBootstrapIndexCreated = true; @@ -252,7 +252,6 @@ private void testBootstrapCommon(boolean partitioned, boolean deltaCommit, Effec .withFullBootstrapInputProvider(TestFullBootstrapDataProvider.class.getName()) .withBootstrapParallelism(3) .withBootstrapModeSelector(bootstrapModeSelectorClass).build()) - .withMetadataConfig(HoodieMetadataConfig.newBuilder().enable(false).build()) .build(); SparkRDDWriteClient client = new SparkRDDWriteClient(context, config); client.bootstrap(Option.empty()); @@ -510,7 +509,7 @@ private static JavaRDD generateInputBatch(JavaSparkContext jsc, try { String key = gr.get("_row_key").toString(); String pPath = p.getKey(); - return new HoodieRecord<>(new HoodieKey(key, pPath), new RawTripTestPayload(gr.toString(), key, pPath, + return new HoodieAvroRecord<>(new HoodieKey(key, pPath), new RawTripTestPayload(gr.toString(), key, pPath, HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA)); } catch (IOException e) { throw new HoodieIOException(e.getMessage(), e); @@ -522,11 +521,11 @@ private static JavaRDD generateInputBatch(JavaSparkContext jsc, }).collect(Collectors.toList())); } - public static class TestRandomBootstapModeSelector extends BootstrapModeSelector { + public static class TestRandomBootstrapModeSelector extends BootstrapModeSelector { private int currIdx = new Random().nextInt(2); - public TestRandomBootstapModeSelector(HoodieWriteConfig writeConfig) { + public TestRandomBootstrapModeSelector(HoodieWriteConfig writeConfig) { super(writeConfig); } @@ -564,8 +563,7 @@ public static Dataset generateTestRawTripDataset(long timestamp, int from, final List records = new ArrayList<>(); IntStream.range(from, to).forEach(i -> { String id = "" + i; - records.add(generateGenericRecord("trip_" + id, Long.toString(timestamp), "rider_" + id, "driver_" + id, - timestamp, false, 
false).toString()); + records.add(new HoodieTestDataGenerator().generateGenericRecord("trip_" + id, Long.toString(timestamp), "rider_" + id, "driver_" + id, timestamp, false, false).toString()); }); if (isPartitioned) { sqlContext.udf().register("partgen", diff --git a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestOrcBootstrap.java b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestOrcBootstrap.java index fba09091add50..9146cdc4e81f7 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestOrcBootstrap.java +++ b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestOrcBootstrap.java @@ -29,8 +29,8 @@ import org.apache.hudi.client.common.HoodieSparkEngineContext; import org.apache.hudi.common.bootstrap.FileStatusUtils; import org.apache.hudi.common.bootstrap.index.BootstrapIndex; -import org.apache.hudi.common.config.HoodieMetadataConfig; import org.apache.hudi.common.config.TypedProperties; +import org.apache.hudi.common.model.HoodieAvroRecord; import org.apache.hudi.common.model.HoodieFileFormat; import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.model.HoodieRecord; @@ -98,7 +98,6 @@ import static java.util.stream.Collectors.mapping; import static java.util.stream.Collectors.toList; -import static org.apache.hudi.common.testutils.HoodieTestDataGenerator.generateGenericRecord; import static org.apache.spark.sql.functions.callUDF; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; @@ -171,7 +170,7 @@ public Schema generateNewDataSetAndReturnSchema(long timestamp, int numRecords, } @Test - public void testMetadataBootstrapUnpartitionedCOW() throws Exception { + public void testMetadataBootstrapNonpartitionedCOW() throws Exception { testBootstrapCommon(false, false, EffectiveMode.METADATA_BOOTSTRAP_MODE); } @@ -221,7 +220,7 @@ private void testBootstrapCommon(boolean partitioned, boolean deltaCommit, Effec bootstrapInstants = Arrays.asList(bootstrapCommitInstantTs); break; default: - bootstrapModeSelectorClass = TestRandomBootstapModeSelector.class.getName(); + bootstrapModeSelectorClass = TestRandomBootstrapModeSelector.class.getName(); bootstrapCommitInstantTs = HoodieTimeline.FULL_BOOTSTRAP_INSTANT_TS; checkNumRawFiles = false; isBootstrapIndexCreated = true; @@ -245,7 +244,6 @@ private void testBootstrapCommon(boolean partitioned, boolean deltaCommit, Effec .withFullBootstrapInputProvider(TestFullBootstrapDataProvider.class.getName()) .withBootstrapParallelism(3) .withBootstrapModeSelector(bootstrapModeSelectorClass).build()) - .withMetadataConfig(HoodieMetadataConfig.newBuilder().enable(false).build()) .build(); SparkRDDWriteClient client = new SparkRDDWriteClient(context, config); client.bootstrap(Option.empty()); @@ -425,7 +423,7 @@ private static JavaRDD generateInputBatch(JavaSparkContext jsc, try { String key = gr.get("_row_key").toString(); String pPath = p.getKey(); - return new HoodieRecord<>(new HoodieKey(key, pPath), new RawTripTestPayload(gr.toString(), key, pPath, + return new HoodieAvroRecord<>(new HoodieKey(key, pPath), new RawTripTestPayload(gr.toString(), key, pPath, HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA)); } catch (IOException e) { throw new HoodieIOException(e.getMessage(), e); @@ -437,10 +435,10 @@ private static JavaRDD generateInputBatch(JavaSparkContext jsc, }).collect(Collectors.toList())); } - public static class TestRandomBootstapModeSelector 
extends BootstrapModeSelector { + public static class TestRandomBootstrapModeSelector extends BootstrapModeSelector { private int currIdx = new Random().nextInt(2); - public TestRandomBootstapModeSelector(HoodieWriteConfig writeConfig) { + public TestRandomBootstrapModeSelector(HoodieWriteConfig writeConfig) { super(writeConfig); } @@ -477,8 +475,7 @@ public static Dataset generateTestRawTripDataset(long timestamp, int from, final List records = new ArrayList<>(); IntStream.range(from, to).forEach(i -> { String id = "" + i; - records.add(generateGenericRecord("trip_" + id, Long.toString(timestamp), "rider_" + id, "driver_" + id, - timestamp, false, false).toString()); + records.add(new HoodieTestDataGenerator().generateGenericRecord("trip_" + id, Long.toString(timestamp), "rider_" + id, "driver_" + id, timestamp, false, false).toString()); }); if (isPartitioned) { sqlContext.udf().register("partgen", diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/keygen/TestComplexKeyGenerator.java b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/keygen/TestComplexKeyGenerator.java similarity index 99% rename from hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/keygen/TestComplexKeyGenerator.java rename to hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/keygen/TestComplexKeyGenerator.java index 87deef2a58c4e..735277d959ee4 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/keygen/TestComplexKeyGenerator.java +++ b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/keygen/TestComplexKeyGenerator.java @@ -18,13 +18,12 @@ package org.apache.hudi.keygen; +import org.apache.avro.generic.GenericRecord; import org.apache.hudi.AvroConversionUtils; import org.apache.hudi.common.config.TypedProperties; import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.testutils.HoodieTestDataGenerator; import org.apache.hudi.exception.HoodieKeyException; - -import org.apache.avro.generic.GenericRecord; import org.apache.hudi.keygen.constant.KeyGeneratorOptions; import org.apache.hudi.testutils.KeyGeneratorTestUtilities; import org.apache.spark.sql.Row; diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/keygen/TestCustomKeyGenerator.java b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/keygen/TestCustomKeyGenerator.java similarity index 99% rename from hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/keygen/TestCustomKeyGenerator.java rename to hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/keygen/TestCustomKeyGenerator.java index 4b590d9374c8e..26a2b439abfb2 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/keygen/TestCustomKeyGenerator.java +++ b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/keygen/TestCustomKeyGenerator.java @@ -18,10 +18,9 @@ package org.apache.hudi.keygen; -import org.apache.hudi.common.model.HoodieKey; -import org.apache.hudi.common.config.TypedProperties; - import org.apache.avro.generic.GenericRecord; +import org.apache.hudi.common.config.TypedProperties; +import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.keygen.constant.KeyGeneratorOptions; import org.apache.hudi.keygen.constant.KeyGeneratorType; diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/keygen/TestGlobalDeleteKeyGenerator.java 
b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/keygen/TestGlobalDeleteKeyGenerator.java similarity index 99% rename from hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/keygen/TestGlobalDeleteKeyGenerator.java rename to hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/keygen/TestGlobalDeleteKeyGenerator.java index aa9568b7a4663..a0d90e028af82 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/keygen/TestGlobalDeleteKeyGenerator.java +++ b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/keygen/TestGlobalDeleteKeyGenerator.java @@ -18,11 +18,10 @@ package org.apache.hudi.keygen; +import org.apache.avro.generic.GenericRecord; import org.apache.hudi.common.config.TypedProperties; import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.exception.HoodieKeyException; - -import org.apache.avro.generic.GenericRecord; import org.apache.hudi.keygen.constant.KeyGeneratorOptions; import org.apache.hudi.testutils.KeyGeneratorTestUtilities; import org.apache.spark.sql.Row; diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/keygen/TestNonpartitionedKeyGenerator.java b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/keygen/TestNonpartitionedKeyGenerator.java similarity index 99% rename from hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/keygen/TestNonpartitionedKeyGenerator.java rename to hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/keygen/TestNonpartitionedKeyGenerator.java index 0760de112b934..297b077794d56 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/keygen/TestNonpartitionedKeyGenerator.java +++ b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/keygen/TestNonpartitionedKeyGenerator.java @@ -18,12 +18,11 @@ package org.apache.hudi.keygen; +import org.apache.avro.generic.GenericRecord; import org.apache.hudi.common.config.TypedProperties; import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.testutils.HoodieTestDataGenerator; import org.apache.hudi.exception.HoodieKeyException; - -import org.apache.avro.generic.GenericRecord; import org.apache.hudi.keygen.constant.KeyGeneratorOptions; import org.apache.hudi.testutils.KeyGeneratorTestUtilities; import org.apache.spark.sql.Row; diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/keygen/TestSimpleKeyGenerator.java b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/keygen/TestSimpleKeyGenerator.java similarity index 99% rename from hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/keygen/TestSimpleKeyGenerator.java rename to hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/keygen/TestSimpleKeyGenerator.java index 0fc90c83a08d4..7dea9e414e693 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/keygen/TestSimpleKeyGenerator.java +++ b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/keygen/TestSimpleKeyGenerator.java @@ -18,13 +18,12 @@ package org.apache.hudi.keygen; +import org.apache.avro.generic.GenericRecord; import org.apache.hudi.common.config.TypedProperties; import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.exception.HoodieKeyException; import org.apache.hudi.keygen.constant.KeyGeneratorOptions; import org.apache.hudi.testutils.KeyGeneratorTestUtilities; - -import org.apache.avro.generic.GenericRecord; import org.apache.spark.sql.Row; import org.apache.spark.sql.catalyst.InternalRow; import 
org.junit.jupiter.api.Assertions; diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/keygen/TestTimestampBasedKeyGenerator.java b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/keygen/TestTimestampBasedKeyGenerator.java similarity index 75% rename from hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/keygen/TestTimestampBasedKeyGenerator.java rename to hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/keygen/TestTimestampBasedKeyGenerator.java index 6f3c1a39f81ff..1fc4b9f1ef694 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/keygen/TestTimestampBasedKeyGenerator.java +++ b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/keygen/TestTimestampBasedKeyGenerator.java @@ -18,18 +18,18 @@ package org.apache.hudi.keygen; -import org.apache.hudi.AvroConversionHelper; +import org.apache.avro.Conversions; +import org.apache.avro.LogicalTypes; +import org.apache.avro.Schema; +import org.apache.avro.generic.GenericFixed; +import org.apache.avro.generic.GenericRecord; import org.apache.hudi.AvroConversionUtils; import org.apache.hudi.common.config.TypedProperties; import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.testutils.SchemaTestUtil; - -import org.apache.avro.Schema; -import org.apache.avro.generic.GenericRecord; import org.apache.hudi.exception.HoodieKeyGeneratorException; import org.apache.hudi.keygen.constant.KeyGeneratorOptions; import org.apache.hudi.testutils.KeyGeneratorTestUtilities; - import org.apache.spark.sql.Row; import org.apache.spark.sql.catalyst.InternalRow; import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema; @@ -37,12 +37,12 @@ import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; +import scala.Function1; +import scala.Tuple2; import java.io.IOException; import java.math.BigDecimal; -import scala.Function1; - import static org.junit.jupiter.api.Assertions.assertEquals; public class TestTimestampBasedKeyGenerator { @@ -69,21 +69,9 @@ public void initialize() throws IOException { properties.setProperty(KeyGeneratorOptions.HIVE_STYLE_PARTITIONING_ENABLE.key(), "false"); } - private TypedProperties getBaseKeyConfig(String timestampType, String dateFormat, String timezone, String scalarType) { - properties.setProperty(TimestampBasedAvroKeyGenerator.Config.TIMESTAMP_TYPE_FIELD_PROP, timestampType); - properties.setProperty(TimestampBasedAvroKeyGenerator.Config.TIMESTAMP_OUTPUT_DATE_FORMAT_PROP, dateFormat); - properties.setProperty(TimestampBasedAvroKeyGenerator.Config.TIMESTAMP_TIMEZONE_FORMAT_PROP, timezone); - - if (scalarType != null) { - properties.setProperty("hoodie.deltastreamer.keygen.timebased.timestamp.scalar.time.unit", scalarType); - } - - return properties; - } - private Row genericRecordToRow(GenericRecord baseRecord) { - Function1 convertor = AvroConversionHelper.createConverterToRow(baseRecord.getSchema(), structType); - Row row = (Row) convertor.apply(baseRecord); + Function1 convertor = AvroConversionUtils.createConverterToRow(baseRecord.getSchema(), structType); + Row row = convertor.apply(baseRecord); int fieldCount = structType.fieldNames().length; Object[] values = new Object[fieldCount]; for (int i = 0; i < fieldCount; i++) { @@ -92,24 +80,49 @@ private Row genericRecordToRow(GenericRecord baseRecord) { return new GenericRowWithSchema(values, structType); } - private TypedProperties getBaseKeyConfig(String timestampType, String inputFormatList, String 
inputFormatDelimiterRegex, String inputTimezone, String outputFormat, String outputTimezone) { + private TypedProperties getBaseKeyConfig(String partitionPathField, String timestampType, String dateFormat, String timezone, String scalarType) { + TypedProperties properties = new TypedProperties(this.properties); + + properties.setProperty(KeyGeneratorOptions.PARTITIONPATH_FIELD_NAME.key(), partitionPathField); + properties.setProperty(KeyGeneratorOptions.Config.TIMESTAMP_TYPE_FIELD_PROP, timestampType); + properties.setProperty(KeyGeneratorOptions.Config.TIMESTAMP_OUTPUT_DATE_FORMAT_PROP, dateFormat); + properties.setProperty(KeyGeneratorOptions.Config.TIMESTAMP_TIMEZONE_FORMAT_PROP, timezone); + + if (scalarType != null) { + properties.setProperty("hoodie.deltastreamer.keygen.timebased.timestamp.scalar.time.unit", scalarType); + } + + return properties; + } + + private TypedProperties getBaseKeyConfig(String partitionPathField, + String timestampType, + String inputFormatList, + String inputFormatDelimiterRegex, + String inputTimezone, + String outputFormat, + String outputTimezone) { + TypedProperties properties = new TypedProperties(this.properties); + + properties.setProperty(KeyGeneratorOptions.PARTITIONPATH_FIELD_NAME.key(), partitionPathField); + if (timestampType != null) { - properties.setProperty(TimestampBasedAvroKeyGenerator.Config.TIMESTAMP_TYPE_FIELD_PROP, timestampType); + properties.setProperty(KeyGeneratorOptions.Config.TIMESTAMP_TYPE_FIELD_PROP, timestampType); } if (inputFormatList != null) { - properties.setProperty(TimestampBasedAvroKeyGenerator.Config.TIMESTAMP_INPUT_DATE_FORMAT_PROP, inputFormatList); + properties.setProperty(KeyGeneratorOptions.Config.TIMESTAMP_INPUT_DATE_FORMAT_PROP, inputFormatList); } if (inputFormatDelimiterRegex != null) { - properties.setProperty(TimestampBasedAvroKeyGenerator.Config.TIMESTAMP_INPUT_DATE_FORMAT_LIST_DELIMITER_REGEX_PROP, inputFormatDelimiterRegex); + properties.setProperty(KeyGeneratorOptions.Config.TIMESTAMP_INPUT_DATE_FORMAT_LIST_DELIMITER_REGEX_PROP, inputFormatDelimiterRegex); } if (inputTimezone != null) { - properties.setProperty(TimestampBasedAvroKeyGenerator.Config.TIMESTAMP_INPUT_TIMEZONE_FORMAT_PROP, inputTimezone); + properties.setProperty(KeyGeneratorOptions.Config.TIMESTAMP_INPUT_TIMEZONE_FORMAT_PROP, inputTimezone); } if (outputFormat != null) { - properties.setProperty(TimestampBasedAvroKeyGenerator.Config.TIMESTAMP_OUTPUT_DATE_FORMAT_PROP, outputFormat); + properties.setProperty(KeyGeneratorOptions.Config.TIMESTAMP_OUTPUT_DATE_FORMAT_PROP, outputFormat); } if (outputTimezone != null) { - properties.setProperty(TimestampBasedAvroKeyGenerator.Config.TIMESTAMP_OUTPUT_TIMEZONE_FORMAT_PROP, outputTimezone); + properties.setProperty(KeyGeneratorOptions.Config.TIMESTAMP_OUTPUT_TIMEZONE_FORMAT_PROP, outputTimezone); } return properties; } @@ -118,7 +131,7 @@ private TypedProperties getBaseKeyConfig(String timestampType, String inputForma public void testTimestampBasedKeyGenerator() throws IOException { // timezone is GMT+8:00 baseRecord.put("createTime", 1578283932000L); - properties = getBaseKeyConfig("EPOCHMILLISECONDS", "yyyy-MM-dd hh", "GMT+8:00", null); + properties = getBaseKeyConfig("createTime", "EPOCHMILLISECONDS", "yyyy-MM-dd hh", "GMT+8:00", null); TimestampBasedKeyGenerator keyGen = new TimestampBasedKeyGenerator(properties); HoodieKey hk1 = keyGen.getKey(baseRecord); assertEquals("2020-01-06 12", hk1.getPartitionPath()); @@ -128,68 +141,60 @@ public void testTimestampBasedKeyGenerator() throws IOException { 
assertEquals("2020-01-06 12", keyGen.getPartitionPath(internalRow, baseRow.schema())); // timezone is GMT+8:00, createTime is BigDecimal - baseRecord.put("createTime", new BigDecimal(1578283932000.00001)); - properties = getBaseKeyConfig("EPOCHMILLISECONDS", "yyyy-MM-dd hh", "GMT+8:00", null); + BigDecimal decimal = new BigDecimal("1578283932000.0001"); + Conversions.DecimalConversion conversion = new Conversions.DecimalConversion(); + Tuple2 resolvedNullableSchema = AvroConversionUtils.resolveAvroTypeNullability(schema.getField("createTimeDecimal").schema()); + GenericFixed avroDecimal = conversion.toFixed(decimal, resolvedNullableSchema._2, LogicalTypes.decimal(20, 4)); + baseRecord.put("createTimeDecimal", avroDecimal); + properties = getBaseKeyConfig("createTimeDecimal", "EPOCHMILLISECONDS", "yyyy-MM-dd hh", "GMT+8:00", null); keyGen = new TimestampBasedKeyGenerator(properties); HoodieKey bigDecimalKey = keyGen.getKey(baseRecord); assertEquals("2020-01-06 12", bigDecimalKey.getPartitionPath()); - - // test w/ Row baseRow = genericRecordToRow(baseRecord); assertEquals("2020-01-06 12", keyGen.getPartitionPath(baseRow)); // timezone is GMT - properties = getBaseKeyConfig("EPOCHMILLISECONDS", "yyyy-MM-dd hh", "GMT", null); + properties = getBaseKeyConfig("createTime", "EPOCHMILLISECONDS", "yyyy-MM-dd hh", "GMT", null); keyGen = new TimestampBasedKeyGenerator(properties); HoodieKey hk2 = keyGen.getKey(baseRecord); assertEquals("2020-01-06 04", hk2.getPartitionPath()); - - // test w/ Row assertEquals("2020-01-06 04", keyGen.getPartitionPath(baseRow)); // timestamp is DATE_STRING, timezone is GMT+8:00 - baseRecord.put("createTime", "2020-01-06 12:12:12"); - properties = getBaseKeyConfig("DATE_STRING", "yyyy-MM-dd hh", "GMT+8:00", null); + baseRecord.put("createTimeString", "2020-01-06 12:12:12"); + properties = getBaseKeyConfig("createTimeString", "DATE_STRING", "yyyy-MM-dd hh", "GMT+8:00", null); properties.setProperty("hoodie.deltastreamer.keygen.timebased.input.dateformat", "yyyy-MM-dd hh:mm:ss"); keyGen = new TimestampBasedKeyGenerator(properties); HoodieKey hk3 = keyGen.getKey(baseRecord); assertEquals("2020-01-06 12", hk3.getPartitionPath()); - - // test w/ Row baseRow = genericRecordToRow(baseRecord); assertEquals("2020-01-06 12", keyGen.getPartitionPath(baseRow)); // timezone is GMT - properties = getBaseKeyConfig("DATE_STRING", "yyyy-MM-dd hh", "GMT", null); + properties = getBaseKeyConfig("createTimeString", "DATE_STRING", "yyyy-MM-dd hh", "GMT", null); keyGen = new TimestampBasedKeyGenerator(properties); HoodieKey hk4 = keyGen.getKey(baseRecord); assertEquals("2020-01-06 12", hk4.getPartitionPath()); - - // test w/ Row assertEquals("2020-01-06 12", keyGen.getPartitionPath(baseRow)); // timezone is GMT+8:00, createTime is null baseRecord.put("createTime", null); - properties = getBaseKeyConfig("EPOCHMILLISECONDS", "yyyy-MM-dd hh", "GMT+8:00", null); + properties = getBaseKeyConfig("createTime", "EPOCHMILLISECONDS", "yyyy-MM-dd hh", "GMT+8:00", null); keyGen = new TimestampBasedKeyGenerator(properties); HoodieKey hk5 = keyGen.getKey(baseRecord); assertEquals("1970-01-01 08", hk5.getPartitionPath()); - - // test w/ Row baseRow = genericRecordToRow(baseRecord); assertEquals("1970-01-01 08", keyGen.getPartitionPath(baseRow)); internalRow = KeyGeneratorTestUtilities.getInternalRow(baseRow); assertEquals("1970-01-01 08", keyGen.getPartitionPath(internalRow, baseRow.schema())); // timestamp is DATE_STRING, timezone is GMT, createTime is null - baseRecord.put("createTime", null); - 
properties = getBaseKeyConfig("DATE_STRING", "yyyy-MM-dd hh:mm:ss", "GMT", null); + baseRecord.put("createTimeString", null); + properties = getBaseKeyConfig("createTime", "DATE_STRING", "yyyy-MM-dd hh:mm:ss", "GMT", null); properties.setProperty("hoodie.deltastreamer.keygen.timebased.input.dateformat", "yyyy-MM-dd hh:mm:ss"); keyGen = new TimestampBasedKeyGenerator(properties); HoodieKey hk6 = keyGen.getKey(baseRecord); assertEquals("1970-01-01 12:00:00", hk6.getPartitionPath()); - - // test w/ Row baseRow = genericRecordToRow(baseRecord); assertEquals("1970-01-01 12:00:00", keyGen.getPartitionPath(baseRow)); internalRow = KeyGeneratorTestUtilities.getInternalRow(baseRow); @@ -202,7 +207,7 @@ public void testScalar() throws IOException { baseRecord.put("createTime", 20000L); // timezone is GMT - properties = getBaseKeyConfig("SCALAR", "yyyy-MM-dd hh", "GMT", "days"); + properties = getBaseKeyConfig("createTime", "SCALAR", "yyyy-MM-dd hh", "GMT", "days"); TimestampBasedKeyGenerator keyGen = new TimestampBasedKeyGenerator(properties); HoodieKey hk1 = keyGen.getKey(baseRecord); assertEquals(hk1.getPartitionPath(), "2024-10-04 12"); @@ -215,7 +220,7 @@ public void testScalar() throws IOException { // timezone is GMT, createTime is null baseRecord.put("createTime", null); - properties = getBaseKeyConfig("SCALAR", "yyyy-MM-dd hh", "GMT", "days"); + properties = getBaseKeyConfig("createTime", "SCALAR", "yyyy-MM-dd hh", "GMT", "days"); keyGen = new TimestampBasedKeyGenerator(properties); HoodieKey hk2 = keyGen.getKey(baseRecord); assertEquals("1970-01-02 12", hk2.getPartitionPath()); @@ -227,8 +232,8 @@ public void testScalar() throws IOException { assertEquals("1970-01-02 12", keyGen.getPartitionPath(internalRow, baseRow.schema())); // timezone is GMT; the number of days is stored as an integer in MySQL - baseRecord.put("createTime", 18736); - properties = getBaseKeyConfig("SCALAR", "yyyy-MM-dd", "GMT", "DAYS"); + baseRecord.put("createTime", 18736L); + properties = getBaseKeyConfig("createTime", "SCALAR", "yyyy-MM-dd", "GMT", "DAYS"); keyGen = new TimestampBasedKeyGenerator(properties); HoodieKey scalarSecondsKey = keyGen.getKey(baseRecord); assertEquals("2021-04-19", scalarSecondsKey.getPartitionPath()); @@ -245,7 +250,7 @@ public void testScalarWithLogicalType() throws IOException { baseRecord = SchemaTestUtil.generateAvroRecordFromJson(schema, 1, "001", "f1"); baseRecord.put("createTime", 1638513806000000L); - properties = getBaseKeyConfig("SCALAR", "yyyy/MM/dd", "GMT", "MICROSECONDS"); + properties = getBaseKeyConfig("createTime", "SCALAR", "yyyy/MM/dd", "GMT", "MICROSECONDS"); properties.setProperty(KeyGeneratorOptions.KEYGENERATOR_CONSISTENT_LOGICAL_TIMESTAMP_ENABLED.key(), "true"); TimestampBasedKeyGenerator keyGen = new TimestampBasedKeyGenerator(properties); HoodieKey hk1 = keyGen.getKey(baseRecord); @@ -259,7 +264,7 @@ public void testScalarWithLogicalType() throws IOException { // timezone is GMT, createTime is null baseRecord.put("createTime", null); - properties = getBaseKeyConfig("SCALAR", "yyyy/MM/dd", "GMT", "MICROSECONDS"); + properties = getBaseKeyConfig("createTime", "SCALAR", "yyyy/MM/dd", "GMT", "MICROSECONDS"); properties.setProperty(KeyGeneratorOptions.KEYGENERATOR_CONSISTENT_LOGICAL_TIMESTAMP_ENABLED.key(), "true"); keyGen = new TimestampBasedKeyGenerator(properties); HoodieKey hk2 = keyGen.getKey(baseRecord); @@ -274,8 +279,9 @@ public void testScalarWithLogicalType() throws IOException { @Test public void test_ExpectsMatch_SingleInputFormat_ISO8601WithMsZ_OutputTimezoneAsUTC()
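// Editorial note (inferred from the config and test names below, not stated in the patch): for
// DATE_STRING inputs, TIMESTAMP_INPUT_DATE_FORMAT_PROP may carry several patterns separated by the
// TIMESTAMP_INPUT_DATE_FORMAT_LIST_DELIMITER_REGEX_PROP delimiter, e.g.
//   "yyyy-MM-dd'T'HH:mm:ssZ,yyyy-MM-dd'T'HH:mm:ss.SSSZ"
// and the generator appears to try each pattern in turn, throwing only when none of them matches
// (see test_Throws_MultipleInputFormats_InputDateNotMatchingFormats further down).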
throws IOException { - baseRecord.put("createTime", "2020-04-01T13:01:33.428Z"); + baseRecord.put("createTimeString", "2020-04-01T13:01:33.428Z"); properties = this.getBaseKeyConfig( + "createTimeString", "DATE_STRING", "yyyy-MM-dd'T'HH:mm:ss.SSSZ", "", @@ -292,8 +298,9 @@ public void test_ExpectsMatch_SingleInputFormat_ISO8601WithMsZ_OutputTimezoneAsU @Test public void test_ExpectsMatch_SingleInputFormats_ISO8601WithMsZ_OutputTimezoneAsInputDateTimeZone() throws IOException { - baseRecord.put("createTime", "2020-04-01T13:01:33.428Z"); + baseRecord.put("createTimeString", "2020-04-01T13:01:33.428Z"); properties = this.getBaseKeyConfig( + "createTimeString", "DATE_STRING", "yyyy-MM-dd'T'HH:mm:ss.SSSZ", "", @@ -310,8 +317,9 @@ public void test_ExpectsMatch_SingleInputFormats_ISO8601WithMsZ_OutputTimezoneAs @Test public void test_ExpectsMatch_MultipleInputFormats_ISO8601WithMsZ_OutputTimezoneAsUTC() throws IOException { - baseRecord.put("createTime", "2020-04-01T13:01:33.428Z"); + baseRecord.put("createTimeString", "2020-04-01T13:01:33.428Z"); properties = this.getBaseKeyConfig( + "createTimeString", "DATE_STRING", "yyyy-MM-dd'T'HH:mm:ssZ,yyyy-MM-dd'T'HH:mm:ss.SSSZ", "", @@ -328,8 +336,9 @@ public void test_ExpectsMatch_MultipleInputFormats_ISO8601WithMsZ_OutputTimezone @Test public void test_ExpectsMatch_MultipleInputFormats_ISO8601NoMsZ_OutputTimezoneAsUTC() throws IOException { - baseRecord.put("createTime", "2020-04-01T13:01:33Z"); + baseRecord.put("createTimeString", "2020-04-01T13:01:33Z"); properties = this.getBaseKeyConfig( + "createTimeString", "DATE_STRING", "yyyy-MM-dd'T'HH:mm:ssZ,yyyy-MM-dd'T'HH:mm:ss.SSSZ", "", @@ -346,8 +355,9 @@ public void test_ExpectsMatch_MultipleInputFormats_ISO8601NoMsZ_OutputTimezoneAs @Test public void test_ExpectsMatch_MultipleInputFormats_ISO8601NoMsWithOffset_OutputTimezoneAsUTC() throws IOException { - baseRecord.put("createTime", "2020-04-01T13:01:33-05:00"); + baseRecord.put("createTimeString", "2020-04-01T13:01:33-05:00"); properties = this.getBaseKeyConfig( + "createTimeString", "DATE_STRING", "yyyy-MM-dd'T'HH:mm:ssZ,yyyy-MM-dd'T'HH:mm:ss.SSSZ", "", @@ -364,8 +374,9 @@ public void test_ExpectsMatch_MultipleInputFormats_ISO8601NoMsWithOffset_OutputT @Test public void test_ExpectsMatch_MultipleInputFormats_ISO8601WithMsWithOffset_OutputTimezoneAsUTC() throws IOException { - baseRecord.put("createTime", "2020-04-01T13:01:33.123-05:00"); + baseRecord.put("createTimeString", "2020-04-01T13:01:33.123-05:00"); properties = this.getBaseKeyConfig( + "createTimeString", "DATE_STRING", "yyyy-MM-dd'T'HH:mm:ssZ,yyyy-MM-dd'T'HH:mm:ss.SSSZ", "", @@ -382,8 +393,9 @@ public void test_ExpectsMatch_MultipleInputFormats_ISO8601WithMsWithOffset_Outpu @Test public void test_ExpectsMatch_MultipleInputFormats_ISO8601WithMsZ_OutputTimezoneAsEST() throws IOException { - baseRecord.put("createTime", "2020-04-01T13:01:33.123Z"); + baseRecord.put("createTimeString", "2020-04-01T13:01:33.123Z"); properties = this.getBaseKeyConfig( + "createTimeString", "DATE_STRING", "yyyy-MM-dd'T'HH:mm:ssZ,yyyy-MM-dd'T'HH:mm:ss.SSSZ", "", @@ -400,8 +412,9 @@ public void test_ExpectsMatch_MultipleInputFormats_ISO8601WithMsZ_OutputTimezone @Test public void test_Throws_MultipleInputFormats_InputDateNotMatchingFormats() throws IOException { - baseRecord.put("createTime", "2020-04-01 13:01:33.123-05:00"); + baseRecord.put("createTimeString", "2020-04-01 13:01:33.123-05:00"); properties = this.getBaseKeyConfig( + "createTimeString", "DATE_STRING", 
"yyyy-MM-dd'T'HH:mm:ssZ,yyyy-MM-dd'T'HH:mm:ss.SSSZ", "", @@ -417,8 +430,9 @@ public void test_Throws_MultipleInputFormats_InputDateNotMatchingFormats() throw @Test public void test_ExpectsMatch_MultipleInputFormats_ShortDate_OutputCustomDate() throws IOException { - baseRecord.put("createTime", "20200401"); + baseRecord.put("createTimeString", "20200401"); properties = this.getBaseKeyConfig( + "createTimeString", "DATE_STRING", "yyyy-MM-dd'T'HH:mm:ssZ,yyyy-MM-dd'T'HH:mm:ss.SSSZ,yyyyMMdd", "", diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/keygen/factory/TestCreateKeyGeneratorByTypeWithFactory.java b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/keygen/factory/TestCreateKeyGeneratorByTypeWithFactory.java similarity index 100% rename from hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/keygen/factory/TestCreateKeyGeneratorByTypeWithFactory.java rename to hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/keygen/factory/TestCreateKeyGeneratorByTypeWithFactory.java diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/keygen/factory/TestHoodieSparkKeyGeneratorFactory.java b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/keygen/factory/TestHoodieSparkKeyGeneratorFactory.java similarity index 98% rename from hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/keygen/factory/TestHoodieSparkKeyGeneratorFactory.java rename to hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/keygen/factory/TestHoodieSparkKeyGeneratorFactory.java index dffe1eaa96c24..816c1fb86d4b9 100644 --- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/keygen/factory/TestHoodieSparkKeyGeneratorFactory.java +++ b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/keygen/factory/TestHoodieSparkKeyGeneratorFactory.java @@ -25,8 +25,8 @@ import org.apache.hudi.keygen.SimpleKeyGenerator; import org.apache.hudi.keygen.TestComplexKeyGenerator; import org.apache.hudi.keygen.constant.KeyGeneratorOptions; - import org.apache.hudi.keygen.constant.KeyGeneratorType; + import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; @@ -58,7 +58,7 @@ public void testKeyGeneratorFactory() throws IOException { // set both class name and keyGenerator type props.put(HoodieWriteConfig.KEYGENERATOR_TYPE.key(), KeyGeneratorType.CUSTOM.name()); KeyGenerator keyGenerator3 = HoodieSparkKeyGeneratorFactory.createKeyGenerator(props); - // KEYGENERATOR_TYPE_PROP was overitten by KEYGENERATOR_CLASS_PROP + // KEYGENERATOR_TYPE_PROP was overwritten by KEYGENERATOR_CLASS_PROP Assertions.assertEquals(SimpleKeyGenerator.class.getName(), keyGenerator3.getClass().getName()); // set wrong class name diff --git a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/payload/TestAWSDmsAvroPayload.java b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/payload/TestAWSDmsAvroPayload.java index 802096a3a74e1..cf3d9a94d1be2 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/payload/TestAWSDmsAvroPayload.java +++ b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/payload/TestAWSDmsAvroPayload.java @@ -25,7 +25,6 @@ import org.apache.avro.generic.GenericData; import org.apache.avro.generic.GenericRecord; import org.apache.avro.generic.IndexedRecord; - import org.junit.jupiter.api.Test; import static org.junit.jupiter.api.Assertions.assertFalse; @@ -98,7 +97,7 @@ public void testDelete() { try { Option outputPayload = 
payload.combineAndGetUpdateValue(oldRecord, avroSchema); - // expect nothing to be comitted to table + // expect nothing to be committed to table assertFalse(outputPayload.isPresent()); } catch (Exception e) { fail("Unexpected exception"); @@ -123,7 +122,7 @@ public void testPreCombineWithDelete() { try { OverwriteWithLatestAvroPayload output = payload.preCombine(insertPayload); Option outputPayload = output.getInsertValue(avroSchema); - // expect nothing to be comitted to table + // expect nothing to be committed to table assertFalse(outputPayload.isPresent()); } catch (Exception e) { fail("Unexpected exception"); diff --git a/hudi-spark-datasource/hudi-spark/src/test/resources/index/zorder/another-input-table-json/part-00000-7e680484-e7e1-48b6-8289-1a7c483b530b-c000.json b/hudi-spark-datasource/hudi-spark/src/test/resources/index/zorder/another-input-table-json/part-00000-7e680484-e7e1-48b6-8289-1a7c483b530b-c000.json new file mode 100644 index 0000000000000..59b3ff043a8ec --- /dev/null +++ b/hudi-spark-datasource/hudi-spark/src/test/resources/index/zorder/another-input-table-json/part-00000-7e680484-e7e1-48b6-8289-1a7c483b530b-c000.json @@ -0,0 +1,10 @@ +{"c1":770,"c2":" 770sdc","c3":335.770,"c4":"2021-11-18T23:34:44.201-08:00","c5":78,"c6":"2020-01-15","c7":"Ag==","c8":9} +{"c1":768,"c2":" 768sdc","c3":64.768,"c4":"2021-11-18T23:34:44.201-08:00","c5":78,"c6":"2020-10-13","c7":"AA==","c8":9} +{"c1":431,"c2":" 431sdc","c3":153.431,"c4":"2021-11-18T23:34:44.186-08:00","c5":44,"c6":"2020-03-12","c7":"rw==","c8":9} +{"c1":427,"c2":" 427sdc","c3":246.427,"c4":"2021-11-18T23:34:44.186-08:00","c5":44,"c6":"2020-10-08","c7":"qw==","c8":9} +{"c1":328,"c2":" 328sdc","c3":977.328,"c4":"2021-11-18T23:34:44.181-08:00","c5":34,"c6":"2020-10-21","c7":"SA==","c8":9} +{"c1":320,"c2":" 320sdc","c3":230.320,"c4":"2021-11-18T23:34:44.180-08:00","c5":33,"c6":"2020-02-13","c7":"QA==","c8":9} +{"c1":317,"c2":" 317sdc","c3":580.317,"c4":"2021-11-18T23:34:44.180-08:00","c5":33,"c6":"2020-10-10","c7":"PQ==","c8":9} +{"c1":308,"c2":" 308sdc","c3":375.308,"c4":"2021-11-18T23:34:44.180-08:00","c5":32,"c6":"2020-01-01","c7":"NA==","c8":9} +{"c1":304,"c2":" 304sdc","c3":904.304,"c4":"2021-11-18T23:34:44.179-08:00","c5":32,"c6":"2020-08-25","c7":"MA==","c8":9} +{"c1":300,"c2":" 300sdc","c3":398.300,"c4":"2021-11-18T23:34:44.179-08:00","c5":31,"c6":"2020-04-21","c7":"LA==","c8":9} \ No newline at end of file diff --git a/hudi-spark-datasource/hudi-spark/src/test/resources/index/zorder/another-input-table-json/part-00001-7e680484-e7e1-48b6-8289-1a7c483b530b-c000.json b/hudi-spark-datasource/hudi-spark/src/test/resources/index/zorder/another-input-table-json/part-00001-7e680484-e7e1-48b6-8289-1a7c483b530b-c000.json new file mode 100644 index 0000000000000..c5a11067c9782 --- /dev/null +++ b/hudi-spark-datasource/hudi-spark/src/test/resources/index/zorder/another-input-table-json/part-00001-7e680484-e7e1-48b6-8289-1a7c483b530b-c000.json @@ -0,0 +1,10 @@ +{"c1":719,"c2":" 719sdc","c3":707.719,"c4":"2021-11-18T23:34:44.199-08:00","c5":73,"c6":"2020-05-20","c7":"zw==","c8":9} +{"c1":715,"c2":" 715sdc","c3":777.715,"c4":"2021-11-18T23:34:44.199-08:00","c5":73,"c6":"2020-01-16","c7":"yw==","c8":9} +{"c1":579,"c2":" 579sdc","c3":958.579,"c4":"2021-11-18T23:34:44.193-08:00","c5":59,"c6":"2020-08-20","c7":"Qw==","c8":9} +{"c1":568,"c2":" 568sdc","c3":667.568,"c4":"2021-11-18T23:34:44.193-08:00","c5":58,"c6":"2020-08-09","c7":"OA==","c8":9} +{"c1":367,"c2":" 
367sdc","c3":791.367,"c4":"2021-11-18T23:34:44.183-08:00","c5":38,"c6":"2020-05-04","c7":"bw==","c8":9} +{"c1":364,"c2":" 364sdc","c3":264.364,"c4":"2021-11-18T23:34:44.183-08:00","c5":38,"c6":"2020-02-01","c7":"bA==","c8":9} +{"c1":250,"c2":" 250sdc","c3":624.250,"c4":"2021-11-18T23:34:44.176-08:00","c5":26,"c6":"2020-09-27","c7":"+g==","c8":9} +{"c1":249,"c2":" 249sdc","c3":579.249,"c4":"2021-11-18T23:34:44.176-08:00","c5":26,"c6":"2020-08-26","c7":"+Q==","c8":9} +{"c1":246,"c2":" 246sdc","c3":413.246,"c4":"2021-11-18T23:34:44.176-08:00","c5":26,"c6":"2020-05-23","c7":"9g==","c8":9} +{"c1":125,"c2":" 125sdc","c3":153.125,"c4":"2021-11-18T23:34:44.169-08:00","c5":14,"c6":"2020-05-14","c7":"fQ==","c8":9} \ No newline at end of file diff --git a/hudi-spark-datasource/hudi-spark/src/test/resources/index/zorder/another-input-table-json/part-00002-7e680484-e7e1-48b6-8289-1a7c483b530b-c000.json b/hudi-spark-datasource/hudi-spark/src/test/resources/index/zorder/another-input-table-json/part-00002-7e680484-e7e1-48b6-8289-1a7c483b530b-c000.json new file mode 100644 index 0000000000000..585eb31329e62 --- /dev/null +++ b/hudi-spark-datasource/hudi-spark/src/test/resources/index/zorder/another-input-table-json/part-00002-7e680484-e7e1-48b6-8289-1a7c483b530b-c000.json @@ -0,0 +1,10 @@ +{"c1":486,"c2":" 486sdc","c3":278.486,"c4":"2021-11-18T23:34:44.189-08:00","c5":50,"c6":"2020-03-11","c7":"5g==","c8":9} +{"c1":483,"c2":" 483sdc","c3":162.483,"c4":"2021-11-18T23:34:44.189-08:00","c5":49,"c6":"2020-11-08","c7":"4w==","c8":9} +{"c1":224,"c2":" 224sdc","c3":294.224,"c4":"2021-11-18T23:34:44.175-08:00","c5":24,"c6":"2020-05-01","c7":"4A==","c8":9} +{"c1":118,"c2":" 118sdc","c3":204.118,"c4":"2021-11-18T23:34:44.168-08:00","c5":13,"c6":"2020-09-07","c7":"dg==","c8":9} +{"c1":111,"c2":" 111sdc","c3":82.111,"c4":"2021-11-18T23:34:44.168-08:00","c5":12,"c6":"2020-02-28","c7":"bw==","c8":9} +{"c1":79,"c2":" 79sdc","c3":198.790,"c4":"2021-11-18T23:34:44.166-08:00","c5":9,"c6":"2020-03-24","c7":"Tw==","c8":9} +{"c1":77,"c2":" 77sdc","c3":619.770,"c4":"2021-11-18T23:34:44.166-08:00","c5":9,"c6":"2020-01-22","c7":"TQ==","c8":9} +{"c1":76,"c2":" 76sdc","c3":315.760,"c4":"2021-11-18T23:34:44.166-08:00","c5":9,"c6":"2020-11-21","c7":"TA==","c8":9} +{"c1":60,"c2":" 60sdc","c3":326.600,"c4":"2021-11-18T23:34:44.164-08:00","c5":7,"c6":"2020-06-05","c7":"PA==","c8":9} +{"c1":59,"c2":" 59sdc","c3":771.590,"c4":"2021-11-18T23:34:44.164-08:00","c5":7,"c6":"2020-05-04","c7":"Ow==","c8":9} diff --git a/hudi-spark-datasource/hudi-spark/src/test/resources/index/zorder/another-input-table-json/part-00003-7e680484-e7e1-48b6-8289-1a7c483b530b-c000.json b/hudi-spark-datasource/hudi-spark/src/test/resources/index/zorder/another-input-table-json/part-00003-7e680484-e7e1-48b6-8289-1a7c483b530b-c000.json new file mode 100644 index 0000000000000..2e37e6a180eba --- /dev/null +++ b/hudi-spark-datasource/hudi-spark/src/test/resources/index/zorder/another-input-table-json/part-00003-7e680484-e7e1-48b6-8289-1a7c483b530b-c000.json @@ -0,0 +1,10 @@ +{"c1":272,"c2":" 272sdc","c3":979.272,"c4":"2021-11-18T23:34:44.178-08:00","c5":28,"c6":"2020-09-21","c7":"EA==","c8":9} +{"c1":258,"c2":" 258sdc","c3":627.258,"c4":"2021-11-18T23:34:44.177-08:00","c5":27,"c6":"2020-06-07","c7":"Ag==","c8":9} +{"c1":240,"c2":" 240sdc","c3":880.240,"c4":"2021-11-18T23:34:44.176-08:00","c5":25,"c6":"2020-10-17","c7":"8A==","c8":9} +{"c1":236,"c2":" 236sdc","c3":576.236,"c4":"2021-11-18T23:34:44.176-08:00","c5":25,"c6":"2020-06-13","c7":"7A==","c8":9} 
+{"c1":137,"c2":" 137sdc","c3":597.137,"c4":"2021-11-18T23:34:44.170-08:00","c5":15,"c6":"2020-06-26","c7":"iQ==","c8":9} +{"c1":134,"c2":" 134sdc","c3":802.134,"c4":"2021-11-18T23:34:44.170-08:00","c5":15,"c6":"2020-03-23","c7":"hg==","c8":9} +{"c1":131,"c2":" 131sdc","c3":959.131,"c4":"2021-11-18T23:34:44.169-08:00","c5":14,"c6":"2020-11-20","c7":"gw==","c8":9} +{"c1":129,"c2":" 129sdc","c3":430.129,"c4":"2021-11-18T23:34:44.169-08:00","c5":14,"c6":"2020-09-18","c7":"gQ==","c8":9} +{"c1":24,"c2":" 24sdc","c3":867.240,"c4":"2021-11-18T23:34:44.161-08:00","c5":4,"c6":"2020-03-25","c7":"GA==","c8":9} +{"c1":8,"c2":" 8sdc","c3":977.800,"c4":"2021-11-18T23:34:44.159-08:00","c5":2,"c6":"2020-09-09","c7":"CA==","c8":9} diff --git a/hudi-spark-datasource/hudi-spark/src/test/resources/index/zorder/input-table-json/part-00000-4468afca-8a37-4ae8-a150-0c2fd3361080-c000.json b/hudi-spark-datasource/hudi-spark/src/test/resources/index/zorder/input-table-json/part-00000-4468afca-8a37-4ae8-a150-0c2fd3361080-c000.json new file mode 100644 index 0000000000000..43d89698c40ba --- /dev/null +++ b/hudi-spark-datasource/hudi-spark/src/test/resources/index/zorder/input-table-json/part-00000-4468afca-8a37-4ae8-a150-0c2fd3361080-c000.json @@ -0,0 +1,10 @@ +{"c1":323,"c2":" 323sdc","c3":738.323,"c4":"2021-11-19T20:40:55.522-08:00","c5":33,"c6":"2020-05-16","c7":"Qw==","c8":9} +{"c1":326,"c2":" 326sdc","c3":481.326,"c4":"2021-11-19T20:40:55.522-08:00","c5":34,"c6":"2020-08-19","c7":"Rg==","c8":9} +{"c1":555,"c2":" 555sdc","c3":791.555,"c4":"2021-11-19T20:40:55.535-08:00","c5":57,"c6":"2020-06-24","c7":"Kw==","c8":9} +{"c1":556,"c2":" 556sdc","c3":100.556,"c4":"2021-11-19T20:40:55.535-08:00","c5":57,"c6":"2020-07-25","c7":"LA==","c8":9} +{"c1":562,"c2":" 562sdc","c3":100.562,"c4":"2021-11-19T20:40:55.535-08:00","c5":57,"c6":"2020-02-03","c7":"Mg==","c8":9} +{"c1":619,"c2":" 619sdc","c3":284.619,"c4":"2021-11-19T20:40:55.537-08:00","c5":63,"c6":"2020-04-04","c7":"aw==","c8":9} +{"c1":624,"c2":" 624sdc","c3":783.624,"c4":"2021-11-19T20:40:55.537-08:00","c5":64,"c6":"2020-09-09","c7":"cA==","c8":9} +{"c1":633,"c2":" 633sdc","c3":706.633,"c4":"2021-11-19T20:40:55.538-08:00","c5":64,"c6":"2020-07-18","c7":"eQ==","c8":9} +{"c1":638,"c2":" 638sdc","c3":811.638,"c4":"2021-11-19T20:40:55.538-08:00","c5":65,"c6":"2020-01-23","c7":"fg==","c8":9} +{"c1":639,"c2":" 639sdc","c3":299.639,"c4":"2021-11-19T20:40:55.538-08:00","c5":65,"c6":"2020-02-24","c7":"fw==","c8":9} diff --git a/hudi-spark-datasource/hudi-spark/src/test/resources/index/zorder/input-table-json/part-00001-4468afca-8a37-4ae8-a150-0c2fd3361080-c000.json b/hudi-spark-datasource/hudi-spark/src/test/resources/index/zorder/input-table-json/part-00001-4468afca-8a37-4ae8-a150-0c2fd3361080-c000.json new file mode 100644 index 0000000000000..7537986a1f7cc --- /dev/null +++ b/hudi-spark-datasource/hudi-spark/src/test/resources/index/zorder/input-table-json/part-00001-4468afca-8a37-4ae8-a150-0c2fd3361080-c000.json @@ -0,0 +1,10 @@ +{"c1":74,"c2":" 74sdc","c3":38.740,"c4":"2021-11-19T20:40:55.507-08:00","c5":9,"c6":"2020-09-19","c7":"Sg==","c8":9} +{"c1":181,"c2":" 181sdc","c3":754.181,"c4":"2021-11-19T20:40:55.514-08:00","c5":19,"c6":"2020-06-14","c7":"tQ==","c8":9} +{"c1":212,"c2":" 212sdc","c3":633.212,"c4":"2021-11-19T20:40:55.516-08:00","c5":22,"c6":"2020-04-17","c7":"1A==","c8":9} +{"c1":213,"c2":" 213sdc","c3":980.213,"c4":"2021-11-19T20:40:55.516-08:00","c5":22,"c6":"2020-05-18","c7":"1Q==","c8":9} +{"c1":428,"c2":" 
428sdc","c3":550.428,"c4":"2021-11-19T20:40:55.528-08:00","c5":44,"c6":"2020-11-09","c7":"rA==","c8":9} +{"c1":429,"c2":" 429sdc","c3":799.429,"c4":"2021-11-19T20:40:55.528-08:00","c5":44,"c6":"2020-01-10","c7":"rQ==","c8":9} +{"c1":430,"c2":" 430sdc","c3":76.430,"c4":"2021-11-19T20:40:55.528-08:00","c5":44,"c6":"2020-02-11","c7":"rg==","c8":9} +{"c1":539,"c2":" 539sdc","c3":866.539,"c4":"2021-11-19T20:40:55.534-08:00","c5":55,"c6":"2020-01-08","c7":"Gw==","c8":9} +{"c1":552,"c2":" 552sdc","c3":382.552,"c4":"2021-11-19T20:40:55.535-08:00","c5":56,"c6":"2020-03-21","c7":"KA==","c8":9} +{"c1":559,"c2":" 559sdc","c3":699.559,"c4":"2021-11-19T20:40:55.535-08:00","c5":57,"c6":"2020-10-28","c7":"Lw==","c8":9} diff --git a/hudi-spark-datasource/hudi-spark/src/test/resources/index/zorder/input-table-json/part-00002-4468afca-8a37-4ae8-a150-0c2fd3361080-c000.json b/hudi-spark-datasource/hudi-spark/src/test/resources/index/zorder/input-table-json/part-00002-4468afca-8a37-4ae8-a150-0c2fd3361080-c000.json new file mode 100644 index 0000000000000..7f171d3b7f575 --- /dev/null +++ b/hudi-spark-datasource/hudi-spark/src/test/resources/index/zorder/input-table-json/part-00002-4468afca-8a37-4ae8-a150-0c2fd3361080-c000.json @@ -0,0 +1,10 @@ +{"c1":355,"c2":" 355sdc","c3":994.355,"c4":"2021-11-19T20:40:55.524-08:00","c5":37,"c6":"2020-04-20","c7":"Yw==","c8":9} +{"c1":358,"c2":" 358sdc","c3":975.358,"c4":"2021-11-19T20:40:55.524-08:00","c5":37,"c6":"2020-07-23","c7":"Zg==","c8":9} +{"c1":769,"c2":" 769sdc","c3":919.769,"c4":"2021-11-19T20:40:55.543-08:00","c5":78,"c6":"2020-11-14","c7":"AQ==","c8":9} +{"c1":882,"c2":" 882sdc","c3":374.882,"c4":"2021-11-19T20:40:55.547-08:00","c5":89,"c6":"2020-03-15","c7":"cg==","c8":9} +{"c1":892,"c2":" 892sdc","c3":787.892,"c4":"2021-11-19T20:40:55.547-08:00","c5":90,"c6":"2020-02-25","c7":"fA==","c8":9} +{"c1":917,"c2":" 917sdc","c3":912.917,"c4":"2021-11-19T20:40:55.548-08:00","c5":93,"c6":"2020-05-22","c7":"lQ==","c8":9} +{"c1":932,"c2":" 932sdc","c3":990.932,"c4":"2021-11-19T20:40:55.549-08:00","c5":94,"c6":"2020-09-09","c7":"pA==","c8":9} +{"c1":933,"c2":" 933sdc","c3":510.933,"c4":"2021-11-19T20:40:55.549-08:00","c5":94,"c6":"2020-10-10","c7":"pQ==","c8":9} +{"c1":943,"c2":" 943sdc","c3":601.943,"c4":"2021-11-19T20:40:55.549-08:00","c5":95,"c6":"2020-09-20","c7":"rw==","c8":9} +{"c1":945,"c2":" 945sdc","c3":790.945,"c4":"2021-11-19T20:40:55.549-08:00","c5":96,"c6":"2020-11-22","c7":"sQ==","c8":9} \ No newline at end of file diff --git a/hudi-spark-datasource/hudi-spark/src/test/resources/index/zorder/input-table-json/part-00003-4468afca-8a37-4ae8-a150-0c2fd3361080-c000.json b/hudi-spark-datasource/hudi-spark/src/test/resources/index/zorder/input-table-json/part-00003-4468afca-8a37-4ae8-a150-0c2fd3361080-c000.json new file mode 100644 index 0000000000000..48d91417b2c60 --- /dev/null +++ b/hudi-spark-datasource/hudi-spark/src/test/resources/index/zorder/input-table-json/part-00003-4468afca-8a37-4ae8-a150-0c2fd3361080-c000.json @@ -0,0 +1,10 @@ +{"c1":0,"c2":" 0sdc","c3":19.000,"c4":"2021-11-19T20:40:55.339-08:00","c5":1,"c6":"2020-01-01","c7":"AA==","c8":9} +{"c1":89,"c2":" 89sdc","c3":759.890,"c4":"2021-11-19T20:40:55.508-08:00","c5":10,"c6":"2020-02-06","c7":"WQ==","c8":9} +{"c1":199,"c2":" 199sdc","c3":315.199,"c4":"2021-11-19T20:40:55.515-08:00","c5":21,"c6":"2020-02-04","c7":"xw==","c8":9} +{"c1":200,"c2":" 200sdc","c3":618.200,"c4":"2021-11-19T20:40:55.515-08:00","c5":21,"c6":"2020-03-05","c7":"yA==","c8":9} +{"c1":309,"c2":" 
309sdc","c3":642.309,"c4":"2021-11-19T20:40:55.521-08:00","c5":32,"c6":"2020-02-02","c7":"NQ==","c8":9} +{"c1":318,"c2":" 318sdc","c3":106.318,"c4":"2021-11-19T20:40:55.522-08:00","c5":33,"c6":"2020-11-11","c7":"Pg==","c8":9} +{"c1":329,"c2":" 329sdc","c3":200.329,"c4":"2021-11-19T20:40:55.522-08:00","c5":34,"c6":"2020-11-22","c7":"SQ==","c8":9} +{"c1":690,"c2":" 690sdc","c3":854.690,"c4":"2021-11-19T20:40:55.540-08:00","c5":70,"c6":"2020-09-19","c7":"sg==","c8":9} +{"c1":697,"c2":" 697sdc","c3":916.697,"c4":"2021-11-19T20:40:55.540-08:00","c5":71,"c6":"2020-05-26","c7":"uQ==","c8":9} +{"c1":959,"c2":" 959sdc","c3":480.959,"c4":"2021-11-19T20:40:55.550-08:00","c5":97,"c6":"2020-03-08","c7":"vw==","c8":9} \ No newline at end of file diff --git a/hudi-spark-datasource/hudi-spark/src/test/resources/index/zorder/z-index-table-merged.json b/hudi-spark-datasource/hudi-spark/src/test/resources/index/zorder/z-index-table-merged.json index 5c876126ae1d6..00d16c660c503 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/resources/index/zorder/z-index-table-merged.json +++ b/hudi-spark-datasource/hudi-spark/src/test/resources/index/zorder/z-index-table-merged.json @@ -1,8 +1,8 @@ -{"c1_maxValue":1000,"c1_minValue":3,"c1_num_nulls":0,"c2_maxValue":" 993sdc","c2_minValue":" 1000sdc","c2_num_nulls":0,"c3_maxValue":999.348,"c3_minValue":5.102,"c3_num_nulls":0,"c5_maxValue":101,"c5_minValue":1,"c5_num_nulls":0,"c6_maxValue":"2020-11-27","c6_minValue":"2020-01-01","c6_num_nulls":0,"c7_maxValue":"/w==","c7_minValue":"AA==","c7_num_nulls":0,"c8_maxValue":9,"c8_minValue":9,"c8_num_nulls":0,"file":"part-00000-1c8226c2-f2a0-455d-aedd-c544003b0b3d-c000.snappy.parquet"} -{"c1_maxValue":1000,"c1_minValue":0,"c1_num_nulls":0,"c2_maxValue":" 996sdc","c2_minValue":" 0sdc","c2_num_nulls":0,"c3_maxValue":999.779,"c3_minValue":2.992,"c3_num_nulls":0,"c5_maxValue":101,"c5_minValue":1,"c5_num_nulls":0,"c6_maxValue":"2020-11-28","c6_minValue":"2020-01-01","c6_num_nulls":0,"c7_maxValue":"/g==","c7_minValue":"AA==","c7_num_nulls":0,"c8_maxValue":9,"c8_minValue":9,"c8_num_nulls":0,"file":"part-00000-5034d84a-c4c8-4eba-85b5-a52f47e628a7-c000.snappy.parquet"} -{"c1_maxValue":998,"c1_minValue":2,"c1_num_nulls":0,"c2_maxValue":" 998sdc","c2_minValue":" 104sdc","c2_num_nulls":0,"c3_maxValue":997.905,"c3_minValue":0.876,"c3_num_nulls":0,"c5_maxValue":101,"c5_minValue":1,"c5_num_nulls":0,"c6_maxValue":"2020-11-28","c6_minValue":"2020-01-02","c6_num_nulls":0,"c7_maxValue":"/w==","c7_minValue":"Ag==","c7_num_nulls":0,"c8_maxValue":9,"c8_minValue":9,"c8_num_nulls":0,"file":"part-00001-1c8226c2-f2a0-455d-aedd-c544003b0b3d-c000.snappy.parquet"} -{"c1_maxValue":997,"c1_minValue":3,"c1_num_nulls":0,"c2_maxValue":" 9sdc","c2_minValue":" 102sdc","c2_num_nulls":0,"c3_maxValue":990.531,"c3_minValue":2.336,"c3_num_nulls":0,"c5_maxValue":101,"c5_minValue":1,"c5_num_nulls":0,"c6_maxValue":"2020-11-27","c6_minValue":"2020-01-02","c6_num_nulls":0,"c7_maxValue":"/w==","c7_minValue":"AA==","c7_num_nulls":0,"c8_maxValue":9,"c8_minValue":9,"c8_num_nulls":0,"file":"part-00001-5034d84a-c4c8-4eba-85b5-a52f47e628a7-c000.snappy.parquet"} -{"c1_maxValue":994,"c1_minValue":0,"c1_num_nulls":0,"c2_maxValue":" 9sdc","c2_minValue":" 
0sdc","c2_num_nulls":0,"c3_maxValue":997.496,"c3_minValue":7.742,"c3_num_nulls":0,"c5_maxValue":101,"c5_minValue":1,"c5_num_nulls":0,"c6_maxValue":"2020-11-28","c6_minValue":"2020-01-01","c6_num_nulls":0,"c7_maxValue":"/w==","c7_minValue":"AA==","c7_num_nulls":0,"c8_maxValue":9,"c8_minValue":9,"c8_num_nulls":0,"file":"part-00002-1c8226c2-f2a0-455d-aedd-c544003b0b3d-c000.snappy.parquet"} -{"c1_maxValue":999,"c1_minValue":1,"c1_num_nulls":0,"c2_maxValue":" 999sdc","c2_minValue":" 100sdc","c2_num_nulls":0,"c3_maxValue":980.676,"c3_minValue":0.120,"c3_num_nulls":0,"c5_maxValue":101,"c5_minValue":1,"c5_num_nulls":0,"c6_maxValue":"2020-11-28","c6_minValue":"2020-01-03","c6_num_nulls":0,"c7_maxValue":"/w==","c7_minValue":"AA==","c7_num_nulls":0,"c8_maxValue":9,"c8_minValue":9,"c8_num_nulls":0,"file":"part-00002-5034d84a-c4c8-4eba-85b5-a52f47e628a7-c000.snappy.parquet"} -{"c1_maxValue":999,"c1_minValue":1,"c1_num_nulls":0,"c2_maxValue":" 99sdc","c2_minValue":" 10sdc","c2_num_nulls":0,"c3_maxValue":993.940,"c3_minValue":4.598,"c3_num_nulls":0,"c5_maxValue":101,"c5_minValue":1,"c5_num_nulls":0,"c6_maxValue":"2020-11-28","c6_minValue":"2020-01-03","c6_num_nulls":0,"c7_maxValue":"/g==","c7_minValue":"AQ==","c7_num_nulls":0,"c8_maxValue":9,"c8_minValue":9,"c8_num_nulls":0,"file":"part-00003-1c8226c2-f2a0-455d-aedd-c544003b0b3d-c000.snappy.parquet"} -{"c1_maxValue":998,"c1_minValue":6,"c1_num_nulls":0,"c2_maxValue":" 99sdc","c2_minValue":" 111sdc","c2_num_nulls":0,"c3_maxValue":999.282,"c3_minValue":1.217,"c3_num_nulls":0,"c5_maxValue":101,"c5_minValue":2,"c5_num_nulls":0,"c6_maxValue":"2020-11-28","c6_minValue":"2020-01-01","c6_num_nulls":0,"c7_maxValue":"/w==","c7_minValue":"AA==","c7_num_nulls":0,"c8_maxValue":9,"c8_minValue":9,"c8_num_nulls":0,"file":"part-00003-5034d84a-c4c8-4eba-85b5-a52f47e628a7-c000.snappy.parquet"} \ No newline at end of file +{"c1_maxValue":272,"c1_minValue":8,"c1_num_nulls":0,"c2_maxValue":" 8sdc","c2_minValue":" 129sdc","c2_num_nulls":0,"c3_maxValue":979.272,"c3_minValue":430.129,"c3_num_nulls":0,"c5_maxValue":28,"c5_minValue":2,"c5_num_nulls":0,"c6_maxValue":"2020-11-20","c6_minValue":"2020-03-23","c6_num_nulls":0,"c7_maxValue":"8A==","c7_minValue":"Ag==","c7_num_nulls":0,"c8_maxValue":9,"c8_minValue":9,"c8_num_nulls":0,"file":"part-00003-xxx-c000.snappy.parquet"} +{"c1_maxValue":486,"c1_minValue":59,"c1_num_nulls":0,"c2_maxValue":" 79sdc","c2_minValue":" 111sdc","c2_num_nulls":0,"c3_maxValue":771.590,"c3_minValue":82.111,"c3_num_nulls":0,"c5_maxValue":50,"c5_minValue":7,"c5_num_nulls":0,"c6_maxValue":"2020-11-21","c6_minValue":"2020-01-22","c6_num_nulls":0,"c7_maxValue":"5g==","c7_minValue":"Ow==","c7_num_nulls":0,"c8_maxValue":9,"c8_minValue":9,"c8_num_nulls":0,"file":"part-00002-xxx-c000.snappy.parquet"} +{"c1_maxValue":559,"c1_minValue":74,"c1_num_nulls":0,"c2_maxValue":" 74sdc","c2_minValue":" 181sdc","c2_num_nulls":0,"c3_maxValue":980.213,"c3_minValue":38.740,"c3_num_nulls":0,"c5_maxValue":57,"c5_minValue":9,"c5_num_nulls":0,"c6_maxValue":"2020-11-09","c6_minValue":"2020-01-08","c6_num_nulls":0,"c7_maxValue":"1Q==","c7_minValue":"Gw==","c7_num_nulls":0,"c8_maxValue":9,"c8_minValue":9,"c8_num_nulls":0,"file":"part-00001-xxx-c000.snappy.parquet"} +{"c1_maxValue":639,"c1_minValue":323,"c1_num_nulls":0,"c2_maxValue":" 639sdc","c2_minValue":" 
323sdc","c2_num_nulls":0,"c3_maxValue":811.638,"c3_minValue":100.556,"c3_num_nulls":0,"c5_maxValue":65,"c5_minValue":33,"c5_num_nulls":0,"c6_maxValue":"2020-09-09","c6_minValue":"2020-01-23","c6_num_nulls":0,"c7_maxValue":"fw==","c7_minValue":"Kw==","c7_num_nulls":0,"c8_maxValue":9,"c8_minValue":9,"c8_num_nulls":0,"file":"part-00000-xxx-c000.snappy.parquet"} +{"c1_maxValue":719,"c1_minValue":125,"c1_num_nulls":0,"c2_maxValue":" 719sdc","c2_minValue":" 125sdc","c2_num_nulls":0,"c3_maxValue":958.579,"c3_minValue":153.125,"c3_num_nulls":0,"c5_maxValue":73,"c5_minValue":14,"c5_num_nulls":0,"c6_maxValue":"2020-09-27","c6_minValue":"2020-01-16","c6_num_nulls":0,"c7_maxValue":"+g==","c7_minValue":"OA==","c7_num_nulls":0,"c8_maxValue":9,"c8_minValue":9,"c8_num_nulls":0,"file":"part-00001-xxx-c000.snappy.parquet"} +{"c1_maxValue":770,"c1_minValue":300,"c1_num_nulls":0,"c2_maxValue":" 770sdc","c2_minValue":" 300sdc","c2_num_nulls":0,"c3_maxValue":977.328,"c3_minValue":64.768,"c3_num_nulls":0,"c5_maxValue":78,"c5_minValue":31,"c5_num_nulls":0,"c6_maxValue":"2020-10-21","c6_minValue":"2020-01-01","c6_num_nulls":0,"c7_maxValue":"rw==","c7_minValue":"AA==","c7_num_nulls":0,"c8_maxValue":9,"c8_minValue":9,"c8_num_nulls":0,"file":"part-00000-xxx-c000.snappy.parquet"} +{"c1_maxValue":945,"c1_minValue":355,"c1_num_nulls":0,"c2_maxValue":" 945sdc","c2_minValue":" 355sdc","c2_num_nulls":0,"c3_maxValue":994.355,"c3_minValue":374.882,"c3_num_nulls":0,"c5_maxValue":96,"c5_minValue":37,"c5_num_nulls":0,"c6_maxValue":"2020-11-22","c6_minValue":"2020-02-25","c6_num_nulls":0,"c7_maxValue":"sQ==","c7_minValue":"AQ==","c7_num_nulls":0,"c8_maxValue":9,"c8_minValue":9,"c8_num_nulls":0,"file":"part-00002-xxx-c000.snappy.parquet"} +{"c1_maxValue":959,"c1_minValue":0,"c1_num_nulls":0,"c2_maxValue":" 959sdc","c2_minValue":" 0sdc","c2_num_nulls":0,"c3_maxValue":916.697,"c3_minValue":19.000,"c3_num_nulls":0,"c5_maxValue":97,"c5_minValue":1,"c5_num_nulls":0,"c6_maxValue":"2020-11-22","c6_minValue":"2020-01-01","c6_num_nulls":0,"c7_maxValue":"yA==","c7_minValue":"AA==","c7_num_nulls":0,"c8_maxValue":9,"c8_minValue":9,"c8_num_nulls":0,"file":"part-00003-xxx-c000.snappy.parquet"} \ No newline at end of file diff --git a/hudi-spark-datasource/hudi-spark/src/test/resources/index/zorder/z-index-table.json b/hudi-spark-datasource/hudi-spark/src/test/resources/index/zorder/z-index-table.json index 45cb9aaf88c22..a633e3170e108 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/resources/index/zorder/z-index-table.json +++ b/hudi-spark-datasource/hudi-spark/src/test/resources/index/zorder/z-index-table.json @@ -1,4 +1,4 @@ -{"c1_maxValue":1000,"c1_minValue":0,"c1_num_nulls":0,"c2_maxValue":" 996sdc","c2_minValue":" 0sdc","c2_num_nulls":0,"c3_maxValue":999.779,"c3_minValue":2.992,"c3_num_nulls":0,"c5_maxValue":101,"c5_minValue":1,"c5_num_nulls":0,"c6_maxValue":"2020-11-28","c6_minValue":"2020-01-01","c6_num_nulls":0,"c7_maxValue":"/g==","c7_minValue":"AA==","c7_num_nulls":0,"c8_maxValue":9,"c8_minValue":9,"c8_num_nulls":0,"file":"part-00000-5034d84a-c4c8-4eba-85b5-a52f47e628a7-c000.snappy.parquet"} -{"c1_maxValue":997,"c1_minValue":3,"c1_num_nulls":0,"c2_maxValue":" 9sdc","c2_minValue":" 
102sdc","c2_num_nulls":0,"c3_maxValue":990.531,"c3_minValue":2.336,"c3_num_nulls":0,"c5_maxValue":101,"c5_minValue":1,"c5_num_nulls":0,"c6_maxValue":"2020-11-27","c6_minValue":"2020-01-02","c6_num_nulls":0,"c7_maxValue":"/w==","c7_minValue":"AA==","c7_num_nulls":0,"c8_maxValue":9,"c8_minValue":9,"c8_num_nulls":0,"file":"part-00001-5034d84a-c4c8-4eba-85b5-a52f47e628a7-c000.snappy.parquet"} -{"c1_maxValue":999,"c1_minValue":1,"c1_num_nulls":0,"c2_maxValue":" 999sdc","c2_minValue":" 100sdc","c2_num_nulls":0,"c3_maxValue":980.676,"c3_minValue":0.120,"c3_num_nulls":0,"c5_maxValue":101,"c5_minValue":1,"c5_num_nulls":0,"c6_maxValue":"2020-11-28","c6_minValue":"2020-01-03","c6_num_nulls":0,"c7_maxValue":"/w==","c7_minValue":"AA==","c7_num_nulls":0,"c8_maxValue":9,"c8_minValue":9,"c8_num_nulls":0,"file":"part-00002-5034d84a-c4c8-4eba-85b5-a52f47e628a7-c000.snappy.parquet"} -{"c1_maxValue":998,"c1_minValue":6,"c1_num_nulls":0,"c2_maxValue":" 99sdc","c2_minValue":" 111sdc","c2_num_nulls":0,"c3_maxValue":999.282,"c3_minValue":1.217,"c3_num_nulls":0,"c5_maxValue":101,"c5_minValue":2,"c5_num_nulls":0,"c6_maxValue":"2020-11-28","c6_minValue":"2020-01-01","c6_num_nulls":0,"c7_maxValue":"/w==","c7_minValue":"AA==","c7_num_nulls":0,"c8_maxValue":9,"c8_minValue":9,"c8_num_nulls":0,"file":"part-00003-5034d84a-c4c8-4eba-85b5-a52f47e628a7-c000.snappy.parquet"} \ No newline at end of file +{"c1_maxValue":559,"c1_minValue":74,"c1_num_nulls":0,"c2_maxValue":" 74sdc","c2_minValue":" 181sdc","c2_num_nulls":0,"c3_maxValue":980.213,"c3_minValue":38.740,"c3_num_nulls":0,"c5_maxValue":57,"c5_minValue":9,"c5_num_nulls":0,"c6_maxValue":"2020-11-09","c6_minValue":"2020-01-08","c6_num_nulls":0,"c7_maxValue":"1Q==","c7_minValue":"Gw==","c7_num_nulls":0,"c8_maxValue":9,"c8_minValue":9,"c8_num_nulls":0,"file":"part-00001-xxx-c000.snappy.parquet"} +{"c1_maxValue":639,"c1_minValue":323,"c1_num_nulls":0,"c2_maxValue":" 639sdc","c2_minValue":" 323sdc","c2_num_nulls":0,"c3_maxValue":811.638,"c3_minValue":100.556,"c3_num_nulls":0,"c5_maxValue":65,"c5_minValue":33,"c5_num_nulls":0,"c6_maxValue":"2020-09-09","c6_minValue":"2020-01-23","c6_num_nulls":0,"c7_maxValue":"fw==","c7_minValue":"Kw==","c7_num_nulls":0,"c8_maxValue":9,"c8_minValue":9,"c8_num_nulls":0,"file":"part-00000-xxx-c000.snappy.parquet"} +{"c1_maxValue":945,"c1_minValue":355,"c1_num_nulls":0,"c2_maxValue":" 945sdc","c2_minValue":" 355sdc","c2_num_nulls":0,"c3_maxValue":994.355,"c3_minValue":374.882,"c3_num_nulls":0,"c5_maxValue":96,"c5_minValue":37,"c5_num_nulls":0,"c6_maxValue":"2020-11-22","c6_minValue":"2020-02-25","c6_num_nulls":0,"c7_maxValue":"sQ==","c7_minValue":"AQ==","c7_num_nulls":0,"c8_maxValue":9,"c8_minValue":9,"c8_num_nulls":0,"file":"part-00002-xxx-c000.snappy.parquet"} +{"c1_maxValue":959,"c1_minValue":0,"c1_num_nulls":0,"c2_maxValue":" 959sdc","c2_minValue":" 0sdc","c2_num_nulls":0,"c3_maxValue":916.697,"c3_minValue":19.000,"c3_num_nulls":0,"c5_maxValue":97,"c5_minValue":1,"c5_num_nulls":0,"c6_maxValue":"2020-11-22","c6_minValue":"2020-01-01","c6_num_nulls":0,"c7_maxValue":"yA==","c7_minValue":"AA==","c7_num_nulls":0,"c8_maxValue":9,"c8_minValue":9,"c8_num_nulls":0,"file":"part-00003-xxx-c000.snappy.parquet"} \ No newline at end of file diff --git a/hudi-spark-datasource/hudi-spark/src/test/resources/sql-statements.sql b/hudi-spark-datasource/hudi-spark/src/test/resources/sql-statements.sql index 135c83b4b975e..e19dd1eb6b8ba 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/resources/sql-statements.sql +++ 
b/hudi-spark-datasource/hudi-spark/src/test/resources/sql-statements.sql @@ -239,10 +239,6 @@ alter table h2_p add columns(ext0 int); +----------+ | ok | +----------+ -alter table h2_p change column ext0 ext0 bigint; -+----------+ -| ok | -+----------+ # DROP TABLE drop table h0; diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestAvroConversionHelper.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestAvroConversionHelper.scala index e29944529b51b..686d09ccf64fd 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestAvroConversionHelper.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestAvroConversionHelper.scala @@ -18,13 +18,13 @@ package org.apache.hudi -import java.time.LocalDate - import org.apache.avro.Schema import org.apache.avro.generic.GenericData import org.apache.spark.sql.catalyst.expressions.GenericRow import org.scalatest.{FunSuite, Matchers} +import java.time.LocalDate + class TestAvroConversionHelper extends FunSuite with Matchers { val dateSchema = s""" @@ -42,7 +42,7 @@ class TestAvroConversionHelper extends FunSuite with Matchers { test("Logical type: date") { val schema = new Schema.Parser().parse(dateSchema) - val convertor = AvroConversionHelper.createConverterToRow(schema, AvroConversionUtils.convertAvroSchemaToStructType(schema)) + val convertor = AvroConversionUtils.createConverterToRow(schema, AvroConversionUtils.convertAvroSchemaToStructType(schema)) val dateOutputData = dateInputData.map(x => { val record = new GenericData.Record(schema) {{ put("date", x) }} diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestDataSourceDefaults.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestDataSourceDefaults.scala index d3be8c9b3e209..7fc7d318d362f 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestDataSourceDefaults.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestDataSourceDefaults.scala @@ -243,7 +243,7 @@ class TestDataSourceDefaults { val partitionPathProp: String = props.getString(DataSourceWriteOptions.PARTITIONPATH_FIELD.key) val STRUCT_NAME: String = "hoodieRowTopLevelField" val NAMESPACE: String = "hoodieRow" - var converterFn: Function1[Any, Any] = _ + var converterFn: Function1[Row, GenericRecord] = _ override def getKey(record: GenericRecord): HoodieKey = { new HoodieKey(HoodieAvroUtils.getNestedFieldValAsString(record, recordKeyProp, true, false), @@ -251,13 +251,13 @@ class TestDataSourceDefaults { } override def getRecordKey(row: Row): String = { - if (null == converterFn) converterFn = AvroConversionHelper.createConverterToAvro(row.schema, STRUCT_NAME, NAMESPACE) + if (null == converterFn) converterFn = AvroConversionUtils.createConverterToAvro(row.schema, STRUCT_NAME, NAMESPACE) val genericRecord = converterFn.apply(row).asInstanceOf[GenericRecord] getKey(genericRecord).getRecordKey } override def getPartitionPath(row: Row): String = { - if (null == converterFn) converterFn = AvroConversionHelper.createConverterToAvro(row.schema, STRUCT_NAME, NAMESPACE) + if (null == converterFn) converterFn = AvroConversionUtils.createConverterToAvro(row.schema, STRUCT_NAME, NAMESPACE) val genericRecord = converterFn.apply(row).asInstanceOf[GenericRecord] getKey(genericRecord).getPartitionPath } diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestHoodieFileIndex.scala 
b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestHoodieFileIndex.scala index 4896ddf07fda2..fa07c573f2725 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestHoodieFileIndex.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestHoodieFileIndex.scala @@ -18,6 +18,7 @@ package org.apache.hudi import org.apache.hadoop.conf.Configuration + import org.apache.hudi.DataSourceWriteOptions._ import org.apache.hudi.client.HoodieJavaWriteClient import org.apache.hudi.client.common.HoodieJavaEngineContext @@ -26,27 +27,32 @@ import org.apache.hudi.common.engine.EngineType import org.apache.hudi.common.model.{HoodieRecord, HoodieTableType} import org.apache.hudi.common.table.HoodieTableMetaClient import org.apache.hudi.common.table.view.HoodieTableFileSystemView -import org.apache.hudi.common.testutils.{HoodieTestDataGenerator, HoodieTestUtils} import org.apache.hudi.common.testutils.HoodieTestTable.makeNewCommitTime import org.apache.hudi.common.testutils.RawTripTestPayload.recordsToStrings +import org.apache.hudi.common.testutils.{HoodieTestDataGenerator, HoodieTestUtils} import org.apache.hudi.common.util.PartitionPathEncodeUtils import org.apache.hudi.common.util.StringUtils.isNullOrEmpty import org.apache.hudi.config.HoodieWriteConfig import org.apache.hudi.keygen.ComplexKeyGenerator -import org.apache.hudi.keygen.TimestampBasedAvroKeyGenerator.{Config, TimestampType} +import org.apache.hudi.keygen.TimestampBasedAvroKeyGenerator.TimestampType +import org.apache.hudi.keygen.constant.KeyGeneratorOptions.Config import org.apache.hudi.testutils.HoodieClientTestBase + import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.{And, AttributeReference, EqualTo, GreaterThanOrEqual, LessThan, Literal} import org.apache.spark.sql.execution.datasources.PartitionDirectory import org.apache.spark.sql.functions.{lit, struct} import org.apache.spark.sql.types.StringType import org.apache.spark.sql.{DataFrameWriter, Row, SaveMode, SparkSession} + import org.junit.jupiter.api.Assertions.assertEquals import org.junit.jupiter.api.{BeforeEach, Test} import org.junit.jupiter.params.ParameterizedTest import org.junit.jupiter.params.provider.{Arguments, CsvSource, MethodSource, ValueSource} import java.util.Properties + + import scala.collection.JavaConversions._ import scala.collection.JavaConverters._ @@ -213,8 +219,11 @@ class TestHoodieFileIndex extends HoodieClientTestBase { GreaterThanOrEqual(attribute("partition"), literal("2021/03/08")), LessThan(attribute("partition"), literal("2021/03/10")) ) - val prunedPartitions = fileIndex.listFiles(Seq(partitionFilter2), - Seq.empty).map(_.values.toSeq(Seq(StringType)).mkString(",")).toList + val prunedPartitions = fileIndex.listFiles(Seq(partitionFilter2), Seq.empty) + .map(_.values.toSeq(Seq(StringType)) + .mkString(",")) + .toList + .sorted assertEquals(List("2021/03/08", "2021/03/09"), prunedPartitions) } diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestHoodieSparkSqlWriter.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestHoodieSparkSqlWriter.scala index bd520c91f4fa5..b5186fb1ac089 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestHoodieSparkSqlWriter.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestHoodieSparkSqlWriter.scala @@ -32,13 +32,13 @@ import org.apache.hudi.functional.TestBootstrap import org.apache.hudi.hive.HiveSyncConfig 
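// Editorial sketch, restating the change below for clarity: on Spark 3.2 and above the test
// session is expected to register Hudi's catalog as the spark_catalog implementation before the
// SparkSession is built, e.g.
//   val sparkConf = new SparkConf()
//   if (HoodieSparkUtils.gteqSpark3_2) {
//     sparkConf.set("spark.sql.catalog.spark_catalog",
//       "org.apache.spark.sql.hudi.catalog.HoodieCatalog")
//   }
//   SparkSession.builder().config(sparkConf).getOrCreate()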
import org.apache.hudi.keygen.{ComplexKeyGenerator, NonpartitionedKeyGenerator, SimpleKeyGenerator} import org.apache.hudi.testutils.DataSourceTestUtils -import org.apache.spark.SparkContext import org.apache.spark.api.java.JavaSparkContext import org.apache.spark.sql._ import org.apache.spark.sql.functions.{expr, lit} import org.apache.spark.sql.hudi.HoodieSparkSessionExtension import org.apache.spark.sql.hudi.command.SqlKeyGenerator import org.apache.spark.sql.internal.{SQLConf, StaticSQLConf} +import org.apache.spark.{SparkConf, SparkContext} import org.junit.jupiter.api.Assertions.{assertEquals, assertFalse, assertTrue, fail} import org.junit.jupiter.api.{AfterEach, BeforeEach, Test} import org.junit.jupiter.params.ParameterizedTest @@ -94,11 +94,17 @@ class TestHoodieSparkSqlWriter { * Utility method for initializing the spark context. */ def initSparkContext(): Unit = { + val sparkConf = new SparkConf() + if (HoodieSparkUtils.gteqSpark3_2) { + sparkConf.set("spark.sql.catalog.spark_catalog", + "org.apache.spark.sql.hudi.catalog.HoodieCatalog") + } spark = SparkSession.builder() .appName(hoodieFooTableName) .master("local[2]") .withExtensions(new HoodieSparkSessionExtension) .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer") + .config(sparkConf) .getOrCreate() sc = spark.sparkContext sc.setLogLevel("ERROR") @@ -543,6 +549,12 @@ class TestHoodieSparkSqlWriter { // Verify that HoodieWriteClient is closed correctly verify(client, times(1)).close() + + val ignoreResult = HoodieSparkSqlWriter.bootstrap(sqlContext, SaveMode.Ignore, fooTableModifier, spark.emptyDataFrame, Option.empty, + Option(client)) + assertFalse(ignoreResult) + verify(client, times(2)).close() + // fetch all records from parquet files generated from write to hudi val actualDf = sqlContext.read.parquet(tempBasePath) assert(actualDf.count == 100) @@ -815,33 +827,32 @@ class TestHoodieSparkSqlWriter { /** * Test case for a non-partitioned table with metadata table support. 
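 * Editorial note: the rewritten version below is parameterized over both COPY_ON_WRITE and
 * MERGE_ON_READ via JUnit's @EnumSource on HoodieTableType, replacing the hand-rolled
 * List(...).foreach over the two table-type constants in the removed version.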
*/ - @Test - def testNonPartitionTableWithMetatableSupport(): Unit = { - List(DataSourceWriteOptions.COW_TABLE_TYPE_OPT_VAL, DataSourceWriteOptions.MOR_TABLE_TYPE_OPT_VAL).foreach { tableType => - val options = Map(DataSourceWriteOptions.TABLE_TYPE.key -> tableType, - DataSourceWriteOptions.PRECOMBINE_FIELD.key -> "col3", - DataSourceWriteOptions.RECORDKEY_FIELD.key -> "keyid", - DataSourceWriteOptions.PARTITIONPATH_FIELD.key -> "", - DataSourceWriteOptions.KEYGENERATOR_CLASS_NAME.key -> "org.apache.hudi.keygen.NonpartitionedKeyGenerator", - HoodieWriteConfig.TBL_NAME.key -> "hoodie_test", - "hoodie.insert.shuffle.parallelism" -> "1", - "hoodie.metadata.enable" -> "true") - val df = spark.range(0, 10).toDF("keyid") - .withColumn("col3", expr("keyid")) - .withColumn("age", expr("keyid + 1000")) - df.write.format("hudi") - .options(options.updated(DataSourceWriteOptions.OPERATION.key, "insert")) - .mode(SaveMode.Overwrite).save(tempBasePath) - // upsert same record again - val df_update = spark.range(0, 10).toDF("keyid") - .withColumn("col3", expr("keyid")) - .withColumn("age", expr("keyid + 2000")) - df_update.write.format("hudi") - .options(options.updated(DataSourceWriteOptions.OPERATION.key, "upsert")) - .mode(SaveMode.Append).save(tempBasePath) - assert(spark.read.format("hudi").load(tempBasePath).count() == 10) - assert(spark.read.format("hudi").load(tempBasePath).where("age >= 2000").count() == 10) - } + @ParameterizedTest + @EnumSource(value = classOf[HoodieTableType]) + def testNonPartitionTableWithMetatableSupport(tableType: HoodieTableType): Unit = { + val options = Map(DataSourceWriteOptions.TABLE_TYPE.key -> tableType.name, + DataSourceWriteOptions.PRECOMBINE_FIELD.key -> "col3", + DataSourceWriteOptions.RECORDKEY_FIELD.key -> "keyid", + DataSourceWriteOptions.PARTITIONPATH_FIELD.key -> "", + DataSourceWriteOptions.KEYGENERATOR_CLASS_NAME.key -> "org.apache.hudi.keygen.NonpartitionedKeyGenerator", + HoodieWriteConfig.TBL_NAME.key -> "hoodie_test", + "hoodie.insert.shuffle.parallelism" -> "1", + "hoodie.metadata.enable" -> "true") + val df = spark.range(0, 10).toDF("keyid") + .withColumn("col3", expr("keyid")) + .withColumn("age", expr("keyid + 1000")) + df.write.format("hudi") + .options(options.updated(DataSourceWriteOptions.OPERATION.key, "insert")) + .mode(SaveMode.Overwrite).save(tempBasePath) + // upsert same record again + val df_update = spark.range(0, 10).toDF("keyid") + .withColumn("col3", expr("keyid")) + .withColumn("age", expr("keyid + 2000")) + df_update.write.format("hudi") + .options(options.updated(DataSourceWriteOptions.OPERATION.key, "upsert")) + .mode(SaveMode.Append).save(tempBasePath) + assert(spark.read.format("hudi").load(tempBasePath).count() == 10) + assert(spark.read.format("hudi").load(tempBasePath).where("age >= 2000").count() == 10) } /** diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestHoodieSparkUtils.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestHoodieSparkUtils.scala index ad974286ac5a7..9f00b5dcdf64f 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestHoodieSparkUtils.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestHoodieSparkUtils.scala @@ -18,11 +18,13 @@ package org.apache.hudi +import org.apache.avro.Schema import org.apache.avro.generic.GenericRecord import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path +import org.apache.hudi.exception.SchemaCompatibilityException import 
org.apache.hudi.testutils.DataSourceTestUtils -import org.apache.spark.sql.types.StructType +import org.apache.spark.sql.types.{StructType, TimestampType} import org.apache.spark.sql.{Row, SparkSession} import org.junit.jupiter.api.Assertions._ import org.junit.jupiter.api.Test @@ -185,7 +187,7 @@ class TestHoodieSparkUtils { val genRecRDD3 = HoodieSparkUtils.createRdd(df1, "test_struct_name", "test_namespace", true, org.apache.hudi.common.util.Option.of(schema2)) assert(genRecRDD3.collect()(0).getSchema.equals(schema2)) - genRecRDD3.foreach(entry => assertNull(entry.get("nonNullableInnerStruct2"))) + genRecRDD3.foreach(entry => assertNull(entry.get("nullableInnerStruct2"))) val innerStruct3 = new StructType().add("innerKey","string",false).add("innerValue", "long", true) .add("new_nested_col","string",true) @@ -226,12 +228,36 @@ class TestHoodieSparkUtils { fail("createRdd should fail, because records don't have a column which is not nullable in the passed in schema") } catch { case e: Exception => - e.getCause.asInstanceOf[NullPointerException] - assertTrue(e.getMessage.contains("null of string in field new_nested_col of")) + val cause = e.getCause + assertTrue(cause.isInstanceOf[SchemaCompatibilityException]) + assertTrue(e.getMessage.contains("Unable to validate the rewritten record {\"innerKey\": \"innerKey1_2\", \"innerValue\": 2} against schema")) } spark.stop() } + @Test + def testGetRequiredSchema(): Unit = { + val avroSchemaString = "{\"type\":\"record\",\"name\":\"record\"," + + "\"fields\":[{\"name\":\"_hoodie_commit_time\",\"type\":[\"null\",\"string\"],\"doc\":\"\",\"default\":null}," + + "{\"name\":\"_hoodie_commit_seqno\",\"type\":[\"null\",\"string\"],\"doc\":\"\",\"default\":null}," + + "{\"name\":\"_hoodie_record_key\",\"type\":[\"null\",\"string\"],\"doc\":\"\",\"default\":null}," + + "{\"name\":\"_hoodie_partition_path\",\"type\":[\"null\",\"string\"],\"doc\":\"\",\"default\":null}," + + "{\"name\":\"_hoodie_file_name\",\"type\":[\"null\",\"string\"],\"doc\":\"\",\"default\":null}," + + "{\"name\":\"uuid\",\"type\":\"string\"},{\"name\":\"name\",\"type\":[\"null\",\"string\"],\"default\":null}," + + "{\"name\":\"age\",\"type\":[\"null\",\"int\"],\"default\":null}," + + "{\"name\":\"ts\",\"type\":[\"null\",{\"type\":\"long\",\"logicalType\":\"timestamp-millis\"}],\"default\":null}," + + "{\"name\":\"partition\",\"type\":[\"null\",\"string\"],\"default\":null}]}" + + val tableAvroSchema = new Schema.Parser().parse(avroSchemaString) + + val (requiredAvroSchema, requiredStructSchema) = + HoodieSparkUtils.getRequiredSchema(tableAvroSchema, Array("ts")) + + assertEquals("timestamp-millis", + requiredAvroSchema.getField("ts").schema().getTypes.get(1).getLogicalType.getName) + assertEquals(TimestampType, requiredStructSchema.fields(0).dataType) + } + def convertRowListToSeq(inputList: java.util.List[Row]): Seq[Row] = JavaConverters.asScalaIteratorConverter(inputList.iterator).asScala.toSeq } diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestMergeOnReadSnapshotRelation.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestMergeOnReadSnapshotRelation.scala deleted file mode 100644 index 80a883a001d98..0000000000000 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestMergeOnReadSnapshotRelation.scala +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hudi - -import org.apache.avro.Schema -import org.apache.spark.sql.types.TimestampType -import org.junit.jupiter.api.Assertions.assertEquals -import org.junit.jupiter.api.Test - -class TestMergeOnReadSnapshotRelation { - - @Test - def testGetRequiredSchema(): Unit = { - val avroSchemaString = "{\"type\":\"record\",\"name\":\"record\"," + - "\"fields\":[{\"name\":\"_hoodie_commit_time\",\"type\":[\"null\",\"string\"],\"doc\":\"\",\"default\":null}," + - "{\"name\":\"_hoodie_commit_seqno\",\"type\":[\"null\",\"string\"],\"doc\":\"\",\"default\":null}," + - "{\"name\":\"_hoodie_record_key\",\"type\":[\"null\",\"string\"],\"doc\":\"\",\"default\":null}," + - "{\"name\":\"_hoodie_partition_path\",\"type\":[\"null\",\"string\"],\"doc\":\"\",\"default\":null}," + - "{\"name\":\"_hoodie_file_name\",\"type\":[\"null\",\"string\"],\"doc\":\"\",\"default\":null}," + - "{\"name\":\"uuid\",\"type\":\"string\"},{\"name\":\"name\",\"type\":[\"null\",\"string\"],\"default\":null}," + - "{\"name\":\"age\",\"type\":[\"null\",\"int\"],\"default\":null}," + - "{\"name\":\"ts\",\"type\":[\"null\",{\"type\":\"long\",\"logicalType\":\"timestamp-millis\"}],\"default\":null}," + - "{\"name\":\"partition\",\"type\":[\"null\",\"string\"],\"default\":null}]}" - - val tableAvroSchema = new Schema.Parser().parse(avroSchemaString) - - val (requiredAvroSchema, requiredStructSchema) = - MergeOnReadSnapshotRelation.getRequiredSchema(tableAvroSchema, Array("ts")) - - assertEquals("timestamp-millis", - requiredAvroSchema.getField("ts").schema().getTypes.get(1).getLogicalType.getName) - assertEquals(TimestampType, requiredStructSchema.fields(0).dataType) - } -} diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestTableSchemaResolverWithSparkSQL.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestTableSchemaResolverWithSparkSQL.scala new file mode 100644 index 0000000000000..85e1925bc1655 --- /dev/null +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestTableSchemaResolverWithSparkSQL.scala @@ -0,0 +1,236 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi + +import org.apache.avro.Schema +import org.apache.commons.io.FileUtils +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.Path +import org.apache.hudi.avro.HoodieAvroUtils +import org.apache.hudi.avro.model.HoodieMetadataRecord +import org.apache.hudi.common.model._ +import org.apache.hudi.common.table.{HoodieTableMetaClient, TableSchemaResolver} +import org.apache.hudi.config.HoodieWriteConfig +import org.apache.hudi.testutils.DataSourceTestUtils +import org.apache.spark.SparkContext +import org.apache.spark.sql._ +import org.apache.spark.sql.hudi.HoodieSparkSessionExtension +import org.junit.jupiter.api.Assertions.{assertEquals, assertTrue} +import org.junit.jupiter.api.{AfterEach, BeforeEach, Tag, Test} +import org.junit.jupiter.params.ParameterizedTest +import org.junit.jupiter.params.provider.CsvSource + +import scala.collection.JavaConverters + +/** + * Test suite for TableSchemaResolver with SparkSqlWriter. + */ +@Tag("functional") +class TestTableSchemaResolverWithSparkSQL { + var spark: SparkSession = _ + var sqlContext: SQLContext = _ + var sc: SparkContext = _ + var tempPath: java.nio.file.Path = _ + var tempBootStrapPath: java.nio.file.Path = _ + var hoodieFooTableName = "hoodie_foo_tbl" + var tempBasePath: String = _ + var commonTableModifier: Map[String, String] = Map() + + case class StringLongTest(uuid: String, ts: Long) + + /** + * Setup method running before each test. + */ + @BeforeEach + def setUp(): Unit = { + initSparkContext() + tempPath = java.nio.file.Files.createTempDirectory("hoodie_test_path") + tempBootStrapPath = java.nio.file.Files.createTempDirectory("hoodie_test_bootstrap") + tempBasePath = tempPath.toAbsolutePath.toString + commonTableModifier = getCommonParams(tempPath, hoodieFooTableName, HoodieTableType.COPY_ON_WRITE.name()) + } + + /** + * Tear down method running after each test. + */ + @AfterEach + def tearDown(): Unit = { + cleanupSparkContexts() + FileUtils.deleteDirectory(tempPath.toFile) + FileUtils.deleteDirectory(tempBootStrapPath.toFile) + } + + /** + * Utility method for initializing the spark context. + */ + def initSparkContext(): Unit = { + spark = SparkSession.builder() + .appName(hoodieFooTableName) + .master("local[2]") + .withExtensions(new HoodieSparkSessionExtension) + .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer") + .getOrCreate() + sc = spark.sparkContext + sc.setLogLevel("ERROR") + sqlContext = spark.sqlContext + } + + /** + * Utility method for cleaning up spark resources. + */ + def cleanupSparkContexts(): Unit = { + if (sqlContext != null) { + sqlContext.clearCache(); + sqlContext = null; + } + if (sc != null) { + sc.stop() + sc = null + } + if (spark != null) { + spark.close() + } + } + + /** + * Utility method for creating common params for writer. 
+ * + * @param path Path for hoodie table + * @param hoodieFooTableName Name of hoodie table + * @param tableType Type of table + * @return Map of common params + */ + def getCommonParams(path: java.nio.file.Path, hoodieFooTableName: String, tableType: String): Map[String, String] = { + Map("path" -> path.toAbsolutePath.toString, + HoodieWriteConfig.TBL_NAME.key -> hoodieFooTableName, + "hoodie.insert.shuffle.parallelism" -> "1", + "hoodie.upsert.shuffle.parallelism" -> "1", + DataSourceWriteOptions.TABLE_TYPE.key -> tableType, + DataSourceWriteOptions.RECORDKEY_FIELD.key -> "_row_key", + DataSourceWriteOptions.PARTITIONPATH_FIELD.key -> "partition", + DataSourceWriteOptions.KEYGENERATOR_CLASS_NAME.key -> "org.apache.hudi.keygen.SimpleKeyGenerator") + } + + /** + * Utility method for converting list of Row to list of Seq. + * + * @param inputList list of Row + * @return list of Seq + */ + def convertRowListToSeq(inputList: java.util.List[Row]): Seq[Row] = + JavaConverters.asScalaIteratorConverter(inputList.iterator).asScala.toSeq + + @Test + def testTableSchemaResolverInMetadataTable(): Unit = { + val schema = DataSourceTestUtils.getStructTypeExampleSchema + //create a new table + val tableName = hoodieFooTableName + val fooTableModifier = Map("path" -> tempPath.toAbsolutePath.toString, + HoodieWriteConfig.TBL_NAME.key -> tableName, + "hoodie.avro.schema" -> schema.toString(), + "hoodie.insert.shuffle.parallelism" -> "1", + "hoodie.upsert.shuffle.parallelism" -> "1", + DataSourceWriteOptions.RECORDKEY_FIELD.key -> "_row_key", + DataSourceWriteOptions.PARTITIONPATH_FIELD.key -> "partition", + DataSourceWriteOptions.KEYGENERATOR_CLASS_NAME.key -> "org.apache.hudi.keygen.SimpleKeyGenerator", + "hoodie.metadata.compact.max.delta.commits" -> "2", + HoodieWriteConfig.ALLOW_OPERATION_METADATA_FIELD.key -> "true" + ) + + // generate the inserts + val structType = AvroConversionUtils.convertAvroSchemaToStructType(schema) + val records = DataSourceTestUtils.generateRandomRows(10) + val recordsSeq = convertRowListToSeq(records) + val df1 = spark.createDataFrame(sc.parallelize(recordsSeq), structType) + HoodieSparkSqlWriter.write(sqlContext, SaveMode.Overwrite, fooTableModifier, df1) + + // do update + HoodieSparkSqlWriter.write(sqlContext, SaveMode.Append, fooTableModifier, df1) + + val metadataTablePath = tempPath.toAbsolutePath.toString + "/.hoodie/metadata" + val metaClient = HoodieTableMetaClient.builder() + .setBasePath(metadataTablePath) + .setConf(spark.sessionState.newHadoopConf()) + .build() + + // Delete latest metadata table deltacommit + // Get schema from metadata table hfile format base file. 
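+    // Deleting the latest deltacommit below removes the commit metadata from which the schema
+    // would normally be read, so getTableAvroSchemaFromDataFile has to recover it from the
+    // metadata table's HFile-format base file (expected to match the HoodieMetadataRecord schema).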
+    val latestInstant = metaClient.getActiveTimeline.getCommitsTimeline.getReverseOrderedInstants.findFirst()
+    val path = new Path(metadataTablePath + "/.hoodie", latestInstant.get().getFileName)
+    val fs = path.getFileSystem(new Configuration())
+    fs.delete(path, false)
+    schemaValidationBasedOnDataFile(metaClient, HoodieMetadataRecord.getClassSchema.toString())
+  }
+
+  @ParameterizedTest
+  @CsvSource(Array("COPY_ON_WRITE,parquet", "COPY_ON_WRITE,orc", "COPY_ON_WRITE,hfile",
+    "MERGE_ON_READ,parquet", "MERGE_ON_READ,orc", "MERGE_ON_READ,hfile"))
+  def testTableSchemaResolver(tableType: String, baseFileFormat: String): Unit = {
+    val schema = DataSourceTestUtils.getStructTypeExampleSchema
+
+    // create a new table
+    val tableName = hoodieFooTableName
+    val fooTableModifier = Map("path" -> tempPath.toAbsolutePath.toString,
+      HoodieWriteConfig.BASE_FILE_FORMAT.key -> baseFileFormat,
+      DataSourceWriteOptions.TABLE_TYPE.key -> tableType,
+      HoodieWriteConfig.TBL_NAME.key -> tableName,
+      "hoodie.avro.schema" -> schema.toString(),
+      "hoodie.insert.shuffle.parallelism" -> "1",
+      "hoodie.upsert.shuffle.parallelism" -> "1",
+      DataSourceWriteOptions.RECORDKEY_FIELD.key -> "_row_key",
+      DataSourceWriteOptions.PARTITIONPATH_FIELD.key -> "partition",
+      DataSourceWriteOptions.KEYGENERATOR_CLASS_NAME.key -> "org.apache.hudi.keygen.SimpleKeyGenerator",
+      HoodieWriteConfig.ALLOW_OPERATION_METADATA_FIELD.key -> "true"
+    )
+
+    // generate the inserts
+    val structType = AvroConversionUtils.convertAvroSchemaToStructType(schema)
+    val records = DataSourceTestUtils.generateRandomRows(10)
+    val recordsSeq = convertRowListToSeq(records)
+    val df1 = spark.createDataFrame(sc.parallelize(recordsSeq), structType)
+    HoodieSparkSqlWriter.write(sqlContext, SaveMode.Overwrite, fooTableModifier, df1)
+
+    val metaClient = HoodieTableMetaClient.builder()
+      .setBasePath(tempPath.toAbsolutePath.toString)
+      .setConf(spark.sessionState.newHadoopConf())
+      .build()
+
+    assertTrue(new TableSchemaResolver(metaClient).isHasOperationField)
+    schemaValidationBasedOnDataFile(metaClient, schema.toString())
+  }
+
+  /**
+   * Validates the schema read from the data file via getTableAvroSchemaFromDataFile.
+   *
+   * @param metaClient   meta client for the table under test
+   * @param schemaString expected table schema as an Avro JSON string
+   */
+  def schemaValidationBasedOnDataFile(metaClient: HoodieTableMetaClient, schemaString: String): Unit = {
+    metaClient.reloadActiveTimeline()
+    var tableSchemaResolverParsingException: Exception = null
+    try {
+      val schemaFromData = new TableSchemaResolver(metaClient).getTableAvroSchemaFromDataFile
+      val structFromData = AvroConversionUtils.convertAvroSchemaToStructType(HoodieAvroUtils.removeMetadataFields(schemaFromData))
+      val schemeDesign = new Schema.Parser().parse(schemaString)
+      val structDesign = AvroConversionUtils.convertAvroSchemaToStructType(schemeDesign)
+      assertEquals(structFromData, structDesign)
+    } catch {
+      case e: Exception => tableSchemaResolverParsingException = e
+    }
+    assert(tableSchemaResolverParsingException == null)
+  }
+}
diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestCOWDataSource.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestCOWDataSource.scala
index e8b179804dfca..96d50f6b57b80 100644
--- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestCOWDataSource.scala
+++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestCOWDataSource.scala
@@ -19,22 +19,24 @@ package org.apache.hudi.functional
 import org.apache.hadoop.fs.FileSystem
 import org.apache.hudi.common.config.HoodieMetadataConfig
+import org.apache.hudi.common.model.HoodieRecord
 import org.apache.hudi.common.table.timeline.HoodieInstant
 import org.apache.hudi.common.table.{HoodieTableMetaClient, TableSchemaResolver}
 import org.apache.hudi.common.testutils.HoodieTestDataGenerator
 import org.apache.hudi.common.testutils.RawTripTestPayload.{deleteRecordsToStrings, recordsToStrings}
 import org.apache.hudi.config.HoodieWriteConfig
-import org.apache.hudi.exception.HoodieUpsertException
-import org.apache.hudi.keygen.TimestampBasedAvroKeyGenerator.Config
+import org.apache.hudi.exception.{HoodieException, HoodieUpsertException}
 import org.apache.hudi.keygen._
+import org.apache.hudi.keygen.constant.KeyGeneratorOptions.Config
 import org.apache.hudi.testutils.HoodieClientTestBase
-import org.apache.hudi.{AvroConversionUtils, DataSourceReadOptions, DataSourceWriteOptions, HoodieDataSourceHelpers}
+import org.apache.hudi.{AvroConversionUtils, DataSourceReadOptions, DataSourceWriteOptions, HoodieDataSourceHelpers, HoodieMergeOnReadRDD}
 import org.apache.spark.sql._
 import org.apache.spark.sql.functions.{col, concat, lit, udf}
 import org.apache.spark.sql.types._
 import org.joda.time.DateTime
 import org.joda.time.format.DateTimeFormat
-import org.junit.jupiter.api.Assertions.{assertEquals, assertTrue, fail}
+import org.junit.jupiter.api.Assertions.{assertEquals, assertThrows, assertTrue, fail}
+import org.junit.jupiter.api.function.Executable
 import org.junit.jupiter.api.{AfterEach, BeforeEach, Test}
 import org.junit.jupiter.params.ParameterizedTest
 import org.junit.jupiter.params.provider.{CsvSource, ValueSource}
@@ -57,7 +59,8 @@ class TestCOWDataSource extends HoodieClientTestBase {
     DataSourceWriteOptions.RECORDKEY_FIELD.key -> "_row_key",
     DataSourceWriteOptions.PARTITIONPATH_FIELD.key -> "partition",
     DataSourceWriteOptions.PRECOMBINE_FIELD.key -> "timestamp",
-    HoodieWriteConfig.TBL_NAME.key -> "hoodie_test"
+    HoodieWriteConfig.TBL_NAME.key -> "hoodie_test",
+    HoodieMetadataConfig.COMPACT_NUM_DELTA_COMMITS.key -> "1"
   )
 
   val verificationCol: String = "driver"
@@ -92,6 +95,79 @@ class TestCOWDataSource extends HoodieClientTestBase {
     assertTrue(HoodieDataSourceHelpers.hasNewCommits(fs, basePath, "000"))
   }
+
+  @Test def testHoodieIsDeletedNonBooleanField() {
+    // Insert Operation
+    val records = recordsToStrings(dataGen.generateInserts("000", 100)).toList
+    val inputDF = spark.read.json(spark.sparkContext.parallelize(records, 2))
+    val df = inputDF.withColumn(HoodieRecord.HOODIE_IS_DELETED, lit("abc"))
+
+    assertThrows(classOf[HoodieException], new Executable {
+      override def execute(): Unit = {
+        df.write.format("hudi")
+          .options(commonOpts)
+          .mode(SaveMode.Overwrite)
+          .save(basePath)
+      }
+    }, "Should have failed since _hoodie_is_deleted is not a BOOLEAN data type")
+  }
+
+  /**
+   * Tests querying with a partition condition on a Hudi table whose partition-field values
+   * differ from the physical partition paths, e.g. a table written with
+   * TimestampBasedKeyGenerator.
+   *
+   * For a COW table, tests the snapshot and incremental query modes.
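+   * For example, with the output format "yyyy/MM/dd" a record whose partition field is
+   * "2022-01-01" is stored under the physical path 2022/01/01, yet the predicate
+   * partition = '2022-01-01' must still prune the query down to exactly that path.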
+ */ + @Test + def testPrunePartitionForTimestampBasedKeyGenerator(): Unit = { + val options = commonOpts ++ Map( + "hoodie.compact.inline" -> "false", + DataSourceWriteOptions.TABLE_TYPE.key -> DataSourceWriteOptions.MOR_TABLE_TYPE_OPT_VAL, + DataSourceWriteOptions.KEYGENERATOR_CLASS_NAME.key -> "org.apache.hudi.keygen.TimestampBasedKeyGenerator", + Config.TIMESTAMP_TYPE_FIELD_PROP -> "DATE_STRING", + Config.TIMESTAMP_OUTPUT_DATE_FORMAT_PROP -> "yyyy/MM/dd", + Config.TIMESTAMP_TIMEZONE_FORMAT_PROP -> "GMT+8:00", + Config.TIMESTAMP_INPUT_DATE_FORMAT_PROP -> "yyyy-MM-dd" + ) + + val dataGen1 = new HoodieTestDataGenerator(Array("2022-01-01")) + val records1 = recordsToStrings(dataGen1.generateInserts("001", 20)).toList + val inputDF1 = spark.read.json(spark.sparkContext.parallelize(records1, 2)) + inputDF1.write.format("org.apache.hudi") + .options(options) + .mode(SaveMode.Overwrite) + .save(basePath) + metaClient = HoodieTableMetaClient.builder() + .setBasePath(basePath) + .setConf(spark.sessionState.newHadoopConf) + .build() + val commit1Time = metaClient.getActiveTimeline.lastInstant().get().getTimestamp + + val dataGen2 = new HoodieTestDataGenerator(Array("2022-01-02")) + val records2 = recordsToStrings(dataGen2.generateInserts("002", 30)).toList + val inputDF2 = spark.read.json(spark.sparkContext.parallelize(records2, 2)) + inputDF2.write.format("org.apache.hudi") + .options(options) + .mode(SaveMode.Append) + .save(basePath) + val commit2Time = metaClient.reloadActiveTimeline.lastInstant().get().getTimestamp + + // snapshot query + val snapshotQueryRes = spark.read.format("hudi").load(basePath) + assertEquals(snapshotQueryRes.where("partition = '2022-01-01'").count, 20) + assertEquals(snapshotQueryRes.where("partition = '2022-01-02'").count, 30) + + // incremental query + val incrementalQueryRes = spark.read.format("hudi") + .option(DataSourceReadOptions.QUERY_TYPE.key, DataSourceReadOptions.QUERY_TYPE_INCREMENTAL_OPT_VAL) + .option(DataSourceReadOptions.BEGIN_INSTANTTIME.key, commit1Time) + .option(DataSourceReadOptions.END_INSTANTTIME.key, commit2Time) + .load(basePath) + assertEquals(incrementalQueryRes.where("partition = '2022-01-01'").count, 0) + assertEquals(incrementalQueryRes.where("partition = '2022-01-02'").count, 30) + } + /** * Test for https://issues.apache.org/jira/browse/HUDI-1615. Null Schema in BulkInsert row writer flow. 
* This was reported by customer when archival kicks in as the schema in commit metadata is not set for bulk_insert @@ -156,7 +232,7 @@ class TestCOWDataSource extends HoodieClientTestBase { val snapshotDF2 = spark.read.format("org.apache.hudi") .load(basePath + "/*/*/*/*") - assertEquals(snapshotDF1.count() - inputDF2.count(), snapshotDF2.count()) + assertEquals(snapshotDF2.count(), 80) } @Test def testOverWriteModeUseReplaceAction(): Unit = { @@ -404,15 +480,10 @@ class TestCOWDataSource extends HoodieClientTestBase { } private def getDataFrameWriter(keyGenerator: String): DataFrameWriter[Row] = { - getDataFrameWriter(keyGenerator, true) - } - - private def getDataFrameWriter(keyGenerator: String, enableMetadata: Boolean): DataFrameWriter[Row] = { val records = recordsToStrings(dataGen.generateInserts("000", 100)).toList val inputDF = spark.read.json(spark.sparkContext.parallelize(records, 2)) - val opts = commonOpts ++ Map(HoodieMetadataConfig.ENABLE.key() -> String.valueOf(enableMetadata)) inputDF.write.format("hudi") - .options(opts) + .options(commonOpts) .option(DataSourceWriteOptions.KEYGENERATOR_CLASS_NAME.key, keyGenerator) .mode(SaveMode.Overwrite) } @@ -440,7 +511,7 @@ class TestCOWDataSource extends HoodieClientTestBase { assertTrue(recordsReadDF.filter(col("_hoodie_partition_path") =!= udf_date_format(col("current_ts"))).count() == 0) // Mixed fieldType - writer = getDataFrameWriter(classOf[CustomKeyGenerator].getName, false) + writer = getDataFrameWriter(classOf[CustomKeyGenerator].getName) writer.partitionBy("driver", "rider:SIMPLE", "current_ts:TIMESTAMP") .option(Config.TIMESTAMP_TYPE_FIELD_PROP, "EPOCHMILLISECONDS") .option(Config.TIMESTAMP_OUTPUT_DATE_FORMAT_PROP, "yyyyMMdd") @@ -452,7 +523,7 @@ class TestCOWDataSource extends HoodieClientTestBase { concat(col("driver"), lit("/"), col("rider"), lit("/"), udf_date_format(col("current_ts")))).count() == 0) // Test invalid partitionKeyType - writer = getDataFrameWriter(classOf[CustomKeyGenerator].getName, false) + writer = getDataFrameWriter(classOf[CustomKeyGenerator].getName) writer = writer.partitionBy("current_ts:DUMMY") .option(Config.TIMESTAMP_TYPE_FIELD_PROP, "EPOCHMILLISECONDS") .option(Config.TIMESTAMP_OUTPUT_DATE_FORMAT_PROP, "yyyyMMdd") @@ -708,6 +779,89 @@ class TestCOWDataSource extends HoodieClientTestBase { assertEquals(numRecords - numRecordsToDelete, snapshotDF2.count()) } + @Test def testFailEarlyForIncrViewQueryForNonExistingFiles(): Unit = { + // Create 10 commits + for (i <- 1 to 10) { + val records = recordsToStrings(dataGen.generateInserts("%05d".format(i), 100)).toList + val inputDF = spark.read.json(spark.sparkContext.parallelize(records, 2)) + inputDF.write.format("org.apache.hudi") + .options(commonOpts) + .option("hoodie.cleaner.commits.retained", "3") + .option("hoodie.keep.min.commits", "4") + .option("hoodie.keep.max.commits", "5") + .option(DataSourceWriteOptions.OPERATION.key(), DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL) + .mode(SaveMode.Append) + .save(basePath) + } + + val hoodieMetaClient = HoodieTableMetaClient.builder().setConf(spark.sparkContext.hadoopConfiguration).setBasePath(basePath).setLoadActiveTimelineOnLoad(true).build() + /** + * State of timeline after 10 commits + * +------------------+--------------------------------------+ + * | Archived | Active Timeline | + * +------------------+--------------+-----------------------+ + * | C0 C1 C2 C3 | C4 C5 | C6 C7 C8 C9 | + * +------------------+--------------+-----------------------+ + * | Data cleaned | Data exists in table | 
+ * +---------------------------------+-----------------------+ + */ + + val completedCommits = hoodieMetaClient.getCommitsTimeline.filterCompletedInstants() // C4 to C9 + //Anything less than 2 is a valid commit in the sense no cleanup has been done for those commit files + var startTs = completedCommits.nthInstant(0).get().getTimestamp //C4 + var endTs = completedCommits.nthInstant(1).get().getTimestamp //C5 + + //Calling without the fallback should result in Path does not exist + var hoodieIncViewDF = spark.read.format("org.apache.hudi") + .option(DataSourceReadOptions.QUERY_TYPE.key(), DataSourceReadOptions.QUERY_TYPE_INCREMENTAL_OPT_VAL) + .option(DataSourceReadOptions.BEGIN_INSTANTTIME.key(), startTs) + .option(DataSourceReadOptions.END_INSTANTTIME.key(), endTs) + .load(basePath) + + val msg = "Should fail with Path does not exist" + assertThrows(classOf[AnalysisException], new Executable { + override def execute(): Unit = { + hoodieIncViewDF.count() + } + }, msg) + + //Should work with fallback enabled + hoodieIncViewDF = spark.read.format("org.apache.hudi") + .option(DataSourceReadOptions.QUERY_TYPE.key(), DataSourceReadOptions.QUERY_TYPE_INCREMENTAL_OPT_VAL) + .option(DataSourceReadOptions.BEGIN_INSTANTTIME.key(), startTs) + .option(DataSourceReadOptions.END_INSTANTTIME.key(), endTs) + .option(DataSourceReadOptions.INCREMENTAL_FALLBACK_TO_FULL_TABLE_SCAN_FOR_NON_EXISTING_FILES.key(), "true") + .load(basePath) + assertEquals(100, hoodieIncViewDF.count()) + + //Test out for archived commits + val archivedInstants = hoodieMetaClient.getArchivedTimeline.getInstants.distinct().toArray + startTs = archivedInstants(0).asInstanceOf[HoodieInstant].getTimestamp //C0 + endTs = completedCommits.nthInstant(1).get().getTimestamp //C5 + + //Calling without the fallback should result in Path does not exist + hoodieIncViewDF = spark.read.format("org.apache.hudi") + .option(DataSourceReadOptions.QUERY_TYPE.key(), DataSourceReadOptions.QUERY_TYPE_INCREMENTAL_OPT_VAL) + .option(DataSourceReadOptions.BEGIN_INSTANTTIME.key(), startTs) + .option(DataSourceReadOptions.END_INSTANTTIME.key(), endTs) + .load(basePath) + + assertThrows(classOf[AnalysisException], new Executable { + override def execute(): Unit = { + hoodieIncViewDF.count() + } + }, msg) + + //Should work with fallback enabled + hoodieIncViewDF = spark.read.format("org.apache.hudi") + .option(DataSourceReadOptions.QUERY_TYPE.key(), DataSourceReadOptions.QUERY_TYPE_INCREMENTAL_OPT_VAL) + .option(DataSourceReadOptions.BEGIN_INSTANTTIME.key(), startTs) + .option(DataSourceReadOptions.END_INSTANTTIME.key(), endTs) + .option(DataSourceReadOptions.INCREMENTAL_FALLBACK_TO_FULL_TABLE_SCAN_FOR_NON_EXISTING_FILES.key(), "true") + .load(basePath) + assertEquals(500, hoodieIncViewDF.count()) + } + def copyOnWriteTableSelect(enableDropPartitionColumns: Boolean): Boolean = { val records1 = recordsToStrings(dataGen.generateInsertsContainsAllPartitions("000", 3)).toList val inputDF1 = spark.read.json(spark.sparkContext.parallelize(records1, 2)) @@ -748,4 +902,48 @@ class TestCOWDataSource extends HoodieClientTestBase { assertEquals(inputDF2.sort("_row_key").select("shortDecimal").collect().map(_.getDecimal(0).toPlainString).mkString(","), readResult.sort("_row_key").select("shortDecimal").collect().map(_.getDecimal(0).toPlainString).mkString(",")) } + + @Test + def testHoodieBaseFileOnlyViewRelation(): Unit = { + val _spark = spark + import _spark.implicits._ + + val df = Seq((1, "z3", 30, "v1", "2018-09-23"), (2, "z3", 35, "v1", "2018-09-24")) + .toDF("id", 
"name", "age", "ts", "data_date") + + df.write.format("hudi") + .options(commonOpts) + .option(DataSourceWriteOptions.RECORDKEY_FIELD.key, "id") + .option(DataSourceWriteOptions.RECORDKEY_FIELD.key, "id") + .option(DataSourceWriteOptions.RECORDKEY_FIELD.key, "id") + .option("hoodie.insert.shuffle.parallelism", "4") + .option("hoodie.upsert.shuffle.parallelism", "4") + .option("hoodie.bulkinsert.shuffle.parallelism", "2") + .option(DataSourceWriteOptions.RECORDKEY_FIELD.key, "id") + .option(DataSourceWriteOptions.PARTITIONPATH_FIELD.key, "data_date") + .option(DataSourceWriteOptions.PRECOMBINE_FIELD.key, "ts") + .option(DataSourceWriteOptions.KEYGENERATOR_CLASS_NAME.key, "org.apache.hudi.keygen.TimestampBasedKeyGenerator") + .option(Config.TIMESTAMP_TYPE_FIELD_PROP, "DATE_STRING") + .option(Config.TIMESTAMP_OUTPUT_DATE_FORMAT_PROP, "yyyy/MM/dd") + .option(Config.TIMESTAMP_TIMEZONE_FORMAT_PROP, "GMT+8:00") + .option(Config.TIMESTAMP_INPUT_DATE_FORMAT_PROP, "yyyy-MM-dd") + .mode(org.apache.spark.sql.SaveMode.Append) + .save(basePath) + + val res = spark.read.format("hudi").load(basePath) + + assert(res.count() == 2) + + // data_date is the partition field. Persist to the parquet file using the origin values, and read it. + assertTrue( + res.select("data_date").map(_.get(0).toString).collect().sorted.sameElements( + Array("2018-09-23", "2018-09-24") + ) + ) + assertTrue( + res.select("_hoodie_partition_path").map(_.get(0).toString).collect().sorted.sameElements( + Array("2018/09/23", "2018/09/24") + ) + ) + } } diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestCOWDataSourceStorage.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestCOWDataSourceStorage.scala index bf616e2cb314a..e7daf08d1193c 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestCOWDataSourceStorage.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestCOWDataSourceStorage.scala @@ -26,7 +26,7 @@ import org.apache.hudi.common.table.timeline.{HoodieInstant, HoodieTimeline} import org.apache.hudi.common.testutils.HoodieTestDataGenerator import org.apache.hudi.common.testutils.RawTripTestPayload.recordsToStrings import org.apache.hudi.config.HoodieWriteConfig -import org.apache.hudi.keygen.TimestampBasedAvroKeyGenerator.Config +import org.apache.hudi.keygen.constant.KeyGeneratorOptions.Config import org.apache.hudi.keygen.{ComplexKeyGenerator, TimestampBasedKeyGenerator} import org.apache.hudi.testutils.SparkClientFunctionalTestHarness import org.apache.hudi.{DataSourceReadOptions, DataSourceWriteOptions, HoodieDataSourceHelpers} @@ -58,9 +58,14 @@ class TestCOWDataSourceStorage extends SparkClientFunctionalTestHarness { val updatedVerificationVal: String = "driver_update" @ParameterizedTest - @CsvSource(Array("true,org.apache.hudi.keygen.SimpleKeyGenerator", "true,org.apache.hudi.keygen.ComplexKeyGenerator", - "true,org.apache.hudi.keygen.TimestampBasedKeyGenerator", "false,org.apache.hudi.keygen.SimpleKeyGenerator", - "false,org.apache.hudi.keygen.ComplexKeyGenerator", "false,org.apache.hudi.keygen.TimestampBasedKeyGenerator")) + @CsvSource(Array( + "true,org.apache.hudi.keygen.SimpleKeyGenerator", + "true,org.apache.hudi.keygen.ComplexKeyGenerator", + "true,org.apache.hudi.keygen.TimestampBasedKeyGenerator", + "false,org.apache.hudi.keygen.SimpleKeyGenerator", + "false,org.apache.hudi.keygen.ComplexKeyGenerator", + "false,org.apache.hudi.keygen.TimestampBasedKeyGenerator" + )) 
def testCopyOnWriteStorage(isMetadataEnabled: Boolean, keyGenClass: String): Unit = { commonOpts += DataSourceWriteOptions.KEYGENERATOR_CLASS_NAME.key() -> keyGenClass if (classOf[ComplexKeyGenerator].getName.equals(keyGenClass)) { @@ -72,7 +77,7 @@ class TestCOWDataSourceStorage extends SparkClientFunctionalTestHarness { commonOpts += Config.TIMESTAMP_TYPE_FIELD_PROP -> "EPOCHMILLISECONDS" commonOpts += Config.TIMESTAMP_OUTPUT_DATE_FORMAT_PROP -> "yyyyMMdd" } - val dataGen = new HoodieTestDataGenerator() + val dataGen = new HoodieTestDataGenerator(0xDEED) val fs = FSUtils.getFs(basePath, spark.sparkContext.hadoopConfiguration) // Insert Operation val records0 = recordsToStrings(dataGen.generateInserts("000", 100)).toList @@ -99,9 +104,13 @@ class TestCOWDataSourceStorage extends SparkClientFunctionalTestHarness { var updateDf: DataFrame = null if (classOf[TimestampBasedKeyGenerator].getName.equals(keyGenClass)) { // update current_ts to be same as original record so that partition path does not change with timestamp based key gen - val orignalRow = inputDF1.filter(col("_row_key") === verificationRowKey).collectAsList().get(0) - updateDf = snapshotDF1.filter(col("_row_key") === verificationRowKey).withColumn(verificationCol, lit(updatedVerificationVal)) - .withColumn("current_ts", lit(orignalRow.getAs("current_ts"))) + val originalRow = snapshotDF1.filter(col("_row_key") === verificationRowKey).collectAsList().get(0) + updateDf = inputDF1.filter(col("_row_key") === verificationRowKey) + .withColumn(verificationCol, lit(updatedVerificationVal)) + .withColumn("current_ts", lit(originalRow.getAs[Long]("current_ts"))) + .limit(1) + val updatedRow = updateDf.collectAsList().get(0) + assertEquals(originalRow.getAs[Long]("current_ts"), updatedRow.getAs[Long]("current_ts")); } else { updateDf = snapshotDF1.filter(col("_row_key") === verificationRowKey).withColumn(verificationCol, lit(updatedVerificationVal)) } diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestColumnStatsIndex.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestColumnStatsIndex.scala index e79067041fb62..ae41fa8eb551f 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestColumnStatsIndex.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestColumnStatsIndex.scala @@ -18,16 +18,22 @@ package org.apache.hudi.functional -import org.apache.hadoop.fs.{LocatedFileStatus, Path} +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.{FileSystem, LocatedFileStatus, Path} +import org.apache.hudi.common.util.ParquetUtils import org.apache.hudi.index.columnstats.ColumnStatsIndexHelper import org.apache.hudi.testutils.HoodieClientTestBase +import org.apache.spark.sql._ +import org.apache.spark.sql.expressions.UserDefinedFunction import org.apache.spark.sql.functions.typedLit import org.apache.spark.sql.types._ -import org.apache.spark.sql.{DataFrame, SparkSession} -import org.junit.jupiter.api.Assertions.assertEquals -import org.junit.jupiter.api.{AfterEach, BeforeEach, Disabled, Test} +import org.junit.jupiter.api.Assertions.{assertEquals, assertNotNull, assertTrue} +import org.junit.jupiter.api.{AfterEach, BeforeEach, Test} -import scala.collection.JavaConversions._ +import java.math.BigInteger +import java.sql.{Date, Timestamp} +import scala.collection.JavaConverters._ +import scala.util.Random class TestColumnStatsIndex extends HoodieClientTestBase { var spark: 
SparkSession = _
@@ -58,15 +64,17 @@
   }
 
   @Test
-  @Disabled
-  def testColumnStatsTableComposition(): Unit = {
+  def testZIndexTableComposition(): Unit = {
+    val targetParquetTablePath = tempDir.resolve("index/zorder/input-table").toAbsolutePath.toString
+    val sourceJSONTablePath = getClass.getClassLoader.getResource("index/zorder/input-table-json").toString
+
+    bootstrapParquetInputTableFromJSON(sourceJSONTablePath, targetParquetTablePath)
+
     val inputDf =
       // NOTE: Schema here is provided for validation that the input data is in the appropriate format
       spark.read
         .schema(sourceTableSchema)
-        .parquet(
-          getClass.getClassLoader.getResource("index/zorder/input-table").toString
-        )
+        .parquet(targetParquetTablePath)
 
     val zorderedCols = Seq("c1", "c2", "c3", "c5", "c6", "c7", "c8")
     val zorderedColsSchemaFields = inputDf.schema.fields.filter(f => zorderedCols.contains(f.name)).toSeq
@@ -75,22 +83,18 @@ class TestColumnStatsIndex extends HoodieClientTestBase {
     val newZIndexTableDf =
       ColumnStatsIndexHelper.buildColumnStatsTableFor(
         inputDf.sparkSession,
-        inputDf.inputFiles.toSeq,
-        zorderedColsSchemaFields
+        inputDf.inputFiles.toSeq.asJava,
+        zorderedColsSchemaFields.asJava
       )
 
     val indexSchema =
       ColumnStatsIndexHelper.composeIndexSchema(
-        sourceTableSchema.fields.filter(f => zorderedCols.contains(f.name)).toSeq
+        sourceTableSchema.fields.filter(f => zorderedCols.contains(f.name)).toSeq.asJava
       )
 
     // Collect Z-index stats manually (reading individual Parquet files)
     val manualZIndexTableDf =
-      buildColumnStatsTableManually(
-        getClass.getClassLoader.getResource("index/zorder/input-table").toString,
-        zorderedCols,
-        indexSchema
-      )
+      buildColumnStatsTableManually(targetParquetTablePath, zorderedCols, indexSchema)
 
     // NOTE: Z-index is built against stats collected within Parquet footers, which will be
     //       represented with the corresponding Parquet schema (INT, INT64, INT96, etc).
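The NOTE above is the key mechanism under test: the index is derived from footer-level Parquet statistics. As a minimal illustrative sketch (assuming parquet-hadoop is on the classpath; printFooterStats is a hypothetical helper, not a Hudi or patch API), per-column min/max values can be read straight from a footer like so:

    import org.apache.hadoop.conf.Configuration
    import org.apache.hadoop.fs.Path
    import org.apache.parquet.hadoop.ParquetFileReader
    import org.apache.parquet.hadoop.util.HadoopInputFile

    import scala.collection.JavaConverters._

    // Prints the per-column min/max statistics recorded in a Parquet file's footer
    def printFooterStats(conf: Configuration, file: Path): Unit = {
      val reader = ParquetFileReader.open(HadoopInputFile.fromPath(file, conf))
      try {
        for (block <- reader.getFooter.getBlocks.asScala;
             column <- block.getColumns.asScala) {
          val stats = column.getStatistics
          // Statistics may be absent for some physical types (cf. the timestamp caveat
          // noted elsewhere in this suite)
          if (stats != null && !stats.isEmpty) {
            println(s"${column.getPath.toDotString}: min=${stats.genericGetMin}, max=${stats.genericGetMax}")
          }
        }
      } finally {
        reader.close()
      }
    }

This is conceptually what ParquetUtils.readRangeFromParquetMetadata, exercised by testParquetMetadataRangeExtraction below, extracts on the index's behalf.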
@@ -107,18 +111,23 @@ class TestColumnStatsIndex extends HoodieClientTestBase { .schema(indexSchema) .json(getClass.getClassLoader.getResource("index/zorder/z-index-table.json").toString) - assertEquals(asJson(sort(expectedZIndexTableDf)), asJson(sort(newZIndexTableDf))) + assertEquals(asJson(sort(expectedZIndexTableDf)), asJson(sort(replace(newZIndexTableDf)))) } @Test - @Disabled - def testColumnStatsTableMerge(): Unit = { + def testZIndexTableMerge(): Unit = { val testZIndexPath = new Path(basePath, "zindex") + val firstParquetTablePath = tempDir.resolve("index/zorder/input-table").toAbsolutePath.toString + val firstJSONTablePath = getClass.getClassLoader.getResource("index/zorder/input-table-json").toString + + // Bootstrap FIRST source Parquet table + bootstrapParquetInputTableFromJSON(firstJSONTablePath, firstParquetTablePath) + val zorderedCols = Seq("c1", "c2", "c3", "c5", "c6", "c7", "c8") val indexSchema = ColumnStatsIndexHelper.composeIndexSchema( - sourceTableSchema.fields.filter(f => zorderedCols.contains(f.name)).toSeq + sourceTableSchema.fields.filter(f => zorderedCols.contains(f.name)).toSeq.asJava ) // @@ -126,19 +135,16 @@ class TestColumnStatsIndex extends HoodieClientTestBase { // val firstCommitInstance = "0" - val firstInputDf = - spark.read.parquet( - getClass.getClassLoader.getResource("index/zorder/input-table").toString - ) + val firstInputDf = spark.read.parquet(firstParquetTablePath) ColumnStatsIndexHelper.updateColumnStatsIndexFor( firstInputDf.sparkSession, sourceTableSchema, - firstInputDf.inputFiles.toSeq, - zorderedCols.toSeq, + firstInputDf.inputFiles.toSeq.asJava, + zorderedCols.asJava, testZIndexPath.toString, firstCommitInstance, - Seq() + Seq().asJava ) // NOTE: We don't need to provide schema upon reading from Parquet, since Spark will be able @@ -152,15 +158,19 @@ class TestColumnStatsIndex extends HoodieClientTestBase { .schema(indexSchema) .json(getClass.getClassLoader.getResource("index/zorder/z-index-table.json").toString) - assertEquals(asJson(sort(expectedInitialZIndexTableDf)), asJson(sort(initialZIndexTable))) + assertEquals(asJson(sort(expectedInitialZIndexTableDf)), asJson(sort(replace(initialZIndexTable)))) + + // Bootstrap SECOND source Parquet table + val secondParquetTablePath = tempDir.resolve("index/zorder/another-input-table").toAbsolutePath.toString + val secondJSONTablePath = getClass.getClassLoader.getResource("index/zorder/another-input-table-json").toString + + bootstrapParquetInputTableFromJSON(secondJSONTablePath, secondParquetTablePath) val secondCommitInstance = "1" val secondInputDf = spark.read .schema(sourceTableSchema) - .parquet( - getClass.getClassLoader.getResource("index/zorder/another-input-table").toString - ) + .parquet(secondParquetTablePath) // // Update Z-index table @@ -169,11 +179,11 @@ class TestColumnStatsIndex extends HoodieClientTestBase { ColumnStatsIndexHelper.updateColumnStatsIndexFor( secondInputDf.sparkSession, sourceTableSchema, - secondInputDf.inputFiles.toSeq, - zorderedCols.toSeq, + secondInputDf.inputFiles.toSeq.asJava, + zorderedCols.asJava, testZIndexPath.toString, secondCommitInstance, - Seq(firstCommitInstance) + Seq(firstCommitInstance).asJava ) // NOTE: We don't need to provide schema upon reading from Parquet, since Spark will be able @@ -187,56 +197,96 @@ class TestColumnStatsIndex extends HoodieClientTestBase { .schema(indexSchema) .json(getClass.getClassLoader.getResource("index/zorder/z-index-table-merged.json").toString) - assertEquals(asJson(sort(expectedMergedZIndexTableDf)), 
asJson(sort(mergedZIndexTable)))
+    assertEquals(asJson(sort(expectedMergedZIndexTableDf)), asJson(sort(replace(mergedZIndexTable))))
   }
 
   @Test
-  @Disabled
   def testColumnStatsTablesGarbageCollection(): Unit = {
-    val testZIndexPath = new Path(System.getProperty("java.io.tmpdir"), "zindex")
-    val fs = testZIndexPath.getFileSystem(spark.sparkContext.hadoopConfiguration)
+    val targetParquetTablePath = tempDir.resolve("index/zorder/input-table").toAbsolutePath.toString
+    val sourceJSONTablePath = getClass.getClassLoader.getResource("index/zorder/input-table-json").toString
 
-    val inputDf =
-      spark.read.parquet(
-        getClass.getClassLoader.getResource("index/zorder/input-table").toString
-      )
+    bootstrapParquetInputTableFromJSON(sourceJSONTablePath, targetParquetTablePath)
+
+    val inputDf = spark.read.parquet(targetParquetTablePath)
+
+    val testColumnStatsIndexPath = new Path(tempDir.resolve("zindex").toAbsolutePath.toString)
+    val fs = testColumnStatsIndexPath.getFileSystem(spark.sparkContext.hadoopConfiguration)
 
     // Try to save statistics
     ColumnStatsIndexHelper.updateColumnStatsIndexFor(
       inputDf.sparkSession,
       sourceTableSchema,
-      inputDf.inputFiles.toSeq,
-      Seq("c1","c2","c3","c5","c6","c7","c8"),
-      testZIndexPath.toString,
+      inputDf.inputFiles.toSeq.asJava,
+      Seq("c1","c2","c3","c5","c6","c7","c8").asJava,
+      testColumnStatsIndexPath.toString,
       "2",
-      Seq("0", "1")
+      Seq("0", "1").asJava
     )
 
     // Save again
     ColumnStatsIndexHelper.updateColumnStatsIndexFor(
       inputDf.sparkSession,
       sourceTableSchema,
-      inputDf.inputFiles.toSeq,
-      Seq("c1","c2","c3","c5","c6","c7","c8"),
-      testZIndexPath.toString,
+      inputDf.inputFiles.toSeq.asJava,
+      Seq("c1","c2","c3","c5","c6","c7","c8").asJava,
+      testColumnStatsIndexPath.toString,
       "3",
-      Seq("0", "1", "2")
+      Seq("0", "1", "2").asJava
     )
 
     // Test old index table being cleaned up
     ColumnStatsIndexHelper.updateColumnStatsIndexFor(
       inputDf.sparkSession,
       sourceTableSchema,
-      inputDf.inputFiles.toSeq,
-      Seq("c1","c2","c3","c5","c6","c7","c8"),
-      testZIndexPath.toString,
+      inputDf.inputFiles.toSeq.asJava,
+      Seq("c1","c2","c3","c5","c6","c7","c8").asJava,
+      testColumnStatsIndexPath.toString,
       "4",
-      Seq("0", "1", "3")
+      Seq("0", "1", "3").asJava
     )
 
-    assertEquals(!fs.exists(new Path(testZIndexPath, "2")), true)
-    assertEquals(!fs.exists(new Path(testZIndexPath, "3")), true)
-    assertEquals(fs.exists(new Path(testZIndexPath, "4")), true)
+    assertEquals(!fs.exists(new Path(testColumnStatsIndexPath, "2")), true)
+    assertEquals(!fs.exists(new Path(testColumnStatsIndexPath, "3")), true)
+    assertEquals(fs.exists(new Path(testColumnStatsIndexPath, "4")), true)
+  }
+
+  @Test
+  def testParquetMetadataRangeExtraction(): Unit = {
+    val df = generateRandomDataFrame(spark)
+
+    val pathStr = tempDir.resolve("min-max").toAbsolutePath.toString
+
+    df.write.format("parquet")
+      .mode(SaveMode.Overwrite)
+      .save(pathStr)
+
+    val utils = new ParquetUtils
+
+    val conf = new Configuration()
+    val path = new Path(pathStr)
+    val fs = path.getFileSystem(conf)
+
+    val parquetFilePath = fs.listStatus(path).filter(s => s.getPath.getName.endsWith(".parquet")).toSeq.head.getPath
+
+    val ranges = utils.readRangeFromParquetMetadata(conf, parquetFilePath,
+      Seq("c1", "c2", "c3a", "c3b", "c3c", "c4", "c5", "c6", "c7", "c8").asJava)
+
+    ranges.asScala
+      // NOTE: Unfortunately Parquet can't compute statistics for Timestamp columns, hence
+      //       c4 is skipped in the assertions below
+      .filterNot(r => r.getColumnName.equals("c4"))
+      .foreach(r => {
+        val min = r.getMinValue
+        val
max = r.getMaxValue
+
+        assertNotNull(min)
+        assertNotNull(max)
+        assertTrue(r.getMinValue.asInstanceOf[Comparable[Object]].compareTo(r.getMaxValue.asInstanceOf[Object]) <= 0)
+      })
+  }
 
   private def buildColumnStatsTableManually(tablePath: String, zorderedCols: Seq[String], indexSchema: StructType) = {
@@ -268,11 +318,84 @@
           df.selectExpr(exprs: _*)
             .collect()
-      }),
+      }).asJava,
       indexSchema
     )
   }
 
+  def bootstrapParquetInputTableFromJSON(sourceJSONTablePath: String, targetParquetTablePath: String): Unit = {
+    val jsonInputDF =
+      // NOTE: Schema here is provided for validation that the input data is in the appropriate format
+      spark.read
+        .schema(sourceTableSchema)
+        .json(sourceJSONTablePath)
+
+    jsonInputDF
+      .sort("c1")
+      .repartition(4, new Column("c1"))
+      .write
+      .format("parquet")
+      .mode("overwrite")
+      .save(targetParquetTablePath)
+
+    val fs = FileSystem.get(spark.sparkContext.hadoopConfiguration)
+    // Have to clean up additional artifacts of the Spark write
+    fs.delete(new Path(targetParquetTablePath, "_SUCCESS"), false)
+  }
+
+  def replace(ds: Dataset[Row]): DataFrame = {
+    val uuidRegexp = "[a-z0-9]{8}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{12}"
+
+    val uuids =
+      ds.selectExpr(s"regexp_extract(file, '(${uuidRegexp})')")
+        .distinct()
+        .collect()
+        .map(_.getString(0))
+
+    val maskUuid: UserDefinedFunction = functions.udf((fileName: String) => {
+      val uuid = uuids.find(uuid => fileName.contains(uuid)).get
+      fileName.replace(uuid, "xxx")
+    })
+
+    ds.withColumn("file", maskUuid(ds("file")))
+  }
+
+  private def generateRandomDataFrame(spark: SparkSession): DataFrame = {
+    val sourceTableSchema =
+      new StructType()
+        .add("c1", IntegerType)
+        .add("c2", StringType)
+        // NOTE: We're testing different values for precision of the decimal to make sure
+        //       we execute paths bearing different underlying representations in Parquet
+        // REF: https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#DECIMAL
+        .add("c3a", DecimalType(9,3))
+        .add("c3b", DecimalType(10,3))
+        .add("c3c", DecimalType(20,3))
+        .add("c4", TimestampType)
+        .add("c5", ShortType)
+        .add("c6", DateType)
+        .add("c7", BinaryType)
+        .add("c8", ByteType)
+
+    val rdd = spark.sparkContext.parallelize(0 to 1000, 1).map { item =>
+      val c1 = Integer.valueOf(item)
+      val c2 = Random.nextString(10)
+      val c3a = java.math.BigDecimal.valueOf(Random.nextInt() % (1 << 24), 3)
+      val c3b = java.math.BigDecimal.valueOf(Random.nextLong() % (1L << 32), 3)
+      // NOTE: We cap it at 2^64 to make sure we're not exceeding target decimal's range
+      val c3c = new java.math.BigDecimal(new BigInteger(64, new java.util.Random()), 3)
+      val c4 = new Timestamp(System.currentTimeMillis())
+      val c5 = java.lang.Short.valueOf(s"${(item + 16) / 10}")
+      val c6 = Date.valueOf(s"${2020}-${item % 11 + 1}-${item % 28 + 1}")
+      val c7 = Array(item).map(_.toByte)
+      val c8 = java.lang.Byte.valueOf("9")
+
+      RowFactory.create(c1, c2, c3a, c3b, c3c, c4, c5, c6, c7, c8)
+    }
+
+    spark.createDataFrame(rdd, sourceTableSchema)
+  }
+
   private def asJson(df: DataFrame) =
     df.toJSON
       .select("value")
@@ -281,14 +404,12 @@
       .map(_.getString(0))
       .mkString("\n")
 
   private def sort(df: DataFrame): DataFrame = {
-    // Since upon parsing JSON, Spark re-order columns in lexicographical order
-    // of their names, we have to shuffle new Z-index table columns order to match
-    // Rows are sorted by filename as well to avoid
     val sortedCols = df.columns.sorted
+    // Sort
dataset by the first 2 columns (to minimize non-determinism in case multiple files have the same + // value of the first column) df.select(sortedCols.head, sortedCols.tail: _*) - .sort("file") + .sort("c1_maxValue", "c1_minValue") } } diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestSpaceCurveLayoutOptimization.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestLayoutOptimization.scala similarity index 81% rename from hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestSpaceCurveLayoutOptimization.scala rename to hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestLayoutOptimization.scala index e453953ff11e2..818addaf87399 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestSpaceCurveLayoutOptimization.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestLayoutOptimization.scala @@ -32,12 +32,10 @@ import org.junit.jupiter.params.ParameterizedTest import org.junit.jupiter.params.provider.Arguments.arguments import org.junit.jupiter.params.provider.{Arguments, MethodSource} -import java.sql.{Date, Timestamp} import scala.collection.JavaConversions._ -import scala.util.Random @Tag("functional") -class TestSpaceCurveLayoutOptimization extends HoodieClientTestBase { +class TestLayoutOptimization extends HoodieClientTestBase { var spark: SparkSession = _ val sourceTableSchema = @@ -79,7 +77,13 @@ class TestSpaceCurveLayoutOptimization extends HoodieClientTestBase { @ParameterizedTest @MethodSource(Array("testLayoutOptimizationParameters")) - def testLayoutOptimizationFunctional(tableType: String): Unit = { + def testLayoutOptimizationFunctional(tableType: String, + layoutOptimizationStrategy: String, + spatialCurveCompositionStrategy: String): Unit = { + val curveCompositionStrategy = + Option(spatialCurveCompositionStrategy) + .getOrElse(HoodieClusteringConfig.LAYOUT_OPTIMIZE_SPATIAL_CURVE_BUILD_METHOD.defaultValue()) + val targetRecordsCount = 10000 // Bulk Insert Operation val records = recordsToStrings(dataGen.generateInserts("001", targetRecordsCount)).toList @@ -98,8 +102,9 @@ class TestSpaceCurveLayoutOptimization extends HoodieClientTestBase { .option("hoodie.clustering.plan.strategy.small.file.limit", "629145600") .option("hoodie.clustering.plan.strategy.max.bytes.per.group", Long.MaxValue.toString) .option("hoodie.clustering.plan.strategy.target.file.max.bytes", String.valueOf(64 * 1024 * 1024L)) - .option(HoodieClusteringConfig.LAYOUT_OPTIMIZE_ENABLE.key, "true") - .option(HoodieClusteringConfig.PLAN_STRATEGY_SORT_COLUMNS.key, "begin_lat, begin_lon") + .option(HoodieClusteringConfig.LAYOUT_OPTIMIZE_STRATEGY.key(), layoutOptimizationStrategy) + .option(HoodieClusteringConfig.LAYOUT_OPTIMIZE_SPATIAL_CURVE_BUILD_METHOD.key(), curveCompositionStrategy) + .option(HoodieClusteringConfig.PLAN_STRATEGY_SORT_COLUMNS.key, "begin_lat,begin_lon") .mode(SaveMode.Overwrite) .save(basePath) @@ -144,32 +149,22 @@ class TestSpaceCurveLayoutOptimization extends HoodieClientTestBase { val rows = one.count() assert(rows == other.count() && one.intersect(other).count() == rows) } - - def createComplexDataFrame(spark: SparkSession): DataFrame = { - val rdd = spark.sparkContext.parallelize(0 to 1000, 1).map { item => - val c1 = Integer.valueOf(item) - val c2 = s" ${item}sdc" - val c3 = new java.math.BigDecimal(s"${Random.nextInt(1000)}.${item}") - val c4 = new Timestamp(System.currentTimeMillis()) - 
val c5 = java.lang.Short.valueOf(s"${(item + 16) /10}") - val c6 = Date.valueOf(s"${2020}-${item % 11 + 1}-${item % 28 + 1}") - val c7 = Array(item).map(_.toByte) - val c8 = java.lang.Byte.valueOf("9") - - RowFactory.create(c1, c2, c3, c4, c5, c6, c7, c8) - } - spark.createDataFrame(rdd, sourceTableSchema) - } } -object TestSpaceCurveLayoutOptimization { +object TestLayoutOptimization { def testLayoutOptimizationParameters(): java.util.stream.Stream[Arguments] = { java.util.stream.Stream.of( - arguments("COPY_ON_WRITE", "hilbert"), - arguments("COPY_ON_WRITE", "z-order"), - arguments("MERGE_ON_READ", "hilbert"), - arguments("MERGE_ON_READ", "z-order") + arguments("COPY_ON_WRITE", "linear", null), + arguments("COPY_ON_WRITE", "z-order", "direct"), + arguments("COPY_ON_WRITE", "z-order", "sample"), + arguments("COPY_ON_WRITE", "hilbert", "direct"), + arguments("COPY_ON_WRITE", "hilbert", "sample"), + + arguments("MERGE_ON_READ", "linear", null), + arguments("MERGE_ON_READ", "z-order", "direct"), + arguments("MERGE_ON_READ", "z-order", "sample"), + arguments("MERGE_ON_READ", "hilbert", "direct"), + arguments("MERGE_ON_READ", "hilbert", "sample") ) } } - diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestMORDataSource.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestMORDataSource.scala index f420b296e2b3a..ed6ef87b8e14f 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestMORDataSource.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestMORDataSource.scala @@ -17,19 +17,22 @@ package org.apache.hudi.functional +import org.apache.avro.generic.GenericRecord import org.apache.hadoop.fs.Path import org.apache.hudi.DataSourceWriteOptions._ import org.apache.hudi.common.config.HoodieMetadataConfig -import org.apache.hudi.common.model.{DefaultHoodieRecordPayload, HoodieTableType} +import org.apache.hudi.common.model.{DefaultHoodieRecordPayload, HoodieRecord, HoodieRecordPayload, HoodieTableType} import org.apache.hudi.common.table.HoodieTableMetaClient import org.apache.hudi.common.testutils.HoodieTestDataGenerator import org.apache.hudi.common.testutils.RawTripTestPayload.recordsToStrings import org.apache.hudi.config.{HoodieIndexConfig, HoodieWriteConfig} import org.apache.hudi.index.HoodieIndex.IndexType import org.apache.hudi.keygen.NonpartitionedKeyGenerator +import org.apache.hudi.keygen.constant.KeyGeneratorOptions.Config import org.apache.hudi.testutils.{DataSourceTestUtils, HoodieClientTestBase} -import org.apache.hudi.{DataSourceReadOptions, DataSourceWriteOptions, HoodieDataSourceHelpers} +import org.apache.hudi.{AvroConversionUtils, DataSourceReadOptions, DataSourceWriteOptions, HoodieDataSourceHelpers, HoodieSparkUtils} import org.apache.log4j.LogManager +import org.apache.spark.rdd.RDD import org.apache.spark.sql._ import org.apache.spark.sql.functions._ import org.apache.spark.sql.types.BooleanType @@ -38,6 +41,7 @@ import org.junit.jupiter.api.{AfterEach, BeforeEach, Test} import org.junit.jupiter.params.ParameterizedTest import org.junit.jupiter.params.provider.CsvSource +import java.util import scala.collection.JavaConversions._ import scala.collection.JavaConverters._ @@ -345,11 +349,15 @@ class TestMORDataSource extends HoodieClientTestBase { // First Operation: // Producing parquet files to three default partitions. // SNAPSHOT view on MOR table with parquet files only. 
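    // (A MOR snapshot query merges base files with any pending log files; since this first
    // operation produces parquet base files only, the snapshot view at this point matches a
    // read-optimized view.)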
+
+    // Overriding the partition-path field
+    val opts = commonOpts + (DataSourceWriteOptions.PARTITIONPATH_FIELD.key -> "partition_path")
+
     val hoodieRecords1 = dataGen.generateInserts("001", 100)
-    val records1 = recordsToStrings(hoodieRecords1).toList
-    val inputDF1 = spark.read.json(spark.sparkContext.parallelize(records1, 2))
+
+    val inputDF1 = toDataset(hoodieRecords1)
     inputDF1.write.format("org.apache.hudi")
-      .options(commonOpts)
+      .options(opts)
       .option("hoodie.compact.inline", "false") // else fails due to compaction & deltacommit instant times being same
       .option(DataSourceWriteOptions.OPERATION.key, DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL)
      .option(DataSourceWriteOptions.TABLE_TYPE.key, DataSourceWriteOptions.MOR_TABLE_TYPE_OPT_VAL)
@@ -372,11 +380,10 @@
     // Second Operation:
     // Upsert 50 update records
     // Snapshot view should read 100 records
-    val records2 = recordsToStrings(dataGen.generateUniqueUpdates("002", 50))
-      .toList
-    val inputDF2: Dataset[Row] = spark.read.json(spark.sparkContext.parallelize(records2, 2))
+    val records2 = dataGen.generateUniqueUpdates("002", 50)
+    val inputDF2 = toDataset(records2)
     inputDF2.write.format("org.apache.hudi")
-      .options(commonOpts)
+      .options(opts)
       .mode(SaveMode.Append)
       .save(basePath)
     val hudiSnapshotDF2 = spark.read.format("org.apache.hudi")
@@ -420,17 +427,31 @@
     verifyShow(hudiIncDF2)
     verifyShow(hudiIncDF1Skipmerge)
 
-    val record3 = recordsToStrings(dataGen.generateUpdatesWithTS("003", hoodieRecords1, -1))
-    spark.read.json(spark.sparkContext.parallelize(record3, 2))
-      .write.format("org.apache.hudi").options(commonOpts)
+    val record3 = dataGen.generateUpdatesWithTS("003", hoodieRecords1, -1)
+    val inputDF3 = toDataset(record3)
+    inputDF3.write.format("org.apache.hudi").options(opts)
       .mode(SaveMode.Append).save(basePath)
+
     val hudiSnapshotDF3 = spark.read.format("org.apache.hudi")
       .option(DataSourceReadOptions.QUERY_TYPE.key, DataSourceReadOptions.QUERY_TYPE_SNAPSHOT_OPT_VAL)
       .load(basePath + "/*/*/*/*")
+
+    verifyShow(hudiSnapshotDF3)
+
     assertEquals(100, hudiSnapshotDF3.count())
     assertEquals(0, hudiSnapshotDF3.filter("rider = 'rider-003'").count())
   }
 
+  private def toDataset(records: util.List[HoodieRecord[_]]) = {
+    val avroRecords = records.map(_.getData
+      .asInstanceOf[HoodieRecordPayload[_]]
+      .getInsertValue(HoodieTestDataGenerator.AVRO_SCHEMA)
+      .get
+      .asInstanceOf[GenericRecord])
+    val rdd: RDD[GenericRecord] = spark.sparkContext.parallelize(avroRecords, 2)
+    AvroConversionUtils.createDataFrame(rdd, HoodieTestDataGenerator.AVRO_SCHEMA.toString, spark)
+  }
+
   @Test
   def testVectorizedReader() {
     spark.conf.set("spark.sql.parquet.enableVectorizedReader", true)
@@ -549,10 +570,10 @@
       .orderBy(desc("_hoodie_commit_time"))
       .head()
     assertEquals(sampleRow.getDouble(0), sampleRow.get(0))
-    assertEquals(sampleRow.getLong(1), sampleRow.get(1))
+    assertEquals(sampleRow.getDate(1), sampleRow.get(1))
     assertEquals(sampleRow.getString(2), sampleRow.get(2))
     assertEquals(sampleRow.getSeq(3), sampleRow.get(3))
-    assertEquals(sampleRow.getStruct(4), sampleRow.get(4))
+    assertEquals(sampleRow.getAs[Array[Byte]](4), sampleRow.get(4))
   }
 
   def verifyShow(df: DataFrame): Unit = {
@@ -770,4 +791,79 @@
       .load(basePath + "/*/*/*/*")
     assertEquals(numRecords - numRecordsToDelete, snapshotDF2.count())
   }
+
+  /**
+   * Tests querying with a partition condition on a Hudi table whose partition-field values
+   * differ from the physical partition paths, e.g. a table written with
+   * TimestampBasedKeyGenerator.
+   *
+   * For a MOR table, tests all three query modes.
+   */
+  @Test
+  def testPrunePartitionForTimestampBasedKeyGenerator(): Unit = {
+    val options = commonOpts ++ Map(
+      "hoodie.compact.inline" -> "false",
+      DataSourceWriteOptions.TABLE_TYPE.key -> DataSourceWriteOptions.MOR_TABLE_TYPE_OPT_VAL,
+      DataSourceWriteOptions.KEYGENERATOR_CLASS_NAME.key -> "org.apache.hudi.keygen.TimestampBasedKeyGenerator",
+      Config.TIMESTAMP_TYPE_FIELD_PROP -> "DATE_STRING",
+      Config.TIMESTAMP_OUTPUT_DATE_FORMAT_PROP -> "yyyy/MM/dd",
+      Config.TIMESTAMP_TIMEZONE_FORMAT_PROP -> "GMT+8:00",
+      Config.TIMESTAMP_INPUT_DATE_FORMAT_PROP -> "yyyy-MM-dd"
+    )
+
+    val dataGen1 = new HoodieTestDataGenerator(Array("2022-01-01"))
+    val records1 = recordsToStrings(dataGen1.generateInserts("001", 50)).toList
+    val inputDF1 = spark.read.json(spark.sparkContext.parallelize(records1, 2))
+    inputDF1.write.format("org.apache.hudi")
+      .options(options)
+      .mode(SaveMode.Overwrite)
+      .save(basePath)
+    metaClient = HoodieTableMetaClient.builder()
+      .setBasePath(basePath)
+      .setConf(spark.sessionState.newHadoopConf)
+      .build()
+    val commit1Time = metaClient.getActiveTimeline.lastInstant().get().getTimestamp
+
+    val dataGen2 = new HoodieTestDataGenerator(Array("2022-01-02"))
+    val records2 = recordsToStrings(dataGen2.generateInserts("002", 60)).toList
+    val inputDF2 = spark.read.json(spark.sparkContext.parallelize(records2, 2))
+    inputDF2.write.format("org.apache.hudi")
+      .options(options)
+      .mode(SaveMode.Append)
+      .save(basePath)
+    val commit2Time = metaClient.reloadActiveTimeline.lastInstant().get().getTimestamp
+
+    val records3 = recordsToStrings(dataGen2.generateUniqueUpdates("003", 20)).toList
+    val inputDF3 = spark.read.json(spark.sparkContext.parallelize(records3, 2))
+    inputDF3.write.format("org.apache.hudi")
+      .options(options)
+      .mode(SaveMode.Append)
+      .save(basePath)
+    val commit3Time = metaClient.reloadActiveTimeline.lastInstant().get().getTimestamp
+
+    // snapshot query
+    val snapshotQueryRes = spark.read.format("hudi").load(basePath)
+    assertEquals(snapshotQueryRes.where(s"_hoodie_commit_time = '$commit1Time'").count, 50)
+    assertEquals(snapshotQueryRes.where(s"_hoodie_commit_time = '$commit2Time'").count, 40)
+    assertEquals(snapshotQueryRes.where(s"_hoodie_commit_time = '$commit3Time'").count, 20)
+
+    assertEquals(snapshotQueryRes.where("partition = '2022-01-01'").count, 50)
+    assertEquals(snapshotQueryRes.where("partition = '2022-01-02'").count, 60)
+
+    // read_optimized query
+    val readOptimizedQueryRes = spark.read.format("hudi")
+      .option(DataSourceReadOptions.QUERY_TYPE.key, DataSourceReadOptions.QUERY_TYPE_READ_OPTIMIZED_OPT_VAL)
+      .load(basePath)
+    assertEquals(readOptimizedQueryRes.where("partition = '2022-01-01'").count, 50)
+    assertEquals(readOptimizedQueryRes.where("partition = '2022-01-02'").count, 60)
+
+    // incremental query
+    val incrementalQueryRes = spark.read.format("hudi")
+      .option(DataSourceReadOptions.QUERY_TYPE.key, DataSourceReadOptions.QUERY_TYPE_INCREMENTAL_OPT_VAL)
+      .option(DataSourceReadOptions.BEGIN_INSTANTTIME.key, commit2Time)
+      .option(DataSourceReadOptions.END_INSTANTTIME.key, commit3Time)
+      .load(basePath)
+    assertEquals(incrementalQueryRes.where("partition = '2022-01-01'").count, 0)
+    assertEquals(incrementalQueryRes.where("partition = '2022-01-02'").count, 20)
+  }
 }
diff
--git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestMetadataTableWithSparkDataSource.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestMetadataTableWithSparkDataSource.scala new file mode 100644 index 0000000000000..918202e974682 --- /dev/null +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestMetadataTableWithSparkDataSource.scala @@ -0,0 +1,98 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.functional + +import org.apache.hudi.DataSourceWriteOptions +import org.apache.hudi.common.config.HoodieMetadataConfig +import org.apache.hudi.common.testutils.HoodieTestDataGenerator +import org.apache.hudi.common.testutils.RawTripTestPayload.recordsToStrings +import org.apache.hudi.config.HoodieWriteConfig +import org.apache.hudi.testutils.SparkClientFunctionalTestHarness +import org.apache.spark.sql.SaveMode +import org.junit.jupiter.api.Assertions.assertEquals +import org.junit.jupiter.api.{Tag, Test} + +import scala.collection.JavaConverters._ + +@Tag("functional") +class TestMetadataTableWithSparkDataSource extends SparkClientFunctionalTestHarness { + + val hudi = "org.apache.hudi" + var commonOpts = Map( + "hoodie.insert.shuffle.parallelism" -> "4", + "hoodie.upsert.shuffle.parallelism" -> "4", + "hoodie.bulkinsert.shuffle.parallelism" -> "2", + "hoodie.delete.shuffle.parallelism" -> "1", + DataSourceWriteOptions.RECORDKEY_FIELD.key -> "_row_key", + DataSourceWriteOptions.PARTITIONPATH_FIELD.key -> "partition", + DataSourceWriteOptions.PRECOMBINE_FIELD.key -> "timestamp", + HoodieWriteConfig.TBL_NAME.key -> "hoodie_test" + ) + + @Test + def testReadability(): Unit = { + val dataGen = new HoodieTestDataGenerator() + + val opts: Map[String, String] = commonOpts ++ Map( + HoodieMetadataConfig.ENABLE.key -> "true", + HoodieMetadataConfig.COMPACT_NUM_DELTA_COMMITS.key -> "1" + ) + + // Insert records + val newRecords = dataGen.generateInserts("001", 100) + val newRecordsDF = parseRecords(recordsToStrings(newRecords).asScala) + + newRecordsDF.write.format(hudi) + .options(opts) + .option(DataSourceWriteOptions.OPERATION.key, DataSourceWriteOptions.INSERT_OPERATION_OPT_VAL) + .mode(SaveMode.Append) + .save(basePath) + + // Update records + val updatedRecords = dataGen.generateUpdates("002", newRecords) + val updatedRecordsDF = parseRecords(recordsToStrings(updatedRecords).asScala) + + updatedRecordsDF.write.format(hudi) + .options(opts) + .option(DataSourceWriteOptions.OPERATION.key, DataSourceWriteOptions.UPSERT_OPERATION_OPT_VAL) + .mode(SaveMode.Append) + .save(basePath) + + val metadataDF = spark.read.format(hudi).load(s"$basePath/.hoodie/metadata") + + // Smoke test + metadataDF.show() + + // Query w/ 0 
requested columns should be working fine + assertEquals(4, metadataDF.count()) + + val expectedKeys = Seq("2015/03/16", "2015/03/17", "2016/03/15", "__all_partitions__") + val keys = metadataDF.select("key") + .collect() + .map(_.getString(0)) + .toSeq + .sorted + + assertEquals(expectedKeys, keys) + } + + private def parseRecords(records: Seq[String]) = { + spark.read.json(spark.sparkContext.parallelize(records, 2)) + } +} diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestAlterTable.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestAlterTable.scala index 469b135959846..0f2cb547c2fe9 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestAlterTable.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestAlterTable.scala @@ -91,9 +91,10 @@ class TestAlterTable extends TestHoodieSqlBase { ) // change column's data type - spark.sql(s"alter table $newTableName change column id id bigint") - assertResult(StructType(Seq(StructField("id", LongType, nullable = true))))( - spark.sql(s"select id from $newTableName").schema) + checkExceptionContain(s"alter table $newTableName change column id id bigint") ( + "ALTER TABLE CHANGE COLUMN is not supported for changing column 'id'" + + " with type 'IntegerType' to 'id' with type 'LongType'" + ) // Insert data to the new table. spark.sql(s"insert into $newTableName values(2, 'a2', 12, 1000, 'e0')") diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestCallCommandParser.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestCallCommandParser.scala new file mode 100644 index 0000000000000..9d1c02ad99faa --- /dev/null +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestCallCommandParser.scala @@ -0,0 +1,116 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.hudi + +import com.google.common.collect.ImmutableList +import org.apache.spark.sql.catalyst.expressions.Literal +import org.apache.spark.sql.catalyst.plans.logical.{CallCommand, NamedArgument, PositionalArgument} +import org.apache.spark.sql.types.{DataType, DataTypes} + +import java.math.BigDecimal +import scala.collection.JavaConverters + +class TestCallCommandParser extends TestHoodieSqlBase { + private val parser = spark.sessionState.sqlParser + + test("Test Call Produce with Positional Arguments") { + val call = parser.parsePlan("CALL c.n.func(1, '2', 3L, true, 1.0D, 9.0e1, 900e-1BD)").asInstanceOf[CallCommand] + assertResult(ImmutableList.of("c", "n", "func"))(JavaConverters.seqAsJavaListConverter(call.name).asJava) + + assertResult(7)(call.args.size) + + checkArg(call, 0, 1, DataTypes.IntegerType) + checkArg(call, 1, "2", DataTypes.StringType) + checkArg(call, 2, 3L, DataTypes.LongType) + checkArg(call, 3, true, DataTypes.BooleanType) + checkArg(call, 4, 1.0D, DataTypes.DoubleType) + checkArg(call, 5, new BigDecimal("9.0e1"), DataTypes.createDecimalType(2, 0)) + checkArg(call, 6, new BigDecimal("900e-1"), DataTypes.createDecimalType(3, 1)) + } + + test("Test Call Produce with Named Arguments") { + val call = parser.parsePlan("CALL system.func(c1 => 1, c2 => '2', c3 => true)").asInstanceOf[CallCommand] + assertResult(ImmutableList.of("system", "func"))(JavaConverters.seqAsJavaListConverter(call.name).asJava) + + assertResult(3)(call.args.size) + + checkArg(call, 0, "c1", 1, DataTypes.IntegerType) + checkArg(call, 1, "c2", "2", DataTypes.StringType) + checkArg(call, 2, "c3", true, DataTypes.BooleanType) + } + + test("Test Call Produce with Var Substitution") { + val call = parser.parsePlan("CALL system.func('${spark.extra.prop}')").asInstanceOf[CallCommand] + assertResult(ImmutableList.of("system", "func"))(JavaConverters.seqAsJavaListConverter(call.name).asJava) + + assertResult(1)(call.args.size) + + checkArg(call, 0, "value", DataTypes.StringType) + } + + test("Test Call Produce with Mixed Arguments") { + val call = parser.parsePlan("CALL system.func(c1 => 1, '2')").asInstanceOf[CallCommand] + assertResult(ImmutableList.of("system", "func"))(JavaConverters.seqAsJavaListConverter(call.name).asJava) + + assertResult(2)(call.args.size) + + checkArg(call, 0, "c1", 1, DataTypes.IntegerType) + checkArg(call, 1, "2", DataTypes.StringType) + } + + test("Test Call Parse Error") { + checkParseExceptionContain("CALL cat.system radish kebab")("mismatched input 'CALL' expecting") + } + + protected def checkParseExceptionContain(sql: String)(errorMsg: String): Unit = { + var hasException = false + try { + parser.parsePlan(sql) + } catch { + case e: Throwable => + assertResult(true)(e.getMessage.contains(errorMsg)) + hasException = true + } + assertResult(true)(hasException) + } + + private def checkArg(call: CallCommand, index: Int, expectedValue: Any, expectedType: DataType): Unit = { + checkArg(call, index, null, expectedValue, expectedType) + } + + private def checkArg(call: CallCommand, index: Int, expectedName: String, expectedValue: Any, expectedType: DataType): Unit = { + if (expectedName != null) { + val arg = checkCast(call.args.apply(index), classOf[NamedArgument]) + assertResult(expectedName)(arg.name) + } + else { + val arg = call.args.apply(index) + checkCast(arg, classOf[PositionalArgument]) + } + val expectedExpr = toSparkLiteral(expectedValue, expectedType) + val actualExpr = call.args.apply(index).expr + 
assertResult(expectedExpr.dataType)(actualExpr.dataType) + } + + private def toSparkLiteral(value: Any, dataType: DataType) = Literal.apply(value, dataType) + + private def checkCast[T](value: Any, expectedClass: Class[T]) = { + assertResult(true)(expectedClass.isInstance(value)) + expectedClass.cast(value) + } +} diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestCallProcedure.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestCallProcedure.scala new file mode 100644 index 0000000000000..eb2c614df201b --- /dev/null +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestCallProcedure.scala @@ -0,0 +1,132 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.hudi + +class TestCallProcedure extends TestHoodieSqlBase { + + test("Test Call show_commits Procedure") { + withTempDir { tmp => + val tableName = generateTableName + // create table + spark.sql( + s""" + |create table $tableName ( + | id int, + | name string, + | price double, + | ts long + |) using hudi + | location '${tmp.getCanonicalPath}/$tableName' + | tblproperties ( + | primaryKey = 'id', + | preCombineField = 'ts' + | ) + """.stripMargin) + // insert data to table + spark.sql(s"insert into $tableName select 1, 'a1', 10, 1000") + spark.sql(s"insert into $tableName select 2, 'a2', 20, 1500") + + // Check required fields + checkExceptionContain(s"""call show_commits(limit => 10)""")( + s"Argument: table is required") + + // collect commits for table + val commits = spark.sql(s"""call show_commits(table => '$tableName', limit => 10)""").collect() + assertResult(2) { + commits.length + } + } + } + + test("Test Call show_commits_metadata Procedure") { + withTempDir { tmp => + val tableName = generateTableName + // create table + spark.sql( + s""" + |create table $tableName ( + | id int, + | name string, + | price double, + | ts long + |) using hudi + | location '${tmp.getCanonicalPath}/$tableName' + | tblproperties ( + | primaryKey = 'id', + | preCombineField = 'ts' + | ) + """.stripMargin) + // insert data to table + spark.sql(s"insert into $tableName select 1, 'a1', 10, 1000") + + // Check required fields + checkExceptionContain(s"""call show_commits_metadata(limit => 10)""")( + s"Argument: table is required") + + // collect commits for table + val commits = spark.sql(s"""call show_commits_metadata(table => '$tableName', limit => 10)""").collect() + assertResult(1) { + commits.length + } + } + } + + test("Test Call rollback_to_instant Procedure") { + withTempDir { tmp => + val tableName = generateTableName + // create table + spark.sql( + s""" + |create table $tableName ( + | id int, + | name string, + | price double, + | ts long + |) using hudi + | 
location '${tmp.getCanonicalPath}/$tableName' + | tblproperties ( + | primaryKey = 'id', + | preCombineField = 'ts' + | ) + """.stripMargin) + // insert data to table + spark.sql(s"insert into $tableName select 1, 'a1', 10, 1000") + spark.sql(s"insert into $tableName select 2, 'a2', 20, 1500") + spark.sql(s"insert into $tableName select 3, 'a3', 30, 2000") + + // Check required fields + checkExceptionContain(s"""call rollback_to_instant(table => '$tableName')""")( + s"Argument: instant_time is required") + + // 3 commits are left before rollback + var commits = spark.sql(s"""call show_commits(table => '$tableName', limit => 10)""").collect() + assertResult(3){commits.length} + + // Call rollback_to_instant Procedure with Named Arguments + var instant_time = commits(0).get(0).toString + checkAnswer(s"""call rollback_to_instant(table => '$tableName', instant_time => '$instant_time')""")(Seq(true)) + // Call rollback_to_instant Procedure with Positional Arguments + instant_time = commits(1).get(0).toString + checkAnswer(s"""call rollback_to_instant('$tableName', '$instant_time')""")(Seq(true)) + + // 1 commits are left after rollback + commits = spark.sql(s"""call show_commits(table => '$tableName', limit => 10)""").collect() + assertResult(1){commits.length} + } + } +} diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestHoodieSqlBase.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestHoodieSqlBase.scala index a5b49cc3683d0..ca3919599b6fa 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestHoodieSqlBase.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestHoodieSqlBase.scala @@ -18,8 +18,10 @@ package org.apache.spark.sql.hudi import org.apache.hadoop.fs.Path +import org.apache.hudi.HoodieSparkUtils import org.apache.hudi.common.fs.FSUtils import org.apache.log4j.Level +import org.apache.spark.SparkConf import org.apache.spark.sql.catalyst.util.DateTimeUtils import org.apache.spark.sql.{Row, SparkSession} import org.apache.spark.util.Utils @@ -49,10 +51,20 @@ class TestHoodieSqlBase extends FunSuite with BeforeAndAfterAll { .config("hoodie.delete.shuffle.parallelism", "4") .config("spark.sql.warehouse.dir", sparkWareHouse.getCanonicalPath) .config("spark.sql.session.timeZone", "CTT") + .config(sparkConf()) .getOrCreate() private var tableId = 0 + def sparkConf(): SparkConf = { + val sparkConf = new SparkConf() + if (HoodieSparkUtils.gteqSpark3_2) { + sparkConf.set("spark.sql.catalog.spark_catalog", + "org.apache.spark.sql.hudi.catalog.HoodieCatalog") + } + sparkConf + } + protected def withTempDir(f: File => Unit): Unit = { val tempDir = Utils.createTempDir() try f(tempDir) finally { diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestInsertTable.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestInsertTable.scala index 4d12d987ff3eb..b186381c25203 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestInsertTable.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestInsertTable.scala @@ -17,8 +17,12 @@ package org.apache.spark.sql.hudi -import org.apache.hudi.common.table.HoodieTableMetaClient +import org.apache.hudi.DataSourceWriteOptions.{KEYGENERATOR_CLASS_NAME, MOR_TABLE_TYPE_OPT_VAL, PARTITIONPATH_FIELD, PRECOMBINE_FIELD, RECORDKEY_FIELD, TABLE_TYPE} +import 
org.apache.hudi.common.table.{HoodieTableMetaClient, TableSchemaResolver} +import org.apache.hudi.config.HoodieWriteConfig import org.apache.hudi.exception.HoodieDuplicateKeyException +import org.apache.hudi.keygen.ComplexKeyGenerator +import org.apache.spark.sql.SaveMode import java.io.File @@ -582,8 +586,48 @@ class TestInsertTable extends TestHoodieSqlBase { checkAnswer(s"select id, name, price, ts from $tableName")( Seq(1, "a1", 11.0, 1000) ) - } } + test("Test For read operation's field") { + withTempDir { tmp => { + val tableName = generateTableName + val tablePath = s"${tmp.getCanonicalPath}/$tableName" + import spark.implicits._ + val day = "2021-08-02" + val df = Seq((1, "a1", 10, 1000, day, 12)).toDF("id", "name", "value", "ts", "day", "hh") + // Write a table by spark dataframe. + df.write.format("hudi") + .option(HoodieWriteConfig.TBL_NAME.key, tableName) + .option(TABLE_TYPE.key, MOR_TABLE_TYPE_OPT_VAL) + .option(RECORDKEY_FIELD.key, "id") + .option(PRECOMBINE_FIELD.key, "ts") + .option(PARTITIONPATH_FIELD.key, "day,hh") + .option(KEYGENERATOR_CLASS_NAME.key, classOf[ComplexKeyGenerator].getName) + .option(HoodieWriteConfig.INSERT_PARALLELISM_VALUE.key, "1") + .option(HoodieWriteConfig.UPSERT_PARALLELISM_VALUE.key, "1") + .option(HoodieWriteConfig.ALLOW_OPERATION_METADATA_FIELD.key, "true") + .mode(SaveMode.Overwrite) + .save(tablePath) + + val metaClient = HoodieTableMetaClient.builder() + .setBasePath(tablePath) + .setConf(spark.sessionState.newHadoopConf()) + .build() + + assertResult(true)(new TableSchemaResolver(metaClient).isHasOperationField) + + spark.sql( + s""" + |create table $tableName using hudi + |location '${tablePath}' + |""".stripMargin) + + // Note: spark sql batch write currently does not write actual content to the operation field + checkAnswer(s"select id, _hoodie_operation from $tableName")( + Seq(1, null) + ) + } + } + } } diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestMergeIntoTable.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestMergeIntoTable.scala index baac82f4bd153..28dee88e1f61e 100644 --- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestMergeIntoTable.scala +++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/TestMergeIntoTable.scala @@ -87,7 +87,7 @@ class TestMergeIntoTable extends TestHoodieSqlBase { | on s0.id = $tableName.id | when matched then update set | id = s0.id, name = s0.name, price = s0.price + $tableName.price, ts = s0.ts - | when not matched and id % 2 = 0 then insert * + | when not matched and s0.id % 2 = 0 then insert * """.stripMargin) checkAnswer(s"select id, name, price, ts from $tableName")( Seq(1, "a1", 30.0, 1002), @@ -102,9 +102,9 @@ class TestMergeIntoTable extends TestHoodieSqlBase { | select 1 as id, 'a1' as name, 12 as price, 1003 as ts | ) s0 | on s0.id = $tableName.id - | when matched and id != 1 then update set + | when matched and s0.id != 1 then update set | id = s0.id, name = s0.name, price = s0.price, ts = s0.ts - | when matched and id = 1 then delete + | when matched and s0.id = 1 then delete | when not matched then insert * """.stripMargin) val cnt = spark.sql(s"select * from $tableName where id = 1").count() @@ -178,7 +178,7 @@ class TestMergeIntoTable extends TestHoodieSqlBase { | ) | ) s0 | on s0.s_id = t0.id - | when matched and ts = 1001 then update set id = s0.s_id, name = t0.name, price = + | when matched and s0.ts = 1001 then update set id = s0.s_id, name = 
t0.name, price = | s0.price, ts = s0.ts """.stripMargin ) @@ -233,7 +233,7 @@ class TestMergeIntoTable extends TestHoodieSqlBase { | select 1 as id, 'a1' as name, 12 as price, 1001 as ts, '2021-03-21' as dt | ) as s0 | on t0.id = s0.id - | when matched and id % 2 = 0 then update set * + | when matched and s0.id % 2 = 0 then update set * """.stripMargin ) checkAnswer(s"select id,name,price,dt from $tableName")( @@ -488,7 +488,7 @@ class TestMergeIntoTable extends TestHoodieSqlBase { |merge into $targetTable t0 |using $sourceTable s0 |on t0.id = s0.id - |when matched and cast(_ts as string) > '1000' then update set * + |when matched and cast(s0._ts as string) > '1000' then update set * """.stripMargin) checkAnswer(s"select id, name, price, _ts from $targetTable")( Seq(1, "a1", 12, 1001) @@ -512,7 +512,7 @@ class TestMergeIntoTable extends TestHoodieSqlBase { |using $sourceTable s0 |on t0.id = s0.id |when matched then update set * - |when not matched and name = 'a2' then insert * + |when not matched and s0.name = 'a2' then insert * """.stripMargin) checkAnswer(s"select id, name, price, _ts from $targetTable order by id")( Seq(1, "a1", 12, 1001), diff --git a/hudi-spark-datasource/hudi-spark2/pom.xml b/hudi-spark-datasource/hudi-spark2/pom.xml index 2283603542ee8..3fb6cf3dd65ba 100644 --- a/hudi-spark-datasource/hudi-spark2/pom.xml +++ b/hudi-spark-datasource/hudi-spark2/pom.xml @@ -199,6 +199,15 @@ org.apache.spark spark-sql_${scala.binary.version} ${spark2.version} + provided + true + + + + org.apache.spark + spark-avro_${scala.binary.version} + ${spark2.version} + provided true diff --git a/hudi-spark-datasource/hudi-spark2/src/main/scala/org/apache/spark/sql/adapter/Spark2Adapter.scala b/hudi-spark-datasource/hudi-spark2/src/main/scala/org/apache/spark/sql/adapter/Spark2Adapter.scala index bf1cd24484c1a..5dfa7d9574d9a 100644 --- a/hudi-spark-datasource/hudi-spark2/src/main/scala/org/apache/spark/sql/adapter/Spark2Adapter.scala +++ b/hudi-spark-datasource/hudi-spark2/src/main/scala/org/apache/spark/sql/adapter/Spark2Adapter.scala @@ -17,8 +17,10 @@ package org.apache.spark.sql.adapter +import org.apache.avro.Schema import org.apache.hudi.Spark2RowSerDe import org.apache.hudi.client.utils.SparkRowSerDe +import org.apache.spark.sql.avro.{HoodieAvroDeserializerTrait, HoodieAvroSerializerTrait, Spark2HoodieAvroDeserializer, HoodieAvroSerializer} import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder import org.apache.spark.sql.catalyst.expressions.{Expression, Like} @@ -26,17 +28,26 @@ import org.apache.spark.sql.catalyst.parser.ParserInterface import org.apache.spark.sql.catalyst.plans.JoinType import org.apache.spark.sql.catalyst.plans.logical.{InsertIntoTable, Join, LogicalPlan} import org.apache.spark.sql.catalyst.{AliasIdentifier, TableIdentifier} -import org.apache.spark.sql.execution.datasources.{Spark2ParsePartitionUtil, SparkParsePartitionUtil} +import org.apache.spark.sql.execution.datasources.{FilePartition, PartitionedFile, Spark2ParsePartitionUtil, SparkParsePartitionUtil} import org.apache.spark.sql.hudi.SparkAdapter import org.apache.spark.sql.hudi.parser.HoodieSpark2ExtendedSqlParser import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.types.DataType import org.apache.spark.sql.{Row, SparkSession} +import scala.collection.mutable.ArrayBuffer + /** * The adapter for spark2. 
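+ * It now also constructs Hoodie's Avro serializer/deserializer for this Spark version and
+ * back-ports FilePartition#getFilePartitions from Spark 3.2, which Spark 2 does not provide.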
*/ class Spark2Adapter extends SparkAdapter { + def createAvroSerializer(rootCatalystType: DataType, rootAvroType: Schema, nullable: Boolean): HoodieAvroSerializerTrait = + new HoodieAvroSerializer(rootCatalystType, rootAvroType, nullable) + + def createAvroDeserializer(rootAvroType: Schema, rootCatalystType: DataType): HoodieAvroDeserializerTrait = + new Spark2HoodieAvroDeserializer(rootAvroType, rootCatalystType) + override def createSparkRowSerDe(encoder: ExpressionEncoder[Row]): SparkRowSerDe = { new Spark2RowSerDe(encoder) } @@ -86,4 +97,44 @@ class Spark2Adapter extends SparkAdapter { override def parseMultipartIdentifier(parser: ParserInterface, sqlText: String): Seq[String] = { throw new IllegalStateException(s"Should not call ParserInterface#parseMultipartIdentifier for spark2") } + + /** + * Combine [[PartitionedFile]] to [[FilePartition]] according to `maxSplitBytes`. + * + * This is a copy of org.apache.spark.sql.execution.datasources.FilePartition#getFilePartitions from Spark 3.2. + * And this will be called only in Spark 2. + */ + override def getFilePartitions( + sparkSession: SparkSession, + partitionedFiles: Seq[PartitionedFile], + maxSplitBytes: Long): Seq[FilePartition] = { + + val partitions = new ArrayBuffer[FilePartition] + val currentFiles = new ArrayBuffer[PartitionedFile] + var currentSize = 0L + + /** Close the current partition and move to the next. */ + def closePartition(): Unit = { + if (currentFiles.nonEmpty) { + // Copy to a new Array. + val newPartition = FilePartition(partitions.size, currentFiles.toArray) + partitions += newPartition + } + currentFiles.clear() + currentSize = 0 + } + + val openCostInBytes = sparkSession.sessionState.conf.filesOpenCostInBytes + // Assign files to partitions using "Next Fit Decreasing" + partitionedFiles.foreach { file => + if (currentSize + file.length > maxSplitBytes) { + closePartition() + } + // Add the given file to the current partition. + currentSize += file.length + openCostInBytes + currentFiles += file + } + closePartition() + partitions.toSeq + } } diff --git a/hudi-spark-datasource/hudi-spark2/src/main/scala/org/apache/spark/sql/avro/PatchedAvroDeserializer.scala b/hudi-spark-datasource/hudi-spark2/src/main/scala/org/apache/spark/sql/avro/PatchedAvroDeserializer.scala new file mode 100644 index 0000000000000..8d9948c58cdd8 --- /dev/null +++ b/hudi-spark-datasource/hudi-spark2/src/main/scala/org/apache/spark/sql/avro/PatchedAvroDeserializer.scala @@ -0,0 +1,398 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.avro + +import org.apache.avro.Conversions.DecimalConversion +import org.apache.avro.LogicalTypes.{TimestampMicros, TimestampMillis} +import org.apache.avro.Schema.Type._ +import org.apache.avro.generic._ +import org.apache.avro.util.Utf8 +import org.apache.avro.{LogicalTypes, Schema, SchemaBuilder} +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions.{SpecificInternalRow, UnsafeArrayData} +import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, ArrayData, DateTimeUtils, GenericArrayData} +import org.apache.spark.sql.types._ +import org.apache.spark.unsafe.types.UTF8String + +import java.math.BigDecimal +import java.nio.ByteBuffer +import scala.collection.JavaConverters._ +import scala.collection.mutable.ArrayBuffer + +/** + * A deserializer to deserialize data in avro format to data in catalyst format. + * + * NOTE: This is a version of {@code AvroDeserializer} impl from Spark 2.4.4 w/ the fix for SPARK-30267 + * applied on top of it + */ +class PatchedAvroDeserializer(rootAvroType: Schema, rootCatalystType: DataType) { + private lazy val decimalConversions = new DecimalConversion() + + private val converter: Any => Any = rootCatalystType match { + // A shortcut for empty schema. + case st: StructType if st.isEmpty => + (data: Any) => InternalRow.empty + + case st: StructType => + val resultRow = new SpecificInternalRow(st.map(_.dataType)) + val fieldUpdater = new RowUpdater(resultRow) + val writer = getRecordWriter(rootAvroType, st, Nil) + (data: Any) => { + val record = data.asInstanceOf[GenericRecord] + writer(fieldUpdater, record) + resultRow + } + + case _ => + val tmpRow = new SpecificInternalRow(Seq(rootCatalystType)) + val fieldUpdater = new RowUpdater(tmpRow) + val writer = newWriter(rootAvroType, rootCatalystType, Nil) + (data: Any) => { + writer(fieldUpdater, 0, data) + tmpRow.get(0, rootCatalystType) + } + } + + def deserialize(data: Any): Any = converter(data) + + /** + * Creates a writer to write avro values to Catalyst values at the given ordinal with the given + * updater. + */ + private def newWriter( + avroType: Schema, + catalystType: DataType, + path: List[String]): (CatalystDataUpdater, Int, Any) => Unit = + (avroType.getType, catalystType) match { + case (NULL, NullType) => (updater, ordinal, _) => + updater.setNullAt(ordinal) + + // TODO: we can avoid boxing if future version of avro provide primitive accessors. + case (BOOLEAN, BooleanType) => (updater, ordinal, value) => + updater.setBoolean(ordinal, value.asInstanceOf[Boolean]) + + case (INT, IntegerType) => (updater, ordinal, value) => + updater.setInt(ordinal, value.asInstanceOf[Int]) + + case (INT, DateType) => (updater, ordinal, value) => + updater.setInt(ordinal, value.asInstanceOf[Int]) + + case (LONG, LongType) => (updater, ordinal, value) => + updater.setLong(ordinal, value.asInstanceOf[Long]) + + case (LONG, TimestampType) => avroType.getLogicalType match { + case _: TimestampMillis => (updater, ordinal, value) => + updater.setLong(ordinal, value.asInstanceOf[Long] * 1000) + case _: TimestampMicros => (updater, ordinal, value) => + updater.setLong(ordinal, value.asInstanceOf[Long]) + case null => (updater, ordinal, value) => + // For backward compatibility, if the Avro type is Long and it is not logical type, + // the value is processed as timestamp type with millisecond precision. 
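+        // (Illustration: Catalyst's TimestampType stores microseconds, so a logical-type-less
+        // long like 1640995200000, the millis for 2022-01-01T00:00:00Z, is scaled below to
+        // the microsecond value 1640995200000000.)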
+ updater.setLong(ordinal, value.asInstanceOf[Long] * 1000) + case other => throw new IncompatibleSchemaException( + s"Cannot convert Avro logical type ${other} to Catalyst Timestamp type.") + } + + // Before we upgrade Avro to 1.8 for logical type support, spark-avro converts Long to Date. + // For backward compatibility, we still keep this conversion. + case (LONG, DateType) => (updater, ordinal, value) => + updater.setInt(ordinal, (value.asInstanceOf[Long] / DateTimeUtils.MILLIS_PER_DAY).toInt) + + case (FLOAT, FloatType) => (updater, ordinal, value) => + updater.setFloat(ordinal, value.asInstanceOf[Float]) + + case (DOUBLE, DoubleType) => (updater, ordinal, value) => + updater.setDouble(ordinal, value.asInstanceOf[Double]) + + case (STRING, StringType) => (updater, ordinal, value) => + val str = value match { + case s: String => UTF8String.fromString(s) + case s: Utf8 => + val bytes = new Array[Byte](s.getByteLength) + System.arraycopy(s.getBytes, 0, bytes, 0, s.getByteLength) + UTF8String.fromBytes(bytes) + } + updater.set(ordinal, str) + + case (ENUM, StringType) => (updater, ordinal, value) => + updater.set(ordinal, UTF8String.fromString(value.toString)) + + case (FIXED, BinaryType) => (updater, ordinal, value) => + updater.set(ordinal, value.asInstanceOf[GenericFixed].bytes().clone()) + + case (BYTES, BinaryType) => (updater, ordinal, value) => + val bytes = value match { + case b: ByteBuffer => + val bytes = new Array[Byte](b.remaining) + b.get(bytes) + bytes + case b: Array[Byte] => b + case other => throw new RuntimeException(s"$other is not a valid avro binary.") + } + updater.set(ordinal, bytes) + + case (FIXED, d: DecimalType) => (updater, ordinal, value) => + val bigDecimal = decimalConversions.fromFixed(value.asInstanceOf[GenericFixed], avroType, + LogicalTypes.decimal(d.precision, d.scale)) + val decimal = createDecimal(bigDecimal, d.precision, d.scale) + updater.setDecimal(ordinal, decimal) + + case (BYTES, d: DecimalType) => (updater, ordinal, value) => + val bigDecimal = decimalConversions.fromBytes(value.asInstanceOf[ByteBuffer], avroType, + LogicalTypes.decimal(d.precision, d.scale)) + val decimal = createDecimal(bigDecimal, d.precision, d.scale) + updater.setDecimal(ordinal, decimal) + + case (RECORD, st: StructType) => + val writeRecord = getRecordWriter(avroType, st, path) + (updater, ordinal, value) => + val row = new SpecificInternalRow(st) + writeRecord(new RowUpdater(row), value.asInstanceOf[GenericRecord]) + updater.set(ordinal, row) + + case (ARRAY, ArrayType(elementType, containsNull)) => + val elementWriter = newWriter(avroType.getElementType, elementType, path) + val elementPath = path :+ "element" + (updater, ordinal, value) => + val collection = value.asInstanceOf[java.util.Collection[Any]] + val len = collection.size() + val result = createArrayData(elementType, len) + val elementUpdater = new ArrayDataUpdater(result) + + var i = 0 + val iter = collection.iterator() + while (iter.hasNext) { + val element = iter.next() + if (element == null) { + if (!containsNull) { + throw new RuntimeException( + s"Array value at path '${elementPath.mkString(".")}' is not allowed to be null") + } else { + elementUpdater.setNullAt(i) + } + } else { + elementWriter(elementUpdater, i, element) + } + i += 1 + } + + updater.set(ordinal, result) + + case (MAP, MapType(keyType, valueType, valueContainsNull)) if keyType == StringType => + val keyWriter = newWriter(SchemaBuilder.builder().stringType(), StringType, path) + val valueWriter = newWriter(avroType.getValueType, 
valueType, path) + (updater, ordinal, value) => + val map = value.asInstanceOf[java.util.Map[AnyRef, AnyRef]] + val keyArray = createArrayData(keyType, map.size()) + val keyUpdater = new ArrayDataUpdater(keyArray) + val valueArray = createArrayData(valueType, map.size()) + val valueUpdater = new ArrayDataUpdater(valueArray) + val iter = map.entrySet().iterator() + var i = 0 + while (iter.hasNext) { + val entry = iter.next() + assert(entry.getKey != null) + keyWriter(keyUpdater, i, entry.getKey) + if (entry.getValue == null) { + if (!valueContainsNull) { + throw new RuntimeException(s"Map value at path ${path.mkString(".")} is not " + + "allowed to be null") + } else { + valueUpdater.setNullAt(i) + } + } else { + valueWriter(valueUpdater, i, entry.getValue) + } + i += 1 + } + + updater.set(ordinal, new ArrayBasedMapData(keyArray, valueArray)) + + case (UNION, _) => + val allTypes = avroType.getTypes.asScala + val nonNullTypes = allTypes.filter(_.getType != NULL) + val nonNullAvroType = Schema.createUnion(nonNullTypes.asJava) + if (nonNullTypes.nonEmpty) { + if (nonNullTypes.length == 1) { + newWriter(nonNullTypes.head, catalystType, path) + } else { + nonNullTypes.map(_.getType) match { + case Seq(a, b) if Set(a, b) == Set(INT, LONG) && catalystType == LongType => + (updater, ordinal, value) => value match { + case null => updater.setNullAt(ordinal) + case l: java.lang.Long => updater.setLong(ordinal, l) + case i: java.lang.Integer => updater.setLong(ordinal, i.longValue()) + } + + case Seq(a, b) if Set(a, b) == Set(FLOAT, DOUBLE) && catalystType == DoubleType => + (updater, ordinal, value) => value match { + case null => updater.setNullAt(ordinal) + case d: java.lang.Double => updater.setDouble(ordinal, d) + case f: java.lang.Float => updater.setDouble(ordinal, f.doubleValue()) + } + + case _ => + catalystType match { + case st: StructType if st.length == nonNullTypes.size => + val fieldWriters = nonNullTypes.zip(st.fields).map { + case (schema, field) => newWriter(schema, field.dataType, path :+ field.name) + }.toArray + (updater, ordinal, value) => { + val row = new SpecificInternalRow(st) + val fieldUpdater = new RowUpdater(row) + val i = GenericData.get().resolveUnion(nonNullAvroType, value) + fieldWriters(i)(fieldUpdater, i, value) + updater.set(ordinal, row) + } + + case _ => + throw new IncompatibleSchemaException( + s"Cannot convert Avro to catalyst because schema at path " + + s"${path.mkString(".")} is not compatible " + + s"(avroType = $avroType, sqlType = $catalystType).\n" + + s"Source Avro schema: $rootAvroType.\n" + + s"Target Catalyst type: $rootCatalystType") + } + } + } + } else { + (updater, ordinal, value) => updater.setNullAt(ordinal) + } + + case _ => + throw new IncompatibleSchemaException( + s"Cannot convert Avro to catalyst because schema at path ${path.mkString(".")} " + + s"is not compatible (avroType = $avroType, sqlType = $catalystType).\n" + + s"Source Avro schema: $rootAvroType.\n" + + s"Target Catalyst type: $rootCatalystType") + } + + // TODO: move the following method in Decimal object on creating Decimal from BigDecimal? + private def createDecimal(decimal: BigDecimal, precision: Int, scale: Int): Decimal = { + if (precision <= Decimal.MAX_LONG_DIGITS) { + // Constructs a `Decimal` with an unscaled `Long` value if possible. + Decimal(decimal.unscaledValue().longValue(), precision, scale) + } else { + // Otherwise, resorts to an unscaled `BigInteger` instead. 
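+      // (Illustration: 123.45 at precision 5, scale 2 has unscaled value 12345 and takes the
+      // Long path above; anything wider than Decimal.MAX_LONG_DIGITS, i.e. 18 digits, lands here.)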
+ Decimal(decimal, precision, scale) + } + } + + private def getRecordWriter( + avroType: Schema, + sqlType: StructType, + path: List[String]): (CatalystDataUpdater, GenericRecord) => Unit = { + val validFieldIndexes = ArrayBuffer.empty[Int] + val fieldWriters = ArrayBuffer.empty[(CatalystDataUpdater, Any) => Unit] + + val length = sqlType.length + var i = 0 + while (i < length) { + val sqlField = sqlType.fields(i) + val avroField = avroType.getField(sqlField.name) + if (avroField != null) { + validFieldIndexes += avroField.pos() + + val baseWriter = newWriter(avroField.schema(), sqlField.dataType, path :+ sqlField.name) + val ordinal = i + val fieldWriter = (fieldUpdater: CatalystDataUpdater, value: Any) => { + if (value == null) { + fieldUpdater.setNullAt(ordinal) + } else { + baseWriter(fieldUpdater, ordinal, value) + } + } + fieldWriters += fieldWriter + } else if (!sqlField.nullable) { + throw new IncompatibleSchemaException( + s""" + |Cannot find non-nullable field ${path.mkString(".")}.${sqlField.name} in Avro schema. + |Source Avro schema: $rootAvroType. + |Target Catalyst type: $rootCatalystType. + """.stripMargin) + } + i += 1 + } + + (fieldUpdater, record) => { + var i = 0 + while (i < validFieldIndexes.length) { + fieldWriters(i)(fieldUpdater, record.get(validFieldIndexes(i))) + i += 1 + } + } + } + + private def createArrayData(elementType: DataType, length: Int): ArrayData = elementType match { + case BooleanType => UnsafeArrayData.fromPrimitiveArray(new Array[Boolean](length)) + case ByteType => UnsafeArrayData.fromPrimitiveArray(new Array[Byte](length)) + case ShortType => UnsafeArrayData.fromPrimitiveArray(new Array[Short](length)) + case IntegerType => UnsafeArrayData.fromPrimitiveArray(new Array[Int](length)) + case LongType => UnsafeArrayData.fromPrimitiveArray(new Array[Long](length)) + case FloatType => UnsafeArrayData.fromPrimitiveArray(new Array[Float](length)) + case DoubleType => UnsafeArrayData.fromPrimitiveArray(new Array[Double](length)) + case _ => new GenericArrayData(new Array[Any](length)) + } + + /** + * A base interface for updating values inside catalyst data structure like `InternalRow` and + * `ArrayData`. 
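+ * This lets the same writer closures fill a SpecificInternalRow through a RowUpdater or a
+ * pre-sized ArrayData through an ArrayDataUpdater without knowing which container they target.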
+ */ + sealed trait CatalystDataUpdater { + def set(ordinal: Int, value: Any): Unit + + def setNullAt(ordinal: Int): Unit = set(ordinal, null) + def setBoolean(ordinal: Int, value: Boolean): Unit = set(ordinal, value) + def setByte(ordinal: Int, value: Byte): Unit = set(ordinal, value) + def setShort(ordinal: Int, value: Short): Unit = set(ordinal, value) + def setInt(ordinal: Int, value: Int): Unit = set(ordinal, value) + def setLong(ordinal: Int, value: Long): Unit = set(ordinal, value) + def setDouble(ordinal: Int, value: Double): Unit = set(ordinal, value) + def setFloat(ordinal: Int, value: Float): Unit = set(ordinal, value) + def setDecimal(ordinal: Int, value: Decimal): Unit = set(ordinal, value) + } + + final class RowUpdater(row: InternalRow) extends CatalystDataUpdater { + override def set(ordinal: Int, value: Any): Unit = row.update(ordinal, value) + + override def setNullAt(ordinal: Int): Unit = row.setNullAt(ordinal) + override def setBoolean(ordinal: Int, value: Boolean): Unit = row.setBoolean(ordinal, value) + override def setByte(ordinal: Int, value: Byte): Unit = row.setByte(ordinal, value) + override def setShort(ordinal: Int, value: Short): Unit = row.setShort(ordinal, value) + override def setInt(ordinal: Int, value: Int): Unit = row.setInt(ordinal, value) + override def setLong(ordinal: Int, value: Long): Unit = row.setLong(ordinal, value) + override def setDouble(ordinal: Int, value: Double): Unit = row.setDouble(ordinal, value) + override def setFloat(ordinal: Int, value: Float): Unit = row.setFloat(ordinal, value) + override def setDecimal(ordinal: Int, value: Decimal): Unit = + row.setDecimal(ordinal, value, value.precision) + } + + final class ArrayDataUpdater(array: ArrayData) extends CatalystDataUpdater { + override def set(ordinal: Int, value: Any): Unit = array.update(ordinal, value) + + override def setNullAt(ordinal: Int): Unit = array.setNullAt(ordinal) + override def setBoolean(ordinal: Int, value: Boolean): Unit = array.setBoolean(ordinal, value) + override def setByte(ordinal: Int, value: Byte): Unit = array.setByte(ordinal, value) + override def setShort(ordinal: Int, value: Short): Unit = array.setShort(ordinal, value) + override def setInt(ordinal: Int, value: Int): Unit = array.setInt(ordinal, value) + override def setLong(ordinal: Int, value: Long): Unit = array.setLong(ordinal, value) + override def setDouble(ordinal: Int, value: Double): Unit = array.setDouble(ordinal, value) + override def setFloat(ordinal: Int, value: Float): Unit = array.setFloat(ordinal, value) + override def setDecimal(ordinal: Int, value: Decimal): Unit = array.update(ordinal, value) + } +} diff --git a/hudi-spark-datasource/hudi-spark2/src/main/scala/org/apache/spark/sql/avro/Spark2HoodieAvroDeserializer.scala b/hudi-spark-datasource/hudi-spark2/src/main/scala/org/apache/spark/sql/avro/Spark2HoodieAvroDeserializer.scala new file mode 100644 index 0000000000000..ac2c82f70dacf --- /dev/null +++ b/hudi-spark-datasource/hudi-spark2/src/main/scala/org/apache/spark/sql/avro/Spark2HoodieAvroDeserializer.scala @@ -0,0 +1,33 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.avro + +import org.apache.avro.Schema +import org.apache.spark.sql.types.DataType + +/** + * This is Spark 2 implementation for the [[HoodieAvroDeserializerTrait]] leveraging [[PatchedAvroDeserializer]], + * which is just copied over version of [[AvroDeserializer]] from Spark 2.4.4 w/ SPARK-30267 being back-ported to it + */ +class Spark2HoodieAvroDeserializer(rootAvroType: Schema, rootCatalystType: DataType) + extends HoodieAvroDeserializerTrait { + + private val avroDeserializer = new PatchedAvroDeserializer(rootAvroType, rootCatalystType) + + def doDeserialize(data: Any): Any = avroDeserializer.deserialize(data) +} diff --git a/hudi-spark-datasource/hudi-spark3-common/pom.xml b/hudi-spark-datasource/hudi-spark3-common/pom.xml index affa987372963..30e7bda2e2eb9 100644 --- a/hudi-spark-datasource/hudi-spark3-common/pom.xml +++ b/hudi-spark-datasource/hudi-spark3-common/pom.xml @@ -168,6 +168,15 @@ org.apache.spark spark-sql_2.12 ${spark3.version} + provided + true + + + + org.apache.spark + spark-avro_2.12 + ${spark3.version} + provided true @@ -244,4 +253,4 @@ - \ No newline at end of file + diff --git a/hudi-spark-datasource/hudi-spark3-common/src/main/scala/org/apache/spark/sql/HoodieSpark3SqlUtils.scala b/hudi-spark-datasource/hudi-spark3-common/src/main/scala/org/apache/spark/sql/HoodieSpark3SqlUtils.scala new file mode 100644 index 0000000000000..c4c6fd682df5f --- /dev/null +++ b/hudi-spark-datasource/hudi-spark3-common/src/main/scala/org/apache/spark/sql/HoodieSpark3SqlUtils.scala @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql + +import org.apache.hudi.exception.HoodieException +import org.apache.spark.sql.catalyst.catalog.BucketSpec +import org.apache.spark.sql.connector.expressions.{BucketTransform, FieldReference, IdentityTransform, Transform} + +import scala.collection.mutable + +object HoodieSpark3SqlUtils { + def convertTransforms(partitions: Seq[Transform]): (Seq[String], Option[BucketSpec]) = { + val identityCols = new mutable.ArrayBuffer[String] + var bucketSpec = Option.empty[BucketSpec] + + partitions.map { + case IdentityTransform(FieldReference(Seq(col))) => + identityCols += col + + + case BucketTransform(numBuckets, FieldReference(Seq(col))) => + bucketSpec = Some(BucketSpec(numBuckets, col :: Nil, Nil)) + + case _ => + throw new HoodieException(s"Partitioning by expressions is not supported.") + } + + (identityCols, bucketSpec) + } +} diff --git a/hudi-spark-datasource/hudi-spark3-common/src/main/scala/org/apache/spark/sql/adapter/Spark3Adapter.scala b/hudi-spark-datasource/hudi-spark3-common/src/main/scala/org/apache/spark/sql/adapter/Spark3Adapter.scala index 61fcc9634f3f3..8f073bb1cdaaf 100644 --- a/hudi-spark-datasource/hudi-spark3-common/src/main/scala/org/apache/spark/sql/adapter/Spark3Adapter.scala +++ b/hudi-spark-datasource/hudi-spark3-common/src/main/scala/org/apache/spark/sql/adapter/Spark3Adapter.scala @@ -17,10 +17,11 @@ package org.apache.spark.sql.adapter +import org.apache.avro.Schema import org.apache.hudi.Spark3RowSerDe import org.apache.hudi.client.utils.SparkRowSerDe import org.apache.hudi.spark3.internal.ReflectUtil -import org.apache.spark.sql.{Row, SparkSession} +import org.apache.spark.sql.avro.{HoodieAvroDeserializerTrait, HoodieAvroSerializerTrait, Spark3HoodieAvroDeserializer, HoodieAvroSerializer} import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder import org.apache.spark.sql.catalyst.expressions.{Expression, Like} @@ -30,16 +31,24 @@ import org.apache.spark.sql.catalyst.plans.logical.{InsertIntoStatement, Join, J import org.apache.spark.sql.catalyst.{AliasIdentifier, TableIdentifier} import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ import org.apache.spark.sql.connector.catalog.Table +import org.apache.spark.sql.execution.datasources._ import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation -import org.apache.spark.sql.execution.datasources.{LogicalRelation, Spark3ParsePartitionUtil, SparkParsePartitionUtil} import org.apache.spark.sql.hudi.SparkAdapter import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.types.DataType +import org.apache.spark.sql.{Row, SparkSession} /** * The adapter for spark3. 
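+ * It constructs Hoodie's Avro serializer/deserializer for Spark 3 and, unlike Spark2Adapter,
+ * can delegate file-partition planning straight to Spark's own FilePartition#getFilePartitions.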
*/ class Spark3Adapter extends SparkAdapter { + def createAvroSerializer(rootCatalystType: DataType, rootAvroType: Schema, nullable: Boolean): HoodieAvroSerializerTrait = + new HoodieAvroSerializer(rootCatalystType, rootAvroType, nullable) + + def createAvroDeserializer(rootAvroType: Schema, rootCatalystType: DataType): HoodieAvroDeserializerTrait = + new Spark3HoodieAvroDeserializer(rootAvroType, rootCatalystType) + override def createSparkRowSerDe(encoder: ExpressionEncoder[Row]): SparkRowSerDe = { new Spark3RowSerDe(encoder) } @@ -94,4 +103,24 @@ class Spark3Adapter extends SparkAdapter { override def parseMultipartIdentifier(parser: ParserInterface, sqlText: String): Seq[String] = { parser.parseMultipartIdentifier(sqlText) } + + /** + * Combine [[PartitionedFile]] to [[FilePartition]] according to `maxSplitBytes`. + */ + override def getFilePartitions( + sparkSession: SparkSession, + partitionedFiles: Seq[PartitionedFile], + maxSplitBytes: Long): Seq[FilePartition] = { + FilePartition.getFilePartitions(sparkSession, partitionedFiles, maxSplitBytes) + } + + override def isHoodieTable(table: LogicalPlan, spark: SparkSession): Boolean = { + tripAlias(table) match { + case LogicalRelation(_, _, Some(tbl), _) => isHoodieTable(tbl) + case relation: UnresolvedRelation => + isHoodieTable(toTableIdentifier(relation), spark) + case DataSourceV2Relation(table: Table, _, _, _, _) => isHoodieTable(table.properties()) + case _=> false + } + } } diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/avro/HoodieAvroDeserializer.scala b/hudi-spark-datasource/hudi-spark3-common/src/main/scala/org/apache/spark/sql/avro/Spark3HoodieAvroDeserializer.scala similarity index 67% rename from hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/avro/HoodieAvroDeserializer.scala rename to hudi-spark-datasource/hudi-spark3-common/src/main/scala/org/apache/spark/sql/avro/Spark3HoodieAvroDeserializer.scala index 1678dc05da4f8..fa03f5d841cfb 100644 --- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/avro/HoodieAvroDeserializer.scala +++ b/hudi-spark-datasource/hudi-spark3-common/src/main/scala/org/apache/spark/sql/avro/Spark3HoodieAvroDeserializer.scala @@ -18,20 +18,15 @@ package org.apache.spark.sql.avro import org.apache.avro.Schema - import org.apache.hudi.HoodieSparkUtils - import org.apache.spark.sql.types.DataType -/** - * This is to be compatible with the type returned by Spark 3.1 - * and other spark versions for AvroDeserializer - */ -case class HoodieAvroDeserializer(rootAvroType: Schema, rootCatalystType: DataType) { +class Spark3HoodieAvroDeserializer(rootAvroType: Schema, rootCatalystType: DataType) + extends HoodieAvroDeserializerTrait { + // SPARK-34404: As of Spark3.2, there is no AvroDeserializer's constructor with Schema and DataType arguments. + // So use the reflection to get AvroDeserializer instance. private val avroDeserializer = if (HoodieSparkUtils.isSpark3_2) { - // SPARK-34404: As of Spark3.2, there is no AvroDeserializer's constructor with Schema and DataType arguments. - // So use the reflection to get AvroDeserializer instance. 
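+    // The extra String argument is Spark 3.2's datetime rebase mode; "EXCEPTION" should make
+    // the deserializer fail fast rather than silently rebase ancient dates between calendars.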
val constructor = classOf[AvroDeserializer].getConstructor(classOf[Schema], classOf[DataType], classOf[String]) constructor.newInstance(rootAvroType, rootCatalystType, "EXCEPTION") } else { @@ -39,10 +34,5 @@ case class HoodieAvroDeserializer(rootAvroType: Schema, rootCatalystType: DataTy constructor.newInstance(rootAvroType, rootCatalystType) } - def deserializeData(data: Any): Any = { - avroDeserializer.deserialize(data) match { - case Some(r) => r // As of spark 3.1, this will return data wrapped with Option, so we fetch the data. - case o => o // for other spark version, return the data directly. - } - } + def doDeserialize(data: Any): Any = avroDeserializer.deserialize(data) } diff --git a/hudi-spark-datasource/hudi-spark3/pom.xml b/hudi-spark-datasource/hudi-spark3/pom.xml index d8dba8384886c..722a1b4101241 100644 --- a/hudi-spark-datasource/hudi-spark3/pom.xml +++ b/hudi-spark-datasource/hudi-spark3/pom.xml @@ -158,6 +158,7 @@ org.apache.spark spark-sql_2.12 ${spark3.version} + provided true diff --git a/hudi-spark-datasource/hudi-spark3/src/main/scala/org/apache/hudi/Spark3DefaultSource.scala b/hudi-spark-datasource/hudi-spark3/src/main/scala/org/apache/hudi/Spark3DefaultSource.scala index b553790878e42..d94fee1f410ae 100644 --- a/hudi-spark-datasource/hudi-spark3/src/main/scala/org/apache/hudi/Spark3DefaultSource.scala +++ b/hudi-spark-datasource/hudi-spark3/src/main/scala/org/apache/hudi/Spark3DefaultSource.scala @@ -17,8 +17,30 @@ package org.apache.hudi +import org.apache.hudi.exception.HoodieException +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.connector.catalog.{Table, TableProvider} +import org.apache.spark.sql.connector.expressions.Transform +import org.apache.spark.sql.hudi.catalog.HoodieInternalV2Table import org.apache.spark.sql.sources.DataSourceRegister +import org.apache.spark.sql.types.StructType +import org.apache.spark.sql.util.CaseInsensitiveStringMap + +class Spark3DefaultSource extends DefaultSource with DataSourceRegister with TableProvider { -class Spark3DefaultSource extends DefaultSource with DataSourceRegister { override def shortName(): String = "hudi" + + def inferSchema: StructType = new StructType() + + override def inferSchema(options: CaseInsensitiveStringMap): StructType = inferSchema + + override def getTable(schema: StructType, + partitioning: Array[Transform], + properties: java.util.Map[String, String]): Table = { + val options = new CaseInsensitiveStringMap(properties) + val path = options.get("path") + if (path == null) throw new HoodieException("'path' cannot be null, missing 'path' from table properties") + + HoodieInternalV2Table(SparkSession.active, path) + } } diff --git a/hudi-spark-datasource/hudi-spark3/src/main/scala/org/apache/spark/sql/connector/catalog/HoodieIdentifier.scala b/hudi-spark-datasource/hudi-spark3/src/main/scala/org/apache/spark/sql/connector/catalog/HoodieIdentifier.scala new file mode 100644 index 0000000000000..2649c56e5a8a4 --- /dev/null +++ b/hudi-spark-datasource/hudi-spark3/src/main/scala/org/apache/spark/sql/connector/catalog/HoodieIdentifier.scala @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.connector.catalog
+
+import java.util
+import java.util.Objects
+
+/**
+ * This class is here to keep scala-2.11 compilable: using Identifier.of(namespace, name)
+ * to get an IdentifierImpl would throw a compile exception
+ * (Static methods in interface require -target:jvm-1.8).
+ */
+case class HoodieIdentifier(namespace: Array[String], name: String) extends Identifier {
+
+  override def equals(o: Any): Boolean = {
+    o match {
+      case that: HoodieIdentifier => util.Arrays.equals(namespace.asInstanceOf[Array[Object]],
+        that.namespace.asInstanceOf[Array[Object]]) && name == that.name
+      case _ => false
+    }
+  }
+
+  override def hashCode: Int = {
+    val nh = namespace.toSeq.hashCode().asInstanceOf[Object]
+    Objects.hash(nh, name)
+  }
+}
diff --git a/hudi-spark-datasource/hudi-spark3/src/main/scala/org/apache/spark/sql/hudi/analysis/HoodieSpark3Analysis.scala b/hudi-spark-datasource/hudi-spark3/src/main/scala/org/apache/spark/sql/hudi/analysis/HoodieSpark3Analysis.scala
new file mode 100644
index 0000000000000..e20f934592e45
--- /dev/null
+++ b/hudi-spark-datasource/hudi-spark3/src/main/scala/org/apache/spark/sql/hudi/analysis/HoodieSpark3Analysis.scala
@@ -0,0 +1,206 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +package org.apache.spark.sql.hudi.analysis + +import org.apache.hudi.common.table.HoodieTableMetaClient +import org.apache.hudi.{DefaultSource, SparkAdapterSupport} +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.catalyst.analysis.{ResolvedTable, UnresolvedPartitionSpec} +import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute} +import org.apache.spark.sql.catalyst.plans.logical._ +import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.IdentifierHelper +import org.apache.spark.sql.execution.datasources.LogicalRelation +import org.apache.spark.sql.execution.datasources.PreWriteCheck.failAnalysis +import org.apache.spark.sql.execution.datasources.v2.{DataSourceV2Relation, V2SessionCatalog} +import org.apache.spark.sql.hudi.{HoodieSqlCommonUtils, ProvidesHoodieConfig} +import org.apache.spark.sql.hudi.HoodieSqlCommonUtils.{castIfNeeded, getTableLocation, removeMetaFields, tableExistsInPath} +import org.apache.spark.sql.hudi.catalog.{HoodieCatalog, HoodieInternalV2Table} +import org.apache.spark.sql.hudi.command.{AlterHoodieTableDropPartitionCommand, ShowHoodieTablePartitionsCommand, TruncateHoodieTableCommand} +import org.apache.spark.sql.types.StructType +import org.apache.spark.sql.{AnalysisException, SQLContext, SparkSession} + +import scala.collection.JavaConverters.mapAsJavaMapConverter + +/** + * Rule for convert the logical plan to command. + * @param sparkSession + */ +case class HoodieSpark3Analysis(sparkSession: SparkSession) extends Rule[LogicalPlan] + with SparkAdapterSupport with ProvidesHoodieConfig { + + override def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperatorsDown { + case dsv2 @ DataSourceV2Relation(d: HoodieInternalV2Table, _, _, _, _) => + val output = dsv2.output + val catalogTable = if (d.catalogTable.isDefined) { + Some(d.v1Table) + } else { + None + } + val relation = new DefaultSource().createRelation(new SQLContext(sparkSession), + buildHoodieConfig(d.hoodieCatalogTable)) + LogicalRelation(relation, output, catalogTable, isStreaming = false) + case a @ InsertIntoStatement(r: DataSourceV2Relation, partitionSpec, _, _, _, _) if a.query.resolved && + r.table.isInstanceOf[HoodieInternalV2Table] && + needsSchemaAdjustment(a.query, r.table.asInstanceOf[HoodieInternalV2Table], partitionSpec, r.schema) => + val projection = resolveQueryColumnsByOrdinal(a.query, r.output) + if (projection != a.query) { + a.copy(query = projection) + } else { + a + } + } + + /** + * Need to adjust schema based on the query and relation schema, for example, + * if using insert into xx select 1, 2 here need to map to column names + * @param query + * @param hoodieTable + * @param partitionSpec + * @param schema + * @return + */ + private def needsSchemaAdjustment(query: LogicalPlan, + hoodieTable: HoodieInternalV2Table, + partitionSpec: Map[String, Option[String]], + schema: StructType): Boolean = { + val output = query.output + val queryOutputWithoutMetaFields = removeMetaFields(output) + val partitionFields = hoodieTable.hoodieCatalogTable.partitionFields + val partitionSchema = hoodieTable.hoodieCatalogTable.partitionSchema + val staticPartitionValues = partitionSpec.filter(p => p._2.isDefined).mapValues(_.get) + + assert(staticPartitionValues.isEmpty || + staticPartitionValues.size == partitionSchema.size, + s"Required partition columns is: ${partitionSchema.json}, Current static partitions " + + s"is: ${staticPartitionValues.mkString("," + "")}") + + 
+    assert(staticPartitionValues.size + queryOutputWithoutMetaFields.size
+      == hoodieTable.hoodieCatalogTable.tableSchemaWithoutMetaFields.size,
+      s"Required select columns count: ${hoodieTable.hoodieCatalogTable.tableSchemaWithoutMetaFields.size}, " +
+        s"current select columns (including static partition columns) count: " +
+        s"${staticPartitionValues.size + queryOutputWithoutMetaFields.size}, columns: " +
+        s"(${(queryOutputWithoutMetaFields.map(_.name) ++ staticPartitionValues.keys).mkString(",")})")
+
+    // Static partition insert: drop the partition fields from the original schema so that
+    // the remaining fields can be aligned with the query output.
+    val expectedSchema = if (staticPartitionValues.nonEmpty) {
+      StructType(schema.filterNot(p => partitionFields.contains(p.name)))
+    } else {
+      schema
+    }
+
+    val existingSchemaOutput = output.take(expectedSchema.length)
+    existingSchemaOutput.map(_.name) != expectedSchema.map(_.name) ||
+      existingSchemaOutput.map(_.dataType) != expectedSchema.map(_.dataType)
+  }
+
+  private def resolveQueryColumnsByOrdinal(query: LogicalPlan,
+                                           targetAttrs: Seq[Attribute]): LogicalPlan = {
+    // Always add a Cast; the optimizer removes it again if it is unnecessary.
+    val project = query.output.zipWithIndex.map { case (attr, i) =>
+      if (i < targetAttrs.length) {
+        val targetAttr = targetAttrs(i)
+        val castAttr = castIfNeeded(attr.withNullability(targetAttr.nullable), targetAttr.dataType, conf)
+        Alias(castAttr, targetAttr.name)()
+      } else {
+        attr
+      }
+    }
+    Project(project, query)
+  }
+}
+
+/**
+ * Rule for resolving Hudi's extended syntax and rewriting some logical plans.
+ * @param sparkSession the active SparkSession
+ */
+case class HoodieSpark3ResolveReferences(sparkSession: SparkSession) extends Rule[LogicalPlan]
+  with SparkAdapterSupport with ProvidesHoodieConfig {
+
+  def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperatorsUp {
+    // Fill in the schema for CREATE TABLE statements that do not specify one
+    case c @ CreateV2Table(tableCatalog, tableName, schema, partitioning, properties, _)
+      if sparkAdapter.isHoodieTable(properties.asJava) =>
+
+      if (schema.isEmpty && partitioning.nonEmpty) {
+        failAnalysis("It is not allowed to specify partition columns when the table schema is " +
+          "not defined. When the table schema is not provided, schema and partition columns " +
+          "will be inferred.")
+      }
+      val hoodieCatalog = tableCatalog match {
+        case catalog: HoodieCatalog => catalog
+        case _ => tableCatalog.asInstanceOf[V2SessionCatalog]
+      }
+      val tablePath = getTableLocation(properties,
+        TableIdentifier(tableName.name(), tableName.namespace().lastOption), sparkSession)
+
+      val tableExistsInCatalog = hoodieCatalog.tableExists(tableName)
+      // Only when the table does not yet exist in the catalog do we need to fill in the schema for table creation.
+      if (!tableExistsInCatalog && tableExistsInPath(tablePath, sparkSession.sessionState.newHadoopConf())) {
+        val metaClient = HoodieTableMetaClient.builder()
+          .setBasePath(tablePath)
+          .setConf(sparkSession.sessionState.newHadoopConf())
+          .build()
+        val tableSchema = HoodieSqlCommonUtils.getTableSqlSchema(metaClient)
+        if (tableSchema.isDefined && schema.isEmpty) {
+          // Fill in the schema from the existing table
+          c.copy(tableSchema = tableSchema.get)
+        } else if (tableSchema.isDefined && schema != tableSchema.get) {
+          throw new AnalysisException(s"The schema specified in the create table statement does not match " +
+            s"the table schema. You should not specify the schema for an existing table: $tableName")
+        } else {
+          c
+        }
+      } else {
+        c
+      }
+    case p => p
+  }
+}
+
+/**
+ * Rule for rewriting some Spark commands to Hudi's implementations.
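+ * Currently SHOW PARTITIONS, TRUNCATE TABLE and ALTER TABLE ... DROP PARTITION on a Hudi
+ * table are redirected to the corresponding Hudi commands (see the cases below).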
+ * @param sparkSession + */ +case class HoodieSpark3PostAnalysisRule(sparkSession: SparkSession) extends Rule[LogicalPlan] { + override def apply(plan: LogicalPlan): LogicalPlan = { + plan match { + case ShowPartitions(child, specOpt, _) + if child.isInstanceOf[ResolvedTable] && + child.asInstanceOf[ResolvedTable].table.isInstanceOf[HoodieInternalV2Table] => + ShowHoodieTablePartitionsCommand(child.asInstanceOf[ResolvedTable].identifier.asTableIdentifier, specOpt.map(s => s.asInstanceOf[UnresolvedPartitionSpec].spec)) + + // Rewrite TruncateTableCommand to TruncateHoodieTableCommand + case TruncateTable(child) + if child.isInstanceOf[ResolvedTable] && + child.asInstanceOf[ResolvedTable].table.isInstanceOf[HoodieInternalV2Table] => + new TruncateHoodieTableCommand(child.asInstanceOf[ResolvedTable].identifier.asTableIdentifier, None) + + case DropPartitions(child, specs, ifExists, purge) + if child.resolved && child.isInstanceOf[ResolvedTable] && child.asInstanceOf[ResolvedTable].table.isInstanceOf[HoodieInternalV2Table] => + AlterHoodieTableDropPartitionCommand( + child.asInstanceOf[ResolvedTable].identifier.asTableIdentifier, + specs.seq.map(f => f.asInstanceOf[UnresolvedPartitionSpec]).map(s => s.spec), + ifExists, + purge, + retainData = true + ) + + case _ => plan + } + } +} diff --git a/hudi-spark-datasource/hudi-spark3/src/main/scala/org/apache/spark/sql/hudi/catalog/BasicStagedTable.scala b/hudi-spark-datasource/hudi-spark3/src/main/scala/org/apache/spark/sql/hudi/catalog/BasicStagedTable.scala new file mode 100644 index 0000000000000..67d9e1ebb2bf8 --- /dev/null +++ b/hudi-spark-datasource/hudi-spark3/src/main/scala/org/apache/spark/sql/hudi/catalog/BasicStagedTable.scala @@ -0,0 +1,57 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.hudi.catalog + +import org.apache.hudi.exception.HoodieException +import org.apache.spark.sql.connector.catalog._ +import org.apache.spark.sql.connector.expressions.Transform +import org.apache.spark.sql.connector.write.{LogicalWriteInfo, WriteBuilder} +import org.apache.spark.sql.types.StructType + +import java.util + +/** + * Basic implementation that represents a table which is staged for being committed. 
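+ * For non-Hudi tables the delegated catalog has already created the table eagerly, so
+ * commitStagedChanges below is a no-op and abortStagedChanges simply drops the table again.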
+ * @param ident table ident + * @param table table + * @param catalog table catalog + */ +case class BasicStagedTable(ident: Identifier, + table: Table, + catalog: TableCatalog) extends SupportsWrite with StagedTable { + override def newWriteBuilder(info: LogicalWriteInfo): WriteBuilder = { + info match { + case supportsWrite: SupportsWrite => supportsWrite.newWriteBuilder(info) + case _ => throw new HoodieException(s"Table `${ident.name}` does not support writes.") + } + } + + override def abortStagedChanges(): Unit = catalog.dropTable(ident) + + override def commitStagedChanges(): Unit = {} + + override def name(): String = ident.name() + + override def schema(): StructType = table.schema() + + override def partitioning(): Array[Transform] = table.partitioning() + + override def capabilities(): util.Set[TableCapability] = table.capabilities() + + override def properties(): util.Map[String, String] = table.properties() +} diff --git a/hudi-spark-datasource/hudi-spark3/src/main/scala/org/apache/spark/sql/hudi/catalog/HoodieCatalog.scala b/hudi-spark-datasource/hudi-spark3/src/main/scala/org/apache/spark/sql/hudi/catalog/HoodieCatalog.scala new file mode 100644 index 0000000000000..3046af991404b --- /dev/null +++ b/hudi-spark-datasource/hudi-spark3/src/main/scala/org/apache/spark/sql/hudi/catalog/HoodieCatalog.scala @@ -0,0 +1,303 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.hudi.catalog + +import org.apache.hadoop.fs.Path +import org.apache.hudi.{DataSourceWriteOptions, SparkAdapterSupport} +import org.apache.hudi.client.common.HoodieSparkEngineContext +import org.apache.hudi.common.fs.FSUtils +import org.apache.hudi.exception.HoodieException +import org.apache.hudi.hive.util.ConfigUtils +import org.apache.hudi.sql.InsertMode +import org.apache.spark.sql.HoodieSpark3SqlUtils.convertTransforms +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.catalyst.analysis.{NoSuchTableException, TableAlreadyExistsException, UnresolvedAttribute} +import org.apache.spark.sql.catalyst.catalog.{CatalogTable, CatalogTableType, CatalogUtils, HoodieCatalogTable} +import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.IdentifierHelper +import org.apache.spark.sql.connector.catalog.TableChange.{AddColumn, ColumnChange, UpdateColumnComment, UpdateColumnType} +import org.apache.spark.sql.connector.catalog._ +import org.apache.spark.sql.connector.expressions.Transform +import org.apache.spark.sql.execution.datasources.DataSource +import org.apache.spark.sql.hudi.command.{AlterHoodieTableAddColumnsCommand, AlterHoodieTableChangeColumnCommand, AlterHoodieTableRenameCommand, CreateHoodieTableCommand} +import org.apache.spark.sql.hudi.{HoodieSqlCommonUtils, ProvidesHoodieConfig} +import org.apache.spark.sql.types.{StructField, StructType} +import org.apache.spark.sql.{Dataset, SaveMode, SparkSession, _} + +import java.util +import scala.collection.JavaConverters.{mapAsJavaMapConverter, mapAsScalaMapConverter} + +class HoodieCatalog extends DelegatingCatalogExtension + with StagingTableCatalog + with SparkAdapterSupport + with ProvidesHoodieConfig { + + val spark: SparkSession = SparkSession.active + + override def stageCreate(ident: Identifier, schema: StructType, partitions: Array[Transform], properties: util.Map[String, String]): StagedTable = { + if (sparkAdapter.isHoodieTable(properties)) { + HoodieStagedTable(ident, this, schema, partitions, properties, TableCreationMode.STAGE_CREATE) + } else { + BasicStagedTable( + ident, + super.createTable(ident, schema, partitions, properties), + this) + } + } + + override def stageReplace(ident: Identifier, schema: StructType, partitions: Array[Transform], properties: util.Map[String, String]): StagedTable = { + if (sparkAdapter.isHoodieTable(properties)) { + HoodieStagedTable(ident, this, schema, partitions, properties, TableCreationMode.STAGE_REPLACE) + } else { + super.dropTable(ident) + BasicStagedTable( + ident, + super.createTable(ident, schema, partitions, properties), + this) + } + } + + override def stageCreateOrReplace(ident: Identifier, + schema: StructType, + partitions: Array[Transform], + properties: util.Map[String, String]): StagedTable = { + if (sparkAdapter.isHoodieTable(properties)) { + HoodieStagedTable( + ident, this, schema, partitions, properties, TableCreationMode.CREATE_OR_REPLACE) + } else { + try super.dropTable(ident) catch { + case _: NoSuchTableException => // ignore the exception + } + BasicStagedTable( + ident, + super.createTable(ident, schema, partitions, properties), + this) + } + } + + override def loadTable(ident: Identifier): Table = { + try { + super.loadTable(ident) match { + case v1: V1Table if sparkAdapter.isHoodieTable(v1.catalogTable) => + HoodieInternalV2Table( + spark, + v1.catalogTable.location.toString, + catalogTable = Some(v1.catalogTable), + tableIdentifier = Some(ident.toString)) + case o => o + } + } catch { + 
case e: Exception => + throw e + } + } + + override def createTable(ident: Identifier, + schema: StructType, + partitions: Array[Transform], + properties: util.Map[String, String]): Table = { + createHoodieTable(ident, schema, partitions, properties, Map.empty, Option.empty, TableCreationMode.CREATE) + } + + override def tableExists(ident: Identifier): Boolean = super.tableExists(ident) + + override def dropTable(ident: Identifier): Boolean = super.dropTable(ident) + + override def purgeTable(ident: Identifier): Boolean = { + val table = loadTable(ident) + table match { + case hoodieTable: HoodieInternalV2Table => + val location = hoodieTable.hoodieCatalogTable.tableLocation + val targetPath = new Path(location) + val engineContext = new HoodieSparkEngineContext(spark.sparkContext) + val fs = FSUtils.getFs(location, spark.sparkContext.hadoopConfiguration) + FSUtils.deleteDir(engineContext, fs, targetPath, spark.sparkContext.defaultParallelism) + super.dropTable(ident) + case _ => + } + true + } + + @throws[NoSuchTableException] + @throws[TableAlreadyExistsException] + override def renameTable(oldIdent: Identifier, newIdent: Identifier): Unit = { + loadTable(oldIdent) match { + case _: HoodieInternalV2Table => + new AlterHoodieTableRenameCommand(oldIdent.asTableIdentifier, newIdent.asTableIdentifier, false).run(spark) + case _ => super.renameTable(oldIdent, newIdent) + } + } + + override def alterTable(ident: Identifier, changes: TableChange*): Table = { + val tableIdent = TableIdentifier(ident.name(), ident.namespace().lastOption) + // scalastyle:off + val table = loadTable(ident) match { + case hoodieTable: HoodieInternalV2Table => hoodieTable + case _ => return super.alterTable(ident, changes: _*) + } + // scalastyle:on + + val grouped = changes.groupBy(c => c.getClass) + + grouped.foreach { + case (t, newColumns) if t == classOf[AddColumn] => + AlterHoodieTableAddColumnsCommand( + tableIdent, + newColumns.asInstanceOf[Seq[AddColumn]].map { col => + StructField( + col.fieldNames()(0), + col.dataType(), + col.isNullable) + }).run(spark) + case (t, columnChanges) if classOf[ColumnChange].isAssignableFrom(t) => + columnChanges.foreach { + case dataType: UpdateColumnType => + val colName = UnresolvedAttribute(dataType.fieldNames()).name + val newDataType = dataType.newDataType() + val structField = StructField(colName, newDataType) + AlterHoodieTableChangeColumnCommand(tableIdent, colName, structField).run(spark) + case dataType: UpdateColumnComment => + val newComment = dataType.newComment() + val colName = UnresolvedAttribute(dataType.fieldNames()).name + val fieldOpt = table.schema().findNestedField(dataType.fieldNames(), includeCollections = true, + spark.sessionState.conf.resolver).map(_._2) + val field = fieldOpt.getOrElse { + throw new AnalysisException( + s"Couldn't find column $colName in:\n${table.schema().treeString}") + } + AlterHoodieTableChangeColumnCommand(tableIdent, colName, field.withComment(newComment)).run(spark) + } + case (t, _) => + throw new UnsupportedOperationException(s"not supported table change: ${t.getClass}") + } + + loadTable(ident) + } + + def createHoodieTable(ident: Identifier, + schema: StructType, + partitions: Array[Transform], + allTableProperties: util.Map[String, String], + writeOptions: Map[String, String], + sourceQuery: Option[DataFrame], + operation: TableCreationMode): Table = { + + val (partitionColumns, maybeBucketSpec) = convertTransforms(partitions) + val newSchema = schema + val newPartitionColumns = partitionColumns + val newBucketSpec = 
maybeBucketSpec + + val isByPath = isPathIdentifier(ident) + + val location = if (isByPath) Option(ident.name()) else Option(allTableProperties.get("location")) + val id = ident.asTableIdentifier + + val locUriOpt = location.map(CatalogUtils.stringToURI) + val existingTableOpt = getExistingTableIfExists(id) + val loc = locUriOpt + .orElse(existingTableOpt.flatMap(_.storage.locationUri)) + .getOrElse(spark.sessionState.catalog.defaultTablePath(id)) + val storage = DataSource.buildStorageFormatFromOptions(writeOptions) + .copy(locationUri = Option(loc)) + val tableType = + if (location.isDefined) CatalogTableType.EXTERNAL else CatalogTableType.MANAGED + val commentOpt = Option(allTableProperties.get("comment")) + + val tablePropertiesNew = new util.HashMap[String, String](allTableProperties) + // put path to table properties. + tablePropertiesNew.put("path", loc.getPath) + + val tableDesc = new CatalogTable( + identifier = id, + tableType = tableType, + storage = storage, + schema = newSchema, + provider = Option("hudi"), + partitionColumnNames = newPartitionColumns, + bucketSpec = newBucketSpec, + properties = tablePropertiesNew.asScala.toMap, + comment = commentOpt) + + val hoodieCatalogTable = HoodieCatalogTable(spark, tableDesc) + + if (operation == TableCreationMode.STAGE_CREATE) { + val tablePath = hoodieCatalogTable.tableLocation + val hadoopConf = spark.sessionState.newHadoopConf() + assert(HoodieSqlCommonUtils.isEmptyPath(tablePath, hadoopConf), + s"Path '$tablePath' should be empty for CTAS") + hoodieCatalogTable.initHoodieTable() + + val tblProperties = hoodieCatalogTable.catalogProperties + val options = Map( + DataSourceWriteOptions.HIVE_CREATE_MANAGED_TABLE.key -> (tableDesc.tableType == CatalogTableType.MANAGED).toString, + DataSourceWriteOptions.HIVE_TABLE_SERDE_PROPERTIES.key -> ConfigUtils.configToString(tblProperties.asJava), + DataSourceWriteOptions.HIVE_TABLE_PROPERTIES.key -> ConfigUtils.configToString(tableDesc.properties.asJava), + DataSourceWriteOptions.SQL_INSERT_MODE.key -> InsertMode.NON_STRICT.value(), + DataSourceWriteOptions.SQL_ENABLE_BULK_INSERT.key -> "true" + ) + saveSourceDF(sourceQuery, tableDesc.properties ++ buildHoodieInsertConfig(hoodieCatalogTable, spark, isOverwrite = false, Map.empty, options)) + CreateHoodieTableCommand.createTableInCatalog(spark, hoodieCatalogTable, ignoreIfExists = false) + } else if (sourceQuery.isEmpty) { + saveSourceDF(sourceQuery, tableDesc.properties) + new CreateHoodieTableCommand(tableDesc, false).run(spark) + } else { + saveSourceDF(sourceQuery, tableDesc.properties ++ buildHoodieInsertConfig(hoodieCatalogTable, spark, isOverwrite = false, Map.empty, Map.empty)) + new CreateHoodieTableCommand(tableDesc, false).run(spark) + } + + loadTable(ident) + } + + private def isPathIdentifier(ident: Identifier) = new Path(ident.name()).isAbsolute + + protected def isPathIdentifier(table: CatalogTable): Boolean = { + isPathIdentifier(table.identifier) + } + + protected def isPathIdentifier(tableIdentifier: TableIdentifier): Boolean = { + isPathIdentifier(HoodieIdentifier(tableIdentifier.database.toArray, tableIdentifier.table)) + } + + private def getExistingTableIfExists(table: TableIdentifier): Option[CatalogTable] = { + // If this is a path identifier, we cannot return an existing CatalogTable. 
The Create command + // will check the file system itself + val catalog = spark.sessionState.catalog + // scalastyle:off + if (isPathIdentifier(table)) return None + // scalastyle:on + val tableExists = catalog.tableExists(table) + if (tableExists) { + val oldTable = catalog.getTableMetadata(table) + if (oldTable.tableType == CatalogTableType.VIEW) throw new HoodieException( + s"$table is a view. You may not write data into a view.") + if (!sparkAdapter.isHoodieTable(oldTable)) throw new HoodieException(s"$table is not a Hoodie table.") + Some(oldTable) + } else None + } + + private def saveSourceDF(sourceQuery: Option[Dataset[_]], + properties: Map[String, String]): Unit = { + sourceQuery.map(df => { + df.write.format("org.apache.hudi") + .options(properties) + .mode(SaveMode.Append) + .save() + df + }) + } +} diff --git a/hudi-spark-datasource/hudi-spark3/src/main/scala/org/apache/spark/sql/hudi/catalog/HoodieInternalV2Table.scala b/hudi-spark-datasource/hudi-spark3/src/main/scala/org/apache/spark/sql/hudi/catalog/HoodieInternalV2Table.scala new file mode 100644 index 0000000000000..848925aafe417 --- /dev/null +++ b/hudi-spark-datasource/hudi-spark3/src/main/scala/org/apache/spark/sql/hudi/catalog/HoodieInternalV2Table.scala @@ -0,0 +1,127 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.hudi.catalog + +import org.apache.hudi.common.table.{HoodieTableConfig, HoodieTableMetaClient} +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.catalyst.catalog.{CatalogTable, HoodieCatalogTable} +import org.apache.spark.sql.connector.catalog.TableCapability._ +import org.apache.spark.sql.connector.catalog.{SupportsWrite, Table, TableCapability, V2TableWithV1Fallback} +import org.apache.spark.sql.connector.expressions.{FieldReference, IdentityTransform, Transform} +import org.apache.spark.sql.connector.write._ +import org.apache.spark.sql.hudi.ProvidesHoodieConfig +import org.apache.spark.sql.sources.{Filter, InsertableRelation} +import org.apache.spark.sql.types.StructType +import org.apache.spark.sql.util.CaseInsensitiveStringMap +import org.apache.spark.sql.{DataFrame, SaveMode, SparkSession} + +import java.util +import scala.collection.JavaConverters.{mapAsJavaMapConverter, setAsJavaSetConverter} + +case class HoodieInternalV2Table(spark: SparkSession, + path: String, + catalogTable: Option[CatalogTable] = None, + tableIdentifier: Option[String] = None, + options: CaseInsensitiveStringMap = CaseInsensitiveStringMap.empty()) + extends Table with SupportsWrite with V2TableWithV1Fallback { + + lazy val hoodieCatalogTable: HoodieCatalogTable = if (catalogTable.isDefined) { + HoodieCatalogTable(spark, catalogTable.get) + } else { + val metaClient: HoodieTableMetaClient = HoodieTableMetaClient.builder() + .setBasePath(path) + .setConf(SparkSession.active.sessionState.newHadoopConf) + .build() + + val tableConfig: HoodieTableConfig = metaClient.getTableConfig + val tableName: String = tableConfig.getTableName + + HoodieCatalogTable(spark, TableIdentifier(tableName)) + } + + private lazy val tableSchema: StructType = hoodieCatalogTable.tableSchema + + override def name(): String = hoodieCatalogTable.table.identifier.unquotedString + + override def schema(): StructType = tableSchema + + override def capabilities(): util.Set[TableCapability] = Set( + BATCH_READ, V1_BATCH_WRITE, OVERWRITE_BY_FILTER, TRUNCATE, ACCEPT_ANY_SCHEMA + ).asJava + + override def properties(): util.Map[String, String] = { + hoodieCatalogTable.catalogProperties.asJava + } + + override def newWriteBuilder(info: LogicalWriteInfo): WriteBuilder = { + new HoodieV1WriteBuilder(info.options, hoodieCatalogTable, spark) + } + + override def v1Table: CatalogTable = hoodieCatalogTable.table + + override def partitioning(): Array[Transform] = { + hoodieCatalogTable.partitionFields.map { col => + new IdentityTransform(new FieldReference(Seq(col))) + }.toArray + } + +} + +private class HoodieV1WriteBuilder(writeOptions: CaseInsensitiveStringMap, + hoodieCatalogTable: HoodieCatalogTable, + spark: SparkSession) + extends SupportsTruncate with SupportsOverwrite with ProvidesHoodieConfig { + + private var forceOverwrite = false + + override def truncate(): HoodieV1WriteBuilder = { + forceOverwrite = true + this + } + + override def overwrite(filters: Array[Filter]): WriteBuilder = { + forceOverwrite = true + this + } + + override def build(): V1Write = new V1Write { + override def toInsertableRelation: InsertableRelation = { + new InsertableRelation { + override def insert(data: DataFrame, overwrite: Boolean): Unit = { + val mode = if (forceOverwrite && hoodieCatalogTable.partitionFields.isEmpty) { + // insert overwrite non-partition table + SaveMode.Overwrite + } else { + // for insert into or insert overwrite partition we use append mode. 
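+                // (Presumably the overwrite-a-partition semantics are then carried by Hudi's own
+                // insert-overwrite write operation, chosen via buildHoodieInsertConfig with the
+                // forceOverwrite flag below, rather than by Spark's SaveMode.)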
+ SaveMode.Append + } + alignOutputColumns(data).write.format("org.apache.hudi") + .mode(mode) + .options(buildHoodieConfig(hoodieCatalogTable) ++ + buildHoodieInsertConfig(hoodieCatalogTable, spark, forceOverwrite, Map.empty, Map.empty)) + .save() + } + } + } + } + + private def alignOutputColumns(data: DataFrame): DataFrame = { + val schema = hoodieCatalogTable.tableSchema + spark.createDataFrame(data.toJavaRDD, schema) + } +} diff --git a/hudi-spark-datasource/hudi-spark3/src/main/scala/org/apache/spark/sql/hudi/catalog/HoodieStagedTable.scala b/hudi-spark-datasource/hudi-spark3/src/main/scala/org/apache/spark/sql/hudi/catalog/HoodieStagedTable.scala new file mode 100644 index 0000000000000..4034862167aa5 --- /dev/null +++ b/hudi-spark-datasource/hudi-spark3/src/main/scala/org/apache/spark/sql/hudi/catalog/HoodieStagedTable.scala @@ -0,0 +1,99 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.hudi.catalog + +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.Path +import org.apache.hudi.DataSourceWriteOptions.RECORDKEY_FIELD +import org.apache.spark.sql.DataFrame +import org.apache.spark.sql.connector.catalog.{Identifier, StagedTable, SupportsWrite, TableCapability} +import org.apache.spark.sql.connector.expressions.Transform +import org.apache.spark.sql.connector.write.{LogicalWriteInfo, V1Write, WriteBuilder} +import org.apache.spark.sql.sources.InsertableRelation +import org.apache.spark.sql.types.StructType + +import java.util +import scala.collection.JavaConverters.{mapAsScalaMapConverter, setAsJavaSetConverter} + +case class HoodieStagedTable(ident: Identifier, + catalog: HoodieCatalog, + override val schema: StructType, + partitions: Array[Transform], + override val properties: util.Map[String, String], + mode: TableCreationMode) extends StagedTable with SupportsWrite { + + private var sourceQuery: Option[DataFrame] = None + private var writeOptions: Map[String, String] = Map.empty + + override def commitStagedChanges(): Unit = { + val props = new util.HashMap[String, String]() + val optionsThroughProperties = properties.asScala.collect { + case (k, _) if k.startsWith("option.") => k.stripPrefix("option.") + }.toSet + val sqlWriteOptions = new util.HashMap[String, String]() + properties.asScala.foreach { case (k, v) => + if (!k.startsWith("option.") && !optionsThroughProperties.contains(k)) { + props.put(k, v) + } else if (optionsThroughProperties.contains(k)) { + sqlWriteOptions.put(k, v) + } + } + if (writeOptions.isEmpty && !sqlWriteOptions.isEmpty) { + writeOptions = sqlWriteOptions.asScala.toMap + } + props.putAll(properties) + props.put("hoodie.table.name", ident.name()) + props.put(RECORDKEY_FIELD.key, properties.get("primaryKey")) + catalog.createHoodieTable(ident, 
schema, partitions, props, writeOptions, sourceQuery, mode) + } + + override def name(): String = ident.name() + + override def abortStagedChanges(): Unit = { + clearTablePath(properties.get("location"), catalog.spark.sparkContext.hadoopConfiguration) + } + + private def clearTablePath(tablePath: String, conf: Configuration): Unit = { + val path = new Path(tablePath) + val fs = path.getFileSystem(conf) + fs.delete(path, true) + } + + override def capabilities(): util.Set[TableCapability] = Set(TableCapability.V1_BATCH_WRITE).asJava + + override def newWriteBuilder(info: LogicalWriteInfo): WriteBuilder = { + writeOptions = info.options.asCaseSensitiveMap().asScala.toMap + new HoodieV1WriteBuilder + } + + /* + * WriteBuilder for creating a Hoodie table. + */ + private class HoodieV1WriteBuilder extends WriteBuilder { + override def build(): V1Write = new V1Write { + override def toInsertableRelation(): InsertableRelation = { + new InsertableRelation { + override def insert(data: DataFrame, overwrite: Boolean): Unit = { + sourceQuery = Option(data) + } + } + } + } + } + +} diff --git a/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/vector/MapColumnVector.java b/hudi-spark-datasource/hudi-spark3/src/main/scala/org/apache/spark/sql/hudi/catalog/TableCreationMode.java similarity index 71% rename from hudi-flink/src/main/java/org/apache/hudi/table/format/cow/vector/MapColumnVector.java rename to hudi-spark-datasource/hudi-spark3/src/main/scala/org/apache/spark/sql/hudi/catalog/TableCreationMode.java index 38424dad7d3a7..8b54775be149e 100644 --- a/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/vector/MapColumnVector.java +++ b/hudi-spark-datasource/hudi-spark3/src/main/scala/org/apache/spark/sql/hudi/catalog/TableCreationMode.java @@ -1,29 +1,23 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hudi.table.format.cow.vector; - -import org.apache.flink.table.data.MapData; -import org.apache.flink.table.data.vector.ColumnVector; - -/** - * Map column vector. - */ -public interface MapColumnVector extends ColumnVector { - MapData getMap(int i); -} +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.hudi.catalog; + +public enum TableCreationMode { + CREATE, CREATE_OR_REPLACE, STAGE_CREATE, STAGE_REPLACE +} diff --git a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveSyncConfig.java b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveSyncConfig.java index 8dc3c6e0e468f..50991852b2c3b 100644 --- a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveSyncConfig.java +++ b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveSyncConfig.java @@ -49,6 +49,9 @@ public class HiveSyncConfig implements Serializable { @Parameter(names = {"--jdbc-url"}, description = "Hive jdbc connect url") public String jdbcUrl; + @Parameter(names = {"--metastore-uris"}, description = "Hive metastore uris") + public String metastoreUris; + @Parameter(names = {"--base-path"}, description = "Basepath of hoodie table to sync", required = true) public String basePath; @@ -126,6 +129,9 @@ public class HiveSyncConfig implements Serializable { @Parameter(names = {"--conditional-sync"}, description = "If true, only sync on conditions like schema change or partition change.") public Boolean isConditionalSync = false; + @Parameter(names = {"--spark-version"}, description = "The spark version", required = false) + public String sparkVersion; + // enhance the similar function in child class public static HiveSyncConfig copy(HiveSyncConfig cfg) { HiveSyncConfig newConfig = new HiveSyncConfig(); @@ -137,6 +143,7 @@ public static HiveSyncConfig copy(HiveSyncConfig cfg) { newConfig.partitionFields = cfg.partitionFields; newConfig.partitionValueExtractorClass = cfg.partitionValueExtractorClass; newConfig.jdbcUrl = cfg.jdbcUrl; + newConfig.metastoreUris = cfg.metastoreUris; newConfig.tableName = cfg.tableName; newConfig.bucketSpec = cfg.bucketSpec; newConfig.usePreApacheInputFormat = cfg.usePreApacheInputFormat; @@ -151,6 +158,7 @@ public static HiveSyncConfig copy(HiveSyncConfig cfg) { newConfig.sparkSchemaLengthThreshold = cfg.sparkSchemaLengthThreshold; newConfig.withOperationField = cfg.withOperationField; newConfig.isConditionalSync = cfg.isConditionalSync; + newConfig.sparkVersion = cfg.sparkVersion; return newConfig; } @@ -164,6 +172,7 @@ public String toString() { + ", hiveUser='" + hiveUser + '\'' + ", hivePass='" + hivePass + '\'' + ", jdbcUrl='" + jdbcUrl + '\'' + + ", metastoreUris='" + metastoreUris + '\'' + ", basePath='" + basePath + '\'' + ", partitionFields=" + partitionFields + ", partitionValueExtractorClass='" + partitionValueExtractorClass + '\'' diff --git a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveSyncTool.java b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveSyncTool.java index b37b28ed27636..35200216ee9c0 100644 --- a/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveSyncTool.java +++ b/hudi-sync/hudi-hive-sync/src/main/java/org/apache/hudi/hive/HiveSyncTool.java @@ -27,6 +27,7 @@ import org.apache.hudi.common.model.HoodieFileFormat; import org.apache.hudi.common.model.HoodieTableType; import org.apache.hudi.common.util.Option; +import 
org.apache.hudi.common.util.StringUtils; import org.apache.hudi.exception.HoodieException; import org.apache.hudi.exception.InvalidTableException; import org.apache.hudi.hadoop.utils.HoodieInputFormatUtils; @@ -302,6 +303,9 @@ private Map getSparkTableProperties(int schemaLengthThreshold, M Map sparkProperties = new HashMap<>(); sparkProperties.put("spark.sql.sources.provider", "hudi"); + if (!StringUtils.isNullOrEmpty(cfg.sparkVersion)) { + sparkProperties.put("spark.sql.create.version", cfg.sparkVersion); + } // Split the schema string to multi-parts according the schemaLengthThreshold size. String schemaString = Parquet2SparkSchemaUtils.convertToSparkSchemaJson(reOrderedType); int numSchemaPart = (schemaString.length() + schemaLengthThreshold - 1) / schemaLengthThreshold; diff --git a/hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/testutils/HiveTestUtil.java b/hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/testutils/HiveTestUtil.java index 4b92b252cb0c8..e66bb7c914645 100644 --- a/hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/testutils/HiveTestUtil.java +++ b/hudi-sync/hudi-hive-sync/src/test/java/org/apache/hudi/hive/testutils/HiveTestUtil.java @@ -28,6 +28,7 @@ import org.apache.hudi.common.model.HoodieCommitMetadata; import org.apache.hudi.common.model.HoodieDeltaWriteStat; import org.apache.hudi.common.model.HoodieLogFile; +import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.model.HoodieReplaceCommitMetadata; import org.apache.hudi.common.model.HoodieTableType; import org.apache.hudi.common.model.HoodieWriteStat; @@ -428,7 +429,7 @@ private static HoodieLogFile generateLogData(Path parquetFilePath, boolean isLog Map header = new HashMap<>(2); header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, dataFile.getCommitTime()); header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, schema.toString()); - HoodieAvroDataBlock dataBlock = new HoodieAvroDataBlock(records, header); + HoodieAvroDataBlock dataBlock = new HoodieAvroDataBlock(records, header, HoodieRecord.RECORD_KEY_METADATA_FIELD); logWriter.appendBlock(dataBlock); logWriter.close(); return logWriter.getLogFile(); diff --git a/hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/AbstractSyncHoodieClient.java b/hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/AbstractSyncHoodieClient.java index 98b11f2f37cc4..1815491f1867e 100644 --- a/hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/AbstractSyncHoodieClient.java +++ b/hudi-sync/hudi-sync-common/src/main/java/org/apache/hudi/sync/common/AbstractSyncHoodieClient.java @@ -18,6 +18,8 @@ package org.apache.hudi.sync.common; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; import org.apache.hudi.common.engine.HoodieLocalEngineContext; import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.model.HoodieCommitMetadata; @@ -29,9 +31,6 @@ import org.apache.hudi.common.table.timeline.TimelineUtils; import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.ValidationUtils; - -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; import org.apache.parquet.schema.MessageType; @@ -149,11 +148,7 @@ public void closeQuietly(ResultSet resultSet, Statement stmt) { */ public MessageType getDataSchema() { try { - if (withOperationField) { - return new TableSchemaResolver(metaClient, true).getTableParquetSchema(); - } else { - 
return new TableSchemaResolver(metaClient).getTableParquetSchema(); - } + return new TableSchemaResolver(metaClient).getTableParquetSchema(); } catch (Exception e) { throw new HoodieSyncException("Failed to read data schema", e); } @@ -162,11 +157,7 @@ public MessageType getDataSchema() { public boolean isDropPartition() { try { Option hoodieCommitMetadata; - if (withOperationField) { - hoodieCommitMetadata = new TableSchemaResolver(metaClient, true).getLatestCommitMetadata(); - } else { - hoodieCommitMetadata = new TableSchemaResolver(metaClient).getLatestCommitMetadata(); - } + hoodieCommitMetadata = new TableSchemaResolver(metaClient).getLatestCommitMetadata(); if (hoodieCommitMetadata.isPresent() && WriteOperationType.DELETE_PARTITION.equals(hoodieCommitMetadata.get().getOperationType())) { diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HDFSParquetImporter.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HDFSParquetImporter.java index 8651e30c044c2..b5d7dc4b107dd 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HDFSParquetImporter.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HDFSParquetImporter.java @@ -25,6 +25,7 @@ import org.apache.hudi.common.config.TypedProperties; import org.apache.hudi.common.engine.HoodieEngineContext; import org.apache.hudi.common.fs.FSUtils; +import org.apache.hudi.common.model.HoodieAvroRecord; import org.apache.hudi.common.model.HoodieKey; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.model.HoodieRecordPayload; @@ -197,7 +198,7 @@ protected JavaRDD> buildHoodieRecordsForImport LOG.warn("Unable to parse date from partition field. Assuming partition as (" + partitionField + ")"); } } - return new HoodieRecord<>(new HoodieKey(rowField.toString(), partitionPath), + return new HoodieAvroRecord<>(new HoodieKey(rowField.toString(), partitionPath), new HoodieJsonPayload(genericRecord.toString())); }); } diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieClusteringJob.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieClusteringJob.java index a4ee8089f8316..26639628eab1b 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieClusteringJob.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieClusteringJob.java @@ -23,7 +23,6 @@ import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.model.HoodieCommitMetadata; import org.apache.hudi.common.model.HoodieRecordPayload; -import org.apache.hudi.common.model.HoodieWriteStat; import org.apache.hudi.common.table.HoodieTableMetaClient; import org.apache.hudi.common.table.TableSchemaResolver; import org.apache.hudi.common.table.timeline.HoodieActiveTimeline; @@ -49,7 +48,6 @@ import java.util.ArrayList; import java.util.Date; import java.util.List; -import java.util.stream.Collectors; public class HoodieClusteringJob { @@ -189,11 +187,11 @@ public int cluster(int retry) { } private String getSchemaFromLatestInstant() throws Exception { - TableSchemaResolver schemaUtil = new TableSchemaResolver(metaClient); + TableSchemaResolver schemaResolver = new TableSchemaResolver(metaClient); if (metaClient.getActiveTimeline().getCommitsTimeline().filterCompletedInstants().countInstants() == 0) { throw new HoodieException("Cannot run clustering without any completed commits"); } - Schema schema = schemaUtil.getTableAvroSchema(false); + Schema schema = schemaResolver.getTableAvroSchema(false); return schema.toString(); } @@ -216,7 +214,7 @@ 
private int doCluster(JavaSparkContext jsc) throws Exception { } Option commitMetadata = client.cluster(cfg.clusteringInstantTime, true).getCommitMetadata(); - return handleErrors(commitMetadata.get(), cfg.clusteringInstantTime); + return UtilHelpers.handleErrors(commitMetadata.get(), cfg.clusteringInstantTime); } } @@ -271,20 +269,7 @@ private int doScheduleAndCluster(JavaSparkContext jsc) throws Exception { LOG.info("The schedule instant time is " + instantTime.get()); LOG.info("Step 2: Do cluster"); Option metadata = client.cluster(instantTime.get(), true).getCommitMetadata(); - return handleErrors(metadata.get(), instantTime.get()); + return UtilHelpers.handleErrors(metadata.get(), instantTime.get()); } } - - private int handleErrors(HoodieCommitMetadata metadata, String instantTime) { - List writeStats = metadata.getPartitionToWriteStats().entrySet().stream().flatMap(e -> - e.getValue().stream()).collect(Collectors.toList()); - long errorsCount = writeStats.stream().mapToLong(HoodieWriteStat::getTotalWriteErrors).sum(); - if (errorsCount == 0) { - LOG.info(String.format("Table imported into hoodie with %s instant time.", instantTime)); - return 0; - } - - LOG.error(String.format("Import failed with %d errors.", errorsCount)); - return -1; - } } diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieCompactor.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieCompactor.java index 706d1d9df4b9e..ce2be7d5038dc 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieCompactor.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieCompactor.java @@ -18,13 +18,14 @@ package org.apache.hudi.utilities; +import org.apache.avro.Schema; import org.apache.hudi.client.SparkRDDWriteClient; import org.apache.hudi.client.WriteStatus; import org.apache.hudi.common.config.TypedProperties; -import org.apache.hudi.common.fs.ConsistencyGuardConfig; import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.model.HoodieRecordPayload; import org.apache.hudi.common.table.HoodieTableMetaClient; +import org.apache.hudi.common.table.TableSchemaResolver; import org.apache.hudi.common.table.timeline.HoodieInstant; import org.apache.hudi.common.table.timeline.HoodieTimeline; import org.apache.hudi.common.util.Option; @@ -35,6 +36,10 @@ import com.beust.jcommander.Parameter; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hudi.exception.HoodieException; + +import org.apache.hudi.table.action.HoodieWriteMetadata; +import org.apache.hudi.table.action.compact.strategy.LogFileSizeBasedCompactionStrategy; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; import org.apache.spark.api.java.JavaRDD; @@ -43,15 +48,19 @@ import java.io.Serializable; import java.util.ArrayList; import java.util.List; +import java.util.Objects; public class HoodieCompactor { private static final Logger LOG = LogManager.getLogger(HoodieCompactor.class); - private static ConsistencyGuardConfig consistencyGuardConfig = ConsistencyGuardConfig.newBuilder().build(); + public static final String EXECUTE = "execute"; + public static final String SCHEDULE = "schedule"; + public static final String SCHEDULE_AND_EXECUTE = "scheduleandexecute"; private final Config cfg; private transient FileSystem fs; private TypedProperties props; private final JavaSparkContext jsc; + private final HoodieTableMetaClient metaClient; public HoodieCompactor(JavaSparkContext jsc, Config cfg) { this.cfg = cfg; @@ -59,6 +68,7 @@ 
public HoodieCompactor(JavaSparkContext jsc, Config cfg) { this.props = cfg.propsFilePath == null ? UtilHelpers.buildProperties(cfg.configs) : readConfigFromFileSystem(jsc, cfg); + this.metaClient = UtilHelpers.createMetaClient(jsc, cfg.basePath, true); } private TypedProperties readConfigFromFileSystem(JavaSparkContext jsc, Config cfg) { @@ -73,9 +83,9 @@ public static class Config implements Serializable { public String tableName = null; @Parameter(names = {"--instant-time", "-it"}, description = "Compaction Instant time", required = false) public String compactionInstantTime = null; - @Parameter(names = {"--parallelism", "-pl"}, description = "Parallelism for hoodie insert", required = true) - public int parallelism = 1; - @Parameter(names = {"--schema-file", "-sf"}, description = "path for Avro schema file", required = true) + @Parameter(names = {"--parallelism", "-pl"}, description = "Parallelism for hoodie insert", required = false) + public int parallelism = 200; + @Parameter(names = {"--schema-file", "-sf"}, description = "path for Avro schema file", required = false) public String schemaFile = null; @Parameter(names = {"--spark-master", "-ms"}, description = "Spark master", required = false) public String sparkMaster = null; @@ -85,8 +95,12 @@ public static class Config implements Serializable { public int retry = 0; @Parameter(names = {"--schedule", "-sc"}, description = "Schedule compaction", required = false) public Boolean runSchedule = false; + @Parameter(names = {"--mode", "-m"}, description = "Set job mode: Set \"schedule\" means make a compact plan; " + + "Set \"execute\" means execute a compact plan at given instant which means --instant-time is needed here; " + + "Set \"scheduleAndExecute\" means make a compact plan first and execute that plan immediately", required = false) + public String runningMode = null; @Parameter(names = {"--strategy", "-st"}, description = "Strategy Class", required = false) - public String strategyClassName = null; + public String strategyClassName = LogFileSizeBasedCompactionStrategy.class.getName(); @Parameter(names = {"--help", "-h"}, help = true) public Boolean help = false; @@ -96,8 +110,57 @@ public static class Config implements Serializable { @Parameter(names = {"--hoodie-conf"}, description = "Any configuration that can be set in the properties file " + "(using the CLI parameter \"--props\") can also be passed command line using this parameter. 
This can be repeated", - splitter = IdentitySplitter.class) + splitter = IdentitySplitter.class) public List configs = new ArrayList<>(); + + @Override + public String toString() { + return "HoodieCompactorConfig {\n" + + " --base-path " + basePath + ", \n" + + " --table-name " + tableName + ", \n" + + " --instant-time " + compactionInstantTime + ", \n" + + " --parallelism " + parallelism + ", \n" + + " --schema-file " + schemaFile + ", \n" + + " --spark-master " + sparkMaster + ", \n" + + " --spark-memory " + sparkMemory + ", \n" + + " --retry " + retry + ", \n" + + " --schedule " + runSchedule + ", \n" + + " --mode " + runningMode + ", \n" + + " --strategy " + strategyClassName + ", \n" + + " --props " + propsFilePath + ", \n" + + " --hoodie-conf " + configs + + "\n}"; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + Config config = (Config) o; + return basePath.equals(config.basePath) + && Objects.equals(tableName, config.tableName) + && Objects.equals(compactionInstantTime, config.compactionInstantTime) + && Objects.equals(parallelism, config.parallelism) + && Objects.equals(schemaFile, config.schemaFile) + && Objects.equals(sparkMaster, config.sparkMaster) + && Objects.equals(sparkMemory, config.sparkMemory) + && Objects.equals(retry, config.retry) + && Objects.equals(runSchedule, config.runSchedule) + && Objects.equals(runningMode, config.runningMode) + && Objects.equals(strategyClassName, config.strategyClassName) + && Objects.equals(propsFilePath, config.propsFilePath) + && Objects.equals(configs, config.configs); + } + + @Override + public int hashCode() { + return Objects.hash(basePath, tableName, compactionInstantTime, schemaFile, + sparkMaster, parallelism, sparkMemory, retry, runSchedule, runningMode, strategyClassName, propsFilePath, configs, help); + } } public static void main(String[] args) { @@ -120,52 +183,115 @@ public static void main(String[] args) { public int compact(int retry) { this.fs = FSUtils.getFs(cfg.basePath, jsc.hadoopConfiguration()); + // need to do validate in case that users call compact() directly without setting cfg.runningMode + validateRunningMode(cfg); + LOG.info(cfg); + int ret = UtilHelpers.retry(retry, () -> { - if (cfg.runSchedule) { - if (null == cfg.strategyClassName) { - throw new IllegalArgumentException("Missing Strategy class name for running compaction"); + switch (cfg.runningMode.toLowerCase()) { + case SCHEDULE: { + LOG.info("Running Mode: [" + SCHEDULE + "]; Do schedule"); + Option instantTime = doSchedule(jsc); + int result = instantTime.isPresent() ? 
0 : -1; + if (result == 0) { + LOG.info("The schedule instant time is " + instantTime.get()); + } + return result; + } + case SCHEDULE_AND_EXECUTE: { + LOG.info("Running Mode: [" + SCHEDULE_AND_EXECUTE + "]"); + return doScheduleAndCompact(jsc); + } + case EXECUTE: { + LOG.info("Running Mode: [" + EXECUTE + "]; Do compaction"); + return doCompact(jsc); + } + default: { + LOG.info("Unsupported running mode [" + cfg.runningMode + "], quit the job directly"); + return -1; } - return doSchedule(jsc); - } else { - return doCompact(jsc); } }, "Compact failed"); return ret; } + private Integer doScheduleAndCompact(JavaSparkContext jsc) throws Exception { + LOG.info("Step 1: Do schedule"); + Option instantTime = doSchedule(jsc); + if (!instantTime.isPresent()) { + LOG.warn("Couldn't do schedule"); + return -1; + } else { + cfg.compactionInstantTime = instantTime.get(); + } + + LOG.info("The schedule instant time is " + instantTime.get()); + LOG.info("Step 2: Do compaction"); + + return doCompact(jsc); + } + + // make sure that cfg.runningMode couldn't be null + private static void validateRunningMode(Config cfg) { + // --mode has a higher priority than --schedule + // If we remove --schedule option in the future we need to change runningMode default value to EXECUTE + if (StringUtils.isNullOrEmpty(cfg.runningMode)) { + cfg.runningMode = cfg.runSchedule ? SCHEDULE : EXECUTE; + } + } + private int doCompact(JavaSparkContext jsc) throws Exception { // Get schema. - String schemaStr = UtilHelpers.parseSchema(fs, cfg.schemaFile); - SparkRDDWriteClient client = - UtilHelpers.createHoodieClient(jsc, cfg.basePath, schemaStr, cfg.parallelism, Option.empty(), props); - // If no compaction instant is provided by --instant-time, find the earliest scheduled compaction - // instant from the active timeline - if (StringUtils.isNullOrEmpty(cfg.compactionInstantTime)) { - HoodieTableMetaClient metaClient = UtilHelpers.createMetaClient(jsc, cfg.basePath, true); - Option firstCompactionInstant = - metaClient.getActiveTimeline().firstInstant( - HoodieTimeline.COMPACTION_ACTION, HoodieInstant.State.REQUESTED); - if (firstCompactionInstant.isPresent()) { - cfg.compactionInstantTime = firstCompactionInstant.get().getTimestamp(); - LOG.info("Found the earliest scheduled compaction instant which will be executed: " - + cfg.compactionInstantTime); - } else { - throw new HoodieCompactionException("There is no scheduled compaction in the table."); + String schemaStr; + if (StringUtils.isNullOrEmpty(cfg.schemaFile)) { + schemaStr = getSchemaFromLatestInstant(); + } else { + schemaStr = UtilHelpers.parseSchema(fs, cfg.schemaFile); + } + LOG.info("Schema --> : " + schemaStr); + + try (SparkRDDWriteClient client = + UtilHelpers.createHoodieClient(jsc, cfg.basePath, schemaStr, cfg.parallelism, Option.empty(), props)) { + // If no compaction instant is provided by --instant-time, find the earliest scheduled compaction + // instant from the active timeline + if (StringUtils.isNullOrEmpty(cfg.compactionInstantTime)) { + HoodieTableMetaClient metaClient = UtilHelpers.createMetaClient(jsc, cfg.basePath, true); + Option firstCompactionInstant = + metaClient.getActiveTimeline().firstInstant( + HoodieTimeline.COMPACTION_ACTION, HoodieInstant.State.REQUESTED); + if (firstCompactionInstant.isPresent()) { + cfg.compactionInstantTime = firstCompactionInstant.get().getTimestamp(); + LOG.info("Found the earliest scheduled compaction instant which will be executed: " + + cfg.compactionInstantTime); + } else { + throw new 
HoodieCompactionException("There is no scheduled compaction in the table."); + } } + HoodieWriteMetadata> compactionMetadata = client.compact(cfg.compactionInstantTime); + return UtilHelpers.handleErrors(compactionMetadata.getCommitMetadata().get(), cfg.compactionInstantTime); } - JavaRDD writeResponse = client.compact(cfg.compactionInstantTime); - return UtilHelpers.handleErrors(jsc, cfg.compactionInstantTime, writeResponse); } - private int doSchedule(JavaSparkContext jsc) throws Exception { - // Get schema. - SparkRDDWriteClient client = - UtilHelpers.createHoodieClient(jsc, cfg.basePath, "", cfg.parallelism, Option.of(cfg.strategyClassName), props); - if (StringUtils.isNullOrEmpty(cfg.compactionInstantTime)) { - throw new IllegalArgumentException("No instant time is provided for scheduling compaction. " - + "Please specify the compaction instant time by using --instant-time."); + private Option doSchedule(JavaSparkContext jsc) { + try (SparkRDDWriteClient client = + UtilHelpers.createHoodieClient(jsc, cfg.basePath, "", cfg.parallelism, Option.of(cfg.strategyClassName), props)) { + + if (StringUtils.isNullOrEmpty(cfg.compactionInstantTime)) { + LOG.warn("No instant time is provided for scheduling compaction."); + return client.scheduleCompaction(Option.empty()); + } + + client.scheduleCompactionAtInstant(cfg.compactionInstantTime, Option.empty()); + return Option.of(cfg.compactionInstantTime); + } + } + + private String getSchemaFromLatestInstant() throws Exception { + TableSchemaResolver schemaUtil = new TableSchemaResolver(metaClient); + if (metaClient.getActiveTimeline().getCommitsTimeline().filterCompletedInstants().countInstants() == 0) { + throw new HoodieException("Cannot run compaction without any completed commits"); } - client.scheduleCompactionAtInstant(cfg.compactionInstantTime, Option.empty()); - return 0; + Schema schema = schemaUtil.getTableAvroSchema(false); + return schema.toString(); } } diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieDataTableUtils.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieDataTableUtils.java new file mode 100644 index 0000000000000..755a203d17933 --- /dev/null +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieDataTableUtils.java @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.utilities; + +import org.apache.hudi.common.fs.FSUtils; +import org.apache.hudi.metadata.HoodieTableMetadata; + +import org.apache.hadoop.fs.Path; + +import java.io.IOException; +import java.util.Arrays; +import java.util.List; +import java.util.stream.Collectors; + +public class HoodieDataTableUtils { + + /** + * @return All hoodie files of the table from the file system. + * @throws IOException upon errors. 
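+   * @param tableMetadata metadata table view used to list all partitions and their files
+   * @param basePath base path of the Hudi table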
+ */ + static List getBaseAndLogFilePathsFromFileSystem(HoodieTableMetadata tableMetadata, String basePath) throws IOException { + List allPartitionPaths = tableMetadata.getAllPartitionPaths() + .stream().map(partitionPath -> + FSUtils.getPartitionPath(basePath, partitionPath).toString()) + .collect(Collectors.toList()); + return tableMetadata.getAllFilesInPartitions(allPartitionPaths).values().stream() + .map(fileStatuses -> + Arrays.stream(fileStatuses).map(fileStatus -> fileStatus.getPath()).collect(Collectors.toList())) + .flatMap(list -> list.stream()) + .collect(Collectors.toList()); + } + +} diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieDataTableValidator.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieDataTableValidator.java new file mode 100644 index 0000000000000..0180fa0af1590 --- /dev/null +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieDataTableValidator.java @@ -0,0 +1,396 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.utilities; + +import org.apache.hudi.async.HoodieAsyncService; +import org.apache.hudi.client.common.HoodieSparkEngineContext; +import org.apache.hudi.common.config.TypedProperties; +import org.apache.hudi.common.fs.FSUtils; +import org.apache.hudi.common.table.HoodieTableMetaClient; +import org.apache.hudi.common.table.timeline.HoodieActiveTimeline; +import org.apache.hudi.common.table.timeline.HoodieInstant; +import org.apache.hudi.common.table.timeline.HoodieTimeline; +import org.apache.hudi.common.util.Option; +import org.apache.hudi.common.util.collection.Pair; +import org.apache.hudi.exception.HoodieException; +import org.apache.hudi.exception.HoodieValidationException; +import org.apache.hudi.metadata.FileSystemBackedTableMetadata; +import org.apache.hudi.metadata.HoodieTableMetadata; +import org.apache.hudi.table.repair.RepairUtils; + +import com.beust.jcommander.JCommander; +import com.beust.jcommander.Parameter; +import org.apache.hadoop.fs.Path; +import org.apache.log4j.LogManager; +import org.apache.log4j.Logger; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaSparkContext; + +import java.io.Serializable; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Set; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +/** + * A validator with spark-submit to ensure there are no dangling data files in the data table. + * No data files found for commits prior to active timeline. 
+ * No extra data files found for completed commits beyond what is present in the commit metadata. + * + *
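+ * The first check treats a file as dangling when the commit time encoded in its name is strictly older
+ * than the earliest instant on the active timeline; it boils down to the comparison used below:
+ * ```
+ * String instantTime = FSUtils.getCommitTime(path.getName());
+ * boolean dangling = HoodieTimeline.compareTimestamps(instantTime, HoodieTimeline.LESSER_THAN, earliestInstant);
+ * ```
+ * The second check groups files on storage by the instant time in their names and compares them against
+ * the file paths recorded in that instant's commit metadata:
+ * ```
+ * Map<String, List<String>> instantToFilesMap = RepairUtils.tagInstantsOfBaseAndLogFiles(basePath, allDataFilePaths);
+ * // files present on storage but absent from their instant's commit metadata are reported as dangling
+ * ```
+ *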

    + * - Default: this validator validates the data files only once. + *

    + * Example command: + * ``` + * spark-submit \ + * --class org.apache.hudi.utilities.HoodieDataTableValidator \ + * --packages org.apache.spark:spark-avro_2.11:2.4.4 \ + * --master spark://xxxx:7077 \ + * --driver-memory 1g \ + * --executor-memory 1g \ + * $HUDI_DIR/hudi/packaging/hudi-utilities-bundle/target/hudi-utilities-bundle_2.11-0.11.0-SNAPSHOT.jar \ + * --base-path basePath + * ``` + * + *
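+ * The validator can also be driven programmatically; a minimal sketch using only classes from this
+ * patch ({@code local[2]} and the base path are stand-ins):
+ * ```
+ * SparkConf sparkConf = UtilHelpers.buildSparkConf("Hoodie-Data-Table-Validator", "local[2]");
+ * JavaSparkContext jsc = new JavaSparkContext(sparkConf);
+ * HoodieDataTableValidator.Config cfg = new HoodieDataTableValidator.Config();
+ * cfg.basePath = "file:///tmp/hudi_table";   // stand-in table base path
+ * new HoodieDataTableValidator(jsc, cfg).run();   // cfg.continuous defaults to false: validate once
+ * jsc.stop();
+ * ```
+ *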

    + * You can also set `--continuous` to keep this validator running continuously, + * and use `--min-validate-interval-seconds` to control the validation frequency (default: 10 minutes). + *
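+ * Each continuous round measures its own duration and only sleeps for the remainder of the interval
+ * (variable names below are illustrative; see {@code AsyncDataTableValidateService}):
+ * ```
+ * long toSleepMs = minValidateIntervalSeconds * 1000L - (roundEndMs - roundStartMs);
+ * // e.g. a 600s interval and a 90s round leave 510_000 ms to sleep;
+ * // if toSleepMs <= 0 the next round starts immediately
+ * ```
+ *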

    + * Example command: + * ``` + * spark-submit \ + * --class org.apache.hudi.utilities.HoodieDataTableValidator \ + * --packages org.apache.spark:spark-avro_2.11:2.4.4 \ + * --master spark://xxxx:7077 \ + * --driver-memory 1g \ + * --executor-memory 1g \ + * $HUDI_DIR/hudi/packaging/hudi-utilities-bundle/target/hudi-utilities-bundle_2.11-0.11.0-SNAPSHOT.jar \ + * --base-path basePath + * --continuous \ + * --min-validate-interval-seconds 60 + * ``` + */ +public class HoodieDataTableValidator implements Serializable { + + private static final Logger LOG = LogManager.getLogger(HoodieDataTableValidator.class); + + // Spark context + private transient JavaSparkContext jsc; + // config + private Config cfg; + // Properties with source, hoodie client, key generator etc. + private TypedProperties props; + + private HoodieTableMetaClient metaClient; + + protected transient Option asyncDataTableValidateService; + + public HoodieDataTableValidator(HoodieTableMetaClient metaClient) { + this.metaClient = metaClient; + } + + public HoodieDataTableValidator(JavaSparkContext jsc, Config cfg) { + this.jsc = jsc; + this.cfg = cfg; + + this.props = cfg.propsFilePath == null + ? UtilHelpers.buildProperties(cfg.configs) + : readConfigFromFileSystem(jsc, cfg); + + this.metaClient = HoodieTableMetaClient.builder() + .setConf(jsc.hadoopConfiguration()).setBasePath(cfg.basePath) + .setLoadActiveTimelineOnLoad(true) + .build(); + + this.asyncDataTableValidateService = cfg.continuous ? Option.of(new AsyncDataTableValidateService()) : Option.empty(); + } + + /** + * Reads config from the file system. + * + * @param jsc {@link JavaSparkContext} instance. + * @param cfg {@link Config} instance. + * @return the {@link TypedProperties} instance. + */ + private TypedProperties readConfigFromFileSystem(JavaSparkContext jsc, Config cfg) { + return UtilHelpers.readConfig(jsc.hadoopConfiguration(), new Path(cfg.propsFilePath), cfg.configs) + .getProps(true); + } + + public static class Config implements Serializable { + @Parameter(names = {"--base-path", "-sp"}, description = "Base path for the table", required = true) + public String basePath = null; + + @Parameter(names = {"--continuous"}, description = "Running MetadataTableValidator in continuous. " + + "Can use --min-validate-interval-seconds to control validation frequency", required = false) + public boolean continuous = false; + + @Parameter(names = {"--min-validate-interval-seconds"}, + description = "the min validate interval of each validate when set --continuous, default is 10 minutes.") + public Integer minValidateIntervalSeconds = 10 * 60; + + @Parameter(names = {"--parallelism", "-pl"}, description = "Parallelism for validation", required = false) + public int parallelism = 200; + + @Parameter(names = {"--ignore-failed", "-ig"}, description = "Ignore data table validate failure and continue.", required = false) + public boolean ignoreFailed = false; + + @Parameter(names = {"--assume-date-partitioning"}, description = "Should HoodieWriteClient assume the data is partitioned by dates, i.e three levels from base path." + + "This is a stop-gap to support tables created by versions < 0.3.1. 
Will be removed eventually", required = false) + public Boolean assumeDatePartitioning = false; + + @Parameter(names = {"--spark-master", "-ms"}, description = "Spark master", required = false) + public String sparkMaster = null; + + @Parameter(names = {"--spark-memory", "-sm"}, description = "spark memory to use", required = false) + public String sparkMemory = "1g"; + + @Parameter(names = {"--props"}, description = "path to properties file on localfs or dfs, with configurations for " + + "hoodie client") + public String propsFilePath = null; + + @Parameter(names = {"--hoodie-conf"}, description = "Any configuration that can be set in the properties file " + + "(using the CLI parameter \"--props\") can also be passed command line using this parameter. This can be repeated", + splitter = IdentitySplitter.class) + public List configs = new ArrayList<>(); + + @Parameter(names = {"--help", "-h"}, help = true) + public Boolean help = false; + + @Override + public String toString() { + return "MetadataTableValidatorConfig {\n" + + " --base-path " + basePath + ", \n" + + " --continuous " + continuous + ", \n" + + " --ignore-failed " + ignoreFailed + ", \n" + + " --min-validate-interval-seconds " + minValidateIntervalSeconds + ", \n" + + " --parallelism " + parallelism + ", \n" + + " --spark-master " + sparkMaster + ", \n" + + " --spark-memory " + sparkMemory + ", \n" + + " --assumeDatePartitioning-memory " + assumeDatePartitioning + ", \n" + + " --props " + propsFilePath + ", \n" + + " --hoodie-conf " + configs + + "\n}"; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + HoodieMetadataTableValidator.Config config = (HoodieMetadataTableValidator.Config) o; + return basePath.equals(config.basePath) + && Objects.equals(continuous, config.continuous) + && Objects.equals(minValidateIntervalSeconds, config.minValidateIntervalSeconds) + && Objects.equals(parallelism, config.parallelism) + && Objects.equals(ignoreFailed, config.ignoreFailed) + && Objects.equals(sparkMaster, config.sparkMaster) + && Objects.equals(sparkMemory, config.sparkMemory) + && Objects.equals(assumeDatePartitioning, config.assumeDatePartitioning) + && Objects.equals(propsFilePath, config.propsFilePath) + && Objects.equals(configs, config.configs); + } + + @Override + public int hashCode() { + return Objects.hash(basePath, continuous, minValidateIntervalSeconds, parallelism, ignoreFailed, sparkMaster, sparkMemory, + assumeDatePartitioning, propsFilePath, configs, help); + } + } + + public static void main(String[] args) { + final Config cfg = new Config(); + JCommander cmd = new JCommander(cfg, null, args); + + if (cfg.help || args.length == 0) { + cmd.usage(); + System.exit(1); + } + + SparkConf sparkConf = UtilHelpers.buildSparkConf("Hoodie-Data-Table-Validator", cfg.sparkMaster); + sparkConf.set("spark.executor.memory", cfg.sparkMemory); + JavaSparkContext jsc = new JavaSparkContext(sparkConf); + + HoodieDataTableValidator validator = new HoodieDataTableValidator(jsc, cfg); + + try { + validator.run(); + } catch (Throwable throwable) { + LOG.error("Fail to do hoodie Data table validation for " + validator.cfg, throwable); + } finally { + jsc.stop(); + } + } + + public void run() { + try { + LOG.info(cfg); + if (cfg.continuous) { + LOG.info(" ****** do hoodie data table validation in CONTINUOUS mode ******"); + doHoodieDataTableValidationContinuous(); + } else { + LOG.info(" ****** do hoodie data table validation once 
******"); + doHoodieDataTableValidationOnce(); + } + } catch (Exception e) { + throw new HoodieException("Unable to do hoodie data table validation in " + cfg.basePath, e); + } finally { + + if (asyncDataTableValidateService.isPresent()) { + asyncDataTableValidateService.get().shutdown(true); + } + } + } + + private void doHoodieDataTableValidationOnce() { + try { + doDataTableValidation(); + } catch (HoodieValidationException e) { + LOG.error("Metadata table validation failed to HoodieValidationException", e); + if (!cfg.ignoreFailed) { + throw e; + } + } + } + + private void doHoodieDataTableValidationContinuous() { + asyncDataTableValidateService.ifPresent(service -> { + service.start(null); + try { + service.waitForShutdown(); + } catch (Exception e) { + throw new HoodieException(e.getMessage(), e); + } + }); + } + + public void doDataTableValidation() { + boolean finalResult = true; + metaClient.reloadActiveTimeline(); + String basePath = metaClient.getBasePath(); + HoodieSparkEngineContext engineContext = new HoodieSparkEngineContext(jsc); + try { + HoodieTableMetadata tableMetadata = new FileSystemBackedTableMetadata( + engineContext, engineContext.getHadoopConf(), cfg.basePath, cfg.assumeDatePartitioning); + List allDataFilePaths = HoodieDataTableUtils.getBaseAndLogFilePathsFromFileSystem(tableMetadata, cfg.basePath); + // verify that no data files present with commit time < earliest commit in active timeline. + if (metaClient.getActiveTimeline().firstInstant().isPresent()) { + String earliestInstant = metaClient.getActiveTimeline().firstInstant().get().getTimestamp(); + List danglingFilePaths = allDataFilePaths.stream().filter(path -> { + String instantTime = FSUtils.getCommitTime(path.getName()); + return HoodieTimeline.compareTimestamps(instantTime, HoodieTimeline.LESSER_THAN, earliestInstant); + }).collect(Collectors.toList()); + + if (!danglingFilePaths.isEmpty() && danglingFilePaths.size() > 0) { + LOG.error("Data table validation failed due to dangling files count " + danglingFilePaths.size() + ", found before active timeline"); + danglingFilePaths.forEach(entry -> LOG.error("Dangling file: " + entry.toString())); + finalResult = false; + if (!cfg.ignoreFailed) { + throw new HoodieValidationException("Data table validation failed due to dangling files " + danglingFilePaths.size()); + } + } + + // Verify that for every completed commit in active timeline, there are no extra files found apart from what is present in + // commit metadata. + Map> instantToFilesMap = RepairUtils.tagInstantsOfBaseAndLogFiles( + metaClient.getBasePath(), allDataFilePaths); + HoodieActiveTimeline activeTimeline = metaClient.getActiveTimeline(); + List hoodieInstants = activeTimeline.filterCompletedInstants().getInstants().collect(Collectors.toList()); + + List danglingFiles = engineContext.flatMap(hoodieInstants, instant -> { + Option> filesFromTimeline = RepairUtils.getBaseAndLogFilePathsFromTimeline( + activeTimeline, instant); + List baseAndLogFilesFromFs = instantToFilesMap.containsKey(instant.getTimestamp()) ? 
instantToFilesMap.get(instant.getTimestamp()) + : Collections.emptyList(); + if (!baseAndLogFilesFromFs.isEmpty()) { + Set danglingInstantFiles = new HashSet<>(baseAndLogFilesFromFs); + if (filesFromTimeline.isPresent()) { + danglingInstantFiles.removeAll(filesFromTimeline.get()); + } + return new ArrayList<>(danglingInstantFiles).stream(); + } else { + return Stream.empty(); + } + }, hoodieInstants.size()).stream().collect(Collectors.toList()); + + if (!danglingFiles.isEmpty()) { + LOG.error("Data table validation failed due to extra files found for completed commits " + danglingFiles.size()); + danglingFiles.forEach(entry -> LOG.error("Dangling file: " + entry.toString())); + finalResult = false; + if (!cfg.ignoreFailed) { + throw new HoodieValidationException("Data table validation failed due to dangling files " + danglingFiles.size()); + } + } + } + } catch (Exception e) { + LOG.error("Data table validation failed due to " + e.getMessage(), e); + if (!cfg.ignoreFailed) { + throw new HoodieValidationException("Data table validation failed due to " + e.getMessage(), e); + } + } + + if (finalResult) { + LOG.info("Data table validation succeeded."); + } else { + LOG.warn("Data table validation failed."); + } + } + + public class AsyncDataTableValidateService extends HoodieAsyncService { + private final transient ExecutorService executor = Executors.newSingleThreadExecutor(); + + @Override + protected Pair startService() { + return Pair.of(CompletableFuture.supplyAsync(() -> { + while (true) { + try { + long start = System.currentTimeMillis(); + doDataTableValidation(); + long toSleepMs = cfg.minValidateIntervalSeconds * 1000 - (System.currentTimeMillis() - start); + + if (toSleepMs > 0) { + LOG.info("Last validate ran less than min validate interval: " + cfg.minValidateIntervalSeconds + " s, sleep: " + + toSleepMs + " ms."); + Thread.sleep(toSleepMs); + } + } catch (HoodieValidationException e) { + LOG.error("Shutting down AsyncDataTableValidateService due to HoodieValidationException", e); + if (!cfg.ignoreFailed) { + throw e; + } + } catch (InterruptedException e) { + // ignore InterruptedException here. + } + } + }, executor), executor); + } + } +} \ No newline at end of file diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieMetadataTableValidator.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieMetadataTableValidator.java new file mode 100644 index 0000000000000..f9b0e1a86d6af --- /dev/null +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieMetadataTableValidator.java @@ -0,0 +1,757 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hudi.utilities; + +import org.apache.hudi.async.HoodieAsyncService; +import org.apache.hudi.client.common.HoodieSparkEngineContext; +import org.apache.hudi.common.bloom.BloomFilter; +import org.apache.hudi.common.config.HoodieMetadataConfig; +import org.apache.hudi.common.config.TypedProperties; +import org.apache.hudi.common.engine.HoodieEngineContext; +import org.apache.hudi.common.fs.FSUtils; +import org.apache.hudi.common.model.BaseFile; +import org.apache.hudi.common.model.FileSlice; +import org.apache.hudi.common.model.HoodieBaseFile; +import org.apache.hudi.common.model.HoodieColumnRangeMetadata; +import org.apache.hudi.common.model.HoodieFileGroup; +import org.apache.hudi.common.table.HoodieTableMetaClient; +import org.apache.hudi.common.table.TableSchemaResolver; +import org.apache.hudi.common.table.view.FileSystemViewManager; +import org.apache.hudi.common.table.view.FileSystemViewStorageConfig; +import org.apache.hudi.common.table.view.HoodieTableFileSystemView; +import org.apache.hudi.common.util.Option; +import org.apache.hudi.common.util.ParquetUtils; +import org.apache.hudi.common.util.collection.Pair; +import org.apache.hudi.exception.HoodieException; +import org.apache.hudi.exception.HoodieValidationException; +import org.apache.hudi.io.storage.HoodieFileReader; +import org.apache.hudi.io.storage.HoodieFileReaderFactory; +import org.apache.hudi.metadata.HoodieTableMetadata; +import org.apache.hudi.utilities.util.BloomFilterData; + +import com.beust.jcommander.JCommander; +import com.beust.jcommander.Parameter; +import jline.internal.Log; +import org.apache.avro.generic.IndexedRecord; +import org.apache.hadoop.fs.Path; +import org.apache.log4j.LogManager; +import org.apache.log4j.Logger; +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaSparkContext; + +import java.io.IOException; +import java.io.Serializable; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; +import java.util.List; +import java.util.Objects; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.stream.Collectors; + +/** + * A validator with spark-submit to compare information, such as partitions, file listing, index, etc., + * between metadata table and filesystem. + *

    + * There are five validation tasks that can be enabled independently through the following CLI options: + * - `--validate-latest-file-slices`: validate latest file slices for all partitions. + * - `--validate-latest-base-files`: validate latest base files for all partitions. + * - `--validate-all-file-groups`: validate all file groups, and all file slices within file groups. + * - `--validate-all-column-stats`: validate column stats for all columns in the schema. + * - `--validate-bloom-filters`: validate bloom filters of base files. + *
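+ * The five checks are plain boolean fields on {@code Config}, so they can also be toggled in code; a
+ * sketch enabling only the two file-listing checks (assumes an existing {@code JavaSparkContext jsc};
+ * the base path is a stand-in):
+ * ```
+ * HoodieMetadataTableValidator.Config cfg = new HoodieMetadataTableValidator.Config();
+ * cfg.basePath = "file:///tmp/hudi_table";   // stand-in table base path
+ * cfg.validateLatestFileSlices = true;
+ * cfg.validateLatestBaseFiles = true;
+ * new HoodieMetadataTableValidator(jsc, cfg).run();
+ * ```
+ *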

    + * - Default: this validator compares the results between the metadata table and the filesystem only once. + *
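+ * Every enabled check reduces to comparing two sorted lists, one built from the metadata table and one
+ * from a direct file-system listing; any size or element mismatch fails the validation (see
+ * {@code validate(...)} below):
+ * ```
+ * if (fromMetadataTable.size() != fromFileSystem.size() || !fromMetadataTable.equals(fromFileSystem)) {
+ *   throw new HoodieValidationException(message);
+ * }
+ * ```
+ *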

    + * Example command: + * ``` + * spark-submit \ + * --class org.apache.hudi.utilities.HoodieMetadataTableValidator \ + * --packages org.apache.spark:spark-avro_2.11:2.4.4 \ + * --master spark://xxxx:7077 \ + * --driver-memory 1g \ + * --executor-memory 1g \ + * $HUDI_DIR/hudi/packaging/hudi-utilities-bundle/target/hudi-utilities-bundle_2.11-0.11.0-SNAPSHOT.jar \ + * --base-path basePath \ + * --validate-latest-file-slices \ + * --validate-latest-base-files \ + * --validate-all-file-groups + * ``` + * + *

    + * You can also set `--continuous` to keep this validator running continuously, + * and use `--min-validate-interval-seconds` to control the validation frequency (default: 10 minutes). + *
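+ * With `--ignore-failed` a failing round is only logged and the loop keeps going; otherwise the first
+ * {@link HoodieValidationException} shuts the async service down:
+ * ```
+ * try {
+ *   doMetadataTableValidation();
+ * } catch (HoodieValidationException e) {
+ *   if (!cfg.ignoreFailed) {
+ *     throw e;   // stops AsyncMetadataTableValidateService
+ *   }
+ * }
+ * ```
+ *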

    + * Example command: + * ``` + * spark-submit \ + * --class org.apache.hudi.utilities.HoodieMetadataTableValidator \ + * --packages org.apache.spark:spark-avro_2.11:2.4.4 \ + * --master spark://xxxx:7077 \ + * --driver-memory 1g \ + * --executor-memory 1g \ + * $HUDI_DIR/hudi/packaging/hudi-utilities-bundle/target/hudi-utilities-bundle_2.11-0.11.0-SNAPSHOT.jar \ + * --base-path basePath \ + * --validate-latest-file-slices \ + * --validate-latest-base-files \ + * --validate-all-file-groups \ + * --continuous \ + * --min-validate-interval-seconds 60 + * ``` + * + */ +public class HoodieMetadataTableValidator implements Serializable { + + private static final Logger LOG = LogManager.getLogger(HoodieMetadataTableValidator.class); + + // Spark context + private transient JavaSparkContext jsc; + // config + private Config cfg; + // Properties with source, hoodie client, key generator etc. + private TypedProperties props; + + private HoodieTableMetaClient metaClient; + + protected transient Option asyncMetadataTableValidateService; + + public HoodieMetadataTableValidator(HoodieTableMetaClient metaClient) { + this.metaClient = metaClient; + } + + public HoodieMetadataTableValidator(JavaSparkContext jsc, Config cfg) { + this.jsc = jsc; + this.cfg = cfg; + + this.props = cfg.propsFilePath == null + ? UtilHelpers.buildProperties(cfg.configs) + : readConfigFromFileSystem(jsc, cfg); + + this.metaClient = HoodieTableMetaClient.builder() + .setConf(jsc.hadoopConfiguration()).setBasePath(cfg.basePath) + .setLoadActiveTimelineOnLoad(true) + .build(); + + this.asyncMetadataTableValidateService = cfg.continuous ? Option.of(new AsyncMetadataTableValidateService()) : Option.empty(); + } + + /** + * Reads config from the file system. + * + * @param jsc {@link JavaSparkContext} instance. + * @param cfg {@link Config} instance. + * @return the {@link TypedProperties} instance. + */ + private TypedProperties readConfigFromFileSystem(JavaSparkContext jsc, Config cfg) { + return UtilHelpers.readConfig(jsc.hadoopConfiguration(), new Path(cfg.propsFilePath), cfg.configs) + .getProps(true); + } + + public static class Config implements Serializable { + @Parameter(names = {"--base-path", "-sp"}, description = "Base path for the table", required = true) + public String basePath = null; + + @Parameter(names = {"--continuous"}, description = "Running MetadataTableValidator in continuous. 
" + + "Can use --min-validate-interval-seconds to control validation frequency", required = false) + public boolean continuous = false; + + @Parameter(names = {"--validate-latest-file-slices"}, description = "Validate latest file slices for all partitions.", required = false) + public boolean validateLatestFileSlices = false; + + @Parameter(names = {"--validate-latest-base-files"}, description = "Validate latest base files for all partitions.", required = false) + public boolean validateLatestBaseFiles = false; + + @Parameter(names = {"--validate-all-file-groups"}, description = "Validate all file groups, and all file slices within file groups.", required = false) + public boolean validateAllFileGroups = false; + + @Parameter(names = {"--validate-all-column-stats"}, description = "Validate column stats for all columns in the schema", required = false) + public boolean validateAllColumnStats = false; + + @Parameter(names = {"--validate-bloom-filters"}, description = "Validate bloom filters of base files", required = false) + public boolean validateBloomFilters = false; + + @Parameter(names = {"--min-validate-interval-seconds"}, + description = "the min validate interval of each validate when set --continuous, default is 10 minutes.") + public Integer minValidateIntervalSeconds = 10 * 60; + + @Parameter(names = {"--parallelism", "-pl"}, description = "Parallelism for valuation", required = false) + public int parallelism = 200; + + @Parameter(names = {"--ignore-failed", "-ig"}, description = "Ignore metadata validate failure and continue.", required = false) + public boolean ignoreFailed = false; + + @Parameter(names = {"--spark-master", "-ms"}, description = "Spark master", required = false) + public String sparkMaster = null; + + @Parameter(names = {"--spark-memory", "-sm"}, description = "spark memory to use", required = false) + public String sparkMemory = "1g"; + + @Parameter(names = {"--assume-date-partitioning"}, description = "Should HoodieWriteClient assume the data is partitioned by dates, i.e three levels from base path." + + "This is a stop-gap to support tables created by versions < 0.3.1. Will be removed eventually", required = false) + public Boolean assumeDatePartitioning = false; + + @Parameter(names = {"--props"}, description = "path to properties file on localfs or dfs, with configurations for " + + "hoodie client") + public String propsFilePath = null; + + @Parameter(names = {"--hoodie-conf"}, description = "Any configuration that can be set in the properties file " + + "(using the CLI parameter \"--props\") can also be passed command line using this parameter. 
This can be repeated", + splitter = IdentitySplitter.class) + public List configs = new ArrayList<>(); + + @Parameter(names = {"--help", "-h"}, help = true) + public Boolean help = false; + + @Override + public String toString() { + return "MetadataTableValidatorConfig {\n" + + " --base-path " + basePath + ", \n" + + " --validate-latest-file-slices " + validateLatestFileSlices + ", \n" + + " --validate-latest-base-files " + validateLatestBaseFiles + ", \n" + + " --validate-all-file-groups " + validateAllFileGroups + ", \n" + + " --validate-all-column-stats " + validateAllColumnStats + ", \n" + + " --validate-bloom-filters " + validateBloomFilters + ", \n" + + " --continuous " + continuous + ", \n" + + " --ignore-failed " + ignoreFailed + ", \n" + + " --min-validate-interval-seconds " + minValidateIntervalSeconds + ", \n" + + " --parallelism " + parallelism + ", \n" + + " --spark-master " + sparkMaster + ", \n" + + " --spark-memory " + sparkMemory + ", \n" + + " --assumeDatePartitioning-memory " + assumeDatePartitioning + ", \n" + + " --props " + propsFilePath + ", \n" + + " --hoodie-conf " + configs + + "\n}"; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + Config config = (Config) o; + return basePath.equals(config.basePath) + && Objects.equals(continuous, config.continuous) + && Objects.equals(validateLatestFileSlices, config.validateLatestFileSlices) + && Objects.equals(validateLatestBaseFiles, config.validateLatestBaseFiles) + && Objects.equals(validateAllFileGroups, config.validateAllFileGroups) + && Objects.equals(validateAllColumnStats, config.validateAllColumnStats) + && Objects.equals(validateBloomFilters, config.validateBloomFilters) + && Objects.equals(minValidateIntervalSeconds, config.minValidateIntervalSeconds) + && Objects.equals(parallelism, config.parallelism) + && Objects.equals(ignoreFailed, config.ignoreFailed) + && Objects.equals(sparkMaster, config.sparkMaster) + && Objects.equals(sparkMemory, config.sparkMemory) + && Objects.equals(assumeDatePartitioning, config.assumeDatePartitioning) + && Objects.equals(propsFilePath, config.propsFilePath) + && Objects.equals(configs, config.configs); + } + + @Override + public int hashCode() { + return Objects.hash(basePath, continuous, validateLatestFileSlices, validateLatestBaseFiles, + validateAllFileGroups, validateAllColumnStats, validateBloomFilters, minValidateIntervalSeconds, + parallelism, ignoreFailed, sparkMaster, sparkMemory, assumeDatePartitioning, propsFilePath, configs, help); + } + } + + public static void main(String[] args) { + final Config cfg = new Config(); + JCommander cmd = new JCommander(cfg, null, args); + + if (cfg.help || args.length == 0) { + cmd.usage(); + System.exit(1); + } + + SparkConf sparkConf = UtilHelpers.buildSparkConf("Hoodie-Metadata-Table-Validator", cfg.sparkMaster); + sparkConf.set("spark.executor.memory", cfg.sparkMemory); + JavaSparkContext jsc = new JavaSparkContext(sparkConf); + + HoodieMetadataTableValidator validator = new HoodieMetadataTableValidator(jsc, cfg); + + try { + validator.run(); + } catch (Throwable throwable) { + LOG.error("Fail to do hoodie metadata table validation for " + validator.cfg, throwable); + } finally { + jsc.stop(); + } + } + + public void run() { + try { + LOG.info(cfg); + if (cfg.continuous) { + LOG.info(" ****** do hoodie metadata table validation in CONTINUOUS mode ******"); + doHoodieMetadataTableValidationContinuous(); + } else { + 
LOG.info(" ****** do hoodie metadata table validation once ******"); + doHoodieMetadataTableValidationOnce(); + } + } catch (Exception e) { + throw new HoodieException("Unable to do hoodie metadata table validation in " + cfg.basePath, e); + } finally { + + if (asyncMetadataTableValidateService.isPresent()) { + asyncMetadataTableValidateService.get().shutdown(true); + } + } + } + + private void doHoodieMetadataTableValidationOnce() { + try { + doMetadataTableValidation(); + } catch (HoodieValidationException e) { + LOG.error("Metadata table validation failed to HoodieValidationException", e); + if (!cfg.ignoreFailed) { + throw e; + } + } + } + + private void doHoodieMetadataTableValidationContinuous() { + asyncMetadataTableValidateService.ifPresent(service -> { + service.start(null); + try { + service.waitForShutdown(); + } catch (Exception e) { + throw new HoodieException(e.getMessage(), e); + } + }); + } + + public void doMetadataTableValidation() { + boolean finalResult = true; + metaClient.reloadActiveTimeline(); + String basePath = metaClient.getBasePath(); + HoodieSparkEngineContext engineContext = new HoodieSparkEngineContext(jsc); + List allPartitions = validatePartitions(engineContext, basePath); + HoodieMetadataValidationContext metadataTableBasedContext = + new HoodieMetadataValidationContext(engineContext, cfg, metaClient, true); + HoodieMetadataValidationContext fsBasedContext = + new HoodieMetadataValidationContext(engineContext, cfg, metaClient, false); + + List result = engineContext.parallelize(allPartitions, allPartitions.size()).map(partitionPath -> { + try { + validateFilesInPartition(metadataTableBasedContext, fsBasedContext, partitionPath); + LOG.info("Metadata table validation succeeded for " + partitionPath); + return true; + } catch (HoodieValidationException e) { + LOG.error("Metadata table validation failed for " + partitionPath + " due to HoodieValidationException", e); + if (!cfg.ignoreFailed) { + throw e; + } + return false; + } + }).collectAsList(); + + for (Boolean res : result) { + finalResult &= res; + } + + if (finalResult) { + LOG.info("Metadata table validation succeeded."); + } else { + LOG.warn("Metadata table validation failed."); + } + } + + /** + * Compare the listing partitions result between metadata table and fileSystem. + */ + private List validatePartitions(HoodieSparkEngineContext engineContext, String basePath) { + // compare partitions + List allPartitionPathsFromFS = FSUtils.getAllPartitionPaths(engineContext, basePath, false, cfg.assumeDatePartitioning); + List allPartitionPathsMeta = FSUtils.getAllPartitionPaths(engineContext, basePath, true, cfg.assumeDatePartitioning); + + Collections.sort(allPartitionPathsFromFS); + Collections.sort(allPartitionPathsMeta); + + if (allPartitionPathsFromFS.size() != allPartitionPathsMeta.size() + || !allPartitionPathsFromFS.equals(allPartitionPathsMeta)) { + String message = "Compare Partitions Failed! " + "AllPartitionPathsFromFS : " + allPartitionPathsFromFS + " and allPartitionPathsMeta : " + allPartitionPathsMeta; + LOG.error(message); + throw new HoodieValidationException(message); + } + + return allPartitionPathsMeta; + } + + /** + * Compare the file listing and index data between metadata table and fileSystem. + * For now, validate five kinds of apis: + * 1. HoodieMetadataFileSystemView::getLatestFileSlices + * 2. HoodieMetadataFileSystemView::getLatestBaseFiles + * 3. HoodieMetadataFileSystemView::getAllFileGroups and HoodieMetadataFileSystemView::getAllFileSlices + * 4. 
HoodieBackedTableMetadata::getColumnStats + * 5. HoodieBackedTableMetadata::getBloomFilters + * + * @param metadataTableBasedContext Validation context containing information based on metadata table + * @param fsBasedContext Validation context containing information based on the file system + * @param partitionPath Partition path String + */ + private void validateFilesInPartition( + HoodieMetadataValidationContext metadataTableBasedContext, + HoodieMetadataValidationContext fsBasedContext, String partitionPath) { + if (cfg.validateLatestFileSlices) { + validateLatestFileSlices(metadataTableBasedContext, fsBasedContext, partitionPath); + } + + if (cfg.validateLatestBaseFiles) { + validateLatestBaseFiles(metadataTableBasedContext, fsBasedContext, partitionPath); + } + + if (cfg.validateAllFileGroups) { + validateAllFileGroups(metadataTableBasedContext, fsBasedContext, partitionPath); + } + + if (cfg.validateAllColumnStats) { + validateAllColumnStats(metadataTableBasedContext, fsBasedContext, partitionPath); + } + + if (cfg.validateBloomFilters) { + validateBloomFilters(metadataTableBasedContext, fsBasedContext, partitionPath); + } + } + + private void validateAllFileGroups( + HoodieMetadataValidationContext metadataTableBasedContext, + HoodieMetadataValidationContext fsBasedContext, String partitionPath) { + List allFileSlicesFromMeta = metadataTableBasedContext + .getSortedAllFileGroupList(partitionPath).stream() + .flatMap(HoodieFileGroup::getAllFileSlices).sorted(new FileSliceComparator()) + .collect(Collectors.toList()); + List allFileSlicesFromFS = fsBasedContext + .getSortedAllFileGroupList(partitionPath).stream() + .flatMap(HoodieFileGroup::getAllFileSlices).sorted(new FileSliceComparator()) + .collect(Collectors.toList()); + + LOG.debug("All file slices from metadata: " + allFileSlicesFromMeta + ". For partitions " + partitionPath); + LOG.debug("All file slices from direct listing: " + allFileSlicesFromFS + ". For partitions " + partitionPath); + validate(allFileSlicesFromMeta, allFileSlicesFromFS, partitionPath, "file slices"); + + LOG.info("Validation of all file groups succeeded for partition " + partitionPath); + } + + /** + * Compare getLatestBaseFiles between metadata table and fileSystem. + */ + private void validateLatestBaseFiles( + HoodieMetadataValidationContext metadataTableBasedContext, + HoodieMetadataValidationContext fsBasedContext, String partitionPath) { + + List latestFilesFromMetadata = metadataTableBasedContext.getSortedLatestBaseFileList(partitionPath); + List latestFilesFromFS = fsBasedContext.getSortedLatestBaseFileList(partitionPath); + + LOG.debug("Latest base file from metadata: " + latestFilesFromMetadata + ". For partitions " + partitionPath); + LOG.debug("Latest base file from direct listing: " + latestFilesFromFS + ". For partitions " + partitionPath); + if (latestFilesFromMetadata.size() != latestFilesFromFS.size() + || !latestFilesFromMetadata.equals(latestFilesFromFS)) { + String message = "Validation of metadata get latest base file for partition " + partitionPath + " failed. " + + "Latest base file from metadata: " + latestFilesFromMetadata + + "Latest base file from direct listing: " + latestFilesFromFS; + LOG.error(message); + throw new HoodieValidationException(message); + } else { + LOG.info("Validation of getLatestBaseFiles succeeded for partition " + partitionPath); + } + } + + /** + * Compare getLatestFileSlices between metadata table and fileSystem. 
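+ * Both sides are sorted with {@link FileSliceComparator}, so the comparison is order-insensitive.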
+ */ + private void validateLatestFileSlices( + HoodieMetadataValidationContext metadataTableBasedContext, + HoodieMetadataValidationContext fsBasedContext, String partitionPath) { + + List latestFileSlicesFromMetadataTable = metadataTableBasedContext.getSortedLatestFileSliceList(partitionPath); + List latestFileSlicesFromFS = fsBasedContext.getSortedLatestFileSliceList(partitionPath); + + LOG.debug("Latest file list from metadata: " + latestFileSlicesFromMetadataTable + ". For partition " + partitionPath); + LOG.debug("Latest file list from direct listing: " + latestFileSlicesFromFS + ". For partition " + partitionPath); + + validate(latestFileSlicesFromMetadataTable, latestFileSlicesFromFS, partitionPath, "file slices"); + LOG.info("Validation of getLatestFileSlices succeeded for partition " + partitionPath); + } + + private void validateAllColumnStats( + HoodieMetadataValidationContext metadataTableBasedContext, + HoodieMetadataValidationContext fsBasedContext, String partitionPath) { + List latestBaseFilenameList = fsBasedContext.getSortedLatestBaseFileList(partitionPath) + .stream().map(BaseFile::getFileName).collect(Collectors.toList()); + List> metadataBasedColStats = metadataTableBasedContext + .getSortedColumnStatsList(partitionPath, latestBaseFilenameList); + List> fsBasedColStats = fsBasedContext + .getSortedColumnStatsList(partitionPath, latestBaseFilenameList); + + validate(metadataBasedColStats, fsBasedColStats, partitionPath, "column stats"); + + LOG.info("Validation of column stats succeeded for partition " + partitionPath); + } + + private void validateBloomFilters( + HoodieMetadataValidationContext metadataTableBasedContext, + HoodieMetadataValidationContext fsBasedContext, String partitionPath) { + List latestBaseFilenameList = fsBasedContext.getSortedLatestBaseFileList(partitionPath) + .stream().map(BaseFile::getFileName).collect(Collectors.toList()); + List metadataBasedBloomFilters = metadataTableBasedContext + .getSortedBloomFilterList(partitionPath, latestBaseFilenameList); + List fsBasedBloomFilters = fsBasedContext + .getSortedBloomFilterList(partitionPath, latestBaseFilenameList); + + validate(metadataBasedBloomFilters, fsBasedBloomFilters, partitionPath, "bloom filters"); + + LOG.info("Validation of bloom filters succeeded for partition " + partitionPath); + } + + private void validate( + List infoListFromMetadataTable, List infoListFromFS, String partitionPath, String label) { + if (infoListFromMetadataTable.size() != infoListFromFS.size() + || !infoListFromMetadataTable.equals(infoListFromFS)) { + String message = String.format("Validation of %s for partition %s failed." 
+ + "\n%s from metadata: %s\n%s from file system and base files: %s", + label, partitionPath, label, infoListFromMetadataTable, label, infoListFromFS); + LOG.error(message); + throw new HoodieValidationException(message); + } else { + LOG.info(String.format("Validation of %s succeeded for partition %s", label, partitionPath)); + } + } + + public class AsyncMetadataTableValidateService extends HoodieAsyncService { + private final transient ExecutorService executor = Executors.newSingleThreadExecutor(); + + @Override + protected Pair startService() { + return Pair.of(CompletableFuture.supplyAsync(() -> { + while (true) { + try { + long start = System.currentTimeMillis(); + doMetadataTableValidation(); + long toSleepMs = cfg.minValidateIntervalSeconds * 1000 - (System.currentTimeMillis() - start); + + if (toSleepMs > 0) { + LOG.info("Last validate ran less than min validate interval: " + cfg.minValidateIntervalSeconds + " s, sleep: " + + toSleepMs + " ms."); + Thread.sleep(toSleepMs); + } + } catch (HoodieValidationException e) { + LOG.error("Shutting down AsyncMetadataTableValidateService due to HoodieValidationException", e); + if (!cfg.ignoreFailed) { + throw e; + } + } catch (InterruptedException e) { + // ignore InterruptedException here. + } + } + }, executor), executor); + } + } + + public static class FileSliceComparator implements Comparator, Serializable { + + @Override + public int compare(FileSlice o1, FileSlice o2) { + return (o1.getPartitionPath() + o1.getFileId() + o1.getBaseInstantTime()) + .compareTo(o2.getPartitionPath() + o2.getFileId() + o2.getBaseInstantTime()); + } + } + + public static class HoodieBaseFileComparator implements Comparator, Serializable { + + @Override + public int compare(HoodieBaseFile o1, HoodieBaseFile o2) { + return o1.getPath().compareTo(o2.getPath()); + } + } + + public static class HoodieFileGroupComparator implements Comparator, Serializable { + + @Override + public int compare(HoodieFileGroup o1, HoodieFileGroup o2) { + return o1.getFileGroupId().compareTo(o2.getFileGroupId()); + } + } + + public static class HoodieColumnRangeMetadataComparator + implements Comparator>, Serializable { + + @Override + public int compare(HoodieColumnRangeMetadata o1, HoodieColumnRangeMetadata o2) { + return o1.toString().compareTo(o2.toString()); + } + } + + /** + * Class for storing relevant information for metadata table validation. + *

    + * If metadata table is disabled, the APIs provide the information, e.g., file listing, + * index, from the file system and base files. If metadata table is enabled, the APIs + * provide the information from the metadata table. The same API is expected to return + * the same information regardless of whether metadata table is enabled, which is + * verified in the {@link HoodieMetadataTableValidator}. + */ + private static class HoodieMetadataValidationContext implements Serializable { + private HoodieTableMetaClient metaClient; + private HoodieTableFileSystemView fileSystemView; + private HoodieTableMetadata tableMetadata; + private boolean enableMetadataTable; + private List allColumnNameList; + + public HoodieMetadataValidationContext( + HoodieEngineContext engineContext, Config cfg, HoodieTableMetaClient metaClient, + boolean enableMetadataTable) { + this.metaClient = metaClient; + this.enableMetadataTable = enableMetadataTable; + HoodieMetadataConfig metadataConfig = HoodieMetadataConfig.newBuilder() + .enable(enableMetadataTable) + .withMetadataIndexBloomFilter(enableMetadataTable) + .withMetadataIndexColumnStats(enableMetadataTable) + .withMetadataIndexForAllColumns(enableMetadataTable) + .withAssumeDatePartitioning(cfg.assumeDatePartitioning) + .build(); + this.fileSystemView = FileSystemViewManager.createInMemoryFileSystemView(engineContext, + metaClient, metadataConfig); + this.tableMetadata = HoodieTableMetadata.create(engineContext, metadataConfig, metaClient.getBasePath(), + FileSystemViewStorageConfig.SPILLABLE_DIR.defaultValue()); + if (metaClient.getCommitsTimeline().filterCompletedInstants().countInstants() > 0) { + this.allColumnNameList = getAllColumnNames(); + } + } + + public List getSortedLatestBaseFileList(String partitionPath) { + return fileSystemView.getLatestBaseFiles(partitionPath) + .sorted(new HoodieBaseFileComparator()).collect(Collectors.toList()); + } + + public List getSortedLatestFileSliceList(String partitionPath) { + return fileSystemView.getLatestFileSlices(partitionPath) + .sorted(new FileSliceComparator()).collect(Collectors.toList()); + } + + public List getSortedAllFileGroupList(String partitionPath) { + return fileSystemView.getAllFileGroups(partitionPath) + .sorted(new HoodieFileGroupComparator()).collect(Collectors.toList()); + } + + public List> getSortedColumnStatsList( + String partitionPath, List baseFileNameList) { + LOG.info("All column names for getting column stats: " + allColumnNameList); + if (enableMetadataTable) { + List> partitionFileNameList = baseFileNameList.stream() + .map(filename -> Pair.of(partitionPath, filename)).collect(Collectors.toList()); + return allColumnNameList.stream() + .flatMap(columnName -> + tableMetadata.getColumnStats(partitionFileNameList, columnName).values().stream() + .map(stats -> new HoodieColumnRangeMetadata<>( + stats.getFileName(), + columnName, + stats.getMinValue(), + stats.getMaxValue(), + stats.getNullCount(), + stats.getValueCount(), + stats.getTotalSize(), + stats.getTotalUncompressedSize())) + .collect(Collectors.toList()) + .stream()) + .sorted(new HoodieColumnRangeMetadataComparator()) + .collect(Collectors.toList()); + } else { + return baseFileNameList.stream().flatMap(filename -> + new ParquetUtils().readRangeFromParquetMetadata( + metaClient.getHadoopConf(), + new Path(new Path(metaClient.getBasePath(), partitionPath), filename), + allColumnNameList).stream()) + .map(rangeMetadata -> new HoodieColumnRangeMetadata( + rangeMetadata.getFilePath(), + rangeMetadata.getColumnName(), + // 
Note: here we ignore the type in the validation, + // since column stats from metadata table store the min/max values as String + rangeMetadata.getMinValue().toString(), + rangeMetadata.getMaxValue().toString(), + rangeMetadata.getNullCount(), + rangeMetadata.getValueCount(), + rangeMetadata.getTotalSize(), + rangeMetadata.getTotalUncompressedSize() + )) + .sorted(new HoodieColumnRangeMetadataComparator()) + .collect(Collectors.toList()); + } + } + + public List getSortedBloomFilterList( + String partitionPath, List baseFileNameList) { + if (enableMetadataTable) { + List> partitionFileNameList = baseFileNameList.stream() + .map(filename -> Pair.of(partitionPath, filename)).collect(Collectors.toList()); + return tableMetadata.getBloomFilters(partitionFileNameList).entrySet().stream() + .map(entry -> BloomFilterData.builder() + .setPartitionPath(entry.getKey().getKey()) + .setFilename(entry.getKey().getValue()) + .setBloomFilter(entry.getValue()) + .build()) + .sorted() + .collect(Collectors.toList()); + } else { + return baseFileNameList.stream() + .map(filename -> readBloomFilterFromFile(partitionPath, filename)) + .filter(Option::isPresent) + .map(Option::get) + .sorted() + .collect(Collectors.toList()); + } + } + + private List getAllColumnNames() { + TableSchemaResolver schemaResolver = new TableSchemaResolver(metaClient); + try { + return schemaResolver.getTableAvroSchema().getFields().stream() + .map(entry -> entry.name()).collect(Collectors.toList()); + } catch (Exception e) { + throw new HoodieException("Failed to get all column names for " + metaClient.getBasePath()); + } + } + + private Option readBloomFilterFromFile(String partitionPath, String filename) { + Path path = new Path(new Path(metaClient.getBasePath(), partitionPath), filename); + HoodieFileReader fileReader; + try { + fileReader = HoodieFileReaderFactory.getFileReader(metaClient.getHadoopConf(), path); + } catch (IOException e) { + Log.error("Failed to get file reader for " + path + " " + e.getMessage()); + return Option.empty(); + } + final BloomFilter fileBloomFilter = fileReader.readBloomFilter(); + if (fileBloomFilter == null) { + Log.error("Failed to read bloom filter for " + path); + return Option.empty(); + } + return Option.of(BloomFilterData.builder() + .setPartitionPath(partitionPath) + .setFilename(filename) + .setBloomFilter(ByteBuffer.wrap(fileBloomFilter.serializeToString().getBytes())) + .build()); + } + } +} diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieRepairTool.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieRepairTool.java index d6b74c8099dfc..7d725ed6af37a 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieRepairTool.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieRepairTool.java @@ -48,7 +48,6 @@ import java.io.Serializable; import java.security.SecureRandom; import java.util.ArrayList; -import java.util.Arrays; import java.util.List; import java.util.Map; import java.util.stream.Collectors; @@ -345,7 +344,7 @@ static boolean deleteFiles( boolean doRepair( Option startingInstantOption, Option endingInstantOption, boolean isDryRun) throws IOException { // Scans all partitions to find base and log files in the base path - List allFilesInPartitions = getBaseAndLogFilePathsFromFileSystem(); + List allFilesInPartitions = HoodieDataTableUtils.getBaseAndLogFilePathsFromFileSystem(tableMetadata, cfg.basePath); // Buckets the files based on instant time // instant time -> relative paths of base and log files to 
base path Map> instantToFilesMap = RepairUtils.tagInstantsOfBaseAndLogFiles( @@ -388,22 +387,6 @@ boolean doRepair( return true; } - /** - * @return All hoodie files of the table from the file system. - * @throws IOException upon errors. - */ - List getBaseAndLogFilePathsFromFileSystem() throws IOException { - List allPartitionPaths = tableMetadata.getAllPartitionPaths() - .stream().map(partitionPath -> - FSUtils.getPartitionPath(cfg.basePath, partitionPath).toString()) - .collect(Collectors.toList()); - return tableMetadata.getAllFilesInPartitions(allPartitionPaths).values().stream() - .map(fileStatuses -> - Arrays.stream(fileStatuses).map(fileStatus -> fileStatus.getPath()).collect(Collectors.toList())) - .flatMap(list -> list.stream()) - .collect(Collectors.toList()); - } - /** * Undoes repair for UNDO mode. * diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/UtilHelpers.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/UtilHelpers.java index 81c5caf82142f..8690ff1cfb132 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/UtilHelpers.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/UtilHelpers.java @@ -26,7 +26,9 @@ import org.apache.hudi.common.config.DFSPropertiesConfiguration; import org.apache.hudi.common.config.TypedProperties; import org.apache.hudi.common.fs.FSUtils; +import org.apache.hudi.common.model.HoodieCommitMetadata; import org.apache.hudi.common.model.HoodieRecordPayload; +import org.apache.hudi.common.model.HoodieWriteStat; import org.apache.hudi.common.table.HoodieTableMetaClient; import org.apache.hudi.common.table.TableSchemaResolver; import org.apache.hudi.common.util.Functions.Function1; @@ -220,7 +222,7 @@ public static String parseSchema(FileSystem fs, String schemaFile) throws Except return new String(buf.array()); } - private static SparkConf buildSparkConf(String appName, String defaultMaster) { + public static SparkConf buildSparkConf(String appName, String defaultMaster) { return buildSparkConf(appName, defaultMaster, new HashMap<>()); } @@ -303,6 +305,18 @@ public static int handleErrors(JavaSparkContext jsc, String instantTime, JavaRDD return -1; } + public static int handleErrors(HoodieCommitMetadata metadata, String instantTime) { + List writeStats = metadata.getWriteStats(); + long errorsCount = writeStats.stream().mapToLong(HoodieWriteStat::getTotalWriteErrors).sum(); + if (errorsCount == 0) { + LOG.info(String.format("Finish job with %s instant time.", instantTime)); + return 0; + } + + LOG.error(String.format("Job failed with %d errors.", errorsCount)); + return -1; + } + /** * Returns a factory for creating connections to the given JDBC URL. * diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/BootstrapExecutor.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/BootstrapExecutor.java index 682c2daa1f68e..833fce295e326 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/BootstrapExecutor.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/BootstrapExecutor.java @@ -160,10 +160,12 @@ public void execute() throws IOException { * Sync to Hive. */ private void syncHive() { - if (cfg.enableHiveSync) { + if (cfg.enableHiveSync || cfg.enableMetaSync) { HiveSyncConfig hiveSyncConfig = DataSourceUtils.buildHiveSyncConfig(props, cfg.targetBasePath, cfg.baseFileFormat); - LOG.info("Syncing target hoodie table with hive table(" + hiveSyncConfig.tableName + "). 
Hive metastore URL :" - + hiveSyncConfig.jdbcUrl + ", basePath :" + cfg.targetBasePath); + HiveConf hiveConf = new HiveConf(fs.getConf(), HiveConf.class); + hiveConf.set(HiveConf.ConfVars.METASTOREURIS.varname,hiveSyncConfig.metastoreUris); + LOG.info("Hive Conf => " + hiveConf.getAllProperties().toString()); + LOG.info("Hive Sync Conf => " + hiveSyncConfig); new HiveSyncTool(hiveSyncConfig, new HiveConf(configuration, HiveConf.class), fs).syncHoodieTable(); } } diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/DeltaSync.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/DeltaSync.java index c7b29c9f0f520..082a9b1d5e82d 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/DeltaSync.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/DeltaSync.java @@ -29,6 +29,7 @@ import org.apache.hudi.client.embedded.EmbeddedTimelineService; import org.apache.hudi.common.config.TypedProperties; import org.apache.hudi.common.fs.FSUtils; +import org.apache.hudi.common.model.HoodieAvroRecord; import org.apache.hudi.common.model.HoodieCommitMetadata; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.model.HoodieRecordPayload; @@ -57,6 +58,7 @@ import org.apache.hudi.keygen.SimpleKeyGenerator; import org.apache.hudi.keygen.constant.KeyGeneratorOptions; import org.apache.hudi.keygen.factory.HoodieSparkKeyGeneratorFactory; +import org.apache.hudi.metrics.HoodieMetrics; import org.apache.hudi.sync.common.AbstractSyncTool; import org.apache.hudi.utilities.UtilHelpers; import org.apache.hudi.utilities.callback.kafka.HoodieWriteCommitKafkaCallback; @@ -65,6 +67,7 @@ import org.apache.hudi.utilities.callback.pulsar.HoodieWriteCommitPulsarCallbackConfig; import org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamer.Config; import org.apache.hudi.utilities.exception.HoodieDeltaStreamerException; +import org.apache.hudi.utilities.exception.HoodieSourceTimeoutException; import org.apache.hudi.utilities.schema.DelegatingSchemaProvider; import org.apache.hudi.utilities.schema.SchemaProvider; import org.apache.hudi.utilities.schema.SchemaSet; @@ -190,6 +193,9 @@ public class DeltaSync implements Serializable { */ private transient Option commitTimelineOpt; + // all commits timeline + private transient Option allCommitsTimelineOpt; + /** * Tracks whether new schema is being seen and creates client accordingly. 
*/ @@ -208,6 +214,8 @@ public class DeltaSync implements Serializable { private transient HoodieDeltaStreamerMetrics metrics; + private transient HoodieMetrics hoodieMetrics; + public DeltaSync(HoodieDeltaStreamer.Config cfg, SparkSession sparkSession, SchemaProvider schemaProvider, TypedProperties props, JavaSparkContext jssc, FileSystem fs, Configuration conf, Function onInitializingHoodieWriteClient) throws IOException { @@ -228,6 +236,7 @@ public DeltaSync(HoodieDeltaStreamer.Config cfg, SparkSession sparkSession, Sche this.transformer = UtilHelpers.createTransformer(cfg.transformerClassNames); this.metrics = new HoodieDeltaStreamerMetrics(getHoodieClientConfig(this.schemaProvider)); + this.hoodieMetrics = new HoodieMetrics(getHoodieClientConfig(this.schemaProvider)); this.formatAdapter = new SourceFormatAdapter( UtilHelpers.createSource(cfg.sourceClassName, props, jssc, sparkSession, schemaProvider, metrics)); @@ -245,15 +254,18 @@ public void refreshTimeline() throws IOException { switch (meta.getTableType()) { case COPY_ON_WRITE: this.commitTimelineOpt = Option.of(meta.getActiveTimeline().getCommitTimeline().filterCompletedInstants()); + this.allCommitsTimelineOpt = Option.of(meta.getActiveTimeline().getAllCommitsTimeline()); break; case MERGE_ON_READ: this.commitTimelineOpt = Option.of(meta.getActiveTimeline().getDeltaCommitTimeline().filterCompletedInstants()); + this.allCommitsTimelineOpt = Option.of(meta.getActiveTimeline().getAllCommitsTimeline()); break; default: throw new HoodieException("Unsupported table type :" + meta.getTableType()); } } else { this.commitTimelineOpt = Option.empty(); + this.allCommitsTimelineOpt = Option.empty(); String partitionColumns = HoodieSparkUtils.getPartitionColumns(keyGenerator, props); HoodieTableMetaClient.withPropertyBuilder() .setTableType(cfg.tableType) @@ -306,6 +318,14 @@ public Pair, JavaRDD> syncOnce() throws IOException } } + // complete the pending clustering before writing to sink + if (cfg.retryLastPendingInlineClusteringJob && getHoodieClientConfig(this.schemaProvider).inlineClusteringEnabled()) { + Option pendingClusteringInstant = getLastPendingClusteringInstant(allCommitsTimelineOpt); + if (pendingClusteringInstant.isPresent()) { + writeClient.cluster(pendingClusteringInstant.get(), true); + } + } + result = writeToSink(srcRecordsWithCkpt.getRight().getRight(), srcRecordsWithCkpt.getRight().getLeft(), metrics, overallTimerContext); } @@ -317,6 +337,14 @@ public Pair, JavaRDD> syncOnce() throws IOException return result; } + private Option getLastPendingClusteringInstant(Option commitTimelineOpt) { + if (commitTimelineOpt.isPresent()) { + Option pendingClusteringInstant = commitTimelineOpt.get().filterPendingReplaceTimeline().lastInstant(); + return pendingClusteringInstant.isPresent() ? Option.of(pendingClusteringInstant.get().getTimestamp()) : Option.empty(); + } + return Option.empty(); + } + /** * Read from Upstream Source and apply transformation if needed. * @@ -348,11 +376,35 @@ public Pair>> readFromSource( .initTable(new Configuration(jssc.hadoopConfiguration()), cfg.targetBasePath); } + LOG.debug("Checkpoint from config: " + cfg.checkpoint); if (!resumeCheckpointStr.isPresent() && cfg.checkpoint != null) { resumeCheckpointStr = Option.of(cfg.checkpoint); } LOG.info("Checkpoint to resume from : " + resumeCheckpointStr); + int maxRetryCount = cfg.retryOnSourceFailures ? 
cfg.maxRetryCount : 1; + int curRetryCount = 0; + Pair>> sourceDataToSync = null; + while (curRetryCount++ < maxRetryCount && sourceDataToSync == null) { + try { + sourceDataToSync = fetchFromSource(resumeCheckpointStr); + } catch (HoodieSourceTimeoutException e) { + if (curRetryCount >= maxRetryCount) { + throw e; + } + try { + LOG.error("Exception thrown while fetching data from source. Msg : " + e.getMessage() + ", class : " + e.getClass() + ", cause : " + e.getCause()); + LOG.error("Sleeping for " + (cfg.retryIntervalSecs) + " before retrying again. Current retry count " + curRetryCount + ", max retry count " + cfg.maxRetryCount); + Thread.sleep(cfg.retryIntervalSecs * 1000); + } catch (InterruptedException ex) { + LOG.error("Ignoring InterruptedException while waiting to retry on source failure " + e.getMessage()); + } + } + } + return sourceDataToSync; + } + + private Pair>> fetchFromSource(Option resumeCheckpointStr) { final Option> avroRDDOptional; final String checkpointStr; SchemaProvider schemaProvider; @@ -391,7 +443,7 @@ public Pair>> readFromSource( targetSchemaProvider = UtilHelpers.createRowBasedSchemaProvider(r.schema(), props, jssc); } return (SchemaProvider) new DelegatingSchemaProvider(props, jssc, - dataAndCheckpoint.getSchemaProvider(), targetSchemaProvider); }) + dataAndCheckpoint.getSchemaProvider(), targetSchemaProvider); }) .orElse(dataAndCheckpoint.getSchemaProvider()); avroRDDOptional = transformed .map(t -> HoodieSparkUtils.createRdd( @@ -408,9 +460,11 @@ public Pair>> readFromSource( schemaProvider = dataAndCheckpoint.getSchemaProvider(); } - if (Objects.equals(checkpointStr, resumeCheckpointStr.orElse(null))) { + if (!cfg.allowCommitOnNoCheckpointChange && Objects.equals(checkpointStr, resumeCheckpointStr.orElse(null))) { LOG.info("No new data, source checkpoint has not changed. Nothing to commit. Old checkpoint=(" - + resumeCheckpointStr + "). New Checkpoint=(" + checkpointStr + ")"); + + resumeCheckpointStr + "). 
New Checkpoint=(" + checkpointStr + ")"); + String commitActionType = CommitUtils.getCommitActionType(cfg.operation, HoodieTableType.valueOf(cfg.tableType)); + hoodieMetrics.updateMetricsForEmptyData(commitActionType); return null; } @@ -428,7 +482,7 @@ public Pair>> readFromSource( KeyGeneratorOptions.KEYGENERATOR_CONSISTENT_LOGICAL_TIMESTAMP_ENABLED.key(), Boolean.parseBoolean(KeyGeneratorOptions.KEYGENERATOR_CONSISTENT_LOGICAL_TIMESTAMP_ENABLED.defaultValue())))) : DataSourceUtils.createPayload(cfg.payloadClassName, gr); - return new HoodieRecord<>(keyGenerator.getKey(gr), payload); + return new HoodieAvroRecord<>(keyGenerator.getKey(gr), payload); }); return Pair.of(schemaProvider, Pair.of(checkpointStr, records)); @@ -448,6 +502,7 @@ private Option getCheckpointToResume(Option commitTimeli Option commitMetadataOption = getLatestCommitMetadataWithValidCheckpointInfo(commitTimelineOpt.get()); if (commitMetadataOption.isPresent()) { HoodieCommitMetadata commitMetadata = commitMetadataOption.get(); + LOG.debug("Checkpoint reset from metadata: " + commitMetadata.getMetadata(CHECKPOINT_RESET_KEY)); if (cfg.checkpoint != null && (StringUtils.isNullOrEmpty(commitMetadata.getMetadata(CHECKPOINT_RESET_KEY)) || !cfg.checkpoint.equals(commitMetadata.getMetadata(CHECKPOINT_RESET_KEY)))) { resumeCheckpointStr = Option.of(cfg.checkpoint); @@ -530,6 +585,10 @@ private Pair, JavaRDD> writeToSink(JavaRDD partitions = records.map(record -> record.getPartitionPath()).distinct().collect(); + writeStatusRDD = writeClient.deletePartitions(partitions, instantTime).getWriteStatuses(); + break; default: throw new HoodieDeltaStreamerException("Unknown operation : " + cfg.operation); } @@ -541,7 +600,9 @@ private Pair, JavaRDD> writeToSink(JavaRDD checkpointCommitMetadata = new HashMap<>(); - checkpointCommitMetadata.put(CHECKPOINT_KEY, checkpointStr); + if (checkpointStr != null) { + checkpointCommitMetadata.put(CHECKPOINT_KEY, checkpointStr); + } if (cfg.checkpoint != null) { checkpointCommitMetadata.put(CHECKPOINT_RESET_KEY, cfg.checkpoint); } @@ -655,9 +716,10 @@ private void syncMeta(HoodieDeltaStreamerMetrics metrics) { public void syncHive() { HiveSyncConfig hiveSyncConfig = DataSourceUtils.buildHiveSyncConfig(props, cfg.targetBasePath, cfg.baseFileFormat); - LOG.info("Syncing target hoodie table with hive table(" + hiveSyncConfig.tableName + "). 
Hive metastore URL :" - + hiveSyncConfig.jdbcUrl + ", basePath :" + cfg.targetBasePath); HiveConf hiveConf = new HiveConf(conf, HiveConf.class); + if (StringUtils.isNullOrEmpty(hiveConf.get(HiveConf.ConfVars.METASTOREURIS.varname))) { + hiveConf.set(HiveConf.ConfVars.METASTOREURIS.varname, hiveSyncConfig.metastoreUris); + } LOG.info("Hive Conf => " + hiveConf.getAllProperties().toString()); LOG.info("Hive Sync Conf => " + hiveSyncConfig.toString()); new HiveSyncTool(hiveSyncConfig, hiveConf, fs).syncHoodieTable(); diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/HoodieDeltaStreamer.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/HoodieDeltaStreamer.java index 3ceb0028751a2..c0c141db11de3 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/HoodieDeltaStreamer.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/HoodieDeltaStreamer.java @@ -369,9 +369,25 @@ public static class Config implements Serializable { @Parameter(names = {"--bootstrap-index-class"}, description = "subclass of BootstrapIndex") public String bootstrapIndexClass = HFileBootstrapIndex.class.getName(); + @Parameter(names = {"--retry-on-source-failures"}, description = "Retry on any source failures") + public Boolean retryOnSourceFailures = false; + + @Parameter(names = {"--retry-interval-seconds"}, description = "the retry interval for source failures if --retry-on-source-failures is enabled") + public Integer retryIntervalSecs = 30; + + @Parameter(names = {"--max-retry-count"}, description = "the max retry count if --retry-on-source-failures is enabled") + public Integer maxRetryCount = 3; + + @Parameter(names = {"--allow-commit-on-no-checkpoint-change"}, description = "allow commits even if checkpoint has not changed before and after fetch data" + + "from souce. This might be useful in sources like SqlSource where there is not checkpoint. And is not recommended to enable in continuous mode.") + public Boolean allowCommitOnNoCheckpointChange = false; + @Parameter(names = {"--help", "-h"}, help = true) public Boolean help = false; + @Parameter(names = {"--retry-last-pending-inline-clustering", "-rc"}, description = "Retry last pending inline clustering plan before writing to sink.") + public Boolean retryLastPendingInlineClusteringJob = false; + public boolean isAsyncCompactionEnabled() { return continuousMode && !forceDisableCompaction && HoodieTableType.MERGE_ON_READ.equals(HoodieTableType.valueOf(tableType)); @@ -643,6 +659,10 @@ protected Pair startService() { asyncCompactService.get().enqueuePendingAsyncServiceInstant(new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, scheduledCompactionInstantAndRDD.get().getLeft().get())); asyncCompactService.get().waitTillPendingAsyncServiceInstantsReducesTo(cfg.maxPendingCompactions); + if (asyncCompactService.get().hasError()) { + error = true; + throw new HoodieException("Async compaction failed. 
Shutting down Delta Sync..."); + } } if (clusteringConfig.isAsyncClusteringEnabled()) { Option clusteringInstant = deltaSync.getClusteringInstantOpt(); @@ -650,6 +670,10 @@ protected Pair startService() { LOG.info("Scheduled async clustering for instant: " + clusteringInstant.get()); asyncClusteringService.get().enqueuePendingAsyncServiceInstant(new HoodieInstant(State.REQUESTED, HoodieTimeline.REPLACE_COMMIT_ACTION, clusteringInstant.get())); asyncClusteringService.get().waitTillPendingAsyncServiceInstantsReducesTo(cfg.maxPendingClustering); + if (asyncClusteringService.get().hasError()) { + error = true; + throw new HoodieException("Async clustering failed. Shutting down Delta Sync..."); + } } } long toSleepMs = cfg.minSyncIntervalSeconds * 1000 - (System.currentTimeMillis() - start); @@ -668,6 +692,7 @@ protected Pair startService() { } } finally { shutdownAsyncServices(error); + executor.shutdownNow(); } return true; }, executor), executor); @@ -721,13 +746,12 @@ protected Boolean onInitializingWriteClient(SparkRDDWriteClient writeClient) { HoodieTableMetaClient.builder().setConf(new Configuration(jssc.hadoopConfiguration())).setBasePath(cfg.targetBasePath).setLoadActiveTimelineOnLoad(true).build(); List pending = CompactionUtils.getPendingCompactionInstantTimes(meta); pending.forEach(hoodieInstant -> asyncCompactService.get().enqueuePendingAsyncServiceInstant(hoodieInstant)); - asyncCompactService.get().start((error) -> { - // Shutdown DeltaSync - shutdown(false); - return true; - }); + asyncCompactService.get().start(error -> true); try { asyncCompactService.get().waitTillPendingAsyncServiceInstantsReducesTo(cfg.maxPendingCompactions); + if (asyncCompactService.get().hasError()) { + throw new HoodieException("Async compaction failed during write client initialization."); + } } catch (InterruptedException ie) { throw new HoodieException(ie); } @@ -746,12 +770,12 @@ protected Boolean onInitializingWriteClient(SparkRDDWriteClient writeClient) { List pending = ClusteringUtils.getPendingClusteringInstantTimes(meta); LOG.info(String.format("Found %d pending clustering instants ", pending.size())); pending.forEach(hoodieInstant -> asyncClusteringService.get().enqueuePendingAsyncServiceInstant(hoodieInstant)); - asyncClusteringService.get().start((error) -> { - shutdown(false); - return true; - }); + asyncClusteringService.get().start(error -> true); try { asyncClusteringService.get().waitTillPendingAsyncServiceInstantsReducesTo(cfg.maxPendingClustering); + if (asyncClusteringService.get().hasError()) { + throw new HoodieException("Async clustering failed during write client initialization."); + } } catch (InterruptedException e) { throw new HoodieException(e); } diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/HoodieMultiTableDeltaStreamer.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/HoodieMultiTableDeltaStreamer.java index dc150803e8b38..bcd7b3b7d8ac6 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/HoodieMultiTableDeltaStreamer.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/HoodieMultiTableDeltaStreamer.java @@ -51,6 +51,9 @@ import java.util.Objects; import java.util.Set; +import static org.apache.hudi.utilities.schema.SchemaRegistryProvider.Config.SRC_SCHEMA_REGISTRY_URL_PROP; +import static org.apache.hudi.utilities.schema.SchemaRegistryProvider.Config.TARGET_SCHEMA_REGISTRY_URL_PROP; + /** * Wrapper over HoodieDeltaStreamer.java class. 
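(Editor's note: a worked example of the registry-URL derivation that populateSchemaProviderProps performs further below; the property keys are from this patch, the values are hypothetical:)

    // Inputs:
    //   hoodie.deltastreamer.schemaprovider.registry.baseUrl   = http://registry:8081/subjects/
    //   hoodie.deltastreamer.source.kafka.topic                = stock_ticks
    //   hoodie.deltastreamer.schemaprovider.registry.urlSuffix = -value/versions/latest
    // Derived, but only when registry.url / registry.targetUrl are not already set explicitly:
    //   hoodie.deltastreamer.schemaprovider.registry.url       =
    //       http://registry:8081/subjects/stock_ticks-value/versions/latest
    //   hoodie.deltastreamer.schemaprovider.registry.targetUrl = the same value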
* Helps with ingesting incremental data into hoodie datasets for multiple tables. @@ -152,19 +155,38 @@ private List getTablesToBeIngested(TypedProperties properties) { private void populateSchemaProviderProps(HoodieDeltaStreamer.Config cfg, TypedProperties typedProperties) { if (Objects.equals(cfg.schemaProviderClassName, SchemaRegistryProvider.class.getName())) { + populateSourceRegistryProp(typedProperties); + populateTargetRegistryProp(typedProperties); + } + } + + private void populateTargetRegistryProp(TypedProperties typedProperties) { + String schemaRegistryTargetUrl = typedProperties.getString(TARGET_SCHEMA_REGISTRY_URL_PROP, null); + if (StringUtils.isNullOrEmpty(schemaRegistryTargetUrl)) { String schemaRegistryBaseUrl = typedProperties.getString(Constants.SCHEMA_REGISTRY_BASE_URL_PROP); String schemaRegistrySuffix = typedProperties.getString(Constants.SCHEMA_REGISTRY_URL_SUFFIX_PROP, null); - String sourceSchemaRegistrySuffix; String targetSchemaRegistrySuffix; if (StringUtils.isNullOrEmpty(schemaRegistrySuffix)) { - sourceSchemaRegistrySuffix = typedProperties.getString(Constants.SCHEMA_REGISTRY_SOURCE_URL_SUFFIX); targetSchemaRegistrySuffix = typedProperties.getString(Constants.SCHEMA_REGISTRY_TARGET_URL_SUFFIX); } else { targetSchemaRegistrySuffix = schemaRegistrySuffix; + } + typedProperties.setProperty(TARGET_SCHEMA_REGISTRY_URL_PROP, schemaRegistryBaseUrl + typedProperties.getString(Constants.KAFKA_TOPIC_PROP) + targetSchemaRegistrySuffix); + } + } + + private void populateSourceRegistryProp(TypedProperties typedProperties) { + String schemaRegistrySourceUrl = typedProperties.getString(SRC_SCHEMA_REGISTRY_URL_PROP, null); + if (StringUtils.isNullOrEmpty(schemaRegistrySourceUrl)) { + String schemaRegistryBaseUrl = typedProperties.getString(Constants.SCHEMA_REGISTRY_BASE_URL_PROP); + String schemaRegistrySuffix = typedProperties.getString(Constants.SCHEMA_REGISTRY_URL_SUFFIX_PROP, null); + String sourceSchemaRegistrySuffix; + if (StringUtils.isNullOrEmpty(schemaRegistrySuffix)) { + sourceSchemaRegistrySuffix = typedProperties.getString(Constants.SCHEMA_REGISTRY_SOURCE_URL_SUFFIX); + } else { sourceSchemaRegistrySuffix = schemaRegistrySuffix; } - typedProperties.setProperty(Constants.SOURCE_SCHEMA_REGISTRY_URL_PROP, schemaRegistryBaseUrl + typedProperties.getString(Constants.KAFKA_TOPIC_PROP) + sourceSchemaRegistrySuffix); - typedProperties.setProperty(Constants.TARGET_SCHEMA_REGISTRY_URL_PROP, schemaRegistryBaseUrl + typedProperties.getString(Constants.KAFKA_TOPIC_PROP) + targetSchemaRegistrySuffix); + typedProperties.setProperty(SRC_SCHEMA_REGISTRY_URL_PROP, schemaRegistryBaseUrl + typedProperties.getString(Constants.KAFKA_TOPIC_PROP) + sourceSchemaRegistrySuffix); } } @@ -397,8 +419,6 @@ public void sync() { public static class Constants { public static final String KAFKA_TOPIC_PROP = "hoodie.deltastreamer.source.kafka.topic"; - private static final String SOURCE_SCHEMA_REGISTRY_URL_PROP = "hoodie.deltastreamer.schemaprovider.registry.url"; - private static final String TARGET_SCHEMA_REGISTRY_URL_PROP = "hoodie.deltastreamer.schemaprovider.registry.targetUrl"; public static final String HIVE_SYNC_TABLE_PROP = "hoodie.datasource.hive_sync.table"; private static final String SCHEMA_REGISTRY_BASE_URL_PROP = "hoodie.deltastreamer.schemaprovider.registry.baseUrl"; private static final String SCHEMA_REGISTRY_URL_SUFFIX_PROP = "hoodie.deltastreamer.schemaprovider.registry.urlSuffix"; diff --git 
a/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/SchedulerConfGenerator.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/SchedulerConfGenerator.java index f5f1f384765ab..b991f9d46cb0b 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/SchedulerConfGenerator.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/deltastreamer/SchedulerConfGenerator.java @@ -18,6 +18,7 @@ package org.apache.hudi.utilities.deltastreamer; +import org.apache.hudi.SparkConfigs; import org.apache.hudi.async.AsyncCompactService; import org.apache.hudi.common.model.HoodieTableType; import org.apache.hudi.common.util.Option; @@ -46,7 +47,6 @@ public class SchedulerConfGenerator { public static final String COMPACT_POOL_NAME = AsyncCompactService.COMPACT_POOL_NAME; public static final String SPARK_SCHEDULER_MODE_KEY = "spark.scheduler.mode"; public static final String SPARK_SCHEDULER_FAIR_MODE = "FAIR"; - public static final String SPARK_SCHEDULER_ALLOCATION_FILE_KEY = "spark.scheduler.allocation.file"; private static final String SPARK_SCHEDULING_PATTERN = "\n\n \n" @@ -85,7 +85,7 @@ public static Map getSparkSchedulingConfigs(HoodieDeltaStreamer. && cfg.continuousMode && cfg.tableType.equals(HoodieTableType.MERGE_ON_READ.name())) { String sparkSchedulingConfFile = generateAndStoreConfig(cfg.deltaSyncSchedulingWeight, cfg.compactSchedulingWeight, cfg.deltaSyncSchedulingMinShare, cfg.compactSchedulingMinShare); - additionalSparkConfigs.put(SPARK_SCHEDULER_ALLOCATION_FILE_KEY, sparkSchedulingConfFile); + additionalSparkConfigs.put(SparkConfigs.SPARK_SCHEDULER_ALLOCATION_FILE_KEY(), sparkSchedulingConfFile); } else { LOG.warn("Job Scheduling Configs will not be in effect as spark.scheduler.mode " + "is not set to FAIR at instantiation time. Continuing without scheduling configs"); diff --git a/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/vector/RowColumnVector.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/exception/HoodieSchemaProviderException.java similarity index 66% rename from hudi-flink/src/main/java/org/apache/hudi/table/format/cow/vector/RowColumnVector.java rename to hudi-utilities/src/main/java/org/apache/hudi/utilities/exception/HoodieSchemaProviderException.java index 293af7b9cf2eb..26b6f53b052ae 100644 --- a/hudi-flink/src/main/java/org/apache/hudi/table/format/cow/vector/RowColumnVector.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/exception/HoodieSchemaProviderException.java @@ -7,7 +7,7 @@ * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, @@ -16,15 +16,17 @@ * limitations under the License. */ -package org.apache.hudi.table.format.cow.vector; +package org.apache.hudi.utilities.exception; -import org.apache.hudi.table.format.cow.data.ColumnarRowData; +import org.apache.hudi.exception.HoodieException; -import org.apache.flink.table.data.vector.ColumnVector; +public class HoodieSchemaProviderException extends HoodieException { -/** - * Row column vector. 
- */ -public interface RowColumnVector extends ColumnVector { - ColumnarRowData getRow(int i); -} \ No newline at end of file + public HoodieSchemaProviderException(String msg, Throwable e) { + super(msg, e); + } + + public HoodieSchemaProviderException(String msg) { + super(msg); + } +} diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/exception/HoodieSourceTimeoutException.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/exception/HoodieSourceTimeoutException.java new file mode 100644 index 0000000000000..d95f4f4b5e808 --- /dev/null +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/exception/HoodieSourceTimeoutException.java @@ -0,0 +1,33 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.utilities.exception; + +import org.apache.hudi.exception.HoodieException; + +public class HoodieSourceTimeoutException extends HoodieException { + + public HoodieSourceTimeoutException(String msg, Throwable e) { + super(msg, e); + } + + public HoodieSourceTimeoutException(String msg) { + super(msg); + } +} \ No newline at end of file diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/perf/TimelineServerPerf.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/perf/TimelineServerPerf.java index ac15897f5785c..d992976da2b5a 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/perf/TimelineServerPerf.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/perf/TimelineServerPerf.java @@ -74,7 +74,7 @@ public class TimelineServerPerf implements Serializable { public TimelineServerPerf(Config cfg) throws IOException { this.cfg = cfg; useExternalTimelineServer = (cfg.serverHost != null); - TimelineService.Config timelineServiceConf = cfg.getTimelinServerConfig(); + TimelineService.Config timelineServiceConf = cfg.getTimelineServerConfig(); this.timelineServer = new TimelineService( new HoodieLocalEngineContext(FSUtils.prepareHadoopConf(new Configuration())), new Configuration(), timelineServiceConf, FileSystem.get(new Configuration()), @@ -281,7 +281,7 @@ public static class Config implements Serializable { description = " Server Host (Set it for externally managed timeline service") public String serverHost = null; - @Parameter(names = {"--view-storage", "-st"}, description = "View Storage Type. Defaut - SPILLABLE_DISK") + @Parameter(names = {"--view-storage", "-st"}, description = "View Storage Type. 
Default - SPILLABLE_DISK") public FileSystemViewStorageType viewStorageType = FileSystemViewStorageType.SPILLABLE_DISK; @Parameter(names = {"--max-view-mem-per-table", "-mv"}, @@ -310,7 +310,7 @@ public static class Config implements Serializable { @Parameter(names = {"--help", "-h"}) public Boolean help = false; - public TimelineService.Config getTimelinServerConfig() { + public TimelineService.Config getTimelineServerConfig() { TimelineService.Config c = new TimelineService.Config(); c.viewStorageType = viewStorageType; c.baseStorePathForFileGroups = baseStorePathForFileGroups; diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/HiveSchemaProvider.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/HiveSchemaProvider.java index 219b1ae57886d..9fca2a241a66b 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/HiveSchemaProvider.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/HiveSchemaProvider.java @@ -19,12 +19,12 @@ package org.apache.hudi.utilities.schema; -import org.apache.avro.Schema; import org.apache.hudi.AvroConversionUtils; import org.apache.hudi.DataSourceUtils; import org.apache.hudi.common.config.TypedProperties; -import org.apache.log4j.LogManager; -import org.apache.log4j.Logger; +import org.apache.hudi.utilities.exception.HoodieSchemaProviderException; + +import org.apache.avro.Schema; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.sql.SparkSession; import org.apache.spark.sql.catalyst.TableIdentifier; @@ -34,6 +34,9 @@ import java.util.Collections; +/** + * A schema provider to get data schema through user specified hive table. + */ public class HiveSchemaProvider extends SchemaProvider { /** @@ -46,40 +49,42 @@ public static class Config { private static final String TARGET_SCHEMA_TABLE_PROP = "hoodie.deltastreamer.schemaprovider.target.schema.hive.table"; } - private static final Logger LOG = LogManager.getLogger(HiveSchemaProvider.class); - private final Schema sourceSchema; - private Schema targetSchema; public HiveSchemaProvider(TypedProperties props, JavaSparkContext jssc) { super(props, jssc); DataSourceUtils.checkRequiredProperties(props, Collections.singletonList(Config.SOURCE_SCHEMA_TABLE_PROP)); - String sourceSchemaDBName = props.getString(Config.SOURCE_SCHEMA_DATABASE_PROP, "default"); + String sourceSchemaDatabaseName = props.getString(Config.SOURCE_SCHEMA_DATABASE_PROP, "default"); String sourceSchemaTableName = props.getString(Config.SOURCE_SCHEMA_TABLE_PROP); SparkSession spark = SparkSession.builder().config(jssc.getConf()).enableHiveSupport().getOrCreate(); + + // source schema try { - TableIdentifier sourceSchemaTable = new TableIdentifier(sourceSchemaTableName, scala.Option.apply(sourceSchemaDBName)); + TableIdentifier sourceSchemaTable = new TableIdentifier(sourceSchemaTableName, scala.Option.apply(sourceSchemaDatabaseName)); StructType sourceSchema = spark.sessionState().catalog().getTableMetadata(sourceSchemaTable).schema(); - this.sourceSchema = AvroConversionUtils.convertStructTypeToAvroSchema( - sourceSchema, - sourceSchemaTableName, - "hoodie." + sourceSchemaDBName); + sourceSchema, + sourceSchemaTableName, + "hoodie." 
+ sourceSchemaDatabaseName); + } catch (NoSuchTableException | NoSuchDatabaseException e) { + throw new HoodieSchemaProviderException(String.format("Can't find Hive table: %s.%s", sourceSchemaDatabaseName, sourceSchemaTableName), e); + } - if (props.containsKey(Config.TARGET_SCHEMA_TABLE_PROP)) { - String targetSchemaDBName = props.getString(Config.TARGET_SCHEMA_DATABASE_PROP, "default"); - String targetSchemaTableName = props.getString(Config.TARGET_SCHEMA_TABLE_PROP); - TableIdentifier targetSchemaTable = new TableIdentifier(targetSchemaTableName, scala.Option.apply(targetSchemaDBName)); + // target schema + if (props.containsKey(Config.TARGET_SCHEMA_TABLE_PROP)) { + String targetSchemaDatabaseName = props.getString(Config.TARGET_SCHEMA_DATABASE_PROP, "default"); + String targetSchemaTableName = props.getString(Config.TARGET_SCHEMA_TABLE_PROP); + try { + TableIdentifier targetSchemaTable = new TableIdentifier(targetSchemaTableName, scala.Option.apply(targetSchemaDatabaseName)); StructType targetSchema = spark.sessionState().catalog().getTableMetadata(targetSchemaTable).schema(); this.targetSchema = AvroConversionUtils.convertStructTypeToAvroSchema( - targetSchema, - targetSchemaTableName, - "hoodie." + targetSchemaDBName); + targetSchema, + targetSchemaTableName, + "hoodie." + targetSchemaDatabaseName); + } catch (NoSuchDatabaseException | NoSuchTableException e) { + throw new HoodieSchemaProviderException(String.format("Can't find Hive table: %s.%s", targetSchemaDatabaseName, targetSchemaTableName), e); } - } catch (NoSuchTableException | NoSuchDatabaseException e) { - String message = String.format("Can't find Hive table(s): %s", sourceSchemaTableName + "," + props.getString(Config.TARGET_SCHEMA_TABLE_PROP)); - throw new IllegalArgumentException(message, e); } } diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/SchemaRegistryProvider.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/SchemaRegistryProvider.java index 216369296ad53..1046eac975968 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/SchemaRegistryProvider.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/schema/SchemaRegistryProvider.java @@ -50,7 +50,7 @@ public class SchemaRegistryProvider extends SchemaProvider { public static class Config { public static final String SRC_SCHEMA_REGISTRY_URL_PROP = "hoodie.deltastreamer.schemaprovider.registry.url"; - private static final String TARGET_SCHEMA_REGISTRY_URL_PROP = + public static final String TARGET_SCHEMA_REGISTRY_URL_PROP = "hoodie.deltastreamer.schemaprovider.registry.targetUrl"; } diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/AvroKafkaSource.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/AvroKafkaSource.java index ff8ea5a7aa2da..84c6fd815e838 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/AvroKafkaSource.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/AvroKafkaSource.java @@ -25,6 +25,7 @@ import org.apache.hudi.exception.HoodieIOException; import org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamerMetrics; import org.apache.hudi.utilities.deser.KafkaAvroSchemaDeserializer; +import org.apache.hudi.utilities.exception.HoodieSourceTimeoutException; import org.apache.hudi.utilities.schema.SchemaProvider; import org.apache.hudi.utilities.sources.helpers.AvroConvertor; import org.apache.hudi.utilities.sources.helpers.KafkaOffsetGen; @@ -64,12 +65,12 @@ public 
AvroKafkaSource(TypedProperties props, JavaSparkContext sparkContext, Spa SchemaProvider schemaProvider, HoodieDeltaStreamerMetrics metrics) { super(props, sparkContext, sparkSession, schemaProvider); - props.put(NATIVE_KAFKA_KEY_DESERIALIZER_PROP, StringDeserializer.class); + props.put(NATIVE_KAFKA_KEY_DESERIALIZER_PROP, StringDeserializer.class.getName()); deserializerClassName = props.getString(DataSourceWriteOptions.KAFKA_AVRO_VALUE_DESERIALIZER_CLASS().key(), DataSourceWriteOptions.KAFKA_AVRO_VALUE_DESERIALIZER_CLASS().defaultValue()); try { - props.put(NATIVE_KAFKA_VALUE_DESERIALIZER_PROP, Class.forName(deserializerClassName)); + props.put(NATIVE_KAFKA_VALUE_DESERIALIZER_PROP, Class.forName(deserializerClassName).getName()); if (deserializerClassName.equals(KafkaAvroSchemaDeserializer.class.getName())) { if (schemaProvider == null) { throw new HoodieIOException("SchemaProvider has to be set to use KafkaAvroSchemaDeserializer"); @@ -89,14 +90,18 @@ public AvroKafkaSource(TypedProperties props, JavaSparkContext sparkContext, Spa @Override protected InputBatch> fetchNewData(Option lastCheckpointStr, long sourceLimit) { - OffsetRange[] offsetRanges = offsetGen.getNextOffsetRanges(lastCheckpointStr, sourceLimit, metrics); - long totalNewMsgs = CheckpointUtils.totalNewMessages(offsetRanges); - LOG.info("About to read " + totalNewMsgs + " from Kafka for topic :" + offsetGen.getTopicName()); - if (totalNewMsgs <= 0) { - return new InputBatch<>(Option.empty(), CheckpointUtils.offsetsToStr(offsetRanges)); + try { + OffsetRange[] offsetRanges = offsetGen.getNextOffsetRanges(lastCheckpointStr, sourceLimit, metrics); + long totalNewMsgs = CheckpointUtils.totalNewMessages(offsetRanges); + LOG.info("About to read " + totalNewMsgs + " from Kafka for topic :" + offsetGen.getTopicName()); + if (totalNewMsgs <= 0) { + return new InputBatch<>(Option.empty(), CheckpointUtils.offsetsToStr(offsetRanges)); + } + JavaRDD newDataRDD = toRDD(offsetRanges); + return new InputBatch<>(Option.of(newDataRDD), CheckpointUtils.offsetsToStr(offsetRanges)); + } catch (org.apache.kafka.common.errors.TimeoutException e) { + throw new HoodieSourceTimeoutException("Kafka Source timed out " + e.getMessage()); } - JavaRDD newDataRDD = toRDD(offsetRanges); - return new InputBatch<>(Option.of(newDataRDD), CheckpointUtils.offsetsToStr(offsetRanges)); } private JavaRDD toRDD(OffsetRange[] offsetRanges) { diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/HoodieIncrSource.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/HoodieIncrSource.java index ebb359390be0c..aa1e261c250b5 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/HoodieIncrSource.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/HoodieIncrSource.java @@ -31,7 +31,6 @@ import org.apache.log4j.LogManager; import org.apache.log4j.Logger; import org.apache.spark.api.java.JavaSparkContext; -import org.apache.spark.sql.DataFrameReader; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; import org.apache.spark.sql.SparkSession; @@ -123,21 +122,34 @@ public Pair>, String> fetchNextBatch(Option lastCkpt Option beginInstant = lastCkptStr.isPresent() ? lastCkptStr.get().isEmpty() ? 
Option.empty() : lastCkptStr : Option.empty(); - Pair instantEndpts = IncrSourceHelper.calculateBeginAndEndInstants(sparkContext, srcPath, + Pair> queryTypeAndInstantEndpts = IncrSourceHelper.calculateBeginAndEndInstants(sparkContext, srcPath, numInstantsPerFetch, beginInstant, missingCheckpointStrategy); - if (instantEndpts.getKey().equals(instantEndpts.getValue())) { - LOG.warn("Already caught up. Begin Checkpoint was :" + instantEndpts.getKey()); - return Pair.of(Option.empty(), instantEndpts.getKey()); + if (queryTypeAndInstantEndpts.getValue().getKey().equals(queryTypeAndInstantEndpts.getValue().getValue())) { + LOG.warn("Already caught up. Begin Checkpoint was :" + queryTypeAndInstantEndpts.getValue().getKey()); + return Pair.of(Option.empty(), queryTypeAndInstantEndpts.getValue().getKey()); } + Dataset source = null; // Do Incr pull. Set end instant if available - DataFrameReader reader = sparkSession.read().format("org.apache.hudi") - .option(DataSourceReadOptions.QUERY_TYPE().key(), DataSourceReadOptions.QUERY_TYPE_INCREMENTAL_OPT_VAL()) - .option(DataSourceReadOptions.BEGIN_INSTANTTIME().key(), instantEndpts.getLeft()) - .option(DataSourceReadOptions.END_INSTANTTIME().key(), instantEndpts.getRight()); - - Dataset source = reader.load(srcPath); + if (queryTypeAndInstantEndpts.getKey().equals(DataSourceReadOptions.QUERY_TYPE_INCREMENTAL_OPT_VAL())) { + source = sparkSession.read().format("org.apache.hudi") + .option(DataSourceReadOptions.QUERY_TYPE().key(), DataSourceReadOptions.QUERY_TYPE_INCREMENTAL_OPT_VAL()) + .option(DataSourceReadOptions.BEGIN_INSTANTTIME().key(), queryTypeAndInstantEndpts.getValue().getLeft()) + .option(DataSourceReadOptions.END_INSTANTTIME().key(), queryTypeAndInstantEndpts.getValue().getRight()) + .option(DataSourceReadOptions.INCREMENTAL_FALLBACK_TO_FULL_TABLE_SCAN_FOR_NON_EXISTING_FILES().key(), + props.getString(DataSourceReadOptions.INCREMENTAL_FALLBACK_TO_FULL_TABLE_SCAN_FOR_NON_EXISTING_FILES().key(), + DataSourceReadOptions.INCREMENTAL_FALLBACK_TO_FULL_TABLE_SCAN_FOR_NON_EXISTING_FILES().defaultValue())) + .load(srcPath); + } else { + // if checkpoint is missing from source table, and if strategy is set to READ_UPTO_LATEST_COMMIT, we have to issue snapshot query + source = sparkSession.read().format("org.apache.hudi") + .option(DataSourceReadOptions.QUERY_TYPE().key(), DataSourceReadOptions.QUERY_TYPE_SNAPSHOT_OPT_VAL()) + .load(srcPath) + // add filtering so that only interested records are returned. + .filter(String.format("%s > '%s'", HoodieRecord.COMMIT_TIME_METADATA_FIELD, + queryTypeAndInstantEndpts.getRight().getLeft())); + } /* * log.info("Partition Fields are : (" + partitionFields + "). 
Initial Source Schema :" + source.schema()); @@ -165,6 +177,6 @@ public Pair>, String> fetchNextBatch(Option lastCkpt final Dataset src = source.drop(HoodieRecord.HOODIE_META_COLUMNS.stream() .filter(x -> !x.equals(HoodieRecord.PARTITION_PATH_METADATA_FIELD)).toArray(String[]::new)); // log.info("Final Schema from Source is :" + src.schema()); - return Pair.of(Option.of(src), instantEndpts.getRight()); + return Pair.of(Option.of(src), queryTypeAndInstantEndpts.getRight().getRight()); } } diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/JsonKafkaSource.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/JsonKafkaSource.java index 3dfc611000a35..d6152a177f7fd 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/JsonKafkaSource.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/JsonKafkaSource.java @@ -20,7 +20,9 @@ import org.apache.hudi.common.config.TypedProperties; import org.apache.hudi.common.util.Option; +import org.apache.hudi.common.util.StringUtils; import org.apache.hudi.utilities.deltastreamer.HoodieDeltaStreamerMetrics; +import org.apache.hudi.utilities.exception.HoodieSourceTimeoutException; import org.apache.hudi.utilities.schema.SchemaProvider; import org.apache.hudi.utilities.sources.helpers.KafkaOffsetGen; import org.apache.hudi.utilities.sources.helpers.KafkaOffsetGen.CheckpointUtils; @@ -35,8 +37,6 @@ import org.apache.spark.streaming.kafka010.LocationStrategies; import org.apache.spark.streaming.kafka010.OffsetRange; -import java.util.Objects; - /** * Read json kafka data. */ @@ -52,21 +52,25 @@ public JsonKafkaSource(TypedProperties properties, JavaSparkContext sparkContext SchemaProvider schemaProvider, HoodieDeltaStreamerMetrics metrics) { super(properties, sparkContext, sparkSession, schemaProvider); this.metrics = metrics; - properties.put("key.deserializer", StringDeserializer.class); - properties.put("value.deserializer", StringDeserializer.class); + properties.put("key.deserializer", StringDeserializer.class.getName()); + properties.put("value.deserializer", StringDeserializer.class.getName()); offsetGen = new KafkaOffsetGen(properties); } @Override protected InputBatch> fetchNewData(Option lastCheckpointStr, long sourceLimit) { - OffsetRange[] offsetRanges = offsetGen.getNextOffsetRanges(lastCheckpointStr, sourceLimit, metrics); - long totalNewMsgs = CheckpointUtils.totalNewMessages(offsetRanges); - LOG.info("About to read " + totalNewMsgs + " from Kafka for topic :" + offsetGen.getTopicName()); - if (totalNewMsgs <= 0) { - return new InputBatch<>(Option.empty(), CheckpointUtils.offsetsToStr(offsetRanges)); + try { + OffsetRange[] offsetRanges = offsetGen.getNextOffsetRanges(lastCheckpointStr, sourceLimit, metrics); + long totalNewMsgs = CheckpointUtils.totalNewMessages(offsetRanges); + LOG.info("About to read " + totalNewMsgs + " from Kafka for topic :" + offsetGen.getTopicName()); + if (totalNewMsgs <= 0) { + return new InputBatch<>(Option.empty(), CheckpointUtils.offsetsToStr(offsetRanges)); + } + JavaRDD newDataRDD = toRDD(offsetRanges); + return new InputBatch<>(Option.of(newDataRDD), CheckpointUtils.offsetsToStr(offsetRanges)); + } catch (org.apache.kafka.common.errors.TimeoutException e) { + throw new HoodieSourceTimeoutException("Kafka Source timed out " + e.getMessage()); } - JavaRDD newDataRDD = toRDD(offsetRanges); - return new InputBatch<>(Option.of(newDataRDD), CheckpointUtils.offsetsToStr(offsetRanges)); } private JavaRDD toRDD(OffsetRange[] offsetRanges) { @@ 
-74,7 +78,7 @@ private JavaRDD toRDD(OffsetRange[] offsetRanges) { offsetGen.getKafkaParams(), offsetRanges, LocationStrategies.PreferConsistent()) - .filter(x -> Objects.nonNull(x.value())) + .filter(x -> !StringUtils.isNullOrEmpty((String)x.value())) .map(x -> x.value().toString()); } diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/S3EventsHoodieIncrSource.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/S3EventsHoodieIncrSource.java index 434e14a80453d..2f7d9898b95b0 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/S3EventsHoodieIncrSource.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/S3EventsHoodieIncrSource.java @@ -22,6 +22,7 @@ import org.apache.hudi.DataSourceUtils; import org.apache.hudi.common.config.TypedProperties; import org.apache.hudi.common.fs.FSUtils; +import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.StringUtils; import org.apache.hudi.common.util.collection.Pair; @@ -33,7 +34,6 @@ import org.apache.log4j.LogManager; import org.apache.log4j.Logger; import org.apache.spark.api.java.JavaSparkContext; -import org.apache.spark.sql.DataFrameReader; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; import org.apache.spark.sql.SparkSession; @@ -101,34 +101,43 @@ public Pair>, String> fetchNextBatch(Option lastCkpt ? lastCkptStr.get().isEmpty() ? Option.empty() : lastCkptStr : Option.empty(); - Pair instantEndpts = + Pair> queryTypeAndInstantEndpts = IncrSourceHelper.calculateBeginAndEndInstants( sparkContext, srcPath, numInstantsPerFetch, beginInstant, missingCheckpointStrategy); - if (instantEndpts.getKey().equals(instantEndpts.getValue())) { - LOG.warn("Already caught up. Begin Checkpoint was :" + instantEndpts.getKey()); - return Pair.of(Option.empty(), instantEndpts.getKey()); + if (queryTypeAndInstantEndpts.getValue().getKey().equals(queryTypeAndInstantEndpts.getValue().getValue())) { + LOG.warn("Already caught up. Begin Checkpoint was :" + queryTypeAndInstantEndpts.getValue().getKey()); + return Pair.of(Option.empty(), queryTypeAndInstantEndpts.getValue().getKey()); } + Dataset source = null; // Do incremental pull. Set end instant if available. 
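+      // (Editor's sketch, kept as comments since this sits mid-method:) with a hypothetical
+      // begin instant of 20220101000000, the snapshot branch below is equivalent to reading
+      // the whole source table and applying the predicate
+      //   _hoodie_commit_time > '20220101000000'
+      // so only records committed after the computed checkpoint are ingested.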
- DataFrameReader metaReader = sparkSession.read().format("org.apache.hudi") - .option(DataSourceReadOptions.QUERY_TYPE().key(), DataSourceReadOptions.QUERY_TYPE_INCREMENTAL_OPT_VAL()) - .option(DataSourceReadOptions.BEGIN_INSTANTTIME().key(), instantEndpts.getLeft()) - .option(DataSourceReadOptions.END_INSTANTTIME().key(), instantEndpts.getRight()); - Dataset source = metaReader.load(srcPath); + if (queryTypeAndInstantEndpts.getKey().equals(DataSourceReadOptions.QUERY_TYPE_INCREMENTAL_OPT_VAL())) { + source = sparkSession.read().format("org.apache.hudi") + .option(DataSourceReadOptions.QUERY_TYPE().key(), DataSourceReadOptions.QUERY_TYPE_INCREMENTAL_OPT_VAL()) + .option(DataSourceReadOptions.BEGIN_INSTANTTIME().key(), queryTypeAndInstantEndpts.getRight().getLeft()) + .option(DataSourceReadOptions.END_INSTANTTIME().key(), queryTypeAndInstantEndpts.getRight().getRight()).load(srcPath); + } else { + // if checkpoint is missing from source table, and if strategy is set to READ_UPTO_LATEST_COMMIT, we have to issue snapshot query + source = sparkSession.read().format("org.apache.hudi") + .option(DataSourceReadOptions.QUERY_TYPE().key(), DataSourceReadOptions.QUERY_TYPE_SNAPSHOT_OPT_VAL()).load(srcPath) + // add filtering so that only interested records are returned. + .filter(String.format("%s > '%s'", HoodieRecord.COMMIT_TIME_METADATA_FIELD, + queryTypeAndInstantEndpts.getRight().getLeft())); + } if (source.isEmpty()) { - return Pair.of(Option.empty(), instantEndpts.getRight()); + return Pair.of(Option.empty(), queryTypeAndInstantEndpts.getRight().getRight()); } String filter = "s3.object.size > 0"; - if (!StringUtils.isNullOrEmpty(props.getString(Config.S3_KEY_PREFIX))) { + if (!StringUtils.isNullOrEmpty(props.getString(Config.S3_KEY_PREFIX, null))) { filter = filter + " and s3.object.key like '" + props.getString(Config.S3_KEY_PREFIX) + "%'"; } - if (!StringUtils.isNullOrEmpty(props.getString(Config.S3_IGNORE_KEY_PREFIX))) { + if (!StringUtils.isNullOrEmpty(props.getString(Config.S3_IGNORE_KEY_PREFIX, null))) { filter = filter + " and s3.object.key not like '" + props.getString(Config.S3_IGNORE_KEY_PREFIX) + "%'"; } - if (!StringUtils.isNullOrEmpty(props.getString(Config.S3_IGNORE_KEY_SUBSTRING))) { + if (!StringUtils.isNullOrEmpty(props.getString(Config.S3_IGNORE_KEY_SUBSTRING, null))) { filter = filter + " and s3.object.key not like '%" + props.getString(Config.S3_IGNORE_KEY_SUBSTRING) + "%'"; } // add file format filtering by default @@ -167,6 +176,6 @@ public Pair>, String> fetchNextBatch(Option lastCkpt if (!cloudFiles.isEmpty()) { dataset = Option.of(sparkSession.read().format(fileFormat).load(cloudFiles.toArray(new String[0]))); } - return Pair.of(dataset, instantEndpts.getRight()); + return Pair.of(dataset, queryTypeAndInstantEndpts.getRight().getRight()); } } diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/SqlSource.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/SqlSource.java index d832e43d2ae0b..056e035175937 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/SqlSource.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/SqlSource.java @@ -48,6 +48,8 @@ *

    To fetch and use the latest incremental checkpoint, you also need to set this hoodie_conf for deltastreamer jobs: * *

    hoodie.write.meta.key.prefixes = 'deltastreamer.checkpoint.key' + * + * Also, users are expected to set --allow-commit-on-no-checkpoint-change while using this SqlSource. */ public class SqlSource extends RowSource { private static final long serialVersionUID = 1L; diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/debezium/DebeziumSource.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/debezium/DebeziumSource.java index 7018419c2d6de..d9be692b5bc57 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/debezium/DebeziumSource.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/debezium/DebeziumSource.java @@ -82,12 +82,12 @@ public DebeziumSource(TypedProperties props, JavaSparkContext sparkContext, HoodieDeltaStreamerMetrics metrics) { super(props, sparkContext, sparkSession, schemaProvider); - props.put(NATIVE_KAFKA_KEY_DESERIALIZER_PROP, StringDeserializer.class); + props.put(NATIVE_KAFKA_KEY_DESERIALIZER_PROP, StringDeserializer.class.getName()); deserializerClassName = props.getString(DataSourceWriteOptions.KAFKA_AVRO_VALUE_DESERIALIZER_CLASS().key(), DataSourceWriteOptions.KAFKA_AVRO_VALUE_DESERIALIZER_CLASS().defaultValue()); try { - props.put(NATIVE_KAFKA_VALUE_DESERIALIZER_PROP, Class.forName(deserializerClassName)); + props.put(NATIVE_KAFKA_VALUE_DESERIALIZER_PROP, Class.forName(deserializerClassName).getName()); } catch (ClassNotFoundException e) { String error = "Could not load custom avro kafka deserializer: " + deserializerClassName; LOG.error(error); diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/helpers/IncrSourceHelper.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/helpers/IncrSourceHelper.java index a370c314a168f..cbfb153ee9ca4 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/helpers/IncrSourceHelper.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/helpers/IncrSourceHelper.java @@ -18,6 +18,7 @@ package org.apache.hudi.utilities.sources.helpers; +import org.apache.hudi.DataSourceReadOptions; import org.apache.hudi.common.table.HoodieTableMetaClient; import org.apache.hudi.common.table.timeline.HoodieInstant; import org.apache.hudi.common.table.timeline.HoodieTimeline; @@ -63,10 +64,10 @@ private static String getStrictlyLowerTimestamp(String timestamp) { * @param numInstantsPerFetch Max Instants per fetch * @param beginInstant Last Checkpoint String * @param missingCheckpointStrategy when begin instant is missing, allow reading based on missing checkpoint strategy - * @return begin and end instants + * @return begin and end instants along with query type. 
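(Editor's note: a minimal sketch of how callers unpack the new nested return shape; the names follow this patch, and READ_LATEST is the strategy enum value referenced in the code below:)

    Pair<String, Pair<String, String>> queryTypeAndInstantEndpts =
        IncrSourceHelper.calculateBeginAndEndInstants(jssc, srcBasePath,
            numInstantsPerFetch, beginInstant, MissingCheckpointStrategy.READ_LATEST);
    String queryType = queryTypeAndInstantEndpts.getKey();                    // incremental or snapshot
    String beginInstantTime = queryTypeAndInstantEndpts.getValue().getLeft(); // checkpoint to start from
    String endInstantTime = queryTypeAndInstantEndpts.getValue().getRight();  // instant to stop at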
*/ - public static Pair calculateBeginAndEndInstants(JavaSparkContext jssc, String srcBasePath, - int numInstantsPerFetch, Option beginInstant, MissingCheckpointStrategy missingCheckpointStrategy) { + public static Pair> calculateBeginAndEndInstants(JavaSparkContext jssc, String srcBasePath, + int numInstantsPerFetch, Option beginInstant, MissingCheckpointStrategy missingCheckpointStrategy) { ValidationUtils.checkArgument(numInstantsPerFetch > 0, "Make sure the config hoodie.deltastreamer.source.hoodieincr.num_instants is set to a positive value"); HoodieTableMetaClient srcMetaClient = HoodieTableMetaClient.builder().setConf(jssc.hadoopConfiguration()).setBasePath(srcBasePath).setLoadActiveTimelineOnLoad(true).build(); @@ -88,15 +89,14 @@ public static Pair calculateBeginAndEndInstants(JavaSparkContext } }); - if (!beginInstantTime.equals(DEFAULT_BEGIN_TIMESTAMP)) { + if (missingCheckpointStrategy == MissingCheckpointStrategy.READ_LATEST || !activeCommitTimeline.isBeforeTimelineStarts(beginInstantTime)) { Option nthInstant = Option.fromJavaOptional(activeCommitTimeline .findInstantsAfter(beginInstantTime, numInstantsPerFetch).getInstants().reduce((x, y) -> y)); - return Pair.of(beginInstantTime, nthInstant.map(HoodieInstant::getTimestamp).orElse(beginInstantTime)); + return Pair.of(DataSourceReadOptions.QUERY_TYPE_INCREMENTAL_OPT_VAL(), Pair.of(beginInstantTime, nthInstant.map(HoodieInstant::getTimestamp).orElse(beginInstantTime))); } else { - // if beginInstant is DEFAULT_BEGIN_TIMESTAMP, MissingCheckpointStrategy should be set. - // when MissingCheckpointStrategy is set to read everything until latest. + // when MissingCheckpointStrategy is set to read everything until latest, trigger snapshot query. Option lastInstant = activeCommitTimeline.lastInstant(); - return Pair.of(beginInstantTime, lastInstant.get().getTimestamp()); + return Pair.of(DataSourceReadOptions.QUERY_TYPE_SNAPSHOT_OPT_VAL(), Pair.of(beginInstantTime, lastInstant.get().getTimestamp())); } } diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/util/BloomFilterData.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/util/BloomFilterData.java new file mode 100644 index 0000000000000..1d4f0539136b4 --- /dev/null +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/util/BloomFilterData.java @@ -0,0 +1,104 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.hudi.utilities.util; + +import org.jetbrains.annotations.NotNull; + +import java.nio.ByteBuffer; +import java.util.Objects; + +/** + * Includes partition path, filename and bloom filter for validation + */ +public class BloomFilterData implements Comparable { + private final String partitionPath; + private final String filename; + private final ByteBuffer bloomFilter; + + private BloomFilterData( + String partitionPath, String filename, ByteBuffer bloomFilter) { + this.partitionPath = partitionPath; + this.filename = filename; + this.bloomFilter = bloomFilter; + } + + public static Builder builder() { + return new Builder(); + } + + @Override + public int compareTo(@NotNull BloomFilterData o) { + return this.toString().compareTo(o.toString()); + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + BloomFilterData that = (BloomFilterData) o; + return partitionPath.equals(that.partitionPath) && filename.equals(that.filename) + && bloomFilter.equals(that.bloomFilter); + } + + @Override + public int hashCode() { + return Objects.hash(partitionPath, filename, bloomFilter); + } + + @Override + public String toString() { + String bloomFilterString = new String(bloomFilter.array()); + return "BloomFilterData{" + + "partitionPath='" + partitionPath + '\'' + + ", filename='" + filename + '\'' + + ", bloomFilter=" + + (bloomFilterString.length() > 50 ? bloomFilterString.substring(0, 50) + "..." : bloomFilterString) + + '}'; + } + + public static class Builder { + private String partitionPath; + private String filename; + private ByteBuffer bloomFilter; + + public Builder setPartitionPath(String partitionPath) { + this.partitionPath = partitionPath; + return this; + } + + public Builder setFilename(String filename) { + this.filename = filename; + return this; + } + + public Builder setBloomFilter(ByteBuffer bloomFilter) { + this.bloomFilter = bloomFilter; + return this; + } + + public BloomFilterData build() { + return new BloomFilterData(partitionPath, filename, bloomFilter); + } + } +} diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/deltastreamer/TestSchedulerConfGenerator.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/deltastreamer/TestSchedulerConfGenerator.java index 77fd04fb4bdd3..26b8bc1c88580 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/deltastreamer/TestSchedulerConfGenerator.java +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/deltastreamer/TestSchedulerConfGenerator.java @@ -18,6 +18,7 @@ package org.apache.hudi.utilities.deltastreamer; +import org.apache.hudi.SparkConfigs; import org.apache.hudi.common.model.HoodieTableType; import org.junit.jupiter.api.Test; @@ -33,21 +34,21 @@ public class TestSchedulerConfGenerator { public void testGenerateSparkSchedulingConf() throws Exception { HoodieDeltaStreamer.Config cfg = new HoodieDeltaStreamer.Config(); Map configs = SchedulerConfGenerator.getSparkSchedulingConfigs(cfg); - assertNull(configs.get(SchedulerConfGenerator.SPARK_SCHEDULER_ALLOCATION_FILE_KEY), "spark.scheduler.mode not set"); + assertNull(configs.get(SparkConfigs.SPARK_SCHEDULER_ALLOCATION_FILE_KEY()), "spark.scheduler.mode not set"); System.setProperty(SchedulerConfGenerator.SPARK_SCHEDULER_MODE_KEY, "FAIR"); cfg.continuousMode = false; configs = SchedulerConfGenerator.getSparkSchedulingConfigs(cfg); - 
assertNull(configs.get(SchedulerConfGenerator.SPARK_SCHEDULER_ALLOCATION_FILE_KEY), "continuousMode is false"); + assertNull(configs.get(SparkConfigs.SPARK_SCHEDULER_ALLOCATION_FILE_KEY()), "continuousMode is false"); cfg.continuousMode = true; cfg.tableType = HoodieTableType.COPY_ON_WRITE.name(); configs = SchedulerConfGenerator.getSparkSchedulingConfigs(cfg); - assertNull(configs.get(SchedulerConfGenerator.SPARK_SCHEDULER_ALLOCATION_FILE_KEY), + assertNull(configs.get(SparkConfigs.SPARK_SCHEDULER_ALLOCATION_FILE_KEY()), "table type is not MERGE_ON_READ"); cfg.tableType = HoodieTableType.MERGE_ON_READ.name(); configs = SchedulerConfGenerator.getSparkSchedulingConfigs(cfg); - assertNotNull(configs.get(SchedulerConfGenerator.SPARK_SCHEDULER_ALLOCATION_FILE_KEY), "all satisfies"); + assertNotNull(configs.get(SparkConfigs.SPARK_SCHEDULER_ALLOCATION_FILE_KEY()), "all satisfies"); } } diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/HoodieDeltaStreamerTestBase.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/HoodieDeltaStreamerTestBase.java index b7e6f1870df44..02b1848e2e31e 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/HoodieDeltaStreamerTestBase.java +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/HoodieDeltaStreamerTestBase.java @@ -61,6 +61,7 @@ public class HoodieDeltaStreamerTestBase extends UtilitiesTestBase { static final String PROPS_FILENAME_TEST_PARQUET = "test-parquet-dfs-source.properties"; static final String PROPS_FILENAME_TEST_ORC = "test-orc-dfs-source.properties"; static final String PROPS_FILENAME_TEST_JSON_KAFKA = "test-json-kafka-dfs-source.properties"; + static final String PROPS_FILENAME_TEST_SQL_SOURCE = "test-sql-source-source.properties"; static final String PROPS_FILENAME_TEST_MULTI_WRITER = "test-multi-writer.properties"; static final String FIRST_PARQUET_FILE_NAME = "1.parquet"; static final String FIRST_ORC_FILE_NAME = "1.orc"; @@ -71,6 +72,7 @@ public class HoodieDeltaStreamerTestBase extends UtilitiesTestBase { static final int ORC_NUM_RECORDS = 5; static final int CSV_NUM_RECORDS = 3; static final int JSON_KAFKA_NUM_RECORDS = 5; + static final int SQL_SOURCE_NUM_RECORDS = 1000; String kafkaCheckpointType = "string"; // Required fields static final String TGT_BASE_PATH_PARAM = "--target-base-path"; @@ -171,7 +173,7 @@ protected static void writeCommonPropsToFile(FileSystem dfs, String dfsBasePath) props.setProperty("include", "sql-transformer.properties"); props.setProperty("hoodie.datasource.write.keygenerator.class", TestHoodieDeltaStreamer.TestGenerator.class.getName()); props.setProperty("hoodie.datasource.write.recordkey.field", "_row_key"); - props.setProperty("hoodie.datasource.write.partitionpath.field", "not_there"); + props.setProperty("hoodie.datasource.write.partitionpath.field", "partition_path"); props.setProperty("hoodie.deltastreamer.schemaprovider.source.schema.file", dfsBasePath + "/source.avsc"); props.setProperty("hoodie.deltastreamer.schemaprovider.target.schema.file", dfsBasePath + "/target.avsc"); @@ -279,35 +281,35 @@ protected static void prepareORCDFSFiles(int numRecords, String baseORCPath, Str HoodieTestDataGenerator dataGenerator = new HoodieTestDataGenerator(); if (useCustomSchema) { Helpers.saveORCToDFS(Helpers.toGenericRecords( - dataGenerator.generateInsertsAsPerSchema("000", numRecords, schemaStr), - schema), new Path(path), HoodieTestDataGenerator.ORC_TRIP_SCHEMA); + dataGenerator.generateInsertsAsPerSchema("000", 
numRecords, schemaStr), + schema), new Path(path), HoodieTestDataGenerator.ORC_TRIP_SCHEMA); } else { Helpers.saveORCToDFS(Helpers.toGenericRecords( - dataGenerator.generateInserts("000", numRecords)), new Path(path)); + dataGenerator.generateInserts("000", numRecords)), new Path(path)); } } - static void addCommitToTimeline(HoodieTableMetaClient metaCient) throws IOException { - addCommitToTimeline(metaCient, Collections.emptyMap()); + static void addCommitToTimeline(HoodieTableMetaClient metaClient) throws IOException { + addCommitToTimeline(metaClient, Collections.emptyMap()); } - static void addCommitToTimeline(HoodieTableMetaClient metaCient, Map extraMetadata) throws IOException { - addCommitToTimeline(metaCient, WriteOperationType.UPSERT, HoodieTimeline.COMMIT_ACTION, extraMetadata); + static void addCommitToTimeline(HoodieTableMetaClient metaClient, Map extraMetadata) throws IOException { + addCommitToTimeline(metaClient, WriteOperationType.UPSERT, HoodieTimeline.COMMIT_ACTION, extraMetadata); } - static void addReplaceCommitToTimeline(HoodieTableMetaClient metaCient, Map extraMetadata) throws IOException { - addCommitToTimeline(metaCient, WriteOperationType.CLUSTER, HoodieTimeline.REPLACE_COMMIT_ACTION, extraMetadata); + static void addReplaceCommitToTimeline(HoodieTableMetaClient metaClient, Map extraMetadata) throws IOException { + addCommitToTimeline(metaClient, WriteOperationType.CLUSTER, HoodieTimeline.REPLACE_COMMIT_ACTION, extraMetadata); } - static void addCommitToTimeline(HoodieTableMetaClient metaCient, WriteOperationType writeOperationType, String commitActiontype, + static void addCommitToTimeline(HoodieTableMetaClient metaClient, WriteOperationType writeOperationType, String commitActiontype, Map extraMetadata) throws IOException { HoodieCommitMetadata commitMetadata = new HoodieCommitMetadata(); commitMetadata.setOperationType(writeOperationType); - extraMetadata.forEach((k,v) -> commitMetadata.getExtraMetadata().put(k, v)); + extraMetadata.forEach((k, v) -> commitMetadata.getExtraMetadata().put(k, v)); String commitTime = HoodieActiveTimeline.createNewInstantTime(); - metaCient.getActiveTimeline().createNewInstant(new HoodieInstant(HoodieInstant.State.REQUESTED, commitActiontype, commitTime)); - metaCient.getActiveTimeline().createNewInstant(new HoodieInstant(HoodieInstant.State.INFLIGHT, commitActiontype, commitTime)); - metaCient.getActiveTimeline().saveAsComplete( + metaClient.getActiveTimeline().createNewInstant(new HoodieInstant(HoodieInstant.State.REQUESTED, commitActiontype, commitTime)); + metaClient.getActiveTimeline().createNewInstant(new HoodieInstant(HoodieInstant.State.INFLIGHT, commitActiontype, commitTime)); + metaClient.getActiveTimeline().saveAsComplete( new HoodieInstant(HoodieInstant.State.INFLIGHT, commitActiontype, commitTime), Option.of(commitMetadata.toJsonString().getBytes(StandardCharsets.UTF_8))); } diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/TestHDFSParquetImporter.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/TestHDFSParquetImporter.java index 3ac490bf9163e..a57be62461d45 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/TestHDFSParquetImporter.java +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/TestHDFSParquetImporter.java @@ -233,8 +233,7 @@ public List createInsertRecords(Path srcFolder) throws ParseExcep long startTime = HoodieActiveTimeline.parseDateFromInstantTime("20170203000000").getTime() / 1000; List records = new 
ArrayList(); for (long recordNum = 0; recordNum < 96; recordNum++) { - records.add(HoodieTestDataGenerator.generateGenericRecord(Long.toString(recordNum), "0", "rider-" + recordNum, - "driver-" + recordNum, startTime + TimeUnit.HOURS.toSeconds(recordNum))); + records.add(new HoodieTestDataGenerator().generateGenericRecord(Long.toString(recordNum), "0", "rider-" + recordNum, "driver-" + recordNum, startTime + TimeUnit.HOURS.toSeconds(recordNum))); } try (ParquetWriter writer = AvroParquetWriter.builder(srcFile) .withSchema(HoodieTestDataGenerator.AVRO_SCHEMA).withConf(HoodieTestUtils.getDefaultHadoopConf()).build()) { @@ -251,12 +250,12 @@ public List createUpsertRecords(Path srcFolder) throws ParseExcep List records = new ArrayList(); // 10 for update for (long recordNum = 0; recordNum < 11; recordNum++) { - records.add(HoodieTestDataGenerator.generateGenericRecord(Long.toString(recordNum), "0", "rider-upsert-" + recordNum, + records.add(new HoodieTestDataGenerator().generateGenericRecord(Long.toString(recordNum), "0", "rider-upsert-" + recordNum, "driver-upsert" + recordNum, startTime + TimeUnit.HOURS.toSeconds(recordNum))); } // 4 for insert for (long recordNum = 96; recordNum < 100; recordNum++) { - records.add(HoodieTestDataGenerator.generateGenericRecord(Long.toString(recordNum), "0", "rider-upsert-" + recordNum, + records.add(new HoodieTestDataGenerator().generateGenericRecord(Long.toString(recordNum), "0", "rider-upsert-" + recordNum, "driver-upsert" + recordNum, startTime + TimeUnit.HOURS.toSeconds(recordNum))); } try (ParquetWriter writer = AvroParquetWriter.builder(srcFile) diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/TestHoodieDeltaStreamer.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/TestHoodieDeltaStreamer.java index 1874991888cbf..1c80896586515 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/TestHoodieDeltaStreamer.java +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/TestHoodieDeltaStreamer.java @@ -19,18 +19,22 @@ package org.apache.hudi.utilities.functional; import org.apache.hudi.AvroConversionUtils; +import org.apache.hudi.DataSourceReadOptions; import org.apache.hudi.DataSourceWriteOptions; import org.apache.hudi.client.SparkRDDWriteClient; +import org.apache.hudi.client.transaction.lock.InProcessLockProvider; import org.apache.hudi.common.config.DFSPropertiesConfiguration; import org.apache.hudi.common.config.HoodieConfig; import org.apache.hudi.common.config.HoodieMetadataConfig; import org.apache.hudi.common.config.TypedProperties; import org.apache.hudi.common.fs.FSUtils; import org.apache.hudi.common.model.HoodieCommitMetadata; +import org.apache.hudi.common.model.HoodieFailedWritesCleaningPolicy; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.model.HoodieReplaceCommitMetadata; import org.apache.hudi.common.model.HoodieTableType; import org.apache.hudi.common.model.OverwriteWithLatestAvroPayload; +import org.apache.hudi.common.model.WriteConcurrencyMode; import org.apache.hudi.common.model.WriteOperationType; import org.apache.hudi.common.table.HoodieTableConfig; import org.apache.hudi.common.table.HoodieTableMetaClient; @@ -43,6 +47,7 @@ import org.apache.hudi.common.util.StringUtils; import org.apache.hudi.config.HoodieClusteringConfig; import org.apache.hudi.config.HoodieCompactionConfig; +import org.apache.hudi.config.HoodieLockConfig; import org.apache.hudi.config.HoodieWriteConfig; import 
org.apache.hudi.exception.HoodieException; import org.apache.hudi.exception.TableNotFoundException; @@ -63,6 +68,7 @@ import org.apache.hudi.utilities.sources.JsonKafkaSource; import org.apache.hudi.utilities.sources.ORCDFSSource; import org.apache.hudi.utilities.sources.ParquetDFSSource; +import org.apache.hudi.utilities.sources.SqlSource; import org.apache.hudi.utilities.sources.TestDataSource; import org.apache.hudi.utilities.testutils.JdbcTestUtils; import org.apache.hudi.utilities.testutils.UtilitiesTestBase; @@ -207,6 +213,14 @@ static HoodieDeltaStreamer.Config makeConfig(String basePath, WriteOperationType List transformerClassNames, String propsFilename, boolean enableHiveSync, boolean useSchemaProviderClass, int sourceLimit, boolean updatePayloadClass, String payloadClassName, String tableType, String sourceOrderingField, String checkpoint) { + return makeConfig(basePath, op, sourceClassName, transformerClassNames, propsFilename, enableHiveSync, useSchemaProviderClass, sourceLimit, updatePayloadClass, payloadClassName, + tableType, sourceOrderingField, checkpoint, false); + } + + static HoodieDeltaStreamer.Config makeConfig(String basePath, WriteOperationType op, String sourceClassName, + List transformerClassNames, String propsFilename, boolean enableHiveSync, boolean useSchemaProviderClass, + int sourceLimit, boolean updatePayloadClass, String payloadClassName, String tableType, String sourceOrderingField, + String checkpoint, boolean allowCommitOnNoCheckpointChange) { HoodieDeltaStreamer.Config cfg = new HoodieDeltaStreamer.Config(); cfg.targetBasePath = basePath; cfg.targetTableName = "hoodie_trips"; @@ -225,6 +239,7 @@ static HoodieDeltaStreamer.Config makeConfig(String basePath, WriteOperationType if (useSchemaProviderClass) { cfg.schemaProviderClassName = defaultSchemaProviderClassName; } + cfg.allowCommitOnNoCheckpointChange = allowCommitOnNoCheckpointChange; return cfg; } @@ -257,6 +272,19 @@ static void assertRecordCount(long expected, String tablePath, SQLContext sqlCon assertEquals(expected, recordCount); } + static Map getPartitionRecordCount(String basePath, SQLContext sqlContext) { + sqlContext.clearCache(); + List rows = sqlContext.read().format("org.apache.hudi").load(basePath).groupBy(HoodieRecord.PARTITION_PATH_METADATA_FIELD).count().collectAsList(); + Map partitionRecordCount = new HashMap<>(); + rows.stream().forEach(row -> partitionRecordCount.put(row.getString(0), row.getLong(1))); + return partitionRecordCount; + } + + static void assertNoPartitionMatch(String basePath, SQLContext sqlContext, String partitionToValidate) { + sqlContext.clearCache(); + // quote the partition value so the predicate compares against a string literal rather than an arithmetic expression + assertEquals(0, sqlContext.read().format("org.apache.hudi").load(basePath).filter(HoodieRecord.PARTITION_PATH_METADATA_FIELD + " = '" + partitionToValidate + "'").count()); + } + static void assertDistinctRecordCount(long expected, String tablePath, SQLContext sqlContext) { sqlContext.clearCache(); long recordCount = sqlContext.read().format("org.apache.hudi").load(tablePath).select("_hoodie_record_key").distinct().count(); @@ -754,6 +782,38 @@ public void testInlineClustering(String preserveCommitMetadata) throws Exception }); } + @Test + public void testDeltaSyncWithPendingClustering() throws Exception { + String tableBasePath = dfsBasePath + "/inlineClusteringPending"; + // ingest data + int totalRecords = 2000; + HoodieDeltaStreamer.Config cfg = TestHelpers.makeConfig(tableBasePath, WriteOperationType.INSERT); + cfg.continuousMode = false; + cfg.tableType = HoodieTableType.COPY_ON_WRITE.name(); +
HoodieDeltaStreamer ds = new HoodieDeltaStreamer(cfg, jsc); + ds.sync(); + // assert ingest successful + TestHelpers.assertAtLeastNCommits(1, tableBasePath, dfs); + + // schedule a clustering job to build a clustering plan and transition to inflight + HoodieClusteringJob clusteringJob = initialHoodieClusteringJob(tableBasePath, null, false, "schedule"); + clusteringJob.cluster(0); + HoodieTableMetaClient meta = HoodieTableMetaClient.builder().setConf(dfs.getConf()).setBasePath(tableBasePath).build(); + List hoodieClusteringInstants = meta.getActiveTimeline().filterPendingReplaceTimeline().getInstants().collect(Collectors.toList()); + HoodieInstant clusteringRequest = hoodieClusteringInstants.get(0); + meta.getActiveTimeline().transitionReplaceRequestedToInflight(clusteringRequest, Option.empty()); + + // do another ingestion with inline clustering enabled + cfg.configs.addAll(getAsyncServicesConfigs(totalRecords, "false", "true", "2", "", "")); + cfg.retryLastPendingInlineClusteringJob = true; + HoodieDeltaStreamer ds2 = new HoodieDeltaStreamer(cfg, jsc); + ds2.sync(); + String completeClusteringTimeStamp = meta.reloadActiveTimeline().getCompletedReplaceTimeline().lastInstant().get().getTimestamp(); + assertEquals(clusteringRequest.getTimestamp(), completeClusteringTimeStamp); + TestHelpers.assertAtLeastNCommits(2, tableBasePath, dfs); + TestHelpers.assertAtLeastNReplaceCommits(1, tableBasePath, dfs); + } + @ParameterizedTest @ValueSource(booleans = {true, false}) public void testCleanerDeleteReplacedDataWithArchive(Boolean asyncClean) throws Exception { @@ -768,7 +828,7 @@ public void testCleanerDeleteReplacedDataWithArchive(Boolean asyncClean) throws cfg.tableType = HoodieTableType.COPY_ON_WRITE.name(); cfg.configs.addAll(getAsyncServicesConfigs(totalRecords, "false", "true", "2", "", "")); cfg.configs.add(String.format("%s=%s", HoodieCompactionConfig.PARQUET_SMALL_FILE_LIMIT.key(), "0")); - cfg.configs.add(HoodieMetadataConfig.ENABLE.key() + "=false"); + cfg.configs.add(String.format("%s=%s", HoodieMetadataConfig.COMPACT_NUM_DELTA_COMMITS.key(), "1")); HoodieDeltaStreamer ds = new HoodieDeltaStreamer(cfg, jsc); deltaStreamerTestRunner(ds, cfg, (r) -> { TestHelpers.assertAtLeastNReplaceCommits(2, tableBasePath, dfs); @@ -819,8 +879,16 @@ public void testCleanerDeleteReplacedDataWithArchive(Boolean asyncClean) throws configs.add(String.format("%s=%s", HoodieCompactionConfig.CLEANER_COMMITS_RETAINED.key(), "1")); configs.add(String.format("%s=%s", HoodieCompactionConfig.MIN_COMMITS_TO_KEEP.key(), "2")); configs.add(String.format("%s=%s", HoodieCompactionConfig.MAX_COMMITS_TO_KEEP.key(), "3")); - configs.add(String.format("%s=%s", HoodieCompactionConfig.ASYNC_CLEAN, asyncClean)); - configs.add(HoodieMetadataConfig.ENABLE.key() + "=false"); + configs.add(String.format("%s=%s", HoodieCompactionConfig.ASYNC_CLEAN.key(), asyncClean)); + configs.add(String.format("%s=%s", HoodieMetadataConfig.COMPACT_NUM_DELTA_COMMITS.key(), "1")); + if (asyncClean) { + configs.add(String.format("%s=%s", HoodieWriteConfig.WRITE_CONCURRENCY_MODE.key(), + WriteConcurrencyMode.OPTIMISTIC_CONCURRENCY_CONTROL.name())); + configs.add(String.format("%s=%s", HoodieCompactionConfig.FAILED_WRITES_CLEANER_POLICY.key(), + HoodieFailedWritesCleaningPolicy.LAZY.name())); + configs.add(String.format("%s=%s", HoodieLockConfig.LOCK_PROVIDER_CLASS_NAME.key(), + InProcessLockProvider.class.getName())); + } cfg.configs = configs; cfg.continuousMode = false; ds = new HoodieDeltaStreamer(cfg, jsc); @@ -1323,6 +1391,13 @@ private void 
prepareParquetDFSSource(boolean useSchemaProvider, boolean hasTrans private void prepareParquetDFSSource(boolean useSchemaProvider, boolean hasTransformer, String sourceSchemaFile, String targetSchemaFile, String propsFileName, String parquetSourceRoot, boolean addCommonProps) throws IOException { + prepareParquetDFSSource(useSchemaProvider, hasTransformer, sourceSchemaFile, targetSchemaFile, propsFileName, parquetSourceRoot, addCommonProps, + "not_there"); + } + + private void prepareParquetDFSSource(boolean useSchemaProvider, boolean hasTransformer, String sourceSchemaFile, String targetSchemaFile, + String propsFileName, String parquetSourceRoot, boolean addCommonProps, + String partitionPath) throws IOException { // Properties used for testing delta-streamer with Parquet source TypedProperties parquetProps = new TypedProperties(); @@ -1333,7 +1408,7 @@ private void prepareParquetDFSSource(boolean useSchemaProvider, boolean hasTrans parquetProps.setProperty("include", "base.properties"); parquetProps.setProperty("hoodie.embed.timeline.server", "false"); parquetProps.setProperty("hoodie.datasource.write.recordkey.field", "_row_key"); - parquetProps.setProperty("hoodie.datasource.write.partitionpath.field", "not_there"); + parquetProps.setProperty("hoodie.datasource.write.partitionpath.field", partitionPath); if (useSchemaProvider) { parquetProps.setProperty("hoodie.deltastreamer.schemaprovider.source.schema.file", dfsBasePath + "/" + sourceSchemaFile); if (hasTransformer) { @@ -1668,6 +1743,41 @@ public void testCsvDFSSourceNoHeaderWithSchemaProviderAndTransformer() throws Ex testCsvDFSSource(false, '\t', true, Collections.singletonList(TripsWithDistanceTransformer.class.getName())); } + private void prepareSqlSource() throws IOException { + String sourceRoot = dfsBasePath + "sqlSourceFiles"; + TypedProperties sqlSourceProps = new TypedProperties(); + sqlSourceProps.setProperty("include", "base.properties"); + sqlSourceProps.setProperty("hoodie.embed.timeline.server", "false"); + sqlSourceProps.setProperty("hoodie.datasource.write.recordkey.field", "_row_key"); + sqlSourceProps.setProperty("hoodie.datasource.write.partitionpath.field", "not_there"); + sqlSourceProps.setProperty("hoodie.deltastreamer.source.sql.sql.query","select * from test_sql_table"); + + UtilitiesTestBase.Helpers.savePropsToDFS(sqlSourceProps, dfs, dfsBasePath + "/" + PROPS_FILENAME_TEST_SQL_SOURCE); + + // Data generation + HoodieTestDataGenerator dataGenerator = new HoodieTestDataGenerator(); + generateSqlSourceTestTable(sourceRoot, "1", "1000", SQL_SOURCE_NUM_RECORDS, dataGenerator); + } + + private void generateSqlSourceTestTable(String dfsRoot, String filename, String instantTime, int n, HoodieTestDataGenerator dataGenerator) throws IOException { + Path path = new Path(dfsRoot, filename); + Helpers.saveParquetToDFS(Helpers.toGenericRecords(dataGenerator.generateInserts(instantTime, n, false)), path); + sparkSession.read().parquet(dfsRoot).createOrReplaceTempView("test_sql_table"); + } + + @Test + public void testSqlSourceSource() throws Exception { + prepareSqlSource(); + String tableBasePath = dfsBasePath + "/test_sql_source_table" + testNum++; + HoodieDeltaStreamer deltaStreamer = + new HoodieDeltaStreamer(TestHelpers.makeConfig( + tableBasePath, WriteOperationType.INSERT, SqlSource.class.getName(), + Collections.emptyList(), PROPS_FILENAME_TEST_SQL_SOURCE, false, + false, 1000, false, null, null, "timestamp", null, true), jsc); + deltaStreamer.sync(); + TestHelpers.assertRecordCount(SQL_SOURCE_NUM_RECORDS, 
tableBasePath + "/*/*.parquet", sqlContext); + } + @Test public void testJdbcSourceIncrementalFetchInContinuousMode() { try (Connection connection = DriverManager.getConnection("jdbc:h2:mem:test_mem", "test", "jdbc")) { @@ -1707,6 +1817,54 @@ public void testJdbcSourceIncrementalFetchInContinuousMode() { } } + @Test + public void testHoodieIncrFallback() throws Exception { + String tableBasePath = dfsBasePath + "/incr_test_table"; + String downstreamTableBasePath = dfsBasePath + "/incr_test_downstream_table"; + + insertInTable(tableBasePath, 1, WriteOperationType.BULK_INSERT); + HoodieDeltaStreamer.Config downstreamCfg = + TestHelpers.makeConfigForHudiIncrSrc(tableBasePath, downstreamTableBasePath, + WriteOperationType.BULK_INSERT, true, null); + new HoodieDeltaStreamer(downstreamCfg, jsc).sync(); + + insertInTable(tableBasePath, 9, WriteOperationType.UPSERT); + //No change as this fails with Path not exist error + assertThrows(org.apache.spark.sql.AnalysisException.class, () -> new HoodieDeltaStreamer(downstreamCfg, jsc).sync()); + TestHelpers.assertRecordCount(1000, downstreamTableBasePath + "/*/*", sqlContext); + + if (downstreamCfg.configs == null) { + downstreamCfg.configs = new ArrayList<>(); + } + + downstreamCfg.configs.add(DataSourceReadOptions.INCREMENTAL_FALLBACK_TO_FULL_TABLE_SCAN_FOR_NON_EXISTING_FILES().key() + "=true"); + //Adding this conf to make testing easier :) + downstreamCfg.configs.add("hoodie.deltastreamer.source.hoodieincr.num_instants=10"); + downstreamCfg.operation = WriteOperationType.UPSERT; + new HoodieDeltaStreamer(downstreamCfg, jsc).sync(); + new HoodieDeltaStreamer(downstreamCfg, jsc).sync(); + + long baseTableRecords = sqlContext.read().format("org.apache.hudi").load(tableBasePath + "/*/*.parquet").count(); + long downStreamTableRecords = sqlContext.read().format("org.apache.hudi").load(downstreamTableBasePath + "/*/*.parquet").count(); + assertEquals(baseTableRecords, downStreamTableRecords); + } + + private void insertInTable(String tableBasePath, int count, WriteOperationType operationType) throws Exception { + HoodieDeltaStreamer.Config cfg = TestHelpers.makeConfig(tableBasePath, operationType, + Collections.singletonList(SqlQueryBasedTransformer.class.getName()), PROPS_FILENAME_TEST_SOURCE, false); + if (cfg.configs == null) { + cfg.configs = new ArrayList<>(); + } + cfg.configs.add("hoodie.cleaner.commits.retained=3"); + cfg.configs.add("hoodie.keep.min.commits=4"); + cfg.configs.add("hoodie.keep.max.commits=5"); + cfg.configs.add("hoodie.test.source.generate.inserts=true"); + + for (int i = 0; i < count; i++) { + new HoodieDeltaStreamer(cfg, jsc).sync(); + } + } + @Test public void testInsertOverwrite() throws Exception { testDeltaStreamerWithSpecifiedOperation(dfsBasePath + "/insert_overwrite", WriteOperationType.INSERT_OVERWRITE); @@ -1717,6 +1875,31 @@ public void testInsertOverwriteTable() throws Exception { testDeltaStreamerWithSpecifiedOperation(dfsBasePath + "/insert_overwrite_table", WriteOperationType.INSERT_OVERWRITE_TABLE); } + @Test + public void testDeletePartitions() throws Exception { + prepareParquetDFSSource(false, false, "source.avsc", "target.avsc", + PROPS_FILENAME_TEST_PARQUET, PARQUET_SOURCE_ROOT, false, "partition_path"); + String tableBasePath = dfsBasePath + "/test_parquet_table" + testNum; + HoodieDeltaStreamer deltaStreamer = new HoodieDeltaStreamer( + TestHelpers.makeConfig(tableBasePath, WriteOperationType.INSERT, ParquetDFSSource.class.getName(), + null, PROPS_FILENAME_TEST_PARQUET, false, + false, 100000, false, null, 
null, "timestamp", null), jsc); + deltaStreamer.sync(); + TestHelpers.assertRecordCount(PARQUET_NUM_RECORDS, tableBasePath, sqlContext); + testNum++; + + prepareParquetDFSFiles(PARQUET_NUM_RECORDS, PARQUET_SOURCE_ROOT); + prepareParquetDFSSource(false, false); + // set write operation to DELETE_PARTITION and add transformer to filter only for records with partition HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION + deltaStreamer = new HoodieDeltaStreamer( + TestHelpers.makeConfig(tableBasePath, WriteOperationType.DELETE_PARTITION, ParquetDFSSource.class.getName(), + Collections.singletonList(TestSpecificPartitionTransformer.class.getName()), PROPS_FILENAME_TEST_PARQUET, false, + false, 100000, false, null, null, "timestamp", null), jsc); + deltaStreamer.sync(); + // No records should match the HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION. + TestHelpers.assertNoPartitionMatch(tableBasePath, sqlContext, HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH); + } + void testDeltaStreamerWithSpecifiedOperation(final String tableBasePath, WriteOperationType operationType) throws Exception { // Initial insert HoodieDeltaStreamer.Config cfg = TestHelpers.makeConfig(tableBasePath, WriteOperationType.BULK_INSERT); @@ -1863,6 +2046,16 @@ public Dataset apply(JavaSparkContext jsc, SparkSession sparkSession, Datas } } + public static class TestSpecificPartitionTransformer implements Transformer { + + @Override + public Dataset apply(JavaSparkContext jsc, SparkSession sparkSession, Dataset rowDataset, + TypedProperties properties) { + Dataset toReturn = rowDataset.filter("partition_path == '" + HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH + "'"); + return toReturn; + } + } + /** * Add new field evoluted_optional_union_field with value of the field rider. 
*/ diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/TestHoodieDeltaStreamerWithMultiWriter.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/TestHoodieDeltaStreamerWithMultiWriter.java index 6a3831a960561..e383236af18a3 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/TestHoodieDeltaStreamerWithMultiWriter.java +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/TestHoodieDeltaStreamerWithMultiWriter.java @@ -38,6 +38,7 @@ import org.apache.log4j.LogManager; import org.apache.log4j.Logger; import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Tag; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.EnumSource; @@ -48,15 +49,12 @@ import java.util.Collections; import java.util.ConcurrentModificationException; import java.util.List; -import java.util.Objects; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.Future; import java.util.concurrent.atomic.AtomicBoolean; import java.util.function.Function; -import static org.apache.hudi.common.testutils.FixtureUtils.prepareFixtureTable; -import static org.apache.hudi.common.testutils.HoodieTestDataGenerator.TRIP_EXAMPLE_SCHEMA; import static org.apache.hudi.config.HoodieWriteConfig.BULKINSERT_PARALLELISM_VALUE; import static org.apache.hudi.config.HoodieWriteConfig.BULK_INSERT_SORT_MODE; import static org.apache.hudi.config.HoodieWriteConfig.FINALIZE_WRITE_PARALLELISM_VALUE; @@ -68,31 +66,50 @@ import static org.apache.hudi.utilities.functional.HoodieDeltaStreamerTestBase.defaultSchemaProviderClassName; import static org.apache.hudi.utilities.functional.HoodieDeltaStreamerTestBase.prepareInitialConfigs; import static org.apache.hudi.utilities.functional.TestHoodieDeltaStreamer.deltaStreamerTestRunner; -import static org.apache.hudi.utilities.testutils.sources.AbstractBaseTestSource.DEFAULT_PARTITION_NUM; -import static org.apache.hudi.utilities.testutils.sources.AbstractBaseTestSource.dataGeneratorMap; -import static org.apache.hudi.utilities.testutils.sources.AbstractBaseTestSource.initDataGen; @Tag("functional") public class TestHoodieDeltaStreamerWithMultiWriter extends SparkClientFunctionalTestHarness { - private static final String COW_TEST_TABLE_NAME = "testtable_COPY_ON_WRITE"; private static final Logger LOG = LogManager.getLogger(TestHoodieDeltaStreamerWithMultiWriter.class); String basePath; String propsFilePath; String tableBasePath; - int totalRecords; @ParameterizedTest @EnumSource(HoodieTableType.class) void testUpsertsContinuousModeWithMultipleWritersForConflicts(HoodieTableType tableType) throws Exception { // NOTE : Overriding the LockProvider to InProcessLockProvider since Zookeeper locks work in unit test but fail on Jenkins with connection timeouts - setUpTestTable(tableType); + basePath = Paths.get(URI.create(basePath().replaceAll("/$", ""))).toString(); + propsFilePath = basePath + "/" + PROPS_FILENAME_TEST_MULTI_WRITER; + tableBasePath = basePath + "/testtable_" + tableType; prepareInitialConfigs(fs(), basePath, "foo"); TypedProperties props = prepareMultiWriterProps(fs(), basePath, propsFilePath); props.setProperty("hoodie.write.lock.provider", "org.apache.hudi.client.transaction.lock.InProcessLockProvider"); props.setProperty(LockConfiguration.LOCK_ACQUIRE_WAIT_TIMEOUT_MS_PROP_KEY,"3000"); UtilitiesTestBase.Helpers.savePropsToDFS(props, fs(), propsFilePath); + // 
Keep it higher than batch-size to test continuous mode + int totalRecords = 3000; + + HoodieDeltaStreamer.Config prepJobConfig = getDeltaStreamerConfig(tableBasePath, tableType.name(), WriteOperationType.UPSERT, + propsFilePath, Collections.singletonList(TestHoodieDeltaStreamer.TripsWithDistanceTransformer.class.getName())); + prepJobConfig.continuousMode = true; + prepJobConfig.configs.add(String.format("%s=%d", SourceConfigs.MAX_UNIQUE_RECORDS_PROP, totalRecords)); + prepJobConfig.configs.add(String.format("%s=false", HoodieCompactionConfig.AUTO_CLEAN.key())); + HoodieDeltaStreamer prepJob = new HoodieDeltaStreamer(prepJobConfig, jsc()); + + // Prepare base dataset with some commits + deltaStreamerTestRunner(prepJob, prepJobConfig, (r) -> { + if (tableType.equals(HoodieTableType.MERGE_ON_READ)) { + TestHoodieDeltaStreamer.TestHelpers.assertAtleastNDeltaCommits(3, tableBasePath, fs()); + TestHoodieDeltaStreamer.TestHelpers.assertAtleastNCompactionCommits(1, tableBasePath, fs()); + } else { + TestHoodieDeltaStreamer.TestHelpers.assertAtleastNCompactionCommits(3, tableBasePath, fs()); + } + TestHoodieDeltaStreamer.TestHelpers.assertRecordCount(totalRecords, tableBasePath + "/*/*.parquet", sqlContext()); + TestHoodieDeltaStreamer.TestHelpers.assertDistanceCount(totalRecords, tableBasePath + "/*/*.parquet", sqlContext()); + return true; + }); HoodieDeltaStreamer.Config cfgIngestionJob = getDeltaStreamerConfig(tableBasePath, tableType.name(), WriteOperationType.UPSERT, propsFilePath, Collections.singletonList(TestHoodieDeltaStreamer.TripsWithDistanceTransformer.class.getName())); @@ -125,12 +142,36 @@ void testUpsertsContinuousModeWithMultipleWritersForConflicts(HoodieTableType ta @EnumSource(HoodieTableType.class) void testUpsertsContinuousModeWithMultipleWritersWithoutConflicts(HoodieTableType tableType) throws Exception { // NOTE : Overriding the LockProvider to InProcessLockProvider since Zookeeper locks work in unit test but fail on Jenkins with connection timeouts - setUpTestTable(tableType); + basePath = Paths.get(URI.create(basePath().replaceAll("/$", ""))).toString(); + propsFilePath = basePath + "/" + PROPS_FILENAME_TEST_MULTI_WRITER; + tableBasePath = basePath + "/testtable_" + tableType; prepareInitialConfigs(fs(), basePath, "foo"); TypedProperties props = prepareMultiWriterProps(fs(), basePath, propsFilePath); props.setProperty("hoodie.write.lock.provider", "org.apache.hudi.client.transaction.lock.InProcessLockProvider"); props.setProperty(LockConfiguration.LOCK_ACQUIRE_WAIT_TIMEOUT_MS_PROP_KEY,"3000"); UtilitiesTestBase.Helpers.savePropsToDFS(props, fs(), propsFilePath); + // Keep it higher than batch-size to test continuous mode + int totalRecords = 3000; + + HoodieDeltaStreamer.Config prepJobConfig = getDeltaStreamerConfig(tableBasePath, tableType.name(), WriteOperationType.UPSERT, + propsFilePath, Collections.singletonList(TestHoodieDeltaStreamer.TripsWithDistanceTransformer.class.getName())); + prepJobConfig.continuousMode = true; + prepJobConfig.configs.add(String.format("%s=%d", SourceConfigs.MAX_UNIQUE_RECORDS_PROP, totalRecords)); + prepJobConfig.configs.add(String.format("%s=false", HoodieCompactionConfig.AUTO_CLEAN.key())); + HoodieDeltaStreamer prepJob = new HoodieDeltaStreamer(prepJobConfig, jsc()); + + // Prepare base dataset with some commits + deltaStreamerTestRunner(prepJob, prepJobConfig, (r) -> { + if (tableType.equals(HoodieTableType.MERGE_ON_READ)) { + TestHoodieDeltaStreamer.TestHelpers.assertAtleastNDeltaCommits(3, tableBasePath, fs()); + 
TestHoodieDeltaStreamer.TestHelpers.assertAtleastNCompactionCommits(1, tableBasePath, fs()); + } else { + TestHoodieDeltaStreamer.TestHelpers.assertAtleastNCompactionCommits(3, tableBasePath, fs()); + } + TestHoodieDeltaStreamer.TestHelpers.assertRecordCount(totalRecords, tableBasePath + "/*/*.parquet", sqlContext()); + TestHoodieDeltaStreamer.TestHelpers.assertDistanceCount(totalRecords, tableBasePath + "/*/*.parquet", sqlContext()); + return true; + }); // create new ingestion & backfill job config to generate only INSERTS to avoid conflict props = prepareMultiWriterProps(fs(), basePath, propsFilePath); @@ -164,26 +205,41 @@ void testUpsertsContinuousModeWithMultipleWritersWithoutConflicts(HoodieTableTyp cfgIngestionJob2, backfillJob2, cfgBackfillJob2, false, "batch2"); } + @Disabled @ParameterizedTest @EnumSource(value = HoodieTableType.class, names = {"COPY_ON_WRITE"}) - public void testLatestCheckpointCarryOverWithMultipleWriters(HoodieTableType tableType) throws Exception { - testCheckpointCarryOver(tableType); - } - - private void testCheckpointCarryOver(HoodieTableType tableType) throws Exception { + void testLatestCheckpointCarryOverWithMultipleWriters(HoodieTableType tableType) throws Exception { // NOTE : Overriding the LockProvider to InProcessLockProvider since Zookeeper locks work in unit test but fail on Jenkins with connection timeouts - setUpTestTable(tableType); + basePath = Paths.get(URI.create(basePath().replaceAll("/$", ""))).toString(); + propsFilePath = basePath + "/" + PROPS_FILENAME_TEST_MULTI_WRITER; + tableBasePath = basePath + "/testtable_" + tableType; prepareInitialConfigs(fs(), basePath, "foo"); TypedProperties props = prepareMultiWriterProps(fs(), basePath, propsFilePath); props.setProperty("hoodie.write.lock.provider", "org.apache.hudi.client.transaction.lock.InProcessLockProvider"); props.setProperty(LockConfiguration.LOCK_ACQUIRE_WAIT_TIMEOUT_MS_PROP_KEY,"3000"); UtilitiesTestBase.Helpers.savePropsToDFS(props, fs(), propsFilePath); + // Keep it higher than batch-size to test continuous mode + int totalRecords = 3000; - HoodieDeltaStreamer.Config cfgIngestionJob = getDeltaStreamerConfig(tableBasePath, tableType.name(), WriteOperationType.UPSERT, + HoodieDeltaStreamer.Config prepJobConfig = getDeltaStreamerConfig(tableBasePath, tableType.name(), WriteOperationType.UPSERT, propsFilePath, Collections.singletonList(TestHoodieDeltaStreamer.TripsWithDistanceTransformer.class.getName())); - cfgIngestionJob.continuousMode = true; - cfgIngestionJob.configs.add(String.format("%s=%d", SourceConfigs.MAX_UNIQUE_RECORDS_PROP, totalRecords)); - cfgIngestionJob.configs.add(String.format("%s=false", HoodieCompactionConfig.AUTO_CLEAN.key())); + prepJobConfig.continuousMode = true; + prepJobConfig.configs.add(String.format("%s=%d", SourceConfigs.MAX_UNIQUE_RECORDS_PROP, totalRecords)); + prepJobConfig.configs.add(String.format("%s=false", HoodieCompactionConfig.AUTO_CLEAN.key())); + HoodieDeltaStreamer prepJob = new HoodieDeltaStreamer(prepJobConfig, jsc()); + + // Prepare base dataset with some commits + deltaStreamerTestRunner(prepJob, prepJobConfig, (r) -> { + if (tableType.equals(HoodieTableType.MERGE_ON_READ)) { + TestHoodieDeltaStreamer.TestHelpers.assertAtleastNDeltaCommits(3, tableBasePath, fs()); + TestHoodieDeltaStreamer.TestHelpers.assertAtleastNCompactionCommits(1, tableBasePath, fs()); + } else { + TestHoodieDeltaStreamer.TestHelpers.assertAtleastNCompactionCommits(3, tableBasePath, fs()); + } + TestHoodieDeltaStreamer.TestHelpers.assertRecordCount(totalRecords, 
tableBasePath + "/*/*.parquet", sqlContext()); + TestHoodieDeltaStreamer.TestHelpers.assertDistanceCount(totalRecords, tableBasePath + "/*/*.parquet", sqlContext()); + return true; + }); // create a backfill job with checkpoint from the first instant HoodieDeltaStreamer.Config cfgBackfillJob = getDeltaStreamerConfig(tableBasePath, tableType.name(), WriteOperationType.UPSERT, @@ -292,26 +348,6 @@ private static HoodieDeltaStreamer.Config getDeltaStreamerConfig(String basePath return cfg; } - /** - * Specifically used for {@link TestHoodieDeltaStreamerWithMultiWriter}. - * - * The fixture test tables have random records generated by - * {@link org.apache.hudi.common.testutils.HoodieTestDataGenerator} using - * {@link org.apache.hudi.common.testutils.HoodieTestDataGenerator#TRIP_EXAMPLE_SCHEMA}. - * - * The COW fixture test table has 3000 unique records in 7 commits. - * The MOR fixture test table has 3000 unique records in 9 deltacommits and 1 compaction commit. - */ - private void setUpTestTable(HoodieTableType tableType) throws IOException { - basePath = Paths.get(URI.create(basePath().replaceAll("/$", ""))).toString(); - propsFilePath = basePath + "/" + PROPS_FILENAME_TEST_MULTI_WRITER; - String fixtureName = String.format("fixtures/testUpsertsContinuousModeWithMultipleWriters.%s.zip", tableType.name()); - tableBasePath = prepareFixtureTable(Objects.requireNonNull(getClass() - .getClassLoader().getResource(fixtureName)), Paths.get(basePath)).toString(); - initDataGen(sqlContext(), tableBasePath + "/*/*.parquet", DEFAULT_PARTITION_NUM); - totalRecords = dataGeneratorMap.get(DEFAULT_PARTITION_NUM).getNumExistingKeys(TRIP_EXAMPLE_SCHEMA); - } - private void runJobsInParallel(String tableBasePath, HoodieTableType tableType, int totalRecords, HoodieDeltaStreamer ingestionJob, HoodieDeltaStreamer.Config cfgIngestionJob, HoodieDeltaStreamer backfillJob, HoodieDeltaStreamer.Config cfgBackfillJob, boolean expectConflict, String jobId) throws Exception { @@ -331,22 +367,22 @@ private void runJobsInParallel(String tableBasePath, HoodieTableType tableType, return true; }; - AtomicBoolean continousFailed = new AtomicBoolean(false); + AtomicBoolean continuousFailed = new AtomicBoolean(false); AtomicBoolean backfillFailed = new AtomicBoolean(false); try { Future regularIngestionJobFuture = service.submit(() -> { try { deltaStreamerTestRunner(ingestionJob, cfgIngestionJob, conditionForRegularIngestion, jobId); } catch (Throwable ex) { - continousFailed.set(true); + continuousFailed.set(true); LOG.error("Continuous job failed " + ex.getMessage()); throw new RuntimeException(ex); } }); Future backfillJobFuture = service.submit(() -> { try { - // trigger backfill atleast after 1 requested entry is added to timline from continuous job. If not, there is a chance that backfill will complete even before - // continous job starts. + // trigger backfill atleast after 1 requested entry is added to timeline from continuous job. If not, there is a chance that backfill will complete even before + // continuous job starts. 
awaitCondition(new GetCommitsAfterInstant(tableBasePath, lastSuccessfulCommit)); backfillJob.sync(); } catch (Throwable ex) { diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/TestHoodieMultiTableDeltaStreamer.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/TestHoodieMultiTableDeltaStreamer.java index 8eb91d24687c1..da5c6cc66a2ff 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/TestHoodieMultiTableDeltaStreamer.java +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/TestHoodieMultiTableDeltaStreamer.java @@ -25,6 +25,7 @@ import org.apache.hudi.utilities.deltastreamer.HoodieMultiTableDeltaStreamer; import org.apache.hudi.utilities.deltastreamer.TableExecutionContext; import org.apache.hudi.utilities.schema.FilebasedSchemaProvider; +import org.apache.hudi.utilities.schema.SchemaRegistryProvider; import org.apache.hudi.utilities.sources.JsonKafkaSource; import org.apache.hudi.utilities.sources.ParquetDFSSource; import org.apache.hudi.utilities.sources.TestDataSource; @@ -49,12 +50,13 @@ public class TestHoodieMultiTableDeltaStreamer extends HoodieDeltaStreamerTestBa static class TestHelpers { - static HoodieMultiTableDeltaStreamer.Config getConfig(String fileName, String configFolder, String sourceClassName, boolean enableHiveSync, boolean enableMetaSync) { - return getConfig(fileName, configFolder, sourceClassName, enableHiveSync, enableMetaSync, true, "multi_table_dataset"); + static HoodieMultiTableDeltaStreamer.Config getConfig(String fileName, String configFolder, String sourceClassName, boolean enableHiveSync, boolean enableMetaSync, + Class clazz) { + return getConfig(fileName, configFolder, sourceClassName, enableHiveSync, enableMetaSync, true, "multi_table_dataset", clazz); } static HoodieMultiTableDeltaStreamer.Config getConfig(String fileName, String configFolder, String sourceClassName, boolean enableHiveSync, boolean enableMetaSync, - boolean setSchemaProvider, String basePathPrefix) { + boolean setSchemaProvider, String basePathPrefix, Class clazz) { HoodieMultiTableDeltaStreamer.Config config = new HoodieMultiTableDeltaStreamer.Config(); config.configFolder = configFolder; config.targetTableName = "dummy_table"; @@ -64,7 +66,7 @@ static HoodieMultiTableDeltaStreamer.Config getConfig(String fileName, String co config.sourceClassName = sourceClassName; config.sourceOrderingField = "timestamp"; if (setSchemaProvider) { - config.schemaProviderClassName = FilebasedSchemaProvider.class.getName(); + config.schemaProviderClassName = clazz != null ? 
clazz.getName() : FilebasedSchemaProvider.class.getName(); } config.enableHiveSync = enableHiveSync; config.enableMetaSync = enableMetaSync; @@ -74,7 +76,7 @@ static HoodieMultiTableDeltaStreamer.Config getConfig(String fileName, String co @Test public void testInvalidHiveSyncProps() throws IOException { - HoodieMultiTableDeltaStreamer.Config cfg = TestHelpers.getConfig(PROPS_INVALID_HIVE_SYNC_TEST_SOURCE1, dfsBasePath + "/config", TestDataSource.class.getName(), true, true); + HoodieMultiTableDeltaStreamer.Config cfg = TestHelpers.getConfig(PROPS_INVALID_HIVE_SYNC_TEST_SOURCE1, dfsBasePath + "/config", TestDataSource.class.getName(), true, true, null); Exception e = assertThrows(HoodieException.class, () -> { new HoodieMultiTableDeltaStreamer(cfg, jsc); }, "Should fail when hive sync table not provided with enableHiveSync flag"); @@ -84,7 +86,7 @@ public void testInvalidHiveSyncProps() throws IOException { @Test public void testInvalidPropsFilePath() throws IOException { - HoodieMultiTableDeltaStreamer.Config cfg = TestHelpers.getConfig(PROPS_INVALID_FILE, dfsBasePath + "/config", TestDataSource.class.getName(), true, true); + HoodieMultiTableDeltaStreamer.Config cfg = TestHelpers.getConfig(PROPS_INVALID_FILE, dfsBasePath + "/config", TestDataSource.class.getName(), true, true, null); Exception e = assertThrows(IllegalArgumentException.class, () -> { new HoodieMultiTableDeltaStreamer(cfg, jsc); }, "Should fail when invalid props file is provided"); @@ -94,7 +96,7 @@ public void testInvalidPropsFilePath() throws IOException { @Test public void testInvalidTableConfigFilePath() throws IOException { - HoodieMultiTableDeltaStreamer.Config cfg = TestHelpers.getConfig(PROPS_INVALID_TABLE_CONFIG_FILE, dfsBasePath + "/config", TestDataSource.class.getName(), true, true); + HoodieMultiTableDeltaStreamer.Config cfg = TestHelpers.getConfig(PROPS_INVALID_TABLE_CONFIG_FILE, dfsBasePath + "/config", TestDataSource.class.getName(), true, true, null); Exception e = assertThrows(IllegalArgumentException.class, () -> { new HoodieMultiTableDeltaStreamer(cfg, jsc); }, "Should fail when invalid table config props file path is provided"); @@ -104,7 +106,7 @@ public void testInvalidTableConfigFilePath() throws IOException { @Test public void testCustomConfigProps() throws IOException { - HoodieMultiTableDeltaStreamer.Config cfg = TestHelpers.getConfig(PROPS_FILENAME_TEST_SOURCE1, dfsBasePath + "/config", TestDataSource.class.getName(), false, false); + HoodieMultiTableDeltaStreamer.Config cfg = TestHelpers.getConfig(PROPS_FILENAME_TEST_SOURCE1, dfsBasePath + "/config", TestDataSource.class.getName(), false, false, SchemaRegistryProvider.class); HoodieMultiTableDeltaStreamer streamer = new HoodieMultiTableDeltaStreamer(cfg, jsc); TableExecutionContext executionContext = streamer.getTableExecutionContexts().get(1); assertEquals(2, streamer.getTableExecutionContexts().size()); @@ -114,13 +116,16 @@ public void testCustomConfigProps() throws IOException { assertEquals("_row_key", executionContext.getProperties().getString(DataSourceWriteOptions.RECORDKEY_FIELD().key())); assertEquals(TestHoodieDeltaStreamer.TestGenerator.class.getName(), executionContext.getProperties().getString(DataSourceWriteOptions.KEYGENERATOR_CLASS_NAME().key())); assertEquals("uber_hive_dummy_table", executionContext.getProperties().getString(HoodieMultiTableDeltaStreamer.Constants.HIVE_SYNC_TABLE_PROP)); + assertEquals("http://localhost:8081/subjects/random-value/versions/latest", 
executionContext.getProperties().getString(SchemaRegistryProvider.Config.SRC_SCHEMA_REGISTRY_URL_PROP)); + assertEquals("http://localhost:8081/subjects/topic2-value/versions/latest", + streamer.getTableExecutionContexts().get(0).getProperties().getString(SchemaRegistryProvider.Config.SRC_SCHEMA_REGISTRY_URL_PROP)); } @Test @Disabled public void testInvalidIngestionProps() { Exception e = assertThrows(Exception.class, () -> { - HoodieMultiTableDeltaStreamer.Config cfg = TestHelpers.getConfig(PROPS_FILENAME_TEST_SOURCE1, dfsBasePath + "/config", TestDataSource.class.getName(), true, true); + HoodieMultiTableDeltaStreamer.Config cfg = TestHelpers.getConfig(PROPS_FILENAME_TEST_SOURCE1, dfsBasePath + "/config", TestDataSource.class.getName(), true, true, null); new HoodieMultiTableDeltaStreamer(cfg, jsc); }, "Creation of execution object should fail without kafka topic"); log.debug("Creation of execution object failed with error: " + e.getMessage(), e); @@ -139,7 +144,7 @@ public void testMultiTableExecutionWithKafkaSource() throws IOException { testUtils.sendMessages(topicName1, Helpers.jsonifyRecords(dataGenerator.generateInsertsAsPerSchema("000", 5, HoodieTestDataGenerator.TRIP_SCHEMA))); testUtils.sendMessages(topicName2, Helpers.jsonifyRecords(dataGenerator.generateInsertsAsPerSchema("000", 10, HoodieTestDataGenerator.SHORT_TRIP_SCHEMA))); - HoodieMultiTableDeltaStreamer.Config cfg = TestHelpers.getConfig(PROPS_FILENAME_TEST_SOURCE1, dfsBasePath + "/config", JsonKafkaSource.class.getName(), false, false); + HoodieMultiTableDeltaStreamer.Config cfg = TestHelpers.getConfig(PROPS_FILENAME_TEST_SOURCE1, dfsBasePath + "/config", JsonKafkaSource.class.getName(), false, false, null); HoodieMultiTableDeltaStreamer streamer = new HoodieMultiTableDeltaStreamer(cfg, jsc); List executionContexts = streamer.getTableExecutionContexts(); TypedProperties properties = executionContexts.get(1).getProperties(); @@ -189,7 +194,7 @@ public void testMultiTableExecutionWithParquetSource() throws IOException { String parquetPropsFile = populateCommonPropsAndWriteToFile(); HoodieMultiTableDeltaStreamer.Config cfg = TestHelpers.getConfig(parquetPropsFile, dfsBasePath + "/config", ParquetDFSSource.class.getName(), false, false, - false, "multi_table_parquet"); + false, "multi_table_parquet", null); HoodieMultiTableDeltaStreamer streamer = new HoodieMultiTableDeltaStreamer(cfg, jsc); List executionContexts = streamer.getTableExecutionContexts(); @@ -219,7 +224,7 @@ public void testMultiTableExecutionWithParquetSource() throws IOException { @Test public void testTableLevelProperties() throws IOException { - HoodieMultiTableDeltaStreamer.Config cfg = TestHelpers.getConfig(PROPS_FILENAME_TEST_SOURCE1, dfsBasePath + "/config", TestDataSource.class.getName(), false, false); + HoodieMultiTableDeltaStreamer.Config cfg = TestHelpers.getConfig(PROPS_FILENAME_TEST_SOURCE1, dfsBasePath + "/config", TestDataSource.class.getName(), false, false, null); HoodieMultiTableDeltaStreamer streamer = new HoodieMultiTableDeltaStreamer(cfg, jsc); List tableExecutionContexts = streamer.getTableExecutionContexts(); tableExecutionContexts.forEach(tableExecutionContext -> { diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/TestHoodieSnapshotCopier.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/TestHoodieSnapshotCopier.java index f192ede73a159..dd25e7f8bebad 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/TestHoodieSnapshotCopier.java +++ 
b/hudi-utilities/src/test/java/org/apache/hudi/utilities/functional/TestHoodieSnapshotCopier.java @@ -96,7 +96,7 @@ public void testSnapshotCopy() throws Exception { new File(basePath + "/2016/05/01/").mkdirs(); new File(basePath + "/2016/05/02/").mkdirs(); new File(basePath + "/2016/05/06/").mkdirs(); - HoodieTestDataGenerator.writePartitionMetadata(fs, new String[] {"2016/05/01", "2016/05/02", "2016/05/06"}, + HoodieTestDataGenerator.writePartitionMetadataDeprecated(fs, new String[] {"2016/05/01", "2016/05/02", "2016/05/06"}, basePath); // Make commit1 File file11 = new File(basePath + "/2016/05/01/" + FSUtils.makeDataFileName(commitTime1, TEST_WRITE_TOKEN, "id11")); diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestHoodieIncrSource.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestHoodieIncrSource.java index 250e288294aca..1f15cc3093e7a 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestHoodieIncrSource.java +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestHoodieIncrSource.java @@ -20,12 +20,14 @@ import org.apache.hudi.client.SparkRDDWriteClient; import org.apache.hudi.client.WriteStatus; +import org.apache.hudi.common.config.HoodieMetadataConfig; import org.apache.hudi.common.config.TypedProperties; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.table.timeline.versioning.TimelineLayoutVersion; import org.apache.hudi.common.testutils.HoodieTestDataGenerator; import org.apache.hudi.common.util.Option; import org.apache.hudi.common.util.collection.Pair; +import org.apache.hudi.config.HoodieCompactionConfig; import org.apache.hudi.config.HoodieWriteConfig; import org.apache.hudi.testutils.HoodieClientTestHarness; import org.apache.hudi.utilities.schema.SchemaProvider; @@ -46,6 +48,7 @@ import static org.apache.hudi.testutils.Assertions.assertNoWriteErrors; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; public class TestHoodieIncrSource extends HoodieClientTestHarness { @@ -61,21 +64,42 @@ public void tearDown() throws IOException { @Test public void testHoodieIncrSource() throws IOException { - HoodieWriteConfig writeConfig = getConfigBuilder(basePath).build(); + HoodieWriteConfig writeConfig = getConfigBuilder(basePath) + .withCompactionConfig(HoodieCompactionConfig.newBuilder() + .archiveCommitsWith(2, 3).retainCommits(1).build()) + .withMetadataConfig(HoodieMetadataConfig.newBuilder() + .withMaxNumDeltaCommitsBeforeCompaction(1).build()) + .build(); SparkRDDWriteClient writeClient = new SparkRDDWriteClient(context, writeConfig); - Pair> inserts = writeRecords(writeClient, true, null); - Pair> inserts2 = writeRecords(writeClient, true, null); - Pair> inserts3 = writeRecords(writeClient, true, null); + Pair> inserts = writeRecords(writeClient, true, null, "100"); + Pair> inserts2 = writeRecords(writeClient, true, null, "200"); + Pair> inserts3 = writeRecords(writeClient, true, null, "300"); + Pair> inserts4 = writeRecords(writeClient, true, null, "400"); + Pair> inserts5 = writeRecords(writeClient, true, null, "500"); // read everything upto latest - readAndAssert(IncrSourceHelper.MissingCheckpointStrategy.READ_UPTO_LATEST_COMMIT, 300, inserts3.getKey()); + readAndAssert(IncrSourceHelper.MissingCheckpointStrategy.READ_UPTO_LATEST_COMMIT, Option.empty(), 500, inserts5.getKey()); + + // even if the begin timestamp is archived (100), full table scan should kick in, but should 
filter for records having commit time > 100 + readAndAssert(IncrSourceHelper.MissingCheckpointStrategy.READ_UPTO_LATEST_COMMIT, Option.of("100"), 400, inserts5.getKey()); + + // even if READ_UPTO_LATEST_COMMIT is set, if the begin timestamp is in the active timeline, only an incremental read should kick in. + readAndAssert(IncrSourceHelper.MissingCheckpointStrategy.READ_UPTO_LATEST_COMMIT, Option.of("400"), 100, inserts5.getKey()); // read just the latest - readAndAssert(IncrSourceHelper.MissingCheckpointStrategy.READ_LATEST, 100, inserts3.getKey()); + readAndAssert(IncrSourceHelper.MissingCheckpointStrategy.READ_LATEST, Option.empty(), 100, inserts5.getKey()); + + // ensure checkpoint does not move + readAndAssert(IncrSourceHelper.MissingCheckpointStrategy.READ_LATEST, Option.of(inserts5.getKey()), 0, inserts5.getKey()); + + Pair> inserts6 = writeRecords(writeClient, true, null, "600"); + + // insert new batch and ensure the checkpoint moves + readAndAssert(IncrSourceHelper.MissingCheckpointStrategy.READ_LATEST, Option.of(inserts5.getKey()), 100, inserts6.getKey()); } - private void readAndAssert(IncrSourceHelper.MissingCheckpointStrategy missingCheckpointStrategy, int expectedCount, String expectedCheckpoint) { + private void readAndAssert(IncrSourceHelper.MissingCheckpointStrategy missingCheckpointStrategy, Option checkpointToPull, int expectedCount, String expectedCheckpoint) { Properties properties = new Properties(); properties.setProperty("hoodie.deltastreamer.source.hoodieincr.path", basePath); @@ -84,14 +108,18 @@ private void readAndAssert(IncrSourceHelper.MissingCheckpointStrategy missingChe HoodieIncrSource incrSource = new HoodieIncrSource(typedProperties, jsc, sparkSession, new TestSchemaProvider(HoodieTestDataGenerator.AVRO_SCHEMA)); // read everything until latest - Pair>, String> batchCheckPoint = incrSource.fetchNextBatch(Option.empty(), 500); + Pair>, String> batchCheckPoint = incrSource.fetchNextBatch(checkpointToPull, 500); Assertions.assertNotNull(batchCheckPoint.getValue()); - assertEquals(batchCheckPoint.getKey().get().count(), expectedCount); + if (expectedCount == 0) { + assertFalse(batchCheckPoint.getKey().isPresent()); + } else { + assertEquals(batchCheckPoint.getKey().get().count(), expectedCount); + } Assertions.assertEquals(batchCheckPoint.getRight(), expectedCheckpoint); } - public Pair> writeRecords(SparkRDDWriteClient writeClient, boolean insert, List insertRecords) throws IOException { - String commit = writeClient.startCommit(); + public Pair> writeRecords(SparkRDDWriteClient writeClient, boolean insert, List insertRecords, String commit) throws IOException { + writeClient.startCommitWithTime(commit); List records = insert ?
dataGen.generateInserts(commit, 100) : dataGen.generateUpdates(commit, insertRecords); JavaRDD result = writeClient.upsert(jsc.parallelize(records, 1), commit); List statuses = result.collect(); diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestSqlSource.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestSqlSource.java index 9c3d5584a5dd7..e4ca51842e87e 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestSqlSource.java +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestSqlSource.java @@ -40,6 +40,7 @@ import java.io.IOException; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNull; import static org.junit.jupiter.api.Assertions.assertThrows; /** @@ -135,6 +136,23 @@ public void testSqlSourceRowFormat() throws IOException { assertEquals(10000, fetch1AsRows.getBatch().get().count()); } + /** + * Runs the test scenario of reading data from the source in row format. + * Source has no records. + * + * @throws IOException + */ + @Test + public void testSqlSourceCheckpoint() throws IOException { + props.setProperty(sqlSourceConfig, "select * from test_sql_table where 1=0"); + sqlSource = new SqlSource(props, jsc, sparkSession, schemaProvider); + sourceFormatAdapter = new SourceFormatAdapter(sqlSource); + + InputBatch> fetch1AsRows = + sourceFormatAdapter.fetchNewDataInRowFormat(Option.empty(), Long.MAX_VALUE); + assertNull(fetch1AsRows.getCheckpointForNextBatch()); + } + /** * Runs the test scenario of reading data from the source in row format. * Source has more records than source limit. diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/testutils/JdbcTestUtils.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/testutils/JdbcTestUtils.java index 377063eb045e9..79173dbdc8a0c 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/testutils/JdbcTestUtils.java +++ b/hudi-utilities/src/test/java/org/apache/hudi/utilities/testutils/JdbcTestUtils.java @@ -20,6 +20,7 @@ package org.apache.hudi.utilities.testutils; import org.apache.hudi.common.config.TypedProperties; +import org.apache.hudi.common.model.HoodieAvroRecord; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.testutils.HoodieTestDataGenerator; @@ -81,7 +82,8 @@ public static List insert(String commitTime, int numRecords, Conne .stream() .map(r -> { try { - return ((GenericRecord) r.getData().getInsertValue(HoodieTestDataGenerator.AVRO_SCHEMA, props).get()); + return ((GenericRecord) ((HoodieAvroRecord) r).getData() + .getInsertValue(HoodieTestDataGenerator.AVRO_SCHEMA, props).get()); } catch (IOException e) { return null; } @@ -125,7 +127,7 @@ public static List update(String commitTime, List in List updateRecords = dataGenerator.generateUpdates(commitTime, inserts); updateRecords.stream().map(m -> { try { - return m.getData().getInsertValue(HoodieTestDataGenerator.AVRO_SCHEMA, props).get(); + return ((HoodieAvroRecord) m).getData().getInsertValue(HoodieTestDataGenerator.AVRO_SCHEMA, props).get(); } catch (IOException e) { return null; } diff --git a/hudi-utilities/src/test/java/org/apache/hudi/utilities/testutils/UtilitiesTestBase.java b/hudi-utilities/src/test/java/org/apache/hudi/utilities/testutils/UtilitiesTestBase.java index 90a3f5af38021..8464740bf2bf0 100644 --- a/hudi-utilities/src/test/java/org/apache/hudi/utilities/testutils/UtilitiesTestBase.java +++ 
b/hudi-utilities/src/test/java/org/apache/hudi/utilities/testutils/UtilitiesTestBase.java @@ -20,6 +20,7 @@ import org.apache.hudi.client.common.HoodieSparkEngineContext; import org.apache.hudi.common.config.TypedProperties; +import org.apache.hudi.common.model.HoodieAvroRecord; import org.apache.hudi.common.model.HoodieRecord; import org.apache.hudi.common.model.HoodieTableType; import org.apache.hudi.common.table.HoodieTableMetaClient; @@ -378,7 +379,7 @@ public static TypedProperties setupSchemaOnDFSWithAbsoluteScope(String scope, St public static GenericRecord toGenericRecord(HoodieRecord hoodieRecord, Schema schema) { try { - Option recordOpt = hoodieRecord.getData().getInsertValue(schema); + Option recordOpt = ((HoodieAvroRecord) hoodieRecord).getData().getInsertValue(schema); return (GenericRecord) recordOpt.get(); } catch (IOException e) { return null; diff --git a/hudi-utilities/src/test/resources/delta-streamer-config/short_trip_uber_config.properties b/hudi-utilities/src/test/resources/delta-streamer-config/short_trip_uber_config.properties index 243afc90f3742..75d74d6bc8932 100644 --- a/hudi-utilities/src/test/resources/delta-streamer-config/short_trip_uber_config.properties +++ b/hudi-utilities/src/test/resources/delta-streamer-config/short_trip_uber_config.properties @@ -22,4 +22,6 @@ hoodie.deltastreamer.source.kafka.topic=topic2 hoodie.deltastreamer.keygen.timebased.timestamp.type=UNIX_TIMESTAMP hoodie.deltastreamer.keygen.timebased.input.dateformat=yyyy-MM-dd HH:mm:ss.S hoodie.datasource.hive_sync.table=short_trip_uber_hive_dummy_table -hoodie.datasource.write.keygenerator.class=org.apache.hudi.utilities.functional.TestHoodieDeltaStreamer$TestTableLevelGenerator \ No newline at end of file +hoodie.datasource.write.keygenerator.class=org.apache.hudi.utilities.functional.TestHoodieDeltaStreamer$TestTableLevelGenerator +hoodie.deltastreamer.schemaprovider.registry.baseUrl=http://localhost:8081/subjects/ +hoodie.deltastreamer.schemaprovider.registry.urlSuffix=-value/versions/latest \ No newline at end of file diff --git a/hudi-utilities/src/test/resources/delta-streamer-config/uber_config.properties b/hudi-utilities/src/test/resources/delta-streamer-config/uber_config.properties index 3d3501fec73d3..f5b079265d438 100644 --- a/hudi-utilities/src/test/resources/delta-streamer-config/uber_config.properties +++ b/hudi-utilities/src/test/resources/delta-streamer-config/uber_config.properties @@ -22,4 +22,6 @@ hoodie.deltastreamer.source.kafka.topic=topic1 hoodie.deltastreamer.keygen.timebased.timestamp.type=UNIX_TIMESTAMP hoodie.deltastreamer.keygen.timebased.input.dateformat=yyyy-MM-dd HH:mm:ss.S hoodie.datasource.hive_sync.database=uber_hive_db -hoodie.datasource.hive_sync.table=uber_hive_dummy_table \ No newline at end of file +hoodie.datasource.hive_sync.table=uber_hive_dummy_table +hoodie.deltastreamer.schemaprovider.registry.url=http://localhost:8081/subjects/random-value/versions/latest +hoodie.deltastreamer.schemaprovider.registry.targetUrl=http://localhost:8081/subjects/random-value/versions/latest \ No newline at end of file diff --git a/hudi-utilities/src/test/resources/fixtures/testUpsertsContinuousModeWithMultipleWriters.COPY_ON_WRITE.zip b/hudi-utilities/src/test/resources/fixtures/testUpsertsContinuousModeWithMultipleWriters.COPY_ON_WRITE.zip deleted file mode 100644 index 299b070bee34a..0000000000000 Binary files a/hudi-utilities/src/test/resources/fixtures/testUpsertsContinuousModeWithMultipleWriters.COPY_ON_WRITE.zip and /dev/null differ diff --git 
a/hudi-utilities/src/test/resources/fixtures/testUpsertsContinuousModeWithMultipleWriters.MERGE_ON_READ.zip b/hudi-utilities/src/test/resources/fixtures/testUpsertsContinuousModeWithMultipleWriters.MERGE_ON_READ.zip deleted file mode 100644 index d80439d20d3df..0000000000000 Binary files a/hudi-utilities/src/test/resources/fixtures/testUpsertsContinuousModeWithMultipleWriters.MERGE_ON_READ.zip and /dev/null differ diff --git a/packaging/hudi-flink-bundle/pom.xml b/packaging/hudi-flink-bundle/pom.xml index 066cefb1ec2b3..222478090b4b0 100644 --- a/packaging/hudi-flink-bundle/pom.xml +++ b/packaging/hudi-flink-bundle/pom.xml @@ -136,7 +136,8 @@ org.apache.hive:hive-common org.apache.hive:hive-service org.apache.hive:hive-service-rpc - org.apache.hive:hive-exec + org.apache.hive:hive-exec + org.apache.hive:hive-standalone-metastore org.apache.hive:hive-metastore org.apache.hive:hive-jdbc org.datanucleus:datanucleus-core @@ -161,10 +162,6 @@ org.apache.avro. ${flink.bundle.shade.prefix}org.apache.avro. - - org.apache.parquet. - ${flink.bundle.shade.prefix}org.apache.parquet. - com.yammer.metrics. ${flink.bundle.shade.prefix}com.yammer.metrics. @@ -173,46 +170,6 @@ com.beust.jcommander. ${flink.bundle.shade.prefix}com.beust.jcommander. - - org.apache.hive.jdbc. - ${flink.bundle.shade.prefix}org.apache.hive.jdbc. - - - org.apache.hadoop.hive.metastore. - ${flink.bundle.shade.prefix}org.apache.hadoop.hive.metastore. - - - org.apache.hive.common. - ${flink.bundle.shade.prefix}org.apache.hive.common. - - - org.apache.hadoop.hive.common. - ${flink.bundle.shade.prefix}org.apache.hadoop.hive.common. - - - org.apache.hadoop.hive.conf. - ${flink.bundle.shade.prefix}org.apache.hadoop.hive.conf. - - - org.apache.hive.service. - ${flink.bundle.shade.prefix}org.apache.hive.service. - - - org.apache.hadoop.hive.service. - ${flink.bundle.shade.prefix}org.apache.hadoop.hive.service. - - - org.apache.hadoop.hive.ql.metadata. - ${flink.bundle.shade.prefix}org.apache.hadoop.hive.ql.metadata. - - - org.apache.hadoop.hive.ql.optimizer. - ${flink.bundle.shade.prefix}org.apache.hadoop.hive.ql.optimizer. - - - org.apache.hadoop.hive.ql.lockmgr. - ${flink.bundle.shade.prefix}org.apache.hadoop.hive.ql.lockmgr. - com.codahale.metrics. ${flink.bundle.shade.prefix}com.codahale.metrics. 
@@ -687,6 +644,12 @@ ${hive.version} ${flink.bundle.hive.scope} + + ${hive.groupid} + hive-standalone-metastore + ${hive.version} + ${flink.bundle.hive.scope} + diff --git a/packaging/hudi-hadoop-mr-bundle/pom.xml b/packaging/hudi-hadoop-mr-bundle/pom.xml index 23399233e670a..f6215b1e017a5 100644 --- a/packaging/hudi-hadoop-mr-bundle/pom.xml +++ b/packaging/hudi-hadoop-mr-bundle/pom.xml @@ -67,9 +67,6 @@ org.apache.hudi:hudi-common org.apache.hudi:hudi-hadoop-mr - - org.scala-lang:scala-library - org.apache.parquet:parquet-avro org.apache.avro:avro com.esotericsoftware:kryo-shaded @@ -155,14 +152,6 @@ ${project.version} - - - - org.scala-lang - scala-library - ${scala.version} - - org.apache.parquet diff --git a/packaging/hudi-hive-sync-bundle/pom.xml b/packaging/hudi-hive-sync-bundle/pom.xml index 9b775e76c7b48..75fce574eb3d6 100644 --- a/packaging/hudi-hive-sync-bundle/pom.xml +++ b/packaging/hudi-hive-sync-bundle/pom.xml @@ -69,9 +69,6 @@ org.apache.hudi:hudi-sync-common org.apache.hudi:hudi-hive-sync - - org.scala-lang:scala-library - com.beust:jcommander org.apache.avro:avro org.apache.parquet:parquet-avro @@ -134,14 +131,6 @@ ${project.version} - - - - org.scala-lang - scala-library - ${scala.version} - - org.apache.parquet diff --git a/packaging/hudi-presto-bundle/pom.xml b/packaging/hudi-presto-bundle/pom.xml index f085c30b48d57..90c1087dcb4d2 100644 --- a/packaging/hudi-presto-bundle/pom.xml +++ b/packaging/hudi-presto-bundle/pom.xml @@ -67,9 +67,6 @@ org.apache.hudi:hudi-common org.apache.hudi:hudi-hadoop-mr - - org.scala-lang:scala-library - org.apache.parquet:parquet-avro org.apache.avro:avro org.codehaus.jackson:* @@ -190,14 +187,6 @@ - - - - org.scala-lang - scala-library - ${scala.version} - - org.apache.parquet diff --git a/packaging/hudi-trino-bundle/pom.xml b/packaging/hudi-trino-bundle/pom.xml index a7f41ecaf177a..adf73f1bb0b83 100644 --- a/packaging/hudi-trino-bundle/pom.xml +++ b/packaging/hudi-trino-bundle/pom.xml @@ -68,9 +68,6 @@ org.apache.hudi:hudi-common org.apache.hudi:hudi-hadoop-mr - - org.scala-lang:scala-library - org.apache.parquet:parquet-avro org.apache.avro:avro org.codehaus.jackson:* @@ -189,14 +186,6 @@ - - - - org.scala-lang - scala-library - ${scala.version} - - org.apache.hbase diff --git a/pom.xml b/pom.xml index 2778885e8312e..1b28ae1bb9a49 100644 --- a/pom.xml +++ b/pom.xml @@ -117,9 +117,9 @@ 4.4.1 ${spark2.version} - 1.13.1 + 1.14.3 2.4.4 - 3.2.0 + 3.2.1 hudi-spark2 hudi-spark2-common 1.8.2 @@ -164,7 +164,7 @@ 4.7 1.12.22 3.17.3 - 3.1.0 + 3.11.4 1.1.0 8000 http://localhost:${dynamodb-local.port} @@ -349,6 +349,7 @@ 3 @{argLine} + false ${surefire-log4j.file} @@ -1119,6 +1120,12 @@ awaitility ${awaitility.version} test + + + org.objenesis + objenesis + + @@ -1586,7 +1593,9 @@ hudi-spark3-common 3.1.0 2.4.1 - 1.12.1 + 1.12.2 + 1.10.2 + 1.6.12 ${fasterxml.spark3.version} ${fasterxml.spark3.version} ${fasterxml.spark3.version} diff --git a/rfc/README.md b/rfc/README.md index 63b81a884fc2c..a9587d1d79cf3 100644 --- a/rfc/README.md +++ b/rfc/README.md @@ -60,12 +60,13 @@ The list of all RFCs can be found here. 
| 34 | [Hudi BigQuery Integration (WIP)](https://cwiki.apache.org/confluence/pages/viewpage.action?pageId=188745980) | `UNDER REVIEW` |
| 35 | [Make Flink MOR table writing streaming friendly](https://cwiki.apache.org/confluence/display/HUDI/RFC-35%3A+Make+Flink+MOR+table+writing+streaming+friendly) | `UNDER REVIEW` |
| 36 | [HUDI Metastore Server](https://cwiki.apache.org/confluence/display/HUDI/%5BWIP%5D+RFC-36%3A+HUDI+Metastore+Server) | `UNDER REVIEW` |
-| 37 | [Hudi metadata based bloom index] | `UNDER REVIEW` |
-| 38 | [Spark Datasource V2 Integration] | `UNDER REVIEW` |
+| 37 | [Hudi Metadata based Bloom Index](./rfc-37/rfc-37.md) | `IN PROGRESS` |
+| 38 | [Spark Datasource V2 Integration](./rfc-38/rfc-38.md) | `IN PROGRESS` |
| 39 | [Incremental source for Debezium](./rfc-39/rfc-39.md) | `IN PROGRESS` |
-| 40 | [Hudi Connector for Trino] | `UNDER REVIEW` |
+| 40 | [Hudi Connector for Trino](./rfc-40/rfc-40.md) | `IN PROGRESS` |
| 41 | [Hudi Snowflake Integration] | `UNDER REVIEW` |
| 42 | [Consistent Hashing Index](./rfc-42/rfc-42.md) | `UNDER REVIEW` |
| 43 | [Compaction / Clustering Service](./rfc-43/rfc-43.md) | `UNDER REVIEW` |
| 44 | [Hudi Connector for Presto](./rfc-44/rfc-44.md) | `UNDER REVIEW` |
| 45 | [Asynchronous Metadata Indexing](./rfc-45/rfc-45.md) | `UNDER REVIEW` |
+| 46 | [Optimizing Record Payload Handling](./rfc-46/rfc-46.md) | `UNDER REVIEW` |
diff --git a/rfc/rfc-27/col_stats.png b/rfc/rfc-27/col_stats.png new file mode 100644 index 0000000000000..76aa6ab44e7c2 Binary files /dev/null and b/rfc/rfc-27/col_stats.png differ
diff --git a/rfc/rfc-27/rfc-27.md b/rfc/rfc-27/rfc-27.md new file mode 100644 index 0000000000000..3b00af7c140a0 --- /dev/null +++ b/rfc/rfc-27/rfc-27.md @@ -0,0 +1,444 @@

# RFC-[27]: [Data skipping Index to improve query performance]

## Proposers

- @manojpec
- @shivnarayan
- @satish.kotha

## Approvers
- @rmpifer
- @uditme

## Status

JIRA: https://issues.apache.org/jira/browse/HUDI-1822

> Please keep the status updated in `rfc/README.md`.

## Abstract

Query engines typically scan large amounts of irrelevant data for query planning and execution. Some workarounds are available to reduce the amount of irrelevant data scanned. These include:
- Partition pruning
- File pruning
  - Some data file formats contain metadata including range information for certain columns (for parquet, this metadata is stored in the footer).
  - As part of query planning, all range information from data files is read.
  - Irrelevant data files are then pruned based on predicates and the available range information.

Partition pruning typically puts the burden on users to select the partitions where the data may exist. The file pruning approach is expensive and does not scale if there are a large number of partitions and data files to be scanned. So we propose a new solution: store additional information as part of the Hudi metadata table to implement a data skipping index. The goals of the data skipping index are to provide:

- Global index: Users query for the information they need without needing to specify partitions. The index can effectively find data files in the table.
- Improved query plans: Efficiently find data files that have information for the specified query predicates.
- Support for multiple types of index: The initial implementation may provide a range index, but the goal is to provide a flexible framework to implement other types of index (e.g. bloom).

## Background
RFC-15 added metadata table support to Hudi for optimized file listing. RFC-37 is adding a metadata index and column stats as another partition to the metadata table. This RFC will piggyback on the column stats partition that RFC-37 will be adding to the metadata table.

Note: The effectiveness of the index will be proportional to how the data is laid out. If every file contains data for a commonly specified query predicate, the index may not be very effective.

## Implementation
At a high level there are 3 components to implement index support:
- Storage format
- Metadata generation
- Query engine integration

### Column_Stats Index/Partition
We want to support multiple types of index (range, bloom, etc.), so it is important to generate different types of record for different columns. The focus of this RFC is the column range or column stats index, i.e. min/max values, null counts, etc. Users can configure the commonly queried columns, and the column stats partition in the metadata table will store all stats pertaining to the configured columns for every valid data file where the column is present.

Similar to how we generate records for the files partition in the metadata table, we will generate a HoodieMetadataRecord for the column stats partition on any commit that gets applied to the metadata table. The basic building blocks of the metadata table used for file listing will be used for this column stats partition as well (how updates are applied to the metadata table, how invalid data is ignored, etc.).

The column_stats partition stores statistics for all indexed columns in the Hudi data table. The index maintained in this partition helps with predicate pushdown/data skipping - file filtering based on column predicates.

For the purpose of column predicate filtering, this partition can store statistics for any column as per configs.

So, the high level requirement for this column_stats partition (pertaining to this RFC) is:
 - Given a list of columns and predicates (and optionally partitions), return a list of matching file names

### Storage format
To cater to the above requirement, we plan to encode the column name, partition path and file name into the keys in HFile. Since HFile supports efficient range/prefix search, our lookup should be very fast.
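As a concrete illustration of this key layout and the prefix search it enables, here is a minimal, self-contained sketch. This is not the actual Hudi implementation: the MD5-based hashing, the ID widths, the sample names, and the `TreeMap` standing in for an HFile file group are all illustrative assumptions.

```java
// Illustrative sketch only: fixed-width, base64-encoded hash IDs are concatenated into
// sortable keys, so a sorted store (TreeMap here, standing in for HFile) can answer
// lookups like [colId] or [colId][partId] with a cheap contiguous range scan.
import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import java.util.Arrays;
import java.util.Base64;
import java.util.SortedMap;
import java.util.TreeMap;

public class ColStatsKeySketch {

  // Hash a name, keep `numBytes` of the digest, and base64-encode it into a fixed-width ID.
  static String id(String name, int numBytes) throws Exception {
    byte[] digest = MessageDigest.getInstance("MD5").digest(name.getBytes(StandardCharsets.UTF_8));
    return Base64.getEncoder().encodeToString(Arrays.copyOf(digest, numBytes));
  }

  public static void main(String[] args) throws Exception {
    String colId = id("rider", 8);                // ColumnID
    String partId = id("2022/01/26", 8);          // PartitionID
    String fileId = id("f1-0_1-2_3.parquet", 16); // FileID

    SortedMap<String, String> shard = new TreeMap<>(); // stand-in for one HFile file group
    shard.put(colId + partId + fileId, "min=a,max=z,nulls=0"); // per-file column stat
    shard.put(colId + "agg" + partId, "min=a,max=z,nulls=0");  // per-partition aggregate

    // Prefix search: all entries under [colId][partId] form one contiguous key range.
    String prefix = colId + partId;
    shard.subMap(prefix, prefix + Character.MAX_VALUE)
        .forEach((k, v) -> System.out.println(k + " -> " + v));
  }
}
```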
![Column Stats Partition](col_stats.png)

We plan to generate unique and random hash IDs for all 3 components:
- ColumnID:
  - base64(hash32(column name))
  - on-disk size = 12 bytes per col_stat per file
- PartitionID:
  - base64(hash32(partition name))
  - on-disk size = 12 bytes per partition
- FileID:
  - base64(hash128(file name))
  - on-disk size = 24 bytes per file

#### Design Choices for ID generation
1. Incremental IDs: Sequentially increasing IDs can be generated in the context of the ongoing commit/write. The ID can always start at 1, and to make the full ID unique enough, sequential IDs can be appended with the ongoing commit time.
   a. Pros:
      The ID is simple to generate and doesn't depend on key lookups to resume ID generation across writers.
      The overall ID can be shorter than hash based IDs and can still be unique.
      Differential/delta encoding works well with sequential numbers and can achieve a high compression ratio (though we didn't see this in the tests).
   b. Cons:
      The same column can be given several IDs across several commits spilled over several files. Complex merging logic is needed to coalesce them all when looking up any interested columns.
      Doesn't work well with schema evolution. Even without schema evolution, changing IDs for the same column is by itself a small schema evolution problem.

2. Hash IDs: Hashing utilities can be used to generate unique and random IDs of any length for the given column/partition/file name.
   a. Pros:
      Deterministic name-to-ID generation.
      Reverse lookup of ID to name is possible with a relatively much smaller meta index read.
      The ID length can be controlled for the scaling needs.
      Sharding and locality can be controlled by prefixing with more bits (doable with incremental IDs also).
   b. Cons:
      Big scale deployments demand a huge ID space for files, thereby needing 128-bit hashes.
      These are usually 32-digit hex chars, taking up at least 32 bytes/ID on disk. However, base64 encoding can help shave off a few bytes and get them down to 24 bytes.
      They take up larger space in memory and on disk compared to sequential IDs. Theoretically, the compression ratio should be lower than with sequential IDs.

Key format in the column_stats partition:
- [colId][PartitionId][FileId]
- [colId]+"agg"+[PartitionId]

The first type will be used to store one entry per column per file, and the second type will be used to store one aggregated entry per column per partition. These are fixed-size keys, so lookups don't have to search for ID delimiters as in the case of incremental IDs.

These key encodings fit in well to serve our requirements. Since we are using HFile as the format, all keys are going to be sorted, and hence range reads will be very effective for our use case; we have chosen the key format consciously with this in mind.

Given a list of columns and optionally partitions, return a list of matching file names:

1. We can do a prefix search of [ColumnID] or [ColumnID][PartitionID]
   - If both the columnId and partitionIds are supplied, we will do a range read of [colId][partitionId].
   - If the list of partitions is not available as part of the query, we will first look up [colId]+"agg" to do a prefix search for partition level stats, filter for those partitions which match the predicates, and then follow (1) as in the previous line.
2. Fetch only the interested entries for the [colId][partitionId] list.
3. Look up the stats and filter for matching FileIDs.
4. Reverse lookup in the Files partition to get the FileID to FileName mapping.

Note:
As you can see here, the reverse lookup of the FileID to file name mapping has to go into the "Files" partition to satisfy our requirement. So, the "Files" partition will be extended with additional entries of fileId to fileName mappings on the write path.

#### Sharding:
Any partition in the metadata table needs to be instantiated with N file groups/shards upfront. The "Files" partition is small, and hence we went with just one file group. But for a record level index, we can't go with a single file group and have to shard the data. We will employ some kind of hashing mechanism for the key to file group mapping, as sketched below. On the write path, entries will be sharded and written to different file groups. On the read path, the key to be looked up will be hashed to find the right file group to look in. For a wild card search, all file groups will be looked up.
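A purely hypothetical sketch of such a key-to-file-group mapping (the concrete hashing scheme is still to be decided; `shardFor` and its modulo scheme are illustrative assumptions):

```java
// Hypothetical sketch: route an encoded metadata key to one of N file groups/shards.
// Writers and readers must share this function so a key is always written to, and read
// from, the same shard; a wild card search has to fan out to all shards.
static int shardFor(String encodedKey, int numFileGroups) {
  return (encodedKey.hashCode() & Integer.MAX_VALUE) % numFileGroups;
}
```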
// To be revisited.
We plan to instantiate the number of file groups in the column stats partition based on the number of columns being indexed. We can't estimate upfront the data scale to which the table might eventually grow, and hence have to go with some estimates. So a rough idea is to instantiate one file group per 10 columns being indexed, or get some rough input from the user on whether the table will be small/medium/large scale and determine it based on that.

Similar to how we generate records for the files partition in the metadata table, we will generate a HoodieMetadataRecord for the column stats partition on any commit that gets applied to the metadata table.

### Metadata generation
The existing metadata payload schema will be extended and shared for this new "column_stats" partition also. The type field will be used to detect the column stats payload record. Here is the schema for the column stats payload record.

```
 "namespace": "org.apache.hudi.avro.model",
 "type": "record",
 "name": "HoodieMetadataRecord",
 "doc": "A record saved within the Metadata Table",
 "fields": [
   {
     "name": "key",
     "type": "string"
   },
   {
     "name": "type",
     "doc": "Type of the metadata record",
     "type": "int"
   },
   { "name": "filesystemMetadata",
     .
     .
     .
   },
   {
     "name": "ColumnStatsMetadata",
     "doc": "Contains information about column statistics for all data files in the table",
     "type": [
       "null",
       {
         "type": "record",
         "name": "HoodieColumnStats",
         "fields": [
           {
             "name": "rangeLow",
             "type": [
               "null",
               "bytes"
             ],
             "doc": "Low end of the range. For now, this is a String. Based on main data table schema, we can convert it to appropriate type"
           },
           {
             "name": "rangeHigh",
             "type": [
               "null",
               "bytes"
             ],
             "doc": "High end of the range. For now, this is a String. Based on main data table schema, we can convert it to appropriate type"
           },
           {
             "name":"total_values",
             "type":["long", "null"],
             "doc" : "Stores total values for this column in the respective data file"
           },
           {
             "name":"total_nulls",
             "type":["long", "null"],
             "doc" : "Stores total null values for this column in the respective data file"
           },
           {
             "name":"total_nans",
             "type":["long", "null"],
             "doc" : "Stores total NaN values for this column in the respective data file"
           },
           {
             "name":"total_size_on_disk",
             "type":["long", "null"],
             "doc" : "Stores total size occupied by this column on disk corresponding to the respective data file"
           },
           {
             "name": "isDeleted",
             "type": "boolean",
             "doc": "True if this file has been deleted"
           }
         ]
       }
     ]
   }
```

Column stats records hold all the stats for the file. The key for the column stat record would be an encoded string as discussed earlier.

```
key = base64_encode(hash64(column name) + hash64(partition name) + hash128(file path))
key = base64_encode(hash64(column name) + "agg" + hash64(partition name))
```

While hash based IDs have quite a few desirable properties in the context of Hudi index lookups, there is an impact on column level schema changes, though. Refer to the [Schema Evolution](#Schema-Evolution) section for more details.

#### Writer flow
Let's walk through the writer flow to update the column_stats partition in the metadata table.

1. Files partition - prepare records for adding // just calling out what's required in the context of the column_stats partition. The general files partition will be updated as usual to store file listing information.
   - FileID => FileName mapping entries
   - PartitionID => PartitionName entry, if it does not already exist
   - Since these IDs are hash based IDs, no lookup of prior usages is required here. If not, we would need to know the last assigned ID and then go about assigning new incremental/sequential IDs, which slows down performance significantly
2. Column_stats partition - prepare records for adding
   - [ColumnID][PartitionID][FileID] => ColumnStat
   - [ColumnId]"agg"[PartitionId] => ColumnStat
   - This involves reading the base file footers to fetch min/max and other stats to populate values for the record.
3. Commit all these records to the metadata table.

We need to ensure we have all the sufficient info in the WriteStatus/commit metadata that gets passed to the metadata writer for every commit. Reading parquet footers and meta is unavoidable, but other than that, we should try to embed all other info in the WriteStatus.

### Index integrations with query engines

#### Spark
We already added support for z-ordering with 0.10.0, so we will re-use the data skipping code paths from there.

Here is the high level flow of z-ordering:
##### Write path
1. Sort the data (Z-order/Hilbert/Linear)
   - Being triggered by Clustering (right now)
   - RDDSpatialCurveOptimizationSortPartitioner
2. Build the "Col Stats" Index (.hoodie/.colstatsindex)
   - Upon Clustering completion we invoke ColumnStatsIndexHelper.updateColumnStatsIndexFor

##### Read path
1. (Spark SQL) Asks for a list of files to fetch data from
   - HoodieFileIndex.listFiles
2. HoodieFileIndex will read the Col Stats Index and apply the data predicates to fetch the list of candidate files from it
3. Returns it back to Spark

Given this, let's see how we can integrate the new column_stats partition.

##### Z-order Write path
1. Sort the data (Z-order/Hilbert/Linear)
   - Being triggered by Clustering (right now)
   - RDDSpatialCurveOptimizationSortPartitioner
2. Do not do anything.
   - Upon Clustering completion, the replace commit will get applied to the metadata table by default if metadata is enabled.

##### Read path
1. (Spark SQL) Asks for a list of files to fetch data from
   - HoodieFileIndex.listFiles
2. HoodieFileIndex will read the Col Stats partition in the metadata table and apply the data predicates to fetch the list of candidate files from it
3. Returns it back to Spark

One caveat: we can't get rid of the z-order index completely right away, though. If the metadata table is not built out yet, or has entered an inconsistent state and is not usable, we have to go the existing way of building an index at the end of z-order clustering.

### Predicate filtering

#### How to apply query predicates in Hudi?
Query predicates are normally constructed in a tree-like structure, so this will follow the same pattern. The proposal is to create a mapping utility from "Engine" query predicates to a HudiExpression. This way the filtering logic is engine agnostic.

For AND and OR operators we can translate to a tree node with left and right expressions. An example of what the structure would look like is shown below.

```java
public class HudiExpressionParentNode implements HudiExpression {
  HudiExpression left;
  HudiExpression right;

  @Override
  public boolean evaluate() {
    return left.evaluate() && right.evaluate();
  }
}
```

For LEAF nodes we can create an expression which contains the operator and the value we are comparing, to determine whether the file group may have data relevant to this query. The common search expressions for the leaf nodes:

1. Equal to - if the value in the search expression is greater than or equal to the lower bound and is less than or equal to the upper bound in the file's column statistics, then true, else false
2. Less than - if the value in the search expression is greater than the lower bound in the file's column statistics, then true, else false
3. Less than or equal to - if the value in the search expression is greater than or equal to the lower bound in the file's column statistics, then true, else false
4. Greater than - if the value in the search expression is lower than the upper bound in the file's column statistics, then true, else false
5. Greater than or equal to - if the value in the search expression is lower than or equal to the upper bound in the file's column statistics, then true, else false

True tells us that there is a possibility that the file contains data which matches the search expression and it should be included in the result set. False tells us that there is no possibility this file contains any data which matches the search expression and it should be excluded from the results.

```java
public abstract class HudiExpressionLeafNode<T> implements HudiExpression {

  Operator op;   // (EQ, LT, LTEQ, GT, GTEQ)
  T literal;     // (INT, DOUBLE, FLOAT value)
  String column;

  @Override
  public abstract boolean evaluate();
}
```

This way we can call evaluate on the root HudiExpression tree and it will determine whether the entire expression is satisfied for the file group.

#### Hive
In order for us to implement predicate push down in Hive we need to have access to the query predicate. The query predicate is not passed to the InputFormat by default. The HiveStoragePredicateHandler interface needs to be implemented in order to provide the query predicate to the InputFormat, and for this we need to create a custom HiveStorageHandler. Therefore we will be creating a new storage handler, HudiStorageHandler.

```java
public interface HiveStorageHandler extends Configurable {
  public Class<? extends InputFormat> getInputFormatClass();
  public Class<? extends OutputFormat> getOutputFormatClass();
  public Class<? extends AbstractSerDe> getSerDeClass();
  public HiveMetaHook getMetaHook();
  public void configureTableJobProperties(
      TableDesc tableDesc,
      Map<String, String> jobProperties);
}
```

Everything will remain the same, with the input format, output format, and serde classes being used in existing Hudi tables registered in Hive (HoodieParquetInputFormat still being used). HudiStorageHandler would implement HiveStorageHandler and HiveStoragePredicateHandler.

Hive adds the query predicate returned by the Storage Handler to the job configuration. This job configuration is then supplied to the Input Format. It can be fetched and deserialized using the following:

```java
  String hiveFilter = jobConf.get(TableScanDesc.FILTER_EXPR_CONF_STR);
  if (hiveFilter != null) {
    ExprNodeGenericFuncDesc exprNodeDesc = SerializationUtilities
        .deserializeObject(hiveFilter, ExprNodeGenericFuncDesc.class);
    SearchArgument sarg = ConvertAstToSearchArg.create(jobConf, exprNodeDesc);
  }
```

The SearchArgument contains an ExpressionTree and a list of PredicateLeaf. The ExpressionTree is a tree structure used to define the query predicate. If the operator is defined as OR, AND, or NOT, this indicates there are children expressions, normally LEAFs.

```java
public class ExpressionTree {
  public enum Operator {OR, AND, NOT, LEAF, CONSTANT}
  private final Operator operator;
  private final List<ExpressionTree> children;
  private int leaf;
}
```

If the operator in the ExpressionTree is defined as LEAF, it corresponds to a PredicateLeaf defined in the SearchArgument.
PredicateLeaf will contain information about the query predicate, such as the operator, column name, and literal which is being compared:

```java
  private final org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf.Operator operator;
  private final Type type;
  private String columnName;
  private final Object literal;
  private final List<Object> literalList;
```

We can use this information and the SearchArgument to generate our HudiExpression. Then in HoodieParquetInputFormat.listStatus(), after fetching files from the FileSystemView for the remaining file groups, we can apply the HudiExpression using the column metadata.

#### Presto
To be filled.

## Rollout/Adoption Plan

- What impact (if any) will there be on existing users?
- If we are changing behavior how will we phase out the older behavior?
- If we need special migration tools, describe them here.
- When will we remove the existing behavior?

## Test Plan

Describe in a few sentences how the RFC will be tested. How will we know that the implementation works as expected? How will we know nothing broke? \ No newline at end of file
diff --git a/rfc/rfc-37/metadata_index_1.png b/rfc/rfc-37/metadata_index_1.png new file mode 100644 index 0000000000000..40b834f40f9b8 Binary files /dev/null and b/rfc/rfc-37/metadata_index_1.png differ
diff --git a/rfc/rfc-37/metadata_index_bloom_partition.png b/rfc/rfc-37/metadata_index_bloom_partition.png new file mode 100644 index 0000000000000..8ada4b7f2c18f Binary files /dev/null and b/rfc/rfc-37/metadata_index_bloom_partition.png differ
diff --git a/rfc/rfc-37/metadata_index_col_stats.png b/rfc/rfc-37/metadata_index_col_stats.png new file mode 100644 index 0000000000000..02a77fe0dd6d2 Binary files /dev/null and b/rfc/rfc-37/metadata_index_col_stats.png differ
diff --git a/rfc/rfc-37/rfc-37.md b/rfc/rfc-37/rfc-37.md new file mode 100644 index 0000000000000..28d27b399482e --- /dev/null +++ b/rfc/rfc-37/rfc-37.md @@ -0,0 +1,329 @@

# RFC-37: Metadata based Bloom Index

## Proposers
- @nsivabalan
- @manojpec

## Approvers
- @vinothchandar
- @satishkotha

## Status
JIRA: https://issues.apache.org/jira/browse/HUDI-2703

## Abstract
Hudi maintains several indices to locate/map incoming records to file groups during writes. The most commonly used record index is the HoodieBloomIndex. Larger tables and the global index have performance issues, as the bloom filters from a large number of data files need to be read and looked up. Reading from several files over cloud object storage like S3 also faces request throttling issues. We are proposing to build a new metadata index (metadata table based bloom index) to boost the performance of the existing bloom index.

## Background

HoodieBloomIndex is used to find the location of incoming records during every write. The bloom index assists Hudi in deterministically routing records to a given file group and in distinguishing inserts vs updates. This aggregate bloom index is built from several bloom filters stored in the base file footers. Prior to the bloom filter lookup, file pruning for the incoming records is also done based on the record key min/max stats stored in the base file footers. In this RFC, we plan to build a new index for the bloom filters under the metadata table to assist in bloom index based record location tagging. This overlaps with [RFC-27 Data skipping index ](https://cwiki.apache.org/confluence/display/HUDI/RFC-27+Data+skipping+index+to+improve+query+performance) in the read path for improving the query performance.
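Before diving into the design, here is a conceptual sketch of the pruning contract a per-file bloom filter gives the index. Guava's `BloomFilter` is used purely as a stand-in for Hudi's own bloom filter implementation; the class name and keys are illustrative:

```java
// Conceptual sketch only: what a per-file bloom filter buys the index.
import com.google.common.hash.BloomFilter;
import com.google.common.hash.Funnels;
import java.nio.charset.StandardCharsets;

public class BloomPruneSketch {
  public static void main(String[] args) {
    // One bloom filter per base file, sized for the expected record count and false positive rate.
    BloomFilter<CharSequence> fileBloom =
        BloomFilter.create(Funnels.stringFunnel(StandardCharsets.UTF_8), 10_000, 0.000001);
    fileBloom.put("key-001"); // record keys written to this base file

    // false => the key is definitely NOT in the file, so the file can be skipped (a new insert).
    // true  => the key MAY be in the file: keep the file as a candidate and confirm by
    //          actually reading it (Level 3, record validation, in the lookup flow below).
    boolean candidate = fileBloom.mightContain("key-042");
    System.out.println("candidate file? " + candidate);
  }
}
```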
## Design
HoodieBloomIndex involves the following steps to find the right location of incoming records:

1. Find all the interested partitions and list all their data files.
2. File Pruning: Load the record key min/max details from all the interested data file footers. Filter files and generate the files-to-keys mapping for the incoming records based on the key ranges, using a range interval tree built from the previously loaded min/max details.
3. Bloom Filter lookup: Filter files and prune the files-to-keys mapping for the incoming keys based on the bloom filter key lookup.
4. Final lookup in the actual data files to find the right location of every incoming record.

As we can see from steps 1 and 2, we need the min and max values for "_hoodie_record_key" and the bloom filters from all interested data files to perform the location tagging. In this design, we will add these key stats and the bloom filters to the metadata table, and can thereby quickly load the interested details and do faster lookups.

The metadata table already has one partition `files` to help in partition file listing. For the metadata table based indices, we are proposing to add the following two new partitions:
1. `bloom_filter` - for the file level bloom filters
2. `column_stats` - for the key range stats

Why the metadata table:
The metadata table uses HBase HFile - the tree map file format - to store and retrieve data. HFile is an indexed file format and supports map-like fast lookups by keys. Since we will be storing stats/bloom filters for every file and the index will do lookups based on files, we should be able to benefit from the faster lookups in HFile.

High Level Metadata Index Design

The following sections will talk about the different partitions and key formats, and then dive into the data and control flows.

### MetaIndex/BloomFilter:

A new partition `bloom_filter` will be added under the metadata table. Bloom filters from all the base files in the data table will be added here. The metadata table is already in the HFile format. The existing metadata payload schema will be extended and shared for this partition also. The type field will be used to detect the bloom filter payload record. Here is the schema for the bloom filter payload record.
```
  {
    "doc": "Metadata Index of bloom filters for all data files in the user table",
    "name": "BloomFilterMetadata",
    "type": [
      "null",
      {
        "doc": "Data file bloom filter details",
        "name": "HoodieMetadataBloomFilter",
        "type": "record",
        "fields": [
          {
            "doc": "Bloom filter type code",
            "name": "type",
            "type": "string"
          },
          {
            "doc": "Instant timestamp when this metadata was created/updated",
            "name": "timestamp",
            "type": "string"
          },
          {
            "doc": "Bloom filter binary byte array",
            "name": "bloomFilter",
            "type": "bytes"
          },
          {
            "doc": "Bloom filter entry valid/deleted flag",
            "name": "isDeleted",
            "type": "boolean"
          }
        ]
      }
    ]
  }
```

The key for the bloom filter record would be an encoded string representing the partition and base file combo. The partition and the file names are converted to deterministic hash based IDs, and then they are base64 encoded. Hash based IDs are easy to generate for the incoming new insert records and for the lookup of updated records. They don't need any dictionary to be added for reverse lookups. The hash bits are chosen based on the cardinality and the collision probability desired to support maximum-scale deployments.
Base64 encoding the hash IDs further reduces the on-disk storage space for these keys.

```
key = base64_encode(concat(hash64(partition name), hash128(file name)))
```

Bloom filter partition

### MetaIndex/ColumnStats:

Another new partition `column_stats` will also be added under the metadata table to make the record key lookup code path much more performant. This metadata index also helps in data skipping (please look at RFC-27 for more details). In the context of faster record key lookups for the update use cases, we propose the `column_stats` index to be used for file pruning when generating the file-to-candidate-keys mapping for the update records. The existing metadata payload schema will be extended and shared for this partition also. The type field will be used to detect the column stats payload record. Here is the schema for the column stats payload record.

```
  {
    "doc": "Metadata Index of column statistics for all data files in the user table",
    "name": "ColumnStatsMetadata",
    "type": [
      "null",
      {
        "doc": "Data file column statistics",
        "name": "HoodieColumnStats",
        "type": "record",
        "fields": [
          {
            "doc": "File name for which this column statistics applies",
            "name": "fileName",
            "type": [
              "null",
              "string"
            ]
          },
          {
            "doc": "Minimum value in the range. Based on user data table schema, we can convert this to appropriate type",
            "name": "minValue",
            "type": [
              "null",
              "string"
            ]
          },
          {
            "doc": "Maximum value in the range. Based on user data table schema, we can convert it to appropriate type",
            "name": "maxValue",
            "type": [
              "null",
              "string"
            ]
          },
          {
            "doc": "Total count of values",
            "name": "valueCount",
            "type": [
              "null",
              "long"
            ]
          },
          {
            "doc": "Total count of null values",
            "name": "nullCount",
            "type": [
              "null",
              "long"
            ]
          },
          {
            "doc": "Total storage size on disk",
            "name": "totalSize",
            "type": [
              "null",
              "long"
            ]
          },
          {
            "doc": "Total uncompressed storage size on disk",
            "name": "totalUncompressedSize",
            "type": [
              "null",
              "long"
            ]
          },
          {
            "doc": "Column range entry valid/deleted flag",
            "name": "isDeleted",
            "type": "boolean"
          }
        ]
      }
    ]
  }
```

Column stats records hold the key ranges (min and max) for the file. The key for the column stat record would be an encoded string representing the tuple of column name, partition name and the base file. The string names of these fields are converted to deterministic hash based IDs, and then they are base64 encoded, just like the bloom filter key.

```
key = base64_encode(concat(hash64(column name), hash64(partition name), hash128(file name)))
```

While hash based IDs have quite a few desirable properties in the context of Hudi index lookups, there is an impact on column level schema changes, though. Refer to the [Schema Evolution](#Schema-Evolution) section for more details.

The picture below gives a pictorial representation of the column stats partition in the metadata table.
Column Stats Partition

### Metadata Index lookup:

For the incoming upsert records, given their keys, tag their current location. The new algorithm for the index lookup would be:

1. Generate the list of partitions and the list of keys under each partition to be looked up
2. For all the involved partitions, load their file lists
3. Level 1: Range pruning using the `column_stats` index:
   1. For each record key, generate the column stats index lookup key based on the tuple (__hoodie_record_key, partition name, file path)
   2. Meta index lookup with the above key and, if available, get the value payload with the column stats details
   3. Prune the partition and its candidate files based on the range comparisons
4. Level 2: Record pruning using the `bloom_filter` index:
   1. From the shortlisted file candidates per partition, generate the bloom filter index lookup key based on the tuple (partition name, file path)
   2. Meta index lookup with the above key to load the base file bloom filter
   3. Bloom filter lookup for the record key to generate the candidate keys that are probably available in the base file
5. Level 3: Record validation
   1. Given the list of files and their candidate keys from the above pruning, do the actual file lookup to confirm the keys
   2. Return the location (file id) of the final matching keys

### Schema Evolution:

HashID based keys are deterministically generated from the tuple input. That is, for the tuple consisting of column name, partition name and file name, the key generated would always be the same. So, a table where the schema gets changed over time would have an impact on the keys already generated. The most common schema evolution use cases, like changing a column type or adding a new column, are not affected though. Other relatively uncommon use cases, like renaming a column, or dropping a column and adding a new column with the dropped name, would have indices referring to them more than needed. This would lead to the index lookup matching stale/new records across evolved schemas.

To avoid looking up stale/new index records, here are the design options we have:
1. (Preferred) Query rewrite / Result recordset pruning
   1. The schema evolution layer should introduce a query rewrite stage to detect evolved schemas for the input query and optionally include additional predicates in the query
   2. The resultant recordset can also be pruned based on the commit time and the schema change time
2. Making the input tuple set schema aware
   1. Along with the column name, partition name and file path, a version/tag can also be added to make the generated key very schema specific. But this choice has a performance impact, as the lookup now has to be more of a prefix scan instead of pointed lookups. That is, the index lookup has to return records for all the versions/tags, and pruning has to be done on top of this.

## Implementation

1. No change to the HoodieIndex public interface.
```
  /**
   * Looks up the index and tags each incoming record with a location of a file that contains
   * the row (if it is actually present).
   */
  @PublicAPIMethod(maturity = ApiMaturityLevel.EVOLVING)
  public abstract <R> HoodieData<HoodieRecord<R>> tagLocation(
      HoodieData<HoodieRecord<R>> records, HoodieEngineContext context,
      HoodieTable hoodieTable) throws HoodieIndexException;
```
2. HoodieBloomIndex::explodeRecordsWithFileComparisons() will be extended to check for a new config `hoodie.metadata.file_pruning.enable` and, if enabled, the metadata table based column stats will be used for file pruning based on key ranges.
3.

### Writer flow:
Let's walk through the writer flow to update these partitions.

Whenever a new commit is getting applied to the metadata table, we do the following:
1. Files partition - prepare records for adding
2. Column_stats partition - prepare records for adding
   [ColumnIndexID][PartitionIndexID][FileIndexID] => ColumnStats
   This involves reading the base file footers to fetch the min/max values for each column
3. Bloom_filter partition - prepare records for adding
   [PartitionIndexID][FileIndexID] => BloomFilter
   This involves reading the base file footers.
   We can amortize the cost across (2) and (3) and just read the footers once and prepare/populate records for both partitions.
4. Commit all these records to the metadata table.

We need to ensure we have all the sufficient info in the WriteStatus that gets sent to the metadata writer for every commit.

### Reader flow:
When a new batch of writes is ingested into Hudi, we need to tag the records with their original file group location. This index will leverage both of the partitions to deduce the record key => file name mappings. Refer to the Metadata Index lookup section for more details.

## Rollout/Adoption Plan
* Release 0.10.0 is a flag-day release, meaning the old metadata table will be wiped out and a new one will be built.
* The Metadata Index feature is planned for a 0.10.x version. Any preparatory changes/features (like `Metadata new indexing for existing tables`, RFC proposal and doc pending) that are needed to have this feature in the later minor release need to be rolled out as part of 0.10.0
* TODO: More details on the rollout plan

## Test Plan
* Functionality
  * Tag location for existing keys
  * Tag location for non-existing keys
* Performance
  * Prove Metadata based indices are helping upsert use cases
* Upgrade
* TODO: More details on the test plan
diff --git a/rfc/rfc-38/1.png b/rfc/rfc-38/1.png new file mode 100644 index 0000000000000..44238888c9b81 Binary files /dev/null and b/rfc/rfc-38/1.png differ
diff --git a/rfc/rfc-38/rfc-38.md b/rfc/rfc-38/rfc-38.md new file mode 100644 index 0000000000000..d007bd0b663da --- /dev/null +++ b/rfc/rfc-38/rfc-38.md @@ -0,0 +1,283 @@

# RFC-38: Spark Datasource V2 Integration

## Proposers

- @leesf

## Approvers
- @vinothchandar
- @xishiyan
- @YannByron

## Status

JIRA: https://issues.apache.org/jira/browse/HUDI-1297

## Abstract

Today, Hudi still uses the DataSource V1 API and relies heavily on the RDD API to index, repartition and so on; given the flexibility of the RDD API, this works fine under V1. Using the DataSource V1 API, Hudi provides complete read/write and update capabilities along with automatic small file handling, and all of this works well. However, the DataSource V2 API has continued to develop and has now stabilized. Taking into account that the V1 API is old and the Spark community no longer invests significant resources in maintaining it, we should consider migrating to the DataSource V2 API, using more of the pushdown filters provided by the V2 API and integrating with [RFC-27](https://cwiki.apache.org/confluence/display/HUDI/RFC-27+Data+skipping+index+to+improve+query+performance) to provide more powerful query capabilities. We could also continue to benefit as the V2 API evolves and is optimized further.


## Background

The current Hudi read and write paths use the DataSource V1 API, and the implementation class is `DefaultSource`.

```scala
/**
* Hoodie Spark Datasource, for reading and writing hoodie tables
*
*/
class DefaultSource extends RelationProvider
  with SchemaRelationProvider
  with CreatableRelationProvider
  with DataSourceRegister
  with StreamSinkProvider
  with StreamSourceProvider
  with Serializable {
...
}
```

As for writing (batch write), the following method will be called:
```scala
override def createRelation(sqlContext: SQLContext,
                            mode: SaveMode,
                            optParams: Map[String, String],
                            df: DataFrame): BaseRelation = {
  val parameters = HoodieWriterUtils.parametersWithWriteDefaults(optParams)
  val translatedOptions = DataSourceWriteOptions.translateSqlOptions(parameters)
  val dfWithoutMetaCols = df.drop(HoodieRecord.HOODIE_META_COLUMNS.asScala:_*)

  if (translatedOptions(OPERATION.key).equals(BOOTSTRAP_OPERATION_OPT_VAL)) {
    HoodieSparkSqlWriter.bootstrap(sqlContext, mode, translatedOptions, dfWithoutMetaCols)
  } else {
    HoodieSparkSqlWriter.write(sqlContext, mode, translatedOptions, dfWithoutMetaCols)
  }
  new HoodieEmptyRelation(sqlContext, dfWithoutMetaCols.schema)
}
```

Regarding querying, the following method will return a `BaseRelation` (if no schema is provided):

```scala
override def createRelation(sqlContext: SQLContext,
                            parameters: Map[String, String]): BaseRelation = {
  createRelation(sqlContext, parameters, null)
}
```

For streaming writing and reading, `DefaultSource#createSink` and `DefaultSource#createSource` are called respectively. In the 0.9.0 release, the bulk_insert row mode was introduced to speed up bulk_insert; it implements the `SupportsWrite` V2 API and uses `HoodieDataSourceInternalTable` for writing. Right now only the bulk_insert operation is supported.

## Implementation

Spark provides a complete V2 API, with interfaces such as `CatalogPlugin`, `SupportsWrite`, `SupportsRead`, and various pushdown filters, such as `SupportsPushDownFilters`, `SupportsPushDownAggregates`, `SupportsPushDownRequiredColumns`.

We would define the key abstraction, called `HoodieInternalV2Table`, which implements the `Table`, `SupportsWrite`, `SupportsRead` interfaces to provide writing and reading capabilities.

### Writing Path

Hudi relies heavily on some RDD APIs on the write path, such as the indexing that determines whether a record is an update or an insert; migrating this to the V2 write path would be a relatively large, or even impossible, piece of refactoring under the DataSource V2 API. So we can fall back to the V1 write path, since Spark 3.2.0 provides the `V1Write` interface to bridge the V1 and V2 APIs.

The writing path code snippet is below:

```scala
class HoodieInternalV2Table extends Table with SupportsWrite with V2TableWithV1Fallback {

  override def name(): String = {
    //
  }

  override def schema(): StructType = {
    // get hudi table schema
  }

  override def partitioning(): Array[Transform] = {
    // get partitioning of hudi table.
  }

  override def capabilities(): Set[TableCapability] = {
    // Set(BATCH_WRITE, BATCH_READ,TRUNCATE,...)
  }

  override def newWriteBuilder(info: LogicalWriteInfo): WriteBuilder = {
    // HoodieV1WriteBuilder
  }
}
```

The definition of `HoodieV1WriteBuilder` is shown below.
```scala
private class HoodieV1WriteBuilder(writeOptions: CaseInsensitiveStringMap,
                                   hoodieCatalogTable: HoodieCatalogTable,
                                   spark: SparkSession)
  extends SupportsTruncate with SupportsOverwrite with ProvidesHoodieConfig {

  override def truncate(): HoodieV1WriteBuilder = {
    this
  }

  override def overwrite(filters: Array[Filter]): WriteBuilder = {
    this
  }

  override def build(): V1Write = new V1Write {
    override def toInsertableRelation: InsertableRelation = {
      // InsertableRelation
    }
  }
}
```

### Querying path

For V2 querying, Spark provides various pushdown filters, such as `SupportsPushDownFilters`, `SupportsPushDownAggregates`, `SupportsPushDownRequiredColumns`, `SupportsRuntimeFiltering` and so on, which are clearer and more flexible than the V1 interface. Also, the V2 interface provides the capability to read columnar format files such as parquet and orc. Moreover, the V2 interface gives users control over how the input is split and over the number of partitions, which makes it possible to produce more accurate splits and accelerate query speed on the Hudi side. However, for querying, in the first stage we also fall back to the V1 read path, which means we need to convert `DataSourceV2Relation` to `DefaultSource` in the analysis stage to keep the changes well controlled. The code snippet is shown below; `HoodieSpark3Analysis` should be injected if the Spark version is 3.2.0 or later.

```scala

case class HoodieSpark3Analysis(sparkSession: SparkSession) extends Rule[LogicalPlan]
  with SparkAdapterSupport with ProvidesHoodieConfig {

  override def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperatorsDown {
    case dsv2@DataSourceV2Relation(d: HoodieInternalV2Table, _, _, _, _) =>
      val output = dsv2.output
      val catalogTable = if (d.catalogTable.isDefined) {
        Some(d.v1Table)
      } else {
        None
      }
      val relation = new DefaultSource().createRelation(new SQLContext(sparkSession),
        buildHoodieConfig(d.hoodieCatalogTable))
      LogicalRelation(relation, output, catalogTable, isStreaming = false)
  }
}

```
In the second stage, we would make use of the V2 reading interface and define `HoodieBatchScanBuilder` to provide querying capability. The workflow of the querying process is shown in the figure below. The `PartitionReaderFactory` is located in the Driver and the `PartitionReader` in the Executors.

![](./1.png)

The querying path code sample is below:

```scala
class HoodieBatchScanBuilder extends ScanBuilder with SupportsPushDownFilters with SupportsPushDownRequiredColumns {
  override def build(): Scan = {
    // HoodieScan
  }

  override def pushFilters(filters: Array[Filter]): Array[Filter] = {
    // record the filters
  }

  override def pushedFilters(): Array[Filter] = {
    // pushed filters
  }

  override def pruneColumns(requiredSchema: StructType): Unit = {
    // record the pruned columns
  }
}
```

### Table Meta Management

We implement the `CatalogPlugin` interface to manage the metadata of Hudi tables and define the core abstraction called `HoodieCatalog`; the code sample is below.
```scala
class HoodieCatalog extends DelegatingCatalogExtension
  with StagingTableCatalog {

  override def loadTable(ident: Identifier): Table = {
    // HoodieDatasourceTable
  }

  override def createTable(ident: Identifier,
                           schema: StructType,
                           partitions: Array[Transform],
                           properties: util.Map[String, String]): Table = {
    // create hudi table
  }

  override def dropTable(ident: Identifier): Boolean = {
    // drop hudi table
  }

  override def alterTable(ident: Identifier, changes: TableChange*): Table = {
    // check schema compatibility
    // HoodieDatasourceTable
  }

  override def stageReplace(ident: Identifier,
                            schema: StructType,
                            partitions: Array[Transform],
                            properties: util.Map[String, String]): StagedTable = {
    // StagedHoodieTable
  }

  override def stageCreateOrReplace(ident: Identifier,
                                    schema: StructType,
                                    partitions: Array[Transform],
                                    properties: util.Map[String, String]): StagedTable = {
    // StagedHoodieTable
  }
}
```

Users would set the Spark session config `spark.sql.catalog.spark_catalog` to `org.apache.hudi.catalog.HoodieCatalog` to load the `HoodieCatalog` to manage hudi tables.

## Rollout/Adoption Plan

- What impact (if any) will there be on existing users?

There is no impact on existing users, but users would specify the new catalog to manage hudi tables or other tables.

- If we are changing behavior how will we phase out the older behavior?

We should keep compatibility with the V1 version and make it transparent for users to migrate to the V2 API.

## Test Plan

[ ] PoC for catalog plugin
[ ] PoC for writing path with UTs
[ ] PoC for querying path with UTs
[ ] E2E tests
[ ] Benchmark for v1 and v2 writing and querying \ No newline at end of file
diff --git a/rfc/rfc-40/Hudi_Connector.png b/rfc/rfc-40/Hudi_Connector.png new file mode 100644 index 0000000000000..ddb388da4548d Binary files /dev/null and b/rfc/rfc-40/Hudi_Connector.png differ
diff --git a/rfc/rfc-40/rfc-40.md b/rfc/rfc-40/rfc-40.md new file mode 100644 index 0000000000000..2525071551264 --- /dev/null +++ b/rfc/rfc-40/rfc-40.md @@ -0,0 +1,282 @@

# RFC-40: Hudi Connector for Trino

## Proposers

- @codope
- @yihua

## Approvers

- @bvaradar
- @vinothchandar

## Status

JIRA: https://issues.apache.org/jira/browse/HUDI-2687

> Please keep the status updated in `rfc/README.md`.

## Abstract

Today, Hudi supports snapshot queries on Copy-On-Write (COW) tables and read-optimized queries on Merge-On-Read (MOR) tables with Trino, through the input format based integration in the Hive connector. This approach has known performance limitations with very large tables. Moreover, as Hudi keeps getting better, a new plugin to provide access to Hudi data and metadata will help in unlocking capabilities such as metadata-based listing, full schema evolution, etc. for the Trino users. A separate Hudi connector would also allow its independent evolution without having to worry about hacking/breaking the Hive connector. A separate connector also falls in line with our vision when we think of a standalone timeline server or a lake cache to balance the tradeoff between writing and querying.

## Background

The current Trino integration relies on a custom annotation `@UseFileSplitsFromInputFormat`. Any input format that has this annotation would fetch splits by invoking the corresponding input format's `getSplits()` method instead of Trino's Hive connector native split loading logic.
For instance, for realtime queries on Hudi tables via Trino, this would be a simple call to `HoodieParquetRealtimeInputFormat.getSplits()`. This approach has known performance limitations due to the way Trino's split loading is designed, causing redundant Hudi table metadata listing while loading splits. This issue has been fixed in Presto and the work to upstream those changes to Trino is [in progress](https://github.com/trinodb/trino/pull/9641).

A connector enables Trino to communicate with external data sources. The connector interface is composed of four parts: the Metadata API, Data Location API, Data Source API, and Data Sink API. These APIs are designed to allow performant implementations of connectors within the environment of Trino's distributed execution engine. For an overview of the Trino architecture please see [Trino concepts](https://trino.io/docs/current/overview/concepts.html).

### Trino query execution model

When Trino executes a query, it does so by breaking up the execution into a hierarchy of **stages**. A single stage is implemented as a series of **tasks** distributed over a network of Trino workers. Tasks operate on **splits**, which are partitions of a larger data set. Tasks at the source stage produce data in the form of **pages**, which are a collection of rows in columnar format. These pages flow to other intermediate downstream stages.

## Implementation

Trino provides a service provider interface (SPI), which is a type of API used to implement a connector. By implementing the SPI in a connector, Trino can use standard operations internally to connect to any data source and perform operations on any data source. The connector takes care of the details relevant to the specific data source.

The Hudi connector will implement three parts of the API:

- Operations to fetch table/view/schema metadata.
- Operations to produce logical units of data partitioning, so that Trino can parallelize reads and writes.
- Data sources and sinks that convert the source data to/from the in-memory format expected by the query engine.

The Hudi connector will be registered as a plugin, which will be loaded by the Trino server at startup. The entry point will be `HudiPlugin`, an implementation of the `Plugin` interface. Instances of the Hudi connector are created by a `ConnectorFactory` instance, which is created when Trino calls `getConnectorFactory()` on the plugin. A class-diagrammatic view of the different components is shown below.
![](Hudi_Connector.png)

### Operations to fetch table/view/schema metadata

The `ConnectorMetadata` interface provides important methods that are responsible for allowing Trino to look at lists of schemas, lists of tables, lists of columns, and other metadata about a particular data source. The implementation of this interface will create the `HoodieTableMetaClient` and pass it to the connector table handle, through which Trino can access the metadata of a Hudi table.


### Operations to produce logical units of data partitioning

We will need to implement the `ConnectorSplit` and `ConnectorSplitManager` interfaces. Hudi splits will be similar to how the Hive connector describes splits: in the form of a path to a file with an offset and length that indicate which part of the file needs to be processed.
```java
public class HudiSplit
    implements ConnectorSplit {
  private final String path;
  private final long start;
  private final long length;
  private final long fileSize;
  private final List<HostAddress> addresses;
  private final TupleDomain<HiveColumnHandle> predicate;
  private final List<HivePartitionKey> partitionKeys;
  private final SplitWeight splitWeight;
}
```

The split manager will partition the data for a table into the individual chunks that Trino will distribute to workers for processing. This is where the partition loader logic will reside. While listing the files for each Hudi partition, the split manager will create one or more splits per file. Additionally, split generation is dynamic based on size to further improve the performance (see [query planning optimization](#query-planning-optimization) for more details).

During query execution, the Trino coordinator tracks all splits available for processing and the locations where tasks are running on workers and processing splits. As tasks finish processing and are producing more splits for downstream processing, the coordinator continues to schedule tasks until no splits remain for processing. Once all splits are processed on the workers, all data is available, and the coordinator can make the result available to the client.

To support file listing for the different query modes in Hudi, i.e., Read Optimized, Snapshot, and Incremental, the Hudi connector provides the abstraction of `HudiFileListing`, which can be extended to contain the custom logic of generating the particular partitions to scan for a query and the file listing for a partition. The `HudiFileListing` abstraction relies on `HudiPartitionInfo` to get the information of a partition, including the relative partition path, the partition name based on the Hive Metastore, the key-value pairs of this partition, and predicates for the partition columns. We plan to support the Read Optimized query for COW tables first. In the future, we'd like to merge the file listing abstraction into the Hudi repo so that such common file listing functionality can be reused across different query engines.

```java
public abstract class HudiFileListing {
  public abstract List<HudiPartitionInfo> getPartitionsToScan();
  public abstract List<FileStatus> listStatus(HudiPartitionInfo partitionInfo);
}

public abstract class HudiPartitionInfo {
  protected final Table table;
  protected final List<HiveColumnHandle> partitionColumnHandles;
  protected final TupleDomain<HiveColumnHandle> constraintSummary;
  // Relative partition path
  protected String relativePartitionPath;
  // Hive partition name containing partition column key-value pairs
  protected String hivePartitionName;
  // List of partition keys containing column key-value pairs
  protected List<HivePartitionKey> hivePartitionKeys;
}
```

### Data source

As mentioned in the query execution model, tasks in the source stage produce data in the form of pages. The Connector Data Source API returns pages when it is passed a split, and operators typically consume input pages, perform computation, and produce output pages. This is where we will implement the `ConnectorPageSourceProvider` interface to create the page source.

```java
public class HudiPageSourceProvider
    implements ConnectorPageSourceProvider {
  private final HdfsEnvironment hdfsEnvironment;
  private final FileFormatDataSourceStats fileFormatDataSourceStats;
  private final ParquetReaderOptions parquetReaderOptions;
  private final DateTimeZone timeZone;
}
```

We could have different page sources for different base file formats like parquet, orc and avro.
+### Data source
+
+As mentioned in the query execution model, tasks in the source stage produce data in the form of pages. The Connector
+Data Source API returns pages when it is passed a split, and operators typically consume input pages, perform
+computation, and produce output pages. This is where we will implement the `ConnectorPageSourceProvider` interface to
+create the page source.
+
+```java
+public class HudiPageSourceProvider
+        implements ConnectorPageSourceProvider {
+    private final HdfsEnvironment hdfsEnvironment;
+    private final FileFormatDataSourceStats fileFormatDataSourceStats;
+    private final ParquetReaderOptions parquetReaderOptions;
+    private final DateTimeZone timeZone;
+}
+```
+
+We could have different page sources for different base file formats like Parquet, ORC, and Avro. To adapt to these
+different formats, we add an abstraction named `HudiPageSourceCreator` so that each base file format has its
+corresponding logic to create a `ConnectorPageSource` instance. For the Parquet format, we plan to implement
+`HudiParquetPageSourceCreator` by extending `HudiPageSourceCreator` and reusing the `ParquetPageSource` creation in the
+Hive connector. This has the advantage of using Trino's custom `ParquetReader`, which can efficiently skip data sections
+by using statistics in file headers/footers. This is also where we will handle the column projections and build
+predicates for the Parquet reader.
+
+```java
+public abstract class HudiPageSourceCreator {
+    public abstract ConnectorPageSource createPageSource(
+            Configuration configuration,
+            ConnectorIdentity identity,
+            List<HiveColumnHandle> regularColumns,
+            HudiSplit hudiSplit);
+}
+```
+
+### Snapshot queries on MOR table
+
+This requires merging base files and log files.
+One way is to use the `HoodieRealtimeRecordReader`, which can do compacted reading.
+However, this means we would have to give up Trino's optimized Parquet reader.
+Another way is to enumerate the merged splits and use the native reader.
+This can be done in `HoodieRealtimeInputFormatUtils#getRealtimeSplits()`, which is invoked in `HoodieParquetRealtimeInputFormat`.
+We can reuse this logic for reading MOR tables via the connector.
+
+In summary, the Trino coordinator uses the metadata and split manager APIs to gather information about the table and partitions to
+generate a query plan and logical splits of the table contents. Each split is processed by a task in a Trino worker.
+Here, workers invoke the page source APIs as tasks produce data in the form of pages.
+Subsequently, the native (Parquet) reader reads the blocks of pages while executing the query.
+
+## Query Planning Optimization
+
+We make several design decisions to optimize the query planning in the Hudi connector.
+
+### Background loading of Hudi splits
+
+Simply fetching all Hudi splits in a single thread synchronously in the `HudiSplitSource` significantly degrades
+query performance, since the Trino coordinator cannot hand out the splits to the workers for execution until all the
+splits are generated. To remove the bottleneck, we add a background split loader, `HudiSplitBackgroundLoader`, to
+load the Hudi splits asynchronously. Once the query planning begins, the background split loader is initialized and
+starts to run immediately, regardless of whether the coordinator has asked for the next batch of splits (i.e.,
+`HudiSplitSource::getNextBatch`). The background loader keeps adding newly generated splits to an internal connector
+split queue. When the coordinator asks for the next batch of splits by calling `HudiSplitSource::getNextBatch`, the
+method fetches the available splits from the internal connector split queue.
+
+The background split loader internally has a pipeline of processing (a sketch of this structure follows below):
+- Fetching partition information: this step collects the information of all the partitions that need to be read for
+file listing.
+- Listing files in partitions: this step lists all the files per partition. Since each partition is independent of
+another, we list each partition in a concurrent manner. To improve the performance of file listing, there is a thread
+pool in which each thread takes a partition from a queue and does the file listing, until all the partitions are processed.
+- Generating splits from each file: this step generates the splits from the files listed in the second step. Similarly,
+there is a thread pool in which each thread takes a file from a queue and does the split generation.
+
+The background loader keeps track of the progress of split generation and reports the status to `HudiSplitSource`.
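+
+A minimal sketch of this producer/consumer structure is shown below, reusing the `createSplits`
+helper sketched earlier. The class shape, pool size, and queue wiring are assumptions of this
+sketch; only the JDK concurrency types are real.
+
+```java
+import java.util.List;
+import java.util.concurrent.BlockingQueue;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+
+public class HudiSplitBackgroundLoader
+        implements Runnable {
+    private final HudiFileListing fileListing;
+    // Shared with HudiSplitSource, whose getNextBatch() drains this queue.
+    private final BlockingQueue<HudiSplit> splitQueue;
+    // Worker pool for the file-listing and split-generation stages.
+    private final ExecutorService executor = Executors.newFixedThreadPool(8);
+
+    public HudiSplitBackgroundLoader(HudiFileListing fileListing, BlockingQueue<HudiSplit> splitQueue) {
+        this.fileListing = fileListing;
+        this.splitQueue = splitQueue;
+    }
+
+    @Override
+    public void run() {
+        // Stage 1: collect the information of all partitions to be read.
+        List<HudiPartitionInfo> partitions = fileListing.getPartitionsToScan();
+        // Stages 2 and 3: each task lists one partition's files and generates the
+        // splits from them, pushing results onto the shared queue so that the
+        // coordinator can start handing out splits before loading completes.
+        for (HudiPartitionInfo partition : partitions) {
+            executor.submit(() -> splitQueue.addAll(HudiSplitFactory.createSplits(fileListing, partition)));
+        }
+    }
+}
+```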
+### Batching Hive metastore calls
+
+It is expensive to make RPC calls to the Hive metastore to fetch information. For example, using `HiveMetastore::getPartition`
+to get the information of a single partition takes around 100ms. Parallelizing the RPC calls to the metastore is not enough
+to meet the performance requirements; e.g., it takes 5-6 seconds to get the information of 200 partitions using
+`HiveMetastore::getPartition`, even with parallelism, due to the bottleneck at the Hive metastore serving the calls.
+
+To address this issue, we batch the partition information fetching using `HiveMetastore::getPartitionsByNames`. Instead
+of fetching the information of one partition per call, this method provides the ability to fetch the information of
+multiple partitions per call. In this way, the number of calls to the Hive metastore to fetch the information of all
+partitions can be drastically reduced. We use an exponentially increasing batch size, starting from 10, with a maximum
+of 100, i.e., the batch size sequence `10, 20, 40, 80, 100, 100, ...`. Using this optimization, it only takes
+around 500ms to get the information of 200 partitions.
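+
+The batching itself is straightforward; a small sketch of the slicing logic (the helper name is
+illustrative) is shown below. Each resulting batch then feeds one `HiveMetastore::getPartitionsByNames` call.
+
+```java
+import java.util.ArrayList;
+import java.util.List;
+
+public final class MetastorePartitionBatching {
+    private MetastorePartitionBatching() {}
+
+    // Slice the partition names into batches whose sizes grow exponentially
+    // (10, 20, 40, 80, 100, 100, ...), so the first results arrive quickly
+    // while later calls amortize the metastore round-trip cost.
+    public static List<List<String>> toBatches(List<String> partitionNames) {
+        List<List<String>> batches = new ArrayList<>();
+        int batchSize = 10;
+        int position = 0;
+        while (position < partitionNames.size()) {
+            int end = Math.min(position + batchSize, partitionNames.size());
+            batches.add(partitionNames.subList(position, end));
+            position = end;
+            batchSize = Math.min(batchSize * 2, 100);
+        }
+        return batches;
+    }
+}
+```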
+### Dynamic size-based split weight
+
+Trino schedules a batch of splits for the page source provider to create pages. Trino decides the number
+of splits in the batch using a quota of 100. By default, each split has a uniform weight of 1, and thus each batch has
+100 splits. If the splits are small in size, there may not be enough splits in the workers for processing, leading
+to inefficient execution. Like the Hive split, the Hudi split incorporates a size-based split weight so that smaller
+splits get lower weights. Trino then packs more splits into a batch if each has a smaller size, thus guaranteeing that
+each batch has enough data to process.
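+
+A sketch of such a weight provider, modeled on the Hive connector's approach, is shown below.
+The class shape and parameter values are illustrative assumptions; `SplitWeight.fromProportion`
+is the Trino SPI method for proportional weights.
+
+```java
+import io.trino.spi.SplitWeight;
+
+public class SizeBasedSplitWeightProvider {
+    private final double minimumWeight;          // e.g., 0.05 (assumed value)
+    private final double targetSplitSizeInBytes; // e.g., 128 * 1024 * 1024 (assumed value)
+
+    public SizeBasedSplitWeightProvider(double minimumWeight, long targetSplitSizeInBytes) {
+        this.minimumWeight = minimumWeight;
+        this.targetSplitSizeInBytes = targetSplitSizeInBytes;
+    }
+
+    public SplitWeight calculateSplitWeight(long splitSizeInBytes) {
+        // Weight is proportional to the split's share of the target size,
+        // clamped so tiny splits still carry a minimum cost and no split
+        // exceeds the standard weight of 1.
+        double proportion = splitSizeInBytes / targetSplitSizeInBytes;
+        return SplitWeight.fromProportion(Math.min(Math.max(proportion, minimumWeight), 1.0));
+    }
+}
+```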
+### Improving listing
+
+In order to improve listing, we assume that the path exists,
+and so we bypass the `FileSystem#exists` check in `AbstractHoodieTableFileSystemView` while fetching the latest base files.
+The connector will also support metadata-based listing, which will retrieve partition listings from Hudi's internal metadata table.
+This should further help improve the performance.
+
+## Rollout/Adoption Plan
+
+- What impact (if any) will there be on existing users?
+
+There will be no impact on existing users because this is a new connector. It does not change the behavior of the current
+integration through the existing Hive connector. It gives users more choice.
+
+- What do we lose if we move away from the Hive connector?
+
+The Hive connector takes advantage of [caching](https://trino.io/docs/current/connector/hive-caching.html) to reduce the load on
+object storage. We will need to use or implement a caching file system like [Rubix](https://github.com/qubole/rubix) that
+is optimized for columnar formats and object stores. This is being tracked by [HUDI-3339](https://issues.apache.org/jira/browse/HUDI-3339).
+
+- If we need special migration tools, describe them here.
+
+The implementation assumes that Hudi tables are synced to Hive. There is no Trino support for migrating Hive tables to
+Hudi, so we need to either use the Hudi APIs or write custom Spark jobs to migrate the tables to Hudi.
+
+- When will we remove the existing behavior?
+
+We are not proposing to remove the existing behavior. We hope that we will have a critical mass of users who would like
+to use the new Hudi connector. That said, we would continue to support the current integration.
+
+## Test Plan
+
+- [x] POC for snapshot query on COW table
+- [x] Unit tests for the connector
+- [ ] Product integration tests
+- [x] Benchmark snapshot query for large tables
diff --git a/rfc/rfc-46/rfc-46.md b/rfc/rfc-46/rfc-46.md
new file mode 100644
index 0000000000000..8b0feff2343db
--- /dev/null
+++ b/rfc/rfc-46/rfc-46.md
@@ -0,0 +1,159 @@
+
+# RFC-46: Optimize Record Payload handling
+
+## Proposers
+
+- @alexeykudinkin
+
+## Approvers
+ - @vinothchandar
+ - @nsivabalan
+ - @xushiyan
+
+## Status
+
+JIRA: https://issues.apache.org/jira/browse/HUDI-3217
+
+> Please keep the status updated in `rfc/README.md`.
+
+## Abstract
+
+Avro has historically been a centerpiece of the Hudi architecture: it is the default representation that many components expect
+when dealing with records (during merge, column value extraction, writing into storage, etc.).
+
+While having a single format for the record representation certainly makes the implementation of some components simpler,
+it bears the unavoidable performance penalty of a de-/serialization loop: every record handled by Hudi has to be converted
+from a (low-level) engine-specific representation (`Row` for Spark, `RowData` for Flink, `ArrayWritable` for Hive) into an intermediate
+one (Avro), with some operations (like clustering and compaction) potentially incurring this penalty multiple times (on the read-
+and write-paths).
+
+As such, the goal of this effort is to remove the need to convert from engine-specific internal representations to Avro
+while handling records.
+
+## Background
+
+Historically, Avro has settled in as the de-facto intermediate representation of the record payload since the early days of Hudi.
+As the project matured and the scale of installations grew, the necessity to convert into an intermediate representation quickly
+became a noticeable bottleneck in the performance of critical Hudi flows.
+
+At the center of it is the hierarchy of `HoodieRecordPayload`s, which is used to hold an individual record's payload,
+providing APIs like `preCombine` and `combineAndGetUpdateValue` to combine it with other records using some user-defined semantic.
+
+## Implementation
+
+### Revisiting Record Classes Hierarchy
+
+To achieve the stated goal of avoiding unnecessary conversions into the intermediate representation (Avro), the existing Hudi
+workflows operating on individual records will have to be refactored and laid out in a way that is _unassuming about the internal
+representation_ of the record, i.e., code should work with a record as an _opaque object_: exposing certain APIs to access
+crucial data (precombine, primary, and partition keys, etc.), but not providing access to the raw payload.
+
+Having the existing workflows restructured in such a way, around a record being an opaque object, would allow us to encapsulate the
+internal representation of the record within its class hierarchy, which in turn would allow us to hold engine-specific (Spark, Flink, etc.)
+representations of the records without exposing purely engine-agnostic components to them.
+
+The following (high-level) steps are proposed:
+
+1. Promote `HoodieRecord` to become a standardized API for interacting with a single record, which will be
+   1. Replacing all accesses currently going through `HoodieRecordPayload`
+   2. Split into an interface and engine-specific implementations (holding the internal engine-specific representation of the payload)
+   3. Implementing new standardized record-level APIs (like `getPartitionKey`, `getRecordKey`, etc.)
+   4. Staying an **internal** component that will **NOT** contain any user-defined semantics (like merging)
+2. Extract the Record Combining (Merge) API from `HoodieRecordPayload` into a standalone, stateless component (engine). Such a component will be
+   1. Abstracted as a stateless object providing an API to combine records (according to predefined semantics) for the engines (Spark, Flink) of interest
+   2. A plug-in point for user-defined combination semantics
+3. Gradually deprecate, phase out, and eventually remove the `HoodieRecordPayload` abstraction
+
+Phasing out the usage of `HoodieRecordPayload` will also bring the benefit of avoiding Java reflection on the hot path, which
+is known to have poor performance (compared to non-reflection-based instantiation).
+
+#### Combine API Engine
+
+A stateless component interface providing the API for combining records will look like the following:
+
+```java
+interface HoodieRecordCombiningEngine {
+
+  default HoodieRecord precombine(HoodieRecord older, HoodieRecord newer) {
+    if (older instanceof SparkHoodieRecord) {
+      return precombineSpark((SparkHoodieRecord) older, (SparkHoodieRecord) newer);
+    }
+    // dispatch to the other engine-specific overloads (Flink, etc.)
+    throw new UnsupportedOperationException();
+  }
+
+  /**
+   * Spark-specific implementation
+   */
+  SparkHoodieRecord precombineSpark(SparkHoodieRecord older, SparkHoodieRecord newer);
+
+  // ...
+}
+```
+Users can provide their own subclasses implementing this interface for the engines of interest.
+
+#### Migration from `HoodieRecordPayload` to `HoodieRecordCombiningEngine`
+
+To warrant backward compatibility (BWC) on the code level with the already-created subclasses of `HoodieRecordPayload` currently
+used in production by Hudi users, we will provide a BWC bridge in the form of an instance of `HoodieRecordCombiningEngine` that
+uses the user-defined subclass of `HoodieRecordPayload` to combine the records.
+
+Leveraging such a bridge will provide for a seamless BWC migration to the 0.11 release; however, it will negate the performance
+benefit of this refactoring, since it will unavoidably have to perform the conversion to the intermediate representation (Avro).
+To realize the full suite of benefits of this refactoring, users will have to migrate their merging logic out of the
+`HoodieRecordPayload` subclass and into a new `HoodieRecordCombiningEngine` implementation.
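+
+A minimal sketch of such a bridge is shown below (the class and helper names are illustrative; the
+Avro round-trip is deliberately spelled out to highlight the penalty the bridge retains):
+
+```java
+// Illustrative BWC bridge: adapts a legacy HoodieRecordPayload to the new
+// combining engine by round-tripping through Avro, which is exactly the
+// penalty that native HoodieRecordCombiningEngine implementations avoid.
+class HoodieRecordPayloadCombiningBridge implements HoodieRecordCombiningEngine {
+
+  @Override
+  public SparkHoodieRecord precombineSpark(SparkHoodieRecord older, SparkHoodieRecord newer) {
+    // 1. Convert both engine-native records into the legacy Avro-based payloads
+    HoodieRecordPayload olderPayload = toAvroPayload(older);
+    HoodieRecordPayload newerPayload = toAvroPayload(newer);
+    // 2. Delegate to the user-defined legacy combining semantic
+    HoodieRecordPayload combined = newerPayload.preCombine(olderPayload);
+    // 3. Convert the result back into the engine-native representation
+    return fromAvroPayload(combined);
+  }
+
+  // Conversion helpers elided: they perform the Avro de-/serialization that
+  // the refactoring otherwise eliminates.
+  private HoodieRecordPayload toAvroPayload(SparkHoodieRecord record) {
+    throw new UnsupportedOperationException("Avro conversion elided in this sketch");
+  }
+
+  private SparkHoodieRecord fromAvroPayload(HoodieRecordPayload payload) {
+    throw new UnsupportedOperationException("Avro conversion elided in this sketch");
+  }
+}
+```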
+### Refactoring Flows Directly Interacting with Records
+
+As was called out before, to achieve the goal of being able to sustain engine-internal representations being held by the `HoodieRecord`
+class without compromising major components' neutrality (i.e., staying engine-agnostic), the components that directly interact with
+records' payloads today will have to be refactored to instead interact with the standardized `HoodieRecord` API.
+
+The following major components will be refactored:
+
+1. `HoodieWriteHandle`s will be
+   1. Accepting `HoodieRecord` instead of a raw Avro payload (avoiding the Avro conversion)
+   2. Using the Combining API engine to merge records (when necessary)
+   3. Passing `HoodieRecord` as is to the `FileWriter`
+2. `HoodieFileWriter`s will be
+   1. Accepting `HoodieRecord`
+   2. Engine-specific (so that they are able to handle the internal record representation)
+3. `HoodieRealtimeRecordReader`s will be
+   1. Returning an opaque `HoodieRecord` instead of a raw Avro payload
+
+
+## Rollout/Adoption Plan
+
+ - What impact (if any) will there be on existing users?
+   - Users of Hudi will observe considerably better performance for most routine operations (writing, reading, compaction, clustering, etc.) due to avoiding the superfluous intermediate de-/serialization penalty
+   - By default, modified hierarchy would still leverage
+   - Users will need to rebase their record-combining logic from subclasses of `HoodieRecordPayload` onto the newly created `HoodieRecordCombiningEngine` interface to get the full suite of performance benefits
+ - If we are changing behavior, how will we phase out the older behavior?
+   - The older behavior leveraging `HoodieRecordPayload` for merging will be marked as deprecated in 0.11 and subsequently removed in a later 0.1x release
+ - If we need special migration tools, describe them here.
+   - No special migration tools will be necessary (other than the BWC bridge to make sure users can use 0.11 out of the box, and there are no breaking changes to the public API)
+ - When will we remove the existing behavior?
+   - In subsequent releases (either 0.12 or 1.0)
+
+## Test Plan
+
+This refactoring will not be modifying any existing Hudi semantics other than the aforementioned, and as such, to guarantee
+preservation of the logical correctness of the many flows that will be affected by the refactoring, we will rely on the
+existing set of test suites.
+
+Nevertheless, we will run a corresponding set of benchmarks stressing the flows affected by the refactoring to validate
+that there is a considerable performance advantage to abandoning the conversion into the intermediate representation completely.
\ No newline at end of file
diff --git a/style/scalastyle.xml b/style/scalastyle.xml
index 2ba4042be0ca4..74d7b9d73a203 100644
--- a/style/scalastyle.xml
+++ b/style/scalastyle.xml
@@ -113,7 +113,7 @@
 
 
-
+